-rw-r--r--   Makefrag.am               3
-rw-r--r--   kern/kernel.c             2
-rw-r--r--   kern/task.c               4
-rw-r--r--   kern/thread.c           390
-rw-r--r--   kern/thread.h           141
-rw-r--r--   kern/thread_i.h          31
-rw-r--r--   kern/turnstile.c        788
-rw-r--r--   kern/turnstile.h        191
-rw-r--r--   kern/turnstile_types.h   58
9 files changed, 1487 insertions, 121 deletions
diff --git a/Makefrag.am b/Makefrag.am
index 8f4c4b62..8a760759 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -69,6 +69,9 @@ x15_SOURCES += \
kern/thread.c \
kern/thread.h \
kern/thread_i.h \
+ kern/turnstile.c \
+ kern/turnstile.h \
+ kern/turnstile_types.h \
kern/types.h \
kern/work.c \
kern/work.h \
diff --git a/kern/kernel.c b/kern/kernel.c
index 9b8d4eec..cd3d3e3b 100644
--- a/kern/kernel.c
+++ b/kern/kernel.c
@@ -24,6 +24,7 @@
#include <kern/sref.h>
#include <kern/task.h>
#include <kern/thread.h>
+#include <kern/turnstile.h>
#include <kern/work.h>
#include <kern/xcall.h>
#include <machine/cpu.h>
@@ -43,6 +44,7 @@ kernel_main(void)
xcall_setup();
task_setup();
sleepq_setup();
+ turnstile_setup();
thread_setup();
work_setup();
llsync_setup();
diff --git a/kern/task.c b/kern/task.c
index afc2407a..d7d91c17 100644
--- a/kern/task.c
+++ b/kern/task.c
@@ -150,8 +150,8 @@ task_info(struct task *task)
thread_state_to_chr(thread),
thread_wchan_desc(thread),
(unsigned long)thread_wchan_addr(thread),
- thread_sched_class_to_str(thread_sched_class(thread)),
- thread_priority(thread),
+ thread_sched_class_to_str(thread_user_sched_class(thread)),
+ thread_user_priority(thread),
thread->name);
}
diff --git a/kern/thread.c b/kern/thread.c
index 069881c0..3eccebda 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -105,6 +105,7 @@
#include <kern/sref.h>
#include <kern/task.h>
#include <kern/thread.h>
+#include <kern/turnstile.h>
#include <kern/work.h>
#include <machine/atomic.h>
#include <machine/cpu.h>
@@ -261,13 +262,13 @@ struct thread_runq {
* Operations of a scheduling class.
*/
struct thread_sched_ops {
- void (*init_sched)(struct thread *thread, unsigned short priority);
struct thread_runq * (*select_runq)(struct thread *thread);
void (*add)(struct thread_runq *runq, struct thread *thread);
void (*remove)(struct thread_runq *runq, struct thread *thread);
void (*put_prev)(struct thread_runq *runq, struct thread *thread);
struct thread * (*get_next)(struct thread_runq *runq);
- void (*set_priority)(struct thread *thread, unsigned short priority);
+ void (*reset_priority)(struct thread *thread, unsigned short priority);
+ void (*update_priority)(struct thread *thread, unsigned short priority);
unsigned int (*get_global_priority)(unsigned short priority);
void (*set_next)(struct thread_runq *runq, struct thread *thread);
void (*tick)(struct thread_runq *runq, struct thread *thread);
@@ -343,6 +344,13 @@ struct thread_zombie {
struct thread *thread;
};
+static unsigned char
+thread_policy_to_class(unsigned char policy)
+{
+ assert(policy < ARRAY_SIZE(thread_policy_table));
+ return thread_policy_table[policy];
+}
+
static void
thread_set_wchan(struct thread *thread, const void *wchan_addr,
const char *wchan_desc)
@@ -358,15 +366,24 @@ thread_clear_wchan(struct thread *thread)
}
static const struct thread_sched_ops *
-thread_get_sched_ops(const struct thread *thread)
+thread_get_sched_ops(unsigned char sched_class)
{
- unsigned char sched_class;
-
- sched_class = thread_sched_class(thread);
assert(sched_class < ARRAY_SIZE(thread_sched_ops));
return &thread_sched_ops[sched_class];
}
+static const struct thread_sched_ops *
+thread_get_user_sched_ops(const struct thread *thread)
+{
+ return thread_get_sched_ops(thread_user_sched_class(thread));
+}
+
+static const struct thread_sched_ops *
+thread_get_real_sched_ops(const struct thread *thread)
+{
+ return thread_get_sched_ops(thread_real_sched_class(thread));
+}
+
static void __init
thread_runq_init_rt(struct thread_runq *runq)
{
@@ -458,7 +475,7 @@ thread_runq_add(struct thread_runq *runq, struct thread *thread)
spinlock_assert_locked(&runq->lock);
assert(!thread->in_runq);
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
ops->add(runq, thread);
if (runq->nr_threads == 0) {
@@ -467,7 +484,8 @@ thread_runq_add(struct thread_runq *runq, struct thread *thread)
runq->nr_threads++;
- if (thread_sched_class(thread) < thread_sched_class(runq->current)) {
+ if (thread_real_sched_class(thread)
+ < thread_real_sched_class(runq->current)) {
thread_set_flag(runq->current, THREAD_YIELD);
}
@@ -490,7 +508,7 @@ thread_runq_remove(struct thread_runq *runq, struct thread *thread)
cpumap_set_atomic(&thread_idle_runqs, thread_runq_cpu(runq));
}
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
ops->remove(runq, thread);
thread->in_runq = false;
@@ -504,7 +522,7 @@ thread_runq_put_prev(struct thread_runq *runq, struct thread *thread)
assert(!cpu_intr_enabled());
spinlock_assert_locked(&runq->lock);
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
if (ops->put_prev != NULL) {
ops->put_prev(runq, thread);
@@ -538,7 +556,7 @@ thread_runq_set_next(struct thread_runq *runq, struct thread *thread)
{
const struct thread_sched_ops *ops;
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
if (ops->set_next != NULL) {
ops->set_next(runq, thread);
@@ -660,13 +678,6 @@ thread_runq_double_lock(struct thread_runq *a, struct thread_runq *b)
}
}
-static void
-thread_sched_rt_init_sched(struct thread *thread, unsigned short priority)
-{
- assert(priority <= THREAD_SCHED_RT_PRIO_MAX);
- thread->rt_data.time_slice = THREAD_DEFAULT_RR_TIME_SLICE;
-}
-
static struct thread_runq *
thread_sched_rt_select_runq(struct thread *thread)
{
@@ -693,15 +704,17 @@ thread_sched_rt_add(struct thread_runq *runq, struct thread *thread)
struct list *threads;
rt_runq = &runq->rt_runq;
- threads = &rt_runq->threads[thread_priority(thread)];
+ threads = &rt_runq->threads[thread_real_priority(thread)];
list_insert_tail(threads, &thread->rt_data.node);
if (list_singular(threads)) {
- rt_runq->bitmap |= (1ULL << thread_priority(thread));
+ rt_runq->bitmap |= (1ULL << thread_real_priority(thread));
}
- if ((thread_sched_class(thread) == thread_sched_class(runq->current))
- && (thread_priority(thread) > thread_priority(runq->current))) {
+ if ((thread_real_sched_class(thread)
+ == thread_real_sched_class(runq->current))
+ && (thread_real_priority(thread)
+ > thread_real_priority(runq->current))) {
thread_set_flag(runq->current, THREAD_YIELD);
}
}
@@ -713,11 +726,11 @@ thread_sched_rt_remove(struct thread_runq *runq, struct thread *thread)
struct list *threads;
rt_runq = &runq->rt_runq;
- threads = &rt_runq->threads[thread_priority(thread)];
+ threads = &rt_runq->threads[thread_real_priority(thread)];
list_remove(&thread->rt_data.node);
if (list_empty(threads)) {
- rt_runq->bitmap &= ~(1ULL << thread_priority(thread));
+ rt_runq->bitmap &= ~(1ULL << thread_real_priority(thread));
}
}
@@ -749,6 +762,13 @@ thread_sched_rt_get_next(struct thread_runq *runq)
return thread;
}
+static void
+thread_sched_rt_reset_priority(struct thread *thread, unsigned short priority)
+{
+ assert(priority <= THREAD_SCHED_RT_PRIO_MAX);
+ thread->rt_data.time_slice = THREAD_DEFAULT_RR_TIME_SLICE;
+}
+
static unsigned int
thread_sched_rt_get_global_priority(unsigned short priority)
{
@@ -766,7 +786,7 @@ thread_sched_rt_tick(struct thread_runq *runq, struct thread *thread)
{
(void)runq;
- if (thread_sched_policy(thread) != THREAD_SCHED_POLICY_RR) {
+ if (thread_real_sched_policy(thread) != THREAD_SCHED_POLICY_RR) {
return;
}
@@ -786,16 +806,6 @@ thread_sched_fs_prio2weight(unsigned short priority)
return ((priority + 1) * THREAD_FS_ROUND_SLICE_BASE);
}
-static void
-thread_sched_fs_init_sched(struct thread *thread, unsigned short priority)
-{
- assert(priority <= THREAD_SCHED_FS_PRIO_MAX);
- thread->fs_data.fs_runq = NULL;
- thread->fs_data.round = 0;
- thread->fs_data.weight = thread_sched_fs_prio2weight(priority);
- thread->fs_data.work = 0;
-}
-
static struct thread_runq *
thread_sched_fs_select_runq(struct thread *thread)
{
@@ -897,7 +907,7 @@ thread_sched_fs_enqueue(struct thread_fs_runq *fs_runq, unsigned long round,
assert(thread->fs_data.fs_runq == NULL);
assert(thread->fs_data.work <= thread->fs_data.weight);
- group = &fs_runq->group_array[thread_priority(thread)];
+ group = &fs_runq->group_array[thread_real_priority(thread)];
group_weight = group->weight + thread->fs_data.weight;
total_weight = fs_runq->weight + thread->fs_data.weight;
node = (group->weight == 0)
@@ -973,7 +983,7 @@ thread_sched_fs_restart(struct thread_runq *runq)
assert(node != NULL);
fs_runq->current = list_entry(node, struct thread_fs_group, node);
- if (thread_sched_class(runq->current) == THREAD_SCHED_CLASS_FS) {
+ if (thread_real_sched_class(runq->current) == THREAD_SCHED_CLASS_FS) {
thread_set_flag(runq->current, THREAD_YIELD);
}
}
@@ -1009,7 +1019,7 @@ thread_sched_fs_dequeue(struct thread *thread)
assert(thread->fs_data.fs_runq != NULL);
fs_runq = thread->fs_data.fs_runq;
- group = &fs_runq->group_array[thread_priority(thread)];
+ group = &fs_runq->group_array[thread_real_priority(thread)];
thread->fs_data.fs_runq = NULL;
list_remove(&thread->fs_data.runq_node);
@@ -1084,7 +1094,7 @@ thread_sched_fs_put_prev(struct thread_runq *runq, struct thread *thread)
struct thread_fs_group *group;
fs_runq = runq->fs_runq_active;
- group = &fs_runq->group_array[thread_priority(thread)];
+ group = &fs_runq->group_array[thread_real_priority(thread)];
list_insert_tail(&group->threads, &thread->fs_data.group_node);
if (thread->fs_data.work >= thread->fs_data.weight) {
@@ -1152,8 +1162,19 @@ thread_sched_fs_get_next(struct thread_runq *runq)
}
static void
-thread_sched_fs_set_priority(struct thread *thread, unsigned short priority)
+thread_sched_fs_reset_priority(struct thread *thread, unsigned short priority)
+{
+ assert(priority <= THREAD_SCHED_FS_PRIO_MAX);
+ thread->fs_data.fs_runq = NULL;
+ thread->fs_data.round = 0;
+ thread->fs_data.weight = thread_sched_fs_prio2weight(priority);
+ thread->fs_data.work = 0;
+}
+
+static void
+thread_sched_fs_update_priority(struct thread *thread, unsigned short priority)
{
+ assert(priority <= THREAD_SCHED_FS_PRIO_MAX);
thread->fs_data.weight = thread_sched_fs_prio2weight(priority);
if (thread->fs_data.work >= thread->fs_data.weight) {
@@ -1184,7 +1205,7 @@ thread_sched_fs_tick(struct thread_runq *runq, struct thread *thread)
fs_runq = runq->fs_runq_active;
fs_runq->work++;
- group = &fs_runq->group_array[thread_priority(thread)];
+ group = &fs_runq->group_array[thread_real_priority(thread)];
group->work++;
thread_set_flag(thread, THREAD_YIELD);
thread->fs_data.work++;
@@ -1235,7 +1256,8 @@ thread_sched_fs_balance_eligible(struct thread_runq *runq,
if ((nr_threads == 0)
|| ((nr_threads == 1)
- && (thread_sched_class(runq->current) == THREAD_SCHED_CLASS_FS))) {
+ && (thread_real_sched_class(runq->current)
+ == THREAD_SCHED_CLASS_FS))) {
return 0;
}
@@ -1525,39 +1547,40 @@ thread_sched_idle_get_global_priority(unsigned short priority)
return THREAD_SCHED_GLOBAL_PRIO_IDLE;
}
-static const struct thread_sched_ops thread_sched_ops[THREAD_NR_SCHED_CLASSES] = {
+static const struct thread_sched_ops thread_sched_ops[THREAD_NR_SCHED_CLASSES]
+ = {
[THREAD_SCHED_CLASS_RT] = {
- .init_sched = thread_sched_rt_init_sched,
.select_runq = thread_sched_rt_select_runq,
.add = thread_sched_rt_add,
.remove = thread_sched_rt_remove,
.put_prev = thread_sched_rt_put_prev,
.get_next = thread_sched_rt_get_next,
- .set_priority = NULL,
+ .reset_priority = thread_sched_rt_reset_priority,
+ .update_priority = NULL,
.get_global_priority = thread_sched_rt_get_global_priority,
.set_next = thread_sched_rt_set_next,
.tick = thread_sched_rt_tick,
},
[THREAD_SCHED_CLASS_FS] = {
- .init_sched = thread_sched_fs_init_sched,
.select_runq = thread_sched_fs_select_runq,
.add = thread_sched_fs_add,
.remove = thread_sched_fs_remove,
.put_prev = thread_sched_fs_put_prev,
.get_next = thread_sched_fs_get_next,
- .set_priority = thread_sched_fs_set_priority,
+ .reset_priority = thread_sched_fs_reset_priority,
+ .update_priority = thread_sched_fs_update_priority,
.get_global_priority = thread_sched_fs_get_global_priority,
.set_next = thread_sched_fs_set_next,
.tick = thread_sched_fs_tick,
},
[THREAD_SCHED_CLASS_IDLE] = {
- .init_sched = NULL,
.select_runq = thread_sched_idle_select_runq,
.add = thread_sched_idle_add,
.remove = thread_sched_idle_remove,
.put_prev = NULL,
.get_next = thread_sched_idle_get_next,
- .set_priority = NULL,
+ .reset_priority = NULL,
+ .update_priority = NULL,
.get_global_priority = thread_sched_idle_get_global_priority,
.set_next = NULL,
.tick = NULL,
@@ -1565,30 +1588,95 @@ static const struct thread_sched_ops thread_sched_ops[THREAD_NR_SCHED_CLASSES] =
};
static void
-thread_set_sched_policy(struct thread *thread, unsigned char sched_policy)
+thread_set_user_sched_policy(struct thread *thread, unsigned char sched_policy)
+{
+ thread->user_sched_data.sched_policy = sched_policy;
+}
+
+static void
+thread_set_user_sched_class(struct thread *thread, unsigned char sched_class)
+{
+ thread->user_sched_data.sched_class = sched_class;
+}
+
+static void
+thread_set_user_priority(struct thread *thread, unsigned short priority)
+{
+ const struct thread_sched_ops *ops;
+
+ ops = thread_get_user_sched_ops(thread);
+
+ thread->user_sched_data.priority = priority;
+ thread->user_sched_data.global_priority
+ = ops->get_global_priority(priority);
+}
+
+static void
+thread_update_user_priority(struct thread *thread, unsigned short priority)
+{
+ thread_set_user_priority(thread, priority);
+}
+
+static void
+thread_set_real_sched_policy(struct thread *thread, unsigned char sched_policy)
{
- thread->sched_data.sched_policy = sched_policy;
+ thread->real_sched_data.sched_policy = sched_policy;
}
static void
-thread_set_sched_class(struct thread *thread, unsigned char sched_class)
+thread_set_real_sched_class(struct thread *thread, unsigned char sched_class)
{
- thread->sched_data.sched_class = sched_class;
+ thread->real_sched_data.sched_class = sched_class;
}
static void
-thread_set_priority(struct thread *thread, unsigned short priority)
+thread_set_real_priority(struct thread *thread, unsigned short priority)
{
const struct thread_sched_ops *ops;
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
- if (ops->set_priority != NULL) {
- ops->set_priority(thread, priority);
+ thread->real_sched_data.priority = priority;
+ thread->real_sched_data.global_priority
+ = ops->get_global_priority(priority);
+
+ if (ops->reset_priority != NULL) {
+ ops->reset_priority(thread, priority);
+ }
+}
+
+static void
+thread_update_real_priority(struct thread *thread, unsigned short priority)
+{
+ const struct thread_sched_ops *ops;
+
+ ops = thread_get_real_sched_ops(thread);
+
+ thread->real_sched_data.priority = priority;
+ thread->real_sched_data.global_priority
+ = ops->get_global_priority(priority);
+
+ if (ops->update_priority != NULL) {
+ ops->update_priority(thread, priority);
}
+}
+
+static void
+thread_reset_real_priority(struct thread *thread)
+{
+ const struct thread_sched_ops *ops;
+ struct thread_sched_data *user, *real;
- thread->sched_data.priority = priority;
- thread->sched_data.global_priority = ops->get_global_priority(priority);
+ user = &thread->user_sched_data;
+ real = &thread->real_sched_data;
+ *real = *user;
+ thread->boosted = false;
+
+ ops = thread_get_user_sched_ops(thread);
+
+ if (ops->reset_priority != NULL) {
+ ops->reset_priority(thread, real->priority);
+ }
}
static void __init
@@ -1604,9 +1692,10 @@ thread_bootstrap_common(unsigned int cpu)
booter->flags = 0;
booter->preempt = 1;
cpumap_fill(&booter->cpumap);
- thread_set_sched_policy(booter, THREAD_SCHED_POLICY_IDLE);
- thread_set_sched_class(booter, THREAD_SCHED_CLASS_IDLE);
- thread_set_priority(booter, 0);
+ thread_set_user_sched_policy(booter, THREAD_SCHED_POLICY_IDLE);
+ thread_set_user_sched_class(booter, THREAD_SCHED_CLASS_IDLE);
+ thread_set_user_priority(booter, 0);
+ thread_reset_real_priority(booter);
memset(booter->tsd, 0, sizeof(booter->tsd));
booter->task = kernel_task;
thread_runq_init(percpu_ptr(thread_runq, cpu), cpu, booter);
@@ -1620,8 +1709,8 @@ thread_bootstrap(void)
thread_fs_highest_round = THREAD_FS_INITIAL_ROUND;
- thread_bootstrap_common(0);
tcb_set_current(&thread_booters[0].tcb);
+ thread_bootstrap_common(0);
}
void __init
@@ -1677,23 +1766,9 @@ thread_destroy_tsd(struct thread *thread)
}
}
-static void
-thread_init_sched(struct thread *thread, unsigned short priority)
-{
- const struct thread_sched_ops *ops;
-
- ops = thread_get_sched_ops(thread);
-
- if (ops->init_sched != NULL) {
- ops->init_sched(thread, priority);
- }
-
- thread->sched_data.priority = priority;
- thread->sched_data.global_priority = ops->get_global_priority(priority);
-}
-
static int
-thread_init(struct thread *thread, void *stack, const struct thread_attr *attr,
+thread_init(struct thread *thread, void *stack,
+ const struct thread_attr *attr,
void (*fn)(void *), void *arg)
{
struct thread *caller;
@@ -1720,13 +1795,23 @@ thread_init(struct thread *thread, void *stack, const struct thread_attr *attr,
goto error_sleepq;
}
+ thread->priv_turnstile = turnstile_create();
+
+ if (thread->priv_turnstile == NULL) {
+ error = ERROR_NOMEM;
+ goto error_turnstile;
+ }
+
+ turnstile_td_init(&thread->turnstile_td);
+ thread->propagate_priority = false;
thread->preempt = THREAD_SUSPEND_PREEMPT_LEVEL;
thread->pinned = 0;
thread->llsync_read = 0;
cpumap_copy(&thread->cpumap, cpumap);
- thread_set_sched_policy(thread, attr->policy);
- thread_set_sched_class(thread, thread_policy_table[attr->policy]);
- thread_init_sched(thread, attr->priority);
+ thread_set_user_sched_policy(thread, attr->policy);
+ thread_set_user_sched_class(thread, thread_policy_to_class(attr->policy));
+ thread_set_user_priority(thread, attr->priority);
+ thread_reset_real_priority(thread);
memset(thread->tsd, 0, sizeof(thread->tsd));
mutex_init(&thread->join_lock);
condition_init(&thread->join_cond);
@@ -1753,6 +1838,8 @@ thread_init(struct thread *thread, void *stack, const struct thread_attr *attr,
error_tcb:
thread_destroy_tsd(thread);
+ turnstile_destroy(thread->priv_turnstile);
+error_turnstile:
sleepq_destroy(thread->priv_sleepq);
error_sleepq:
return error;
@@ -1799,6 +1886,7 @@ thread_destroy(struct thread *thread)
thread_destroy_tsd(thread);
task_remove_thread(thread->task, thread);
+ turnstile_destroy(thread->priv_turnstile);
sleepq_destroy(thread->priv_sleepq);
kmem_cache_free(&thread_stack_cache, thread->stack);
kmem_cache_free(&thread_cache, thread);
@@ -2224,7 +2312,7 @@ thread_wakeup(struct thread *thread)
thread->state = THREAD_RUNNING;
} else {
/*
- * If another wakeup was attempted right before this one, the thread
+ * If another wake-up was attempted right before this one, the thread
* may currently be pushed on a remote run queue, and the run queue
* being locked here is actually the previous one. The run queue
* pointer may be modified concurrently, now being protected by the
@@ -2249,7 +2337,7 @@ thread_wakeup(struct thread *thread)
cpu_intr_save(&flags);
if (!thread->pinned) {
- runq = thread_sched_ops[thread_sched_class(thread)].select_runq(thread);
+ runq = thread_get_real_sched_ops(thread)->select_runq(thread);
} else {
runq = thread->runq;
spinlock_lock(&runq->lock);
@@ -2341,7 +2429,7 @@ thread_tick_intr(void)
thread_balance_idle_tick(runq);
}
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
if (ops->tick != NULL) {
ops->tick(runq, thread);
@@ -2350,6 +2438,7 @@ thread_tick_intr(void)
spinlock_unlock(&runq->lock);
}
+/* TODO Move outside */
char
thread_state_to_chr(const struct thread *thread)
{
@@ -2365,10 +2454,11 @@ thread_state_to_chr(const struct thread *thread)
}
}
+/* TODO Move outside */
const char *
-thread_sched_class_to_str(unsigned char sched_policy)
+thread_sched_class_to_str(unsigned char sched_class)
{
- switch (sched_policy) {
+ switch (sched_class) {
case THREAD_SCHED_CLASS_RT:
return "rt";
case THREAD_SCHED_CLASS_FS:
@@ -2385,13 +2475,96 @@ thread_setscheduler(struct thread *thread, unsigned char policy,
unsigned short priority)
{
struct thread_runq *runq;
+ struct turnstile_td *td;
+ unsigned long flags;
+ bool requeue, current, update;
+
+ td = thread_turnstile_td(thread);
+
+ turnstile_td_lock(td);
+ runq = thread_lock_runq(thread, &flags);
+
+ if ((thread_user_sched_policy(thread) == policy)
+ && (thread_user_priority(thread) == priority)) {
+ goto out;
+ }
+
+ requeue = thread->in_runq;
+
+ if (!requeue) {
+ current = false;
+ } else {
+ if (thread != runq->current) {
+ current = false;
+ } else {
+ thread_runq_put_prev(runq, thread);
+ current = true;
+ }
+
+ thread_runq_remove(runq, thread);
+ }
+
+ if (thread_user_sched_policy(thread) == policy) {
+ thread_update_user_priority(thread, priority);
+ update = true;
+ } else {
+ thread_set_user_sched_policy(thread, policy);
+ thread_set_user_sched_class(thread, thread_policy_to_class(policy));
+ thread_set_user_priority(thread, priority);
+ update = false;
+ }
+
+ if (thread->boosted) {
+ if (thread_user_global_priority(thread)
+ >= thread_real_global_priority(thread)) {
+ thread_reset_real_priority(thread);
+ }
+ } else {
+ if (update) {
+ thread_update_real_priority(thread, priority);
+ } else {
+ thread_set_real_sched_policy(thread, policy);
+ thread_set_real_sched_class(thread, thread_policy_to_class(policy));
+ thread_set_real_priority(thread, priority);
+ }
+ }
+
+ if (requeue) {
+ thread_runq_add(runq, thread);
+
+ if (current) {
+ thread_runq_set_next(runq, thread);
+ }
+ }
+
+out:
+ thread_unlock_runq(runq, flags);
+ turnstile_td_unlock(td);
+
+ turnstile_td_propagate_priority(td);
+}
+
+void
+thread_pi_setscheduler(struct thread *thread, unsigned char policy,
+ unsigned short priority)
+{
+ const struct thread_sched_ops *ops;
+ struct thread_runq *runq;
+ struct turnstile_td *td;
+ unsigned int global_priority;
unsigned long flags;
bool requeue, current;
+ td = thread_turnstile_td(thread);
+ turnstile_td_assert_lock(td);
+
+ ops = thread_get_sched_ops(thread_policy_to_class(policy));
+ global_priority = ops->get_global_priority(priority);
+
runq = thread_lock_runq(thread, &flags);
- if ((thread_sched_policy(thread) == policy)
- && (thread_priority(thread) == priority)) {
+ if ((thread_real_sched_policy(thread) == policy)
+ && (thread_real_priority(thread) == priority)) {
goto out;
}
@@ -2410,13 +2583,18 @@ thread_setscheduler(struct thread *thread, unsigned char policy,
thread_runq_remove(runq, thread);
}
- if (thread_sched_policy(thread) == policy) {
- thread_set_priority(thread, priority);
+ if (global_priority <= thread_user_global_priority(thread)) {
+ thread_reset_real_priority(thread);
} else {
- thread_set_sched_policy(thread, policy);
- assert(policy < ARRAY_SIZE(thread_policy_table));
- thread_set_sched_class(thread, thread_policy_table[policy]);
- thread_init_sched(thread, priority);
+ if (thread_real_sched_policy(thread) == policy) {
+ thread_update_real_priority(thread, priority);
+ } else {
+ thread_set_real_sched_policy(thread, policy);
+ thread_set_real_sched_class(thread, thread_policy_to_class(policy));
+ thread_set_real_priority(thread, priority);
+ }
+
+ thread->boosted = true;
}
if (requeue) {
@@ -2432,6 +2610,28 @@ out:
}
void
+thread_propagate_priority(void)
+{
+ struct thread *thread;
+
+ /*
+ * Although it's possible to propagate priority with preemption
+ * disabled, the operation can be too expensive to allow it.
+ */
+ if (!thread_preempt_enabled()) {
+ thread_set_priority_propagation_needed();
+ return;
+ }
+
+ thread = thread_self();
+
+ /* Clear before propagation to avoid infinite recursion */
+ thread->propagate_priority = false;
+
+ turnstile_td_propagate_priority(thread_turnstile_td(thread));
+}
+
+void
thread_key_create(unsigned int *keyp, thread_dtor_fn_t dtor)
{
unsigned int key;
diff --git a/kern/thread.h b/kern/thread.h
index 1542e7ae..15babbfc 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -33,30 +33,23 @@
#ifndef _KERN_THREAD_H
#define _KERN_THREAD_H
+#include <stdbool.h>
+#include <stddef.h>
+
#include <kern/assert.h>
#include <kern/cpumap.h>
#include <kern/macros.h>
+#include <kern/spinlock_types.h>
+#include <kern/turnstile_types.h>
#include <machine/atomic.h>
#include <machine/tcb.h>
/*
- * Forward declaration
- */
-struct spinlock;
-
-/*
* Thread structure.
*/
struct thread;
/*
- * Thread name buffer size.
- */
-#define THREAD_NAME_SIZE 32
-
-/*
- * Common scheduling data.
- *
* The global priority of a thread is meant to be compared against
* another global priority to determine which thread has higher priority.
*/
@@ -67,6 +60,11 @@ struct thread_sched_data {
unsigned int global_priority;
};
+/*
+ * Thread name buffer size.
+ */
+#define THREAD_NAME_SIZE 32
+
#include <kern/thread_i.h>
#define THREAD_KERNEL_PREFIX PACKAGE "_"
@@ -261,6 +259,15 @@ void thread_tick_intr(void);
void thread_setscheduler(struct thread *thread, unsigned char policy,
unsigned short priority);
+/*
+ * Variant used for priority inheritance.
+ *
+ * The caller must hold the turnstile thread data lock and no turnstile
+ * locks when calling this function.
+ */
+void thread_pi_setscheduler(struct thread *thread, unsigned char policy,
+ unsigned short priority);
+
static inline void
thread_ref(struct thread *thread)
{
@@ -301,33 +308,72 @@ thread_wchan_desc(const struct thread *thread)
char thread_state_to_chr(const struct thread *thread);
static inline const struct thread_sched_data *
-thread_get_sched_data(const struct thread *thread)
+thread_get_user_sched_data(const struct thread *thread)
+{
+ return &thread->user_sched_data;
+}
+
+static inline const struct thread_sched_data *
+thread_get_real_sched_data(const struct thread *thread)
+{
+ return &thread->real_sched_data;
+}
+
+/*
+ * If the caller requires the scheduling data to be stable, it
+ * must lock one of the following objects :
+ * - the containing run queue
+ * - the per-thread turnstile data (turnstile_td)
+ *
+ * Both are locked when scheduling data are updated.
+ */
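As an illustration only (not part of this patch), a caller that needs a stable
view of another thread's scheduling data could take the per-thread turnstile
data lock around the accessors; turnstile_td_lock()/turnstile_td_unlock() are
the locking helpers this commit adds in turnstile.h:

    struct turnstile_td *td;
    unsigned short priority;

    td = thread_turnstile_td(thread);
    turnstile_td_lock(td);
    /* Stable while the turnstile_td lock is held */
    priority = thread_real_priority(thread);
    turnstile_td_unlock(td);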
+
+static inline unsigned char
+thread_user_sched_policy(const struct thread *thread)
+{
+ return thread_get_user_sched_data(thread)->sched_policy;
+}
+
+static inline unsigned char
+thread_user_sched_class(const struct thread *thread)
{
- return &thread->sched_data;
+ return thread_get_user_sched_data(thread)->sched_class;
+}
+
+static inline unsigned short
+thread_user_priority(const struct thread *thread)
+{
+ return thread_get_user_sched_data(thread)->priority;
+}
+
+static inline unsigned int
+thread_user_global_priority(const struct thread *thread)
+{
+ return thread_get_user_sched_data(thread)->global_priority;
}
static inline unsigned char
-thread_sched_policy(const struct thread *thread)
+thread_real_sched_policy(const struct thread *thread)
{
- return thread_get_sched_data(thread)->sched_policy;
+ return thread_get_real_sched_data(thread)->sched_policy;
}
static inline unsigned char
-thread_sched_class(const struct thread *thread)
+thread_real_sched_class(const struct thread *thread)
{
- return thread_get_sched_data(thread)->sched_class;
+ return thread_get_real_sched_data(thread)->sched_class;
}
static inline unsigned short
-thread_priority(const struct thread *thread)
+thread_real_priority(const struct thread *thread)
{
- return thread_get_sched_data(thread)->priority;
+ return thread_get_real_sched_data(thread)->priority;
}
static inline unsigned int
-thread_global_priority(const struct thread *thread)
+thread_real_global_priority(const struct thread *thread)
{
- return thread_get_sched_data(thread)->global_priority;
+ return thread_get_real_sched_data(thread)->global_priority;
}
/*
@@ -390,6 +436,53 @@ thread_sleepq_return(struct sleepq *sleepq)
}
/*
+ * Turnstile lending functions.
+ */
+
+static inline struct turnstile *
+thread_turnstile_lend(void)
+{
+ struct turnstile *turnstile;
+
+ turnstile = thread_self()->priv_turnstile;
+ assert(turnstile != NULL);
+ thread_self()->priv_turnstile = NULL;
+ return turnstile;
+}
+
+static inline void
+thread_turnstile_return(struct turnstile *turnstile)
+{
+ assert(turnstile != NULL);
+ assert(thread_self()->priv_turnstile == NULL);
+ thread_self()->priv_turnstile = turnstile;
+}
+
+static inline struct turnstile_td *
+thread_turnstile_td(struct thread *thread)
+{
+ return &thread->turnstile_td;
+}
+
+/*
+ * Priority propagation functions.
+ */
+
+static inline bool
+thread_priority_propagation_needed(void)
+{
+ return thread_self()->propagate_priority;
+}
+
+static inline void
+thread_set_priority_propagation_needed(void)
+{
+ thread_self()->propagate_priority = true;
+}
+
+void thread_propagate_priority(void);
+
+/*
* Migration control functions.
*
* Functions that change the migration state are implicit compiler barriers.
@@ -444,6 +537,10 @@ thread_preempt_enable_no_resched(void)
thread = thread_self();
assert(thread->preempt != 0);
thread->preempt--;
+
+ if (thread_preempt_enabled() && thread_priority_propagation_needed()) {
+ thread_propagate_priority();
+ }
}
static inline void
diff --git a/kern/thread_i.h b/kern/thread_i.h
index 55f96082..865787a0 100644
--- a/kern/thread_i.h
+++ b/kern/thread_i.h
@@ -26,6 +26,7 @@
#include <kern/macros.h>
#include <kern/mutex_types.h>
#include <kern/param.h>
+#include <kern/turnstile_types.h>
#include <machine/atomic.h>
#include <machine/tcb.h>
@@ -103,6 +104,19 @@ struct thread {
/* Sleep queue available for lending */
struct sleepq *priv_sleepq;
+ /* Turnstile available for lending */
+ struct turnstile *priv_turnstile;
+
+ /* Per-thread turnstile data */
+ struct turnstile_td turnstile_td;
+
+ /*
+ * True if priority must be propagated when preemption is reenabled
+ *
+ * This member is thread-local.
+ */
+ bool propagate_priority;
+
/* Thread-local members */
unsigned short preempt;
unsigned short pinned;
@@ -111,8 +125,21 @@ struct thread {
/* Processors on which this thread is allowed to run */
struct cpumap cpumap;
- /* Scheduling data */
- struct thread_sched_data sched_data;
+ /*
+ * Scheduling data.
+ */
+ struct thread_sched_data user_sched_data; /* User-provided */
+ struct thread_sched_data real_sched_data; /* Computed from
+ priority propagation */
+
+ /*
+ * True if the real scheduling data are not the user scheduling data.
+ *
+ * Note that it doesn't provide any information about priority inheritance.
+ * A thread may be part of a priority inheritance chain without its
+ * priority being boosted.
+ */
+ bool boosted;
/* Class specific scheduling data */
union {
diff --git a/kern/turnstile.c b/kern/turnstile.c
new file mode 100644
index 00000000..43629376
--- /dev/null
+++ b/kern/turnstile.c
@@ -0,0 +1,788 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation is based on "Solaris(tm) Internals: Solaris 10 and
+ * OpenSolaris Kernel Architecture, Second Edition" by Richard McDougall
+ * and Jim Mauro. See "Part Six: Platform Specifics, Chapter 17: Locking
+ * and Synchronization, Section 7 Turnstiles and Priority Inheritance".
+ *
+ * The differences are outlined below.
+ *
+ * This implementation doesn't support read/write locks, only mutual
+ * exclusion, because ownership doesn't apply well to read/write locks.
+ *
+ * Instead of using an external sleep queue object, this module implements
+ * that functionality itself. The reasons behind this decision are :
+ * - the use of expensive priority lists to queue threads, which a
+ * simpler sleep queue implementation shouldn't have to use
+ * - increased coupling with the scheduler
+ *
+ * Locking order : bucket (turnstile) -> turnstile_td -> thread_runq
+ *
+ * This order allows changing the owner of a turnstile without unlocking it
+ * which is important because a turnstile is normally used to synchronize
+ * access to the owner. Unlocking a turnstile would allow the owner to
+ * change and make complex transient states visible. The drawback is that
+ * a thread cannot be requeued atomically when its priority is changed.
+ * That deferred requeue is done during priority propagation.
+ *
+ * TODO Analyse hash parameters.
+ */
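To make the locking order above concrete, here is a rough sketch of the nominal
nesting (an illustration only, not code from this file; the propagation loop
below takes the turnstile_td lock first and therefore uses a trylock on the
bucket to avoid violating this order):

    spinlock_lock(&bucket->lock);       /* bucket (turnstile) */
    spinlock_lock(&td->lock);           /* turnstile_td */
    /* thread_pi_setscheduler() locks the thread_runq internally */
    thread_pi_setscheduler(thread, policy, priority);
    spinlock_unlock(&td->lock);
    spinlock_unlock(&bucket->lock);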
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <kern/assert.h>
+#include <kern/init.h>
+#include <kern/kmem.h>
+#include <kern/list.h>
+#include <kern/macros.h>
+#include <kern/param.h>
+#include <kern/plist.h>
+#include <kern/spinlock.h>
+#include <kern/thread.h>
+#include <kern/turnstile.h>
+#include <kern/turnstile_types.h>
+
+/*
+ * Locking keys :
+ * (b) bucket
+ */
+struct turnstile_bucket {
+ struct spinlock lock;
+ struct list list; /* (b) */
+} __aligned(CPU_L1_SIZE);
+
+/*
+ * Adding/removing waiters to/from a turnstile are performed while
+ * holding both the waiter's thread data and the turnstile locks.
+ *
+ * Changing the owner of a turnstile and linking/unlinking the turnstile
+ * into/from the owner's list of owned turnstiles are done atomically,
+ * while holding both the owner's thread data and the turnstile locks.
+ *
+ * Locking keys :
+ * (b) bucket
+ * (t) turnstile_td
+ *
+ * (*) A turnstile is referenced in thread data after/before being
+ * added/removed to/from its bucket. Referencing a turnstile in
+ * thread data requires holding the thread data lock. This implies
+ * that, while holding the thread data lock, if the referenced
+ * turnstile isn't NULL, the bucket pointer is also not NULL and
+ * stable.
+ */
+struct turnstile {
+ struct turnstile_bucket *bucket; /* (b*) */
+ struct list node; /* (b) */
+ const void *sync_obj; /* (b) */
+ struct plist waiters; /* (b,t) */
+ struct turnstile *next_free; /* (b) */
+ struct turnstile_waiter *top_waiter; /* (b,t) */
+ struct thread *owner; /* (b,t) */
+ struct plist_node td_node; /* (t) */
+};
+
+/*
+ * Locking keys :
+ * (b) bucket
+ * (t) turnstile_td
+ */
+struct turnstile_waiter {
+ struct plist_node node; /* (b,t) */
+ struct thread *thread; /* (b,t) */
+ bool awaken; /* (b) */
+};
+
+#define TURNSTILE_HTABLE_SIZE 128
+
+#if !ISP2(TURNSTILE_HTABLE_SIZE)
+#error hash table size must be a power of two
+#endif /* !ISP2(TURNSTILE_HTABLE_SIZE) */
+
+#define TURNSTILE_HTABLE_MASK (TURNSTILE_HTABLE_SIZE - 1)
+
+static struct turnstile_bucket turnstile_htable[TURNSTILE_HTABLE_SIZE];
+
+static struct kmem_cache turnstile_cache;
+
+static uintptr_t
+turnstile_hash(const void *addr)
+{
+ return ((uintptr_t)addr >> 8) ^ (uintptr_t)addr;
+}
+
+static void
+turnstile_waiter_init(struct turnstile_waiter *waiter, struct thread *thread)
+{
+ plist_node_init(&waiter->node, thread_real_global_priority(thread));
+ waiter->thread = thread;
+ waiter->awaken = false;
+}
+
+static unsigned int
+turnstile_waiter_priority(const struct turnstile_waiter *waiter)
+{
+ return plist_node_priority(&waiter->node);
+}
+
+static bool
+turnstile_waiter_awaken(const struct turnstile_waiter *waiter)
+{
+ return waiter->awaken;
+}
+
+static void
+turnstile_waiter_set_awaken(struct turnstile_waiter *waiter)
+{
+ waiter->awaken = true;
+}
+
+static void
+turnstile_waiter_clear_awaken(struct turnstile_waiter *waiter)
+{
+ waiter->awaken = false;
+}
+
+static void
+turnstile_waiter_wakeup(struct turnstile_waiter *waiter)
+{
+ if (turnstile_waiter_awaken(waiter)) {
+ return;
+ }
+
+ thread_wakeup(waiter->thread);
+ turnstile_waiter_set_awaken(waiter);
+}
+
+static void
+turnstile_update_top_waiter(struct turnstile *turnstile)
+{
+ if (turnstile_empty(turnstile)) {
+ turnstile->top_waiter = NULL;
+ return;
+ }
+
+ turnstile->top_waiter = plist_last_entry(&turnstile->waiters,
+ struct turnstile_waiter, node);
+}
+
+static void
+turnstile_add_waiter(struct turnstile *turnstile,
+ struct turnstile_waiter *waiter)
+{
+ assert(!turnstile_waiter_awaken(waiter));
+ plist_add(&turnstile->waiters, &waiter->node);
+ turnstile_update_top_waiter(turnstile);
+}
+
+static void
+turnstile_remove_waiter(struct turnstile *turnstile,
+ struct turnstile_waiter *waiter)
+{
+ assert(turnstile_waiter_awaken(waiter));
+ plist_remove(&turnstile->waiters, &waiter->node);
+ turnstile_update_top_waiter(turnstile);
+}
+
+static void
+turnstile_waiter_requeue(struct turnstile *turnstile,
+ struct turnstile_waiter *waiter)
+{
+ unsigned int global_priority;
+
+ global_priority = thread_real_global_priority(waiter->thread);
+ assert(global_priority != plist_node_priority(&waiter->node));
+
+ plist_remove(&turnstile->waiters, &waiter->node);
+ plist_node_set_priority(&waiter->node, global_priority);
+ plist_add(&turnstile->waiters, &waiter->node);
+ turnstile_update_top_waiter(turnstile);
+}
+
+static void
+turnstile_td_set_waiter(struct turnstile_td *td,
+ struct turnstile_waiter *waiter)
+{
+ td->waiter = waiter;
+}
+
+static struct turnstile_waiter *
+turnstile_td_get_waiter(const struct turnstile_td *td)
+{
+ return td->waiter;
+}
+
+static void
+turnstile_td_update_top_priority(struct turnstile_td *td)
+{
+ struct turnstile_waiter *top_waiter;
+ struct turnstile *top_turnstile;
+
+ if (plist_empty(&td->owned_turnstiles)) {
+ td->top_global_priority = 0;
+ return;
+ }
+
+ top_turnstile = plist_last_entry(&td->owned_turnstiles,
+ struct turnstile, td_node);
+ top_waiter = top_turnstile->top_waiter;
+
+ if (top_waiter == NULL) {
+ td->top_global_priority = 0;
+ } else {
+ td->top_global_priority = turnstile_waiter_priority(top_waiter);
+ td->top_sched_policy = thread_real_sched_policy(top_waiter->thread);
+ td->top_priority = thread_real_priority(top_waiter->thread);
+ }
+}
+
+static void
+turnstile_td_own(struct turnstile_td *td, struct turnstile *turnstile)
+{
+ struct turnstile_waiter *top_waiter;
+ unsigned int top_priority;
+
+ assert(turnstile->owner == NULL);
+
+ top_waiter = turnstile->top_waiter;
+ assert(top_waiter != NULL);
+ top_priority = thread_real_global_priority(top_waiter->thread);
+ plist_node_init(&turnstile->td_node, top_priority);
+ plist_add(&td->owned_turnstiles, &turnstile->td_node);
+ turnstile_td_update_top_priority(td);
+
+ turnstile->owner = structof(td, struct thread, turnstile_td);
+}
+
+static void
+turnstile_td_disown(struct turnstile_td *td, struct turnstile *turnstile)
+{
+ assert(turnstile->owner == structof(td, struct thread, turnstile_td));
+
+ assert(!plist_node_unlinked(&turnstile->td_node));
+ plist_remove(&td->owned_turnstiles, &turnstile->td_node);
+ turnstile_td_update_top_priority(td);
+
+ turnstile->owner = NULL;
+}
+
+static void
+turnstile_td_reown(struct turnstile_td *td, struct turnstile *turnstile)
+{
+ assert(turnstile->owner == structof(td, struct thread, turnstile_td));
+ assert(!plist_node_unlinked(&turnstile->td_node));
+ plist_remove(&td->owned_turnstiles, &turnstile->td_node);
+ turnstile->owner = NULL;
+ turnstile_td_own(td, turnstile);
+}
+
+/*
+ * The caller must hold the turnstile thread data lock and no turnstile
+ * locks when calling this function. The thread data are unlocked on return.
+ *
+ * In addition, this function drops a reference on the thread associated
+ * with the given thread data.
+ */
+static void
+turnstile_td_propagate_priority_loop(struct turnstile_td *td)
+{
+ unsigned int user_priority, real_priority, top_priority;
+ struct turnstile_waiter *waiter;
+ struct turnstile *turnstile;
+ struct thread *thread;
+ unsigned short priority;
+ unsigned char policy;
+ int error;
+
+ /*
+ * At the very least, this function must make sure that :
+ * - the given thread has its intended priority, which is the
+ * highest among its own and all the waiters in the turnstiles
+ * it owns, and
+ * - the thread is at its intended position in the turnstile it's
+ * waiting on, if any.
+ */
+
+ for (;;) {
+ thread = structof(td, struct thread, turnstile_td);
+ user_priority = thread_user_global_priority(thread);
+ real_priority = thread_real_global_priority(thread);
+ top_priority = td->top_global_priority;
+
+ if (top_priority > user_priority) {
+ policy = td->top_sched_policy;
+ priority = td->top_priority;
+ } else {
+ top_priority = user_priority;
+ policy = thread_user_sched_policy(thread);
+ priority = thread_user_priority(thread);
+ }
+
+ if (top_priority != real_priority) {
+ thread_pi_setscheduler(thread, policy, priority);
+ }
+
+ waiter = turnstile_td_get_waiter(td);
+
+ if ((waiter == NULL)
+ || (top_priority == turnstile_waiter_priority(waiter))) {
+ spinlock_unlock(&td->lock);
+ thread_unref(thread);
+ return;
+ }
+
+ turnstile = turnstile_td_get_turnstile(td);
+ assert(turnstile != NULL);
+
+ error = spinlock_trylock(&turnstile->bucket->lock);
+
+ if (error) {
+ spinlock_unlock(&td->lock);
+ spinlock_lock(&td->lock);
+ continue;
+ }
+
+ /*
+ * This couldn't be done while changing the thread's priority
+ * because of locking restrictions. Do it now.
+ */
+ turnstile_waiter_requeue(turnstile, waiter);
+
+ spinlock_unlock(&td->lock);
+ thread_unref(thread);
+
+ thread = turnstile->owner;
+
+ if (thread == NULL) {
+ break;
+ }
+
+ td = thread_turnstile_td(thread);
+
+ thread_ref(thread);
+ spinlock_lock(&td->lock);
+
+ turnstile_td_reown(td, turnstile);
+
+ spinlock_unlock(&turnstile->bucket->lock);
+ }
+
+ spinlock_unlock(&turnstile->bucket->lock);
+}
+
+void
+turnstile_td_propagate_priority(struct turnstile_td *td)
+{
+ struct thread *thread;
+
+ thread = structof(td, struct thread, turnstile_td);
+
+ thread_ref(thread);
+ spinlock_lock(&td->lock);
+ turnstile_td_propagate_priority_loop(td);
+}
+
+static void
+turnstile_assert_init_state(const struct turnstile *turnstile)
+{
+ assert(turnstile->bucket == NULL);
+ assert(turnstile->sync_obj == NULL);
+ assert(plist_empty(&turnstile->waiters));
+ assert(turnstile->next_free == NULL);
+ assert(turnstile->top_waiter == NULL);
+ assert(turnstile->owner == NULL);
+}
+
+static void
+turnstile_use(struct turnstile *turnstile, const void *sync_obj)
+{
+ assert(turnstile->sync_obj == NULL);
+ turnstile->sync_obj = sync_obj;
+}
+
+static void
+turnstile_unuse(struct turnstile *turnstile)
+{
+ assert(turnstile->sync_obj != NULL);
+ turnstile->sync_obj = NULL;
+}
+
+static bool
+turnstile_in_use(const struct turnstile *turnstile)
+{
+ return turnstile->sync_obj != NULL;
+}
+
+static bool
+turnstile_in_use_by(const struct turnstile *turnstile, const void *sync_obj)
+{
+ return turnstile->sync_obj == sync_obj;
+}
+
+static void
+turnstile_bucket_init(struct turnstile_bucket *bucket)
+{
+ spinlock_init(&bucket->lock);
+ list_init(&bucket->list);
+}
+
+static struct turnstile_bucket *
+turnstile_bucket_get(const void *sync_obj)
+{
+ uintptr_t index;
+
+ index = turnstile_hash(sync_obj) & TURNSTILE_HTABLE_MASK;
+ assert(index < ARRAY_SIZE(turnstile_htable));
+ return &turnstile_htable[index];
+}
+
+static void
+turnstile_bucket_add(struct turnstile_bucket *bucket,
+ struct turnstile *turnstile)
+{
+ assert(turnstile->bucket == NULL);
+ turnstile->bucket = bucket;
+ list_insert_tail(&bucket->list, &turnstile->node);
+}
+
+static void
+turnstile_bucket_remove(struct turnstile_bucket *bucket,
+ struct turnstile *turnstile)
+{
+ assert(turnstile->bucket == bucket);
+ turnstile->bucket = NULL;
+ list_remove(&turnstile->node);
+}
+
+static struct turnstile *
+turnstile_bucket_lookup(const struct turnstile_bucket *bucket,
+ const void *sync_obj)
+{
+ struct turnstile *turnstile;
+
+ list_for_each_entry(&bucket->list, turnstile, node) {
+ if (turnstile_in_use_by(turnstile, sync_obj)) {
+ return turnstile;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+turnstile_ctor(void *ptr)
+{
+ struct turnstile *turnstile;
+
+ turnstile = ptr;
+ turnstile->bucket = NULL;
+ turnstile->sync_obj = NULL;
+ plist_init(&turnstile->waiters);
+ turnstile->next_free = NULL;
+ turnstile->top_waiter = NULL;
+ turnstile->owner = NULL;
+}
+
+void __init
+turnstile_setup(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(turnstile_htable); i++) {
+ turnstile_bucket_init(&turnstile_htable[i]);
+ }
+
+ kmem_cache_init(&turnstile_cache, "turnstile", sizeof(struct turnstile),
+ CPU_L1_SIZE, turnstile_ctor, 0);
+}
+
+struct turnstile *
+turnstile_create(void)
+{
+ struct turnstile *turnstile;
+
+ turnstile = kmem_cache_alloc(&turnstile_cache);
+
+ if (turnstile == NULL) {
+ return NULL;
+ }
+
+ turnstile_assert_init_state(turnstile);
+ return turnstile;
+}
+
+void
+turnstile_destroy(struct turnstile *turnstile)
+{
+ turnstile_assert_init_state(turnstile);
+ kmem_cache_free(&turnstile_cache, turnstile);
+}
+
+struct turnstile *
+turnstile_acquire(const void *sync_obj)
+{
+ struct turnstile_bucket *bucket;
+ struct turnstile *turnstile;
+
+ assert(sync_obj != NULL);
+
+ bucket = turnstile_bucket_get(sync_obj);
+
+ spinlock_lock(&bucket->lock);
+
+ turnstile = turnstile_bucket_lookup(bucket, sync_obj);
+
+ if (turnstile == NULL) {
+ spinlock_unlock(&bucket->lock);
+ return NULL;
+ }
+
+ return turnstile;
+}
+
+void
+turnstile_release(struct turnstile *turnstile)
+{
+ spinlock_unlock(&turnstile->bucket->lock);
+}
+
+static void
+turnstile_push_free(struct turnstile *turnstile,
+ struct turnstile *free_turnstile)
+{
+ assert(free_turnstile->next_free == NULL);
+ free_turnstile->next_free = turnstile->next_free;
+ turnstile->next_free = free_turnstile;
+}
+
+static struct turnstile *
+turnstile_pop_free(struct turnstile *turnstile)
+{
+ struct turnstile *free_turnstile;
+
+ free_turnstile = turnstile->next_free;
+
+ if (free_turnstile == NULL) {
+ return NULL;
+ }
+
+ turnstile->next_free = free_turnstile->next_free;
+ free_turnstile->next_free = NULL;
+ return free_turnstile;
+}
+
+struct turnstile *
+turnstile_lend(const void *sync_obj)
+{
+ struct turnstile_bucket *bucket;
+ struct turnstile *turnstile, *prev;
+ struct turnstile_td *td;
+
+ assert(sync_obj != NULL);
+
+ turnstile = thread_turnstile_lend();
+ turnstile_assert_init_state(turnstile);
+
+ td = thread_turnstile_td(thread_self());
+ bucket = turnstile_bucket_get(sync_obj);
+
+ spinlock_lock(&bucket->lock);
+
+ prev = turnstile_bucket_lookup(bucket, sync_obj);
+
+ if (prev == NULL) {
+ turnstile_use(turnstile, sync_obj);
+ turnstile_bucket_add(bucket, turnstile);
+ } else {
+ turnstile_push_free(prev, turnstile);
+ turnstile = prev;
+ }
+
+ spinlock_lock(&td->lock);
+ turnstile_td_set_turnstile(td, turnstile);
+ spinlock_unlock(&td->lock);
+
+ return turnstile;
+}
+
+void
+turnstile_return(struct turnstile *turnstile)
+{
+ struct turnstile_bucket *bucket;
+ struct turnstile *free_turnstile;
+ struct turnstile_td *td;
+
+ assert(turnstile_in_use(turnstile));
+
+ td = thread_turnstile_td(thread_self());
+
+ spinlock_lock(&td->lock);
+ turnstile_td_set_turnstile(td, NULL);
+ spinlock_unlock(&td->lock);
+
+ bucket = turnstile->bucket;
+ free_turnstile = turnstile_pop_free(turnstile);
+
+ if (free_turnstile == NULL) {
+ turnstile_bucket_remove(bucket, turnstile);
+ turnstile_unuse(turnstile);
+ free_turnstile = turnstile;
+ }
+
+ spinlock_unlock(&bucket->lock);
+
+ turnstile_assert_init_state(free_turnstile);
+ thread_turnstile_return(free_turnstile);
+}
+
+bool
+turnstile_empty(const struct turnstile *turnstile)
+{
+ return plist_empty(&turnstile->waiters);
+}
+
+static void
+turnstile_set_owner(struct turnstile *turnstile, struct thread *owner)
+{
+ struct turnstile_td *td;
+
+ assert(owner != NULL);
+ assert((turnstile->owner == NULL) || (turnstile->owner == owner));
+
+ td = thread_turnstile_td(owner);
+
+ thread_ref(owner);
+ spinlock_lock(&td->lock);
+
+ if (turnstile->owner == NULL) {
+ turnstile_td_own(td, turnstile);
+ }
+
+ spinlock_unlock(&turnstile->bucket->lock);
+
+ turnstile_td_propagate_priority_loop(td);
+
+ spinlock_lock(&turnstile->bucket->lock);
+}
+
+void
+turnstile_wait(struct turnstile *turnstile, const char *wchan,
+ struct thread *owner)
+{
+ struct turnstile_waiter waiter;
+ struct turnstile_td *td;
+ struct thread *thread;
+
+ thread = thread_self();
+ assert(thread != owner);
+
+ td = thread_turnstile_td(thread);
+
+ spinlock_lock(&td->lock);
+ turnstile_waiter_init(&waiter, thread);
+ turnstile_add_waiter(turnstile, &waiter);
+ turnstile_td_set_waiter(td, &waiter);
+ spinlock_unlock(&td->lock);
+
+ if (owner == NULL) {
+ if (turnstile->top_waiter == &waiter) {
+ turnstile_waiter_set_awaken(&waiter);
+ }
+ } else {
+ /* This function temporarily unlocks the turnstile */
+ turnstile_set_owner(turnstile, owner);
+ }
+
+ for (;;) {
+ if (!turnstile_waiter_awaken(&waiter)) {
+ thread_sleep(&turnstile->bucket->lock, turnstile->sync_obj, wchan);
+ }
+
+ /*
+ * The real priority of a thread may change between waking up
+ * and reacquiring the turnstile.
+ */
+ if (turnstile->top_waiter == &waiter) {
+ break;
+ }
+
+ /* Otherwise, make sure the new top waiter is awakened */
+ turnstile_waiter_wakeup(turnstile->top_waiter);
+ turnstile_waiter_clear_awaken(&waiter);
+ }
+
+ spinlock_lock(&td->lock);
+ turnstile_td_set_waiter(td, NULL);
+ turnstile_remove_waiter(turnstile, &waiter);
+ spinlock_unlock(&td->lock);
+}
+
+void
+turnstile_signal(struct turnstile *turnstile)
+{
+ struct turnstile_waiter *waiter;
+
+ if (turnstile_empty(turnstile)) {
+ return;
+ }
+
+ waiter = plist_last_entry(&turnstile->waiters,
+ struct turnstile_waiter, node);
+ turnstile_waiter_wakeup(waiter);
+}
+
+void
+turnstile_own(struct turnstile *turnstile)
+{
+ struct turnstile_td *td;
+ struct thread *owner;
+ unsigned int top_priority;
+
+ assert(turnstile->owner == NULL);
+
+ if (turnstile_empty(turnstile)) {
+ return;
+ }
+
+ owner = thread_self();
+ top_priority = turnstile_waiter_priority(turnstile->top_waiter);
+ assert(thread_real_global_priority(owner) >= top_priority);
+ td = thread_turnstile_td(owner);
+
+ spinlock_lock(&td->lock);
+ turnstile_td_own(td, turnstile);
+ spinlock_unlock(&td->lock);
+}
+
+void
+turnstile_disown(struct turnstile *turnstile)
+{
+ struct turnstile_td *td;
+ struct thread *owner;
+
+ owner = thread_self();
+ assert(turnstile->owner == owner);
+ assert(!turnstile_empty(turnstile));
+
+ td = thread_turnstile_td(owner);
+
+ spinlock_lock(&td->lock);
+ turnstile_td_disown(td, turnstile);
+ spinlock_unlock(&td->lock);
+}
diff --git a/kern/turnstile.h b/kern/turnstile.h
new file mode 100644
index 00000000..21a5a1a9
--- /dev/null
+++ b/kern/turnstile.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Priority propagation capable sleep queues.
+ *
+ * Turnstiles are used to build sleeping synchronization primitives where
+ * ownership applies, such as mutexes. They allow threads with different
+ * priorities to contend on the same synchronization object without
+ * unbounded priority inversion.
+ */
+
+#ifndef _KERN_TURNSTILE_H
+#define _KERN_TURNSTILE_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <kern/plist.h>
+#include <kern/spinlock.h>
+#include <kern/thread.h>
+#include <kern/turnstile_types.h>
+
+struct turnstile;
+
+/*
+ * Turnstile thread data.
+ */
+struct turnstile_td;
+
+#define turnstile_td_assert_lock(td) spinlock_assert_locked(&(td)->lock)
+
+/*
+ * Initialize turnstile thread data.
+ */
+static inline void
+turnstile_td_init(struct turnstile_td *td)
+{
+ spinlock_init(&td->lock);
+ td->turnstile = NULL;
+ td->waiter = NULL;
+ plist_init(&td->owned_turnstiles);
+ td->top_global_priority = 0;
+}
+
+/*
+ * Turnstile thread data locking functions.
+ */
+
+static inline void
+turnstile_td_lock(struct turnstile_td *td)
+{
+ spinlock_lock(&td->lock);
+}
+
+static inline int
+turnstile_td_trylock(struct turnstile_td *td)
+{
+ return spinlock_trylock(&td->lock);
+}
+
+static inline void
+turnstile_td_unlock(struct turnstile_td *td)
+{
+ spinlock_unlock(&td->lock);
+}
+
+/*
+ * Functions managing the turnstile a thread is sleeping in.
+ */
+
+static inline void
+turnstile_td_set_turnstile(struct turnstile_td *td,
+ struct turnstile *turnstile)
+{
+ td->turnstile = turnstile;
+}
+
+static inline struct turnstile *
+turnstile_td_get_turnstile(const struct turnstile_td *td)
+{
+ return td->turnstile;
+}
+
+/*
+ * Propagate priority starting at the thread containing the given thread data.
+ */
+void turnstile_td_propagate_priority(struct turnstile_td *td);
+
+/*
+ * Initialize the turnstile module.
+ */
+void turnstile_setup(void);
+
+/*
+ * Create/destroy a turnstile.
+ */
+struct turnstile * turnstile_create(void);
+void turnstile_destroy(struct turnstile *turnstile);
+
+/*
+ * Acquire/release a turnstile.
+ *
+ * Acquiring a turnstile serializes all access and disables preemption.
+ */
+struct turnstile * turnstile_acquire(const void *sync_obj);
+void turnstile_release(struct turnstile *turnstile);
+
+/*
+ * Lend/return a turnstile.
+ *
+ * A thread lends its private turnstile to the turnstile module in
+ * order to prepare its sleep. The turnstile obtained on lending
+ * is either the thread's turnstile, or an already existing turnstile
+ * for this synchronization object if another thread is waiting.
+ *
+ * When multiple threads are waiting on the same turnstile, the extra
+ * turnstiles lent are kept in an internal free list, used when threads
+ * are awakened to return a turnstile to them.
+ *
+ * Note that the turnstile returned may not be the one lent.
+ *
+ * The turnstile obtained when lending is automatically acquired.
+ */
+struct turnstile * turnstile_lend(const void *sync_obj);
+void turnstile_return(struct turnstile *turnstile);
+
+/*
+ * Return true if the given turnstile has no waiters.
+ *
+ * The turnstile must be acquired when calling this function.
+ */
+bool turnstile_empty(const struct turnstile *turnstile);
+
+/*
+ * Wait for a wake up on the given turnstile.
+ *
+ * The turnstile must be lent when calling this function. It is
+ * released and later reacquired before returning from this function.
+ *
+ * The calling thread is considered a waiter as long as it didn't
+ * reacquire the turnstile. This means that signalling a turnstile
+ * has no visible effect on the number of waiters until the turnstile
+ * is released, e.g. if a single thread is waiting and another signals
+ * the turnstile, the turnstile is not immediately considered empty.
+ *
+ * If owner isn't NULL, it must refer to the thread currently owning
+ * the associated synchronization object. The priority of the caller
+ * is propagated to the chain of turnstiles and owners as necessary
+ * to prevent unbounded priority inversion.
+ */
+void turnstile_wait(struct turnstile *turnstile, const char *wchan,
+ struct thread *owner);
+
+/*
+ * Wake up a thread waiting on the given turnstile, if any.
+ *
+ * The turnstile must be acquired when calling this function.
+ * Since a turnstile must be lent (and in turn is automatically
+ * acquired) when waiting, and acquired in order to signal it,
+ * wake-ups are serialized and cannot be missed.
+ */
+void turnstile_signal(struct turnstile *turnstile);
+
+/*
+ * Own/disown a turnstile.
+ *
+ * The turnstile must be lent when taking ownership, acquired when
+ * releasing it. Owning has no effect on empty turnstiles.
+ * Conversely, an empty turnstile cannot be disowned.
+ *
+ * Ownership must be updated atomically with regard to the ownership
+ * of the associated synchronization object.
+ */
+void turnstile_own(struct turnstile *turnstile);
+void turnstile_disown(struct turnstile *turnstile);
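As an illustration of how this interface fits together (not part of this
commit), the slow paths of a hypothetical mutual-exclusion lock could use
turnstiles roughly as follows. The struct mutex type, mutex_tryacquire() and
mutex_get_owner() are made-up names, and keeping the lock word consistent
with turnstile ownership is glossed over:

    static void
    mutex_lock_slow(struct mutex *mutex)
    {
        struct turnstile *turnstile;
        struct thread *owner;

        /* Lend the caller's private turnstile; the one obtained is acquired */
        turnstile = turnstile_lend(mutex);

        for (;;) {
            /* Hypothetical helper : atomically take the lock word if free */
            if (mutex_tryacquire(mutex)) {
                break;
            }

            /* Hypothetical helper : owner recorded in the lock word */
            owner = mutex_get_owner(mutex);

            /* Sleep; the caller's priority propagates along the owner chain */
            turnstile_wait(turnstile, "mutex", owner);
        }

        /* The caller now owns the synchronization object */
        turnstile_own(turnstile);
        turnstile_return(turnstile);
    }

    static void
    mutex_unlock_slow(struct mutex *mutex)
    {
        struct turnstile *turnstile;

        /* NULL if no thread is currently waiting */
        turnstile = turnstile_acquire(mutex);

        if (turnstile != NULL) {
            turnstile_disown(turnstile);
            turnstile_signal(turnstile);
            turnstile_release(turnstile);
        }
    }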
+
+#endif /* _KERN_TURNSTILE_H */
diff --git a/kern/turnstile_types.h b/kern/turnstile_types.h
new file mode 100644
index 00000000..a95b691e
--- /dev/null
+++ b/kern/turnstile_types.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definition used to avoid circular inclusion dependencies.
+ */
+
+#ifndef _KERN_TURNSTILE_TYPES_H
+#define _KERN_TURNSTILE_TYPES_H
+
+#include <kern/plist_types.h>
+#include <kern/spinlock_types.h>
+
+struct turnstile;
+struct turnstile_waiter;
+
+/*
+ * Per-thread turnstile data.
+ *
+ * The turnstile member indicates whether this thread is in a turnstile,
+ * and is only valid if the thread is not running.
+ *
+ * The waiter points to the structure a thread uses to queue itself on
+ * a turnstile. It is used to access a sleeping thread from another
+ * thread, e.g. on wake-ups or priority updates.
+ *
+ * The list of owned turnstiles is used by priority propagation to
+ * determine the top priority among all waiters, which is stored in
+ * the thread data so that turnstiles can be unlocked quickly.
+ *
+ * Locking keys :
+ * (b) bucket
+ * (t) turnstile_td
+ */
+struct turnstile_td {
+ struct spinlock lock;
+ struct turnstile *turnstile; /* (t) */
+ struct turnstile_waiter *waiter; /* (b,t) */
+ struct plist owned_turnstiles; /* (t) */
+ unsigned int top_global_priority; /* (t) */
+ unsigned char top_sched_policy; /* (t) */
+ unsigned short top_priority; /* (t) */
+};
+
+#endif /* _KERN_TURNSTILE_TYPES_H */