author     Richard Braun <rbraun@sceen.net>   2017-03-04 15:51:04 +0100
committer  Richard Braun <rbraun@sceen.net>   2017-03-04 16:47:59 +0100
commit     ef9426483f2f388c4874c6b12e0800645c3dbce4
tree       0cc9eb58a28d8cf0bb02c8b38cced1a4f37ac73f
parent     6fecd5cef7a2f549b4d81053a3b80365ed7828f5
kern/{thread,turnstile}: implement priority inheritance
The new turnstile module provides priority-propagation-capable sleep queues, tightly coupled with the scheduler, and can be used to implement synchronization facilities with priority inheritance.
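As an illustration of the intended use (not part of this commit), the sketch below shows how a mutex-like primitive with priority inheritance might be built on the new interface. The mutex_pi type and the "mutex_pi" wait channel name are made up for this sketch, and, to keep the sketch free of atomic operations, both the lock and the unlock paths go through turnstile_lend()/turnstile_return() so that every access to the owner field is serialized by the turnstile keyed on the mutex; a production mutex would instead use an atomic lock word with a contended state and only take the slow path when needed.

#include <stddef.h>

#include <kern/thread.h>
#include <kern/turnstile.h>

struct mutex_pi {
    struct thread *owner;               /* NULL when unlocked */
};

static void
mutex_pi_lock(struct mutex_pi *mutex)
{
    struct turnstile *turnstile;
    struct thread *owner;

    /* Lend the caller's private turnstile, keyed on the mutex address */
    turnstile = turnstile_lend(mutex);

    for (;;) {
        owner = mutex->owner;

        if (owner == NULL) {
            mutex->owner = thread_self();
            break;
        }

        /*
         * Sleep until signalled; the caller's priority is propagated
         * along the chain of turnstiles and owners as needed.
         */
        turnstile_wait(turnstile, "mutex_pi", owner);
    }

    /* Register ownership so that any remaining waiters keep boosting us */
    turnstile_own(turnstile);
    turnstile_return(turnstile);
}

static void
mutex_pi_unlock(struct mutex_pi *mutex)
{
    struct turnstile *turnstile;

    /* Lending (rather than acquiring) keeps the owner field serialized */
    turnstile = turnstile_lend(mutex);

    if (!turnstile_empty(turnstile)) {
        /* Drop the inherited boost and wake up the highest priority waiter */
        turnstile_disown(turnstile);
        mutex->owner = NULL;
        turnstile_signal(turnstile);
    } else {
        mutex->owner = NULL;
    }

    turnstile_return(turnstile);

    /* Re-evaluate this thread's own priority now that the boost is gone */
    turnstile_td_propagate_priority(thread_turnstile_td(thread_self()));
}

Ownership tracking is shared between both sides: turnstile_wait() registers the owner passed by the waiter, and the thread that eventually takes the mutex re-registers itself with turnstile_own() before returning its turnstile, so priority boosts keep flowing to whichever thread currently holds the lock.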
Diffstat (limited to 'kern')
-rw-r--r--   kern/kernel.c              2
-rw-r--r--   kern/task.c                4
-rw-r--r--   kern/thread.c            390
-rw-r--r--   kern/thread.h            141
-rw-r--r--   kern/thread_i.h           31
-rw-r--r--   kern/turnstile.c         788
-rw-r--r--   kern/turnstile.h         191
-rw-r--r--   kern/turnstile_types.h    58
8 files changed, 1484 insertions(+), 121 deletions(-)
diff --git a/kern/kernel.c b/kern/kernel.c
index 9b8d4eec..cd3d3e3b 100644
--- a/kern/kernel.c
+++ b/kern/kernel.c
@@ -24,6 +24,7 @@
#include <kern/sref.h>
#include <kern/task.h>
#include <kern/thread.h>
+#include <kern/turnstile.h>
#include <kern/work.h>
#include <kern/xcall.h>
#include <machine/cpu.h>
@@ -43,6 +44,7 @@ kernel_main(void)
xcall_setup();
task_setup();
sleepq_setup();
+ turnstile_setup();
thread_setup();
work_setup();
llsync_setup();
diff --git a/kern/task.c b/kern/task.c
index afc2407a..d7d91c17 100644
--- a/kern/task.c
+++ b/kern/task.c
@@ -150,8 +150,8 @@ task_info(struct task *task)
thread_state_to_chr(thread),
thread_wchan_desc(thread),
(unsigned long)thread_wchan_addr(thread),
- thread_sched_class_to_str(thread_sched_class(thread)),
- thread_priority(thread),
+ thread_sched_class_to_str(thread_user_sched_class(thread)),
+ thread_user_priority(thread),
thread->name);
}
diff --git a/kern/thread.c b/kern/thread.c
index 069881c0..3eccebda 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -105,6 +105,7 @@
#include <kern/sref.h>
#include <kern/task.h>
#include <kern/thread.h>
+#include <kern/turnstile.h>
#include <kern/work.h>
#include <machine/atomic.h>
#include <machine/cpu.h>
@@ -261,13 +262,13 @@ struct thread_runq {
* Operations of a scheduling class.
*/
struct thread_sched_ops {
- void (*init_sched)(struct thread *thread, unsigned short priority);
struct thread_runq * (*select_runq)(struct thread *thread);
void (*add)(struct thread_runq *runq, struct thread *thread);
void (*remove)(struct thread_runq *runq, struct thread *thread);
void (*put_prev)(struct thread_runq *runq, struct thread *thread);
struct thread * (*get_next)(struct thread_runq *runq);
- void (*set_priority)(struct thread *thread, unsigned short priority);
+ void (*reset_priority)(struct thread *thread, unsigned short priority);
+ void (*update_priority)(struct thread *thread, unsigned short priority);
unsigned int (*get_global_priority)(unsigned short priority);
void (*set_next)(struct thread_runq *runq, struct thread *thread);
void (*tick)(struct thread_runq *runq, struct thread *thread);
@@ -343,6 +344,13 @@ struct thread_zombie {
struct thread *thread;
};
+static unsigned char
+thread_policy_to_class(unsigned char policy)
+{
+ assert(policy < ARRAY_SIZE(thread_policy_table));
+ return thread_policy_table[policy];
+}
+
static void
thread_set_wchan(struct thread *thread, const void *wchan_addr,
const char *wchan_desc)
@@ -358,15 +366,24 @@ thread_clear_wchan(struct thread *thread)
}
static const struct thread_sched_ops *
-thread_get_sched_ops(const struct thread *thread)
+thread_get_sched_ops(unsigned char sched_class)
{
- unsigned char sched_class;
-
- sched_class = thread_sched_class(thread);
assert(sched_class < ARRAY_SIZE(thread_sched_ops));
return &thread_sched_ops[sched_class];
}
+static const struct thread_sched_ops *
+thread_get_user_sched_ops(const struct thread *thread)
+{
+ return thread_get_sched_ops(thread_user_sched_class(thread));
+}
+
+static const struct thread_sched_ops *
+thread_get_real_sched_ops(const struct thread *thread)
+{
+ return thread_get_sched_ops(thread_real_sched_class(thread));
+}
+
static void __init
thread_runq_init_rt(struct thread_runq *runq)
{
@@ -458,7 +475,7 @@ thread_runq_add(struct thread_runq *runq, struct thread *thread)
spinlock_assert_locked(&runq->lock);
assert(!thread->in_runq);
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
ops->add(runq, thread);
if (runq->nr_threads == 0) {
@@ -467,7 +484,8 @@ thread_runq_add(struct thread_runq *runq, struct thread *thread)
runq->nr_threads++;
- if (thread_sched_class(thread) < thread_sched_class(runq->current)) {
+ if (thread_real_sched_class(thread)
+ < thread_real_sched_class(runq->current)) {
thread_set_flag(runq->current, THREAD_YIELD);
}
@@ -490,7 +508,7 @@ thread_runq_remove(struct thread_runq *runq, struct thread *thread)
cpumap_set_atomic(&thread_idle_runqs, thread_runq_cpu(runq));
}
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
ops->remove(runq, thread);
thread->in_runq = false;
@@ -504,7 +522,7 @@ thread_runq_put_prev(struct thread_runq *runq, struct thread *thread)
assert(!cpu_intr_enabled());
spinlock_assert_locked(&runq->lock);
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
if (ops->put_prev != NULL) {
ops->put_prev(runq, thread);
@@ -538,7 +556,7 @@ thread_runq_set_next(struct thread_runq *runq, struct thread *thread)
{
const struct thread_sched_ops *ops;
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
if (ops->set_next != NULL) {
ops->set_next(runq, thread);
@@ -660,13 +678,6 @@ thread_runq_double_lock(struct thread_runq *a, struct thread_runq *b)
}
}
-static void
-thread_sched_rt_init_sched(struct thread *thread, unsigned short priority)
-{
- assert(priority <= THREAD_SCHED_RT_PRIO_MAX);
- thread->rt_data.time_slice = THREAD_DEFAULT_RR_TIME_SLICE;
-}
-
static struct thread_runq *
thread_sched_rt_select_runq(struct thread *thread)
{
@@ -693,15 +704,17 @@ thread_sched_rt_add(struct thread_runq *runq, struct thread *thread)
struct list *threads;
rt_runq = &runq->rt_runq;
- threads = &rt_runq->threads[thread_priority(thread)];
+ threads = &rt_runq->threads[thread_real_priority(thread)];
list_insert_tail(threads, &thread->rt_data.node);
if (list_singular(threads)) {
- rt_runq->bitmap |= (1ULL << thread_priority(thread));
+ rt_runq->bitmap |= (1ULL << thread_real_priority(thread));
}
- if ((thread_sched_class(thread) == thread_sched_class(runq->current))
- && (thread_priority(thread) > thread_priority(runq->current))) {
+ if ((thread_real_sched_class(thread)
+ == thread_real_sched_class(runq->current))
+ && (thread_real_priority(thread)
+ > thread_real_priority(runq->current))) {
thread_set_flag(runq->current, THREAD_YIELD);
}
}
@@ -713,11 +726,11 @@ thread_sched_rt_remove(struct thread_runq *runq, struct thread *thread)
struct list *threads;
rt_runq = &runq->rt_runq;
- threads = &rt_runq->threads[thread_priority(thread)];
+ threads = &rt_runq->threads[thread_real_priority(thread)];
list_remove(&thread->rt_data.node);
if (list_empty(threads)) {
- rt_runq->bitmap &= ~(1ULL << thread_priority(thread));
+ rt_runq->bitmap &= ~(1ULL << thread_real_priority(thread));
}
}
@@ -749,6 +762,13 @@ thread_sched_rt_get_next(struct thread_runq *runq)
return thread;
}
+static void
+thread_sched_rt_reset_priority(struct thread *thread, unsigned short priority)
+{
+ assert(priority <= THREAD_SCHED_RT_PRIO_MAX);
+ thread->rt_data.time_slice = THREAD_DEFAULT_RR_TIME_SLICE;
+}
+
static unsigned int
thread_sched_rt_get_global_priority(unsigned short priority)
{
@@ -766,7 +786,7 @@ thread_sched_rt_tick(struct thread_runq *runq, struct thread *thread)
{
(void)runq;
- if (thread_sched_policy(thread) != THREAD_SCHED_POLICY_RR) {
+ if (thread_real_sched_policy(thread) != THREAD_SCHED_POLICY_RR) {
return;
}
@@ -786,16 +806,6 @@ thread_sched_fs_prio2weight(unsigned short priority)
return ((priority + 1) * THREAD_FS_ROUND_SLICE_BASE);
}
-static void
-thread_sched_fs_init_sched(struct thread *thread, unsigned short priority)
-{
- assert(priority <= THREAD_SCHED_FS_PRIO_MAX);
- thread->fs_data.fs_runq = NULL;
- thread->fs_data.round = 0;
- thread->fs_data.weight = thread_sched_fs_prio2weight(priority);
- thread->fs_data.work = 0;
-}
-
static struct thread_runq *
thread_sched_fs_select_runq(struct thread *thread)
{
@@ -897,7 +907,7 @@ thread_sched_fs_enqueue(struct thread_fs_runq *fs_runq, unsigned long round,
assert(thread->fs_data.fs_runq == NULL);
assert(thread->fs_data.work <= thread->fs_data.weight);
- group = &fs_runq->group_array[thread_priority(thread)];
+ group = &fs_runq->group_array[thread_real_priority(thread)];
group_weight = group->weight + thread->fs_data.weight;
total_weight = fs_runq->weight + thread->fs_data.weight;
node = (group->weight == 0)
@@ -973,7 +983,7 @@ thread_sched_fs_restart(struct thread_runq *runq)
assert(node != NULL);
fs_runq->current = list_entry(node, struct thread_fs_group, node);
- if (thread_sched_class(runq->current) == THREAD_SCHED_CLASS_FS) {
+ if (thread_real_sched_class(runq->current) == THREAD_SCHED_CLASS_FS) {
thread_set_flag(runq->current, THREAD_YIELD);
}
}
@@ -1009,7 +1019,7 @@ thread_sched_fs_dequeue(struct thread *thread)
assert(thread->fs_data.fs_runq != NULL);
fs_runq = thread->fs_data.fs_runq;
- group = &fs_runq->group_array[thread_priority(thread)];
+ group = &fs_runq->group_array[thread_real_priority(thread)];
thread->fs_data.fs_runq = NULL;
list_remove(&thread->fs_data.runq_node);
@@ -1084,7 +1094,7 @@ thread_sched_fs_put_prev(struct thread_runq *runq, struct thread *thread)
struct thread_fs_group *group;
fs_runq = runq->fs_runq_active;
- group = &fs_runq->group_array[thread_priority(thread)];
+ group = &fs_runq->group_array[thread_real_priority(thread)];
list_insert_tail(&group->threads, &thread->fs_data.group_node);
if (thread->fs_data.work >= thread->fs_data.weight) {
@@ -1152,8 +1162,19 @@ thread_sched_fs_get_next(struct thread_runq *runq)
}
static void
-thread_sched_fs_set_priority(struct thread *thread, unsigned short priority)
+thread_sched_fs_reset_priority(struct thread *thread, unsigned short priority)
+{
+ assert(priority <= THREAD_SCHED_FS_PRIO_MAX);
+ thread->fs_data.fs_runq = NULL;
+ thread->fs_data.round = 0;
+ thread->fs_data.weight = thread_sched_fs_prio2weight(priority);
+ thread->fs_data.work = 0;
+}
+
+static void
+thread_sched_fs_update_priority(struct thread *thread, unsigned short priority)
{
+ assert(priority <= THREAD_SCHED_FS_PRIO_MAX);
thread->fs_data.weight = thread_sched_fs_prio2weight(priority);
if (thread->fs_data.work >= thread->fs_data.weight) {
@@ -1184,7 +1205,7 @@ thread_sched_fs_tick(struct thread_runq *runq, struct thread *thread)
fs_runq = runq->fs_runq_active;
fs_runq->work++;
- group = &fs_runq->group_array[thread_priority(thread)];
+ group = &fs_runq->group_array[thread_real_priority(thread)];
group->work++;
thread_set_flag(thread, THREAD_YIELD);
thread->fs_data.work++;
@@ -1235,7 +1256,8 @@ thread_sched_fs_balance_eligible(struct thread_runq *runq,
if ((nr_threads == 0)
|| ((nr_threads == 1)
- && (thread_sched_class(runq->current) == THREAD_SCHED_CLASS_FS))) {
+ && (thread_real_sched_class(runq->current)
+ == THREAD_SCHED_CLASS_FS))) {
return 0;
}
@@ -1525,39 +1547,40 @@ thread_sched_idle_get_global_priority(unsigned short priority)
return THREAD_SCHED_GLOBAL_PRIO_IDLE;
}
-static const struct thread_sched_ops thread_sched_ops[THREAD_NR_SCHED_CLASSES] = {
+static const struct thread_sched_ops thread_sched_ops[THREAD_NR_SCHED_CLASSES]
+ = {
[THREAD_SCHED_CLASS_RT] = {
- .init_sched = thread_sched_rt_init_sched,
.select_runq = thread_sched_rt_select_runq,
.add = thread_sched_rt_add,
.remove = thread_sched_rt_remove,
.put_prev = thread_sched_rt_put_prev,
.get_next = thread_sched_rt_get_next,
- .set_priority = NULL,
+ .reset_priority = thread_sched_rt_reset_priority,
+ .update_priority = NULL,
.get_global_priority = thread_sched_rt_get_global_priority,
.set_next = thread_sched_rt_set_next,
.tick = thread_sched_rt_tick,
},
[THREAD_SCHED_CLASS_FS] = {
- .init_sched = thread_sched_fs_init_sched,
.select_runq = thread_sched_fs_select_runq,
.add = thread_sched_fs_add,
.remove = thread_sched_fs_remove,
.put_prev = thread_sched_fs_put_prev,
.get_next = thread_sched_fs_get_next,
- .set_priority = thread_sched_fs_set_priority,
+ .reset_priority = thread_sched_fs_reset_priority,
+ .update_priority = thread_sched_fs_update_priority,
.get_global_priority = thread_sched_fs_get_global_priority,
.set_next = thread_sched_fs_set_next,
.tick = thread_sched_fs_tick,
},
[THREAD_SCHED_CLASS_IDLE] = {
- .init_sched = NULL,
.select_runq = thread_sched_idle_select_runq,
.add = thread_sched_idle_add,
.remove = thread_sched_idle_remove,
.put_prev = NULL,
.get_next = thread_sched_idle_get_next,
- .set_priority = NULL,
+ .reset_priority = NULL,
+ .update_priority = NULL,
.get_global_priority = thread_sched_idle_get_global_priority,
.set_next = NULL,
.tick = NULL,
@@ -1565,30 +1588,95 @@ static const struct thread_sched_ops thread_sched_ops[THREAD_NR_SCHED_CLASSES] =
};
static void
-thread_set_sched_policy(struct thread *thread, unsigned char sched_policy)
+thread_set_user_sched_policy(struct thread *thread, unsigned char sched_policy)
+{
+ thread->user_sched_data.sched_policy = sched_policy;
+}
+
+static void
+thread_set_user_sched_class(struct thread *thread, unsigned char sched_class)
+{
+ thread->user_sched_data.sched_class = sched_class;
+}
+
+static void
+thread_set_user_priority(struct thread *thread, unsigned short priority)
+{
+ const struct thread_sched_ops *ops;
+
+ ops = thread_get_user_sched_ops(thread);
+
+ thread->user_sched_data.priority = priority;
+ thread->user_sched_data.global_priority
+ = ops->get_global_priority(priority);
+}
+
+static void
+thread_update_user_priority(struct thread *thread, unsigned short priority)
+{
+ thread_set_user_priority(thread, priority);
+}
+
+static void
+thread_set_real_sched_policy(struct thread *thread, unsigned char sched_policy)
{
- thread->sched_data.sched_policy = sched_policy;
+ thread->real_sched_data.sched_policy = sched_policy;
}
static void
-thread_set_sched_class(struct thread *thread, unsigned char sched_class)
+thread_set_real_sched_class(struct thread *thread, unsigned char sched_class)
{
- thread->sched_data.sched_class = sched_class;
+ thread->real_sched_data.sched_class = sched_class;
}
static void
-thread_set_priority(struct thread *thread, unsigned short priority)
+thread_set_real_priority(struct thread *thread, unsigned short priority)
{
const struct thread_sched_ops *ops;
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
- if (ops->set_priority != NULL) {
- ops->set_priority(thread, priority);
+ thread->real_sched_data.priority = priority;
+ thread->real_sched_data.global_priority
+ = ops->get_global_priority(priority);
+
+ if (ops->reset_priority != NULL) {
+ ops->reset_priority(thread, priority);
+ }
+}
+
+static void
+thread_update_real_priority(struct thread *thread, unsigned short priority)
+{
+ const struct thread_sched_ops *ops;
+
+ ops = thread_get_real_sched_ops(thread);
+
+ thread->real_sched_data.priority = priority;
+ thread->real_sched_data.global_priority
+ = ops->get_global_priority(priority);
+
+ if (ops->update_priority != NULL) {
+ ops->update_priority(thread, priority);
}
+}
+
+static void
+thread_reset_real_priority(struct thread *thread)
+{
+ const struct thread_sched_ops *ops;
+ struct thread_sched_data *user, *real;
- thread->sched_data.priority = priority;
- thread->sched_data.global_priority = ops->get_global_priority(priority);
+ user = &thread->user_sched_data;
+ real = &thread->real_sched_data;
+ *real = *user;
+ thread->boosted = false;
+
+ ops = thread_get_user_sched_ops(thread);
+
+ if (ops->reset_priority != NULL) {
+ ops->reset_priority(thread, real->priority);
+ }
}
static void __init
@@ -1604,9 +1692,10 @@ thread_bootstrap_common(unsigned int cpu)
booter->flags = 0;
booter->preempt = 1;
cpumap_fill(&booter->cpumap);
- thread_set_sched_policy(booter, THREAD_SCHED_POLICY_IDLE);
- thread_set_sched_class(booter, THREAD_SCHED_CLASS_IDLE);
- thread_set_priority(booter, 0);
+ thread_set_user_sched_policy(booter, THREAD_SCHED_POLICY_IDLE);
+ thread_set_user_sched_class(booter, THREAD_SCHED_CLASS_IDLE);
+ thread_set_user_priority(booter, 0);
+ thread_reset_real_priority(booter);
memset(booter->tsd, 0, sizeof(booter->tsd));
booter->task = kernel_task;
thread_runq_init(percpu_ptr(thread_runq, cpu), cpu, booter);
@@ -1620,8 +1709,8 @@ thread_bootstrap(void)
thread_fs_highest_round = THREAD_FS_INITIAL_ROUND;
- thread_bootstrap_common(0);
tcb_set_current(&thread_booters[0].tcb);
+ thread_bootstrap_common(0);
}
void __init
@@ -1677,23 +1766,9 @@ thread_destroy_tsd(struct thread *thread)
}
}
-static void
-thread_init_sched(struct thread *thread, unsigned short priority)
-{
- const struct thread_sched_ops *ops;
-
- ops = thread_get_sched_ops(thread);
-
- if (ops->init_sched != NULL) {
- ops->init_sched(thread, priority);
- }
-
- thread->sched_data.priority = priority;
- thread->sched_data.global_priority = ops->get_global_priority(priority);
-}
-
static int
-thread_init(struct thread *thread, void *stack, const struct thread_attr *attr,
+thread_init(struct thread *thread, void *stack,
+ const struct thread_attr *attr,
void (*fn)(void *), void *arg)
{
struct thread *caller;
@@ -1720,13 +1795,23 @@ thread_init(struct thread *thread, void *stack, const struct thread_attr *attr,
goto error_sleepq;
}
+ thread->priv_turnstile = turnstile_create();
+
+ if (thread->priv_turnstile == NULL) {
+ error = ERROR_NOMEM;
+ goto error_turnstile;
+ }
+
+ turnstile_td_init(&thread->turnstile_td);
+ thread->propagate_priority = false;
thread->preempt = THREAD_SUSPEND_PREEMPT_LEVEL;
thread->pinned = 0;
thread->llsync_read = 0;
cpumap_copy(&thread->cpumap, cpumap);
- thread_set_sched_policy(thread, attr->policy);
- thread_set_sched_class(thread, thread_policy_table[attr->policy]);
- thread_init_sched(thread, attr->priority);
+ thread_set_user_sched_policy(thread, attr->policy);
+ thread_set_user_sched_class(thread, thread_policy_to_class(attr->policy));
+ thread_set_user_priority(thread, attr->priority);
+ thread_reset_real_priority(thread);
memset(thread->tsd, 0, sizeof(thread->tsd));
mutex_init(&thread->join_lock);
condition_init(&thread->join_cond);
@@ -1753,6 +1838,8 @@ thread_init(struct thread *thread, void *stack, const struct thread_attr *attr,
error_tcb:
thread_destroy_tsd(thread);
+ turnstile_destroy(thread->priv_turnstile);
+error_turnstile:
sleepq_destroy(thread->priv_sleepq);
error_sleepq:
return error;
@@ -1799,6 +1886,7 @@ thread_destroy(struct thread *thread)
thread_destroy_tsd(thread);
task_remove_thread(thread->task, thread);
+ turnstile_destroy(thread->priv_turnstile);
sleepq_destroy(thread->priv_sleepq);
kmem_cache_free(&thread_stack_cache, thread->stack);
kmem_cache_free(&thread_cache, thread);
@@ -2224,7 +2312,7 @@ thread_wakeup(struct thread *thread)
thread->state = THREAD_RUNNING;
} else {
/*
- * If another wakeup was attempted right before this one, the thread
+ * If another wake-up was attempted right before this one, the thread
* may currently be pushed on a remote run queue, and the run queue
* being locked here is actually the previous one. The run queue
* pointer may be modified concurrently, now being protected by the
@@ -2249,7 +2337,7 @@ thread_wakeup(struct thread *thread)
cpu_intr_save(&flags);
if (!thread->pinned) {
- runq = thread_sched_ops[thread_sched_class(thread)].select_runq(thread);
+ runq = thread_get_real_sched_ops(thread)->select_runq(thread);
} else {
runq = thread->runq;
spinlock_lock(&runq->lock);
@@ -2341,7 +2429,7 @@ thread_tick_intr(void)
thread_balance_idle_tick(runq);
}
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
if (ops->tick != NULL) {
ops->tick(runq, thread);
@@ -2350,6 +2438,7 @@ thread_tick_intr(void)
spinlock_unlock(&runq->lock);
}
+/* TODO Move outside */
char
thread_state_to_chr(const struct thread *thread)
{
@@ -2365,10 +2454,11 @@ thread_state_to_chr(const struct thread *thread)
}
}
+/* TODO Move outside */
const char *
-thread_sched_class_to_str(unsigned char sched_policy)
+thread_sched_class_to_str(unsigned char sched_class)
{
- switch (sched_policy) {
+ switch (sched_class) {
case THREAD_SCHED_CLASS_RT:
return "rt";
case THREAD_SCHED_CLASS_FS:
@@ -2385,13 +2475,96 @@ thread_setscheduler(struct thread *thread, unsigned char policy,
unsigned short priority)
{
struct thread_runq *runq;
+ struct turnstile_td *td;
+ unsigned long flags;
+ bool requeue, current, update;
+
+ td = thread_turnstile_td(thread);
+
+ turnstile_td_lock(td);
+ runq = thread_lock_runq(thread, &flags);
+
+ if ((thread_user_sched_policy(thread) == policy)
+ && (thread_user_priority(thread) == priority)) {
+ goto out;
+ }
+
+ requeue = thread->in_runq;
+
+ if (!requeue) {
+ current = false;
+ } else {
+ if (thread != runq->current) {
+ current = false;
+ } else {
+ thread_runq_put_prev(runq, thread);
+ current = true;
+ }
+
+ thread_runq_remove(runq, thread);
+ }
+
+ if (thread_user_sched_policy(thread) == policy) {
+ thread_update_user_priority(thread, priority);
+ update = true;
+ } else {
+ thread_set_user_sched_policy(thread, policy);
+ thread_set_user_sched_class(thread, thread_policy_to_class(policy));
+ thread_set_user_priority(thread, priority);
+ update = false;
+ }
+
+ if (thread->boosted) {
+ if (thread_user_global_priority(thread)
+ >= thread_real_global_priority(thread)) {
+ thread_reset_real_priority(thread);
+ }
+ } else {
+ if (update) {
+ thread_update_real_priority(thread, priority);
+ } else {
+ thread_set_real_sched_policy(thread, policy);
+ thread_set_real_sched_class(thread, thread_policy_to_class(policy));
+ thread_set_real_priority(thread, priority);
+ }
+ }
+
+ if (requeue) {
+ thread_runq_add(runq, thread);
+
+ if (current) {
+ thread_runq_set_next(runq, thread);
+ }
+ }
+
+out:
+ thread_unlock_runq(runq, flags);
+ turnstile_td_unlock(td);
+
+ turnstile_td_propagate_priority(td);
+}
+
+void
+thread_pi_setscheduler(struct thread *thread, unsigned char policy,
+ unsigned short priority)
+{
+ const struct thread_sched_ops *ops;
+ struct thread_runq *runq;
+ struct turnstile_td *td;
+ unsigned int global_priority;
unsigned long flags;
bool requeue, current;
+ td = thread_turnstile_td(thread);
+ turnstile_td_assert_lock(td);
+
+ ops = thread_get_sched_ops(thread_policy_to_class(policy));
+ global_priority = ops->get_global_priority(priority);
+
runq = thread_lock_runq(thread, &flags);
- if ((thread_sched_policy(thread) == policy)
- && (thread_priority(thread) == priority)) {
+ if ((thread_real_sched_policy(thread) == policy)
+ && (thread_real_priority(thread) == priority)) {
goto out;
}
@@ -2410,13 +2583,18 @@ thread_setscheduler(struct thread *thread, unsigned char policy,
thread_runq_remove(runq, thread);
}
- if (thread_sched_policy(thread) == policy) {
- thread_set_priority(thread, priority);
+ if (global_priority <= thread_user_global_priority(thread)) {
+ thread_reset_real_priority(thread);
} else {
- thread_set_sched_policy(thread, policy);
- assert(policy < ARRAY_SIZE(thread_policy_table));
- thread_set_sched_class(thread, thread_policy_table[policy]);
- thread_init_sched(thread, priority);
+ if (thread_real_sched_policy(thread) == policy) {
+ thread_update_real_priority(thread, priority);
+ } else {
+ thread_set_real_sched_policy(thread, policy);
+ thread_set_real_sched_class(thread, thread_policy_to_class(policy));
+ thread_set_real_priority(thread, priority);
+ }
+
+ thread->boosted = true;
}
if (requeue) {
@@ -2432,6 +2610,28 @@ out:
}
void
+thread_propagate_priority(void)
+{
+ struct thread *thread;
+
+ /*
+ * Although it's possible to propagate priority with preemption
+ * disabled, the operation can be too expensive to allow it.
+ */
+ if (!thread_preempt_enabled()) {
+ thread_set_priority_propagation_needed();
+ return;
+ }
+
+ thread = thread_self();
+
+ /* Clear before propagation to avoid infinite recursion */
+ thread->propagate_priority = false;
+
+ turnstile_td_propagate_priority(thread_turnstile_td(thread));
+}
+
+void
thread_key_create(unsigned int *keyp, thread_dtor_fn_t dtor)
{
unsigned int key;
diff --git a/kern/thread.h b/kern/thread.h
index 1542e7ae..15babbfc 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -33,30 +33,23 @@
#ifndef _KERN_THREAD_H
#define _KERN_THREAD_H
+#include <stdbool.h>
+#include <stddef.h>
+
#include <kern/assert.h>
#include <kern/cpumap.h>
#include <kern/macros.h>
+#include <kern/spinlock_types.h>
+#include <kern/turnstile_types.h>
#include <machine/atomic.h>
#include <machine/tcb.h>
/*
- * Forward declaration
- */
-struct spinlock;
-
-/*
* Thread structure.
*/
struct thread;
/*
- * Thread name buffer size.
- */
-#define THREAD_NAME_SIZE 32
-
-/*
- * Common scheduling data.
- *
* The global priority of a thread is meant to be compared against
* another global priority to determine which thread has higher priority.
*/
@@ -67,6 +60,11 @@ struct thread_sched_data {
unsigned int global_priority;
};
+/*
+ * Thread name buffer size.
+ */
+#define THREAD_NAME_SIZE 32
+
#include <kern/thread_i.h>
#define THREAD_KERNEL_PREFIX PACKAGE "_"
@@ -261,6 +259,15 @@ void thread_tick_intr(void);
void thread_setscheduler(struct thread *thread, unsigned char policy,
unsigned short priority);
+/*
+ * Variant used for priority inheritance.
+ *
+ * The caller must hold the turnstile thread data lock and no turnstile
+ * locks when calling this function.
+ */
+void thread_pi_setscheduler(struct thread *thread, unsigned char policy,
+ unsigned short priority);
+
static inline void
thread_ref(struct thread *thread)
{
@@ -301,33 +308,72 @@ thread_wchan_desc(const struct thread *thread)
char thread_state_to_chr(const struct thread *thread);
static inline const struct thread_sched_data *
-thread_get_sched_data(const struct thread *thread)
+thread_get_user_sched_data(const struct thread *thread)
+{
+ return &thread->user_sched_data;
+}
+
+static inline const struct thread_sched_data *
+thread_get_real_sched_data(const struct thread *thread)
+{
+ return &thread->real_sched_data;
+}
+
+/*
+ * If the caller requires the scheduling data to be stable, it
+ * must lock one of the following objects :
+ * - the containing run queue
+ * - the per-thread turnstile data (turnstile_td)
+ *
+ * Both are locked when scheduling data are updated.
+ */
+
+static inline unsigned char
+thread_user_sched_policy(const struct thread *thread)
+{
+ return thread_get_user_sched_data(thread)->sched_policy;
+}
+
+static inline unsigned char
+thread_user_sched_class(const struct thread *thread)
{
- return &thread->sched_data;
+ return thread_get_user_sched_data(thread)->sched_class;
+}
+
+static inline unsigned short
+thread_user_priority(const struct thread *thread)
+{
+ return thread_get_user_sched_data(thread)->priority;
+}
+
+static inline unsigned int
+thread_user_global_priority(const struct thread *thread)
+{
+ return thread_get_user_sched_data(thread)->global_priority;
}
static inline unsigned char
-thread_sched_policy(const struct thread *thread)
+thread_real_sched_policy(const struct thread *thread)
{
- return thread_get_sched_data(thread)->sched_policy;
+ return thread_get_real_sched_data(thread)->sched_policy;
}
static inline unsigned char
-thread_sched_class(const struct thread *thread)
+thread_real_sched_class(const struct thread *thread)
{
- return thread_get_sched_data(thread)->sched_class;
+ return thread_get_real_sched_data(thread)->sched_class;
}
static inline unsigned short
-thread_priority(const struct thread *thread)
+thread_real_priority(const struct thread *thread)
{
- return thread_get_sched_data(thread)->priority;
+ return thread_get_real_sched_data(thread)->priority;
}
static inline unsigned int
-thread_global_priority(const struct thread *thread)
+thread_real_global_priority(const struct thread *thread)
{
- return thread_get_sched_data(thread)->global_priority;
+ return thread_get_real_sched_data(thread)->global_priority;
}
/*
@@ -390,6 +436,53 @@ thread_sleepq_return(struct sleepq *sleepq)
}
/*
+ * Turnstile lending functions.
+ */
+
+static inline struct turnstile *
+thread_turnstile_lend(void)
+{
+ struct turnstile *turnstile;
+
+ turnstile = thread_self()->priv_turnstile;
+ assert(turnstile != NULL);
+ thread_self()->priv_turnstile = NULL;
+ return turnstile;
+}
+
+static inline void
+thread_turnstile_return(struct turnstile *turnstile)
+{
+ assert(turnstile != NULL);
+ assert(thread_self()->priv_turnstile == NULL);
+ thread_self()->priv_turnstile = turnstile;
+}
+
+static inline struct turnstile_td *
+thread_turnstile_td(struct thread *thread)
+{
+ return &thread->turnstile_td;
+}
+
+/*
+ * Priority propagation functions.
+ */
+
+static inline bool
+thread_priority_propagation_needed(void)
+{
+ return thread_self()->propagate_priority;
+}
+
+static inline void
+thread_set_priority_propagation_needed(void)
+{
+ thread_self()->propagate_priority = true;
+}
+
+void thread_propagate_priority(void);
+
+/*
* Migration control functions.
*
* Functions that change the migration state are implicit compiler barriers.
@@ -444,6 +537,10 @@ thread_preempt_enable_no_resched(void)
thread = thread_self();
assert(thread->preempt != 0);
thread->preempt--;
+
+ if (thread_preempt_enabled() && thread_priority_propagation_needed()) {
+ thread_propagate_priority();
+ }
}
static inline void
diff --git a/kern/thread_i.h b/kern/thread_i.h
index 55f96082..865787a0 100644
--- a/kern/thread_i.h
+++ b/kern/thread_i.h
@@ -26,6 +26,7 @@
#include <kern/macros.h>
#include <kern/mutex_types.h>
#include <kern/param.h>
+#include <kern/turnstile_types.h>
#include <machine/atomic.h>
#include <machine/tcb.h>
@@ -103,6 +104,19 @@ struct thread {
/* Sleep queue available for lending */
struct sleepq *priv_sleepq;
+ /* Turnstile available for lending */
+ struct turnstile *priv_turnstile;
+
+ /* Per-thread turnstile data */
+ struct turnstile_td turnstile_td;
+
+ /*
+ * True if priority must be propagated when preemption is reenabled
+ *
+ * This member is thread-local.
+ */
+ bool propagate_priority;
+
/* Thread-local members */
unsigned short preempt;
unsigned short pinned;
@@ -111,8 +125,21 @@ struct thread {
/* Processors on which this thread is allowed to run */
struct cpumap cpumap;
- /* Scheduling data */
- struct thread_sched_data sched_data;
+ /*
+ * Scheduling data.
+ */
+ struct thread_sched_data user_sched_data; /* User-provided */
+ struct thread_sched_data real_sched_data; /* Computed from
+ priority propagation */
+
+ /*
+ * True if the real scheduling data are not the user scheduling data.
+ *
+ * Note that it doesn't provide any information about priority inheritance.
+ * A thread may be part of a priority inheritance chain without its
+ * priority being boosted.
+ */
+ bool boosted;
/* Class specific scheduling data */
union {
diff --git a/kern/turnstile.c b/kern/turnstile.c
new file mode 100644
index 00000000..43629376
--- /dev/null
+++ b/kern/turnstile.c
@@ -0,0 +1,788 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation is based on "Solaris(tm) Internals: Solaris 10 and
+ * OpenSolaris Kernel Architecture, Second Edition" by Richard McDougall
+ * and Jim Mauro. See "Part Six: Platform Specifics, Chapter 17: Locking
+ * and Synchronization, Section 7 Turnstiles and Priority Inheritance".
+ *
+ * The differences are outlined below.
+ *
+ * This implementation doesn't support read/write locks, only mutual
+ * exclusion, because ownership doesn't apply well to read/write locks.
+ *
+ * Instead of using an external sleep queue object, this module implements
+ * that functionality itself. The reasons behind this decision are :
+ * - the use of expensive priority lists used to queue threads, that
+ * a simpler sleep queue implementation shouldn't use
+ * - increased coupling with the scheduler
+ *
+ * Locking order : bucket (turnstile) -> turnstile_td -> thread_runq
+ *
+ * This order allows changing the owner of a turnstile without unlocking it
+ * which is important because a turnstile is normally used to synchronize
+ * access to the owner. Unlocking a turnstile would allow the owner to
+ * change and make complex transient states visible. The drawback is that
+ * a thread cannot be requeued atomically when its priority is changed.
+ * That deferred requeue is done during priority propagation.
+ *
+ * TODO Analyse hash parameters.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <kern/assert.h>
+#include <kern/init.h>
+#include <kern/kmem.h>
+#include <kern/list.h>
+#include <kern/macros.h>
+#include <kern/param.h>
+#include <kern/plist.h>
+#include <kern/spinlock.h>
+#include <kern/thread.h>
+#include <kern/turnstile.h>
+#include <kern/turnstile_types.h>
+
+/*
+ * Locking keys :
+ * (b) bucket
+ */
+struct turnstile_bucket {
+ struct spinlock lock;
+ struct list list; /* (b) */
+} __aligned(CPU_L1_SIZE);
+
+/*
+ * Adding/removing waiters to/from a turnstile are performed while
+ * holding both the waiter's thread data and the turnstile locks.
+ *
+ * Changing the owner of a turnstile and linking/unlinking the turnstile
+ * into/from the owner's list of owned turnstiles are done atomically,
+ * while holding both the owner's thread data and the turnstile locks.
+ *
+ * Locking keys :
+ * (b) bucket
+ * (t) turnstile_td
+ *
+ * (*) A turnstile is referenced in thread data after/before being
+ * added/removed to/from its bucket. Referencing a turnstile in
+ * thread data requires holding the thread data lock. This implies
+ * that, while holding the thread data lock, if the referenced
+ * turnstile isn't NULL, the bucket pointer is also not NULL and
+ * stable.
+ */
+struct turnstile {
+ struct turnstile_bucket *bucket; /* (b*) */
+ struct list node; /* (b) */
+ const void *sync_obj; /* (b) */
+ struct plist waiters; /* (b,t) */
+ struct turnstile *next_free; /* (b) */
+ struct turnstile_waiter *top_waiter; /* (b,t) */
+ struct thread *owner; /* (b,t) */
+ struct plist_node td_node; /* (t) */
+};
+
+/*
+ * Locking keys :
+ * (b) bucket
+ * (t) turnstile_td
+ */
+struct turnstile_waiter {
+ struct plist_node node; /* (b,t) */
+ struct thread *thread; /* (b,t) */
+ bool awaken; /* (b) */
+};
+
+#define TURNSTILE_HTABLE_SIZE 128
+
+#if !ISP2(TURNSTILE_HTABLE_SIZE)
+#error hash table size must be a power of two
+#endif /* !ISP2(TURNSTILE_HTABLE_SIZE) */
+
+#define TURNSTILE_HTABLE_MASK (TURNSTILE_HTABLE_SIZE - 1)
+
+static struct turnstile_bucket turnstile_htable[TURNSTILE_HTABLE_SIZE];
+
+static struct kmem_cache turnstile_cache;
+
+static uintptr_t
+turnstile_hash(const void *addr)
+{
+ return ((uintptr_t)addr >> 8) ^ (uintptr_t)addr;
+}
+
+static void
+turnstile_waiter_init(struct turnstile_waiter *waiter, struct thread *thread)
+{
+ plist_node_init(&waiter->node, thread_real_global_priority(thread));
+ waiter->thread = thread;
+ waiter->awaken = false;
+}
+
+static unsigned int
+turnstile_waiter_priority(const struct turnstile_waiter *waiter)
+{
+ return plist_node_priority(&waiter->node);
+}
+
+static bool
+turnstile_waiter_awaken(const struct turnstile_waiter *waiter)
+{
+ return waiter->awaken;
+}
+
+static void
+turnstile_waiter_set_awaken(struct turnstile_waiter *waiter)
+{
+ waiter->awaken = true;
+}
+
+static void
+turnstile_waiter_clear_awaken(struct turnstile_waiter *waiter)
+{
+ waiter->awaken = false;
+}
+
+static void
+turnstile_waiter_wakeup(struct turnstile_waiter *waiter)
+{
+ if (turnstile_waiter_awaken(waiter)) {
+ return;
+ }
+
+ thread_wakeup(waiter->thread);
+ turnstile_waiter_set_awaken(waiter);
+}
+
+static void
+turnstile_update_top_waiter(struct turnstile *turnstile)
+{
+ if (turnstile_empty(turnstile)) {
+ turnstile->top_waiter = NULL;
+ return;
+ }
+
+ turnstile->top_waiter = plist_last_entry(&turnstile->waiters,
+ struct turnstile_waiter, node);
+}
+
+static void
+turnstile_add_waiter(struct turnstile *turnstile,
+ struct turnstile_waiter *waiter)
+{
+ assert(!turnstile_waiter_awaken(waiter));
+ plist_add(&turnstile->waiters, &waiter->node);
+ turnstile_update_top_waiter(turnstile);
+}
+
+static void
+turnstile_remove_waiter(struct turnstile *turnstile,
+ struct turnstile_waiter *waiter)
+{
+ assert(turnstile_waiter_awaken(waiter));
+ plist_remove(&turnstile->waiters, &waiter->node);
+ turnstile_update_top_waiter(turnstile);
+}
+
+static void
+turnstile_waiter_requeue(struct turnstile *turnstile,
+ struct turnstile_waiter *waiter)
+{
+ unsigned int global_priority;
+
+ global_priority = thread_real_global_priority(waiter->thread);
+ assert(global_priority != plist_node_priority(&waiter->node));
+
+ plist_remove(&turnstile->waiters, &waiter->node);
+ plist_node_set_priority(&waiter->node, global_priority);
+ plist_add(&turnstile->waiters, &waiter->node);
+ turnstile_update_top_waiter(turnstile);
+}
+
+static void
+turnstile_td_set_waiter(struct turnstile_td *td,
+ struct turnstile_waiter *waiter)
+{
+ td->waiter = waiter;
+}
+
+static struct turnstile_waiter *
+turnstile_td_get_waiter(const struct turnstile_td *td)
+{
+ return td->waiter;
+}
+
+static void
+turnstile_td_update_top_priority(struct turnstile_td *td)
+{
+ struct turnstile_waiter *top_waiter;
+ struct turnstile *top_turnstile;
+
+ if (plist_empty(&td->owned_turnstiles)) {
+ td->top_global_priority = 0;
+ return;
+ }
+
+ top_turnstile = plist_last_entry(&td->owned_turnstiles,
+ struct turnstile, td_node);
+ top_waiter = top_turnstile->top_waiter;
+
+ if (top_waiter == NULL) {
+ td->top_global_priority = 0;
+ } else {
+ td->top_global_priority = turnstile_waiter_priority(top_waiter);
+ td->top_sched_policy = thread_real_sched_policy(top_waiter->thread);
+ td->top_priority = thread_real_priority(top_waiter->thread);
+ }
+}
+
+static void
+turnstile_td_own(struct turnstile_td *td, struct turnstile *turnstile)
+{
+ struct turnstile_waiter *top_waiter;
+ unsigned int top_priority;
+
+ assert(turnstile->owner == NULL);
+
+ top_waiter = turnstile->top_waiter;
+ assert(top_waiter != NULL);
+ top_priority = thread_real_global_priority(top_waiter->thread);
+ plist_node_init(&turnstile->td_node, top_priority);
+ plist_add(&td->owned_turnstiles, &turnstile->td_node);
+ turnstile_td_update_top_priority(td);
+
+ turnstile->owner = structof(td, struct thread, turnstile_td);
+}
+
+static void
+turnstile_td_disown(struct turnstile_td *td, struct turnstile *turnstile)
+{
+ assert(turnstile->owner == structof(td, struct thread, turnstile_td));
+
+ assert(!plist_node_unlinked(&turnstile->td_node));
+ plist_remove(&td->owned_turnstiles, &turnstile->td_node);
+ turnstile_td_update_top_priority(td);
+
+ turnstile->owner = NULL;
+}
+
+static void
+turnstile_td_reown(struct turnstile_td *td, struct turnstile *turnstile)
+{
+ assert(turnstile->owner == structof(td, struct thread, turnstile_td));
+ assert(!plist_node_unlinked(&turnstile->td_node));
+ plist_remove(&td->owned_turnstiles, &turnstile->td_node);
+ turnstile->owner = NULL;
+ turnstile_td_own(td, turnstile);
+}
+
+/*
+ * The caller must hold the turnstile thread data lock and no turnstile
+ * locks when calling this function. The thread data are unlocked on return.
+ *
+ * In addition, this function drops a reference on the thread associated
+ * with the given thread data.
+ */
+static void
+turnstile_td_propagate_priority_loop(struct turnstile_td *td)
+{
+ unsigned int user_priority, real_priority, top_priority;
+ struct turnstile_waiter *waiter;
+ struct turnstile *turnstile;
+ struct thread *thread;
+ unsigned short priority;
+ unsigned char policy;
+ int error;
+
+ /*
+ * At the very least, this function must make sure that :
+ * - the given thread has its intended priority, which is the
+ * highest among its own and all the waiters in the turnstiles
+ * it owns, and
+ * - the thread is at its intended position in the turnstile it's
+ * waiting on, if any.
+ */
+
+ for (;;) {
+ thread = structof(td, struct thread, turnstile_td);
+ user_priority = thread_user_global_priority(thread);
+ real_priority = thread_real_global_priority(thread);
+ top_priority = td->top_global_priority;
+
+ if (top_priority > user_priority) {
+ policy = td->top_sched_policy;
+ priority = td->top_priority;
+ } else {
+ top_priority = user_priority;
+ policy = thread_user_sched_policy(thread);
+ priority = thread_user_priority(thread);
+ }
+
+ if (top_priority != real_priority) {
+ thread_pi_setscheduler(thread, policy, priority);
+ }
+
+ waiter = turnstile_td_get_waiter(td);
+
+ if ((waiter == NULL)
+ || (top_priority == turnstile_waiter_priority(waiter))) {
+ spinlock_unlock(&td->lock);
+ thread_unref(thread);
+ return;
+ }
+
+ turnstile = turnstile_td_get_turnstile(td);
+ assert(turnstile != NULL);
+
+ error = spinlock_trylock(&turnstile->bucket->lock);
+
+ if (error) {
+ spinlock_unlock(&td->lock);
+ spinlock_lock(&td->lock);
+ continue;
+ }
+
+ /*
+ * This couldn't be done while changing the thread's priority
+ * because of locking restrictions. Do it now.
+ */
+ turnstile_waiter_requeue(turnstile, waiter);
+
+ spinlock_unlock(&td->lock);
+ thread_unref(thread);
+
+ thread = turnstile->owner;
+
+ if (thread == NULL) {
+ break;
+ }
+
+ td = thread_turnstile_td(thread);
+
+ thread_ref(thread);
+ spinlock_lock(&td->lock);
+
+ turnstile_td_reown(td, turnstile);
+
+ spinlock_unlock(&turnstile->bucket->lock);
+ }
+
+ spinlock_unlock(&turnstile->bucket->lock);
+}
+
+void
+turnstile_td_propagate_priority(struct turnstile_td *td)
+{
+ struct thread *thread;
+
+ thread = structof(td, struct thread, turnstile_td);
+
+ thread_ref(thread);
+ spinlock_lock(&td->lock);
+ turnstile_td_propagate_priority_loop(td);
+}
+
+static void
+turnstile_assert_init_state(const struct turnstile *turnstile)
+{
+ assert(turnstile->bucket == NULL);
+ assert(turnstile->sync_obj == NULL);
+ assert(plist_empty(&turnstile->waiters));
+ assert(turnstile->next_free == NULL);
+ assert(turnstile->top_waiter == NULL);
+ assert(turnstile->owner == NULL);
+}
+
+static void
+turnstile_use(struct turnstile *turnstile, const void *sync_obj)
+{
+ assert(turnstile->sync_obj == NULL);
+ turnstile->sync_obj = sync_obj;
+}
+
+static void
+turnstile_unuse(struct turnstile *turnstile)
+{
+ assert(turnstile->sync_obj != NULL);
+ turnstile->sync_obj = NULL;
+}
+
+static bool
+turnstile_in_use(const struct turnstile *turnstile)
+{
+ return turnstile->sync_obj != NULL;
+}
+
+static bool
+turnstile_in_use_by(const struct turnstile *turnstile, const void *sync_obj)
+{
+ return turnstile->sync_obj == sync_obj;
+}
+
+static void
+turnstile_bucket_init(struct turnstile_bucket *bucket)
+{
+ spinlock_init(&bucket->lock);
+ list_init(&bucket->list);
+}
+
+static struct turnstile_bucket *
+turnstile_bucket_get(const void *sync_obj)
+{
+ uintptr_t index;
+
+ index = turnstile_hash(sync_obj) & TURNSTILE_HTABLE_MASK;
+ assert(index < ARRAY_SIZE(turnstile_htable));
+ return &turnstile_htable[index];
+}
+
+static void
+turnstile_bucket_add(struct turnstile_bucket *bucket,
+ struct turnstile *turnstile)
+{
+ assert(turnstile->bucket == NULL);
+ turnstile->bucket = bucket;
+ list_insert_tail(&bucket->list, &turnstile->node);
+}
+
+static void
+turnstile_bucket_remove(struct turnstile_bucket *bucket,
+ struct turnstile *turnstile)
+{
+ assert(turnstile->bucket == bucket);
+ turnstile->bucket = NULL;
+ list_remove(&turnstile->node);
+}
+
+static struct turnstile *
+turnstile_bucket_lookup(const struct turnstile_bucket *bucket,
+ const void *sync_obj)
+{
+ struct turnstile *turnstile;
+
+ list_for_each_entry(&bucket->list, turnstile, node) {
+ if (turnstile_in_use_by(turnstile, sync_obj)) {
+ return turnstile;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+turnstile_ctor(void *ptr)
+{
+ struct turnstile *turnstile;
+
+ turnstile = ptr;
+ turnstile->bucket = NULL;
+ turnstile->sync_obj = NULL;
+ plist_init(&turnstile->waiters);
+ turnstile->next_free = NULL;
+ turnstile->top_waiter = NULL;
+ turnstile->owner = NULL;
+}
+
+void __init
+turnstile_setup(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(turnstile_htable); i++) {
+ turnstile_bucket_init(&turnstile_htable[i]);
+ }
+
+ kmem_cache_init(&turnstile_cache, "turnstile", sizeof(struct turnstile),
+ CPU_L1_SIZE, turnstile_ctor, 0);
+}
+
+struct turnstile *
+turnstile_create(void)
+{
+ struct turnstile *turnstile;
+
+ turnstile = kmem_cache_alloc(&turnstile_cache);
+
+ if (turnstile == NULL) {
+ return NULL;
+ }
+
+ turnstile_assert_init_state(turnstile);
+ return turnstile;
+}
+
+void
+turnstile_destroy(struct turnstile *turnstile)
+{
+ turnstile_assert_init_state(turnstile);
+ kmem_cache_free(&turnstile_cache, turnstile);
+}
+
+struct turnstile *
+turnstile_acquire(const void *sync_obj)
+{
+ struct turnstile_bucket *bucket;
+ struct turnstile *turnstile;
+
+ assert(sync_obj != NULL);
+
+ bucket = turnstile_bucket_get(sync_obj);
+
+ spinlock_lock(&bucket->lock);
+
+ turnstile = turnstile_bucket_lookup(bucket, sync_obj);
+
+ if (turnstile == NULL) {
+ spinlock_unlock(&bucket->lock);
+ return NULL;
+ }
+
+ return turnstile;
+}
+
+void
+turnstile_release(struct turnstile *turnstile)
+{
+ spinlock_unlock(&turnstile->bucket->lock);
+}
+
+static void
+turnstile_push_free(struct turnstile *turnstile,
+ struct turnstile *free_turnstile)
+{
+ assert(free_turnstile->next_free == NULL);
+ free_turnstile->next_free = turnstile->next_free;
+ turnstile->next_free = free_turnstile;
+}
+
+static struct turnstile *
+turnstile_pop_free(struct turnstile *turnstile)
+{
+ struct turnstile *free_turnstile;
+
+ free_turnstile = turnstile->next_free;
+
+ if (free_turnstile == NULL) {
+ return NULL;
+ }
+
+ turnstile->next_free = free_turnstile->next_free;
+ free_turnstile->next_free = NULL;
+ return free_turnstile;
+}
+
+struct turnstile *
+turnstile_lend(const void *sync_obj)
+{
+ struct turnstile_bucket *bucket;
+ struct turnstile *turnstile, *prev;
+ struct turnstile_td *td;
+
+ assert(sync_obj != NULL);
+
+ turnstile = thread_turnstile_lend();
+ turnstile_assert_init_state(turnstile);
+
+ td = thread_turnstile_td(thread_self());
+ bucket = turnstile_bucket_get(sync_obj);
+
+ spinlock_lock(&bucket->lock);
+
+ prev = turnstile_bucket_lookup(bucket, sync_obj);
+
+ if (prev == NULL) {
+ turnstile_use(turnstile, sync_obj);
+ turnstile_bucket_add(bucket, turnstile);
+ } else {
+ turnstile_push_free(prev, turnstile);
+ turnstile = prev;
+ }
+
+ spinlock_lock(&td->lock);
+ turnstile_td_set_turnstile(td, turnstile);
+ spinlock_unlock(&td->lock);
+
+ return turnstile;
+}
+
+void
+turnstile_return(struct turnstile *turnstile)
+{
+ struct turnstile_bucket *bucket;
+ struct turnstile *free_turnstile;
+ struct turnstile_td *td;
+
+ assert(turnstile_in_use(turnstile));
+
+ td = thread_turnstile_td(thread_self());
+
+ spinlock_lock(&td->lock);
+ turnstile_td_set_turnstile(td, NULL);
+ spinlock_unlock(&td->lock);
+
+ bucket = turnstile->bucket;
+ free_turnstile = turnstile_pop_free(turnstile);
+
+ if (free_turnstile == NULL) {
+ turnstile_bucket_remove(bucket, turnstile);
+ turnstile_unuse(turnstile);
+ free_turnstile = turnstile;
+ }
+
+ spinlock_unlock(&bucket->lock);
+
+ turnstile_assert_init_state(free_turnstile);
+ thread_turnstile_return(free_turnstile);
+}
+
+bool
+turnstile_empty(const struct turnstile *turnstile)
+{
+ return plist_empty(&turnstile->waiters);
+}
+
+static void
+turnstile_set_owner(struct turnstile *turnstile, struct thread *owner)
+{
+ struct turnstile_td *td;
+
+ assert(owner != NULL);
+ assert((turnstile->owner == NULL) || (turnstile->owner == owner));
+
+ td = thread_turnstile_td(owner);
+
+ thread_ref(owner);
+ spinlock_lock(&td->lock);
+
+ if (turnstile->owner == NULL) {
+ turnstile_td_own(td, turnstile);
+ }
+
+ spinlock_unlock(&turnstile->bucket->lock);
+
+ turnstile_td_propagate_priority_loop(td);
+
+ spinlock_lock(&turnstile->bucket->lock);
+}
+
+void
+turnstile_wait(struct turnstile *turnstile, const char *wchan,
+ struct thread *owner)
+{
+ struct turnstile_waiter waiter;
+ struct turnstile_td *td;
+ struct thread *thread;
+
+ thread = thread_self();
+ assert(thread != owner);
+
+ td = thread_turnstile_td(thread);
+
+ spinlock_lock(&td->lock);
+ turnstile_waiter_init(&waiter, thread);
+ turnstile_add_waiter(turnstile, &waiter);
+ turnstile_td_set_waiter(td, &waiter);
+ spinlock_unlock(&td->lock);
+
+ if (owner == NULL) {
+ if (turnstile->top_waiter == &waiter) {
+ turnstile_waiter_set_awaken(&waiter);
+ }
+ } else {
+ /* This function temporarily unlocks the turnstile */
+ turnstile_set_owner(turnstile, owner);
+ }
+
+ for (;;) {
+ if (!turnstile_waiter_awaken(&waiter)) {
+ thread_sleep(&turnstile->bucket->lock, turnstile->sync_obj, wchan);
+ }
+
+ /*
+ * The real priority of a thread may change between waking up
+ * and reacquiring the turnstile.
+ */
+ if (turnstile->top_waiter == &waiter) {
+ break;
+ }
+
+ /* Otherwise, make sure the new top waiter is awakened */
+ turnstile_waiter_wakeup(turnstile->top_waiter);
+ turnstile_waiter_clear_awaken(&waiter);
+ }
+
+ spinlock_lock(&td->lock);
+ turnstile_td_set_waiter(td, NULL);
+ turnstile_remove_waiter(turnstile, &waiter);
+ spinlock_unlock(&td->lock);
+}
+
+void
+turnstile_signal(struct turnstile *turnstile)
+{
+ struct turnstile_waiter *waiter;
+
+ if (turnstile_empty(turnstile)) {
+ return;
+ }
+
+ waiter = plist_last_entry(&turnstile->waiters,
+ struct turnstile_waiter, node);
+ turnstile_waiter_wakeup(waiter);
+}
+
+void
+turnstile_own(struct turnstile *turnstile)
+{
+ struct turnstile_td *td;
+ struct thread *owner;
+ unsigned int top_priority;
+
+ assert(turnstile->owner == NULL);
+
+ if (turnstile_empty(turnstile)) {
+ return;
+ }
+
+ owner = thread_self();
+ top_priority = turnstile_waiter_priority(turnstile->top_waiter);
+ assert(thread_real_global_priority(owner) >= top_priority);
+ td = thread_turnstile_td(owner);
+
+ spinlock_lock(&td->lock);
+ turnstile_td_own(td, turnstile);
+ spinlock_unlock(&td->lock);
+}
+
+void
+turnstile_disown(struct turnstile *turnstile)
+{
+ struct turnstile_td *td;
+ struct thread *owner;
+
+ owner = thread_self();
+ assert(turnstile->owner == owner);
+ assert(!turnstile_empty(turnstile));
+
+ td = thread_turnstile_td(owner);
+
+ spinlock_lock(&td->lock);
+ turnstile_td_disown(td, turnstile);
+ spinlock_unlock(&td->lock);
+}
diff --git a/kern/turnstile.h b/kern/turnstile.h
new file mode 100644
index 00000000..21a5a1a9
--- /dev/null
+++ b/kern/turnstile.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Priority propagation capable sleep queues.
+ *
+ * Turnstiles are used to build sleeping synchronization primitives where
+ * ownership applies, such as mutexes. They allow threads with different
+ * priorities to contend on the same synchronization object without
+ * unbounded priority inversion.
+ */
+
+#ifndef _KERN_TURNSTILE_H
+#define _KERN_TURNSTILE_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <kern/plist.h>
+#include <kern/spinlock.h>
+#include <kern/thread.h>
+#include <kern/turnstile_types.h>
+
+struct turnstile;
+
+/*
+ * Turnstile thread data.
+ */
+struct turnstile_td;
+
+#define turnstile_td_assert_lock(td) spinlock_assert_locked(&(td)->lock)
+
+/*
+ * Initialize turnstile thread data.
+ */
+static inline void
+turnstile_td_init(struct turnstile_td *td)
+{
+ spinlock_init(&td->lock);
+ td->turnstile = NULL;
+ td->waiter = NULL;
+ plist_init(&td->owned_turnstiles);
+ td->top_global_priority = 0;
+}
+
+/*
+ * Turnstile thread data locking functions.
+ */
+
+static inline void
+turnstile_td_lock(struct turnstile_td *td)
+{
+ spinlock_lock(&td->lock);
+}
+
+static inline int
+turnstile_td_trylock(struct turnstile_td *td)
+{
+ return spinlock_trylock(&td->lock);
+}
+
+static inline void
+turnstile_td_unlock(struct turnstile_td *td)
+{
+ spinlock_unlock(&td->lock);
+}
+
+/*
+ * Functions managing the turnstile a thread is sleeping in.
+ */
+
+static inline void
+turnstile_td_set_turnstile(struct turnstile_td *td,
+ struct turnstile *turnstile)
+{
+ td->turnstile = turnstile;
+}
+
+static inline struct turnstile *
+turnstile_td_get_turnstile(const struct turnstile_td *td)
+{
+ return td->turnstile;
+}
+
+/*
+ * Propagate priority starting at the thread containing the given thread data.
+ */
+void turnstile_td_propagate_priority(struct turnstile_td *td);
+
+/*
+ * Initialize the turnstile module.
+ */
+void turnstile_setup(void);
+
+/*
+ * Create/destroy a turnstile.
+ */
+struct turnstile * turnstile_create(void);
+void turnstile_destroy(struct turnstile *turnstile);
+
+/*
+ * Acquire/release a turnstile.
+ *
+ * Acquiring a turnstile serializes all access and disables preemption.
+ */
+struct turnstile * turnstile_acquire(const void *sync_obj);
+void turnstile_release(struct turnstile *turnstile);
+
+/*
+ * Lend/return a turnstile.
+ *
+ * A thread lends its private turnstile to the turnstile module in
+ * order to prepare its sleep. The turnstile obtained on lending
+ * is either the thread's turnstile, or an already existing turnstile
+ * for this synchronization object if another thread is waiting.
+ *
+ * When multiple threads are waiting on the same turnstile, the extra
+ * turnstiles lent are kept in an internal free list, used when threads
+ * are awakened to return a turnstile to them.
+ *
+ * Note that the turnstile returned may not be the one lent.
+ *
+ * The turnstile obtained when lending is automatically acquired.
+ */
+struct turnstile * turnstile_lend(const void *sync_obj);
+void turnstile_return(struct turnstile *turnstile);
+
+/*
+ * Return true if the given turnstile has no waiters.
+ *
+ * The turnstile must be acquired when calling this function.
+ */
+bool turnstile_empty(const struct turnstile *turnstile);
+
+/*
+ * Wait for a wake up on the given turnstile.
+ *
+ * The turnstile must be lent when calling this function. It is
+ * released and later reacquired before returning from this function.
+ *
+ * The calling thread is considered a waiter as long as it didn't
+ * reacquire the turnstile. This means that signalling a turnstile
+ * has no visible effect on the number of waiters until the turnstile
+ * is released, e.g. if a single thread is waiting and another signals
+ * the turnstile, the turnstile is not immediately considered empty.
+ *
+ * If owner isn't NULL, it must refer to the thread currently owning
+ * the associated synchronization object. The priority of the caller
+ * is propagated to the chain of turnstiles and owners as necessary
+ * to prevent unbounded priority inversion.
+ */
+void turnstile_wait(struct turnstile *turnstile, const char *wchan,
+ struct thread *owner);
+
+/*
+ * Wake up a thread waiting on the given turnstile, if any.
+ *
+ * The turnstile must be acquired when calling this function.
+ * Since a turnstile must be lent (and in turn is automatically
+ * acquired) when waiting, and acquired in order to signal it,
+ * wake-ups are serialized and cannot be missed.
+ */
+void turnstile_signal(struct turnstile *turnstile);
+
+/*
+ * Own/disown a turnstile.
+ *
+ * The turnstile must be lent when taking ownership, acquired when
+ * releasing it. Owning has no effect on empty turnstiles.
+ * Conversely, an empty turnstile cannot be disowned.
+ *
+ * Ownership must be updated atomically with regard to the ownership
+ * of the associated synchronization object.
+ */
+void turnstile_own(struct turnstile *turnstile);
+void turnstile_disown(struct turnstile *turnstile);
+
+#endif /* _KERN_TURNSTILE_H */
diff --git a/kern/turnstile_types.h b/kern/turnstile_types.h
new file mode 100644
index 00000000..a95b691e
--- /dev/null
+++ b/kern/turnstile_types.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definition used to avoid inclusion circular dependencies.
+ */
+
+#ifndef _KERN_TURNSTILE_TYPES_H
+#define _KERN_TURNSTILE_TYPES_H
+
+#include <kern/plist_types.h>
+#include <kern/spinlock_types.h>
+
+struct turnstile;
+struct turnstile_waiter;
+
+/*
+ * Per-thread turnstile data.
+ *
+ * The turnstile member indicates whether this thread is in a turnstile,
+ * and is only valid if the thread is not running.
+ *
+ * The waiter points to the structure a thread uses to queue itself on
+ * a turnstile. It is used to access a sleeping thread from another
+ * thread, e.g. on wake-ups or priority updates.
+ *
+ * The list of owned turnstiles is used by priority propagation to
+ * determine the top priority among all waiters, which is stored in
+ * the thread data so that turnstiles are quickly unlocked.
+ *
+ * Locking keys :
+ * (b) bucket
+ * (t) turnstile_td
+ */
+struct turnstile_td {
+ struct spinlock lock;
+ struct turnstile *turnstile; /* (t) */
+ struct turnstile_waiter *waiter; /* (b,t) */
+ struct plist owned_turnstiles; /* (t) */
+ unsigned int top_global_priority; /* (t) */
+ unsigned char top_sched_policy; /* (t) */
+ unsigned short top_priority; /* (t) */
+};
+
+#endif /* _KERN_TURNSTILE_TYPES_H */