author     Richard Braun <rbraun@sceen.net>    2017-03-04 17:02:09 +0100
committer  Richard Braun <rbraun@sceen.net>    2017-03-04 17:02:09 +0100
commit     0b2db2776b3a4fb208f13867934147f8cef79b60 (patch)
tree       a685ad29009ecfe1aacc9901b67490fb2b6db48f
parent     cee25b55290865734d92a6b6262fe79887288b1a (diff)
parent     4121baff3354f8bd1a4ae62b68af29c6f3965462 (diff)
Merge branch 'sleep_rework'
-rw-r--r--  INSTALL                    5
-rw-r--r--  Makefrag.am               16
-rw-r--r--  configure.ac              13
-rw-r--r--  kern/condition.c         194
-rw-r--r--  kern/condition.h          56
-rw-r--r--  kern/condition_types.h     9
-rw-r--r--  kern/kernel.c              4
-rw-r--r--  kern/mutex.c              48
-rw-r--r--  kern/mutex.h              98
-rw-r--r--  kern/mutex_i.h            68
-rw-r--r--  kern/mutex_types.h        19
-rw-r--r--  kern/plist.c              65
-rw-r--r--  kern/plist.h             270
-rw-r--r--  kern/plist_types.h        50
-rw-r--r--  kern/rtmutex.c           108
-rw-r--r--  kern/rtmutex.h           111
-rw-r--r--  kern/rtmutex_i.h          79
-rw-r--r--  kern/rtmutex_types.h      30
-rw-r--r--  kern/sleepq.c            382
-rw-r--r--  kern/sleepq.h            126
-rw-r--r--  kern/task.c               15
-rw-r--r--  kern/thread.c            421
-rw-r--r--  kern/thread.h            216
-rw-r--r--  kern/thread_i.h           53
-rw-r--r--  kern/turnstile.c         788
-rw-r--r--  kern/turnstile.h         191
-rw-r--r--  kern/turnstile_types.h    58
-rw-r--r--  test/test_mutex_pi.c     482
28 files changed, 3654 insertions, 321 deletions
diff --git a/INSTALL b/INSTALL
index f9042109..97dcd148 100644
--- a/INSTALL
+++ b/INSTALL
@@ -243,6 +243,11 @@ X15 Options
in a file named `test_pmap_update_mp.c' would be selected with the
option `--enable-test-module=pmap_update_mp'.
+`--enable-mutex-pi'
+ Enable priority inheritance for regular mutexes (note that priority
+ inheritance is always enabled for real-time mutexes).
+ TODO Thoroughly describe the implications of this option
+
`--with-max-cpus=MAX_CPUS'
Set the maximum number of supported processors.
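The option only selects the implementation behind struct mutex at build time; the locking interface is the same either way. A minimal sketch, using a hypothetical lock and counter, of code that is unaffected by whether `--enable-mutex-pi' is set:

#include <kern/mutex.h>

static struct mutex example_lock;       /* hypothetical shared lock */
static unsigned long example_counter;   /* hypothetical shared data */

static void
example_init(void)
{
    mutex_init(&example_lock);
}

static void
example_increment(void)
{
    mutex_lock(&example_lock);          /* may sleep if contended */
    example_counter++;
    mutex_unlock(&example_lock);
}

When the option is enabled, struct mutex wraps a real-time mutex (see kern/mutex_types.h below) and the owner inherits the priority of higher priority waiters; otherwise the plain sleep lock from kern/mutex.c is used.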
diff --git a/Makefrag.am b/Makefrag.am
index a42c5ea8..f0d8a393 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -41,6 +41,9 @@ x15_SOURCES += \
kern/param.h \
kern/percpu.c \
kern/percpu.h \
+ kern/plist.c \
+ kern/plist.h \
+ kern/plist_types.h \
kern/printk.c \
kern/printk.h \
kern/rbtree.c \
@@ -49,6 +52,12 @@ x15_SOURCES += \
kern/rdxtree.c \
kern/rdxtree.h \
kern/rdxtree_i.h \
+ kern/rtmutex.c \
+ kern/rtmutex.h \
+ kern/rtmutex_i.h \
+ kern/rtmutex_types.h \
+ kern/sleepq.c \
+ kern/sleepq.h \
kern/spinlock.h \
kern/spinlock_i.h \
kern/spinlock_types.h \
@@ -64,6 +73,9 @@ x15_SOURCES += \
kern/thread.c \
kern/thread.h \
kern/thread_i.h \
+ kern/turnstile.c \
+ kern/turnstile.h \
+ kern/turnstile_types.h \
kern/types.h \
kern/work.c \
kern/work.h \
@@ -90,6 +102,10 @@ if TEST_LLSYNC_DEFER
x15_SOURCES += test/test_llsync_defer.c
endif TEST_LLSYNC_DEFER
+if TEST_MUTEX_PI
+x15_SOURCES += test/test_mutex_pi.c
+endif TEST_MUTEX_PI
+
if TEST_PMAP_UPDATE_MP
x15_SOURCES += test/test_pmap_update_mp.c
endif TEST_PMAP_UPDATE_MP
diff --git a/configure.ac b/configure.ac
index 023140bd..0b32388f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -34,12 +34,19 @@ AC_ARG_WITH([max-cpus],
[opt_max_cpus=$withval],
[opt_max_cpus=128])
+AC_ARG_ENABLE([mutex-pi],
+ [AS_HELP_STRING([--enable-mutex-pi],
+ [enable priority inheritance for regular mutexes
+ (note that priority inheritance is always
+ enabled for real-time mutexes)])])
+
AC_DEFINE([__KERNEL__], [1], [kernel code])
AC_DEFINE_UNQUOTED([ARCH], [$arch], [arch])
m4_define([ENABLE_TEST_MODULE],
[AS_CASE(["$enable_test_module"],
[llsync_defer], [test_llsync_defer=yes],
+ [mutex_pi], [test_mutex_pi=yes],
[pmap_update_mp], [test_pmap_update_mp=yes],
[sref_dirty_zeroes], [test_sref_dirty_zeroes=yes],
[sref_noref], [test_sref_noref=yes],
@@ -55,6 +62,8 @@ m4_define([ENABLE_TEST_MODULE],
AS_IF([test x"$enable_test_module" != xno], [ENABLE_TEST_MODULE])
AM_CONDITIONAL([TEST_LLSYNC_DEFER],
[test x"$test_llsync_defer" = xyes])
+AM_CONDITIONAL([TEST_MUTEX_PI],
+ [test x"$test_mutex_pi" = xyes])
AM_CONDITIONAL([TEST_PMAP_UPDATE_MP],
[test x"$test_pmap_update_mp" = xyes])
AM_CONDITIONAL([TEST_SREF_DIRTY_ZEROES],
@@ -74,6 +83,10 @@ AC_DEFINE_UNQUOTED([MAX_CPUS], [$opt_max_cpus],
[maximum number of supported processors])
AC_MSG_NOTICE([maximum number of supported processors: $opt_max_cpus])
+AS_IF([test x"$enable_mutex_pi" = xyes],
+ [AC_DEFINE_UNQUOTED([X15_MUTEX_PI], [],
+ [Enable priority inheritance for regular mutexes])])
+
AH_BOTTOM([#include <kern/config.h>])
AC_CONFIG_HEADER([config.h])
AC_CONFIG_FILES([Makefile])
diff --git a/kern/condition.c b/kern/condition.c
index 467cb85a..bcd5ba42 100644
--- a/kern/condition.c
+++ b/kern/condition.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014 Richard Braun.
+ * Copyright (c) 2013-2017 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -15,124 +15,178 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*
- * In order to avoid the infamous thundering herd problem, this implementation
- * doesn't wake all threads when broadcasting a condition. Instead, they are
- * queued on the mutex associated with the condition, and an attempt to wake
- * one by locking and unlocking the mutex is performed. If the mutex is already
- * locked, the current owner does the same when unlocking.
+ * Locking order : mutex -> sleep queue
*/
#include <stddef.h>
#include <kern/assert.h>
#include <kern/condition.h>
-#include <kern/list.h>
+#include <kern/condition_types.h>
#include <kern/mutex.h>
-#include <kern/mutex_i.h>
-#include <kern/spinlock.h>
+#include <kern/sleepq.h>
#include <kern/thread.h>
-void
-condition_wait(struct condition *condition, struct mutex *mutex)
+static void
+condition_inc_nr_sleeping_waiters(struct condition *condition)
{
- struct mutex_waiter waiter;
- unsigned int state;
+ condition->nr_sleeping_waiters++;
+ assert(condition->nr_sleeping_waiters != 0);
+}
- waiter.thread = thread_self();
+static void
+condition_dec_nr_sleeping_waiters(struct condition *condition)
+{
+ assert(condition->nr_sleeping_waiters != 0);
+ condition->nr_sleeping_waiters--;
+}
- spinlock_lock(&condition->lock);
+static void
+condition_inc_nr_pending_waiters(struct condition *condition)
+{
+ condition->nr_pending_waiters++;
+ assert(condition->nr_pending_waiters != 0);
+}
- assert((condition->mutex == NULL) || (condition->mutex == mutex));
+static void
+condition_dec_nr_pending_waiters(struct condition *condition)
+{
+ assert(condition->nr_pending_waiters != 0);
+ condition->nr_pending_waiters--;
+}
- if (condition->mutex == NULL) {
- condition->mutex = mutex;
- }
+static void
+condition_move_waiters(struct condition *condition)
+{
+ unsigned short old;
- list_insert_tail(&condition->waiters, &waiter.node);
+ assert(condition->nr_sleeping_waiters != 0);
+ old = condition->nr_pending_waiters;
+ condition->nr_pending_waiters += condition->nr_sleeping_waiters;
+ assert(old < condition->nr_pending_waiters);
+ condition->nr_sleeping_waiters = 0;
+}
- spinlock_lock(&mutex->lock);
+void
+condition_wait(struct condition *condition, struct mutex *mutex)
+{
+ struct condition *last_cond;
+ struct sleepq *sleepq;
+
+ mutex_assert_locked(mutex);
+
+ /*
+ * Special case :
+ *
+ * mutex_lock(lock);
+ *
+ * for (;;) {
+ * while (!done) {
+ * condition_wait(condition, lock);
+ * }
+ *
+ * do_something();
+ * }
+ *
+ * Pull the last condition before unlocking the mutex to prevent
+ * mutex_unlock() from reacquiring the condition sleep queue.
+ */
+ last_cond = thread_pull_last_cond();
+
+ sleepq = sleepq_lend(condition, true);
+
+ mutex_unlock(mutex);
+
+ if (last_cond != NULL) {
+ assert(last_cond == condition);
+
+ if (condition->nr_pending_waiters != 0) {
+ sleepq_signal(sleepq);
+ }
+ }
- state = mutex_release(mutex);
+ condition_inc_nr_sleeping_waiters(condition);
+ sleepq_wait(sleepq, "cond");
+ condition_dec_nr_pending_waiters(condition);
- if (state == MUTEX_CONTENDED) {
- mutex_signal(mutex);
+ if (condition->nr_pending_waiters != 0) {
+ thread_set_last_cond(condition);
}
- spinlock_unlock(&condition->lock);
+ sleepq_return(sleepq);
- mutex_wait(mutex, &waiter);
- mutex_trydowngrade(mutex);
-
- spinlock_unlock(&mutex->lock);
+ mutex_lock(mutex);
}
void
condition_signal(struct condition *condition)
{
- struct mutex_waiter *waiter;
- struct mutex *mutex;
- unsigned int state;
+ struct sleepq *sleepq;
- spinlock_lock(&condition->lock);
+ sleepq = sleepq_acquire(condition, true);
- if (condition->mutex == NULL) {
- spinlock_unlock(&condition->lock);
+ if (sleepq == NULL) {
return;
}
- mutex = condition->mutex;
- waiter = list_first_entry(&condition->waiters, struct mutex_waiter, node);
- list_remove(&waiter->node);
-
- if (list_empty(&condition->waiters)) {
- condition->mutex = NULL;
+ if (condition->nr_sleeping_waiters == 0) {
+ goto out;
}
- spinlock_unlock(&condition->lock);
-
- spinlock_lock(&mutex->lock);
-
- mutex_queue(mutex, waiter);
- state = mutex_tryacquire_slow(mutex);
+ sleepq_signal(sleepq);
- if (state == MUTEX_UNLOCKED) {
- mutex_release(mutex);
- mutex_signal(mutex);
- }
+ condition_dec_nr_sleeping_waiters(condition);
+ condition_inc_nr_pending_waiters(condition);
- spinlock_unlock(&mutex->lock);
+out:
+ sleepq_release(sleepq);
}
void
condition_broadcast(struct condition *condition)
{
- struct list waiters;
- struct mutex *mutex;
- unsigned int state;
+ struct sleepq *sleepq;
- spinlock_lock(&condition->lock);
+ sleepq = sleepq_acquire(condition, true);
- if (condition->mutex == NULL) {
- spinlock_unlock(&condition->lock);
+ if (sleepq == NULL) {
return;
}
- mutex = condition->mutex;
- condition->mutex = NULL;
- list_set_head(&waiters, &condition->waiters);
- list_init(&condition->waiters);
+ if (condition->nr_sleeping_waiters == 0) {
+ goto out;
+ }
- spinlock_unlock(&condition->lock);
+ sleepq_signal(sleepq);
- spinlock_lock(&mutex->lock);
+ condition_move_waiters(condition);
- mutex_queue_list(mutex, &waiters);
- state = mutex_tryacquire_slow(mutex);
+out:
+ sleepq_release(sleepq);
+}
- if (state == MUTEX_UNLOCKED) {
- mutex_release(mutex);
- mutex_signal(mutex);
+void
+condition_wakeup(struct condition *condition)
+{
+ struct sleepq *sleepq;
+
+ sleepq = sleepq_acquire(condition, true);
+
+ if (sleepq == NULL) {
+ return;
}
- spinlock_unlock(&mutex->lock);
+ if (condition->nr_pending_waiters == 0) {
+ goto out;
+ }
+
+ /*
+ * Rely on the FIFO ordering of sleep queues so that signalling multiple
+ * times always wakes up the same thread, as long as that thread didn't
+ * reacquire the sleep queue.
+ */
+ sleepq_signal(sleepq);
+
+out:
+ sleepq_release(sleepq);
}
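The special case handled at the top of condition_wait() is the usual consumer loop from its comment. A slightly fuller sketch of that pattern; the mutex, condition, predicate and do_something() are hypothetical, as in the comment:

#include <stdbool.h>

#include <kern/condition.h>
#include <kern/mutex.h>

static struct mutex lock;               /* hypothetical */
static struct condition condition;      /* hypothetical */
static bool done;                       /* hypothetical predicate */

static void
consumer(void)
{
    mutex_lock(&lock);

    for (;;) {
        while (!done) {
            condition_wait(&condition, &lock);
        }

        do_something();                 /* hypothetical work */
        done = false;
    }
}

Because condition_wait() relocks the mutex before returning and the loop may immediately wait again, pulling the last condition before unlocking lets condition_wait() pass a pending wake-up along itself instead of having mutex_unlock() reacquire the condition sleep queue it already holds.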
diff --git a/kern/condition.h b/kern/condition.h
index 55d37bf2..0ce8d94f 100644
--- a/kern/condition.h
+++ b/kern/condition.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014 Richard Braun.
+ * Copyright (c) 2013-2017 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,34 +16,62 @@
*
*
* Condition variables.
+ *
+ * A condition variable is a synchronization primitive used to wait
+ * until a predicate becomes true. Multiple threads can be waiting
+ * for this condition. In order to synchronize changes on the predicate
+ * with waiting and signalling, a condition variable must be associated
+ * with a mutex.
*/
#ifndef _KERN_CONDITION_H
#define _KERN_CONDITION_H
-#include <stddef.h>
-
#include <kern/condition_types.h>
-#include <kern/list.h>
-#include <kern/spinlock.h>
+#include <kern/mutex_types.h>
-/*
- * Condition variable.
- */
struct condition;
+/*
+ * Initialize a condition variable.
+ */
static inline void
condition_init(struct condition *condition)
{
- spinlock_init(&condition->lock);
- condition->mutex = NULL;
- list_init(&condition->waiters);
+ condition->nr_sleeping_waiters = 0;
+ condition->nr_pending_waiters = 0;
}
-void condition_wait(struct condition *cond, struct mutex *mutex);
+/*
+ * Wait for a wake-up on the given condition variable.
+ *
+ * The associated mutex must be locked when calling this function.
+ * It is unlocked before waiting and relocked before returning.
+ */
+void condition_wait(struct condition *condition, struct mutex *mutex);
-void condition_signal(struct condition *cond);
+/*
+ * Wake up one (signal) or all (broadcast) threads waiting on a
+ * condition variable, if any.
+ *
+ * Although it is not necessary to hold the mutex associated with the
+ * condition variable when calling these functions, doing so guarantees
+ * that a wake-up done when changing the predicate cannot be missed by
+ * waiting threads.
+ */
+void condition_signal(struct condition *condition);
+void condition_broadcast(struct condition *condition);
-void condition_broadcast(struct condition *cond);
+/*
+ * Wake up a pending thread.
+ *
+ * This function isn't part of the standard condition variable interface.
+ * It is used to chain wake-ups to avoid the thundering herd effect.
+ * When broadcasting a condition variable, a single thread is actually
+ * awakened. Other threads become "pending waiters", still asleep but
+ * eligible for wake-up when the mutex associated with the condition variable,
+ * relocked when returning from condition_wait(), is finally unlocked.
+ */
+void condition_wakeup(struct condition *condition);
#endif /* _KERN_CONDITION_H */
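On the signalling side, updating the predicate and broadcasting while the mutex is held is what makes the wake-up impossible to miss. A matching producer sketch, reusing the hypothetical names from the consumer above:

static void
producer(void)
{
    mutex_lock(&lock);
    done = true;                        /* change the predicate */
    condition_broadcast(&condition);    /* one thread is awakened, the
                                           others become pending waiters */
    mutex_unlock(&lock);                /* wakes a pending waiter, if any,
                                           through thread_wakeup_last_cond() */
}

Each awakened consumer eventually releases the mutex again (here by re-entering condition_wait()), which chains the next wake-up and avoids the thundering herd effect.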
diff --git a/kern/condition_types.h b/kern/condition_types.h
index 03b22676..13a29205 100644
--- a/kern/condition_types.h
+++ b/kern/condition_types.h
@@ -21,14 +21,9 @@
#ifndef _KERN_CONDITION_TYPES_H
#define _KERN_CONDITION_TYPES_H
-#include <kern/list_types.h>
-#include <kern/mutex_types.h>
-#include <kern/spinlock_types.h>
-
struct condition {
- struct spinlock lock;
- struct mutex *mutex;
- struct list waiters;
+ unsigned short nr_sleeping_waiters;
+ unsigned short nr_pending_waiters;
};
#endif /* _KERN_CONDITION_TYPES_H */
diff --git a/kern/kernel.c b/kern/kernel.c
index b63be5d2..cd3d3e3b 100644
--- a/kern/kernel.c
+++ b/kern/kernel.c
@@ -20,9 +20,11 @@
#include <kern/kernel.h>
#include <kern/llsync.h>
#include <kern/percpu.h>
+#include <kern/sleepq.h>
#include <kern/sref.h>
#include <kern/task.h>
#include <kern/thread.h>
+#include <kern/turnstile.h>
#include <kern/work.h>
#include <kern/xcall.h>
#include <machine/cpu.h>
@@ -41,6 +43,8 @@ kernel_main(void)
cpumap_setup();
xcall_setup();
task_setup();
+ sleepq_setup();
+ turnstile_setup();
thread_setup();
work_setup();
llsync_setup();
diff --git a/kern/mutex.c b/kern/mutex.c
index 58e24513..6a1af208 100644
--- a/kern/mutex.c
+++ b/kern/mutex.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014 Richard Braun.
+ * Copyright (c) 2013-2017 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -15,36 +15,54 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#ifndef X15_MUTEX_PI
+
+#include <stddef.h>
+
#include <kern/mutex.h>
#include <kern/mutex_i.h>
-#include <kern/spinlock.h>
-#include <kern/thread.h>
+#include <kern/sleepq.h>
void
mutex_lock_slow(struct mutex *mutex)
{
- struct mutex_waiter waiter;
+ struct sleepq *sleepq;
unsigned int state;
- spinlock_lock(&mutex->lock);
+ sleepq = sleepq_lend(mutex, false);
+
+ for (;;) {
+ state = atomic_swap_uint(&mutex->state, MUTEX_CONTENDED);
- state = mutex_tryacquire_slow(mutex);
+ if (state == MUTEX_UNLOCKED) {
+ break;
+ }
- if (state != MUTEX_UNLOCKED) {
- waiter.thread = thread_self();
- mutex_queue(mutex, &waiter);
- mutex_wait(mutex, &waiter);
+ sleepq_wait(sleepq, "mutex");
}
- mutex_trydowngrade(mutex);
+ if (sleepq_empty(sleepq)) {
+ state = atomic_swap_uint(&mutex->state, MUTEX_LOCKED);
+ assert(state == MUTEX_CONTENDED);
+ }
- spinlock_unlock(&mutex->lock);
+ sleepq_return(sleepq);
}
void
mutex_unlock_slow(struct mutex *mutex)
{
- spinlock_lock(&mutex->lock);
- mutex_signal(mutex);
- spinlock_unlock(&mutex->lock);
+ struct sleepq *sleepq;
+
+ sleepq = sleepq_acquire(mutex, false);
+
+ if (sleepq == NULL) {
+ return;
+ }
+
+ sleepq_signal(sleepq);
+
+ sleepq_release(sleepq);
}
+
+#endif /* X15_MUTEX_PI */
diff --git a/kern/mutex.h b/kern/mutex.h
index 64efa350..c8ae4639 100644
--- a/kern/mutex.h
+++ b/kern/mutex.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014 Richard Braun.
+ * Copyright (c) 2013-2017 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -15,33 +15,84 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*
- * Mutual exclusion locks.
+ * Mutual exclusion sleep locks.
*
* Unlike spin locks, acquiring a mutex may make the calling thread sleep.
+ *
+ * TODO Adaptive spinning.
*/
#ifndef _KERN_MUTEX_H
#define _KERN_MUTEX_H
+#include <kern/mutex_types.h>
+
+#ifdef X15_MUTEX_PI
+
+#include <kern/rtmutex.h>
+
+struct mutex;
+
+#define mutex_assert_locked(mutex) rtmutex_assert_locked(&(mutex)->rtmutex)
+
+static inline void
+mutex_init(struct mutex *mutex)
+{
+ rtmutex_init(&mutex->rtmutex);
+}
+
+static inline int
+mutex_trylock(struct mutex *mutex)
+{
+ return rtmutex_trylock(&mutex->rtmutex);
+}
+
+static inline void
+mutex_lock(struct mutex *mutex)
+{
+ rtmutex_lock(&mutex->rtmutex);
+}
+
+static inline void
+mutex_unlock(struct mutex *mutex)
+{
+ rtmutex_unlock(&mutex->rtmutex);
+
+ /*
+ * If this mutex was used along with a condition variable, wake up
+ * a potential pending waiter. This must be done after the mutex is
+ * unlocked so that a higher priority thread can directly acquire it.
+ */
+ thread_wakeup_last_cond();
+}
+
+#else /* X15_MUTEX_PI */
+
#include <kern/assert.h>
#include <kern/error.h>
-#include <kern/list.h>
#include <kern/mutex_i.h>
-#include <kern/mutex_types.h>
-#include <kern/spinlock.h>
+#include <kern/thread.h>
struct mutex;
+#define mutex_assert_locked(mutex) assert((mutex)->state != MUTEX_UNLOCKED)
+
+/*
+ * Initialize a mutex.
+ */
static inline void
mutex_init(struct mutex *mutex)
{
mutex->state = MUTEX_UNLOCKED;
- spinlock_init(&mutex->lock);
- list_init(&mutex->waiters);
}
-#define mutex_assert_locked(mutex) assert((mutex)->state != MUTEX_UNLOCKED)
-
+/*
+ * Attempt to lock the given mutex.
+ *
+ * This function may not sleep.
+ *
+ * Return 0 on success, ERROR_BUSY if the mutex is already locked.
+ */
static inline int
mutex_trylock(struct mutex *mutex)
{
@@ -56,6 +107,14 @@ mutex_trylock(struct mutex *mutex)
return ERROR_BUSY;
}
+/*
+ * Lock a mutex.
+ *
+ * If the mutex is already locked, the calling thread sleeps until the
+ * mutex is unlocked.
+ *
+ * A mutex can only be locked once.
+ */
static inline void
mutex_lock(struct mutex *mutex)
{
@@ -72,6 +131,12 @@ mutex_lock(struct mutex *mutex)
mutex_lock_slow(mutex);
}
+/*
+ * Unlock a mutex.
+ *
+ * The mutex must be locked, and must have been locked by the calling
+ * thread.
+ */
static inline void
mutex_unlock(struct mutex *mutex)
{
@@ -79,13 +144,18 @@ mutex_unlock(struct mutex *mutex)
state = mutex_release(mutex);
- if (state == MUTEX_LOCKED) {
- return;
+ if (state != MUTEX_LOCKED) {
+ assert(state == MUTEX_CONTENDED);
+ mutex_unlock_slow(mutex);
}
- assert(state == MUTEX_CONTENDED);
-
- mutex_unlock_slow(mutex);
+ /*
+ * If this mutex was used along with a condition variable, wake up
+ * a potential pending waiter.
+ */
+ thread_wakeup_last_cond();
}
+#endif /* X15_MUTEX_PI */
+
#endif /* _KERN_MUTEX_H */
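Since mutex_trylock() never sleeps, it can be used where blocking is not acceptable, falling back to another strategy when the lock is contended. A hedged sketch reusing the hypothetical lock and counter from the INSTALL example above:

static void
example_try_increment(void)
{
    int error;

    error = mutex_trylock(&example_lock);

    if (error) {
        /* ERROR_BUSY: the lock is held, give up or defer the update
           instead of sleeping (the fallback is left out of this sketch). */
        return;
    }

    example_counter++;
    mutex_unlock(&example_lock);
}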
diff --git a/kern/mutex_i.h b/kern/mutex_i.h
index 5e78a4da..158d8f7f 100644
--- a/kern/mutex_i.h
+++ b/kern/mutex_i.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2014 Richard Braun.
+ * Copyright (c) 2013-2017 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -18,25 +18,16 @@
#ifndef _KERN_MUTEX_I_H
#define _KERN_MUTEX_I_H
+#ifndef X15_MUTEX_PI
+
#include <kern/assert.h>
-#include <kern/list.h>
#include <kern/mutex_types.h>
-#include <kern/thread.h>
#include <machine/atomic.h>
#define MUTEX_UNLOCKED 0
#define MUTEX_LOCKED 1
#define MUTEX_CONTENDED 2
-struct mutex_waiter {
- struct list node;
- struct thread *thread;
-};
-
-void mutex_lock_slow(struct mutex *mutex);
-
-void mutex_unlock_slow(struct mutex *mutex);
-
static inline unsigned int
mutex_tryacquire(struct mutex *mutex)
{
@@ -44,12 +35,6 @@ mutex_tryacquire(struct mutex *mutex)
}
static inline unsigned int
-mutex_tryacquire_slow(struct mutex *mutex)
-{
- return atomic_swap_uint(&mutex->state, MUTEX_CONTENDED);
-}
-
-static inline unsigned int
mutex_release(struct mutex *mutex)
{
unsigned int state;
@@ -59,51 +44,10 @@ mutex_release(struct mutex *mutex)
return state;
}
-static inline void
-mutex_queue(struct mutex *mutex, struct mutex_waiter *waiter)
-{
- list_insert_tail(&mutex->waiters, &waiter->node);
-}
-
-static inline void
-mutex_queue_list(struct mutex *mutex, struct list *waiters)
-{
- list_concat(&mutex->waiters, waiters);
-}
-
-static inline void
-mutex_wait(struct mutex *mutex, struct mutex_waiter *waiter)
-{
- unsigned int state;
-
- do {
- thread_sleep(&mutex->lock, mutex, "mutex");
- state = mutex_tryacquire_slow(mutex);
- } while (state != MUTEX_UNLOCKED);
-
- list_remove(&waiter->node);
-}
-
-static inline void
-mutex_signal(struct mutex *mutex)
-{
- struct mutex_waiter *waiter;
-
- if (!list_empty(&mutex->waiters)) {
- waiter = list_first_entry(&mutex->waiters, struct mutex_waiter, node);
- thread_wakeup(waiter->thread);
- }
-}
+void mutex_lock_slow(struct mutex *mutex);
-static inline void
-mutex_trydowngrade(struct mutex *mutex)
-{
- if (list_empty(&mutex->waiters)) {
- unsigned int state;
+void mutex_unlock_slow(struct mutex *mutex);
- state = atomic_swap_uint(&mutex->state, MUTEX_LOCKED);
- assert(state == MUTEX_CONTENDED);
- }
-}
+#endif /* X15_MUTEX_PI */
#endif /* _KERN_MUTEX_I_H */
diff --git a/kern/mutex_types.h b/kern/mutex_types.h
index b348fb7b..4b7947fc 100644
--- a/kern/mutex_types.h
+++ b/kern/mutex_types.h
@@ -21,13 +21,24 @@
#ifndef _KERN_MUTEX_TYPES_H
#define _KERN_MUTEX_TYPES_H
-#include <kern/list_types.h>
-#include <kern/spinlock_types.h>
+#ifdef X15_MUTEX_PI
+
+#include <kern/rtmutex_types.h>
+
+/*
+ * Do not directly alias rtmutex to make sure they cannot be used
+ * with condition variables by mistake.
+ */
+struct mutex {
+ struct rtmutex rtmutex;
+};
+
+#else /* X15_MUTEX_PI */
struct mutex {
unsigned int state;
- struct spinlock lock;
- struct list waiters;
};
+#endif /* X15_MUTEX_PI */
+
#endif /* _KERN_MUTEX_TYPES_H */
diff --git a/kern/plist.c b/kern/plist.c
new file mode 100644
index 00000000..851de629
--- /dev/null
+++ b/kern/plist.c
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/list.h>
+#include <kern/plist.h>
+
+void
+plist_add(struct plist *plist, struct plist_node *pnode)
+{
+ struct plist_node *next;
+
+ if (plist_empty(plist)) {
+ list_insert_head(&plist->list, &pnode->node);
+ list_insert_head(&plist->prio_list, &pnode->prio_node);
+ return;
+ }
+
+ list_for_each_entry(&plist->prio_list, next, prio_node) {
+ if (pnode->priority < next->priority) {
+ break;
+ }
+ }
+
+ if (list_end(&plist->prio_list, &next->prio_node)
+ || (pnode->priority != next->priority)) {
+ list_insert_before(&next->prio_node, &pnode->prio_node);
+ } else {
+ list_init(&pnode->prio_node);
+ }
+
+ list_insert_before(&next->node, &pnode->node);
+}
+
+void
+plist_remove(struct plist *plist, struct plist_node *pnode)
+{
+ struct plist_node *next;
+
+ if (!list_node_unlinked(&pnode->prio_node)) {
+ next = list_next_entry(pnode, node);
+
+ if (!list_end(&plist->list, &next->node)
+ && list_node_unlinked(&next->prio_node)) {
+ list_insert_after(&pnode->prio_node, &next->prio_node);
+ }
+
+ list_remove(&pnode->prio_node);
+ }
+
+ list_remove(&pnode->node);
+}
diff --git a/kern/plist.h b/kern/plist.h
new file mode 100644
index 00000000..dd8fdd00
--- /dev/null
+++ b/kern/plist.h
@@ -0,0 +1,270 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Priority list.
+ *
+ * This container acts as a doubly-linked list sorted by priority in
+ * ascending order. All operations behave as with a regular linked list
+ * except insertion, which is O(k), k being the number of priorities
+ * among the entries.
+ */
+
+#ifndef _KERN_PLIST_H
+#define _KERN_PLIST_H
+
+#include <stdbool.h>
+
+#include <kern/list.h>
+#include <kern/macros.h>
+#include <kern/plist_types.h>
+
+/*
+ * Priority list.
+ */
+struct plist;
+
+/*
+ * Priority list node.
+ */
+struct plist_node;
+
+/*
+ * Static priority list initializer.
+ */
+#define PLIST_INITIALIZER(plist) \
+ { LIST_INITIALIZER((plist).list), LIST_INITIALIZER((plist).prio_list) }
+
+/*
+ * Initialize a priority list.
+ */
+static inline void
+plist_init(struct plist *plist)
+{
+ list_init(&plist->list);
+ list_init(&plist->prio_list);
+}
+
+/*
+ * Initialize a priority list node.
+ */
+static inline void
+plist_node_init(struct plist_node *pnode, unsigned int priority)
+{
+ pnode->priority = priority;
+ list_node_init(&pnode->node);
+ list_node_init(&pnode->prio_node);
+}
+
+/*
+ * Return the priority associated with a node.
+ */
+static inline unsigned int
+plist_node_priority(const struct plist_node *pnode)
+{
+ return pnode->priority;
+}
+
+/*
+ * Update the priority of an already initialized node.
+ */
+static inline void
+plist_node_set_priority(struct plist_node *pnode, unsigned int priority)
+{
+ pnode->priority = priority;
+}
+
+/*
+ * Return true if pnode is in no priority lists.
+ */
+static inline bool
+plist_node_unlinked(const struct plist_node *pnode)
+{
+ return list_node_unlinked(&pnode->node);
+}
+
+/*
+ * Macro that evaluates to the address of the structure containing the
+ * given node based on the given type and member.
+ */
+#define plist_entry(pnode, type, member) structof(pnode, type, member)
+
+/*
+ * Return the first node of a priority list.
+ */
+static inline struct plist_node *
+plist_first(const struct plist *plist)
+{
+ return list_first_entry(&plist->list, struct plist_node, node);
+}
+
+/*
+ * Return the last node of a priority list.
+ */
+static inline struct plist_node *
+plist_last(const struct plist *plist)
+{
+ return list_last_entry(&plist->list, struct plist_node, node);
+}
+
+/*
+ * Return the node next to the given node.
+ */
+static inline struct plist_node *
+plist_next(const struct plist_node *pnode)
+{
+ return (struct plist_node *)list_next_entry(pnode, node);
+}
+
+/*
+ * Return the node previous to the given node.
+ */
+static inline struct plist_node *
+plist_prev(const struct plist_node *pnode)
+{
+ return (struct plist_node *)list_prev_entry(pnode, node);
+}
+
+/*
+ * Get the first entry of a priority list.
+ */
+#define plist_first_entry(plist, type, member) \
+ plist_entry(plist_first(plist), type, member)
+
+/*
+ * Get the last entry of a priority list.
+ */
+#define plist_last_entry(plist, type, member) \
+ plist_entry(plist_last(plist), type, member)
+
+/*
+ * Get the entry next to the given entry.
+ */
+#define plist_next_entry(entry, member) \
+ plist_entry(plist_next(&(entry)->member), typeof(*(entry)), member)
+
+/*
+ * Get the entry previous to the given entry.
+ */
+#define plist_prev_entry(entry, member) \
+ plist_entry(plist_prev(&(entry)->member), typeof(*(entry)), member)
+
+/*
+ * Return true if node is after the last or before the first node of
+ * a priority list.
+ */
+static inline bool
+plist_end(const struct plist *plist, const struct plist_node *pnode)
+{
+ return list_end(&plist->list, &pnode->node);
+}
+
+/*
+ * Return true if plist is empty.
+ */
+static inline bool
+plist_empty(const struct plist *plist)
+{
+ return list_empty(&plist->list);
+}
+
+/*
+ * Return true if plist contains exactly one node.
+ */
+static inline bool
+plist_singular(const struct plist *plist)
+{
+ return list_singular(&plist->list);
+}
+
+/*
+ * Add a node to a priority list.
+ *
+ * If the priority list already contains nodes with the same priority
+ * as the given node, it is inserted before them.
+ *
+ * The node must be initialized before calling this function.
+ */
+void plist_add(struct plist *plist, struct plist_node *pnode);
+
+/*
+ * Remove a node from a priority list.
+ *
+ * After completion, the node is stale.
+ */
+void plist_remove(struct plist *plist, struct plist_node *pnode);
+
+/*
+ * Forge a loop to process all nodes of a priority list.
+ *
+ * The node must not be altered during the loop.
+ */
+#define plist_for_each(plist, pnode) \
+for (pnode = plist_first(plist); \
+ !plist_end(plist, pnode); \
+ pnode = plist_next(pnode))
+
+/*
+ * Forge a loop to process all nodes of a priority list.
+ */
+#define plist_for_each_safe(plist, pnode, tmp) \
+for (pnode = plist_first(plist), tmp = plist_next(pnode); \
+ !plist_end(plist, pnode); \
+ pnode = tmp, tmp = plist_next(pnode))
+
+/*
+ * Version of plist_for_each() that processes nodes backward.
+ */
+#define plist_for_each_reverse(plist, pnode) \
+for (pnode = plist_last(plist); \
+ !plist_end(plist, pnode); \
+ pnode = plist_prev(pnode))
+
+/*
+ * Version of plist_for_each_safe() that processes nodes backward.
+ */
+#define plist_for_each_reverse_safe(plist, pnode, tmp) \
+for (pnode = plist_last(plist), tmp = plist_prev(pnode); \
+ !plist_end(plist, pnode); \
+ pnode = tmp, tmp = plist_prev(pnode))
+
+/*
+ * Forge a loop to process all entries of a priority list.
+ *
+ * The entry node must not be altered during the loop.
+ */
+#define plist_for_each_entry(plist, entry, member) \
+ list_for_each_entry(&(plist)->list, entry, member.node)
+
+/*
+ * Forge a loop to process all entries of a priority list.
+ */
+#define plist_for_each_entry_safe(plist, entry, tmp, member) \
+ list_for_each_entry_safe(&(plist)->list, entry, tmp, member.node)
+
+/*
+ * Version of plist_for_each_entry() that processes entries backward.
+ */
+#define plist_for_each_entry_reverse(plist, entry, member) \
+ list_for_each_entry_reverse(&(plist)->list, entry, member.node)
+
+/*
+ * Version of plist_for_each_entry_safe() that processes entries backward.
+ */
+#define plist_for_each_entry_reverse_safe(plist, entry, tmp, member) \
+ list_for_each_entry_reverse_safe(&(plist)->list, entry, tmp, member.node)
+
+#endif /* _KERN_PLIST_H */
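A short usage sketch of the interface above; the entry structure, the priorities and the printk() output are illustrative only:

#include <kern/plist.h>
#include <kern/printk.h>

struct example_entry {
    struct plist_node pnode;
    const char *name;
};

static struct plist example_plist = PLIST_INITIALIZER(example_plist);

static void
example_insert(struct example_entry *entry, unsigned int priority)
{
    plist_node_init(&entry->pnode, priority);
    plist_add(&example_plist, &entry->pnode);
}

static void
example_walk(void)
{
    struct example_entry *entry;

    /* Entries are visited sorted by priority in ascending order. */
    plist_for_each_entry(&example_plist, entry, pnode) {
        printk("%u %s\n", plist_node_priority(&entry->pnode), entry->name);
    }
}

Removal with plist_remove() keeps the internal prio_list consistent: if the removed node was the first of its priority, the next node with the same priority, if any, takes its place on prio_list.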
diff --git a/kern/plist_types.h b/kern/plist_types.h
new file mode 100644
index 00000000..653d63f2
--- /dev/null
+++ b/kern/plist_types.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definition used to avoid inclusion circular dependencies.
+ */
+
+#ifndef _KERN_PLIST_TYPES_H
+#define _KERN_PLIST_TYPES_H
+
+#include <kern/list_types.h>
+
+/*
+ * The list member is used as the underlying regular linked list and
+ * contains all entries, sorted by priority in ascending order. The
+ * prio_list member contains exactly one entry for each priority
+ * present, also sorted by priority in ascending order. An entry
+ * on prio_list is the first entry in list for that priority.
+ * Here is a representation of a possible priority list :
+ *
+ * list--|1|--|3|--|3|--|3|--|4|--|6|--|6|--|8|
+ * | | | | | | | | |
+ * prio_list--+ +--+ +------------+ +--+ +-------+
+ *
+ */
+struct plist {
+ struct list list;
+ struct list prio_list;
+};
+
+struct plist_node {
+ unsigned int priority;
+ struct list node;
+ struct list prio_node;
+};
+
+#endif /* _KERN_PLIST_TYPES_H */
diff --git a/kern/rtmutex.c b/kern/rtmutex.c
new file mode 100644
index 00000000..f79f8a88
--- /dev/null
+++ b/kern/rtmutex.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <kern/assert.h>
+#include <kern/rtmutex.h>
+#include <kern/rtmutex_i.h>
+#include <kern/rtmutex_types.h>
+#include <kern/thread.h>
+#include <kern/turnstile.h>
+#include <machine/atomic.h>
+
+static void
+rtmutex_set_contended(struct rtmutex *rtmutex)
+{
+ uintptr_t owner, prev_owner;
+
+ do {
+ owner = rtmutex->owner;
+ prev_owner = atomic_cas_uintptr(&rtmutex->owner, owner,
+ owner | RTMUTEX_CONTENDED);
+ } while (prev_owner != owner);
+}
+
+void
+rtmutex_lock_slow(struct rtmutex *rtmutex)
+{
+ struct turnstile *turnstile;
+ uintptr_t owner, prev_owner;
+ struct thread *thread;
+ uintptr_t bits;
+
+ owner = (uintptr_t)thread_self();
+
+ turnstile = turnstile_lend(rtmutex);
+
+ rtmutex_set_contended(rtmutex);
+
+ bits = RTMUTEX_CONTENDED;
+
+ for (;;) {
+ prev_owner = atomic_cas_uintptr(&rtmutex->owner, bits, owner | bits);
+ assert((prev_owner & bits) == bits);
+
+ if (prev_owner == bits) {
+ break;
+ }
+
+ thread = (struct thread *)(prev_owner & RTMUTEX_OWNER_MASK);
+ turnstile_wait(turnstile, "rtmutex", thread);
+ bits |= RTMUTEX_FORCE_WAIT;
+ }
+
+ turnstile_own(turnstile);
+
+ if (turnstile_empty(turnstile)) {
+ prev_owner = atomic_swap_uintptr(&rtmutex->owner, owner);
+ assert(prev_owner == (owner | bits));
+ }
+
+ turnstile_return(turnstile);
+
+ /*
+ * A lock owner should never perform priority propagation on itself,
+ * because this process is done using its own priority, potentially
+ * introducing unbounded priority inversion.
+ * Instead, let new waiters do it, using their own priority.
+ */
+}
+
+void
+rtmutex_unlock_slow(struct rtmutex *rtmutex)
+{
+ struct turnstile *turnstile;
+ uintptr_t owner, prev_owner;
+
+ owner = (uintptr_t)thread_self();
+
+ turnstile = turnstile_acquire(rtmutex);
+ assert(turnstile != NULL);
+
+ prev_owner = atomic_swap_uintptr(&rtmutex->owner,
+ RTMUTEX_FORCE_WAIT | RTMUTEX_CONTENDED);
+ assert((prev_owner & RTMUTEX_OWNER_MASK) == owner);
+
+ turnstile_disown(turnstile);
+ turnstile_signal(turnstile);
+
+ turnstile_release(turnstile);
+
+ thread_propagate_priority();
+}
diff --git a/kern/rtmutex.h b/kern/rtmutex.h
new file mode 100644
index 00000000..f7cee716
--- /dev/null
+++ b/kern/rtmutex.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Real-time mutual exclusion locks.
+ *
+ * A real-time mutex is similar to a regular mutex, except priority
+ * inheritance is unconditionally enabled.
+ */
+
+#ifndef _KERN_RTMUTEX_H
+#define _KERN_RTMUTEX_H
+
+#include <stdint.h>
+
+#include <kern/assert.h>
+#include <kern/error.h>
+#include <kern/rtmutex_i.h>
+#include <kern/rtmutex_types.h>
+
+struct rtmutex;
+
+#define rtmutex_assert_locked(rtmutex) assert((rtmutex)->owner != 0)
+
+/*
+ * Initialize a real-time mutex.
+ */
+static inline void
+rtmutex_init(struct rtmutex *rtmutex)
+{
+ rtmutex->owner = 0;
+}
+
+/*
+ * Attempt to lock the given real-time mutex.
+ *
+ * This function may not sleep.
+ *
+ * Return 0 on success, ERROR_BUSY if the mutex is already locked.
+ */
+static inline int
+rtmutex_trylock(struct rtmutex *rtmutex)
+{
+ uintptr_t prev_owner;
+
+ prev_owner = rtmutex_tryacquire(rtmutex);
+
+ if (prev_owner == 0) {
+ return 0;
+ }
+
+ return ERROR_BUSY;
+}
+
+/*
+ * Lock a real-time mutex.
+ *
+ * If the mutex is already locked, the calling thread sleeps until the
+ * mutex is unlocked, and its priority is propagated as needed to prevent
+ * unbounded priority inversion.
+ *
+ * A mutex can only be locked once.
+ */
+static inline void
+rtmutex_lock(struct rtmutex *rtmutex)
+{
+ uintptr_t prev_owner;
+
+ prev_owner = rtmutex_tryacquire(rtmutex);
+
+ if (prev_owner == 0) {
+ return;
+ }
+
+ rtmutex_lock_slow(rtmutex);
+}
+
+/*
+ * Unlock a real-time mutex.
+ *
+ * The mutex must be locked, and must have been locked by the calling
+ * thread.
+ */
+static inline void
+rtmutex_unlock(struct rtmutex *rtmutex)
+{
+ uintptr_t prev_owner;
+
+ prev_owner = rtmutex_tryrelease(rtmutex);
+
+ if (!(prev_owner & RTMUTEX_CONTENDED)) {
+ return;
+ }
+
+ rtmutex_unlock_slow(rtmutex);
+}
+
+#endif /* _KERN_RTMUTEX_H */
diff --git a/kern/rtmutex_i.h b/kern/rtmutex_i.h
new file mode 100644
index 00000000..b99b5bd0
--- /dev/null
+++ b/kern/rtmutex_i.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _KERN_RTMUTEX_I_H
+#define _KERN_RTMUTEX_I_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <kern/assert.h>
+#include <kern/rtmutex_types.h>
+#include <kern/thread.h>
+#include <machine/atomic.h>
+
+/*
+ * Real-time mutex flags.
+ *
+ * The "contended" flag indicates that threads are waiting for the mutex
+ * to be unlocked. It forces threads trying to lock the mutex as well as
+ * the owner to take the slow path.
+ *
+ * The "force-wait" flag prevents "stealing" a mutex. When a contended
+ * mutex is unlocked, a thread may concurrently try to lock it. Without
+ * this flag, it may succeed, and in doing so, it would prevent a
+ * potentially higher priority thread from locking the mutex. The flag
+ * forces all threads to not only take the slow path, but to also call
+ * the turnstile wait function so that only the highest priority thread
+ * may lock the mutex.
+ */
+#define RTMUTEX_CONTENDED 0x1
+#define RTMUTEX_FORCE_WAIT 0x2
+
+#define RTMUTEX_OWNER_MASK (~((uintptr_t)(RTMUTEX_FORCE_WAIT \
+ | RTMUTEX_CONTENDED)))
+
+#define rtmutex_assert_owner_aligned(owner) \
+ assert(((owner) & ~RTMUTEX_OWNER_MASK) == 0)
+
+static inline uintptr_t
+rtmutex_tryacquire(struct rtmutex *rtmutex)
+{
+ uintptr_t owner;
+
+ owner = (uintptr_t)thread_self();
+ rtmutex_assert_owner_aligned(owner);
+ return atomic_cas_uintptr(&rtmutex->owner, 0, owner);
+}
+
+static inline uintptr_t
+rtmutex_tryrelease(struct rtmutex *rtmutex)
+{
+ uintptr_t owner, prev_owner;
+
+ owner = (uintptr_t)thread_self();
+ rtmutex_assert_owner_aligned(owner);
+ prev_owner = atomic_cas_uintptr(&rtmutex->owner, owner, 0);
+ assert((prev_owner & RTMUTEX_OWNER_MASK) == owner);
+ return prev_owner;
+}
+
+void rtmutex_lock_slow(struct rtmutex *rtmutex);
+
+void rtmutex_unlock_slow(struct rtmutex *rtmutex);
+
+#endif /* _KERN_RTMUTEX_I_H */
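The owner word therefore packs the owning thread pointer together with the two flag bits, which relies on thread structures being aligned enough for the low bits to be free (checked by rtmutex_assert_owner_aligned()). A small illustrative sketch of decoding such a word; the helper names are hypothetical, and the real code performs these checks on values returned by atomic operations:

static inline struct thread *
example_rtmutex_owner_thread(uintptr_t owner_word)
{
    /* Mask out RTMUTEX_CONTENDED and RTMUTEX_FORCE_WAIT to recover
       the thread pointer, as rtmutex_lock_slow() does. */
    return (struct thread *)(owner_word & RTMUTEX_OWNER_MASK);
}

static inline bool
example_rtmutex_contended(uintptr_t owner_word)
{
    return (owner_word & RTMUTEX_CONTENDED) != 0;
}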
diff --git a/kern/rtmutex_types.h b/kern/rtmutex_types.h
new file mode 100644
index 00000000..c03e0484
--- /dev/null
+++ b/kern/rtmutex_types.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definition used to avoid inclusion circular dependencies.
+ */
+
+#ifndef _KERN_RTMUTEX_TYPES_H
+#define _KERN_RTMUTEX_TYPES_H
+
+#include <stdint.h>
+
+struct rtmutex {
+ uintptr_t owner;
+};
+
+#endif /* _KERN_RTMUTEX_TYPES_H */
diff --git a/kern/sleepq.c b/kern/sleepq.c
new file mode 100644
index 00000000..0c41caef
--- /dev/null
+++ b/kern/sleepq.c
@@ -0,0 +1,382 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * TODO Analyse hash parameters.
+ */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <kern/assert.h>
+#include <kern/init.h>
+#include <kern/kmem.h>
+#include <kern/list.h>
+#include <kern/macros.h>
+#include <kern/param.h>
+#include <kern/sleepq.h>
+#include <kern/spinlock.h>
+#include <kern/thread.h>
+
+struct sleepq_bucket {
+ struct spinlock lock;
+ struct list list;
+} __aligned(CPU_L1_SIZE);
+
+struct sleepq {
+ struct sleepq_bucket *bucket;
+ struct list node;
+ const void *sync_obj;
+ struct list waiters;
+ struct sleepq *next_free;
+};
+
+struct sleepq_waiter {
+ struct list node;
+ struct thread *thread;
+};
+
+#define SLEEPQ_HTABLE_SIZE 128
+#define SLEEPQ_COND_HTABLE_SIZE 64
+
+#if !ISP2(SLEEPQ_HTABLE_SIZE) || !ISP2(SLEEPQ_COND_HTABLE_SIZE)
+#error hash table size must be a power of two
+#endif /* !ISP2(SLEEPQ_HTABLE_SIZE) */
+
+#define SLEEPQ_HTABLE_MASK (SLEEPQ_HTABLE_SIZE - 1)
+#define SLEEPQ_COND_HTABLE_MASK (SLEEPQ_COND_HTABLE_SIZE - 1)
+
+static struct sleepq_bucket sleepq_htable[SLEEPQ_HTABLE_SIZE];
+static struct sleepq_bucket sleepq_cond_htable[SLEEPQ_COND_HTABLE_SIZE];
+
+static struct kmem_cache sleepq_cache;
+
+static uintptr_t
+sleepq_hash(const void *addr)
+{
+ return ((uintptr_t)addr >> 8) ^ (uintptr_t)addr;
+}
+
+static void
+sleepq_waiter_init(struct sleepq_waiter *waiter, struct thread *thread)
+{
+ waiter->thread = thread;
+}
+
+static void
+sleepq_waiter_wakeup(struct sleepq_waiter *waiter)
+{
+ thread_wakeup(waiter->thread);
+}
+
+static void
+sleepq_assert_init_state(const struct sleepq *sleepq)
+{
+ assert(sleepq->bucket == NULL);
+ assert(sleepq->sync_obj == NULL);
+ assert(list_empty(&sleepq->waiters));
+ assert(sleepq->next_free == NULL);
+}
+
+static void
+sleepq_use(struct sleepq *sleepq, const void *sync_obj)
+{
+ assert(sleepq->sync_obj == NULL);
+ sleepq->sync_obj = sync_obj;
+}
+
+static void
+sleepq_unuse(struct sleepq *sleepq)
+{
+ assert(sleepq->sync_obj != NULL);
+ sleepq->sync_obj = NULL;
+}
+
+static bool
+sleepq_in_use(const struct sleepq *sleepq)
+{
+ return sleepq->sync_obj != NULL;
+}
+
+static bool
+sleepq_in_use_by(const struct sleepq *sleepq, const void *sync_obj)
+{
+ return sleepq->sync_obj == sync_obj;
+}
+
+static void
+sleepq_bucket_init(struct sleepq_bucket *bucket)
+{
+ spinlock_init(&bucket->lock);
+ list_init(&bucket->list);
+}
+
+static struct sleepq_bucket *
+sleepq_bucket_get_cond(const void *sync_obj)
+{
+ uintptr_t index;
+
+ index = sleepq_hash(sync_obj) & SLEEPQ_COND_HTABLE_MASK;
+ assert(index < ARRAY_SIZE(sleepq_cond_htable));
+ return &sleepq_cond_htable[index];
+}
+
+static struct sleepq_bucket *
+sleepq_bucket_get(const void *sync_obj, bool condition)
+{
+ uintptr_t index;
+
+ if (condition) {
+ return sleepq_bucket_get_cond(sync_obj);
+ }
+
+ index = sleepq_hash(sync_obj) & SLEEPQ_HTABLE_MASK;
+ assert(index < ARRAY_SIZE(sleepq_htable));
+ return &sleepq_htable[index];
+}
+
+static void
+sleepq_bucket_add(struct sleepq_bucket *bucket, struct sleepq *sleepq)
+{
+ assert(sleepq->bucket == NULL);
+ sleepq->bucket = bucket;
+ list_insert_tail(&bucket->list, &sleepq->node);
+}
+
+static void
+sleepq_bucket_remove(struct sleepq_bucket *bucket, struct sleepq *sleepq)
+{
+ assert(sleepq->bucket == bucket);
+ sleepq->bucket = NULL;
+ list_remove(&sleepq->node);
+}
+
+static struct sleepq *
+sleepq_bucket_lookup(const struct sleepq_bucket *bucket, const void *sync_obj)
+{
+ struct sleepq *sleepq;
+
+ list_for_each_entry(&bucket->list, sleepq, node) {
+ if (sleepq_in_use_by(sleepq, sync_obj)) {
+ assert(sleepq->bucket == bucket);
+ return sleepq;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+sleepq_ctor(void *ptr)
+{
+ struct sleepq *sleepq;
+
+ sleepq = ptr;
+ sleepq->bucket = NULL;
+ sleepq->sync_obj = NULL;
+ list_init(&sleepq->waiters);
+ sleepq->next_free = NULL;
+}
+
+void __init
+sleepq_setup(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(sleepq_htable); i++) {
+ sleepq_bucket_init(&sleepq_htable[i]);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(sleepq_cond_htable); i++) {
+ sleepq_bucket_init(&sleepq_cond_htable[i]);
+ }
+
+ kmem_cache_init(&sleepq_cache, "sleepq", sizeof(struct sleepq),
+ CPU_L1_SIZE, sleepq_ctor, 0);
+}
+
+struct sleepq *
+sleepq_create(void)
+{
+ struct sleepq *sleepq;
+
+ sleepq = kmem_cache_alloc(&sleepq_cache);
+
+ if (sleepq == NULL) {
+ return NULL;
+ }
+
+ sleepq_assert_init_state(sleepq);
+ return sleepq;
+}
+
+void
+sleepq_destroy(struct sleepq *sleepq)
+{
+ sleepq_assert_init_state(sleepq);
+ kmem_cache_free(&sleepq_cache, sleepq);
+}
+
+struct sleepq *
+sleepq_acquire(const void *sync_obj, bool condition)
+{
+ struct sleepq_bucket *bucket;
+ struct sleepq *sleepq;
+
+ assert(sync_obj != NULL);
+
+ bucket = sleepq_bucket_get(sync_obj, condition);
+
+ spinlock_lock(&bucket->lock);
+
+ sleepq = sleepq_bucket_lookup(bucket, sync_obj);
+
+ if (sleepq == NULL) {
+ spinlock_unlock(&bucket->lock);
+ return NULL;
+ }
+
+ return sleepq;
+}
+
+void
+sleepq_release(struct sleepq *sleepq)
+{
+ spinlock_unlock(&sleepq->bucket->lock);
+}
+
+static void
+sleepq_push_free(struct sleepq *sleepq, struct sleepq *free_sleepq)
+{
+ assert(free_sleepq->next_free == NULL);
+ free_sleepq->next_free = sleepq->next_free;
+ sleepq->next_free = free_sleepq;
+}
+
+static struct sleepq *
+sleepq_pop_free(struct sleepq *sleepq)
+{
+ struct sleepq *free_sleepq;
+
+ free_sleepq = sleepq->next_free;
+
+ if (free_sleepq == NULL) {
+ return NULL;
+ }
+
+ sleepq->next_free = free_sleepq->next_free;
+ free_sleepq->next_free = NULL;
+ return free_sleepq;
+}
+
+struct sleepq *
+sleepq_lend(const void *sync_obj, bool condition)
+{
+ struct sleepq_bucket *bucket;
+ struct sleepq *sleepq, *prev;
+
+ assert(sync_obj != NULL);
+
+ sleepq = thread_sleepq_lend();
+ sleepq_assert_init_state(sleepq);
+
+ bucket = sleepq_bucket_get(sync_obj, condition);
+
+ spinlock_lock(&bucket->lock);
+
+ prev = sleepq_bucket_lookup(bucket, sync_obj);
+
+ if (prev == NULL) {
+ sleepq_use(sleepq, sync_obj);
+ sleepq_bucket_add(bucket, sleepq);
+ } else {
+ sleepq_push_free(prev, sleepq);
+ sleepq = prev;
+ }
+
+ return sleepq;
+}
+
+void
+sleepq_return(struct sleepq *sleepq)
+{
+ struct sleepq_bucket *bucket;
+ struct sleepq *free_sleepq;
+
+ assert(sleepq_in_use(sleepq));
+
+ bucket = sleepq->bucket;
+ free_sleepq = sleepq_pop_free(sleepq);
+
+ if (free_sleepq == NULL) {
+ sleepq_bucket_remove(bucket, sleepq);
+ sleepq_unuse(sleepq);
+ free_sleepq = sleepq;
+ }
+
+ spinlock_unlock(&bucket->lock);
+
+ sleepq_assert_init_state(free_sleepq);
+ thread_sleepq_return(free_sleepq);
+}
+
+static void
+sleepq_add_waiter(struct sleepq *sleepq, struct sleepq_waiter *waiter)
+{
+ list_insert_head(&sleepq->waiters, &waiter->node);
+}
+
+static void
+sleepq_remove_waiter(struct sleepq *sleepq, struct sleepq_waiter *waiter)
+{
+ (void)sleepq;
+ list_remove(&waiter->node);
+}
+
+bool
+sleepq_empty(const struct sleepq *sleepq)
+{
+ return list_empty(&sleepq->waiters);
+}
+
+void
+sleepq_wait(struct sleepq *sleepq, const char *wchan)
+{
+ struct sleepq_waiter waiter;
+ struct thread *thread;
+
+ thread = thread_self();
+ sleepq_waiter_init(&waiter, thread);
+ sleepq_add_waiter(sleepq, &waiter);
+
+ thread_sleep(&sleepq->bucket->lock, sleepq->sync_obj, wchan);
+
+ sleepq_remove_waiter(sleepq, &waiter);
+}
+
+void
+sleepq_signal(struct sleepq *sleepq)
+{
+ struct sleepq_waiter *waiter;
+
+ if (sleepq_empty(sleepq)) {
+ return;
+ }
+
+ waiter = list_last_entry(&sleepq->waiters, struct sleepq_waiter, node);
+ sleepq_waiter_wakeup(waiter);
+}
diff --git a/kern/sleepq.h b/kern/sleepq.h
new file mode 100644
index 00000000..1ba4a319
--- /dev/null
+++ b/kern/sleepq.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Generic sleep queues.
+ *
+ * Sleep queues are used to build sleeping synchronization primitives
+ * such as mutexes and condition variables.
+ *
+ * Although the sleep queues are mostly generic, this implementation
+ * relies on knowing whether a synchronization object is a condition
+ * variable or not, because waiting on a condition variable unlocks
+ * the associated mutex, at which point two sleep queues are locked.
+ * Handling condition variable sleep queues slightly differently
+ * allows preventing deadlocks while keeping overall complexity low.
+ *
+ * In addition, despite being used to implement condition variables,
+ * this implementation doesn't provide a broadcast call. The rationale
+ * is to force users to implement "chained waking" in order to avoid
+ * the thundering herd effect.
+ */
+
+#ifndef _KERN_SLEEPQ_H
+#define _KERN_SLEEPQ_H
+
+#include <stdbool.h>
+
+struct sleepq;
+
+/*
+ * Initialize the sleepq module.
+ */
+void sleepq_setup(void);
+
+/*
+ * Create/destroy a sleep queue.
+ */
+struct sleepq * sleepq_create(void);
+void sleepq_destroy(struct sleepq *sleepq);
+
+/*
+ * Acquire/release a sleep queue.
+ *
+ * Acquiring a sleep queue serializes all access and disables preemption.
+ *
+ * The condition argument must be true if the synchronization object
+ * is a condition variable.
+ */
+struct sleepq * sleepq_acquire(const void *sync_obj, bool condition);
+void sleepq_release(struct sleepq *sleepq);
+
+/*
+ * Lend/return a sleep queue.
+ *
+ * A thread lends its private sleep queue to the sleepq module in
+ * order to prepare its sleep. The sleep queue obtained on lending
+ * is either the thread's queue, or an already existing queue for
+ * this synchronization object if another thread is waiting.
+ *
+ * When multiple threads are waiting on the same queue, the extra
+ * queues lent are kept in an internal free list, used when threads
+ * are awakened to return a queue to them.
+ *
+ * Note that the sleep queue returned may not be the one lent.
+ *
+ * The sleep queue obtained when lending is automatically acquired.
+ *
+ * The condition argument must be true if the synchronization object
+ * is a condition variable.
+ */
+struct sleepq * sleepq_lend(const void *sync_obj, bool condition);
+void sleepq_return(struct sleepq *sleepq);
+
+/*
+ * Return true if the given sleep queue has no waiters.
+ *
+ * The sleep queue must be acquired when calling this function.
+ */
+bool sleepq_empty(const struct sleepq *sleepq);
+
+/*
+ * Wait for a wake up on the given sleep queue.
+ *
+ * The sleep queue must be lent when calling this function. It is
+ * released and later reacquired before returning from this function.
+ *
+ * The calling thread is considered a waiter as long as it didn't
+ * reacquire the sleep queue. This means that signalling a sleep queue
+ * has no visible effect on the number of waiters until the queue is
+ * released, e.g. if a single thread is waiting and another signals
+ * the queue, the queue is not immediately considered empty.
+ *
+ * Threads are queued in FIFO order.
+ */
+void sleepq_wait(struct sleepq *sleepq, const char *wchan);
+
+/*
+ * Wake up a thread waiting on the given sleep queue, if any.
+ *
+ * The sleep queue must be acquired when calling this function.
+ *
+ * Since a sleep queue must be lent (and in turn is automatically
+ * acquired) when waiting, and acquired in order to signal it,
+ * wake-ups are serialized and cannot be missed.
+ *
+ * Threads are queued in FIFO order, which means signalling a sleep
+ * queue multiple times always awakens the same thread, regardless
+ * of new waiters, as long as that first thread didn't reacquire the
+ * sleep queue.
+ */
+void sleepq_signal(struct sleepq *sleepq);
+
+#endif /* _KERN_SLEEPQ_H */
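To illustrate the lend/return protocol, here is a hedged sketch of a minimal one-shot event built directly on this interface; the structure and functions are hypothetical, and the flag update glosses over atomicity and memory ordering:

#include <stdbool.h>

#include <kern/sleepq.h>

struct example_event {
    bool signaled;
};

static void
example_event_wait(struct example_event *event)
{
    struct sleepq *sleepq;

    /* Lend the calling thread's sleep queue, or join the queue already
       installed for this object by an earlier waiter. */
    sleepq = sleepq_lend(event, false);

    while (!event->signaled) {
        sleepq_wait(sleepq, "event");
    }

    sleepq_return(sleepq);
}

static void
example_event_signal(struct example_event *event)
{
    struct sleepq *sleepq;

    event->signaled = true;

    /* NULL means no thread lent a queue for this object, i.e. no waiter. */
    sleepq = sleepq_acquire(event, false);

    if (sleepq == NULL) {
        return;
    }

    sleepq_signal(sleepq);
    sleepq_release(sleepq);
}

The mutex slow path in kern/mutex.c above follows the same shape, with the atomic state swap playing the role of the flag.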
diff --git a/kern/task.c b/kern/task.c
index afc2407a..c7caeb48 100644
--- a/kern/task.c
+++ b/kern/task.c
@@ -143,15 +143,22 @@ task_info(struct task *task)
printk("task: name: %s, threads:\n", task->name);
+ /*
+ * Don't grab any lock when accessing threads, so that the function
+ * can be used to debug in the middle of most critical sections.
+ * Threads are only destroyed after being removed from their task
+ * so holding the task lock is enough to guarantee existence.
+ */
list_for_each_entry(&task->threads, thread, task_node) {
- printk("task: " TASK_INFO_ADDR_FMT " %c %8s:" TASK_INFO_ADDR_FMT
- " %.2s:%02hu %s\n",
+ printk(TASK_INFO_ADDR_FMT " %c %8s:" TASK_INFO_ADDR_FMT
+ " %.2s:%02hu %02u %s\n",
(unsigned long)thread,
thread_state_to_chr(thread),
thread_wchan_desc(thread),
(unsigned long)thread_wchan_addr(thread),
- thread_sched_class_to_str(thread_sched_class(thread)),
- thread_priority(thread),
+ thread_sched_class_to_str(thread_user_sched_class(thread)),
+ thread_user_priority(thread),
+ thread_real_global_priority(thread),
thread->name);
}
diff --git a/kern/thread.c b/kern/thread.c
index 8d7b3a75..14070d92 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -81,6 +81,7 @@
* weights in a smoother way than a raw scaling).
*/
+#include <stdbool.h>
#include <stddef.h>
#include <string.h>
@@ -98,11 +99,13 @@
#include <kern/panic.h>
#include <kern/param.h>
#include <kern/percpu.h>
+#include <kern/sleepq.h>
#include <kern/spinlock.h>
#include <kern/sprintf.h>
#include <kern/sref.h>
#include <kern/task.h>
#include <kern/thread.h>
+#include <kern/turnstile.h>
#include <kern/work.h>
#include <machine/atomic.h>
#include <machine/cpu.h>
@@ -259,13 +262,13 @@ struct thread_runq {
* Operations of a scheduling class.
*/
struct thread_sched_ops {
- void (*init_sched)(struct thread *thread, unsigned short priority);
struct thread_runq * (*select_runq)(struct thread *thread);
void (*add)(struct thread_runq *runq, struct thread *thread);
void (*remove)(struct thread_runq *runq, struct thread *thread);
void (*put_prev)(struct thread_runq *runq, struct thread *thread);
struct thread * (*get_next)(struct thread_runq *runq);
- void (*set_priority)(struct thread *thread, unsigned short priority);
+ void (*reset_priority)(struct thread *thread, unsigned short priority);
+ void (*update_priority)(struct thread *thread, unsigned short priority);
unsigned int (*get_global_priority)(unsigned short priority);
void (*set_next)(struct thread_runq *runq, struct thread *thread);
void (*tick)(struct thread_runq *runq, struct thread *thread);
@@ -341,6 +344,13 @@ struct thread_zombie {
struct thread *thread;
};
+static unsigned char
+thread_policy_to_class(unsigned char policy)
+{
+ assert(policy < ARRAY_SIZE(thread_policy_table));
+ return thread_policy_table[policy];
+}
+
static void
thread_set_wchan(struct thread *thread, const void *wchan_addr,
const char *wchan_desc)
@@ -359,15 +369,24 @@ thread_clear_wchan(struct thread *thread)
}
static const struct thread_sched_ops *
-thread_get_sched_ops(const struct thread *thread)
+thread_get_sched_ops(unsigned char sched_class)
{
- unsigned char sched_class;
-
- sched_class = thread_sched_class(thread);
assert(sched_class < ARRAY_SIZE(thread_sched_ops));
return &thread_sched_ops[sched_class];
}
+static const struct thread_sched_ops *
+thread_get_user_sched_ops(const struct thread *thread)
+{
+ return thread_get_sched_ops(thread_user_sched_class(thread));
+}
+
+static const struct thread_sched_ops *
+thread_get_real_sched_ops(const struct thread *thread)
+{
+ return thread_get_sched_ops(thread_real_sched_class(thread));
+}
+
static void __init
thread_runq_init_rt(struct thread_runq *runq)
{
@@ -457,8 +476,9 @@ thread_runq_add(struct thread_runq *runq, struct thread *thread)
assert(!cpu_intr_enabled());
spinlock_assert_locked(&runq->lock);
+ assert(!thread->in_runq);
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
ops->add(runq, thread);
if (runq->nr_threads == 0) {
@@ -467,11 +487,13 @@ thread_runq_add(struct thread_runq *runq, struct thread *thread)
runq->nr_threads++;
- if (thread_sched_class(thread) < thread_sched_class(runq->current)) {
+ if (thread_real_sched_class(thread)
+ < thread_real_sched_class(runq->current)) {
thread_set_flag(runq->current, THREAD_YIELD);
}
thread->runq = runq;
+ thread->in_runq = true;
}
static void
@@ -481,6 +503,7 @@ thread_runq_remove(struct thread_runq *runq, struct thread *thread)
assert(!cpu_intr_enabled());
spinlock_assert_locked(&runq->lock);
+ assert(thread->in_runq);
runq->nr_threads--;
@@ -488,8 +511,10 @@ thread_runq_remove(struct thread_runq *runq, struct thread *thread)
cpumap_set_atomic(&thread_idle_runqs, thread_runq_cpu(runq));
}
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
ops->remove(runq, thread);
+
+ thread->in_runq = false;
}
static void
@@ -500,7 +525,7 @@ thread_runq_put_prev(struct thread_runq *runq, struct thread *thread)
assert(!cpu_intr_enabled());
spinlock_assert_locked(&runq->lock);
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
if (ops->put_prev != NULL) {
ops->put_prev(runq, thread);
@@ -534,7 +559,7 @@ thread_runq_set_next(struct thread_runq *runq, struct thread *thread)
{
const struct thread_sched_ops *ops;
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
if (ops->set_next != NULL) {
ops->set_next(runq, thread);
@@ -656,13 +681,6 @@ thread_runq_double_lock(struct thread_runq *a, struct thread_runq *b)
}
}
-static void
-thread_sched_rt_init_sched(struct thread *thread, unsigned short priority)
-{
- assert(priority <= THREAD_SCHED_RT_PRIO_MAX);
- thread->rt_data.time_slice = THREAD_DEFAULT_RR_TIME_SLICE;
-}
-
static struct thread_runq *
thread_sched_rt_select_runq(struct thread *thread)
{
@@ -689,15 +707,17 @@ thread_sched_rt_add(struct thread_runq *runq, struct thread *thread)
struct list *threads;
rt_runq = &runq->rt_runq;
- threads = &rt_runq->threads[thread_priority(thread)];
+ threads = &rt_runq->threads[thread_real_priority(thread)];
list_insert_tail(threads, &thread->rt_data.node);
if (list_singular(threads)) {
- rt_runq->bitmap |= (1ULL << thread_priority(thread));
+ rt_runq->bitmap |= (1ULL << thread_real_priority(thread));
}
- if ((thread_sched_class(thread) == thread_sched_class(runq->current))
- && (thread_priority(thread) > thread_priority(runq->current))) {
+ if ((thread_real_sched_class(thread)
+ == thread_real_sched_class(runq->current))
+ && (thread_real_priority(thread)
+ > thread_real_priority(runq->current))) {
thread_set_flag(runq->current, THREAD_YIELD);
}
}
@@ -709,11 +729,11 @@ thread_sched_rt_remove(struct thread_runq *runq, struct thread *thread)
struct list *threads;
rt_runq = &runq->rt_runq;
- threads = &rt_runq->threads[thread_priority(thread)];
+ threads = &rt_runq->threads[thread_real_priority(thread)];
list_remove(&thread->rt_data.node);
if (list_empty(threads)) {
- rt_runq->bitmap &= ~(1ULL << thread_priority(thread));
+ rt_runq->bitmap &= ~(1ULL << thread_real_priority(thread));
}
}
@@ -745,6 +765,13 @@ thread_sched_rt_get_next(struct thread_runq *runq)
return thread;
}
+static void
+thread_sched_rt_reset_priority(struct thread *thread, unsigned short priority)
+{
+ assert(priority <= THREAD_SCHED_RT_PRIO_MAX);
+ thread->rt_data.time_slice = THREAD_DEFAULT_RR_TIME_SLICE;
+}
+
static unsigned int
thread_sched_rt_get_global_priority(unsigned short priority)
{
@@ -762,7 +789,7 @@ thread_sched_rt_tick(struct thread_runq *runq, struct thread *thread)
{
(void)runq;
- if (thread_sched_policy(thread) != THREAD_SCHED_POLICY_RR) {
+ if (thread_real_sched_policy(thread) != THREAD_SCHED_POLICY_RR) {
return;
}
@@ -782,16 +809,6 @@ thread_sched_fs_prio2weight(unsigned short priority)
return ((priority + 1) * THREAD_FS_ROUND_SLICE_BASE);
}
-static void
-thread_sched_fs_init_sched(struct thread *thread, unsigned short priority)
-{
- assert(priority <= THREAD_SCHED_FS_PRIO_MAX);
- thread->fs_data.fs_runq = NULL;
- thread->fs_data.round = 0;
- thread->fs_data.weight = thread_sched_fs_prio2weight(priority);
- thread->fs_data.work = 0;
-}
-
static struct thread_runq *
thread_sched_fs_select_runq(struct thread *thread)
{
@@ -893,7 +910,7 @@ thread_sched_fs_enqueue(struct thread_fs_runq *fs_runq, unsigned long round,
assert(thread->fs_data.fs_runq == NULL);
assert(thread->fs_data.work <= thread->fs_data.weight);
- group = &fs_runq->group_array[thread_priority(thread)];
+ group = &fs_runq->group_array[thread_real_priority(thread)];
group_weight = group->weight + thread->fs_data.weight;
total_weight = fs_runq->weight + thread->fs_data.weight;
node = (group->weight == 0)
@@ -969,7 +986,7 @@ thread_sched_fs_restart(struct thread_runq *runq)
assert(node != NULL);
fs_runq->current = list_entry(node, struct thread_fs_group, node);
- if (thread_sched_class(runq->current) == THREAD_SCHED_CLASS_FS) {
+ if (thread_real_sched_class(runq->current) == THREAD_SCHED_CLASS_FS) {
thread_set_flag(runq->current, THREAD_YIELD);
}
}
@@ -1005,7 +1022,7 @@ thread_sched_fs_dequeue(struct thread *thread)
assert(thread->fs_data.fs_runq != NULL);
fs_runq = thread->fs_data.fs_runq;
- group = &fs_runq->group_array[thread_priority(thread)];
+ group = &fs_runq->group_array[thread_real_priority(thread)];
thread->fs_data.fs_runq = NULL;
list_remove(&thread->fs_data.runq_node);
@@ -1080,7 +1097,7 @@ thread_sched_fs_put_prev(struct thread_runq *runq, struct thread *thread)
struct thread_fs_group *group;
fs_runq = runq->fs_runq_active;
- group = &fs_runq->group_array[thread_priority(thread)];
+ group = &fs_runq->group_array[thread_real_priority(thread)];
list_insert_tail(&group->threads, &thread->fs_data.group_node);
if (thread->fs_data.work >= thread->fs_data.weight) {
@@ -1148,8 +1165,19 @@ thread_sched_fs_get_next(struct thread_runq *runq)
}
static void
-thread_sched_fs_set_priority(struct thread *thread, unsigned short priority)
+thread_sched_fs_reset_priority(struct thread *thread, unsigned short priority)
{
+ assert(priority <= THREAD_SCHED_FS_PRIO_MAX);
+ thread->fs_data.fs_runq = NULL;
+ thread->fs_data.round = 0;
+ thread->fs_data.weight = thread_sched_fs_prio2weight(priority);
+ thread->fs_data.work = 0;
+}
+
+static void
+thread_sched_fs_update_priority(struct thread *thread, unsigned short priority)
+{
+ assert(priority <= THREAD_SCHED_FS_PRIO_MAX);
thread->fs_data.weight = thread_sched_fs_prio2weight(priority);
if (thread->fs_data.work >= thread->fs_data.weight) {
@@ -1180,7 +1208,7 @@ thread_sched_fs_tick(struct thread_runq *runq, struct thread *thread)
fs_runq = runq->fs_runq_active;
fs_runq->work++;
- group = &fs_runq->group_array[thread_priority(thread)];
+ group = &fs_runq->group_array[thread_real_priority(thread)];
group->work++;
thread_set_flag(thread, THREAD_YIELD);
thread->fs_data.work++;
@@ -1231,7 +1259,8 @@ thread_sched_fs_balance_eligible(struct thread_runq *runq,
if ((nr_threads == 0)
|| ((nr_threads == 1)
- && (thread_sched_class(runq->current) == THREAD_SCHED_CLASS_FS))) {
+ && (thread_real_sched_class(runq->current)
+ == THREAD_SCHED_CLASS_FS))) {
return 0;
}
@@ -1521,39 +1550,40 @@ thread_sched_idle_get_global_priority(unsigned short priority)
return THREAD_SCHED_GLOBAL_PRIO_IDLE;
}
-static const struct thread_sched_ops thread_sched_ops[THREAD_NR_SCHED_CLASSES] = {
+static const struct thread_sched_ops thread_sched_ops[THREAD_NR_SCHED_CLASSES]
+ = {
[THREAD_SCHED_CLASS_RT] = {
- .init_sched = thread_sched_rt_init_sched,
.select_runq = thread_sched_rt_select_runq,
.add = thread_sched_rt_add,
.remove = thread_sched_rt_remove,
.put_prev = thread_sched_rt_put_prev,
.get_next = thread_sched_rt_get_next,
- .set_priority = NULL,
+ .reset_priority = thread_sched_rt_reset_priority,
+ .update_priority = NULL,
.get_global_priority = thread_sched_rt_get_global_priority,
.set_next = thread_sched_rt_set_next,
.tick = thread_sched_rt_tick,
},
[THREAD_SCHED_CLASS_FS] = {
- .init_sched = thread_sched_fs_init_sched,
.select_runq = thread_sched_fs_select_runq,
.add = thread_sched_fs_add,
.remove = thread_sched_fs_remove,
.put_prev = thread_sched_fs_put_prev,
.get_next = thread_sched_fs_get_next,
- .set_priority = thread_sched_fs_set_priority,
+ .reset_priority = thread_sched_fs_reset_priority,
+ .update_priority = thread_sched_fs_update_priority,
.get_global_priority = thread_sched_fs_get_global_priority,
.set_next = thread_sched_fs_set_next,
.tick = thread_sched_fs_tick,
},
[THREAD_SCHED_CLASS_IDLE] = {
- .init_sched = NULL,
.select_runq = thread_sched_idle_select_runq,
.add = thread_sched_idle_add,
.remove = thread_sched_idle_remove,
.put_prev = NULL,
.get_next = thread_sched_idle_get_next,
- .set_priority = NULL,
+ .reset_priority = NULL,
+ .update_priority = NULL,
.get_global_priority = thread_sched_idle_get_global_priority,
.set_next = NULL,
.tick = NULL,
@@ -1561,30 +1591,95 @@ static const struct thread_sched_ops thread_sched_ops[THREAD_NR_SCHED_CLASSES] =
};
static void
-thread_set_sched_policy(struct thread *thread, unsigned char sched_policy)
+thread_set_user_sched_policy(struct thread *thread, unsigned char sched_policy)
+{
+ thread->user_sched_data.sched_policy = sched_policy;
+}
+
+static void
+thread_set_user_sched_class(struct thread *thread, unsigned char sched_class)
+{
+ thread->user_sched_data.sched_class = sched_class;
+}
+
+static void
+thread_set_user_priority(struct thread *thread, unsigned short priority)
+{
+ const struct thread_sched_ops *ops;
+
+ ops = thread_get_user_sched_ops(thread);
+
+ thread->user_sched_data.priority = priority;
+ thread->user_sched_data.global_priority
+ = ops->get_global_priority(priority);
+}
+
+static void
+thread_update_user_priority(struct thread *thread, unsigned short priority)
+{
+ thread_set_user_priority(thread, priority);
+}
+
+static void
+thread_set_real_sched_policy(struct thread *thread, unsigned char sched_policy)
+{
+ thread->real_sched_data.sched_policy = sched_policy;
+}
+
+static void
+thread_set_real_sched_class(struct thread *thread, unsigned char sched_class)
{
- thread->sched_data.sched_policy = sched_policy;
+ thread->real_sched_data.sched_class = sched_class;
}
static void
-thread_set_sched_class(struct thread *thread, unsigned char sched_class)
+thread_set_real_priority(struct thread *thread, unsigned short priority)
{
- thread->sched_data.sched_class = sched_class;
+ const struct thread_sched_ops *ops;
+
+ ops = thread_get_real_sched_ops(thread);
+
+ thread->real_sched_data.priority = priority;
+ thread->real_sched_data.global_priority
+ = ops->get_global_priority(priority);
+
+ if (ops->reset_priority != NULL) {
+ ops->reset_priority(thread, priority);
+ }
}
static void
-thread_set_priority(struct thread *thread, unsigned short priority)
+thread_update_real_priority(struct thread *thread, unsigned short priority)
{
const struct thread_sched_ops *ops;
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
+
+ thread->real_sched_data.priority = priority;
+ thread->real_sched_data.global_priority
+ = ops->get_global_priority(priority);
- if (ops->set_priority != NULL) {
- ops->set_priority(thread, priority);
+ if (ops->update_priority != NULL) {
+ ops->update_priority(thread, priority);
}
+}
- thread->sched_data.priority = priority;
- thread->sched_data.global_priority = ops->get_global_priority(priority);
+static void
+thread_reset_real_priority(struct thread *thread)
+{
+ const struct thread_sched_ops *ops;
+ struct thread_sched_data *user, *real;
+
+ user = &thread->user_sched_data;
+ real = &thread->real_sched_data;
+ *real = *user;
+ thread->boosted = false;
+
+ ops = thread_get_user_sched_ops(thread);
+
+ if (ops->reset_priority != NULL) {
+ ops->reset_priority(thread, real->priority);
+ }
}
static void __init
@@ -1600,9 +1695,10 @@ thread_bootstrap_common(unsigned int cpu)
booter->flags = 0;
booter->preempt = 1;
cpumap_fill(&booter->cpumap);
- thread_set_sched_policy(booter, THREAD_SCHED_POLICY_IDLE);
- thread_set_sched_class(booter, THREAD_SCHED_CLASS_IDLE);
- thread_set_priority(booter, 0);
+ thread_set_user_sched_policy(booter, THREAD_SCHED_POLICY_IDLE);
+ thread_set_user_sched_class(booter, THREAD_SCHED_CLASS_IDLE);
+ thread_set_user_priority(booter, 0);
+ thread_reset_real_priority(booter);
memset(booter->tsd, 0, sizeof(booter->tsd));
booter->task = kernel_task;
thread_runq_init(percpu_ptr(thread_runq, cpu), cpu, booter);
@@ -1616,8 +1712,8 @@ thread_bootstrap(void)
thread_fs_highest_round = THREAD_FS_INITIAL_ROUND;
- thread_bootstrap_common(0);
tcb_set_current(&thread_booters[0].tcb);
+ thread_bootstrap_common(0);
}
void __init
@@ -1673,23 +1769,9 @@ thread_destroy_tsd(struct thread *thread)
}
}
-static void
-thread_init_sched(struct thread *thread, unsigned short priority)
-{
- const struct thread_sched_ops *ops;
-
- ops = thread_get_sched_ops(thread);
-
- if (ops->init_sched != NULL) {
- ops->init_sched(thread, priority);
- }
-
- thread->sched_data.priority = priority;
- thread->sched_data.global_priority = ops->get_global_priority(priority);
-}
-
static int
-thread_init(struct thread *thread, void *stack, const struct thread_attr *attr,
+thread_init(struct thread *thread, void *stack,
+ const struct thread_attr *attr,
void (*fn)(void *), void *arg)
{
struct thread *caller;
@@ -1706,15 +1788,34 @@ thread_init(struct thread *thread, void *stack, const struct thread_attr *attr,
thread->nr_refs = 1;
thread->flags = 0;
thread->runq = NULL;
+ thread->in_runq = false;
thread_set_wchan(thread, thread, "init");
thread->state = THREAD_SLEEPING;
+ thread->priv_sleepq = sleepq_create();
+
+ if (thread->priv_sleepq == NULL) {
+ error = ERROR_NOMEM;
+ goto error_sleepq;
+ }
+
+ thread->priv_turnstile = turnstile_create();
+
+ if (thread->priv_turnstile == NULL) {
+ error = ERROR_NOMEM;
+ goto error_turnstile;
+ }
+
+ turnstile_td_init(&thread->turnstile_td);
+ thread->last_cond = NULL;
+ thread->propagate_priority = false;
thread->preempt = THREAD_SUSPEND_PREEMPT_LEVEL;
thread->pinned = 0;
thread->llsync_read = 0;
cpumap_copy(&thread->cpumap, cpumap);
- thread_set_sched_policy(thread, attr->policy);
- thread_set_sched_class(thread, thread_policy_table[attr->policy]);
- thread_init_sched(thread, attr->priority);
+ thread_set_user_sched_policy(thread, attr->policy);
+ thread_set_user_sched_class(thread, thread_policy_to_class(attr->policy));
+ thread_set_user_priority(thread, attr->priority);
+ thread_reset_real_priority(thread);
memset(thread->tsd, 0, sizeof(thread->tsd));
mutex_init(&thread->join_lock);
condition_init(&thread->join_cond);
@@ -1732,15 +1833,19 @@ thread_init(struct thread *thread, void *stack, const struct thread_attr *attr,
error = tcb_init(&thread->tcb, stack, thread_main);
if (error) {
- goto error_tsd;
+ goto error_tcb;
}
task_add_thread(task, thread);
return 0;
-error_tsd:
+error_tcb:
thread_destroy_tsd(thread);
+ turnstile_destroy(thread->priv_turnstile);
+error_turnstile:
+ sleepq_destroy(thread->priv_sleepq);
+error_sleepq:
return error;
}
@@ -1782,8 +1887,12 @@ thread_destroy(struct thread *thread)
thread_unlock_runq(runq, flags);
} while (state != THREAD_DEAD);
- thread_destroy_tsd(thread);
+ /* See task_info() */
task_remove_thread(thread->task, thread);
+
+ thread_destroy_tsd(thread);
+ turnstile_destroy(thread->priv_turnstile);
+ sleepq_destroy(thread->priv_sleepq);
kmem_cache_free(&thread_stack_cache, thread->stack);
kmem_cache_free(&thread_cache, thread);
}
@@ -2208,7 +2317,7 @@ thread_wakeup(struct thread *thread)
thread->state = THREAD_RUNNING;
} else {
/*
- * If another wakeup was attempted right before this one, the thread
+ * If another wake-up was attempted right before this one, the thread
* may currently be pushed on a remote run queue, and the run queue
* being locked here is actually the previous one. The run queue
* pointer may be modified concurrently, now being protected by the
@@ -2233,7 +2342,7 @@ thread_wakeup(struct thread *thread)
cpu_intr_save(&flags);
if (!thread->pinned) {
- runq = thread_sched_ops[thread_sched_class(thread)].select_runq(thread);
+ runq = thread_get_real_sched_ops(thread)->select_runq(thread);
} else {
runq = thread->runq;
spinlock_lock(&runq->lock);
@@ -2325,7 +2434,7 @@ thread_tick_intr(void)
thread_balance_idle_tick(runq);
}
- ops = thread_get_sched_ops(thread);
+ ops = thread_get_real_sched_ops(thread);
if (ops->tick != NULL) {
ops->tick(runq, thread);
@@ -2334,6 +2443,7 @@ thread_tick_intr(void)
spinlock_unlock(&runq->lock);
}
+/* TODO Move outside */
char
thread_state_to_chr(const struct thread *thread)
{
@@ -2349,6 +2459,7 @@ thread_state_to_chr(const struct thread *thread)
}
}
+/* TODO Move outside */
const char *
thread_sched_class_to_str(unsigned char sched_class)
{
@@ -2369,17 +2480,23 @@ thread_setscheduler(struct thread *thread, unsigned char policy,
unsigned short priority)
{
struct thread_runq *runq;
+ struct turnstile_td *td;
unsigned long flags;
- bool current;
+ bool requeue, current, update;
+
+ td = thread_turnstile_td(thread);
+ turnstile_td_lock(td);
runq = thread_lock_runq(thread, &flags);
- if ((thread_sched_policy(thread) == policy)
- && (thread_priority(thread) == priority)) {
+ if ((thread_user_sched_policy(thread) == policy)
+ && (thread_user_priority(thread) == priority)) {
goto out;
}
- if (thread->state != THREAD_RUNNING) {
+ requeue = thread->in_runq;
+
+ if (!requeue) {
current = false;
} else {
if (thread != runq->current) {
@@ -2392,16 +2509,32 @@ thread_setscheduler(struct thread *thread, unsigned char policy,
thread_runq_remove(runq, thread);
}
- if (thread_sched_policy(thread) == policy) {
- thread_set_priority(thread, priority);
+ if (thread_user_sched_policy(thread) == policy) {
+ thread_update_user_priority(thread, priority);
+ update = true;
} else {
- thread_set_sched_policy(thread, policy);
- assert(policy < ARRAY_SIZE(thread_policy_table));
- thread_set_sched_class(thread, thread_policy_table[policy]);
- thread_init_sched(thread, priority);
+ thread_set_user_sched_policy(thread, policy);
+ thread_set_user_sched_class(thread, thread_policy_to_class(policy));
+ thread_set_user_priority(thread, priority);
+ update = false;
}
- if (thread->state == THREAD_RUNNING) {
+ if (thread->boosted) {
+ if (thread_user_global_priority(thread)
+ >= thread_real_global_priority(thread)) {
+ thread_reset_real_priority(thread);
+ }
+ } else {
+ if (update) {
+ thread_update_real_priority(thread, priority);
+ } else {
+ thread_set_real_sched_policy(thread, policy);
+ thread_set_real_sched_class(thread, thread_policy_to_class(policy));
+ thread_set_real_priority(thread, priority);
+ }
+ }
+
+ if (requeue) {
thread_runq_add(runq, thread);
if (current) {
@@ -2411,6 +2544,96 @@ thread_setscheduler(struct thread *thread, unsigned char policy,
out:
thread_unlock_runq(runq, flags);
+ turnstile_td_unlock(td);
+
+ turnstile_td_propagate_priority(td);
+}
+
+void
+thread_pi_setscheduler(struct thread *thread, unsigned char policy,
+ unsigned short priority)
+{
+ const struct thread_sched_ops *ops;
+ struct thread_runq *runq;
+ struct turnstile_td *td;
+ unsigned int global_priority;
+ unsigned long flags;
+ bool requeue, current;
+
+ td = thread_turnstile_td(thread);
+ turnstile_td_assert_lock(td);
+
+ ops = thread_get_sched_ops(thread_policy_to_class(policy));
+ global_priority = ops->get_global_priority(priority);
+
+ runq = thread_lock_runq(thread, &flags);
+
+ if ((thread_real_sched_policy(thread) == policy)
+ && (thread_real_priority(thread) == priority)) {
+ goto out;
+ }
+
+ requeue = thread->in_runq;
+
+ if (!requeue) {
+ current = false;
+ } else {
+ if (thread != runq->current) {
+ current = false;
+ } else {
+ thread_runq_put_prev(runq, thread);
+ current = true;
+ }
+
+ thread_runq_remove(runq, thread);
+ }
+
+ if (global_priority <= thread_user_global_priority(thread)) {
+ thread_reset_real_priority(thread);
+ } else {
+ if (thread_real_sched_policy(thread) == policy) {
+ thread_update_real_priority(thread, priority);
+ } else {
+ thread_set_real_sched_policy(thread, policy);
+ thread_set_real_sched_class(thread, thread_policy_to_class(policy));
+ thread_set_real_priority(thread, priority);
+ }
+
+ thread->boosted = true;
+ }
+
+ if (requeue) {
+ thread_runq_add(runq, thread);
+
+ if (current) {
+ thread_runq_set_next(runq, thread);
+ }
+ }
+
+out:
+ thread_unlock_runq(runq, flags);
+}
+
+void
+thread_propagate_priority(void)
+{
+ struct thread *thread;
+
+ /*
+ * Although it's possible to propagate priority with preemption
+ * disabled, the operation can be too expensive to allow it.
+ */
+ if (!thread_preempt_enabled()) {
+ thread_set_priority_propagation_needed();
+ return;
+ }
+
+ thread = thread_self();
+
+ /* Clear before propagation to avoid infinite recursion */
+ thread->propagate_priority = false;
+
+ turnstile_td_propagate_priority(thread_turnstile_td(thread));
}
void
diff --git a/kern/thread.h b/kern/thread.h
index 21be2ab7..60c330b9 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -33,30 +33,24 @@
#ifndef _KERN_THREAD_H
#define _KERN_THREAD_H
+#include <stdbool.h>
+#include <stddef.h>
+
#include <kern/assert.h>
+#include <kern/condition.h>
#include <kern/cpumap.h>
#include <kern/macros.h>
+#include <kern/spinlock_types.h>
+#include <kern/turnstile_types.h>
#include <machine/atomic.h>
#include <machine/tcb.h>
/*
- * Forward declaration
- */
-struct spinlock;
-
-/*
* Thread structure.
*/
struct thread;
/*
- * Thread name buffer size.
- */
-#define THREAD_NAME_SIZE 32
-
-/*
- * Common scheduling data.
- *
* The global priority of a thread is meant to be compared against
* another global priority to determine which thread has higher priority.
*/
@@ -67,6 +61,11 @@ struct thread_sched_data {
unsigned int global_priority;
};
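
As a short sketch of the intended use, only global priorities are directly comparable across scheduling classes; the per-class priority alone says nothing about relative urgency (a and b are hypothetical struct thread pointers):

    /* Meaningful regardless of the threads' scheduling classes. */
    if (thread_real_global_priority(a) > thread_real_global_priority(b)) {
        /* a is more urgent than b */
    }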
+/*
+ * Thread name buffer size.
+ */
+#define THREAD_NAME_SIZE 32
+
#include <kern/thread_i.h>
#define THREAD_KERNEL_PREFIX PACKAGE "_"
@@ -199,8 +198,9 @@ void thread_join(struct thread *thread);
* Make the current thread sleep while waiting for an event.
*
* The interlock is used to synchronize the thread state with respect to
- * wakeups, i.e. a wakeup request sent by another thread will not be missed
+ * wake-ups, i.e. a wake-up request sent by another thread cannot be missed
* if that thread is holding the interlock.
+ *
* As a special exception, threads that use preemption as a synchronization
 * mechanism can omit the interlock and pass a NULL pointer instead.
* In any case, the preemption nesting level must strictly be one when calling
@@ -210,9 +210,6 @@ void thread_join(struct thread *thread);
* address should refer to a relevant synchronization object, normally
* containing the interlock, but not necessarily.
*
- * This is a low level thread control primitive that should only be called by
- * higher thread synchronization functions.
- *
* Implies a memory barrier.
*/
void thread_sleep(struct spinlock *interlock, const void *wchan_addr,
@@ -222,9 +219,6 @@ void thread_sleep(struct spinlock *interlock, const void *wchan_addr,
* Schedule a thread for execution on a processor.
*
* No action is performed if the target thread is already in the running state.
- *
- * This is a low level thread control primitive that should only be called by
- * higher thread synchronization functions.
*/
void thread_wakeup(struct thread *thread);
@@ -261,6 +255,15 @@ void thread_tick_intr(void);
void thread_setscheduler(struct thread *thread, unsigned char policy,
unsigned short priority);
+/*
+ * Variant used for priority inheritance.
+ *
+ * The caller must hold the turnstile thread data lock and no turnstile
+ * locks when calling this function.
+ */
+void thread_pi_setscheduler(struct thread *thread, unsigned char policy,
+ unsigned short priority);
+
static inline void
thread_ref(struct thread *thread)
{
@@ -301,33 +304,72 @@ thread_wchan_desc(const struct thread *thread)
char thread_state_to_chr(const struct thread *thread);
static inline const struct thread_sched_data *
-thread_get_sched_data(const struct thread *thread)
+thread_get_user_sched_data(const struct thread *thread)
{
- return &thread->sched_data;
+ return &thread->user_sched_data;
}
+static inline const struct thread_sched_data *
+thread_get_real_sched_data(const struct thread *thread)
+{
+ return &thread->real_sched_data;
+}
+
+/*
+ * If the caller requires the scheduling data to be stable, it
+ * must lock one of the following objects :
+ * - the containing run queue
+ * - the per-thread turnstile data (turnstile_td)
+ *
+ * Both are locked when scheduling data are updated.
+ */
+
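For instance, following the rule above, a reader that needs a stable snapshot of the real scheduling data can hold the per-thread turnstile data lock around the accesses (a minimal sketch using only functions declared in this tree):

    struct turnstile_td *td;
    unsigned short priority;

    td = thread_turnstile_td(thread);
    turnstile_td_lock(td); /* scheduling data cannot be updated while held */
    priority = thread_real_priority(thread);
    turnstile_td_unlock(td);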
static inline unsigned char
-thread_sched_policy(const struct thread *thread)
+thread_user_sched_policy(const struct thread *thread)
{
- return thread_get_sched_data(thread)->sched_policy;
+ return thread_get_user_sched_data(thread)->sched_policy;
}
static inline unsigned char
-thread_sched_class(const struct thread *thread)
+thread_user_sched_class(const struct thread *thread)
{
- return thread_get_sched_data(thread)->sched_class;
+ return thread_get_user_sched_data(thread)->sched_class;
}
static inline unsigned short
-thread_priority(const struct thread *thread)
+thread_user_priority(const struct thread *thread)
{
- return thread_get_sched_data(thread)->priority;
+ return thread_get_user_sched_data(thread)->priority;
}
static inline unsigned int
-thread_global_priority(const struct thread *thread)
+thread_user_global_priority(const struct thread *thread)
+{
+ return thread_get_user_sched_data(thread)->global_priority;
+}
+
+static inline unsigned char
+thread_real_sched_policy(const struct thread *thread)
+{
+ return thread_get_real_sched_data(thread)->sched_policy;
+}
+
+static inline unsigned char
+thread_real_sched_class(const struct thread *thread)
+{
+ return thread_get_real_sched_data(thread)->sched_class;
+}
+
+static inline unsigned short
+thread_real_priority(const struct thread *thread)
{
- return thread_get_sched_data(thread)->global_priority;
+ return thread_get_real_sched_data(thread)->priority;
+}
+
+static inline unsigned int
+thread_real_global_priority(const struct thread *thread)
+{
+ return thread_get_real_sched_data(thread)->global_priority;
}
/*
@@ -367,6 +409,118 @@ thread_schedule(void)
}
/*
+ * Sleep queue lending functions.
+ */
+
+static inline struct sleepq *
+thread_sleepq_lend(void)
+{
+ struct sleepq *sleepq;
+
+ sleepq = thread_self()->priv_sleepq;
+ assert(sleepq != NULL);
+ thread_self()->priv_sleepq = NULL;
+ return sleepq;
+}
+
+static inline void
+thread_sleepq_return(struct sleepq *sleepq)
+{
+ assert(sleepq != NULL);
+ assert(thread_self()->priv_sleepq == NULL);
+ thread_self()->priv_sleepq = sleepq;
+}
+
+/*
+ * Condition variable related functions.
+ */
+
+static inline void
+thread_set_last_cond(struct condition *last_cond)
+{
+ struct thread *thread;
+
+ thread = thread_self();
+ assert(thread->last_cond == NULL);
+ thread->last_cond = last_cond;
+}
+
+static inline struct condition *
+thread_pull_last_cond(void)
+{
+ struct condition *last_cond;
+ struct thread *thread;
+
+ thread = thread_self();
+ last_cond = thread->last_cond;
+
+ if (last_cond != NULL) {
+ thread->last_cond = NULL;
+ }
+
+ return last_cond;
+}
+
+static inline void
+thread_wakeup_last_cond(void)
+{
+ struct condition *last_cond;
+
+ last_cond = thread_pull_last_cond();
+
+ if (last_cond != NULL) {
+ condition_wakeup(last_cond);
+ }
+}
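
A minimal sketch of how these helpers are presumably chained together; the condition and mutex call sites are not part of this hunk, so the surrounding calls here are hypothetical and only follow the intent described in thread_i.h:

    /* In a hypothetical condition wait path, right after waking up. */
    thread_set_last_cond(cond);

    /* Later, once the associated mutex has actually been released. */
    mutex_unlock(&mutex);
    thread_wakeup_last_cond(); /* only now wake the next waiter */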
+
+/*
+ * Turnstile lending functions.
+ */
+
+static inline struct turnstile *
+thread_turnstile_lend(void)
+{
+ struct turnstile *turnstile;
+
+ turnstile = thread_self()->priv_turnstile;
+ assert(turnstile != NULL);
+ thread_self()->priv_turnstile = NULL;
+ return turnstile;
+}
+
+static inline void
+thread_turnstile_return(struct turnstile *turnstile)
+{
+ assert(turnstile != NULL);
+ assert(thread_self()->priv_turnstile == NULL);
+ thread_self()->priv_turnstile = turnstile;
+}
+
+static inline struct turnstile_td *
+thread_turnstile_td(struct thread *thread)
+{
+ return &thread->turnstile_td;
+}
+
+/*
+ * Priority propagation functions.
+ */
+
+static inline bool
+thread_priority_propagation_needed(void)
+{
+ return thread_self()->propagate_priority;
+}
+
+static inline void
+thread_set_priority_propagation_needed(void)
+{
+ thread_self()->propagate_priority = true;
+}
+
+void thread_propagate_priority(void);
+
+/*
* Migration control functions.
*
* Functions that change the migration state are implicit compiler barriers.
@@ -421,6 +575,10 @@ thread_preempt_enable_no_resched(void)
thread = thread_self();
assert(thread->preempt != 0);
thread->preempt--;
+
+ if (thread_preempt_enabled() && thread_priority_propagation_needed()) {
+ thread_propagate_priority();
+ }
}
static inline void
diff --git a/kern/thread_i.h b/kern/thread_i.h
index a062773d..00d40ca6 100644
--- a/kern/thread_i.h
+++ b/kern/thread_i.h
@@ -18,15 +18,23 @@
#ifndef _KERN_THREAD_I_H
#define _KERN_THREAD_I_H
+#include <stdbool.h>
+
#include <kern/condition_types.h>
#include <kern/cpumap.h>
#include <kern/list_types.h>
#include <kern/macros.h>
#include <kern/mutex_types.h>
#include <kern/param.h>
+#include <kern/turnstile_types.h>
#include <machine/atomic.h>
#include <machine/tcb.h>
+/*
+ * Forward declarations.
+ */
+struct sleepq;
+
struct thread_runq;
struct thread_fs_runq;
@@ -86,12 +94,38 @@ struct thread {
/* Flags must be changed atomically */
unsigned long flags;
- /* Sleep/wakeup synchronization members */
+ /* Sleep/wake-up synchronization members */
struct thread_runq *runq;
+ bool in_runq;
const void *wchan_addr;
const char *wchan_desc;
unsigned short state;
+ /* Sleep queue available for lending */
+ struct sleepq *priv_sleepq;
+
+ /* Turnstile available for lending */
+ struct turnstile *priv_turnstile;
+
+ /*
+ * When a thread wakes up after waiting for a condition variable,
+ * it sets this variable so that other waiters are only awakened
+ * after the associated mutex has actually been released.
+ *
+ * This member is thread-local.
+ */
+ struct condition *last_cond;
+
+ /* Per-thread turnstile data */
+ struct turnstile_td turnstile_td;
+
+ /*
+ * True if priority must be propagated when preemption is reenabled
+ *
+ * This member is thread-local.
+ */
+ bool propagate_priority;
+
/* Thread-local members */
unsigned short preempt;
unsigned short pinned;
@@ -100,8 +134,21 @@ struct thread {
/* Processors on which this thread is allowed to run */
struct cpumap cpumap;
- /* Scheduling data */
- struct thread_sched_data sched_data;
+ /*
+ * Scheduling data.
+ */
+ struct thread_sched_data user_sched_data; /* User-provided */
+ struct thread_sched_data real_sched_data; /* Computed from
+ priority propagation */
+
+ /*
+ * True if the real scheduling data are not the user scheduling data.
+ *
+ * Note that it doesn't provide any information about priority inheritance.
+ * A thread may be part of a priority inheritance chain without its
+ * priority being boosted.
+ */
+ bool boosted;
/* Class specific scheduling data */
union {
diff --git a/kern/turnstile.c b/kern/turnstile.c
new file mode 100644
index 00000000..43629376
--- /dev/null
+++ b/kern/turnstile.c
@@ -0,0 +1,788 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation is based on "Solaris(tm) Internals: Solaris 10 and
+ * OpenSolaris Kernel Architecture, Second Edition" by Richard McDougall
+ * and Jim Mauro. See "Part Six: Platform Specifics, Chapter 17: Locking
+ * and Synchronization, Section 7 Turnstiles and Priority Inheritance".
+ *
+ * The differences are outlined below.
+ *
+ * This implementation doesn't support read/write locks, only mutual
+ * exclusion, because ownership doesn't apply well to read/write locks.
+ *
+ * Instead of using an external sleep queue object, this module implements
+ * that functionality itself. The reasons behind this decision are :
+ * - the use of expensive priority lists to queue threads, which a
+ * simpler sleep queue implementation shouldn't need
+ * - increased coupling with the scheduler
+ *
+ * Locking order : bucket (turnstile) -> turnstile_td -> thread_runq
+ *
+ * This order allows changing the owner of a turnstile without unlocking it,
+ * which is important because a turnstile is normally used to synchronize
+ * access to the owner. Unlocking a turnstile would allow the owner to
+ * change and make complex transient states visible. The drawback is that
+ * a thread cannot be requeued atomically when its priority is changed.
+ * That deferred requeue is done during priority propagation.
+ *
+ * TODO Analyse hash parameters.
+ */
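
In code form, the documented locking order translates into sequences like the following when queuing a waiter (a condensed sketch of what turnstile_wait() and turnstile_set_owner() below actually do):

    spinlock_lock(&turnstile->bucket->lock); /* bucket (turnstile) first */
    spinlock_lock(&td->lock);                /* then the per-thread turnstile_td */
    /* ... queue the waiter, update ownership ... */
    spinlock_unlock(&td->lock);
    /* thread_runq locks are only ever taken below the turnstile_td lock */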
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <kern/assert.h>
+#include <kern/init.h>
+#include <kern/kmem.h>
+#include <kern/list.h>
+#include <kern/macros.h>
+#include <kern/param.h>
+#include <kern/plist.h>
+#include <kern/spinlock.h>
+#include <kern/thread.h>
+#include <kern/turnstile.h>
+#include <kern/turnstile_types.h>
+
+/*
+ * Locking keys :
+ * (b) bucket
+ */
+struct turnstile_bucket {
+ struct spinlock lock;
+ struct list list; /* (b) */
+} __aligned(CPU_L1_SIZE);
+
+/*
+ * Adding/removing waiters to/from a turnstile is performed while
+ * holding both the waiter's thread data and the turnstile locks.
+ *
+ * Changing the owner of a turnstile and linking/unlinking the turnstile
+ * into/from the owner's list of owned turnstiles are done atomically,
+ * while holding both the owner's thread data and the turnstile locks.
+ *
+ * Locking keys :
+ * (b) bucket
+ * (t) turnstile_td
+ *
+ * (*) A turnstile is referenced in thread data after/before being
+ * added/removed to/from its bucket. Referencing a turnstile in
+ * thread data requires holding the thread data lock. This implies
+ * that, while holding the thread data lock, if the referenced
+ * turnstile isn't NULL, the bucket pointer is also not NULL and
+ * stable.
+ */
+struct turnstile {
+ struct turnstile_bucket *bucket; /* (b*) */
+ struct list node; /* (b) */
+ const void *sync_obj; /* (b) */
+ struct plist waiters; /* (b,t) */
+ struct turnstile *next_free; /* (b) */
+ struct turnstile_waiter *top_waiter; /* (b,t) */
+ struct thread *owner; /* (b,t) */
+ struct plist_node td_node; /* (t) */
+};
+
+/*
+ * Locking keys :
+ * (b) bucket
+ * (t) turnstile_td
+ */
+struct turnstile_waiter {
+ struct plist_node node; /* (b,t) */
+ struct thread *thread; /* (b,t) */
+ bool awaken; /* (b) */
+};
+
+#define TURNSTILE_HTABLE_SIZE 128
+
+#if !ISP2(TURNSTILE_HTABLE_SIZE)
+#error hash table size must be a power of two
+#endif /* !ISP2(TURNSTILE_HTABLE_SIZE) */
+
+#define TURNSTILE_HTABLE_MASK (TURNSTILE_HTABLE_SIZE - 1)
+
+static struct turnstile_bucket turnstile_htable[TURNSTILE_HTABLE_SIZE];
+
+static struct kmem_cache turnstile_cache;
+
+static uintptr_t
+turnstile_hash(const void *addr)
+{
+ return ((uintptr_t)addr >> 8) ^ (uintptr_t)addr;
+}
+
+static void
+turnstile_waiter_init(struct turnstile_waiter *waiter, struct thread *thread)
+{
+ plist_node_init(&waiter->node, thread_real_global_priority(thread));
+ waiter->thread = thread;
+ waiter->awaken = false;
+}
+
+static unsigned int
+turnstile_waiter_priority(const struct turnstile_waiter *waiter)
+{
+ return plist_node_priority(&waiter->node);
+}
+
+static bool
+turnstile_waiter_awaken(const struct turnstile_waiter *waiter)
+{
+ return waiter->awaken;
+}
+
+static void
+turnstile_waiter_set_awaken(struct turnstile_waiter *waiter)
+{
+ waiter->awaken = true;
+}
+
+static void
+turnstile_waiter_clear_awaken(struct turnstile_waiter *waiter)
+{
+ waiter->awaken = false;
+}
+
+static void
+turnstile_waiter_wakeup(struct turnstile_waiter *waiter)
+{
+ if (turnstile_waiter_awaken(waiter)) {
+ return;
+ }
+
+ thread_wakeup(waiter->thread);
+ turnstile_waiter_set_awaken(waiter);
+}
+
+static void
+turnstile_update_top_waiter(struct turnstile *turnstile)
+{
+ if (turnstile_empty(turnstile)) {
+ turnstile->top_waiter = NULL;
+ return;
+ }
+
+ turnstile->top_waiter = plist_last_entry(&turnstile->waiters,
+ struct turnstile_waiter, node);
+}
+
+static void
+turnstile_add_waiter(struct turnstile *turnstile,
+ struct turnstile_waiter *waiter)
+{
+ assert(!turnstile_waiter_awaken(waiter));
+ plist_add(&turnstile->waiters, &waiter->node);
+ turnstile_update_top_waiter(turnstile);
+}
+
+static void
+turnstile_remove_waiter(struct turnstile *turnstile,
+ struct turnstile_waiter *waiter)
+{
+ assert(turnstile_waiter_awaken(waiter));
+ plist_remove(&turnstile->waiters, &waiter->node);
+ turnstile_update_top_waiter(turnstile);
+}
+
+static void
+turnstile_waiter_requeue(struct turnstile *turnstile,
+ struct turnstile_waiter *waiter)
+{
+ unsigned int global_priority;
+
+ global_priority = thread_real_global_priority(waiter->thread);
+ assert(global_priority != plist_node_priority(&waiter->node));
+
+ plist_remove(&turnstile->waiters, &waiter->node);
+ plist_node_set_priority(&waiter->node, global_priority);
+ plist_add(&turnstile->waiters, &waiter->node);
+ turnstile_update_top_waiter(turnstile);
+}
+
+static void
+turnstile_td_set_waiter(struct turnstile_td *td,
+ struct turnstile_waiter *waiter)
+{
+ td->waiter = waiter;
+}
+
+static struct turnstile_waiter *
+turnstile_td_get_waiter(const struct turnstile_td *td)
+{
+ return td->waiter;
+}
+
+static void
+turnstile_td_update_top_priority(struct turnstile_td *td)
+{
+ struct turnstile_waiter *top_waiter;
+ struct turnstile *top_turnstile;
+
+ if (plist_empty(&td->owned_turnstiles)) {
+ td->top_global_priority = 0;
+ return;
+ }
+
+ top_turnstile = plist_last_entry(&td->owned_turnstiles,
+ struct turnstile, td_node);
+ top_waiter = top_turnstile->top_waiter;
+
+ if (top_waiter == NULL) {
+ td->top_global_priority = 0;
+ } else {
+ td->top_global_priority = turnstile_waiter_priority(top_waiter);
+ td->top_sched_policy = thread_real_sched_policy(top_waiter->thread);
+ td->top_priority = thread_real_priority(top_waiter->thread);
+ }
+}
+
+static void
+turnstile_td_own(struct turnstile_td *td, struct turnstile *turnstile)
+{
+ struct turnstile_waiter *top_waiter;
+ unsigned int top_priority;
+
+ assert(turnstile->owner == NULL);
+
+ top_waiter = turnstile->top_waiter;
+ assert(top_waiter != NULL);
+ top_priority = thread_real_global_priority(top_waiter->thread);
+ plist_node_init(&turnstile->td_node, top_priority);
+ plist_add(&td->owned_turnstiles, &turnstile->td_node);
+ turnstile_td_update_top_priority(td);
+
+ turnstile->owner = structof(td, struct thread, turnstile_td);
+}
+
+static void
+turnstile_td_disown(struct turnstile_td *td, struct turnstile *turnstile)
+{
+ assert(turnstile->owner == structof(td, struct thread, turnstile_td));
+
+ assert(!plist_node_unlinked(&turnstile->td_node));
+ plist_remove(&td->owned_turnstiles, &turnstile->td_node);
+ turnstile_td_update_top_priority(td);
+
+ turnstile->owner = NULL;
+}
+
+static void
+turnstile_td_reown(struct turnstile_td *td, struct turnstile *turnstile)
+{
+ assert(turnstile->owner == structof(td, struct thread, turnstile_td));
+ assert(!plist_node_unlinked(&turnstile->td_node));
+ plist_remove(&td->owned_turnstiles, &turnstile->td_node);
+ turnstile->owner = NULL;
+ turnstile_td_own(td, turnstile);
+}
+
+/*
+ * The caller must hold the turnstile thread data lock and no turnstile
+ * locks when calling this function. The thread data are unlocked on return.
+ *
+ * In addition, this function drops a reference on the thread associated
+ * with the given thread data.
+ */
+static void
+turnstile_td_propagate_priority_loop(struct turnstile_td *td)
+{
+ unsigned int user_priority, real_priority, top_priority;
+ struct turnstile_waiter *waiter;
+ struct turnstile *turnstile;
+ struct thread *thread;
+ unsigned short priority;
+ unsigned char policy;
+ int error;
+
+ /*
+ * At the very least, this function must make sure that :
+ * - the given thread has its intended priority, which is the
+ * highest among its own and all the waiters in the turnstiles
+ * it owns, and
+ * - the thread is at its intended position in the turnstile it's
+ * waiting on, if any.
+ */
+
+ for (;;) {
+ thread = structof(td, struct thread, turnstile_td);
+ user_priority = thread_user_global_priority(thread);
+ real_priority = thread_real_global_priority(thread);
+ top_priority = td->top_global_priority;
+
+ if (top_priority > user_priority) {
+ policy = td->top_sched_policy;
+ priority = td->top_priority;
+ } else {
+ top_priority = user_priority;
+ policy = thread_user_sched_policy(thread);
+ priority = thread_user_priority(thread);
+ }
+
+ if (top_priority != real_priority) {
+ thread_pi_setscheduler(thread, policy, priority);
+ }
+
+ waiter = turnstile_td_get_waiter(td);
+
+ if ((waiter == NULL)
+ || (top_priority == turnstile_waiter_priority(waiter))) {
+ spinlock_unlock(&td->lock);
+ thread_unref(thread);
+ return;
+ }
+
+ turnstile = turnstile_td_get_turnstile(td);
+ assert(turnstile != NULL);
+
+ error = spinlock_trylock(&turnstile->bucket->lock);
+
+ if (error) {
+ spinlock_unlock(&td->lock);
+ spinlock_lock(&td->lock);
+ continue;
+ }
+
+ /*
+ * This couldn't be done while changing the thread's priority
+ * because of locking restrictions. Do it now.
+ */
+ turnstile_waiter_requeue(turnstile, waiter);
+
+ spinlock_unlock(&td->lock);
+ thread_unref(thread);
+
+ thread = turnstile->owner;
+
+ if (thread == NULL) {
+ break;
+ }
+
+ td = thread_turnstile_td(thread);
+
+ thread_ref(thread);
+ spinlock_lock(&td->lock);
+
+ turnstile_td_reown(td, turnstile);
+
+ spinlock_unlock(&turnstile->bucket->lock);
+ }
+
+ spinlock_unlock(&turnstile->bucket->lock);
+}
+
+void
+turnstile_td_propagate_priority(struct turnstile_td *td)
+{
+ struct thread *thread;
+
+ thread = structof(td, struct thread, turnstile_td);
+
+ thread_ref(thread);
+ spinlock_lock(&td->lock);
+ turnstile_td_propagate_priority_loop(td);
+}
+
+static void
+turnstile_assert_init_state(const struct turnstile *turnstile)
+{
+ assert(turnstile->bucket == NULL);
+ assert(turnstile->sync_obj == NULL);
+ assert(plist_empty(&turnstile->waiters));
+ assert(turnstile->next_free == NULL);
+ assert(turnstile->top_waiter == NULL);
+ assert(turnstile->owner == NULL);
+}
+
+static void
+turnstile_use(struct turnstile *turnstile, const void *sync_obj)
+{
+ assert(turnstile->sync_obj == NULL);
+ turnstile->sync_obj = sync_obj;
+}
+
+static void
+turnstile_unuse(struct turnstile *turnstile)
+{
+ assert(turnstile->sync_obj != NULL);
+ turnstile->sync_obj = NULL;
+}
+
+static bool
+turnstile_in_use(const struct turnstile *turnstile)
+{
+ return turnstile->sync_obj != NULL;
+}
+
+static bool
+turnstile_in_use_by(const struct turnstile *turnstile, const void *sync_obj)
+{
+ return turnstile->sync_obj == sync_obj;
+}
+
+static void
+turnstile_bucket_init(struct turnstile_bucket *bucket)
+{
+ spinlock_init(&bucket->lock);
+ list_init(&bucket->list);
+}
+
+static struct turnstile_bucket *
+turnstile_bucket_get(const void *sync_obj)
+{
+ uintptr_t index;
+
+ index = turnstile_hash(sync_obj) & TURNSTILE_HTABLE_MASK;
+ assert(index < ARRAY_SIZE(turnstile_htable));
+ return &turnstile_htable[index];
+}
+
+static void
+turnstile_bucket_add(struct turnstile_bucket *bucket,
+ struct turnstile *turnstile)
+{
+ assert(turnstile->bucket == NULL);
+ turnstile->bucket = bucket;
+ list_insert_tail(&bucket->list, &turnstile->node);
+}
+
+static void
+turnstile_bucket_remove(struct turnstile_bucket *bucket,
+ struct turnstile *turnstile)
+{
+ assert(turnstile->bucket == bucket);
+ turnstile->bucket = NULL;
+ list_remove(&turnstile->node);
+}
+
+static struct turnstile *
+turnstile_bucket_lookup(const struct turnstile_bucket *bucket,
+ const void *sync_obj)
+{
+ struct turnstile *turnstile;
+
+ list_for_each_entry(&bucket->list, turnstile, node) {
+ if (turnstile_in_use_by(turnstile, sync_obj)) {
+ return turnstile;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+turnstile_ctor(void *ptr)
+{
+ struct turnstile *turnstile;
+
+ turnstile = ptr;
+ turnstile->bucket = NULL;
+ turnstile->sync_obj = NULL;
+ plist_init(&turnstile->waiters);
+ turnstile->next_free = NULL;
+ turnstile->top_waiter = NULL;
+ turnstile->owner = NULL;
+}
+
+void __init
+turnstile_setup(void)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(turnstile_htable); i++) {
+ turnstile_bucket_init(&turnstile_htable[i]);
+ }
+
+ kmem_cache_init(&turnstile_cache, "turnstile", sizeof(struct turnstile),
+ CPU_L1_SIZE, turnstile_ctor, 0);
+}
+
+struct turnstile *
+turnstile_create(void)
+{
+ struct turnstile *turnstile;
+
+ turnstile = kmem_cache_alloc(&turnstile_cache);
+
+ if (turnstile == NULL) {
+ return NULL;
+ }
+
+ turnstile_assert_init_state(turnstile);
+ return turnstile;
+}
+
+void
+turnstile_destroy(struct turnstile *turnstile)
+{
+ turnstile_assert_init_state(turnstile);
+ kmem_cache_free(&turnstile_cache, turnstile);
+}
+
+struct turnstile *
+turnstile_acquire(const void *sync_obj)
+{
+ struct turnstile_bucket *bucket;
+ struct turnstile *turnstile;
+
+ assert(sync_obj != NULL);
+
+ bucket = turnstile_bucket_get(sync_obj);
+
+ spinlock_lock(&bucket->lock);
+
+ turnstile = turnstile_bucket_lookup(bucket, sync_obj);
+
+ if (turnstile == NULL) {
+ spinlock_unlock(&bucket->lock);
+ return NULL;
+ }
+
+ return turnstile;
+}
+
+void
+turnstile_release(struct turnstile *turnstile)
+{
+ spinlock_unlock(&turnstile->bucket->lock);
+}
+
+static void
+turnstile_push_free(struct turnstile *turnstile,
+ struct turnstile *free_turnstile)
+{
+ assert(free_turnstile->next_free == NULL);
+ free_turnstile->next_free = turnstile->next_free;
+ turnstile->next_free = free_turnstile;
+}
+
+static struct turnstile *
+turnstile_pop_free(struct turnstile *turnstile)
+{
+ struct turnstile *free_turnstile;
+
+ free_turnstile = turnstile->next_free;
+
+ if (free_turnstile == NULL) {
+ return NULL;
+ }
+
+ turnstile->next_free = free_turnstile->next_free;
+ free_turnstile->next_free = NULL;
+ return free_turnstile;
+}
+
+struct turnstile *
+turnstile_lend(const void *sync_obj)
+{
+ struct turnstile_bucket *bucket;
+ struct turnstile *turnstile, *prev;
+ struct turnstile_td *td;
+
+ assert(sync_obj != NULL);
+
+ turnstile = thread_turnstile_lend();
+ turnstile_assert_init_state(turnstile);
+
+ td = thread_turnstile_td(thread_self());
+ bucket = turnstile_bucket_get(sync_obj);
+
+ spinlock_lock(&bucket->lock);
+
+ prev = turnstile_bucket_lookup(bucket, sync_obj);
+
+ if (prev == NULL) {
+ turnstile_use(turnstile, sync_obj);
+ turnstile_bucket_add(bucket, turnstile);
+ } else {
+ turnstile_push_free(prev, turnstile);
+ turnstile = prev;
+ }
+
+ spinlock_lock(&td->lock);
+ turnstile_td_set_turnstile(td, turnstile);
+ spinlock_unlock(&td->lock);
+
+ return turnstile;
+}
+
+void
+turnstile_return(struct turnstile *turnstile)
+{
+ struct turnstile_bucket *bucket;
+ struct turnstile *free_turnstile;
+ struct turnstile_td *td;
+
+ assert(turnstile_in_use(turnstile));
+
+ td = thread_turnstile_td(thread_self());
+
+ spinlock_lock(&td->lock);
+ turnstile_td_set_turnstile(td, NULL);
+ spinlock_unlock(&td->lock);
+
+ bucket = turnstile->bucket;
+ free_turnstile = turnstile_pop_free(turnstile);
+
+ if (free_turnstile == NULL) {
+ turnstile_bucket_remove(bucket, turnstile);
+ turnstile_unuse(turnstile);
+ free_turnstile = turnstile;
+ }
+
+ spinlock_unlock(&bucket->lock);
+
+ turnstile_assert_init_state(free_turnstile);
+ thread_turnstile_return(free_turnstile);
+}
+
+bool
+turnstile_empty(const struct turnstile *turnstile)
+{
+ return plist_empty(&turnstile->waiters);
+}
+
+static void
+turnstile_set_owner(struct turnstile *turnstile, struct thread *owner)
+{
+ struct turnstile_td *td;
+
+ assert(owner != NULL);
+ assert((turnstile->owner == NULL) || (turnstile->owner == owner));
+
+ td = thread_turnstile_td(owner);
+
+ thread_ref(owner);
+ spinlock_lock(&td->lock);
+
+ if (turnstile->owner == NULL) {
+ turnstile_td_own(td, turnstile);
+ }
+
+ spinlock_unlock(&turnstile->bucket->lock);
+
+ turnstile_td_propagate_priority_loop(td);
+
+ spinlock_lock(&turnstile->bucket->lock);
+}
+
+void
+turnstile_wait(struct turnstile *turnstile, const char *wchan,
+ struct thread *owner)
+{
+ struct turnstile_waiter waiter;
+ struct turnstile_td *td;
+ struct thread *thread;
+
+ thread = thread_self();
+ assert(thread != owner);
+
+ td = thread_turnstile_td(thread);
+
+ spinlock_lock(&td->lock);
+ turnstile_waiter_init(&waiter, thread);
+ turnstile_add_waiter(turnstile, &waiter);
+ turnstile_td_set_waiter(td, &waiter);
+ spinlock_unlock(&td->lock);
+
+ if (owner == NULL) {
+ if (turnstile->top_waiter == &waiter) {
+ turnstile_waiter_set_awaken(&waiter);
+ }
+ } else {
+ /* This function temporarily unlocks the turnstile */
+ turnstile_set_owner(turnstile, owner);
+ }
+
+ for (;;) {
+ if (!turnstile_waiter_awaken(&waiter)) {
+ thread_sleep(&turnstile->bucket->lock, turnstile->sync_obj, wchan);
+ }
+
+ /*
+ * The real priority of a thread may change between waking up
+ * and reacquiring the turnstile.
+ */
+ if (turnstile->top_waiter == &waiter) {
+ break;
+ }
+
+ /* Otherwise, make sure the new top waiter is awakened */
+ turnstile_waiter_wakeup(turnstile->top_waiter);
+ turnstile_waiter_clear_awaken(&waiter);
+ }
+
+ spinlock_lock(&td->lock);
+ turnstile_td_set_waiter(td, NULL);
+ turnstile_remove_waiter(turnstile, &waiter);
+ spinlock_unlock(&td->lock);
+}
+
+void
+turnstile_signal(struct turnstile *turnstile)
+{
+ struct turnstile_waiter *waiter;
+
+ if (turnstile_empty(turnstile)) {
+ return;
+ }
+
+ waiter = plist_last_entry(&turnstile->waiters,
+ struct turnstile_waiter, node);
+ turnstile_waiter_wakeup(waiter);
+}
+
+void
+turnstile_own(struct turnstile *turnstile)
+{
+ struct turnstile_td *td;
+ struct thread *owner;
+ unsigned int top_priority;
+
+ assert(turnstile->owner == NULL);
+
+ if (turnstile_empty(turnstile)) {
+ return;
+ }
+
+ owner = thread_self();
+ top_priority = turnstile_waiter_priority(turnstile->top_waiter);
+ assert(thread_real_global_priority(owner) >= top_priority);
+ td = thread_turnstile_td(owner);
+
+ spinlock_lock(&td->lock);
+ turnstile_td_own(td, turnstile);
+ spinlock_unlock(&td->lock);
+}
+
+void
+turnstile_disown(struct turnstile *turnstile)
+{
+ struct turnstile_td *td;
+ struct thread *owner;
+
+ owner = thread_self();
+ assert(turnstile->owner == owner);
+ assert(!turnstile_empty(turnstile));
+
+ td = thread_turnstile_td(owner);
+
+ spinlock_lock(&td->lock);
+ turnstile_td_disown(td, turnstile);
+ spinlock_unlock(&td->lock);
+}
diff --git a/kern/turnstile.h b/kern/turnstile.h
new file mode 100644
index 00000000..21a5a1a9
--- /dev/null
+++ b/kern/turnstile.h
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Priority propagation capable sleep queues.
+ *
+ * Turnstiles are used to build sleeping synchronization primitives where
+ * ownership applies, such as mutexes. They allow threads with different
+ * priorities to contend on the same synchronization object without
+ * unbounded priority inversion.
+ */
+
+#ifndef _KERN_TURNSTILE_H
+#define _KERN_TURNSTILE_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <kern/plist.h>
+#include <kern/spinlock.h>
+#include <kern/thread.h>
+#include <kern/turnstile_types.h>
+
+struct turnstile;
+
+/*
+ * Turnstile thread data.
+ */
+struct turnstile_td;
+
+#define turnstile_td_assert_lock(td) spinlock_assert_locked(&(td)->lock)
+
+/*
+ * Initialize turnstile thread data.
+ */
+static inline void
+turnstile_td_init(struct turnstile_td *td)
+{
+ spinlock_init(&td->lock);
+ td->turnstile = NULL;
+ td->waiter = NULL;
+ plist_init(&td->owned_turnstiles);
+ td->top_global_priority = 0;
+}
+
+/*
+ * Turnstile thread data locking functions.
+ */
+
+static inline void
+turnstile_td_lock(struct turnstile_td *td)
+{
+ spinlock_lock(&td->lock);
+}
+
+static inline int
+turnstile_td_trylock(struct turnstile_td *td)
+{
+ return spinlock_trylock(&td->lock);
+}
+
+static inline void
+turnstile_td_unlock(struct turnstile_td *td)
+{
+ spinlock_unlock(&td->lock);
+}
+
+/*
+ * Functions managing the turnstile a thread is sleeping in.
+ */
+
+static inline void
+turnstile_td_set_turnstile(struct turnstile_td *td,
+ struct turnstile *turnstile)
+{
+ td->turnstile = turnstile;
+}
+
+static inline struct turnstile *
+turnstile_td_get_turnstile(const struct turnstile_td *td)
+{
+ return td->turnstile;
+}
+
+/*
+ * Propagate priority starting at the thread containing the given thread data.
+ */
+void turnstile_td_propagate_priority(struct turnstile_td *td);
+
+/*
+ * Initialize the turnstile module.
+ */
+void turnstile_setup(void);
+
+/*
+ * Create/destroy a turnstile.
+ */
+struct turnstile * turnstile_create(void);
+void turnstile_destroy(struct turnstile *turnstile);
+
+/*
+ * Acquire/release a turnstile.
+ *
+ * Acquiring a turnstile serializes all access and disables preemption.
+ */
+struct turnstile * turnstile_acquire(const void *sync_obj);
+void turnstile_release(struct turnstile *turnstile);
+
+/*
+ * Lend/return a turnstile.
+ *
+ * A thread lends its private turnstile to the turnstile module in
+ * order to prepare its sleep. The turnstile obtained on lending
+ * is either the thread's turnstile, or an already existing turnstile
+ * for this synchronization object if another thread is waiting.
+ *
+ * When multiple threads are waiting on the same turnstile, the extra
+ * turnstiles lent are kept in an internal free list, used when threads
+ * are awakened to return a turnstile to them.
+ *
+ * Note that the turnstile returned may not be the one lent.
+ *
+ * The turnstile obtained when lending is automatically acquired.
+ */
+struct turnstile * turnstile_lend(const void *sync_obj);
+void turnstile_return(struct turnstile *turnstile);
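+
+/*
+ * Example (illustrative sketch only, not the actual mutex code) of a
+ * contended lock path built on lending, for an imaginary lock object.
+ * The lock_tryacquire() and lock_get_owner() helpers are assumptions
+ * made for this sketch; turnstile_wait() and turnstile_own() are
+ * declared below.
+ *
+ *   turnstile = turnstile_lend(lock);
+ *
+ *   while (!lock_tryacquire(lock)) {
+ *       owner = lock_get_owner(lock);
+ *       turnstile_wait(turnstile, "lock", owner);
+ *   }
+ *
+ *   turnstile_own(turnstile);
+ *   turnstile_return(turnstile);
+ */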
+
+/*
+ * Return true if the given turnstile has no waiters.
+ *
+ * The turnstile must be acquired when calling this function.
+ */
+bool turnstile_empty(const struct turnstile *turnstile);
+
+/*
+ * Wait for a wake up on the given turnstile.
+ *
+ * The turnstile must be lent when calling this function. It is
+ * released and later reacquired before returning from this function.
+ *
+ * The calling thread is considered a waiter as long as it hasn't
+ * reacquired the turnstile. This means that signalling a turnstile
+ * has no visible effect on the number of waiters until the turnstile
+ * is released, e.g. if a single thread is waiting and another signals
+ * the turnstile, the turnstile is not immediately considered empty.
+ *
+ * If owner isn't NULL, it must refer to the thread currently owning
+ * the associated synchronization object. The priority of the caller
+ * is propagated to the chain of turnstiles and owners as necessary
+ * to prevent unbounded priority inversion.
+ */
+void turnstile_wait(struct turnstile *turnstile, const char *wchan,
+ struct thread *owner);
+
+/*
+ * Wake up a thread waiting on the given turnstile, if any.
+ *
+ * The turnstile must be acquired when calling this function.
+ * Since a turnstile must be lent (and in turn is automatically
+ * acquired) when waiting, and acquired in order to signal it,
+ * wake-ups are serialized and cannot be missed.
+ */
+void turnstile_signal(struct turnstile *turnstile);
+
+/*
+ * Own/disown a turnstile.
+ *
+ * The turnstile must be lent when taking ownership, acquired when
+ * releasing it. Owning has no effect on empty turnstiles.
+ * Conversely, an empty turnstile cannot be disowned.
+ *
+ * Ownership must be updated atomically with regard to the ownership
+ * of the associated synchronization object.
+ */
+void turnstile_own(struct turnstile *turnstile);
+void turnstile_disown(struct turnstile *turnstile);
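+
+/*
+ * Example (illustrative sketch only, not the actual mutex code) of the
+ * matching contended unlock path for the lock sketch above. The
+ * lock_clear_owner() helper is an assumption made for this sketch, and
+ * turnstile_acquire() is assumed to return NULL when no turnstile was
+ * lent for the lock.
+ *
+ *   turnstile = turnstile_acquire(lock);
+ *
+ *   if (turnstile != NULL) {
+ *       turnstile_disown(turnstile);
+ *       lock_clear_owner(lock);
+ *       turnstile_signal(turnstile);
+ *       turnstile_release(turnstile);
+ *   }
+ */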
+
+#endif /* _KERN_TURNSTILE_H */
diff --git a/kern/turnstile_types.h b/kern/turnstile_types.h
new file mode 100644
index 00000000..a95b691e
--- /dev/null
+++ b/kern/turnstile_types.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definition used to avoid inclusion circular dependencies.
+ */
+
+#ifndef _KERN_TURNSTILE_TYPES_H
+#define _KERN_TURNSTILE_TYPES_H
+
+#include <kern/plist_types.h>
+#include <kern/spinlock_types.h>
+
+struct turnstile;
+struct turnstile_waiter;
+
+/*
+ * Per-thread turnstile data.
+ *
+ * The turnstile member indicates whether this thread is in a turnstile,
+ * and is only valid if the thread is not running.
+ *
+ * The waiter member points to the structure a thread uses to queue
+ * itself on a turnstile. It is used to access a sleeping thread from
+ * another thread, e.g. on wake-ups or priority updates.
+ *
+ * The list of owned turnstiles is used by priority propagation to
+ * determine the top priority among all waiters, which is stored in
+ * the thread data so that turnstiles can be unlocked quickly.
+ *
+ * Locking keys :
+ * (b) bucket
+ * (t) turnstile_td
+ */
+struct turnstile_td {
+ struct spinlock lock;
+ struct turnstile *turnstile; /* (t) */
+ struct turnstile_waiter *waiter; /* (b,t) */
+ struct plist owned_turnstiles; /* (t) */
+ unsigned int top_global_priority; /* (t) */
+ unsigned char top_sched_policy; /* (t) */
+ unsigned short top_priority; /* (t) */
+};
+
+#endif /* _KERN_TURNSTILE_TYPES_H */
diff --git a/test/test_mutex_pi.c b/test/test_mutex_pi.c
new file mode 100644
index 00000000..1adcf627
--- /dev/null
+++ b/test/test_mutex_pi.c
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This test module is a stress test, expected to never terminate, of
+ * priority inheritance with mutexes. It creates a priority inheritance
+ * tree by starting multiple threads manipulating multiple mutexes.
+ *
+ * Here is one intended state of the priority inheritance tree :
+ *
+ *
+ * C->M2-+
+ * |
+ * D--+ +->B->M1->A
+ * | |
+ * +->M3-+
+ * |
+ * E--+
+ *
+ *
+ * M1, M2, M3, etc. are mutexes and A, B, C, etc. are threads. The thread
+ * priorities p(thread) are ordered such that p(A) < p(B) < p(C), etc.
+ * An arrow from a mutex to a thread indicates ownership, such that
+ * M1->A means that A owns M1. An arrow from a thread to a mutex indicates
+ * waiting, such that B->M1 means that B is waiting for M1 to be unlocked.
+ *
+ * In addition, thread B is actually many threads, each terminating after
+ * unlocking its mutexes. Also, the priority of thread C is regularly
+ * increased to p(E) + 1 and later restored to p(C).
+ *
+ * Here is the list of all the cases this test must cover :
+ * - Priority inheritance: All threads can get their real priority boosted.
+ * C can be boosted if it owns M2 and B waits for it while inheriting
+ * from E, and E can be boosted when p(C) is temporarily increased.
+ * - Return to normal priority: after releasing all locks, a thread must
+ * have reset its real priority to its user priority.
+ * - Priority changes: When the priority of C is increased, that priority
+ * must take precedence over all others.
+ * - Thread destruction resilience: Check that priority propagation never
+ * accesses a destroyed thread.
+ *
+ * Note that this test doesn't check that priority propagation correctly
+ * adjusts the top priority after lowering the priority of thread C back
+ * to p(C).
+ *
+ * In order to artificially create priority inversions, all threads run on
+ * separate processors, so this test requires 5 processors.
+ *
+ * TODO Use timers instead of busy-waiting so that binding to processors
+ * isn't required.
+ *
+ * The test should output a couple of messages about thread priorities
+ * being boosted, and then frequent updates from each thread to show
+ * they're all making progress. Thread B suffers from contention the most
+ * so its report frequency should be lower. Thread A suffers from contention
+ * the least and should be the most frequent to report progress. Because of
+ * contention from B, D and E on M3, D rarely gets boosted. The reason is
+ * that, when B owns the mutex, E is likely to wait on M3 soon enough that
+ * it will be awaken before D, preventing the conditions for priority
+ * inheritance to occur.
+ *
+ * Note that the test uses regular mutexes instead of real-time mutexes,
+ * so that its behaviour can be analyzed for both types depending on
+ * build options.
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include <kern/cpumap.h>
+#include <kern/error.h>
+#include <kern/mutex.h>
+#include <kern/panic.h>
+#include <kern/printk.h>
+#include <kern/thread.h>
+#include <kern/turnstile.h>
+#include <test/test.h>
+
+#define TEST_PRIO_A (THREAD_SCHED_RT_PRIO_MIN + 1)
+#define TEST_PRIO_B (TEST_PRIO_A + 1)
+#define TEST_PRIO_C (TEST_PRIO_B + 1)
+#define TEST_PRIO_D (TEST_PRIO_C + 1)
+#define TEST_PRIO_E (TEST_PRIO_D + 1)
+
+#define TEST_NR_LOCK_LOOPS 500
+#define TEST_NR_CONSUME_CPU_LOOPS 10000000
+
+static struct mutex test_mutex_1;
+static struct mutex test_mutex_2;
+static struct mutex test_mutex_3;
+
+static const char *
+test_thread_from_priority(unsigned short priority)
+{
+ switch (priority) {
+ case TEST_PRIO_A:
+ return "a";
+ case TEST_PRIO_B:
+ return "b";
+ case TEST_PRIO_C:
+ return "c";
+ case TEST_PRIO_D:
+ return "d";
+ case TEST_PRIO_E:
+ return "e";
+ case TEST_PRIO_E + 1:
+ return "e+";
+ default:
+ panic("invalid priority %u", priority);
+ }
+}
+
+static char
+test_get_name(void)
+{
+ const char *name;
+ size_t length;
+
+ name = thread_self()->name;
+ length = strlen(name);
+ return name[length - 1];
+}
+
+static void
+test_consume_cpu(void)
+{
+ volatile unsigned int i;
+
+ for (i = 0; i < TEST_NR_CONSUME_CPU_LOOPS; i++);
+}
+
+static void
+test_check_initial_priority(void)
+{
+ unsigned short user_priority, real_priority;
+ struct thread *thread;
+
+ thread = thread_self();
+ user_priority = thread_user_priority(thread);
+ real_priority = thread_real_priority(thread);
+
+ if (user_priority != real_priority) {
+ panic("%c: invalid initial priority %hu",
+ test_get_name(), real_priority);
+ }
+}
+
+static void
+test_for_priority_boosted(unsigned short *highest_priority)
+{
+ unsigned short user_priority, real_priority;
+ struct turnstile_td *td;
+ struct thread *thread;
+
+ thread = thread_self();
+ td = thread_turnstile_td(thread);
+
+ turnstile_td_lock(td);
+
+ user_priority = thread_user_priority(thread);
+ real_priority = thread_real_priority(thread);
+
+ if (user_priority != real_priority) {
+ if (user_priority > real_priority) {
+ panic("%c: invalid real priority: %hu (boosted:%u)",
+ test_get_name(), real_priority, thread->boosted);
+ }
+
+ if (real_priority > *highest_priority) {
+ printk("%c: real priority boosted to %s\n",
+ test_get_name(), test_thread_from_priority(real_priority));
+ *highest_priority = real_priority;
+ }
+ }
+
+ turnstile_td_unlock(td);
+}
+
+static void
+test_for_priority_deboosted(void)
+{
+ unsigned short user_priority, real_priority;
+ struct turnstile_td *td;
+ struct thread *thread;
+
+ thread = thread_self();
+ td = thread_turnstile_td(thread);
+
+ turnstile_td_lock(td);
+
+ user_priority = thread_user_priority(thread);
+ real_priority = thread_real_priority(thread);
+
+ if (user_priority != real_priority) {
+ panic("%c: real priority not reset (boosted:%d)", test_get_name(), thread->boosted);
+ }
+
+ turnstile_td_unlock(td);
+}
+
+static void
+test_report_progress(unsigned int i)
+{
+ printk("%c:%u ", test_get_name(), i);
+}
+
+static void
+test_a(void *arg)
+{
+ unsigned short highest_priority;
+ unsigned int i, j;
+
+ (void)arg;
+
+ test_check_initial_priority();
+
+ highest_priority = 0;
+
+ for (i = 1; /* no condition */; i++) {
+ for (j = 0; j < TEST_NR_LOCK_LOOPS; j++) {
+ mutex_lock(&test_mutex_1);
+ test_consume_cpu();
+ test_for_priority_boosted(&highest_priority);
+ mutex_unlock(&test_mutex_1);
+
+ test_for_priority_deboosted();
+
+ test_consume_cpu();
+ }
+
+ test_report_progress(i);
+ }
+}
+
+static void
+test_b(void *arg)
+{
+ test_check_initial_priority();
+
+ mutex_lock(&test_mutex_3);
+ mutex_lock(&test_mutex_2);
+ mutex_lock(&test_mutex_1);
+ test_consume_cpu();
+ test_for_priority_boosted(arg);
+ mutex_unlock(&test_mutex_1);
+ test_consume_cpu();
+ mutex_unlock(&test_mutex_2);
+ test_consume_cpu();
+ mutex_unlock(&test_mutex_3);
+
+ /*
+ * It would be better if the thread could immediately terminate, but
+ * it's also the thread that locks multiple mutexes, so make sure it
+ * was correctly deboosted. This should be cheap enough to not matter
+ * much.
+ */
+ test_for_priority_deboosted();
+}
+
+static void
+test_manage_b(void *arg)
+{
+ unsigned short highest_priority;
+ struct thread_attr attr;
+ struct thread *thread_b;
+ struct cpumap *cpumap;
+ unsigned int i, j;
+ int error;
+
+ (void)arg;
+
+ error = cpumap_create(&cpumap);
+ error_check(error, "cpumap_create");
+ cpumap_zero(cpumap);
+ cpumap_set(cpumap, 1);
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_b");
+ thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO);
+ thread_attr_set_priority(&attr, TEST_PRIO_B);
+ thread_attr_set_cpumap(&attr, cpumap);
+ cpumap_destroy(cpumap);
+
+ highest_priority = 0;
+
+ for (i = 1; /* no condition */; i++) {
+ for (j = 0; j < TEST_NR_LOCK_LOOPS; j++) {
+ error = thread_create(&thread_b, &attr, test_b, &highest_priority);
+ error_check(error, "thread_create");
+ thread_join(thread_b);
+
+ test_consume_cpu();
+ }
+
+ printk("b:%u ", i);
+ }
+}
+
+static void
+test_c(void *arg)
+{
+ unsigned short highest_priority;
+ unsigned int i, j;
+
+ (void)arg;
+
+ test_check_initial_priority();
+
+ highest_priority = 0;
+
+ for (i = 1; /* no condition */; i++) {
+ for (j = 0; j < TEST_NR_LOCK_LOOPS; j++) {
+ mutex_lock(&test_mutex_2);
+ test_consume_cpu();
+ test_for_priority_boosted(&highest_priority);
+ mutex_unlock(&test_mutex_2);
+
+ test_for_priority_deboosted();
+
+ test_consume_cpu();
+ }
+
+ test_report_progress(i);
+ }
+}
+
+static void
+test_chprio_c(void *arg)
+{
+ struct thread *thread_c;
+
+ thread_c = arg;
+
+ test_consume_cpu();
+
+ for (;;) {
+ thread_setscheduler(thread_c, THREAD_SCHED_POLICY_FIFO,
+ TEST_PRIO_E + 1);
+ thread_setscheduler(thread_c, THREAD_SCHED_POLICY_FIFO,
+ TEST_PRIO_C);
+ }
+}
+
+static void
+test_d(void *arg)
+{
+ unsigned short highest_priority;
+ unsigned int i, j;
+
+ (void)arg;
+
+ test_check_initial_priority();
+
+ highest_priority = 0;
+
+ for (i = 1; /* no condition */; i++) {
+ for (j = 0; j < TEST_NR_LOCK_LOOPS; j++) {
+ mutex_lock(&test_mutex_3);
+ test_consume_cpu();
+ test_for_priority_boosted(&highest_priority);
+ mutex_unlock(&test_mutex_3);
+
+ test_for_priority_deboosted();
+
+ test_consume_cpu();
+ }
+
+ test_report_progress(i);
+ }
+}
+
+static void
+test_e(void *arg)
+{
+ unsigned short highest_priority;
+ unsigned int i, j;
+
+ (void)arg;
+
+ test_check_initial_priority();
+
+ highest_priority = 0;
+
+ for (i = 1; /* no condition */; i++) {
+ for (j = 0; j < TEST_NR_LOCK_LOOPS; j++) {
+ mutex_lock(&test_mutex_3);
+ test_consume_cpu();
+ test_for_priority_boosted(&highest_priority);
+ mutex_unlock(&test_mutex_3);
+
+ test_for_priority_deboosted();
+
+ test_consume_cpu();
+ }
+
+ test_report_progress(i);
+ }
+}
+
+void
+test_setup(void)
+{
+ struct thread_attr attr;
+ struct thread *thread;
+ struct cpumap *cpumap;
+ int error;
+
+ mutex_init(&test_mutex_1);
+ mutex_init(&test_mutex_2);
+ mutex_init(&test_mutex_3);
+
+ error = cpumap_create(&cpumap);
+ error_check(error, "cpumap_create");
+
+ cpumap_zero(cpumap);
+ cpumap_set(cpumap, 0);
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_a");
+ thread_attr_set_detached(&attr);
+ thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO);
+ thread_attr_set_priority(&attr, TEST_PRIO_A);
+ thread_attr_set_cpumap(&attr, cpumap);
+ error = thread_create(&thread, &attr, test_a, NULL);
+ error_check(error, "thread_create");
+
+ cpumap_zero(cpumap);
+ cpumap_set(cpumap, 1);
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_manage_b");
+ thread_attr_set_detached(&attr);
+ thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO);
+ thread_attr_set_priority(&attr, TEST_PRIO_B);
+ thread_attr_set_cpumap(&attr, cpumap);
+ error = thread_create(&thread, &attr, test_manage_b, NULL);
+ error_check(error, "thread_create");
+
+ cpumap_zero(cpumap);
+ cpumap_set(cpumap, 2);
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_c");
+ thread_attr_set_detached(&attr);
+ thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO);
+ thread_attr_set_priority(&attr, TEST_PRIO_C);
+ thread_attr_set_cpumap(&attr, cpumap);
+ error = thread_create(&thread, &attr, test_c, NULL);
+ error_check(error, "thread_create");
+
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_chprio_c");
+ thread_attr_set_detached(&attr);
+ error = thread_create(&thread, &attr, test_chprio_c, thread);
+ error_check(error, "thread_create");
+
+ cpumap_zero(cpumap);
+ cpumap_set(cpumap, 3);
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_d");
+ thread_attr_set_detached(&attr);
+ thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO);
+ thread_attr_set_priority(&attr, TEST_PRIO_D);
+ thread_attr_set_cpumap(&attr, cpumap);
+ error = thread_create(&thread, &attr, test_d, NULL);
+ error_check(error, "thread_create");
+
+ cpumap_zero(cpumap);
+ cpumap_set(cpumap, 4);
+ thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_e");
+ thread_attr_set_detached(&attr);
+ thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO);
+ thread_attr_set_priority(&attr, TEST_PRIO_E);
+ thread_attr_set_cpumap(&attr, cpumap);
+ error = thread_create(&thread, &attr, test_e, NULL);
+ error_check(error, "thread_create");
+
+ cpumap_destroy(cpumap);
+}