author    | Richard Braun <rbraun@sceen.net> | 2017-03-04 17:02:09 +0100
committer | Richard Braun <rbraun@sceen.net> | 2017-03-04 17:02:09 +0100
commit    | 0b2db2776b3a4fb208f13867934147f8cef79b60 (patch)
tree      | a685ad29009ecfe1aacc9901b67490fb2b6db48f
parent    | cee25b55290865734d92a6b6262fe79887288b1a (diff)
parent    | 4121baff3354f8bd1a4ae62b68af29c6f3965462 (diff)
Merge branch 'sleep_rework'
-rw-r--r-- | INSTALL                |   5
-rw-r--r-- | Makefrag.am            |  16
-rw-r--r-- | configure.ac           |  13
-rw-r--r-- | kern/condition.c       | 194
-rw-r--r-- | kern/condition.h       |  56
-rw-r--r-- | kern/condition_types.h |   9
-rw-r--r-- | kern/kernel.c          |   4
-rw-r--r-- | kern/mutex.c           |  48
-rw-r--r-- | kern/mutex.h           |  98
-rw-r--r-- | kern/mutex_i.h         |  68
-rw-r--r-- | kern/mutex_types.h     |  19
-rw-r--r-- | kern/plist.c           |  65
-rw-r--r-- | kern/plist.h           | 270
-rw-r--r-- | kern/plist_types.h     |  50
-rw-r--r-- | kern/rtmutex.c         | 108
-rw-r--r-- | kern/rtmutex.h         | 111
-rw-r--r-- | kern/rtmutex_i.h       |  79
-rw-r--r-- | kern/rtmutex_types.h   |  30
-rw-r--r-- | kern/sleepq.c          | 382
-rw-r--r-- | kern/sleepq.h          | 126
-rw-r--r-- | kern/task.c            |  15
-rw-r--r-- | kern/thread.c          | 421
-rw-r--r-- | kern/thread.h          | 216
-rw-r--r-- | kern/thread_i.h        |  53
-rw-r--r-- | kern/turnstile.c       | 788
-rw-r--r-- | kern/turnstile.h       | 191
-rw-r--r-- | kern/turnstile_types.h |  58
-rw-r--r-- | test/test_mutex_pi.c   | 482
28 files changed, 3654 insertions, 321 deletions
@@ -243,6 +243,11 @@ X15 Options in a file named `test_pmap_update_mp.c' would be selected with the option `--enable-test-module=pmap_update_mp'. +`--enable-mutex-pi' + Enable priority inheritance for regular mutexes (note that priority + inheritance is always enabled for real-time mutexes). + TODO Thoroughly describe the implications of this option + `--with-max-cpus=MAX_CPUS' Set the maximum number of supported processors. diff --git a/Makefrag.am b/Makefrag.am index a42c5ea8..f0d8a393 100644 --- a/Makefrag.am +++ b/Makefrag.am @@ -41,6 +41,9 @@ x15_SOURCES += \ kern/param.h \ kern/percpu.c \ kern/percpu.h \ + kern/plist.c \ + kern/plist.h \ + kern/plist_types.h \ kern/printk.c \ kern/printk.h \ kern/rbtree.c \ @@ -49,6 +52,12 @@ x15_SOURCES += \ kern/rdxtree.c \ kern/rdxtree.h \ kern/rdxtree_i.h \ + kern/rtmutex.c \ + kern/rtmutex.h \ + kern/rtmutex_i.h \ + kern/rtmutex_types.h \ + kern/sleepq.c \ + kern/sleepq.h \ kern/spinlock.h \ kern/spinlock_i.h \ kern/spinlock_types.h \ @@ -64,6 +73,9 @@ x15_SOURCES += \ kern/thread.c \ kern/thread.h \ kern/thread_i.h \ + kern/turnstile.c \ + kern/turnstile.h \ + kern/turnstile_types.h \ kern/types.h \ kern/work.c \ kern/work.h \ @@ -90,6 +102,10 @@ if TEST_LLSYNC_DEFER x15_SOURCES += test/test_llsync_defer.c endif TEST_LLSYNC_DEFER +if TEST_MUTEX_PI +x15_SOURCES += test/test_mutex_pi.c +endif TEST_MUTEX_PI + if TEST_PMAP_UPDATE_MP x15_SOURCES += test/test_pmap_update_mp.c endif TEST_PMAP_UPDATE_MP diff --git a/configure.ac b/configure.ac index 023140bd..0b32388f 100644 --- a/configure.ac +++ b/configure.ac @@ -34,12 +34,19 @@ AC_ARG_WITH([max-cpus], [opt_max_cpus=$withval], [opt_max_cpus=128]) +AC_ARG_ENABLE([mutex-pi], + [AS_HELP_STRING([--enable-mutex-pi], + [enable priority inheritance for regular mutexes + (note that priority inheritance is always + enabled for real-time mutexes)])]) + AC_DEFINE([__KERNEL__], [1], [kernel code]) AC_DEFINE_UNQUOTED([ARCH], [$arch], [arch]) m4_define([ENABLE_TEST_MODULE], [AS_CASE(["$enable_test_module"], [llsync_defer], [test_llsync_defer=yes], + [mutex_pi], [test_mutex_pi=yes], [pmap_update_mp], [test_pmap_update_mp=yes], [sref_dirty_zeroes], [test_sref_dirty_zeroes=yes], [sref_noref], [test_sref_noref=yes], @@ -55,6 +62,8 @@ m4_define([ENABLE_TEST_MODULE], AS_IF([test x"$enable_test_module" != xno], [ENABLE_TEST_MODULE]) AM_CONDITIONAL([TEST_LLSYNC_DEFER], [test x"$test_llsync_defer" = xyes]) +AM_CONDITIONAL([TEST_MUTEX_PI], + [test x"$test_mutex_pi" = xyes]) AM_CONDITIONAL([TEST_PMAP_UPDATE_MP], [test x"$test_pmap_update_mp" = xyes]) AM_CONDITIONAL([TEST_SREF_DIRTY_ZEROES], @@ -74,6 +83,10 @@ AC_DEFINE_UNQUOTED([MAX_CPUS], [$opt_max_cpus], [maximum number of supported processors]) AC_MSG_NOTICE([maximum number of supported processors: $opt_max_cpus]) +AS_IF([test x"$enable_mutex_pi" = xyes], + [AC_DEFINE_UNQUOTED([X15_MUTEX_PI], [], + [Enable priority inheritance for regular mutexes])]) + AH_BOTTOM([#include <kern/config.h>]) AC_CONFIG_HEADER([config.h]) AC_CONFIG_FILES([Makefile]) diff --git a/kern/condition.c b/kern/condition.c index 467cb85a..bcd5ba42 100644 --- a/kern/condition.c +++ b/kern/condition.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014 Richard Braun. + * Copyright (c) 2013-2017 Richard Braun. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -15,124 +15,178 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
* * - * In order to avoid the infamous thundering herd problem, this implementation - * doesn't wake all threads when broadcasting a condition. Instead, they are - * queued on the mutex associated with the condition, and an attempt to wake - * one by locking and unlocking the mutex is performed. If the mutex is already - * locked, the current owner does the same when unlocking. + * Locking order : mutex -> sleep queue */ #include <stddef.h> #include <kern/assert.h> #include <kern/condition.h> -#include <kern/list.h> +#include <kern/condition_types.h> #include <kern/mutex.h> -#include <kern/mutex_i.h> -#include <kern/spinlock.h> +#include <kern/sleepq.h> #include <kern/thread.h> -void -condition_wait(struct condition *condition, struct mutex *mutex) +static void +condition_inc_nr_sleeping_waiters(struct condition *condition) { - struct mutex_waiter waiter; - unsigned int state; + condition->nr_sleeping_waiters++; + assert(condition->nr_sleeping_waiters != 0); +} - waiter.thread = thread_self(); +static void +condition_dec_nr_sleeping_waiters(struct condition *condition) +{ + assert(condition->nr_sleeping_waiters != 0); + condition->nr_sleeping_waiters--; +} - spinlock_lock(&condition->lock); +static void +condition_inc_nr_pending_waiters(struct condition *condition) +{ + condition->nr_pending_waiters++; + assert(condition->nr_pending_waiters != 0); +} - assert((condition->mutex == NULL) || (condition->mutex == mutex)); +static void +condition_dec_nr_pending_waiters(struct condition *condition) +{ + assert(condition->nr_pending_waiters != 0); + condition->nr_pending_waiters--; +} - if (condition->mutex == NULL) { - condition->mutex = mutex; - } +static void +condition_move_waiters(struct condition *condition) +{ + unsigned short old; - list_insert_tail(&condition->waiters, &waiter.node); + assert(condition->nr_sleeping_waiters != 0); + old = condition->nr_pending_waiters; + condition->nr_pending_waiters += condition->nr_sleeping_waiters; + assert(old < condition->nr_pending_waiters); + condition->nr_sleeping_waiters = 0; +} - spinlock_lock(&mutex->lock); +void +condition_wait(struct condition *condition, struct mutex *mutex) +{ + struct condition *last_cond; + struct sleepq *sleepq; + + mutex_assert_locked(mutex); + + /* + * Special case : + * + * mutex_lock(lock); + * + * for (;;) { + * while (!done) { + * condition_wait(condition, lock); + * } + * + * do_something(); + * } + * + * Pull the last condition before unlocking the mutex to prevent + * mutex_unlock() from reacquiring the condition sleep queue. 
+ */ + last_cond = thread_pull_last_cond(); + + sleepq = sleepq_lend(condition, true); + + mutex_unlock(mutex); + + if (last_cond != NULL) { + assert(last_cond == condition); + + if (condition->nr_pending_waiters != 0) { + sleepq_signal(sleepq); + } + } - state = mutex_release(mutex); + condition_inc_nr_sleeping_waiters(condition); + sleepq_wait(sleepq, "cond"); + condition_dec_nr_pending_waiters(condition); - if (state == MUTEX_CONTENDED) { - mutex_signal(mutex); + if (condition->nr_pending_waiters != 0) { + thread_set_last_cond(condition); } - spinlock_unlock(&condition->lock); + sleepq_return(sleepq); - mutex_wait(mutex, &waiter); - mutex_trydowngrade(mutex); - - spinlock_unlock(&mutex->lock); + mutex_lock(mutex); } void condition_signal(struct condition *condition) { - struct mutex_waiter *waiter; - struct mutex *mutex; - unsigned int state; + struct sleepq *sleepq; - spinlock_lock(&condition->lock); + sleepq = sleepq_acquire(condition, true); - if (condition->mutex == NULL) { - spinlock_unlock(&condition->lock); + if (sleepq == NULL) { return; } - mutex = condition->mutex; - waiter = list_first_entry(&condition->waiters, struct mutex_waiter, node); - list_remove(&waiter->node); - - if (list_empty(&condition->waiters)) { - condition->mutex = NULL; + if (condition->nr_sleeping_waiters == 0) { + goto out; } - spinlock_unlock(&condition->lock); - - spinlock_lock(&mutex->lock); - - mutex_queue(mutex, waiter); - state = mutex_tryacquire_slow(mutex); + sleepq_signal(sleepq); - if (state == MUTEX_UNLOCKED) { - mutex_release(mutex); - mutex_signal(mutex); - } + condition_dec_nr_sleeping_waiters(condition); + condition_inc_nr_pending_waiters(condition); - spinlock_unlock(&mutex->lock); +out: + sleepq_release(sleepq); } void condition_broadcast(struct condition *condition) { - struct list waiters; - struct mutex *mutex; - unsigned int state; + struct sleepq *sleepq; - spinlock_lock(&condition->lock); + sleepq = sleepq_acquire(condition, true); - if (condition->mutex == NULL) { - spinlock_unlock(&condition->lock); + if (sleepq == NULL) { return; } - mutex = condition->mutex; - condition->mutex = NULL; - list_set_head(&waiters, &condition->waiters); - list_init(&condition->waiters); + if (condition->nr_sleeping_waiters == 0) { + goto out; + } - spinlock_unlock(&condition->lock); + sleepq_signal(sleepq); - spinlock_lock(&mutex->lock); + condition_move_waiters(condition); - mutex_queue_list(mutex, &waiters); - state = mutex_tryacquire_slow(mutex); +out: + sleepq_release(sleepq); +} - if (state == MUTEX_UNLOCKED) { - mutex_release(mutex); - mutex_signal(mutex); +void +condition_wakeup(struct condition *condition) +{ + struct sleepq *sleepq; + + sleepq = sleepq_acquire(condition, true); + + if (sleepq == NULL) { + return; } - spinlock_unlock(&mutex->lock); + if (condition->nr_pending_waiters == 0) { + goto out; + } + + /* + * Rely on the FIFO ordering of sleep queues so that signalling multiple + * times always wakes up the same thread, as long as that thread didn't + * reacquire the sleep queue. + */ + sleepq_signal(sleepq); + +out: + sleepq_release(sleepq); } diff --git a/kern/condition.h b/kern/condition.h index 55d37bf2..0ce8d94f 100644 --- a/kern/condition.h +++ b/kern/condition.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014 Richard Braun. + * Copyright (c) 2013-2017 Richard Braun. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,34 +16,62 @@ * * * Condition variables. 
+ * + * A condition variable is a synchronization primitive used to wait + * until a predicate becomes true. Multiple threads can be waiting + * for this condition. In order to synchronize changes on the predicate + * with waiting and signalling, a condition variable must be associated + * with a mutex. */ #ifndef _KERN_CONDITION_H #define _KERN_CONDITION_H -#include <stddef.h> - #include <kern/condition_types.h> -#include <kern/list.h> -#include <kern/spinlock.h> +#include <kern/mutex_types.h> -/* - * Condition variable. - */ struct condition; +/* + * Initialize a condition variable. + */ static inline void condition_init(struct condition *condition) { - spinlock_init(&condition->lock); - condition->mutex = NULL; - list_init(&condition->waiters); + condition->nr_sleeping_waiters = 0; + condition->nr_pending_waiters = 0; } -void condition_wait(struct condition *cond, struct mutex *mutex); +/* + * Wait for a wake-up on the given condition variable. + * + * The associated mutex must be locked when calling this function. + * It is unlocked before waiting and relocked before returning. + */ +void condition_wait(struct condition *condition, struct mutex *mutex); -void condition_signal(struct condition *cond); +/* + * Wake up one (signal) or all (broadcast) threads waiting on a + * condition variable, if any. + * + * Although it is not necessary to hold the mutex associated to the + * condition variable when calling these functions, doing so guarantees + * that a wake-up done when changing the predicate cannot be missed by + * waiting threads. + */ +void condition_signal(struct condition *condition); +void condition_broadcast(struct condition *condition); -void condition_broadcast(struct condition *cond); +/* + * Wake up a pending thread. + * + * This function isn't part of the standard condition variable interface. + * It is used to chain wake-ups to avoid the thundering herd effect. + * When broadcasting a condition variable, a single thread is actually + * awaken. Other threads become "pending waiters", still asleep but + * eligible for wake-up when the mutex associated to the condition variable, + * relocked when returning from condition_wait(), is finally unlocked. 
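For reference, a minimal usage sketch of the condition variable interface above, following the predicate-loop pattern documented in condition_wait(); the example structure, its fields and helper names are illustrative, not part of the patch:

```c
#include <kern/condition.h>
#include <kern/mutex.h>

struct example {
    struct mutex lock;
    struct condition cond;
    int ready;                  /* predicate, protected by lock */
};

static void
example_init(struct example *ex)
{
    mutex_init(&ex->lock);
    condition_init(&ex->cond);
    ex->ready = 0;
}

static void
example_wait(struct example *ex)
{
    mutex_lock(&ex->lock);

    /* Always recheck the predicate after waking up. */
    while (!ex->ready) {
        condition_wait(&ex->cond, &ex->lock);
    }

    mutex_unlock(&ex->lock);
}

static void
example_post(struct example *ex)
{
    mutex_lock(&ex->lock);
    ex->ready = 1;
    condition_signal(&ex->cond);
    mutex_unlock(&ex->lock);
}
```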
+ */ +void condition_wakeup(struct condition *condition); #endif /* _KERN_CONDITION_H */ diff --git a/kern/condition_types.h b/kern/condition_types.h index 03b22676..13a29205 100644 --- a/kern/condition_types.h +++ b/kern/condition_types.h @@ -21,14 +21,9 @@ #ifndef _KERN_CONDITION_TYPES_H #define _KERN_CONDITION_TYPES_H -#include <kern/list_types.h> -#include <kern/mutex_types.h> -#include <kern/spinlock_types.h> - struct condition { - struct spinlock lock; - struct mutex *mutex; - struct list waiters; + unsigned short nr_sleeping_waiters; + unsigned short nr_pending_waiters; }; #endif /* _KERN_CONDITION_TYPES_H */ diff --git a/kern/kernel.c b/kern/kernel.c index b63be5d2..cd3d3e3b 100644 --- a/kern/kernel.c +++ b/kern/kernel.c @@ -20,9 +20,11 @@ #include <kern/kernel.h> #include <kern/llsync.h> #include <kern/percpu.h> +#include <kern/sleepq.h> #include <kern/sref.h> #include <kern/task.h> #include <kern/thread.h> +#include <kern/turnstile.h> #include <kern/work.h> #include <kern/xcall.h> #include <machine/cpu.h> @@ -41,6 +43,8 @@ kernel_main(void) cpumap_setup(); xcall_setup(); task_setup(); + sleepq_setup(); + turnstile_setup(); thread_setup(); work_setup(); llsync_setup(); diff --git a/kern/mutex.c b/kern/mutex.c index 58e24513..6a1af208 100644 --- a/kern/mutex.c +++ b/kern/mutex.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014 Richard Braun. + * Copyright (c) 2013-2017 Richard Braun. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -15,36 +15,54 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#ifndef X15_MUTEX_PI + +#include <stddef.h> + #include <kern/mutex.h> #include <kern/mutex_i.h> -#include <kern/spinlock.h> -#include <kern/thread.h> +#include <kern/sleepq.h> void mutex_lock_slow(struct mutex *mutex) { - struct mutex_waiter waiter; + struct sleepq *sleepq; unsigned int state; - spinlock_lock(&mutex->lock); + sleepq = sleepq_lend(mutex, false); + + for (;;) { + state = atomic_swap_uint(&mutex->state, MUTEX_CONTENDED); - state = mutex_tryacquire_slow(mutex); + if (state == MUTEX_UNLOCKED) { + break; + } - if (state != MUTEX_UNLOCKED) { - waiter.thread = thread_self(); - mutex_queue(mutex, &waiter); - mutex_wait(mutex, &waiter); + sleepq_wait(sleepq, "mutex"); } - mutex_trydowngrade(mutex); + if (sleepq_empty(sleepq)) { + state = atomic_swap_uint(&mutex->state, MUTEX_LOCKED); + assert(state == MUTEX_CONTENDED); + } - spinlock_unlock(&mutex->lock); + sleepq_return(sleepq); } void mutex_unlock_slow(struct mutex *mutex) { - spinlock_lock(&mutex->lock); - mutex_signal(mutex); - spinlock_unlock(&mutex->lock); + struct sleepq *sleepq; + + sleepq = sleepq_acquire(mutex, false); + + if (sleepq == NULL) { + return; + } + + sleepq_signal(sleepq); + + sleepq_release(sleepq); } + +#endif /* X15_MUTEX_PI */ diff --git a/kern/mutex.h b/kern/mutex.h index 64efa350..c8ae4639 100644 --- a/kern/mutex.h +++ b/kern/mutex.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014 Richard Braun. + * Copyright (c) 2013-2017 Richard Braun. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -15,33 +15,84 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. * * - * Mutual exclusion locks. + * Mutual exclusion sleep locks. * * Unlike spin locks, acquiring a mutex may make the calling thread sleep. + * + * TODO Adaptive spinning. 
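To summarize the lock-word protocol used by the plain (non-PI) mutex rework above, here is a standalone sketch of the three-state transitions, written with C11 atomics and a spin-wait standing in for the sleep queue calls; it is a model under those assumptions, not the kernel code itself:

```c
#include <stdatomic.h>

#define MUTEX_UNLOCKED  0
#define MUTEX_LOCKED    1
#define MUTEX_CONTENDED 2

struct mutex_model {
    atomic_uint state;
};

static void
mutex_model_lock(struct mutex_model *m)
{
    unsigned int prev = MUTEX_UNLOCKED;

    /* Fast path: UNLOCKED -> LOCKED. */
    if (atomic_compare_exchange_strong(&m->state, &prev, MUTEX_LOCKED)) {
        return;
    }

    /*
     * Slow path: force the state to CONTENDED so the owner takes its own
     * slow path on unlock, and retry until the swap observes UNLOCKED.
     * The real mutex_lock_slow() sleeps on the mutex sleep queue instead
     * of spinning, and downgrades to LOCKED when no waiters remain.
     */
    while (atomic_exchange(&m->state, MUTEX_CONTENDED) != MUTEX_UNLOCKED) {
        /* sleepq_wait(sleepq, "mutex") in the kernel implementation */
    }
}

static void
mutex_model_unlock(struct mutex_model *m)
{
    /* Fast path: LOCKED -> UNLOCKED. CONTENDED means waiters must be woken. */
    if (atomic_exchange(&m->state, MUTEX_UNLOCKED) == MUTEX_CONTENDED) {
        /* sleepq_signal() through sleepq_acquire()/sleepq_release() */
    }
}
```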
*/ #ifndef _KERN_MUTEX_H #define _KERN_MUTEX_H +#include <kern/mutex_types.h> + +#ifdef X15_MUTEX_PI + +#include <kern/rtmutex.h> + +struct mutex; + +#define mutex_assert_locked(mutex) rtmutex_assert_locked(&(mutex)->rtmutex) + +static inline void +mutex_init(struct mutex *mutex) +{ + rtmutex_init(&mutex->rtmutex); +} + +static inline int +mutex_trylock(struct mutex *mutex) +{ + return rtmutex_trylock(&mutex->rtmutex); +} + +static inline void +mutex_lock(struct mutex *mutex) +{ + rtmutex_lock(&mutex->rtmutex); +} + +static inline void +mutex_unlock(struct mutex *mutex) +{ + rtmutex_unlock(&mutex->rtmutex); + + /* + * If this mutex was used along with a condition variable, wake up + * a potential pending waiter. This must be done after the mutex is + * unlocked so that a higher priority thread can directly acquire it. + */ + thread_wakeup_last_cond(); +} + +#else /* X15_MUTEX_PI */ + #include <kern/assert.h> #include <kern/error.h> -#include <kern/list.h> #include <kern/mutex_i.h> -#include <kern/mutex_types.h> -#include <kern/spinlock.h> +#include <kern/thread.h> struct mutex; +#define mutex_assert_locked(mutex) assert((mutex)->state != MUTEX_UNLOCKED) + +/* + * Initialize a mutex. + */ static inline void mutex_init(struct mutex *mutex) { mutex->state = MUTEX_UNLOCKED; - spinlock_init(&mutex->lock); - list_init(&mutex->waiters); } -#define mutex_assert_locked(mutex) assert((mutex)->state != MUTEX_UNLOCKED) - +/* + * Attempt to lock the given mutex. + * + * This function may not sleep. + * + * Return 0 on success, ERROR_BUSY if the mutex is already locked. + */ static inline int mutex_trylock(struct mutex *mutex) { @@ -56,6 +107,14 @@ mutex_trylock(struct mutex *mutex) return ERROR_BUSY; } +/* + * Lock a mutex. + * + * If the mutex is already locked, the calling thread sleeps until the + * mutex is unlocked. + * + * A mutex can only be locked once. + */ static inline void mutex_lock(struct mutex *mutex) { @@ -72,6 +131,12 @@ mutex_lock(struct mutex *mutex) mutex_lock_slow(mutex); } +/* + * Unlock a mutex. + * + * The mutex must be locked, and must have been locked by the calling + * thread. + */ static inline void mutex_unlock(struct mutex *mutex) { @@ -79,13 +144,18 @@ mutex_unlock(struct mutex *mutex) state = mutex_release(mutex); - if (state == MUTEX_LOCKED) { - return; + if (state != MUTEX_LOCKED) { + assert(state == MUTEX_CONTENDED); + mutex_unlock_slow(mutex); } - assert(state == MUTEX_CONTENDED); - - mutex_unlock_slow(mutex); + /* + * If this mutex was used along with a condition variable, wake up + * a potential pending waiter. + */ + thread_wakeup_last_cond(); } +#endif /* X15_MUTEX_PI */ + #endif /* _KERN_MUTEX_H */ diff --git a/kern/mutex_i.h b/kern/mutex_i.h index 5e78a4da..158d8f7f 100644 --- a/kern/mutex_i.h +++ b/kern/mutex_i.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2014 Richard Braun. + * Copyright (c) 2013-2017 Richard Braun. 
* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -18,25 +18,16 @@ #ifndef _KERN_MUTEX_I_H #define _KERN_MUTEX_I_H +#ifndef X15_MUTEX_PI + #include <kern/assert.h> -#include <kern/list.h> #include <kern/mutex_types.h> -#include <kern/thread.h> #include <machine/atomic.h> #define MUTEX_UNLOCKED 0 #define MUTEX_LOCKED 1 #define MUTEX_CONTENDED 2 -struct mutex_waiter { - struct list node; - struct thread *thread; -}; - -void mutex_lock_slow(struct mutex *mutex); - -void mutex_unlock_slow(struct mutex *mutex); - static inline unsigned int mutex_tryacquire(struct mutex *mutex) { @@ -44,12 +35,6 @@ mutex_tryacquire(struct mutex *mutex) } static inline unsigned int -mutex_tryacquire_slow(struct mutex *mutex) -{ - return atomic_swap_uint(&mutex->state, MUTEX_CONTENDED); -} - -static inline unsigned int mutex_release(struct mutex *mutex) { unsigned int state; @@ -59,51 +44,10 @@ mutex_release(struct mutex *mutex) return state; } -static inline void -mutex_queue(struct mutex *mutex, struct mutex_waiter *waiter) -{ - list_insert_tail(&mutex->waiters, &waiter->node); -} - -static inline void -mutex_queue_list(struct mutex *mutex, struct list *waiters) -{ - list_concat(&mutex->waiters, waiters); -} - -static inline void -mutex_wait(struct mutex *mutex, struct mutex_waiter *waiter) -{ - unsigned int state; - - do { - thread_sleep(&mutex->lock, mutex, "mutex"); - state = mutex_tryacquire_slow(mutex); - } while (state != MUTEX_UNLOCKED); - - list_remove(&waiter->node); -} - -static inline void -mutex_signal(struct mutex *mutex) -{ - struct mutex_waiter *waiter; - - if (!list_empty(&mutex->waiters)) { - waiter = list_first_entry(&mutex->waiters, struct mutex_waiter, node); - thread_wakeup(waiter->thread); - } -} +void mutex_lock_slow(struct mutex *mutex); -static inline void -mutex_trydowngrade(struct mutex *mutex) -{ - if (list_empty(&mutex->waiters)) { - unsigned int state; +void mutex_unlock_slow(struct mutex *mutex); - state = atomic_swap_uint(&mutex->state, MUTEX_LOCKED); - assert(state == MUTEX_CONTENDED); - } -} +#endif /* X15_MUTEX_PI */ #endif /* _KERN_MUTEX_I_H */ diff --git a/kern/mutex_types.h b/kern/mutex_types.h index b348fb7b..4b7947fc 100644 --- a/kern/mutex_types.h +++ b/kern/mutex_types.h @@ -21,13 +21,24 @@ #ifndef _KERN_MUTEX_TYPES_H #define _KERN_MUTEX_TYPES_H -#include <kern/list_types.h> -#include <kern/spinlock_types.h> +#ifdef X15_MUTEX_PI + +#include <kern/rtmutex_types.h> + +/* + * Do not directly alias rtmutex to make sure they cannot be used + * with condition variables by mistake. + */ +struct mutex { + struct rtmutex rtmutex; +}; + +#else /* X15_MUTEX_PI */ struct mutex { unsigned int state; - struct spinlock lock; - struct list waiters; }; +#endif /* X15_MUTEX_PI */ + #endif /* _KERN_MUTEX_TYPES_H */ diff --git a/kern/plist.c b/kern/plist.c new file mode 100644 index 00000000..851de629 --- /dev/null +++ b/kern/plist.c @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2017 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <kern/list.h> +#include <kern/plist.h> + +void +plist_add(struct plist *plist, struct plist_node *pnode) +{ + struct plist_node *next; + + if (plist_empty(plist)) { + list_insert_head(&plist->list, &pnode->node); + list_insert_head(&plist->prio_list, &pnode->prio_node); + return; + } + + list_for_each_entry(&plist->prio_list, next, prio_node) { + if (pnode->priority < next->priority) { + break; + } + } + + if (list_end(&plist->prio_list, &next->prio_node) + || (pnode->priority != next->priority)) { + list_insert_before(&next->prio_node, &pnode->prio_node); + } else { + list_init(&pnode->prio_node); + } + + list_insert_before(&next->node, &pnode->node); +} + +void +plist_remove(struct plist *plist, struct plist_node *pnode) +{ + struct plist_node *next; + + if (!list_node_unlinked(&pnode->prio_node)) { + next = list_next_entry(pnode, node); + + if (!list_end(&plist->list, &next->node) + && list_node_unlinked(&next->prio_node)) { + list_insert_after(&pnode->prio_node, &next->prio_node); + } + + list_remove(&pnode->prio_node); + } + + list_remove(&pnode->node); +} diff --git a/kern/plist.h b/kern/plist.h new file mode 100644 index 00000000..dd8fdd00 --- /dev/null +++ b/kern/plist.h @@ -0,0 +1,270 @@ +/* + * Copyright (c) 2017 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Priority list. + * + * This container acts as a doubly-linked list sorted by priority in + * ascending order. All operations behave as with a regular linked list + * except insertion, which is O(k), k being the number of priorities + * among the entries. + */ + +#ifndef _KERN_PLIST_H +#define _KERN_PLIST_H + +#include <stdbool.h> + +#include <kern/list.h> +#include <kern/macros.h> +#include <kern/plist_types.h> + +/* + * Priority list. + */ +struct plist; + +/* + * Priority list node. + */ +struct plist_node; + +/* + * Static priority list initializer. + */ +#define PLIST_INITIALIZER(plist) \ + { LIST_INITIALIZER((plist).list), LIST_INITIALIZER((plist).prio_list) } + +/* + * Initialize a priority list. + */ +static inline void +plist_init(struct plist *plist) +{ + list_init(&plist->list); + list_init(&plist->prio_list); +} + +/* + * Initialize a priority list node. + */ +static inline void +plist_node_init(struct plist_node *pnode, unsigned int priority) +{ + pnode->priority = priority; + list_node_init(&pnode->node); + list_node_init(&pnode->prio_node); +} + +/* + * Return the priority associated with a node. + */ +static inline unsigned int +plist_node_priority(const struct plist_node *pnode) +{ + return pnode->priority; +} + +/* + * Update the priority of an already initialized node. 
+ */ +static inline void +plist_node_set_priority(struct plist_node *pnode, unsigned int priority) +{ + pnode->priority = priority; +} + +/* + * Return true if pnode is in no priority lists. + */ +static inline bool +plist_node_unlinked(const struct plist_node *pnode) +{ + return list_node_unlinked(&pnode->node); +} + +/* + * Macro that evaluates to the address of the structure containing the + * given node based on the given type and member. + */ +#define plist_entry(pnode, type, member) structof(pnode, type, member) + +/* + * Return the first node of a priority list. + */ +static inline struct plist_node * +plist_first(const struct plist *plist) +{ + return list_first_entry(&plist->list, struct plist_node, node); +} + +/* + * Return the last node of a priority list. + */ +static inline struct plist_node * +plist_last(const struct plist *plist) +{ + return list_last_entry(&plist->list, struct plist_node, node); +} + +/* + * Return the node next to the given node. + */ +static inline struct plist_node * +plist_next(const struct plist_node *pnode) +{ + return (struct plist_node *)list_next_entry(pnode, node); +} + +/* + * Return the node previous to the given node. + */ +static inline struct plist_node * +plist_prev(const struct plist_node *pnode) +{ + return (struct plist_node *)list_prev_entry(pnode, node); +} + +/* + * Get the first entry of a priority list. + */ +#define plist_first_entry(plist, type, member) \ + plist_entry(plist_first(plist), type, member) + +/* + * Get the last entry of a priority list. + */ +#define plist_last_entry(plist, type, member) \ + plist_entry(plist_last(plist), type, member) + +/* + * Get the entry next to the given entry. + */ +#define plist_next_entry(entry, member) \ + plist_entry(plist_next(&(entry)->member), typeof(*(entry)), member) + +/* + * Get the entry previous to the given entry. + */ +#define plist_prev_entry(entry, member) \ + plist_entry(plist_prev(&(entry)->member), typeof(*(entry)), member) + +/* + * Return true if node is after the last or before the first node of + * a priority list. + */ +static inline bool +plist_end(const struct plist *plist, const struct plist_node *pnode) +{ + return list_end(&plist->list, &pnode->node); +} + +/* + * Return true if plist is empty. + */ +static inline bool +plist_empty(const struct plist *plist) +{ + return list_empty(&plist->list); +} + +/* + * Return true if plist contains exactly one node. + */ +static inline bool +plist_singular(const struct plist *plist) +{ + return list_singular(&plist->list); +} + +/* + * Add a node to a priority list. + * + * If the priority list already contains nodes with the same priority + * as the given node, it is inserted before them. + * + * The node must be initialized before calling this function. + */ +void plist_add(struct plist *plist, struct plist_node *pnode); + +/* + * Remove a node from a priority list. + * + * After completion, the node is stale. + */ +void plist_remove(struct plist *plist, struct plist_node *pnode); + +/* + * Forge a loop to process all nodes of a priority list. + * + * The node must not be altered during the loop. + */ +#define plist_for_each(plist, pnode) \ +for (pnode = plist_first(plist); \ + !plist_end(plist, pnode); \ + pnode = plist_next(pnode)) + +/* + * Forge a loop to process all nodes of a priority list. 
+ */ +#define plist_for_each_safe(plist, pnode, tmp) \ +for (pnode = plist_first(plist), tmp = plist_next(pnode); \ + !plist_end(plist, pnode); \ + pnode = tmp, tmp = plist_next(pnode)) + +/* + * Version of plist_for_each() that processes nodes backward. + */ +#define plist_for_each_reverse(plist, pnode) \ +for (pnode = plist_last(plist); \ + !plist_end(plist, pnode); \ + pnode = plist_prev(pnode)) + +/* + * Version of plist_for_each_safe() that processes nodes backward. + */ +#define plist_for_each_reverse_safe(plist, pnode, tmp) \ +for (pnode = plist_last(plist), tmp = plist_prev(pnode); \ + !plist_end(plist, pnode); \ + pnode = tmp, tmp = plist_prev(pnode)) + +/* + * Forge a loop to process all entries of a priority list. + * + * The entry node must not be altered during the loop. + */ +#define plist_for_each_entry(plist, entry, member) \ + list_for_each_entry(&(plist)->list, entry, member.node) + +/* + * Forge a loop to process all entries of a priority list. + */ +#define plist_for_each_entry_safe(plist, entry, tmp, member) \ + list_for_each_entry_safe(&(plist)->list, entry, tmp, member.node) + +/* + * Version of plist_for_each_entry() that processes entries backward. + */ +#define plist_for_each_entry_reverse(plist, entry, member) \ + list_for_each_entry_reverse(&(plist)->list, entry, member.node) + +/* + * Version of plist_for_each_entry_safe() that processes entries backward. + */ +#define plist_for_each_entry_reverse_safe(plist, entry, tmp, member) \ + list_for_each_entry_reverse_safe(&(plist)->list, entry, tmp, member.node) + +#endif /* _KERN_PLIST_H */ diff --git a/kern/plist_types.h b/kern/plist_types.h new file mode 100644 index 00000000..653d63f2 --- /dev/null +++ b/kern/plist_types.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Isolated type definition used to avoid inclusion circular dependencies. + */ + +#ifndef _KERN_PLIST_TYPES_H +#define _KERN_PLIST_TYPES_H + +#include <kern/list_types.h> + +/* + * The list member is used as the underlying regular linked list and + * contains all entries, sorted by priority in ascending order. The + * prio_list member contains exactly one entry for each priority + * present, also sorted by priority in ascending order. An entry + * on prio_list is the first entry in list for that priority. + * Here is a representation of a possible priority list : + * + * list--|1|--|3|--|3|--|3|--|4|--|6|--|6|--|8| + * | | | | | | | | | + * prio_list--+ +--+ +------------+ +--+ +-------+ + * + */ +struct plist { + struct list list; + struct list prio_list; +}; + +struct plist_node { + unsigned int priority; + struct list node; + struct list prio_node; +}; + +#endif /* _KERN_PLIST_TYPES_H */ diff --git a/kern/rtmutex.c b/kern/rtmutex.c new file mode 100644 index 00000000..f79f8a88 --- /dev/null +++ b/kern/rtmutex.c @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2017 Richard Braun. 
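A short usage sketch of the plist interface and layout shown above; the item structure, names and priorities are illustrative only:

```c
#include <stddef.h>

#include <kern/plist.h>

struct item {
    struct plist_node pnode;
    const char *name;
};

/* All items, sorted by priority in ascending order. */
static struct plist item_list = PLIST_INITIALIZER(item_list);

static void
item_add(struct item *item, const char *name, unsigned int priority)
{
    item->name = name;
    plist_node_init(&item->pnode, priority);
    plist_add(&item_list, &item->pnode);
}

static const char *
item_highest_name(void)
{
    struct item *item;

    if (plist_empty(&item_list)) {
        return NULL;
    }

    /* Ascending order: the last entry has the highest priority. */
    item = plist_last_entry(&item_list, struct item, pnode);
    return item->name;
}
```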
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stddef.h> +#include <stdint.h> + +#include <kern/assert.h> +#include <kern/rtmutex.h> +#include <kern/rtmutex_i.h> +#include <kern/rtmutex_types.h> +#include <kern/thread.h> +#include <kern/turnstile.h> +#include <machine/atomic.h> + +static void +rtmutex_set_contended(struct rtmutex *rtmutex) +{ + uintptr_t owner, prev_owner; + + do { + owner = rtmutex->owner; + prev_owner = atomic_cas_uintptr(&rtmutex->owner, owner, + owner | RTMUTEX_CONTENDED); + } while (prev_owner != owner); +} + +void +rtmutex_lock_slow(struct rtmutex *rtmutex) +{ + struct turnstile *turnstile; + uintptr_t owner, prev_owner; + struct thread *thread; + uintptr_t bits; + + owner = (uintptr_t)thread_self(); + + turnstile = turnstile_lend(rtmutex); + + rtmutex_set_contended(rtmutex); + + bits = RTMUTEX_CONTENDED; + + for (;;) { + prev_owner = atomic_cas_uintptr(&rtmutex->owner, bits, owner | bits); + assert((prev_owner & bits) == bits); + + if (prev_owner == bits) { + break; + } + + thread = (struct thread *)(prev_owner & RTMUTEX_OWNER_MASK); + turnstile_wait(turnstile, "rtmutex", thread); + bits |= RTMUTEX_FORCE_WAIT; + } + + turnstile_own(turnstile); + + if (turnstile_empty(turnstile)) { + prev_owner = atomic_swap_uintptr(&rtmutex->owner, owner); + assert(prev_owner == (owner | bits)); + } + + turnstile_return(turnstile); + + /* + * A lock owner should never perform priority propagation on itself, + * because this process is done using its own priority, potentially + * introducing unbounded priority inversion. + * Instead, let new waiters do it, using their own priority. + */ +} + +void +rtmutex_unlock_slow(struct rtmutex *rtmutex) +{ + struct turnstile *turnstile; + uintptr_t owner, prev_owner; + + owner = (uintptr_t)thread_self(); + + turnstile = turnstile_acquire(rtmutex); + assert(turnstile != NULL); + + prev_owner = atomic_swap_uintptr(&rtmutex->owner, + RTMUTEX_FORCE_WAIT | RTMUTEX_CONTENDED); + assert((prev_owner & RTMUTEX_OWNER_MASK) == owner); + + turnstile_disown(turnstile); + turnstile_signal(turnstile); + + turnstile_release(turnstile); + + thread_propagate_priority(); +} diff --git a/kern/rtmutex.h b/kern/rtmutex.h new file mode 100644 index 00000000..f7cee716 --- /dev/null +++ b/kern/rtmutex.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2017 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Real-time mutual exclusion locks. + * + * A real-time mutex is similar to a regular mutex, except priority + * inheritance is unconditionnally enabled. + */ + +#ifndef _KERN_RTMUTEX_H +#define _KERN_RTMUTEX_H + +#include <stdint.h> + +#include <kern/assert.h> +#include <kern/error.h> +#include <kern/rtmutex_i.h> +#include <kern/rtmutex_types.h> + +struct rtmutex; + +#define rtmutex_assert_locked(rtmutex) assert((rtmutex)->owner != 0) + +/* + * Initialize a real-time mutex. + */ +static inline void +rtmutex_init(struct rtmutex *rtmutex) +{ + rtmutex->owner = 0; +} + +/* + * Attempt to lock the given real-time mutex. + * + * This function may not sleep. + * + * Return 0 on success, ERROR_BUSY if the mutex is already locked. + */ +static inline int +rtmutex_trylock(struct rtmutex *rtmutex) +{ + uintptr_t prev_owner; + + prev_owner = rtmutex_tryacquire(rtmutex); + + if (prev_owner == 0) { + return 0; + } + + return ERROR_BUSY; +} + +/* + * Lock a real-time mutex. + * + * If the mutex is already locked, the calling thread sleeps until the + * mutex is unlocked, and its priority is propagated as needed to prevent + * unbounded priority inversion. + * + * A mutex can only be locked once. + */ +static inline void +rtmutex_lock(struct rtmutex *rtmutex) +{ + uintptr_t prev_owner; + + prev_owner = rtmutex_tryacquire(rtmutex); + + if (prev_owner == 0) { + return; + } + + rtmutex_lock_slow(rtmutex); +} + +/* + * Unlock a real-time mutex. + * + * The mutex must be locked, and must have been locked by the calling + * thread. + */ +static inline void +rtmutex_unlock(struct rtmutex *rtmutex) +{ + uintptr_t prev_owner; + + prev_owner = rtmutex_tryrelease(rtmutex); + + if (!(prev_owner & RTMUTEX_CONTENDED)) { + return; + } + + rtmutex_unlock_slow(rtmutex); +} + +#endif /* _KERN_RTMUTEX_H */ diff --git a/kern/rtmutex_i.h b/kern/rtmutex_i.h new file mode 100644 index 00000000..b99b5bd0 --- /dev/null +++ b/kern/rtmutex_i.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _KERN_RTMUTEX_I_H +#define _KERN_RTMUTEX_I_H + +#include <stdbool.h> +#include <stdint.h> + +#include <kern/assert.h> +#include <kern/rtmutex_types.h> +#include <kern/thread.h> +#include <machine/atomic.h> + +/* + * Real-time mutex flags. + * + * The "contended" flag indicates that threads are waiting for the mutex + * to be unlocked. It forces threads trying to lock the mutex as well as + * the owner to take the slow path. + * + * The "force-wait" flag prevents "stealing" a mutex. When a contended + * mutex is unlocked, a thread may concurrently try to lock it. Without + * this flag, it may succeed, and in doing so, it would prevent a + * potentially higher priority thread from locking the mutex. 
The flag + * forces all threads to not only take the slow path, but to also call + * the turnstile wait function so that only the highest priority thread + * may lock the mutex. + */ +#define RTMUTEX_CONTENDED 0x1 +#define RTMUTEX_FORCE_WAIT 0x2 + +#define RTMUTEX_OWNER_MASK (~((uintptr_t)(RTMUTEX_FORCE_WAIT \ + | RTMUTEX_CONTENDED))) + +#define rtmutex_assert_owner_aligned(owner) \ + assert(((owner) & ~RTMUTEX_OWNER_MASK) == 0) + +static inline uintptr_t +rtmutex_tryacquire(struct rtmutex *rtmutex) +{ + uintptr_t owner; + + owner = (uintptr_t)thread_self(); + rtmutex_assert_owner_aligned(owner); + return atomic_cas_uintptr(&rtmutex->owner, 0, owner); +} + +static inline uintptr_t +rtmutex_tryrelease(struct rtmutex *rtmutex) +{ + uintptr_t owner, prev_owner; + + owner = (uintptr_t)thread_self(); + rtmutex_assert_owner_aligned(owner); + prev_owner = atomic_cas_uintptr(&rtmutex->owner, owner, 0); + assert((prev_owner & RTMUTEX_OWNER_MASK) == owner); + return prev_owner; +} + +void rtmutex_lock_slow(struct rtmutex *rtmutex); + +void rtmutex_unlock_slow(struct rtmutex *rtmutex); + +#endif /* _KERN_RTMUTEX_I_H */ diff --git a/kern/rtmutex_types.h b/kern/rtmutex_types.h new file mode 100644 index 00000000..c03e0484 --- /dev/null +++ b/kern/rtmutex_types.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2017 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Isolated type definition used to avoid inclusion circular dependencies. + */ + +#ifndef _KERN_RTMUTEX_TYPES_H +#define _KERN_RTMUTEX_TYPES_H + +#include <stdint.h> + +struct rtmutex { + uintptr_t owner; +}; + +#endif /* _KERN_RTMUTEX_TYPES_H */ diff --git a/kern/sleepq.c b/kern/sleepq.c new file mode 100644 index 00000000..0c41caef --- /dev/null +++ b/kern/sleepq.c @@ -0,0 +1,382 @@ +/* + * Copyright (c) 2017 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * TODO Analyse hash parameters. 
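As an illustration of the owner word layout described above (owning thread pointer in the aligned bits, RTMUTEX_CONTENDED and RTMUTEX_FORCE_WAIT in the low bits), a small decoding helper; it is a debugging-oriented sketch, not part of the patch:

```c
#include <stddef.h>
#include <stdint.h>

#include <kern/rtmutex_i.h>
#include <kern/rtmutex_types.h>
#include <kern/thread.h>

/*
 * Return the owning thread of a real-time mutex, or NULL if the owner
 * bits are clear (unlocked, possibly with the contended/force-wait
 * flags still set during a hand-off). The snapshot is unsynchronized
 * and only meaningful for inspection.
 */
static struct thread *
rtmutex_owner_thread(const struct rtmutex *rtmutex)
{
    uintptr_t owner;

    owner = rtmutex->owner & RTMUTEX_OWNER_MASK;
    return (owner == 0) ? NULL : (struct thread *)owner;
}
```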
+ */ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include <kern/assert.h> +#include <kern/init.h> +#include <kern/kmem.h> +#include <kern/list.h> +#include <kern/macros.h> +#include <kern/param.h> +#include <kern/sleepq.h> +#include <kern/spinlock.h> +#include <kern/thread.h> + +struct sleepq_bucket { + struct spinlock lock; + struct list list; +} __aligned(CPU_L1_SIZE); + +struct sleepq { + struct sleepq_bucket *bucket; + struct list node; + const void *sync_obj; + struct list waiters; + struct sleepq *next_free; +}; + +struct sleepq_waiter { + struct list node; + struct thread *thread; +}; + +#define SLEEPQ_HTABLE_SIZE 128 +#define SLEEPQ_COND_HTABLE_SIZE 64 + +#if !ISP2(SLEEPQ_HTABLE_SIZE) || !ISP2(SLEEPQ_COND_HTABLE_SIZE) +#error hash table size must be a power of two +#endif /* !ISP2(SLEEPQ_HTABLE_SIZE) */ + +#define SLEEPQ_HTABLE_MASK (SLEEPQ_HTABLE_SIZE - 1) +#define SLEEPQ_COND_HTABLE_MASK (SLEEPQ_COND_HTABLE_SIZE - 1) + +static struct sleepq_bucket sleepq_htable[SLEEPQ_HTABLE_SIZE]; +static struct sleepq_bucket sleepq_cond_htable[SLEEPQ_COND_HTABLE_SIZE]; + +static struct kmem_cache sleepq_cache; + +static uintptr_t +sleepq_hash(const void *addr) +{ + return ((uintptr_t)addr >> 8) ^ (uintptr_t)addr; +} + +static void +sleepq_waiter_init(struct sleepq_waiter *waiter, struct thread *thread) +{ + waiter->thread = thread; +} + +static void +sleepq_waiter_wakeup(struct sleepq_waiter *waiter) +{ + thread_wakeup(waiter->thread); +} + +static void +sleepq_assert_init_state(const struct sleepq *sleepq) +{ + assert(sleepq->bucket == NULL); + assert(sleepq->sync_obj == NULL); + assert(list_empty(&sleepq->waiters)); + assert(sleepq->next_free == NULL); +} + +static void +sleepq_use(struct sleepq *sleepq, const void *sync_obj) +{ + assert(sleepq->sync_obj == NULL); + sleepq->sync_obj = sync_obj; +} + +static void +sleepq_unuse(struct sleepq *sleepq) +{ + assert(sleepq->sync_obj != NULL); + sleepq->sync_obj = NULL; +} + +static bool +sleepq_in_use(const struct sleepq *sleepq) +{ + return sleepq->sync_obj != NULL; +} + +static bool +sleepq_in_use_by(const struct sleepq *sleepq, const void *sync_obj) +{ + return sleepq->sync_obj == sync_obj; +} + +static void +sleepq_bucket_init(struct sleepq_bucket *bucket) +{ + spinlock_init(&bucket->lock); + list_init(&bucket->list); +} + +static struct sleepq_bucket * +sleepq_bucket_get_cond(const void *sync_obj) +{ + uintptr_t index; + + index = sleepq_hash(sync_obj) & SLEEPQ_COND_HTABLE_MASK; + assert(index < ARRAY_SIZE(sleepq_cond_htable)); + return &sleepq_cond_htable[index]; +} + +static struct sleepq_bucket * +sleepq_bucket_get(const void *sync_obj, bool condition) +{ + uintptr_t index; + + if (condition) { + return sleepq_bucket_get_cond(sync_obj); + } + + index = sleepq_hash(sync_obj) & SLEEPQ_HTABLE_MASK; + assert(index < ARRAY_SIZE(sleepq_htable)); + return &sleepq_htable[index]; +} + +static void +sleepq_bucket_add(struct sleepq_bucket *bucket, struct sleepq *sleepq) +{ + assert(sleepq->bucket == NULL); + sleepq->bucket = bucket; + list_insert_tail(&bucket->list, &sleepq->node); +} + +static void +sleepq_bucket_remove(struct sleepq_bucket *bucket, struct sleepq *sleepq) +{ + assert(sleepq->bucket == bucket); + sleepq->bucket = NULL; + list_remove(&sleepq->node); +} + +static struct sleepq * +sleepq_bucket_lookup(const struct sleepq_bucket *bucket, const void *sync_obj) +{ + struct sleepq *sleepq; + + list_for_each_entry(&bucket->list, sleepq, node) { + if (sleepq_in_use_by(sleepq, sync_obj)) { + assert(sleepq->bucket == 
bucket); + return sleepq; + } + } + + return NULL; +} + +static void +sleepq_ctor(void *ptr) +{ + struct sleepq *sleepq; + + sleepq = ptr; + sleepq->bucket = NULL; + sleepq->sync_obj = NULL; + list_init(&sleepq->waiters); + sleepq->next_free = NULL; +} + +void __init +sleepq_setup(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(sleepq_htable); i++) { + sleepq_bucket_init(&sleepq_htable[i]); + } + + for (i = 0; i < ARRAY_SIZE(sleepq_cond_htable); i++) { + sleepq_bucket_init(&sleepq_cond_htable[i]); + } + + kmem_cache_init(&sleepq_cache, "sleepq", sizeof(struct sleepq), + CPU_L1_SIZE, sleepq_ctor, 0); +} + +struct sleepq * +sleepq_create(void) +{ + struct sleepq *sleepq; + + sleepq = kmem_cache_alloc(&sleepq_cache); + + if (sleepq == NULL) { + return NULL; + } + + sleepq_assert_init_state(sleepq); + return sleepq; +} + +void +sleepq_destroy(struct sleepq *sleepq) +{ + sleepq_assert_init_state(sleepq); + kmem_cache_free(&sleepq_cache, sleepq); +} + +struct sleepq * +sleepq_acquire(const void *sync_obj, bool condition) +{ + struct sleepq_bucket *bucket; + struct sleepq *sleepq; + + assert(sync_obj != NULL); + + bucket = sleepq_bucket_get(sync_obj, condition); + + spinlock_lock(&bucket->lock); + + sleepq = sleepq_bucket_lookup(bucket, sync_obj); + + if (sleepq == NULL) { + spinlock_unlock(&bucket->lock); + return NULL; + } + + return sleepq; +} + +void +sleepq_release(struct sleepq *sleepq) +{ + spinlock_unlock(&sleepq->bucket->lock); +} + +static void +sleepq_push_free(struct sleepq *sleepq, struct sleepq *free_sleepq) +{ + assert(free_sleepq->next_free == NULL); + free_sleepq->next_free = sleepq->next_free; + sleepq->next_free = free_sleepq; +} + +static struct sleepq * +sleepq_pop_free(struct sleepq *sleepq) +{ + struct sleepq *free_sleepq; + + free_sleepq = sleepq->next_free; + + if (free_sleepq == NULL) { + return NULL; + } + + sleepq->next_free = free_sleepq->next_free; + free_sleepq->next_free = NULL; + return free_sleepq; +} + +struct sleepq * +sleepq_lend(const void *sync_obj, bool condition) +{ + struct sleepq_bucket *bucket; + struct sleepq *sleepq, *prev; + + assert(sync_obj != NULL); + + sleepq = thread_sleepq_lend(); + sleepq_assert_init_state(sleepq); + + bucket = sleepq_bucket_get(sync_obj, condition); + + spinlock_lock(&bucket->lock); + + prev = sleepq_bucket_lookup(bucket, sync_obj); + + if (prev == NULL) { + sleepq_use(sleepq, sync_obj); + sleepq_bucket_add(bucket, sleepq); + } else { + sleepq_push_free(prev, sleepq); + sleepq = prev; + } + + return sleepq; +} + +void +sleepq_return(struct sleepq *sleepq) +{ + struct sleepq_bucket *bucket; + struct sleepq *free_sleepq; + + assert(sleepq_in_use(sleepq)); + + bucket = sleepq->bucket; + free_sleepq = sleepq_pop_free(sleepq); + + if (free_sleepq == NULL) { + sleepq_bucket_remove(bucket, sleepq); + sleepq_unuse(sleepq); + free_sleepq = sleepq; + } + + spinlock_unlock(&bucket->lock); + + sleepq_assert_init_state(free_sleepq); + thread_sleepq_return(free_sleepq); +} + +static void +sleepq_add_waiter(struct sleepq *sleepq, struct sleepq_waiter *waiter) +{ + list_insert_head(&sleepq->waiters, &waiter->node); +} + +static void +sleepq_remove_waiter(struct sleepq *sleepq, struct sleepq_waiter *waiter) +{ + (void)sleepq; + list_remove(&waiter->node); +} + +bool +sleepq_empty(const struct sleepq *sleepq) +{ + return list_empty(&sleepq->waiters); +} + +void +sleepq_wait(struct sleepq *sleepq, const char *wchan) +{ + struct sleepq_waiter waiter; + struct thread *thread; + + thread = thread_self(); + sleepq_waiter_init(&waiter, 
thread); + sleepq_add_waiter(sleepq, &waiter); + + thread_sleep(&sleepq->bucket->lock, sleepq->sync_obj, wchan); + + sleepq_remove_waiter(sleepq, &waiter); +} + +void +sleepq_signal(struct sleepq *sleepq) +{ + struct sleepq_waiter *waiter; + + if (sleepq_empty(sleepq)) { + return; + } + + waiter = list_last_entry(&sleepq->waiters, struct sleepq_waiter, node); + sleepq_waiter_wakeup(waiter); +} diff --git a/kern/sleepq.h b/kern/sleepq.h new file mode 100644 index 00000000..1ba4a319 --- /dev/null +++ b/kern/sleepq.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2017 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Generic sleep queues. + * + * Sleep queues are used to build sleeping synchronization primitives + * such as mutexes and condition variables. + * + * Although the sleep queues are mostly generic, this implementation + * relies on knowing whether a synchronization object is a condition + * variable or not, because waiting on a condition variable unlocks + * the associated mutex, at which point two sleep queues are locked. + * Handling condition variable sleep queues slightly differently + * allows preventing deadlocks while keeping overall complexity low. + * + * In addition, despite being used to implement condition variables, + * this implementation doesn't provide a broadcast call. The rationale + * is to force users to implement "chained waking" in order to avoid + * the thundering herd effect. + */ + +#ifndef _KERN_SLEEPQ_H +#define _KERN_SLEEPQ_H + +#include <stdbool.h> + +struct sleepq; + +/* + * Initialize the sleepq module. + */ +void sleepq_setup(void); + +/* + * Create/destroy a sleep queue. + */ +struct sleepq * sleepq_create(void); +void sleepq_destroy(struct sleepq *sleepq); + +/* + * Acquire/release a sleep queue. + * + * Acquiring a sleep queue serializes all access and disables preemption. + * + * The condition argument must be true if the synchronization object + * is a condition variable. + */ +struct sleepq * sleepq_acquire(const void *sync_obj, bool condition); +void sleepq_release(struct sleepq *sleepq); + +/* + * Lend/return a sleep queue. + * + * A thread lends its private sleep queue to the sleepq module in + * order to prepare its sleep. The sleep queue obtained on lending + * is either the thread's queue, or an already existing queue for + * this synchronization object if another thread is waiting. + * + * When multiple threads are waiting on the same queue, the extra + * queues lent are kept in an internal free list, used when threads + * are awaken to return a queue to them. + * + * Note that the sleep queue returned may not be the one lent. + * + * The sleep queue obtained when lending is automatically acquired. + * + * The condition argument must be true if the synchronization object + * is a condition variable. 
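A minimal sketch of the lend/return and acquire/release protocol just described, loosely following kern/mutex.c: the waiter lends its queue around the sleep, the waker signals through an acquired queue. The event structure and its flag handling are illustrative, not part of the patch:

```c
#include <stdbool.h>
#include <stddef.h>

#include <kern/sleepq.h>
#include <machine/atomic.h>

struct event {
    unsigned int signaled;
};

static void
event_init(struct event *event)
{
    event->signaled = 0;
}

static void
event_wait(struct event *event)
{
    struct sleepq *sleepq;

    /* Lending acquires the queue, serializing against event_signal(). */
    sleepq = sleepq_lend(event, false);

    while (atomic_swap_uint(&event->signaled, 0) == 0) {
        sleepq_wait(sleepq, "event");
    }

    sleepq_return(sleepq);
}

static void
event_signal(struct event *event)
{
    struct sleepq *sleepq;

    atomic_swap_uint(&event->signaled, 1);

    sleepq = sleepq_acquire(event, false);

    if (sleepq == NULL) {
        return;         /* no thread is currently waiting */
    }

    sleepq_signal(sleepq);
    sleepq_release(sleepq);
}
```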
+ */ +struct sleepq * sleepq_lend(const void *sync_obj, bool condition); +void sleepq_return(struct sleepq *sleepq); + +/* + * Return true if the given sleep queue has no waiters. + * + * The sleep queue must be acquired when calling this function. + */ +bool sleepq_empty(const struct sleepq *sleepq); + +/* + * Wait for a wake up on the given sleep queue. + * + * The sleep queue must be lent when calling this function. It is + * released and later reacquired before returning from this function. + * + * The calling thread is considered a waiter as long as it didn't + * reacquire the sleep queue. This means that signalling a sleep queue + * has no visible effect on the number of waiters until the queue is + * released, e.g. if a single thread is waiting and another signals + * the queue, the queue is not immediately considered empty. + * + * Threads are queued in FIFO order. + */ +void sleepq_wait(struct sleepq *sleepq, const char *wchan); + +/* + * Wake up a thread waiting on the given sleep queue, if any. + * + * The sleep queue must be acquired when calling this function. + * + * Since a sleep queue must be lent (and in turn is automatically + * acquired) when waiting, and acquired in order to signal it, + * wake-ups are serialized and cannot be missed. + * + * Threads are queued in FIFO order, which means signalling a sleep + * queue multiple times always awakens the same thread, regardless + * of new waiters, as long as that first thread didn't reacquire the + * sleep queue. + */ +void sleepq_signal(struct sleepq *sleepq); + +#endif /* _KERN_SLEEPQ_H */ diff --git a/kern/task.c b/kern/task.c index afc2407a..c7caeb48 100644 --- a/kern/task.c +++ b/kern/task.c @@ -143,15 +143,22 @@ task_info(struct task *task) printk("task: name: %s, threads:\n", task->name); + /* + * Don't grab any lock when accessing threads, so that the function + * can be used to debug in the middle of most critical sections. + * Threads are only destroyed after being removed from their task + * so holding the task lock is enough to guarantee existence. + */ list_for_each_entry(&task->threads, thread, task_node) { - printk("task: " TASK_INFO_ADDR_FMT " %c %8s:" TASK_INFO_ADDR_FMT - " %.2s:%02hu %s\n", + printk(TASK_INFO_ADDR_FMT " %c %8s:" TASK_INFO_ADDR_FMT + " %.2s:%02hu %02u %s\n", (unsigned long)thread, thread_state_to_chr(thread), thread_wchan_desc(thread), (unsigned long)thread_wchan_addr(thread), - thread_sched_class_to_str(thread_sched_class(thread)), - thread_priority(thread), + thread_sched_class_to_str(thread_user_sched_class(thread)), + thread_user_priority(thread), + thread_real_global_priority(thread), thread->name); } diff --git a/kern/thread.c b/kern/thread.c index 8d7b3a75..14070d92 100644 --- a/kern/thread.c +++ b/kern/thread.c @@ -81,6 +81,7 @@ * weights in a smoother way than a raw scaling). */ +#include <stdbool.h> #include <stddef.h> #include <string.h> @@ -98,11 +99,13 @@ #include <kern/panic.h> #include <kern/param.h> #include <kern/percpu.h> +#include <kern/sleepq.h> #include <kern/spinlock.h> #include <kern/sprintf.h> #include <kern/sref.h> #include <kern/task.h> #include <kern/thread.h> +#include <kern/turnstile.h> #include <kern/work.h> #include <machine/atomic.h> #include <machine/cpu.h> @@ -259,13 +262,13 @@ struct thread_runq { * Operations of a scheduling class. 
*/ struct thread_sched_ops { - void (*init_sched)(struct thread *thread, unsigned short priority); struct thread_runq * (*select_runq)(struct thread *thread); void (*add)(struct thread_runq *runq, struct thread *thread); void (*remove)(struct thread_runq *runq, struct thread *thread); void (*put_prev)(struct thread_runq *runq, struct thread *thread); struct thread * (*get_next)(struct thread_runq *runq); - void (*set_priority)(struct thread *thread, unsigned short priority); + void (*reset_priority)(struct thread *thread, unsigned short priority); + void (*update_priority)(struct thread *thread, unsigned short priority); unsigned int (*get_global_priority)(unsigned short priority); void (*set_next)(struct thread_runq *runq, struct thread *thread); void (*tick)(struct thread_runq *runq, struct thread *thread); @@ -341,6 +344,13 @@ struct thread_zombie { struct thread *thread; }; +static unsigned char +thread_policy_to_class(unsigned char policy) +{ + assert(policy < ARRAY_SIZE(thread_policy_table)); + return thread_policy_table[policy]; +} + static void thread_set_wchan(struct thread *thread, const void *wchan_addr, const char *wchan_desc) @@ -359,15 +369,24 @@ thread_clear_wchan(struct thread *thread) } static const struct thread_sched_ops * -thread_get_sched_ops(const struct thread *thread) +thread_get_sched_ops(unsigned char sched_class) { - unsigned char sched_class; - - sched_class = thread_sched_class(thread); assert(sched_class < ARRAY_SIZE(thread_sched_ops)); return &thread_sched_ops[sched_class]; } +static const struct thread_sched_ops * +thread_get_user_sched_ops(const struct thread *thread) +{ + return thread_get_sched_ops(thread_user_sched_class(thread)); +} + +static const struct thread_sched_ops * +thread_get_real_sched_ops(const struct thread *thread) +{ + return thread_get_sched_ops(thread_real_sched_class(thread)); +} + static void __init thread_runq_init_rt(struct thread_runq *runq) { @@ -457,8 +476,9 @@ thread_runq_add(struct thread_runq *runq, struct thread *thread) assert(!cpu_intr_enabled()); spinlock_assert_locked(&runq->lock); + assert(!thread->in_runq); - ops = thread_get_sched_ops(thread); + ops = thread_get_real_sched_ops(thread); ops->add(runq, thread); if (runq->nr_threads == 0) { @@ -467,11 +487,13 @@ thread_runq_add(struct thread_runq *runq, struct thread *thread) runq->nr_threads++; - if (thread_sched_class(thread) < thread_sched_class(runq->current)) { + if (thread_real_sched_class(thread) + < thread_real_sched_class(runq->current)) { thread_set_flag(runq->current, THREAD_YIELD); } thread->runq = runq; + thread->in_runq = true; } static void @@ -481,6 +503,7 @@ thread_runq_remove(struct thread_runq *runq, struct thread *thread) assert(!cpu_intr_enabled()); spinlock_assert_locked(&runq->lock); + assert(thread->in_runq); runq->nr_threads--; @@ -488,8 +511,10 @@ thread_runq_remove(struct thread_runq *runq, struct thread *thread) cpumap_set_atomic(&thread_idle_runqs, thread_runq_cpu(runq)); } - ops = thread_get_sched_ops(thread); + ops = thread_get_real_sched_ops(thread); ops->remove(runq, thread); + + thread->in_runq = false; } static void @@ -500,7 +525,7 @@ thread_runq_put_prev(struct thread_runq *runq, struct thread *thread) assert(!cpu_intr_enabled()); spinlock_assert_locked(&runq->lock); - ops = thread_get_sched_ops(thread); + ops = thread_get_real_sched_ops(thread); if (ops->put_prev != NULL) { ops->put_prev(runq, thread); @@ -534,7 +559,7 @@ thread_runq_set_next(struct thread_runq *runq, struct thread *thread) { const struct thread_sched_ops *ops; - ops 
= thread_get_sched_ops(thread); + ops = thread_get_real_sched_ops(thread); if (ops->set_next != NULL) { ops->set_next(runq, thread); @@ -656,13 +681,6 @@ thread_runq_double_lock(struct thread_runq *a, struct thread_runq *b) } } -static void -thread_sched_rt_init_sched(struct thread *thread, unsigned short priority) -{ - assert(priority <= THREAD_SCHED_RT_PRIO_MAX); - thread->rt_data.time_slice = THREAD_DEFAULT_RR_TIME_SLICE; -} - static struct thread_runq * thread_sched_rt_select_runq(struct thread *thread) { @@ -689,15 +707,17 @@ thread_sched_rt_add(struct thread_runq *runq, struct thread *thread) struct list *threads; rt_runq = &runq->rt_runq; - threads = &rt_runq->threads[thread_priority(thread)]; + threads = &rt_runq->threads[thread_real_priority(thread)]; list_insert_tail(threads, &thread->rt_data.node); if (list_singular(threads)) { - rt_runq->bitmap |= (1ULL << thread_priority(thread)); + rt_runq->bitmap |= (1ULL << thread_real_priority(thread)); } - if ((thread_sched_class(thread) == thread_sched_class(runq->current)) - && (thread_priority(thread) > thread_priority(runq->current))) { + if ((thread_real_sched_class(thread) + == thread_real_sched_class(runq->current)) + && (thread_real_priority(thread) + > thread_real_priority(runq->current))) { thread_set_flag(runq->current, THREAD_YIELD); } } @@ -709,11 +729,11 @@ thread_sched_rt_remove(struct thread_runq *runq, struct thread *thread) struct list *threads; rt_runq = &runq->rt_runq; - threads = &rt_runq->threads[thread_priority(thread)]; + threads = &rt_runq->threads[thread_real_priority(thread)]; list_remove(&thread->rt_data.node); if (list_empty(threads)) { - rt_runq->bitmap &= ~(1ULL << thread_priority(thread)); + rt_runq->bitmap &= ~(1ULL << thread_real_priority(thread)); } } @@ -745,6 +765,13 @@ thread_sched_rt_get_next(struct thread_runq *runq) return thread; } +static void +thread_sched_rt_reset_priority(struct thread *thread, unsigned short priority) +{ + assert(priority <= THREAD_SCHED_RT_PRIO_MAX); + thread->rt_data.time_slice = THREAD_DEFAULT_RR_TIME_SLICE; +} + static unsigned int thread_sched_rt_get_global_priority(unsigned short priority) { @@ -762,7 +789,7 @@ thread_sched_rt_tick(struct thread_runq *runq, struct thread *thread) { (void)runq; - if (thread_sched_policy(thread) != THREAD_SCHED_POLICY_RR) { + if (thread_real_sched_policy(thread) != THREAD_SCHED_POLICY_RR) { return; } @@ -782,16 +809,6 @@ thread_sched_fs_prio2weight(unsigned short priority) return ((priority + 1) * THREAD_FS_ROUND_SLICE_BASE); } -static void -thread_sched_fs_init_sched(struct thread *thread, unsigned short priority) -{ - assert(priority <= THREAD_SCHED_FS_PRIO_MAX); - thread->fs_data.fs_runq = NULL; - thread->fs_data.round = 0; - thread->fs_data.weight = thread_sched_fs_prio2weight(priority); - thread->fs_data.work = 0; -} - static struct thread_runq * thread_sched_fs_select_runq(struct thread *thread) { @@ -893,7 +910,7 @@ thread_sched_fs_enqueue(struct thread_fs_runq *fs_runq, unsigned long round, assert(thread->fs_data.fs_runq == NULL); assert(thread->fs_data.work <= thread->fs_data.weight); - group = &fs_runq->group_array[thread_priority(thread)]; + group = &fs_runq->group_array[thread_real_priority(thread)]; group_weight = group->weight + thread->fs_data.weight; total_weight = fs_runq->weight + thread->fs_data.weight; node = (group->weight == 0) @@ -969,7 +986,7 @@ thread_sched_fs_restart(struct thread_runq *runq) assert(node != NULL); fs_runq->current = list_entry(node, struct thread_fs_group, node); - if 
(thread_sched_class(runq->current) == THREAD_SCHED_CLASS_FS) { + if (thread_real_sched_class(runq->current) == THREAD_SCHED_CLASS_FS) { thread_set_flag(runq->current, THREAD_YIELD); } } @@ -1005,7 +1022,7 @@ thread_sched_fs_dequeue(struct thread *thread) assert(thread->fs_data.fs_runq != NULL); fs_runq = thread->fs_data.fs_runq; - group = &fs_runq->group_array[thread_priority(thread)]; + group = &fs_runq->group_array[thread_real_priority(thread)]; thread->fs_data.fs_runq = NULL; list_remove(&thread->fs_data.runq_node); @@ -1080,7 +1097,7 @@ thread_sched_fs_put_prev(struct thread_runq *runq, struct thread *thread) struct thread_fs_group *group; fs_runq = runq->fs_runq_active; - group = &fs_runq->group_array[thread_priority(thread)]; + group = &fs_runq->group_array[thread_real_priority(thread)]; list_insert_tail(&group->threads, &thread->fs_data.group_node); if (thread->fs_data.work >= thread->fs_data.weight) { @@ -1148,8 +1165,19 @@ thread_sched_fs_get_next(struct thread_runq *runq) } static void -thread_sched_fs_set_priority(struct thread *thread, unsigned short priority) +thread_sched_fs_reset_priority(struct thread *thread, unsigned short priority) { + assert(priority <= THREAD_SCHED_FS_PRIO_MAX); + thread->fs_data.fs_runq = NULL; + thread->fs_data.round = 0; + thread->fs_data.weight = thread_sched_fs_prio2weight(priority); + thread->fs_data.work = 0; +} + +static void +thread_sched_fs_update_priority(struct thread *thread, unsigned short priority) +{ + assert(priority <= THREAD_SCHED_FS_PRIO_MAX); thread->fs_data.weight = thread_sched_fs_prio2weight(priority); if (thread->fs_data.work >= thread->fs_data.weight) { @@ -1180,7 +1208,7 @@ thread_sched_fs_tick(struct thread_runq *runq, struct thread *thread) fs_runq = runq->fs_runq_active; fs_runq->work++; - group = &fs_runq->group_array[thread_priority(thread)]; + group = &fs_runq->group_array[thread_real_priority(thread)]; group->work++; thread_set_flag(thread, THREAD_YIELD); thread->fs_data.work++; @@ -1231,7 +1259,8 @@ thread_sched_fs_balance_eligible(struct thread_runq *runq, if ((nr_threads == 0) || ((nr_threads == 1) - && (thread_sched_class(runq->current) == THREAD_SCHED_CLASS_FS))) { + && (thread_real_sched_class(runq->current) + == THREAD_SCHED_CLASS_FS))) { return 0; } @@ -1521,39 +1550,40 @@ thread_sched_idle_get_global_priority(unsigned short priority) return THREAD_SCHED_GLOBAL_PRIO_IDLE; } -static const struct thread_sched_ops thread_sched_ops[THREAD_NR_SCHED_CLASSES] = { +static const struct thread_sched_ops thread_sched_ops[THREAD_NR_SCHED_CLASSES] + = { [THREAD_SCHED_CLASS_RT] = { - .init_sched = thread_sched_rt_init_sched, .select_runq = thread_sched_rt_select_runq, .add = thread_sched_rt_add, .remove = thread_sched_rt_remove, .put_prev = thread_sched_rt_put_prev, .get_next = thread_sched_rt_get_next, - .set_priority = NULL, + .reset_priority = thread_sched_rt_reset_priority, + .update_priority = NULL, .get_global_priority = thread_sched_rt_get_global_priority, .set_next = thread_sched_rt_set_next, .tick = thread_sched_rt_tick, }, [THREAD_SCHED_CLASS_FS] = { - .init_sched = thread_sched_fs_init_sched, .select_runq = thread_sched_fs_select_runq, .add = thread_sched_fs_add, .remove = thread_sched_fs_remove, .put_prev = thread_sched_fs_put_prev, .get_next = thread_sched_fs_get_next, - .set_priority = thread_sched_fs_set_priority, + .reset_priority = thread_sched_fs_reset_priority, + .update_priority = thread_sched_fs_update_priority, .get_global_priority = thread_sched_fs_get_global_priority, .set_next = 
thread_sched_fs_set_next, .tick = thread_sched_fs_tick, }, [THREAD_SCHED_CLASS_IDLE] = { - .init_sched = NULL, .select_runq = thread_sched_idle_select_runq, .add = thread_sched_idle_add, .remove = thread_sched_idle_remove, .put_prev = NULL, .get_next = thread_sched_idle_get_next, - .set_priority = NULL, + .reset_priority = NULL, + .update_priority = NULL, .get_global_priority = thread_sched_idle_get_global_priority, .set_next = NULL, .tick = NULL, @@ -1561,30 +1591,95 @@ static const struct thread_sched_ops thread_sched_ops[THREAD_NR_SCHED_CLASSES] = }; static void -thread_set_sched_policy(struct thread *thread, unsigned char sched_policy) +thread_set_user_sched_policy(struct thread *thread, unsigned char sched_policy) +{ + thread->user_sched_data.sched_policy = sched_policy; +} + +static void +thread_set_user_sched_class(struct thread *thread, unsigned char sched_class) +{ + thread->user_sched_data.sched_class = sched_class; +} + +static void +thread_set_user_priority(struct thread *thread, unsigned short priority) +{ + const struct thread_sched_ops *ops; + + ops = thread_get_user_sched_ops(thread); + + thread->user_sched_data.priority = priority; + thread->user_sched_data.global_priority + = ops->get_global_priority(priority); +} + +static void +thread_update_user_priority(struct thread *thread, unsigned short priority) +{ + thread_set_user_priority(thread, priority); +} + +static void +thread_set_real_sched_policy(struct thread *thread, unsigned char sched_policy) +{ + thread->real_sched_data.sched_policy = sched_policy; +} + +static void +thread_set_real_sched_class(struct thread *thread, unsigned char sched_class) { - thread->sched_data.sched_policy = sched_policy; + thread->real_sched_data.sched_class = sched_class; } static void -thread_set_sched_class(struct thread *thread, unsigned char sched_class) +thread_set_real_priority(struct thread *thread, unsigned short priority) { - thread->sched_data.sched_class = sched_class; + const struct thread_sched_ops *ops; + + ops = thread_get_real_sched_ops(thread); + + thread->real_sched_data.priority = priority; + thread->real_sched_data.global_priority + = ops->get_global_priority(priority); + + if (ops->reset_priority != NULL) { + ops->reset_priority(thread, priority); + } } static void -thread_set_priority(struct thread *thread, unsigned short priority) +thread_update_real_priority(struct thread *thread, unsigned short priority) { const struct thread_sched_ops *ops; - ops = thread_get_sched_ops(thread); + ops = thread_get_real_sched_ops(thread); + + thread->real_sched_data.priority = priority; + thread->real_sched_data.global_priority + = ops->get_global_priority(priority); - if (ops->set_priority != NULL) { - ops->set_priority(thread, priority); + if (ops->update_priority != NULL) { + ops->update_priority(thread, priority); } +} - thread->sched_data.priority = priority; - thread->sched_data.global_priority = ops->get_global_priority(priority); +static void +thread_reset_real_priority(struct thread *thread) +{ + const struct thread_sched_ops *ops; + struct thread_sched_data *user, *real; + + user = &thread->user_sched_data; + real = &thread->real_sched_data; + *real = *user; + thread->boosted = false; + + ops = thread_get_user_sched_ops(thread); + + if (ops->reset_priority != NULL) { + ops->reset_priority(thread, real->priority); + } } static void __init @@ -1600,9 +1695,10 @@ thread_bootstrap_common(unsigned int cpu) booter->flags = 0; booter->preempt = 1; cpumap_fill(&booter->cpumap); - thread_set_sched_policy(booter, 
THREAD_SCHED_POLICY_IDLE); - thread_set_sched_class(booter, THREAD_SCHED_CLASS_IDLE); - thread_set_priority(booter, 0); + thread_set_user_sched_policy(booter, THREAD_SCHED_POLICY_IDLE); + thread_set_user_sched_class(booter, THREAD_SCHED_CLASS_IDLE); + thread_set_user_priority(booter, 0); + thread_reset_real_priority(booter); memset(booter->tsd, 0, sizeof(booter->tsd)); booter->task = kernel_task; thread_runq_init(percpu_ptr(thread_runq, cpu), cpu, booter); @@ -1616,8 +1712,8 @@ thread_bootstrap(void) thread_fs_highest_round = THREAD_FS_INITIAL_ROUND; - thread_bootstrap_common(0); tcb_set_current(&thread_booters[0].tcb); + thread_bootstrap_common(0); } void __init @@ -1673,23 +1769,9 @@ thread_destroy_tsd(struct thread *thread) } } -static void -thread_init_sched(struct thread *thread, unsigned short priority) -{ - const struct thread_sched_ops *ops; - - ops = thread_get_sched_ops(thread); - - if (ops->init_sched != NULL) { - ops->init_sched(thread, priority); - } - - thread->sched_data.priority = priority; - thread->sched_data.global_priority = ops->get_global_priority(priority); -} - static int -thread_init(struct thread *thread, void *stack, const struct thread_attr *attr, +thread_init(struct thread *thread, void *stack, + const struct thread_attr *attr, void (*fn)(void *), void *arg) { struct thread *caller; @@ -1706,15 +1788,34 @@ thread_init(struct thread *thread, void *stack, const struct thread_attr *attr, thread->nr_refs = 1; thread->flags = 0; thread->runq = NULL; + thread->in_runq = false; thread_set_wchan(thread, thread, "init"); thread->state = THREAD_SLEEPING; + thread->priv_sleepq = sleepq_create(); + + if (thread->priv_sleepq == NULL) { + error = ERROR_NOMEM; + goto error_sleepq; + } + + thread->priv_turnstile = turnstile_create(); + + if (thread->priv_turnstile == NULL) { + error = ERROR_NOMEM; + goto error_turnstile; + } + + turnstile_td_init(&thread->turnstile_td); + thread->last_cond = NULL; + thread->propagate_priority = false; thread->preempt = THREAD_SUSPEND_PREEMPT_LEVEL; thread->pinned = 0; thread->llsync_read = 0; cpumap_copy(&thread->cpumap, cpumap); - thread_set_sched_policy(thread, attr->policy); - thread_set_sched_class(thread, thread_policy_table[attr->policy]); - thread_init_sched(thread, attr->priority); + thread_set_user_sched_policy(thread, attr->policy); + thread_set_user_sched_class(thread, thread_policy_to_class(attr->policy)); + thread_set_user_priority(thread, attr->priority); + thread_reset_real_priority(thread); memset(thread->tsd, 0, sizeof(thread->tsd)); mutex_init(&thread->join_lock); condition_init(&thread->join_cond); @@ -1732,15 +1833,19 @@ thread_init(struct thread *thread, void *stack, const struct thread_attr *attr, error = tcb_init(&thread->tcb, stack, thread_main); if (error) { - goto error_tsd; + goto error_tcb; } task_add_thread(task, thread); return 0; -error_tsd: +error_tcb: thread_destroy_tsd(thread); + turnstile_destroy(thread->priv_turnstile); +error_turnstile: + sleepq_destroy(thread->priv_sleepq); +error_sleepq: return error; } @@ -1782,8 +1887,12 @@ thread_destroy(struct thread *thread) thread_unlock_runq(runq, flags); } while (state != THREAD_DEAD); - thread_destroy_tsd(thread); + /* See task_info() */ task_remove_thread(thread->task, thread); + + thread_destroy_tsd(thread); + turnstile_destroy(thread->priv_turnstile); + sleepq_destroy(thread->priv_sleepq); kmem_cache_free(&thread_stack_cache, thread->stack); kmem_cache_free(&thread_cache, thread); } @@ -2208,7 +2317,7 @@ thread_wakeup(struct thread *thread) thread->state = 
THREAD_RUNNING; } else { /* - * If another wakeup was attempted right before this one, the thread + * If another wake-up was attempted right before this one, the thread * may currently be pushed on a remote run queue, and the run queue * being locked here is actually the previous one. The run queue * pointer may be modified concurrently, now being protected by the @@ -2233,7 +2342,7 @@ thread_wakeup(struct thread *thread) cpu_intr_save(&flags); if (!thread->pinned) { - runq = thread_sched_ops[thread_sched_class(thread)].select_runq(thread); + runq = thread_get_real_sched_ops(thread)->select_runq(thread); } else { runq = thread->runq; spinlock_lock(&runq->lock); @@ -2325,7 +2434,7 @@ thread_tick_intr(void) thread_balance_idle_tick(runq); } - ops = thread_get_sched_ops(thread); + ops = thread_get_real_sched_ops(thread); if (ops->tick != NULL) { ops->tick(runq, thread); @@ -2334,6 +2443,7 @@ thread_tick_intr(void) spinlock_unlock(&runq->lock); } +/* TODO Move outside */ char thread_state_to_chr(const struct thread *thread) { @@ -2349,6 +2459,7 @@ thread_state_to_chr(const struct thread *thread) } } +/* TODO Move outside */ const char * thread_sched_class_to_str(unsigned char sched_class) { @@ -2369,17 +2480,23 @@ thread_setscheduler(struct thread *thread, unsigned char policy, unsigned short priority) { struct thread_runq *runq; + struct turnstile_td *td; unsigned long flags; - bool current; + bool requeue, current, update; + + td = thread_turnstile_td(thread); + turnstile_td_lock(td); runq = thread_lock_runq(thread, &flags); - if ((thread_sched_policy(thread) == policy) - && (thread_priority(thread) == priority)) { + if ((thread_user_sched_policy(thread) == policy) + && (thread_user_priority(thread) == priority)) { goto out; } - if (thread->state != THREAD_RUNNING) { + requeue = thread->in_runq; + + if (!requeue) { current = false; } else { if (thread != runq->current) { @@ -2392,16 +2509,32 @@ thread_setscheduler(struct thread *thread, unsigned char policy, thread_runq_remove(runq, thread); } - if (thread_sched_policy(thread) == policy) { - thread_set_priority(thread, priority); + if (thread_user_sched_policy(thread) == policy) { + thread_update_user_priority(thread, priority); + update = true; } else { - thread_set_sched_policy(thread, policy); - assert(policy < ARRAY_SIZE(thread_policy_table)); - thread_set_sched_class(thread, thread_policy_table[policy]); - thread_init_sched(thread, priority); + thread_set_user_sched_policy(thread, policy); + thread_set_user_sched_class(thread, thread_policy_to_class(policy)); + thread_set_user_priority(thread, priority); + update = false; } - if (thread->state == THREAD_RUNNING) { + if (thread->boosted) { + if (thread_user_global_priority(thread) + >= thread_real_global_priority(thread)) { + thread_reset_real_priority(thread); + } + } else { + if (update) { + thread_update_real_priority(thread, priority); + } else { + thread_set_real_sched_policy(thread, policy); + thread_set_real_sched_class(thread, thread_policy_to_class(policy)); + thread_set_real_priority(thread, priority); + } + } + + if (requeue) { thread_runq_add(runq, thread); if (current) { @@ -2411,6 +2544,96 @@ thread_setscheduler(struct thread *thread, unsigned char policy, out: thread_unlock_runq(runq, flags); + turnstile_td_unlock(td); + + turnstile_td_propagate_priority(td); +} + +void +thread_pi_setscheduler(struct thread *thread, unsigned char policy, + unsigned short priority) +{ + const struct thread_sched_ops *ops; + struct thread_runq *runq; + struct turnstile_td *td; + unsigned int 
global_priority; + unsigned long flags; + bool requeue, current; + + td = thread_turnstile_td(thread); + turnstile_td_assert_lock(td); + + ops = thread_get_sched_ops(thread_policy_to_class(policy)); + global_priority = ops->get_global_priority(priority); + + runq = thread_lock_runq(thread, &flags); + + if ((thread_real_sched_policy(thread) == policy) + && (thread_real_priority(thread) == priority)) { + goto out; + } + + requeue = thread->in_runq; + + if (!requeue) { + current = false; + } else { + if (thread != runq->current) { + current = false; + } else { + thread_runq_put_prev(runq, thread); + current = true; + } + + thread_runq_remove(runq, thread); + } + + if (global_priority <= thread_user_global_priority(thread)) { + thread_reset_real_priority(thread); + } else { + if (thread_real_sched_policy(thread) == policy) { + thread_update_real_priority(thread, priority); + } else { + thread_set_real_sched_policy(thread, policy); + thread_set_real_sched_class(thread, thread_policy_to_class(policy)); + thread_set_real_priority(thread, priority); + } + + thread->boosted = true; + } + + if (requeue) { + thread_runq_add(runq, thread); + + if (current) { + thread_runq_set_next(runq, thread); + } + } + +out: + thread_unlock_runq(runq, flags); +} + +void +thread_propagate_priority(void) +{ + struct thread *thread; + + /* + * Although it's possible to propagate priority with preemption + * disabled, the operation can be too expensive to allow it. + */ + if (!thread_preempt_enabled()) { + thread_set_priority_propagation_needed(); + return; + } + + thread = thread_self(); + + /* Clear before propagation to avoid infinite recursion */ + thread->propagate_priority = false; + + turnstile_td_propagate_priority(thread_turnstile_td(thread)); } void diff --git a/kern/thread.h b/kern/thread.h index 21be2ab7..60c330b9 100644 --- a/kern/thread.h +++ b/kern/thread.h @@ -33,30 +33,24 @@ #ifndef _KERN_THREAD_H #define _KERN_THREAD_H +#include <stdbool.h> +#include <stddef.h> + #include <kern/assert.h> +#include <kern/condition.h> #include <kern/cpumap.h> #include <kern/macros.h> +#include <kern/spinlock_types.h> +#include <kern/turnstile_types.h> #include <machine/atomic.h> #include <machine/tcb.h> /* - * Forward declaration - */ -struct spinlock; - -/* * Thread structure. */ struct thread; /* - * Thread name buffer size. - */ -#define THREAD_NAME_SIZE 32 - -/* - * Common scheduling data. - * * The global priority of a thread is meant to be compared against * another global priority to determine which thread has higher priority. */ @@ -67,6 +61,11 @@ struct thread_sched_data { unsigned int global_priority; }; +/* + * Thread name buffer size. + */ +#define THREAD_NAME_SIZE 32 + #include <kern/thread_i.h> #define THREAD_KERNEL_PREFIX PACKAGE "_" @@ -199,8 +198,9 @@ void thread_join(struct thread *thread); * Make the current thread sleep while waiting for an event. * * The interlock is used to synchronize the thread state with respect to - * wakeups, i.e. a wakeup request sent by another thread will not be missed + * wake-ups, i.e. a wake-up request sent by another thread cannot be missed * if that thread is holding the interlock. + * * As a special exception, threads that use preemption as a synchronization * mechanism can ommit the interlock and pass a NULL pointer instead. 
* In any case, the preemption nesting level must strictly be one when calling @@ -210,9 +210,6 @@ void thread_join(struct thread *thread); * address should refer to a relevant synchronization object, normally * containing the interlock, but not necessarily. * - * This is a low level thread control primitive that should only be called by - * higher thread synchronization functions. - * * Implies a memory barrier. */ void thread_sleep(struct spinlock *interlock, const void *wchan_addr, @@ -222,9 +219,6 @@ void thread_sleep(struct spinlock *interlock, const void *wchan_addr, * Schedule a thread for execution on a processor. * * No action is performed if the target thread is already in the running state. - * - * This is a low level thread control primitive that should only be called by - * higher thread synchronization functions. */ void thread_wakeup(struct thread *thread); @@ -261,6 +255,15 @@ void thread_tick_intr(void); void thread_setscheduler(struct thread *thread, unsigned char policy, unsigned short priority); +/* + * Variant used for priority inheritance. + * + * The caller must hold the turnstile thread data lock and no turnstile + * locks when calling this function. + */ +void thread_pi_setscheduler(struct thread *thread, unsigned char policy, + unsigned short priority); + static inline void thread_ref(struct thread *thread) { @@ -301,33 +304,72 @@ thread_wchan_desc(const struct thread *thread) char thread_state_to_chr(const struct thread *thread); static inline const struct thread_sched_data * -thread_get_sched_data(const struct thread *thread) +thread_get_user_sched_data(const struct thread *thread) { - return &thread->sched_data; + return &thread->user_sched_data; } +static inline const struct thread_sched_data * +thread_get_real_sched_data(const struct thread *thread) +{ + return &thread->real_sched_data; +} + +/* + * If the caller requires the scheduling data to be stable, it + * must lock one of the following objects : + * - the containing run queue + * - the per-thread turnstile data (turnstile_td) + * + * Both are locked when scheduling data are updated. 
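As a concrete illustration of this rule, a hypothetical helper (not part of the commit) that wants a consistent snapshot of another thread's effective priority can hold the per-thread turnstile data lock around the accessors, which is also what the test module added by this commit does:

static unsigned int
example_read_real_global_priority(struct thread *thread)
{
    struct turnstile_td *td;
    unsigned int global_priority;

    td = thread_turnstile_td(thread);

    /* The scheduling data cannot change while the td lock is held */
    turnstile_td_lock(td);
    global_priority = thread_real_global_priority(thread);
    turnstile_td_unlock(td);

    return global_priority;
}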
+ */ + static inline unsigned char -thread_sched_policy(const struct thread *thread) +thread_user_sched_policy(const struct thread *thread) { - return thread_get_sched_data(thread)->sched_policy; + return thread_get_user_sched_data(thread)->sched_policy; } static inline unsigned char -thread_sched_class(const struct thread *thread) +thread_user_sched_class(const struct thread *thread) { - return thread_get_sched_data(thread)->sched_class; + return thread_get_user_sched_data(thread)->sched_class; } static inline unsigned short -thread_priority(const struct thread *thread) +thread_user_priority(const struct thread *thread) { - return thread_get_sched_data(thread)->priority; + return thread_get_user_sched_data(thread)->priority; } static inline unsigned int -thread_global_priority(const struct thread *thread) +thread_user_global_priority(const struct thread *thread) +{ + return thread_get_user_sched_data(thread)->global_priority; +} + +static inline unsigned char +thread_real_sched_policy(const struct thread *thread) +{ + return thread_get_real_sched_data(thread)->sched_policy; +} + +static inline unsigned char +thread_real_sched_class(const struct thread *thread) +{ + return thread_get_real_sched_data(thread)->sched_class; +} + +static inline unsigned short +thread_real_priority(const struct thread *thread) { - return thread_get_sched_data(thread)->global_priority; + return thread_get_real_sched_data(thread)->priority; +} + +static inline unsigned int +thread_real_global_priority(const struct thread *thread) +{ + return thread_get_real_sched_data(thread)->global_priority; } /* @@ -367,6 +409,118 @@ thread_schedule(void) } /* + * Sleep queue lending functions. + */ + +static inline struct sleepq * +thread_sleepq_lend(void) +{ + struct sleepq *sleepq; + + sleepq = thread_self()->priv_sleepq; + assert(sleepq != NULL); + thread_self()->priv_sleepq = NULL; + return sleepq; +} + +static inline void +thread_sleepq_return(struct sleepq *sleepq) +{ + assert(sleepq != NULL); + assert(thread_self()->priv_sleepq == NULL); + thread_self()->priv_sleepq = sleepq; +} + +/* + * Condition variable related functions. + */ + +static inline void +thread_set_last_cond(struct condition *last_cond) +{ + struct thread *thread; + + thread = thread_self(); + assert(thread->last_cond == NULL); + thread->last_cond = last_cond; +} + +static inline struct condition * +thread_pull_last_cond(void) +{ + struct condition *last_cond; + struct thread *thread; + + thread = thread_self(); + last_cond = thread->last_cond; + + if (last_cond != NULL) { + thread->last_cond = NULL; + } + + return last_cond; +} + +static inline void +thread_wakeup_last_cond(void) +{ + struct condition *last_cond; + + last_cond = thread_pull_last_cond(); + + if (last_cond != NULL) { + condition_wakeup(last_cond); + } +} + +/* + * Turnstile lending functions. + */ + +static inline struct turnstile * +thread_turnstile_lend(void) +{ + struct turnstile *turnstile; + + turnstile = thread_self()->priv_turnstile; + assert(turnstile != NULL); + thread_self()->priv_turnstile = NULL; + return turnstile; +} + +static inline void +thread_turnstile_return(struct turnstile *turnstile) +{ + assert(turnstile != NULL); + assert(thread_self()->priv_turnstile == NULL); + thread_self()->priv_turnstile = turnstile; +} + +static inline struct turnstile_td * +thread_turnstile_td(struct thread *thread) +{ + return &thread->turnstile_td; +} + +/* + * Priority propagation functions. 
+ */ + +static inline bool +thread_priority_propagation_needed(void) +{ + return thread_self()->propagate_priority; +} + +static inline void +thread_set_priority_propagation_needed(void) +{ + thread_self()->propagate_priority = true; +} + +void thread_propagate_priority(void); + +/* * Migration control functions. * * Functions that change the migration state are implicit compiler barriers. @@ -421,6 +575,10 @@ thread_preempt_enable_no_resched(void) thread = thread_self(); assert(thread->preempt != 0); thread->preempt--; + + if (thread_preempt_enabled() && thread_priority_propagation_needed()) { + thread_propagate_priority(); + } } static inline void diff --git a/kern/thread_i.h b/kern/thread_i.h index a062773d..00d40ca6 100644 --- a/kern/thread_i.h +++ b/kern/thread_i.h @@ -18,15 +18,23 @@ #ifndef _KERN_THREAD_I_H #define _KERN_THREAD_I_H +#include <stdbool.h> + #include <kern/condition_types.h> #include <kern/cpumap.h> #include <kern/list_types.h> #include <kern/macros.h> #include <kern/mutex_types.h> #include <kern/param.h> +#include <kern/turnstile_types.h> #include <machine/atomic.h> #include <machine/tcb.h> +/* + * Forward declarations. + */ +struct sleepq; + struct thread_runq; struct thread_fs_runq; @@ -86,12 +94,38 @@ struct thread { /* Flags must be changed atomically */ unsigned long flags; - /* Sleep/wakeup synchronization members */ + /* Sleep/wake-up synchronization members */ struct thread_runq *runq; + bool in_runq; const void *wchan_addr; const char *wchan_desc; unsigned short state; + /* Sleep queue available for lending */ + struct sleepq *priv_sleepq; + + /* Turnstile available for lending */ + struct turnstile *priv_turnstile; + + /* + * When a thread wakes up after waiting for a condition variable, + * it sets this variable so that other waiters are only awaken + * after the associated mutex has actually been released. + * + * This member is thread-local. + */ + struct condition *last_cond; + + /* Per-thread turnstile data */ + struct turnstile_td turnstile_td; + + /* + * True if priority must be propagated when preemption is reenabled + * + * This member is thread-local. + */ + bool propagate_priority; + /* Thread-local members */ unsigned short preempt; unsigned short pinned; @@ -100,8 +134,21 @@ struct thread { /* Processors on which this thread is allowed to run */ struct cpumap cpumap; - /* Scheduling data */ - struct thread_sched_data sched_data; + /* + * Scheduling data. + */ + struct thread_sched_data user_sched_data; /* User-provided */ + struct thread_sched_data real_sched_data; /* Computed from + priority propagation */ + + /* + * True if the real scheduling data are not the user scheduling data. + * + * Note that it doesn't provide any information about priority inheritance. + * A thread may be part of a priority inheritance chain without its + * priority being boosted. + */ + bool boosted; /* Class specific scheduling data */ union { diff --git a/kern/turnstile.c b/kern/turnstile.c new file mode 100644 index 00000000..43629376 --- /dev/null +++ b/kern/turnstile.c @@ -0,0 +1,788 @@ +/* + * Copyright (c) 2017 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * This implementation is based on "Solaris(tm) Internals: Solaris 10 and + * OpenSolaris Kernel Architecture, Second Edition" by Richard McDougall + * and Jim Mauro. See "Part Six: Platform Specifics, Chapter 17: Locking + * and Synchronization, Section 7 Turnstiles and Priority Inheritance". + * + * The differences are outlined below. + * + * This implementation doesn't support read/write locks, only mutual + * exclusion, because ownership doesn't apply well to read/write locks. + * + * Instead of using an external sleep queue object, this module implements + * that functionality itself. The reasons behind this decision are : + * - the use of expensive priority lists used to queue threads, that + * a simpler sleep queue implementation shouldn't use + * - increased coupling with the scheduler + * + * Locking order : bucket (turnstile) -> turnstile_td -> thread_runq + * + * This order allows changing the owner of a turnstile without unlocking it + * which is important because a turnstile is normally used to synchronize + * access to the owner. Unlocking a turnstile would allow the owner to + * change and make complex transient states visible. The drawback is that + * a thread cannot be requeued atomically when its priority is changed. + * That deferred requeue is done during priority propagation. + * + * TODO Analyse hash parameters. + */ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include <kern/assert.h> +#include <kern/init.h> +#include <kern/kmem.h> +#include <kern/list.h> +#include <kern/macros.h> +#include <kern/param.h> +#include <kern/plist.h> +#include <kern/spinlock.h> +#include <kern/thread.h> +#include <kern/turnstile.h> +#include <kern/turnstile_types.h> + +/* + * Locking keys : + * (b) bucket + */ +struct turnstile_bucket { + struct spinlock lock; + struct list list; /* (b) */ +} __aligned(CPU_L1_SIZE); + +/* + * Adding/removing waiters to/from a turnstile are performed while + * holding both the waiter's thread data and the turnstile locks. + * + * Changing the owner of a turnstile and linking/unlinking the turnstile + * into/from the owner's list of owned turnstiles are done atomically, + * while holding both the owner's thread data and the turnstile locks. + * + * Locking keys : + * (b) bucket + * (t) turnstile_td + * + * (*) A turnstile is referenced in thread data after/before being + * added/removed to/from its bucket. Referencing a turnstile in + * thread data requires holding the thread data lock. This implies + * that, while holding the thread data lock, if the referenced + * turnstile isn't NULL, the bucket pointer is also not NULL and + * stable. 
+ */ +struct turnstile { + struct turnstile_bucket *bucket; /* (b*) */ + struct list node; /* (b) */ + const void *sync_obj; /* (b) */ + struct plist waiters; /* (b,t) */ + struct turnstile *next_free; /* (b) */ + struct turnstile_waiter *top_waiter; /* (b,t) */ + struct thread *owner; /* (b,t) */ + struct plist_node td_node; /* (t) */ +}; + +/* + * Locking keys : + * (b) bucket + * (t) turnstile_td + */ +struct turnstile_waiter { + struct plist_node node; /* (b,t) */ + struct thread *thread; /* (b,t) */ + bool awaken; /* (b) */ +}; + +#define TURNSTILE_HTABLE_SIZE 128 + +#if !ISP2(TURNSTILE_HTABLE_SIZE) +#error hash table size must be a power of two +#endif /* !ISP2(TURNSTILE_HTABLE_SIZE) */ + +#define TURNSTILE_HTABLE_MASK (TURNSTILE_HTABLE_SIZE - 1) + +static struct turnstile_bucket turnstile_htable[TURNSTILE_HTABLE_SIZE]; + +static struct kmem_cache turnstile_cache; + +static uintptr_t +turnstile_hash(const void *addr) +{ + return ((uintptr_t)addr >> 8) ^ (uintptr_t)addr; +} + +static void +turnstile_waiter_init(struct turnstile_waiter *waiter, struct thread *thread) +{ + plist_node_init(&waiter->node, thread_real_global_priority(thread)); + waiter->thread = thread; + waiter->awaken = false; +} + +static unsigned int +turnstile_waiter_priority(const struct turnstile_waiter *waiter) +{ + return plist_node_priority(&waiter->node); +} + +static bool +turnstile_waiter_awaken(const struct turnstile_waiter *waiter) +{ + return waiter->awaken; +} + +static void +turnstile_waiter_set_awaken(struct turnstile_waiter *waiter) +{ + waiter->awaken = true; +} + +static void +turnstile_waiter_clear_awaken(struct turnstile_waiter *waiter) +{ + waiter->awaken = false; +} + +static void +turnstile_waiter_wakeup(struct turnstile_waiter *waiter) +{ + if (turnstile_waiter_awaken(waiter)) { + return; + } + + thread_wakeup(waiter->thread); + turnstile_waiter_set_awaken(waiter); +} + +static void +turnstile_update_top_waiter(struct turnstile *turnstile) +{ + if (turnstile_empty(turnstile)) { + turnstile->top_waiter = NULL; + return; + } + + turnstile->top_waiter = plist_last_entry(&turnstile->waiters, + struct turnstile_waiter, node); +} + +static void +turnstile_add_waiter(struct turnstile *turnstile, + struct turnstile_waiter *waiter) +{ + assert(!turnstile_waiter_awaken(waiter)); + plist_add(&turnstile->waiters, &waiter->node); + turnstile_update_top_waiter(turnstile); +} + +static void +turnstile_remove_waiter(struct turnstile *turnstile, + struct turnstile_waiter *waiter) +{ + assert(turnstile_waiter_awaken(waiter)); + plist_remove(&turnstile->waiters, &waiter->node); + turnstile_update_top_waiter(turnstile); +} + +static void +turnstile_waiter_requeue(struct turnstile *turnstile, + struct turnstile_waiter *waiter) +{ + unsigned int global_priority; + + global_priority = thread_real_global_priority(waiter->thread); + assert(global_priority != plist_node_priority(&waiter->node)); + + plist_remove(&turnstile->waiters, &waiter->node); + plist_node_set_priority(&waiter->node, global_priority); + plist_add(&turnstile->waiters, &waiter->node); + turnstile_update_top_waiter(turnstile); +} + +static void +turnstile_td_set_waiter(struct turnstile_td *td, + struct turnstile_waiter *waiter) +{ + td->waiter = waiter; +} + +static struct turnstile_waiter * +turnstile_td_get_waiter(const struct turnstile_td *td) +{ + return td->waiter; +} + +static void +turnstile_td_update_top_priority(struct turnstile_td *td) +{ + struct turnstile_waiter *top_waiter; + struct turnstile *top_turnstile; + + if 
(plist_empty(&td->owned_turnstiles)) { + td->top_global_priority = 0; + return; + } + + top_turnstile = plist_last_entry(&td->owned_turnstiles, + struct turnstile, td_node); + top_waiter = top_turnstile->top_waiter; + + if (top_waiter == NULL) { + td->top_global_priority = 0; + } else { + td->top_global_priority = turnstile_waiter_priority(top_waiter); + td->top_sched_policy = thread_real_sched_policy(top_waiter->thread); + td->top_priority = thread_real_priority(top_waiter->thread); + } +} + +static void +turnstile_td_own(struct turnstile_td *td, struct turnstile *turnstile) +{ + struct turnstile_waiter *top_waiter; + unsigned int top_priority; + + assert(turnstile->owner == NULL); + + top_waiter = turnstile->top_waiter; + assert(top_waiter != NULL); + top_priority = thread_real_global_priority(top_waiter->thread); + plist_node_init(&turnstile->td_node, top_priority); + plist_add(&td->owned_turnstiles, &turnstile->td_node); + turnstile_td_update_top_priority(td); + + turnstile->owner = structof(td, struct thread, turnstile_td); +} + +static void +turnstile_td_disown(struct turnstile_td *td, struct turnstile *turnstile) +{ + assert(turnstile->owner == structof(td, struct thread, turnstile_td)); + + assert(!plist_node_unlinked(&turnstile->td_node)); + plist_remove(&td->owned_turnstiles, &turnstile->td_node); + turnstile_td_update_top_priority(td); + + turnstile->owner = NULL; +} + +static void +turnstile_td_reown(struct turnstile_td *td, struct turnstile *turnstile) +{ + assert(turnstile->owner == structof(td, struct thread, turnstile_td)); + assert(!plist_node_unlinked(&turnstile->td_node)); + plist_remove(&td->owned_turnstiles, &turnstile->td_node); + turnstile->owner = NULL; + turnstile_td_own(td, turnstile); +} + +/* + * The caller must hold the turnstile thread data lock and no turnstile + * locks when calling this function. The thread data are unlocked on return. + * + * In addition, this function drops a reference on the thread associated + * with the given thread data. + */ +static void +turnstile_td_propagate_priority_loop(struct turnstile_td *td) +{ + unsigned int user_priority, real_priority, top_priority; + struct turnstile_waiter *waiter; + struct turnstile *turnstile; + struct thread *thread; + unsigned short priority; + unsigned char policy; + int error; + + /* + * At the very least, this function must make sure that : + * - the given thread has its intended priority, which is the + * highest among its own and all the waiters in the turnstiles + * it owns, and + * - the thread is at its intended position in the turnstile it's + * waiting on, if any. 
+ */ + + for (;;) { + thread = structof(td, struct thread, turnstile_td); + user_priority = thread_user_global_priority(thread); + real_priority = thread_real_global_priority(thread); + top_priority = td->top_global_priority; + + if (top_priority > user_priority) { + policy = td->top_sched_policy; + priority = td->top_priority; + } else { + top_priority = user_priority; + policy = thread_user_sched_policy(thread); + priority = thread_user_priority(thread); + } + + if (top_priority != real_priority) { + thread_pi_setscheduler(thread, policy, priority); + } + + waiter = turnstile_td_get_waiter(td); + + if ((waiter == NULL) + || (top_priority == turnstile_waiter_priority(waiter))) { + spinlock_unlock(&td->lock); + thread_unref(thread); + return; + } + + turnstile = turnstile_td_get_turnstile(td); + assert(turnstile != NULL); + + error = spinlock_trylock(&turnstile->bucket->lock); + + if (error) { + spinlock_unlock(&td->lock); + spinlock_lock(&td->lock); + continue; + } + + /* + * This couldn't be done while changing the thread's priority + * because of locking restrictions. Do it now. + */ + turnstile_waiter_requeue(turnstile, waiter); + + spinlock_unlock(&td->lock); + thread_unref(thread); + + thread = turnstile->owner; + + if (thread == NULL) { + break; + } + + td = thread_turnstile_td(thread); + + thread_ref(thread); + spinlock_lock(&td->lock); + + turnstile_td_reown(td, turnstile); + + spinlock_unlock(&turnstile->bucket->lock); + } + + spinlock_unlock(&turnstile->bucket->lock); +} + +void +turnstile_td_propagate_priority(struct turnstile_td *td) +{ + struct thread *thread; + + thread = structof(td, struct thread, turnstile_td); + + thread_ref(thread); + spinlock_lock(&td->lock); + turnstile_td_propagate_priority_loop(td); +} + +static void +turnstile_assert_init_state(const struct turnstile *turnstile) +{ + assert(turnstile->bucket == NULL); + assert(turnstile->sync_obj == NULL); + assert(plist_empty(&turnstile->waiters)); + assert(turnstile->next_free == NULL); + assert(turnstile->top_waiter == NULL); + assert(turnstile->owner == NULL); +} + +static void +turnstile_use(struct turnstile *turnstile, const void *sync_obj) +{ + assert(turnstile->sync_obj == NULL); + turnstile->sync_obj = sync_obj; +} + +static void +turnstile_unuse(struct turnstile *turnstile) +{ + assert(turnstile->sync_obj != NULL); + turnstile->sync_obj = NULL; +} + +static bool +turnstile_in_use(const struct turnstile *turnstile) +{ + return turnstile->sync_obj != NULL; +} + +static bool +turnstile_in_use_by(const struct turnstile *turnstile, const void *sync_obj) +{ + return turnstile->sync_obj == sync_obj; +} + +static void +turnstile_bucket_init(struct turnstile_bucket *bucket) +{ + spinlock_init(&bucket->lock); + list_init(&bucket->list); +} + +static struct turnstile_bucket * +turnstile_bucket_get(const void *sync_obj) +{ + uintptr_t index; + + index = turnstile_hash(sync_obj) & TURNSTILE_HTABLE_MASK; + assert(index < ARRAY_SIZE(turnstile_htable)); + return &turnstile_htable[index]; +} + +static void +turnstile_bucket_add(struct turnstile_bucket *bucket, + struct turnstile *turnstile) +{ + assert(turnstile->bucket == NULL); + turnstile->bucket = bucket; + list_insert_tail(&bucket->list, &turnstile->node); +} + +static void +turnstile_bucket_remove(struct turnstile_bucket *bucket, + struct turnstile *turnstile) +{ + assert(turnstile->bucket == bucket); + turnstile->bucket = NULL; + list_remove(&turnstile->node); +} + +static struct turnstile * +turnstile_bucket_lookup(const struct turnstile_bucket *bucket, + const 
void *sync_obj) +{ + struct turnstile *turnstile; + + list_for_each_entry(&bucket->list, turnstile, node) { + if (turnstile_in_use_by(turnstile, sync_obj)) { + return turnstile; + } + } + + return NULL; +} + +static void +turnstile_ctor(void *ptr) +{ + struct turnstile *turnstile; + + turnstile = ptr; + turnstile->bucket = NULL; + turnstile->sync_obj = NULL; + plist_init(&turnstile->waiters); + turnstile->next_free = NULL; + turnstile->top_waiter = NULL; + turnstile->owner = NULL; +} + +void __init +turnstile_setup(void) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(turnstile_htable); i++) { + turnstile_bucket_init(&turnstile_htable[i]); + } + + kmem_cache_init(&turnstile_cache, "turnstile", sizeof(struct turnstile), + CPU_L1_SIZE, turnstile_ctor, 0); +} + +struct turnstile * +turnstile_create(void) +{ + struct turnstile *turnstile; + + turnstile = kmem_cache_alloc(&turnstile_cache); + + if (turnstile == NULL) { + return NULL; + } + + turnstile_assert_init_state(turnstile); + return turnstile; +} + +void +turnstile_destroy(struct turnstile *turnstile) +{ + turnstile_assert_init_state(turnstile); + kmem_cache_free(&turnstile_cache, turnstile); +} + +struct turnstile * +turnstile_acquire(const void *sync_obj) +{ + struct turnstile_bucket *bucket; + struct turnstile *turnstile; + + assert(sync_obj != NULL); + + bucket = turnstile_bucket_get(sync_obj); + + spinlock_lock(&bucket->lock); + + turnstile = turnstile_bucket_lookup(bucket, sync_obj); + + if (turnstile == NULL) { + spinlock_unlock(&bucket->lock); + return NULL; + } + + return turnstile; +} + +void +turnstile_release(struct turnstile *turnstile) +{ + spinlock_unlock(&turnstile->bucket->lock); +} + +static void +turnstile_push_free(struct turnstile *turnstile, + struct turnstile *free_turnstile) +{ + assert(free_turnstile->next_free == NULL); + free_turnstile->next_free = turnstile->next_free; + turnstile->next_free = free_turnstile; +} + +static struct turnstile * +turnstile_pop_free(struct turnstile *turnstile) +{ + struct turnstile *free_turnstile; + + free_turnstile = turnstile->next_free; + + if (free_turnstile == NULL) { + return NULL; + } + + turnstile->next_free = free_turnstile->next_free; + free_turnstile->next_free = NULL; + return free_turnstile; +} + +struct turnstile * +turnstile_lend(const void *sync_obj) +{ + struct turnstile_bucket *bucket; + struct turnstile *turnstile, *prev; + struct turnstile_td *td; + + assert(sync_obj != NULL); + + turnstile = thread_turnstile_lend(); + turnstile_assert_init_state(turnstile); + + td = thread_turnstile_td(thread_self()); + bucket = turnstile_bucket_get(sync_obj); + + spinlock_lock(&bucket->lock); + + prev = turnstile_bucket_lookup(bucket, sync_obj); + + if (prev == NULL) { + turnstile_use(turnstile, sync_obj); + turnstile_bucket_add(bucket, turnstile); + } else { + turnstile_push_free(prev, turnstile); + turnstile = prev; + } + + spinlock_lock(&td->lock); + turnstile_td_set_turnstile(td, turnstile); + spinlock_unlock(&td->lock); + + return turnstile; +} + +void +turnstile_return(struct turnstile *turnstile) +{ + struct turnstile_bucket *bucket; + struct turnstile *free_turnstile; + struct turnstile_td *td; + + assert(turnstile_in_use(turnstile)); + + td = thread_turnstile_td(thread_self()); + + spinlock_lock(&td->lock); + turnstile_td_set_turnstile(td, NULL); + spinlock_unlock(&td->lock); + + bucket = turnstile->bucket; + free_turnstile = turnstile_pop_free(turnstile); + + if (free_turnstile == NULL) { + turnstile_bucket_remove(bucket, turnstile); + 
turnstile_unuse(turnstile); + free_turnstile = turnstile; + } + + spinlock_unlock(&bucket->lock); + + turnstile_assert_init_state(free_turnstile); + thread_turnstile_return(free_turnstile); +} + +bool +turnstile_empty(const struct turnstile *turnstile) +{ + return plist_empty(&turnstile->waiters); +} + +static void +turnstile_set_owner(struct turnstile *turnstile, struct thread *owner) +{ + struct turnstile_td *td; + + assert(owner != NULL); + assert((turnstile->owner == NULL) || (turnstile->owner == owner)); + + td = thread_turnstile_td(owner); + + thread_ref(owner); + spinlock_lock(&td->lock); + + if (turnstile->owner == NULL) { + turnstile_td_own(td, turnstile); + } + + spinlock_unlock(&turnstile->bucket->lock); + + turnstile_td_propagate_priority_loop(td); + + spinlock_lock(&turnstile->bucket->lock); +} + +void +turnstile_wait(struct turnstile *turnstile, const char *wchan, + struct thread *owner) +{ + struct turnstile_waiter waiter; + struct turnstile_td *td; + struct thread *thread; + + thread = thread_self(); + assert(thread != owner); + + td = thread_turnstile_td(thread); + + spinlock_lock(&td->lock); + turnstile_waiter_init(&waiter, thread); + turnstile_add_waiter(turnstile, &waiter); + turnstile_td_set_waiter(td, &waiter); + spinlock_unlock(&td->lock); + + if (owner == NULL) { + if (turnstile->top_waiter == &waiter) { + turnstile_waiter_set_awaken(&waiter); + } + } else { + /* This function temporarily unlocks the turnstile */ + turnstile_set_owner(turnstile, owner); + } + + for (;;) { + if (!turnstile_waiter_awaken(&waiter)) { + thread_sleep(&turnstile->bucket->lock, turnstile->sync_obj, wchan); + } + + /* + * The real priority of a thread may change between waking up + * and reacquiring the turnstile. + */ + if (turnstile->top_waiter == &waiter) { + break; + } + + /* Otherwise, make sure the new top waiter is awaken */ + turnstile_waiter_wakeup(turnstile->top_waiter); + turnstile_waiter_clear_awaken(&waiter); + } + + spinlock_lock(&td->lock); + turnstile_td_set_waiter(td, NULL); + turnstile_remove_waiter(turnstile, &waiter); + spinlock_unlock(&td->lock); +} + +void +turnstile_signal(struct turnstile *turnstile) +{ + struct turnstile_waiter *waiter; + + if (turnstile_empty(turnstile)) { + return; + } + + waiter = plist_last_entry(&turnstile->waiters, + struct turnstile_waiter, node); + turnstile_waiter_wakeup(waiter); +} + +void +turnstile_own(struct turnstile *turnstile) +{ + struct turnstile_td *td; + struct thread *owner; + unsigned int top_priority; + + assert(turnstile->owner == NULL); + + if (turnstile_empty(turnstile)) { + return; + } + + owner = thread_self(); + top_priority = turnstile_waiter_priority(turnstile->top_waiter); + assert(thread_real_global_priority(owner) >= top_priority); + td = thread_turnstile_td(owner); + + spinlock_lock(&td->lock); + turnstile_td_own(td, turnstile); + spinlock_unlock(&td->lock); +} + +void +turnstile_disown(struct turnstile *turnstile) +{ + struct turnstile_td *td; + struct thread *owner; + + owner = thread_self(); + assert(turnstile->owner == owner); + assert(!turnstile_empty(turnstile)); + + td = thread_turnstile_td(owner); + + spinlock_lock(&td->lock); + turnstile_td_disown(td, turnstile); + spinlock_unlock(&td->lock); +} diff --git a/kern/turnstile.h b/kern/turnstile.h new file mode 100644 index 00000000..21a5a1a9 --- /dev/null +++ b/kern/turnstile.h @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2017 Richard Braun. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Priority propagation capable sleep queues. + * + * Turnstiles are used to build sleeping synchronization primitives where + * ownership applies, such as mutexes. They allow threads with different + * priorities to contend on the same synchronization object without + * unbounded priority inversion. + */ + +#ifndef _KERN_TURNSTILE_H +#define _KERN_TURNSTILE_H + +#include <stdbool.h> +#include <stddef.h> + +#include <kern/plist.h> +#include <kern/spinlock.h> +#include <kern/thread.h> +#include <kern/turnstile_types.h> + +struct turnstile; + +/* + * Turnstile thread data. + */ +struct turnstile_td; + +#define turnstile_td_assert_lock(td) spinlock_assert_locked(&(td)->lock) + +/* + * Initialize turnstile thread data. + */ +static inline void +turnstile_td_init(struct turnstile_td *td) +{ + spinlock_init(&td->lock); + td->turnstile = NULL; + td->waiter = NULL; + plist_init(&td->owned_turnstiles); + td->top_global_priority = 0; +} + +/* + * Turnstile thread data locking functions. + */ + +static inline void +turnstile_td_lock(struct turnstile_td *td) +{ + spinlock_lock(&td->lock); +} + +static inline int +turnstile_td_trylock(struct turnstile_td *td) +{ + return spinlock_trylock(&td->lock); +} + +static inline void +turnstile_td_unlock(struct turnstile_td *td) +{ + spinlock_unlock(&td->lock); +} + +/* + * Functions managing the turnstile a thread is sleeping in. + */ + +static inline void +turnstile_td_set_turnstile(struct turnstile_td *td, + struct turnstile *turnstile) +{ + td->turnstile = turnstile; +} + +static inline struct turnstile * +turnstile_td_get_turnstile(const struct turnstile_td *td) +{ + return td->turnstile; +} + +/* + * Propagate priority starting at the thread containing the given thread data. + */ +void turnstile_td_propagate_priority(struct turnstile_td *td); + +/* + * Initialize the turnstile module. + */ +void turnstile_setup(void); + +/* + * Create/destroy a turnstile. + */ +struct turnstile * turnstile_create(void); +void turnstile_destroy(struct turnstile *turnstile); + +/* + * Acquire/release a turnstile. + * + * Acquiring a turnstile serializes all access and disables preemption. + */ +struct turnstile * turnstile_acquire(const void *sync_obj); +void turnstile_release(struct turnstile *turnstile); + +/* + * Lend/return a turnstile. + * + * A thread lends its private turnstile to the turnstile module in + * order to prepare its sleep. The turnstile obtained on lending + * is either the thread's turnstile, or an already existing turnstile + * for this synchronization object if another thread is waiting. + * + * When multiple threads are waiting on the same turnstile, the extra + * turnstiles lent are kept in an internal free list, used when threads + * are awaken to return a turnstile to them. + * + * Note that the turnstile returned may not be the one lent. 
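For additional context, here is a rough sketch of how a priority inheritance mutex could combine the lend/return protocol with ownership tracking. It is only an illustration, not the rtmutex code added by this commit: the example_pi_lock type, its atomic helpers (analogous to the stand-ins sketched earlier for the sleep queue example) and the exact unlock sequence are assumptions, and resetting the releasing thread's boosted priority is deliberately left out.

struct example_pi_lock {
    struct thread *owner;       /* hypothetical owner word */
};

static void
example_pi_lock_slow(struct example_pi_lock *lock)
{
    struct turnstile *turnstile;
    struct thread *owner;

    turnstile = turnstile_lend(lock);

    for (;;) {
        /*
         * example_pi_lock_try() stands for an atomic compare-and-swap
         * that installs thread_self() as owner and returns the previous
         * owner, or NULL on success; it is assumed for this sketch.
         */
        owner = example_pi_lock_try(lock);

        if (owner == NULL) {
            break;
        }

        /* Propagate the caller's priority along the ownership chain */
        turnstile_wait(turnstile, "pilock", owner);
    }

    /* Record ownership so that future waiters keep boosting this thread */
    turnstile_own(turnstile);
    turnstile_return(turnstile);
}

static void
example_pi_unlock_slow(struct example_pi_lock *lock)
{
    struct turnstile *turnstile;

    turnstile = turnstile_acquire(lock);

    if (turnstile == NULL) {
        /* Nobody is waiting */
        example_pi_lock_clear(lock);    /* hypothetical atomic clear */
        return;
    }

    /*
     * Release the owner word and the turnstile ownership while the
     * turnstile is held, so that waiters never observe a stale owner,
     * then wake up the highest priority waiter.
     */
    example_pi_lock_clear(lock);
    turnstile_disown(turnstile);
    turnstile_signal(turnstile);
    turnstile_release(turnstile);
}

A real implementation also has to handle the case where the lock was taken on the fast path without ever owning a turnstile, and the deboosting of the releasing thread; this sketch glosses over both, the latter being handled in the commit by the priority propagation machinery added to kern/thread.c and kern/turnstile.c.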
+ * + * The turnstile obtained when lending is automatically acquired. + */ +struct turnstile * turnstile_lend(const void *sync_obj); +void turnstile_return(struct turnstile *turnstile); + +/* + * Return true if the given turnstile has no waiters. + * + * The turnstile must be acquired when calling this function. + */ +bool turnstile_empty(const struct turnstile *turnstile); + +/* + * Wait for a wake up on the given turnstile. + * + * The turnstile must be lent when calling this function. It is + * released and later reacquired before returning from this function. + * + * The calling thread is considered a waiter as long as it didn't + * reacquire the turnstile. This means that signalling a turnstile + * has no visible effect on the number of waiters until the turnstile + * is released, e.g. if a single thread is waiting and another signals + * the turnstile, the turnstile is not immediately considered empty. + * + * If owner isn't NULL, it must refer to the thread currently owning + * the associated synchronization object. The priority of the caller + * is propagated to the chain of turnstiles and owners as necessary + * to prevent unbounded priority inversion. + */ +void turnstile_wait(struct turnstile *turnstile, const char *wchan, + struct thread *owner); + +/* + * Wake up a thread waiting on the given turnstile, if any. + * + * The turnstile must be acquired when calling this function. + * Since a turnstile must be lent (and in turn is automatically + * acquired) when waiting, and acquired in order to signal it, + * wake-ups are serialized and cannot be missed. + */ +void turnstile_signal(struct turnstile *turnstile); + +/* + * Own/disown a turnstile. + * + * The turnstile must be lent when taking ownership, acquired when + * releasing it. Owning has no effect on empty turnstiles. + * Conversely, an empty turnstile cannot be disowned. + * + * Ownership must be updated atomically with regard to the ownership + * of the associated synchronization object. + */ +void turnstile_own(struct turnstile *turnstile); +void turnstile_disown(struct turnstile *turnstile); + +#endif /* _KERN_TURNSTILE_H */ diff --git a/kern/turnstile_types.h b/kern/turnstile_types.h new file mode 100644 index 00000000..a95b691e --- /dev/null +++ b/kern/turnstile_types.h @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2017 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Isolated type definition used to avoid inclusion circular dependencies. + */ + +#ifndef _KERN_TURNSTILE_TYPES_H +#define _KERN_TURNSTILE_TYPES_H + +#include <kern/plist_types.h> +#include <kern/spinlock_types.h> + +struct turnstile; +struct turnstile_waiter; + +/* + * Per-thread turnstile data. + * + * The turnstile member indicates whether this thread is in a turnstile, + * and is only valid if the thread is not running. + * + * The waiter points to the structure a thread uses to queue itself on + * a turnstile. 
It is used to access a sleeping thread from another + * thread, e.g. on wake-ups or priority updates. + * + * The list of owned turnstiles is used by priority propagation to + * determine the top priority among all waiters, which is stored in + * the thread data so that turnstiles are quickly unlocked. + * + * Locking keys : + * (b) bucket + * (t) turnstile_td + */ +struct turnstile_td { + struct spinlock lock; + struct turnstile *turnstile; /* (t) */ + struct turnstile_waiter *waiter; /* (b,t) */ + struct plist owned_turnstiles; /* (t) */ + unsigned int top_global_priority; /* (t) */ + unsigned char top_sched_policy; /* (t) */ + unsigned short top_priority; /* (t) */ +}; + +#endif /* _KERN_TURNSTILE_TYPES_H */ diff --git a/test/test_mutex_pi.c b/test/test_mutex_pi.c new file mode 100644 index 00000000..1adcf627 --- /dev/null +++ b/test/test_mutex_pi.c @@ -0,0 +1,482 @@ +/* + * Copyright (c) 2017 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * This test module is a stress test, expected to never terminate, of + * priority inheritance with mutexes. It creates a priority inheritance + * tree by starting multiple threads manipulating multiple mutexes. + * + * Here is one intended state of the priority inheritance tree : + * + * + * C->M2-+ + * | + * D--+ +->B->M1->A + * | | + * +->M3-+ + * | + * E--+ + * + * + * M1,M2,M3,etc... are mutexes and A,B,C,etc... are threads. The thread + * priorities p(thread) are ordered such that p(A) < p(B) < p(C) etc... + * An arrow from a mutex to a thread indicates ownership, such that + * M1->A means that A owns M1. An arrow from a thread to a mutex indicates + * waiting, such that B->M1 means that B is waiting for M1 to be unlocked. + * + * In addition, thread B is actually many threads, each terminating after + * unlocking their mutexes. Also, the priority of thread C is regularly + * increased to p(E) + 1 and later restored to p(C). + * + * Here is the list of all the cases this test must cover : + * - Priority inheritance: All threads can get their real priority boosted. + * C can be boosted if it owns M2 and B waits for it while inheriting + * from E, and E can be boosted when p(C) is temporarily increased. + * - Return to normal priority: after releasing all locks, a thread must + * have reset its real priority to its user priority. + * - Priority changes: When the priority of C is increased, that priority + * must take precedence over all others. + * - Thread destruction resilience: Check that priority propagation never + * accesses a destroyed thread. + * + * Note that this test doesn't check that priority propagation correctly + * adjusts the top priority after lowering the priority of thread C back + * to p(C). + * + * In order to artificially create priority inversions, all threads run on + * separate processors, making this test require 5 processors. 
+ *
+ * TODO Use timers instead of busy-waiting so that binding to processors
+ * isn't required.
+ *
+ * The test should output a couple of messages about thread priorities
+ * being boosted, and then frequent updates from each thread to show
+ * they're all making progress. Thread B suffers from contention the most,
+ * so its report frequency should be lower. Thread A suffers from contention
+ * the least and should report progress most frequently. Because of
+ * contention from B, D and E on M3, D rarely gets boosted. The reason is
+ * that, when B owns the mutex, E is likely to wait on M3 soon enough that
+ * it will be awakened before D, preventing the conditions for priority
+ * inheritance from occurring.
+ *
+ * Note that the test uses regular mutexes instead of real-time mutexes,
+ * so that its behaviour can be analyzed for both types depending on
+ * build options.
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include <kern/cpumap.h>
+#include <kern/error.h>
+#include <kern/mutex.h>
+#include <kern/panic.h>
+#include <kern/printk.h>
+#include <kern/thread.h>
+#include <kern/turnstile.h>
+#include <test/test.h>
+
+#define TEST_PRIO_A (THREAD_SCHED_RT_PRIO_MIN + 1)
+#define TEST_PRIO_B (TEST_PRIO_A + 1)
+#define TEST_PRIO_C (TEST_PRIO_B + 1)
+#define TEST_PRIO_D (TEST_PRIO_C + 1)
+#define TEST_PRIO_E (TEST_PRIO_D + 1)
+
+#define TEST_NR_LOCK_LOOPS 500
+#define TEST_NR_CONSUME_CPU_LOOPS 10000000
+
+static struct mutex test_mutex_1;
+static struct mutex test_mutex_2;
+static struct mutex test_mutex_3;
+
+static const char *
+test_thread_from_priority(unsigned short priority)
+{
+    switch (priority) {
+    case TEST_PRIO_A:
+        return "a";
+    case TEST_PRIO_B:
+        return "b";
+    case TEST_PRIO_C:
+        return "c";
+    case TEST_PRIO_D:
+        return "d";
+    case TEST_PRIO_E:
+        return "e";
+    case TEST_PRIO_E + 1:
+        return "e+";
+    default:
+        panic("invalid priority %u", priority);
+    }
+}
+
+static char
+test_get_name(void)
+{
+    const char *name;
+    size_t length;
+
+    name = thread_self()->name;
+    length = strlen(name);
+    return name[length - 1];
+}
+
+static void
+test_consume_cpu(void)
+{
+    volatile unsigned int i;
+
+    for (i = 0; i < TEST_NR_CONSUME_CPU_LOOPS; i++);
+}
+
+static void
+test_check_initial_priority(void)
+{
+    unsigned short user_priority, real_priority;
+    struct thread *thread;
+
+    thread = thread_self();
+    user_priority = thread_user_priority(thread);
+    real_priority = thread_real_priority(thread);
+
+    if (user_priority != real_priority) {
+        panic("%c: invalid initial priority %hu",
+              test_get_name(), real_priority);
+    }
+}
+
+static void
+test_for_priority_boosted(unsigned short *highest_priority)
+{
+    unsigned short user_priority, real_priority;
+    struct turnstile_td *td;
+    struct thread *thread;
+
+    thread = thread_self();
+    td = thread_turnstile_td(thread);
+
+    turnstile_td_lock(td);
+
+    user_priority = thread_user_priority(thread);
+    real_priority = thread_real_priority(thread);
+
+    if (user_priority != real_priority) {
+        if (user_priority > real_priority) {
+            panic("%c: invalid real priority: %hu (boosted:%u)",
+                  test_get_name(), real_priority, thread->boosted);
+        }
+
+        if (real_priority > *highest_priority) {
+            printk("%c: real priority boosted to %s\n",
+                   test_get_name(), test_thread_from_priority(real_priority));
+            *highest_priority = real_priority;
+        }
+    }
+
+    turnstile_td_unlock(td);
+}
+
+static void
+test_for_priority_deboosted(void)
+{
+    unsigned short user_priority, real_priority;
+    struct turnstile_td *td;
+    struct thread *thread;
+
+    thread = thread_self();
+    td = 
thread_turnstile_td(thread); + + turnstile_td_lock(td); + + user_priority = thread_user_priority(thread); + real_priority = thread_real_priority(thread); + + if (user_priority != real_priority) { + panic("%c: real priority not reset (boosted:%d)", test_get_name(), thread->boosted); + } + + turnstile_td_unlock(td); +} + +static void +test_report_progress(unsigned int i) +{ + printk("%c:%u ", test_get_name(), i); +} + +static void +test_a(void *arg) +{ + unsigned short highest_priority; + unsigned int i, j; + + (void)arg; + + test_check_initial_priority(); + + highest_priority = 0; + + for (i = 1; /* no condition */; i++) { + for (j = 0; j < TEST_NR_LOCK_LOOPS; j++) { + mutex_lock(&test_mutex_1); + test_consume_cpu(); + test_for_priority_boosted(&highest_priority); + mutex_unlock(&test_mutex_1); + + test_for_priority_deboosted(); + + test_consume_cpu(); + } + + test_report_progress(i); + } +} + +static void +test_b(void *arg) +{ + test_check_initial_priority(); + + mutex_lock(&test_mutex_3); + mutex_lock(&test_mutex_2); + mutex_lock(&test_mutex_1); + test_consume_cpu(); + test_for_priority_boosted(arg); + mutex_unlock(&test_mutex_1); + test_consume_cpu(); + mutex_unlock(&test_mutex_2); + test_consume_cpu(); + mutex_unlock(&test_mutex_3); + + /* + * It would be better if the thread could immediately terminate, but + * it's also the thread that locks multiple mutexes, so make sure it + * was correctly deboosted. This should be cheap enough to not matter + * much. + */ + test_for_priority_deboosted(); +} + +static void +test_manage_b(void *arg) +{ + unsigned short highest_priority; + struct thread_attr attr; + struct thread *thread_b; + struct cpumap *cpumap; + unsigned int i, j; + int error; + + (void)arg; + + error = cpumap_create(&cpumap); + error_check(error, "cpumap_create"); + cpumap_zero(cpumap); + cpumap_set(cpumap, 1); + thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_b"); + thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO); + thread_attr_set_priority(&attr, TEST_PRIO_B); + thread_attr_set_cpumap(&attr, cpumap); + cpumap_destroy(cpumap); + + highest_priority = 0; + + for (i = 1; /* no condition */; i++) { + for (j = 0; j < TEST_NR_LOCK_LOOPS; j++) { + error = thread_create(&thread_b, &attr, test_b, &highest_priority); + error_check(error, "thread_create"); + thread_join(thread_b); + + test_consume_cpu(); + } + + printk("b:%u ", i); + } +} + +static void +test_c(void *arg) +{ + unsigned short highest_priority; + unsigned int i, j; + + (void)arg; + + test_check_initial_priority(); + + highest_priority = 0; + + for (i = 1; /* no condition */; i++) { + for (j = 0; j < TEST_NR_LOCK_LOOPS; j++) { + mutex_lock(&test_mutex_2); + test_consume_cpu(); + test_for_priority_boosted(&highest_priority); + mutex_unlock(&test_mutex_2); + + test_for_priority_deboosted(); + + test_consume_cpu(); + } + + test_report_progress(i); + } +} + +static void +test_chprio_c(void *arg) +{ + struct thread *thread_c; + + thread_c = arg; + + test_consume_cpu(); + + for (;;) { + thread_setscheduler(thread_c, THREAD_SCHED_POLICY_FIFO, + TEST_PRIO_E + 1); + thread_setscheduler(thread_c, THREAD_SCHED_POLICY_FIFO, + TEST_PRIO_C); + } +} + +static void +test_d(void *arg) +{ + unsigned short highest_priority; + unsigned int i, j; + + (void)arg; + + test_check_initial_priority(); + + highest_priority = 0; + + for (i = 1; /* no condition */; i++) { + for (j = 0; j < TEST_NR_LOCK_LOOPS; j++) { + mutex_lock(&test_mutex_3); + test_consume_cpu(); + test_for_priority_boosted(&highest_priority); + 
mutex_unlock(&test_mutex_3); + + test_for_priority_deboosted(); + + test_consume_cpu(); + } + + test_report_progress(i); + } +} + +static void +test_e(void *arg) +{ + unsigned short highest_priority; + unsigned int i, j; + + (void)arg; + + test_check_initial_priority(); + + highest_priority = 0; + + for (i = 1; /* no condition */; i++) { + for (j = 0; j < TEST_NR_LOCK_LOOPS; j++) { + mutex_lock(&test_mutex_3); + test_consume_cpu(); + test_for_priority_boosted(&highest_priority); + mutex_unlock(&test_mutex_3); + + test_for_priority_deboosted(); + + test_consume_cpu(); + } + + test_report_progress(i); + } +} + +void +test_setup(void) +{ + struct thread_attr attr; + struct thread *thread; + struct cpumap *cpumap; + int error; + + mutex_init(&test_mutex_1); + mutex_init(&test_mutex_2); + mutex_init(&test_mutex_3); + + error = cpumap_create(&cpumap); + error_check(error, "cpumap_create"); + + cpumap_zero(cpumap); + cpumap_set(cpumap, 0); + thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_a"); + thread_attr_set_detached(&attr); + thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO); + thread_attr_set_priority(&attr, TEST_PRIO_A); + thread_attr_set_cpumap(&attr, cpumap); + error = thread_create(&thread, &attr, test_a, NULL); + error_check(error, "thread_create"); + + cpumap_zero(cpumap); + cpumap_set(cpumap, 1); + thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_manage_b"); + thread_attr_set_detached(&attr); + thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO); + thread_attr_set_priority(&attr, TEST_PRIO_B); + thread_attr_set_cpumap(&attr, cpumap); + error = thread_create(&thread, &attr, test_manage_b, NULL); + error_check(error, "thread_create"); + + cpumap_zero(cpumap); + cpumap_set(cpumap, 2); + thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_c"); + thread_attr_set_detached(&attr); + thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO); + thread_attr_set_priority(&attr, TEST_PRIO_C); + thread_attr_set_cpumap(&attr, cpumap); + error = thread_create(&thread, &attr, test_c, NULL); + error_check(error, "thread_create"); + + thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_chprio_c"); + thread_attr_set_detached(&attr); + error = thread_create(&thread, &attr, test_chprio_c, thread); + error_check(error, "thread_create"); + + cpumap_zero(cpumap); + cpumap_set(cpumap, 3); + thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_d"); + thread_attr_set_detached(&attr); + thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO); + thread_attr_set_priority(&attr, TEST_PRIO_D); + thread_attr_set_cpumap(&attr, cpumap); + error = thread_create(&thread, &attr, test_d, NULL); + error_check(error, "thread_create"); + + cpumap_zero(cpumap); + cpumap_set(cpumap, 4); + thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_e"); + thread_attr_set_detached(&attr); + thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO); + thread_attr_set_priority(&attr, TEST_PRIO_E); + thread_attr_set_cpumap(&attr, cpumap); + error = thread_create(&thread, &attr, test_e, NULL); + error_check(error, "thread_create"); + + cpumap_destroy(cpumap); +} |
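
The kern/turnstile.h interface above describes turnstiles as the building block for sleeping synchronization primitives where ownership applies, such as mutexes, but the mutex and rtmutex code from this merge is not part of this section. The sketch below is therefore purely illustrative and not code from the merge: it shows one plausible way the documented calls could be combined in a simple sleeping lock. The sketch_lock type and functions are hypothetical, the sketch assumes turnstile_acquire() returns NULL when no turnstile has been lent for the synchronization object, and it deliberately omits the lock-free fast path and atomic owner word a real mutex would need, so it only demonstrates the call sequence and the documented preconditions.

#include <stddef.h>

#include <kern/thread.h>
#include <kern/turnstile.h>

struct sketch_lock {
    struct thread *owner;           /* Accessed with the turnstile held */
};

static void
sketch_lock_acquire(struct sketch_lock *lock)
{
    struct turnstile *turnstile;

    /* Lend the caller's private turnstile; the turnstile obtained is acquired */
    turnstile = turnstile_lend(lock);

    while (lock->owner != NULL) {
        /* Sleep, propagating the caller's priority to the current owner */
        turnstile_wait(turnstile, "sketch_lock", lock->owner);
    }

    lock->owner = thread_self();

    /* Take ownership; this has no effect if no other thread is waiting */
    turnstile_own(turnstile);

    turnstile_return(turnstile);
}

static void
sketch_lock_release(struct sketch_lock *lock)
{
    struct turnstile *turnstile;

    /* Assumed to return NULL when no thread has lent a turnstile */
    turnstile = turnstile_acquire(lock);

    if (turnstile == NULL) {
        lock->owner = NULL;
        return;
    }

    if (!turnstile_empty(turnstile)) {
        /*
         * Waiters exist: the turnstile is assumed to be owned by this
         * thread, in sync with lock ownership, so give ownership up
         * before waking a waiter.
         */
        turnstile_disown(turnstile);
        turnstile_signal(turnstile);
    }

    lock->owner = NULL;
    turnstile_release(turnstile);
}

A consequence of the lend/return scheme worth noting: since every blocking thread brings its own private turnstile, and extra turnstiles simply accumulate in the internal free list until waiters are awakened, the sleep path presumably never needs to allocate memory, and the number of turnstiles in use is bounded by the number of threads.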