author     Remy Noel <mocramis@gmail.com>    2018-02-25 17:16:37 +0100
committer  Remy Noel <mocramis@gmail.com>    2018-02-25 17:25:49 +0100
commit     cba7db6931932953fd0113a70019049234eb0b08
tree       6d0c14e881cee20e0dca1807b6177d5246eb3f85
parent     652168fe3d867eec17ac7fa318c8743d524ef40f
parent     e8265363384bc3f8e6bb31466a3590ce27811efa
Merge branch 'master' into perfmon
73 files changed, 1839 insertions, 1281 deletions
diff --git a/arch/x86/machine/boot.c b/arch/x86/machine/boot.c index afc41d7..6934896 100644 --- a/arch/x86/machine/boot.c +++ b/arch/x86/machine/boot.c @@ -350,14 +350,17 @@ boot_setup_paging(struct multiboot_raw_info *mbi, unsigned long eax) return pmap_setup_paging(); } +#ifdef CONFIG_X86_PAE +#define BOOT_PAE_LABEL " PAE" +#else /* CONFIG_X86_PAE */ +#define BOOT_PAE_LABEL +#endif /* CONFIG_X86_PAE */ + void __init boot_log_info(void) { log_info(KERNEL_NAME "/" CONFIG_SUBARCH " " KERNEL_VERSION -#ifdef CONFIG_X86_PAE - " PAE" -#endif /* CONFIG_X86_PAE */ - ); + BOOT_PAE_LABEL); } static void * __init diff --git a/arch/x86/machine/cpu.h b/arch/x86/machine/cpu.h index 7675822..d7a6498 100644 --- a/arch/x86/machine/cpu.h +++ b/arch/x86/machine/cpu.h @@ -24,6 +24,9 @@ * L1 cache line size. * * XXX Use this value until processor selection is available. + * + * TODO Add macros to specifically align to the cache line size, and to + * do so only in SMP configurations. */ #define CPU_L1_SIZE 64 @@ -706,6 +709,7 @@ INIT_OP_DECLARE(cpu_setup); /* * This init operation provides : * - cpu_count() + * - access to percpu variables on all processors */ INIT_OP_DECLARE(cpu_mp_probe); diff --git a/arch/x86/machine/lapic.c b/arch/x86/machine/lapic.c index 1a81a18..180a1a2 100644 --- a/arch/x86/machine/lapic.c +++ b/arch/x86/machine/lapic.c @@ -95,7 +95,7 @@ struct lapic_register { uint32_t reserved0; uint32_t reserved1; uint32_t reserved2; -} __packed; +}; /* * Local APIC register map. @@ -171,7 +171,7 @@ struct lapic_map { const struct lapic_register reserved19; struct lapic_register timer_dcr; const struct lapic_register reserved20; -} __packed; +}; /* * Address where local APIC registers are mapped. diff --git a/arch/x86/machine/pmap.c b/arch/x86/machine/pmap.c index 5ed0502..2b4c23c 100644 --- a/arch/x86/machine/pmap.c +++ b/arch/x86/machine/pmap.c @@ -1546,6 +1546,7 @@ pmap_update(struct pmap *pmap) spinlock_unlock(&queue->lock); } + /* TODO Improve scalability */ cpumap_for_each(&oplist->cpumap, cpu) { request = &array->requests[cpu]; diff --git a/arch/x86/machine/strace.h b/arch/x86/machine/strace.h index 2f686a0..b55f6b8 100644 --- a/arch/x86/machine/strace.h +++ b/arch/x86/machine/strace.h @@ -16,6 +16,8 @@ * * * Stack tracing. + * + * TODO Make it possible to debug without the frame pointer. */ #ifndef _X86_STRACE_H diff --git a/doc/intro.9.txt b/doc/intro.9.txt index b3367ba..5975b8b 100644 --- a/doc/intro.9.txt +++ b/doc/intro.9.txt @@ -88,10 +88,10 @@ module:kern/condition:: module:arch/cpu:: Architecture-specific processor interface which provides interrupt control functions. -module:kern/llsync:: - Lockless synchronization, similar to Linux Read-Copy Update (RCU). module:kern/mutex:: Mutual exclusion lock. +module:kern/rcu:: + Read-Copy Update (RCU) lockless synchronization. module:kern/semaphore:: Semaphore. module:kern/sleepq:: diff --git a/doc/style.9.txt b/doc/style.9.txt index 1b5108f..01a84e7 100644 --- a/doc/style.9.txt +++ b/doc/style.9.txt @@ -587,7 +587,7 @@ The first line being a short description, usable as an email subject, there is no need for a final dot. This also keeps it slightly shorter. -------------------------------------------------------------------------------- -kern/{llsync,rdxtree}: don't use llsync until it's ready +kern/{sleepq,turnstile}: handle spurious wakeups -------------------------------------------------------------------------------- This format is based on brace expansion in the bash shell. 
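The boot.c hunk above shows a small refactoring pattern worth noting: the #ifdef block that used to sit inside the log_info() argument list is hoisted into a label macro that is always defined, so the string concatenation itself stays free of preprocessor conditionals. A minimal standalone sketch of the same pattern, using a hypothetical CONFIG_SMP option rather than anything from the tree:

--------------------------------------------------------------------------------
#include <stdio.h>

/*
 * CONFIG_SMP stands in for any build option. When it is undefined, the
 * label expands to nothing and the concatenation is unaffected.
 */
#ifdef CONFIG_SMP
#define BOOT_SMP_LABEL " SMP"
#else /* CONFIG_SMP */
#define BOOT_SMP_LABEL
#endif /* CONFIG_SMP */

int main(void)
{
    /* Adjacent string literals are concatenated at compile time. */
    printf("kernel 1.0" BOOT_SMP_LABEL "\n");
    return 0;
}
--------------------------------------------------------------------------------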
diff --git a/kern/Kconfig b/kern/Kconfig index 417017b..39ed902 100644 --- a/kern/Kconfig +++ b/kern/Kconfig @@ -102,4 +102,10 @@ config MUTEX_DEBUG ---help--- Enable mutex debugging and instrumentation. +config SPINLOCK_DEBUG + bool "Spinlock debugging" + default n + ---help--- + Enable spinlock ownership tracking. + endmenu diff --git a/kern/Makefile b/kern/Makefile index 3dd02f5..3779380 100644 --- a/kern/Makefile +++ b/kern/Makefile @@ -12,7 +12,6 @@ x15_SOURCES-y += \ kern/intr.c \ kern/kernel.c \ kern/kmem.c \ - kern/llsync.c \ kern/log.c \ kern/mutex.c \ kern/panic.c \ @@ -20,6 +19,7 @@ x15_SOURCES-y += \ kern/plist.c \ kern/printf.c \ kern/rbtree.c \ + kern/rcu.c \ kern/rdxtree.c \ kern/rtmutex.c \ kern/semaphore.c \ diff --git a/kern/atomic.h b/kern/atomic.h index 6c0105d..940720a 100644 --- a/kern/atomic.h +++ b/kern/atomic.h @@ -16,6 +16,9 @@ * * * Type-generic memory-model aware atomic operations. + * + * TODO Replace mentions of "memory barriers" throughout the code with + * C11 memory model terminology. */ #ifndef _KERN_ATOMIC_H @@ -113,6 +116,15 @@ MACRO_END #endif /* + * Thread fences. + */ + +#define atomic_fence_acquire() __atomic_thread_fence(ATOMIC_ACQUIRE) +#define atomic_fence_release() __atomic_thread_fence(ATOMIC_RELEASE) +#define atomic_fence_acq_rel() __atomic_thread_fence(ATOMIC_ACQ_REL) +#define atomic_fence_seq_cst() __atomic_thread_fence(ATOMIC_SEQ_CST) + +/* * Common shortcuts. */ diff --git a/kern/clock.c b/kern/clock.c index 19f823c..0b72a8f 100644 --- a/kern/clock.c +++ b/kern/clock.c @@ -22,8 +22,8 @@ #include <kern/clock.h> #include <kern/clock_i.h> #include <kern/init.h> -#include <kern/llsync.h> #include <kern/percpu.h> +#include <kern/rcu.h> #include <kern/sref.h> #include <kern/syscnt.h> #include <kern/thread.h> @@ -60,7 +60,6 @@ clock_setup(void) } INIT_OP_DEFINE(clock_setup, - INIT_OP_DEP(boot_setup_intr, true), INIT_OP_DEP(cpu_mp_probe, true), INIT_OP_DEP(syscnt_setup, true)); @@ -90,7 +89,7 @@ void clock_tick_intr(void) } timer_report_periodic_event(); - llsync_report_periodic_event(); + rcu_report_periodic_event(); sref_report_periodic_event(); work_report_periodic_event(); thread_report_periodic_event(); diff --git a/kern/clock.h b/kern/clock.h index 854c146..3b695d5 100644 --- a/kern/clock.h +++ b/kern/clock.h @@ -116,6 +116,4 @@ clock_time_occurred(uint64_t t, uint64_t ref) void clock_tick_intr(void); -INIT_OP_DECLARE(clock_setup); - #endif /* _KERN_CLOCK_H */ diff --git a/kern/clock_i.h b/kern/clock_i.h index 830f48c..8b98f5e 100644 --- a/kern/clock_i.h +++ b/kern/clock_i.h @@ -18,6 +18,7 @@ #ifndef _KERN_CLOCK_I_H #define _KERN_CLOCK_I_H +#include <stdalign.h> #include <stdint.h> #include <machine/cpu.h> diff --git a/kern/condition.c b/kern/condition.c index e6d6595..c407e94 100644 --- a/kern/condition.c +++ b/kern/condition.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2017 Richard Braun. + * Copyright (c) 2013-2018 Richard Braun. 
* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -27,46 +27,21 @@ #include <kern/condition_types.h> #include <kern/mutex.h> #include <kern/sleepq.h> -#include <kern/thread.h> static int condition_wait_common(struct condition *condition, struct mutex *mutex, bool timed, uint64_t ticks) { - struct condition *last_cond; struct sleepq *sleepq; unsigned long flags; int error; mutex_assert_locked(mutex); - /* - * Special case : - * - * mutex_lock(lock); - * - * for (;;) { - * while (!done) { - * condition_wait(condition, lock); - * } - * - * do_something(); - * } - * - * Pull the last condition before unlocking the mutex to prevent - * mutex_unlock() from reacquiring the condition sleep queue. - */ - last_cond = thread_pull_last_cond(); - sleepq = sleepq_lend(condition, true, &flags); mutex_unlock(mutex); - if (last_cond != NULL) { - assert(last_cond == condition); - sleepq_wakeup(sleepq); - } - if (timed) { error = sleepq_timedwait(sleepq, "cond", ticks); } else { @@ -74,10 +49,6 @@ condition_wait_common(struct condition *condition, struct mutex *mutex, error = 0; } - if (!error) { - thread_set_last_cond(condition); - } - sleepq_return(sleepq, flags); mutex_lock(mutex); @@ -134,20 +105,3 @@ condition_broadcast(struct condition *condition) sleepq_release(sleepq, flags); } - -void -condition_wakeup(struct condition *condition) -{ - struct sleepq *sleepq; - unsigned long flags; - - sleepq = sleepq_acquire(condition, true, &flags); - - if (sleepq == NULL) { - return; - } - - sleepq_wakeup(sleepq); - - sleepq_release(sleepq, flags); -} diff --git a/kern/condition.h b/kern/condition.h index 90a59f0..2082bee 100644 --- a/kern/condition.h +++ b/kern/condition.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2017 Richard Braun. + * Copyright (c) 2013-2018 Richard Braun. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -65,16 +65,4 @@ int condition_timedwait(struct condition *condition, void condition_signal(struct condition *condition); void condition_broadcast(struct condition *condition); -/* - * Wake up a pending thread. - * - * This function isn't part of the standard condition variable interface. - * It is used to chain wake-ups to avoid the thundering herd effect. - * When broadcasting a condition variable, a single thread is actually - * awaken. Other threads become "pending waiters", still asleep but - * eligible for wake-up when the mutex associated to the condition variable, - * relocked when returning from condition_wait(), is finally unlocked. - */ -void condition_wakeup(struct condition *condition); - #endif /* _KERN_CONDITION_H */ diff --git a/kern/hlist.h b/kern/hlist.h index e105cb8..83d64fe 100644 --- a/kern/hlist.h +++ b/kern/hlist.h @@ -25,8 +25,8 @@ #include <stddef.h> #include <kern/hlist_types.h> -#include <kern/llsync.h> #include <kern/macros.h> +#include <kern/rcu.h> struct hlist; @@ -266,25 +266,25 @@ for (entry = hlist_first_entry(list, typeof(*entry), member), \ * Return the first node of a list. */ static inline struct hlist_node * -hlist_llsync_first(const struct hlist *list) +hlist_rcu_first(const struct hlist *list) { - return llsync_load_ptr(list->first); + return rcu_load_ptr(list->first); } /* * Return the node next to the given node. 
*/ static inline struct hlist_node * -hlist_llsync_next(const struct hlist_node *node) +hlist_rcu_next(const struct hlist_node *node) { - return llsync_load_ptr(node->next); + return rcu_load_ptr(node->next); } /* * Insert a node at the head of a list. */ static inline void -hlist_llsync_insert_head(struct hlist *list, struct hlist_node *node) +hlist_rcu_insert_head(struct hlist *list, struct hlist_node *node) { struct hlist_node *first; @@ -296,26 +296,26 @@ hlist_llsync_insert_head(struct hlist *list, struct hlist_node *node) first->pprev = &node->next; } - llsync_store_ptr(list->first, node); + rcu_store_ptr(list->first, node); } /* * Insert a node before another node. */ static inline void -hlist_llsync_insert_before(struct hlist_node *next, struct hlist_node *node) +hlist_rcu_insert_before(struct hlist_node *next, struct hlist_node *node) { node->next = next; node->pprev = next->pprev; next->pprev = &node->next; - llsync_store_ptr(*node->pprev, node); + rcu_store_ptr(*node->pprev, node); } /* * Insert a node after another node. */ static inline void -hlist_llsync_insert_after(struct hlist_node *prev, struct hlist_node *node) +hlist_rcu_insert_after(struct hlist_node *prev, struct hlist_node *node) { node->next = prev->next; node->pprev = &prev->next; @@ -324,48 +324,48 @@ hlist_llsync_insert_after(struct hlist_node *prev, struct hlist_node *node) node->next->pprev = &node->next; } - llsync_store_ptr(prev->next, node); + rcu_store_ptr(prev->next, node); } /* * Remove a node from a list. */ static inline void -hlist_llsync_remove(struct hlist_node *node) +hlist_rcu_remove(struct hlist_node *node) { if (node->next != NULL) { node->next->pprev = node->pprev; } - llsync_store_ptr(*node->pprev, node->next); + rcu_store_ptr(*node->pprev, node->next); } /* * Macro that evaluates to the address of the structure containing the * given node based on the given type and member. */ -#define hlist_llsync_entry(node, type, member) \ - structof(llsync_load_ptr(node), type, member) +#define hlist_rcu_entry(node, type, member) \ + structof(rcu_load_ptr(node), type, member) /* * Get the first entry of a list. */ -#define hlist_llsync_first_entry(list, type, member) \ +#define hlist_rcu_first_entry(list, type, member) \ MACRO_BEGIN \ struct hlist_node *___first; \ \ - ___first = hlist_llsync_first(list); \ + ___first = hlist_rcu_first(list); \ hlist_end(___first) ? NULL : hlist_entry(___first, type, member); \ MACRO_END /* * Get the entry next to the given entry. */ -#define hlist_llsync_next_entry(entry, member) \ +#define hlist_rcu_next_entry(entry, member) \ MACRO_BEGIN \ struct hlist_node *___next; \ \ - ___next = hlist_llsync_next(&entry->member); \ + ___next = hlist_rcu_next(&entry->member); \ hlist_end(___next) \ ? NULL \ : hlist_entry(___next, typeof(*entry), member); \ @@ -374,17 +374,17 @@ MACRO_END /* * Forge a loop to process all nodes of a list. */ -#define hlist_llsync_for_each(list, node) \ -for (node = hlist_llsync_first(list); \ +#define hlist_rcu_for_each(list, node) \ +for (node = hlist_rcu_first(list); \ !hlist_end(node); \ - node = hlist_llsync_next(node)) + node = hlist_rcu_next(node)) /* * Forge a loop to process all entries of a list. 
*/ -#define hlist_llsync_for_each_entry(list, entry, member) \ -for (entry = hlist_llsync_first_entry(list, typeof(*entry), member); \ +#define hlist_rcu_for_each_entry(list, entry, member) \ +for (entry = hlist_rcu_first_entry(list, typeof(*entry), member); \ entry != NULL; \ - entry = hlist_llsync_next_entry(entry, member)) + entry = hlist_rcu_next_entry(entry, member)) #endif /* _KERN_HLIST_H */ diff --git a/kern/list.h b/kern/list.h index f48d7d7..7ea3992 100644 --- a/kern/list.h +++ b/kern/list.h @@ -25,8 +25,8 @@ #include <stddef.h> #include <kern/list_types.h> -#include <kern/llsync.h> #include <kern/macros.h> +#include <kern/rcu.h> /* * Structure used as both head and node. @@ -385,18 +385,18 @@ for (entry = list_last_entry(list, typeof(*entry), member), \ * Return the first node of a list. */ static inline struct list * -list_llsync_first(const struct list *list) +list_rcu_first(const struct list *list) { - return llsync_load_ptr(list->next); + return rcu_load_ptr(list->next); } /* * Return the node next to the given node. */ static inline struct list * -list_llsync_next(const struct list *node) +list_rcu_next(const struct list *node) { - return llsync_load_ptr(node->next); + return rcu_load_ptr(node->next); } /* @@ -405,11 +405,11 @@ list_llsync_next(const struct list *node) * This function is private. */ static inline void -list_llsync_add(struct list *prev, struct list *next, struct list *node) +list_rcu_add(struct list *prev, struct list *next, struct list *node) { node->next = next; node->prev = prev; - llsync_store_ptr(prev->next, node); + rcu_store_ptr(prev->next, node); next->prev = node; } @@ -417,36 +417,36 @@ list_llsync_add(struct list *prev, struct list *next, struct list *node) * Insert a node at the head of a list. */ static inline void -list_llsync_insert_head(struct list *list, struct list *node) +list_rcu_insert_head(struct list *list, struct list *node) { - list_llsync_add(list, list->next, node); + list_rcu_add(list, list->next, node); } /* * Insert a node at the tail of a list. */ static inline void -list_llsync_insert_tail(struct list *list, struct list *node) +list_rcu_insert_tail(struct list *list, struct list *node) { - list_llsync_add(list->prev, list, node); + list_rcu_add(list->prev, list, node); } /* * Insert a node before another node. */ static inline void -list_llsync_insert_before(struct list *next, struct list *node) +list_rcu_insert_before(struct list *next, struct list *node) { - list_llsync_add(next->prev, next, node); + list_rcu_add(next->prev, next, node); } /* * Insert a node after another node. */ static inline void -list_llsync_insert_after(struct list *prev, struct list *node) +list_rcu_insert_after(struct list *prev, struct list *node) { - list_llsync_add(prev, prev->next, node); + list_rcu_add(prev, prev->next, node); } /* @@ -455,18 +455,18 @@ list_llsync_insert_after(struct list *prev, struct list *node) * After completion, the node is stale. */ static inline void -list_llsync_remove(struct list *node) +list_rcu_remove(struct list *node) { node->next->prev = node->prev; - llsync_store_ptr(node->prev->next, node->next); + rcu_store_ptr(node->prev->next, node->next); } /* * Macro that evaluates to the address of the structure containing the * given node based on the given type and member. */ -#define list_llsync_entry(node, type, member) \ - structof(llsync_load_ptr(node), type, member) +#define list_rcu_entry(node, type, member) \ + structof(rcu_load_ptr(node), type, member) /* * Get the first entry of a list. 
@@ -475,13 +475,13 @@ list_llsync_remove(struct list *node) * the node pointer can only be read once, preventing the combination * of lockless list_empty()/list_first_entry() variants. */ -#define list_llsync_first_entry(list, type, member) \ +#define list_rcu_first_entry(list, type, member) \ MACRO_BEGIN \ struct list *___list; \ struct list *___first; \ \ ___list = (list); \ - ___first = list_llsync_first(___list); \ + ___first = list_rcu_first(___list); \ list_end(___list, ___first) \ ? NULL \ : list_entry(___first, type, member); \ @@ -494,29 +494,29 @@ MACRO_END * the node pointer can only be read once, preventing the combination * of lockless list_empty()/list_next_entry() variants. */ -#define list_llsync_next_entry(entry, member) \ - list_llsync_first_entry(&entry->member, typeof(*entry), member) +#define list_rcu_next_entry(entry, member) \ + list_rcu_first_entry(&entry->member, typeof(*entry), member) /* * Forge a loop to process all nodes of a list. * * The node must not be altered during the loop. */ -#define list_llsync_for_each(list, node) \ -for (node = list_llsync_first(list); \ +#define list_rcu_for_each(list, node) \ +for (node = list_rcu_first(list); \ !list_end(list, node); \ - node = list_llsync_next(node)) + node = list_rcu_next(node)) /* * Forge a loop to process all entries of a list. * * The entry node must not be altered during the loop. */ -#define list_llsync_for_each_entry(list, entry, member) \ -for (entry = list_llsync_entry(list_first(list), \ +#define list_rcu_for_each_entry(list, entry, member) \ +for (entry = list_rcu_entry(list_first(list), \ typeof(*entry), member); \ !list_end(list, &entry->member); \ - entry = list_llsync_entry(list_next(&entry->member), \ + entry = list_rcu_entry(list_next(&entry->member), \ typeof(*entry), member)) #endif /* _KERN_LIST_H */ diff --git a/kern/llsync.c b/kern/llsync.c deleted file mode 100644 index 36233fa..0000000 --- a/kern/llsync.c +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Copyright (c) 2013-2014 Richard Braun. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * - * The method used by this module is described in the expired US patent - * 4809168, "Passive Serialization in a Multitasking Environment". It is - * similar to "Classic RCU (Read-Copy Update)" as found in Linux 2.6, with - * the notable difference that RCU actively starts grace periods, where - * passive serialization waits for two sequential "multiprocess checkpoints" - * (renamed global checkpoints in this implementation) to occur. - * - * It is used instead of RCU because of patents that may not allow writing - * an implementation not based on the Linux code (see - * http://lists.lttng.org/pipermail/lttng-dev/2013-May/020305.html). As - * patents expire, this module could be reworked to become a true RCU - * implementation. In the mean time, the module interface was carefully - * designed to be similar to RCU. 
- * - * TODO Gracefully handle large amounts of deferred works. - */ - -#include <assert.h> -#include <stdbool.h> -#include <stddef.h> - -#include <kern/condition.h> -#include <kern/cpumap.h> -#include <kern/init.h> -#include <kern/list.h> -#include <kern/log.h> -#include <kern/llsync.h> -#include <kern/llsync_i.h> -#include <kern/macros.h> -#include <kern/mutex.h> -#include <kern/percpu.h> -#include <kern/spinlock.h> -#include <kern/syscnt.h> -#include <kern/work.h> -#include <kern/thread.h> -#include <machine/cpu.h> - -/* - * Initial global checkpoint ID. - * - * Set to a high value to make sure overflows are correctly handled. - */ -#define LLSYNC_INITIAL_GCID ((unsigned int)-10) - -/* - * Number of pending works beyond which to issue a warning. - */ -#define LLSYNC_NR_PENDING_WORKS_WARN 10000 - -struct llsync_data llsync_data; -struct llsync_cpu_data llsync_cpu_data __percpu; - -struct llsync_waiter { - struct work work; - struct mutex lock; - struct condition cond; - int done; -}; - -static bool llsync_is_ready __read_mostly = false; - -bool -llsync_ready(void) -{ - return llsync_is_ready; -} - -static int __init -llsync_setup(void) -{ - struct llsync_cpu_data *cpu_data; - unsigned int i; - - spinlock_init(&llsync_data.lock); - work_queue_init(&llsync_data.queue0); - work_queue_init(&llsync_data.queue1); - syscnt_register(&llsync_data.sc_global_checkpoints, - "llsync_global_checkpoints"); - syscnt_register(&llsync_data.sc_periodic_checkins, - "llsync_periodic_checkins"); - syscnt_register(&llsync_data.sc_failed_periodic_checkins, - "llsync_failed_periodic_checkins"); - llsync_data.gcid.value = LLSYNC_INITIAL_GCID; - - for (i = 0; i < cpu_count(); i++) { - cpu_data = percpu_ptr(llsync_cpu_data, i); - work_queue_init(&cpu_data->queue0); - } - - return 0; -} - -INIT_OP_DEFINE(llsync_setup, - INIT_OP_DEP(log_setup, true), - INIT_OP_DEP(mutex_setup, true), - INIT_OP_DEP(spinlock_setup, true), - INIT_OP_DEP(syscnt_setup, true), - INIT_OP_DEP(thread_bootstrap, true), - INIT_OP_DEP(work_setup, true)); - -static void -llsync_process_global_checkpoint(void) -{ - struct work_queue queue; - unsigned int nr_works; - - assert(cpumap_find_first(&llsync_data.pending_checkpoints) == -1); - assert(llsync_data.nr_pending_checkpoints == 0); - - nr_works = work_queue_nr_works(&llsync_data.queue0) - + work_queue_nr_works(&llsync_data.queue1); - - /* TODO Handle hysteresis */ - if (!llsync_data.no_warning && (nr_works >= LLSYNC_NR_PENDING_WORKS_WARN)) { - llsync_data.no_warning = 1; - log_warning("llsync: large number of pending works\n"); - } - - if (llsync_data.nr_registered_cpus == 0) { - work_queue_concat(&llsync_data.queue1, &llsync_data.queue0); - work_queue_init(&llsync_data.queue0); - } else { - cpumap_copy(&llsync_data.pending_checkpoints, &llsync_data.registered_cpus); - llsync_data.nr_pending_checkpoints = llsync_data.nr_registered_cpus; - } - - work_queue_transfer(&queue, &llsync_data.queue1); - work_queue_transfer(&llsync_data.queue1, &llsync_data.queue0); - work_queue_init(&llsync_data.queue0); - - if (work_queue_nr_works(&queue) != 0) { - work_queue_schedule(&queue, 0); - } - - llsync_data.gcid.value++; - syscnt_inc(&llsync_data.sc_global_checkpoints); -} - -static void -llsync_flush_works(struct llsync_cpu_data *cpu_data) -{ - if (work_queue_nr_works(&cpu_data->queue0) == 0) { - return; - } - - work_queue_concat(&llsync_data.queue0, &cpu_data->queue0); - work_queue_init(&cpu_data->queue0); -} - -static void -llsync_commit_checkpoint(unsigned int cpu) -{ - int pending; - - pending = 
cpumap_test(&llsync_data.pending_checkpoints, cpu); - - if (!pending) { - return; - } - - cpumap_clear(&llsync_data.pending_checkpoints, cpu); - llsync_data.nr_pending_checkpoints--; - - if (llsync_data.nr_pending_checkpoints == 0) { - llsync_process_global_checkpoint(); - } -} - -void -llsync_register(void) -{ - struct llsync_cpu_data *cpu_data; - unsigned long flags; - unsigned int cpu; - - if (!llsync_is_ready) { - llsync_is_ready = true; - } - - cpu = cpu_id(); - cpu_data = llsync_get_cpu_data(); - - spinlock_lock_intr_save(&llsync_data.lock, &flags); - - assert(!cpu_data->registered); - assert(work_queue_nr_works(&cpu_data->queue0) == 0); - cpu_data->registered = 1; - cpu_data->gcid = llsync_data.gcid.value; - - assert(!cpumap_test(&llsync_data.registered_cpus, cpu)); - cpumap_set(&llsync_data.registered_cpus, cpu); - llsync_data.nr_registered_cpus++; - - assert(!cpumap_test(&llsync_data.pending_checkpoints, cpu)); - - if ((llsync_data.nr_registered_cpus == 1) - && (llsync_data.nr_pending_checkpoints == 0)) { - llsync_process_global_checkpoint(); - } - - spinlock_unlock_intr_restore(&llsync_data.lock, flags); -} - -void -llsync_unregister(void) -{ - struct llsync_cpu_data *cpu_data; - unsigned long flags; - unsigned int cpu; - - cpu = cpu_id(); - cpu_data = llsync_get_cpu_data(); - - spinlock_lock_intr_save(&llsync_data.lock, &flags); - - llsync_flush_works(cpu_data); - - assert(cpu_data->registered); - cpu_data->registered = 0; - - assert(cpumap_test(&llsync_data.registered_cpus, cpu)); - cpumap_clear(&llsync_data.registered_cpus, cpu); - llsync_data.nr_registered_cpus--; - - /* - * Processor registration qualifies as a checkpoint. Since unregistering - * a processor also disables commits until it's registered again, perform - * one now. - */ - llsync_commit_checkpoint(cpu); - - spinlock_unlock_intr_restore(&llsync_data.lock, flags); -} - -void -llsync_report_periodic_event(void) -{ - struct llsync_cpu_data *cpu_data; - unsigned int gcid; - - assert(thread_check_intr_context()); - - cpu_data = llsync_get_cpu_data(); - - if (!cpu_data->registered) { - assert(work_queue_nr_works(&cpu_data->queue0) == 0); - return; - } - - spinlock_lock(&llsync_data.lock); - - llsync_flush_works(cpu_data); - - gcid = llsync_data.gcid.value; - assert((gcid - cpu_data->gcid) <= 1); - - /* - * If the local copy of the global checkpoint ID matches the true - * value, the current processor has checked in. - * - * Otherwise, there were no checkpoint since the last global checkpoint. - * Check whether this periodic event occurred during a read-side critical - * section, and if not, trigger a checkpoint. 
- */ - if (cpu_data->gcid == gcid) { - llsync_commit_checkpoint(cpu_id()); - } else { - if (thread_llsync_in_read_cs()) { - syscnt_inc(&llsync_data.sc_failed_periodic_checkins); - } else { - cpu_data->gcid = gcid; - syscnt_inc(&llsync_data.sc_periodic_checkins); - llsync_commit_checkpoint(cpu_id()); - } - } - - spinlock_unlock(&llsync_data.lock); -} - -void -llsync_defer(struct work *work) -{ - struct llsync_cpu_data *cpu_data; - unsigned long flags; - - thread_preempt_disable_intr_save(&flags); - cpu_data = llsync_get_cpu_data(); - work_queue_push(&cpu_data->queue0, work); - thread_preempt_enable_intr_restore(flags); -} - -static void -llsync_signal(struct work *work) -{ - struct llsync_waiter *waiter; - - waiter = structof(work, struct llsync_waiter, work); - - mutex_lock(&waiter->lock); - waiter->done = 1; - condition_signal(&waiter->cond); - mutex_unlock(&waiter->lock); -} - -void -llsync_wait(void) -{ - struct llsync_waiter waiter; - - work_init(&waiter.work, llsync_signal); - mutex_init(&waiter.lock); - condition_init(&waiter.cond); - waiter.done = 0; - - llsync_defer(&waiter.work); - - mutex_lock(&waiter.lock); - - while (!waiter.done) { - condition_wait(&waiter.cond, &waiter.lock); - } - - mutex_unlock(&waiter.lock); -} diff --git a/kern/llsync.h b/kern/llsync.h deleted file mode 100644 index 4a0d026..0000000 --- a/kern/llsync.h +++ /dev/null @@ -1,174 +0,0 @@ -/* - * Copyright (c) 2013-2014 Richard Braun. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * - * - * Lockless synchronization. - * - * The llsync module provides services similar to RCU (Read-Copy Update). - * As such, it can be thought of as an efficient reader-writer lock - * replacement. It is efficient because read-side critical sections - * don't use expensive synchronization mechanisms such as locks or atomic - * instructions. Lockless synchronization is therefore best used for - * read-mostly objects. Updating still requires conventional lock-based - * synchronization. - * - * The basic idea is that read-side critical sections are assumed to hold - * read-side references, and objects for which there may be read-side - * references must exist as long as such references may be held. The llsync - * module tracks special system events to determine when read-side references - * can no longer exist. - * - * Since read-side critical sections can run concurrently with updates, - * it is important to make sure that objects are consistent when being - * accessed. This is achieved with a publish/subscribe mechanism that relies - * on the natural atomicity of machine word updates in memory, i.e. all - * supported architectures must guarantee that, when updating a word, and - * in turn a pointer, other processors reading that word obtain a valid - * value, that is either the previous or the next value of the word, but not - * a mixed-up value. 
The llsync module provides the llsync_store_ptr() and - * llsync_load_ptr() wrappers that take care of low level details such as - * compiler and memory barriers, so that objects are completely built and - * consistent when published and accessed. - * - * As objects are published through pointers, multiple versions can exist at - * the same time. Previous versions cannot be deleted as long as read-side - * references may exist. Operations that must wait for all read-side references - * to be dropped can be either synchronous, i.e. block until it is safe to - * proceed, or be deferred, in which case they are queued and later handed to - * the work module. As a result, special care must be taken if using lockless - * synchronization in the work module itself. - * - * The two system events tracked by the llsync module are context switches - * and a periodic event, normally the periodic timer interrupt that drives - * the scheduler. Context switches are used as checkpoint triggers. A - * checkpoint is a point in execution at which no read-side reference can - * exist, i.e. the processor isn't running any read-side critical section. - * Since context switches can be very frequent, a checkpoint is local to - * the processor and lightweight. The periodic event is used to commit - * checkpoints globally so that other processors are aware of the progress - * of one another. As the system allows situations in which two periodic - * events can occur without a single context switch, the periodic event is - * also used as a checkpoint trigger. When all checkpoints have been - * committed, a global checkpoint occurs. The occurrence of global checkpoints - * allows the llsync module to determine when it is safe to process deferred - * work or unblock update sides. - */ - -#ifndef _KERN_LLSYNC_H -#define _KERN_LLSYNC_H - -#include <stdbool.h> - -#include <kern/atomic.h> -#include <kern/macros.h> -#include <kern/llsync_i.h> -#include <kern/thread.h> -#include <kern/work.h> - -/* - * Safely store a pointer. - */ -#define llsync_store_ptr(ptr, value) atomic_store(&(ptr), value, ATOMIC_RELEASE) - -/* - * Safely load a pointer. - */ -#define llsync_load_ptr(ptr) atomic_load(&(ptr), ATOMIC_CONSUME) - -/* - * Read-side critical section enter/exit functions. - * - * It is not allowed to block inside a read-side critical section. - */ - -static inline void -llsync_read_enter(void) -{ - int in_read_cs; - - in_read_cs = thread_llsync_in_read_cs(); - thread_llsync_read_inc(); - - if (!in_read_cs) { - thread_preempt_disable(); - } -} - -static inline void -llsync_read_exit(void) -{ - thread_llsync_read_dec(); - - if (!thread_llsync_in_read_cs()) { - thread_preempt_enable(); - } -} - -/* - * Return true if the llsync module is initialized, false otherwise. - */ -bool llsync_ready(void); - -/* - * Manage registration of the current processor. - * - * The caller must not be allowed to migrate when calling these functions. - * - * Registering tells the llsync module that the current processor reports - * context switches and periodic events. - * - * When a processor enters a state in which checking in becomes irrelevant, - * it unregisters itself so that the other registered processors don't need - * to wait for it to make progress. For example, this is done inside the - * idle loop since it is obviously impossible to enter a read-side critical - * section while idling. - */ -void llsync_register(void); -void llsync_unregister(void); - -/* - * Report a context switch on the current processor. 
- * - * Interrupts and preemption must be disabled when calling this function. - */ -static inline void -llsync_report_context_switch(void) -{ - llsync_checkin(); -} - -/* - * Report a periodic event on the current processor. - * - * Interrupts and preemption must be disabled when calling this function. - */ -void llsync_report_periodic_event(void); - -/* - * Defer an operation until all existing read-side references are dropped, - * without blocking. - */ -void llsync_defer(struct work *work); - -/* - * Wait for all existing read-side references to be dropped. - * - * This function sleeps, and may do so for a moderately long duration (a few - * system timer ticks). - */ -void llsync_wait(void); - -#endif /* _KERN_LLSYNC_H */ diff --git a/kern/llsync_i.h b/kern/llsync_i.h deleted file mode 100644 index e043eb9..0000000 --- a/kern/llsync_i.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2013-2014 Richard Braun. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef _KERN_LLSYNC_I_H -#define _KERN_LLSYNC_I_H - -#include <assert.h> -#include <stdalign.h> - -#include <kern/cpumap.h> -#include <kern/macros.h> -#include <kern/spinlock.h> -#include <kern/syscnt.h> -#include <kern/work.h> -#include <machine/cpu.h> - -/* - * Global data. - * - * The queue number matches the number of global checkpoints that occurred - * since works contained in it were added. After two global checkpoints, - * works are scheduled for processing. - * - * Interrupts must be disabled when acquiring the global data lock. - */ -struct llsync_data { - struct spinlock lock; - struct cpumap registered_cpus; - unsigned int nr_registered_cpus; - struct cpumap pending_checkpoints; - unsigned int nr_pending_checkpoints; - int no_warning; - struct work_queue queue0; - struct work_queue queue1; - struct syscnt sc_global_checkpoints; - struct syscnt sc_periodic_checkins; - struct syscnt sc_failed_periodic_checkins; - - /* - * Global checkpoint ID. - * - * This variable can be frequently accessed from many processors so : - * - reserve a whole cache line for it - * - apply optimistic accesses to reduce contention - */ - struct { - alignas(CPU_L1_SIZE) volatile unsigned int value; - } gcid; -}; - -extern struct llsync_data llsync_data; - -/* - * Per-processor data. - * - * Every processor records whether it is registered and a local copy of the - * global checkpoint ID, which is meaningless on unregistered processors. - * The true global checkpoint ID is incremented when a global checkpoint occurs, - * after which all the local copies become stale. Checking in synchronizes - * the local copy of the global checkpoint ID. - * - * When works are deferred, they are initially added to a processor-local - * queue. This queue is regularly flushed to the global data, an operation - * that occurs every time a processor may commit a checkpoint. 
The downside - * of this scalability optimization is that it introduces some additional - * latency for works that are added to a processor queue between a flush and - * a global checkpoint. - * - * Interrupts and preemption must be disabled on access. - */ -struct llsync_cpu_data { - int registered; - unsigned int gcid; - struct work_queue queue0; -}; - -extern struct llsync_cpu_data llsync_cpu_data; - -static inline struct llsync_cpu_data * -llsync_get_cpu_data(void) -{ - return cpu_local_ptr(llsync_cpu_data); -} - -static inline void -llsync_checkin(void) -{ - struct llsync_cpu_data *cpu_data; - unsigned int gcid; - - assert(!cpu_intr_enabled()); - assert(!thread_preempt_enabled()); - - cpu_data = llsync_get_cpu_data(); - - if (!cpu_data->registered) { - assert(work_queue_nr_works(&cpu_data->queue0) == 0); - return; - } - - /* - * The global checkpoint ID obtained might be obsolete here, in which - * case a commit will not determine that a checkpoint actually occurred. - * This should seldom happen. - */ - gcid = llsync_data.gcid.value; - assert((gcid - cpu_data->gcid) <= 1); - cpu_data->gcid = gcid; -} - -#endif /* _KERN_LLSYNC_I_H */ diff --git a/kern/log2.h b/kern/log2.h index ed12b44..762aaa7 100644 --- a/kern/log2.h +++ b/kern/log2.h @@ -16,6 +16,8 @@ * * * Integer base 2 logarithm operations. + * + * TODO Fix naming. */ #ifndef _KERN_LOG2_H diff --git a/kern/macros.h b/kern/macros.h index 2c06be5..0411da2 100644 --- a/kern/macros.h +++ b/kern/macros.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2010, 2013 Richard Braun. + * Copyright (c) 2009-2018 Richard Braun. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,6 +19,8 @@ * * This file is a top header in the inclusion hierarchy, and shouldn't include * other headers that may cause circular dependencies. + * + * TODO Improve documentation. */ #ifndef _KERN_MACROS_H @@ -28,10 +30,14 @@ #error "GCC 4+ required" #endif +#ifndef __ASSEMBLER__ +#include <stddef.h> +#endif + /* * Attributes for variables that are mostly read and seldom changed. */ -#define __read_mostly __section(".data.read_mostly") +#define __read_mostly __section(".data.read_mostly") #define MACRO_BEGIN ({ #define MACRO_END }) diff --git a/kern/mutex.c b/kern/mutex.c index bd548b8..7f46630 100644 --- a/kern/mutex.c +++ b/kern/mutex.c @@ -20,11 +20,20 @@ #include <kern/thread.h> static int __init +mutex_bootstrap(void) +{ + return 0; +} + +INIT_OP_DEFINE(mutex_bootstrap, + INIT_OP_DEP(mutex_impl_bootstrap, true), + INIT_OP_DEP(thread_setup_booter, true)); + +static int __init mutex_setup(void) { return 0; } INIT_OP_DEFINE(mutex_setup, - INIT_OP_DEP(mutex_impl_setup, true), - INIT_OP_DEP(thread_setup_booter, true)); + INIT_OP_DEP(mutex_impl_setup, true)); diff --git a/kern/mutex.h b/kern/mutex.h index f152d31..b4332de 100644 --- a/kern/mutex.h +++ b/kern/mutex.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013-2017 Richard Braun. + * Copyright (c) 2013-2018 Richard Braun. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -37,7 +37,6 @@ #include <kern/init.h> #include <kern/mutex_types.h> -#include <kern/thread.h> /* * Initialize a mutex. @@ -102,15 +101,18 @@ static inline void mutex_unlock(struct mutex *mutex) { mutex_impl_unlock(mutex); - - /* - * If this mutex was used along with a condition variable, wake up - * a potential pending waiter. 
- */ - thread_wakeup_last_cond(); } /* + * Special init operation for syscnt_setup. + * + * This init operation only exists to avoid a circular dependency between + * syscnt_setup and mutex_setup, without giving syscnt_setup knowledge + * about the dependencies of mutex_setup. + */ +INIT_OP_DECLARE(mutex_bootstrap); + +/* * This init operation provides : * - uncontended mutex locking * diff --git a/kern/mutex/mutex_adaptive.c b/kern/mutex/mutex_adaptive.c index 3e6b610..db92f9d 100644 --- a/kern/mutex/mutex_adaptive.c +++ b/kern/mutex/mutex_adaptive.c @@ -318,14 +318,29 @@ mutex_adaptive_unlock_slow(struct mutex *mutex) } static int +mutex_adaptive_bootstrap(void) +{ + return 0; +} + +INIT_OP_DEFINE(mutex_adaptive_bootstrap, + INIT_OP_DEP(thread_setup_booter, true)); + +static int mutex_adaptive_setup(void) { mutex_adaptive_setup_debug(); return 0; } -INIT_OP_DEFINE(mutex_adaptive_setup, #ifdef CONFIG_MUTEX_DEBUG +#define MUTEX_ADAPTIVE_DEBUG_INIT_OP_DEPS \ INIT_OP_DEP(syscnt_setup, true), +#else /* CONFIG_MUTEX_DEBUG */ +#define MUTEX_ADAPTIVE_DEBUG_INIT_OP_DEPS #endif /* CONFIG_MUTEX_DEBUG */ + +INIT_OP_DEFINE(mutex_adaptive_setup, + INIT_OP_DEP(mutex_adaptive_bootstrap, true), + MUTEX_ADAPTIVE_DEBUG_INIT_OP_DEPS ); diff --git a/kern/mutex/mutex_adaptive_i.h b/kern/mutex/mutex_adaptive_i.h index a8598e6..6ff1277 100644 --- a/kern/mutex/mutex_adaptive_i.h +++ b/kern/mutex/mutex_adaptive_i.h @@ -133,8 +133,14 @@ mutex_impl_unlock(struct mutex *mutex) } } -#define mutex_impl_setup mutex_adaptive_setup +/* + * Mutex init operations. See kern/mutex.h. + */ +#define mutex_impl_bootstrap mutex_adaptive_bootstrap +INIT_OP_DECLARE(mutex_adaptive_bootstrap); + +#define mutex_impl_setup mutex_adaptive_setup INIT_OP_DECLARE(mutex_adaptive_setup); #endif /* _KERN_MUTEX_ADAPTIVE_I_H */ diff --git a/kern/mutex/mutex_pi_i.h b/kern/mutex/mutex_pi_i.h index 2d5a2b6..14f63be 100644 --- a/kern/mutex/mutex_pi_i.h +++ b/kern/mutex/mutex_pi_i.h @@ -65,6 +65,11 @@ mutex_impl_unlock(struct mutex *mutex) rtmutex_unlock(&mutex->rtmutex); } -#define mutex_impl_setup rtmutex_setup +/* + * Mutex init operations. See kern/mutex.h. + */ + +#define mutex_impl_bootstrap rtmutex_bootstrap +#define mutex_impl_setup rtmutex_setup #endif /* _KERN_MUTEX_PI_I_H */ diff --git a/kern/mutex/mutex_plain.c b/kern/mutex/mutex_plain.c index 8945014..f12f13f 100644 --- a/kern/mutex/mutex_plain.c +++ b/kern/mutex/mutex_plain.c @@ -159,14 +159,29 @@ mutex_plain_unlock_slow(struct mutex *mutex) } static int +mutex_plain_bootstrap(void) +{ + return 0; +} + +INIT_OP_DEFINE(mutex_plain_bootstrap); + +static int mutex_plain_setup(void) { mutex_plain_setup_debug(); return 0; } -INIT_OP_DEFINE(mutex_plain_setup, #ifdef CONFIG_MUTEX_DEBUG +#define MUTEX_PLAIN_DEBUG_INIT_OP_DEPS \ INIT_OP_DEP(syscnt_setup, true), +#else /* CONFIG_MUTEX_DEBUG */ +#define MUTEX_PLAIN_DEBUG_INIT_OP_DEPS #endif /* CONFIG_MUTEX_DEBUG */ + + +INIT_OP_DEFINE(mutex_plain_setup, + INIT_OP_DEP(mutex_plain_bootstrap, true), + MUTEX_PLAIN_DEBUG_INIT_OP_DEPS ); diff --git a/kern/mutex/mutex_plain_i.h b/kern/mutex/mutex_plain_i.h index fe97308..74def89 100644 --- a/kern/mutex/mutex_plain_i.h +++ b/kern/mutex/mutex_plain_i.h @@ -127,8 +127,14 @@ mutex_impl_unlock(struct mutex *mutex) } } -#define mutex_impl_setup mutex_plain_setup +/* + * Mutex init operations. See kern/mutex.h. 
+ */ +#define mutex_impl_bootstrap mutex_plain_bootstrap +INIT_OP_DECLARE(mutex_plain_bootstrap); +#define mutex_impl_setup mutex_plain_setup INIT_OP_DECLARE(mutex_plain_setup); #endif /* _KERN_MUTEX_PLAIN_I_H */ diff --git a/kern/percpu.h b/kern/percpu.h index 87d1470..e53f4e5 100644 --- a/kern/percpu.h +++ b/kern/percpu.h @@ -89,7 +89,7 @@ percpu_area(unsigned int cpu) extern void *percpu_areas[CONFIG_MAX_CPUS]; void *area; - assert(cpu < CONFIG_MAX_CPUS); + assert(cpu < ARRAY_SIZE(percpu_areas)); area = percpu_areas[cpu]; assert(area != NULL); return area; @@ -106,13 +106,18 @@ int percpu_add(unsigned int cpu); /* * This init operation provides : - * - percpu section is registered as the BSP percpu area + * - access to percpu variables on processor 0 */ INIT_OP_DECLARE(percpu_bootstrap); /* * This init operation provides : * - new percpu areas can be created + * + * The dependency that provides access to percpu variables on all processors + * is cpu_mp_probe. + * + * TODO Add percpu alias to cpu_mp_probe. */ INIT_OP_DECLARE(percpu_setup); diff --git a/kern/perfmon.c b/kern/perfmon.c index 59b7740..6c545ca 100644 --- a/kern/perfmon.c +++ b/kern/perfmon.c @@ -180,7 +180,7 @@ struct perfmon_cpu_pmu { struct perfmon_cpu_pmc pmcs[PERFMON_MAX_PMCS]; }; -static struct perfmon_pmu_ops pmu_driver __read_mostly = {}; +static struct perfmon_pmu_ops pmu_driver __read_mostly; static struct perfmon_pmu perfmon_pmu; static struct perfmon_cpu_pmu perfmon_cpu_pmu __percpu; diff --git a/kern/rcu.c b/kern/rcu.c new file mode 100644 index 0000000..973d6d5 --- /dev/null +++ b/kern/rcu.c @@ -0,0 +1,787 @@ +/* + * Copyright (c) 2018 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * This implementation is based on the paper "Extending RCU for Realtime + * and Embedded Workloads" by Paul E. McKenney, Ingo Molnar, Dipankar Sarma, + * and Suparna Bhattacharya. Besides the mechanisms not yet implemented, + * such as priority boosting, the differences are described below. + * + * First, this implementation uses scalable reference counters provided + * by the sref module instead of per-CPU counters as described in the paper. + * The main benefit of this approach is the centralization of most scalability + * improvements in the sref module, which should propagate to all sref users, + * including RCU. + * + * In addition, this implementation introduces the concept of windows, where + * a window is a range in time to which readers may be linked. Here, a + * grace period is defined as the time range at the end of a window where + * various synchronization steps are performed to enforce the RCU guarantees. + * The minimum duration of a window acts as a knob allowing users to tune + * the behavior of the RCU system. + * + * Finally, the state machine described in the paper is updated to accommodate + * windows, since grace periods don't run back-to-back.
+ * Windows are regularly checked and flipped if the previous one isn't + * active any more. From that moment, processors may notice the global flip + * and perform a local flip of their work window ID. Once all processors + * have acknowledged the flip, it is certain that no new work may be queued + * on the previous window. At this point, the same occurs for the + * processor-local reader window ID, and once all processors have + * acknowledged that flip, there can be no new reader linked to the previous + * window. The RCU system then releases its own reference to the previous + * window and waits for the window reference counter to drop to 0, indicating + * that all readers linked to the previous window have left their read-side + * critical section. When this global event occurs, processors are requested + * to flush the works queued for the previous window, and once they all have + * acknowledged their flush, the window ends and becomes inactive, allowing + * a new grace period to occur later on. + * + * Here is an informal diagram describing this process : + * + * t ----> + * + * reader window flip ---+ +--- no more readers + * work window flip ------+ | | +- works flushed + * (grace period start) | | | | (grace period / window end) + * v v v v + * +--------------+-+-----+-+ + * | . . . | + * | window 0 . . gp . | + * | removal . . . | reclamation + * +--------------+-+-----+-+-----+----+ + * | . | + * | window 1 . gp | + * | removal . | reclamation + * +---------------+----+-------- + * | + * | window 2 ... + * | + * +------------- + * + * TODO Improve atomic acknowledgment scalability. + * TODO Handle large amounts of deferred works. + * TODO Priority boosting of slow readers. + * TODO CPU registration for dyntick-friendly behavior. + */ + +#include <assert.h> +#include <stdalign.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> + +#include <kern/atomic.h> +#include <kern/clock.h> +#include <kern/init.h> +#include <kern/macros.h> +#include <kern/rcu.h> +#include <kern/panic.h> +#include <kern/percpu.h> +#include <kern/spinlock.h> +#include <kern/sref.h> +#include <kern/syscnt.h> +#include <kern/thread.h> +#include <kern/timer.h> +#include <kern/work.h> +#include <machine/cpu.h> + +/* + * Negative value close to 0 so that an overflow occurs early. + */ +#define RCU_WINDOW_ID_INIT_VALUE ((unsigned int)-500) + +/* + * Interval between window checks. + * + * When windows are checked, a flip occurs if the previous window isn't + * active any more. + */ +#define RCU_WINDOW_CHECK_INTERVAL_MS 10 + +/* + * Grace period states. + * + * These states are only used to trigger per-CPU processing that is + * globally acknowledged by decrementing a global atomic counter. They + * do not completely represent the actual state of a grace period. + */ +enum rcu_gp_state { + RCU_GP_STATE_WORK_WINDOW_FLIP, + RCU_GP_STATE_READER_WINDOW_FLIP, + RCU_GP_STATE_WORK_FLUSH, +}; + +/* + * Per-CPU view of a window. + * + * Deferred works are scheduled when the window ends. + */ +struct rcu_cpu_window { + struct work_queue works; +}; + +/* + * Per-CPU RCU data. + * + * Each processor maintains two local window IDs. One is used as the current + * window ID when deferring work, the other when detecting a reader. A local + * flip occurs when a processor notices that the global grace period state + * no longer matches the local grace period state. These checks only occur + * on periodic events. + * + * Interrupts and preemption must be disabled when accessing local CPU data. 
+ */ +struct rcu_cpu_data { + enum rcu_gp_state gp_state; + unsigned int work_wid; + unsigned int reader_wid; + struct rcu_cpu_window windows[2]; + struct syscnt sc_nr_detected_readers; +}; + +/* + * Global window. + * + * A window is a time range that tracks read-side references. Conceptually, + * each reader adds a reference to the current window. In practice, references + * are only added when readers are detected, which occurs on a context switch + * (to track preempted threads) or a reader window flip (to prevent currently + * running readers from being linked to the next window). + * + * When a window is started, its scalable reference counter is initialized + * with a reference owned by the RCU system. That reference guarantees that + * the window remains active as long as new readers may add references, + * since it prevents the counter from dropping to 0. After a reader window + * flip, there may not be new references to the window, and the initial + * reference is dropped, allowing the counter to reach 0 once all detected + * readers leave their critical section and unreference the window they're + * linked to. + */ +struct rcu_window { + struct sref_counter nr_refs; + uint64_t start_ts; + bool active; +}; + +/* + * Global data. + * + * Processors regularly check the grace period state against their own, + * locally cached grace period state, and take action whenever they differ. + * False sharing is avoided by making the global grace period state fill an + * entire cache line on SMP. + * + * After processors notice a grace period state change, they acknowledge + * noticing this change by decrementing the atomic acknowledgment counter, + * which also fills a complete cache line on SMP in order to restrict cache + * line bouncing. Atomic operations on this counter are done with + * acquire-release ordering to enforce the memory ordering guarantees + * required by the implementation, as well as those provided by the public + * interface. + * + * In addition to the global window ID and the windows themselves, the data + * include a timer, used to trigger the end of windows, i.e. grace periods. + * Since the timer function, atomic acknowledgments, and window no-reference + * function chain each other, there is currently no need for a global lock. + */ +struct rcu_data { + struct { + alignas(CPU_L1_SIZE) enum rcu_gp_state gp_state; + }; + struct { + alignas(CPU_L1_SIZE) unsigned int nr_acks; + }; + + unsigned int wid; + struct rcu_window windows[2]; + struct timer timer; + struct syscnt sc_nr_windows; + struct syscnt sc_last_window_ms; + struct syscnt sc_longest_window_ms; +}; + +/* + * Structure used to implement rcu_wait(). 
+ */ +struct rcu_waiter { + struct work work; + struct spinlock lock; + struct thread *thread; + bool done; +}; + +static struct rcu_data rcu_data; +static struct rcu_cpu_data rcu_cpu_data __percpu; + +static struct rcu_cpu_data * +rcu_get_cpu_data(void) +{ + assert(!cpu_intr_enabled()); + assert(!thread_preempt_enabled()); + + return cpu_local_ptr(rcu_cpu_data); +} + +static enum rcu_gp_state +rcu_data_get_gp_state(const struct rcu_data *data) +{ + return data->gp_state; +} + +static unsigned int +rcu_data_get_wid(const struct rcu_data *data) +{ + return data->wid; +} + +static struct rcu_window * +rcu_data_get_window_from_index(struct rcu_data *data, size_t index) +{ + assert(index < ARRAY_SIZE(data->windows)); + return &data->windows[index]; +} + +static struct rcu_window * +rcu_data_get_window(struct rcu_data *data, unsigned int wid) +{ + return rcu_data_get_window_from_index(data, wid & 1); +} + +static void +rcu_data_update_gp_state(struct rcu_data *data, enum rcu_gp_state gp_state) +{ + assert(data->nr_acks == 0); + + switch (gp_state) { + case RCU_GP_STATE_WORK_WINDOW_FLIP: + assert(data->gp_state == RCU_GP_STATE_WORK_FLUSH); + break; + case RCU_GP_STATE_READER_WINDOW_FLIP: + assert(data->gp_state == RCU_GP_STATE_WORK_WINDOW_FLIP); + break; + case RCU_GP_STATE_WORK_FLUSH: + assert(data->gp_state == RCU_GP_STATE_READER_WINDOW_FLIP); + break; + default: + panic("rcu: invalid grace period state"); + } + + data->nr_acks = cpu_count(); + atomic_store(&data->gp_state, gp_state, ATOMIC_RELEASE); +} + +static bool +rcu_data_check_gp_state(const struct rcu_data *data, + enum rcu_gp_state local_gp_state, + enum rcu_gp_state *global_gp_state) +{ + *global_gp_state = atomic_load(&data->gp_state, ATOMIC_RELAXED); + + if (unlikely(local_gp_state != *global_gp_state)) { + atomic_fence_acquire(); + return true; + } + + return false; +} + +static void +rcu_window_end(struct rcu_window *window) +{ + assert(window->active); + window->active = false; +} + +static void +rcu_window_ref(struct rcu_window *window) +{ + sref_counter_inc(&window->nr_refs); +} + +static void +rcu_window_unref(struct rcu_window *window) +{ + sref_counter_dec(&window->nr_refs); +} + +static uint64_t +rcu_window_get_start_ts(const struct rcu_window *window) +{ + return window->start_ts; +} + +static void +rcu_window_flush(struct sref_counter *counter) +{ + (void)counter; + + rcu_data_update_gp_state(&rcu_data, RCU_GP_STATE_WORK_FLUSH); +} + +static void __init +rcu_window_init(struct rcu_window *window) +{ + window->active = false; +} + +static void +rcu_window_start(struct rcu_window *window) +{ + assert(!window->active); + + sref_counter_init(&window->nr_refs, 1, NULL, rcu_window_flush); + window->start_ts = clock_get_time(); + window->active = true; +} + +static bool +rcu_window_active(const struct rcu_window *window) +{ + return window->active; +} + +static void +rcu_data_end_prev_window(struct rcu_data *data, uint64_t now) +{ + struct rcu_window *window; + uint64_t duration; + + window = rcu_data_get_window(data, data->wid - 1); + duration = clock_ticks_to_ms(now - rcu_window_get_start_ts(window)); + syscnt_set(&data->sc_last_window_ms, duration); + + if (duration > syscnt_read(&data->sc_longest_window_ms)) { + syscnt_set(&data->sc_longest_window_ms, duration); + } + + rcu_window_end(window); +} + +static void +rcu_data_schedule_timer(struct rcu_data *data, uint64_t now) +{ + uint64_t ticks; + + ticks = clock_ticks_from_ms(RCU_WINDOW_CHECK_INTERVAL_MS); + timer_schedule(&data->timer, now + ticks); +} + +static void 
+rcu_data_ack_cpu(struct rcu_data *data) +{ + struct rcu_window *window; + unsigned int prev_nr_acks; + uint64_t now; + + prev_nr_acks = atomic_fetch_sub(&data->nr_acks, 1, ATOMIC_ACQ_REL); + + if (prev_nr_acks != 1) { + assert(prev_nr_acks != 0); + return; + } + + switch (data->gp_state) { + case RCU_GP_STATE_WORK_WINDOW_FLIP: + rcu_data_update_gp_state(data, RCU_GP_STATE_READER_WINDOW_FLIP); + break; + case RCU_GP_STATE_READER_WINDOW_FLIP: + window = rcu_data_get_window(data, data->wid - 1); + rcu_window_unref(window); + break; + case RCU_GP_STATE_WORK_FLUSH: + now = clock_get_time(); + rcu_data_end_prev_window(data, now); + rcu_data_schedule_timer(data, now); + break; + default: + panic("rcu: invalid grace period state"); + } +} + +static bool +rcu_data_flip_windows(struct rcu_data *data) +{ + struct rcu_window *window; + + window = rcu_data_get_window(data, data->wid - 1); + + if (rcu_window_active(window)) { + return false; + } + + rcu_window_start(window); + syscnt_inc(&data->sc_nr_windows); + data->wid++; + rcu_data_update_gp_state(data, RCU_GP_STATE_WORK_WINDOW_FLIP); + return true; +} + +static void +rcu_data_check_windows(struct timer *timer) +{ + struct rcu_data *data; + bool flipped; + + data = &rcu_data; + flipped = rcu_data_flip_windows(data); + + if (!flipped) { + rcu_data_schedule_timer(data, timer_get_time(timer)); + } +} + +static void __init +rcu_data_init(struct rcu_data *data) +{ + data->gp_state = RCU_GP_STATE_WORK_FLUSH; + data->nr_acks = 0; + data->wid = RCU_WINDOW_ID_INIT_VALUE; + + for (size_t i = 0; i < ARRAY_SIZE(data->windows); i++) { + rcu_window_init(rcu_data_get_window_from_index(data, i)); + } + + rcu_window_start(rcu_data_get_window(data, data->wid)); + + timer_init(&data->timer, rcu_data_check_windows, 0); + rcu_data_schedule_timer(data, clock_get_time()); + + syscnt_register(&data->sc_nr_windows, "rcu_nr_windows"); + syscnt_register(&data->sc_last_window_ms, "rcu_last_window_ms"); + syscnt_register(&data->sc_longest_window_ms, "rcu_longest_window_ms"); +} + +static void __init +rcu_cpu_window_init(struct rcu_cpu_window *cpu_window) +{ + work_queue_init(&cpu_window->works); +} + +static void +rcu_cpu_window_queue(struct rcu_cpu_window *cpu_window, struct work *work) +{ + work_queue_push(&cpu_window->works, work); +} + +static void +rcu_cpu_window_flush(struct rcu_cpu_window *cpu_window) +{ + work_queue_schedule(&cpu_window->works, 0); + work_queue_init(&cpu_window->works); +} + +static unsigned int +rcu_cpu_data_get_reader_wid(const struct rcu_cpu_data *cpu_data) +{ + return cpu_data->reader_wid; +} + +static struct rcu_cpu_window * +rcu_cpu_data_get_window_from_index(struct rcu_cpu_data *cpu_data, size_t index) +{ + assert(index < ARRAY_SIZE(cpu_data->windows)); + return &cpu_data->windows[index]; +} + +static struct rcu_cpu_window * +rcu_cpu_data_get_window(struct rcu_cpu_data *cpu_data, unsigned int wid) +{ + return rcu_cpu_data_get_window_from_index(cpu_data, wid & 1); +} + +static void __init +rcu_cpu_data_init(struct rcu_cpu_data *cpu_data, unsigned int cpu) +{ + struct rcu_data *data; + char name[SYSCNT_NAME_SIZE]; + + data = &rcu_data; + + cpu_data->gp_state = rcu_data_get_gp_state(data); + cpu_data->work_wid = rcu_data_get_wid(data); + cpu_data->reader_wid = cpu_data->work_wid; + + for (size_t i = 0; i < ARRAY_SIZE(cpu_data->windows); i++) { + rcu_cpu_window_init(rcu_cpu_data_get_window_from_index(cpu_data, i)); + } + + snprintf(name, sizeof(name), "rcu_nr_detected_readers/%u", cpu); + syscnt_register(&cpu_data->sc_nr_detected_readers, name); +} 
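+
+/*
+ * For illustration, with N the value of the global window ID when the
+ * check timer fires, a complete grace period proceeds as follows :
+ *
+ * 1. The timer function finds window N - 1 inactive (otherwise it simply
+ *    reschedules itself), restarts its slot as window N + 1, increments
+ *    the global window ID and sets the grace period state to
+ *    RCU_GP_STATE_WORK_WINDOW_FLIP.
+ * 2. Each processor flips its work window ID, so that newly deferred
+ *    works are queued on the new window, and acknowledges. The last
+ *    acknowledgment advances the state to RCU_GP_STATE_READER_WINDOW_FLIP.
+ * 3. Each processor accounts its running reader, if any, linking it to
+ *    window N, flips its reader window ID and acknowledges. The last
+ *    acknowledgment drops the initial reference of window N.
+ * 4. When the last reader linked to window N leaves its critical section,
+ *    the no-reference function advances the state to
+ *    RCU_GP_STATE_WORK_FLUSH. Each processor then schedules the works it
+ *    queued while window N was current, and the last acknowledgment ends
+ *    the window and restarts the timer.
+ */
+
+/*
+ * Typical usage of the public interface (a sketch only, with hypothetical
+ * obj and obj_destroy_deferred names) : an updater replaces a shared
+ * pointer and defers the release of the old object :
+ *
+ *   old = obj;
+ *   rcu_store_ptr(obj, new_obj);
+ *   work_init(&old->work, obj_destroy_deferred);
+ *   rcu_defer(&old->work);
+ *
+ * while readers dereference the pointer inside a critical section :
+ *
+ *   rcu_read_enter();
+ *   ptr = rcu_load_ptr(obj);
+ *   ...
+ *   rcu_read_leave();
+ */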
+
+static void
+rcu_cpu_data_queue(struct rcu_cpu_data *cpu_data, struct work *work)
+{
+ struct rcu_cpu_window *cpu_window;
+
+ cpu_window = rcu_cpu_data_get_window(cpu_data, cpu_data->work_wid);
+ rcu_cpu_window_queue(cpu_window, work);
+}
+
+static void
+rcu_cpu_data_flush(struct rcu_cpu_data *cpu_data)
+{
+ struct rcu_cpu_window *cpu_window;
+
+ assert(cpu_data->work_wid == cpu_data->reader_wid);
+
+ cpu_window = rcu_cpu_data_get_window(cpu_data, cpu_data->work_wid - 1);
+ rcu_cpu_window_flush(cpu_window);
+}
+
+void
+rcu_reader_init(struct rcu_reader *reader)
+{
+ reader->level = 0;
+ reader->linked = false;
+}
+
+static void
+rcu_reader_link(struct rcu_reader *reader, struct rcu_cpu_data *cpu_data)
+{
+ assert(!cpu_intr_enabled());
+ assert(reader == thread_rcu_reader(thread_self()));
+ assert(!rcu_reader_linked(reader));
+
+ reader->wid = rcu_cpu_data_get_reader_wid(cpu_data);
+ reader->linked = true;
+}
+
+static void
+rcu_reader_unlink(struct rcu_reader *reader)
+{
+ assert(reader->level == 0);
+ reader->linked = false;
+}
+
+static void
+rcu_reader_enter(struct rcu_reader *reader, struct rcu_cpu_data *cpu_data)
+{
+ struct rcu_window *window;
+ struct rcu_data *data;
+ unsigned int wid;
+
+ if (rcu_reader_linked(reader)) {
+ return;
+ }
+
+ data = &rcu_data;
+ wid = rcu_cpu_data_get_reader_wid(cpu_data);
+ window = rcu_data_get_window(data, wid);
+
+ rcu_reader_link(reader, cpu_data);
+ rcu_window_ref(window);
+
+ syscnt_inc(&cpu_data->sc_nr_detected_readers);
+}
+
+void
+rcu_reader_leave(struct rcu_reader *reader)
+{
+ struct rcu_window *window;
+ struct rcu_data *data;
+
+ data = &rcu_data;
+
+ window = rcu_data_get_window(data, reader->wid);
+ rcu_window_unref(window);
+ rcu_reader_unlink(reader);
+}
+
+static void
+rcu_reader_account(struct rcu_reader *reader, struct rcu_cpu_data *cpu_data)
+{
+ if (rcu_reader_in_cs(reader)) {
+ rcu_reader_enter(reader, cpu_data);
+ }
+}
+
+static void
+rcu_cpu_data_flip_work_wid(struct rcu_cpu_data *cpu_data)
+{
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ cpu_data->work_wid++;
+}
+
+static void
+rcu_cpu_data_flip_reader_wid(struct rcu_cpu_data *cpu_data)
+{
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ rcu_reader_account(thread_rcu_reader(thread_self()), cpu_data);
+ cpu_data->reader_wid++;
+}
+
+static void
+rcu_cpu_data_check_gp_state(struct rcu_cpu_data *cpu_data)
+{
+ enum rcu_gp_state local_gp_state, global_gp_state;
+ struct rcu_data *data;
+ bool diff;
+
+ data = &rcu_data;
+
+ /*
+ * A loop is used to optimize the case where a processor is the last to
+ * acknowledge a grace period state change, in which case the latter
+ * also immediately changes and can be acknowledged right away. As a
+ * result, this loop never processes more than two state changes in a row.
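+ * Only the two window flips can chain that way. The transitions to the
+ * work flush state and to the next grace period depend on external
+ * events (a scalable counter flush, the timer), hence the assertion
+ * in the loop body.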
+ */
+ for (unsigned int i = 0; /* no condition */; i++) {
+ local_gp_state = cpu_data->gp_state;
+ diff = rcu_data_check_gp_state(data, local_gp_state, &global_gp_state);
+
+ if (!diff) {
+ break;
+ }
+
+ assert(i < 2);
+
+ switch (global_gp_state) {
+ case RCU_GP_STATE_WORK_WINDOW_FLIP:
+ rcu_cpu_data_flip_work_wid(cpu_data);
+ rcu_data_ack_cpu(data);
+ break;
+ case RCU_GP_STATE_READER_WINDOW_FLIP:
+ rcu_cpu_data_flip_reader_wid(cpu_data);
+ rcu_data_ack_cpu(data);
+ break;
+ case RCU_GP_STATE_WORK_FLUSH:
+ rcu_cpu_data_flush(cpu_data);
+ rcu_data_ack_cpu(data);
+ break;
+ default:
+ panic("rcu: invalid grace period state");
+ }
+
+ cpu_data->gp_state = global_gp_state;
+ }
+}
+
+void
+rcu_report_context_switch(struct rcu_reader *reader)
+{
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ /*
+ * Most readers don't need to be accounted for because their execution
+ * doesn't overlap with a grace period. If a reader is preempted, however,
+ * it must be accounted in case a grace period starts while the reader
+ * is preempted. Accounting also occurs when a grace period starts, more
+ * precisely when the reader window ID of a processor is flipped.
+ */
+ rcu_reader_account(reader, rcu_get_cpu_data());
+}
+
+void
+rcu_report_periodic_event(void)
+{
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ rcu_cpu_data_check_gp_state(rcu_get_cpu_data());
+}
+
+void
+rcu_defer(struct work *work)
+{
+ struct rcu_cpu_data *cpu_data;
+ unsigned long flags;
+
+ thread_preempt_disable_intr_save(&flags);
+ cpu_data = rcu_get_cpu_data();
+ rcu_cpu_data_queue(cpu_data, work);
+ thread_preempt_enable_intr_restore(flags);
+}
+
+static void
+rcu_waiter_wakeup(struct work *work)
+{
+ struct rcu_waiter *waiter;
+
+ waiter = structof(work, struct rcu_waiter, work);
+
+ spinlock_lock(&waiter->lock);
+ waiter->done = true;
+ thread_wakeup(waiter->thread);
+ spinlock_unlock(&waiter->lock);
+}
+
+static void
+rcu_waiter_init(struct rcu_waiter *waiter, struct thread *thread)
+{
+ work_init(&waiter->work, rcu_waiter_wakeup);
+ spinlock_init(&waiter->lock);
+ waiter->thread = thread;
+ waiter->done = false;
+}
+
+static void
+rcu_waiter_wait(struct rcu_waiter *waiter)
+{
+ rcu_defer(&waiter->work);
+
+ spinlock_lock(&waiter->lock);
+
+ while (!waiter->done) {
+ thread_sleep(&waiter->lock, waiter, "rcu_wait");
+ }
+
+ spinlock_unlock(&waiter->lock);
+}
+
+void
+rcu_wait(void)
+{
+ struct rcu_waiter waiter;
+
+ rcu_waiter_init(&waiter, thread_self());
+ rcu_waiter_wait(&waiter);
+}
+
+static int __init
+rcu_bootstrap(void)
+{
+ rcu_data_init(&rcu_data);
+ rcu_cpu_data_init(cpu_local_ptr(rcu_cpu_data), 0);
+ return 0;
+}
+
+INIT_OP_DEFINE(rcu_bootstrap,
+ INIT_OP_DEP(spinlock_setup, true),
+ INIT_OP_DEP(sref_bootstrap, true),
+ INIT_OP_DEP(syscnt_setup, true),
+ INIT_OP_DEP(thread_bootstrap, true),
+ INIT_OP_DEP(timer_bootstrap, true));
+
+static int __init
+rcu_setup(void)
+{
+ for (unsigned int i = 1; i < cpu_count(); i++) {
+ rcu_cpu_data_init(percpu_ptr(rcu_cpu_data, i), i);
+ }
+
+ return 0;
+}
+
+INIT_OP_DEFINE(rcu_setup,
+ INIT_OP_DEP(cpu_mp_probe, true),
+ INIT_OP_DEP(rcu_bootstrap, true));
diff --git a/kern/rcu.h b/kern/rcu.h
new file mode 100644
index 0000000..48079d0
--- /dev/null
+++ b/kern/rcu.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2018 Richard Braun.
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Read-Copy Update. + * + * This module provides a synchronization framework for read-only lockless + * operations, best used on read-mostly data structures. + * + * To learn more about RCU, see "Is Parallel Programming Hard, And, If So, + * What Can You Do About It ?" by Paul E. McKenney. + * + * A thorough discussion of RCU's requirements can be found at [1]. + * However, the memory model and terminology used in that document being + * different from C11, the memory barrier guarantees are hereby translated + * into memory ordering guarantees : + * + * 1. Release-acquire ordering is enforced between the end of all + * read-side critical sections starting before a grace period starts + * and the start of all works deferred before the same grace period + * starts. + * 2. Release-acquire ordering is enforced between the start of all + * read-side critical sections ending after a grace period ends + * and all the work deferrals done before the same grace period + * starts. + * 3. Release-acquire ordering is enforced between all the work deferrals + * done before a grace period starts and the start of all works deferred + * before the same grace period starts. + * + * [1] https://www.kernel.org/doc/Documentation/RCU/Design/Requirements/Requirements.html. + */ + +#ifndef _KERN_RCU_H +#define _KERN_RCU_H + +#include <kern/atomic.h> +#include <kern/init.h> +#include <kern/rcu_i.h> +#include <kern/rcu_types.h> +#include <kern/thread.h> +#include <kern/work.h> + +/* + * Thread-local RCU data. + */ +struct rcu_reader; + +/* + * Safely store a pointer. + * + * This macro enforces release ordering on the given pointer. + */ +#define rcu_store_ptr(ptr, value) atomic_store(&(ptr), value, ATOMIC_RELEASE) + +/* + * Safely load a pointer. + * + * This macro enforces consume ordering on the given pointer. + */ +#define rcu_load_ptr(ptr) atomic_load(&(ptr), ATOMIC_CONSUME) + +/* + * Read-side critical section functions. + * + * Critical sections are preemptible, may safely nest, and may safely + * be used in interrupt context. It is not allowed to sleep inside a + * read-side critical section. However, it is allowed to acquire locks + * that don't sleep, such as spin locks. + * + * Entering or leaving a critical section doesn't provide any memory + * ordering guarantee. + */ + +static inline void +rcu_read_enter(void) +{ + rcu_reader_inc(thread_rcu_reader(thread_self())); + barrier(); +} + +static inline void +rcu_read_leave(void) +{ + barrier(); + rcu_reader_dec(thread_rcu_reader(thread_self())); +} + +/* + * Initialize a RCU reader. + */ +void rcu_reader_init(struct rcu_reader *reader); + +/* + * Report a context switch on the current processor. + * + * The argument is the RCU reader of the preempted thread. + * + * Interrupts and preemption must be disabled when calling this function. 
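+ *
+ * It is called by the scheduler during a context switch, from
+ * thread_runq_schedule() in kern/thread.c.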
+ */
+void rcu_report_context_switch(struct rcu_reader *reader);
+
+/*
+ * Report a periodic event on the current processor.
+ *
+ * Interrupts and preemption must be disabled when calling this function.
+ */
+void rcu_report_periodic_event(void);
+
+/*
+ * Defer a work until all existing read-side references are dropped,
+ * without blocking.
+ *
+ * The given work is scheduled when all existing read-side references
+ * are dropped.
+ */
+void rcu_defer(struct work *work);
+
+/*
+ * Wait for all existing read-side references to be dropped.
+ *
+ * This function sleeps, and may do so for a moderately long duration,
+ * at least a few system timer ticks, sometimes a lot more.
+ */
+void rcu_wait(void);
+
+/*
+ * This init operation provides :
+ *  - read-side critical sections usable
+ */
+INIT_OP_DECLARE(rcu_bootstrap);
+
+#endif /* _KERN_RCU_H */
diff --git a/kern/rcu_i.h b/kern/rcu_i.h
new file mode 100644
index 0000000..6be933d
--- /dev/null
+++ b/kern/rcu_i.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _KERN_RCU_I_H
+#define _KERN_RCU_I_H
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include <kern/macros.h>
+#include <kern/rcu_types.h>
+#include <kern/thread.h>
+
+void rcu_reader_leave(struct rcu_reader *reader);
+
+static inline bool
+rcu_reader_in_cs(const struct rcu_reader *reader)
+{
+ return reader->level != 0;
+}
+
+static inline bool
+rcu_reader_linked(const struct rcu_reader *reader)
+{
+ assert(reader == thread_rcu_reader(thread_self()));
+ return reader->linked;
+}
+
+static inline void
+rcu_reader_inc(struct rcu_reader *reader)
+{
+ reader->level++;
+ assert(reader->level != 0);
+}
+
+static inline void
+rcu_reader_dec(struct rcu_reader *reader)
+{
+ assert(reader->level != 0);
+ reader->level--;
+
+ if (unlikely(!rcu_reader_in_cs(reader) && rcu_reader_linked(reader))) {
+ rcu_reader_leave(reader);
+ }
+}
+
+#endif /* _KERN_RCU_I_H */
diff --git a/kern/rcu_types.h b/kern/rcu_types.h
new file mode 100644
index 0000000..12b8913
--- /dev/null
+++ b/kern/rcu_types.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definition used to avoid inclusion circular dependencies.
+ */ + +#ifndef _KERN_RCU_TYPES_H +#define _KERN_RCU_TYPES_H + +#include <stdbool.h> + +/* + * Thread-local data used to track threads running read-side critical + * sections. + * + * The window ID is valid if and only if the reader is linked. + * + * Interrupts and preemption must be disabled when accessing a reader. + */ +struct rcu_reader { + unsigned int level; + unsigned int wid; + bool linked; +}; + +#endif /* _KERN_RCU_TYPES_H */ diff --git a/kern/rdxtree.c b/kern/rdxtree.c index 678ec17..9e2e759 100644 --- a/kern/rdxtree.c +++ b/kern/rdxtree.c @@ -25,8 +25,8 @@ #include <kern/error.h> #include <kern/init.h> #include <kern/kmem.h> -#include <kern/llsync.h> #include <kern/macros.h> +#include <kern/rcu.h> #include <kern/rdxtree.h> #include <kern/rdxtree_i.h> #include <kern/work.h> @@ -190,17 +190,7 @@ rdxtree_node_schedule_destruction(struct rdxtree_node *node) assert(node->parent == NULL); work_init(&node->work, rdxtree_node_destroy_deferred); - - /* - * This assumes that llsync is initialized before scheduling is started - * so that there can be no read-side reference when destroying the node. - */ - if (!llsync_ready()) { - rdxtree_node_destroy(node); - return; - } - - llsync_defer(&node->work); + rcu_defer(&node->work); } static inline void @@ -238,7 +228,7 @@ rdxtree_node_insert(struct rdxtree_node *node, unsigned short index, assert(node->entries[index] == NULL); node->nr_entries++; - llsync_store_ptr(node->entries[index], entry); + rcu_store_ptr(node->entries[index], entry); } static inline void @@ -255,7 +245,7 @@ rdxtree_node_remove(struct rdxtree_node *node, unsigned short index) assert(node->entries[index] != NULL); node->nr_entries--; - llsync_store_ptr(node->entries[index], NULL); + rcu_store_ptr(node->entries[index], NULL); } static inline void * @@ -267,7 +257,7 @@ rdxtree_node_find(struct rdxtree_node *node, unsigned short *indexp) index = *indexp; while (index < ARRAY_SIZE(node->entries)) { - ptr = rdxtree_entry_addr(llsync_load_ptr(node->entries[index])); + ptr = rdxtree_entry_addr(rcu_load_ptr(node->entries[index])); if (ptr != NULL) { *indexp = index; @@ -355,7 +345,7 @@ rdxtree_shrink(struct rdxtree *tree) rdxtree_node_unlink(rdxtree_entry_addr(entry)); } - llsync_store_ptr(tree->root, entry); + rcu_store_ptr(tree->root, entry); /* * There is still one valid entry (the first one) in this node. 
It @@ -410,7 +400,7 @@ rdxtree_grow(struct rdxtree *tree, rdxtree_key_t key) rdxtree_node_insert(node, 0, tree->root); tree->height++; - llsync_store_ptr(tree->root, rdxtree_node_to_entry(node)); + rcu_store_ptr(tree->root, rdxtree_node_to_entry(node)); root = node; } while (new_height > tree->height); @@ -433,7 +423,7 @@ rdxtree_cleanup(struct rdxtree *tree, struct rdxtree_node *node) if (node->parent == NULL) { tree->height = 0; - llsync_store_ptr(tree->root, NULL); + rcu_store_ptr(tree->root, NULL); rdxtree_node_schedule_destruction(node); break; } @@ -488,7 +478,7 @@ rdxtree_insert_common(struct rdxtree *tree, rdxtree_key_t key, return ERROR_BUSY; } - llsync_store_ptr(tree->root, ptr); + rcu_store_ptr(tree->root, ptr); if (slotp != NULL) { *slotp = &tree->root; @@ -516,7 +506,7 @@ rdxtree_insert_common(struct rdxtree *tree, rdxtree_key_t key, } if (prev == NULL) { - llsync_store_ptr(tree->root, rdxtree_node_to_entry(node)); + rcu_store_ptr(tree->root, rdxtree_node_to_entry(node)); } else { rdxtree_node_link(node, prev, index); rdxtree_node_insert_node(prev, index, node); @@ -565,7 +555,7 @@ rdxtree_insert_alloc_common(struct rdxtree *tree, void *ptr, if (unlikely(height == 0)) { if (tree->root == NULL) { - llsync_store_ptr(tree->root, ptr); + rcu_store_ptr(tree->root, ptr); *keyp = 0; if (slotp != NULL) { @@ -661,7 +651,7 @@ rdxtree_remove(struct rdxtree *tree, rdxtree_key_t key) node = rdxtree_entry_addr(tree->root); if (unlikely(height == 0)) { - llsync_store_ptr(tree->root, NULL); + rcu_store_ptr(tree->root, NULL); return node; } @@ -700,7 +690,7 @@ rdxtree_lookup_common(const struct rdxtree *tree, rdxtree_key_t key, unsigned short height, shift, index; void *entry; - entry = llsync_load_ptr(tree->root); + entry = rcu_load_ptr(tree->root); if (entry == NULL) { node = NULL; @@ -731,7 +721,7 @@ rdxtree_lookup_common(const struct rdxtree *tree, rdxtree_key_t key, prev = node; index = (unsigned short)(key >> shift) & RDXTREE_RADIX_MASK; - entry = llsync_load_ptr(node->entries[index]); + entry = rcu_load_ptr(node->entries[index]); node = rdxtree_entry_addr(entry); shift -= RDXTREE_RADIX; height--; @@ -755,7 +745,7 @@ rdxtree_replace_slot(void **slot, void *ptr) old = *slot; assert(old != NULL); rdxtree_assert_alignment(old); - llsync_store_ptr(*slot, ptr); + rcu_store_ptr(*slot, ptr); return old; } @@ -767,7 +757,7 @@ rdxtree_walk_next(struct rdxtree *tree, struct rdxtree_iter *iter) rdxtree_key_t key; void *entry; - entry = llsync_load_ptr(tree->root); + entry = rcu_load_ptr(tree->root); if (entry == NULL) { return NULL; @@ -863,7 +853,7 @@ rdxtree_remove_all(struct rdxtree *tree) if (tree->height == 0) { if (tree->root != NULL) { - llsync_store_ptr(tree->root, NULL); + rcu_store_ptr(tree->root, NULL); } return; @@ -906,4 +896,5 @@ rdxtree_setup(void) } INIT_OP_DEFINE(rdxtree_setup, - INIT_OP_DEP(kmem_bootstrap, true)); + INIT_OP_DEP(kmem_bootstrap, true), + INIT_OP_DEP(rcu_bootstrap, true)); diff --git a/kern/rdxtree.h b/kern/rdxtree.h index 1430208..f19f349 100644 --- a/kern/rdxtree.h +++ b/kern/rdxtree.h @@ -19,8 +19,7 @@ * * In addition to the standard insertion operation, this implementation can * allocate keys for the caller at insertion time. It also allows lookups to - * occur concurrently with updates through the use of lockless synchronization - * (see the llsync module). + * occur concurrently with updates using RCU. 
*/

#ifndef _KERN_RDXTREE_H
@@ -30,7 +29,7 @@
#include <stdint.h>

#include <kern/init.h>
-#include <kern/llsync.h>
+#include <kern/rcu.h>

typedef uint64_t rdxtree_key_t;

@@ -160,7 +159,7 @@ rdxtree_lookup_slot(const struct rdxtree *tree, rdxtree_key_t key)
static inline void *
rdxtree_load_slot(void **slot)
{
- return llsync_load_ptr(*slot);
+ return rcu_load_ptr(*slot);
}

/*
diff --git a/kern/rtmutex.c b/kern/rtmutex.c
index 36c72a3..55d17cd 100644
--- a/kern/rtmutex.c
+++ b/kern/rtmutex.c
@@ -225,14 +225,29 @@ out:
}

static int
+rtmutex_bootstrap(void)
+{
+ return 0;
+}
+
+INIT_OP_DEFINE(rtmutex_bootstrap,
+ INIT_OP_DEP(thread_setup_booter, true));
+
+static int
rtmutex_setup(void)
{
rtmutex_setup_debug();
return 0;
}

-INIT_OP_DEFINE(rtmutex_setup,
#ifdef CONFIG_MUTEX_DEBUG
+#define RTMUTEX_DEBUG_INIT_OPS \
INIT_OP_DEP(syscnt_setup, true),
+#else /* CONFIG_MUTEX_DEBUG */
+#define RTMUTEX_DEBUG_INIT_OPS
#endif /* CONFIG_MUTEX_DEBUG */
- );
+
+INIT_OP_DEFINE(rtmutex_setup,
+ INIT_OP_DEP(rtmutex_bootstrap, true),
+ RTMUTEX_DEBUG_INIT_OPS
+);
diff --git a/kern/rtmutex.h b/kern/rtmutex.h
index f8b11bd..90b3438 100644
--- a/kern/rtmutex.h
+++ b/kern/rtmutex.h
@@ -132,6 +132,10 @@ rtmutex_unlock(struct rtmutex *rtmutex)
}
}

+/*
+ * Mutex init operations. See kern/mutex.h.
+ */
+INIT_OP_DECLARE(rtmutex_bootstrap);
INIT_OP_DECLARE(rtmutex_setup);

#endif /* _KERN_RTMUTEX_H */
diff --git a/kern/sleepq.c b/kern/sleepq.c
index 0d04c6e..44ab53b 100644
--- a/kern/sleepq.c
+++ b/kern/sleepq.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -431,6 +431,16 @@ sleepq_remove_waiter(struct sleepq *sleepq, struct sleepq_waiter *waiter)
list_remove(&waiter->node);
}

+static struct sleepq_waiter *
+sleepq_get_last_waiter(struct sleepq *sleepq)
+{
+ if (list_empty(&sleepq->waiters)) {
+ return NULL;
+ }
+
+ return list_last_entry(&sleepq->waiters, struct sleepq_waiter, node);
+}
+
bool
sleepq_empty(const struct sleepq *sleepq)
{
@@ -441,7 +451,7 @@ static int
sleepq_wait_common(struct sleepq *sleepq, const char *wchan,
bool timed, uint64_t ticks)
{
- struct sleepq_waiter waiter;
+ struct sleepq_waiter waiter, *next;
struct thread *thread;
int error;
@@ -469,6 +479,17 @@ sleepq_wait_common(struct sleepq *sleepq, const char *wchan,
sleepq_remove_waiter(sleepq, &waiter);

+ /*
+ * Chain wake-ups here to prevent broadcasting from walking a list
+ * with preemption disabled. Note that this doesn't guard against
+ * the thundering herd effect for condition variables.
+ */ + next = sleepq_get_last_waiter(sleepq); + + if (next) { + sleepq_waiter_wakeup(next); + } + return error; } @@ -503,46 +524,18 @@ sleepq_signal(struct sleepq *sleepq) sleepq_waiter_wakeup(waiter); } -static void -sleepq_wakeup_common(struct sleepq *sleepq) -{ - struct sleepq_waiter *waiter; - - assert(!list_empty(&sleepq->waiters)); - - waiter = list_last_entry(&sleepq->waiters, struct sleepq_waiter, node); - sleepq_waiter_wakeup(waiter); -} - void sleepq_broadcast(struct sleepq *sleepq) { struct sleepq_waiter *waiter; - if (sleepq->oldest_waiter == NULL) { - goto out; - } - - list_for_each_entry(&sleepq->waiters, waiter, node) { - sleepq_waiter_set_pending_wakeup(waiter); - - if (waiter == sleepq->oldest_waiter) { - break; - } - } - - sleepq->oldest_waiter = NULL; - -out: - sleepq_wakeup_common(sleepq); -} + waiter = sleepq->oldest_waiter; -void -sleepq_wakeup(struct sleepq *sleepq) -{ - if (list_empty(&sleepq->waiters)) { + if (!waiter) { return; } - sleepq_wakeup_common(sleepq); + sleepq->oldest_waiter = NULL; + sleepq_waiter_set_pending_wakeup(waiter); + sleepq_waiter_wakeup(waiter); } diff --git a/kern/sleepq.h b/kern/sleepq.h index 007e3f2..213925c 100644 --- a/kern/sleepq.h +++ b/kern/sleepq.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 Richard Braun. + * Copyright (c) 2017-2018 Richard Braun. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -125,25 +125,12 @@ int sleepq_timedwait(struct sleepq *sleepq, const char *wchan, uint64_t ticks); * At least one thread is awaken if any threads are waiting on the sleep * queue. * - * A broadcast differs only by also making all currently waiting threads - * pending for wake-up. As with sleepq_signal, a single thread may be - * awaken. The rationale is to force users to implement "chained waking" - * in order to avoid the thundering herd effect. + * Broadcasting a sleep queue wakes up all waiting threads. */ void sleepq_signal(struct sleepq *sleepq); void sleepq_broadcast(struct sleepq *sleepq); /* - * Wake up a pending thread. - * - * This function may only wake up a thread pending for wake-up after a - * broadcast. It is used to chain wake-ups to avoid the thundering herd - * effect. If there are no threads pending for wake-up, this function - * does nothing. - */ -void sleepq_wakeup(struct sleepq *sleepq); - -/* * This init operation provides : * - sleepq creation * - module fully initialized diff --git a/kern/slist.h b/kern/slist.h index c0dcb08..d708f38 100644 --- a/kern/slist.h +++ b/kern/slist.h @@ -24,8 +24,8 @@ #include <stdbool.h> #include <stddef.h> -#include <kern/llsync.h> #include <kern/macros.h> +#include <kern/rcu.h> #include <kern/slist_types.h> struct slist; @@ -303,46 +303,46 @@ for (entry = slist_first_entry(list, typeof(*entry), member), \ * Return the first node of a list. */ static inline struct slist_node * -slist_llsync_first(const struct slist *list) +slist_rcu_first(const struct slist *list) { - return llsync_load_ptr(list->first); + return rcu_load_ptr(list->first); } /* * Return the node next to the given node. */ static inline struct slist_node * -slist_llsync_next(const struct slist_node *node) +slist_rcu_next(const struct slist_node *node) { - return llsync_load_ptr(node->next); + return rcu_load_ptr(node->next); } /* * Insert a node at the head of a list. 
*/ static inline void -slist_llsync_insert_head(struct slist *list, struct slist_node *node) +slist_rcu_insert_head(struct slist *list, struct slist_node *node) { if (slist_empty(list)) { list->last = node; } node->next = list->first; - llsync_store_ptr(list->first, node); + rcu_store_ptr(list->first, node); } /* * Insert a node at the tail of a list. */ static inline void -slist_llsync_insert_tail(struct slist *list, struct slist_node *node) +slist_rcu_insert_tail(struct slist *list, struct slist_node *node) { node->next = NULL; if (slist_empty(list)) { - llsync_store_ptr(list->first, node); + rcu_store_ptr(list->first, node); } else { - llsync_store_ptr(list->last->next, node); + rcu_store_ptr(list->last->next, node); } list->last = node; @@ -354,11 +354,11 @@ slist_llsync_insert_tail(struct slist *list, struct slist_node *node) * The prev node must be valid. */ static inline void -slist_llsync_insert_after(struct slist *list, struct slist_node *prev, +slist_rcu_insert_after(struct slist *list, struct slist_node *prev, struct slist_node *node) { node->next = prev->next; - llsync_store_ptr(prev->next, node); + rcu_store_ptr(prev->next, node); if (list->last == prev) { list->last = node; @@ -373,20 +373,20 @@ slist_llsync_insert_after(struct slist *list, struct slist_node *prev, * first node is removed. */ static inline void -slist_llsync_remove(struct slist *list, struct slist_node *prev) +slist_rcu_remove(struct slist *list, struct slist_node *prev) { struct slist_node *node; if (slist_end(prev)) { node = list->first; - llsync_store_ptr(list->first, node->next); + rcu_store_ptr(list->first, node->next); if (list->last == node) { list->last = NULL; } } else { node = prev->next; - llsync_store_ptr(prev->next, node->next); + rcu_store_ptr(prev->next, node->next); if (list->last == node) { list->last = prev; @@ -398,28 +398,28 @@ slist_llsync_remove(struct slist *list, struct slist_node *prev) * Macro that evaluates to the address of the structure containing the * given node based on the given type and member. */ -#define slist_llsync_entry(node, type, member) \ - structof(llsync_load_ptr(node), type, member) +#define slist_rcu_entry(node, type, member) \ + structof(rcu_load_ptr(node), type, member) /* * Get the first entry of a list. */ -#define slist_llsync_first_entry(list, type, member) \ +#define slist_rcu_first_entry(list, type, member) \ MACRO_BEGIN \ struct slist_node *___first; \ \ - ___first = slist_llsync_first(list); \ + ___first = slist_rcu_first(list); \ slist_end(___first) ? NULL : slist_entry(___first, type, member); \ MACRO_END /* * Get the entry next to the given entry. */ -#define slist_llsync_next_entry(entry, member) \ +#define slist_rcu_next_entry(entry, member) \ MACRO_BEGIN \ struct slist_node *___next; \ \ - ___next = slist_llsync_next(&entry->member); \ + ___next = slist_rcu_next(&entry->member); \ slist_end(___next) \ ? NULL \ : slist_entry(___next, typeof(*entry), member); \ @@ -428,17 +428,17 @@ MACRO_END /* * Forge a loop to process all nodes of a list. */ -#define slist_llsync_for_each(list, node) \ -for (node = slist_llsync_first(list); \ +#define slist_rcu_for_each(list, node) \ +for (node = slist_rcu_first(list); \ !slist_end(node); \ - node = slist_llsync_next(node)) + node = slist_rcu_next(node)) /* * Forge a loop to process all entries of a list. 
*/ -#define slist_llsync_for_each_entry(list, entry, member) \ -for (entry = slist_llsync_first_entry(list, typeof(*entry), member); \ +#define slist_rcu_for_each_entry(list, entry, member) \ +for (entry = slist_rcu_first_entry(list, typeof(*entry), member); \ entry != NULL; \ - entry = slist_llsync_next_entry(entry, member)) + entry = slist_rcu_next_entry(entry, member)) #endif /* _KERN_SLIST_H */ diff --git a/kern/spinlock.c b/kern/spinlock.c index a591c61..9857b38 100644 --- a/kern/spinlock.c +++ b/kern/spinlock.c @@ -139,6 +139,10 @@ void spinlock_init(struct spinlock *lock) { lock->value = SPINLOCK_UNLOCKED; + +#ifdef SPINLOCK_TRACK_OWNER + lock->owner = NULL; +#endif /* SPINLOCK_TRACK_OWNER */ } static unsigned int @@ -292,6 +296,7 @@ spinlock_lock_slow(struct spinlock *lock) spinlock_store_first_qid(lock, SPINLOCK_QID_NULL); } + spinlock_own(lock); error = spinlock_try_downgrade(lock, qid); if (!error) { diff --git a/kern/spinlock.h b/kern/spinlock.h index dd98cbf..30fc1e5 100644 --- a/kern/spinlock.h +++ b/kern/spinlock.h @@ -36,6 +36,21 @@ struct spinlock; #define spinlock_assert_locked(lock) assert((lock)->value != SPINLOCK_UNLOCKED) +#ifdef SPINLOCK_TRACK_OWNER + +static inline void +spinlock_transfer_owner(struct spinlock *lock, struct thread *owner) +{ + assert(lock->owner == thread_self()); + lock->owner = owner; +} + +#else /* SPINLOCK_TRACK_OWNER */ + +#define spinlock_transfer_owner(lock, owner) + +#endif /* SPINLOCK_TRACK_OWNER */ + /* * Initialize a spin lock. */ diff --git a/kern/spinlock_i.h b/kern/spinlock_i.h index c9dcdd1..a9f5fce 100644 --- a/kern/spinlock_i.h +++ b/kern/spinlock_i.h @@ -26,6 +26,7 @@ #include <kern/error.h> #include <kern/macros.h> #include <kern/spinlock_types.h> +#include <kern/thread.h> #include <machine/cpu.h> /* @@ -36,6 +37,33 @@ #define SPINLOCK_UNLOCKED 0 #define SPINLOCK_LOCKED 1 +#ifdef CONFIG_SPINLOCK_DEBUG +#define SPINLOCK_TRACK_OWNER +#endif + +#ifdef SPINLOCK_TRACK_OWNER + +static inline void +spinlock_own(struct spinlock *lock) +{ + assert(!lock->owner); + lock->owner = thread_self(); +} + +static inline void +spinlock_disown(struct spinlock *lock) +{ + assert(lock->owner == thread_self()); + lock->owner = NULL; +} + +#else /* SPINLOCK_TRACK_OWNER */ + +#define spinlock_own(lock) +#define spinlock_disown(lock) + +#endif /* SPINLOCK_TRACK_OWNER */ + static inline int spinlock_lock_fast(struct spinlock *lock) { @@ -47,6 +75,7 @@ spinlock_lock_fast(struct spinlock *lock) return ERROR_BUSY; } + spinlock_own(lock); return 0; } @@ -55,6 +84,7 @@ spinlock_unlock_fast(struct spinlock *lock) { unsigned int prev; + spinlock_disown(lock); prev = atomic_cas_release(&lock->value, SPINLOCK_LOCKED, SPINLOCK_UNLOCKED); if (unlikely(prev != SPINLOCK_LOCKED)) { diff --git a/kern/spinlock_types.h b/kern/spinlock_types.h index 7dbca37..93f85f7 100644 --- a/kern/spinlock_types.h +++ b/kern/spinlock_types.h @@ -21,8 +21,14 @@ #ifndef _KERN_SPINLOCK_TYPES_H #define _KERN_SPINLOCK_TYPES_H +struct thread; + struct spinlock { unsigned int value; + +#ifdef CONFIG_SPINLOCK_DEBUG + struct thread *owner; +#endif /* CONFIG_SPINLOCK_DEBUG */ }; #endif /* _KERN_SPINLOCK_TYPES_H */ diff --git a/kern/sref.c b/kern/sref.c index 1ad9212..2b20cb4 100644 --- a/kern/sref.c +++ b/kern/sref.c @@ -99,9 +99,9 @@ struct sref_queue { */ struct sref_data { struct spinlock lock; - struct cpumap registered_cpus; + struct cpumap registered_cpus; /* TODO Review usage */ unsigned int nr_registered_cpus; - struct cpumap pending_flushes; + struct cpumap pending_flushes; /* TODO Review 
usage */ unsigned int nr_pending_flushes; unsigned int current_queue_id; struct sref_queue queues[2]; @@ -143,18 +143,7 @@ struct sref_delta { * prevent a race with the periodic event that drives regular flushes * (normally the periodic timer interrupt). * - * Preemption must be disabled when accessing a delta cache. - * - * The dirty flag means there may be data to process, and is used to wake - * up the manager thread. Marking a cache dirty is done either by regular - * threads increasing or decreasing a delta, or the periodic event handler. - * Clearing the dirty flag is only done by the manager thread. Disabling - * preemption makes sure only one thread is accessing the cache at any time, - * but it doesn't prevent the periodic handler from running. Since the handler - * as well as regular threads set that flag, a race between them is harmless. - * On the other hand, the handler won't access the flag if it is run while - * the manager thread is running. Since the manager thread is already running, - * there is no need to wake it up anyway. + * Interrupts and preemption must be disabled when accessing a delta cache. */ struct sref_cache { struct sref_delta deltas[SREF_MAX_DELTAS]; @@ -387,6 +376,8 @@ sref_counter_schedule_review(struct sref_counter *counter) static void sref_counter_add(struct sref_counter *counter, unsigned long delta) { + assert(!cpu_intr_enabled()); + spinlock_lock(&counter->lock); counter->value += delta; @@ -547,19 +538,19 @@ sref_cache_get(void) } static struct sref_cache * -sref_cache_acquire(void) +sref_cache_acquire(unsigned long *flags) { struct sref_cache *cache; - thread_preempt_disable(); + thread_preempt_disable_intr_save(flags); cache = sref_cache_get(); return cache; } static void -sref_cache_release(void) +sref_cache_release(unsigned long flags) { - thread_preempt_enable(); + thread_preempt_enable_intr_restore(flags); } static bool @@ -640,10 +631,11 @@ static void sref_cache_flush(struct sref_cache *cache, struct sref_queue *queue) { struct sref_delta *delta; + unsigned long flags; unsigned int cpu; for (;;) { - thread_preempt_disable(); + thread_preempt_disable_intr_save(&flags); if (list_empty(&cache->valid_deltas)) { break; @@ -652,9 +644,11 @@ sref_cache_flush(struct sref_cache *cache, struct sref_queue *queue) delta = list_first_entry(&cache->valid_deltas, typeof(*delta), node); sref_cache_remove_delta(delta); - thread_preempt_enable(); + thread_preempt_enable_intr_restore(flags); } + cpu_intr_restore(flags); + cpu = cpu_id(); spinlock_lock(&sref_data.lock); @@ -684,30 +678,15 @@ sref_cache_flush(struct sref_cache *cache, struct sref_queue *queue) } static void -sref_cache_wakeup_manager(struct sref_cache *cache) -{ - unsigned long flags; - - cpu_intr_save(&flags); - thread_wakeup(cache->manager); - cpu_intr_restore(flags); -} - -/* - * Force the manager thread of a cache to run. - */ -static void sref_cache_manage(struct sref_cache *cache) { + assert(!cpu_intr_enabled()); + assert(!thread_preempt_enabled()); + sref_cache_mark_dirty(cache); - sref_cache_wakeup_manager(cache); + thread_wakeup(cache->manager); } -/* - * Check if a cache is dirty and wake up its manager thread if it is. - * - * Return true if the cache is dirty and requires maintenance. 
- */ static bool sref_cache_check(struct sref_cache *cache) { @@ -715,7 +694,7 @@ sref_cache_check(struct sref_cache *cache) return false; } - sref_cache_wakeup_manager(cache); + sref_cache_manage(cache); return true; } @@ -734,6 +713,7 @@ sref_review(struct sref_queue *queue) int64_t nr_dirty, nr_revive, nr_true; struct sref_counter *counter; struct work_queue works; + unsigned long flags; bool requeue; int error; @@ -745,7 +725,7 @@ sref_review(struct sref_queue *queue) while (!sref_queue_empty(queue)) { counter = sref_queue_pop(queue); - spinlock_lock(&counter->lock); + spinlock_lock_intr_save(&counter->lock, &flags); assert(sref_counter_is_queued(counter)); sref_counter_clear_queued(counter); @@ -753,7 +733,7 @@ sref_review(struct sref_queue *queue) if (counter->value != 0) { sref_counter_clear_dirty(counter); sref_counter_clear_dying(counter); - spinlock_unlock(&counter->lock); + spinlock_unlock_intr_restore(&counter->lock, flags); } else { if (sref_counter_is_dirty(counter)) { requeue = true; @@ -772,14 +752,14 @@ sref_review(struct sref_queue *queue) if (requeue) { sref_counter_schedule_review(counter); - spinlock_unlock(&counter->lock); + spinlock_unlock_intr_restore(&counter->lock, flags); } else { /* * Keep in mind that the work structure shares memory with * the counter data. Unlocking isn't needed here, since this * counter is now really at 0, but do it for consistency. */ - spinlock_unlock(&counter->lock); + spinlock_unlock_intr_restore(&counter->lock, flags); nr_true++; work_init(&counter->work, sref_noref); work_queue_push(&works, &counter->work); @@ -847,6 +827,8 @@ sref_bootstrap(void) } INIT_OP_DEFINE(sref_bootstrap, + INIT_OP_DEP(cpu_setup, true), + INIT_OP_DEP(spinlock_setup, true), INIT_OP_DEP(syscnt_setup, true)); static void __init @@ -896,10 +878,10 @@ sref_setup(void) INIT_OP_DEFINE(sref_setup, INIT_OP_DEP(cpu_mp_probe, true), + INIT_OP_DEP(cpumap_setup, true), INIT_OP_DEP(log_setup, true), INIT_OP_DEP(panic_setup, true), INIT_OP_DEP(sref_bootstrap, true), - INIT_OP_DEP(syscnt_setup, true), INIT_OP_DEP(thread_setup, true)); void @@ -937,6 +919,7 @@ int sref_unregister(void) { struct sref_cache *cache; + unsigned long flags; unsigned int cpu; bool dirty; int error; @@ -944,22 +927,17 @@ sref_unregister(void) assert(!thread_preempt_enabled()); cache = sref_cache_get(); - assert(sref_cache_is_registered(cache)); - /* - * Check the dirty flag after clearing the registered flag. The - * periodic event handler won't set the dirty flag if the processor - * is unregistered. It is then safe to test if that flag is set. - * Everything involved is processor-local, therefore a simple compiler - * barrier is enough to enforce ordering. 
- */ + cpu_intr_save(&flags); + + assert(sref_cache_is_registered(cache)); sref_cache_clear_registered(cache); - barrier(); dirty = sref_cache_check(cache); if (dirty) { sref_cache_mark_registered(cache); - return ERROR_BUSY; + error = ERROR_BUSY; + goto out; } cpu = cpu_id(); @@ -991,6 +969,9 @@ sref_unregister(void) spinlock_unlock(&sref_data.lock); +out: + cpu_intr_restore(flags); + return error; } @@ -1012,13 +993,16 @@ sref_report_periodic_event(void) void sref_counter_init(struct sref_counter *counter, + unsigned long init_value, struct sref_weakref *weakref, sref_noref_fn_t noref_fn) { + assert(init_value != 0); + counter->noref_fn = noref_fn; spinlock_init(&counter->lock); counter->flags = 0; - counter->value = 1; + counter->value = init_value; counter->weakref = weakref; if (weakref) { @@ -1040,10 +1024,11 @@ void sref_counter_inc(struct sref_counter *counter) { struct sref_cache *cache; + unsigned long flags; - cache = sref_cache_acquire(); + cache = sref_cache_acquire(&flags); sref_counter_inc_common(counter, cache); - sref_cache_release(); + sref_cache_release(flags); } void @@ -1051,12 +1036,13 @@ sref_counter_dec(struct sref_counter *counter) { struct sref_cache *cache; struct sref_delta *delta; + unsigned long flags; - cache = sref_cache_acquire(); + cache = sref_cache_acquire(&flags); sref_cache_mark_dirty(cache); delta = sref_cache_get_delta(cache, counter); sref_delta_dec(delta); - sref_cache_release(); + sref_cache_release(flags); } struct sref_counter * @@ -1064,8 +1050,9 @@ sref_weakref_get(struct sref_weakref *weakref) { struct sref_counter *counter; struct sref_cache *cache; + unsigned long flags; - cache = sref_cache_acquire(); + cache = sref_cache_acquire(&flags); counter = sref_weakref_tryget(weakref); @@ -1073,7 +1060,7 @@ sref_weakref_get(struct sref_weakref *weakref) sref_counter_inc_common(counter, cache); } - sref_cache_release(); + sref_cache_release(flags); return counter; } diff --git a/kern/sref.h b/kern/sref.h index 768f1ee..fb62d83 100644 --- a/kern/sref.h +++ b/kern/sref.h @@ -31,6 +31,8 @@ #ifndef _KERN_SREF_H #define _KERN_SREF_H +#include <kern/init.h> + /* * Scalable reference counter. */ @@ -77,17 +79,20 @@ void sref_report_periodic_event(void); /* * Initialize a scalable reference counter. * - * The counter is set to 1. The no-reference function is called (from thread - * context) when it is certain that the true number of references is 0. + * The no-reference function is called (from thread context) when it is + * certain that the true number of references is 0. */ void sref_counter_init(struct sref_counter *counter, + unsigned long init_value, struct sref_weakref *weakref, sref_noref_fn_t noref_fn); /* * Counter operations. * - * These functions may safely be called with preemption disabled. + * These functions may safely be called in interrupt context. + * + * These functions imply a compiler barrier. */ void sref_counter_inc(struct sref_counter *counter); void sref_counter_dec(struct sref_counter *counter); @@ -98,8 +103,14 @@ void sref_counter_dec(struct sref_counter *counter); * If successful, increment the reference counter before returning it. * Otherwise return NULL. * - * This function may safely be called with preemption disabled. + * This function may safely be called in interrupt context. 
*/ struct sref_counter * sref_weakref_get(struct sref_weakref *weakref); +/* + * This init operation provides : + * - sref counter and weakref initialization and usage + */ +INIT_OP_DECLARE(sref_bootstrap); + #endif /* _KERN_SREF_H */ diff --git a/kern/sref_i.h b/kern/sref_i.h index 27f962c..65d0c1b 100644 --- a/kern/sref_i.h +++ b/kern/sref_i.h @@ -51,17 +51,21 @@ struct sref_weakref { * It's tempting to merge the flags into the node member, but since they're * not protected by the same lock, store them separately. * - * TODO Locking keys. + * Locking keys : + * (c) sref_counter + * (g) sref_data + * + * Interrupts must be disabled when accessing a global counter. */ struct sref_counter { sref_noref_fn_t noref_fn; union { struct { - struct slist_node node; + struct slist_node node; /* (g) */ struct spinlock lock; - int flags; - unsigned long value; + int flags; /* (c) */ + unsigned long value; /* (c) */ struct sref_weakref *weakref; }; diff --git a/kern/syscnt.c b/kern/syscnt.c index cd13a39..e0b213d 100644 --- a/kern/syscnt.c +++ b/kern/syscnt.c @@ -76,7 +76,8 @@ syscnt_setup(void) * modules may use system counters for debugging. */ INIT_OP_DEFINE(syscnt_setup, - INIT_OP_DEP(thread_setup_booter, true)); + INIT_OP_DEP(mutex_bootstrap, true), + INIT_OP_DEP(spinlock_setup, true)); void __init syscnt_register(struct syscnt *syscnt, const char *name) diff --git a/kern/syscnt.h b/kern/syscnt.h index 816aa4b..fb00b75 100644 --- a/kern/syscnt.h +++ b/kern/syscnt.h @@ -53,6 +53,12 @@ void syscnt_register(struct syscnt *syscnt, const char *name); #ifdef ATOMIC_HAVE_64B_OPS static inline void +syscnt_set(struct syscnt *syscnt, uint64_t value) +{ + atomic_store(&syscnt->value, value, ATOMIC_RELAXED); +} + +static inline void syscnt_add(struct syscnt *syscnt, int64_t delta) { atomic_add(&syscnt->value, delta, ATOMIC_RELAXED); @@ -67,6 +73,16 @@ syscnt_read(const struct syscnt *syscnt) #else /* ATOMIC_HAVE_64B_OPS */ static inline void +syscnt_set(struct syscnt *syscnt, uint64_t value) +{ + unsigned long flags; + + spinlock_lock_intr_save(&syscnt->lock, &flags); + syscnt->value = value; + spinlock_unlock_intr_restore(&syscnt->lock, flags); +} + +static inline void syscnt_add(struct syscnt *syscnt, int64_t delta) { unsigned long flags; @@ -114,6 +130,7 @@ void syscnt_info(const char *prefix); /* * This init operation provides : * - registration of system counters + * - module fully initialized */ INIT_OP_DECLARE(syscnt_setup); diff --git a/kern/thread.c b/kern/thread.c index d8f0de1..9fe6434 100644 --- a/kern/thread.c +++ b/kern/thread.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2017 Richard Braun. + * Copyright (c) 2012-2018 Richard Braun. 
* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -92,17 +92,16 @@ #include <kern/atomic.h> #include <kern/clock.h> -#include <kern/condition.h> #include <kern/cpumap.h> #include <kern/error.h> #include <kern/init.h> #include <kern/kmem.h> #include <kern/list.h> -#include <kern/llsync.h> #include <kern/macros.h> #include <kern/panic.h> #include <kern/percpu.h> #include <kern/perfmon.h> +#include <kern/rcu.h> #include <kern/shell.h> #include <kern/sleepq.h> #include <kern/spinlock.h> @@ -637,8 +636,6 @@ thread_runq_schedule(struct thread_runq *runq) assert(!cpu_intr_enabled()); spinlock_assert_locked(&runq->lock); - llsync_report_context_switch(); - thread_clear_flag(prev, THREAD_YIELD); thread_runq_put_prev(runq, prev); @@ -657,6 +654,9 @@ thread_runq_schedule(struct thread_runq *runq) if (likely(prev != next)) { thread_runq_schedule_unload(prev); + rcu_report_context_switch(thread_rcu_reader(prev)); + spinlock_transfer_owner(&runq->lock, next); + /* * That's where the true context switch occurs. The next thread must * unlock the run queue and reenable preemption. Note that unlocking @@ -669,15 +669,18 @@ thread_runq_schedule(struct thread_runq *runq) * The thread is dispatched on a processor once again. * * Keep in mind the system state may have changed a lot since this - * function was called. In particular, the next thread may have been - * destroyed, and must not be referenced any more. + * function was called. In particular : + * - The next thread may have been destroyed, and must not be + * referenced any more. + * - The current thread may have been migrated to another processor. */ barrier(); - thread_runq_schedule_load(prev); - /* The thread might have been moved to another processor */ + next = NULL; runq = thread_runq_local(); + } else { + next = NULL; } assert(prev->preempt_level == THREAD_SUSPEND_PREEMPT_LEVEL); @@ -1713,6 +1716,7 @@ thread_init_booter(unsigned int cpu) booter->flags = 0; booter->intr_level = 0; booter->preempt_level = 1; + rcu_reader_init(&booter->rcu_reader); cpumap_fill(&booter->cpumap); thread_set_user_sched_policy(booter, THREAD_SCHED_POLICY_IDLE); thread_set_user_sched_class(booter, THREAD_SCHED_CLASS_IDLE); @@ -1836,12 +1840,11 @@ thread_init(struct thread *thread, void *stack, } turnstile_td_init(&thread->turnstile_td); - thread->last_cond = NULL; thread->propagate_priority = false; thread->preempt_level = THREAD_SUSPEND_PREEMPT_LEVEL; thread->pin_level = 0; thread->intr_level = 0; - thread->llsync_level = 0; + rcu_reader_init(&thread->rcu_reader); cpumap_copy(&thread->cpumap, cpumap); thread_set_user_sched_policy(thread, attr->policy); thread_set_user_sched_class(thread, thread_policy_to_class(attr->policy)); @@ -2137,7 +2140,6 @@ static void thread_idle(void *arg) { struct thread *self; - int error; (void)arg; @@ -2145,14 +2147,6 @@ thread_idle(void *arg) for (;;) { thread_preempt_disable(); - error = sref_unregister(); - - if (error) { - assert(error == ERROR_BUSY); - goto error_sref; - } - - llsync_unregister(); for (;;) { cpu_intr_disable(); @@ -2165,10 +2159,6 @@ thread_idle(void *arg) cpu_idle(); } - llsync_register(); - sref_register(); - -error_sref: thread_preempt_enable(); } } @@ -2340,23 +2330,33 @@ thread_setup(void) return 0; } +#ifdef CONFIG_THREAD_STACK_GUARD +#define THREAD_STACK_GUARD_INIT_OP_DEPS \ + INIT_OP_DEP(vm_kmem_setup, true), \ + INIT_OP_DEP(vm_map_setup, true), \ + INIT_OP_DEP(vm_page_setup, true), +#else /* 
CONFIG_THREAD_STACK_GUARD */ +#define THREAD_STACK_GUARD_INIT_OP_DEPS +#endif /* CONFIG_THREAD_STACK_GUARD */ + +#ifdef CONFIG_PERFMON +#define THREAD_PERFMON_INIT_OP_DEPS \ + INIT_OP_DEP(perfmon_bootstrap, true), +#else /* CONFIG_PERFMON */ +#define THREAD_PERFMON_INIT_OP_DEPS +#endif /* CONFIG_PERFMON */ + INIT_OP_DEFINE(thread_setup, INIT_OP_DEP(cpumap_setup, true), INIT_OP_DEP(kmem_setup, true), INIT_OP_DEP(pmap_setup, true), -#ifdef CONFIG_PERFMON - INIT_OP_DEP(perfmon_bootstrap, true), -#endif INIT_OP_DEP(sleepq_setup, true), INIT_OP_DEP(task_setup, true), INIT_OP_DEP(thread_bootstrap, true), INIT_OP_DEP(turnstile_setup, true), -#ifdef CONFIG_THREAD_STACK_GUARD - INIT_OP_DEP(vm_kmem_setup, true), - INIT_OP_DEP(vm_map_setup, true), - INIT_OP_DEP(vm_page_setup, true), -#endif - ); + THREAD_PERFMON_INIT_OP_DEPS + THREAD_STACK_GUARD_INIT_OP_DEPS +); void __init thread_ap_setup(void) @@ -2654,11 +2654,11 @@ thread_run_scheduler(void) assert(thread == runq->current); assert(thread->preempt_level == (THREAD_SUSPEND_PREEMPT_LEVEL - 1)); - llsync_register(); sref_register(); spinlock_lock(&runq->lock); thread = thread_runq_get_next(thread_runq_local()); + spinlock_transfer_owner(&runq->lock, thread); tcb_load(&thread->tcb); } diff --git a/kern/thread.h b/kern/thread.h index 8e1d85e..a6faa95 100644 --- a/kern/thread.h +++ b/kern/thread.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2017 Richard Braun. + * Copyright (c) 2012-2018 Richard Braun. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -41,7 +41,6 @@ #include <kern/atomic.h> #include <kern/init.h> -#include <kern/condition.h> #include <kern/cpumap.h> #include <kern/kernel.h> #include <kern/macros.h> @@ -229,6 +228,8 @@ int thread_timedsleep(struct spinlock *interlock, const void *wchan_addr, * * If the target thread is NULL, the calling thread, or already in the * running state, no action is performed and ERROR_INVAL is returned. + * + * TODO Describe memory ordering with regard to thread_sleep(). */ int thread_wakeup(struct thread *thread); @@ -250,6 +251,8 @@ noreturn void thread_run_scheduler(void); * This call does nothing if preemption is disabled, or the scheduler * determines the caller should continue to run (e.g. it's currently the only * runnable thread). + * + * Implies a full memory barrier if a context switch occurred. */ void thread_yield(void); @@ -450,48 +453,6 @@ thread_sleepq_return(struct sleepq *sleepq) } /* - * Condition variable related functions. - */ - -static inline void -thread_set_last_cond(struct condition *last_cond) -{ - struct thread *thread; - - thread = thread_self(); - assert(thread->last_cond == NULL); - thread->last_cond = last_cond; -} - -static inline struct condition * -thread_pull_last_cond(void) -{ - struct condition *last_cond; - struct thread *thread; - - thread = thread_self(); - last_cond = thread->last_cond; - - if (last_cond != NULL) { - thread->last_cond = NULL; - } - - return last_cond; -} - -static inline void -thread_wakeup_last_cond(void) -{ - struct condition *last_cond; - - last_cond = thread_pull_last_cond(); - - if (last_cond != NULL) { - condition_wakeup(last_cond); - } -} - -/* * Turnstile lending functions. */ @@ -691,38 +652,13 @@ thread_intr_leave(void) } /* - * Lockless synchronization read-side critical section level control functions. + * RCU functions. 
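+ *
+ * thread_rcu_reader() returns the per-thread data used by the rcu module
+ * to track read-side critical sections.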
 */
-static inline int
-thread_llsync_in_read_cs(void)
-{
-    struct thread *thread;
-
-    thread = thread_self();
-    return thread->llsync_level != 0;
-}
-
-static inline void
-thread_llsync_read_inc(void)
-{
-    struct thread *thread;
-
-    thread = thread_self();
-    thread->llsync_level++;
-    assert(thread->llsync_level != 0);
-    barrier();
-}
-
-static inline void
-thread_llsync_read_dec(void)
+static inline struct rcu_reader *
+thread_rcu_reader(struct thread *thread)
 {
-    struct thread *thread;
-
-    barrier();
-    thread = thread_self();
-    assert(thread->llsync_level != 0);
-    thread->llsync_level--;
+    return &thread->rcu_reader;
 }
 
 /*
diff --git a/kern/thread_i.h b/kern/thread_i.h
index 97d037b..476d013 100644
--- a/kern/thread_i.h
+++ b/kern/thread_i.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -22,9 +22,9 @@
 #include <stdbool.h>
 
 #include <kern/atomic.h>
-#include <kern/condition_types.h>
 #include <kern/cpumap.h>
 #include <kern/list_types.h>
+#include <kern/rcu_types.h>
 #include <kern/spinlock_types.h>
 #include <kern/turnstile_types.h>
 #include <machine/cpu.h>
@@ -125,15 +125,6 @@ struct thread {
     /* Turnstile available for lending */
     struct turnstile *priv_turnstile;   /* (-) */
 
-    /*
-     * When a thread wakes up after waiting for a condition variable,
-     * it sets this variable so that other waiters are only awaken
-     * after the associated mutex has actually been released.
-     *
-     * This member is thread-local.
-     */
-    struct condition *last_cond;        /* (-) */
-
     struct turnstile_td turnstile_td;   /* (t) */
 
     /* True if priority must be propagated when preemption is reenabled */
@@ -148,8 +139,8 @@ struct thread {
     /* Interrupt level, in thread context if 0 */
     unsigned short intr_level;          /* (-) */
 
-    /* Read-side critical section level, not in any if 0 */
-    unsigned short llsync_level;        /* (-) */
+    /* RCU per-thread data */
+    struct rcu_reader rcu_reader;       /* (-) */
 
     /* Processors on which this thread is allowed to run */
     struct cpumap cpumap;               /* (r) */
diff --git a/kern/timer.c b/kern/timer.c
index d6d1d9d..cafe49f 100644
--- a/kern/timer.c
+++ b/kern/timer.c
@@ -410,9 +410,20 @@ timer_bucket_filter(struct timer_bucket *bucket, uint64_t now,
 }
 
 static int __init
+timer_bootstrap(void)
+{
+    timer_cpu_data_init(cpu_local_ptr(timer_cpu_data), 0);
+    return 0;
+}
+
+INIT_OP_DEFINE(timer_bootstrap,
+               INIT_OP_DEP(cpu_setup, true),
+               INIT_OP_DEP(spinlock_setup, true));
+
+static int __init
 timer_setup(void)
 {
-    for (unsigned int cpu = 0; cpu < cpu_count(); cpu++) {
+    for (unsigned int cpu = 1; cpu < cpu_count(); cpu++) {
         timer_cpu_data_init(percpu_ptr(timer_cpu_data, cpu), cpu);
     }
 
@@ -420,10 +431,11 @@ timer_setup(void)
 }
 
 INIT_OP_DEFINE(timer_setup,
-               INIT_OP_DEP(boot_setup_intr, true),
-               INIT_OP_DEP(cpu_mp_probe, true));
+               INIT_OP_DEP(cpu_mp_probe, true),
+               INIT_OP_DEP(spinlock_setup, true));
 
-void timer_init(struct timer *timer, timer_fn_t fn, int flags)
+void
+timer_init(struct timer *timer, timer_fn_t fn, int flags)
 {
     timer->fn = fn;
     timer->cpu = TIMER_INVALID_CPU;
diff --git a/kern/timer.h b/kern/timer.h
index ddace45..d47e5a7 100644
--- a/kern/timer.h
+++ b/kern/timer.h
@@ -99,8 +99,7 @@ void timer_report_periodic_event(void);
 /*
  * This init operation provides :
  *  - timer initialization and scheduling
- *  - module fully initialized
  */
-INIT_OP_DECLARE(timer_setup);
+INIT_OP_DECLARE(timer_bootstrap);
 
 #endif /* _KERN_TIMER_H */
diff --git a/kern/work.c b/kern/work.c
index c1bdede..d496961 100644
--- a/kern/work.c
+++ b/kern/work.c
@@ -102,7 +102,6 @@ struct work_pool {
 };
 
 static int work_thread_create(struct work_pool *pool, unsigned int id);
-static void work_thread_destroy(struct work_thread *worker);
 
 static struct work_pool work_pool_cpu_main __percpu;
 static struct work_pool work_pool_cpu_highprio __percpu;
@@ -158,7 +157,16 @@ work_pool_compute_max_threads(unsigned int nr_cpus)
 }
 
 static void __init
-work_pool_init(struct work_pool *pool, unsigned int cpu, int flags)
+work_pool_init(struct work_pool *pool)
+{
+    spinlock_init(&pool->lock);
+    work_queue_init(&pool->queue0);
+    work_queue_init(&pool->queue1);
+    pool->manager = NULL;
+}
+
+static void __init
+work_pool_build(struct work_pool *pool, unsigned int cpu, int flags)
 {
     char name[SYSCNT_NAME_SIZE];
     const char *suffix;
@@ -180,10 +188,6 @@ work_pool_init(struct work_pool *pool, unsigned int cpu, int flags)
 
     max_threads = work_pool_compute_max_threads(nr_cpus);
 
-    spinlock_init(&pool->lock);
-    work_queue_init(&pool->queue0);
-    work_queue_init(&pool->queue1);
-    pool->manager = NULL;
     pool->max_threads = max_threads;
     pool->nr_threads = 0;
     pool->nr_available_threads = 0;
@@ -292,6 +296,13 @@ work_pool_concat_queue(struct work_pool *pool, struct work_queue *queue)
 }
 
 static void
+work_thread_destroy(struct work_thread *worker)
+{
+    thread_join(worker->thread);
+    kmem_cache_free(&work_thread_cache, worker);
+}
+
+static void
 work_process(void *arg)
 {
     struct work_thread *self, *worker;
@@ -464,30 +475,42 @@ error_cpumap:
     return error;
 }
 
-static void
-work_thread_destroy(struct work_thread *worker)
+static int __init
+work_bootstrap(void)
 {
-    thread_join(worker->thread);
-    kmem_cache_free(&work_thread_cache, worker);
+    work_pool_init(cpu_local_ptr(work_pool_cpu_main));
+    work_pool_init(cpu_local_ptr(work_pool_cpu_highprio));
+    return 0;
 }
 
+INIT_OP_DEFINE(work_bootstrap,
+               INIT_OP_DEP(cpu_setup, true),
+               INIT_OP_DEP(spinlock_setup, true),
+               INIT_OP_DEP(thread_bootstrap, true));
+
 static int __init
 work_setup(void)
 {
-    unsigned int i;
-
     kmem_cache_init(&work_thread_cache, "work_thread",
                     sizeof(struct work_thread), 0, NULL, 0);
 
-    for (i = 0; i < cpu_count(); i++) {
-        work_pool_init(percpu_ptr(work_pool_cpu_main, i), i, 0);
-        work_pool_init(percpu_ptr(work_pool_cpu_highprio, i), i,
-                       WORK_PF_HIGHPRIO);
+    for (unsigned int i = 1; i < cpu_count(); i++) {
+        work_pool_init(percpu_ptr(work_pool_cpu_main, i));
+        work_pool_init(percpu_ptr(work_pool_cpu_highprio, i));
+    }
+
+    work_pool_init(&work_pool_main);
+    work_pool_init(&work_pool_highprio);
+
+    for (unsigned int i = 0; i < cpu_count(); i++) {
+        work_pool_build(percpu_ptr(work_pool_cpu_main, i), i, 0);
+        work_pool_build(percpu_ptr(work_pool_cpu_highprio, i), i,
+                        WORK_PF_HIGHPRIO);
     }
 
-    work_pool_init(&work_pool_main, WORK_INVALID_CPU, WORK_PF_GLOBAL);
-    work_pool_init(&work_pool_highprio, WORK_INVALID_CPU,
-                   WORK_PF_GLOBAL | WORK_PF_HIGHPRIO);
+    work_pool_build(&work_pool_main, WORK_INVALID_CPU, WORK_PF_GLOBAL);
+    work_pool_build(&work_pool_highprio, WORK_INVALID_CPU,
+                    WORK_PF_GLOBAL | WORK_PF_HIGHPRIO);
 
     log_info("work: threads per pool (per-cpu/global): %u/%u, spare: %u",
              percpu_var(work_pool_cpu_main.max_threads, 0),
@@ -504,7 +527,8 @@ INIT_OP_DEFINE(work_setup,
                INIT_OP_DEP(panic_setup, true),
                INIT_OP_DEP(spinlock_setup, true),
                INIT_OP_DEP(syscnt_setup, true),
-               INIT_OP_DEP(thread_setup, true));
+               INIT_OP_DEP(thread_setup, true),
+               INIT_OP_DEP(work_bootstrap, true));
 
 void
 work_schedule(struct work *work, int flags)
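Both the timer and work changes above split module initialization in two: an early bootstrap operation that only sets up the calling processor's (BSP) data, and a later setup operation that initializes the remaining processors once cpu_mp_probe has made them known, starting its loop at CPU 1 so the BSP is not initialized twice. Below is a minimal sketch of the pattern for a hypothetical example module; example_cpu_data, example_cpu_data_init() and the include paths are illustrative only, not part of the tree.

--------------------------------------------------------------------------------
#include <kern/init.h>
#include <kern/percpu.h>
#include <machine/cpu.h>

struct example_cpu_data {
    unsigned int cpu;
};

static struct example_cpu_data example_cpu_data __percpu;

static void __init
example_cpu_data_init(struct example_cpu_data *data, unsigned int cpu)
{
    data->cpu = cpu;
}

/* Early: make the BSP-local data usable as soon as possible */
static int __init
example_bootstrap(void)
{
    example_cpu_data_init(cpu_local_ptr(example_cpu_data), 0);
    return 0;
}

INIT_OP_DEFINE(example_bootstrap,
               INIT_OP_DEP(cpu_setup, true));

/* Late: the other processors are only known after cpu_mp_probe */
static int __init
example_setup(void)
{
    for (unsigned int cpu = 1; cpu < cpu_count(); cpu++) {
        example_cpu_data_init(percpu_ptr(example_cpu_data, cpu), cpu);
    }

    return 0;
}

INIT_OP_DEFINE(example_setup,
               INIT_OP_DEP(cpu_mp_probe, true),
               INIT_OP_DEP(example_bootstrap, true));
--------------------------------------------------------------------------------

Client modules that only need the early guarantees can then depend on the bootstrap operation, which is exactly what kern/timer.h and kern/work.h now advertise by declaring it instead of the setup operation.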
diff --git a/kern/work.h b/kern/work.h
index 68801c4..ccd3c50 100644
--- a/kern/work.h
+++ b/kern/work.h
@@ -131,6 +131,9 @@ work_init(struct work *work, work_fn_t fn)
 /*
  * Schedule work for deferred processing.
  *
+ * After being scheduled, a work queue must be reinitialized before
+ * it can be reused.
+ *
  * This function may be called from interrupt context.
  */
 void work_schedule(struct work *work, int flags);
@@ -148,9 +151,8 @@ void work_report_periodic_event(void);
 
 /*
  * This init operation provides :
- *  - works can be scheduled
- *  - module fully initialized
+ *  - work / work queue initialization and scheduling
  */
-INIT_OP_DECLARE(work_setup);
+INIT_OP_DECLARE(work_bootstrap);
 
 #endif /* _KERN_WORK_H */
diff --git a/kern/xcall.c b/kern/xcall.c
index 215c1c7..2096f61 100644
--- a/kern/xcall.c
+++ b/kern/xcall.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014-2017 Richard Braun.
+ * Copyright (c) 2014-2018 Richard Braun.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -32,45 +32,58 @@
 #include <machine/cpu.h>
 
 struct xcall {
-    alignas(CPU_L1_SIZE) xcall_fn_t fn;
+    xcall_fn_t fn;
     void *arg;
 };
 
 /*
  * Per-CPU data.
  *
- * Send calls are sent to remote processors. Their access is synchronized
- * by disabling preemption.
+ * The lock is used to serialize cross-calls from different processors
+ * to the same processor. It is held during the complete cross-call
+ * sequence. Inside the critical section, accesses to the receive call
+ * are used to enforce release-acquire ordering between the sending
+ * and receiving processors.
  *
- * The received call points to either NULL if there is no call to process,
- * or a remote send call otherwise. The lock serializes the complete
- * inter-processor operation, i.e. setting the received call pointer,
- * communication through an IPI, and waiting for the processor to
- * acknowledge execution. By serializing interrupts, it is certain that
- * there is a 1:1 mapping between interrupts and cross-calls, allowing
- * the handler to process only one cross-call instead of iterating over
- * a queue. This way, interrupts with higher priority can be handled
- * between multiple cross-calls.
+ * Locking keys :
+ * (a) atomic
+ * (c) cpu_data
 */
struct xcall_cpu_data {
-    alignas(CPU_L1_SIZE) struct xcall send_calls[CONFIG_MAX_CPUS];
-
-    struct syscnt sc_sent;
-    struct syscnt sc_received;
-    struct xcall *recv_call;
-    struct spinlock lock;
+    alignas(CPU_L1_SIZE) struct spinlock lock;
+    struct xcall *recv_call;    /* (c) */
+    struct syscnt sc_sent;      /* (a) */
+    struct syscnt sc_received;  /* (a) */
};

static struct xcall_cpu_data xcall_cpu_data __percpu;

-static inline void
-xcall_set(struct xcall *call, xcall_fn_t fn, void *arg)
+static struct xcall_cpu_data *
+xcall_get_local_cpu_data(void)
+{
+    return cpu_local_ptr(xcall_cpu_data);
+}
+
+static struct xcall_cpu_data *
+xcall_get_cpu_data(unsigned int cpu)
+{
+    return percpu_ptr(xcall_cpu_data, cpu);
+}
+
+static void
+xcall_init(struct xcall *call, xcall_fn_t fn, void *arg)
 {
     call->fn = fn;
     call->arg = arg;
 }
 
 static void
+xcall_process(struct xcall *call)
+{
+    call->fn(call->arg);
+}
+
+static void
 xcall_cpu_data_init(struct xcall_cpu_data *cpu_data, unsigned int cpu)
 {
     char name[SYSCNT_NAME_SIZE];
@@ -83,20 +96,6 @@ xcall_cpu_data_init(struct xcall_cpu_data *cpu_data, unsigned int cpu)
     spinlock_init(&cpu_data->lock);
 }
 
-static struct xcall_cpu_data *
-xcall_cpu_data_get(void)
-{
-    assert(!thread_preempt_enabled());
-    return cpu_local_ptr(xcall_cpu_data);
-}
-
-static struct xcall *
-xcall_cpu_data_get_send_call(struct xcall_cpu_data *cpu_data, unsigned int cpu)
-{
-    assert(cpu < ARRAY_SIZE(cpu_data->send_calls));
-    return &cpu_data->send_calls[cpu];
-}
-
 static struct xcall *
 xcall_cpu_data_get_recv_call(const struct xcall_cpu_data *cpu_data)
 {
@@ -122,47 +121,44 @@ xcall_setup(void)
     unsigned int i;
 
     for (i = 0; i < cpu_count(); i++) {
-        xcall_cpu_data_init(percpu_ptr(xcall_cpu_data, i), i);
+        xcall_cpu_data_init(xcall_get_cpu_data(i), i);
     }
 
     return 0;
 }
 
 INIT_OP_DEFINE(xcall_setup,
+               INIT_OP_DEP(cpu_mp_probe, true),
                INIT_OP_DEP(thread_bootstrap, true),
               INIT_OP_DEP(spinlock_setup, true));
 
 void
 xcall_call(xcall_fn_t fn, void *arg, unsigned int cpu)
 {
-    struct xcall_cpu_data *local_data, *remote_data;
-    struct xcall *call;
+    struct xcall_cpu_data *cpu_data;
+    struct xcall call;
 
     assert(cpu_intr_enabled());
-    assert(fn != NULL);
-
-    remote_data = percpu_ptr(xcall_cpu_data, cpu);
-
-    thread_preempt_disable();
+    assert(fn);
 
-    local_data = xcall_cpu_data_get();
-    call = xcall_cpu_data_get_send_call(local_data, cpu);
-    xcall_set(call, fn, arg);
+    xcall_init(&call, fn, arg);
+    cpu_data = xcall_get_cpu_data(cpu);
 
-    spinlock_lock(&remote_data->lock);
+    spinlock_lock(&cpu_data->lock);
 
-    xcall_cpu_data_set_recv_call(remote_data, call);
+    /* Enforce release ordering on the receive call */
+    xcall_cpu_data_set_recv_call(cpu_data, &call);
 
     cpu_send_xcall(cpu);
 
-    syscnt_inc(&remote_data->sc_sent);
-
-    while (xcall_cpu_data_get_recv_call(remote_data) != NULL) {
+    /* Enforce acquire ordering on the receive call */
+    while (xcall_cpu_data_get_recv_call(cpu_data) != NULL) {
         cpu_pause();
     }
 
-    spinlock_unlock(&remote_data->lock);
+    spinlock_unlock(&cpu_data->lock);
 
-    thread_preempt_enable();
+    syscnt_inc(&cpu_data->sc_sent);
 }
 
 void
@@ -173,17 +169,19 @@ xcall_intr(void)
 
     assert(thread_check_intr_context());
 
-    cpu_data = xcall_cpu_data_get();
+    cpu_data = xcall_get_local_cpu_data();
 
+    /* Enforce acquire ordering on the receive call */
     call = xcall_cpu_data_get_recv_call(cpu_data);
 
     if (call) {
-        call->fn(call->arg);
+        xcall_process(call);
    } else {
        log_warning("xcall: spurious interrupt on cpu%u", cpu_id());
    }
 
     syscnt_inc(&cpu_data->sc_received);
 
+    /* Enforce release ordering on the receive call */
     xcall_cpu_data_clear_recv_call(cpu_data);
 }
diff --git a/kern/xcall.h b/kern/xcall.h
index df36487..4733152 100644
--- a/kern/xcall.h
+++ b/kern/xcall.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Richard Braun.
+ * Copyright (c) 2014-2018 Richard Braun.
  *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -19,11 +19,15 @@
 *
 * This module provides the ability to run functions, called cross-calls,
 * on specific processors.
+ *
+ * TODO Asynchronous cross-calls.
 */

#ifndef _KERN_XCALL_H
#define _KERN_XCALL_H

+#include <kern/init.h>
+
/*
 * Type for cross-call functions.
 */
@@ -33,19 +37,28 @@ typedef void (*xcall_fn_t)(void *arg);
 * Run the given cross-call function on a specific processor.
 *
 * The operation is completely synchronous, returning only when the function
- * has finished running on the target processor, with the side effects of
- * the function visible.
+ * has finished running on the target processor. Release-acquire ordering is
+ * enforced both before and after the function runs on the target processor,
+ * so that side effects produced by the caller are visible to the function,
+ * and vice-versa on return.
 *
- * The function is run in interrupt context. Interrupts must be enabled
+ * The callback function runs in interrupt context. Interrupts must be enabled
 * when calling this function.
 */
void xcall_call(xcall_fn_t fn, void *arg, unsigned int cpu);

/*
- * Report a cross-call interrupt from a remote processor.
+ * Handle a cross-call interrupt from a remote processor.
 *
 * Called from interrupt context.
 */
void xcall_intr(void);

+/*
+ * This init operation provides :
+ *  - cross-calls are usable
+ *  - module fully initialized
+ */
+INIT_OP_DECLARE(xcall_setup);
+
#endif /* _KERN_XCALL_H */
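The rewritten xcall_call() no longer needs per-sender send slots: since the call completes before returning, the struct xcall can live on the sender's stack, and the documented release-acquire ordering on the receive call makes the sender's writes visible to the handler and the handler's writes visible to the sender on return. A usage sketch follows; example_args and example_remote_fn are illustrative names, only xcall_call() and its documented semantics come from the module above.

--------------------------------------------------------------------------------
#include <assert.h>

#include <kern/xcall.h>

struct example_args {
    int value;
};

static void
example_remote_fn(void *arg)
{
    struct example_args *args;

    /* Runs in interrupt context on the target processor; the release
       ordering enforced by the sender makes args->value visible here */
    args = arg;
    args->value++;
}

static void
example_call(unsigned int cpu)
{
    struct example_args args;

    args.value = 42;

    /* Completely synchronous: returns only once example_remote_fn has
       run on the target processor, so stack memory is safe to pass */
    xcall_call(example_remote_fn, &args, cpu);

    /* Acquire ordering on return makes the handler's update visible */
    assert(args.value == 43);
}
--------------------------------------------------------------------------------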
diff --git a/test/Kconfig b/test/Kconfig
index 6f3dbb4..1355c1a 100644
--- a/test/Kconfig
+++ b/test/Kconfig
@@ -9,9 +9,6 @@ if TEST_MODULE
 choice
        prompt "Select test module"
 
-config TEST_MODULE_LLSYNC_DEFER
-       bool "llsync_defer"
-
 config TEST_MODULE_MUTEX
        bool "mutex"
        select MUTEX_DEBUG
@@ -19,9 +16,6 @@ config TEST_MODULE_MUTEX
 config TEST_MODULE_MUTEX_PI
        bool "mutex_pi"
 
-config TEST_MODULE_PMAP_UPDATE_MP
-       bool "pmap_update_mp"
-
 config TEST_MODULE_PERFMON_CPU
        bool "perfmon_cpu"
        select PERFMON
@@ -34,6 +28,12 @@ config TEST_MODULE_PERFMON_TORTURE
        bool "perfmon_torture"
        select PERFMON
 
+config TEST_MODULE_PMAP_UPDATE_MP
+       bool "pmap_update_mp"
+
+config TEST_MODULE_RCU_DEFER
+       bool "rcu_defer"
+
 config TEST_MODULE_SREF_DIRTY_ZEROES
        bool "sref_dirty_zeroes"
 
diff --git a/test/Makefile b/test/Makefile
index 0a29e3a..e6526be 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -1,12 +1,12 @@
-x15_SOURCES-$(CONFIG_TEST_MODULE_LLSYNC_DEFER) += test/test_llsync_defer.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_MUTEX) += test/test_mutex.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_MUTEX_PI) += test/test_mutex_pi.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_CPU) += test/test_perfmon_cpu.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_THREAD) += test/test_perfmon_thread.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_TORTURE) += test/test_perfmon_torture.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_PMAP_UPDATE_MP) += test/test_pmap_update_mp.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_RCU_DEFER) += test/test_rcu_defer.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_SREF_DIRTY_ZEROES) += test/test_sref_dirty_zeroes.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_SREF_NOREF) += test/test_sref_noref.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_SREF_WEAKREF) += test/test_sref_weakref.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_VM_PAGE_FILL) += test/test_vm_page_fill.c
 x15_SOURCES-$(CONFIG_TEST_MODULE_XCALL) += test/test_xcall.c
-x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_CPU) += test/test_perfmon_cpu.c
-x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_THREAD) += test/test_perfmon_thread.c
-x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_TORTURE) += test/test_perfmon_torture.c
diff --git a/test/test_mutex.c b/test/test_mutex.c
index 029e2fe..684ec1b 100644
--- a/test/test_mutex.c
+++ b/test/test_mutex.c
@@ -33,6 +33,7 @@
 #include <kern/atomic.h>
 #include <kern/clock.h>
 #include <kern/error.h>
+#include <kern/init.h>
 #include <kern/kmem.h>
 #include <kern/log.h>
 #include <kern/mutex.h>
@@ -161,7 +162,7 @@ test_report_syscnt(struct timer *timer)
     timer_schedule(timer, time);
 }
 
-void
+void __init
 test_setup(void)
 {
     uint64_t time;
diff --git a/test/test_mutex_pi.c b/test/test_mutex_pi.c
index ef47048..4332a91 100644
--- a/test/test_mutex_pi.c
+++ b/test/test_mutex_pi.c
@@ -80,6 +80,7 @@
 
 #include <kern/cpumap.h>
 #include <kern/error.h>
+#include <kern/init.h>
 #include <kern/mutex.h>
 #include <kern/panic.h>
 #include <kern/syscnt.h>
@@ -416,7 +417,7 @@ test_e(void *arg)
     }
 }
 
-void
+void __init
 test_setup(void)
 {
     struct thread_attr attr;
diff --git a/test/test_pmap_update_mp.c b/test/test_pmap_update_mp.c
index c058323..7b6431a 100644
--- a/test/test_pmap_update_mp.c
+++ b/test/test_pmap_update_mp.c
@@ -31,6 +31,7 @@
 #include <kern/condition.h>
 #include <kern/cpumap.h>
 #include <kern/error.h>
+#include <kern/init.h>
 #include <kern/mutex.h>
 #include <kern/panic.h>
 #include <kern/thread.h>
@@ -94,7 +95,7 @@ test_run2(void *arg)
     printf("done\n");
 }
 
-void
+void __init
 test_setup(void)
 {
     struct thread_attr attr;
diff --git a/test/test_llsync_defer.c b/test/test_rcu_defer.c
index c87aff7..820d050 100644
--- a/test/test_llsync_defer.c
+++ b/test/test_rcu_defer.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Richard Braun.
+ * Copyright (c) 2014-2018 Richard Braun.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
@@ -16,12 +16,12 @@
 *
 *
 * This test module is a stress test, expected to never terminate, of the
- * work deferring functionality of the llsync module. It creates three
- * threads, a producer, a consumer, and a "peeker". The producer allocates
+ * work deferring functionality of the rcu module. It creates three
+ * threads, a producer, a consumer, and a reader. The producer allocates
 * a page and writes it. It then transfers the page to the consumer, using
- * the llsync interface to update the global page pointer. Once at the
- * consumer, the llsync interface is used to defer the release of the page.
- * Concurrently, the peeker accesses the page and checks its content when
+ * the rcu interface to update the global page pointer. Once at the
+ * consumer, the rcu interface is used to defer the release of the page.
+ * Concurrently, the reader accesses the page and checks its content when
 * available. These accesses are performed inside a read-side critical
 * section and should therefore never fail.
 *
@@ -35,10 +35,10 @@
 #include <kern/condition.h>
 #include <kern/error.h>
 #include <kern/kmem.h>
-#include <kern/llsync.h>
 #include <kern/macros.h>
 #include <kern/mutex.h>
 #include <kern/panic.h>
+#include <kern/rcu.h>
 #include <kern/thread.h>
 #include <kern/work.h>
 #include <machine/page.h>
@@ -64,11 +64,11 @@ static void
 test_alloc(void *arg)
 {
     struct test_pdsc *pdsc;
-    unsigned long i;
+    unsigned long nr_loops;
 
     (void)arg;
 
-    i = 0;
+    nr_loops = 0;
 
     mutex_lock(&test_lock);
 
@@ -87,14 +87,14 @@
             }
         }
 
-        llsync_store_ptr(test_pdsc, pdsc);
+        rcu_store_ptr(test_pdsc, pdsc);
 
         condition_signal(&test_condition);
 
-        if ((i % TEST_LOOPS_PER_PRINT) == 0) {
+        if ((nr_loops % TEST_LOOPS_PER_PRINT) == 0) {
             printf("alloc ");
         }
 
-        i++;
+        nr_loops++;
     }
 }
 
@@ -116,11 +116,11 @@ static void
 test_free(void *arg)
 {
     struct test_pdsc *pdsc;
-    unsigned long i;
+    unsigned long nr_loops;
 
     (void)arg;
 
-    i = 0;
+    nr_loops = 0;
 
     mutex_lock(&test_lock);
 
@@ -130,20 +130,20 @@
         }
 
         pdsc = test_pdsc;
-        llsync_store_ptr(test_pdsc, NULL);
+        rcu_store_ptr(test_pdsc, NULL);
 
         if (pdsc != NULL) {
             work_init(&pdsc->work, test_deferred_free);
-            llsync_defer(&pdsc->work);
+            rcu_defer(&pdsc->work);
         }
 
         condition_signal(&test_condition);
 
-        if ((i % TEST_LOOPS_PER_PRINT) == 0) {
+        if ((nr_loops % TEST_LOOPS_PER_PRINT) == 0) {
             printf("free ");
         }
 
-        i++;
+        nr_loops++;
     }
 }
 
@@ -152,36 +152,36 @@
 test_read(void *arg)
 {
     const struct test_pdsc *pdsc;
     const unsigned char *s;
-    unsigned long i, j;
+    unsigned long nr_loops;
 
     (void)arg;
 
-    i = 0;
+    nr_loops = 0;
 
     for (;;) {
-        llsync_read_enter();
+        rcu_read_enter();
 
-        pdsc = llsync_load_ptr(test_pdsc);
+        pdsc = rcu_load_ptr(test_pdsc);
 
         if (pdsc != NULL) {
             s = (const unsigned char *)pdsc->addr;
 
             if (s != NULL) {
-                for (j = 0; j < PAGE_SIZE; j++) {
-                    if (s[j] != TEST_VALIDATION_BYTE) {
+                for (unsigned int i = 0; i < PAGE_SIZE; i++) {
+                    if (s[i] != TEST_VALIDATION_BYTE) {
                         panic("invalid content");
                     }
                 }
 
-                if ((i % TEST_LOOPS_PER_PRINT) == 0) {
+                if ((nr_loops % TEST_LOOPS_PER_PRINT) == 0) {
                     printf("read ");
                 }
 
-                i++;
+                nr_loops++;
             }
         }
 
-        llsync_read_exit();
+        rcu_read_leave();
     }
 }
diff --git a/test/test_sref_dirty_zeroes.c b/test/test_sref_dirty_zeroes.c
index be4c491..5318335 100644
--- a/test/test_sref_dirty_zeroes.c
+++ b/test/test_sref_dirty_zeroes.c
@@ -31,6 +31,7 @@
 
 #include <kern/condition.h>
 #include <kern/error.h>
+#include <kern/init.h>
 #include <kern/kmem.h>
 #include <kern/macros.h>
 #include <kern/mutex.h>
@@ -101,7 +102,7 @@ test_noref(struct sref_counter *counter)
     panic("0 references, page released\n");
 }
 
-void
+void __init
 test_setup(void)
 {
     struct thread_attr attr;
@@ -111,7 +112,7 @@ test_setup(void)
     condition_init(&test_condition);
     mutex_init(&test_lock);
 
-    sref_counter_init(&test_counter, NULL, test_noref);
+    sref_counter_init(&test_counter, 1, NULL, test_noref);
     test_transient_ref = 0;
 
     thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_inc");
diff --git a/test/test_sref_noref.c b/test/test_sref_noref.c
index f556d32..40f1d8f 100644
--- a/test/test_sref_noref.c
+++ b/test/test_sref_noref.c
@@ -37,6 +37,7 @@
 
 #include <kern/condition.h>
 #include <kern/error.h>
+#include <kern/init.h>
 #include <kern/kmem.h>
 #include <kern/macros.h>
 #include <kern/mutex.h>
@@ -142,7 +143,7 @@ test_run(void *arg)
         panic("vm_kmem_alloc: %s", error_str(ERROR_NOMEM));
     }
 
-    sref_counter_init(&obj->ref_counter, NULL, test_obj_noref);
+    sref_counter_init(&obj->ref_counter, 1, NULL, test_obj_noref);
 
     printf("page allocated, 1 reference, publishing\n");
 
@@ -168,7 +169,7 @@ test_run(void *arg)
     kmem_free(threads, sizeof(*threads) * nr_threads);
 }
 
-void
+void __init
 test_setup(void)
 {
     struct thread_attr attr;
diff --git a/test/test_sref_weakref.c b/test/test_sref_weakref.c
index 85d4cbe..d1d4407 100644
--- a/test/test_sref_weakref.c
+++ b/test/test_sref_weakref.c
@@ -36,6 +36,7 @@
 #include <stdio.h>
 
 #include <kern/error.h>
+#include <kern/init.h>
 #include <kern/macros.h>
 #include <kern/sref.h>
 #include <kern/syscnt.h>
@@ -67,7 +68,7 @@ test_run(void *arg)
             continue;
         }
 
-        sref_counter_init(counter, &test_weakref, test_noref);
+        sref_counter_init(counter, 1, &test_weakref, test_noref);
         sref_counter_dec(counter);
 
         for (j = 0; j < 0x20000000; j++);
@@ -101,7 +102,7 @@ test_ref(void *arg)
     }
 }
 
-void
+void __init
 test_setup(void)
 {
     struct thread_attr attr;
diff --git a/test/test_vm_page_fill.c b/test/test_vm_page_fill.c
index 0b4278b..2f6e644 100644
--- a/test/test_vm_page_fill.c
+++ b/test/test_vm_page_fill.c
@@ -28,6 +28,7 @@
 
 #include <kern/cpumap.h>
 #include <kern/error.h>
+#include <kern/init.h>
 #include <kern/list.h>
 #include <kern/thread.h>
 #include <machine/page.h>
@@ -134,7 +135,7 @@ test_run(void *arg)
     }
 }
 
-void
+void __init
 test_setup(void)
 {
     struct thread_attr attr;
diff --git a/test/test_xcall.c b/test/test_xcall.c
index cd33594..c2fc54b 100644
--- a/test/test_xcall.c
+++ b/test/test_xcall.c
@@ -25,43 +25,50 @@
 #include <stddef.h>
 #include <stdio.h>
 
-#include <kern/error.h>
 #include <kern/cpumap.h>
+#include <kern/error.h>
+#include <kern/init.h>
 #include <kern/log.h>
 #include <kern/panic.h>
 #include <kern/thread.h>
 #include <kern/xcall.h>
 #include <test/test.h>
 
-static bool test_done;
+struct test_data {
+    unsigned int cpu;
+    bool done;
+};
 
 static void
 test_fn(void *arg)
 {
-    uintptr_t cpu;
+    struct test_data *data;
 
     assert(thread_interrupted());
 
-    cpu = (uintptr_t)arg;
+    data = arg;
 
-    if (cpu != cpu_id()) {
-        panic("invalid cpu");
+    if (data->cpu != cpu_id()) {
+        panic("test: invalid cpu");
     }
 
     log_info("function called, running on cpu%u\n", cpu_id());
 
-    test_done = true;
+    data->done = true;
 }
 
 static void
 test_once(unsigned int cpu)
 {
-    test_done = false;
+    struct test_data data;
+
+    data.cpu = cpu;
+    data.done = false;
 
     log_info("cross-call: cpu%u -> cpu%u:\n", cpu_id(), cpu);
 
-    xcall_call(test_fn, (void *)(uintptr_t)cpu, cpu);
+    xcall_call(test_fn, &data, cpu);
 
-    if (!test_done) {
-        panic("test_done false");
+    if (!data.done) {
+        panic("test: xcall failed");
     }
 }
 
@@ -70,7 +77,7 @@ test_run_cpu(void *arg)
 {
     (void)arg;
 
-    for (unsigned int i = (cpu_count() - 1); i < cpu_count(); i++) {
+    for (unsigned int i = (cpu_count() - 1); i < cpu_count(); i--) {
         test_once(i);
     }
 }
@@ -116,7 +123,7 @@ test_run(void *arg)
     log_info("done\n");
 }
 
-void
+void __init
 test_setup(void)
 {
     struct thread_attr attr;
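The renamed test exercises the publish/read/reclaim pattern that the new rcu module API exposes: rcu_store_ptr() publishes a pointer, rcu_read_enter()/rcu_read_leave() bracket read-side critical sections, and rcu_defer() hands a struct work to be processed only once all pre-existing readers have left. A condensed sketch of that pattern follows; struct item, item_current and the item_* helpers are illustrative stand-ins, and structof()/kmem_free() are assumed to behave as they are used elsewhere in this diff.

--------------------------------------------------------------------------------
#include <stddef.h>

#include <kern/kmem.h>
#include <kern/macros.h>
#include <kern/rcu.h>
#include <kern/work.h>

struct item {
    struct work work;
    int value;
};

static struct item *item_current;

static void
item_free(struct work *work)
{
    struct item *item;

    item = structof(work, struct item, work);
    kmem_free(item, sizeof(*item));
}

/* Writer: publish a new item for readers */
static void
item_publish(struct item *item)
{
    rcu_store_ptr(item_current, item);
}

/* Writer: unpublish the current item and defer its release */
static void
item_retire(void)
{
    struct item *item;

    item = item_current;
    rcu_store_ptr(item_current, NULL);

    if (item != NULL) {
        /* item_free runs only after all current readers have left */
        work_init(&item->work, item_free);
        rcu_defer(&item->work);
    }
}

/* Reader: only dereference inside a read-side critical section */
static int
item_read(void)
{
    struct item *item;
    int value;

    rcu_read_enter();

    item = rcu_load_ptr(item_current);
    value = (item == NULL) ? -1 : item->value;

    rcu_read_leave();

    return value;
}
--------------------------------------------------------------------------------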
diff --git a/tools/build_configs.py b/tools/build_configs.py
index 42124a9..04f278c 100755
--- a/tools/build_configs.py
+++ b/tools/build_configs.py
@@ -37,26 +37,33 @@ def gen_configs_values_str(options_dict):
 # to all generated strings.
 def gen_cc_options_list(options_dict):
     return map(lambda x: '%s -Werror' % x, gen_configs_values_str(options_dict))
 
+def gen_exclusive_boolean_filter(args):
+    enabled_option, options_list = args
+    filter = dict()
 
-# Check whether a filter prototype is valid.
-#
-# A filter prototype is a list of (name, value) pairs used to build a filter.
-# Keep in mind that a valid filter must match invalid configurations. As a
-# result, a filter prototype is valid if and only if
-#  - all options are included in the given list, and
-#  - the number of enabled options is not 1
-def check_exclusive_boolean_filter(prototype):
-    return (len(set(map(lambda x: x[0], prototype))) == len(prototype)
-            and len(filter(lambda x: x[1][1] == 'y', prototype)) != 1)
-
-# Generate a list of filters on a list of boolean options.
+    for option in options_list:
+        if option == enabled_option:
+            value = [True, 'y']
+        else:
+            value = [True, 'n']
+
+        filter.update({option : value})
+
+    return filter
+
+# Generate a list of passing filters on a list of boolean options.
 #
-# The resulting filters match configurations that don't have one and only
-# one of the given options enabled.
-def gen_exclusive_boolean_filters_list(options_list):
-    product = itertools.product(options_list, [[True, 'y'], [True, 'n']])
-    prototypes = list(itertools.combinations(product, len(options_list)))
-    return map(dict, filter(check_exclusive_boolean_filter, prototypes))
+# The resulting filters match configurations that have only one of the given
+# options enabled, unless all_disabled is true, in which case an additional
+# filter is generated to match configurations where none of the options
+# are enabled.
+def gen_exclusive_boolean_filters_list(options_list, all_disabled=False):
+    option_and_options = map(lambda x: (x, options_list), options_list)
+
+    if all_disabled:
+        option_and_options += [(None, options_list)]
+
+    return map(gen_exclusive_boolean_filter, option_and_options)
 
 # Dictionary of compiler options.
 #
@@ -96,18 +103,48 @@ large_options_dict.update({
     'CONFIG_THREAD_STACK_GUARD' : ['y', 'n'],
 })
 
+test_list = [
+    'CONFIG_TEST_MODULE_MUTEX',
+    'CONFIG_TEST_MODULE_MUTEX_PI',
+    'CONFIG_TEST_MODULE_PMAP_UPDATE_MP',
+    'CONFIG_TEST_MODULE_RCU_DEFER',
+    'CONFIG_TEST_MODULE_SREF_DIRTY_ZEROES',
+    'CONFIG_TEST_MODULE_SREF_NOREF',
+    'CONFIG_TEST_MODULE_SREF_WEAKREF',
+    'CONFIG_TEST_MODULE_VM_PAGE_FILL',
+    'CONFIG_TEST_MODULE_XCALL',
+]
+
+test_options_dict = dict(small_options_dict)
+
+for test in test_list:
+    test_options_dict.update({test : ['y', 'n']})
+
 all_options_sets = {
     'small' : small_options_dict,
     'large' : large_options_dict,
+    'test' : test_options_dict,
 }
 
-# List of filters used to determine valid configurations.
+# Filters.
 #
-# Each entry is a dictionary of options. The key matches an option name
-# whereas the value is a [match_flag, string/regular expression] list.
-# The match flag is true if the matching expression must match, false
-# otherwise.
-all_filters_list = [
+# A filter is a list of dictionaries of options. For each dictionary, the
+# key matches an option name whereas the value is a
+# [match_flag, string/regular expression] list. The match flag is true if
+# the matching expression must match, false otherwise.
+#
+# Passing filters are used to allow configurations that match the filters,
+# whereas blocking filters allow configurations that do not match.
+
+passing_filters_list = gen_exclusive_boolean_filters_list([
+    'CONFIG_MUTEX_ADAPTIVE',
+    'CONFIG_MUTEX_PI',
+    'CONFIG_MUTEX_PLAIN',
+])
+passing_filters_list += gen_exclusive_boolean_filters_list(test_list,
+                                                           all_disabled=True)
+
+blocking_filters_list = [
     # XXX Clang currently cannot build the kernel with LTO.
     {
         'CONFIG_CC_EXE' : [True, 'clang'],
@@ -126,11 +163,6 @@
         'CONFIG_X86_PAE' : [True, 'y'],
     },
 ]
-all_filters_list += gen_exclusive_boolean_filters_list([
-    'CONFIG_MUTEX_ADAPTIVE',
-    'CONFIG_MUTEX_PI',
-    'CONFIG_MUTEX_PLAIN'
-])
 
 def gen_config_line(config_entry):
     name, value = config_entry
@@ -183,34 +215,73 @@ def test_config(args):
 
     return [retval, buildtree]
 
-# Return true if a filter doesn't completely match a configuration.
+# Return true if a filter completely matches a configuration.
 def check_filter(config_dict, filter_dict):
     for name, value in filter_dict.iteritems():
         if not name in config_dict:
-            return True
+            return False
 
         if isinstance(value[1], str):
             if value[0] != (config_dict[name] == value[1]):
-                return True
+                return False
         else:
             if value[0] != bool(value[1].match(config_dict[name])):
-                return True
+                return False
+
+    return True
+
+def check_filter_relevant(config_dict, filter_dict):
+    for name, value in filter_dict.iteritems():
+        if name in config_dict:
+            return True
+
+    return False
+
+def check_filters_list_relevant(config_dict, filters_list):
+    for filter_dict in filters_list:
+        if check_filter_relevant(config_dict, filter_dict):
+            return True
 
     return False
 
+# Return true if a configuration passes at least one of the given filters.
+#
+# If the given filters list is irrelevant, i.e. it applies to none of
+# the options in the given configuration, the filters are considered
+# to match.
+def check_passing_filters(args):
+    config_dict, filters_list = args
+
+    if not check_filters_list_relevant(config_dict, filters_list):
+        return True
+
+    for filter_dict in filters_list:
+        if check_filter(config_dict, filter_dict):
+            return True
 
     return False
 
 # Return true if a configuration passes all the given filters.
-def check_filters(args):
+def check_blocking_filters(args):
     config_dict, filters_list = args
 
     for filter_dict in filters_list:
-        if (not check_filter(config_dict, filter_dict)):
+        if check_filter(config_dict, filter_dict):
             return False
 
     return True
 
-def filter_configs_list(configs_list, filters_list):
-    configs_and_filters = map(lambda x: (x, filters_list), configs_list)
-    return map(lambda x: x[0], filter(check_filters, configs_and_filters))
+def filter_configs_list(configs_list, passing_filters_list,
+                        blocking_filters_list):
+    configs_and_filters = map(lambda x: (x, passing_filters_list),
+                              configs_list)
+    configs_list = map(lambda x: x[0], filter(check_passing_filters,
+                                              configs_and_filters))
+    configs_and_filters = map(lambda x: (x, blocking_filters_list),
+                              configs_list)
+    configs_list = map(lambda x: x[0], filter(check_blocking_filters,
+                                              configs_and_filters))
+    return configs_list
 
 def find_options_dict(options_sets, name):
     if name not in options_sets:
@@ -250,7 +321,8 @@ def main():
     print 'set: ' + args.set
 
     configs_list = filter_configs_list(gen_configs_list(options_dict),
-                                       all_filters_list)
+                                       passing_filters_list,
+                                       blocking_filters_list)
     nr_configs = len(configs_list)
     print 'total: ' + str(nr_configs)
diff --git a/vm/vm_object.c b/vm/vm_object.c
index 707008e..2cb4eed 100644
--- a/vm/vm_object.c
+++ b/vm/vm_object.c
@@ -24,8 +24,8 @@
 #include <stdint.h>
 
 #include <kern/init.h>
-#include <kern/llsync.h>
 #include <kern/mutex.h>
+#include <kern/rcu.h>
 #include <kern/rdxtree.h>
 #include <vm/vm_object.h>
 #include <vm/vm_page.h>
@@ -132,7 +132,7 @@ vm_object_lookup(struct vm_object *object, uint64_t offset)
     struct vm_page *page;
     int error;
 
-    llsync_read_enter();
+    rcu_read_enter();
 
     do {
         page = rdxtree_lookup(&object->pages, vm_page_btop(offset));
@@ -144,7 +144,7 @@ vm_object_lookup(struct vm_object *object, uint64_t offset)
         error = vm_page_tryref(page);
     } while (error);
 
-    llsync_read_exit();
+    rcu_read_leave();
 
     return page;
 }
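The vm_object change is a textbook instance of the RCU lookup-and-reference idiom: the read-side critical section only guarantees the page is not freed while it is being examined, so a reference must still be taken before leaving the section, and the lookup is retried when the reference count has already dropped to zero. Restated generically below; struct obj and the obj_* helpers are hypothetical stand-ins, only the rcu_* calls come from the new module.

--------------------------------------------------------------------------------
#include <stdint.h>

#include <kern/rcu.h>

struct obj;

/* Hypothetical RCU-aware lookup, e.g. over a radix tree */
struct obj *obj_table_lookup(uint64_t key);

/* Hypothetical reference acquisition, fails once the count reached 0 */
int obj_tryref(struct obj *obj);

static struct obj *
obj_lookup(uint64_t key)
{
    struct obj *obj;
    int error;

    rcu_read_enter();

    do {
        obj = obj_table_lookup(key);

        if (obj == NULL) {
            break;
        }

        /*
         * The object may concurrently lose its last reference and be
         * queued for reclamation; retry the lookup in that case, as
         * vm_object_lookup() does with vm_page_tryref().
         */
        error = obj_tryref(obj);
    } while (error);

    rcu_read_leave();

    /* On success, the caller now holds a reference it must drop later */
    return obj;
}
--------------------------------------------------------------------------------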