author     Remy Noel <mocramis@gmail.com>  2018-02-25 17:16:37 +0100
committer  Remy Noel <mocramis@gmail.com>  2018-02-25 17:25:49 +0100
commit     cba7db6931932953fd0113a70019049234eb0b08
tree       6d0c14e881cee20e0dca1807b6177d5246eb3f85
parent     652168fe3d867eec17ac7fa318c8743d524ef40f
parent     e8265363384bc3f8e6bb31466a3590ce27811efa

Merge branch 'master' into perfmon
-rw-r--r--  arch/x86/machine/boot.c | 11
-rw-r--r--  arch/x86/machine/cpu.h | 4
-rw-r--r--  arch/x86/machine/lapic.c | 4
-rw-r--r--  arch/x86/machine/pmap.c | 1
-rw-r--r--  arch/x86/machine/strace.h | 2
-rw-r--r--  doc/intro.9.txt | 4
-rw-r--r--  doc/style.9.txt | 2
-rw-r--r--  kern/Kconfig | 6
-rw-r--r--  kern/Makefile | 2
-rw-r--r--  kern/atomic.h | 12
-rw-r--r--  kern/clock.c | 5
-rw-r--r--  kern/clock.h | 2
-rw-r--r--  kern/clock_i.h | 1
-rw-r--r--  kern/condition.c | 48
-rw-r--r--  kern/condition.h | 14
-rw-r--r--  kern/hlist.h | 50
-rw-r--r--  kern/list.h | 58
-rw-r--r--  kern/llsync.c | 341
-rw-r--r--  kern/llsync.h | 174
-rw-r--r--  kern/llsync_i.h | 125
-rw-r--r--  kern/log2.h | 2
-rw-r--r--  kern/macros.h | 10
-rw-r--r--  kern/mutex.c | 13
-rw-r--r--  kern/mutex.h | 18
-rw-r--r--  kern/mutex/mutex_adaptive.c | 17
-rw-r--r--  kern/mutex/mutex_adaptive_i.h | 8
-rw-r--r--  kern/mutex/mutex_pi_i.h | 7
-rw-r--r--  kern/mutex/mutex_plain.c | 17
-rw-r--r--  kern/mutex/mutex_plain_i.h | 8
-rw-r--r--  kern/percpu.h | 9
-rw-r--r--  kern/perfmon.c | 2
-rw-r--r--  kern/rcu.c | 787
-rw-r--r--  kern/rcu.h | 145
-rw-r--r--  kern/rcu_i.h | 61
-rw-r--r--  kern/rcu_types.h | 40
-rw-r--r--  kern/rdxtree.c | 47
-rw-r--r--  kern/rdxtree.h | 7
-rw-r--r--  kern/rtmutex.c | 19
-rw-r--r--  kern/rtmutex.h | 4
-rw-r--r--  kern/sleepq.c | 63
-rw-r--r--  kern/sleepq.h | 17
-rw-r--r--  kern/slist.h | 54
-rw-r--r--  kern/spinlock.c | 5
-rw-r--r--  kern/spinlock.h | 15
-rw-r--r--  kern/spinlock_i.h | 30
-rw-r--r--  kern/spinlock_types.h | 6
-rw-r--r--  kern/sref.c | 111
-rw-r--r--  kern/sref.h | 19
-rw-r--r--  kern/sref_i.h | 12
-rw-r--r--  kern/syscnt.c | 3
-rw-r--r--  kern/syscnt.h | 17
-rw-r--r--  kern/thread.c | 68
-rw-r--r--  kern/thread.h | 82
-rw-r--r--  kern/thread_i.h | 17
-rw-r--r--  kern/timer.c | 20
-rw-r--r--  kern/timer.h | 3
-rw-r--r--  kern/work.c | 64
-rw-r--r--  kern/work.h | 8
-rw-r--r--  kern/xcall.c | 106
-rw-r--r--  kern/xcall.h | 23
-rw-r--r--  test/Kconfig | 12
-rw-r--r--  test/Makefile | 8
-rw-r--r--  test/test_mutex.c | 3
-rw-r--r--  test/test_mutex_pi.c | 3
-rw-r--r--  test/test_pmap_update_mp.c | 3
-rw-r--r--  test/test_rcu_defer.c (renamed from test/test_llsync_defer.c) | 54
-rw-r--r--  test/test_sref_dirty_zeroes.c | 5
-rw-r--r--  test/test_sref_noref.c | 5
-rw-r--r--  test/test_sref_weakref.c | 5
-rw-r--r--  test/test_vm_page_fill.c | 3
-rw-r--r--  test/test_xcall.c | 33
-rwxr-xr-x  tools/build_configs.py | 150
-rw-r--r--  vm/vm_object.c | 6

73 files changed, 1839 insertions(+), 1281 deletions(-)
diff --git a/arch/x86/machine/boot.c b/arch/x86/machine/boot.c
index afc41d7..6934896 100644
--- a/arch/x86/machine/boot.c
+++ b/arch/x86/machine/boot.c
@@ -350,14 +350,17 @@ boot_setup_paging(struct multiboot_raw_info *mbi, unsigned long eax)
return pmap_setup_paging();
}
+#ifdef CONFIG_X86_PAE
+#define BOOT_PAE_LABEL " PAE"
+#else /* CONFIG_X86_PAE */
+#define BOOT_PAE_LABEL
+#endif /* CONFIG_X86_PAE */
+
void __init
boot_log_info(void)
{
log_info(KERNEL_NAME "/" CONFIG_SUBARCH " " KERNEL_VERSION
-#ifdef CONFIG_X86_PAE
- " PAE"
-#endif /* CONFIG_X86_PAE */
- );
+ BOOT_PAE_LABEL);
}
static void * __init
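
The change above keeps the #ifdef out of the log_info() call by defining a label macro that is either a string literal or empty, and letting the compiler concatenate adjacent string literals. A minimal sketch of the same pattern applied to a second, hypothetical tag (BOOT_SMP_LABEL is illustrative and not part of this patch):

#ifdef CONFIG_SMP
#define BOOT_SMP_LABEL " SMP"
#else /* CONFIG_SMP */
#define BOOT_SMP_LABEL
#endif /* CONFIG_SMP */

void __init
boot_log_info(void)
{
    /* Adjacent string literals are concatenated at compile time, so an
     * empty label simply vanishes from the resulting message. */
    log_info(KERNEL_NAME "/" CONFIG_SUBARCH " " KERNEL_VERSION
             BOOT_PAE_LABEL BOOT_SMP_LABEL);
}
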
diff --git a/arch/x86/machine/cpu.h b/arch/x86/machine/cpu.h
index 7675822..d7a6498 100644
--- a/arch/x86/machine/cpu.h
+++ b/arch/x86/machine/cpu.h
@@ -24,6 +24,9 @@
* L1 cache line size.
*
* XXX Use this value until processor selection is available.
+ *
+ * TODO Add macros to specifically align to the cache line size, and to
+ * do so only in SMP configurations.
*/
#define CPU_L1_SIZE 64
@@ -706,6 +709,7 @@ INIT_OP_DECLARE(cpu_setup);
/*
* This init operation provides :
* - cpu_count()
+ * - access to percpu variables on all processors
*/
INIT_OP_DECLARE(cpu_mp_probe);
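
The TODO above asks for dedicated cache-line alignment macros. A hypothetical sketch of what they could look like, building on the existing CPU_L1_SIZE constant; the __cacheline_aligned name, the SMP-only behaviour and the example structure are assumptions, not part of this patch:

#include <stdalign.h>

#ifdef CONFIG_SMP
#define __cacheline_aligned alignas(CPU_L1_SIZE)
#else /* CONFIG_SMP */
#define __cacheline_aligned
#endif /* CONFIG_SMP */

/* Keep frequently written data on its own cache line to avoid false
 * sharing, as done by hand for the global state in kern/rcu.c below. */
struct example_stats {
    __cacheline_aligned unsigned long nr_events;
};
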
diff --git a/arch/x86/machine/lapic.c b/arch/x86/machine/lapic.c
index 1a81a18..180a1a2 100644
--- a/arch/x86/machine/lapic.c
+++ b/arch/x86/machine/lapic.c
@@ -95,7 +95,7 @@ struct lapic_register {
uint32_t reserved0;
uint32_t reserved1;
uint32_t reserved2;
-} __packed;
+};
/*
* Local APIC register map.
@@ -171,7 +171,7 @@ struct lapic_map {
const struct lapic_register reserved19;
struct lapic_register timer_dcr;
const struct lapic_register reserved20;
-} __packed;
+};
/*
* Address where local APIC registers are mapped.
diff --git a/arch/x86/machine/pmap.c b/arch/x86/machine/pmap.c
index 5ed0502..2b4c23c 100644
--- a/arch/x86/machine/pmap.c
+++ b/arch/x86/machine/pmap.c
@@ -1546,6 +1546,7 @@ pmap_update(struct pmap *pmap)
spinlock_unlock(&queue->lock);
}
+ /* TODO Improve scalability */
cpumap_for_each(&oplist->cpumap, cpu) {
request = &array->requests[cpu];
diff --git a/arch/x86/machine/strace.h b/arch/x86/machine/strace.h
index 2f686a0..b55f6b8 100644
--- a/arch/x86/machine/strace.h
+++ b/arch/x86/machine/strace.h
@@ -16,6 +16,8 @@
*
*
* Stack tracing.
+ *
+ * TODO Make it possible to debug without the frame pointer.
*/
#ifndef _X86_STRACE_H
diff --git a/doc/intro.9.txt b/doc/intro.9.txt
index b3367ba..5975b8b 100644
--- a/doc/intro.9.txt
+++ b/doc/intro.9.txt
@@ -88,10 +88,10 @@ module:kern/condition::
module:arch/cpu::
Architecture-specific processor interface which provides interrupt
control functions.
-module:kern/llsync::
- Lockless synchronization, similar to Linux Read-Copy Update (RCU).
module:kern/mutex::
Mutual exclusion lock.
+module:kern/rcu::
+ Read-Copy Update (RCU) lockless synchronization.
module:kern/semaphore::
Semaphore.
module:kern/sleepq::
diff --git a/doc/style.9.txt b/doc/style.9.txt
index 1b5108f..01a84e7 100644
--- a/doc/style.9.txt
+++ b/doc/style.9.txt
@@ -587,7 +587,7 @@ The first line being a short description, usable as an email subject,
there is no need for a final dot. This also keeps it slightly shorter.
--------------------------------------------------------------------------------
-kern/{llsync,rdxtree}: don't use llsync until it's ready
+kern/{sleepq,turnstile}: handle spurious wakeups
--------------------------------------------------------------------------------
This format is based on brace expansion in the bash shell.
diff --git a/kern/Kconfig b/kern/Kconfig
index 417017b..39ed902 100644
--- a/kern/Kconfig
+++ b/kern/Kconfig
@@ -102,4 +102,10 @@ config MUTEX_DEBUG
---help---
Enable mutex debugging and instrumentation.
+config SPINLOCK_DEBUG
+ bool "Spinlock debugging"
+ default n
+ ---help---
+ Enable spinlock ownership tracking.
+
endmenu
diff --git a/kern/Makefile b/kern/Makefile
index 3dd02f5..3779380 100644
--- a/kern/Makefile
+++ b/kern/Makefile
@@ -12,7 +12,6 @@ x15_SOURCES-y += \
kern/intr.c \
kern/kernel.c \
kern/kmem.c \
- kern/llsync.c \
kern/log.c \
kern/mutex.c \
kern/panic.c \
@@ -20,6 +19,7 @@ x15_SOURCES-y += \
kern/plist.c \
kern/printf.c \
kern/rbtree.c \
+ kern/rcu.c \
kern/rdxtree.c \
kern/rtmutex.c \
kern/semaphore.c \
diff --git a/kern/atomic.h b/kern/atomic.h
index 6c0105d..940720a 100644
--- a/kern/atomic.h
+++ b/kern/atomic.h
@@ -16,6 +16,9 @@
*
*
* Type-generic memory-model aware atomic operations.
+ *
+ * TODO Replace mentions of "memory barriers" throughout the code with
+ * C11 memory model terminology.
*/
#ifndef _KERN_ATOMIC_H
@@ -113,6 +116,15 @@ MACRO_END
#endif
/*
+ * Thread fences.
+ */
+
+#define atomic_fence_acquire() __atomic_thread_fence(ATOMIC_ACQUIRE)
+#define atomic_fence_release() __atomic_thread_fence(ATOMIC_RELEASE)
+#define atomic_fence_acq_rel() __atomic_thread_fence(ATOMIC_ACQ_REL)
+#define atomic_fence_seq_cst() __atomic_thread_fence(ATOMIC_SEQ_CST)
+
+/*
* Common shortcuts.
*/
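
The new fence macros are meant to pair with relaxed atomic accesses when the ordering point has to be decoupled from the access itself. A minimal sketch of the pattern used later in this patch by rcu_data_check_gp_state(): do a relaxed load on the fast path, and only pay for acquire ordering when the value actually changed (the example_flag variable is illustrative):

#include <stdbool.h>

#include <kern/atomic.h>
#include <kern/macros.h>

static unsigned int example_flag;

static bool
example_flag_changed(unsigned int local_value)
{
    unsigned int global_value;

    global_value = atomic_load(&example_flag, ATOMIC_RELAXED);

    if (unlikely(local_value != global_value)) {
        /* Upgrade the relaxed load: everything published before the
         * flag was updated is now guaranteed visible to this thread. */
        atomic_fence_acquire();
        return true;
    }

    return false;
}
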
diff --git a/kern/clock.c b/kern/clock.c
index 19f823c..0b72a8f 100644
--- a/kern/clock.c
+++ b/kern/clock.c
@@ -22,8 +22,8 @@
#include <kern/clock.h>
#include <kern/clock_i.h>
#include <kern/init.h>
-#include <kern/llsync.h>
#include <kern/percpu.h>
+#include <kern/rcu.h>
#include <kern/sref.h>
#include <kern/syscnt.h>
#include <kern/thread.h>
@@ -60,7 +60,6 @@ clock_setup(void)
}
INIT_OP_DEFINE(clock_setup,
- INIT_OP_DEP(boot_setup_intr, true),
INIT_OP_DEP(cpu_mp_probe, true),
INIT_OP_DEP(syscnt_setup, true));
@@ -90,7 +89,7 @@ void clock_tick_intr(void)
}
timer_report_periodic_event();
- llsync_report_periodic_event();
+ rcu_report_periodic_event();
sref_report_periodic_event();
work_report_periodic_event();
thread_report_periodic_event();
diff --git a/kern/clock.h b/kern/clock.h
index 854c146..3b695d5 100644
--- a/kern/clock.h
+++ b/kern/clock.h
@@ -116,6 +116,4 @@ clock_time_occurred(uint64_t t, uint64_t ref)
void clock_tick_intr(void);
-INIT_OP_DECLARE(clock_setup);
-
#endif /* _KERN_CLOCK_H */
diff --git a/kern/clock_i.h b/kern/clock_i.h
index 830f48c..8b98f5e 100644
--- a/kern/clock_i.h
+++ b/kern/clock_i.h
@@ -18,6 +18,7 @@
#ifndef _KERN_CLOCK_I_H
#define _KERN_CLOCK_I_H
+#include <stdalign.h>
#include <stdint.h>
#include <machine/cpu.h>
diff --git a/kern/condition.c b/kern/condition.c
index e6d6595..c407e94 100644
--- a/kern/condition.c
+++ b/kern/condition.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2017 Richard Braun.
+ * Copyright (c) 2013-2018 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -27,46 +27,21 @@
#include <kern/condition_types.h>
#include <kern/mutex.h>
#include <kern/sleepq.h>
-#include <kern/thread.h>
static int
condition_wait_common(struct condition *condition, struct mutex *mutex,
bool timed, uint64_t ticks)
{
- struct condition *last_cond;
struct sleepq *sleepq;
unsigned long flags;
int error;
mutex_assert_locked(mutex);
- /*
- * Special case :
- *
- * mutex_lock(lock);
- *
- * for (;;) {
- * while (!done) {
- * condition_wait(condition, lock);
- * }
- *
- * do_something();
- * }
- *
- * Pull the last condition before unlocking the mutex to prevent
- * mutex_unlock() from reacquiring the condition sleep queue.
- */
- last_cond = thread_pull_last_cond();
-
sleepq = sleepq_lend(condition, true, &flags);
mutex_unlock(mutex);
- if (last_cond != NULL) {
- assert(last_cond == condition);
- sleepq_wakeup(sleepq);
- }
-
if (timed) {
error = sleepq_timedwait(sleepq, "cond", ticks);
} else {
@@ -74,10 +49,6 @@ condition_wait_common(struct condition *condition, struct mutex *mutex,
error = 0;
}
- if (!error) {
- thread_set_last_cond(condition);
- }
-
sleepq_return(sleepq, flags);
mutex_lock(mutex);
@@ -134,20 +105,3 @@ condition_broadcast(struct condition *condition)
sleepq_release(sleepq, flags);
}
-
-void
-condition_wakeup(struct condition *condition)
-{
- struct sleepq *sleepq;
- unsigned long flags;
-
- sleepq = sleepq_acquire(condition, true, &flags);
-
- if (sleepq == NULL) {
- return;
- }
-
- sleepq_wakeup(sleepq);
-
- sleepq_release(sleepq, flags);
-}
diff --git a/kern/condition.h b/kern/condition.h
index 90a59f0..2082bee 100644
--- a/kern/condition.h
+++ b/kern/condition.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2017 Richard Braun.
+ * Copyright (c) 2013-2018 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -65,16 +65,4 @@ int condition_timedwait(struct condition *condition,
void condition_signal(struct condition *condition);
void condition_broadcast(struct condition *condition);
-/*
- * Wake up a pending thread.
- *
- * This function isn't part of the standard condition variable interface.
- * It is used to chain wake-ups to avoid the thundering herd effect.
- * When broadcasting a condition variable, a single thread is actually
- * awaken. Other threads become "pending waiters", still asleep but
- * eligible for wake-up when the mutex associated to the condition variable,
- * relocked when returning from condition_wait(), is finally unlocked.
- */
-void condition_wakeup(struct condition *condition);
-
#endif /* _KERN_CONDITION_H */
diff --git a/kern/hlist.h b/kern/hlist.h
index e105cb8..83d64fe 100644
--- a/kern/hlist.h
+++ b/kern/hlist.h
@@ -25,8 +25,8 @@
#include <stddef.h>
#include <kern/hlist_types.h>
-#include <kern/llsync.h>
#include <kern/macros.h>
+#include <kern/rcu.h>
struct hlist;
@@ -266,25 +266,25 @@ for (entry = hlist_first_entry(list, typeof(*entry), member), \
* Return the first node of a list.
*/
static inline struct hlist_node *
-hlist_llsync_first(const struct hlist *list)
+hlist_rcu_first(const struct hlist *list)
{
- return llsync_load_ptr(list->first);
+ return rcu_load_ptr(list->first);
}
/*
* Return the node next to the given node.
*/
static inline struct hlist_node *
-hlist_llsync_next(const struct hlist_node *node)
+hlist_rcu_next(const struct hlist_node *node)
{
- return llsync_load_ptr(node->next);
+ return rcu_load_ptr(node->next);
}
/*
* Insert a node at the head of a list.
*/
static inline void
-hlist_llsync_insert_head(struct hlist *list, struct hlist_node *node)
+hlist_rcu_insert_head(struct hlist *list, struct hlist_node *node)
{
struct hlist_node *first;
@@ -296,26 +296,26 @@ hlist_llsync_insert_head(struct hlist *list, struct hlist_node *node)
first->pprev = &node->next;
}
- llsync_store_ptr(list->first, node);
+ rcu_store_ptr(list->first, node);
}
/*
* Insert a node before another node.
*/
static inline void
-hlist_llsync_insert_before(struct hlist_node *next, struct hlist_node *node)
+hlist_rcu_insert_before(struct hlist_node *next, struct hlist_node *node)
{
node->next = next;
node->pprev = next->pprev;
next->pprev = &node->next;
- llsync_store_ptr(*node->pprev, node);
+ rcu_store_ptr(*node->pprev, node);
}
/*
* Insert a node after another node.
*/
static inline void
-hlist_llsync_insert_after(struct hlist_node *prev, struct hlist_node *node)
+hlist_rcu_insert_after(struct hlist_node *prev, struct hlist_node *node)
{
node->next = prev->next;
node->pprev = &prev->next;
@@ -324,48 +324,48 @@ hlist_llsync_insert_after(struct hlist_node *prev, struct hlist_node *node)
node->next->pprev = &node->next;
}
- llsync_store_ptr(prev->next, node);
+ rcu_store_ptr(prev->next, node);
}
/*
* Remove a node from a list.
*/
static inline void
-hlist_llsync_remove(struct hlist_node *node)
+hlist_rcu_remove(struct hlist_node *node)
{
if (node->next != NULL) {
node->next->pprev = node->pprev;
}
- llsync_store_ptr(*node->pprev, node->next);
+ rcu_store_ptr(*node->pprev, node->next);
}
/*
* Macro that evaluates to the address of the structure containing the
* given node based on the given type and member.
*/
-#define hlist_llsync_entry(node, type, member) \
- structof(llsync_load_ptr(node), type, member)
+#define hlist_rcu_entry(node, type, member) \
+ structof(rcu_load_ptr(node), type, member)
/*
* Get the first entry of a list.
*/
-#define hlist_llsync_first_entry(list, type, member) \
+#define hlist_rcu_first_entry(list, type, member) \
MACRO_BEGIN \
struct hlist_node *___first; \
\
- ___first = hlist_llsync_first(list); \
+ ___first = hlist_rcu_first(list); \
hlist_end(___first) ? NULL : hlist_entry(___first, type, member); \
MACRO_END
/*
* Get the entry next to the given entry.
*/
-#define hlist_llsync_next_entry(entry, member) \
+#define hlist_rcu_next_entry(entry, member) \
MACRO_BEGIN \
struct hlist_node *___next; \
\
- ___next = hlist_llsync_next(&entry->member); \
+ ___next = hlist_rcu_next(&entry->member); \
hlist_end(___next) \
? NULL \
: hlist_entry(___next, typeof(*entry), member); \
@@ -374,17 +374,17 @@ MACRO_END
/*
* Forge a loop to process all nodes of a list.
*/
-#define hlist_llsync_for_each(list, node) \
-for (node = hlist_llsync_first(list); \
+#define hlist_rcu_for_each(list, node) \
+for (node = hlist_rcu_first(list); \
!hlist_end(node); \
- node = hlist_llsync_next(node))
+ node = hlist_rcu_next(node))
/*
* Forge a loop to process all entries of a list.
*/
-#define hlist_llsync_for_each_entry(list, entry, member) \
-for (entry = hlist_llsync_first_entry(list, typeof(*entry), member); \
+#define hlist_rcu_for_each_entry(list, entry, member) \
+for (entry = hlist_rcu_first_entry(list, typeof(*entry), member); \
entry != NULL; \
- entry = hlist_llsync_next_entry(entry, member))
+ entry = hlist_rcu_next_entry(entry, member))
#endif /* _KERN_HLIST_H */
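
A short usage sketch of the renamed helpers: updates are serialized with a regular lock while lookups run locklessly inside a read-side critical section. The rcu_read_enter()/rcu_read_exit() names are assumptions here (the new kern/rcu.h interface is not part of this excerpt), and initialization of the list and lock is omitted; the list helpers are the ones defined above.

#include <stdbool.h>

#include <kern/hlist.h>
#include <kern/rcu.h>
#include <kern/spinlock.h>

struct obj {
    struct hlist_node node;
    int key;
};

static struct hlist obj_list;
static struct spinlock obj_list_lock;

/* Writer side: conventional locking, RCU-aware insertion. */
static void
obj_register(struct obj *obj)
{
    spinlock_lock(&obj_list_lock);
    hlist_rcu_insert_head(&obj_list, &obj->node);
    spinlock_unlock(&obj_list_lock);
}

/* Reader side: no lock taken, but the nodes may only be dereferenced
 * inside the read-side critical section. */
static bool
obj_exists(int key)
{
    struct obj *obj;
    bool found;

    found = false;
    rcu_read_enter();               /* assumed read-side API */

    hlist_rcu_for_each_entry(&obj_list, obj, node) {
        if (obj->key == key) {
            found = true;
            break;
        }
    }

    rcu_read_exit();                /* assumed read-side API */
    return found;
}
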
diff --git a/kern/list.h b/kern/list.h
index f48d7d7..7ea3992 100644
--- a/kern/list.h
+++ b/kern/list.h
@@ -25,8 +25,8 @@
#include <stddef.h>
#include <kern/list_types.h>
-#include <kern/llsync.h>
#include <kern/macros.h>
+#include <kern/rcu.h>
/*
* Structure used as both head and node.
@@ -385,18 +385,18 @@ for (entry = list_last_entry(list, typeof(*entry), member), \
* Return the first node of a list.
*/
static inline struct list *
-list_llsync_first(const struct list *list)
+list_rcu_first(const struct list *list)
{
- return llsync_load_ptr(list->next);
+ return rcu_load_ptr(list->next);
}
/*
* Return the node next to the given node.
*/
static inline struct list *
-list_llsync_next(const struct list *node)
+list_rcu_next(const struct list *node)
{
- return llsync_load_ptr(node->next);
+ return rcu_load_ptr(node->next);
}
/*
@@ -405,11 +405,11 @@ list_llsync_next(const struct list *node)
* This function is private.
*/
static inline void
-list_llsync_add(struct list *prev, struct list *next, struct list *node)
+list_rcu_add(struct list *prev, struct list *next, struct list *node)
{
node->next = next;
node->prev = prev;
- llsync_store_ptr(prev->next, node);
+ rcu_store_ptr(prev->next, node);
next->prev = node;
}
@@ -417,36 +417,36 @@ list_llsync_add(struct list *prev, struct list *next, struct list *node)
* Insert a node at the head of a list.
*/
static inline void
-list_llsync_insert_head(struct list *list, struct list *node)
+list_rcu_insert_head(struct list *list, struct list *node)
{
- list_llsync_add(list, list->next, node);
+ list_rcu_add(list, list->next, node);
}
/*
* Insert a node at the tail of a list.
*/
static inline void
-list_llsync_insert_tail(struct list *list, struct list *node)
+list_rcu_insert_tail(struct list *list, struct list *node)
{
- list_llsync_add(list->prev, list, node);
+ list_rcu_add(list->prev, list, node);
}
/*
* Insert a node before another node.
*/
static inline void
-list_llsync_insert_before(struct list *next, struct list *node)
+list_rcu_insert_before(struct list *next, struct list *node)
{
- list_llsync_add(next->prev, next, node);
+ list_rcu_add(next->prev, next, node);
}
/*
* Insert a node after another node.
*/
static inline void
-list_llsync_insert_after(struct list *prev, struct list *node)
+list_rcu_insert_after(struct list *prev, struct list *node)
{
- list_llsync_add(prev, prev->next, node);
+ list_rcu_add(prev, prev->next, node);
}
/*
@@ -455,18 +455,18 @@ list_llsync_insert_after(struct list *prev, struct list *node)
* After completion, the node is stale.
*/
static inline void
-list_llsync_remove(struct list *node)
+list_rcu_remove(struct list *node)
{
node->next->prev = node->prev;
- llsync_store_ptr(node->prev->next, node->next);
+ rcu_store_ptr(node->prev->next, node->next);
}
/*
* Macro that evaluates to the address of the structure containing the
* given node based on the given type and member.
*/
-#define list_llsync_entry(node, type, member) \
- structof(llsync_load_ptr(node), type, member)
+#define list_rcu_entry(node, type, member) \
+ structof(rcu_load_ptr(node), type, member)
/*
* Get the first entry of a list.
@@ -475,13 +475,13 @@ list_llsync_remove(struct list *node)
* the node pointer can only be read once, preventing the combination
* of lockless list_empty()/list_first_entry() variants.
*/
-#define list_llsync_first_entry(list, type, member) \
+#define list_rcu_first_entry(list, type, member) \
MACRO_BEGIN \
struct list *___list; \
struct list *___first; \
\
___list = (list); \
- ___first = list_llsync_first(___list); \
+ ___first = list_rcu_first(___list); \
list_end(___list, ___first) \
? NULL \
: list_entry(___first, type, member); \
@@ -494,29 +494,29 @@ MACRO_END
* the node pointer can only be read once, preventing the combination
* of lockless list_empty()/list_next_entry() variants.
*/
-#define list_llsync_next_entry(entry, member) \
- list_llsync_first_entry(&entry->member, typeof(*entry), member)
+#define list_rcu_next_entry(entry, member) \
+ list_rcu_first_entry(&entry->member, typeof(*entry), member)
/*
* Forge a loop to process all nodes of a list.
*
* The node must not be altered during the loop.
*/
-#define list_llsync_for_each(list, node) \
-for (node = list_llsync_first(list); \
+#define list_rcu_for_each(list, node) \
+for (node = list_rcu_first(list); \
!list_end(list, node); \
- node = list_llsync_next(node))
+ node = list_rcu_next(node))
/*
* Forge a loop to process all entries of a list.
*
* The entry node must not be altered during the loop.
*/
-#define list_llsync_for_each_entry(list, entry, member) \
-for (entry = list_llsync_entry(list_first(list), \
+#define list_rcu_for_each_entry(list, entry, member) \
+for (entry = list_rcu_entry(list_first(list), \
typeof(*entry), member); \
!list_end(list, &entry->member); \
- entry = list_llsync_entry(list_next(&entry->member), \
+ entry = list_rcu_entry(list_next(&entry->member), \
typeof(*entry), member))
#endif /* _KERN_LIST_H */
diff --git a/kern/llsync.c b/kern/llsync.c
deleted file mode 100644
index 36233fa..0000000
--- a/kern/llsync.c
+++ /dev/null
@@ -1,341 +0,0 @@
-/*
- * Copyright (c) 2013-2014 Richard Braun.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- *
- * The method used by this module is described in the expired US patent
- * 4809168, "Passive Serialization in a Multitasking Environment". It is
- * similar to "Classic RCU (Read-Copy Update)" as found in Linux 2.6, with
- * the notable difference that RCU actively starts grace periods, where
- * passive serialization waits for two sequential "multiprocess checkpoints"
- * (renamed global checkpoints in this implementation) to occur.
- *
- * It is used instead of RCU because of patents that may not allow writing
- * an implementation not based on the Linux code (see
- * http://lists.lttng.org/pipermail/lttng-dev/2013-May/020305.html). As
- * patents expire, this module could be reworked to become a true RCU
- * implementation. In the mean time, the module interface was carefully
- * designed to be similar to RCU.
- *
- * TODO Gracefully handle large amounts of deferred works.
- */
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stddef.h>
-
-#include <kern/condition.h>
-#include <kern/cpumap.h>
-#include <kern/init.h>
-#include <kern/list.h>
-#include <kern/log.h>
-#include <kern/llsync.h>
-#include <kern/llsync_i.h>
-#include <kern/macros.h>
-#include <kern/mutex.h>
-#include <kern/percpu.h>
-#include <kern/spinlock.h>
-#include <kern/syscnt.h>
-#include <kern/work.h>
-#include <kern/thread.h>
-#include <machine/cpu.h>
-
-/*
- * Initial global checkpoint ID.
- *
- * Set to a high value to make sure overflows are correctly handled.
- */
-#define LLSYNC_INITIAL_GCID ((unsigned int)-10)
-
-/*
- * Number of pending works beyond which to issue a warning.
- */
-#define LLSYNC_NR_PENDING_WORKS_WARN 10000
-
-struct llsync_data llsync_data;
-struct llsync_cpu_data llsync_cpu_data __percpu;
-
-struct llsync_waiter {
- struct work work;
- struct mutex lock;
- struct condition cond;
- int done;
-};
-
-static bool llsync_is_ready __read_mostly = false;
-
-bool
-llsync_ready(void)
-{
- return llsync_is_ready;
-}
-
-static int __init
-llsync_setup(void)
-{
- struct llsync_cpu_data *cpu_data;
- unsigned int i;
-
- spinlock_init(&llsync_data.lock);
- work_queue_init(&llsync_data.queue0);
- work_queue_init(&llsync_data.queue1);
- syscnt_register(&llsync_data.sc_global_checkpoints,
- "llsync_global_checkpoints");
- syscnt_register(&llsync_data.sc_periodic_checkins,
- "llsync_periodic_checkins");
- syscnt_register(&llsync_data.sc_failed_periodic_checkins,
- "llsync_failed_periodic_checkins");
- llsync_data.gcid.value = LLSYNC_INITIAL_GCID;
-
- for (i = 0; i < cpu_count(); i++) {
- cpu_data = percpu_ptr(llsync_cpu_data, i);
- work_queue_init(&cpu_data->queue0);
- }
-
- return 0;
-}
-
-INIT_OP_DEFINE(llsync_setup,
- INIT_OP_DEP(log_setup, true),
- INIT_OP_DEP(mutex_setup, true),
- INIT_OP_DEP(spinlock_setup, true),
- INIT_OP_DEP(syscnt_setup, true),
- INIT_OP_DEP(thread_bootstrap, true),
- INIT_OP_DEP(work_setup, true));
-
-static void
-llsync_process_global_checkpoint(void)
-{
- struct work_queue queue;
- unsigned int nr_works;
-
- assert(cpumap_find_first(&llsync_data.pending_checkpoints) == -1);
- assert(llsync_data.nr_pending_checkpoints == 0);
-
- nr_works = work_queue_nr_works(&llsync_data.queue0)
- + work_queue_nr_works(&llsync_data.queue1);
-
- /* TODO Handle hysteresis */
- if (!llsync_data.no_warning && (nr_works >= LLSYNC_NR_PENDING_WORKS_WARN)) {
- llsync_data.no_warning = 1;
- log_warning("llsync: large number of pending works\n");
- }
-
- if (llsync_data.nr_registered_cpus == 0) {
- work_queue_concat(&llsync_data.queue1, &llsync_data.queue0);
- work_queue_init(&llsync_data.queue0);
- } else {
- cpumap_copy(&llsync_data.pending_checkpoints, &llsync_data.registered_cpus);
- llsync_data.nr_pending_checkpoints = llsync_data.nr_registered_cpus;
- }
-
- work_queue_transfer(&queue, &llsync_data.queue1);
- work_queue_transfer(&llsync_data.queue1, &llsync_data.queue0);
- work_queue_init(&llsync_data.queue0);
-
- if (work_queue_nr_works(&queue) != 0) {
- work_queue_schedule(&queue, 0);
- }
-
- llsync_data.gcid.value++;
- syscnt_inc(&llsync_data.sc_global_checkpoints);
-}
-
-static void
-llsync_flush_works(struct llsync_cpu_data *cpu_data)
-{
- if (work_queue_nr_works(&cpu_data->queue0) == 0) {
- return;
- }
-
- work_queue_concat(&llsync_data.queue0, &cpu_data->queue0);
- work_queue_init(&cpu_data->queue0);
-}
-
-static void
-llsync_commit_checkpoint(unsigned int cpu)
-{
- int pending;
-
- pending = cpumap_test(&llsync_data.pending_checkpoints, cpu);
-
- if (!pending) {
- return;
- }
-
- cpumap_clear(&llsync_data.pending_checkpoints, cpu);
- llsync_data.nr_pending_checkpoints--;
-
- if (llsync_data.nr_pending_checkpoints == 0) {
- llsync_process_global_checkpoint();
- }
-}
-
-void
-llsync_register(void)
-{
- struct llsync_cpu_data *cpu_data;
- unsigned long flags;
- unsigned int cpu;
-
- if (!llsync_is_ready) {
- llsync_is_ready = true;
- }
-
- cpu = cpu_id();
- cpu_data = llsync_get_cpu_data();
-
- spinlock_lock_intr_save(&llsync_data.lock, &flags);
-
- assert(!cpu_data->registered);
- assert(work_queue_nr_works(&cpu_data->queue0) == 0);
- cpu_data->registered = 1;
- cpu_data->gcid = llsync_data.gcid.value;
-
- assert(!cpumap_test(&llsync_data.registered_cpus, cpu));
- cpumap_set(&llsync_data.registered_cpus, cpu);
- llsync_data.nr_registered_cpus++;
-
- assert(!cpumap_test(&llsync_data.pending_checkpoints, cpu));
-
- if ((llsync_data.nr_registered_cpus == 1)
- && (llsync_data.nr_pending_checkpoints == 0)) {
- llsync_process_global_checkpoint();
- }
-
- spinlock_unlock_intr_restore(&llsync_data.lock, flags);
-}
-
-void
-llsync_unregister(void)
-{
- struct llsync_cpu_data *cpu_data;
- unsigned long flags;
- unsigned int cpu;
-
- cpu = cpu_id();
- cpu_data = llsync_get_cpu_data();
-
- spinlock_lock_intr_save(&llsync_data.lock, &flags);
-
- llsync_flush_works(cpu_data);
-
- assert(cpu_data->registered);
- cpu_data->registered = 0;
-
- assert(cpumap_test(&llsync_data.registered_cpus, cpu));
- cpumap_clear(&llsync_data.registered_cpus, cpu);
- llsync_data.nr_registered_cpus--;
-
- /*
- * Processor registration qualifies as a checkpoint. Since unregistering
- * a processor also disables commits until it's registered again, perform
- * one now.
- */
- llsync_commit_checkpoint(cpu);
-
- spinlock_unlock_intr_restore(&llsync_data.lock, flags);
-}
-
-void
-llsync_report_periodic_event(void)
-{
- struct llsync_cpu_data *cpu_data;
- unsigned int gcid;
-
- assert(thread_check_intr_context());
-
- cpu_data = llsync_get_cpu_data();
-
- if (!cpu_data->registered) {
- assert(work_queue_nr_works(&cpu_data->queue0) == 0);
- return;
- }
-
- spinlock_lock(&llsync_data.lock);
-
- llsync_flush_works(cpu_data);
-
- gcid = llsync_data.gcid.value;
- assert((gcid - cpu_data->gcid) <= 1);
-
- /*
- * If the local copy of the global checkpoint ID matches the true
- * value, the current processor has checked in.
- *
- * Otherwise, there were no checkpoint since the last global checkpoint.
- * Check whether this periodic event occurred during a read-side critical
- * section, and if not, trigger a checkpoint.
- */
- if (cpu_data->gcid == gcid) {
- llsync_commit_checkpoint(cpu_id());
- } else {
- if (thread_llsync_in_read_cs()) {
- syscnt_inc(&llsync_data.sc_failed_periodic_checkins);
- } else {
- cpu_data->gcid = gcid;
- syscnt_inc(&llsync_data.sc_periodic_checkins);
- llsync_commit_checkpoint(cpu_id());
- }
- }
-
- spinlock_unlock(&llsync_data.lock);
-}
-
-void
-llsync_defer(struct work *work)
-{
- struct llsync_cpu_data *cpu_data;
- unsigned long flags;
-
- thread_preempt_disable_intr_save(&flags);
- cpu_data = llsync_get_cpu_data();
- work_queue_push(&cpu_data->queue0, work);
- thread_preempt_enable_intr_restore(flags);
-}
-
-static void
-llsync_signal(struct work *work)
-{
- struct llsync_waiter *waiter;
-
- waiter = structof(work, struct llsync_waiter, work);
-
- mutex_lock(&waiter->lock);
- waiter->done = 1;
- condition_signal(&waiter->cond);
- mutex_unlock(&waiter->lock);
-}
-
-void
-llsync_wait(void)
-{
- struct llsync_waiter waiter;
-
- work_init(&waiter.work, llsync_signal);
- mutex_init(&waiter.lock);
- condition_init(&waiter.cond);
- waiter.done = 0;
-
- llsync_defer(&waiter.work);
-
- mutex_lock(&waiter.lock);
-
- while (!waiter.done) {
- condition_wait(&waiter.cond, &waiter.lock);
- }
-
- mutex_unlock(&waiter.lock);
-}
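
llsync_defer() disappears with this file, and the test rename in the diffstat (test/test_llsync_defer.c to test/test_rcu_defer.c) suggests the replacement keeps the same work-based deferral shape. A hypothetical sketch of that usage, assuming an rcu_defer() entry point with the same contract as the removed llsync_defer() (the name is inferred from the test rename and is not shown in this excerpt):

#include <kern/kmem.h>
#include <kern/macros.h>
#include <kern/rcu.h>
#include <kern/work.h>

struct obj {
    struct work work;
    int payload;
};

static void
obj_reclaim(struct work *work)
{
    struct obj *obj;

    obj = structof(work, struct obj, work);
    kmem_free(obj, sizeof(*obj));
}

/* Called once the object has been unlinked from all RCU-visible
 * structures; actual reclamation is deferred until existing readers
 * have left their critical sections. */
static void
obj_destroy(struct obj *obj)
{
    work_init(&obj->work, obj_reclaim);
    rcu_defer(&obj->work);          /* assumed name, see above */
}
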
diff --git a/kern/llsync.h b/kern/llsync.h
deleted file mode 100644
index 4a0d026..0000000
--- a/kern/llsync.h
+++ /dev/null
@@ -1,174 +0,0 @@
-/*
- * Copyright (c) 2013-2014 Richard Braun.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- *
- * Lockless synchronization.
- *
- * The llsync module provides services similar to RCU (Read-Copy Update).
- * As such, it can be thought of as an efficient reader-writer lock
- * replacement. It is efficient because read-side critical sections
- * don't use expensive synchronization mechanisms such as locks or atomic
- * instructions. Lockless synchronization is therefore best used for
- * read-mostly objects. Updating still requires conventional lock-based
- * synchronization.
- *
- * The basic idea is that read-side critical sections are assumed to hold
- * read-side references, and objects for which there may be read-side
- * references must exist as long as such references may be held. The llsync
- * module tracks special system events to determine when read-side references
- * can no longer exist.
- *
- * Since read-side critical sections can run concurrently with updates,
- * it is important to make sure that objects are consistent when being
- * accessed. This is achieved with a publish/subscribe mechanism that relies
- * on the natural atomicity of machine word updates in memory, i.e. all
- * supported architectures must guarantee that, when updating a word, and
- * in turn a pointer, other processors reading that word obtain a valid
- * value, that is either the previous or the next value of the word, but not
- * a mixed-up value. The llsync module provides the llsync_store_ptr() and
- * llsync_load_ptr() wrappers that take care of low level details such as
- * compiler and memory barriers, so that objects are completely built and
- * consistent when published and accessed.
- *
- * As objects are published through pointers, multiple versions can exist at
- * the same time. Previous versions cannot be deleted as long as read-side
- * references may exist. Operations that must wait for all read-side references
- * to be dropped can be either synchronous, i.e. block until it is safe to
- * proceed, or be deferred, in which case they are queued and later handed to
- * the work module. As a result, special care must be taken if using lockless
- * synchronization in the work module itself.
- *
- * The two system events tracked by the llsync module are context switches
- * and a periodic event, normally the periodic timer interrupt that drives
- * the scheduler. Context switches are used as checkpoint triggers. A
- * checkpoint is a point in execution at which no read-side reference can
- * exist, i.e. the processor isn't running any read-side critical section.
- * Since context switches can be very frequent, a checkpoint is local to
- * the processor and lightweight. The periodic event is used to commit
- * checkpoints globally so that other processors are aware of the progress
- * of one another. As the system allows situations in which two periodic
- * events can occur without a single context switch, the periodic event is
- * also used as a checkpoint trigger. When all checkpoints have been
- * committed, a global checkpoint occurs. The occurrence of global checkpoints
- * allows the llsync module to determine when it is safe to process deferred
- * work or unblock update sides.
- */
-
-#ifndef _KERN_LLSYNC_H
-#define _KERN_LLSYNC_H
-
-#include <stdbool.h>
-
-#include <kern/atomic.h>
-#include <kern/macros.h>
-#include <kern/llsync_i.h>
-#include <kern/thread.h>
-#include <kern/work.h>
-
-/*
- * Safely store a pointer.
- */
-#define llsync_store_ptr(ptr, value) atomic_store(&(ptr), value, ATOMIC_RELEASE)
-
-/*
- * Safely load a pointer.
- */
-#define llsync_load_ptr(ptr) atomic_load(&(ptr), ATOMIC_CONSUME)
-
-/*
- * Read-side critical section enter/exit functions.
- *
- * It is not allowed to block inside a read-side critical section.
- */
-
-static inline void
-llsync_read_enter(void)
-{
- int in_read_cs;
-
- in_read_cs = thread_llsync_in_read_cs();
- thread_llsync_read_inc();
-
- if (!in_read_cs) {
- thread_preempt_disable();
- }
-}
-
-static inline void
-llsync_read_exit(void)
-{
- thread_llsync_read_dec();
-
- if (!thread_llsync_in_read_cs()) {
- thread_preempt_enable();
- }
-}
-
-/*
- * Return true if the llsync module is initialized, false otherwise.
- */
-bool llsync_ready(void);
-
-/*
- * Manage registration of the current processor.
- *
- * The caller must not be allowed to migrate when calling these functions.
- *
- * Registering tells the llsync module that the current processor reports
- * context switches and periodic events.
- *
- * When a processor enters a state in which checking in becomes irrelevant,
- * it unregisters itself so that the other registered processors don't need
- * to wait for it to make progress. For example, this is done inside the
- * idle loop since it is obviously impossible to enter a read-side critical
- * section while idling.
- */
-void llsync_register(void);
-void llsync_unregister(void);
-
-/*
- * Report a context switch on the current processor.
- *
- * Interrupts and preemption must be disabled when calling this function.
- */
-static inline void
-llsync_report_context_switch(void)
-{
- llsync_checkin();
-}
-
-/*
- * Report a periodic event on the current processor.
- *
- * Interrupts and preemption must be disabled when calling this function.
- */
-void llsync_report_periodic_event(void);
-
-/*
- * Defer an operation until all existing read-side references are dropped,
- * without blocking.
- */
-void llsync_defer(struct work *work);
-
-/*
- * Wait for all existing read-side references to be dropped.
- *
- * This function sleeps, and may do so for a moderately long duration (a few
- * system timer ticks).
- */
-void llsync_wait(void);
-
-#endif /* _KERN_LLSYNC_H */
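
The publish/subscribe contract described in the removed header carries over to the rcu_store_ptr()/rcu_load_ptr() wrappers used by the list code earlier in this patch. A minimal sketch under that assumption: the writer initializes the object completely before the single pointer store that publishes it, and readers always dereference through the matching load (read-side critical sections and reclamation of the old version are omitted for brevity; global_config is illustrative):

#include <stddef.h>

#include <kern/kmem.h>
#include <kern/rcu.h>

struct config {
    int threshold;
    int limit;
};

static struct config *global_config;

/* Writer: build the new version, then publish it with one release
 * store. Readers observe either the old or the new version, never a
 * partially initialized one. The old version may only be reclaimed
 * once readers are done, e.g. with the deferral sketch shown earlier. */
static void
config_update(int threshold, int limit)
{
    struct config *config;

    config = kmem_alloc(sizeof(*config));

    if (config == NULL) {
        return;
    }

    config->threshold = threshold;
    config->limit = limit;
    rcu_store_ptr(global_config, config);
}

/* Reader: the consume/acquire load guarantees the pointed-to fields
 * are visible once the pointer itself is. */
static int
config_get_limit(void)
{
    const struct config *config;

    config = rcu_load_ptr(global_config);
    return (config == NULL) ? 0 : config->limit;
}
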
diff --git a/kern/llsync_i.h b/kern/llsync_i.h
deleted file mode 100644
index e043eb9..0000000
--- a/kern/llsync_i.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*
- * Copyright (c) 2013-2014 Richard Braun.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _KERN_LLSYNC_I_H
-#define _KERN_LLSYNC_I_H
-
-#include <assert.h>
-#include <stdalign.h>
-
-#include <kern/cpumap.h>
-#include <kern/macros.h>
-#include <kern/spinlock.h>
-#include <kern/syscnt.h>
-#include <kern/work.h>
-#include <machine/cpu.h>
-
-/*
- * Global data.
- *
- * The queue number matches the number of global checkpoints that occurred
- * since works contained in it were added. After two global checkpoints,
- * works are scheduled for processing.
- *
- * Interrupts must be disabled when acquiring the global data lock.
- */
-struct llsync_data {
- struct spinlock lock;
- struct cpumap registered_cpus;
- unsigned int nr_registered_cpus;
- struct cpumap pending_checkpoints;
- unsigned int nr_pending_checkpoints;
- int no_warning;
- struct work_queue queue0;
- struct work_queue queue1;
- struct syscnt sc_global_checkpoints;
- struct syscnt sc_periodic_checkins;
- struct syscnt sc_failed_periodic_checkins;
-
- /*
- * Global checkpoint ID.
- *
- * This variable can be frequently accessed from many processors so :
- * - reserve a whole cache line for it
- * - apply optimistic accesses to reduce contention
- */
- struct {
- alignas(CPU_L1_SIZE) volatile unsigned int value;
- } gcid;
-};
-
-extern struct llsync_data llsync_data;
-
-/*
- * Per-processor data.
- *
- * Every processor records whether it is registered and a local copy of the
- * global checkpoint ID, which is meaningless on unregistered processors.
- * The true global checkpoint ID is incremented when a global checkpoint occurs,
- * after which all the local copies become stale. Checking in synchronizes
- * the local copy of the global checkpoint ID.
- *
- * When works are deferred, they are initially added to a processor-local
- * queue. This queue is regularly flushed to the global data, an operation
- * that occurs every time a processor may commit a checkpoint. The downside
- * of this scalability optimization is that it introduces some additional
- * latency for works that are added to a processor queue between a flush and
- * a global checkpoint.
- *
- * Interrupts and preemption must be disabled on access.
- */
-struct llsync_cpu_data {
- int registered;
- unsigned int gcid;
- struct work_queue queue0;
-};
-
-extern struct llsync_cpu_data llsync_cpu_data;
-
-static inline struct llsync_cpu_data *
-llsync_get_cpu_data(void)
-{
- return cpu_local_ptr(llsync_cpu_data);
-}
-
-static inline void
-llsync_checkin(void)
-{
- struct llsync_cpu_data *cpu_data;
- unsigned int gcid;
-
- assert(!cpu_intr_enabled());
- assert(!thread_preempt_enabled());
-
- cpu_data = llsync_get_cpu_data();
-
- if (!cpu_data->registered) {
- assert(work_queue_nr_works(&cpu_data->queue0) == 0);
- return;
- }
-
- /*
- * The global checkpoint ID obtained might be obsolete here, in which
- * case a commit will not determine that a checkpoint actually occurred.
- * This should seldom happen.
- */
- gcid = llsync_data.gcid.value;
- assert((gcid - cpu_data->gcid) <= 1);
- cpu_data->gcid = gcid;
-}
-
-#endif /* _KERN_LLSYNC_I_H */
diff --git a/kern/log2.h b/kern/log2.h
index ed12b44..762aaa7 100644
--- a/kern/log2.h
+++ b/kern/log2.h
@@ -16,6 +16,8 @@
*
*
* Integer base 2 logarithm operations.
+ *
+ * TODO Fix naming.
*/
#ifndef _KERN_LOG2_H
diff --git a/kern/macros.h b/kern/macros.h
index 2c06be5..0411da2 100644
--- a/kern/macros.h
+++ b/kern/macros.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2009, 2010, 2013 Richard Braun.
+ * Copyright (c) 2009-2018 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,6 +19,8 @@
*
* This file is a top header in the inclusion hierarchy, and shouldn't include
* other headers that may cause circular dependencies.
+ *
+ * TODO Improve documentation.
*/
#ifndef _KERN_MACROS_H
@@ -28,10 +30,14 @@
#error "GCC 4+ required"
#endif
+#ifndef __ASSEMBLER__
+#include <stddef.h>
+#endif
+
/*
* Attributes for variables that are mostly read and seldom changed.
*/
-#define __read_mostly __section(".data.read_mostly")
+#define __read_mostly __section(".data.read_mostly")
#define MACRO_BEGIN ({
#define MACRO_END })
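
For reference, the two macros kept just above are what make the lockless list helpers in this patch expand to expressions: MACRO_BEGIN/MACRO_END wrap a GCC statement expression, so a multi-statement macro such as hlist_rcu_first_entry() can yield a value, and __read_mostly moves rarely written variables into a dedicated section. A small illustrative sketch (example_min() and example_module_ready are not part of the patch):

#include <stdbool.h>

#include <kern/macros.h>

/* Statement expression: the value of the last statement is the value
 * of the whole macro invocation. */
#define example_min(a, b)               \
MACRO_BEGIN                             \
    typeof(a) ___a = (a);               \
    typeof(b) ___b = (b);               \
    (___a < ___b) ? ___a : ___b;        \
MACRO_END

/* Written once at initialization time, read on every fast path. */
static bool example_module_ready __read_mostly;
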
diff --git a/kern/mutex.c b/kern/mutex.c
index bd548b8..7f46630 100644
--- a/kern/mutex.c
+++ b/kern/mutex.c
@@ -20,11 +20,20 @@
#include <kern/thread.h>
static int __init
+mutex_bootstrap(void)
+{
+ return 0;
+}
+
+INIT_OP_DEFINE(mutex_bootstrap,
+ INIT_OP_DEP(mutex_impl_bootstrap, true),
+ INIT_OP_DEP(thread_setup_booter, true));
+
+static int __init
mutex_setup(void)
{
return 0;
}
INIT_OP_DEFINE(mutex_setup,
- INIT_OP_DEP(mutex_impl_setup, true),
- INIT_OP_DEP(thread_setup_booter, true));
+ INIT_OP_DEP(mutex_impl_setup, true));
diff --git a/kern/mutex.h b/kern/mutex.h
index f152d31..b4332de 100644
--- a/kern/mutex.h
+++ b/kern/mutex.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013-2017 Richard Braun.
+ * Copyright (c) 2013-2018 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -37,7 +37,6 @@
#include <kern/init.h>
#include <kern/mutex_types.h>
-#include <kern/thread.h>
/*
* Initialize a mutex.
@@ -102,15 +101,18 @@ static inline void
mutex_unlock(struct mutex *mutex)
{
mutex_impl_unlock(mutex);
-
- /*
- * If this mutex was used along with a condition variable, wake up
- * a potential pending waiter.
- */
- thread_wakeup_last_cond();
}
/*
+ * Special init operation for syscnt_setup.
+ *
+ * This init operation only exists to avoid a circular dependency between
+ * syscnt_setup and mutex_setup, without giving syscnt_setup knowledge
+ * about the dependencies of mutex_setup.
+ */
+INIT_OP_DECLARE(mutex_bootstrap);
+
+/*
* This init operation provides :
* - uncontended mutex locking
*
diff --git a/kern/mutex/mutex_adaptive.c b/kern/mutex/mutex_adaptive.c
index 3e6b610..db92f9d 100644
--- a/kern/mutex/mutex_adaptive.c
+++ b/kern/mutex/mutex_adaptive.c
@@ -318,14 +318,29 @@ mutex_adaptive_unlock_slow(struct mutex *mutex)
}
static int
+mutex_adaptive_bootstrap(void)
+{
+ return 0;
+}
+
+INIT_OP_DEFINE(mutex_adaptive_bootstrap,
+ INIT_OP_DEP(thread_setup_booter, true));
+
+static int
mutex_adaptive_setup(void)
{
mutex_adaptive_setup_debug();
return 0;
}
-INIT_OP_DEFINE(mutex_adaptive_setup,
#ifdef CONFIG_MUTEX_DEBUG
+#define MUTEX_ADAPTIVE_DEBUG_INIT_OP_DEPS \
INIT_OP_DEP(syscnt_setup, true),
+#else /* CONFIG_MUTEX_DEBUG */
+#define MUTEX_ADAPTIVE_DEBUG_INIT_OP_DEPS
#endif /* CONFIG_MUTEX_DEBUG */
+
+INIT_OP_DEFINE(mutex_adaptive_setup,
+ INIT_OP_DEP(mutex_adaptive_bootstrap, true),
+ MUTEX_ADAPTIVE_DEBUG_INIT_OP_DEPS
);
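
The hunk above moves the conditional dependency out of the INIT_OP_DEFINE() argument list: the helper macro expands either to an INIT_OP_DEP() entry with its trailing comma, or to nothing, which is the only way to make the comma itself conditional. A generic sketch of the technique for a hypothetical module (all foo_* names are illustrative):

#ifdef CONFIG_FOO_DEBUG
#define FOO_DEBUG_INIT_OP_DEPS          \
    INIT_OP_DEP(syscnt_setup, true),
#else /* CONFIG_FOO_DEBUG */
#define FOO_DEBUG_INIT_OP_DEPS
#endif /* CONFIG_FOO_DEBUG */

INIT_OP_DEFINE(foo_setup,
               INIT_OP_DEP(foo_bootstrap, true),
               FOO_DEBUG_INIT_OP_DEPS
);
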
diff --git a/kern/mutex/mutex_adaptive_i.h b/kern/mutex/mutex_adaptive_i.h
index a8598e6..6ff1277 100644
--- a/kern/mutex/mutex_adaptive_i.h
+++ b/kern/mutex/mutex_adaptive_i.h
@@ -133,8 +133,14 @@ mutex_impl_unlock(struct mutex *mutex)
}
}
-#define mutex_impl_setup mutex_adaptive_setup
+/*
+ * Mutex init operations. See kern/mutex.h.
+ */
+#define mutex_impl_bootstrap mutex_adaptive_bootstrap
+INIT_OP_DECLARE(mutex_adaptive_bootstrap);
+
+#define mutex_impl_setup mutex_adaptive_setup
INIT_OP_DECLARE(mutex_adaptive_setup);
#endif /* _KERN_MUTEX_ADAPTIVE_I_H */
diff --git a/kern/mutex/mutex_pi_i.h b/kern/mutex/mutex_pi_i.h
index 2d5a2b6..14f63be 100644
--- a/kern/mutex/mutex_pi_i.h
+++ b/kern/mutex/mutex_pi_i.h
@@ -65,6 +65,11 @@ mutex_impl_unlock(struct mutex *mutex)
rtmutex_unlock(&mutex->rtmutex);
}
-#define mutex_impl_setup rtmutex_setup
+/*
+ * Mutex init operations. See kern/mutex.h.
+ */
+
+#define mutex_impl_bootstrap rtmutex_bootstrap
+#define mutex_impl_setup rtmutex_setup
#endif /* _KERN_MUTEX_PI_I_H */
diff --git a/kern/mutex/mutex_plain.c b/kern/mutex/mutex_plain.c
index 8945014..f12f13f 100644
--- a/kern/mutex/mutex_plain.c
+++ b/kern/mutex/mutex_plain.c
@@ -159,14 +159,29 @@ mutex_plain_unlock_slow(struct mutex *mutex)
}
static int
+mutex_plain_bootstrap(void)
+{
+ return 0;
+}
+
+INIT_OP_DEFINE(mutex_plain_bootstrap);
+
+static int
mutex_plain_setup(void)
{
mutex_plain_setup_debug();
return 0;
}
-INIT_OP_DEFINE(mutex_plain_setup,
#ifdef CONFIG_MUTEX_DEBUG
+#define MUTEX_PLAIN_DEBUG_INIT_OP_DEPS \
INIT_OP_DEP(syscnt_setup, true),
+#else /* CONFIG_MUTEX_DEBUG */
+#define MUTEX_PLAIN_DEBUG_INIT_OP_DEPS
#endif /* CONFIG_MUTEX_DEBUG */
+
+
+INIT_OP_DEFINE(mutex_plain_setup,
+ INIT_OP_DEP(mutex_plain_bootstrap, true),
+ MUTEX_PLAIN_DEBUG_INIT_OP_DEPS
);
diff --git a/kern/mutex/mutex_plain_i.h b/kern/mutex/mutex_plain_i.h
index fe97308..74def89 100644
--- a/kern/mutex/mutex_plain_i.h
+++ b/kern/mutex/mutex_plain_i.h
@@ -127,8 +127,14 @@ mutex_impl_unlock(struct mutex *mutex)
}
}
-#define mutex_impl_setup mutex_plain_setup
+/*
+ * Mutex init operations. See kern/mutex.h.
+ */
+#define mutex_impl_bootstrap mutex_plain_bootstrap
+INIT_OP_DECLARE(mutex_plain_bootstrap);
+
+#define mutex_impl_setup mutex_plain_setup
INIT_OP_DECLARE(mutex_plain_setup);
#endif /* _KERN_MUTEX_PLAIN_I_H */
diff --git a/kern/percpu.h b/kern/percpu.h
index 87d1470..e53f4e5 100644
--- a/kern/percpu.h
+++ b/kern/percpu.h
@@ -89,7 +89,7 @@ percpu_area(unsigned int cpu)
extern void *percpu_areas[CONFIG_MAX_CPUS];
void *area;
- assert(cpu < CONFIG_MAX_CPUS);
+ assert(cpu < ARRAY_SIZE(percpu_areas));
area = percpu_areas[cpu];
assert(area != NULL);
return area;
@@ -106,13 +106,18 @@ int percpu_add(unsigned int cpu);
/*
* This init operation provides :
- * - percpu section is registered as the BSP percpu area
+ * - access to percpu variables on processor 0
*/
INIT_OP_DECLARE(percpu_bootstrap);
/*
* This init operation provides :
* - new percpu areas can be created
+ *
+ * The dependency that provides access to percpu variables on all processors
+ * is cpu_mp_probe.
+ *
+ * TODO Add percpu alias to cpu_mp_probe.
*/
INIT_OP_DECLARE(percpu_setup);
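
For context on the init operations documented above, here is a minimal sketch of how percpu data is declared and accessed elsewhere in this patch (see llsync_cpu_data and rcu_cpu_data): percpu_ptr() addresses a given processor's copy, while cpu_local_ptr() addresses the local copy and therefore requires migration to be prevented. The example_cpu_data structure is illustrative:

#include <kern/percpu.h>
#include <kern/thread.h>

struct example_cpu_data {
    unsigned long nr_events;
};

static struct example_cpu_data example_cpu_data __percpu;

/* Remote access: valid for any processor whose percpu area exists,
 * i.e. after cpu_mp_probe for all processors. */
static unsigned long
example_read(unsigned int cpu)
{
    return percpu_ptr(example_cpu_data, cpu)->nr_events;
}

/* Local access: disable preemption so the thread cannot migrate
 * between computing the pointer and using it. */
static void
example_count(void)
{
    thread_preempt_disable();
    cpu_local_ptr(example_cpu_data)->nr_events++;
    thread_preempt_enable();
}
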
diff --git a/kern/perfmon.c b/kern/perfmon.c
index 59b7740..6c545ca 100644
--- a/kern/perfmon.c
+++ b/kern/perfmon.c
@@ -180,7 +180,7 @@ struct perfmon_cpu_pmu {
struct perfmon_cpu_pmc pmcs[PERFMON_MAX_PMCS];
};
-static struct perfmon_pmu_ops pmu_driver __read_mostly = {};
+static struct perfmon_pmu_ops pmu_driver __read_mostly;
static struct perfmon_pmu perfmon_pmu;
static struct perfmon_cpu_pmu perfmon_cpu_pmu __percpu;
diff --git a/kern/rcu.c b/kern/rcu.c
new file mode 100644
index 0000000..973d6d5
--- /dev/null
+++ b/kern/rcu.c
@@ -0,0 +1,787 @@
+/*
+ * Copyright (c) 2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation is based on the paper "Extending RCU for Realtime
+ * and Embedded Workloads" by Paul E. McKenney, Ingo Molnar, Dipankar Sarma,
+ * and Suparna Bhattacharya. Besides the mechanisms not implemented yet,
+ * such as priority boosting, the differences are described below.
+ *
+ * First, this implementation uses scalable reference counters provided
+ * by the sref module instead of per-CPU counters as described in the paper.
+ * The main benefit of this approach is the centralization of most scalability
+ * improvements in the sref module, which should propagate to all sref users,
+ * including RCU.
+ *
+ * In addition, this implementation introduces the concept of windows, where
+ * a window is a range in time to which readers may be linked. Here, a
+ * grace period is defined as the time range at the end of a window where
+ * various synchronization steps are performed to enforce the RCU guarantees.
+ * The minimum duration of a window acts as a knob allowing users to tune
+ * the behavior of the RCU system.
+ *
+ * Finally, the state machine described in the paper is updated to accommodate
+ * windows, since grace periods don't run back-to-back.
+ * Windows are regularly checked and flipped if the previous one isn't
+ * active any more. From that moment, processors may notice the global flip
+ * and perform a local flip of their work window ID. Once all processors
+ * have acknowledged the flip, it is certain that no new work may be queued
+ * on the previous window. At this point, the same occurs for the
+ * processor-local reader window ID, and once all processors have
+ * acknowledged that flip, there can be no new reader linked to the previous
+ * window. The RCU system then releases its own reference to the previous
+ * window and waits for the window reference counter to drop to 0, indicating
+ * that all readers linked to the previous window have left their read-side
+ * critical section. When this global event occurs, processors are requested
+ * to flush the works queued for the previous window, and once they all have
+ * acknowledged their flush, the window ends and becomes inactive, allowing
+ * a new grace period to occur later on.
+ *
+ * Here is an informal diagram describing this process :
+ *
+ * t ---->
+ *
+ * reader window flip ---+ +--- no more readers
+ * work window flip ------+ | | +- works flushed
+ * (grace period start) | | | | (grace period / window end)
+ * v v v v
+ * +--------------+-+-----+-+
+ * | . . . |
+ * | window 0 . . gp . |
+ * | removal . . . | reclamation
+ * +--------------+-+-----+-+-----+----+
+ * | . |
+ * | window 1 . gp |
+ * | removal . | reclamation
+ * +---------------+----+--------
+ * |
+ * | window 2 ...
+ * |
+ * +-------------
+ *
+ * TODO Improve atomic acknowledgment scalability.
+ * TODO Handle large amounts of deferred works.
+ * TODO Priority boosting of slow readers.
+ * TODO CPU registration for dyntick-friendly behavior.
+ */
+
+#include <assert.h>
+#include <stdalign.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+
+#include <kern/atomic.h>
+#include <kern/clock.h>
+#include <kern/init.h>
+#include <kern/macros.h>
+#include <kern/rcu.h>
+#include <kern/panic.h>
+#include <kern/percpu.h>
+#include <kern/spinlock.h>
+#include <kern/sref.h>
+#include <kern/syscnt.h>
+#include <kern/thread.h>
+#include <kern/timer.h>
+#include <kern/work.h>
+#include <machine/cpu.h>
+
+/*
+ * Negative close to 0 so that an overflow occurs early.
+ */
+#define RCU_WINDOW_ID_INIT_VALUE ((unsigned int)-500)
+
+/*
+ * Interval between window checking.
+ *
+ * When windows are checked, a flip occurs if the previous window isn't
+ * active any more.
+ */
+#define RCU_WINDOW_CHECK_INTERVAL_MS 10
+
+/*
+ * Grace period states.
+ *
+ * These states are only used to trigger per-CPU processing that is
+ * globally acknowledged by decrementing a global atomic counter. They
+ * do not completely represent the actual state of a grace period.
+ */
+enum rcu_gp_state {
+ RCU_GP_STATE_WORK_WINDOW_FLIP,
+ RCU_GP_STATE_READER_WINDOW_FLIP,
+ RCU_GP_STATE_WORK_FLUSH,
+};
+
+/*
+ * Per-CPU view of a window.
+ *
+ * Deferred works are scheduled when the window ends.
+ */
+struct rcu_cpu_window {
+ struct work_queue works;
+};
+
+/*
+ * Per-CPU RCU data.
+ *
+ * Each processor maintains two local window IDs. One is used as the current
+ * window ID when deferring work, the other when detecting a reader. A local
+ * flip occurs when a processor notices that the global grace period state
+ * no longer matches the local grace period state. These checks only occur
+ * on periodic events.
+ *
+ * Interrupts and preemption must be disabled when accessing local CPU data.
+ */
+struct rcu_cpu_data {
+ enum rcu_gp_state gp_state;
+ unsigned int work_wid;
+ unsigned int reader_wid;
+ struct rcu_cpu_window windows[2];
+ struct syscnt sc_nr_detected_readers;
+};
+
+/*
+ * Global window.
+ *
+ * A window is a time range that tracks read-side references. Conceptually,
+ * each reader adds a reference to the current window. In practice, references
+ * are only added when readers are detected, which occurs on a context switch
+ * (to track preempted threads) or a reader window flip (to prevent currently
+ * running readers from being linked to the next window).
+ *
+ * When a window is started, its scalable reference counter is initialized
+ * with a reference owned by the RCU system. That reference guarantees that
+ * the window remains active as long as new readers may add references,
+ * since it prevents the counter from dropping to 0. After a reader window
+ * flip, there may not be new references to the window, and the initial
+ * reference is dropped, allowing the counter to reach 0 once all detected
+ * readers leave their critical section and unreference the window they're
+ * linked to.
+ */
+struct rcu_window {
+ struct sref_counter nr_refs;
+ uint64_t start_ts;
+ bool active;
+};
+
+/*
+ * Global data.
+ *
+ * Processors regularly check the grace period state against their own,
+ * locally cached grace period state, and take action whenever they differ.
+ * False sharing is avoided by making the global grace period state fill an
+ * entire cache line on SMP.
+ *
+ * After processors notice a grace period state change, they acknowledge
+ * noticing this change by decrementing the atomic acknowledgment counter,
+ * which also fills a complete cache line on SMP in order to restrict cache
+ * line bouncing. Atomic operations on this counter are done with
+ * acquire-release ordering to enforce the memory ordering guarantees
+ * required by the implementation, as well as those provided by the public
+ * interface.
+ *
+ * In addition to the global window ID and the windows themselves, the data
+ * include a timer, used to trigger the end of windows, i.e. grace periods.
+ * Since the timer function, atomic acknowledgments, and window no-reference
+ * function chain each other, there is currently no need for a global lock.
+ */
+struct rcu_data {
+ struct {
+ alignas(CPU_L1_SIZE) enum rcu_gp_state gp_state;
+ };
+ struct {
+ alignas(CPU_L1_SIZE) unsigned int nr_acks;
+ };
+
+ unsigned int wid;
+ struct rcu_window windows[2];
+ struct timer timer;
+ struct syscnt sc_nr_windows;
+ struct syscnt sc_last_window_ms;
+ struct syscnt sc_longest_window_ms;
+};
+
+/*
+ * Structure used to implement rcu_wait().
+ */
+struct rcu_waiter {
+ struct work work;
+ struct spinlock lock;
+ struct thread *thread;
+ bool done;
+};
+
+static struct rcu_data rcu_data;
+static struct rcu_cpu_data rcu_cpu_data __percpu;
+
+static struct rcu_cpu_data *
+rcu_get_cpu_data(void)
+{
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ return cpu_local_ptr(rcu_cpu_data);
+}
+
+static enum rcu_gp_state
+rcu_data_get_gp_state(const struct rcu_data *data)
+{
+ return data->gp_state;
+}
+
+static unsigned int
+rcu_data_get_wid(const struct rcu_data *data)
+{
+ return data->wid;
+}
+
+static struct rcu_window *
+rcu_data_get_window_from_index(struct rcu_data *data, size_t index)
+{
+ assert(index < ARRAY_SIZE(data->windows));
+ return &data->windows[index];
+}
+
+static struct rcu_window *
+rcu_data_get_window(struct rcu_data *data, unsigned int wid)
+{
+ return rcu_data_get_window_from_index(data, wid & 1);
+}
+
+static void
+rcu_data_update_gp_state(struct rcu_data *data, enum rcu_gp_state gp_state)
+{
+ assert(data->nr_acks == 0);
+
+ switch (gp_state) {
+ case RCU_GP_STATE_WORK_WINDOW_FLIP:
+ assert(data->gp_state == RCU_GP_STATE_WORK_FLUSH);
+ break;
+ case RCU_GP_STATE_READER_WINDOW_FLIP:
+ assert(data->gp_state == RCU_GP_STATE_WORK_WINDOW_FLIP);
+ break;
+ case RCU_GP_STATE_WORK_FLUSH:
+ assert(data->gp_state == RCU_GP_STATE_READER_WINDOW_FLIP);
+ break;
+ default:
+ panic("rcu: invalid grace period state");
+ }
+
+ data->nr_acks = cpu_count();
+ atomic_store(&data->gp_state, gp_state, ATOMIC_RELEASE);
+}
+
+static bool
+rcu_data_check_gp_state(const struct rcu_data *data,
+ enum rcu_gp_state local_gp_state,
+ enum rcu_gp_state *global_gp_state)
+{
+ *global_gp_state = atomic_load(&data->gp_state, ATOMIC_RELAXED);
+
+ if (unlikely(local_gp_state != *global_gp_state)) {
+ atomic_fence_acquire();
+ return true;
+ }
+
+ return false;
+}
+
+static void
+rcu_window_end(struct rcu_window *window)
+{
+ assert(window->active);
+ window->active = false;
+}
+
+static void
+rcu_window_ref(struct rcu_window *window)
+{
+ sref_counter_inc(&window->nr_refs);
+}
+
+static void
+rcu_window_unref(struct rcu_window *window)
+{
+ sref_counter_dec(&window->nr_refs);
+}
+
+static uint64_t
+rcu_window_get_start_ts(const struct rcu_window *window)
+{
+ return window->start_ts;
+}
+
+static void
+rcu_window_flush(struct sref_counter *counter)
+{
+ (void)counter;
+
+ rcu_data_update_gp_state(&rcu_data, RCU_GP_STATE_WORK_FLUSH);
+}
+
+static void __init
+rcu_window_init(struct rcu_window *window)
+{
+ window->active = false;
+}
+
+static void
+rcu_window_start(struct rcu_window *window)
+{
+ assert(!window->active);
+
+ sref_counter_init(&window->nr_refs, 1, NULL, rcu_window_flush);
+ window->start_ts = clock_get_time();
+ window->active = true;
+}
+
+static bool
+rcu_window_active(const struct rcu_window *window)
+{
+ return window->active;
+}
+
+static void
+rcu_data_end_prev_window(struct rcu_data *data, uint64_t now)
+{
+ struct rcu_window *window;
+ uint64_t duration;
+
+ window = rcu_data_get_window(data, data->wid - 1);
+ duration = clock_ticks_to_ms(now - rcu_window_get_start_ts(window));
+ syscnt_set(&data->sc_last_window_ms, duration);
+
+ if (duration > syscnt_read(&data->sc_longest_window_ms)) {
+ syscnt_set(&data->sc_longest_window_ms, duration);
+ }
+
+ rcu_window_end(window);
+}
+
+static void
+rcu_data_schedule_timer(struct rcu_data *data, uint64_t now)
+{
+ uint64_t ticks;
+
+ ticks = clock_ticks_from_ms(RCU_WINDOW_CHECK_INTERVAL_MS);
+ timer_schedule(&data->timer, now + ticks);
+}
+
+static void
+rcu_data_ack_cpu(struct rcu_data *data)
+{
+ struct rcu_window *window;
+ unsigned int prev_nr_acks;
+ uint64_t now;
+
+ prev_nr_acks = atomic_fetch_sub(&data->nr_acks, 1, ATOMIC_ACQ_REL);
+
+ if (prev_nr_acks != 1) {
+ assert(prev_nr_acks != 0);
+ return;
+ }
+
+ switch (data->gp_state) {
+ case RCU_GP_STATE_WORK_WINDOW_FLIP:
+ rcu_data_update_gp_state(data, RCU_GP_STATE_READER_WINDOW_FLIP);
+ break;
+ case RCU_GP_STATE_READER_WINDOW_FLIP:
+ window = rcu_data_get_window(data, data->wid - 1);
+ rcu_window_unref(window);
+ break;
+ case RCU_GP_STATE_WORK_FLUSH:
+ now = clock_get_time();
+ rcu_data_end_prev_window(data, now);
+ rcu_data_schedule_timer(data, now);
+ break;
+ default:
+ panic("rcu: invalid grace period state");
+ }
+}
+
+static bool
+rcu_data_flip_windows(struct rcu_data *data)
+{
+ struct rcu_window *window;
+
+ window = rcu_data_get_window(data, data->wid - 1);
+
+ if (rcu_window_active(window)) {
+ return false;
+ }
+
+ rcu_window_start(window);
+ syscnt_inc(&data->sc_nr_windows);
+ data->wid++;
+ rcu_data_update_gp_state(data, RCU_GP_STATE_WORK_WINDOW_FLIP);
+ return true;
+}
+
+static void
+rcu_data_check_windows(struct timer *timer)
+{
+ struct rcu_data *data;
+ bool flipped;
+
+ data = &rcu_data;
+ flipped = rcu_data_flip_windows(data);
+
+ if (!flipped) {
+ rcu_data_schedule_timer(data, timer_get_time(timer));
+ }
+}
+
+static void __init
+rcu_data_init(struct rcu_data *data)
+{
+ data->gp_state = RCU_GP_STATE_WORK_FLUSH;
+ data->nr_acks = 0;
+ data->wid = RCU_WINDOW_ID_INIT_VALUE;
+
+ for (size_t i = 0; i < ARRAY_SIZE(data->windows); i++) {
+ rcu_window_init(rcu_data_get_window_from_index(data, i));
+ }
+
+ rcu_window_start(rcu_data_get_window(data, data->wid));
+
+ timer_init(&data->timer, rcu_data_check_windows, 0);
+ rcu_data_schedule_timer(data, clock_get_time());
+
+ syscnt_register(&data->sc_nr_windows, "rcu_nr_windows");
+ syscnt_register(&data->sc_last_window_ms, "rcu_last_window_ms");
+ syscnt_register(&data->sc_longest_window_ms, "rcu_longest_window_ms");
+}
+
+static void __init
+rcu_cpu_window_init(struct rcu_cpu_window *cpu_window)
+{
+ work_queue_init(&cpu_window->works);
+}
+
+static void
+rcu_cpu_window_queue(struct rcu_cpu_window *cpu_window, struct work *work)
+{
+ work_queue_push(&cpu_window->works, work);
+}
+
+static void
+rcu_cpu_window_flush(struct rcu_cpu_window *cpu_window)
+{
+ work_queue_schedule(&cpu_window->works, 0);
+ work_queue_init(&cpu_window->works);
+}
+
+static unsigned int
+rcu_cpu_data_get_reader_wid(const struct rcu_cpu_data *cpu_data)
+{
+ return cpu_data->reader_wid;
+}
+
+static struct rcu_cpu_window *
+rcu_cpu_data_get_window_from_index(struct rcu_cpu_data *cpu_data, size_t index)
+{
+ assert(index < ARRAY_SIZE(cpu_data->windows));
+ return &cpu_data->windows[index];
+}
+
+static struct rcu_cpu_window *
+rcu_cpu_data_get_window(struct rcu_cpu_data *cpu_data, unsigned int wid)
+{
+ return rcu_cpu_data_get_window_from_index(cpu_data, wid & 1);
+}
+
+static void __init
+rcu_cpu_data_init(struct rcu_cpu_data *cpu_data, unsigned int cpu)
+{
+ struct rcu_data *data;
+ char name[SYSCNT_NAME_SIZE];
+
+ data = &rcu_data;
+
+ cpu_data->gp_state = rcu_data_get_gp_state(data);
+ cpu_data->work_wid = rcu_data_get_wid(data);
+ cpu_data->reader_wid = cpu_data->work_wid;
+
+ for (size_t i = 0; i < ARRAY_SIZE(cpu_data->windows); i++) {
+ rcu_cpu_window_init(rcu_cpu_data_get_window_from_index(cpu_data, i));
+ }
+
+ snprintf(name, sizeof(name), "rcu_nr_detected_readers/%u", cpu);
+ syscnt_register(&cpu_data->sc_nr_detected_readers, name);
+}
+
+static void
+rcu_cpu_data_queue(struct rcu_cpu_data *cpu_data, struct work *work)
+{
+ struct rcu_cpu_window *cpu_window;
+
+ cpu_window = rcu_cpu_data_get_window(cpu_data, cpu_data->work_wid);
+ rcu_cpu_window_queue(cpu_window, work);
+}
+
+static void
+rcu_cpu_data_flush(struct rcu_cpu_data *cpu_data)
+{
+ struct rcu_cpu_window *cpu_window;
+
+ assert(cpu_data->work_wid == cpu_data->reader_wid);
+
+ cpu_window = rcu_cpu_data_get_window(cpu_data, cpu_data->work_wid - 1);
+ rcu_cpu_window_flush(cpu_window);
+}
+
+void
+rcu_reader_init(struct rcu_reader *reader)
+{
+ reader->level = 0;
+ reader->linked = false;
+}
+
+static void
+rcu_reader_link(struct rcu_reader *reader, struct rcu_cpu_data *cpu_data)
+{
+ assert(!cpu_intr_enabled());
+ assert(reader == thread_rcu_reader(thread_self()));
+ assert(!rcu_reader_linked(reader));
+
+ reader->wid = rcu_cpu_data_get_reader_wid(cpu_data);
+ reader->linked = true;
+}
+
+static void
+rcu_reader_unlink(struct rcu_reader *reader)
+{
+ assert(reader->level == 0);
+ reader->linked = false;
+}
+
+static void
+rcu_reader_enter(struct rcu_reader *reader, struct rcu_cpu_data *cpu_data)
+{
+ struct rcu_window *window;
+ struct rcu_data *data;
+ unsigned int wid;
+
+ if (rcu_reader_linked(reader)) {
+ return;
+ }
+
+ data = &rcu_data;
+ wid = rcu_cpu_data_get_reader_wid(cpu_data);
+ window = rcu_data_get_window(data, wid);
+
+ rcu_reader_link(reader, cpu_data);
+ rcu_window_ref(window);
+
+ syscnt_inc(&cpu_data->sc_nr_detected_readers);
+}
+
+void
+rcu_reader_leave(struct rcu_reader *reader)
+{
+ struct rcu_window *window;
+ struct rcu_data *data;
+
+ data = &rcu_data;
+
+ window = rcu_data_get_window(data, reader->wid);
+ rcu_window_unref(window);
+ rcu_reader_unlink(reader);
+}
+
+static void
+rcu_reader_account(struct rcu_reader *reader, struct rcu_cpu_data *cpu_data)
+{
+ if (rcu_reader_in_cs(reader)) {
+ rcu_reader_enter(reader, cpu_data);
+ }
+}
+
+static void
+rcu_cpu_data_flip_work_wid(struct rcu_cpu_data *cpu_data)
+{
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ cpu_data->work_wid++;
+}
+
+static void
+rcu_cpu_data_flip_reader_wid(struct rcu_cpu_data *cpu_data)
+{
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ rcu_reader_account(thread_rcu_reader(thread_self()), cpu_data);
+ cpu_data->reader_wid++;
+}
+
+static void
+rcu_cpu_data_check_gp_state(struct rcu_cpu_data *cpu_data)
+{
+ enum rcu_gp_state local_gp_state, global_gp_state;
+ struct rcu_data *data;
+ bool diff;
+
+ data = &rcu_data;
+
+ /*
+ * A loop is used to optimize the case where a processor is the last to
+ * acknowledge a grace period state change, in which case the latter
+ * also immediately changes and can be acknowledged right away. As a
+ * result, this loop never runs more than twice.
+ */
+ for (unsigned int i = 0; /* no condition */; i++) {
+ local_gp_state = cpu_data->gp_state;
+ diff = rcu_data_check_gp_state(data, local_gp_state, &global_gp_state);
+
+ if (!diff) {
+ break;
+ }
+
+ assert(i < 2);
+
+ switch (global_gp_state) {
+ case RCU_GP_STATE_WORK_WINDOW_FLIP:
+ rcu_cpu_data_flip_work_wid(cpu_data);
+ rcu_data_ack_cpu(data);
+ break;
+ case RCU_GP_STATE_READER_WINDOW_FLIP:
+ rcu_cpu_data_flip_reader_wid(cpu_data);
+ rcu_data_ack_cpu(data);
+ break;
+ case RCU_GP_STATE_WORK_FLUSH:
+ rcu_cpu_data_flush(cpu_data);
+ rcu_data_ack_cpu(data);
+ break;
+ default:
+ panic("rcu: invalid grace period state");
+ }
+
+ cpu_data->gp_state = global_gp_state;
+ }
+}
+
+void
+rcu_report_context_switch(struct rcu_reader *reader)
+{
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ /*
+ * Most readers don't need to be accounted for because their execution
+ * doesn't overlap with a grace period. If a reader is preempted however,
+ * it must be accounted for in case a grace period starts while the reader
+ * is preempted. Accounting also occurs when a grace period starts, more
+ * precisely when the reader window ID of a processor is flipped.
+ */
+ rcu_reader_account(reader, rcu_get_cpu_data());
+}
+
+void
+rcu_report_periodic_event(void)
+{
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ rcu_cpu_data_check_gp_state(rcu_get_cpu_data());
+}
+
+void
+rcu_defer(struct work *work)
+{
+ struct rcu_cpu_data *cpu_data;
+ unsigned long flags;
+
+ thread_preempt_disable_intr_save(&flags);
+ cpu_data = rcu_get_cpu_data();
+ rcu_cpu_data_queue(cpu_data, work);
+ thread_preempt_enable_intr_restore(flags);
+}
+
+static void
+rcu_waiter_wakeup(struct work *work)
+{
+ struct rcu_waiter *waiter;
+
+ waiter = structof(work, struct rcu_waiter, work);
+
+ spinlock_lock(&waiter->lock);
+ waiter->done = true;
+ thread_wakeup(waiter->thread);
+ spinlock_unlock(&waiter->lock);
+}
+
+static void
+rcu_waiter_init(struct rcu_waiter *waiter, struct thread *thread)
+{
+ work_init(&waiter->work, rcu_waiter_wakeup);
+ spinlock_init(&waiter->lock);
+ waiter->thread = thread;
+ waiter->done = false;
+}
+
+static void
+rcu_waiter_wait(struct rcu_waiter *waiter)
+{
+ rcu_defer(&waiter->work);
+
+ spinlock_lock(&waiter->lock);
+
+ while (!waiter->done) {
+ thread_sleep(&waiter->lock, waiter, "rcu_wait");
+ }
+
+ spinlock_unlock(&waiter->lock);
+}
+
+void
+rcu_wait(void)
+{
+ struct rcu_waiter waiter;
+
+ rcu_waiter_init(&waiter, thread_self());
+ rcu_waiter_wait(&waiter);
+}
+
+static int __init
+rcu_bootstrap(void)
+{
+ rcu_data_init(&rcu_data);
+ rcu_cpu_data_init(cpu_local_ptr(rcu_cpu_data), 0);
+ return 0;
+}
+
+INIT_OP_DEFINE(rcu_bootstrap,
+ INIT_OP_DEP(spinlock_setup, true),
+ INIT_OP_DEP(sref_bootstrap, true),
+ INIT_OP_DEP(syscnt_setup, true),
+ INIT_OP_DEP(thread_bootstrap, true),
+ INIT_OP_DEP(timer_bootstrap, true));
+
+static int __init
+rcu_setup(void)
+{
+ for (unsigned int i = 1; i < cpu_count(); i++) {
+ rcu_cpu_data_init(percpu_ptr(rcu_cpu_data, i), i);
+ }
+
+ return 0;
+}
+
+INIT_OP_DEFINE(rcu_setup,
+ INIT_OP_DEP(cpu_mp_probe, true),
+ INIT_OP_DEP(rcu_bootstrap, true));
diff --git a/kern/rcu.h b/kern/rcu.h
new file mode 100644
index 0000000..48079d0
--- /dev/null
+++ b/kern/rcu.h
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Read-Copy Update.
+ *
+ * This module provides a synchronization framework for read-only lockless
+ * operations, best used on read-mostly data structures.
+ *
+ * To learn more about RCU, see "Is Parallel Programming Hard, And, If So,
+ * What Can You Do About It ?" by Paul E. McKenney.
+ *
+ * A thorough discussion of RCU's requirements can be found at [1].
+ * However, since the memory model and terminology used in that document
+ * differ from those of C11, its memory barrier guarantees are translated
+ * here into memory ordering guarantees :
+ *
+ * 1. Release-acquire ordering is enforced between the end of all
+ * read-side critical sections starting before a grace period starts
+ * and the start of all works deferred before the same grace period
+ * starts.
+ * 2. Release-acquire ordering is enforced between the start of all
+ * read-side critical sections ending after a grace period ends
+ * and all the work deferrals done before the same grace period
+ * starts.
+ * 3. Release-acquire ordering is enforced between all the work deferrals
+ * done before a grace period starts and the start of all works deferred
+ * before the same grace period starts.
+ *
+ * [1] https://www.kernel.org/doc/Documentation/RCU/Design/Requirements/Requirements.html.
+ */
+
+#ifndef _KERN_RCU_H
+#define _KERN_RCU_H
+
+#include <kern/atomic.h>
+#include <kern/init.h>
+#include <kern/rcu_i.h>
+#include <kern/rcu_types.h>
+#include <kern/thread.h>
+#include <kern/work.h>
+
+/*
+ * Thread-local RCU data.
+ */
+struct rcu_reader;
+
+/*
+ * Safely store a pointer.
+ *
+ * This macro enforces release ordering on the given pointer.
+ */
+#define rcu_store_ptr(ptr, value) atomic_store(&(ptr), value, ATOMIC_RELEASE)
+
+/*
+ * Safely load a pointer.
+ *
+ * This macro enforces consume ordering on the given pointer.
+ */
+#define rcu_load_ptr(ptr) atomic_load(&(ptr), ATOMIC_CONSUME)
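+
+/*
+ * Usage sketch for the pointer accessors. This is illustrative only :
+ * the foo type, the foo_init() function and the foo_root global are
+ * assumptions made for the example, not part of this interface.
+ *
+ *   struct foo *foo_root;
+ *
+ *   Publishing an object :
+ *     foo_init(new_foo);
+ *     rcu_store_ptr(foo_root, new_foo);
+ *
+ *   Reading it :
+ *     foo = rcu_load_ptr(foo_root);
+ *
+ * The release/consume pair guarantees that a reader obtaining new_foo
+ * also observes the effects of foo_init().
+ */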
+
+/*
+ * Read-side critical section functions.
+ *
+ * Critical sections are preemptible, may safely nest, and may safely
+ * be used in interrupt context. It is not allowed to sleep inside a
+ * read-side critical section. However, it is allowed to acquire locks
+ * that don't sleep, such as spin locks.
+ *
+ * Entering or leaving a critical section doesn't provide any memory
+ * ordering guarantee.
+ */
+
+static inline void
+rcu_read_enter(void)
+{
+ rcu_reader_inc(thread_rcu_reader(thread_self()));
+ barrier();
+}
+
+static inline void
+rcu_read_leave(void)
+{
+ barrier();
+ rcu_reader_dec(thread_rcu_reader(thread_self()));
+}
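+
+/*
+ * Minimal read-side sketch, reusing the hypothetical foo_root pointer
+ * from the example above (illustrative only, not part of this interface) :
+ *
+ *   rcu_read_enter();
+ *
+ *   foo = rcu_load_ptr(foo_root);
+ *
+ *   if (foo != NULL) {
+ *       foo may safely be accessed here, since works deferred with
+ *       rcu_defer() only run once all read-side critical sections
+ *       existing at deferral time have ended.
+ *   }
+ *
+ *   rcu_read_leave();
+ */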
+
+/*
+ * Initialize a RCU reader.
+ */
+void rcu_reader_init(struct rcu_reader *reader);
+
+/*
+ * Report a context switch on the current processor.
+ *
+ * The argument is the RCU reader of the preempted thread.
+ *
+ * Interrupts and preemption must be disabled when calling this function.
+ */
+void rcu_report_context_switch(struct rcu_reader *reader);
+
+/*
+ * Report a periodic event on the current processor.
+ *
+ * Interrupts and preemption must be disabled when calling this function.
+ */
+void rcu_report_periodic_event(void);
+
+/*
+ * Defer a work until all existing read-side references are dropped,
+ * without blocking.
+ *
+ * The given work is scheduled when all existing read-side references
+ * are dropped.
+ */
+void rcu_defer(struct work *work);
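+
+/*
+ * Deferred release sketch (illustrative only : the foo type, its work
+ * member and the foo_destroy_deferred() function are assumptions) :
+ *
+ *   rcu_store_ptr(foo_root, NULL);
+ *   work_init(&foo->work, foo_destroy_deferred);
+ *   rcu_defer(&foo->work);
+ *
+ * where foo_destroy_deferred() retrieves the object with
+ * structof(work, struct foo, work) and releases it once no read-side
+ * reference to it may remain.
+ */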
+
+/*
+ * Wait for all existing read-side references to be dropped.
+ *
+ * This function sleeps, and may do so for a moderately long duration,
+ * at least a few system timer ticks, sometimes a lot more.
+ */
+void rcu_wait(void);
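+
+/*
+ * Synchronous variant of the example above (illustrative only, the
+ * foo_destroy() call is an assumption) :
+ *
+ *   rcu_store_ptr(foo_root, NULL);
+ *   rcu_wait();
+ *   foo_destroy(foo);
+ *
+ * Once rcu_wait() returns, all read-side references that existed when
+ * it was called have been dropped, and new readers cannot find the
+ * unpublished object, making the destruction safe.
+ */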
+
+/*
+ * This init operation provides :
+ * - read-side critical sections usable
+ */
+INIT_OP_DECLARE(rcu_bootstrap);
+
+#endif /* _KERN_RCU_H */
diff --git a/kern/rcu_i.h b/kern/rcu_i.h
new file mode 100644
index 0000000..6be933d
--- /dev/null
+++ b/kern/rcu_i.h
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _KERN_RCU_I_H
+#define _KERN_RCU_I_H
+
+#include <assert.h>
+#include <stdbool.h>
+
+#include <kern/macros.h>
+#include <kern/rcu_types.h>
+#include <kern/thread.h>
+
+void rcu_reader_leave(struct rcu_reader *reader);
+
+static inline bool
+rcu_reader_in_cs(const struct rcu_reader *reader)
+{
+ return reader->level != 0;
+}
+
+static inline bool
+rcu_reader_linked(const struct rcu_reader *reader)
+{
+ assert(reader == thread_rcu_reader(thread_self()));
+ return reader->linked;
+}
+
+static inline void
+rcu_reader_inc(struct rcu_reader *reader)
+{
+ reader->level++;
+ assert(reader->level != 0);
+}
+
+static inline void
+rcu_reader_dec(struct rcu_reader *reader)
+{
+ assert(reader->level != 0);
+ reader->level--;
+
+ if (unlikely(!rcu_reader_in_cs(reader) && rcu_reader_linked(reader))) {
+ rcu_reader_leave(reader);
+ }
+}
+
+#endif /* _KERN_RCU_I_H */
diff --git a/kern/rcu_types.h b/kern/rcu_types.h
new file mode 100644
index 0000000..12b8913
--- /dev/null
+++ b/kern/rcu_types.h
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2018 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definition used to avoid circular inclusion dependencies.
+ */
+
+#ifndef _KERN_RCU_TYPES_H
+#define _KERN_RCU_TYPES_H
+
+#include <stdbool.h>
+
+/*
+ * Thread-local data used to track threads running read-side critical
+ * sections.
+ *
+ * The window ID is valid if and only if the reader is linked.
+ *
+ * Interrupts and preemption must be disabled when accessing a reader.
+ */
+struct rcu_reader {
+ unsigned int level;
+ unsigned int wid;
+ bool linked;
+};
+
+#endif /* _KERN_RCU_TYPES_H */
diff --git a/kern/rdxtree.c b/kern/rdxtree.c
index 678ec17..9e2e759 100644
--- a/kern/rdxtree.c
+++ b/kern/rdxtree.c
@@ -25,8 +25,8 @@
#include <kern/error.h>
#include <kern/init.h>
#include <kern/kmem.h>
-#include <kern/llsync.h>
#include <kern/macros.h>
+#include <kern/rcu.h>
#include <kern/rdxtree.h>
#include <kern/rdxtree_i.h>
#include <kern/work.h>
@@ -190,17 +190,7 @@ rdxtree_node_schedule_destruction(struct rdxtree_node *node)
assert(node->parent == NULL);
work_init(&node->work, rdxtree_node_destroy_deferred);
-
- /*
- * This assumes that llsync is initialized before scheduling is started
- * so that there can be no read-side reference when destroying the node.
- */
- if (!llsync_ready()) {
- rdxtree_node_destroy(node);
- return;
- }
-
- llsync_defer(&node->work);
+ rcu_defer(&node->work);
}
static inline void
@@ -238,7 +228,7 @@ rdxtree_node_insert(struct rdxtree_node *node, unsigned short index,
assert(node->entries[index] == NULL);
node->nr_entries++;
- llsync_store_ptr(node->entries[index], entry);
+ rcu_store_ptr(node->entries[index], entry);
}
static inline void
@@ -255,7 +245,7 @@ rdxtree_node_remove(struct rdxtree_node *node, unsigned short index)
assert(node->entries[index] != NULL);
node->nr_entries--;
- llsync_store_ptr(node->entries[index], NULL);
+ rcu_store_ptr(node->entries[index], NULL);
}
static inline void *
@@ -267,7 +257,7 @@ rdxtree_node_find(struct rdxtree_node *node, unsigned short *indexp)
index = *indexp;
while (index < ARRAY_SIZE(node->entries)) {
- ptr = rdxtree_entry_addr(llsync_load_ptr(node->entries[index]));
+ ptr = rdxtree_entry_addr(rcu_load_ptr(node->entries[index]));
if (ptr != NULL) {
*indexp = index;
@@ -355,7 +345,7 @@ rdxtree_shrink(struct rdxtree *tree)
rdxtree_node_unlink(rdxtree_entry_addr(entry));
}
- llsync_store_ptr(tree->root, entry);
+ rcu_store_ptr(tree->root, entry);
/*
* There is still one valid entry (the first one) in this node. It
@@ -410,7 +400,7 @@ rdxtree_grow(struct rdxtree *tree, rdxtree_key_t key)
rdxtree_node_insert(node, 0, tree->root);
tree->height++;
- llsync_store_ptr(tree->root, rdxtree_node_to_entry(node));
+ rcu_store_ptr(tree->root, rdxtree_node_to_entry(node));
root = node;
} while (new_height > tree->height);
@@ -433,7 +423,7 @@ rdxtree_cleanup(struct rdxtree *tree, struct rdxtree_node *node)
if (node->parent == NULL) {
tree->height = 0;
- llsync_store_ptr(tree->root, NULL);
+ rcu_store_ptr(tree->root, NULL);
rdxtree_node_schedule_destruction(node);
break;
}
@@ -488,7 +478,7 @@ rdxtree_insert_common(struct rdxtree *tree, rdxtree_key_t key,
return ERROR_BUSY;
}
- llsync_store_ptr(tree->root, ptr);
+ rcu_store_ptr(tree->root, ptr);
if (slotp != NULL) {
*slotp = &tree->root;
@@ -516,7 +506,7 @@ rdxtree_insert_common(struct rdxtree *tree, rdxtree_key_t key,
}
if (prev == NULL) {
- llsync_store_ptr(tree->root, rdxtree_node_to_entry(node));
+ rcu_store_ptr(tree->root, rdxtree_node_to_entry(node));
} else {
rdxtree_node_link(node, prev, index);
rdxtree_node_insert_node(prev, index, node);
@@ -565,7 +555,7 @@ rdxtree_insert_alloc_common(struct rdxtree *tree, void *ptr,
if (unlikely(height == 0)) {
if (tree->root == NULL) {
- llsync_store_ptr(tree->root, ptr);
+ rcu_store_ptr(tree->root, ptr);
*keyp = 0;
if (slotp != NULL) {
@@ -661,7 +651,7 @@ rdxtree_remove(struct rdxtree *tree, rdxtree_key_t key)
node = rdxtree_entry_addr(tree->root);
if (unlikely(height == 0)) {
- llsync_store_ptr(tree->root, NULL);
+ rcu_store_ptr(tree->root, NULL);
return node;
}
@@ -700,7 +690,7 @@ rdxtree_lookup_common(const struct rdxtree *tree, rdxtree_key_t key,
unsigned short height, shift, index;
void *entry;
- entry = llsync_load_ptr(tree->root);
+ entry = rcu_load_ptr(tree->root);
if (entry == NULL) {
node = NULL;
@@ -731,7 +721,7 @@ rdxtree_lookup_common(const struct rdxtree *tree, rdxtree_key_t key,
prev = node;
index = (unsigned short)(key >> shift) & RDXTREE_RADIX_MASK;
- entry = llsync_load_ptr(node->entries[index]);
+ entry = rcu_load_ptr(node->entries[index]);
node = rdxtree_entry_addr(entry);
shift -= RDXTREE_RADIX;
height--;
@@ -755,7 +745,7 @@ rdxtree_replace_slot(void **slot, void *ptr)
old = *slot;
assert(old != NULL);
rdxtree_assert_alignment(old);
- llsync_store_ptr(*slot, ptr);
+ rcu_store_ptr(*slot, ptr);
return old;
}
@@ -767,7 +757,7 @@ rdxtree_walk_next(struct rdxtree *tree, struct rdxtree_iter *iter)
rdxtree_key_t key;
void *entry;
- entry = llsync_load_ptr(tree->root);
+ entry = rcu_load_ptr(tree->root);
if (entry == NULL) {
return NULL;
@@ -863,7 +853,7 @@ rdxtree_remove_all(struct rdxtree *tree)
if (tree->height == 0) {
if (tree->root != NULL) {
- llsync_store_ptr(tree->root, NULL);
+ rcu_store_ptr(tree->root, NULL);
}
return;
@@ -906,4 +896,5 @@ rdxtree_setup(void)
}
INIT_OP_DEFINE(rdxtree_setup,
- INIT_OP_DEP(kmem_bootstrap, true));
+ INIT_OP_DEP(kmem_bootstrap, true),
+ INIT_OP_DEP(rcu_bootstrap, true));
diff --git a/kern/rdxtree.h b/kern/rdxtree.h
index 1430208..f19f349 100644
--- a/kern/rdxtree.h
+++ b/kern/rdxtree.h
@@ -19,8 +19,7 @@
*
* In addition to the standard insertion operation, this implementation can
* allocate keys for the caller at insertion time. It also allows lookups to
- * occur concurrently with updates through the use of lockless synchronization
- * (see the llsync module).
+ * occur concurrently with updates using RCU.
*/
#ifndef _KERN_RDXTREE_H
@@ -30,7 +29,7 @@
#include <stdint.h>
#include <kern/init.h>
-#include <kern/llsync.h>
+#include <kern/rcu.h>
typedef uint64_t rdxtree_key_t;
@@ -160,7 +159,7 @@ rdxtree_lookup_slot(const struct rdxtree *tree, rdxtree_key_t key)
static inline void *
rdxtree_load_slot(void **slot)
{
- return llsync_load_ptr(*slot);
+ return rcu_load_ptr(*slot);
}
/*
diff --git a/kern/rtmutex.c b/kern/rtmutex.c
index 36c72a3..55d17cd 100644
--- a/kern/rtmutex.c
+++ b/kern/rtmutex.c
@@ -225,14 +225,29 @@ out:
}
static int
+rtmutex_bootstrap(void)
+{
+ return 0;
+}
+
+INIT_OP_DEFINE(rtmutex_bootstrap,
+ INIT_OP_DEP(thread_setup_booter, true));
+
+static int
rtmutex_setup(void)
{
rtmutex_setup_debug();
return 0;
}
-INIT_OP_DEFINE(rtmutex_setup,
#ifdef CONFIG_MUTEX_DEBUG
+#define RTMUTEX_DEBUG_INIT_OPS \
INIT_OP_DEP(syscnt_setup, true),
+#else /* CONFIG_MUTEX_DEBUG */
+#define RTMUTEX_DEBUG_INIT_OPS
#endif /* CONFIG_MUTEX_DEBUG */
- );
+
+INIT_OP_DEFINE(rtmutex_setup,
+ INIT_OP_DEP(rtmutex_bootstrap, true),
+ RTMUTEX_DEBUG_INIT_OPS
+);
diff --git a/kern/rtmutex.h b/kern/rtmutex.h
index f8b11bd..90b3438 100644
--- a/kern/rtmutex.h
+++ b/kern/rtmutex.h
@@ -132,6 +132,10 @@ rtmutex_unlock(struct rtmutex *rtmutex)
}
}
+/*
+ * Mutex init operations. See kern/mutex.h.
+ */
+INIT_OP_DECLARE(rtmutex_bootstrap);
INIT_OP_DECLARE(rtmutex_setup);
#endif /* _KERN_RTMUTEX_H */
diff --git a/kern/sleepq.c b/kern/sleepq.c
index 0d04c6e..44ab53b 100644
--- a/kern/sleepq.c
+++ b/kern/sleepq.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -431,6 +431,16 @@ sleepq_remove_waiter(struct sleepq *sleepq, struct sleepq_waiter *waiter)
list_remove(&waiter->node);
}
+static struct sleepq_waiter *
+sleepq_get_last_waiter(struct sleepq *sleepq)
+{
+ if (list_empty(&sleepq->waiters)) {
+ return NULL;
+ }
+
+ return list_last_entry(&sleepq->waiters, struct sleepq_waiter, node);
+}
+
bool
sleepq_empty(const struct sleepq *sleepq)
{
@@ -441,7 +451,7 @@ static int
sleepq_wait_common(struct sleepq *sleepq, const char *wchan,
bool timed, uint64_t ticks)
{
- struct sleepq_waiter waiter;
+ struct sleepq_waiter waiter, *next;
struct thread *thread;
int error;
@@ -469,6 +479,17 @@ sleepq_wait_common(struct sleepq *sleepq, const char *wchan,
sleepq_remove_waiter(sleepq, &waiter);
+ /*
+ * Chain wake-ups here to prevent broadcasting from walking a list
+ * with preemption disabled. Note that this doesn't guard against
+ * the thundering herd effect for condition variables.
+ */
+ next = sleepq_get_last_waiter(sleepq);
+
+ if (next) {
+ sleepq_waiter_wakeup(next);
+ }
+
return error;
}
@@ -503,46 +524,18 @@ sleepq_signal(struct sleepq *sleepq)
sleepq_waiter_wakeup(waiter);
}
-static void
-sleepq_wakeup_common(struct sleepq *sleepq)
-{
- struct sleepq_waiter *waiter;
-
- assert(!list_empty(&sleepq->waiters));
-
- waiter = list_last_entry(&sleepq->waiters, struct sleepq_waiter, node);
- sleepq_waiter_wakeup(waiter);
-}
-
void
sleepq_broadcast(struct sleepq *sleepq)
{
struct sleepq_waiter *waiter;
- if (sleepq->oldest_waiter == NULL) {
- goto out;
- }
-
- list_for_each_entry(&sleepq->waiters, waiter, node) {
- sleepq_waiter_set_pending_wakeup(waiter);
-
- if (waiter == sleepq->oldest_waiter) {
- break;
- }
- }
-
- sleepq->oldest_waiter = NULL;
-
-out:
- sleepq_wakeup_common(sleepq);
-}
+ waiter = sleepq->oldest_waiter;
-void
-sleepq_wakeup(struct sleepq *sleepq)
-{
- if (list_empty(&sleepq->waiters)) {
+ if (!waiter) {
return;
}
- sleepq_wakeup_common(sleepq);
+ sleepq->oldest_waiter = NULL;
+ sleepq_waiter_set_pending_wakeup(waiter);
+ sleepq_waiter_wakeup(waiter);
}
diff --git a/kern/sleepq.h b/kern/sleepq.h
index 007e3f2..213925c 100644
--- a/kern/sleepq.h
+++ b/kern/sleepq.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -125,25 +125,12 @@ int sleepq_timedwait(struct sleepq *sleepq, const char *wchan, uint64_t ticks);
* At least one thread is awaken if any threads are waiting on the sleep
* queue.
*
- * A broadcast differs only by also making all currently waiting threads
- * pending for wake-up. As with sleepq_signal, a single thread may be
- * awaken. The rationale is to force users to implement "chained waking"
- * in order to avoid the thundering herd effect.
+ * Broadcasting a sleep queue wakes up all waiting threads.
*/
void sleepq_signal(struct sleepq *sleepq);
void sleepq_broadcast(struct sleepq *sleepq);
/*
- * Wake up a pending thread.
- *
- * This function may only wake up a thread pending for wake-up after a
- * broadcast. It is used to chain wake-ups to avoid the thundering herd
- * effect. If there are no threads pending for wake-up, this function
- * does nothing.
- */
-void sleepq_wakeup(struct sleepq *sleepq);
-
-/*
* This init operation provides :
* - sleepq creation
* - module fully initialized
diff --git a/kern/slist.h b/kern/slist.h
index c0dcb08..d708f38 100644
--- a/kern/slist.h
+++ b/kern/slist.h
@@ -24,8 +24,8 @@
#include <stdbool.h>
#include <stddef.h>
-#include <kern/llsync.h>
#include <kern/macros.h>
+#include <kern/rcu.h>
#include <kern/slist_types.h>
struct slist;
@@ -303,46 +303,46 @@ for (entry = slist_first_entry(list, typeof(*entry), member), \
* Return the first node of a list.
*/
static inline struct slist_node *
-slist_llsync_first(const struct slist *list)
+slist_rcu_first(const struct slist *list)
{
- return llsync_load_ptr(list->first);
+ return rcu_load_ptr(list->first);
}
/*
* Return the node next to the given node.
*/
static inline struct slist_node *
-slist_llsync_next(const struct slist_node *node)
+slist_rcu_next(const struct slist_node *node)
{
- return llsync_load_ptr(node->next);
+ return rcu_load_ptr(node->next);
}
/*
* Insert a node at the head of a list.
*/
static inline void
-slist_llsync_insert_head(struct slist *list, struct slist_node *node)
+slist_rcu_insert_head(struct slist *list, struct slist_node *node)
{
if (slist_empty(list)) {
list->last = node;
}
node->next = list->first;
- llsync_store_ptr(list->first, node);
+ rcu_store_ptr(list->first, node);
}
/*
* Insert a node at the tail of a list.
*/
static inline void
-slist_llsync_insert_tail(struct slist *list, struct slist_node *node)
+slist_rcu_insert_tail(struct slist *list, struct slist_node *node)
{
node->next = NULL;
if (slist_empty(list)) {
- llsync_store_ptr(list->first, node);
+ rcu_store_ptr(list->first, node);
} else {
- llsync_store_ptr(list->last->next, node);
+ rcu_store_ptr(list->last->next, node);
}
list->last = node;
@@ -354,11 +354,11 @@ slist_llsync_insert_tail(struct slist *list, struct slist_node *node)
* The prev node must be valid.
*/
static inline void
-slist_llsync_insert_after(struct slist *list, struct slist_node *prev,
+slist_rcu_insert_after(struct slist *list, struct slist_node *prev,
struct slist_node *node)
{
node->next = prev->next;
- llsync_store_ptr(prev->next, node);
+ rcu_store_ptr(prev->next, node);
if (list->last == prev) {
list->last = node;
@@ -373,20 +373,20 @@ slist_llsync_insert_after(struct slist *list, struct slist_node *prev,
* first node is removed.
*/
static inline void
-slist_llsync_remove(struct slist *list, struct slist_node *prev)
+slist_rcu_remove(struct slist *list, struct slist_node *prev)
{
struct slist_node *node;
if (slist_end(prev)) {
node = list->first;
- llsync_store_ptr(list->first, node->next);
+ rcu_store_ptr(list->first, node->next);
if (list->last == node) {
list->last = NULL;
}
} else {
node = prev->next;
- llsync_store_ptr(prev->next, node->next);
+ rcu_store_ptr(prev->next, node->next);
if (list->last == node) {
list->last = prev;
@@ -398,28 +398,28 @@ slist_llsync_remove(struct slist *list, struct slist_node *prev)
* Macro that evaluates to the address of the structure containing the
* given node based on the given type and member.
*/
-#define slist_llsync_entry(node, type, member) \
- structof(llsync_load_ptr(node), type, member)
+#define slist_rcu_entry(node, type, member) \
+ structof(rcu_load_ptr(node), type, member)
/*
* Get the first entry of a list.
*/
-#define slist_llsync_first_entry(list, type, member) \
+#define slist_rcu_first_entry(list, type, member) \
MACRO_BEGIN \
struct slist_node *___first; \
\
- ___first = slist_llsync_first(list); \
+ ___first = slist_rcu_first(list); \
slist_end(___first) ? NULL : slist_entry(___first, type, member); \
MACRO_END
/*
* Get the entry next to the given entry.
*/
-#define slist_llsync_next_entry(entry, member) \
+#define slist_rcu_next_entry(entry, member) \
MACRO_BEGIN \
struct slist_node *___next; \
\
- ___next = slist_llsync_next(&entry->member); \
+ ___next = slist_rcu_next(&entry->member); \
slist_end(___next) \
? NULL \
: slist_entry(___next, typeof(*entry), member); \
@@ -428,17 +428,17 @@ MACRO_END
/*
* Forge a loop to process all nodes of a list.
*/
-#define slist_llsync_for_each(list, node) \
-for (node = slist_llsync_first(list); \
+#define slist_rcu_for_each(list, node) \
+for (node = slist_rcu_first(list); \
!slist_end(node); \
- node = slist_llsync_next(node))
+ node = slist_rcu_next(node))
/*
* Forge a loop to process all entries of a list.
*/
-#define slist_llsync_for_each_entry(list, entry, member) \
-for (entry = slist_llsync_first_entry(list, typeof(*entry), member); \
+#define slist_rcu_for_each_entry(list, entry, member) \
+for (entry = slist_rcu_first_entry(list, typeof(*entry), member); \
entry != NULL; \
- entry = slist_llsync_next_entry(entry, member))
+ entry = slist_rcu_next_entry(entry, member))
#endif /* _KERN_SLIST_H */
diff --git a/kern/spinlock.c b/kern/spinlock.c
index a591c61..9857b38 100644
--- a/kern/spinlock.c
+++ b/kern/spinlock.c
@@ -139,6 +139,10 @@ void
spinlock_init(struct spinlock *lock)
{
lock->value = SPINLOCK_UNLOCKED;
+
+#ifdef SPINLOCK_TRACK_OWNER
+ lock->owner = NULL;
+#endif /* SPINLOCK_TRACK_OWNER */
}
static unsigned int
@@ -292,6 +296,7 @@ spinlock_lock_slow(struct spinlock *lock)
spinlock_store_first_qid(lock, SPINLOCK_QID_NULL);
}
+ spinlock_own(lock);
error = spinlock_try_downgrade(lock, qid);
if (!error) {
diff --git a/kern/spinlock.h b/kern/spinlock.h
index dd98cbf..30fc1e5 100644
--- a/kern/spinlock.h
+++ b/kern/spinlock.h
@@ -36,6 +36,21 @@ struct spinlock;
#define spinlock_assert_locked(lock) assert((lock)->value != SPINLOCK_UNLOCKED)
+#ifdef SPINLOCK_TRACK_OWNER
+
+static inline void
+spinlock_transfer_owner(struct spinlock *lock, struct thread *owner)
+{
+ assert(lock->owner == thread_self());
+ lock->owner = owner;
+}
+
+#else /* SPINLOCK_TRACK_OWNER */
+
+#define spinlock_transfer_owner(lock, owner)
+
+#endif /* SPINLOCK_TRACK_OWNER */
+
/*
* Initialize a spin lock.
*/
diff --git a/kern/spinlock_i.h b/kern/spinlock_i.h
index c9dcdd1..a9f5fce 100644
--- a/kern/spinlock_i.h
+++ b/kern/spinlock_i.h
@@ -26,6 +26,7 @@
#include <kern/error.h>
#include <kern/macros.h>
#include <kern/spinlock_types.h>
+#include <kern/thread.h>
#include <machine/cpu.h>
/*
@@ -36,6 +37,33 @@
#define SPINLOCK_UNLOCKED 0
#define SPINLOCK_LOCKED 1
+#ifdef CONFIG_SPINLOCK_DEBUG
+#define SPINLOCK_TRACK_OWNER
+#endif
+
+#ifdef SPINLOCK_TRACK_OWNER
+
+static inline void
+spinlock_own(struct spinlock *lock)
+{
+ assert(!lock->owner);
+ lock->owner = thread_self();
+}
+
+static inline void
+spinlock_disown(struct spinlock *lock)
+{
+ assert(lock->owner == thread_self());
+ lock->owner = NULL;
+}
+
+#else /* SPINLOCK_TRACK_OWNER */
+
+#define spinlock_own(lock)
+#define spinlock_disown(lock)
+
+#endif /* SPINLOCK_TRACK_OWNER */
+
static inline int
spinlock_lock_fast(struct spinlock *lock)
{
@@ -47,6 +75,7 @@ spinlock_lock_fast(struct spinlock *lock)
return ERROR_BUSY;
}
+ spinlock_own(lock);
return 0;
}
@@ -55,6 +84,7 @@ spinlock_unlock_fast(struct spinlock *lock)
{
unsigned int prev;
+ spinlock_disown(lock);
prev = atomic_cas_release(&lock->value, SPINLOCK_LOCKED, SPINLOCK_UNLOCKED);
if (unlikely(prev != SPINLOCK_LOCKED)) {
diff --git a/kern/spinlock_types.h b/kern/spinlock_types.h
index 7dbca37..93f85f7 100644
--- a/kern/spinlock_types.h
+++ b/kern/spinlock_types.h
@@ -21,8 +21,14 @@
#ifndef _KERN_SPINLOCK_TYPES_H
#define _KERN_SPINLOCK_TYPES_H
+struct thread;
+
struct spinlock {
unsigned int value;
+
+#ifdef CONFIG_SPINLOCK_DEBUG
+ struct thread *owner;
+#endif /* CONFIG_SPINLOCK_DEBUG */
};
#endif /* _KERN_SPINLOCK_TYPES_H */
diff --git a/kern/sref.c b/kern/sref.c
index 1ad9212..2b20cb4 100644
--- a/kern/sref.c
+++ b/kern/sref.c
@@ -99,9 +99,9 @@ struct sref_queue {
*/
struct sref_data {
struct spinlock lock;
- struct cpumap registered_cpus;
+ struct cpumap registered_cpus; /* TODO Review usage */
unsigned int nr_registered_cpus;
- struct cpumap pending_flushes;
+ struct cpumap pending_flushes; /* TODO Review usage */
unsigned int nr_pending_flushes;
unsigned int current_queue_id;
struct sref_queue queues[2];
@@ -143,18 +143,7 @@ struct sref_delta {
* prevent a race with the periodic event that drives regular flushes
* (normally the periodic timer interrupt).
*
- * Preemption must be disabled when accessing a delta cache.
- *
- * The dirty flag means there may be data to process, and is used to wake
- * up the manager thread. Marking a cache dirty is done either by regular
- * threads increasing or decreasing a delta, or the periodic event handler.
- * Clearing the dirty flag is only done by the manager thread. Disabling
- * preemption makes sure only one thread is accessing the cache at any time,
- * but it doesn't prevent the periodic handler from running. Since the handler
- * as well as regular threads set that flag, a race between them is harmless.
- * On the other hand, the handler won't access the flag if it is run while
- * the manager thread is running. Since the manager thread is already running,
- * there is no need to wake it up anyway.
+ * Interrupts and preemption must be disabled when accessing a delta cache.
*/
struct sref_cache {
struct sref_delta deltas[SREF_MAX_DELTAS];
@@ -387,6 +376,8 @@ sref_counter_schedule_review(struct sref_counter *counter)
static void
sref_counter_add(struct sref_counter *counter, unsigned long delta)
{
+ assert(!cpu_intr_enabled());
+
spinlock_lock(&counter->lock);
counter->value += delta;
@@ -547,19 +538,19 @@ sref_cache_get(void)
}
static struct sref_cache *
-sref_cache_acquire(void)
+sref_cache_acquire(unsigned long *flags)
{
struct sref_cache *cache;
- thread_preempt_disable();
+ thread_preempt_disable_intr_save(flags);
cache = sref_cache_get();
return cache;
}
static void
-sref_cache_release(void)
+sref_cache_release(unsigned long flags)
{
- thread_preempt_enable();
+ thread_preempt_enable_intr_restore(flags);
}
static bool
@@ -640,10 +631,11 @@ static void
sref_cache_flush(struct sref_cache *cache, struct sref_queue *queue)
{
struct sref_delta *delta;
+ unsigned long flags;
unsigned int cpu;
for (;;) {
- thread_preempt_disable();
+ thread_preempt_disable_intr_save(&flags);
if (list_empty(&cache->valid_deltas)) {
break;
@@ -652,9 +644,11 @@ sref_cache_flush(struct sref_cache *cache, struct sref_queue *queue)
delta = list_first_entry(&cache->valid_deltas, typeof(*delta), node);
sref_cache_remove_delta(delta);
- thread_preempt_enable();
+ thread_preempt_enable_intr_restore(flags);
}
+ cpu_intr_restore(flags);
+
cpu = cpu_id();
spinlock_lock(&sref_data.lock);
@@ -684,30 +678,15 @@ sref_cache_flush(struct sref_cache *cache, struct sref_queue *queue)
}
static void
-sref_cache_wakeup_manager(struct sref_cache *cache)
-{
- unsigned long flags;
-
- cpu_intr_save(&flags);
- thread_wakeup(cache->manager);
- cpu_intr_restore(flags);
-}
-
-/*
- * Force the manager thread of a cache to run.
- */
-static void
sref_cache_manage(struct sref_cache *cache)
{
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
sref_cache_mark_dirty(cache);
- sref_cache_wakeup_manager(cache);
+ thread_wakeup(cache->manager);
}
-/*
- * Check if a cache is dirty and wake up its manager thread if it is.
- *
- * Return true if the cache is dirty and requires maintenance.
- */
static bool
sref_cache_check(struct sref_cache *cache)
{
@@ -715,7 +694,7 @@ sref_cache_check(struct sref_cache *cache)
return false;
}
- sref_cache_wakeup_manager(cache);
+ sref_cache_manage(cache);
return true;
}
@@ -734,6 +713,7 @@ sref_review(struct sref_queue *queue)
int64_t nr_dirty, nr_revive, nr_true;
struct sref_counter *counter;
struct work_queue works;
+ unsigned long flags;
bool requeue;
int error;
@@ -745,7 +725,7 @@ sref_review(struct sref_queue *queue)
while (!sref_queue_empty(queue)) {
counter = sref_queue_pop(queue);
- spinlock_lock(&counter->lock);
+ spinlock_lock_intr_save(&counter->lock, &flags);
assert(sref_counter_is_queued(counter));
sref_counter_clear_queued(counter);
@@ -753,7 +733,7 @@ sref_review(struct sref_queue *queue)
if (counter->value != 0) {
sref_counter_clear_dirty(counter);
sref_counter_clear_dying(counter);
- spinlock_unlock(&counter->lock);
+ spinlock_unlock_intr_restore(&counter->lock, flags);
} else {
if (sref_counter_is_dirty(counter)) {
requeue = true;
@@ -772,14 +752,14 @@ sref_review(struct sref_queue *queue)
if (requeue) {
sref_counter_schedule_review(counter);
- spinlock_unlock(&counter->lock);
+ spinlock_unlock_intr_restore(&counter->lock, flags);
} else {
/*
* Keep in mind that the work structure shares memory with
* the counter data. Unlocking isn't needed here, since this
* counter is now really at 0, but do it for consistency.
*/
- spinlock_unlock(&counter->lock);
+ spinlock_unlock_intr_restore(&counter->lock, flags);
nr_true++;
work_init(&counter->work, sref_noref);
work_queue_push(&works, &counter->work);
@@ -847,6 +827,8 @@ sref_bootstrap(void)
}
INIT_OP_DEFINE(sref_bootstrap,
+ INIT_OP_DEP(cpu_setup, true),
+ INIT_OP_DEP(spinlock_setup, true),
INIT_OP_DEP(syscnt_setup, true));
static void __init
@@ -896,10 +878,10 @@ sref_setup(void)
INIT_OP_DEFINE(sref_setup,
INIT_OP_DEP(cpu_mp_probe, true),
+ INIT_OP_DEP(cpumap_setup, true),
INIT_OP_DEP(log_setup, true),
INIT_OP_DEP(panic_setup, true),
INIT_OP_DEP(sref_bootstrap, true),
- INIT_OP_DEP(syscnt_setup, true),
INIT_OP_DEP(thread_setup, true));
void
@@ -937,6 +919,7 @@ int
sref_unregister(void)
{
struct sref_cache *cache;
+ unsigned long flags;
unsigned int cpu;
bool dirty;
int error;
@@ -944,22 +927,17 @@ sref_unregister(void)
assert(!thread_preempt_enabled());
cache = sref_cache_get();
- assert(sref_cache_is_registered(cache));
- /*
- * Check the dirty flag after clearing the registered flag. The
- * periodic event handler won't set the dirty flag if the processor
- * is unregistered. It is then safe to test if that flag is set.
- * Everything involved is processor-local, therefore a simple compiler
- * barrier is enough to enforce ordering.
- */
+ cpu_intr_save(&flags);
+
+ assert(sref_cache_is_registered(cache));
sref_cache_clear_registered(cache);
- barrier();
dirty = sref_cache_check(cache);
if (dirty) {
sref_cache_mark_registered(cache);
- return ERROR_BUSY;
+ error = ERROR_BUSY;
+ goto out;
}
cpu = cpu_id();
@@ -991,6 +969,9 @@ sref_unregister(void)
spinlock_unlock(&sref_data.lock);
+out:
+ cpu_intr_restore(flags);
+
return error;
}
@@ -1012,13 +993,16 @@ sref_report_periodic_event(void)
void
sref_counter_init(struct sref_counter *counter,
+ unsigned long init_value,
struct sref_weakref *weakref,
sref_noref_fn_t noref_fn)
{
+ assert(init_value != 0);
+
counter->noref_fn = noref_fn;
spinlock_init(&counter->lock);
counter->flags = 0;
- counter->value = 1;
+ counter->value = init_value;
counter->weakref = weakref;
if (weakref) {
@@ -1040,10 +1024,11 @@ void
sref_counter_inc(struct sref_counter *counter)
{
struct sref_cache *cache;
+ unsigned long flags;
- cache = sref_cache_acquire();
+ cache = sref_cache_acquire(&flags);
sref_counter_inc_common(counter, cache);
- sref_cache_release();
+ sref_cache_release(flags);
}
void
@@ -1051,12 +1036,13 @@ sref_counter_dec(struct sref_counter *counter)
{
struct sref_cache *cache;
struct sref_delta *delta;
+ unsigned long flags;
- cache = sref_cache_acquire();
+ cache = sref_cache_acquire(&flags);
sref_cache_mark_dirty(cache);
delta = sref_cache_get_delta(cache, counter);
sref_delta_dec(delta);
- sref_cache_release();
+ sref_cache_release(flags);
}
struct sref_counter *
@@ -1064,8 +1050,9 @@ sref_weakref_get(struct sref_weakref *weakref)
{
struct sref_counter *counter;
struct sref_cache *cache;
+ unsigned long flags;
- cache = sref_cache_acquire();
+ cache = sref_cache_acquire(&flags);
counter = sref_weakref_tryget(weakref);
@@ -1073,7 +1060,7 @@ sref_weakref_get(struct sref_weakref *weakref)
sref_counter_inc_common(counter, cache);
}
- sref_cache_release();
+ sref_cache_release(flags);
return counter;
}
diff --git a/kern/sref.h b/kern/sref.h
index 768f1ee..fb62d83 100644
--- a/kern/sref.h
+++ b/kern/sref.h
@@ -31,6 +31,8 @@
#ifndef _KERN_SREF_H
#define _KERN_SREF_H
+#include <kern/init.h>
+
/*
* Scalable reference counter.
*/
@@ -77,17 +79,20 @@ void sref_report_periodic_event(void);
/*
* Initialize a scalable reference counter.
*
- * The counter is set to 1. The no-reference function is called (from thread
- * context) when it is certain that the true number of references is 0.
+ * The no-reference function is called (from thread context) when it is
+ * certain that the true number of references is 0.
*/
void sref_counter_init(struct sref_counter *counter,
+ unsigned long init_value,
struct sref_weakref *weakref,
sref_noref_fn_t noref_fn);
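+
+/*
+ * Usage sketch (illustrative only : the obj type, its nr_refs member,
+ * and the obj_destroy()/obj_noref() functions are assumptions) :
+ *
+ *   static void
+ *   obj_noref(struct sref_counter *counter)
+ *   {
+ *       obj_destroy(structof(counter, struct obj, nr_refs));
+ *   }
+ *
+ *   sref_counter_init(&obj->nr_refs, 1, NULL, obj_noref);
+ */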
/*
* Counter operations.
*
- * These functions may safely be called with preemption disabled.
+ * These functions may safely be called in interrupt context.
+ *
+ * These functions imply a compiler barrier.
*/
void sref_counter_inc(struct sref_counter *counter);
void sref_counter_dec(struct sref_counter *counter);
@@ -98,8 +103,14 @@ void sref_counter_dec(struct sref_counter *counter);
* If successful, increment the reference counter before returning it.
* Otherwise return NULL.
*
- * This function may safely be called with preemption disabled.
+ * This function may safely be called in interrupt context.
*/
struct sref_counter * sref_weakref_get(struct sref_weakref *weakref);
+/*
+ * This init operation provides :
+ * - sref counter and weakref initialization and usage
+ */
+INIT_OP_DECLARE(sref_bootstrap);
+
#endif /* _KERN_SREF_H */
diff --git a/kern/sref_i.h b/kern/sref_i.h
index 27f962c..65d0c1b 100644
--- a/kern/sref_i.h
+++ b/kern/sref_i.h
@@ -51,17 +51,21 @@ struct sref_weakref {
* It's tempting to merge the flags into the node member, but since they're
* not protected by the same lock, store them separately.
*
- * TODO Locking keys.
+ * Locking keys :
+ * (c) sref_counter
+ * (g) sref_data
+ *
+ * Interrupts must be disabled when accessing a global counter.
*/
struct sref_counter {
sref_noref_fn_t noref_fn;
union {
struct {
- struct slist_node node;
+ struct slist_node node; /* (g) */
struct spinlock lock;
- int flags;
- unsigned long value;
+ int flags; /* (c) */
+ unsigned long value; /* (c) */
struct sref_weakref *weakref;
};
diff --git a/kern/syscnt.c b/kern/syscnt.c
index cd13a39..e0b213d 100644
--- a/kern/syscnt.c
+++ b/kern/syscnt.c
@@ -76,7 +76,8 @@ syscnt_setup(void)
* modules may use system counters for debugging.
*/
INIT_OP_DEFINE(syscnt_setup,
- INIT_OP_DEP(thread_setup_booter, true));
+ INIT_OP_DEP(mutex_bootstrap, true),
+ INIT_OP_DEP(spinlock_setup, true));
void __init
syscnt_register(struct syscnt *syscnt, const char *name)
diff --git a/kern/syscnt.h b/kern/syscnt.h
index 816aa4b..fb00b75 100644
--- a/kern/syscnt.h
+++ b/kern/syscnt.h
@@ -53,6 +53,12 @@ void syscnt_register(struct syscnt *syscnt, const char *name);
#ifdef ATOMIC_HAVE_64B_OPS
static inline void
+syscnt_set(struct syscnt *syscnt, uint64_t value)
+{
+ atomic_store(&syscnt->value, value, ATOMIC_RELAXED);
+}
+
+static inline void
syscnt_add(struct syscnt *syscnt, int64_t delta)
{
atomic_add(&syscnt->value, delta, ATOMIC_RELAXED);
@@ -67,6 +73,16 @@ syscnt_read(const struct syscnt *syscnt)
#else /* ATOMIC_HAVE_64B_OPS */
static inline void
+syscnt_set(struct syscnt *syscnt, uint64_t value)
+{
+ unsigned long flags;
+
+ spinlock_lock_intr_save(&syscnt->lock, &flags);
+ syscnt->value = value;
+ spinlock_unlock_intr_restore(&syscnt->lock, flags);
+}
+
+static inline void
syscnt_add(struct syscnt *syscnt, int64_t delta)
{
unsigned long flags;
@@ -114,6 +130,7 @@ void syscnt_info(const char *prefix);
/*
* This init operation provides :
* - registration of system counters
+ * - module fully initialized
*/
INIT_OP_DECLARE(syscnt_setup);
diff --git a/kern/thread.c b/kern/thread.c
index d8f0de1..9fe6434 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012-2017 Richard Braun.
+ * Copyright (c) 2012-2018 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -92,17 +92,16 @@
#include <kern/atomic.h>
#include <kern/clock.h>
-#include <kern/condition.h>
#include <kern/cpumap.h>
#include <kern/error.h>
#include <kern/init.h>
#include <kern/kmem.h>
#include <kern/list.h>
-#include <kern/llsync.h>
#include <kern/macros.h>
#include <kern/panic.h>
#include <kern/percpu.h>
#include <kern/perfmon.h>
+#include <kern/rcu.h>
#include <kern/shell.h>
#include <kern/sleepq.h>
#include <kern/spinlock.h>
@@ -637,8 +636,6 @@ thread_runq_schedule(struct thread_runq *runq)
assert(!cpu_intr_enabled());
spinlock_assert_locked(&runq->lock);
- llsync_report_context_switch();
-
thread_clear_flag(prev, THREAD_YIELD);
thread_runq_put_prev(runq, prev);
@@ -657,6 +654,9 @@ thread_runq_schedule(struct thread_runq *runq)
if (likely(prev != next)) {
thread_runq_schedule_unload(prev);
+ rcu_report_context_switch(thread_rcu_reader(prev));
+ spinlock_transfer_owner(&runq->lock, next);
+
/*
* That's where the true context switch occurs. The next thread must
* unlock the run queue and reenable preemption. Note that unlocking
@@ -669,15 +669,18 @@ thread_runq_schedule(struct thread_runq *runq)
* The thread is dispatched on a processor once again.
*
* Keep in mind the system state may have changed a lot since this
- * function was called. In particular, the next thread may have been
- * destroyed, and must not be referenced any more.
+ * function was called. In particular :
+ * - The next thread may have been destroyed, and must not be
+ * referenced any more.
+ * - The current thread may have been migrated to another processor.
*/
barrier();
-
thread_runq_schedule_load(prev);
- /* The thread might have been moved to another processor */
+ next = NULL;
runq = thread_runq_local();
+ } else {
+ next = NULL;
}
assert(prev->preempt_level == THREAD_SUSPEND_PREEMPT_LEVEL);
@@ -1713,6 +1716,7 @@ thread_init_booter(unsigned int cpu)
booter->flags = 0;
booter->intr_level = 0;
booter->preempt_level = 1;
+ rcu_reader_init(&booter->rcu_reader);
cpumap_fill(&booter->cpumap);
thread_set_user_sched_policy(booter, THREAD_SCHED_POLICY_IDLE);
thread_set_user_sched_class(booter, THREAD_SCHED_CLASS_IDLE);
@@ -1836,12 +1840,11 @@ thread_init(struct thread *thread, void *stack,
}
turnstile_td_init(&thread->turnstile_td);
- thread->last_cond = NULL;
thread->propagate_priority = false;
thread->preempt_level = THREAD_SUSPEND_PREEMPT_LEVEL;
thread->pin_level = 0;
thread->intr_level = 0;
- thread->llsync_level = 0;
+ rcu_reader_init(&thread->rcu_reader);
cpumap_copy(&thread->cpumap, cpumap);
thread_set_user_sched_policy(thread, attr->policy);
thread_set_user_sched_class(thread, thread_policy_to_class(attr->policy));
@@ -2137,7 +2140,6 @@ static void
thread_idle(void *arg)
{
struct thread *self;
- int error;
(void)arg;
@@ -2145,14 +2147,6 @@ thread_idle(void *arg)
for (;;) {
thread_preempt_disable();
- error = sref_unregister();
-
- if (error) {
- assert(error == ERROR_BUSY);
- goto error_sref;
- }
-
- llsync_unregister();
for (;;) {
cpu_intr_disable();
@@ -2165,10 +2159,6 @@ thread_idle(void *arg)
cpu_idle();
}
- llsync_register();
- sref_register();
-
-error_sref:
thread_preempt_enable();
}
}
@@ -2340,23 +2330,33 @@ thread_setup(void)
return 0;
}
+#ifdef CONFIG_THREAD_STACK_GUARD
+#define THREAD_STACK_GUARD_INIT_OP_DEPS \
+ INIT_OP_DEP(vm_kmem_setup, true), \
+ INIT_OP_DEP(vm_map_setup, true), \
+ INIT_OP_DEP(vm_page_setup, true),
+#else /* CONFIG_THREAD_STACK_GUARD */
+#define THREAD_STACK_GUARD_INIT_OP_DEPS
+#endif /* CONFIG_THREAD_STACK_GUARD */
+
+#ifdef CONFIG_PERFMON
+#define THREAD_PERFMON_INIT_OP_DEPS \
+ INIT_OP_DEP(perfmon_bootstrap, true),
+#else /* CONFIG_PERFMON */
+#define THREAD_PERFMON_INIT_OP_DEPS
+#endif /* CONFIG_PERFMON */
+
INIT_OP_DEFINE(thread_setup,
INIT_OP_DEP(cpumap_setup, true),
INIT_OP_DEP(kmem_setup, true),
INIT_OP_DEP(pmap_setup, true),
-#ifdef CONFIG_PERFMON
- INIT_OP_DEP(perfmon_bootstrap, true),
-#endif
INIT_OP_DEP(sleepq_setup, true),
INIT_OP_DEP(task_setup, true),
INIT_OP_DEP(thread_bootstrap, true),
INIT_OP_DEP(turnstile_setup, true),
-#ifdef CONFIG_THREAD_STACK_GUARD
- INIT_OP_DEP(vm_kmem_setup, true),
- INIT_OP_DEP(vm_map_setup, true),
- INIT_OP_DEP(vm_page_setup, true),
-#endif
- );
+ THREAD_PERFMON_INIT_OP_DEPS
+ THREAD_STACK_GUARD_INIT_OP_DEPS
+);
void __init
thread_ap_setup(void)
@@ -2654,11 +2654,11 @@ thread_run_scheduler(void)
assert(thread == runq->current);
assert(thread->preempt_level == (THREAD_SUSPEND_PREEMPT_LEVEL - 1));
- llsync_register();
sref_register();
spinlock_lock(&runq->lock);
thread = thread_runq_get_next(thread_runq_local());
+ spinlock_transfer_owner(&runq->lock, thread);
tcb_load(&thread->tcb);
}
diff --git a/kern/thread.h b/kern/thread.h
index 8e1d85e..a6faa95 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012-2017 Richard Braun.
+ * Copyright (c) 2012-2018 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -41,7 +41,6 @@
#include <kern/atomic.h>
#include <kern/init.h>
-#include <kern/condition.h>
#include <kern/cpumap.h>
#include <kern/kernel.h>
#include <kern/macros.h>
@@ -229,6 +228,8 @@ int thread_timedsleep(struct spinlock *interlock, const void *wchan_addr,
*
* If the target thread is NULL, the calling thread, or already in the
* running state, no action is performed and ERROR_INVAL is returned.
+ *
+ * TODO Describe memory ordering with regard to thread_sleep().
*/
int thread_wakeup(struct thread *thread);
@@ -250,6 +251,8 @@ noreturn void thread_run_scheduler(void);
* This call does nothing if preemption is disabled, or the scheduler
* determines the caller should continue to run (e.g. it's currently the only
* runnable thread).
+ *
+ * Implies a full memory barrier if a context switch occurred.
*/
void thread_yield(void);
@@ -450,48 +453,6 @@ thread_sleepq_return(struct sleepq *sleepq)
}
/*
- * Condition variable related functions.
- */
-
-static inline void
-thread_set_last_cond(struct condition *last_cond)
-{
- struct thread *thread;
-
- thread = thread_self();
- assert(thread->last_cond == NULL);
- thread->last_cond = last_cond;
-}
-
-static inline struct condition *
-thread_pull_last_cond(void)
-{
- struct condition *last_cond;
- struct thread *thread;
-
- thread = thread_self();
- last_cond = thread->last_cond;
-
- if (last_cond != NULL) {
- thread->last_cond = NULL;
- }
-
- return last_cond;
-}
-
-static inline void
-thread_wakeup_last_cond(void)
-{
- struct condition *last_cond;
-
- last_cond = thread_pull_last_cond();
-
- if (last_cond != NULL) {
- condition_wakeup(last_cond);
- }
-}
-
-/*
* Turnstile lending functions.
*/
@@ -691,38 +652,13 @@ thread_intr_leave(void)
}
/*
- * Lockless synchronization read-side critical section level control functions.
+ * RCU functions.
*/
-static inline int
-thread_llsync_in_read_cs(void)
-{
- struct thread *thread;
-
- thread = thread_self();
- return thread->llsync_level != 0;
-}
-
-static inline void
-thread_llsync_read_inc(void)
-{
- struct thread *thread;
-
- thread = thread_self();
- thread->llsync_level++;
- assert(thread->llsync_level != 0);
- barrier();
-}
-
-static inline void
-thread_llsync_read_dec(void)
+static inline struct rcu_reader *
+thread_rcu_reader(struct thread *thread)
{
- struct thread *thread;
-
- barrier();
- thread = thread_self();
- assert(thread->llsync_level != 0);
- thread->llsync_level--;
+ return &thread->rcu_reader;
}
/*
diff --git a/kern/thread_i.h b/kern/thread_i.h
index 97d037b..476d013 100644
--- a/kern/thread_i.h
+++ b/kern/thread_i.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -22,9 +22,9 @@
#include <stdbool.h>
#include <kern/atomic.h>
-#include <kern/condition_types.h>
#include <kern/cpumap.h>
#include <kern/list_types.h>
+#include <kern/rcu_types.h>
#include <kern/spinlock_types.h>
#include <kern/turnstile_types.h>
#include <machine/cpu.h>
@@ -125,15 +125,6 @@ struct thread {
/* Turnstile available for lending */
struct turnstile *priv_turnstile; /* (-) */
- /*
- * When a thread wakes up after waiting for a condition variable,
- * it sets this variable so that other waiters are only awaken
- * after the associated mutex has actually been released.
- *
- * This member is thread-local.
- */
- struct condition *last_cond; /* (-) */
-
struct turnstile_td turnstile_td; /* (t) */
/* True if priority must be propagated when preemption is reenabled */
@@ -148,8 +139,8 @@ struct thread {
/* Interrupt level, in thread context if 0 */
unsigned short intr_level; /* (-) */
- /* Read-side critical section level, not in any if 0 */
- unsigned short llsync_level; /* (-) */
+ /* RCU per-thread data */
+ struct rcu_reader rcu_reader; /* (-) */
/* Processors on which this thread is allowed to run */
struct cpumap cpumap; /* (r) */
diff --git a/kern/timer.c b/kern/timer.c
index d6d1d9d..cafe49f 100644
--- a/kern/timer.c
+++ b/kern/timer.c
@@ -410,9 +410,20 @@ timer_bucket_filter(struct timer_bucket *bucket, uint64_t now,
}
static int __init
+timer_bootstrap(void)
+{
+ timer_cpu_data_init(cpu_local_ptr(timer_cpu_data), 0);
+ return 0;
+}
+
+INIT_OP_DEFINE(timer_bootstrap,
+ INIT_OP_DEP(cpu_setup, true),
+ INIT_OP_DEP(spinlock_setup, true));
+
+static int __init
timer_setup(void)
{
- for (unsigned int cpu = 0; cpu < cpu_count(); cpu++) {
+ for (unsigned int cpu = 1; cpu < cpu_count(); cpu++) {
timer_cpu_data_init(percpu_ptr(timer_cpu_data, cpu), cpu);
}
@@ -420,10 +431,11 @@ timer_setup(void)
}
INIT_OP_DEFINE(timer_setup,
- INIT_OP_DEP(boot_setup_intr, true),
- INIT_OP_DEP(cpu_mp_probe, true));
+ INIT_OP_DEP(cpu_mp_probe, true),
+ INIT_OP_DEP(spinlock_setup, true));
-void timer_init(struct timer *timer, timer_fn_t fn, int flags)
+void
+timer_init(struct timer *timer, timer_fn_t fn, int flags)
{
timer->fn = fn;
timer->cpu = TIMER_INVALID_CPU;
diff --git a/kern/timer.h b/kern/timer.h
index ddace45..d47e5a7 100644
--- a/kern/timer.h
+++ b/kern/timer.h
@@ -99,8 +99,7 @@ void timer_report_periodic_event(void);
/*
* This init operation provides :
* - timer initialization and scheduling
- * - module fully initialized
*/
-INIT_OP_DECLARE(timer_setup);
+INIT_OP_DECLARE(timer_bootstrap);
#endif /* _KERN_TIMER_H */
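With this split, timer_bootstrap initializes the boot processor's timer data early, timer_setup later covers the remaining processors, and the header now exposes timer_bootstrap as the dependency point for other modules. A hedged usage sketch follows; the names are hypothetical, the flags value 0 is assumed to be acceptable, and the callback signature is inferred from test_mutex.c:

#include <stdint.h>

#include <kern/timer.h>

static struct timer example_timer;

static void
example_timer_fn(struct timer *timer)
{
    /* Expiry handler; it may rearm the timer with timer_schedule(). */
    (void)timer;
}

static void
example_arm(uint64_t ticks)
{
    timer_init(&example_timer, example_timer_fn, 0);    /* flags: 0 (assumed) */
    timer_schedule(&example_timer, ticks);              /* expiration time */
}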
diff --git a/kern/work.c b/kern/work.c
index c1bdede..d496961 100644
--- a/kern/work.c
+++ b/kern/work.c
@@ -102,7 +102,6 @@ struct work_pool {
};
static int work_thread_create(struct work_pool *pool, unsigned int id);
-static void work_thread_destroy(struct work_thread *worker);
static struct work_pool work_pool_cpu_main __percpu;
static struct work_pool work_pool_cpu_highprio __percpu;
@@ -158,7 +157,16 @@ work_pool_compute_max_threads(unsigned int nr_cpus)
}
static void __init
-work_pool_init(struct work_pool *pool, unsigned int cpu, int flags)
+work_pool_init(struct work_pool *pool)
+{
+ spinlock_init(&pool->lock);
+ work_queue_init(&pool->queue0);
+ work_queue_init(&pool->queue1);
+ pool->manager = NULL;
+}
+
+static void __init
+work_pool_build(struct work_pool *pool, unsigned int cpu, int flags)
{
char name[SYSCNT_NAME_SIZE];
const char *suffix;
@@ -180,10 +188,6 @@ work_pool_init(struct work_pool *pool, unsigned int cpu, int flags)
max_threads = work_pool_compute_max_threads(nr_cpus);
- spinlock_init(&pool->lock);
- work_queue_init(&pool->queue0);
- work_queue_init(&pool->queue1);
- pool->manager = NULL;
pool->max_threads = max_threads;
pool->nr_threads = 0;
pool->nr_available_threads = 0;
@@ -292,6 +296,13 @@ work_pool_concat_queue(struct work_pool *pool, struct work_queue *queue)
}
static void
+work_thread_destroy(struct work_thread *worker)
+{
+ thread_join(worker->thread);
+ kmem_cache_free(&work_thread_cache, worker);
+}
+
+static void
work_process(void *arg)
{
struct work_thread *self, *worker;
@@ -464,30 +475,42 @@ error_cpumap:
return error;
}
-static void
-work_thread_destroy(struct work_thread *worker)
+static int __init
+work_bootstrap(void)
{
- thread_join(worker->thread);
- kmem_cache_free(&work_thread_cache, worker);
+ work_pool_init(cpu_local_ptr(work_pool_cpu_main));
+ work_pool_init(cpu_local_ptr(work_pool_cpu_highprio));
+ return 0;
}
+INIT_OP_DEFINE(work_bootstrap,
+ INIT_OP_DEP(cpu_setup, true),
+ INIT_OP_DEP(spinlock_setup, true),
+ INIT_OP_DEP(thread_bootstrap, true));
+
static int __init
work_setup(void)
{
- unsigned int i;
-
kmem_cache_init(&work_thread_cache, "work_thread",
sizeof(struct work_thread), 0, NULL, 0);
- for (i = 0; i < cpu_count(); i++) {
- work_pool_init(percpu_ptr(work_pool_cpu_main, i), i, 0);
- work_pool_init(percpu_ptr(work_pool_cpu_highprio, i), i,
- WORK_PF_HIGHPRIO);
+ for (unsigned int i = 1; i < cpu_count(); i++) {
+ work_pool_init(percpu_ptr(work_pool_cpu_main, i));
+ work_pool_init(percpu_ptr(work_pool_cpu_highprio, i));
+ }
+
+ work_pool_init(&work_pool_main);
+ work_pool_init(&work_pool_highprio);
+
+ for (unsigned int i = 0; i < cpu_count(); i++) {
+ work_pool_build(percpu_ptr(work_pool_cpu_main, i), i, 0);
+ work_pool_build(percpu_ptr(work_pool_cpu_highprio, i), i,
+ WORK_PF_HIGHPRIO);
}
- work_pool_init(&work_pool_main, WORK_INVALID_CPU, WORK_PF_GLOBAL);
- work_pool_init(&work_pool_highprio, WORK_INVALID_CPU,
- WORK_PF_GLOBAL | WORK_PF_HIGHPRIO);
+ work_pool_build(&work_pool_main, WORK_INVALID_CPU, WORK_PF_GLOBAL);
+ work_pool_build(&work_pool_highprio, WORK_INVALID_CPU,
+ WORK_PF_GLOBAL | WORK_PF_HIGHPRIO);
log_info("work: threads per pool (per-cpu/global): %u/%u, spare: %u",
percpu_var(work_pool_cpu_main.max_threads, 0),
@@ -504,7 +527,8 @@ INIT_OP_DEFINE(work_setup,
INIT_OP_DEP(panic_setup, true),
INIT_OP_DEP(spinlock_setup, true),
INIT_OP_DEP(syscnt_setup, true),
- INIT_OP_DEP(thread_setup, true));
+ INIT_OP_DEP(thread_setup, true),
+ INIT_OP_DEP(work_bootstrap, true));
void
work_schedule(struct work *work, int flags)
diff --git a/kern/work.h b/kern/work.h
index 68801c4..ccd3c50 100644
--- a/kern/work.h
+++ b/kern/work.h
@@ -131,6 +131,9 @@ work_init(struct work *work, work_fn_t fn)
/*
* Schedule work for deferred processing.
*
+ * After being scheduled, a work must be reinitialized before
+ * it can be reused.
+ *
* This function may be called from interrupt context.
*/
void work_schedule(struct work *work, int flags);
@@ -148,9 +151,8 @@ void work_report_periodic_event(void);
/*
* This init operation provides :
- * - works can be scheduled
- * - module fully initialized
+ * - work / work queue initialization and scheduling
*/
-INIT_OP_DECLARE(work_setup);
+INIT_OP_DECLARE(work_bootstrap);
#endif /* _KERN_WORK_H */
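The new note on work_schedule() implies the usual lifecycle: initialize a work, schedule it once, and reinitialize it before any reuse. Below is a sketch under assumptions: the example names are hypothetical, the flags value 0 is assumed valid, and structof() is assumed to be the container-of macro from kern/macros.h.

#include <stddef.h>

#include <kern/kmem.h>
#include <kern/macros.h>
#include <kern/work.h>

struct example_obj {
    struct work work;
    int payload;
};

static void
example_obj_release(struct work *work)
{
    struct example_obj *obj;

    /* structof(): container-of style macro from kern/macros.h (assumed) */
    obj = structof(work, struct example_obj, work);
    kmem_free(obj, sizeof(*obj));
}

static void
example_obj_retire(struct example_obj *obj)
{
    work_init(&obj->work, example_obj_release);
    work_schedule(&obj->work, 0);   /* reinitialize with work_init() before reuse */
}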
diff --git a/kern/xcall.c b/kern/xcall.c
index 215c1c7..2096f61 100644
--- a/kern/xcall.c
+++ b/kern/xcall.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014-2017 Richard Braun.
+ * Copyright (c) 2014-2018 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -32,45 +32,58 @@
#include <machine/cpu.h>
struct xcall {
- alignas(CPU_L1_SIZE) xcall_fn_t fn;
+ xcall_fn_t fn;
void *arg;
};
/*
* Per-CPU data.
*
- * Send calls are sent to remote processors. Their access is synchronized
- * by disabling preemption.
+ * The lock is used to serialize cross-calls from different processors
+ * to the same processor. It is held during the complete cross-call
+ * sequence. Inside the critical section, accesses to the receive call
+ * are used to enforce release-acquire ordering between the sending
+ * and receiving processors.
*
- * The received call points to either NULL if there is no call to process,
- * or a remote send call otherwise. The lock serializes the complete
- * inter-processor operation, i.e. setting the received call pointer,
- * communication through an IPI, and waiting for the processor to
- * acknowledge execution. By serializing interrupts, it is certain that
- * there is a 1:1 mapping between interrupts and cross-calls, allowing
- * the handler to process only one cross-call instead of iterating over
- * a queue. This way, interrupts with higher priority can be handled
- * between multiple cross-calls.
+ * Locking keys :
+ * (a) atomic
+ * (c) cpu_data
*/
struct xcall_cpu_data {
- alignas(CPU_L1_SIZE) struct xcall send_calls[CONFIG_MAX_CPUS];
-
- struct syscnt sc_sent;
- struct syscnt sc_received;
- struct xcall *recv_call;
- struct spinlock lock;
+ alignas(CPU_L1_SIZE) struct spinlock lock;
+ struct xcall *recv_call; /* (c) */
+ struct syscnt sc_sent; /* (a) */
+ struct syscnt sc_received; /* (a) */
};
static struct xcall_cpu_data xcall_cpu_data __percpu;
-static inline void
-xcall_set(struct xcall *call, xcall_fn_t fn, void *arg)
+static struct xcall_cpu_data *
+xcall_get_local_cpu_data(void)
+{
+ return cpu_local_ptr(xcall_cpu_data);
+}
+
+static struct xcall_cpu_data *
+xcall_get_cpu_data(unsigned int cpu)
+{
+ return percpu_ptr(xcall_cpu_data, cpu);
+}
+
+static void
+xcall_init(struct xcall *call, xcall_fn_t fn, void *arg)
{
call->fn = fn;
call->arg = arg;
}
static void
+xcall_process(struct xcall *call)
+{
+ call->fn(call->arg);
+}
+
+static void
xcall_cpu_data_init(struct xcall_cpu_data *cpu_data, unsigned int cpu)
{
char name[SYSCNT_NAME_SIZE];
@@ -83,20 +96,6 @@ xcall_cpu_data_init(struct xcall_cpu_data *cpu_data, unsigned int cpu)
spinlock_init(&cpu_data->lock);
}
-static struct xcall_cpu_data *
-xcall_cpu_data_get(void)
-{
- assert(!thread_preempt_enabled());
- return cpu_local_ptr(xcall_cpu_data);
-}
-
-static struct xcall *
-xcall_cpu_data_get_send_call(struct xcall_cpu_data *cpu_data, unsigned int cpu)
-{
- assert(cpu < ARRAY_SIZE(cpu_data->send_calls));
- return &cpu_data->send_calls[cpu];
-}
-
static struct xcall *
xcall_cpu_data_get_recv_call(const struct xcall_cpu_data *cpu_data)
{
@@ -122,47 +121,44 @@ xcall_setup(void)
unsigned int i;
for (i = 0; i < cpu_count(); i++) {
- xcall_cpu_data_init(percpu_ptr(xcall_cpu_data, i), i);
+ xcall_cpu_data_init(xcall_get_cpu_data(i), i);
}
return 0;
}
INIT_OP_DEFINE(xcall_setup,
+ INIT_OP_DEP(cpu_mp_probe, true),
INIT_OP_DEP(thread_bootstrap, true),
INIT_OP_DEP(spinlock_setup, true));
void
xcall_call(xcall_fn_t fn, void *arg, unsigned int cpu)
{
- struct xcall_cpu_data *local_data, *remote_data;
- struct xcall *call;
+ struct xcall_cpu_data *cpu_data;
+ struct xcall call;
assert(cpu_intr_enabled());
- assert(fn != NULL);
-
- remote_data = percpu_ptr(xcall_cpu_data, cpu);
-
- thread_preempt_disable();
+ assert(fn);
- local_data = xcall_cpu_data_get();
- call = xcall_cpu_data_get_send_call(local_data, cpu);
- xcall_set(call, fn, arg);
+ xcall_init(&call, fn, arg);
+ cpu_data = xcall_get_cpu_data(cpu);
- spinlock_lock(&remote_data->lock);
+ spinlock_lock(&cpu_data->lock);
- xcall_cpu_data_set_recv_call(remote_data, call);
+ /* Enforce release ordering on the receive call */
+ xcall_cpu_data_set_recv_call(cpu_data, &call);
cpu_send_xcall(cpu);
- syscnt_inc(&remote_data->sc_sent);
- while (xcall_cpu_data_get_recv_call(remote_data) != NULL) {
+ /* Enforce acquire ordering on the receive call */
+ while (xcall_cpu_data_get_recv_call(cpu_data) != NULL) {
cpu_pause();
}
- spinlock_unlock(&remote_data->lock);
+ spinlock_unlock(&cpu_data->lock);
- thread_preempt_enable();
+ syscnt_inc(&cpu_data->sc_sent);
}
void
@@ -173,17 +169,19 @@ xcall_intr(void)
assert(thread_check_intr_context());
- cpu_data = xcall_cpu_data_get();
+ cpu_data = xcall_get_local_cpu_data();
+ /* Enforce acquire ordering on the receive call */
call = xcall_cpu_data_get_recv_call(cpu_data);
if (call) {
- call->fn(call->arg);
+ xcall_process(call);
} else {
log_warning("xcall: spurious interrupt on cpu%u", cpu_id());
}
syscnt_inc(&cpu_data->sc_received);
+ /* Enforce release ordering on the receive call */
xcall_cpu_data_clear_recv_call(cpu_data);
}
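The release-acquire handoff described in the comments can be illustrated outside the kernel with C11 <stdatomic.h>; the sketch below only shows the ordering pattern and deliberately does not use the kernel's atomic.h wrappers:

#include <stdatomic.h>
#include <stddef.h>

struct call {
    void (*fn)(void *);
    void *arg;
};

static _Atomic(struct call *) recv_call;

/* Sender: publish the call with release, then spin until the receiver clears it. */
static void
send(struct call *call)
{
    atomic_store_explicit(&recv_call, call, memory_order_release);
    /* An IPI would be sent here. */

    while (atomic_load_explicit(&recv_call, memory_order_acquire) != NULL) {
        /* cpu_pause() equivalent */
    }
}

/* Receiver (interrupt handler): acquire the call, run it, release the slot. */
static void
receive(void)
{
    struct call *call;

    call = atomic_load_explicit(&recv_call, memory_order_acquire);

    if (call != NULL) {
        call->fn(call->arg);
        atomic_store_explicit(&recv_call, NULL, memory_order_release);
    }
}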
diff --git a/kern/xcall.h b/kern/xcall.h
index df36487..4733152 100644
--- a/kern/xcall.h
+++ b/kern/xcall.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Richard Braun.
+ * Copyright (c) 2014-2018 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,11 +19,15 @@
*
* This module provides the ability to run functions, called cross-calls,
* on specific processors.
+ *
+ * TODO Asynchronous cross-calls.
*/
#ifndef _KERN_XCALL_H
#define _KERN_XCALL_H
+#include <kern/init.h>
+
/*
* Type for cross-call functions.
*/
@@ -33,19 +37,28 @@ typedef void (*xcall_fn_t)(void *arg);
* Run the given cross-call function on a specific processor.
*
* The operation is completely synchronous, returning only when the function
- * has finished running on the target processor, with the side effects of
- * the function visible.
+ * has finished running on the target processor. Release-acquire ordering is
+ * enforced both before and after the function runs on the target processor,
+ * so that side effects produced by the caller are visible to the function,
+ * and vice-versa on return.
*
- * The function is run in interrupt context. Interrupts must be enabled
+ * The callback function runs in interrupt context. Interrupts must be enabled
* when calling this function.
*/
void xcall_call(xcall_fn_t fn, void *arg, unsigned int cpu);
/*
- * Report a cross-call interrupt from a remote processor.
+ * Handle a cross-call interrupt from a remote processor.
*
* Called from interrupt context.
*/
void xcall_intr(void);
+/*
+ * This init operation provides :
+ * - cross-calls are usable
+ * - module fully initialized
+ */
+INIT_OP_DECLARE(xcall_setup);
+
#endif /* _KERN_XCALL_H */
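As a usage sketch of the documented interface (the example names are invented; only xcall_call() itself comes from this header), running a function on a specific processor and relying on the documented ordering to observe its side effects afterwards looks like this:

#include <assert.h>

#include <kern/xcall.h>

static void
example_remote_fn(void *arg)
{
    unsigned int *counter;

    counter = arg;
    (*counter)++;   /* runs in interrupt context on the target processor */
}

static void
example_poke_cpu(unsigned int cpu)
{
    unsigned int counter;

    counter = 0;

    /*
     * Interrupts must be enabled. The call returns once the function has
     * run remotely, and the release-acquire ordering makes the update to
     * counter visible here.
     */
    xcall_call(example_remote_fn, &counter, cpu);
    assert(counter == 1);
}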
diff --git a/test/Kconfig b/test/Kconfig
index 6f3dbb4..1355c1a 100644
--- a/test/Kconfig
+++ b/test/Kconfig
@@ -9,9 +9,6 @@ if TEST_MODULE
choice
prompt "Select test module"
-config TEST_MODULE_LLSYNC_DEFER
- bool "llsync_defer"
-
config TEST_MODULE_MUTEX
bool "mutex"
select MUTEX_DEBUG
@@ -19,9 +16,6 @@ config TEST_MODULE_MUTEX
config TEST_MODULE_MUTEX_PI
bool "mutex_pi"
-config TEST_MODULE_PMAP_UPDATE_MP
- bool "pmap_update_mp"
-
config TEST_MODULE_PERFMON_CPU
bool "perfmon_cpu"
select PERFMON
@@ -34,6 +28,12 @@ config TEST_MODULE_PERFMON_TORTURE
bool "perfmon_torture"
select PERFMON
+config TEST_MODULE_PMAP_UPDATE_MP
+ bool "pmap_update_mp"
+
+config TEST_MODULE_RCU_DEFER
+ bool "rcu_defer"
+
config TEST_MODULE_SREF_DIRTY_ZEROES
bool "sref_dirty_zeroes"
diff --git a/test/Makefile b/test/Makefile
index 0a29e3a..e6526be 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -1,12 +1,12 @@
-x15_SOURCES-$(CONFIG_TEST_MODULE_LLSYNC_DEFER) += test/test_llsync_defer.c
x15_SOURCES-$(CONFIG_TEST_MODULE_MUTEX) += test/test_mutex.c
x15_SOURCES-$(CONFIG_TEST_MODULE_MUTEX_PI) += test/test_mutex_pi.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_CPU) += test/test_perfmon_cpu.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_THREAD) += test/test_perfmon_thread.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_TORTURE) += test/test_perfmon_torture.c
x15_SOURCES-$(CONFIG_TEST_MODULE_PMAP_UPDATE_MP) += test/test_pmap_update_mp.c
+x15_SOURCES-$(CONFIG_TEST_MODULE_RCU_DEFER) += test/test_rcu_defer.c
x15_SOURCES-$(CONFIG_TEST_MODULE_SREF_DIRTY_ZEROES) += test/test_sref_dirty_zeroes.c
x15_SOURCES-$(CONFIG_TEST_MODULE_SREF_NOREF) += test/test_sref_noref.c
x15_SOURCES-$(CONFIG_TEST_MODULE_SREF_WEAKREF) += test/test_sref_weakref.c
x15_SOURCES-$(CONFIG_TEST_MODULE_VM_PAGE_FILL) += test/test_vm_page_fill.c
x15_SOURCES-$(CONFIG_TEST_MODULE_XCALL) += test/test_xcall.c
-x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_CPU) += test/test_perfmon_cpu.c
-x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_THREAD) += test/test_perfmon_thread.c
-x15_SOURCES-$(CONFIG_TEST_MODULE_PERFMON_TORTURE) += test/test_perfmon_torture.c
diff --git a/test/test_mutex.c b/test/test_mutex.c
index 029e2fe..684ec1b 100644
--- a/test/test_mutex.c
+++ b/test/test_mutex.c
@@ -33,6 +33,7 @@
#include <kern/atomic.h>
#include <kern/clock.h>
#include <kern/error.h>
+#include <kern/init.h>
#include <kern/kmem.h>
#include <kern/log.h>
#include <kern/mutex.h>
@@ -161,7 +162,7 @@ test_report_syscnt(struct timer *timer)
timer_schedule(timer, time);
}
-void
+void __init
test_setup(void)
{
uint64_t time;
diff --git a/test/test_mutex_pi.c b/test/test_mutex_pi.c
index ef47048..4332a91 100644
--- a/test/test_mutex_pi.c
+++ b/test/test_mutex_pi.c
@@ -80,6 +80,7 @@
#include <kern/cpumap.h>
#include <kern/error.h>
+#include <kern/init.h>
#include <kern/mutex.h>
#include <kern/panic.h>
#include <kern/syscnt.h>
@@ -416,7 +417,7 @@ test_e(void *arg)
}
}
-void
+void __init
test_setup(void)
{
struct thread_attr attr;
diff --git a/test/test_pmap_update_mp.c b/test/test_pmap_update_mp.c
index c058323..7b6431a 100644
--- a/test/test_pmap_update_mp.c
+++ b/test/test_pmap_update_mp.c
@@ -31,6 +31,7 @@
#include <kern/condition.h>
#include <kern/cpumap.h>
#include <kern/error.h>
+#include <kern/init.h>
#include <kern/mutex.h>
#include <kern/panic.h>
#include <kern/thread.h>
@@ -94,7 +95,7 @@ test_run2(void *arg)
printf("done\n");
}
-void
+void __init
test_setup(void)
{
struct thread_attr attr;
diff --git a/test/test_llsync_defer.c b/test/test_rcu_defer.c
index c87aff7..820d050 100644
--- a/test/test_llsync_defer.c
+++ b/test/test_rcu_defer.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Richard Braun.
+ * Copyright (c) 2014-2018 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,12 +16,12 @@
*
*
* This test module is a stress test, expected to never terminate, of the
- * work deferring functionality of the llsync module. It creates three
- * threads, a producer, a consumer, and a "peeker". The producer allocates
+ * work deferring functionality of the rcu module. It creates three
+ * threads, a producer, a consumer, and a reader. The producer allocates
* a page and writes it. It then transfers the page to the consumer, using
- * the llsync interface to update the global page pointer. Once at the
- * consumer, the llsync interface is used to defer the release of the page.
- * Concurrently, the peeker accesses the page and checks its content when
+ * the rcu interface to update the global page pointer. Once at the
+ * consumer, the rcu interface is used to defer the release of the page.
+ * Concurrently, the reader accesses the page and checks its content when
* available. These accesses are performed inside a read-side critical
* section and should therefore never fail.
*
@@ -35,10 +35,10 @@
#include <kern/condition.h>
#include <kern/error.h>
#include <kern/kmem.h>
-#include <kern/llsync.h>
#include <kern/macros.h>
#include <kern/mutex.h>
#include <kern/panic.h>
+#include <kern/rcu.h>
#include <kern/thread.h>
#include <kern/work.h>
#include <machine/page.h>
@@ -64,11 +64,11 @@ static void
test_alloc(void *arg)
{
struct test_pdsc *pdsc;
- unsigned long i;
+ unsigned long nr_loops;
(void)arg;
- i = 0;
+ nr_loops = 0;
mutex_lock(&test_lock);
@@ -87,14 +87,14 @@ test_alloc(void *arg)
}
}
- llsync_store_ptr(test_pdsc, pdsc);
+ rcu_store_ptr(test_pdsc, pdsc);
condition_signal(&test_condition);
- if ((i % TEST_LOOPS_PER_PRINT) == 0) {
+ if ((nr_loops % TEST_LOOPS_PER_PRINT) == 0) {
printf("alloc ");
}
- i++;
+ nr_loops++;
}
}
@@ -116,11 +116,11 @@ static void
test_free(void *arg)
{
struct test_pdsc *pdsc;
- unsigned long i;
+ unsigned long nr_loops;
(void)arg;
- i = 0;
+ nr_loops = 0;
mutex_lock(&test_lock);
@@ -130,20 +130,20 @@ test_free(void *arg)
}
pdsc = test_pdsc;
- llsync_store_ptr(test_pdsc, NULL);
+ rcu_store_ptr(test_pdsc, NULL);
if (pdsc != NULL) {
work_init(&pdsc->work, test_deferred_free);
- llsync_defer(&pdsc->work);
+ rcu_defer(&pdsc->work);
}
condition_signal(&test_condition);
- if ((i % TEST_LOOPS_PER_PRINT) == 0) {
+ if ((nr_loops % TEST_LOOPS_PER_PRINT) == 0) {
printf("free ");
}
- i++;
+ nr_loops++;
}
}
@@ -152,36 +152,36 @@ test_read(void *arg)
{
const struct test_pdsc *pdsc;
const unsigned char *s;
- unsigned long i, j;
+ unsigned long nr_loops;
(void)arg;
- i = 0;
+ nr_loops = 0;
for (;;) {
- llsync_read_enter();
+ rcu_read_enter();
- pdsc = llsync_load_ptr(test_pdsc);
+ pdsc = rcu_load_ptr(test_pdsc);
if (pdsc != NULL) {
s = (const unsigned char *)pdsc->addr;
if (s != NULL) {
- for (j = 0; j < PAGE_SIZE; j++) {
- if (s[j] != TEST_VALIDATION_BYTE) {
+ for (unsigned int i = 0; i < PAGE_SIZE; i++) {
+ if (s[i] != TEST_VALIDATION_BYTE) {
panic("invalid content");
}
}
- if ((i % TEST_LOOPS_PER_PRINT) == 0) {
+ if ((nr_loops % TEST_LOOPS_PER_PRINT) == 0) {
printf("read ");
}
- i++;
+ nr_loops++;
}
}
- llsync_read_exit();
+ rcu_read_leave();
}
}
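Condensing the interface exercised by the test above into one place: a writer publishes with rcu_store_ptr(), retires the old object with work_init() plus rcu_defer(), and readers bracket their accesses with rcu_read_enter()/rcu_read_leave(). This is only a sketch with hypothetical names, and, like the test, it assumes updates are serialized by the caller (the test uses a mutex):

#include <stddef.h>

#include <kern/kmem.h>
#include <kern/rcu.h>
#include <kern/work.h>

struct example_item {
    struct work work;   /* first member, so the free callback can reuse the pointer */
    int value;
};

static struct example_item *example_item_ptr;

static void
example_item_free(struct work *work)
{
    /* Runs once all pre-existing read-side critical sections have ended. */
    kmem_free(work, sizeof(struct example_item));
}

static void
example_update(struct example_item *new_item)
{
    struct example_item *old;

    old = example_item_ptr;
    rcu_store_ptr(example_item_ptr, new_item);          /* publish */

    if (old != NULL) {
        work_init(&old->work, example_item_free);
        rcu_defer(&old->work);                          /* release after a grace period */
    }
}

static int
example_read(void)
{
    struct example_item *item;
    int value;

    value = 0;

    rcu_read_enter();
    item = rcu_load_ptr(example_item_ptr);

    if (item != NULL) {
        value = item->value;
    }

    rcu_read_leave();
    return value;
}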
diff --git a/test/test_sref_dirty_zeroes.c b/test/test_sref_dirty_zeroes.c
index be4c491..5318335 100644
--- a/test/test_sref_dirty_zeroes.c
+++ b/test/test_sref_dirty_zeroes.c
@@ -31,6 +31,7 @@
#include <kern/condition.h>
#include <kern/error.h>
+#include <kern/init.h>
#include <kern/kmem.h>
#include <kern/macros.h>
#include <kern/mutex.h>
@@ -101,7 +102,7 @@ test_noref(struct sref_counter *counter)
panic("0 references, page released\n");
}
-void
+void __init
test_setup(void)
{
struct thread_attr attr;
@@ -111,7 +112,7 @@ test_setup(void)
condition_init(&test_condition);
mutex_init(&test_lock);
- sref_counter_init(&test_counter, NULL, test_noref);
+ sref_counter_init(&test_counter, 1, NULL, test_noref);
test_transient_ref = 0;
thread_attr_init(&attr, THREAD_KERNEL_PREFIX "test_inc");
diff --git a/test/test_sref_noref.c b/test/test_sref_noref.c
index f556d32..40f1d8f 100644
--- a/test/test_sref_noref.c
+++ b/test/test_sref_noref.c
@@ -37,6 +37,7 @@
#include <kern/condition.h>
#include <kern/error.h>
+#include <kern/init.h>
#include <kern/kmem.h>
#include <kern/macros.h>
#include <kern/mutex.h>
@@ -142,7 +143,7 @@ test_run(void *arg)
panic("vm_kmem_alloc: %s", error_str(ERROR_NOMEM));
}
- sref_counter_init(&obj->ref_counter, NULL, test_obj_noref);
+ sref_counter_init(&obj->ref_counter, 1, NULL, test_obj_noref);
printf("page allocated, 1 reference, publishing\n");
@@ -168,7 +169,7 @@ test_run(void *arg)
kmem_free(threads, sizeof(*threads) * nr_threads);
}
-void
+void __init
test_setup(void)
{
struct thread_attr attr;
diff --git a/test/test_sref_weakref.c b/test/test_sref_weakref.c
index 85d4cbe..d1d4407 100644
--- a/test/test_sref_weakref.c
+++ b/test/test_sref_weakref.c
@@ -36,6 +36,7 @@
#include <stdio.h>
#include <kern/error.h>
+#include <kern/init.h>
#include <kern/macros.h>
#include <kern/sref.h>
#include <kern/syscnt.h>
@@ -67,7 +68,7 @@ test_run(void *arg)
continue;
}
- sref_counter_init(counter, &test_weakref, test_noref);
+ sref_counter_init(counter, 1, &test_weakref, test_noref);
sref_counter_dec(counter);
for (j = 0; j < 0x20000000; j++);
@@ -101,7 +102,7 @@ test_ref(void *arg)
}
}
-void
+void __init
test_setup(void)
{
struct thread_attr attr;
diff --git a/test/test_vm_page_fill.c b/test/test_vm_page_fill.c
index 0b4278b..2f6e644 100644
--- a/test/test_vm_page_fill.c
+++ b/test/test_vm_page_fill.c
@@ -28,6 +28,7 @@
#include <kern/cpumap.h>
#include <kern/error.h>
+#include <kern/init.h>
#include <kern/list.h>
#include <kern/thread.h>
#include <machine/page.h>
@@ -134,7 +135,7 @@ test_run(void *arg)
}
}
-void
+void __init
test_setup(void)
{
struct thread_attr attr;
diff --git a/test/test_xcall.c b/test/test_xcall.c
index cd33594..c2fc54b 100644
--- a/test/test_xcall.c
+++ b/test/test_xcall.c
@@ -25,43 +25,50 @@
#include <stddef.h>
#include <stdio.h>
-#include <kern/error.h>
#include <kern/cpumap.h>
+#include <kern/error.h>
+#include <kern/init.h>
#include <kern/log.h>
#include <kern/panic.h>
#include <kern/thread.h>
#include <kern/xcall.h>
#include <test/test.h>
-static bool test_done;
+struct test_data {
+ unsigned int cpu;
+ bool done;
+};
static void
test_fn(void *arg)
{
- uintptr_t cpu;
+ struct test_data *data;
assert(thread_interrupted());
- cpu = (uintptr_t)arg;
+ data = arg;
- if (cpu != cpu_id()) {
- panic("invalid cpu");
+ if (data->cpu != cpu_id()) {
+ panic("test: invalid cpu");
}
log_info("function called, running on cpu%u\n", cpu_id());
- test_done = true;
+ data->done = true;
}
static void
test_once(unsigned int cpu)
{
- test_done = false;
+ struct test_data data;
+
+ data.cpu = cpu;
+ data.done = false;
log_info("cross-call: cpu%u -> cpu%u:\n", cpu_id(), cpu);
- xcall_call(test_fn, (void *)(uintptr_t)cpu, cpu);
+ xcall_call(test_fn, &data, cpu);
- if (!test_done) {
- panic("test_done false");
+ if (!data.done) {
+ panic("test: xcall failed");
}
}
@@ -70,7 +77,7 @@ test_run_cpu(void *arg)
{
(void)arg;
- for (unsigned int i = (cpu_count() - 1); i < cpu_count(); i++) {
+ for (unsigned int i = (cpu_count() - 1); i < cpu_count(); i--) {
test_once(i);
}
}
@@ -116,7 +123,7 @@ test_run(void *arg)
log_info("done\n");
}
-void
+void __init
test_setup(void)
{
struct thread_attr attr;
diff --git a/tools/build_configs.py b/tools/build_configs.py
index 42124a9..04f278c 100755
--- a/tools/build_configs.py
+++ b/tools/build_configs.py
@@ -37,26 +37,33 @@ def gen_configs_values_str(options_dict):
# to all generated strings.
def gen_cc_options_list(options_dict):
return map(lambda x: '%s -Werror' % x, gen_configs_values_str(options_dict))
+def gen_exclusive_boolean_filter(args):
+ enabled_option, options_list = args
+ filter = dict()
-# Check whether a filter prototype is valid.
-#
-# A filter prototype is a list of (name, value) pairs used to build a filter.
-# Keep in mind that a valid filter must match invalid configurations. As a
-# result, a filter prototype is valid if and only if
-# - all options are included in the given list, and
-# - the number of enabled options is not 1
-def check_exclusive_boolean_filter(prototype):
- return (len(set(map(lambda x: x[0], prototype))) == len(prototype)
- and len(filter(lambda x: x[1][1] == 'y', prototype)) != 1)
-
-# Generate a list of filters on a list of boolean options.
+ for option in options_list:
+ if option == enabled_option:
+ value = [True, 'y']
+ else:
+ value = [True, 'n']
+
+ filter.update({option : value})
+
+ return filter
+
+# Generate a list of passing filters on a list of boolean options.
#
-# The resulting filters match configurations that don't have one and only
-# one of the given options enabled.
-def gen_exclusive_boolean_filters_list(options_list):
- product = itertools.product(options_list, [[True, 'y'], [True, 'n']])
- prototypes = list(itertools.combinations(product, len(options_list)))
- return map(dict, filter(check_exclusive_boolean_filter, prototypes))
+# The resulting filters match configurations that have only one of the given
+# options enabled, unless all_disabled is true, in which case an additional
+# filter is generated to match configurations where none of the options
+# are enabled.
+def gen_exclusive_boolean_filters_list(options_list, all_disabled=False):
+ option_and_options = map(lambda x: (x, options_list), options_list)
+
+ if all_disabled:
+ option_and_options += [(None, options_list)]
+
+ return map(gen_exclusive_boolean_filter, option_and_options)
# Dictionary of compiler options.
#
@@ -96,18 +103,48 @@ large_options_dict.update({
'CONFIG_THREAD_STACK_GUARD' : ['y', 'n'],
})
+test_list = [
+ 'CONFIG_TEST_MODULE_MUTEX',
+ 'CONFIG_TEST_MODULE_MUTEX_PI',
+ 'CONFIG_TEST_MODULE_PMAP_UPDATE_MP',
+ 'CONFIG_TEST_MODULE_RCU_DEFER',
+ 'CONFIG_TEST_MODULE_SREF_DIRTY_ZEROES',
+ 'CONFIG_TEST_MODULE_SREF_NOREF',
+ 'CONFIG_TEST_MODULE_SREF_WEAKREF',
+ 'CONFIG_TEST_MODULE_VM_PAGE_FILL',
+ 'CONFIG_TEST_MODULE_XCALL',
+]
+
+test_options_dict = dict(small_options_dict)
+
+for test in test_list:
+ test_options_dict.update({test : ['y', 'n']})
+
all_options_sets = {
'small' : small_options_dict,
'large' : large_options_dict,
+ 'test' : test_options_dict,
}
-# List of filters used to determine valid configurations.
+# Filters.
#
-# Each entry is a dictionary of options. The key matches an option name
-# whereas the value is a [match_flag, string/regular expression] list.
-# The match flag is true if the matching expression must match, false
-# otherwise.
-all_filters_list = [
+# A filter is a list of dictionaries of options. For each dictionary, the
+# key matches an option name whereas the value is a
+# [match_flag, string/regular expression] list. The match flag is true if
+# the matching expression must match, false otherwise.
+#
+# Passing filters are used to allow configurations that match the filters,
+# whereas blocking filters allow configurations that do not match.
+
+passing_filters_list = gen_exclusive_boolean_filters_list([
+ 'CONFIG_MUTEX_ADAPTIVE',
+ 'CONFIG_MUTEX_PI',
+ 'CONFIG_MUTEX_PLAIN',
+])
+passing_filters_list += gen_exclusive_boolean_filters_list(test_list,
+ all_disabled=True)
+
+blocking_filters_list = [
# XXX Clang currently cannot build the kernel with LTO.
{
'CONFIG_CC_EXE' : [True, 'clang'],
@@ -126,11 +163,6 @@ all_filters_list = [
'CONFIG_X86_PAE' : [True, 'y'],
},
]
-all_filters_list += gen_exclusive_boolean_filters_list([
- 'CONFIG_MUTEX_ADAPTIVE',
- 'CONFIG_MUTEX_PI',
- 'CONFIG_MUTEX_PLAIN'
-])
def gen_config_line(config_entry):
name, value = config_entry
@@ -183,34 +215,73 @@ def test_config(args):
return [retval, buildtree]
-# Return true if a filter doesn't completely match a configuration.
+# Return true if a filter completely matches a configuration.
def check_filter(config_dict, filter_dict):
for name, value in filter_dict.iteritems():
if not name in config_dict:
- return True
+ return False
if isinstance(value[1], str):
if value[0] != (config_dict[name] == value[1]):
- return True
+ return False
else:
if value[0] != bool(value[1].match(config_dict[name])):
- return True
+ return False
+
+ return True
+
+def check_filter_relevant(config_dict, filter_dict):
+ for name, value in filter_dict.iteritems():
+ if name in config_dict:
+ return True
+
+ return False
+
+def check_filters_list_relevant(config_dict, filters_list):
+ for filter_dict in filters_list:
+ if check_filter_relevant(config_dict, filter_dict):
+ return True
+
+ return False
+
+# Return true if a configuration passes at least one of the given filters.
+#
+# If the given filters list is irrelevant, i.e. it applies to none of
+# the options in the given configuration, the filters are considered
+# to match.
+def check_passing_filters(args):
+ config_dict, filters_list = args
+
+ if not check_filters_list_relevant(config_dict, filters_list):
+ return True
+
+ for filter_dict in filters_list:
+ if check_filter(config_dict, filter_dict):
+ return True
return False
# Return true if a configuration passes all the given filters.
-def check_filters(args):
+def check_blocking_filters(args):
config_dict, filters_list = args
for filter_dict in filters_list:
- if (not check_filter(config_dict, filter_dict)):
+ if check_filter(config_dict, filter_dict):
return False
return True
-def filter_configs_list(configs_list, filters_list):
- configs_and_filters = map(lambda x: (x, filters_list), configs_list)
- return map(lambda x: x[0], filter(check_filters, configs_and_filters))
+def filter_configs_list(configs_list, passing_filters_list,
+ blocking_filters_list):
+ configs_and_filters = map(lambda x: (x, passing_filters_list),
+ configs_list)
+ configs_list = map(lambda x: x[0], filter(check_passing_filters,
+ configs_and_filters))
+ configs_and_filters = map(lambda x: (x, blocking_filters_list),
+ configs_list)
+ configs_list = map(lambda x: x[0], filter(check_blocking_filters,
+ configs_and_filters))
+ return configs_list
def find_options_dict(options_sets, name):
if name not in options_sets:
@@ -250,7 +321,8 @@ def main():
print 'set: ' + args.set
configs_list = filter_configs_list(gen_configs_list(options_dict),
- all_filters_list)
+ passing_filters_list,
+ blocking_filters_list)
nr_configs = len(configs_list)
print 'total: ' + str(nr_configs)
diff --git a/vm/vm_object.c b/vm/vm_object.c
index 707008e..2cb4eed 100644
--- a/vm/vm_object.c
+++ b/vm/vm_object.c
@@ -24,8 +24,8 @@
#include <stdint.h>
#include <kern/init.h>
-#include <kern/llsync.h>
#include <kern/mutex.h>
+#include <kern/rcu.h>
#include <kern/rdxtree.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
@@ -132,7 +132,7 @@ vm_object_lookup(struct vm_object *object, uint64_t offset)
struct vm_page *page;
int error;
- llsync_read_enter();
+ rcu_read_enter();
do {
page = rdxtree_lookup(&object->pages, vm_page_btop(offset));
@@ -144,7 +144,7 @@ vm_object_lookup(struct vm_object *object, uint64_t offset)
error = vm_page_tryref(page);
} while (error);
- llsync_read_exit();
+ rcu_read_leave();
return page;
}