 -rw-r--r--   Makefrag.am              |   7
 -rw-r--r--   arch/x86/machine/boot.c  |   2
 -rw-r--r--   configure.ac             |   3
 -rw-r--r--   kern/kernel.c            |   2
 -rw-r--r--   kern/sref.c              | 885
 -rw-r--r--   kern/sref.h              | 104
 -rw-r--r--   kern/sref_i.h            |  48
 -rw-r--r--   kern/thread.c            |  17
 -rw-r--r--   test/test_sref_noref.c   | 175
 9 files changed, 1242 insertions(+), 1 deletion(-)
diff --git a/Makefrag.am b/Makefrag.am
index 75f5828b..63d3c258 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -42,6 +42,9 @@ x15_SOURCES += \
kern/spinlock_i.h \
kern/sprintf.c \
kern/sprintf.h \
+ kern/sref.c \
+ kern/sref.h \
+ kern/sref_i.h \
kern/stddef.h \
kern/stdint.h \
kern/string.c \
@@ -78,6 +81,10 @@ if TEST_PMAP_UPDATE_MP
x15_SOURCES += test/test_pmap_update_mp.c
endif TEST_PMAP_UPDATE_MP
+if TEST_SREF_NOREF
+x15_SOURCES += test/test_sref_noref.c
+endif TEST_SREF_NOREF
+
if TEST_X86_PMAP_REMOVE_PTP
x15_SOURCES += test/test_x86_pmap_remove_ptp.c
endif TEST_X86_PMAP_REMOVE_PTP
diff --git a/arch/x86/machine/boot.c b/arch/x86/machine/boot.c
index 5cc90ff6..09b57140 100644
--- a/arch/x86/machine/boot.c
+++ b/arch/x86/machine/boot.c
@@ -51,6 +51,7 @@
#include <kern/param.h>
#include <kern/percpu.h>
#include <kern/printk.h>
+#include <kern/sref.h>
#include <kern/stddef.h>
#include <kern/stdint.h>
#include <kern/string.h>
@@ -274,6 +275,7 @@ boot_main(void)
cpu_setup();
thread_bootstrap();
pmap_bootstrap();
+ sref_bootstrap();
cga_setup();
printk_setup();
boot_show_version();
diff --git a/configure.ac b/configure.ac
index 91dd6eb8..5bdd541b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -46,6 +46,7 @@ m4_define([ENABLE_TEST_MODULE],
[AS_CASE(["$enable_test_module"],
[llsync_defer], [test_llsync_defer=yes],
[pmap_update_mp], [test_pmap_update_mp=yes],
+ [sref_noref], [test_sref_noref=yes],
[x86_pmap_remove_ptp],
[test_x86_pmap_remove_ptp=yes
AC_MSG_NOTICE([forcing max_cpus to 1])
@@ -60,6 +61,8 @@ AM_CONDITIONAL([TEST_LLSYNC_DEFER],
[test x"$test_llsync_defer" = xyes])
AM_CONDITIONAL([TEST_PMAP_UPDATE_MP],
[test x"$test_pmap_update_mp" = xyes])
+AM_CONDITIONAL([TEST_SREF_NOREF],
+ [test x"$test_sref_noref" = xyes])
AM_CONDITIONAL([TEST_X86_PMAP_REMOVE_PTP],
[test x"$test_x86_pmap_remove_ptp" = xyes])
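With the two hunks above, the new test plugs into the existing test-module selection machinery; assuming the standard autoconf mapping of the enable_test_module variable (the exact option name is defined elsewhere in configure.ac and not shown in this diff), it would be selected with something like ./configure --enable-test-module=sref_noref.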
diff --git a/kern/kernel.c b/kern/kernel.c
index 069649de..462fa9d9 100644
--- a/kern/kernel.c
+++ b/kern/kernel.c
@@ -20,6 +20,7 @@
#include <kern/kernel.h>
#include <kern/llsync.h>
#include <kern/percpu.h>
+#include <kern/sref.h>
#include <kern/task.h>
#include <kern/thread.h>
#include <kern/work.h>
@@ -41,6 +42,7 @@ kernel_main(void)
thread_setup();
work_setup();
llsync_setup();
+ sref_setup();
vm_page_info();
#ifdef RUN_TEST_MODULE
diff --git a/kern/sref.c b/kern/sref.c
new file mode 100644
index 00000000..c10dbc97
--- /dev/null
+++ b/kern/sref.c
@@ -0,0 +1,885 @@
+/*
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation is based on the paper "RadixVM: Scalable address
+ * spaces for multithreaded applications" by Austin T. Clements,
+ * M. Frans Kaashoek, and Nickolai Zeldovich. Specifically, it implements
+ * the Refcache component described in the paper, with a few differences
+ * outlined below.
+ *
+ * For now, this module doesn't provide weak references.
+ *
+ * Refcache synchronizes access to delta caches by disabling preemption.
+ * That behaviour is realtime-unfriendly because of the large number of
+ * deltas in a cache. This module uses dedicated manager threads to
+ * perform cache flushes and review queue processing, and mutexes to
+ * synchronize cache access.
+ *
+ * In addition, Refcache normally requires all processors to regularly
+ * process their local data. That behaviour is dyntick-unfriendly. As a
+ * result, this module handles processor registration so that processors
+ * that aren't participating in reference counting (e.g. because they're
+ * idling) don't prevent others from progressing. Instead of per-processor
+ * review queues, there is one global review queue which can be managed
+ * from any processor. Review queue accesses should still be sufficiently
+ * infrequent in practice to keep the impact on contention low.
+ *
+ * Locking protocol: cache -> counter -> global data
+ */
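To make the lock ordering above concrete, the deepest locking path in the code below runs from the per-processor cache mutex down to the global data spinlock, using only functions defined in this file:

    sref_counter_inc()                        /* mutex_lock(&cache->lock) */
      -> sref_cache_evict()                   /* still under the cache mutex */
        -> sref_delta_flush()
          -> sref_counter_add()               /* spinlock_lock(&counter->lock) */
            -> sref_counter_schedule_review() /* spinlock_lock(&sref_data.lock) */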
+
+#include <kern/assert.h>
+#include <kern/condition.h>
+#include <kern/cpumap.h>
+#include <kern/error.h>
+#include <kern/evcnt.h>
+#include <kern/init.h>
+#include <kern/macros.h>
+#include <kern/mutex.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/percpu.h>
+#include <kern/printk.h>
+#include <kern/spinlock.h>
+#include <kern/sprintf.h>
+#include <kern/sref.h>
+#include <kern/sref_i.h>
+#include <kern/stddef.h>
+#include <kern/thread.h>
+#include <machine/cpu.h>
+
+/*
+ * Maximum number of deltas per cache.
+ */
+#define SREF_MAX_DELTAS 4096
+
+#ifdef __LP64__
+#define SREF_HASH_SHIFT 3
+#else /* __LP64__ */
+#define SREF_HASH_SHIFT 2
+#endif /* __LP64__ */
+
+/*
+ * Number of counters in review queue beyond which to issue a warning.
+ */
+#define SREF_NR_COUNTERS_WARN 10000
+
+struct sref_queue {
+ struct sref_counter *first;
+ struct sref_counter *last;
+ unsigned long size;
+};
+
+/*
+ * Global data.
+ *
+ * If there is a pending flush, its associated CPU must be registered.
+ * Notwithstanding transient states, the number of pending flushes is 0
+ * if and only if no processor is registered, in which case the sref
+ * module, and probably the whole system, is completely idle.
+ *
+ * The review queue is implemented with two queues of counters. Each
+ * queue is named with a number that matches the number of epochs that
+ * occurred since its counters were added.
+ */
+struct sref_data {
+ struct spinlock lock;
+ struct cpumap registered_cpus;
+ unsigned int nr_registered_cpus;
+ struct cpumap pending_flushes;
+ unsigned int nr_pending_flushes;
+ struct sref_queue queue0;
+ struct sref_queue queue1;
+ struct evcnt ev_epoch;
+ struct evcnt ev_dirty_zero;
+ struct evcnt ev_true_zero;
+ int no_warning;
+};
+
+/*
+ * Temporary difference to apply on a reference counter.
+ *
+ * Deltas are stored in per-processor caches and added to their global
+ * counter when flushed. A delta is valid if and only if the counter it
+ * points to isn't NULL. If the value of a delta is not zero, the delta
+ * must be valid.
+ */
+struct sref_delta {
+ struct sref_counter *counter;
+ unsigned long value;
+};
+
+/*
+ * Per-processor cache of deltas.
+ *
+ * Delta caches are implemented with hash tables for quick counter-to-delta
+ * lookups. For now, a very simple replacement policy, similar to
+ * that described in the RadixVM paper, is used. Improve with an LRU-like
+ * algorithm if this turns out to be a problem.
+ *
+ * Manager threads periodically flush deltas and process the review queue.
+ * Waking up a manager thread must be done with interrupts disabled to
+ * prevent a race with the periodic event that drives regular flushes
+ * (normally the periodic timer interrupt).
+ *
+ * Changing the registered flag is done with preemption disabled. The
+ * periodic event handler checks whether the current processor is
+ * registered, and can do so at any time. This race is harmless.
+ *
+ * The dirty flag means there may be data to process, and is used to wake
+ * up the manager thread. Marking a cache dirty is done either by regular
+ * threads increasing or decreasing a delta, or the periodic event handler.
+ * Clearing the dirty flag is only done by the manager thread. The cache
+ * lock makes sure only one thread is accessing the cache at any time, but
+ * it doesn't prevent the periodic handler from running. Since the handler
+ * as well as regular threads set that flag, a race between them is harmless.
+ * On the other hand, the handler won't access the flag if it is run while
+ * the manager thread is running. Since the manager thread is already running,
+ * there is no need to wake it up anyway.
+ */
+struct sref_cache {
+ struct mutex lock;
+ struct sref_delta deltas[SREF_MAX_DELTAS];
+ struct evcnt ev_evict;
+ struct evcnt ev_flush;
+ struct thread *manager;
+ int registered;
+ int dirty;
+};
+
+static struct sref_data sref_data;
+static struct sref_cache sref_cache __percpu;
+
+static void __init
+sref_queue_init(struct sref_queue *queue)
+{
+ queue->first = NULL;
+ queue->last = NULL;
+ queue->size = 0;
+}
+
+static inline unsigned long
+sref_queue_size(const struct sref_queue *queue)
+{
+ return queue->size;
+}
+
+static inline int
+sref_queue_empty(const struct sref_queue *queue)
+{
+ return (queue->size == 0);
+}
+
+static void
+sref_queue_push(struct sref_queue *queue, struct sref_counter *counter)
+{
+ counter->next = NULL;
+
+ if (queue->last == NULL)
+ queue->first = counter;
+ else
+ queue->last->next = counter;
+
+ queue->last = counter;
+ queue->size++;
+}
+
+static struct sref_counter *
+sref_queue_pop(struct sref_queue *queue)
+{
+ struct sref_counter *counter;
+
+ counter = queue->first;
+ queue->first = counter->next;
+
+ if (queue->last == counter)
+ queue->last = NULL;
+
+ queue->size--;
+ return counter;
+}
+
+static void
+sref_queue_transfer(struct sref_queue *dest, struct sref_queue *src)
+{
+ *dest = *src;
+}
+
+static void
+sref_queue_concat(struct sref_queue *queue1, struct sref_queue *queue2)
+{
+ if (sref_queue_empty(queue2))
+ return;
+
+ if (sref_queue_empty(queue1)) {
+ sref_queue_transfer(queue1, queue2);
+ return;
+ }
+
+ queue1->last->next = queue2->first;
+ queue1->last = queue2->last;
+ queue1->size += queue2->size;
+}
+
+static inline unsigned long
+sref_counter_hash(const struct sref_counter *counter)
+{
+ unsigned long va;
+
+ va = (unsigned long)counter;
+
+ assert(P2ALIGNED(va, 1UL << SREF_HASH_SHIFT));
+ return (va >> SREF_HASH_SHIFT);
+}
+
+static inline unsigned long
+sref_counter_index(const struct sref_counter *counter)
+{
+ return (sref_counter_hash(counter) & (SREF_MAX_DELTAS - 1));
+}
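A quick worked example of the index computation above, as a standalone user-space illustration (LP64 assumed, so SREF_HASH_SHIFT is 3 and counters are at least 8-byte aligned; the address is made up):

    #include <stdio.h>

    int main(void)
    {
        unsigned long va = 0xffff800012345678UL;        /* hypothetical counter address */
        unsigned long index = (va >> 3) & (4096 - 1);   /* SREF_MAX_DELTAS - 1 */

        printf("delta slot: %lu\n", index);             /* prints 2767 */
        return 0;
    }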
+
+static inline int
+sref_counter_is_queued(const struct sref_counter *counter)
+{
+ return (counter->flags & SREF_QUEUED);
+}
+
+static inline void
+sref_counter_mark_queued(struct sref_counter *counter)
+{
+ counter->flags |= SREF_QUEUED;
+}
+
+static inline void
+sref_counter_clear_queued(struct sref_counter *counter)
+{
+ counter->flags &= ~SREF_QUEUED;
+}
+
+static inline int
+sref_counter_is_dirty(const struct sref_counter *counter)
+{
+ return (counter->flags & SREF_DIRTY);
+}
+
+static inline void
+sref_counter_mark_dirty(struct sref_counter *counter)
+{
+ counter->flags |= SREF_DIRTY;
+}
+
+static inline void
+sref_counter_clear_dirty(struct sref_counter *counter)
+{
+ counter->flags &= ~SREF_DIRTY;
+}
+
+static void
+sref_counter_schedule_review(struct sref_counter *counter)
+{
+ assert(!sref_counter_is_queued(counter));
+ assert(!sref_counter_is_dirty(counter));
+
+ sref_counter_mark_queued(counter);
+
+ spinlock_lock(&sref_data.lock);
+ sref_queue_push(&sref_data.queue0, counter);
+ spinlock_unlock(&sref_data.lock);
+}
+
+static void
+sref_counter_add(struct sref_counter *counter, unsigned long delta)
+{
+ assert(delta != 0);
+
+ spinlock_lock(&counter->lock);
+
+ counter->value += delta;
+
+ if (counter->value == 0) {
+ if (sref_counter_is_queued(counter))
+ sref_counter_mark_dirty(counter);
+ else
+ sref_counter_schedule_review(counter);
+ }
+
+ spinlock_unlock(&counter->lock);
+}
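Note that reaching zero above only schedules a review instead of releasing the object: the flushed value can lag the true reference count by whatever deltas are still buffered on other processors, so a zero may be transient. A standalone sketch of such a stale zero (plain C, no kernel dependencies):

    #include <stdio.h>

    int main(void)
    {
        /* One reference exists (flushed value 1). CPU 1 then takes a new
         * reference and CPU 0 drops one; both operations are still buffered
         * as per-processor deltas, so the true count remains 1. */
        long global = 1;
        long delta_cpu0 = -1;
        long delta_cpu1 = +1;

        global += delta_cpu0;   /* CPU 0 flushes first */
        printf("after CPU 0 flush: %ld (transient zero)\n", global);

        global += delta_cpu1;   /* CPU 1 flushes later */
        printf("after CPU 1 flush: %ld (true count)\n", global);
        return 0;
    }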
+
+static void __init
+sref_delta_init(struct sref_delta *delta)
+{
+ delta->counter = NULL;
+ delta->value = 0;
+}
+
+static inline struct sref_counter *
+sref_delta_counter(struct sref_delta *delta)
+{
+ return delta->counter;
+}
+
+static inline void
+sref_delta_set_counter(struct sref_delta *delta, struct sref_counter *counter)
+{
+ assert(delta->value == 0);
+ delta->counter = counter;
+}
+
+static inline void
+sref_delta_inc(struct sref_delta *delta)
+{
+ delta->value++;
+}
+
+static inline void
+sref_delta_dec(struct sref_delta *delta)
+{
+ delta->value--;
+}
+
+static inline int
+sref_delta_is_valid(const struct sref_delta *delta)
+{
+ return (delta->counter != NULL);
+}
+
+static inline int
+sref_delta_is_zero(const struct sref_delta *delta)
+{
+ return (delta->value == 0);
+}
+
+static void
+sref_delta_flush(struct sref_delta *delta)
+{
+ assert(delta->value != 0);
+ sref_counter_add(delta->counter, delta->value);
+ delta->value = 0;
+}
+
+static inline unsigned long
+sref_review_queue_size(void)
+{
+ return (sref_queue_size(&sref_data.queue0)
+ + sref_queue_size(&sref_data.queue1));
+}
+
+static inline int
+sref_review_queue_empty(void)
+{
+ return (sref_review_queue_size() == 0);
+}
+
+static void
+sref_reset_pending_flushes(void)
+{
+ cpumap_copy(&sref_data.pending_flushes, &sref_data.registered_cpus);
+ sref_data.nr_pending_flushes = sref_data.nr_registered_cpus;
+}
+
+static void
+sref_end_epoch(struct sref_queue *queue)
+{
+ assert(cpumap_find_first(&sref_data.registered_cpus) != -1);
+ assert(sref_data.nr_registered_cpus != 0);
+ assert(cpumap_find_first(&sref_data.pending_flushes) == -1);
+ assert(sref_data.nr_pending_flushes == 0);
+
+ if (!sref_data.no_warning
+ && (sref_review_queue_size() >= SREF_NR_COUNTERS_WARN)) {
+ sref_data.no_warning = 1;
+ printk("sref: warning: large number of counters in review queue\n");
+ }
+
+ if (sref_data.nr_registered_cpus == 1) {
+ sref_queue_concat(&sref_data.queue1, &sref_data.queue0);
+ sref_queue_init(&sref_data.queue0);
+ }
+
+ sref_queue_transfer(queue, &sref_data.queue1);
+ sref_queue_transfer(&sref_data.queue1, &sref_data.queue0);
+ sref_queue_init(&sref_data.queue0);
+ evcnt_inc(&sref_data.ev_epoch);
+ sref_reset_pending_flushes();
+}
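As the two transfers above show, a counter pushed on queue0 during one epoch moves to queue1 when that epoch ends and is only handed out for review at the end of the following epoch. By then, every registered processor has flushed its delta cache at least once after the counter was queued, which is what makes the reviewed value meaningful; with a single registered processor, queue0 is folded into queue1 first, since its just-completed flush is the only delta cache that could still hold updates.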
+
+static inline struct sref_delta *
+sref_cache_delta(struct sref_cache *cache, unsigned int i)
+{
+ assert(i < ARRAY_SIZE(cache->deltas));
+ return &cache->deltas[i];
+}
+
+static void __init
+sref_cache_init(struct sref_cache *cache, unsigned int cpu)
+{
+ char name[EVCNT_NAME_SIZE];
+ struct sref_delta *delta;
+ unsigned int i;
+
+ mutex_init(&cache->lock);
+
+ for (i = 0; i < ARRAY_SIZE(cache->deltas); i++) {
+ delta = sref_cache_delta(cache, i);
+ sref_delta_init(delta);
+ }
+
+ snprintf(name, sizeof(name), "sref_evict/%u", cpu);
+ evcnt_register(&cache->ev_evict, name);
+ snprintf(name, sizeof(name), "sref_flush/%u", cpu);
+ evcnt_register(&cache->ev_flush, name);
+ cache->manager = NULL;
+ cache->registered = 0;
+ cache->dirty = 0;
+}
+
+static inline struct sref_cache *
+sref_cache_get(void)
+{
+ return cpu_local_ptr(sref_cache);
+}
+
+static struct sref_cache *
+sref_cache_acquire(void)
+{
+ struct sref_cache *cache;
+
+ thread_pin();
+ cache = sref_cache_get();
+
+ mutex_lock(&cache->lock);
+ return cache;
+}
+
+static void
+sref_cache_release(struct sref_cache *cache)
+{
+ mutex_unlock(&cache->lock);
+ thread_unpin();
+}
+
+static inline int
+sref_cache_is_registered(const struct sref_cache *cache)
+{
+ return cache->registered;
+}
+
+static inline void
+sref_cache_mark_registered(struct sref_cache *cache)
+{
+ cache->registered = 1;
+}
+
+static inline void
+sref_cache_clear_registered(struct sref_cache *cache)
+{
+ cache->registered = 0;
+}
+
+static inline int
+sref_cache_is_dirty(const struct sref_cache *cache)
+{
+ return cache->dirty;
+}
+
+static inline void
+sref_cache_mark_dirty(struct sref_cache *cache)
+{
+ cache->dirty = 1;
+}
+
+static inline void
+sref_cache_clear_dirty(struct sref_cache *cache)
+{
+ cache->dirty = 0;
+}
+
+static void
+sref_cache_evict(struct sref_cache *cache, struct sref_delta *delta)
+{
+ if (sref_delta_is_zero(delta))
+ return;
+
+ sref_delta_flush(delta);
+ evcnt_inc(&cache->ev_evict);
+}
+
+static struct sref_delta *
+sref_cache_get_delta(struct sref_cache *cache, struct sref_counter *counter)
+{
+ struct sref_delta *delta;
+
+ delta = sref_cache_delta(cache, sref_counter_index(counter));
+
+ if (!sref_delta_is_valid(delta))
+ sref_delta_set_counter(delta, counter);
+ else if (sref_delta_counter(delta) != counter) {
+ sref_cache_evict(cache, delta);
+ sref_delta_set_counter(delta, counter);
+ }
+
+ return delta;
+}
+
+static void
+sref_cache_flush(struct sref_cache *cache, struct sref_queue *queue)
+{
+ struct sref_delta *delta;
+ unsigned int i, cpu;
+
+ for (i = 0; i < ARRAY_SIZE(cache->deltas); i++) {
+ delta = sref_cache_delta(cache, i);
+
+ if (!sref_delta_is_zero(delta))
+ sref_delta_flush(delta);
+ }
+
+ cpu = cpu_id();
+
+ spinlock_lock(&sref_data.lock);
+
+ assert(sref_cache_is_registered(cache));
+ assert(cpumap_test(&sref_data.registered_cpus, cpu));
+
+ if (!cpumap_test(&sref_data.pending_flushes, cpu))
+ sref_queue_init(queue);
+ else {
+ cpumap_clear(&sref_data.pending_flushes, cpu);
+ sref_data.nr_pending_flushes--;
+
+ if (sref_data.nr_pending_flushes != 0)
+ sref_queue_init(queue);
+ else
+ sref_end_epoch(queue);
+ }
+
+ spinlock_unlock(&sref_data.lock);
+
+ sref_cache_clear_dirty(cache);
+ evcnt_inc(&cache->ev_flush);
+}
+
+static void
+sref_cache_wakeup_manager(struct sref_cache *cache)
+{
+ unsigned long flags;
+
+ cpu_intr_save(&flags);
+ thread_wakeup(cache->manager);
+ cpu_intr_restore(flags);
+}
+
+/*
+ * Force the manager thread of a cache to run.
+ */
+static void
+sref_cache_manage(struct sref_cache *cache)
+{
+ sref_cache_mark_dirty(cache);
+ sref_cache_wakeup_manager(cache);
+}
+
+/*
+ * Check if a cache is dirty and wake up its manager thread if it is.
+ *
+ * Return true if the cache is dirty and requires maintenance.
+ */
+static int
+sref_cache_check(struct sref_cache *cache)
+{
+ if (!sref_cache_is_dirty(cache))
+ return 0;
+
+ sref_cache_wakeup_manager(cache);
+ return 1;
+}
+
+static void
+sref_noref(struct work *work)
+{
+ struct sref_counter *counter;
+
+ counter = structof(work, struct sref_counter, work);
+ counter->noref_fn(counter);
+}
+
+static void
+sref_review(struct sref_queue *queue)
+{
+ struct sref_counter *counter;
+ struct work_queue works;
+ unsigned long nr_dirty, nr_true;
+
+ nr_dirty = 0;
+ nr_true = 0;
+ work_queue_init(&works);
+
+ while (!sref_queue_empty(queue)) {
+ counter = sref_queue_pop(queue);
+
+ spinlock_lock(&counter->lock);
+
+ assert(sref_counter_is_queued(counter));
+ sref_counter_clear_queued(counter);
+
+ if (counter->value != 0) {
+ sref_counter_clear_dirty(counter);
+ spinlock_unlock(&counter->lock);
+ } else if (sref_counter_is_dirty(counter)) {
+ sref_counter_clear_dirty(counter);
+ sref_counter_schedule_review(counter);
+ spinlock_unlock(&counter->lock);
+ nr_dirty++;
+ } else {
+ /*
+ * Keep in mind that the work structure shares memory with
+ * the counter data. Unlocking isn't needed here, since this
+ * counter is now really at 0, but do it for consistency.
+ */
+ spinlock_unlock(&counter->lock);
+ nr_true++;
+ work_init(&counter->work, sref_noref);
+ work_queue_push(&works, &counter->work);
+ }
+ }
+
+ if (work_queue_nr_works(&works) != 0)
+ work_queue_schedule(&works, 0);
+
+ if ((nr_dirty + nr_true) != 0) {
+ spinlock_lock(&sref_data.lock);
+ evcnt_add(&sref_data.ev_dirty_zero, nr_dirty);
+ evcnt_add(&sref_data.ev_true_zero, nr_true);
+ spinlock_unlock(&sref_data.lock);
+ }
+}
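In the loop above, a "dirty zero" (ev_dirty_zero) is a counter that sref_counter_add() brought back to zero while it was already queued: that zero may still reflect unflushed deltas elsewhere, so the counter is rescheduled for another full round of review. A "true zero" (ev_true_zero) stayed at zero for an entire epoch during which every registered processor flushed its cache, so, following the Refcache argument from the paper cited at the top of this file, the flushed value is the true reference count and the no-reference work can safely be scheduled.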
+
+static void
+sref_manage(void *arg)
+{
+ struct sref_cache *cache;
+ struct sref_queue queue;
+ unsigned long flags;
+
+ cache = arg;
+
+ for (;;) {
+ thread_preempt_disable();
+ cpu_intr_save(&flags);
+
+ while (!sref_cache_is_dirty(cache))
+ thread_sleep(NULL);
+
+ cpu_intr_restore(flags);
+ thread_preempt_enable();
+
+ mutex_lock(&cache->lock);
+ sref_cache_flush(cache, &queue);
+ mutex_unlock(&cache->lock);
+
+ sref_review(&queue);
+ }
+
+ /* Never reached */
+}
+
+void __init
+sref_bootstrap(void)
+{
+ spinlock_init(&sref_data.lock);
+ sref_queue_init(&sref_data.queue0);
+ sref_queue_init(&sref_data.queue1);
+ evcnt_register(&sref_data.ev_epoch, "sref_epoch");
+ evcnt_register(&sref_data.ev_dirty_zero, "sref_dirty_zero");
+ evcnt_register(&sref_data.ev_true_zero, "sref_true_zero");
+
+ sref_cache_init(sref_cache_get(), 0);
+}
+
+static void __init
+sref_setup_manager(struct sref_cache *cache, unsigned int cpu)
+{
+ char name[THREAD_NAME_SIZE];
+ struct thread_attr attr;
+ struct thread *manager;
+ struct cpumap *cpumap;
+ int error;
+
+ error = cpumap_create(&cpumap);
+
+ if (error)
+ panic("sref: unable to create manager thread CPU map");
+
+ cpumap_zero(cpumap);
+ cpumap_set(cpumap, cpu);
+ snprintf(name, sizeof(name), "x15_sref_manage/%u", cpu);
+ thread_attr_init(&attr, name);
+ thread_attr_set_cpumap(&attr, cpumap);
+ thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO);
+ thread_attr_set_priority(&attr, THREAD_SCHED_RT_PRIO_MIN);
+ error = thread_create(&manager, &attr, sref_manage, cache);
+ cpumap_destroy(cpumap);
+
+ if (error)
+ panic("sref: unable to create manager thread");
+
+ cache->manager = manager;
+}
+
+void __init
+sref_setup(void)
+{
+ unsigned int i;
+
+ for (i = 1; i < cpu_count(); i++)
+ sref_cache_init(percpu_ptr(sref_cache, i), i);
+
+ for (i = 0; i < cpu_count(); i++)
+ sref_setup_manager(percpu_ptr(sref_cache, i), i);
+}
+
+void
+sref_register(void)
+{
+ struct sref_cache *cache;
+ unsigned int cpu;
+
+ assert(!thread_preempt_enabled());
+
+ cache = sref_cache_get();
+ assert(!sref_cache_is_registered(cache));
+ assert(!sref_cache_is_dirty(cache));
+
+ cpu = cpu_id();
+
+ spinlock_lock(&sref_data.lock);
+
+ assert(!cpumap_test(&sref_data.registered_cpus, cpu));
+ cpumap_set(&sref_data.registered_cpus, cpu);
+ sref_data.nr_registered_cpus++;
+
+ if ((sref_data.nr_registered_cpus == 1)
+ && (sref_data.nr_pending_flushes == 0))
+ sref_reset_pending_flushes();
+
+ spinlock_unlock(&sref_data.lock);
+
+ sref_cache_mark_registered(cache);
+}
+
+int
+sref_unregister(void)
+{
+ struct sref_cache *cache;
+ unsigned int cpu;
+ int dirty, error;
+
+ assert(!thread_preempt_enabled());
+
+ cache = sref_cache_get();
+ assert(sref_cache_is_registered(cache));
+
+ /*
+ * Check the dirty flag after clearing the registered flag. The
+ * periodic event handler won't set the dirty flag if the processor
+ * is unregistered. It is then safe to test if that flag is set.
+ * Everything involved is processor-local, therefore a simple compiler
+ * barrier is enough to enforce ordering.
+ */
+ sref_cache_clear_registered(cache);
+ barrier();
+ dirty = sref_cache_check(cache);
+
+ if (dirty) {
+ sref_cache_mark_registered(cache);
+ return ERROR_BUSY;
+ }
+
+ cpu = cpu_id();
+
+ spinlock_lock(&sref_data.lock);
+
+ assert(cpumap_test(&sref_data.registered_cpus, cpu));
+
+ if (!cpumap_test(&sref_data.pending_flushes, cpu)) {
+ assert(sref_data.nr_pending_flushes != 0);
+ error = 0;
+ } else if ((sref_data.nr_registered_cpus == 1)
+ && (sref_data.nr_pending_flushes == 1)
+ && sref_review_queue_empty()) {
+ cpumap_clear(&sref_data.pending_flushes, cpu);
+ sref_data.nr_pending_flushes--;
+ error = 0;
+ } else {
+ sref_cache_manage(cache);
+ error = ERROR_BUSY;
+ }
+
+ if (error)
+ sref_cache_mark_registered(cache);
+ else {
+ cpumap_clear(&sref_data.registered_cpus, cpu);
+ sref_data.nr_registered_cpus--;
+ }
+
+ spinlock_unlock(&sref_data.lock);
+
+ return error;
+}
+
+void
+sref_report_periodic_event(void)
+{
+ struct sref_cache *cache;
+
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ cache = sref_cache_get();
+
+ if (!sref_cache_is_registered(cache)
+ || (cache->manager == thread_self()))
+ return;
+
+ sref_cache_manage(cache);
+}
+
+void
+sref_counter_inc(struct sref_counter *counter)
+{
+ struct sref_cache *cache;
+ struct sref_delta *delta;
+
+ cache = sref_cache_acquire();
+ sref_cache_mark_dirty(cache);
+ delta = sref_cache_get_delta(cache, counter);
+ sref_delta_inc(delta);
+ sref_cache_release(cache);
+}
+
+void
+sref_counter_dec(struct sref_counter *counter)
+{
+ struct sref_cache *cache;
+ struct sref_delta *delta;
+
+ cache = sref_cache_acquire();
+ sref_cache_mark_dirty(cache);
+ delta = sref_cache_get_delta(cache, counter);
+ sref_delta_dec(delta);
+ sref_cache_release(cache);
+}
diff --git a/kern/sref.h b/kern/sref.h
new file mode 100644
index 00000000..c854b9d2
--- /dev/null
+++ b/kern/sref.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Scalable reference counting.
+ *
+ * The purpose of this module is to reduce the amount of inter-processor
+ * communication usually involved with reference counting. Scalable
+ * reference counters should only be used when multiprocessor scalability
+ * is important because of the costs they imply (increased memory usage
+ * and latencies).
+ *
+ * When a counter drops to 0, the no-reference function associated with it
+ * is called in work context. As a result, special care must be taken if
+ * using sref counters in the work module itself.
+ */
+
+#ifndef _KERN_SREF_H
+#define _KERN_SREF_H
+
+#include <kern/spinlock.h>
+
+struct sref_counter;
+
+/*
+ * Type for no-reference functions.
+ */
+typedef void (*sref_noref_fn_t)(struct sref_counter *);
+
+#include <kern/sref_i.h>
+
+/*
+ * Early initialization of the sref module.
+ *
+ * This function depends on the availability of percpu variables.
+ */
+void sref_bootstrap(void);
+
+/*
+ * Initialize the sref module.
+ *
+ * This function mostly takes care of setting up periodic maintenance.
+ */
+void sref_setup(void);
+
+/*
+ * Manage registration of the current processor.
+ *
+ * Registering tells the sref module that the current processor reports
+ * periodic events. When a processor enters a state in which reporting
+ * periodic events becomes irrelevant, it unregisters itself so that the
+ * other registered processors don't need to wait for it to make progress.
+ * For example, this is done inside the idle loop since it is obviously
+ * impossible to obtain or release references while idling.
+ *
+ * Unregistration can fail if internal data still require processing, in
+ * which case a maintenance thread is woken up and ERROR_BUSY is returned.
+ *
+ * Preemption must be disabled when calling these functions.
+ */
+void sref_register(void);
+int sref_unregister(void);
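The kern/thread.c hunks later in this diff show the intended callers: thread_idle() unregisters before waiting so that an idle processor doesn't hold up epoch progress, skips the wait when ERROR_BUSY signals pending maintenance, and registers again before leaving the idle loop, while thread_run_scheduler() registers each processor as it starts scheduling.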
+
+/*
+ * Report a periodic event (normally the periodic timer interrupt) on the
+ * current processor.
+ *
+ * Interrupts and preemption must be disabled when calling this function.
+ */
+void sref_report_periodic_event(void);
+
+/*
+ * Initialize a scalable reference counter.
+ *
+ * The counter is set to 1. The no-reference function is called (from thread
+ * context) when it is certain that the true number of references is 0.
+ */
+static inline void
+sref_counter_init(struct sref_counter *counter, sref_noref_fn_t noref_fn)
+{
+ counter->noref_fn = noref_fn;
+ spinlock_init(&counter->lock);
+ counter->flags = 0;
+ counter->value = 1;
+}
+
+void sref_counter_inc(struct sref_counter *counter);
+
+void sref_counter_dec(struct sref_counter *counter);
+
+#endif /* _KERN_SREF_H */
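A minimal usage sketch of this interface, modeled on the test module at the end of this diff (the myobj names are illustrative and error handling is kept to the essentials):

    #include <kern/kmem.h>
    #include <kern/macros.h>
    #include <kern/sref.h>
    #include <kern/stddef.h>

    struct myobj {
        struct sref_counter ref_counter;
        /* ... object data ... */
    };

    /* Runs in work (thread) context once the count is known to be 0. */
    static void
    myobj_noref(struct sref_counter *counter)
    {
        struct myobj *obj;

        obj = structof(counter, struct myobj, ref_counter);
        kmem_free(obj, sizeof(*obj));
    }

    /* The creator owns the initial reference (the counter starts at 1). */
    static struct myobj *
    myobj_create(void)
    {
        struct myobj *obj;

        obj = kmem_alloc(sizeof(*obj));

        if (obj != NULL)
            sref_counter_init(&obj->ref_counter, myobj_noref);

        return obj;
    }

    /* Additional references are taken and dropped with inc/dec; the last
     * sref_counter_dec() eventually leads to myobj_noref() being called. */
    static inline void
    myobj_ref(struct myobj *obj)
    {
        sref_counter_inc(&obj->ref_counter);
    }

    static inline void
    myobj_unref(struct myobj *obj)
    {
        sref_counter_dec(&obj->ref_counter);
    }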
diff --git a/kern/sref_i.h b/kern/sref_i.h
new file mode 100644
index 00000000..158ad429
--- /dev/null
+++ b/kern/sref_i.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _KERN_SREF_I_H
+#define _KERN_SREF_I_H
+
+#include <kern/spinlock.h>
+#include <kern/work.h>
+
+#define SREF_QUEUED 0x1
+#define SREF_DIRTY 0x2
+
+/*
+ * Scalable reference counter.
+ *
+ * It's tempting to merge the flags into the next member, but since they're
+ * not protected by the same lock, store them separately.
+ */
+struct sref_counter {
+ sref_noref_fn_t noref_fn;
+
+ union {
+ struct {
+ struct sref_counter *next;
+ struct spinlock lock;
+ int flags;
+ unsigned long value;
+ };
+
+ struct work work;
+ };
+};
+
+#endif /* _KERN_SREF_I_H */
diff --git a/kern/thread.c b/kern/thread.c
index fdf2fc3c..3d9e8bf9 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -97,6 +97,7 @@
#include <kern/percpu.h>
#include <kern/spinlock.h>
#include <kern/sprintf.h>
+#include <kern/sref.h>
#include <kern/stddef.h>
#include <kern/string.h>
#include <kern/task.h>
@@ -1708,6 +1709,7 @@ static void
thread_idle(void *arg)
{
struct thread *self;
+ int error;
(void)arg;
@@ -1715,6 +1717,13 @@ thread_idle(void *arg)
for (;;) {
thread_preempt_disable();
+ error = sref_unregister();
+
+ if (error) {
+ assert(error == ERROR_BUSY);
+ goto error_sref;
+ }
+
llsync_unregister();
for (;;) {
@@ -1729,6 +1738,9 @@ thread_idle(void *arg)
}
llsync_register();
+ sref_register();
+
+error_sref:
thread_preempt_enable();
}
}
@@ -1997,11 +2009,13 @@ thread_run_scheduler(void)
assert(!cpu_intr_enabled());
runq = thread_runq_local();
- llsync_register();
thread = thread_self();
assert(thread == runq->current);
assert(thread->preempt == 1);
+ llsync_register();
+ sref_register();
+
spinlock_lock(&runq->lock);
thread = thread_runq_get_next(thread_runq_local());
@@ -2055,6 +2069,7 @@ thread_tick_intr(void)
runq = thread_runq_local();
evcnt_inc(&runq->ev_tick_intr);
llsync_report_periodic_event();
+ sref_report_periodic_event();
work_report_periodic_event();
thread = thread_self();
diff --git a/test/test_sref_noref.c b/test/test_sref_noref.c
new file mode 100644
index 00000000..e58917c7
--- /dev/null
+++ b/test/test_sref_noref.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2014 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This test module checks that the no-reference function of a scalable
+ * reference counter is actually called when the number of references drops
+ * to 0. An initial master thread creates a bunch of slave threads, more
+ * than the number of processors to enforce migrations. These slaves wait
+ * for the master to allocate a page for a test object with a scalable
+ * reference counter. Once they receive the page, they manipulate the
+ * counter until the master thread tells them to stop. The master thread
+ * also manipulates the counter for a fixed number of iterations before
+ * stopping the slaves. The master thread then joins all slaves to make
+ * sure all of them have released their reference on the test object.
+ * Finally, it releases the initial reference, at which point, the
+ * no-reference function should be called.
+ *
+ * Notes:
+ * The number of loops must be large enough to allow many epochs to occur.
+ * Also, it's very hard to artificially produce dirty zeroes.
+ */
+
+#include <kern/assert.h>
+#include <kern/condition.h>
+#include <kern/evcnt.h>
+#include <kern/kmem.h>
+#include <kern/macros.h>
+#include <kern/mutex.h>
+#include <kern/printk.h>
+#include <kern/sprintf.h>
+#include <kern/sref.h>
+#include <kern/stddef.h>
+#include <kern/thread.h>
+#include <machine/cpu.h>
+#include <test/test.h>
+#include <vm/vm_kmem.h>
+
+#define NR_LOOPS (100UL * 1000 * 1000)
+
+struct test_obj {
+ struct sref_counter ref_counter;
+};
+
+static struct condition test_condition;
+static struct mutex test_lock;
+static struct test_obj *test_obj;
+static volatile int test_stop;
+
+static void
+test_manipulate_counter(struct test_obj *obj)
+{
+ sref_counter_inc(&obj->ref_counter);
+ thread_yield();
+ sref_counter_dec(&obj->ref_counter);
+ thread_yield();
+}
+
+static void
+test_ref(void *arg)
+{
+ struct test_obj *obj;
+
+ (void)arg;
+
+ mutex_lock(&test_lock);
+
+ printk("waiting for page\n");
+
+ while (test_obj == NULL)
+ condition_wait(&test_condition, &test_lock);
+
+ obj = test_obj;
+
+ mutex_unlock(&test_lock);
+
+ printk("page received, manipulate reference counter\n");
+
+ while (!test_stop)
+ test_manipulate_counter(obj);
+
+ printk("thread exiting\n");
+}
+
+static void
+test_obj_noref(struct sref_counter *counter)
+{
+ struct test_obj *obj;
+
+ obj = structof(counter, struct test_obj, ref_counter);
+ vm_kmem_free((unsigned long)obj, sizeof(*obj));
+ printk("0 references, page released\n");
+ evcnt_info("sref_epoch");
+ evcnt_info("sref_dirty_zero");
+ evcnt_info("sref_true_zero");
+}
+
+static void
+test_run(void *arg)
+{
+ char name[THREAD_NAME_SIZE];
+ struct thread_attr attr;
+ struct thread **threads;
+ struct test_obj *obj;
+ volatile unsigned long loop;
+ unsigned long va;
+ unsigned int i, nr_threads;
+ int error;
+
+ (void)arg;
+
+ nr_threads = cpu_count() + 1;
+ threads = kmem_alloc(sizeof(*threads) * nr_threads);
+ assert(threads != NULL);
+
+ for (i = 0; i < nr_threads; i++) {
+ snprintf(name, sizeof(name), "x15_test_ref/%u", i);
+ thread_attr_init(&attr, name);
+ error = thread_create(&threads[i], &attr, test_ref, NULL);
+ assert(!error);
+ }
+
+ printk("allocating page\n");
+ va = vm_kmem_alloc(sizeof(*obj));
+ assert(va != 0);
+ obj = (void *)va;
+ sref_counter_init(&obj->ref_counter, test_obj_noref);
+
+ printk("page allocated, 1 reference, publishing\n");
+
+ mutex_lock(&test_lock);
+ test_obj = obj;
+ condition_broadcast(&test_condition);
+ mutex_unlock(&test_lock);
+
+ for (loop = 0; loop < NR_LOOPS; loop++)
+ test_manipulate_counter(obj);
+
+ printk("stopping test, wait for threads\n");
+ test_stop = 1;
+
+ for (i = 0; i < nr_threads; i++)
+ thread_join(threads[i]);
+
+ printk("releasing initial reference\n");
+ sref_counter_dec(&obj->ref_counter);
+
+ kmem_free(threads, sizeof(*threads) * nr_threads);
+}
+
+void
+test_setup(void)
+{
+ struct thread_attr attr;
+ struct thread *thread;
+ int error;
+
+ condition_init(&test_condition);
+ mutex_init(&test_lock);
+
+ thread_attr_init(&attr, "x15_test_run");
+ thread_attr_set_detached(&attr);
+ error = thread_create(&thread, &attr, test_run, NULL);
+ assert(!error);
+}