author     Richard Braun <rbraun@sceen.net>    2014-09-20 02:26:06 +0200
committer  Richard Braun <rbraun@sceen.net>    2014-09-20 02:26:06 +0200
commit     600f0097618e9085fabf6f7ecc0283a5bb9f044d
tree       28cf0f93671a6d02631be2d279a24e6197e305db
parent     65c9a187a9926181534939480a4034b06b10b71e
kern/sref: new module
This module provides multiprocessor scalable reference counters, based
on Refcache, as described in the RadixVM paper.
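
For orientation, here is a minimal usage sketch based on the kern/sref.h comments and the test module in the diff below. The myobj type, its constructor, and the kmem_alloc()/kmem_free() calls are illustrative assumptions, not part of this commit.

/*
 * Sketch: a counter embedded in an object, with a no-reference callback
 * that frees the object once the true reference count reaches zero.
 */
#include <kern/kmem.h>
#include <kern/macros.h>    /* structof() */
#include <kern/sref.h>
#include <kern/stddef.h>

struct myobj {
    struct sref_counter ref_counter;
    /* ... object data ... */
};

static void
myobj_noref(struct sref_counter *counter)
{
    struct myobj *obj;

    /* Called from work (thread) context once no references remain. */
    obj = structof(counter, struct myobj, ref_counter);
    kmem_free(obj, sizeof(*obj));
}

static struct myobj *
myobj_create(void)
{
    struct myobj *obj;

    obj = kmem_alloc(sizeof(*obj));

    if (obj != NULL)
        sref_counter_init(&obj->ref_counter, myobj_noref); /* counter set to 1 */

    return obj;
}

/* Acquiring and releasing a reference only touches the local delta cache. */
static inline void
myobj_ref(struct myobj *obj)
{
    sref_counter_inc(&obj->ref_counter);
}

static inline void
myobj_unref(struct myobj *obj)
{
    sref_counter_dec(&obj->ref_counter);
}

Note that the final myobj_unref() doesn't free the object immediately: the no-reference function runs later, from work context, once the review logic has determined that the global count is truly zero.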
-rw-r--r--  Makefrag.am                7
-rw-r--r--  arch/x86/machine/boot.c    2
-rw-r--r--  configure.ac               3
-rw-r--r--  kern/kernel.c              2
-rw-r--r--  kern/sref.c              885
-rw-r--r--  kern/sref.h              104
-rw-r--r--  kern/sref_i.h             48
-rw-r--r--  kern/thread.c             17
-rw-r--r--  test/test_sref_noref.c   175
9 files changed, 1242 insertions(+), 1 deletion(-)
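
Before the diff itself, a standalone sketch of the delta-cache indexing scheme implemented in kern/sref.c below: a counter's address is hashed by discarding its alignment bits, then masked into the power-of-two array of per-processor deltas. The constants are taken from the diff; the toy counters array and main() are illustrative only and build as ordinary userspace C.

#include <assert.h>
#include <stdio.h>

#define SREF_MAX_DELTAS 4096

#ifdef __LP64__
#define SREF_HASH_SHIFT 3
#else /* __LP64__ */
#define SREF_HASH_SHIFT 2
#endif /* __LP64__ */

/* Map a counter address to a slot in the per-processor delta array. */
static unsigned long
sref_counter_index(const void *counter)
{
    unsigned long va = (unsigned long)counter;

    /* Counters are at least word aligned, so the low bits carry no entropy. */
    assert((va & ((1UL << SREF_HASH_SHIFT) - 1)) == 0);
    return ((va >> SREF_HASH_SHIFT) & (SREF_MAX_DELTAS - 1));
}

int
main(void)
{
    static unsigned long counters[4];   /* stand-ins for counter addresses */
    unsigned int i;

    for (i = 0; i < 4; i++)
        printf("counter %u -> delta slot %lu\n", i,
               sref_counter_index(&counters[i]));

    return 0;
}

When two counters hash to the same slot, the cache simply flushes (evicts) the old delta before reusing the slot, which is the simple replacement policy described in the kern/sref.c comments.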
diff --git a/Makefrag.am b/Makefrag.am index 75f5828b..63d3c258 100644 --- a/Makefrag.am +++ b/Makefrag.am @@ -42,6 +42,9 @@ x15_SOURCES += \ kern/spinlock_i.h \ kern/sprintf.c \ kern/sprintf.h \ + kern/sref.c \ + kern/sref.h \ + kern/sref_i.h \ kern/stddef.h \ kern/stdint.h \ kern/string.c \ @@ -78,6 +81,10 @@ if TEST_PMAP_UPDATE_MP x15_SOURCES += test/test_pmap_update_mp.c endif TEST_PMAP_UPDATE_MP +if TEST_SREF_NOREF +x15_SOURCES += test/test_sref_noref.c +endif TEST_SREF_NOREF + if TEST_X86_PMAP_REMOVE_PTP x15_SOURCES += test/test_x86_pmap_remove_ptp.c endif TEST_X86_PMAP_REMOVE_PTP diff --git a/arch/x86/machine/boot.c b/arch/x86/machine/boot.c index 5cc90ff6..09b57140 100644 --- a/arch/x86/machine/boot.c +++ b/arch/x86/machine/boot.c @@ -51,6 +51,7 @@ #include <kern/param.h> #include <kern/percpu.h> #include <kern/printk.h> +#include <kern/sref.h> #include <kern/stddef.h> #include <kern/stdint.h> #include <kern/string.h> @@ -274,6 +275,7 @@ boot_main(void) cpu_setup(); thread_bootstrap(); pmap_bootstrap(); + sref_bootstrap(); cga_setup(); printk_setup(); boot_show_version(); diff --git a/configure.ac b/configure.ac index 91dd6eb8..5bdd541b 100644 --- a/configure.ac +++ b/configure.ac @@ -46,6 +46,7 @@ m4_define([ENABLE_TEST_MODULE], [AS_CASE(["$enable_test_module"], [llsync_defer], [test_llsync_defer=yes], [pmap_update_mp], [test_pmap_update_mp=yes], + [sref_noref], [test_sref_noref=yes], [x86_pmap_remove_ptp], [test_x86_pmap_remove_ptp=yes AC_MSG_NOTICE([forcing max_cpus to 1]) @@ -60,6 +61,8 @@ AM_CONDITIONAL([TEST_LLSYNC_DEFER], [test x"$test_llsync_defer" = xyes]) AM_CONDITIONAL([TEST_PMAP_UPDATE_MP], [test x"$test_pmap_update_mp" = xyes]) +AM_CONDITIONAL([TEST_SREF_NOREF], + [test x"$test_sref_noref" = xyes]) AM_CONDITIONAL([TEST_X86_PMAP_REMOVE_PTP], [test x"$test_x86_pmap_remove_ptp" = xyes]) diff --git a/kern/kernel.c b/kern/kernel.c index 069649de..462fa9d9 100644 --- a/kern/kernel.c +++ b/kern/kernel.c @@ -20,6 +20,7 @@ #include <kern/kernel.h> #include <kern/llsync.h> #include <kern/percpu.h> +#include <kern/sref.h> #include <kern/task.h> #include <kern/thread.h> #include <kern/work.h> @@ -41,6 +42,7 @@ kernel_main(void) thread_setup(); work_setup(); llsync_setup(); + sref_setup(); vm_page_info(); #ifdef RUN_TEST_MODULE diff --git a/kern/sref.c b/kern/sref.c new file mode 100644 index 00000000..c10dbc97 --- /dev/null +++ b/kern/sref.c @@ -0,0 +1,885 @@ +/* + * Copyright (c) 2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * This implementation is based on the paper "RadixVM: Scalable address + * spaces for multithreaded applications" by Austin T. Clements, + * M. Frans Kaashoek, and Nickolai Zeldovich. Specifically, it implements + * the Refcache component described in the paper, with a few differences + * outlined below. + * + * For now, this module doesn't provide weak references. 
+ * + * Refcache synchronizes access to delta caches by disabling preemption. + * That behaviour is realtime-unfriendly because of the large number of + * deltas in a cache. This module uses dedicated manager threads to + * perform cache flushes and review queue processing, and mutexes to + * synchronize cache access. + * + * In addition, Refcache normally requires all processors to regularly + * process their local data. That behaviour is dyntick-unfriendly. As a + * result, this module handles processor registration so that processors + * that aren't participating in reference counting (e.g. because they're + * idling) don't prevent others from progressing. Instead of per-processor + * review queues, there is one global review queue which can be managed + * from any processor. Review queue access should still be considerably + * infrequent in practice, keeping the impact on contention low. + * + * Locking protocol : cache-> counter -> global data + */ + +#include <kern/assert.h> +#include <kern/condition.h> +#include <kern/cpumap.h> +#include <kern/error.h> +#include <kern/evcnt.h> +#include <kern/init.h> +#include <kern/macros.h> +#include <kern/mutex.h> +#include <kern/panic.h> +#include <kern/param.h> +#include <kern/percpu.h> +#include <kern/spinlock.h> +#include <kern/sprintf.h> +#include <kern/sref.h> +#include <kern/sref_i.h> +#include <kern/stddef.h> +#include <kern/thread.h> +#include <machine/cpu.h> + +/* + * Maximum number of deltas per cache. + */ +#define SREF_MAX_DELTAS 4096 + +#ifdef __LP64__ +#define SREF_HASH_SHIFT 3 +#else /* __LP64__ */ +#define SREF_HASH_SHIFT 2 +#endif /* __LP64__ */ + +/* + * Number of counters in review queue beyond which to issue a warning. + */ +#define SREF_NR_COUNTERS_WARN 10000 + +struct sref_queue { + struct sref_counter *first; + struct sref_counter *last; + unsigned long size; +}; + +/* + * Global data. + * + * If there is a pending flush, its associated CPU must be registered. + * Notwithstanding transient states, the number of pending flushes is 0 + * if and only if no processor is registered, in which case the sref + * module, and probably the whole system, is completely idle. + * + * The review queue is implemented with two queues of counters. Each + * queue is named with a number that matches the number of epochs that + * occurred since their counters were added. + */ +struct sref_data { + struct spinlock lock; + struct cpumap registered_cpus; + unsigned int nr_registered_cpus; + struct cpumap pending_flushes; + unsigned int nr_pending_flushes; + struct sref_queue queue0; + struct sref_queue queue1; + struct evcnt ev_epoch; + struct evcnt ev_dirty_zero; + struct evcnt ev_true_zero; + int no_warning; +}; + +/* + * Temporary difference to apply on a reference counter. + * + * Deltas are stored in per-processor caches and added to their global + * counter when flushed. A delta is valid if and only if the counter it + * points to isn't NULL. If the value of a delta is not zero, the delta + * must be valid. + */ +struct sref_delta { + struct sref_counter *counter; + unsigned long value; +}; + +/* + * Per-processor cache of deltas. + * + * Delta caches are implemented with hash tables for quick ref count to + * delta lookups. For now, a very simple replacement policy, similar to + * that described in the RadixVM paper, is used. Improve with an LRU-like + * algorithm if this turns out to be a problem. + * + * Manager threads periodically flush deltas and process the review queue. 
+ * Waking up a manager thread must be done with interrupts disabled to + * prevent a race with the periodic event that drives regular flushes + * (normally the periodic timer interrupt). + * + * Changing the registered flag is done with preemption disabled. The + * periodic event handler checks whether the current processor is + * registered, and can do so at any time. This race is harmless. + * + * The dirty flag means there may be data to process, and is used to wake + * up the manager thread. Marking a cache dirty is done either by regular + * threads increasing or decreasing a delta, or the periodic event handler. + * Clearing the dirty flag is only done by the manager thread. The cache + * lock makes sure only one thread is accessing the cache at any time, but + * it doesn't prevent the periodic handler from running. Since the handler + * as well as regular threads set that flag, a race between them is harmless. + * On the other hand, the handler won't access the flag if it is run while + * the manager thread is running. Since the manager thread is already running, + * there is no need to wake it up anyway. + */ +struct sref_cache { + struct mutex lock; + struct sref_delta deltas[SREF_MAX_DELTAS]; + struct evcnt ev_evict; + struct evcnt ev_flush; + struct thread *manager; + int registered; + int dirty; +}; + +static struct sref_data sref_data; +static struct sref_cache sref_cache __percpu; + +static void __init +sref_queue_init(struct sref_queue *queue) +{ + queue->first = NULL; + queue->last = NULL; + queue->size = 0; +} + +static inline unsigned long +sref_queue_size(const struct sref_queue *queue) +{ + return queue->size; +} + +static inline int +sref_queue_empty(const struct sref_queue *queue) +{ + return (queue->size == 0); +} + +static void +sref_queue_push(struct sref_queue *queue, struct sref_counter *counter) +{ + counter->next = NULL; + + if (queue->last == NULL) + queue->first = counter; + else + queue->last->next = counter; + + queue->last = counter; + queue->size++; +} + +static struct sref_counter * +sref_queue_pop(struct sref_queue *queue) +{ + struct sref_counter *counter; + + counter = queue->first; + queue->first = counter->next; + + if (queue->last == counter) + queue->last = NULL; + + queue->size--; + return counter; +} + +static void +sref_queue_transfer(struct sref_queue *dest, struct sref_queue *src) +{ + *dest = *src; +} + +static void +sref_queue_concat(struct sref_queue *queue1, struct sref_queue *queue2) +{ + if (sref_queue_empty(queue2)) + return; + + if (sref_queue_empty(queue1)) { + sref_queue_transfer(queue1, queue2); + return; + } + + queue1->last->next = queue2->first; + queue1->last = queue2->last; + queue1->size += queue2->size; +} + +static inline unsigned long +sref_counter_hash(const struct sref_counter *counter) +{ + unsigned long va; + + va = (unsigned long)counter; + + assert(P2ALIGNED(va, 1UL << SREF_HASH_SHIFT)); + return (va >> SREF_HASH_SHIFT); +} + +static inline unsigned long +sref_counter_index(const struct sref_counter *counter) +{ + return (sref_counter_hash(counter) & (SREF_MAX_DELTAS - 1)); +} + +static inline int +sref_counter_is_queued(const struct sref_counter *counter) +{ + return (counter->flags & SREF_QUEUED); +} + +static inline void +sref_counter_mark_queued(struct sref_counter *counter) +{ + counter->flags |= SREF_QUEUED; +} + +static inline void +sref_counter_clear_queued(struct sref_counter *counter) +{ + counter->flags &= ~SREF_QUEUED; +} + +static inline int +sref_counter_is_dirty(const struct sref_counter *counter) +{ 
+ return (counter->flags & SREF_DIRTY); +} + +static inline void +sref_counter_mark_dirty(struct sref_counter *counter) +{ + counter->flags |= SREF_DIRTY; +} + +static inline void +sref_counter_clear_dirty(struct sref_counter *counter) +{ + counter->flags &= ~SREF_DIRTY; +} + +static void +sref_counter_schedule_review(struct sref_counter *counter) +{ + assert(!sref_counter_is_queued(counter)); + assert(!sref_counter_is_dirty(counter)); + + sref_counter_mark_queued(counter); + + spinlock_lock(&sref_data.lock); + sref_queue_push(&sref_data.queue0, counter); + spinlock_unlock(&sref_data.lock); +} + +static void +sref_counter_add(struct sref_counter *counter, unsigned long delta) +{ + assert(delta != 0); + + spinlock_lock(&counter->lock); + + counter->value += delta; + + if (counter->value == 0) { + if (sref_counter_is_queued(counter)) + sref_counter_mark_dirty(counter); + else + sref_counter_schedule_review(counter); + } + + spinlock_unlock(&counter->lock); +} + +static void __init +sref_delta_init(struct sref_delta *delta) +{ + delta->counter = NULL; + delta->value = 0; +} + +static inline struct sref_counter * +sref_delta_counter(struct sref_delta *delta) +{ + return delta->counter; +} + +static inline void +sref_delta_set_counter(struct sref_delta *delta, struct sref_counter *counter) +{ + assert(delta->value == 0); + delta->counter = counter; +} + +static inline void +sref_delta_inc(struct sref_delta *delta) +{ + delta->value++; +} + +static inline void +sref_delta_dec(struct sref_delta *delta) +{ + delta->value--; +} + +static inline int +sref_delta_is_valid(const struct sref_delta *delta) +{ + return (delta->counter != NULL); +} + +static inline int +sref_delta_is_zero(const struct sref_delta *delta) +{ + return (delta->value == 0); +} + +static void +sref_delta_flush(struct sref_delta *delta) +{ + assert(delta->value != 0); + sref_counter_add(delta->counter, delta->value); + delta->value = 0; +} + +static inline unsigned long +sref_review_queue_size(void) +{ + return (sref_queue_size(&sref_data.queue0) + + sref_queue_size(&sref_data.queue1)); +} + +static inline int +sref_review_queue_empty(void) +{ + return (sref_review_queue_size() == 0); +} + +static void +sref_reset_pending_flushes(void) +{ + cpumap_copy(&sref_data.pending_flushes, &sref_data.registered_cpus); + sref_data.nr_pending_flushes = sref_data.nr_registered_cpus; +} + +static void +sref_end_epoch(struct sref_queue *queue) +{ + assert(cpumap_find_first(&sref_data.registered_cpus) != -1); + assert(sref_data.nr_registered_cpus != 0); + assert(cpumap_find_first(&sref_data.pending_flushes) == -1); + assert(sref_data.nr_pending_flushes == 0); + + if (!sref_data.no_warning + && (sref_review_queue_size() >= SREF_NR_COUNTERS_WARN)) { + sref_data.no_warning = 1; + printk("sref: warning: large number of counters in review queue\n"); + } + + if (sref_data.nr_registered_cpus == 1) { + sref_queue_concat(&sref_data.queue1, &sref_data.queue0); + sref_queue_init(&sref_data.queue0); + } + + sref_queue_transfer(queue, &sref_data.queue1); + sref_queue_transfer(&sref_data.queue1, &sref_data.queue0); + sref_queue_init(&sref_data.queue0); + evcnt_inc(&sref_data.ev_epoch); + sref_reset_pending_flushes(); +} + +static inline struct sref_delta * +sref_cache_delta(struct sref_cache *cache, unsigned int i) +{ + assert(i < ARRAY_SIZE(cache->deltas)); + return &cache->deltas[i]; +} + +static void __init +sref_cache_init(struct sref_cache *cache, unsigned int cpu) +{ + char name[EVCNT_NAME_SIZE]; + struct sref_delta *delta; + unsigned int i; + + 
mutex_init(&cache->lock); + + for (i = 0; i < ARRAY_SIZE(cache->deltas); i++) { + delta = sref_cache_delta(cache, i); + sref_delta_init(delta); + } + + snprintf(name, sizeof(name), "sref_evict/%u", cpu); + evcnt_register(&cache->ev_evict, name); + snprintf(name, sizeof(name), "sref_flush/%u", cpu); + evcnt_register(&cache->ev_flush, name); + cache->manager = NULL; + cache->registered = 0; + cache->dirty = 0; +} + +static inline struct sref_cache * +sref_cache_get(void) +{ + return cpu_local_ptr(sref_cache); +} + +static struct sref_cache * +sref_cache_acquire(void) +{ + struct sref_cache *cache; + + thread_pin(); + cache = sref_cache_get(); + + mutex_lock(&cache->lock); + return cache; +} + +static void +sref_cache_release(struct sref_cache *cache) +{ + mutex_unlock(&cache->lock); + thread_unpin(); +} + +static inline int +sref_cache_is_registered(const struct sref_cache *cache) +{ + return cache->registered; +} + +static inline void +sref_cache_mark_registered(struct sref_cache *cache) +{ + cache->registered = 1; +} + +static inline void +sref_cache_clear_registered(struct sref_cache *cache) +{ + cache->registered = 0; +} + +static inline int +sref_cache_is_dirty(const struct sref_cache *cache) +{ + return cache->dirty; +} + +static inline void +sref_cache_mark_dirty(struct sref_cache *cache) +{ + cache->dirty = 1; +} + +static inline void +sref_cache_clear_dirty(struct sref_cache *cache) +{ + cache->dirty = 0; +} + +static void +sref_cache_evict(struct sref_cache *cache, struct sref_delta *delta) +{ + if (sref_delta_is_zero(delta)) + return; + + sref_delta_flush(delta); + evcnt_inc(&cache->ev_evict); +} + +static struct sref_delta * +sref_cache_get_delta(struct sref_cache *cache, struct sref_counter *counter) +{ + struct sref_delta *delta; + + delta = sref_cache_delta(cache, sref_counter_index(counter)); + + if (!sref_delta_is_valid(delta)) + sref_delta_set_counter(delta, counter); + else if (sref_delta_counter(delta) != counter) { + sref_cache_evict(cache, delta); + sref_delta_set_counter(delta, counter); + } + + return delta; +} + +static void +sref_cache_flush(struct sref_cache *cache, struct sref_queue *queue) +{ + struct sref_delta *delta; + unsigned int i, cpu; + + for (i = 0; i < ARRAY_SIZE(cache->deltas); i++) { + delta = sref_cache_delta(cache, i); + + if (!sref_delta_is_zero(delta)) + sref_delta_flush(delta); + } + + cpu = cpu_id(); + + spinlock_lock(&sref_data.lock); + + assert(sref_cache_is_registered(cache)); + assert(cpumap_test(&sref_data.registered_cpus, cpu)); + + if (!cpumap_test(&sref_data.pending_flushes, cpu)) + sref_queue_init(queue); + else { + cpumap_clear(&sref_data.pending_flushes, cpu); + sref_data.nr_pending_flushes--; + + if (sref_data.nr_pending_flushes != 0) + sref_queue_init(queue); + else + sref_end_epoch(queue); + } + + spinlock_unlock(&sref_data.lock); + + sref_cache_clear_dirty(cache); + evcnt_inc(&cache->ev_flush); +} + +static void +sref_cache_wakeup_manager(struct sref_cache *cache) +{ + unsigned long flags; + + cpu_intr_save(&flags); + thread_wakeup(cache->manager); + cpu_intr_restore(flags); +} + +/* + * Force the manager thread of a cache to run. + */ +static void +sref_cache_manage(struct sref_cache *cache) +{ + sref_cache_mark_dirty(cache); + sref_cache_wakeup_manager(cache); +} + +/* + * Check if a cache is dirty and wake up its manager thread if it is. + * + * Return true if the cache is dirty and requires maintenance. 
+ */ +static int +sref_cache_check(struct sref_cache *cache) +{ + if (!sref_cache_is_dirty(cache)) + return 0; + + sref_cache_wakeup_manager(cache); + return 1; +} + +static void +sref_noref(struct work *work) +{ + struct sref_counter *counter; + + counter = structof(work, struct sref_counter, work); + counter->noref_fn(counter); +} + +static void +sref_review(struct sref_queue *queue) +{ + struct sref_counter *counter; + struct work_queue works; + unsigned long nr_dirty, nr_true; + + nr_dirty = 0; + nr_true = 0; + work_queue_init(&works); + + while (!sref_queue_empty(queue)) { + counter = sref_queue_pop(queue); + + spinlock_lock(&counter->lock); + + assert(sref_counter_is_queued(counter)); + sref_counter_clear_queued(counter); + sref_counter_clear_dirty(counter); + + if (counter->value != 0) + spinlock_unlock(&counter->lock); + else if (sref_counter_is_dirty(counter)) { + sref_counter_clear_queued(counter); + sref_counter_clear_dirty(counter); + sref_counter_schedule_review(counter); + spinlock_unlock(&counter->lock); + nr_dirty++; + } else { + /* + * Keep in mind that the work structure shares memory with + * the counter data. Unlocking isn't needed here, since this + * counter is now really at 0, but do it for consistency. + */ + spinlock_unlock(&counter->lock); + nr_true++; + work_init(&counter->work, sref_noref); + work_queue_push(&works, &counter->work); + } + } + + if (work_queue_nr_works(&works) != 0) + work_queue_schedule(&works, 0); + + if ((nr_dirty + nr_true) != 0) { + spinlock_lock(&sref_data.lock); + evcnt_add(&sref_data.ev_dirty_zero, nr_dirty); + evcnt_add(&sref_data.ev_true_zero, nr_true); + spinlock_unlock(&sref_data.lock); + } +} + +static void +sref_manage(void *arg) +{ + struct sref_cache *cache; + struct sref_queue queue; + unsigned long flags; + + cache = arg; + + for (;;) { + thread_preempt_disable(); + cpu_intr_save(&flags); + + while (!sref_cache_is_dirty(cache)) + thread_sleep(NULL); + + cpu_intr_restore(flags); + thread_preempt_enable(); + + mutex_lock(&cache->lock); + sref_cache_flush(cache, &queue); + mutex_unlock(&cache->lock); + + sref_review(&queue); + } + + /* Never reached */ +} + +void __init +sref_bootstrap(void) +{ + spinlock_init(&sref_data.lock); + sref_queue_init(&sref_data.queue0); + sref_queue_init(&sref_data.queue1); + evcnt_register(&sref_data.ev_epoch, "sref_epoch"); + evcnt_register(&sref_data.ev_dirty_zero, "sref_dirty_zero"); + evcnt_register(&sref_data.ev_true_zero, "sref_true_zero"); + + sref_cache_init(sref_cache_get(), 0); +} + +static void __init +sref_setup_manager(struct sref_cache *cache, unsigned int cpu) +{ + char name[THREAD_NAME_SIZE]; + struct thread_attr attr; + struct thread *manager; + struct cpumap *cpumap; + int error; + + error = cpumap_create(&cpumap); + + if (error) + panic("sref: unable to create manager thread CPU map"); + + cpumap_zero(cpumap); + cpumap_set(cpumap, cpu); + snprintf(name, sizeof(name), "x15_sref_manage/%u", cpu); + thread_attr_init(&attr, name); + thread_attr_set_cpumap(&attr, cpumap); + thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_FIFO); + thread_attr_set_priority(&attr, THREAD_SCHED_RT_PRIO_MIN); + error = thread_create(&manager, &attr, sref_manage, cache); + cpumap_destroy(cpumap); + + if (error) + panic("sref: unable to create manager thread"); + + cache->manager = manager; +} + +void __init +sref_setup(void) +{ + unsigned int i; + + for (i = 1; i < cpu_count(); i++) + sref_cache_init(percpu_ptr(sref_cache, i), i); + + for (i = 0; i < cpu_count(); i++) + 
sref_setup_manager(percpu_ptr(sref_cache, i), i); +} + +void +sref_register(void) +{ + struct sref_cache *cache; + unsigned int cpu; + + assert(!thread_preempt_enabled()); + + cache = sref_cache_get(); + assert(!sref_cache_is_registered(cache)); + assert(!sref_cache_is_dirty(cache)); + + cpu = cpu_id(); + + spinlock_lock(&sref_data.lock); + + assert(!cpumap_test(&sref_data.registered_cpus, cpu)); + cpumap_set(&sref_data.registered_cpus, cpu); + sref_data.nr_registered_cpus++; + + if ((sref_data.nr_registered_cpus == 1) + && (sref_data.nr_pending_flushes == 0)) + sref_reset_pending_flushes(); + + spinlock_unlock(&sref_data.lock); + + sref_cache_mark_registered(cache); +} + +int +sref_unregister(void) +{ + struct sref_cache *cache; + unsigned int cpu; + int dirty, error; + + assert(!thread_preempt_enabled()); + + cache = sref_cache_get(); + assert(sref_cache_is_registered(cache)); + + /* + * Check the dirty flag after clearing the registered flag. The + * periodic event handler won't set the dirty flag if the processor + * is unregistered. It is then safe to test if that flag is set. + * Everything involved is processor-local, therefore a simple compiler + * barrier is enough to enforce ordering. + */ + sref_cache_clear_registered(cache); + barrier(); + dirty = sref_cache_check(cache); + + if (dirty) { + sref_cache_mark_registered(cache); + return ERROR_BUSY; + } + + cpu = cpu_id(); + + spinlock_lock(&sref_data.lock); + + assert(cpumap_test(&sref_data.registered_cpus, cpu)); + + if (!cpumap_test(&sref_data.pending_flushes, cpu)) { + assert(sref_data.nr_pending_flushes != 0); + error = 0; + } else if ((sref_data.nr_registered_cpus == 1) + && (sref_data.nr_pending_flushes == 1) + && sref_review_queue_empty()) { + cpumap_clear(&sref_data.pending_flushes, cpu); + sref_data.nr_pending_flushes--; + error = 0; + } else { + sref_cache_manage(cache); + error = ERROR_BUSY; + } + + if (error) + sref_cache_mark_registered(cache); + else { + cpumap_clear(&sref_data.registered_cpus, cpu); + sref_data.nr_registered_cpus--; + } + + spinlock_unlock(&sref_data.lock); + + return error; +} + +void +sref_report_periodic_event(void) +{ + struct sref_cache *cache; + + assert(!cpu_intr_enabled()); + assert(!thread_preempt_enabled()); + + cache = sref_cache_get(); + + if (!sref_cache_is_registered(cache) + || (cache->manager == thread_self())) + return; + + sref_cache_manage(cache); +} + +void +sref_counter_inc(struct sref_counter *counter) +{ + struct sref_cache *cache; + struct sref_delta *delta; + + cache = sref_cache_acquire(); + sref_cache_mark_dirty(cache); + delta = sref_cache_get_delta(cache, counter); + sref_delta_inc(delta); + sref_cache_release(cache); +} + +void +sref_counter_dec(struct sref_counter *counter) +{ + struct sref_cache *cache; + struct sref_delta *delta; + + cache = sref_cache_acquire(); + sref_cache_mark_dirty(cache); + delta = sref_cache_get_delta(cache, counter); + sref_delta_dec(delta); + sref_cache_release(cache); +} diff --git a/kern/sref.h b/kern/sref.h new file mode 100644 index 00000000..c854b9d2 --- /dev/null +++ b/kern/sref.h @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * Scalable reference counting. + * + * The purpose of this module is to reduce the amount of inter-processor + * communication usually involved with reference counting. Scalable + * reference counters should only be used when multiprocessor scalability + * is important because of the costs they imply (increased memory usage + * and latencies). + * + * When a counter drops to 0, the no-reference function associated with it + * is called in work context. As a result, special care must be taken if + * using sref counters in the work module itself. + */ + +#ifndef _KERN_SREF_H +#define _KERN_SREF_H + +#include <kern/spinlock.h> + +struct sref_counter; + +/* + * Type for no-reference functions. + */ +typedef void (*sref_noref_fn_t)(struct sref_counter *); + +#include <kern/sref_i.h> + +/* + * Early initialization of the sref module. + * + * This function depends on the availability of percpu variables. + */ +void sref_bootstrap(void); + +/* + * Initialize the sref module. + * + * This function mostly takes care of setting up periodic maintenance. + */ +void sref_setup(void); + +/* + * Manage registration of the current processor. + * + * Registering tells the sref module that the current processor reports + * periodic events. When a processor enters a state in which reporting + * periodic events becomes irrelevant, it unregisters itself so that the + * other registered processors don't need to wait for it to make progress. + * For example, this is done inside the idle loop since it is obviously + * impossible to obtain or release references while idling. + * + * Unregistration can fail if internal data still require processing, in + * which case a maintenance thread is awaken and ERROR_BUSY is returned. + * + * Preemption must be disabled when calling these functions. + */ +void sref_register(void); +int sref_unregister(void); + +/* + * Report a periodic event (normally the periodic timer interrupt) on the + * current processor. + * + * Interrupts and preemption must be disabled when calling this function. + */ +void sref_report_periodic_event(void); + +/* + * Initialize a scalable reference counter. + * + * The counter is set to 1. The no-reference function is called (from thread + * context) when it is certain that the true number of references is 0. + */ +static inline void +sref_counter_init(struct sref_counter *counter, sref_noref_fn_t noref_fn) +{ + counter->noref_fn = noref_fn; + spinlock_init(&counter->lock); + counter->flags = 0; + counter->value = 1; +} + +void sref_counter_inc(struct sref_counter *counter); + +void sref_counter_dec(struct sref_counter *counter); + +#endif /* _KERN_SREF_H */ diff --git a/kern/sref_i.h b/kern/sref_i.h new file mode 100644 index 00000000..158ad429 --- /dev/null +++ b/kern/sref_i.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef _KERN_SREF_I_H +#define _KERN_SREF_I_H + +#include <kern/spinlock.h> +#include <kern/work.h> + +#define SREF_QUEUED 0x1 +#define SREF_DIRTY 0x2 + +/* + * Scalable reference counter. + * + * It's tempting to merge the flags into the next member, but since they're + * not protected by the same lock, store them separately. + */ +struct sref_counter { + sref_noref_fn_t noref_fn; + + union { + struct { + struct sref_counter *next; + struct spinlock lock; + int flags; + unsigned long value; + }; + + struct work work; + }; +}; + +#endif /* _KERN_SREF_I_H */ diff --git a/kern/thread.c b/kern/thread.c index fdf2fc3c..3d9e8bf9 100644 --- a/kern/thread.c +++ b/kern/thread.c @@ -97,6 +97,7 @@ #include <kern/percpu.h> #include <kern/spinlock.h> #include <kern/sprintf.h> +#include <kern/sref.h> #include <kern/stddef.h> #include <kern/string.h> #include <kern/task.h> @@ -1708,6 +1709,7 @@ static void thread_idle(void *arg) { struct thread *self; + int error; (void)arg; @@ -1715,6 +1717,13 @@ thread_idle(void *arg) for (;;) { thread_preempt_disable(); + error = sref_unregister(); + + if (error) { + assert(error == ERROR_BUSY); + goto error_sref; + } + llsync_unregister(); for (;;) { @@ -1729,6 +1738,9 @@ thread_idle(void *arg) } llsync_register(); + sref_register(); + +error_sref: thread_preempt_enable(); } } @@ -1997,11 +2009,13 @@ thread_run_scheduler(void) assert(!cpu_intr_enabled()); runq = thread_runq_local(); - llsync_register(); thread = thread_self(); assert(thread == runq->current); assert(thread->preempt == 1); + llsync_register(); + sref_register(); + spinlock_lock(&runq->lock); thread = thread_runq_get_next(thread_runq_local()); @@ -2055,6 +2069,7 @@ thread_tick_intr(void) runq = thread_runq_local(); evcnt_inc(&runq->ev_tick_intr); llsync_report_periodic_event(); + sref_report_periodic_event(); work_report_periodic_event(); thread = thread_self(); diff --git a/test/test_sref_noref.c b/test/test_sref_noref.c new file mode 100644 index 00000000..e58917c7 --- /dev/null +++ b/test/test_sref_noref.c @@ -0,0 +1,175 @@ +/* + * Copyright (c) 2014 Richard Braun. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * + * This test module checks that the no-reference function of a scalable + * reference counter is actually called when the number of references drops + * to 0. An initial master thread creates a bunch of slave threads, more + * than the number of processors to enforce migrations. 
These slaves wait + * for the master to allocate a page for a test object with a scalable + * reference counter. Once they receive the page, they manipulate the + * counter until the master thread tells them to stop. The master thread + * also manipulates the counter for a fixed number of iterations before + * stopping the slaves. The master thread then joins all slaves to make + * sure all of them have released their reference on the test object. + * Finally, it releases the initial reference, at which point, the + * no-reference function should be called. + * + * Notes: + * The number of loops must be large enough to allow many epochs to occur. + * Also, it's very hard to artificially produce dirty zeroes. + */ + +#include <kern/assert.h> +#include <kern/condition.h> +#include <kern/evcnt.h> +#include <kern/kmem.h> +#include <kern/macros.h> +#include <kern/mutex.h> +#include <kern/sprintf.h> +#include <kern/sref.h> +#include <kern/stddef.h> +#include <kern/thread.h> +#include <test/test.h> +#include <vm/vm_kmem.h> + +#define NR_LOOPS (100UL * 1000 * 1000) + +struct test_obj { + struct sref_counter ref_counter; +}; + +static struct condition test_condition; +static struct mutex test_lock; +static struct test_obj *test_obj; +static volatile int test_stop; + +static void +test_manipulate_counter(struct test_obj *obj) +{ + sref_counter_inc(&obj->ref_counter); + thread_yield(); + sref_counter_dec(&obj->ref_counter); + thread_yield(); +} + +static void +test_ref(void *arg) +{ + struct test_obj *obj; + + (void)arg; + + mutex_lock(&test_lock); + + printk("waiting for page\n"); + + while (test_obj == NULL) + condition_wait(&test_condition, &test_lock); + + obj = test_obj; + + mutex_unlock(&test_lock); + + printk("page received, manipulate reference counter\n"); + + while (!test_stop) + test_manipulate_counter(obj); + + printk("thread exiting\n"); +} + +static void +test_obj_noref(struct sref_counter *counter) +{ + struct test_obj *obj; + + obj = structof(counter, struct test_obj, ref_counter); + vm_kmem_free((unsigned long)obj, sizeof(*obj)); + printk("0 references, page released\n"); + evcnt_info("sref_epoch"); + evcnt_info("sref_dirty_zero"); + evcnt_info("sref_true_zero"); +} + +static void +test_run(void *arg) +{ + char name[THREAD_NAME_SIZE]; + struct thread_attr attr; + struct thread **threads; + struct test_obj *obj; + volatile unsigned long loop; + unsigned long va; + unsigned int i, nr_threads; + int error; + + (void)arg; + + nr_threads = cpu_count() + 1; + threads = kmem_alloc(sizeof(*threads) * nr_threads); + assert(threads != NULL); + + for (i = 0; i < nr_threads; i++) { + snprintf(name, sizeof(name), "x15_test_ref/%u", i); + thread_attr_init(&attr, name); + error = thread_create(&threads[i], &attr, test_ref, NULL); + assert(!error); + } + + printk("allocating page\n"); + va = vm_kmem_alloc(sizeof(*obj)); + assert(va != 0); + obj = (void *)va; + sref_counter_init(&obj->ref_counter, test_obj_noref); + + printk("page allocated, 1 reference, publishing\n"); + + mutex_lock(&test_lock); + test_obj = obj; + condition_broadcast(&test_condition); + mutex_unlock(&test_lock); + + for (loop = 0; loop < NR_LOOPS; loop++) + test_manipulate_counter(obj); + + printk("stopping test, wait for threads\n"); + test_stop = 1; + + for (i = 0; i < nr_threads; i++) + thread_join(threads[i]); + + printk("releasing initial reference\n"); + sref_counter_dec(&obj->ref_counter); + + kmem_free(threads, sizeof(*threads) * nr_threads); +} + +void +test_setup(void) +{ + struct thread_attr attr; + struct thread 
*thread;
+    int error;
+
+    condition_init(&test_condition);
+    mutex_init(&test_lock);
+
+    thread_attr_init(&attr, "x15_test_run");
+    thread_attr_set_detached(&attr);
+    error = thread_create(&thread, &attr, test_run, NULL);
+    assert(!error);
+}
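
Finally, a hedged sketch of the registration protocol that the kern/thread.c hunk above adds to the idle loop: sref_unregister() may fail with ERROR_BUSY when local data still needs processing, in which case the processor stays registered and retries on the next idle iteration. The idle_once() wrapper and the elided wait are illustrative; the real idle loop also handles llsync registration.

#include <kern/assert.h>
#include <kern/error.h>
#include <kern/sref.h>
#include <kern/thread.h>

static void
idle_once(void)
{
    int error;

    thread_preempt_disable();

    /* Stop participating in sref maintenance while idling. */
    error = sref_unregister();

    if (error) {
        assert(error == ERROR_BUSY);
        goto out;   /* deltas or the review queue still need processing */
    }

    /* ... wait for an interrupt or a runnable thread ... */

    /* Resume participation before running threads again. */
    sref_register();

out:
    thread_preempt_enable();
}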