summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Braun <rbraun@sceen.net>2013-05-15 02:12:57 +0200
committerRichard Braun <rbraun@sceen.net>2013-05-15 02:12:57 +0200
commit05eaaafa89b813e1bd5e34b4d47ec99cdc3a5911 (patch)
tree84fc50645b2a5ba4f17dfaa2a92059c350956b04
parentb6555e76c36170e60e09125c0bcd7b997f68e4a9 (diff)
kern/llsync: new module
This module provides lockless synchronization so that reads can safely occur during updates, without holding a lock. It is based on passive serialization as described in US patent 4809168, and achieves a goal similar to Linux RCU (Read-Copy Update).
-rw-r--r--Makefrag.am3
-rw-r--r--kern/kernel.c2
-rw-r--r--kern/llsync.c319
-rw-r--r--kern/llsync.h146
-rw-r--r--kern/llsync_i.h32
-rw-r--r--kern/thread.c7
6 files changed, 509 insertions, 0 deletions
diff --git a/Makefrag.am b/Makefrag.am
index cb3ef7cf..1c305ca3 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -16,6 +16,9 @@ x15_SOURCES += \
kern/kmem_i.h \
kern/limits.h \
kern/list.h \
+ kern/llsync.c \
+ kern/llsync.h \
+ kern/llsync_i.h \
kern/macros.h \
kern/mutex.c \
kern/mutex.h \
diff --git a/kern/kernel.c b/kern/kernel.c
index 93c0c19c..1acd5308 100644
--- a/kern/kernel.c
+++ b/kern/kernel.c
@@ -17,6 +17,7 @@
#include <kern/init.h>
#include <kern/kernel.h>
+#include <kern/llsync.h>
#include <kern/panic.h>
#include <kern/task.h>
#include <kern/thread.h>
@@ -33,6 +34,7 @@ kernel_main(void)
/* Initialize the kernel */
task_setup();
thread_setup();
+ llsync_setup();
/* Rendezvous with APs */
cpu_mp_sync();
diff --git a/kern/llsync.c b/kern/llsync.c
new file mode 100644
index 00000000..c5364520
--- /dev/null
+++ b/kern/llsync.c
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2013 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * The method used by this module is described in the expired US patent
+ * 4809168, "Passive Serialization in a Multitasking Environment". It is
+ * similar to "Classic RCU (Read-Copy Update)" as found in Linux 2.6, with
+ * the notable difference that RCU actively starts grace periods, where
+ * passive serialization waits for two sequential "multiprocess checkpoints"
+ * (renamed global checkpoints in this implementation) to occur.
+ *
+ * It is used instead of RCU because of patents that may not allow writing
+ * an implementation not based on the Linux code (see
+ * http://lists.lttng.org/pipermail/lttng-dev/2013-May/020305.html). As
+ * patents expire, this module could be reworked to become a true RCU
+ * implementation. In the meantime, the module interface was carefully
+ * designed to be compatible with RCU.
+ *
+ * TODO Implement and use generic worker threads.
+ *
+ * TODO Gracefully handle large amounts of deferred works.
+ */
+
+#include <kern/bitmap.h>
+#include <kern/condition.h>
+#include <kern/list.h>
+#include <kern/llsync.h>
+#include <kern/llsync_i.h>
+#include <kern/macros.h>
+#include <kern/mutex.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/printk.h>
+#include <kern/spinlock.h>
+#include <kern/stddef.h>
+#include <kern/thread.h>
+#include <machine/cpu.h>
+
+#define LLSYNC_NR_WORKS_WARN 10000
+
+struct llsync_cpu_checkpoint llsync_cpu_checkpoints[MAX_CPUS];
+
+/*
+ * Global lock protecting the remaining module data.
+ *
+ * Interrupts must be disabled when acquiring this lock.
+ */
+static struct spinlock llsync_lock;
+
+/*
+ * Map of processors regularly checking in.
+ */
+static BITMAP_DECLARE(llsync_registered_cpus, MAX_CPUS);
+static unsigned int llsync_nr_registered_cpus;
+
+/*
+ * Map of processors for which a checkpoint commit is pending.
+ *
+ * To reduce contention, checking in only affects a single per-processor
+ * cache line. Special events (currently the system timer interrupt only)
+ * trigger checkpoint commits, which report the local state to this bitmap,
+ * thereby acquiring the global lock.
+ */
+static BITMAP_DECLARE(llsync_pending_checkpoints, MAX_CPUS);
+static unsigned int llsync_nr_pending_checkpoints;
+
+/*
+ * List of deferred works.
+ *
+ * The list number matches the number of global checkpoints that occurred
+ * since works contained in it were added, except list2 for which it's two
+ * or more.
+ */
+static struct list llsync_list0;
+static struct list llsync_list1;
+static struct list llsync_list2;
+
+/*
+ * Total number of deferred works.
+ *
+ * Mostly unused, except to monitor work processing.
+ */
+static unsigned long llsync_nr_works;
+
+/*
+ * Thread processing deferred works.
+ */
+static struct thread *llsync_worker;
+
+/*
+ * Synchronous waiter used by llsync_wait() to block until all existing
+ * read-side references are dropped.
+ */
+struct llsync_waiter {
+ struct llsync_work work; /* deferred work that signals completion */
+ struct mutex lock; /* protects the done flag */
+ struct condition cond; /* signalled by llsync_signal() */
+ int done; /* set once the work has been processed */
+};
+
+/*
+ * Worker thread function: process deferred works.
+ *
+ * Works are taken from list2 only, i.e. once two global checkpoints have
+ * occurred since they were deferred, so that no read-side reference to
+ * the protected data can remain. The global lock is dropped while the
+ * work functions run, since they may block.
+ */
+static void
+llsync_work(void *arg)
+{
+    struct llsync_work *work;
+    struct list tmp;
+    unsigned long flags, nr_works;
+
+    (void)arg;
+
+    spinlock_lock_intr_save(&llsync_lock, &flags);
+
+    for (;;) {
+        while (list_empty(&llsync_list2))
+            thread_sleep(&llsync_lock);
+
+        /* Grab the whole batch so works run without the lock held */
+        list_set_head(&tmp, &llsync_list2);
+        list_init(&llsync_list2);
+
+        spinlock_unlock_intr_restore(&llsync_lock, flags);
+
+        nr_works = 0;
+
+        do {
+            work = list_first_entry(&tmp, struct llsync_work, node);
+            list_remove(&work->node);
+            nr_works++;
+            work->fn(work);
+        } while (!list_empty(&tmp));
+
+        spinlock_lock_intr_save(&llsync_lock, &flags);
+
+        llsync_nr_works -= nr_works;
+    }
+}
+
+/*
+ * Initialize module state and create the worker thread that processes
+ * deferred works. Panics on failure, since the module can't operate
+ * without its worker.
+ */
+void
+llsync_setup(void)
+{
+ struct thread_attr attr;
+ int error;
+
+ spinlock_init(&llsync_lock);
+ list_init(&llsync_list0);
+ list_init(&llsync_list1);
+ list_init(&llsync_list2);
+
+ attr.task = NULL;
+ attr.name = "x15_llsync_work";
+ attr.policy = THREAD_SCHED_POLICY_TS;
+ attr.priority = THREAD_SCHED_TS_PRIO_DEFAULT;
+ error = thread_create(&llsync_worker, &attr, llsync_work, NULL);
+
+ if (error)
+ panic("llsync: unable to create worker thread");
+}
+
+/*
+ * Wake up the worker thread, unless the caller is the worker itself.
+ */
+static void
+llsync_wakeup_worker(void)
+{
+ if (thread_self() != llsync_worker)
+ thread_wakeup(llsync_worker);
+}
+
+/*
+ * Start a new global checkpoint period.
+ *
+ * Rearms the pending checkpoint bitmap from the registered processors,
+ * ages the work lists (list1 becomes list2, ready to process; list0
+ * becomes list1), and resets the checkpoint flag of every registered
+ * processor (remote ones via cross-call).
+ *
+ * Called with llsync_lock held (all current call sites hold it).
+ */
+static void
+llsync_process_global_checkpoint(unsigned int cpu)
+{
+ int i, nr_cpus;
+
+ nr_cpus = cpu_count();
+ bitmap_copy(llsync_pending_checkpoints, llsync_registered_cpus, nr_cpus);
+ llsync_nr_pending_checkpoints = llsync_nr_registered_cpus;
+ /* Works in list2 have now seen two global checkpoints */
+ list_concat(&llsync_list2, &llsync_list1);
+ list_set_head(&llsync_list1, &llsync_list0);
+ list_init(&llsync_list0);
+
+ llsync_reset_checkpoint(cpu);
+
+ bitmap_for_each(llsync_registered_cpus, nr_cpus, i)
+ if ((unsigned int)i != cpu)
+ cpu_send_llsync_reset(i);
+
+ if (!list_empty(&llsync_list2))
+ llsync_wakeup_worker();
+}
+
+/*
+ * Report that a processor will be regularly checking in.
+ *
+ * If this is the first registered processor, a new global checkpoint
+ * period is started immediately.
+ */
+void
+llsync_register_cpu(unsigned int cpu)
+{
+ unsigned long flags;
+
+ spinlock_lock_intr_save(&llsync_lock, &flags);
+
+ assert(!bitmap_test(llsync_registered_cpus, cpu));
+ bitmap_set(llsync_registered_cpus, cpu);
+ llsync_nr_registered_cpus++;
+
+ if (llsync_nr_registered_cpus == 1)
+ llsync_process_global_checkpoint(cpu);
+
+ spinlock_unlock_intr_restore(&llsync_lock, flags);
+}
+
+/*
+ * Commit the pending checkpoint of a processor, if any.
+ *
+ * When the last pending checkpoint of the period is committed, a global
+ * checkpoint occurs and a new period is started.
+ *
+ * Called with llsync_lock held.
+ */
+static void
+llsync_commit_checkpoint_common(unsigned int cpu)
+{
+ int pending;
+
+ pending = bitmap_test(llsync_pending_checkpoints, cpu);
+
+ if (!pending)
+ return;
+
+ bitmap_clear(llsync_pending_checkpoints, cpu);
+ llsync_nr_pending_checkpoints--;
+
+ if (llsync_nr_pending_checkpoints == 0)
+ llsync_process_global_checkpoint(cpu);
+}
+
+/*
+ * Report that a processor has entered a state in which checking in
+ * becomes irrelevant (e.g. the idle loop).
+ *
+ * The departing processor commits its own checkpoint. If it was the last
+ * registered processor, all deferred works are aged and a new period is
+ * started so they can still be processed.
+ */
+void
+llsync_unregister_cpu(unsigned int cpu)
+{
+ unsigned long flags;
+
+ llsync_reset_checkpoint(cpu);
+
+ spinlock_lock_intr_save(&llsync_lock, &flags);
+
+ assert(bitmap_test(llsync_registered_cpus, cpu));
+ bitmap_clear(llsync_registered_cpus, cpu);
+ llsync_nr_registered_cpus--;
+
+ if (llsync_nr_registered_cpus != 0)
+ llsync_commit_checkpoint_common(cpu);
+ else {
+ /* No processor left to check in: flush works through the lists */
+ list_concat(&llsync_list1, &llsync_list0);
+ list_init(&llsync_list0);
+ llsync_process_global_checkpoint(cpu);
+ }
+
+ spinlock_unlock_intr_restore(&llsync_lock, flags);
+}
+
+/*
+ * Commit a pending checkpoint, normally from the system timer interrupt.
+ *
+ * The processor-local checked flag is read without the lock as a cheap
+ * fast path; it is only written by the local processor and by checkpoint
+ * resets.
+ */
+void
+llsync_commit_checkpoint(unsigned int cpu)
+{
+    unsigned long flags;
+
+    if (!llsync_cpu_checkpoints[cpu].checked)
+        return;
+
+    spinlock_lock_intr_save(&llsync_lock, &flags);
+
+    /*
+     * An unregistered processor has nothing to commit. Note that the
+     * original early return here leaked llsync_lock and left interrupts
+     * disabled; the lock must be released on every path.
+     */
+    if (bitmap_test(llsync_registered_cpus, cpu))
+        llsync_commit_checkpoint_common(cpu);
+
+    spinlock_unlock_intr_restore(&llsync_lock, flags);
+}
+
+/*
+ * Defer an operation until all existing read-side references are dropped,
+ * without blocking.
+ *
+ * The work enters list0 and is processed by the worker thread once two
+ * global checkpoints have occurred.
+ */
+void
+llsync_defer(struct llsync_work *work, llsync_fn_t fn)
+{
+ unsigned long flags;
+
+ work->fn = fn;
+
+ spinlock_lock_intr_save(&llsync_lock, &flags);
+
+ list_insert_tail(&llsync_list0, &work->node);
+ llsync_nr_works++;
+
+ /* Equality test so the warning prints once per threshold crossing */
+ if (llsync_nr_works == LLSYNC_NR_WORKS_WARN)
+ printk("llsync: warning: large number of deferred works\n");
+
+ spinlock_unlock_intr_restore(&llsync_lock, flags);
+}
+
+/*
+ * Work function for llsync_wait(): mark the embedding waiter as done and
+ * signal the sleeping thread. Runs in the worker thread context, so
+ * taking a mutex here is allowed.
+ */
+static void
+llsync_signal(struct llsync_work *work)
+{
+ struct llsync_waiter *waiter;
+
+ waiter = structof(work, struct llsync_waiter, work);
+
+ mutex_lock(&waiter->lock);
+ waiter->done = 1;
+ condition_signal(&waiter->cond);
+ mutex_unlock(&waiter->lock);
+}
+
+/*
+ * Wait for all existing read-side references to be dropped.
+ *
+ * Implemented by deferring a work whose function wakes this thread; the
+ * waiter lives on the stack, which is safe because the function doesn't
+ * return before the work has run.
+ */
+void
+llsync_wait(void)
+{
+ struct llsync_waiter waiter;
+
+ mutex_init(&waiter.lock);
+ condition_init(&waiter.cond);
+ waiter.done = 0;
+
+ llsync_defer(&waiter.work, llsync_signal);
+
+ mutex_lock(&waiter.lock);
+
+ while (!waiter.done)
+ condition_wait(&waiter.cond, &waiter.lock);
+
+ mutex_unlock(&waiter.lock);
+}
diff --git a/kern/llsync.h b/kern/llsync.h
new file mode 100644
index 00000000..32987a03
--- /dev/null
+++ b/kern/llsync.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2013 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Lockless synchronization.
+ */
+
+#ifndef _KERN_LLSYNC_H
+#define _KERN_LLSYNC_H
+
+#include <kern/list.h>
+#include <kern/macros.h>
+#include <kern/llsync_i.h>
+#include <kern/thread.h>
+#include <machine/mb.h>
+
+struct llsync_work;
+
+/*
+ * Type for work functions.
+ *
+ * Works are guaranteed to process in thread context and can block, but
+ * must not sleep for long durations.
+ */
+typedef void (*llsync_fn_t)(struct llsync_work *);
+
+/*
+ * Deferred work.
+ *
+ * This structure should be embedded in objects protected with lockless
+ * synchronization. It stores the work function and is passed to it as its
+ * only parameter. The function can then find the containing object with
+ * structof and release it.
+ */
+struct llsync_work {
+ struct list node; /* linkage in one of the module's work lists */
+ llsync_fn_t fn; /* called once all prior read-side references are gone */
+};
+
+/*
+ * Safely assign a pointer.
+ *
+ * This macro enforces memory ordering. It should be used to reference
+ * objects once they're completely built, so that readers accessing the
+ * pointer obtain consistent data.
+ */
+#define llsync_assign_ptr(ptr, value) \
+MACRO_BEGIN \
+ mb_store(); \
+ (ptr) = (value); \
+MACRO_END
+
+/*
+ * Safely access a pointer.
+ *
+ * No memory barrier, rely on data dependency to enforce ordering.
+ */
+#define llsync_read_ptr(ptr) (ptr)
+
+/*
+ * Enter a read-side critical section.
+ *
+ * Implemented by disabling preemption: checkpoints are reported at
+ * context switch (see llsync_checkin), so none can be reached while
+ * inside the section.
+ */
+static inline void
+llsync_read_lock(void)
+{
+ thread_preempt_disable();
+}
+
+/*
+ * Leave a read-side critical section by reenabling preemption.
+ */
+static inline void
+llsync_read_unlock(void)
+{
+ thread_preempt_enable();
+}
+
+/*
+ * Reset the checkpoint flag of a processor.
+ *
+ * Called from interrupt context (cross-call), and also directly by the
+ * llsync module with the global lock held.
+ */
+static inline void
+llsync_reset_checkpoint(unsigned int cpu)
+{
+ llsync_cpu_checkpoints[cpu].checked = 0;
+}
+
+/*
+ * Report that a processor has reached a checkpoint.
+ *
+ * Called during context switch. Only writes the processor-local flag,
+ * keeping the cost of checking in very low.
+ */
+static inline void
+llsync_checkin(unsigned int cpu)
+{
+ llsync_cpu_checkpoints[cpu].checked = 1;
+}
+
+/*
+ * Initialize the llsync module.
+ */
+void llsync_setup(void);
+
+/*
+ * Report that a processor will be regularly checking in.
+ */
+void llsync_register_cpu(unsigned int cpu);
+
+/*
+ * Report that a processor has entered a state in which checking in becomes
+ * irrelevant (e.g. the idle loop).
+ */
+void llsync_unregister_cpu(unsigned int cpu);
+
+/*
+ * Commit a pending checkpoint.
+ *
+ * Checking in is a light processor-local operation. Committing a checkpoint
+ * is a heavier global one, and is performed less often, normally during the
+ * system timer interrupt.
+ */
+void llsync_commit_checkpoint(unsigned int cpu);
+
+/*
+ * Defer an operation until all existing read-side references are dropped,
+ * without blocking.
+ */
+void llsync_defer(struct llsync_work *work, llsync_fn_t fn);
+
+/*
+ * Wait for all existing read-side references to be dropped.
+ *
+ * This function sleeps, and may do so for a moderately long duration (a few
+ * system timer ticks).
+ */
+void llsync_wait(void);
+
+#endif /* _KERN_LLSYNC_H */
diff --git a/kern/llsync_i.h b/kern/llsync_i.h
new file mode 100644
index 00000000..89a7fe83
--- /dev/null
+++ b/kern/llsync_i.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2013 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _KERN_LLSYNC_I_H
+#define _KERN_LLSYNC_I_H
+
+#include <kern/param.h>
+
+/*
+ * Per-processor flag indicating if a processor checked in.
+ */
+struct llsync_cpu_checkpoint {
+ int checked; /* 1 if this processor checked in since the last reset */
+} __aligned(CPU_L1_SIZE); /* one cache line per processor to avoid false sharing */
+
+extern struct llsync_cpu_checkpoint llsync_cpu_checkpoints[MAX_CPUS];
+
+#endif /* _KERN_LLSYNC_I_H */
diff --git a/kern/thread.c b/kern/thread.c
index fa61976f..a42fd11a 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -88,6 +88,7 @@
#include <kern/init.h>
#include <kern/kmem.h>
#include <kern/list.h>
+#include <kern/llsync.h>
#include <kern/macros.h>
#include <kern/mutex.h>
#include <kern/panic.h>
@@ -473,6 +474,8 @@ thread_runq_schedule(struct thread_runq *runq, struct thread *prev)
assert(!cpu_intr_enabled());
spinlock_assert_locked(&runq->lock);
+ llsync_checkin(thread_runq_id(runq));
+
thread_clear_flag(prev, THREAD_RESCHEDULE);
thread_runq_put_prev(runq, prev);
@@ -1630,6 +1633,7 @@ thread_idle(void *arg)
for (;;) {
thread_preempt_disable();
+ llsync_unregister_cpu(cpu);
for (;;) {
cpu_intr_disable();
@@ -1642,6 +1646,7 @@ thread_idle(void *arg)
cpu_idle();
}
+ llsync_register_cpu(cpu);
thread_preempt_enable();
}
}
@@ -1847,6 +1852,7 @@ thread_run(void)
assert(cpu_intr_enabled());
runq = thread_runq_local();
+ llsync_register_cpu(thread_runq_id(runq));
thread = thread_self();
assert(thread == runq->current);
assert(thread->preempt == 1);
@@ -1894,6 +1900,7 @@ thread_tick(void)
assert(!thread_preempt_enabled());
runq = thread_runq_local();
+ llsync_commit_checkpoint(thread_runq_id(runq));
thread = thread_self();
spinlock_lock(&runq->lock);