1 files changed, 319 insertions, 0 deletions
diff --git a/kern/llsync.c b/kern/llsync.c
new file mode 100644
index 00000000..c5364520
--- /dev/null
+++ b/kern/llsync.c
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2013 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * The method used by this module is described in the expired US patent 
+ * 4809168, "Passive Serialization in a Multitasking Environment". It is
+ * similar to "Classic RCU (Read-Copy Update)" as found in Linux 2.6, with
+ * the notable difference that RCU actively starts grace periods, where
+ * passive serialization waits for two sequential "multiprocess checkpoints"
+ * (renamed global checkpoints in this implementation) to occur.
+ *
+ * It is used instead of RCU because of patents that may not allow writing
+ * an implementation not based on the Linux code (see
+ * http://lists.lttng.org/pipermail/lttng-dev/2013-May/020305.html). As
+ * patents expire, this module could be reworked to become a true RCU
+ * implementation. In the mean time, the module interface was carefully
+ * designed to be compatible with RCU.
+ *
+ * TODO Implement and use generic worker threads.
+ *
+ * TODO Gracefully handle large amounts of deferred works.
+ */
+
+#include <kern/bitmap.h>
+#include <kern/condition.h>
+#include <kern/list.h>
+#include <kern/llsync.h>
+#include <kern/llsync_i.h>
+#include <kern/macros.h>
+#include <kern/mutex.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/printk.h>
+#include <kern/spinlock.h>
+#include <kern/stddef.h>
+#include <kern/thread.h>
+#include <machine/cpu.h>
+
+#define LLSYNC_NR_WORKS_WARN 10000
+
+struct llsync_cpu_checkpoint llsync_cpu_checkpoints[MAX_CPUS];
+
+/*
+ * Global lock protecting the remaining module data.
+ *
+ * Interrupts must be disabled when acquiring this lock.
+ */
+static struct spinlock llsync_lock;
+
+/*
+ * Map of processors regularly checking in.
+ */
+static BITMAP_DECLARE(llsync_registered_cpus, MAX_CPUS);
+static unsigned int llsync_nr_registered_cpus;
+
+/*
+ * Map of processors for which a checkpoint commit is pending.
+ *
+ * To reduce contention, checking in only affects a single per-processor
+ * cache line. Special events (currently the system timer interrupt only)
+ * trigger checkpoint commits, which report the local state to this bitmap,
+ * thereby acquiring the global lock.
+ */
+static BITMAP_DECLARE(llsync_pending_checkpoints, MAX_CPUS);
+static unsigned int llsync_nr_pending_checkpoints;
+
+/*
+ * List of deferred works.
+ *
+ * The list number matches the number of global checkpoints that occurred
+ * since works contained in it were added, except list2 for which it's two
+ * or more.
+ */
+static struct list llsync_list0;
+static struct list llsync_list1;
+static struct list llsync_list2;
+
+/*
+ * Total number of deferred works.
+ *
+ * Mostly unused, except to monitor work processing.
+ */
+static unsigned long llsync_nr_works;
+
+/*
+ * Thread processing deferred works.
+ */
+static struct thread *llsync_worker;
+
+struct llsync_waiter {
+    struct llsync_work work;
+    struct mutex lock;
+    struct condition cond;
+    int done;
+};
+
+static void
+llsync_work(void *arg)
+{
+    struct llsync_work *work ;
+    struct list tmp;
+    unsigned long flags, nr_works;
+
+    (void)arg;
+
+    spinlock_lock_intr_save(&llsync_lock, &flags);
+
+    for (;;) {
+        while (list_empty(&llsync_list2))
+            thread_sleep(&llsync_lock);
+
+        list_set_head(&tmp, &llsync_list2);
+        list_init(&llsync_list2);
+
+        spinlock_unlock_intr_restore(&llsync_lock, flags);
+
+        nr_works = 0;
+
+        do {
+            work = list_first_entry(&tmp, struct llsync_work, node);
+            list_remove(&work->node);
+            nr_works++;
+            work->fn(work);
+        } while (!list_empty(&tmp));
+
+        spinlock_lock_intr_save(&llsync_lock, &flags);
+
+        llsync_nr_works -= nr_works;
+    }
+}
+
+void
+llsync_setup(void)
+{
+    struct thread_attr attr;
+    int error;
+
+    spinlock_init(&llsync_lock);
+    list_init(&llsync_list0);
+    list_init(&llsync_list1);
+    list_init(&llsync_list2);
+
+    attr.task = NULL;
+    attr.name = "x15_llsync_work";
+    attr.policy = THREAD_SCHED_POLICY_TS;
+    attr.priority = THREAD_SCHED_TS_PRIO_DEFAULT;
+    error = thread_create(&llsync_worker, &attr, llsync_work, NULL);
+
+    if (error)
+        panic("llsync: unable to create worker thread");
+}
+
+static void
+llsync_wakeup_worker(void)
+{
+    if (thread_self() != llsync_worker)
+        thread_wakeup(llsync_worker);
+}
+
+static void
+llsync_process_global_checkpoint(unsigned int cpu)
+{
+    int i, nr_cpus;
+
+    nr_cpus = cpu_count();
+    bitmap_copy(llsync_pending_checkpoints, llsync_registered_cpus, nr_cpus);
+    llsync_nr_pending_checkpoints = llsync_nr_registered_cpus;
+    list_concat(&llsync_list2, &llsync_list1);
+    list_set_head(&llsync_list1, &llsync_list0);
+    list_init(&llsync_list0);
+
+    llsync_reset_checkpoint(cpu);
+
+    bitmap_for_each(llsync_registered_cpus, nr_cpus, i)
+        if ((unsigned int)i != cpu)
+            cpu_send_llsync_reset(i);
+
+    if (!list_empty(&llsync_list2))
+        llsync_wakeup_worker();
+}
+
+void
+llsync_register_cpu(unsigned int cpu)
+{
+    unsigned long flags;
+
+    spinlock_lock_intr_save(&llsync_lock, &flags);
+
+    assert(!bitmap_test(llsync_registered_cpus, cpu));
+    bitmap_set(llsync_registered_cpus, cpu);
+    llsync_nr_registered_cpus++;
+
+    if (llsync_nr_registered_cpus == 1)
+        llsync_process_global_checkpoint(cpu);
+
+    spinlock_unlock_intr_restore(&llsync_lock, flags);
+}
+
+static void
+llsync_commit_checkpoint_common(unsigned int cpu)
+{
+    int pending;
+
+    pending = bitmap_test(llsync_pending_checkpoints, cpu);
+
+    if (!pending)
+        return;
+
+    bitmap_clear(llsync_pending_checkpoints, cpu);
+    llsync_nr_pending_checkpoints--;
+
+    if (llsync_nr_pending_checkpoints == 0)
+        llsync_process_global_checkpoint(cpu);
+}
+
+void
+llsync_unregister_cpu(unsigned int cpu)
+{
+    unsigned long flags;
+
+    llsync_reset_checkpoint(cpu);
+
+    spinlock_lock_intr_save(&llsync_lock, &flags);
+
+    assert(bitmap_test(llsync_registered_cpus, cpu));
+    bitmap_clear(llsync_registered_cpus, cpu);
+    llsync_nr_registered_cpus--;
+
+    if (llsync_nr_registered_cpus != 0)
+        llsync_commit_checkpoint_common(cpu);
+    else {
+        list_concat(&llsync_list1, &llsync_list0);
+        list_init(&llsync_list0);
+        llsync_process_global_checkpoint(cpu);
+    }
+
+    spinlock_unlock_intr_restore(&llsync_lock, flags);
+}
+
+void
+llsync_commit_checkpoint(unsigned int cpu)
+{
+    unsigned long flags;
+
+    if (!llsync_cpu_checkpoints[cpu].checked)
+        return;
+
+    spinlock_lock_intr_save(&llsync_lock, &flags);
+
+    if (!bitmap_test(llsync_registered_cpus, cpu))
+        return;
+
+    llsync_commit_checkpoint_common(cpu);
+
+    spinlock_unlock_intr_restore(&llsync_lock, flags);
+}
+
+void
+llsync_defer(struct llsync_work *work, llsync_fn_t fn)
+{
+    unsigned long flags;
+
+    work->fn = fn;
+
+    spinlock_lock_intr_save(&llsync_lock, &flags);
+
+    list_insert_tail(&llsync_list0, &work->node);
+    llsync_nr_works++;
+
+    if (llsync_nr_works == LLSYNC_NR_WORKS_WARN)
+        printk("llsync: warning: large number of deferred works\n");
+
+    spinlock_unlock_intr_restore(&llsync_lock, flags);
+}
+
+static void
+llsync_signal(struct llsync_work *work)
+{
+    struct llsync_waiter *waiter;
+
+    waiter = structof(work, struct llsync_waiter, work);
+
+    mutex_lock(&waiter->lock);
+    waiter->done = 1;
+    condition_signal(&waiter->cond);
+    mutex_unlock(&waiter->lock);
+}
+
+void
+llsync_wait(void)
+{
+    struct llsync_waiter waiter;
+
+    mutex_init(&waiter.lock);
+    condition_init(&waiter.cond);
+    waiter.done = 0;
+
+    llsync_defer(&waiter.work, llsync_signal);
+
+    mutex_lock(&waiter.lock);
+
+    while (!waiter.done)
+        condition_wait(&waiter.cond, &waiter.lock);
+
+    mutex_unlock(&waiter.lock);
+}