kern/llsync: rework lockless synchronization

Use a global checkpoint identifier as a generation counter and remove reset interrupts. For some reason I can't remember, using reset interrupts was thought to be more efficient, perhaps because accessing a global variable on each checkpoint looked expensive. But it's really not scalable, and a read-mostly global variable can get cached locally and not incur expensive access. In addition, add a decent amount of documentation about the semantics with regard to the rest of the system. Explicitly state that checkpoints are triggered by context switches and that it's not allowed to block inside read-side critical sections. Make periodic events attempt to trigger checkpoints too. Add a thread-local read-side critical section nesting counter so that it can be reliably determined whether the processor is running a read-side critical section or not.
author: Richard Braun <rbraun@sceen.net> 2014-06-10 21:14:51 +0200
committer: Richard Braun <rbraun@sceen.net> 2014-06-10 21:14:51 +0200
commit: 73a935a3e8f12447d455bcf4a1a01c51907a53a0 (patch)
tree: 7fbde09a59bcb4f2193a905e91ef9b805dd49746 /kern/llsync_i.h
parent: f0e77fb79581c9227f758ad014a3c2778ae9d2f5 (diff)
1 files changed, 83 insertions, 6 deletions
diff --git a/kern/llsync_i.h b/kern/llsync_i.h
index 5dae472e..f14c9c8d 100644
--- a/kern/llsync_i.h
+++ b/kern/llsync_i.h
@@ -18,21 +18,98 @@
 #ifndef _KERN_LLSYNC_I_H
 #define _KERN_LLSYNC_I_H
 
+#include <kern/assert.h>
+#include <kern/cpumap.h>
 #include <kern/evcnt.h>
+#include <kern/macros.h>
 #include <kern/param.h>
+#include <kern/spinlock.h>
+#include <kern/work.h>
+#include <machine/cpu.h>
+
+/*
+ * Global data.
+ *
+ * The queue number matches the number of global checkpoints that occurred
+ * since works contained in it were added. After two global checkpoints,
+ * works are scheduled for processing.
+ *
+ * Interrupts must be disabled when acquiring the global data lock.
+ */
+struct llsync_data {
+    struct spinlock lock;
+    struct cpumap registered_cpus;
+    unsigned int nr_registered_cpus;
+    struct cpumap pending_checkpoints;
+    unsigned int nr_pending_checkpoints;
+    struct work_queue queue0;
+    struct work_queue queue1;
+    unsigned long nr_pending_works;
+    struct evcnt ev_global_checkpoint;
+    struct evcnt ev_periodic_checkin;
+    struct evcnt ev_failed_periodic_checkin;
+
+    /*
+     * Global checkpoint ID.
+     *
+     * This variable can be frequently accessed from many processors so:
+     *  - reserve a whole cache line for it
+     *  - apply optimistic accesses to reduce contention
+     */
+    struct {
+        volatile unsigned int value __aligned(CPU_L1_SIZE);
+    } gcid;
+};
+
+extern struct llsync_data llsync_data;
 
 /*
  * Per-processor data.
  *
- * Interrupts must be disabled on access.
+ * Every processor records whether it is registered and a local copy of the
+ * global checkpoint ID, which is meaningless on unregistered processors.
+ * The true global checkpoint ID is incremented when a global checkpoint occurs,
+ * after which all the local copies become stale. Checking in synchronizes
+ * the local copy of the global checkpoint ID.
+ *
+ * Interrupts and preemption must be disabled on access.
  */
-struct llsync_cpu {
+struct llsync_cpu_data {
     int registered;
-    int checked;
-    struct evcnt ev_reset;
-    struct evcnt ev_spurious_reset;
+    unsigned int gcid;
 } __aligned(CPU_L1_SIZE);
 
-extern struct llsync_cpu llsync_cpus[MAX_CPUS];
+extern struct llsync_cpu_data llsync_cpu_data[MAX_CPUS];
+
+static inline struct llsync_cpu_data *
+llsync_get_cpu_data(unsigned int cpu)
+{
+    return &llsync_cpu_data[cpu];
+}
+
+static inline void
+llsync_checkin(void)
+{
+    struct llsync_cpu_data *cpu_data;
+    unsigned int cpu, gcid;
+
+    assert(!cpu_intr_enabled());
+    assert(!thread_preempt_enabled());
+
+    cpu = cpu_id();
+    cpu_data = llsync_get_cpu_data(cpu);
+
+    if (!cpu_data->registered)
+        return;
+
+    /*
+     * The global checkpoint ID obtained might be obsolete here, in which
+     * case a commit will not determine that a checkpoint actually occurred.
+     * This should seldom happen.
+     */
+    gcid = llsync_data.gcid.value;
+    assert((gcid - cpu_data->gcid) <= 1);
+    cpu_data->gcid = gcid;
+}
 
 #endif /* _KERN_LLSYNC_I_H */
author	Richard Braun <rbraun@sceen.net>	2014-06-10 21:14:51 +0200
committer	Richard Braun <rbraun@sceen.net>	2014-06-10 21:14:51 +0200
commit	73a935a3e8f12447d455bcf4a1a01c51907a53a0 (patch)
tree	7fbde09a59bcb4f2193a905e91ef9b805dd49746 /kern/llsync_i.h
parent	f0e77fb79581c9227f758ad014a3c2778ae9d2f5 (diff)