author     Richard Braun <rbraun@sceen.net>   2014-06-10 21:14:51 +0200
committer  Richard Braun <rbraun@sceen.net>   2014-06-10 21:14:51 +0200
commit     73a935a3e8f12447d455bcf4a1a01c51907a53a0 (patch)
tree       7fbde09a59bcb4f2193a905e91ef9b805dd49746
parent     f0e77fb79581c9227f758ad014a3c2778ae9d2f5 (diff)
kern/llsync: rework lockless synchronization
Use a global checkpoint identifier as a generation counter and remove reset interrupts. For some reason I can't remember, reset interrupts were thought to be more efficient, perhaps because accessing a global variable on each checkpoint looked expensive. But they really don't scale, whereas a read-mostly global variable can get cached locally and doesn't incur expensive accesses.

In addition, add a decent amount of documentation about the semantics with regard to the rest of the system. Explicitly state that checkpoints are triggered by context switches and that blocking isn't allowed inside read-side critical sections. Make periodic events attempt to trigger checkpoints too.

Finally, add a thread-local read-side critical section nesting counter so that it can be reliably determined whether a processor is currently running a read-side critical section.
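The gist of the new scheme, as a minimal stand-alone sketch: each processor keeps a private copy of a global generation counter, checking in copies the counter locally, and a global checkpoint occurs once every registered processor has caught up. This is an illustration only; the names, and the absence of locking, registration and work queues, are simplifications of the real code in kern/llsync.c and kern/llsync_i.h below.

#include <stdbool.h>

#define NR_CPUS 4

static unsigned int global_gcid;            /* global checkpoint ID (generation) */
static unsigned int local_gcid[NR_CPUS];    /* per-processor copies */
static bool pending[NR_CPUS] = { true, true, true, true };
static unsigned int nr_pending = NR_CPUS;   /* commits still expected */

/* Checkpoint trigger, called on context switch: a cheap, processor-local
 * copy of a read-mostly global variable that normally stays cached. */
static void
checkin(unsigned int cpu)
{
    local_gcid[cpu] = global_gcid;
}

/* Commit, called from the periodic event (under the module lock in the
 * real code): once every processor has caught up with the current
 * generation, a global checkpoint occurs and work deferred two
 * generations ago becomes safe to process. */
static bool
commit(unsigned int cpu)
{
    unsigned int i;

    if (!pending[cpu] || (local_gcid[cpu] != global_gcid))
        return false;                       /* nothing to report yet */

    pending[cpu] = false;

    if (--nr_pending != 0)
        return false;

    global_gcid++;                          /* all local copies become stale */
    nr_pending = NR_CPUS;

    for (i = 0; i < NR_CPUS; i++)
        pending[i] = true;

    return true;                            /* global checkpoint */
}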
-rw-r--r--   arch/x86/machine/cpu.c       |  11
-rw-r--r--   arch/x86/machine/cpu.h       |  14
-rw-r--r--   arch/x86/machine/trap.c      |   3
-rw-r--r--   arch/x86/machine/trap.h      |   1
-rw-r--r--   arch/x86/machine/trap_asm.S  |   1
-rw-r--r--   kern/llsync.c                | 242
-rw-r--r--   kern/llsync.h                | 124
-rw-r--r--   kern/llsync_i.h              |  89
-rw-r--r--   kern/thread.c                |  17
-rw-r--r--   kern/thread.h                |  35
10 files changed, 324 insertions, 213 deletions
diff --git a/arch/x86/machine/cpu.c b/arch/x86/machine/cpu.c
index 8d680d8..18686a3 100644
--- a/arch/x86/machine/cpu.c
+++ b/arch/x86/machine/cpu.c
@@ -17,7 +17,6 @@
#include <kern/assert.h>
#include <kern/init.h>
-#include <kern/llsync.h>
#include <kern/macros.h>
#include <kern/panic.h>
#include <kern/param.h>
@@ -608,13 +607,3 @@ cpu_thread_schedule_intr(struct trap_frame *frame)
thread_schedule_intr();
}
-
-void
-cpu_llsync_reset_intr(struct trap_frame *frame)
-{
- (void)frame;
-
- lapic_eoi();
-
- llsync_reset_checkpoint(cpu_id());
-}
diff --git a/arch/x86/machine/cpu.h b/arch/x86/machine/cpu.h
index f1e9cfb..07dd98d 100644
--- a/arch/x86/machine/cpu.h
+++ b/arch/x86/machine/cpu.h
@@ -658,20 +658,6 @@ cpu_send_thread_schedule(unsigned int cpu)
*/
void cpu_thread_schedule_intr(struct trap_frame *frame);
-/*
- * Request a remote processor to reset its checkpoint.
- */
-static inline void
-cpu_send_llsync_reset(unsigned int cpu)
-{
- lapic_ipi_send(cpu_from_id(cpu)->apic_id, TRAP_LLSYNC_RESET);
-}
-
-/*
- * Interrupt handler for checkpoint reset requests.
- */
-void cpu_llsync_reset_intr(struct trap_frame *frame);
-
#endif /* __ASSEMBLER__ */
#endif /* _X86_CPU_H */
diff --git a/arch/x86/machine/trap.c b/arch/x86/machine/trap.c
index ad586ea..8e350f0 100644
--- a/arch/x86/machine/trap.c
+++ b/arch/x86/machine/trap.c
@@ -75,7 +75,6 @@ void trap_isr_machine_check(void);
void trap_isr_simd_fp_exception(void);
void trap_isr_pic_int7(void);
void trap_isr_pic_int15(void);
-void trap_isr_llsync_reset(void);
void trap_isr_thread_schedule(void);
void trap_isr_cpu_halt(void);
void trap_isr_lapic_timer(void);
@@ -202,8 +201,6 @@ trap_setup(void)
trap_isr_pic_int15, pic_intr_spurious);
/* System defined traps */
- trap_install(TRAP_LLSYNC_RESET, TRAP_HF_NOPREEMPT,
- trap_isr_llsync_reset, cpu_llsync_reset_intr);
trap_install(TRAP_THREAD_SCHEDULE, TRAP_HF_NOPREEMPT,
trap_isr_thread_schedule, cpu_thread_schedule_intr);
trap_install(TRAP_CPU_HALT, TRAP_HF_NOPREEMPT,
diff --git a/arch/x86/machine/trap.h b/arch/x86/machine/trap.h
index bc0426d..99e2389 100644
--- a/arch/x86/machine/trap.h
+++ b/arch/x86/machine/trap.h
@@ -53,7 +53,6 @@
*
* The local APIC assigns one priority every 16 vectors.
*/
-#define TRAP_LLSYNC_RESET 238
#define TRAP_THREAD_SCHEDULE 239
#define TRAP_CPU_HALT 240
#define TRAP_LAPIC_TIMER 253
diff --git a/arch/x86/machine/trap_asm.S b/arch/x86/machine/trap_asm.S
index 7d06d9d..bb70f46 100644
--- a/arch/x86/machine/trap_asm.S
+++ b/arch/x86/machine/trap_asm.S
@@ -157,7 +157,6 @@ TRAP(TRAP_PIC_BASE + 7, trap_isr_pic_int7)
TRAP(TRAP_PIC_BASE + 15, trap_isr_pic_int15)
/* System defined traps */
-TRAP(TRAP_LLSYNC_RESET, trap_isr_llsync_reset)
TRAP(TRAP_THREAD_SCHEDULE, trap_isr_thread_schedule)
TRAP(TRAP_CPU_HALT, trap_isr_cpu_halt)
TRAP(TRAP_LAPIC_TIMER, trap_isr_lapic_timer)
diff --git a/kern/llsync.c b/kern/llsync.c
index ba99a9b..7c3e1c6 100644
--- a/kern/llsync.c
+++ b/kern/llsync.c
@@ -50,50 +50,20 @@
#include <kern/work.h>
#include <machine/cpu.h>
-#define LLSYNC_NR_PENDING_WORKS_WARN 10000
-
-struct llsync_cpu llsync_cpus[MAX_CPUS];
-
-/*
- * Global lock protecting the remaining module data.
- *
- * Interrupts must be disabled when acquiring this lock.
- */
-static struct spinlock llsync_lock;
-
-/*
- * Map of processors regularly checking in.
- */
-static struct cpumap llsync_registered_cpus;
-static unsigned int llsync_nr_registered_cpus;
-
/*
- * Map of processors for which a checkpoint commit is pending.
+ * Initial global checkpoint ID.
*
- * To reduce contention, checking in only affects a single per-processor
- * cache line. Special events (currently the system timer interrupt only)
- * trigger checkpoint commits, which report the local state to this CPU
- * map, thereby acquiring the global lock.
+ * Set to a high value to make sure overflows are correctly handled.
*/
-static struct cpumap llsync_pending_checkpoints;
-static unsigned int llsync_nr_pending_checkpoints;
+#define LLSYNC_INITIAL_GCID ((unsigned int)-10)
/*
- * Queues of deferred works.
- *
- * The queue number matches the number of global checkpoints that occurred
- * since works contained in it were added. After two global checkpoints,
- * works are scheduled for processing.
+ * Number of pending works beyond which to issue a warning.
*/
-static struct work_queue llsync_queue0;
-static struct work_queue llsync_queue1;
+#define LLSYNC_NR_PENDING_WORKS_WARN 10000
-/*
- * Number of works not yet scheduled for processing.
- *
- * Mostly unused, except for debugging.
- */
-static unsigned long llsync_nr_pending_works;
+struct llsync_data llsync_data;
+struct llsync_cpu_data llsync_cpu_data[MAX_CPUS];
struct llsync_waiter {
struct work work;
@@ -105,161 +75,165 @@ struct llsync_waiter {
void __init
llsync_setup(void)
{
- char name[EVCNT_NAME_SIZE];
- unsigned int cpu;
-
- spinlock_init(&llsync_lock);
- work_queue_init(&llsync_queue0);
- work_queue_init(&llsync_queue1);
-
- for (cpu = 0; cpu < cpu_count(); cpu++) {
- snprintf(name, sizeof(name), "llsync_reset/%u", cpu);
- evcnt_register(&llsync_cpus[cpu].ev_reset, name);
- snprintf(name, sizeof(name), "llsync_spurious_reset/%u", cpu);
- evcnt_register(&llsync_cpus[cpu].ev_spurious_reset, name);
- }
-}
-
-static void
-llsync_reset_checkpoint_common(unsigned int cpu)
-{
- assert(!cpumap_test(&llsync_pending_checkpoints, cpu));
- cpumap_set(&llsync_pending_checkpoints, cpu);
- llsync_cpus[cpu].checked = 0;
+ spinlock_init(&llsync_data.lock);
+ work_queue_init(&llsync_data.queue0);
+ work_queue_init(&llsync_data.queue1);
+ evcnt_register(&llsync_data.ev_global_checkpoint,
+ "llsync_global_checkpoint");
+ evcnt_register(&llsync_data.ev_periodic_checkin,
+ "llsync_periodic_checkin");
+ evcnt_register(&llsync_data.ev_failed_periodic_checkin,
+ "llsync_failed_periodic_checkin");
+ llsync_data.gcid.value = LLSYNC_INITIAL_GCID;
}
static void
-llsync_process_global_checkpoint(unsigned int cpu)
+llsync_process_global_checkpoint(void)
{
struct work_queue queue;
unsigned int nr_works;
- int i;
- if (llsync_nr_registered_cpus == 0) {
- work_queue_concat(&llsync_queue1, &llsync_queue0);
- work_queue_init(&llsync_queue0);
- }
+ assert(cpumap_find_first(&llsync_data.pending_checkpoints) == -1);
+ assert(llsync_data.nr_pending_checkpoints == 0);
- work_queue_transfer(&queue, &llsync_queue1);
- work_queue_transfer(&llsync_queue1, &llsync_queue0);
- work_queue_init(&llsync_queue0);
-
- llsync_nr_pending_checkpoints = llsync_nr_registered_cpus;
-
- if (llsync_cpus[cpu].registered)
- llsync_reset_checkpoint_common(cpu);
-
- cpumap_for_each(&llsync_registered_cpus, i)
- if ((unsigned int)i != cpu)
- cpu_send_llsync_reset(i);
+ if (llsync_data.nr_registered_cpus == 0) {
+ work_queue_concat(&llsync_data.queue1, &llsync_data.queue0);
+ work_queue_init(&llsync_data.queue0);
+ } else {
+ cpumap_copy(&llsync_data.pending_checkpoints, &llsync_data.registered_cpus);
+ llsync_data.nr_pending_checkpoints = llsync_data.nr_registered_cpus;
+ }
+ work_queue_transfer(&queue, &llsync_data.queue1);
+ work_queue_transfer(&llsync_data.queue1, &llsync_data.queue0);
+ work_queue_init(&llsync_data.queue0);
nr_works = work_queue_nr_works(&queue);
if (nr_works != 0) {
- llsync_nr_pending_works -= nr_works;
+ llsync_data.nr_pending_works -= nr_works;
work_queue_schedule(&queue, 0);
}
+
+ llsync_data.gcid.value++;
+ evcnt_inc(&llsync_data.ev_global_checkpoint);
}
static void
-llsync_commit_checkpoint_common(unsigned int cpu)
+llsync_commit_checkpoint(unsigned int cpu)
{
int pending;
- pending = cpumap_test(&llsync_pending_checkpoints, cpu);
+ pending = cpumap_test(&llsync_data.pending_checkpoints, cpu);
if (!pending)
return;
- cpumap_clear(&llsync_pending_checkpoints, cpu);
- llsync_nr_pending_checkpoints--;
+ cpumap_clear(&llsync_data.pending_checkpoints, cpu);
+ llsync_data.nr_pending_checkpoints--;
- if (llsync_nr_pending_checkpoints == 0)
- llsync_process_global_checkpoint(cpu);
+ if (llsync_data.nr_pending_checkpoints == 0)
+ llsync_process_global_checkpoint();
}
void
-llsync_register_cpu(unsigned int cpu)
+llsync_register(void)
{
+ struct llsync_cpu_data *cpu_data;
unsigned long flags;
+ unsigned int cpu;
+
+ cpu = cpu_id();
+ cpu_data = llsync_get_cpu_data(cpu);
- spinlock_lock_intr_save(&llsync_lock, &flags);
+ spinlock_lock_intr_save(&llsync_data.lock, &flags);
- assert(!llsync_cpus[cpu].registered);
- llsync_cpus[cpu].registered = 1;
+ assert(!cpu_data->registered);
+ cpu_data->registered = 1;
+ cpu_data->gcid = llsync_data.gcid.value;
- assert(!cpumap_test(&llsync_registered_cpus, cpu));
- cpumap_set(&llsync_registered_cpus, cpu);
- llsync_nr_registered_cpus++;
+ assert(!cpumap_test(&llsync_data.registered_cpus, cpu));
+ cpumap_set(&llsync_data.registered_cpus, cpu);
+ llsync_data.nr_registered_cpus++;
- assert(!cpumap_test(&llsync_pending_checkpoints, cpu));
+ assert(!cpumap_test(&llsync_data.pending_checkpoints, cpu));
- if ((llsync_nr_registered_cpus == 1)
- && (llsync_nr_pending_checkpoints == 0))
- llsync_process_global_checkpoint(cpu);
+ if ((llsync_data.nr_registered_cpus == 1)
+ && (llsync_data.nr_pending_checkpoints == 0))
+ llsync_process_global_checkpoint();
- spinlock_unlock_intr_restore(&llsync_lock, flags);
+ spinlock_unlock_intr_restore(&llsync_data.lock, flags);
}
void
-llsync_unregister_cpu(unsigned int cpu)
+llsync_unregister(void)
{
+ struct llsync_cpu_data *cpu_data;
unsigned long flags;
+ unsigned int cpu;
- spinlock_lock_intr_save(&llsync_lock, &flags);
+ cpu = cpu_id();
+ cpu_data = llsync_get_cpu_data(cpu);
- assert(llsync_cpus[cpu].registered);
- llsync_cpus[cpu].registered = 0;
+ spinlock_lock_intr_save(&llsync_data.lock, &flags);
- assert(cpumap_test(&llsync_registered_cpus, cpu));
- cpumap_clear(&llsync_registered_cpus, cpu);
- llsync_nr_registered_cpus--;
+ assert(cpu_data->registered);
+ cpu_data->registered = 0;
+
+ assert(cpumap_test(&llsync_data.registered_cpus, cpu));
+ cpumap_clear(&llsync_data.registered_cpus, cpu);
+ llsync_data.nr_registered_cpus--;
/*
* Processor registration qualifies as a checkpoint. Since unregistering
* a processor also disables commits until it's registered again, perform
* one now.
*/
- llsync_commit_checkpoint_common(cpu);
+ llsync_commit_checkpoint(cpu);
- spinlock_unlock_intr_restore(&llsync_lock, flags);
+ spinlock_unlock_intr_restore(&llsync_data.lock, flags);
}
void
-llsync_reset_checkpoint(unsigned int cpu)
+llsync_report_periodic_event(void)
{
+ struct llsync_cpu_data *cpu_data;
+ unsigned int cpu, gcid;
+
assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
- spinlock_lock(&llsync_lock);
+ cpu = cpu_id();
+ cpu_data = llsync_get_cpu_data(cpu);
- evcnt_inc(&llsync_cpus[cpu].ev_reset);
- llsync_reset_checkpoint_common(cpu);
+ if (!cpu_data->registered)
+ return;
+
+ spinlock_lock(&llsync_data.lock);
+
+ gcid = llsync_data.gcid.value;
+ assert((gcid - cpu_data->gcid) <= 1);
/*
- * It may happen that this processor was registered at the time a global
- * checkpoint occurred, but unregistered itself before receiving the reset
- * interrupt. In this case, behave as if the reset request was received
- * before unregistering by immediately committing the local checkpoint.
+ * If the local copy of the global checkpoint ID matches the true
+ * value, the current processor has checked in.
+ *
+ * Otherwise, there has been no checkpoint since the last global checkpoint.
+ * Check whether this periodic event occurred during a read-side critical
+ * section, and if not, trigger a checkpoint.
*/
- if (!llsync_cpus[cpu].registered) {
- evcnt_inc(&llsync_cpus[cpu].ev_spurious_reset);
- llsync_commit_checkpoint_common(cpu);
+ if (cpu_data->gcid == gcid)
+ llsync_commit_checkpoint(cpu);
+ else {
+ if (thread_llsync_in_read_cs())
+ evcnt_inc(&llsync_data.ev_failed_periodic_checkin);
+ else {
+ cpu_data->gcid = gcid;
+ evcnt_inc(&llsync_data.ev_periodic_checkin);
+ llsync_commit_checkpoint(cpu);
+ }
}
- spinlock_unlock(&llsync_lock);
-}
-
-void
-llsync_commit_checkpoint(unsigned int cpu)
-{
- assert(!cpu_intr_enabled());
-
- if (!(llsync_cpus[cpu].registered && llsync_cpus[cpu].checked))
- return;
-
- spinlock_lock(&llsync_lock);
- llsync_commit_checkpoint_common(cpu);
- spinlock_unlock(&llsync_lock);
+ spinlock_unlock(&llsync_data.lock);
}
void
@@ -267,15 +241,15 @@ llsync_defer(struct work *work)
{
unsigned long flags;
- spinlock_lock_intr_save(&llsync_lock, &flags);
+ spinlock_lock_intr_save(&llsync_data.lock, &flags);
- work_queue_push(&llsync_queue0, work);
- llsync_nr_pending_works++;
+ work_queue_push(&llsync_data.queue0, work);
+ llsync_data.nr_pending_works++;
- if (llsync_nr_pending_works == LLSYNC_NR_PENDING_WORKS_WARN)
+ if (llsync_data.nr_pending_works == LLSYNC_NR_PENDING_WORKS_WARN)
printk("llsync: warning: large number of pending works\n");
- spinlock_unlock_intr_restore(&llsync_lock, flags);
+ spinlock_unlock_intr_restore(&llsync_data.lock, flags);
}
static void
diff --git a/kern/llsync.h b/kern/llsync.h
index 1919b62..0d7438b 100644
--- a/kern/llsync.h
+++ b/kern/llsync.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013 Richard Braun.
+ * Copyright (c) 2013-2014 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,6 +16,55 @@
*
*
* Lockless synchronization.
+ *
+ * The llsync module provides services similar to RCU (Read-Copy Update).
+ * As such, it can be thought of as an efficient reader-writer lock
+ * replacement. It is efficient because read-side critical sections
+ * don't use expensive synchronization mechanisms such as locks or atomic
+ * instructions. Lockless synchronization is therefore best used for
+ * read-mostly objects. Updating still requires conventional lock-based
+ * synchronization.
+ *
+ * The basic idea is that read-side critical sections are assumed to hold
+ * read-side references, and objects for which there may be read-side
+ * references must exist as long as such references may be held. The llsync
+ * module tracks special system events to determine when read-side references
+ * can no longer exist.
+ *
+ * Since read-side critical sections can run concurrently with updates,
+ * it is important to make sure that objects are consistent when being
+ * accessed. This is achieved with a publish/subscribe mechanism that relies
+ * on the natural atomicity of machine word updates in memory, i.e. all
+ * supported architectures must guarantee that, when updating a word, and
+ * in turn a pointer, other processors reading that word obtain a valid
+ * value, that is either the previous or the next value of the word, but not
+ * a mixed-up value. The llsync module provides the llsync_assign_ptr() and
+ * llsync_read_ptr() wrappers that take care of low level details such as
+ * compiler and memory barriers, so that objects are completely built and
+ * consistent when published and accessed.
+ *
+ * As objects are published through pointers, multiple versions can exist at
+ * the same time. Previous versions cannot be deleted as long as read-side
+ * references may exist. Operations that must wait for all read-side references
+ * to be dropped can be either synchronous, i.e. block until it is safe to
+ * proceed, or be deferred, in which case they are queued and later handed to
+ * the work module. As a result, special care must be taken if using lockless
+ * synchronization in the work module itself.
+ *
+ * The two system events tracked by the llsync module are context switches
+ * and a periodic event, normally the periodic timer interrupt that drives
+ * the scheduler. Context switches are used as checkpoint triggers. A
+ * checkpoint is a point in execution at which no read-side reference can
+ * exist, i.e. the processor isn't running any read-side critical section.
+ * Since context switches can be very frequent, a checkpoint is local to
+ * the processor and lightweight. The periodic event is used to commit
+ * checkpoints globally so that other processors are aware of the progress
+ * of one another. As the system allows situations in which two periodic
+ * events can occur without a single context switch, the periodic event is
+ * also used as a checkpoint trigger. When all checkpoints have been
+ * committed, a global checkpoint occurs. The occurrence of global checkpoints
+ * allows the llsync module to determine when it is safe to process deferred
+ * work or unblock update sides.
*/
#ifndef _KERN_LLSYNC_H
@@ -30,10 +79,6 @@
/*
* Safely assign a pointer.
- *
- * This macro enforces memory ordering. It should be used to reference
- * objects once they're completely built, so that readers accessing the
- * pointer obtain consistent data.
*/
#define llsync_assign_ptr(ptr, value) \
MACRO_BEGIN \
@@ -48,27 +93,31 @@ MACRO_END
*/
#define llsync_read_ptr(ptr) (ptr)
+/*
+ * Read-side critical section enter/exit functions.
+ *
+ * It is not allowed to block inside a read-side critical section.
+ */
+
static inline void
llsync_read_enter(void)
{
- thread_preempt_disable();
+ int in_read_cs;
+
+ in_read_cs = thread_llsync_in_read_cs();
+ thread_llsync_read_inc();
+
+ if (!in_read_cs)
+ thread_preempt_disable();
}
static inline void
llsync_read_exit(void)
{
- thread_preempt_enable();
-}
+ thread_llsync_read_dec();
-/*
- * Report that a processor has reached a checkpoint.
- *
- * Called during context switch.
- */
-static inline void
-llsync_checkin(unsigned int cpu)
-{
- llsync_cpus[cpu].checked = 1;
+ if (!thread_llsync_in_read_cs())
+ thread_preempt_enable();
}
/*
@@ -77,34 +126,39 @@ llsync_checkin(unsigned int cpu)
void llsync_setup(void);
/*
- * Report that a processor will be regularly checking in.
+ * Manage registration of the current processor.
*
- * Registered processors perform checkpoint commits and receive checkpoint
- * reset interrupts.
- */
-void llsync_register_cpu(unsigned int cpu);
-
-/*
- * Report that a processor has entered a state in which checking in becomes
- * irrelevant (e.g. the idle loop).
+ * The caller must not be allowed to migrate when calling these functions.
+ *
+ * Registering tells the llsync module that the current processor reports
+ * context switches and periodic events.
+ *
+ * When a processor enters a state in which checking in becomes irrelevant,
+ * it unregisters itself so that the other registered processors don't need
+ * to wait for it to make progress. For example, this is done inside the
+ * idle loop since it is obviously impossible to enter a read-side critical
+ * section while idling.
*/
-void llsync_unregister_cpu(unsigned int cpu);
+void llsync_register(void);
+void llsync_unregister(void);
/*
- * Commit a pending checkpoint.
+ * Report a context switch on the current processor.
*
- * Checking in is a light processor-local operation. Committing a checkpoint
- * is a heavier global one, and is performed less often, normally during the
- * system timer interrupt.
+ * Interrupts and preemption must be disabled when calling this function.
*/
-void llsync_commit_checkpoint(unsigned int cpu);
+static inline void
+llsync_report_context_switch(void)
+{
+ llsync_checkin();
+}
/*
- * Reset the checkpoint pending state of a processor.
+ * Report a periodic event on the current processor.
*
- * Called from interrupt context.
+ * Interrupts and preemption must be disabled when calling this function.
*/
-void llsync_reset_checkpoint(unsigned int cpu);
+void llsync_report_periodic_event(void);
/*
* Defer an operation until all existing read-side references are dropped,
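To make the publish/subscribe description above concrete, here is a hedged usage sketch of the API. Only the llsync_* calls and struct work come from this patch; struct obj, obj_lock and the kmem_alloc()/kmem_free(), work_init() and structof() helpers are assumed to exist as in the rest of this tree and are otherwise illustrative (error handling omitted).

#include <kern/kmem.h>
#include <kern/llsync.h>
#include <kern/macros.h>
#include <kern/spinlock.h>
#include <kern/work.h>

struct obj {
    struct work work;
    int value;
};

static struct obj *obj_ptr;         /* published pointer */
static struct spinlock obj_lock;    /* serializes writers only, initialized elsewhere */

/* Reader: no locks, no atomic instructions, must not block. */
int
obj_read_value(void)
{
    struct obj *obj;
    int value;

    llsync_read_enter();
    obj = llsync_read_ptr(obj_ptr);
    value = (obj != NULL) ? obj->value : -1;
    llsync_read_exit();
    return value;
}

static void
obj_free(struct work *work)
{
    struct obj *obj;

    obj = structof(work, struct obj, work);
    kmem_free(obj, sizeof(*obj));
}

/* Writer: build the new version completely, publish it with the proper
 * barriers, then defer destruction of the old version until all existing
 * read-side references may have been dropped. */
void
obj_update_value(int value)
{
    struct obj *obj, *old;

    obj = kmem_alloc(sizeof(*obj));
    obj->value = value;

    spinlock_lock(&obj_lock);
    old = obj_ptr;
    llsync_assign_ptr(obj_ptr, obj);
    spinlock_unlock(&obj_lock);

    if (old != NULL) {
        work_init(&old->work, obj_free);
        llsync_defer(&old->work);
    }
}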
diff --git a/kern/llsync_i.h b/kern/llsync_i.h
index 5dae472..f14c9c8 100644
--- a/kern/llsync_i.h
+++ b/kern/llsync_i.h
@@ -18,21 +18,98 @@
#ifndef _KERN_LLSYNC_I_H
#define _KERN_LLSYNC_I_H
+#include <kern/assert.h>
+#include <kern/cpumap.h>
#include <kern/evcnt.h>
+#include <kern/macros.h>
#include <kern/param.h>
+#include <kern/spinlock.h>
+#include <kern/work.h>
+#include <machine/cpu.h>
+
+/*
+ * Global data.
+ *
+ * The queue number matches the number of global checkpoints that occurred
+ * since works contained in it were added. After two global checkpoints,
+ * works are scheduled for processing.
+ *
+ * Interrupts must be disabled when acquiring the global data lock.
+ */
+struct llsync_data {
+ struct spinlock lock;
+ struct cpumap registered_cpus;
+ unsigned int nr_registered_cpus;
+ struct cpumap pending_checkpoints;
+ unsigned int nr_pending_checkpoints;
+ struct work_queue queue0;
+ struct work_queue queue1;
+ unsigned long nr_pending_works;
+ struct evcnt ev_global_checkpoint;
+ struct evcnt ev_periodic_checkin;
+ struct evcnt ev_failed_periodic_checkin;
+
+ /*
+ * Global checkpoint ID.
+ *
+ * This variable can be frequently accessed from many processors, so:
+ * - reserve a whole cache line for it
+ * - apply optimistic accesses to reduce contention
+ */
+ struct {
+ volatile unsigned int value __aligned(CPU_L1_SIZE);
+ } gcid;
+};
+
+extern struct llsync_data llsync_data;
/*
* Per-processor data.
*
- * Interrupts must be disabled on access.
+ * Every processor records whether it is registered and a local copy of the
+ * global checkpoint ID, which is meaningless on unregistered processors.
+ * The true global checkpoint ID is incremented when a global checkpoint occurs,
+ * after which all the local copies become stale. Checking in synchronizes
+ * the local copy of the global checkpoint ID.
+ *
+ * Interrupts and preemption must be disabled on access.
*/
-struct llsync_cpu {
+struct llsync_cpu_data {
int registered;
- int checked;
- struct evcnt ev_reset;
- struct evcnt ev_spurious_reset;
+ unsigned int gcid;
} __aligned(CPU_L1_SIZE);
-extern struct llsync_cpu llsync_cpus[MAX_CPUS];
+extern struct llsync_cpu_data llsync_cpu_data[MAX_CPUS];
+
+static inline struct llsync_cpu_data *
+llsync_get_cpu_data(unsigned int cpu)
+{
+ return &llsync_cpu_data[cpu];
+}
+
+static inline void
+llsync_checkin(void)
+{
+ struct llsync_cpu_data *cpu_data;
+ unsigned int cpu, gcid;
+
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ cpu = cpu_id();
+ cpu_data = llsync_get_cpu_data(cpu);
+
+ if (!cpu_data->registered)
+ return;
+
+ /*
+ * The global checkpoint ID obtained might be obsolete here, in which
+ * case a commit will not determine that a checkpoint actually occurred.
+ * This should seldom happen.
+ */
+ gcid = llsync_data.gcid.value;
+ assert((gcid - cpu_data->gcid) <= 1);
+ cpu_data->gcid = gcid;
+}
#endif /* _KERN_LLSYNC_I_H */
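A note on the odd initial value: LLSYNC_INITIAL_GCID is (unsigned int)-10, so the global checkpoint ID wraps around shortly after boot and the overflow path gets exercised early. The assertion (gcid - cpu_data->gcid) <= 1 above stays valid across the wrap because the subtraction is performed in unsigned modular arithmetic, as the following stand-alone check (plain user-space C, illustration only) shows:

/* Stand-alone check of the wrap-around property relied on by
 * llsync_checkin(): the difference (gcid - local) is computed modulo
 * UINT_MAX + 1, so it stays correct when the counter overflows. */
#include <assert.h>
#include <limits.h>

int
main(void)
{
    unsigned int gcid = (unsigned int)-10;  /* LLSYNC_INITIAL_GCID */
    unsigned int local = gcid;
    unsigned int i;

    for (i = 0; i < 100; i++) {
        local = gcid;       /* check in */
        gcid++;             /* global checkpoint, wraps past UINT_MAX */
        assert((gcid - local) <= 1);
    }

    return 0;
}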
diff --git a/kern/thread.c b/kern/thread.c
index 60f5790..9ed55d3 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -482,7 +482,7 @@ thread_runq_schedule(struct thread_runq *runq, struct thread *prev)
assert(!cpu_intr_enabled());
spinlock_assert_locked(&runq->lock);
- llsync_checkin(thread_runq_id(runq));
+ llsync_report_context_switch();
thread_clear_flag(prev, THREAD_YIELD);
thread_runq_put_prev(runq, prev);
@@ -1463,6 +1463,7 @@ thread_init(struct thread *thread, void *stack, const struct thread_attr *attr,
thread->state = THREAD_SLEEPING;
thread->preempt = 2;
thread->pinned = 0;
+ thread->llsync_read = 0;
thread->sched_policy = attr->policy;
thread->sched_class = thread_policy_table[attr->policy];
cpumap_copy(&thread->cpumap, cpumap);
@@ -1679,14 +1680,14 @@ static void
thread_idle(void *arg)
{
struct thread *self;
- unsigned int cpu;
+
+ (void)arg;
self = thread_self();
- cpu = thread_runq_id(arg);
for (;;) {
thread_preempt_disable();
- llsync_unregister_cpu(cpu);
+ llsync_unregister();
for (;;) {
cpu_intr_disable();
@@ -1699,7 +1700,7 @@ thread_idle(void *arg)
cpu_idle();
}
- llsync_register_cpu(cpu);
+ llsync_register();
thread_preempt_enable();
}
}
@@ -1735,7 +1736,7 @@ thread_setup_idler(struct thread_runq *runq)
thread_attr_init(&attr, name);
thread_attr_set_cpumap(&attr, cpumap);
thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_IDLE);
- error = thread_init(idler, stack, &attr, thread_idle, runq);
+ error = thread_init(idler, stack, &attr, thread_idle, NULL);
if (error)
panic("thread: unable to initialize idler thread");
@@ -1946,7 +1947,7 @@ thread_run_scheduler(void)
assert(!cpu_intr_enabled());
runq = thread_runq_local();
- llsync_register_cpu(thread_runq_id(runq));
+ llsync_register();
thread = thread_self();
assert(thread == runq->current);
assert(thread->preempt == 1);
@@ -2003,7 +2004,7 @@ thread_tick_intr(void)
runq = thread_runq_local();
evcnt_inc(&runq->ev_tick);
- llsync_commit_checkpoint(thread_runq_id(runq));
+ llsync_report_periodic_event();
thread = thread_self();
spinlock_lock(&runq->lock);
diff --git a/kern/thread.h b/kern/thread.h
index e570cd2..01d59e5 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -154,6 +154,7 @@ struct thread {
/* Thread-local members */
unsigned short preempt;
unsigned short pinned;
+ unsigned short llsync_read;
/* Common scheduling properties */
unsigned char sched_policy;
@@ -448,6 +449,40 @@ thread_preempt_disable(void)
}
/*
+ * Lockless synchronization read-side critical section nesting counter
+ * control functions.
+ */
+
+static inline int
+thread_llsync_in_read_cs(void)
+{
+ struct thread *thread;
+
+ thread = thread_self();
+ return (thread->llsync_read != 0);
+}
+
+static inline void
+thread_llsync_read_inc(void)
+{
+ struct thread *thread;
+
+ thread = thread_self();
+ thread->llsync_read++;
+ assert(thread->llsync_read != 0);
+}
+
+static inline void
+thread_llsync_read_dec(void)
+{
+ struct thread *thread;
+
+ thread = thread_self();
+ assert(thread->llsync_read != 0);
+ thread->llsync_read--;
+}
+
+/*
* Type for thread-specific data destructor.
*/
typedef void (*thread_dtor_fn_t)(void *);
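Since read-side critical sections now maintain a per-thread nesting counter, they may nest freely; preemption is disabled when the outermost section is entered and re-enabled only when it is left. A sketch of what this permits (struct obj and obj_table are hypothetical; only the llsync_* calls and the thread-local counter behaviour come from this patch):

#include <kern/llsync.h>

struct obj {
    int value;
};

extern struct obj *obj_table[16];      /* hypothetical published pointers */

/*
 * Safe to call on its own or from within another read-side critical
 * section: nested calls only increment the thread-local counter, and only
 * the outermost enter/exit toggles preemption.
 */
static int
obj_get_value(unsigned int key)
{
    struct obj *obj;
    int value;

    llsync_read_enter();
    obj = llsync_read_ptr(obj_table[key]);
    value = (obj != NULL) ? obj->value : -1;
    llsync_read_exit();
    return value;
}

int
obj_sum_first_two(void)
{
    int sum;

    llsync_read_enter();                        /* outermost: disables preemption */
    sum = obj_get_value(0) + obj_get_value(1);  /* nested sections */
    llsync_read_exit();                         /* outermost: re-enables preemption */
    return sum;
}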