-rw-r--r--  arch/x86/machine/cpu.c       |  11
-rw-r--r--  arch/x86/machine/cpu.h       |  14
-rw-r--r--  arch/x86/machine/trap.c      |   3
-rw-r--r--  arch/x86/machine/trap.h      |   1
-rw-r--r--  arch/x86/machine/trap_asm.S  |   1
-rw-r--r--  kern/llsync.c                | 242
-rw-r--r--  kern/llsync.h                | 124
-rw-r--r--  kern/llsync_i.h              |  89
-rw-r--r--  kern/thread.c                |  17
-rw-r--r--  kern/thread.h                |  35
10 files changed, 324 insertions(+), 213 deletions(-)
diff --git a/arch/x86/machine/cpu.c b/arch/x86/machine/cpu.c
index 8d680d8f..18686a38 100644
--- a/arch/x86/machine/cpu.c
+++ b/arch/x86/machine/cpu.c
@@ -17,7 +17,6 @@
#include <kern/assert.h>
#include <kern/init.h>
-#include <kern/llsync.h>
#include <kern/macros.h>
#include <kern/panic.h>
#include <kern/param.h>
@@ -608,13 +607,3 @@ cpu_thread_schedule_intr(struct trap_frame *frame)
thread_schedule_intr();
}
-
-void
-cpu_llsync_reset_intr(struct trap_frame *frame)
-{
- (void)frame;
-
- lapic_eoi();
-
- llsync_reset_checkpoint(cpu_id());
-}
diff --git a/arch/x86/machine/cpu.h b/arch/x86/machine/cpu.h
index f1e9cfb6..07dd98db 100644
--- a/arch/x86/machine/cpu.h
+++ b/arch/x86/machine/cpu.h
@@ -658,20 +658,6 @@ cpu_send_thread_schedule(unsigned int cpu)
*/
void cpu_thread_schedule_intr(struct trap_frame *frame);
-/*
- * Request a remote processor to reset its checkpoint.
- */
-static inline void
-cpu_send_llsync_reset(unsigned int cpu)
-{
- lapic_ipi_send(cpu_from_id(cpu)->apic_id, TRAP_LLSYNC_RESET);
-}
-
-/*
- * Interrupt handler for checkpoint reset requests.
- */
-void cpu_llsync_reset_intr(struct trap_frame *frame);
-
#endif /* __ASSEMBLER__ */
#endif /* _X86_CPU_H */
diff --git a/arch/x86/machine/trap.c b/arch/x86/machine/trap.c
index ad586ea9..8e350f00 100644
--- a/arch/x86/machine/trap.c
+++ b/arch/x86/machine/trap.c
@@ -75,7 +75,6 @@ void trap_isr_machine_check(void);
void trap_isr_simd_fp_exception(void);
void trap_isr_pic_int7(void);
void trap_isr_pic_int15(void);
-void trap_isr_llsync_reset(void);
void trap_isr_thread_schedule(void);
void trap_isr_cpu_halt(void);
void trap_isr_lapic_timer(void);
@@ -202,8 +201,6 @@ trap_setup(void)
trap_isr_pic_int15, pic_intr_spurious);
/* System defined traps */
- trap_install(TRAP_LLSYNC_RESET, TRAP_HF_NOPREEMPT,
- trap_isr_llsync_reset, cpu_llsync_reset_intr);
trap_install(TRAP_THREAD_SCHEDULE, TRAP_HF_NOPREEMPT,
trap_isr_thread_schedule, cpu_thread_schedule_intr);
trap_install(TRAP_CPU_HALT, TRAP_HF_NOPREEMPT,
diff --git a/arch/x86/machine/trap.h b/arch/x86/machine/trap.h
index bc0426d0..99e23896 100644
--- a/arch/x86/machine/trap.h
+++ b/arch/x86/machine/trap.h
@@ -53,7 +53,6 @@
*
* The local APIC assigns one priority every 16 vectors.
*/
-#define TRAP_LLSYNC_RESET 238
#define TRAP_THREAD_SCHEDULE 239
#define TRAP_CPU_HALT 240
#define TRAP_LAPIC_TIMER 253
diff --git a/arch/x86/machine/trap_asm.S b/arch/x86/machine/trap_asm.S
index 7d06d9dd..bb70f46e 100644
--- a/arch/x86/machine/trap_asm.S
+++ b/arch/x86/machine/trap_asm.S
@@ -157,7 +157,6 @@ TRAP(TRAP_PIC_BASE + 7, trap_isr_pic_int7)
TRAP(TRAP_PIC_BASE + 15, trap_isr_pic_int15)
/* System defined traps */
-TRAP(TRAP_LLSYNC_RESET, trap_isr_llsync_reset)
TRAP(TRAP_THREAD_SCHEDULE, trap_isr_thread_schedule)
TRAP(TRAP_CPU_HALT, trap_isr_cpu_halt)
TRAP(TRAP_LAPIC_TIMER, trap_isr_lapic_timer)
diff --git a/kern/llsync.c b/kern/llsync.c
index ba99a9b0..7c3e1c69 100644
--- a/kern/llsync.c
+++ b/kern/llsync.c
@@ -50,50 +50,20 @@
#include <kern/work.h>
#include <machine/cpu.h>
-#define LLSYNC_NR_PENDING_WORKS_WARN 10000
-
-struct llsync_cpu llsync_cpus[MAX_CPUS];
-
-/*
- * Global lock protecting the remaining module data.
- *
- * Interrupts must be disabled when acquiring this lock.
- */
-static struct spinlock llsync_lock;
-
-/*
- * Map of processors regularly checking in.
- */
-static struct cpumap llsync_registered_cpus;
-static unsigned int llsync_nr_registered_cpus;
-
/*
- * Map of processors for which a checkpoint commit is pending.
+ * Initial global checkpoint ID.
*
- * To reduce contention, checking in only affects a single per-processor
- * cache line. Special events (currently the system timer interrupt only)
- * trigger checkpoint commits, which report the local state to this CPU
- * map, thereby acquiring the global lock.
+ * Set to a high value to make sure overflows are correctly handled.
*/
-static struct cpumap llsync_pending_checkpoints;
-static unsigned int llsync_nr_pending_checkpoints;
+#define LLSYNC_INITIAL_GCID ((unsigned int)-10)
/*
- * Queues of deferred works.
- *
- * The queue number matches the number of global checkpoints that occurred
- * since works contained in it were added. After two global checkpoints,
- * works are scheduled for processing.
+ * Number of pending works beyond which to issue a warning.
*/
-static struct work_queue llsync_queue0;
-static struct work_queue llsync_queue1;
+#define LLSYNC_NR_PENDING_WORKS_WARN 10000
-/*
- * Number of works not yet scheduled for processing.
- *
- * Mostly unused, except for debugging.
- */
-static unsigned long llsync_nr_pending_works;
+struct llsync_data llsync_data;
+struct llsync_cpu_data llsync_cpu_data[MAX_CPUS];
struct llsync_waiter {
struct work work;
@@ -105,161 +75,165 @@ struct llsync_waiter {
void __init
llsync_setup(void)
{
- char name[EVCNT_NAME_SIZE];
- unsigned int cpu;
-
- spinlock_init(&llsync_lock);
- work_queue_init(&llsync_queue0);
- work_queue_init(&llsync_queue1);
-
- for (cpu = 0; cpu < cpu_count(); cpu++) {
- snprintf(name, sizeof(name), "llsync_reset/%u", cpu);
- evcnt_register(&llsync_cpus[cpu].ev_reset, name);
- snprintf(name, sizeof(name), "llsync_spurious_reset/%u", cpu);
- evcnt_register(&llsync_cpus[cpu].ev_spurious_reset, name);
- }
-}
-
-static void
-llsync_reset_checkpoint_common(unsigned int cpu)
-{
- assert(!cpumap_test(&llsync_pending_checkpoints, cpu));
- cpumap_set(&llsync_pending_checkpoints, cpu);
- llsync_cpus[cpu].checked = 0;
+ spinlock_init(&llsync_data.lock);
+ work_queue_init(&llsync_data.queue0);
+ work_queue_init(&llsync_data.queue1);
+ evcnt_register(&llsync_data.ev_global_checkpoint,
+ "llsync_global_checkpoint");
+ evcnt_register(&llsync_data.ev_periodic_checkin,
+ "llsync_periodic_checkin");
+ evcnt_register(&llsync_data.ev_failed_periodic_checkin,
+ "llsync_failed_periodic_checkin");
+ llsync_data.gcid.value = LLSYNC_INITIAL_GCID;
}
static void
-llsync_process_global_checkpoint(unsigned int cpu)
+llsync_process_global_checkpoint(void)
{
struct work_queue queue;
unsigned int nr_works;
- int i;
- if (llsync_nr_registered_cpus == 0) {
- work_queue_concat(&llsync_queue1, &llsync_queue0);
- work_queue_init(&llsync_queue0);
- }
+ assert(cpumap_find_first(&llsync_data.pending_checkpoints) == -1);
+ assert(llsync_data.nr_pending_checkpoints == 0);
- work_queue_transfer(&queue, &llsync_queue1);
- work_queue_transfer(&llsync_queue1, &llsync_queue0);
- work_queue_init(&llsync_queue0);
-
- llsync_nr_pending_checkpoints = llsync_nr_registered_cpus;
-
- if (llsync_cpus[cpu].registered)
- llsync_reset_checkpoint_common(cpu);
-
- cpumap_for_each(&llsync_registered_cpus, i)
- if ((unsigned int)i != cpu)
- cpu_send_llsync_reset(i);
+ if (llsync_data.nr_registered_cpus == 0) {
+ work_queue_concat(&llsync_data.queue1, &llsync_data.queue0);
+ work_queue_init(&llsync_data.queue0);
+ } else {
+ cpumap_copy(&llsync_data.pending_checkpoints, &llsync_data.registered_cpus);
+ llsync_data.nr_pending_checkpoints = llsync_data.nr_registered_cpus;
+ }
+ work_queue_transfer(&queue, &llsync_data.queue1);
+ work_queue_transfer(&llsync_data.queue1, &llsync_data.queue0);
+ work_queue_init(&llsync_data.queue0);
nr_works = work_queue_nr_works(&queue);
if (nr_works != 0) {
- llsync_nr_pending_works -= nr_works;
+ llsync_data.nr_pending_works -= nr_works;
work_queue_schedule(&queue, 0);
}
+
+ llsync_data.gcid.value++;
+ evcnt_inc(&llsync_data.ev_global_checkpoint);
}
static void
-llsync_commit_checkpoint_common(unsigned int cpu)
+llsync_commit_checkpoint(unsigned int cpu)
{
int pending;
- pending = cpumap_test(&llsync_pending_checkpoints, cpu);
+ pending = cpumap_test(&llsync_data.pending_checkpoints, cpu);
if (!pending)
return;
- cpumap_clear(&llsync_pending_checkpoints, cpu);
- llsync_nr_pending_checkpoints--;
+ cpumap_clear(&llsync_data.pending_checkpoints, cpu);
+ llsync_data.nr_pending_checkpoints--;
- if (llsync_nr_pending_checkpoints == 0)
- llsync_process_global_checkpoint(cpu);
+ if (llsync_data.nr_pending_checkpoints == 0)
+ llsync_process_global_checkpoint();
}
void
-llsync_register_cpu(unsigned int cpu)
+llsync_register(void)
{
+ struct llsync_cpu_data *cpu_data;
unsigned long flags;
+ unsigned int cpu;
+
+ cpu = cpu_id();
+ cpu_data = llsync_get_cpu_data(cpu);
- spinlock_lock_intr_save(&llsync_lock, &flags);
+ spinlock_lock_intr_save(&llsync_data.lock, &flags);
- assert(!llsync_cpus[cpu].registered);
- llsync_cpus[cpu].registered = 1;
+ assert(!cpu_data->registered);
+ cpu_data->registered = 1;
+ cpu_data->gcid = llsync_data.gcid.value;
- assert(!cpumap_test(&llsync_registered_cpus, cpu));
- cpumap_set(&llsync_registered_cpus, cpu);
- llsync_nr_registered_cpus++;
+ assert(!cpumap_test(&llsync_data.registered_cpus, cpu));
+ cpumap_set(&llsync_data.registered_cpus, cpu);
+ llsync_data.nr_registered_cpus++;
- assert(!cpumap_test(&llsync_pending_checkpoints, cpu));
+ assert(!cpumap_test(&llsync_data.pending_checkpoints, cpu));
- if ((llsync_nr_registered_cpus == 1)
- && (llsync_nr_pending_checkpoints == 0))
- llsync_process_global_checkpoint(cpu);
+ if ((llsync_data.nr_registered_cpus == 1)
+ && (llsync_data.nr_pending_checkpoints == 0))
+ llsync_process_global_checkpoint();
- spinlock_unlock_intr_restore(&llsync_lock, flags);
+ spinlock_unlock_intr_restore(&llsync_data.lock, flags);
}
void
-llsync_unregister_cpu(unsigned int cpu)
+llsync_unregister(void)
{
+ struct llsync_cpu_data *cpu_data;
unsigned long flags;
+ unsigned int cpu;
- spinlock_lock_intr_save(&llsync_lock, &flags);
+ cpu = cpu_id();
+ cpu_data = llsync_get_cpu_data(cpu);
- assert(llsync_cpus[cpu].registered);
- llsync_cpus[cpu].registered = 0;
+ spinlock_lock_intr_save(&llsync_data.lock, &flags);
- assert(cpumap_test(&llsync_registered_cpus, cpu));
- cpumap_clear(&llsync_registered_cpus, cpu);
- llsync_nr_registered_cpus--;
+ assert(cpu_data->registered);
+ cpu_data->registered = 0;
+
+ assert(cpumap_test(&llsync_data.registered_cpus, cpu));
+ cpumap_clear(&llsync_data.registered_cpus, cpu);
+ llsync_data.nr_registered_cpus--;
/*
* Processor registration qualifies as a checkpoint. Since unregistering
* a processor also disables commits until it's registered again, perform
* one now.
*/
- llsync_commit_checkpoint_common(cpu);
+ llsync_commit_checkpoint(cpu);
- spinlock_unlock_intr_restore(&llsync_lock, flags);
+ spinlock_unlock_intr_restore(&llsync_data.lock, flags);
}
void
-llsync_reset_checkpoint(unsigned int cpu)
+llsync_report_periodic_event(void)
{
+ struct llsync_cpu_data *cpu_data;
+ unsigned int cpu, gcid;
+
assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
- spinlock_lock(&llsync_lock);
+ cpu = cpu_id();
+ cpu_data = llsync_get_cpu_data(cpu);
- evcnt_inc(&llsync_cpus[cpu].ev_reset);
- llsync_reset_checkpoint_common(cpu);
+ if (!cpu_data->registered)
+ return;
+
+ spinlock_lock(&llsync_data.lock);
+
+ gcid = llsync_data.gcid.value;
+ assert((gcid - cpu_data->gcid) <= 1);
/*
- * It may happen that this processor was registered at the time a global
- * checkpoint occurred, but unregistered itself before receiving the reset
- * interrupt. In this case, behave as if the reset request was received
- * before unregistering by immediately committing the local checkpoint.
+ * If the local copy of the global checkpoint ID matches the true
+ * value, the current processor has checked in.
+ *
+ * Otherwise, there has been no checkpoint since the last global checkpoint.
+ * Check whether this periodic event occurred during a read-side critical
+ * section, and if not, trigger a checkpoint.
*/
- if (!llsync_cpus[cpu].registered) {
- evcnt_inc(&llsync_cpus[cpu].ev_spurious_reset);
- llsync_commit_checkpoint_common(cpu);
+ if (cpu_data->gcid == gcid)
+ llsync_commit_checkpoint(cpu);
+ else {
+ if (thread_llsync_in_read_cs())
+ evcnt_inc(&llsync_data.ev_failed_periodic_checkin);
+ else {
+ cpu_data->gcid = gcid;
+ evcnt_inc(&llsync_data.ev_periodic_checkin);
+ llsync_commit_checkpoint(cpu);
+ }
}
- spinlock_unlock(&llsync_lock);
-}
-
-void
-llsync_commit_checkpoint(unsigned int cpu)
-{
- assert(!cpu_intr_enabled());
-
- if (!(llsync_cpus[cpu].registered && llsync_cpus[cpu].checked))
- return;
-
- spinlock_lock(&llsync_lock);
- llsync_commit_checkpoint_common(cpu);
- spinlock_unlock(&llsync_lock);
+ spinlock_unlock(&llsync_data.lock);
}
void
@@ -267,15 +241,15 @@ llsync_defer(struct work *work)
{
unsigned long flags;
- spinlock_lock_intr_save(&llsync_lock, &flags);
+ spinlock_lock_intr_save(&llsync_data.lock, &flags);
- work_queue_push(&llsync_queue0, work);
- llsync_nr_pending_works++;
+ work_queue_push(&llsync_data.queue0, work);
+ llsync_data.nr_pending_works++;
- if (llsync_nr_pending_works == LLSYNC_NR_PENDING_WORKS_WARN)
+ if (llsync_data.nr_pending_works == LLSYNC_NR_PENDING_WORKS_WARN)
printk("llsync: warning: large number of pending works\n");
- spinlock_unlock_intr_restore(&llsync_lock, flags);
+ spinlock_unlock_intr_restore(&llsync_data.lock, flags);
}
static void
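
The two work queues rotated in llsync_process_global_checkpoint() above are what give deferred works their grace period of two global checkpoints. Below is a minimal standalone model of that rotation, with hypothetical names and plain counters standing in for the kernel's work_queue structures:

    #include <stdio.h>

    int
    main(void)
    {
        int queue0 = 0, queue1 = 0, scheduled = 0;

        queue0 = 1;    /* llsync_defer(): one work pushed into queue0 */

        for (int gc = 1; gc <= 2; gc++) {
            /* Global checkpoint: queue1 is handed to the work module,
             * queue0 ages into queue1, queue0 is reset. */
            scheduled += queue1;
            queue1 = queue0;
            queue0 = 0;
            printf("after global checkpoint %d: %d work(s) scheduled\n",
                   gc, scheduled);
        }

        return 0;
    }

A work only reaches the work module after the second global checkpoint following its insertion, by which point any read-side critical section that could still hold a reference to the old object has necessarily ended.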
diff --git a/kern/llsync.h b/kern/llsync.h
index 1919b62a..0d7438bb 100644
--- a/kern/llsync.h
+++ b/kern/llsync.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2013 Richard Braun.
+ * Copyright (c) 2013-2014 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -16,6 +16,55 @@
*
*
* Lockless synchronization.
+ *
+ * The llsync module provides services similar to RCU (Read-Copy Update).
+ * As such, it can be thought of as an efficient reader-writer lock
+ * replacement. It is efficient because read-side critical sections
+ * don't use expensive synchronization mechanisms such as locks or atomic
+ * instructions. Lockless synchronization is therefore best used for
+ * read-mostly objects. Updating still requires conventional lock-based
+ * synchronization.
+ *
+ * The basic idea is that read-side critical sections are assumed to hold
+ * read-side references, and objects for which there may be read-side
+ * references must exist as long as such references may be held. The llsync
+ * module tracks special system events to determine when read-side references
+ * can no longer exist.
+ *
+ * Since read-side critical sections can run concurrently with updates,
+ * it is important to make sure that objects are consistent when being
+ * accessed. This is achieved with a publish/subscribe mechanism that relies
+ * on the natural atomicity of machine word updates in memory, i.e. all
+ * supported architectures must guarantee that, when updating a word, and
+ * in turn a pointer, other processors reading that word obtain a valid
+ * value, that is either the previous or the next value of the word, but not
+ * a mixed-up value. The llsync module provides the llsync_assign_ptr() and
+ * llsync_read_ptr() wrappers that take care of low level details such as
+ * compiler and memory barriers, so that objects are completely built and
+ * consistent when published and accessed.
+ *
+ * As objects are published through pointers, multiple versions can exist at
+ * the same time. Previous versions cannot be deleted as long as read-side
+ * references may exist. Operations that must wait for all read-side references
+ * to be dropped can be either synchronous, i.e. block until it is safe to
+ * proceed, or be deferred, in which case they are queued and later handed to
+ * the work module. As a result, special care must be taken if using lockless
+ * synchronization in the work module itself.
+ *
+ * The two system events tracked by the llsync module are context switches
+ * and a periodic event, normally the periodic timer interrupt that drives
+ * the scheduler. Context switches are used as checkpoint triggers. A
+ * checkpoint is a point in execution at which no read-side reference can
+ * exist, i.e. the processor isn't running any read-side critical section.
+ * Since context switches can be very frequent, a checkpoint is local to
+ * the processor and lightweight. The periodic event is used to commit
+ * checkpoints globally so that other processors are aware of the progress
+ * of one another. As the system allows situations in which two periodic
+ * events can occur without a single context switch, the periodic event is
+ * also used as a checkpoint trigger. When all checkpoints have been
+ * committed, a global checkpoint occurs. The occurrence of global checkpoints
+ * allows the llsync module to determine when it is safe to process deferred
+ * work or unblock update sides.
*/
#ifndef _KERN_LLSYNC_H
@@ -30,10 +79,6 @@
/*
* Safely assign a pointer.
- *
- * This macro enforces memory ordering. It should be used to reference
- * objects once they're completely built, so that readers accessing the
- * pointer obtain consistent data.
*/
#define llsync_assign_ptr(ptr, value) \
MACRO_BEGIN \
@@ -48,27 +93,31 @@ MACRO_END
*/
#define llsync_read_ptr(ptr) (ptr)
+/*
+ * Read-side critical section enter/exit functions.
+ *
+ * It is not allowed to block inside a read-side critical section.
+ */
+
static inline void
llsync_read_enter(void)
{
- thread_preempt_disable();
+ int in_read_cs;
+
+ in_read_cs = thread_llsync_in_read_cs();
+ thread_llsync_read_inc();
+
+ if (!in_read_cs)
+ thread_preempt_disable();
}
static inline void
llsync_read_exit(void)
{
- thread_preempt_enable();
-}
+ thread_llsync_read_dec();
-/*
- * Report that a processor has reached a checkpoint.
- *
- * Called during context switch.
- */
-static inline void
-llsync_checkin(unsigned int cpu)
-{
- llsync_cpus[cpu].checked = 1;
+ if (!thread_llsync_in_read_cs())
+ thread_preempt_enable();
}
/*
@@ -77,34 +126,39 @@ llsync_checkin(unsigned int cpu)
void llsync_setup(void);
/*
- * Report that a processor will be regularly checking in.
+ * Manage registration of the current processor.
*
- * Registered processors perform checkpoint commits and receive checkpoint
- * reset interrupts.
- */
-void llsync_register_cpu(unsigned int cpu);
-
-/*
- * Report that a processor has entered a state in which checking in becomes
- * irrelevant (e.g. the idle loop).
+ * The caller must not be allowed to migrate when calling these functions.
+ *
+ * Registering tells the llsync module that the current processor reports
+ * context switches and periodic events.
+ *
+ * When a processor enters a state in which checking in becomes irrelevant,
+ * it unregisters itself so that the other registered processors don't need
+ * to wait for it to make progress. For example, this is done inside the
+ * idle loop since it is obviously impossible to enter a read-side critical
+ * section while idling.
*/
-void llsync_unregister_cpu(unsigned int cpu);
+void llsync_register(void);
+void llsync_unregister(void);
/*
- * Commit a pending checkpoint.
+ * Report a context switch on the current processor.
*
- * Checking in is a light processor-local operation. Committing a checkpoint
- * is a heavier global one, and is performed less often, normally during the
- * system timer interrupt.
+ * Interrupts and preemption must be disabled when calling this function.
*/
-void llsync_commit_checkpoint(unsigned int cpu);
+static inline void
+llsync_report_context_switch(void)
+{
+ llsync_checkin();
+}
/*
- * Reset the checkpoint pending state of a processor.
+ * Report a periodic event on the current processor.
*
- * Called from interrupt context.
+ * Interrupts and preemption must be disabled when calling this function.
*/
-void llsync_reset_checkpoint(unsigned int cpu);
+void llsync_report_periodic_event(void);
/*
* Defer an operation until all existing read-side references are dropped,
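
For reference, a minimal usage sketch of the interface documented above. The obj structure, global_ptr variable, obj_read()/obj_replace() functions and the exact work_init() call are illustrative assumptions; only the llsync_*() macros and functions come from this module, and updaters are assumed to serialize among themselves with a conventional lock:

    #include <stddef.h>

    #include <kern/llsync.h>
    #include <kern/work.h>

    struct obj {
        struct work work;           /* embedded so destruction can be deferred */
        int value;
    };

    static struct obj *global_ptr;  /* pointer published to readers */

    /* Read side: no locks, no atomic instructions. */
    static int
    obj_read(void)
    {
        struct obj *obj;
        int value;

        llsync_read_enter();
        obj = llsync_read_ptr(global_ptr);
        value = (obj != NULL) ? obj->value : 0;
        llsync_read_exit();

        return value;
    }

    /* Update side: the new object is fully built before being published,
     * and the old one is only reclaimed once all read-side references
     * may have been dropped. */
    static void
    obj_replace(struct obj *new_obj, void (*obj_reclaim)(struct work *))
    {
        struct obj *old_obj;

        old_obj = global_ptr;
        llsync_assign_ptr(global_ptr, new_obj);

        if (old_obj != NULL) {
            work_init(&old_obj->work, obj_reclaim);  /* assumed work API */
            llsync_defer(&old_obj->work);
        }
    }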
diff --git a/kern/llsync_i.h b/kern/llsync_i.h
index 5dae472e..f14c9c8d 100644
--- a/kern/llsync_i.h
+++ b/kern/llsync_i.h
@@ -18,21 +18,98 @@
#ifndef _KERN_LLSYNC_I_H
#define _KERN_LLSYNC_I_H
+#include <kern/assert.h>
+#include <kern/cpumap.h>
#include <kern/evcnt.h>
+#include <kern/macros.h>
#include <kern/param.h>
+#include <kern/spinlock.h>
+#include <kern/work.h>
+#include <machine/cpu.h>
+
+/*
+ * Global data.
+ *
+ * The queue number matches the number of global checkpoints that occurred
+ * since works contained in it were added. After two global checkpoints,
+ * works are scheduled for processing.
+ *
+ * Interrupts must be disabled when acquiring the global data lock.
+ */
+struct llsync_data {
+ struct spinlock lock;
+ struct cpumap registered_cpus;
+ unsigned int nr_registered_cpus;
+ struct cpumap pending_checkpoints;
+ unsigned int nr_pending_checkpoints;
+ struct work_queue queue0;
+ struct work_queue queue1;
+ unsigned long nr_pending_works;
+ struct evcnt ev_global_checkpoint;
+ struct evcnt ev_periodic_checkin;
+ struct evcnt ev_failed_periodic_checkin;
+
+ /*
+ * Global checkpoint ID.
+ *
+ * This variable can be frequently accessed from many processors, so:
+ * - reserve a whole cache line for it
+ * - apply optimistic accesses to reduce contention
+ */
+ struct {
+ volatile unsigned int value __aligned(CPU_L1_SIZE);
+ } gcid;
+};
+
+extern struct llsync_data llsync_data;
/*
* Per-processor data.
*
- * Interrupts must be disabled on access.
+ * Every processor records whether it is registered and a local copy of the
+ * global checkpoint ID, which is meaningless on unregistered processors.
+ * The true global checkpoint ID is incremented when a global checkpoint occurs,
+ * after which all the local copies become stale. Checking in synchronizes
+ * the local copy of the global checkpoint ID.
+ *
+ * Interrupts and preemption must be disabled on access.
*/
-struct llsync_cpu {
+struct llsync_cpu_data {
int registered;
- int checked;
- struct evcnt ev_reset;
- struct evcnt ev_spurious_reset;
+ unsigned int gcid;
} __aligned(CPU_L1_SIZE);
-extern struct llsync_cpu llsync_cpus[MAX_CPUS];
+extern struct llsync_cpu_data llsync_cpu_data[MAX_CPUS];
+
+static inline struct llsync_cpu_data *
+llsync_get_cpu_data(unsigned int cpu)
+{
+ return &llsync_cpu_data[cpu];
+}
+
+static inline void
+llsync_checkin(void)
+{
+ struct llsync_cpu_data *cpu_data;
+ unsigned int cpu, gcid;
+
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ cpu = cpu_id();
+ cpu_data = llsync_get_cpu_data(cpu);
+
+ if (!cpu_data->registered)
+ return;
+
+ /*
+ * The global checkpoint ID obtained might be obsolete here, in which
+ * case a commit will not determine that a checkpoint actually occurred.
+ * This should seldom happen.
+ */
+ gcid = llsync_data.gcid.value;
+ assert((gcid - cpu_data->gcid) <= 1);
+ cpu_data->gcid = gcid;
+}
#endif /* _KERN_LLSYNC_I_H */
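
The checkin logic above relies on unsigned modular arithmetic: LLSYNC_INITIAL_GCID starts the counter close to UINT_MAX so that wraparound is exercised early in the life of the system, and the assert((gcid - cpu_data->gcid) <= 1) check remains valid across it. A self-contained sketch (hypothetical names) showing why the distance check survives overflow:

    #include <assert.h>
    #include <stdio.h>

    /* Same idea as LLSYNC_INITIAL_GCID: start near the top of the range. */
    #define INITIAL_GCID ((unsigned int)-10)

    int
    main(void)
    {
        unsigned int gcid = INITIAL_GCID;   /* global checkpoint ID */
        unsigned int local = gcid;          /* per-processor copy */

        for (int i = 0; i < 20; i++) {
            gcid++;                         /* a global checkpoint occurs */

            /* Unsigned subtraction wraps modulo 2^32, so the local copy is
             * never seen as more than one checkpoint behind, even after
             * gcid has wrapped past zero. */
            assert((gcid - local) <= 1);

            local = gcid;                   /* the processor checks in */
        }

        printf("gcid is now %u, wrapped past zero as expected\n", gcid);
        return 0;
    }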
diff --git a/kern/thread.c b/kern/thread.c
index 60f57904..9ed55d3c 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -482,7 +482,7 @@ thread_runq_schedule(struct thread_runq *runq, struct thread *prev)
assert(!cpu_intr_enabled());
spinlock_assert_locked(&runq->lock);
- llsync_checkin(thread_runq_id(runq));
+ llsync_report_context_switch();
thread_clear_flag(prev, THREAD_YIELD);
thread_runq_put_prev(runq, prev);
@@ -1463,6 +1463,7 @@ thread_init(struct thread *thread, void *stack, const struct thread_attr *attr,
thread->state = THREAD_SLEEPING;
thread->preempt = 2;
thread->pinned = 0;
+ thread->llsync_read = 0;
thread->sched_policy = attr->policy;
thread->sched_class = thread_policy_table[attr->policy];
cpumap_copy(&thread->cpumap, cpumap);
@@ -1679,14 +1680,14 @@ static void
thread_idle(void *arg)
{
struct thread *self;
- unsigned int cpu;
+
+ (void)arg;
self = thread_self();
- cpu = thread_runq_id(arg);
for (;;) {
thread_preempt_disable();
- llsync_unregister_cpu(cpu);
+ llsync_unregister();
for (;;) {
cpu_intr_disable();
@@ -1699,7 +1700,7 @@ thread_idle(void *arg)
cpu_idle();
}
- llsync_register_cpu(cpu);
+ llsync_register();
thread_preempt_enable();
}
}
@@ -1735,7 +1736,7 @@ thread_setup_idler(struct thread_runq *runq)
thread_attr_init(&attr, name);
thread_attr_set_cpumap(&attr, cpumap);
thread_attr_set_policy(&attr, THREAD_SCHED_POLICY_IDLE);
- error = thread_init(idler, stack, &attr, thread_idle, runq);
+ error = thread_init(idler, stack, &attr, thread_idle, NULL);
if (error)
panic("thread: unable to initialize idler thread");
@@ -1946,7 +1947,7 @@ thread_run_scheduler(void)
assert(!cpu_intr_enabled());
runq = thread_runq_local();
- llsync_register_cpu(thread_runq_id(runq));
+ llsync_register();
thread = thread_self();
assert(thread == runq->current);
assert(thread->preempt == 1);
@@ -2003,7 +2004,7 @@ thread_tick_intr(void)
runq = thread_runq_local();
evcnt_inc(&runq->ev_tick);
- llsync_commit_checkpoint(thread_runq_id(runq));
+ llsync_report_periodic_event();
thread = thread_self();
spinlock_lock(&runq->lock);
diff --git a/kern/thread.h b/kern/thread.h
index e570cd29..01d59e5f 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -154,6 +154,7 @@ struct thread {
/* Thread-local members */
unsigned short preempt;
unsigned short pinned;
+ unsigned short llsync_read;
/* Common scheduling properties */
unsigned char sched_policy;
@@ -448,6 +449,40 @@ thread_preempt_disable(void)
}
/*
+ * Lockless synchronization read-side critical section nesting counter
+ * control functions.
+ */
+
+static inline int
+thread_llsync_in_read_cs(void)
+{
+ struct thread *thread;
+
+ thread = thread_self();
+ return (thread->llsync_read != 0);
+}
+
+static inline void
+thread_llsync_read_inc(void)
+{
+ struct thread *thread;
+
+ thread = thread_self();
+ thread->llsync_read++;
+ assert(thread->llsync_read != 0);
+}
+
+static inline void
+thread_llsync_read_dec(void)
+{
+ struct thread *thread;
+
+ thread = thread_self();
+ assert(thread->llsync_read != 0);
+ thread->llsync_read--;
+}
+
+/*
* Type for thread-specific data destructor.
*/
typedef void (*thread_dtor_fn_t)(void *);
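
The per-thread llsync_read counter added above is what allows read-side critical sections to nest: only the outermost llsync_read_enter() disables preemption, and only the outermost llsync_read_exit() re-enables it. A short sketch under that assumption (nested_lookup() is hypothetical):

    #include <kern/llsync.h>

    static void
    nested_lookup(void)
    {
        llsync_read_enter();        /* outermost: disables preemption */

        llsync_read_enter();        /* nested: only increments llsync_read */
        /* ... dereference pointers published with llsync_assign_ptr() ... */
        llsync_read_exit();         /* counter drops to 1, preemption stays off */

        llsync_read_exit();         /* outermost: re-enables preemption */
    }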