diff options
author | Richard Braun <rbraun@sceen.net> | 2013-05-24 20:24:10 +0200 |
---|---|---|
committer | Richard Braun <rbraun@sceen.net> | 2013-05-24 20:25:43 +0200 |
commit | 2587e7386bf87c79da9dd804e082079fbe16f0a6 (patch) | |
tree | 1d7b5744db8e96913a1b5a2016c9fd987688569f | |
parent | 47713dee27ac3fee0d8a6dee1c104fa48b88c299 (diff) |
kern/llsync: fix checkpoint reset interrupt handling
This change makes reset requests write both the per-processor flag as well as
the checkpoint bitmap atomically, and adjusts the module logic accordingly.
It fixes a race between checkpoint reset and system timer interrupts where
the timer interrupt would make the local processor commit its checkpoint
although it can't be reliably determined that it reached a checkpoint since
the last global checkpoint, because the reset interrupt wasn't received yet.
This problem would rarely happen on real hardware because of the near-instant
handling of IPIs, but it was observed on virtual machines.
-rw-r--r-- | kern/llsync.c | 53 | ||||
-rw-r--r-- | kern/llsync.h | 21 |
2 files changed, 55 insertions, 19 deletions
diff --git a/kern/llsync.c b/kern/llsync.c index aa5ac346..f6862da7 100644 --- a/kern/llsync.c +++ b/kern/llsync.c @@ -172,14 +172,18 @@ llsync_wakeup_worker(void) } static void +llsync_reset_checkpoint_common(unsigned int cpu) +{ + assert(!bitmap_test(llsync_pending_checkpoints, cpu)); + bitmap_set(llsync_pending_checkpoints, cpu); + llsync_cpus[cpu].checked = 0; +} + +static void llsync_process_global_checkpoint(unsigned int cpu) { int i, nr_cpus; - nr_cpus = cpu_count(); - bitmap_copy(llsync_pending_checkpoints, llsync_registered_cpus, nr_cpus); - llsync_nr_pending_checkpoints = llsync_nr_registered_cpus; - if (llsync_nr_registered_cpus == 0) { list_concat(&llsync_list1, &llsync_list0); list_init(&llsync_list0); @@ -189,7 +193,12 @@ llsync_process_global_checkpoint(unsigned int cpu) list_set_head(&llsync_list1, &llsync_list0); list_init(&llsync_list0); - llsync_reset_checkpoint(cpu); + llsync_nr_pending_checkpoints = llsync_nr_registered_cpus; + + if (llsync_cpus[cpu].registered) + llsync_reset_checkpoint_common(cpu); + + nr_cpus = cpu_count(); bitmap_for_each(llsync_registered_cpus, nr_cpus, i) if ((unsigned int)i != cpu) @@ -206,17 +215,19 @@ llsync_register_cpu(unsigned int cpu) spinlock_lock_intr_save(&llsync_lock, &flags); + assert(!llsync_cpus[cpu].registered); + llsync_cpus[cpu].registered = 1; + assert(!bitmap_test(llsync_registered_cpus, cpu)); bitmap_set(llsync_registered_cpus, cpu); llsync_nr_registered_cpus++; + assert(!bitmap_test(llsync_pending_checkpoints, cpu)); + if ((llsync_nr_registered_cpus == 1) && (llsync_nr_pending_checkpoints == 0)) llsync_process_global_checkpoint(cpu); - assert(!llsync_cpus[cpu].registered); - llsync_cpus[cpu].registered = 1; - spinlock_unlock_intr_restore(&llsync_lock, flags); } @@ -251,12 +262,38 @@ llsync_unregister_cpu(unsigned int cpu) bitmap_clear(llsync_registered_cpus, cpu); llsync_nr_registered_cpus--; + /* + * Processor registration qualifies as a checkpoint. Since unregistering + * a processor also disables commits until it's registered again, perform + * one now. + */ llsync_commit_checkpoint_common(cpu); spinlock_unlock_intr_restore(&llsync_lock, flags); } void +llsync_reset_checkpoint(unsigned int cpu) +{ + assert(!cpu_intr_enabled()); + + spinlock_lock(&llsync_lock); + + llsync_reset_checkpoint_common(cpu); + + /* + * It may happen that this processor was registered at the time a global + * checkpoint occurred, but unregistered itself before receiving the reset + * interrupt. In this case, behave as if the reset request was received + * before unregistering by immediately committing the local checkpoint. + */ + if (!llsync_cpus[cpu].registered) + llsync_commit_checkpoint_common(cpu); + + spinlock_unlock(&llsync_lock); +} + +void llsync_commit_checkpoint(unsigned int cpu) { assert(!cpu_intr_enabled()); diff --git a/kern/llsync.h b/kern/llsync.h index c58030d0..e00baa44 100644 --- a/kern/llsync.h +++ b/kern/llsync.h @@ -83,17 +83,6 @@ llsync_read_unlock(void) } /* - * Reset the checkpoint flag of a processor. - * - * Called from interrupt context. - */ -static inline void -llsync_reset_checkpoint(unsigned int cpu) -{ - llsync_cpus[cpu].checked = 0; -} - -/* * Report that a processor has reached a checkpoint. * * Called during context switch. @@ -111,6 +100,9 @@ void llsync_setup(void); /* * Report that a processor will be regularly checking in. + * + * Registered processors perform checkpoint commits and receive checkpoint + * reset interrupts. */ void llsync_register_cpu(unsigned int cpu); @@ -130,6 +122,13 @@ void llsync_unregister_cpu(unsigned int cpu); void llsync_commit_checkpoint(unsigned int cpu); /* + * Reset the checkpoint pending state of a processor. + * + * Called from interrupt context. + */ +void llsync_reset_checkpoint(unsigned int cpu); + +/* * Defer an operation until all existing read-side references are dropped, * without blocking. */ |