summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Braun <rbraun@sceen.net>2013-05-24 20:24:10 +0200
committerRichard Braun <rbraun@sceen.net>2013-05-24 20:25:43 +0200
commit2587e7386bf87c79da9dd804e082079fbe16f0a6 (patch)
tree1d7b5744db8e96913a1b5a2016c9fd987688569f
parent47713dee27ac3fee0d8a6dee1c104fa48b88c299 (diff)
kern/llsync: fix checkpoint reset interrupt handling
This change makes reset requests write both the per-processor flag as well as the checkpoint bitmap atomically, and adjusts the module logic accordingly. It fixes a race between checkpoint reset and system timer interrupts where the timer interrupt would make the local processor commit its checkpoint although it can't be reliably determined that it reached a checkpoint since the last global checkpoint, because the reset interrupt wasn't received yet. This problem would rarely happen on real hardware because of the near-instant handling of IPIs, but it was observed on virtual machines.
-rw-r--r--kern/llsync.c53
-rw-r--r--kern/llsync.h21
2 files changed, 55 insertions, 19 deletions
diff --git a/kern/llsync.c b/kern/llsync.c
index aa5ac346..f6862da7 100644
--- a/kern/llsync.c
+++ b/kern/llsync.c
@@ -172,14 +172,18 @@ llsync_wakeup_worker(void)
}
static void
+llsync_reset_checkpoint_common(unsigned int cpu)
+{
+ assert(!bitmap_test(llsync_pending_checkpoints, cpu));
+ bitmap_set(llsync_pending_checkpoints, cpu);
+ llsync_cpus[cpu].checked = 0;
+}
+
+static void
llsync_process_global_checkpoint(unsigned int cpu)
{
int i, nr_cpus;
- nr_cpus = cpu_count();
- bitmap_copy(llsync_pending_checkpoints, llsync_registered_cpus, nr_cpus);
- llsync_nr_pending_checkpoints = llsync_nr_registered_cpus;
-
if (llsync_nr_registered_cpus == 0) {
list_concat(&llsync_list1, &llsync_list0);
list_init(&llsync_list0);
@@ -189,7 +193,12 @@ llsync_process_global_checkpoint(unsigned int cpu)
list_set_head(&llsync_list1, &llsync_list0);
list_init(&llsync_list0);
- llsync_reset_checkpoint(cpu);
+ llsync_nr_pending_checkpoints = llsync_nr_registered_cpus;
+
+ if (llsync_cpus[cpu].registered)
+ llsync_reset_checkpoint_common(cpu);
+
+ nr_cpus = cpu_count();
bitmap_for_each(llsync_registered_cpus, nr_cpus, i)
if ((unsigned int)i != cpu)
@@ -206,17 +215,19 @@ llsync_register_cpu(unsigned int cpu)
spinlock_lock_intr_save(&llsync_lock, &flags);
+ assert(!llsync_cpus[cpu].registered);
+ llsync_cpus[cpu].registered = 1;
+
assert(!bitmap_test(llsync_registered_cpus, cpu));
bitmap_set(llsync_registered_cpus, cpu);
llsync_nr_registered_cpus++;
+ assert(!bitmap_test(llsync_pending_checkpoints, cpu));
+
if ((llsync_nr_registered_cpus == 1)
&& (llsync_nr_pending_checkpoints == 0))
llsync_process_global_checkpoint(cpu);
- assert(!llsync_cpus[cpu].registered);
- llsync_cpus[cpu].registered = 1;
-
spinlock_unlock_intr_restore(&llsync_lock, flags);
}
@@ -251,12 +262,38 @@ llsync_unregister_cpu(unsigned int cpu)
bitmap_clear(llsync_registered_cpus, cpu);
llsync_nr_registered_cpus--;
+ /*
+ * Processor registration qualifies as a checkpoint. Since unregistering
+ * a processor also disables commits until it's registered again, perform
+ * one now.
+ */
llsync_commit_checkpoint_common(cpu);
spinlock_unlock_intr_restore(&llsync_lock, flags);
}
void
+llsync_reset_checkpoint(unsigned int cpu)
+{
+ assert(!cpu_intr_enabled());
+
+ spinlock_lock(&llsync_lock);
+
+ llsync_reset_checkpoint_common(cpu);
+
+ /*
+ * It may happen that this processor was registered at the time a global
+ * checkpoint occurred, but unregistered itself before receiving the reset
+ * interrupt. In this case, behave as if the reset request was received
+ * before unregistering by immediately committing the local checkpoint.
+ */
+ if (!llsync_cpus[cpu].registered)
+ llsync_commit_checkpoint_common(cpu);
+
+ spinlock_unlock(&llsync_lock);
+}
+
+void
llsync_commit_checkpoint(unsigned int cpu)
{
assert(!cpu_intr_enabled());
diff --git a/kern/llsync.h b/kern/llsync.h
index c58030d0..e00baa44 100644
--- a/kern/llsync.h
+++ b/kern/llsync.h
@@ -83,17 +83,6 @@ llsync_read_unlock(void)
}
/*
- * Reset the checkpoint flag of a processor.
- *
- * Called from interrupt context.
- */
-static inline void
-llsync_reset_checkpoint(unsigned int cpu)
-{
- llsync_cpus[cpu].checked = 0;
-}
-
-/*
* Report that a processor has reached a checkpoint.
*
* Called during context switch.
@@ -111,6 +100,9 @@ void llsync_setup(void);
/*
* Report that a processor will be regularly checking in.
+ *
+ * Registered processors perform checkpoint commits and receive checkpoint
+ * reset interrupts.
*/
void llsync_register_cpu(unsigned int cpu);
@@ -130,6 +122,13 @@ void llsync_unregister_cpu(unsigned int cpu);
void llsync_commit_checkpoint(unsigned int cpu);
/*
+ * Reset the checkpoint pending state of a processor.
+ *
+ * Called from interrupt context.
+ */
+void llsync_reset_checkpoint(unsigned int cpu);
+
+/*
* Defer an operation until all existing read-side references are dropped,
* without blocking.
*/