author     Richard Braun <rbraun@sceen.net>   2018-01-11 23:34:15 +0100
committer  Richard Braun <rbraun@sceen.net>   2018-01-11 23:52:56 +0100
commit     d445fa8e41e5591c80727bbd59ca36620e03aea2 (patch)
tree       cbfbcb6473c3a8ba97c5a09350723d6c403f8f91
parent     69470fda09aee6ab722b3fcdec5f42e687101f29 (diff)
thread: move the yield flag and the preemption level to the run queue
There is really no need to make the yield flag and the preemption level thread-local on a single processor system, so turn them into globals, as members of the unique run queue. This slightly simplifies the preemption control functions by removing the indirection through the current thread. Another motivation for this change is that X15 already serves as an example of a thread-local preemption counter, because of SMP, so it made sense to show the other approach here, which is the common one on small single processor embedded systems.
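To make the change concrete, here is a minimal sketch of the two structures involved, reduced to the fields this patch actually moves (the complete definitions appear in the diff below):

    /* Before this patch: preemption state kept in each thread. */
    struct thread {
        void *sp;
        enum thread_state state;
        bool yield;                  /* checked when preemption is reenabled */
        unsigned int preempt_level;  /* 0 means preemption is enabled */
        /* ... */
    };

    /* After this patch: a single global copy, owned by the unique run queue. */
    struct thread_runq {
        struct thread *current;
        bool yield;                  /* set when the current thread should yield */
        unsigned int preempt_level;  /* 0 means preemption is enabled */
        /* ... */
    };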
-rw-r--r--   src/thread.c   408
1 file changed, 221 insertions, 187 deletions
diff --git a/src/thread.c b/src/thread.c
index 70f036e..30f5541 100644
--- a/src/thread.c
+++ b/src/thread.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
* Copyright (c) 2017 Jerko Lenstra.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -68,6 +68,25 @@ struct thread_list {
* The current thread, which is the one currently running on the processor,
* isn't in any of the thread lists.
*
+ * A thread may be "voluntarily" preempted, when it yields the processor
+ * itself, or "involuntarily" preempted when an interrupt occurs. Since it's
+ * not possible to immediately yield the processor when preemption is
+ * disabled (including in the middle of an interrupt handler), preemption
+ * may only occur on specific events:
+ * - Calling thread_yield() with preemption enabled.
+ * - Preemption is reenabled, i.e. the preemption level goes back to 0.
+ * - Return from an interrupt.
+ *
+ * Calling thread_yield() is completely voluntary and is ignored in this
+ * discussion. Returning from an interrupt actually reenables preemption,
+ * so the only case discussed is reenabling preemption. The mechanism used
+ * to trigger scheduling when preemption is reenabled is the yield flag.
+ * When the scheduler determines that the current thread should yield the
+ * processor, e.g. because an interrupt handler has awakened a higher priority
+ * thread, it sets the yield flag of the run queue. When preemption is
+ * reenabled, the flag is checked, and if true, the current thread yields
+ * the processor.
+ *
* The idle thread runs when no other thread is in the running state.
*
* Interrupts and preemption must be disabled when accessing a run queue.
@@ -77,6 +96,8 @@ struct thread_list {
*/
struct thread_runq {
struct thread *current;
+ bool yield;
+ unsigned int preempt_level;
unsigned int nr_threads;
struct thread_list lists[THREAD_NR_PRIORITIES];
struct thread *idle;
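The comment added in this hunk describes a deferred yield: the flag is only acted upon when preemption is reenabled. As an illustrative sketch (not part of the patch), a critical section built with the preemption control functions behaves as follows:

    thread_preempt_disable();   /* runq->preempt_level: 0 -> 1 */

    /*
     * Critical section. If an interrupt handler awakens a higher priority
     * thread here, thread_runq_add() merely sets runq->yield; no context
     * switch can occur while preemption is disabled.
     */

    thread_preempt_enable();    /* runq->preempt_level: 1 -> 0, then
                                   runq->yield is checked, and the current
                                   thread yields the processor if it is set */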
@@ -100,31 +121,13 @@ enum thread_state {
* When the context is saved, the stack pointer is updated to the value
* of the stack register at the time the thread context is saved.
*
- * A thread may be "voluntarily" preempted, when it yields the processor
- * itself, or "involuntarily" preempted when an interrupt occurs. Since it's
- * not possible to immediately yield the processor when preemption is
- * disabled (including in the middle of an interrupt handler), preemption
- * may only occur on specific events:
- * - Calling thread_yield() with preemption enabled.
- * - Preemption is reenabled, i.e. the preemption level goes back to 0.
- * - Return from an interrupt.
- *
- * Calling thread_yield() is completely voluntary and is ignored in this
- * discussion. Returning from an interrupt actually reenables preemption,
- * so the only case discussed is reenabling preemption. The mechanism used
- * to trigger scheduling when preemption is reenabled is the yield flag.
- * When the scheduler determines that the current thread should yield the
- * processor, e.g. because an interrupt handler has awakened a higher priority
- * thread, it marks the current thread for yielding. When preemption is
- * reenabled, the flag is checked, and if true, the current thread yields
- * the processor.
+ * Accessing threads is subject to the same synchronization rules as a
+ * run queue.
*/
struct thread {
void *sp;
enum thread_state state;
- bool yield;
struct list node;
- unsigned int preempt_level;
unsigned int priority;
struct thread *joiner;
char name[THREAD_NAME_MAX_SIZE];
@@ -165,215 +168,129 @@ thread_idle(void *arg)
}
static bool
-thread_is_running(const struct thread *thread)
+thread_scheduler_locked(void)
{
- return thread->state == THREAD_STATE_RUNNING;
+ return !cpu_intr_enabled() && !thread_preempt_enabled();
}
static void
-thread_set_running(struct thread *thread)
+thread_list_init(struct thread_list *list)
{
- thread->state = THREAD_STATE_RUNNING;
+ list_init(&list->threads);
}
static void
-thread_set_sleeping(struct thread *thread)
+thread_list_remove(struct thread *thread)
{
- thread->state = THREAD_STATE_SLEEPING;
-}
-
-static bool
-thread_is_dead(const struct thread *thread)
-{
- return thread->state == THREAD_STATE_DEAD;
+ list_remove(&thread->node);
}
static void
-thread_set_dead(struct thread *thread)
+thread_list_enqueue(struct thread_list *list, struct thread *thread)
{
- thread->state = THREAD_STATE_DEAD;
+ list_insert_tail(&list->threads, &thread->node);
}
-static bool
-thread_should_yield(const struct thread *thread)
+static struct thread *
+thread_list_dequeue(struct thread_list *list)
{
- return thread->yield;
-}
+ struct thread *thread;
-static void
-thread_set_yield(struct thread *thread)
-{
- thread->yield = true;
+ thread = list_first_entry(&list->threads, typeof(*thread), node);
+ thread_list_remove(thread);
+ return thread;
}
-static void
-thread_clear_yield(struct thread *thread)
+static bool
+thread_list_empty(struct thread_list *list)
{
- thread->yield = false;
+ return list_empty(&list->threads);
}
-static unsigned int
-thread_get_priority(struct thread *thread)
+static bool
+thread_is_running(const struct thread *thread)
{
- return thread->priority;
+ return thread->state == THREAD_STATE_RUNNING;
}
static void
-thread_remove_from_list(struct thread *thread)
+thread_set_running(struct thread *thread)
{
- list_remove(&thread->node);
+ thread->state = THREAD_STATE_RUNNING;
}
static void
-thread_yield_if_needed(void)
+thread_set_sleeping(struct thread *thread)
{
- if (thread_should_yield(thread_self())) {
- thread_yield();
- }
+ thread->state = THREAD_STATE_SLEEPING;
}
-void
-thread_preempt_disable(void)
+static bool
+thread_is_dead(const struct thread *thread)
{
- struct thread *thread;
-
- thread = thread_self();
- thread->preempt_level++;
- assert(thread->preempt_level != 0);
-
- /*
- * This is a compiler barrier. It tells the compiler not to reorder
- * the instructions it emits across this point.
- *
- * Reordering is one of the most effective ways to optimize generated
- * machine code, and the C specification allows compilers to optimize
- * very aggressively, which is why C shouldn't be thought of as a
- * "portable assembler" any more.
- *
- * Sometimes, especially when building critical sections, strong control
- * over ordering is required, and this is when compiler barriers should
- * be used. In this kernel, disabling preemption is one of the primary
- * ways to create critical sections. The following barrier makes sure
- * that no instructions from the critical section leak out before it.
- *
- * When using GCC or a compatible compiler such as Clang, compiler
- * barriers are implemented with an inline assembly expression that
- * includes the "memory" clobber. This clobber tells the compiler that
- * memory may change in unexpected ways. All memory accesses started
- * before the barrier must complete before the barrier, and all memory
- * accesses that complete after the barrier must start after the barrier.
- * See barrier() in macros.h.
- *
- * When calling assembly functions, there is usually no need to add
- * an explicit compiler barrier, because the compiler, not knowing
- * what the external function does, normally assumes memory may change,
- * i.e. that the function has unexpected side effects. In C code however,
- * the compiler has better knowledge about side effects. That knowledge
- * comes from the amount of code in a single compilation unit. The
- * traditional compilation unit when building with optimizations is the
- * source file, but with link-time optimizations (LTO), this can be the
- * whole program. This is why specifying barriers in C code is required
- * for reliable behaviour.
- *
- * Also note that compiler barriers only affect the machine code
- * generated by the compiler. Processors also internally perform
- * various kinds of reordering. When confined to a single processor,
- * this can safely be ignored because the processor model guarantees
- * that the state seen by software matches program order. This
- * assumption breaks on multiprocessor systems though, where memory
- * barriers are also required to enforce ordering across processors.
- */
- barrier();
+ return thread->state == THREAD_STATE_DEAD;
}
static void
-thread_preempt_enable_no_yield(void)
-{
- struct thread *thread;
-
- /* See thread_preempt_disable() */
- barrier();
-
- thread = thread_self();
- assert(thread->preempt_level != 0);
- thread->preempt_level--;
-}
-
-void
-thread_preempt_enable(void)
+thread_set_dead(struct thread *thread)
{
- thread_preempt_enable_no_yield();
- thread_yield_if_needed();
+ thread->state = THREAD_STATE_DEAD;
}
-bool
-thread_preempt_enabled(void)
+static unsigned int
+thread_get_priority(struct thread *thread)
{
- struct thread *thread;
-
- thread = thread_self();
- return thread->preempt_level == 0;
+ return thread->priority;
}
static bool
-thread_scheduler_locked(void)
+thread_runq_should_yield(const struct thread_runq *runq)
{
- return !cpu_intr_enabled() && !thread_preempt_enabled();
-}
-
-static uint32_t
-thread_lock_scheduler(void)
-{
- /*
- * When disabling both preemption and interrupts, it is best to do it in
- * this order, since, if an interrupt occurs after preemption is disabled
- * but before interrupts are disabled, it may not cause a context switch.
- *
- * Besides, disabling interrupts first would slightly increase interrupt
- * latencies.
- */
- thread_preempt_disable();
- return cpu_intr_save();
+ return runq->yield;
}
static void
-thread_unlock_scheduler(uint32_t eflags, bool yield)
+thread_runq_set_yield(struct thread_runq *runq)
{
- cpu_intr_restore(eflags);
-
- if (yield) {
- thread_preempt_enable();
- } else {
- thread_preempt_enable_no_yield();
- }
+ runq->yield = true;
}
static void
-thread_list_init(struct thread_list *list)
+thread_runq_clear_yield(struct thread_runq *runq)
{
- list_init(&list->threads);
+ runq->yield = false;
}
-static void
-thread_list_enqueue(struct thread_list *list, struct thread *thread)
+static unsigned int
+thread_runq_get_preempt_level(const struct thread_runq *runq)
{
- list_insert_tail(&list->threads, &thread->node);
+ return runq->preempt_level;
}
-static struct thread *
-thread_list_dequeue(struct thread_list *list)
+static void
+thread_runq_inc_preempt_level(struct thread_runq *runq)
{
- struct thread *thread;
-
- thread = list_first_entry(&list->threads, typeof(*thread), node);
- thread_remove_from_list(thread);
- return thread;
+ /*
+ * There is no need to protect this increment by disabling interrupts
+ * or using a single interrupt-safe instruction such as inc, because
+ * the preemption level can only be 0 when other threads access it
+ * concurrently. As a result, all threads racing to disable preemption
+ * would read 0 and write 1. Although the increment is not atomic, the
+ * store is serialized such that the first thread that manages to update
+ * memory has effectively prevented all other threads from running,
+ * at least until it reenables preemption, which turns the preemption
+ * level back to 0, making the read-modify-write increment other threads
+ * may have started correct.
+ */
+ runq->preempt_level++;
+ assert(runq->preempt_level != 0);
}
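A short worked interleaving, written as a plain comment for illustration, may help follow the argument above:

    /*
     * Threads A and B race to disable preemption on the single processor,
     * with runq->preempt_level initially 0:
     *
     *   A: loads preempt_level and sees 0
     *      -- interrupt, the scheduler switches to B --
     *   B: loads 0, stores 1, runs with preemption disabled,
     *      stores 0 again when it reenables preemption
     *      -- the processor eventually switches back to A --
     *   A: resumes the interrupted increment and stores 1, which is
     *      correct because the level in memory is 0 again by then
     *
     * Concurrent access is only possible while the level is 0, so a
     * read-modify-write that was interrupted always completes against 0,
     * and the non-atomic increment remains safe on this system.
     */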
-static bool
-thread_list_empty(struct thread_list *list)
+static void
+thread_runq_dec_preempt_level(struct thread_runq *runq)
{
- return list_empty(&list->threads);
+ assert(runq->preempt_level != 0);
+ runq->preempt_level--;
}
static struct thread_list *
@@ -455,7 +372,7 @@ thread_runq_add(struct thread_runq *runq, struct thread *thread)
assert(runq->nr_threads != 0);
if (thread_get_priority(thread) > thread_get_priority(runq->current)) {
- thread_set_yield(runq->current);
+ thread_runq_set_yield(runq);
}
}
@@ -466,7 +383,7 @@ thread_runq_remove(struct thread_runq *runq, struct thread *thread)
runq->nr_threads--;
assert(!thread_is_running(thread));
- thread_remove_from_list(thread);
+ thread_list_remove(thread);
}
static void
@@ -477,7 +394,7 @@ thread_runq_schedule(struct thread_runq *runq)
prev = thread_runq_get_current(runq);
assert(thread_scheduler_locked());
- assert(prev->preempt_level == 1);
+ assert(runq->preempt_level == 1);
thread_runq_put_prev(runq, prev);
@@ -515,14 +432,127 @@ thread_runq_schedule(struct thread_runq *runq)
}
}
+static void
+thread_yield_if_needed(void)
+{
+ if (thread_runq_should_yield(&thread_runq)) {
+ thread_yield();
+ }
+}
+
+void
+thread_preempt_disable(void)
+{
+ thread_runq_inc_preempt_level(&thread_runq);
+
+ /*
+ * This is a compiler barrier. It tells the compiler not to reorder
+ * the instructions it emits across this point.
+ *
+ * Reordering is one of the most effective ways to optimize generated
+ * machine code, and the C specification allows compilers to optimize
+ * very aggressively, which is why C shouldn't be thought of as a
+ * "portable assembler" any more.
+ *
+ * Sometimes, especially when building critical sections, strong control
+ * over ordering is required, and this is when compiler barriers should
+ * be used. In this kernel, disabling preemption is one of the primary
+ * ways to create critical sections. The following barrier makes sure
+ * that no instructions from the critical section leak out before it.
+ *
+ * When using GCC or a compatible compiler such as Clang, compiler
+ * barriers are implemented with an inline assembly expression that
+ * includes the "memory" clobber. This clobber tells the compiler that
+ * memory may change in unexpected ways. All memory accesses started
+ * before the barrier must complete before the barrier, and all memory
+ * accesses that complete after the barrier must start after the barrier.
+ * See barrier() in macros.h.
+ *
+ * When calling assembly functions, there is usually no need to add
+ * an explicit compiler barrier, because the compiler, not knowing
+ * what the external function does, normally assumes memory may change,
+ * i.e. that the function has unexpected side effects. In C code however,
+ * the compiler has better knowledge about side effects. That knowledge
+ * comes from the amount of code in a single compilation unit. The
+ * traditional compilation unit when building with optimizations is the
+ * source file, but with link-time optimizations (LTO), this can be the
+ * whole program. This is why specifying barriers in C code is required
+ * for reliable behaviour.
+ *
+ * Also note that compiler barriers only affect the machine code
+ * generated by the compiler. Processors also internally perform
+ * various kinds of reordering. When confined to a single processor,
+ * this can safely be ignored because the processor model guarantees
+ * that the state seen by software matches program order. This
+ * assumption breaks on multiprocessor systems though, where memory
+ * barriers are also required to enforce ordering across processors.
+ */
+ barrier();
+}
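For reference, the barrier() macro discussed above is the classic GCC/Clang construct; a sketch of what such a definition typically looks like (the authoritative one is in macros.h):

    /*
     * Sketch of a compiler barrier: an empty inline assembly statement
     * whose "memory" clobber tells the compiler that memory may change
     * in unexpected ways, so memory accesses may not be reordered across
     * it. It emits no machine instruction.
     */
    #define barrier() asm volatile("" : : : "memory")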
+
+static void
+thread_preempt_enable_no_yield(void)
+{
+ /* See thread_preempt_disable() */
+ barrier();
+
+ thread_runq_dec_preempt_level(&thread_runq);
+}
+
+void
+thread_preempt_enable(void)
+{
+ thread_preempt_enable_no_yield();
+ thread_yield_if_needed();
+}
+
+static unsigned int
+thread_preempt_level(void)
+{
+ return thread_runq_get_preempt_level(&thread_runq);
+}
+
+bool
+thread_preempt_enabled(void)
+{
+ return thread_preempt_level() == 0;
+}
+
+static uint32_t
+thread_lock_scheduler(void)
+{
+ /*
+ * When disabling both preemption and interrupts, it is best to do it in
+ * this order, since, if an interrupt occurs after preemption is disabled
+ * but before interrupts are disabled, it may not cause a context switch.
+ *
+ * Besides, disabling interrupts first would slightly increase interrupt
+ * latencies.
+ */
+ thread_preempt_disable();
+ return cpu_intr_save();
+}
+
+static void
+thread_unlock_scheduler(uint32_t eflags, bool yield)
+{
+ cpu_intr_restore(eflags);
+
+ if (yield) {
+ thread_preempt_enable();
+ } else {
+ thread_preempt_enable_no_yield();
+ }
+}
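As a usage sketch (the same pattern, with yield set to false, appears in thread_yield() further down in this diff), code that needs the scheduler locked around run queue accesses looks like this:

    uint32_t eflags;

    eflags = thread_lock_scheduler();       /* preemption off, then interrupts off */
    /* ... access or modify the run queue ... */
    thread_unlock_scheduler(eflags, true);  /* restore interrupts, then reenable
                                               preemption, yielding if the yield
                                               flag was set in the meantime */

thread_yield() passes false instead, since it has just gone through the scheduler itself.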
+
void
thread_enable_scheduler(void)
{
struct thread *thread;
+ assert(thread_preempt_level() == 1);
+
thread = thread_runq_get_next(&thread_runq);
- assert(thread);
- assert(thread->preempt_level == 1);
thread_load_context(thread);
/* Never reached */
@@ -533,8 +563,8 @@ thread_main(thread_fn_t fn, void *arg)
{
assert(fn);
- assert(thread_scheduler_locked());
- assert(thread_self()->preempt_level == 1);
+ assert(!cpu_intr_enabled());
+ assert(thread_preempt_level() == 1);
cpu_intr_enable();
thread_preempt_enable();
@@ -654,8 +684,6 @@ thread_init(struct thread *thread, thread_fn_t fn, void *arg,
}
thread->state = THREAD_STATE_RUNNING;
- thread->yield = false;
- thread->preempt_level = 1;
thread->priority = priority;
thread->joiner = NULL;
thread_set_name(thread, name);
@@ -777,34 +805,40 @@ thread_create_idle(void)
}
static void
-thread_runq_preinit(struct thread_runq *runq)
+thread_runq_init(struct thread_runq *runq)
{
+ /*
+ * Set a dummy thread context with preemption disabled to prevent
+ * scheduling functions called before the scheduler is running from
+ * triggering a context switch.
+ */
thread_init(&thread_dummy, NULL, NULL, "dummy", NULL, 0, 0);
runq->current = &thread_dummy;
-}
-
-static void
-thread_runq_init(struct thread_runq *runq)
-{
+ runq->yield = false;
+ runq->preempt_level = 1;
runq->nr_threads = 0;
for (size_t i = 0; i < ARRAY_SIZE(runq->lists); i++) {
thread_list_init(&runq->lists[i]);
}
+}
+static void
+thread_runq_init_idle(struct thread_runq *runq)
+{
runq->idle = thread_create_idle();
}
void
thread_bootstrap(void)
{
- thread_runq_preinit(&thread_runq);
+ thread_runq_init(&thread_runq);
}
void
thread_setup(void)
{
- thread_runq_init(&thread_runq);
+ thread_runq_init_idle(&thread_runq);
}
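Read together with thread_enable_scheduler() above, the following summary sketches why the run queue starts with a dummy current thread and a preemption level of 1 (illustrative only, assuming the usual startup sequence calls these functions in this order):

    thread_bootstrap();         /* thread_runq_init(): current = &thread_dummy,
                                   yield = false, preempt_level = 1 */
    /* ... early initialization runs with preemption disabled ... */
    thread_setup();             /* thread_runq_init_idle(): create the idle thread */
    /* ... create the first threads ... */
    thread_enable_scheduler();  /* asserts preempt_level == 1, then picks a
                                   thread from the run queue and loads its
                                   context; never returns */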
void
@@ -817,7 +851,7 @@ thread_yield(void)
}
eflags = thread_lock_scheduler();
- thread_clear_yield(thread_self());
+ thread_runq_clear_yield(&thread_runq);
thread_runq_schedule(&thread_runq);
thread_unlock_scheduler(eflags, false);
}
@@ -863,6 +897,6 @@ thread_report_tick(void)
{
assert(thread_scheduler_locked());
- thread_set_yield(thread_self());
+ thread_runq_set_yield(&thread_runq);
timer_report_tick();
}