Diffstat (limited to 'src/thread.c')
-rw-r--r--  src/thread.c  362
1 file changed, 167 insertions(+), 195 deletions(-)
diff --git a/src/thread.c b/src/thread.c
index 30f5541..6687191 100644
--- a/src/thread.c
+++ b/src/thread.c
@@ -35,24 +35,6 @@
#include "cpu.h"
#include "panic.h"
#include "thread.h"
-#include "timer.h"
-
-/*
- * The compiler expects the stack pointer to be properly aligned when a
- * function is called, and maintains this alignment across a call chain.
- * The constraints are similar to the return value of malloc().
- * See the description of mem_alloc() in mem.h.
- *
- * Note that modern compilers expect the stack to be 16-byte aligned
- * even on 32-bits i386 processors, to cope with SSE instructions which
- * don't support unaligned accesses (see a revised version of the System V
- * Intel386 ABI [1] for more details). Since all floating point support is
- * disabled when building the kernel, this requirement can be safely ignored
- * and the legacy 4-byte alignment used instead.
- *
- * [1] https://www.uclibc.org/docs/psABI-i386.pdf
- */
-#define THREAD_STACK_ALIGN 4
/*
* List of threads sharing the same priority.
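[Editor's note] The removed block above explains why the old i386 code could get away with 4-byte stack alignment. The port drops THREAD_STACK_ALIGN and instead relies on CPU_STACK_ALIGN from cpu.h together with the P2ALIGN()/P2ALIGNED() macros used later in thread_init(). Their definitions are not part of this diff; a sketch of the conventional power-of-2 helpers, with an assumed value for CPU_STACK_ALIGN:

/*
 * Sketch only: the real definitions live in macros.h and cpu.h.
 */
#define P2ALIGN(x, align)      ((x) & ~((align) - 1))       /* round down */
#define P2ALIGNED(x, align)    (((x) & ((align) - 1)) == 0)

/*
 * Assumed value: the ARM AAPCS requires 8-byte stack alignment at
 * public interfaces, so CPU_STACK_ALIGN is presumably 8 on this target.
 */
#define CPU_STACK_ALIGN 8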
@@ -146,15 +128,6 @@ static struct thread_runq thread_runq;
static struct thread thread_dummy;
/*
- * Declarations for C/assembly functions that are global so that they can
- * be shared between thread.c and thread_asm.S, but are considered private to
- * the thread module.
- */
-void thread_load_context(struct thread *thread) __attribute__((noreturn));
-void thread_switch_context(struct thread *prev, struct thread *next);
-void thread_main(thread_fn_t fn, void *arg);
-
-/*
* Function implementing the idle thread.
*/
static void
@@ -167,12 +140,6 @@ thread_idle(void *arg)
}
}
-static bool
-thread_scheduler_locked(void)
-{
- return !cpu_intr_enabled() && !thread_preempt_enabled();
-}
-
static void
thread_list_init(struct thread_list *list)
{
@@ -186,7 +153,13 @@ thread_list_remove(struct thread *thread)
}
static void
-thread_list_enqueue(struct thread_list *list, struct thread *thread)
+thread_list_enqueue_head(struct thread_list *list, struct thread *thread)
+{
+ list_insert_head(&list->threads, &thread->node);
+}
+
+static void
+thread_list_enqueue_tail(struct thread_list *list, struct thread *thread)
{
list_insert_tail(&list->threads, &thread->node);
}
@@ -202,12 +175,30 @@ thread_list_dequeue(struct thread_list *list)
}
static bool
-thread_list_empty(struct thread_list *list)
+thread_list_empty(const struct thread_list *list)
{
return list_empty(&list->threads);
}
static bool
+thread_list_singular(const struct thread_list *list)
+{
+ return list_singular(&list->threads);
+}
+
+static void *
+thread_get_stack_pointer(const struct thread *thread)
+{
+ return thread->sp;
+}
+
+static void
+thread_set_stack_pointer(struct thread *thread, void *sp)
+{
+ thread->sp = sp;
+}
+
+static bool
thread_is_running(const struct thread *thread)
{
return thread->state == THREAD_STATE_RUNNING;
@@ -316,7 +307,7 @@ thread_runq_put_prev(struct thread_runq *runq, struct thread *thread)
}
list = thread_runq_get_list(runq, thread_get_priority(thread));
- thread_list_enqueue(list, thread);
+ thread_list_enqueue_tail(list, thread);
}
static struct thread *
@@ -362,11 +353,12 @@ thread_runq_add(struct thread_runq *runq, struct thread *thread)
{
struct thread_list *list;
- assert(thread_scheduler_locked());
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
assert(thread_is_running(thread));
list = thread_runq_get_list(runq, thread_get_priority(thread));
- thread_list_enqueue(list, thread);
+ thread_list_enqueue_head(list, thread);
runq->nr_threads++;
assert(runq->nr_threads != 0);
@@ -386,50 +378,53 @@ thread_runq_remove(struct thread_runq *runq, struct thread *thread)
thread_list_remove(thread);
}
+static void *
+thread_runq_schedule_from_pendsv(struct thread_runq *runq)
+{
+ struct thread *thread;
+
+ thread = thread_runq_get_current(runq);
+
+ assert(!cpu_intr_enabled());
+ assert(runq->preempt_level == 1);
+
+ thread_runq_put_prev(runq, thread);
+
+ if (!thread_is_running(thread)) {
+ thread_runq_remove(runq, thread);
+ }
+
+ return thread_runq_get_next(runq);
+}
+
static void
thread_runq_schedule(struct thread_runq *runq)
{
- struct thread *prev, *next;
+ assert(!cpu_intr_enabled());
+ assert(runq->preempt_level == 1);
- prev = thread_runq_get_current(runq);
+ thread_runq_clear_yield(runq);
+}
- assert(thread_scheduler_locked());
- assert(runq->preempt_level == 1);
+static void
+thread_runq_tick(struct thread_runq *runq)
+{
+ struct thread_list *list;
- thread_runq_put_prev(runq, prev);
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
- if (!thread_is_running(prev)) {
- thread_runq_remove(runq, prev);
+ if (runq->current == runq->idle) {
+ return;
}
- next = thread_runq_get_next(runq);
+ list = thread_runq_get_list(runq, runq->current->priority);
- if (prev != next) {
- /*
- * When switching context, it is extremely important that no
- * data access generated by the compiler "leak" across the switch.
- * All operations (i.e. side effects) started before the switch
- * should complete before the switch, and all operations starting
- * after the switch should start after the switch.
- *
- * This is what allows a thread waiting for an event to reliably
- * "see" that event after another thread or interrupt handler has
- * triggered it.
- *
- * This requires a barrier, and, since this is a single-processor
- * scheduler, a compiler barrier (as opposed to memory barriers)
- * is enough. But there is no such barrier here. The reason is that
- * the context switch is implemented in assembly, and the compiler
- * is unable to understand what the assembly code does. As a result,
- * even with aggressive optimizations enabled, the compiler has to
- * assume that memory may have changed in completely unexpected ways,
- * which is equivalent to the inline assembly expression used to
- * implement compiler barriers with GCC (see barrier() in macros.h).
- *
- * See thread_preempt_disable() for a description of compiler barriers.
- */
- thread_switch_context(prev, next);
+ if (thread_list_singular(list)) {
+ return;
}
+
+ thread_runq_set_yield(&thread_runq);
}
static void
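[Editor's note] The long comment removed above argues that the assembly context switch acted as an implicit compiler barrier, and points at barrier() in macros.h. That macro is not shown in this diff; the conventional GCC form it refers to looks like this (treat the exact definition as an assumption):

/*
 * Conventional GCC compiler barrier: an empty asm statement with a
 * "memory" clobber, which forbids the compiler from reordering or
 * caching memory accesses across it.
 */
#define barrier()   asm volatile("" : : : "memory")

In the new scheme the switch happens inside the PendSV handler, which is equally opaque to the compiler, so the same reasoning presumably carries over.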
@@ -440,6 +435,12 @@ thread_yield_if_needed(void)
}
}
+static unsigned int
+thread_preempt_level(void)
+{
+ return thread_runq_get_preempt_level(&thread_runq);
+}
+
void
thread_preempt_disable(void)
{
@@ -506,20 +507,14 @@ thread_preempt_enable(void)
thread_yield_if_needed();
}
-static unsigned int
-thread_preempt_level(void)
-{
- return thread_runq_get_preempt_level(&thread_runq);
-}
-
bool
thread_preempt_enabled(void)
{
return thread_preempt_level() == 0;
}
-static uint32_t
-thread_lock_scheduler(void)
+uint32_t
+thread_preempt_disable_intr_save(void)
{
/*
* When disabling both preemption and interrupts, it is best to do it in
@@ -534,28 +529,37 @@ thread_lock_scheduler(void)
}
static void
-thread_unlock_scheduler(uint32_t eflags, bool yield)
+thread_preempt_enable_no_yield_intr_restore(uint32_t primask)
{
- cpu_intr_restore(eflags);
+ cpu_intr_restore(primask);
+ thread_preempt_enable_no_yield();
+}
- if (yield) {
- thread_preempt_enable();
- } else {
- thread_preempt_enable_no_yield();
- }
+void
+thread_preempt_enable_intr_restore(uint32_t primask)
+{
+ /*
+ * A PendSV exception may only be raised if the preemption level goes
+ * back to 0, making it safe to reenable interrupts before.
+ */
+ cpu_intr_restore(primask);
+ thread_preempt_enable();
}
void
thread_enable_scheduler(void)
{
- struct thread *thread;
-
+ assert(!cpu_intr_enabled());
assert(thread_preempt_level() == 1);
- thread = thread_runq_get_next(&thread_runq);
- thread_load_context(thread);
+ thread_runq_get_next(&thread_runq);
+
+ cpu_intr_enable();
+
+ /* Load the first thread through an SVCall exception */
+ cpu_raise_svcall();
- /* Never reached */
+ panic("thread: error: unable to load first thread");
}
void
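[Editor's note] thread_enable_scheduler() now loads the first thread through an SVCall exception, and rescheduling goes through PendSV. The cpu_raise_svcall()/cpu_raise_pendsv() helpers belong to cpu.c, which is not part of this diff; on a Cortex-M target they would conventionally look like the sketch below (the ICSR address and PENDSVSET bit are per the ARMv7-M architecture, everything else is an assumption):

#include <stdint.h>

#define CPU_ICSR            ((volatile uint32_t *)0xe000ed04)
#define CPU_ICSR_PENDSVSET  (UINT32_C(1) << 28)

void
cpu_raise_pendsv(void)
{
    /* Mark PendSV pending; it is taken once no higher priority
       exception is active and interrupts allow it. */
    *CPU_ICSR = CPU_ICSR_PENDSVSET;
}

void
cpu_raise_svcall(void)
{
    /* SVCall is synchronous. It must be raised with interrupts enabled,
       since an SVC taken while masked escalates to HardFault, which is
       why cpu_intr_enable() is called before cpu_raise_svcall() above. */
    asm volatile("svc #0");
}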
@@ -563,11 +567,8 @@ thread_main(thread_fn_t fn, void *arg)
{
assert(fn);
- assert(!cpu_intr_enabled());
- assert(thread_preempt_level() == 1);
-
- cpu_intr_enable();
- thread_preempt_enable();
+ assert(cpu_intr_enabled());
+ assert(thread_preempt_enabled());
fn(arg);
@@ -587,83 +588,17 @@ thread_set_name(struct thread *thread, const char *name)
}
static void
-thread_stack_push(uint32_t **stackp, size_t *stack_sizep, uint32_t word)
-{
- uint32_t *stack;
- size_t stack_size;
-
- stack = *stackp;
- stack_size = *stack_sizep;
- assert(stack_size >= sizeof(word));
- stack--;
- stack_size -= sizeof(word);
- *stack = word;
- *stackp = stack;
- *stack_sizep = stack_size;
-}
-
-static void *
-thread_stack_forge(char *stack_addr, size_t stack_size,
- thread_fn_t fn, void *arg)
-{
- uint32_t *stack;
-
- stack = (uint32_t *)(stack_addr + stack_size);
-
- /*
- * This part of the stack makes context restoration "return" to
- * thread_main() as if it were called from address 0 (which stops
- * backtracing when using a debugger).
- *
- * This is how an assembly call to thread_main() looks like, according
- * to the ABI (System V Intel 386 ABI [1]) :
- * push arg
- * push fn
- * call thread_main
- *
- * Remember that the call instruction pushes the return address on the
- * stack.
- *
- * [1] http://www.sco.com/developers/devspecs/abi386-4.pdf
- */
- thread_stack_push(&stack, &stack_size, (uint32_t)arg); /* 2nd argument */
- thread_stack_push(&stack, &stack_size, (uint32_t)fn); /* 1st argument */
- thread_stack_push(&stack, &stack_size, (uint32_t)0); /* Return address */
- thread_stack_push(&stack, &stack_size, (uint32_t)thread_main);
-
- /*
- * This part of the stack contains the registers that should be restored.
- * The selection of the registers to save is made according to the
- * ABI, which specifies which registers are owned by the caller, and
- * which are owned by the callee. Since, in all cases, switching context
- * is achieved by calling the thread_switch_context() function, it
- * is safe to rely on the ABI for this selection. Naturally, the
- * registers that must be saved are those owned by the caller, since
- * the compiler assumes all registers owned by the callee may have
- * changed on return. See the System V Intel386 ABI "Registers and the
- * Stack Frame".
- *
- * For debugging purposes, a complete save of all the registers may be
- * performed instead, allowing precise inspection of the state of a
- * thread not currently running on the processor.
- *
- * It is recommended to read the assembly code at the thread_restore_context
- * label in thread_asm.S to better understand this stack frame.
- */
- thread_stack_push(&stack, &stack_size, 0); /* EBP */
- thread_stack_push(&stack, &stack_size, 0); /* EBX */
- thread_stack_push(&stack, &stack_size, 0); /* EDI */
- thread_stack_push(&stack, &stack_size, 0); /* ESI */
-
- return stack;
-}
-
-static void
thread_init(struct thread *thread, thread_fn_t fn, void *arg,
const char *name, char *stack, size_t stack_size,
unsigned int priority)
{
- assert(P2ALIGNED((uintptr_t)stack, THREAD_STACK_ALIGN));
+ if (!P2ALIGNED((uint32_t)stack, CPU_STACK_ALIGN)) {
+ char *aligned_stack;
+
+ aligned_stack = (char *)(P2ALIGN((uintptr_t)stack, CPU_STACK_ALIGN));
+ stack_size -= (stack - aligned_stack);
+ stack = aligned_stack;
+ }
/*
* New threads are created in a state that is similar to preempted threads,
@@ -680,7 +615,7 @@ thread_init(struct thread *thread, thread_fn_t fn, void *arg,
*/
if (stack) {
- thread->sp = thread_stack_forge(stack, stack_size, fn, arg);
+ thread->sp = cpu_stack_forge(stack, stack_size, fn, arg);
}
thread->state = THREAD_STATE_RUNNING;
@@ -695,7 +630,7 @@ thread_create(struct thread **threadp, thread_fn_t fn, void *arg,
const char *name, size_t stack_size, unsigned int priority)
{
struct thread *thread;
- uint32_t eflags;
+ uint32_t primask;
void *stack;
assert(fn);
@@ -706,8 +641,8 @@ thread_create(struct thread **threadp, thread_fn_t fn, void *arg,
return ENOMEM;
}
- if (stack_size < THREAD_STACK_MIN_SIZE) {
- stack_size = THREAD_STACK_MIN_SIZE;
+ if (stack_size < THREAD_MIN_STACK_SIZE) {
+ stack_size = THREAD_MIN_STACK_SIZE;
}
stack = malloc(stack_size);
@@ -719,9 +654,9 @@ thread_create(struct thread **threadp, thread_fn_t fn, void *arg,
thread_init(thread, fn, arg, name, stack, stack_size, priority);
- eflags = thread_lock_scheduler();
+ primask = thread_preempt_disable_intr_save();
thread_runq_add(&thread_runq, thread);
- thread_unlock_scheduler(eflags, true);
+ thread_preempt_enable_intr_restore(primask);
if (threadp) {
*threadp = thread;
@@ -743,26 +678,32 @@ void
thread_exit(void)
{
struct thread *thread;
+ uint32_t primask;
thread = thread_self();
assert(thread_preempt_enabled());
- thread_lock_scheduler();
+ primask = thread_preempt_disable_intr_save();
+
assert(thread_is_running(thread));
thread_set_dead(thread);
thread_wakeup(thread->joiner);
thread_runq_schedule(&thread_runq);
+ thread_preempt_enable_intr_restore(primask);
+
+ cpu_raise_pendsv();
+
panic("thread: error: dead thread walking");
}
void
thread_join(struct thread *thread)
{
- uint32_t eflags;
+ uint32_t primask;
- eflags = thread_lock_scheduler();
+ primask = thread_preempt_disable_intr_save();
thread->joiner = thread_self();
@@ -770,7 +711,7 @@ thread_join(struct thread *thread)
thread_sleep();
}
- thread_unlock_scheduler(eflags, true);
+ thread_preempt_enable_intr_restore(primask);
thread_destroy(thread);
}
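[Editor's note] The create/join API is unchanged by the port; only the locking underneath moves from eflags to PRIMASK. For reference, a minimal usage sketch of the functions as they appear in this file (the priority value and the printf() call are placeholders, and returning from the thread function is assumed to end in thread_exit() via thread_main()):

#include <stdio.h>

#include "thread.h"

static void
worker(void *arg)
{
    printf("hello from %s\n", (const char *)arg);
    /* Falling off the end presumably leads thread_main() to thread_exit(). */
}

void
example(void)
{
    struct thread *thread;
    int error;

    /* A stack size of 0 is bumped to THREAD_MIN_STACK_SIZE. */
    error = thread_create(&thread, worker, "worker", "worker", 0, 1);

    if (error) {
        return;
    }

    /* Sleeps until the worker is dead, then destroys it. */
    thread_join(thread);
}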
@@ -793,14 +734,14 @@ thread_create_idle(void)
panic("thread: unable to allocate idle thread");
}
- stack = malloc(THREAD_STACK_MIN_SIZE);
+ stack = malloc(THREAD_MIN_STACK_SIZE);
if (!stack) {
panic("thread: unable to allocate idle thread stack");
}
thread_init(idle, thread_idle, NULL, "idle",
- stack, THREAD_STACK_MIN_SIZE, THREAD_IDLE_PRIORITY);
+ stack, THREAD_MIN_STACK_SIZE, THREAD_IDLE_PRIORITY);
return idle;
}
@@ -844,44 +785,78 @@ thread_setup(void)
void
thread_yield(void)
{
- uint32_t eflags;
+ uint32_t primask;
if (!thread_preempt_enabled()) {
return;
}
- eflags = thread_lock_scheduler();
- thread_runq_clear_yield(&thread_runq);
+ primask = thread_preempt_disable_intr_save();
thread_runq_schedule(&thread_runq);
- thread_unlock_scheduler(eflags, false);
+ thread_preempt_enable_no_yield_intr_restore(primask);
+
+ cpu_raise_pendsv();
+}
+
+void *
+thread_yield_from_svcall(void)
+{
+ thread_preempt_enable_no_yield();
+ return thread_get_stack_pointer(thread_self());
+}
+
+void *
+thread_yield_from_pendsv(void *prev_sp)
+{
+ struct thread *thread;
+ uint32_t primask;
+
+ primask = thread_preempt_disable_intr_save();
+
+ thread_set_stack_pointer(thread_self(), prev_sp);
+ thread = thread_runq_schedule_from_pendsv(&thread_runq);
+ thread_preempt_enable_intr_restore(primask);
+
+ return thread_get_stack_pointer(thread);
}
void
thread_sleep(void)
{
struct thread *thread;
- uint32_t eflags;
+ uint32_t primask;
thread = thread_self();
- eflags = cpu_intr_save();
+ primask = cpu_intr_save();
+
assert(thread_is_running(thread));
thread_set_sleeping(thread);
thread_runq_schedule(&thread_runq);
+
+ thread_preempt_enable();
+ cpu_intr_enable();
+
+ cpu_raise_pendsv();
+
+ cpu_intr_disable();
+ thread_preempt_disable();
+
assert(thread_is_running(thread));
- cpu_intr_restore(eflags);
+
+ cpu_intr_restore(primask);
}
void
thread_wakeup(struct thread *thread)
{
- uint32_t eflags;
+ uint32_t primask;
if (!thread || (thread == thread_self())) {
return;
}
- eflags = thread_lock_scheduler();
+ primask = thread_preempt_disable_intr_save();
if (!thread_is_running(thread)) {
assert(!thread_is_dead(thread));
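[Editor's note] thread_sleep() now briefly re-enables preemption and interrupts around cpu_raise_pendsv() so the switch can actually be taken, but the calling convention is the same as before: the caller disables preemption, re-checks its predicate in a loop around thread_sleep(), and the other side calls thread_wakeup(), exactly as thread_join() and thread_exit() do above. A minimal wait/signal pattern on top of these primitives (the single waiter and the flag are illustrative assumptions):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include "thread.h"

static struct thread *event_waiter;
static bool event_set;

void
event_wait(void)
{
    uint32_t primask;

    primask = thread_preempt_disable_intr_save();

    event_waiter = thread_self();

    /* The predicate is re-checked after every wake-up. */
    while (!event_set) {
        thread_sleep();
    }

    event_set = false;
    event_waiter = NULL;
    thread_preempt_enable_intr_restore(primask);
}

void
event_signal(void)
{
    uint32_t primask;

    primask = thread_preempt_disable_intr_save();
    event_set = true;
    thread_wakeup(event_waiter);    /* Safe on NULL, see thread_wakeup() */
    thread_preempt_enable_intr_restore(primask);
}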
@@ -889,14 +864,11 @@ thread_wakeup(struct thread *thread)
thread_runq_add(&thread_runq, thread);
}
- thread_unlock_scheduler(eflags, true);
+ thread_preempt_enable_intr_restore(primask);
}
void
thread_report_tick(void)
{
- assert(thread_scheduler_locked());
-
- thread_runq_set_yield(&thread_runq);
- timer_report_tick();
+ thread_runq_tick(&thread_runq);
}