author     Richard Braun <rbraun@sceen.net>    2013-01-11 23:49:23 +0100
committer  Richard Braun <rbraun@sceen.net>    2013-01-11 23:49:23 +0100
commit     644a177521a390133c98b9de83934a25e2be4f67 (patch)
tree       186d4c4d5b23b4e0a94460ecb75e7a1f910a249f /kern
parent     fb1b8cf4f196903f770ce9b0274423626c145517 (diff)
kern/thread: improve processor-local scheduling
This change introduces scheduling classes, including support for real-time and time-sharing threads. The real-time class matches the requirements of the POSIX SCHED_FIFO and SCHED_RR policies. The time-sharing class makes use of a scheduling algorithm based on group ratio round-robin (GR3) to provide proportional fairness.
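For a sense of how the reworked interface is meant to be used, here is a minimal sketch of creating a time-sharing thread through the new attribute-based thread_create(). The worker function and the include paths are assumptions made for illustration; the structure fields, policy constants and error codes all come from the headers changed below.

    #include <kern/error.h>
    #include <kern/thread.h>

    /* Hypothetical worker function. */
    static void
    my_worker(void *arg)
    {
        (void)arg;
    }

    static int
    spawn_worker(void)
    {
        struct thread_attr attr;
        struct thread *thread;
        int error;

        attr.task = NULL;                   /* NULL selects the caller's task */
        attr.name = "my_worker";            /* NULL would reuse the task name */
        attr.sched_policy = THREAD_SCHED_POLICY_TS;
        attr.priority = THREAD_SCHED_TS_PRIO_DEFAULT;

        error = thread_create(&thread, &attr, my_worker, NULL);

        /* ERROR_NOMEM reports an allocation failure, ERROR_AGAIN a weight
         * overflow when adding the thread to its time-sharing run queue. */
        return error;
    }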
Diffstat (limited to 'kern')
-rw-r--r--  kern/error.h  |   3
-rw-r--r--  kern/macros.h |   4
-rw-r--r--  kern/thread.c | 666
-rw-r--r--  kern/thread.h |  87
4 files changed, 708 insertions(+), 52 deletions(-)
diff --git a/kern/error.h b/kern/error.h
index bb1514a5..93de8423 100644
--- a/kern/error.h
+++ b/kern/error.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 Richard Braun.
+ * Copyright (c) 2012, 2013 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -19,5 +19,6 @@
#define _KERN_ERROR_H
#define ERROR_NOMEM 1
+#define ERROR_AGAIN 2
#endif /* _KERN_ERROR_H */
diff --git a/kern/macros.h b/kern/macros.h
index 012dd15b..8e04d72b 100644
--- a/kern/macros.h
+++ b/kern/macros.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2009, 2010 Richard Braun.
+ * Copyright (c) 2009, 2010, 2013 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -44,6 +44,8 @@
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define DIV_CEIL(n, d) (((n) + (d) - 1) / (d))
+
#define P2ALIGNED(x, a) (((x) & ((a) - 1)) == 0)
#define ISP2(x) P2ALIGNED(x, x)
#define P2ALIGN(x, a) ((x) & -(a))
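As a quick aside, a self-contained sketch of what the new DIV_CEIL() macro computes (the macro body is copied from the hunk above, the test values are arbitrary); thread.c below relies on it in thread_sched_ts_remove_scale() to round scaled work up rather than truncate it.

    /* Host-side check of the semantics, with the macro copied from macros.h. */
    #include <assert.h>

    #define DIV_CEIL(n, d) (((n) + (d) - 1) / (d))

    int
    main(void)
    {
        assert(7 / 2 == 3);             /* plain integer division truncates */
        assert(DIV_CEIL(7, 2) == 4);    /* DIV_CEIL() rounds up */
        assert(DIV_CEIL(8, 2) == 4);    /* exact divisions are unchanged */
        return 0;
    }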
diff --git a/kern/thread.c b/kern/thread.c
index ed5385dc..d41c2390 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 Richard Braun.
+ * Copyright (c) 2012, 2013 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -34,13 +34,60 @@
#include <vm/vm_map.h>
/*
+ * Default time slice for real-time round-robin scheduling.
+ */
+#define THREAD_DEFAULT_RR_TIME_SLICE (HZ / 10)
+
+/*
+ * Run queue properties for real-time threads.
+ */
+struct thread_rt_runq {
+ unsigned int bitmap;
+ struct list threads[THREAD_SCHED_RT_PRIO_MAX + 1];
+};
+
+/*
+ * Group of threads sharing the same weight.
+ */
+struct thread_ts_group {
+ struct list node;
+ struct list threads;
+ unsigned int weight;
+ unsigned int work;
+};
+
+/*
+ * Run queue properties for time-sharing threads.
+ */
+struct thread_ts_runq {
+ struct thread_ts_group group_array[THREAD_SCHED_TS_PRIO_MAX + 1];
+ struct list groups;
+ struct thread_ts_group *current;
+ unsigned int weight;
+ unsigned int work;
+};
+
+/*
* Per processor run queue.
*/
struct thread_runq {
+ struct thread_rt_runq rt_runq;
+ struct thread_ts_runq ts_runq;
struct thread *idle;
- struct list threads;
} __aligned(CPU_L1_SIZE);
+/*
+ * Operations of a scheduling class.
+ */
+struct thread_sched_ops {
+ void (*init_thread)(struct thread *thread, unsigned short priority);
+ int (*add)(struct thread_runq *runq, struct thread *thread);
+ void (*remove)(struct thread_runq *runq, struct thread *thread);
+ void (*put_prev)(struct thread_runq *runq, struct thread *thread);
+ struct thread * (*get_next)(struct thread_runq *runq);
+ void (*tick)(struct thread_runq *runq, struct thread *thread);
+};
+
static struct thread_runq thread_runqs[MAX_CPUS];
/*
@@ -56,39 +103,115 @@ static struct thread thread_idles[MAX_CPUS];
static struct kmem_cache thread_cache;
static struct kmem_cache thread_stack_cache;
+/*
+ * Table used to quickly map policies to classes.
+ */
+static unsigned char thread_policy_table[THREAD_NR_SCHED_POLICIES];
+
+/*
+ * Scheduling class operations.
+ */
+static struct thread_sched_ops thread_sched_ops[THREAD_NR_SCHED_CLASSES];
+
+static struct thread_attr thread_default_attr = {
+ NULL,
+ NULL,
+ THREAD_SCHED_POLICY_TS,
+ THREAD_SCHED_TS_PRIO_DEFAULT
+};
+
static void __init
-thread_runq_init(struct thread_runq *runq, struct thread *idle)
+thread_runq_init_rt(struct thread_runq *runq)
{
- /* Consider migration and preemption disabled during initialization */
+ struct thread_rt_runq *rt_runq;
+ size_t i;
+
+ rt_runq = &runq->rt_runq;
+ rt_runq->bitmap = 0;
+
+ for (i = 0; i < ARRAY_SIZE(rt_runq->threads); i++)
+ list_init(&rt_runq->threads[i]);
+}
+
+static void __init
+thread_ts_group_init(struct thread_ts_group *group)
+{
+ list_init(&group->threads);
+ group->weight = 0;
+ group->work = 0;
+}
+
+static void __init
+thread_runq_init_ts(struct thread_runq *runq)
+{
+ struct thread_ts_runq *ts_runq;
+ size_t i;
+
+ ts_runq = &runq->ts_runq;
+
+ for (i = 0; i < ARRAY_SIZE(ts_runq->group_array); i++)
+ thread_ts_group_init(&ts_runq->group_array[i]);
+
+ list_init(&ts_runq->groups);
+ ts_runq->current = NULL;
+ ts_runq->weight = 0;
+ ts_runq->work = 0;
+}
+
+static void __init
+thread_runq_init_idle(struct thread_runq *runq)
+{
+ struct thread *idle;
+
+ /* Make sure preemption is disabled during initialization */
+ idle = &thread_idles[runq - thread_runqs];
idle->flags = 0;
- idle->pinned = 1;
idle->preempt = 1;
+ idle->task = kernel_task;
runq->idle = idle;
- list_init(&runq->threads);
+}
+
+static void __init
+thread_runq_init(struct thread_runq *runq)
+{
+ thread_runq_init_rt(runq);
+ thread_runq_init_ts(runq);
+ thread_runq_init_idle(runq);
+}
+
+static int
+thread_runq_add(struct thread_runq *runq, struct thread *thread)
+{
+ assert(!cpu_intr_enabled());
+
+ return thread_sched_ops[thread->sched_class].add(runq, thread);
}
static void
-thread_runq_enqueue(struct thread_runq *runq, struct thread *thread)
+thread_runq_put_prev(struct thread_runq *runq, struct thread *thread)
{
assert(!cpu_intr_enabled());
- list_insert_tail(&runq->threads, &thread->runq_node);
+
+ thread_sched_ops[thread->sched_class].put_prev(runq, thread);
}
static struct thread *
-thread_runq_dequeue(struct thread_runq *runq)
+thread_runq_get_next(struct thread_runq *runq)
{
struct thread *thread;
+ unsigned int i;
assert(!cpu_intr_enabled());
- if (list_empty(&runq->threads))
- thread = NULL;
- else {
- thread = list_first_entry(&runq->threads, struct thread, runq_node);
- list_remove(&thread->runq_node);
+ for (i = 0; i < ARRAY_SIZE(thread_sched_ops); i++) {
+ thread = thread_sched_ops[i].get_next(runq);
+
+ if (thread != NULL)
+ return thread;
}
- return thread;
+ /* The idle class should never be empty */
+ panic("thread: unable to find next thread");
}
static inline struct thread_runq *
@@ -97,13 +220,410 @@ thread_runq_local(void)
return &thread_runqs[cpu_id()];
}
+static void
+thread_sched_rt_init_thread(struct thread *thread, unsigned short priority)
+{
+ assert(priority <= THREAD_SCHED_RT_PRIO_MAX);
+ thread->rt_ctx.priority = priority;
+ thread->rt_ctx.time_slice = THREAD_DEFAULT_RR_TIME_SLICE;
+}
+
+static int
+thread_sched_rt_add(struct thread_runq *runq, struct thread *thread)
+{
+ struct thread_rt_runq *rt_runq;
+ struct list *threads;
+
+ rt_runq = &runq->rt_runq;
+ threads = &rt_runq->threads[thread->rt_ctx.priority];
+ list_insert_tail(threads, &thread->rt_ctx.node);
+
+ if (list_singular(threads))
+ rt_runq->bitmap |= (1U << thread->rt_ctx.priority);
+
+ return 0;
+}
+
+static void
+thread_sched_rt_remove(struct thread_runq *runq, struct thread *thread)
+{
+ struct thread_rt_runq *rt_runq;
+ struct list *threads;
+
+ rt_runq = &runq->rt_runq;
+ threads = &rt_runq->threads[thread->rt_ctx.priority];
+ list_remove(&thread->rt_ctx.node);
+
+ if (list_empty(threads))
+ rt_runq->bitmap &= ~(1U << thread->rt_ctx.priority);
+}
+
+static void
+thread_sched_rt_put_prev(struct thread_runq *runq, struct thread *thread)
+{
+ thread_sched_rt_add(runq, thread);
+}
+
+static struct thread *
+thread_sched_rt_get_next(struct thread_runq *runq)
+{
+ struct thread_rt_runq *rt_runq;
+ struct thread *thread;
+ struct list *threads;
+ unsigned int priority;
+
+ rt_runq = &runq->rt_runq;
+
+ if (rt_runq->bitmap == 0)
+ return NULL;
+
+ priority = THREAD_SCHED_RT_PRIO_MAX - __builtin_clz(rt_runq->bitmap);
+ threads = &rt_runq->threads[priority];
+ assert(!list_empty(threads));
+ thread = list_first_entry(threads, struct thread, rt_ctx.node);
+ thread_sched_rt_remove(runq, thread);
+ return thread;
+}
+
+static void
+thread_sched_rt_tick(struct thread_runq *runq, struct thread *thread)
+{
+ (void)runq;
+
+ if (thread->sched_policy != THREAD_SCHED_POLICY_RR)
+ return;
+
+ thread->rt_ctx.time_slice--;
+
+ if (thread->rt_ctx.time_slice > 0)
+ return;
+
+ thread->rt_ctx.time_slice = THREAD_DEFAULT_RR_TIME_SLICE;
+ thread->flags |= THREAD_RESCHEDULE;
+}
+
+static void
+thread_sched_ts_init_thread(struct thread *thread, unsigned short priority)
+{
+ assert(priority <= THREAD_SCHED_TS_PRIO_MAX);
+ thread->ts_ctx.weight = priority + 1;
+}
+
+static unsigned int
+thread_sched_ts_add_scale(unsigned int total_work, unsigned int total_weight,
+ unsigned short thread_weight)
+{
+ assert(total_weight != 0);
+
+#ifndef __LP64__
+ if (likely(total_work < 0x10000))
+ return (total_work * thread_weight) / total_weight;
+#endif /* __LP64__ */
+
+ return (unsigned int)(((unsigned long long)total_work * thread_weight)
+ / total_weight);
+}
+
+static unsigned int
+thread_sched_ts_add_scale_group(unsigned int group_work,
+ unsigned int old_group_weight,
+ unsigned int new_group_weight)
+{
+#ifndef __LP64__
+ if (likely((group_work < 0x10000) && (new_group_weight < 0x10000)))
+ return (group_work * new_group_weight) / old_group_weight;
+#endif /* __LP64__ */
+
+ return (unsigned int)(((unsigned long long)group_work * new_group_weight)
+ / old_group_weight);
+}
+
+static int
+thread_sched_ts_add(struct thread_runq *runq, struct thread *thread)
+{
+ struct thread_ts_runq *ts_runq;
+ struct thread_ts_group *group, *tmp;
+ struct list *node, *init_node;
+ unsigned int work, group_weight, total_weight;
+
+ ts_runq = &runq->ts_runq;
+ group = &ts_runq->group_array[thread->ts_ctx.weight - 1];
+
+ group_weight = group->weight + thread->ts_ctx.weight;
+
+ if (group_weight < group->weight)
+ return ERROR_AGAIN;
+
+ total_weight = ts_runq->weight + thread->ts_ctx.weight;
+
+ if (total_weight < ts_runq->weight)
+ return ERROR_AGAIN;
+
+ node = (group->weight == 0)
+ ? list_last(&ts_runq->groups)
+ : list_prev(&group->node);
+ init_node = node;
+
+ while (!list_end(&ts_runq->groups, node)) {
+ tmp = list_entry(node, struct thread_ts_group, node);
+
+ if (tmp->weight >= group_weight)
+ break;
+
+ node = list_prev(node);
+ }
+
+ if (group->weight == 0)
+ list_insert_after(node, &group->node);
+ else if (node != init_node) {
+ list_remove(&group->node);
+ list_insert_after(node, &group->node);
+ }
+
+ if (ts_runq->current == NULL)
+ ts_runq->current = group;
+ else {
+ work = (group->weight == 0)
+ ? thread_sched_ts_add_scale(ts_runq->work, ts_runq->weight,
+ thread->ts_ctx.weight)
+ : thread_sched_ts_add_scale_group(group->work, group->weight,
+ group_weight);
+
+ assert(work <= thread->ts_ctx.weight);
+ ts_runq->work += work;
+ group->work += work;
+ }
+
+ ts_runq->weight = total_weight;
+ group->weight = group_weight;
+ list_insert_tail(&group->threads, &thread->ts_ctx.node);
+ return 0;
+}
+
+static unsigned int
+thread_sched_ts_remove_scale(unsigned int group_work,
+ unsigned int old_group_weight,
+ unsigned int new_group_weight)
+{
+#ifndef __LP64__
+ if (likely((group_work < 0x10000) && (new_group_weight < 0x10000)))
+ return DIV_CEIL(group_work * new_group_weight, old_group_weight);
+#endif /* __LP64__ */
+
+ return (unsigned int)DIV_CEIL((unsigned long long)group_work
+ * new_group_weight,
+ old_group_weight);
+}
+
+static void
+thread_sched_ts_remove(struct thread_runq *runq, struct thread *thread)
+{
+ struct thread_ts_runq *ts_runq;
+ struct thread_ts_group *group, *tmp;
+ struct list *node, *init_node;
+ unsigned int work, group_weight;
+
+ ts_runq = &runq->ts_runq;
+ group = &ts_runq->group_array[thread->ts_ctx.weight - 1];
+
+ list_remove(&thread->ts_ctx.node);
+
+ group_weight = group->weight - thread->ts_ctx.weight;
+ work = thread_sched_ts_remove_scale(group->work, group->weight,
+ group_weight);
+ assert(work <= thread->ts_ctx.weight);
+ group->weight -= thread->ts_ctx.weight;
+ assert(work <= group->work);
+ group->work -= work;
+ ts_runq->weight -= thread->ts_ctx.weight;
+ assert(work <= ts_runq->work);
+ ts_runq->work -= work;
+
+ if (ts_runq->weight == 0)
+ ts_runq->current = NULL;
+
+ if (group->weight == 0)
+ list_remove(&group->node);
+ else {
+ node = list_next(&group->node);
+ init_node = node;
+
+ while (!list_end(&ts_runq->groups, node)) {
+ tmp = list_entry(node, struct thread_ts_group, node);
+
+ if (tmp->weight <= group->weight)
+ break;
+
+ node = list_next(node);
+ }
+
+ if (node != init_node) {
+ list_remove(&group->node);
+ list_insert_before(node, &group->node);
+ }
+ }
+}
+
+static void
+thread_sched_ts_put_prev(struct thread_runq *runq, struct thread *thread)
+{
+ struct thread_ts_runq *ts_runq;
+ struct thread_ts_group *group;
+
+ ts_runq = &runq->ts_runq;
+ group = &ts_runq->group_array[thread->ts_ctx.weight - 1];
+ assert(group == ts_runq->current);
+
+ list_insert_tail(&group->threads, &thread->ts_ctx.node);
+}
+
+static int
+thread_sched_ts_ratio_exceeded(struct thread_ts_group *current,
+ struct thread_ts_group *next)
+{
+ unsigned long long a, b;
+
+#ifndef __LP64__
+ unsigned int ia, ib;
+
+ if (likely((current->weight < 0x10000) && (next->weight < 0x10000))) {
+ ia = (current->work + 1) * next->weight;
+ ib = (next->work + 1) * current->weight;
+ return ia > ib;
+ }
+#endif /* __LP64__ */
+
+ a = ((unsigned long long)current->work + 1) * next->weight;
+ b = ((unsigned long long)next->work + 1) * current->weight;
+ return a > b;
+}
+
+static struct thread *
+thread_sched_ts_get_next(struct thread_runq *runq)
+{
+ struct thread_ts_runq *ts_runq;
+ struct thread_ts_group *group, *next;
+ struct thread *thread;
+ struct list *node;
+
+ ts_runq = &runq->ts_runq;
+
+ if (ts_runq->weight == 0)
+ return NULL;
+
+ group = ts_runq->current;
+ node = list_next(&group->node);
+
+ if (list_end(&ts_runq->groups, node)) {
+ node = list_first(&ts_runq->groups);
+ group = list_entry(node, struct thread_ts_group, node);
+ } else {
+ next = list_entry(node, struct thread_ts_group, node);
+
+ if (thread_sched_ts_ratio_exceeded(group, next))
+ group = next;
+ else {
+ node = list_first(&ts_runq->groups);
+ group = list_entry(node, struct thread_ts_group, node);
+ }
+ }
+
+ ts_runq->current = group;
+ thread = list_first_entry(&group->threads, struct thread, ts_ctx.node);
+ list_remove(&thread->ts_ctx.node);
+ return thread;
+}
+
+static void
+thread_sched_ts_reset(struct thread_ts_runq *ts_runq)
+{
+ struct thread_ts_group *group;
+
+ ts_runq->work = 0;
+
+ list_for_each_entry(&ts_runq->groups, group, node) {
+ assert(group->work == group->weight);
+ group->work = 0;
+ }
+}
+
+static void
+thread_sched_ts_tick(struct thread_runq *runq, struct thread *thread)
+{
+ struct thread_ts_runq *ts_runq;
+ struct thread_ts_group *group;
+
+ ts_runq = &runq->ts_runq;
+ group = &ts_runq->group_array[thread->ts_ctx.weight - 1];
+ assert(group == ts_runq->current);
+
+ thread->flags |= THREAD_RESCHEDULE;
+
+ group->work++;
+ ts_runq->work++;
+
+ if (ts_runq->work == ts_runq->weight)
+ thread_sched_ts_reset(ts_runq);
+}
+
+static void
+thread_sched_idle_init_thread(struct thread *thread, unsigned short priority)
+{
+ (void)thread;
+ (void)priority;
+}
+
+static void __noreturn
+thread_sched_idle_panic(void)
+{
+ panic("thread: only idle threads are allowed in the idle class");
+}
+
+static int
+thread_sched_idle_add(struct thread_runq *runq, struct thread *thread)
+{
+ (void)runq;
+ (void)thread;
+
+ thread_sched_idle_panic();
+}
+
+static void
+thread_sched_idle_remove(struct thread_runq *runq, struct thread *thread)
+{
+ (void)runq;
+ (void)thread;
+
+ thread_sched_idle_panic();
+}
+
+static void
+thread_sched_idle_put_prev(struct thread_runq *runq, struct thread *thread)
+{
+ (void)runq;
+ (void)thread;
+}
+
+static struct thread *
+thread_sched_idle_get_next(struct thread_runq *runq)
+{
+ return runq->idle;
+}
+
+static void
+thread_sched_idle_tick(struct thread_runq *runq, struct thread *thread)
+{
+ (void)runq;
+ (void)thread;
+}
+
void __init
thread_bootstrap(void)
{
size_t i;
for (i = 0; i < ARRAY_SIZE(thread_runqs); i++)
- thread_runq_init(&thread_runqs[i], &thread_idles[i]);
+ thread_runq_init(&thread_runqs[i]);
tcb_set_current(&thread_idles[0].tcb);
}
@@ -117,6 +637,37 @@ thread_ap_bootstrap(void)
void __init
thread_setup(void)
{
+ struct thread_sched_ops *ops;
+
+ thread_policy_table[THREAD_SCHED_POLICY_FIFO] = THREAD_SCHED_CLASS_RT;
+ thread_policy_table[THREAD_SCHED_POLICY_RR] = THREAD_SCHED_CLASS_RT;
+ thread_policy_table[THREAD_SCHED_POLICY_TS] = THREAD_SCHED_CLASS_TS;
+ thread_policy_table[THREAD_SCHED_POLICY_IDLE] = THREAD_SCHED_CLASS_IDLE;
+
+ ops = &thread_sched_ops[THREAD_SCHED_CLASS_RT];
+ ops->init_thread = thread_sched_rt_init_thread;
+ ops->add = thread_sched_rt_add;
+ ops->remove = thread_sched_rt_remove;
+ ops->put_prev = thread_sched_rt_put_prev;
+ ops->get_next = thread_sched_rt_get_next;
+ ops->tick = thread_sched_rt_tick;
+
+ ops = &thread_sched_ops[THREAD_SCHED_CLASS_TS];
+ ops->init_thread = thread_sched_ts_init_thread;
+ ops->add = thread_sched_ts_add;
+ ops->remove = thread_sched_ts_remove;
+ ops->put_prev = thread_sched_ts_put_prev;
+ ops->get_next = thread_sched_ts_get_next;
+ ops->tick = thread_sched_ts_tick;
+
+ ops = &thread_sched_ops[THREAD_SCHED_CLASS_IDLE];
+ ops->init_thread = thread_sched_idle_init_thread;
+ ops->add = thread_sched_idle_add;
+ ops->remove = thread_sched_idle_remove;
+ ops->put_prev = thread_sched_idle_put_prev;
+ ops->get_next = thread_sched_idle_get_next;
+ ops->tick = thread_sched_idle_tick;
+
kmem_cache_init(&thread_cache, "thread", sizeof(struct thread),
CPU_L1_SIZE, NULL, NULL, NULL, 0);
kmem_cache_init(&thread_stack_cache, "thread_stack", STACK_SIZE,
@@ -143,28 +694,48 @@ thread_main(void)
}
static void
-thread_init(struct thread *thread, struct task *task, void *stack,
- const char *name, void (*fn)(void *), void *arg)
+thread_init_sched(struct thread *thread, unsigned short priority)
{
+ thread_sched_ops[thread->sched_class].init_thread(thread, priority);
+}
+
+static void
+thread_init(struct thread *thread, void *stack, const struct thread_attr *attr,
+ void (*fn)(void *), void *arg)
+{
+ const char *name;
+ struct task *task;
+
tcb_init(&thread->tcb, stack, thread_main);
- if (name == NULL)
- name = task->name;
+ if (attr == NULL)
+ attr = &thread_default_attr;
+
+ task = (attr->task == NULL) ? thread_current()->task : attr->task;
+ assert(task != NULL);
+ name = (attr->name == NULL) ? task->name : attr->name;
+ assert(name != NULL);
+ assert(attr->sched_policy < THREAD_NR_SCHED_POLICIES);
thread->flags = 0;
thread->pinned = 0;
thread->preempt = 1;
+ thread->sched_policy = attr->sched_policy;
+ thread->sched_class = thread_policy_table[attr->sched_policy];
+ thread_init_sched(thread, attr->priority);
thread->task = task;
thread->stack = stack;
strlcpy(thread->name, name, sizeof(thread->name));
thread->fn = fn;
thread->arg = arg;
+
task_add_thread(task, thread);
}
int
-thread_create(struct thread **threadp, struct task *task, const char *name,
+thread_create(struct thread **threadp, const struct thread_attr *attr,
void (*fn)(void *), void *arg)
+
{
struct thread *thread;
unsigned long flags;
@@ -185,15 +756,20 @@ thread_create(struct thread **threadp, struct task *task, const char *name,
goto error_stack;
}
- thread_init(thread, task, stack, name, fn, arg);
+ thread_init(thread, stack, attr, fn, arg);
flags = cpu_intr_save();
- thread_runq_enqueue(&thread_runqs[cpu_id()], thread);
+ error = thread_runq_add(&thread_runqs[cpu_id()], thread);
cpu_intr_restore(flags);
+ if (error)
+ goto error_runq;
+
*threadp = thread;
return 0;
+error_runq:
+ kmem_cache_free(&thread_stack_cache, stack);
error_stack:
kmem_cache_free(&thread_cache, thread);
error_thread:
@@ -213,7 +789,8 @@ static void __init
thread_setup_idle(void)
{
char name[THREAD_NAME_SIZE];
- struct thread_runq *runq;
+ struct thread_attr attr;
+ unsigned int cpu;
void *stack;
stack = kmem_cache_alloc(&thread_stack_cache);
@@ -221,27 +798,32 @@ thread_setup_idle(void)
if (stack == NULL)
panic("thread: unable to allocate idle thread stack");
- snprintf(name, sizeof(name), "idle%u", cpu_id());
- runq = thread_runq_local();
- thread_init(runq->idle, kernel_task, stack, name, thread_idle, NULL);
+ /*
+ * Having interrupts enabled was required to allocate the stack, but
+ * at this stage, the idle thread is still the current thread, so disable
+ * interrupts while initializing it.
+ */
+ cpu_intr_disable();
+
+ cpu = cpu_id();
+ snprintf(name, sizeof(name), "idle%u", cpu);
+ attr.task = kernel_task;
+ attr.name = name;
+ attr.sched_policy = THREAD_SCHED_POLICY_IDLE;
+ thread_init(&thread_idles[cpu], stack, &attr, thread_idle, NULL);
}
void __init
thread_run(void)
{
- struct thread_runq *runq;
struct thread *thread;
assert(cpu_intr_enabled());
+ /* This call disables interrupts */
thread_setup_idle();
- cpu_intr_disable();
- runq = thread_runq_local();
- thread = thread_runq_dequeue(runq);
-
- if (thread == NULL)
- thread = runq->idle;
+ thread = thread_runq_get_next(thread_runq_local());
if (thread->task != kernel_task)
pmap_load(thread->task->map->pmap);
@@ -273,16 +855,9 @@ thread_schedule(void)
runq = thread_runq_local();
prev = thread_current();
- assert(prev != NULL);
-
- if (prev != runq->idle)
- thread_runq_enqueue(runq, prev);
-
prev->flags &= ~THREAD_RESCHEDULE;
- next = thread_runq_dequeue(runq);
-
- if (next == NULL)
- next = runq->idle;
+ thread_runq_put_prev(runq, prev);
+ next = thread_runq_get_next(runq);
if (prev != next)
thread_switch(prev, next);
@@ -326,6 +901,7 @@ thread_tick(void)
assert(!cpu_intr_enabled());
thread = thread_current();
- assert(thread != NULL);
- thread->flags |= THREAD_RESCHEDULE;
+ thread_preempt_disable();
+ thread_sched_ops[thread->sched_class].tick(thread_runq_local(), thread);
+ thread_preempt_enable_no_resched();
}
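Before moving on to the header changes, here is a small standalone model of the proportional-fairness test used by thread_sched_ts_get_next() above. The group weights are made up, the stand-in struct keeps only the two fields the comparison reads, and the full group rotation and round reset logic are deliberately left out.

    #include <stdio.h>

    /* Reduced stand-in for struct thread_ts_group: only the fields read by
     * the ratio test. */
    struct group {
        unsigned int weight;    /* sum of the weights of the member threads */
        unsigned int work;      /* ticks consumed during the current round */
    };

    /* Same cross-multiplication as thread_sched_ts_ratio_exceeded(): the
     * current group yields once its work/weight ratio moves ahead of the
     * next group's, without performing a division. */
    static int
    ratio_exceeded(const struct group *current, const struct group *next)
    {
        unsigned long long a, b;

        a = ((unsigned long long)current->work + 1) * next->weight;
        b = ((unsigned long long)next->work + 1) * current->weight;
        return a > b;
    }

    int
    main(void)
    {
        struct group heavy = { .weight = 3, .work = 0 };
        struct group light = { .weight = 1, .work = 0 };
        int tick;

        /* Over one round of 3 + 1 = 4 ticks, the heavy group is picked three
         * times for each tick granted to the light one. */
        for (tick = 0; tick < 4; tick++) {
            if (ratio_exceeded(&heavy, &light)) {
                light.work++;
                printf("tick %d: light group runs\n", tick);
            } else {
                heavy.work++;
                printf("tick %d: heavy group runs\n", tick);
            }
        }

        return 0;
    }

This 3:1 interleaving is the proportional fairness the commit message refers to, and the cross-multiplication keeps the comparison in integer arithmetic so no division is needed on the scheduling path.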
diff --git a/kern/thread.h b/kern/thread.h
index 7775576a..ac9ad6d8 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012 Richard Braun.
+ * Copyright (c) 2012, 2013 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -41,6 +41,60 @@ struct task;
#define THREAD_RESCHEDULE 0x1 /* Thread marked for reschedule */
/*
+ * Scheduling policies.
+ *
+ * The idle policy is reserved for the per-CPU idle threads.
+ */
+#define THREAD_SCHED_POLICY_FIFO 0
+#define THREAD_SCHED_POLICY_RR 1
+#define THREAD_SCHED_POLICY_TS 2
+#define THREAD_SCHED_POLICY_IDLE 3
+#define THREAD_NR_SCHED_POLICIES 4
+
+/*
+ * Scheduling classes.
+ *
+ * Classes are sorted by order of priority (lower indexes first). The same
+ * class can apply to several policies.
+ *
+ * The idle class is reserved for the per-CPU idle threads.
+ */
+#define THREAD_SCHED_CLASS_RT 0
+#define THREAD_SCHED_CLASS_TS 1
+#define THREAD_SCHED_CLASS_IDLE 2
+#define THREAD_NR_SCHED_CLASSES 3
+
+/*
+ * Real-time priority properties.
+ */
+#define THREAD_SCHED_RT_PRIO_MIN 0
+#define THREAD_SCHED_RT_PRIO_MAX 31
+
+/*
+ * Scheduling context of a real-time thread.
+ */
+struct thread_rt_ctx {
+ struct list node;
+ unsigned short priority;
+ unsigned short time_slice;
+};
+
+/*
+ * Time-sharing priority properties.
+ */
+#define THREAD_SCHED_TS_PRIO_MIN 0
+#define THREAD_SCHED_TS_PRIO_DEFAULT 20
+#define THREAD_SCHED_TS_PRIO_MAX 39
+
+/*
+ * Scheduling context of a time-sharing thread.
+ */
+struct thread_ts_ctx {
+ struct list node;
+ unsigned short weight;
+};
+
+/*
* Thread structure.
*/
struct thread {
@@ -48,9 +102,19 @@ struct thread {
short flags;
unsigned short pinned;
unsigned short preempt;
- struct list runq_node;
- struct list task_node;
+
+ /* Common scheduling properties */
+ unsigned char sched_policy;
+ unsigned char sched_class;
+
+ /* Scheduling class specific contexts */
+ union {
+ struct thread_rt_ctx rt_ctx;
+ struct thread_ts_ctx ts_ctx;
+ };
+
struct task *task;
+ struct list task_node;
void *stack;
char name[THREAD_NAME_SIZE];
void (*fn)(void *);
@@ -58,6 +122,16 @@ struct thread {
} __aligned(CPU_L1_SIZE);
/*
+ * Thread creation attributes.
+ */
+struct thread_attr {
+ struct task *task;
+ const char *name;
+ unsigned char sched_policy;
+ unsigned short priority;
+};
+
+/*
* Early initialization of the thread module.
*
* This function makes it possible to use migration and preemption control
@@ -78,9 +152,12 @@ void thread_setup(void);
/*
* Create a thread.
*
- * If the given name is null, the task name is used instead.
+ * If the given attributes are NULL, default attributes are used. If the task
+ * is NULL, the caller task is selected. If the name is NULL, the task name is
+ * used instead. The default attributes also select the caller task and task
+ * name.
*/
-int thread_create(struct thread **threadp, struct task *task, const char *name,
+int thread_create(struct thread **threadp, const struct thread_attr *attr,
void (*fn)(void *), void *arg);
/*