-rw-r--r--  Makefile.am                      1
-rw-r--r--  Makefrag.am                      6
-rw-r--r--  arch/x86/machine/acpi.c          4
-rw-r--r--  arch/x86/machine/lapic.c         8
-rw-r--r--  arch/x86/machine/pit.c          43
-rw-r--r--  doc/intro.9.txt                 27
-rw-r--r--  kern/clock.c                   101
-rw-r--r--  kern/clock.h                   123
-rw-r--r--  kern/clock_i.h                  42
-rw-r--r--  kern/condition.c               110
-rw-r--r--  kern/condition.h                17
-rw-r--r--  kern/condition_types.h           3
-rw-r--r--  kern/mutex.h                    18
-rw-r--r--  kern/mutex/mutex_adaptive.c    121
-rw-r--r--  kern/mutex/mutex_adaptive_i.h   15
-rw-r--r--  kern/mutex/mutex_pi_i.h          8
-rw-r--r--  kern/mutex/mutex_plain.c        55
-rw-r--r--  kern/mutex/mutex_plain_i.h      16
-rw-r--r--  kern/rbtree.h                    2
-rw-r--r--  kern/rtmutex.c                  84
-rw-r--r--  kern/rtmutex.h                  14
-rw-r--r--  kern/rtmutex_i.h                 2
-rw-r--r--  kern/semaphore.c                37
-rw-r--r--  kern/semaphore.h                15
-rw-r--r--  kern/semaphore_i.h               3
-rw-r--r--  kern/sleepq.c                  147
-rw-r--r--  kern/sleepq.h                   31
-rw-r--r--  kern/thread.c                  163
-rw-r--r--  kern/thread.h                   34
-rw-r--r--  kern/thread_i.h                  1
-rw-r--r--  kern/timer.c                   536
-rw-r--r--  kern/timer.h                   103
-rw-r--r--  kern/timer_i.h                  41
-rw-r--r--  kern/turnstile.c                65
-rw-r--r--  kern/turnstile.h                12
-rw-r--r--  kern/work.h                      2
-rw-r--r--  kern/xcall.c                     1
37 files changed, 1784 insertions, 227 deletions
diff --git a/Makefile.am b/Makefile.am
index 413ac6db..de3e5749 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -32,6 +32,7 @@ AM_CFLAGS += \
-fsigned-char \
-fno-common
+# TODO Add option
AM_CFLAGS += -fno-stack-protector
AM_CFLAGS += -nostdlib
diff --git a/Makefrag.am b/Makefrag.am
index 1b39b724..f29e5926 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -20,6 +20,9 @@ x15_SOURCES += \
kern/bitmap_i.h \
kern/cbuf.c \
kern/cbuf.h \
+ kern/clock.c \
+ kern/clock.h \
+ kern/clock_i.h \
kern/condition.c \
kern/condition.h \
kern/condition_types.h \
@@ -108,6 +111,9 @@ x15_SOURCES += \
kern/thread.c \
kern/thread.h \
kern/thread_i.h \
+ kern/timer.c \
+ kern/timer.h \
+ kern/timer_i.h \
kern/turnstile.c \
kern/turnstile.h \
kern/turnstile_types.h \
diff --git a/arch/x86/machine/acpi.c b/arch/x86/machine/acpi.c
index 77202267..4ef52e3d 100644
--- a/arch/x86/machine/acpi.c
+++ b/arch/x86/machine/acpi.c
@@ -36,6 +36,7 @@
#include <machine/ioapic.h>
#include <machine/lapic.h>
#include <machine/pic.h>
+#include <machine/pit.h>
#include <machine/types.h>
#include <vm/vm_kmem.h>
@@ -693,9 +694,10 @@ error:
* For the sake of simplicity, it has been decided to ignore legacy
* specifications such as the multiprocessor specification, and use
* ACPI only. If ACPI is unavailable, consider the APIC system to
- * be missing and fall back to using the legacy XT-PIC.
+ * be missing and fall back to using the legacy XT-PIC and PIT.
*/
pic_setup();
+ pit_setup();
return 0;
}
diff --git a/arch/x86/machine/lapic.c b/arch/x86/machine/lapic.c
index a0eee852..1a81a180 100644
--- a/arch/x86/machine/lapic.c
+++ b/arch/x86/machine/lapic.c
@@ -20,11 +20,11 @@
#include <stddef.h>
#include <stdint.h>
+#include <kern/clock.h>
#include <kern/init.h>
#include <kern/log.h>
#include <kern/macros.h>
#include <kern/panic.h>
-#include <kern/thread.h>
#include <machine/cpu.h>
#include <machine/lapic.h>
#include <machine/pmap.h>
@@ -211,7 +211,7 @@ lapic_compute_freq(void)
lapic_bus_freq = (c1 - c2) * (1000000 / LAPIC_TIMER_CAL_DELAY);
log_info("lapic: bus frequency: %u.%02u MHz", lapic_bus_freq / 1000000,
lapic_bus_freq % 1000000);
- lapic_write(&lapic_map->timer_icr, lapic_bus_freq / THREAD_TICK_FREQ);
+ lapic_write(&lapic_map->timer_icr, lapic_bus_freq / CLOCK_FREQ);
lapic_write(&lapic_map->svr, 0);
}
@@ -238,7 +238,7 @@ lapic_setup_registers(void)
lapic_write(&lapic_map->lvt_lint1, LAPIC_LVT_MASK_INTR);
lapic_write(&lapic_map->lvt_error, TRAP_LAPIC_ERROR);
lapic_write(&lapic_map->timer_dcr, LAPIC_TIMER_DCR_DIV1);
- lapic_write(&lapic_map->timer_icr, lapic_bus_freq / THREAD_TICK_FREQ);
+ lapic_write(&lapic_map->timer_icr, lapic_bus_freq / CLOCK_FREQ);
}
void __init
@@ -333,7 +333,7 @@ lapic_timer_intr(struct trap_frame *frame)
(void)frame;
lapic_eoi();
- thread_tick_intr();
+ clock_tick_intr();
}
void
diff --git a/arch/x86/machine/pit.c b/arch/x86/machine/pit.c
index c31c7e0b..86513306 100644
--- a/arch/x86/machine/pit.c
+++ b/arch/x86/machine/pit.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011, 2012 Richard Braun.
+ * Copyright (c) 2011-2017 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -17,7 +17,11 @@
#include <assert.h>
+#include <kern/clock.h>
#include <kern/init.h>
+#include <kern/intr.h>
+#include <kern/log.h>
+#include <kern/macros.h>
#include <machine/io.h>
#include <machine/pit.h>
@@ -45,19 +49,46 @@
*/
#define PIT_MAX_COUNT 0xffff
-void __init
-pit_setup_free_running(void)
+/*
+ * Timer interrupt.
+ */
+#define PIT_INTR 0
+
+static int
+pit_intr(void *arg)
+{
+ (void)arg;
+
+ clock_tick_intr();
+ return 0;
+}
+
+static void __init
+pit_setup_common(uint16_t count)
{
io_write_byte(PIT_PORT_MODE, PIT_MODE_RATE_GEN | PIT_MODE_RW_LSB
| PIT_MODE_RW_MSB);
- io_write_byte(PIT_PORT_COUNTER0, PIT_MAX_COUNT & 0xff);
- io_write_byte(PIT_PORT_COUNTER0, PIT_MAX_COUNT >> 8);
+ io_write_byte(PIT_PORT_COUNTER0, count & 0xff);
+ io_write_byte(PIT_PORT_COUNTER0, count >> 8);
+}
+
+void __init
+pit_setup_free_running(void)
+{
+ pit_setup_common(PIT_MAX_COUNT);
}
void __init
pit_setup(void)
{
- /* TODO Implement */
+ int error;
+
+ pit_setup_common(DIV_CEIL(PIT_FREQ, CLOCK_FREQ));
+ error = intr_register(PIT_INTR, pit_intr, NULL);
+
+ if (error) {
+ log_err("pit: unable to register interrupt handler");
+ }
}
static unsigned int
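
As a side note on the PIT reload value above: it follows directly from the PIT input frequency and CLOCK_FREQ. A worked sketch, assuming the canonical 1193182 Hz PIT input clock (the actual value comes from the PIT_FREQ macro in machine/pit.h):

    count = DIV_CEIL(PIT_FREQ, CLOCK_FREQ)
          = DIV_CEIL(1193182, 200)
          = 5966

    /* tick period: 5966 / 1193182 s ~= 5.0003 ms, i.e. rounded up so the
       period is never shorter than the ideal 1 / CLOCK_FREQ = 5 ms */
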
diff --git a/doc/intro.9.txt b/doc/intro.9.txt
index 5af5a4e8..5b7ee2b1 100644
--- a/doc/intro.9.txt
+++ b/doc/intro.9.txt
@@ -102,6 +102,10 @@ module:kern/thread::
module:kern/work::
Work queue of deferred asynchronous lightweight jobs.
+All wait functions on synchronization objects can be time-bounded.
+This includes waiting for a mutex lock, a condition variable, or a
+semaphore.
+
Mutex implementations
~~~~~~~~~~~~~~~~~~~~~
@@ -134,6 +138,8 @@ module:kern/bitmap::
Arbitrary-length bit array.
module:kern/cbuf::
Circular character buffer.
+module:kern/clock::
+ Low resolution clock.
module:kern/error::
Common errors and error handling functions.
module:kern/hash::
@@ -158,6 +164,8 @@ module:kern/sprintf::
Formatted string functions.
module:kern/syscnt::
Generic 64-bits counter.
+module:kern/timer::
+ Low resolution timer.
X15 doesn't provide a generic queue interface, because the requirements
often vary too much. Similarly, it doesn't provide a hash table interface.
@@ -202,13 +210,13 @@ TODO Write when the virtual memory system is rewritten.
REAL-TIME
---------
-X15 complies with almost all the requirements of a true hard real-time
-multiprocessor system. It is a fully preemptible kernel with short,
-bounded preemption-based critical sections. It provides real-time
-scheduling policies and a complete priority inheritance algorithm.
-Preemption and interrupts are clearly decoupled so that interrupts
-can remain enabled as much as possible. Multiprocessor synchronization
-uses rigorously fair spin locks. The modules related to real-time are :
+X15 complies with all the requirements of a real-time multiprocessor
+system. It is a fully preemptible kernel with short, bounded
+preemption-based critical sections. It provides real-time scheduling
+policies and a complete priority inheritance algorithm. Preemption and
+interrupts are clearly decoupled so that interrupts can remain enabled
+as much as possible. Multiprocessor synchronization uses rigorously
+fair spin locks. The modules related to real-time are :
module:kern/rtmutex::
Mutual exclusion with priority inheritance.
@@ -225,8 +233,7 @@ Priority inheritance can also be enabled for regular mutexes. Please read
Victor Yodaiken's report {against-priority-inheritance} in order to fully
understand the implications of relying on priority inheritance.
-TODO X15 doesn't yet comply with all the requirements for hard real-time.
-For that, it still needs a high resolution timer system.
+TODO Separate man page with more description
[[portability]]
PORTABILITY
@@ -259,6 +266,8 @@ the future. In addition, the machine-independent code assumes an almost
completely relaxed memory model, but still expects no reordering between
dependent loads. This model closely matches the ARM family of processors.
+TODO Fix memory model description
+
[[posix_like_interface]]
POSIX-LIKE INTERFACE
--------------------
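
To illustrate the time-bounded waits documented above, here is a minimal, hypothetical sketch combining the new clock and timed-wait interfaces. The example_* names and the 100 ms timeout are assumptions made for illustration only, not code from this commit:

    #include <stdbool.h>
    #include <stdint.h>

    #include <kern/clock.h>
    #include <kern/condition.h>
    #include <kern/error.h>
    #include <kern/mutex.h>

    static struct mutex example_lock;
    static struct condition example_cond;
    static bool example_ready;

    static int
    example_wait_ready(void)
    {
        uint64_t ticks;
        int error;

        /* Timed waits take an absolute time, in ticks */
        ticks = clock_get_time() + clock_ticks_from_ms(100);

        error = mutex_timedlock(&example_lock, ticks);

        if (error) {
            return error;
        }

        while (!example_ready && !error) {
            error = condition_timedwait(&example_cond, &example_lock, ticks);
        }

        mutex_unlock(&example_lock);
        return error;   /* 0 or ERROR_TIMEDOUT */
    }
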
diff --git a/kern/clock.c b/kern/clock.c
new file mode 100644
index 00000000..b4a6f046
--- /dev/null
+++ b/kern/clock.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+
+#include <kern/atomic.h>
+#include <kern/clock.h>
+#include <kern/clock_i.h>
+#include <kern/init.h>
+#include <kern/llsync.h>
+#include <kern/percpu.h>
+#include <kern/sref.h>
+#include <kern/syscnt.h>
+#include <kern/thread.h>
+#include <kern/timer.h>
+#include <kern/work.h>
+#include <machine/boot.h>
+#include <machine/cpu.h>
+
+struct clock_cpu_data {
+ struct syscnt sc_tick_intrs;
+};
+
+static struct clock_cpu_data clock_cpu_data __percpu;
+
+union clock_global_time clock_global_time;
+
+static inline void __init
+clock_cpu_data_init(struct clock_cpu_data *cpu_data, unsigned int cpu)
+{
+ char name[SYSCNT_NAME_SIZE];
+
+ snprintf(name, sizeof(name), "clock_tick_intrs/%u", cpu);
+ syscnt_register(&cpu_data->sc_tick_intrs, name);
+}
+
+static int __init
+clock_setup(void)
+{
+ for (unsigned int cpu = 0; cpu < cpu_count(); cpu++) {
+ clock_cpu_data_init(percpu_ptr(clock_cpu_data, cpu), cpu);
+ }
+
+ return 0;
+}
+
+INIT_OP_DEFINE(clock_setup,
+ INIT_OP_DEP(boot_setup_intr, true),
+ INIT_OP_DEP(cpu_mp_probe, true),
+ INIT_OP_DEP(syscnt_setup, true));
+
+void clock_tick_intr(void)
+{
+ struct clock_cpu_data *cpu_data;
+
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ if (cpu_id() == 0) {
+#ifdef ATOMIC_HAVE_64B_OPS
+
+ atomic_add(&clock_global_time.ticks, 1, ATOMIC_RELAXED);
+
+#else /* ATOMIC_HAVE_64B_OPS */
+
+ union clock_global_time t;
+
+ t.ticks = clock_global_time.ticks;
+ t.ticks++;
+
+ atomic_store(&clock_global_time.high2, t.high1, ATOMIC_RELAXED);
+ atomic_store_release(&clock_global_time.low, t.low);
+ atomic_store_release(&clock_global_time.high1, t.high1);
+
+#endif /* ATOMIC_HAVE_64B_OPS */
+ }
+
+ timer_report_periodic_event();
+ llsync_report_periodic_event();
+ sref_report_periodic_event();
+ work_report_periodic_event();
+ thread_report_periodic_event();
+
+ cpu_data = cpu_local_ptr(clock_cpu_data);
+ syscnt_inc(&cpu_data->sc_tick_intrs);
+}
diff --git a/kern/clock.h b/kern/clock.h
new file mode 100644
index 00000000..30db0a82
--- /dev/null
+++ b/kern/clock.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Timekeeping module.
+ */
+
+#ifndef _KERN_CLOCK_H
+#define _KERN_CLOCK_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <kern/atomic.h>
+#include <kern/clock_i.h>
+#include <kern/init.h>
+#include <kern/macros.h>
+
+/*
+ * Clock frequency.
+ *
+ * TODO Clock frequency selection.
+ */
+#define CLOCK_FREQ 200
+
+#if (1000 % CLOCK_FREQ) != 0
+#error "invalid clock frequency"
+#endif /* (1000 % CLOCK_FREQ) != 0 */
+
+/*
+ * Arbitrary value used to determine if a time is in the past or the future.
+ *
+ * Time is represented as 64-bit unsigned integers counting ticks. The
+ * global time currently starts from 0 but this isn't a strong assumption
+ * users should rely on. Instead, all time checks involve a time reference
+ * against which to compare. The result of that comparison, done by
+ * subtraction, is either in the future, i.e. the difference is less
+ * than the expire threshold, or in the past, i.e. the difference is
+ * greater (keep in mind the result is unsigned). The threshold must be
+ * large enough to allow both a wide range of possible times in the future,
+ * but also enough time in the past for reliable timeout detection. Note
+ * that using signed integers would be equivalent to dividing the range
+ * in two (almost) equal past and future halves.
+ */
+#define CLOCK_EXPIRE_THRESHOLD (-(1ULL << 60))
+
+static inline uint64_t
+clock_get_time(void)
+{
+ extern union clock_global_time clock_global_time;
+
+#ifdef ATOMIC_HAVE_64B_OPS
+
+ /*
+ * Don't enforce a stronger memory order, since :
+ * 1/ it's useless as long as the reader remains on the same processor
+ * 2/ thread migration enforces sequential consistency
+ */
+ return atomic_load(&clock_global_time.ticks, ATOMIC_RELAXED);
+
+#else /* ATOMIC_HAVE_64B_OPS */
+
+ uint32_t high1, low, high2;
+
+ /*
+ * For machines with no 64-bit atomic accessors, this implementation uses
+ * a variant of the two-digit monotonic-clock algorithm, described in the
+ * paper "Concurrent Reading and Writing of Clocks" by Leslie Lamport.
+ */
+
+ do {
+ high1 = atomic_load_acquire(&clock_global_time.high1);
+ low = atomic_load_acquire(&clock_global_time.low);
+ high2 = atomic_load(&clock_global_time.high2, ATOMIC_RELAXED);
+ } while (high1 != high2);
+
+ return ((uint64_t)high2 << 32) | low;
+
+#endif /* ATOMIC_HAVE_64B_OPS */
+}
+
+static inline uint64_t
+clock_ticks_to_ms(uint64_t ticks)
+{
+ return ticks * (1000 / CLOCK_FREQ);
+}
+
+static inline uint64_t
+clock_ticks_from_ms(uint64_t ms)
+{
+ return DIV_CEIL(ms, (1000 / CLOCK_FREQ));
+}
+
+static inline bool
+clock_time_expired(uint64_t t, uint64_t ref)
+{
+ return (t - ref) > CLOCK_EXPIRE_THRESHOLD;
+}
+
+static inline bool
+clock_time_occurred(uint64_t t, uint64_t ref)
+{
+ return (t == ref) || clock_time_expired(t, ref);
+}
+
+void clock_tick_intr(void);
+
+INIT_OP_DECLARE(clock_setup);
+
+#endif /* _KERN_CLOCK_H */
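
A short sketch of how the comparison helpers above behave, under the assumption that the global time is a free-running 64-bit tick counter; the 100 ms deadline is only an example:

    uint64_t now, deadline;

    now = clock_get_time();
    deadline = now + clock_ticks_from_ms(100);

    /* the deadline is in the future: (deadline - now) is small,
       far below CLOCK_EXPIRE_THRESHOLD */
    assert(!clock_time_expired(deadline, now));

    /* once the reference time passes the deadline, the unsigned
       difference wraps around and exceeds the threshold */
    assert(clock_time_expired(deadline, deadline + 1));

    /* "occurred" also accepts the exact deadline tick */
    assert(clock_time_occurred(deadline, deadline));
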
diff --git a/kern/clock_i.h b/kern/clock_i.h
new file mode 100644
index 00000000..830f48c5
--- /dev/null
+++ b/kern/clock_i.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _KERN_CLOCK_I_H
+#define _KERN_CLOCK_I_H
+
+#include <stdint.h>
+
+#include <machine/cpu.h>
+
+union clock_global_time {
+ alignas(CPU_L1_SIZE) uint64_t ticks;
+
+#ifndef ATOMIC_HAVE_64B_OPS
+ struct {
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ uint32_t high1;
+ uint32_t low;
+#else /* __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ */
+ uint32_t low;
+ uint32_t high1;
+#endif /* __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ */
+ uint32_t high2;
+ };
+#endif /* ATOMIC_HAVE_64B_OPS */
+};
+
+#endif /* _KERN_CLOCK_I_H */
diff --git a/kern/condition.c b/kern/condition.c
index c8ea5f39..e6d65951 100644
--- a/kern/condition.c
+++ b/kern/condition.c
@@ -21,6 +21,7 @@
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
+#include <stdint.h>
#include <kern/condition.h>
#include <kern/condition_types.h>
@@ -28,52 +29,14 @@
#include <kern/sleepq.h>
#include <kern/thread.h>
-static void
-condition_inc_nr_sleeping_waiters(struct condition *condition)
-{
- condition->nr_sleeping_waiters++;
- assert(condition->nr_sleeping_waiters != 0);
-}
-
-static void
-condition_dec_nr_sleeping_waiters(struct condition *condition)
-{
- assert(condition->nr_sleeping_waiters != 0);
- condition->nr_sleeping_waiters--;
-}
-
-static void
-condition_inc_nr_pending_waiters(struct condition *condition)
-{
- condition->nr_pending_waiters++;
- assert(condition->nr_pending_waiters != 0);
-}
-
-static void
-condition_dec_nr_pending_waiters(struct condition *condition)
-{
- assert(condition->nr_pending_waiters != 0);
- condition->nr_pending_waiters--;
-}
-
-static void
-condition_move_waiters(struct condition *condition)
-{
- unsigned short old;
-
- assert(condition->nr_sleeping_waiters != 0);
- old = condition->nr_pending_waiters;
- condition->nr_pending_waiters += condition->nr_sleeping_waiters;
- assert(old < condition->nr_pending_waiters);
- condition->nr_sleeping_waiters = 0;
-}
-
-void
-condition_wait(struct condition *condition, struct mutex *mutex)
+static int
+condition_wait_common(struct condition *condition, struct mutex *mutex,
+ bool timed, uint64_t ticks)
{
struct condition *last_cond;
struct sleepq *sleepq;
unsigned long flags;
+ int error;
mutex_assert_locked(mutex);
@@ -101,23 +64,41 @@ condition_wait(struct condition *condition, struct mutex *mutex)
if (last_cond != NULL) {
assert(last_cond == condition);
-
- if (condition->nr_pending_waiters != 0) {
- sleepq_signal(sleepq);
- }
+ sleepq_wakeup(sleepq);
}
- condition_inc_nr_sleeping_waiters(condition);
- sleepq_wait(sleepq, "cond");
- condition_dec_nr_pending_waiters(condition);
+ if (timed) {
+ error = sleepq_timedwait(sleepq, "cond", ticks);
+ } else {
+ sleepq_wait(sleepq, "cond");
+ error = 0;
+ }
- if (condition->nr_pending_waiters != 0) {
+ if (!error) {
thread_set_last_cond(condition);
}
sleepq_return(sleepq, flags);
mutex_lock(mutex);
+
+ return error;
+}
+
+void
+condition_wait(struct condition *condition, struct mutex *mutex)
+{
+ int error;
+
+ error = condition_wait_common(condition, mutex, false, 0);
+ assert(!error);
+}
+
+int
+condition_timedwait(struct condition *condition,
+ struct mutex *mutex, uint64_t ticks)
+{
+ return condition_wait_common(condition, mutex, true, ticks);
}
void
@@ -132,16 +113,8 @@ condition_signal(struct condition *condition)
return;
}
- if (condition->nr_sleeping_waiters == 0) {
- goto out;
- }
-
sleepq_signal(sleepq);
- condition_dec_nr_sleeping_waiters(condition);
- condition_inc_nr_pending_waiters(condition);
-
-out:
sleepq_release(sleepq, flags);
}
@@ -157,15 +130,8 @@ condition_broadcast(struct condition *condition)
return;
}
- if (condition->nr_sleeping_waiters == 0) {
- goto out;
- }
-
- sleepq_signal(sleepq);
-
- condition_move_waiters(condition);
+ sleepq_broadcast(sleepq);
-out:
sleepq_release(sleepq, flags);
}
@@ -181,17 +147,7 @@ condition_wakeup(struct condition *condition)
return;
}
- if (condition->nr_pending_waiters == 0) {
- goto out;
- }
-
- /*
- * Rely on the FIFO ordering of sleep queues so that signalling multiple
- * times always wakes up the same thread, as long as that thread didn't
- * reacquire the sleep queue.
- */
- sleepq_signal(sleepq);
+ sleepq_wakeup(sleepq);
-out:
sleepq_release(sleepq, flags);
}
diff --git a/kern/condition.h b/kern/condition.h
index 0ce8d94f..90a59f0d 100644
--- a/kern/condition.h
+++ b/kern/condition.h
@@ -27,6 +27,8 @@
#ifndef _KERN_CONDITION_H
#define _KERN_CONDITION_H
+#include <stdint.h>
+
#include <kern/condition_types.h>
#include <kern/mutex_types.h>
@@ -35,20 +37,21 @@ struct condition;
/*
* Initialize a condition variable.
*/
-static inline void
-condition_init(struct condition *condition)
-{
- condition->nr_sleeping_waiters = 0;
- condition->nr_pending_waiters = 0;
-}
+#define condition_init(c) ((void)(c))
/*
- * Wait for a wake-up on the given condition variable.
+ * Wait for a signal on the given condition variable.
*
* The associated mutex must be locked when calling this function.
* It is unlocked before waiting and relocked before returning.
+ *
+ * When bounding the duration of the wait, the caller must pass an absolute
+ * time in ticks, and ERROR_TIMEDOUT is returned if that time is reached
+ * before the sleep queue is signalled.
*/
void condition_wait(struct condition *condition, struct mutex *mutex);
+int condition_timedwait(struct condition *condition,
+ struct mutex *mutex, uint64_t ticks);
/*
* Wake up one (signal) or all (broadcast) threads waiting on a
diff --git a/kern/condition_types.h b/kern/condition_types.h
index 13a29205..abd42f21 100644
--- a/kern/condition_types.h
+++ b/kern/condition_types.h
@@ -22,8 +22,7 @@
#define _KERN_CONDITION_TYPES_H
struct condition {
- unsigned short nr_sleeping_waiters;
- unsigned short nr_pending_waiters;
+ unsigned int _unused;
};
#endif /* _KERN_CONDITION_TYPES_H */
diff --git a/kern/mutex.h b/kern/mutex.h
index f192a70a..8cb7aaca 100644
--- a/kern/mutex.h
+++ b/kern/mutex.h
@@ -27,6 +27,8 @@
#error "only one of X15_MUTEX_PI and X15_MUTEX_ADAPTIVE may be defined"
#endif
+#include <stdint.h>
+
#if defined(X15_MUTEX_PI)
#include <kern/mutex/mutex_pi_i.h>
#elif defined(X15_MUTEX_ADAPTIVE)
@@ -77,6 +79,22 @@ mutex_lock(struct mutex *mutex)
}
/*
+ * Lock a mutex, with a time boundary.
+ *
+ * The time boundary is an absolute time in ticks.
+ *
+ * If successful, the mutex is locked, otherwise an error is returned.
+ * A mutex can only be locked once.
+ *
+ * This function may sleep.
+ */
+static inline int
+mutex_timedlock(struct mutex *mutex, uint64_t ticks)
+{
+ return mutex_impl_timedlock(mutex, ticks);
+}
+
+/*
* Unlock a mutex.
*
* The mutex must be locked, and must have been locked by the calling
diff --git a/kern/mutex/mutex_adaptive.c b/kern/mutex/mutex_adaptive.c
index ffc47169..34fdd221 100644
--- a/kern/mutex/mutex_adaptive.c
+++ b/kern/mutex/mutex_adaptive.c
@@ -15,11 +15,14 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <kern/atomic.h>
+#include <kern/clock.h>
+#include <kern/error.h>
#include <kern/mutex.h>
#include <kern/mutex_types.h>
#include <kern/sleepq.h>
@@ -47,20 +50,23 @@ mutex_adaptive_is_owner(struct mutex *mutex, uintptr_t owner)
return mutex_adaptive_get_thread(prev) == mutex_adaptive_get_thread(owner);
}
-void
-mutex_adaptive_lock_slow(struct mutex *mutex)
+static int
+mutex_adaptive_lock_slow_common(struct mutex *mutex, bool timed, uint64_t ticks)
{
uintptr_t self, owner;
struct sleepq *sleepq;
+ struct thread *thread;
unsigned long flags;
+ int error;
+ error = 0;
self = (uintptr_t)thread_self();
sleepq = sleepq_lend(mutex, false, &flags);
mutex_adaptive_set_contended(mutex);
- for (;;) {
+ do {
owner = atomic_cas_acquire(&mutex->owner, MUTEX_ADAPTIVE_CONTENDED,
self | MUTEX_ADAPTIVE_CONTENDED);
assert(owner & MUTEX_ADAPTIVE_CONTENDED);
@@ -75,40 +81,131 @@ mutex_adaptive_lock_slow(struct mutex *mutex)
*/
while (mutex_adaptive_is_owner(mutex, owner)) {
if (thread_is_running(mutex_adaptive_get_thread(owner))) {
+ if (timed && clock_time_occurred(ticks, clock_get_time())) {
+ error = ERROR_TIMEDOUT;
+ break;
+ }
+
cpu_pause();
} else {
- sleepq_wait(sleepq, "mutex");
+ if (!timed) {
+ sleepq_wait(sleepq, "mutex");
+ } else {
+ error = sleepq_timedwait(sleepq, "mutex", ticks);
+
+ if (error) {
+ break;
+ }
+ }
}
}
- }
+ } while (!error);
/*
- * A potentially spinning thread wouldn't be accounted in the sleep queue,
- * but the only potentially spinning thread is the new owner.
+ * Attempt to clear the contended bit.
+ *
+ * In case of success, the current thread becomes the new owner, and
+ * simply checking if the sleep queue is empty is enough.
+ *
+ * Keep in mind accesses to the mutex word aren't synchronized by
+ * the sleep queue, i.e. an unlock may occur completely concurrently
+ * while attempting to clear the contended bit.
*/
+
+ if (error) {
+ if (sleepq_empty(sleepq)) {
+ owner = atomic_load(&mutex->owner, ATOMIC_RELAXED);
+ assert(owner & MUTEX_ADAPTIVE_CONTENDED);
+ thread = mutex_adaptive_get_thread(owner);
+
+ /* If there is an owner, try to clear the contended bit */
+ if (thread != NULL) {
+ owner = atomic_cas(&mutex->owner, owner,
+ (uintptr_t)thread, ATOMIC_RELAXED);
+ assert(owner & MUTEX_ADAPTIVE_CONTENDED);
+ thread = mutex_adaptive_get_thread(owner);
+ }
+
+ /*
+ * If there is no owner, the previous owner is currently unlocking
+ * the mutex, waiting for either a successful signal, or the
+ * value of the mutex to become different from the contended bit.
+ */
+ if (thread == NULL) {
+ owner = atomic_cas(&mutex->owner, owner, 0, ATOMIC_RELAXED);
+ assert(owner == MUTEX_ADAPTIVE_CONTENDED);
+ }
+ }
+
+ goto out;
+ }
+
if (sleepq_empty(sleepq)) {
atomic_store(&mutex->owner, self, ATOMIC_RELAXED);
}
+out:
sleepq_return(sleepq, flags);
+
+ return error;
+}
+
+void
+mutex_adaptive_lock_slow(struct mutex *mutex)
+{
+ int error;
+
+ error = mutex_adaptive_lock_slow_common(mutex, false, 0);
+ assert(!error);
+}
+
+int
+mutex_adaptive_timedlock_slow(struct mutex *mutex, uint64_t ticks)
+{
+ return mutex_adaptive_lock_slow_common(mutex, true, ticks);
}
void
mutex_adaptive_unlock_slow(struct mutex *mutex)
{
- uintptr_t owner;
+ uintptr_t self, owner;
struct sleepq *sleepq;
unsigned long flags;
+ int error;
- atomic_store(&mutex->owner, MUTEX_ADAPTIVE_CONTENDED, ATOMIC_RELEASE);
+ self = (uintptr_t)thread_self() | MUTEX_ADAPTIVE_CONTENDED;
+
+ for (;;) {
+ owner = atomic_cas_release(&mutex->owner, self,
+ MUTEX_ADAPTIVE_CONTENDED);
+
+ if (owner == self) {
+ break;
+ } else {
+ /*
+ * The contended bit was cleared after the fast path failed,
+ * but before the slow path (re)started.
+ */
+ assert(owner == (uintptr_t)thread_self());
+ error = mutex_adaptive_unlock_fast(mutex);
+
+ if (error) {
+ continue;
+ }
+
+ return;
+ }
+ }
for (;;) {
owner = atomic_load(&mutex->owner, ATOMIC_RELAXED);
/*
- * This only happens if another thread was able to become the new
- * owner, in which case that thread isn't spinning on the current
- * thread, i.e. there is no need for an additional reference.
+ * This only happens if :
+ * 1/ Another thread was able to become the new owner, in which
+ * case that thread isn't spinning on the current thread, i.e.
+ * there is no need for an additional reference.
+ * 2/ A timeout cleared the contended bit.
*/
if (owner != MUTEX_ADAPTIVE_CONTENDED) {
break;
diff --git a/kern/mutex/mutex_adaptive_i.h b/kern/mutex/mutex_adaptive_i.h
index b9952ec6..be822c24 100644
--- a/kern/mutex/mutex_adaptive_i.h
+++ b/kern/mutex/mutex_adaptive_i.h
@@ -78,6 +78,7 @@ mutex_adaptive_unlock_fast(struct mutex *mutex)
}
void mutex_adaptive_lock_slow(struct mutex *mutex);
+int mutex_adaptive_timedlock_slow(struct mutex *mutex, uint64_t ticks);
void mutex_adaptive_unlock_slow(struct mutex *mutex);
/*
@@ -105,6 +106,20 @@ mutex_impl_lock(struct mutex *mutex)
}
}
+static inline int
+mutex_impl_timedlock(struct mutex *mutex, uint64_t ticks)
+{
+ int error;
+
+ error = mutex_adaptive_lock_fast(mutex);
+
+ if (unlikely(error)) {
+ error = mutex_adaptive_timedlock_slow(mutex, ticks);
+ }
+
+ return error;
+}
+
static inline void
mutex_impl_unlock(struct mutex *mutex)
{
diff --git a/kern/mutex/mutex_pi_i.h b/kern/mutex/mutex_pi_i.h
index 6c39db74..616f09b7 100644
--- a/kern/mutex/mutex_pi_i.h
+++ b/kern/mutex/mutex_pi_i.h
@@ -23,6 +23,8 @@
" use <kern/mutex.h> instead"
#endif
+#include <stdint.h>
+
#include <kern/mutex_types.h>
#include <kern/rtmutex.h>
@@ -51,6 +53,12 @@ mutex_impl_lock(struct mutex *mutex)
rtmutex_lock(&mutex->rtmutex);
}
+static inline int
+mutex_impl_timedlock(struct mutex *mutex, uint64_t ticks)
+{
+ return rtmutex_timedlock(&mutex->rtmutex, ticks);
+}
+
static inline void
mutex_impl_unlock(struct mutex *mutex)
{
diff --git a/kern/mutex/mutex_plain.c b/kern/mutex/mutex_plain.c
index 5e4ba537..58fc4878 100644
--- a/kern/mutex/mutex_plain.c
+++ b/kern/mutex/mutex_plain.c
@@ -15,20 +15,25 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
+#include <stdint.h>
#include <kern/atomic.h>
#include <kern/mutex.h>
#include <kern/mutex_types.h>
#include <kern/sleepq.h>
-void
-mutex_plain_lock_slow(struct mutex *mutex)
+static int
+mutex_plain_lock_slow_common(struct mutex *mutex, bool timed, uint64_t ticks)
{
unsigned int state;
struct sleepq *sleepq;
unsigned long flags;
+ int error;
+
+ error = 0;
sleepq = sleepq_lend(mutex, false, &flags);
@@ -39,14 +44,49 @@ mutex_plain_lock_slow(struct mutex *mutex)
break;
}
- sleepq_wait(sleepq, "mutex");
+ if (!timed) {
+ sleepq_wait(sleepq, "mutex");
+ } else {
+ error = sleepq_timedwait(sleepq, "mutex", ticks);
+
+ if (error) {
+ break;
+ }
+ }
+ }
+
+ if (error) {
+ if (sleepq_empty(sleepq)) {
+ atomic_cas(&mutex->state, MUTEX_CONTENDED,
+ MUTEX_LOCKED, ATOMIC_RELAXED);
+ }
+
+ goto out;
}
if (sleepq_empty(sleepq)) {
atomic_store(&mutex->state, MUTEX_LOCKED, ATOMIC_RELAXED);
}
+out:
sleepq_return(sleepq, flags);
+
+ return error;
+}
+
+void
+mutex_plain_lock_slow(struct mutex *mutex)
+{
+ int error;
+
+ error = mutex_plain_lock_slow_common(mutex, false, 0);
+ assert(!error);
+}
+
+int
+mutex_plain_timedlock_slow(struct mutex *mutex, uint64_t ticks)
+{
+ return mutex_plain_lock_slow_common(mutex, true, ticks);
}
void
@@ -57,8 +97,11 @@ mutex_plain_unlock_slow(struct mutex *mutex)
sleepq = sleepq_acquire(mutex, false, &flags);
- if (sleepq != NULL) {
- sleepq_signal(sleepq);
- sleepq_release(sleepq, flags);
+ if (sleepq == NULL) {
+ return;
}
+
+ sleepq_signal(sleepq);
+
+ sleepq_release(sleepq, flags);
}
diff --git a/kern/mutex/mutex_plain_i.h b/kern/mutex/mutex_plain_i.h
index 4f112b89..58e565ed 100644
--- a/kern/mutex/mutex_plain_i.h
+++ b/kern/mutex/mutex_plain_i.h
@@ -24,6 +24,7 @@
#endif
#include <assert.h>
+#include <stdint.h>
#include <kern/atomic.h>
#include <kern/error.h>
@@ -71,6 +72,7 @@ mutex_plain_unlock_fast(struct mutex *mutex)
}
void mutex_plain_lock_slow(struct mutex *mutex);
+int mutex_plain_timedlock_slow(struct mutex *mutex, uint64_t ticks);
void mutex_plain_unlock_slow(struct mutex *mutex);
/*
@@ -98,6 +100,20 @@ mutex_impl_lock(struct mutex *mutex)
}
}
+static inline int
+mutex_impl_timedlock(struct mutex *mutex, uint64_t ticks)
+{
+ int error;
+
+ error = mutex_plain_lock_fast(mutex);
+
+ if (unlikely(error)) {
+ error = mutex_plain_timedlock_slow(mutex, ticks);
+ }
+
+ return error;
+}
+
static inline void
mutex_impl_unlock(struct mutex *mutex)
{
diff --git a/kern/rbtree.h b/kern/rbtree.h
index 4ae8353f..3de240b6 100644
--- a/kern/rbtree.h
+++ b/kern/rbtree.h
@@ -265,6 +265,8 @@ rbtree_insert_slot(struct rbtree *tree, rbtree_slot_t slot,
* Remove a node from a tree.
*
* After completion, the node is stale.
+ *
+ * TODO rbtree_replace.
*/
void rbtree_remove(struct rbtree *tree, struct rbtree_node *node);
diff --git a/kern/rtmutex.c b/kern/rtmutex.c
index db239206..0070b93f 100644
--- a/kern/rtmutex.c
+++ b/kern/rtmutex.c
@@ -16,6 +16,7 @@
*/
#include <assert.h>
+#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
@@ -26,20 +27,28 @@
#include <kern/thread.h>
#include <kern/turnstile.h>
+static struct thread *
+rtmutex_get_thread(uintptr_t owner)
+{
+ return (struct thread *)(owner & RTMUTEX_OWNER_MASK);
+}
+
static void
rtmutex_set_contended(struct rtmutex *rtmutex)
{
atomic_or(&rtmutex->owner, RTMUTEX_CONTENDED, ATOMIC_RELEASE);
}
-void
-rtmutex_lock_slow(struct rtmutex *rtmutex)
+static int
+rtmutex_lock_slow_common(struct rtmutex *rtmutex, bool timed, uint64_t ticks)
{
struct turnstile *turnstile;
uintptr_t self, owner;
struct thread *thread;
uintptr_t bits;
+ int error;
+ error = 0;
self = (uintptr_t)thread_self();
turnstile = turnstile_lend(rtmutex);
@@ -56,11 +65,40 @@ rtmutex_lock_slow(struct rtmutex *rtmutex)
break;
}
- thread = (struct thread *)(owner & RTMUTEX_OWNER_MASK);
- turnstile_wait(turnstile, "rtmutex", thread);
+ thread = rtmutex_get_thread(owner);
+
+ if (!timed) {
+ turnstile_wait(turnstile, "rtmutex", thread);
+ } else {
+ error = turnstile_timedwait(turnstile, "rtmutex", thread, ticks);
+
+ if (error) {
+ break;
+ }
+ }
+
bits |= RTMUTEX_FORCE_WAIT;
}
+ if (error) {
+ /*
+ * Keep in mind more than one thread may have timed out while waiting.
+ * These threads aren't considered waiters, making the turnstile
+ * empty. The first to reacquire the turnstile clears the contention
+ * bits, allowing the owner to unlock through the fast path.
+ */
+ if (turnstile_empty(turnstile)) {
+ owner = atomic_load(&rtmutex->owner, ATOMIC_RELAXED);
+
+ if (owner & RTMUTEX_CONTENDED) {
+ owner &= RTMUTEX_OWNER_MASK;
+ atomic_store(&rtmutex->owner, owner, ATOMIC_RELAXED);
+ }
+ }
+
+ goto out;
+ }
+
turnstile_own(turnstile);
if (turnstile_empty(turnstile)) {
@@ -68,6 +106,7 @@ rtmutex_lock_slow(struct rtmutex *rtmutex)
assert(owner == (self | bits));
}
+out:
turnstile_return(turnstile);
/*
@@ -76,27 +115,54 @@ rtmutex_lock_slow(struct rtmutex *rtmutex)
* introducing unbounded priority inversion.
* Instead, let new waiters do it, using their own priority.
*/
+
+ return error;
+}
+
+void
+rtmutex_lock_slow(struct rtmutex *rtmutex)
+{
+ int error;
+
+ error = rtmutex_lock_slow_common(rtmutex, false, 0);
+ assert(!error);
+}
+
+int
+rtmutex_timedlock_slow(struct rtmutex *rtmutex, uint64_t ticks)
+{
+ return rtmutex_lock_slow_common(rtmutex, true, ticks);
}
void
rtmutex_unlock_slow(struct rtmutex *rtmutex)
{
struct turnstile *turnstile;
- uintptr_t self, owner;
+ uintptr_t owner;
- self = (uintptr_t)thread_self();
+ for (;;) {
+ turnstile = turnstile_acquire(rtmutex);
+
+ if (turnstile != NULL) {
+ break;
+ }
+
+ owner = rtmutex_unlock_fast(rtmutex);
- turnstile = turnstile_acquire(rtmutex);
- assert(turnstile != NULL);
+ if (!(owner & RTMUTEX_CONTENDED)) {
+ return;
+ }
+ }
owner = atomic_swap_release(&rtmutex->owner,
RTMUTEX_FORCE_WAIT | RTMUTEX_CONTENDED);
- assert((owner & RTMUTEX_OWNER_MASK) == self);
+ assert(rtmutex_get_thread(owner) == thread_self());
turnstile_disown(turnstile);
turnstile_signal(turnstile);
turnstile_release(turnstile);
+ /* TODO Make private, use thread_set_priority_propagation_needed instead */
thread_propagate_priority();
}
diff --git a/kern/rtmutex.h b/kern/rtmutex.h
index ec79afa9..87cd15ad 100644
--- a/kern/rtmutex.h
+++ b/kern/rtmutex.h
@@ -87,6 +87,20 @@ rtmutex_lock(struct rtmutex *rtmutex)
}
}
+static inline int
+rtmutex_timedlock(struct rtmutex *rtmutex, uint64_t ticks)
+{
+ uintptr_t prev_owner;
+
+ prev_owner = rtmutex_lock_fast(rtmutex);
+
+ if (unlikely(prev_owner != 0)) {
+ return rtmutex_timedlock_slow(rtmutex, ticks);
+ }
+
+ return 0;
+}
+
/*
* Unlock a real-time mutex.
*
diff --git a/kern/rtmutex_i.h b/kern/rtmutex_i.h
index 984cfd16..75ac5e4a 100644
--- a/kern/rtmutex_i.h
+++ b/kern/rtmutex_i.h
@@ -74,6 +74,8 @@ rtmutex_unlock_fast(struct rtmutex *rtmutex)
void rtmutex_lock_slow(struct rtmutex *rtmutex);
+int rtmutex_timedlock_slow(struct rtmutex *rtmutex, uint64_t ticks);
+
void rtmutex_unlock_slow(struct rtmutex *rtmutex);
#endif /* _KERN_RTMUTEX_I_H */
diff --git a/kern/semaphore.c b/kern/semaphore.c
index 7e94dafd..72e843a9 100644
--- a/kern/semaphore.c
+++ b/kern/semaphore.c
@@ -15,19 +15,25 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
+#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
+#include <stdint.h>
#include <kern/semaphore.h>
#include <kern/semaphore_i.h>
#include <kern/sleepq.h>
-void
-semaphore_wait_slow(struct semaphore *semaphore)
+static int
+semaphore_wait_slow_common(struct semaphore *semaphore,
+ bool timed, uint64_t ticks)
{
struct sleepq *sleepq;
unsigned long flags;
unsigned int prev;
+ int error;
+
+ error = 0;
sleepq = sleepq_lend(semaphore, false, &flags);
@@ -38,10 +44,35 @@ semaphore_wait_slow(struct semaphore *semaphore)
break;
}
- sleepq_wait(sleepq, "sem");
+ if (!timed) {
+ sleepq_wait(sleepq, "sem");
+ } else {
+ error = sleepq_timedwait(sleepq, "sem", ticks);
+
+ if (error) {
+ break;
+ }
+ }
}
sleepq_return(sleepq, flags);
+
+ return error;
+}
+
+void
+semaphore_wait_slow(struct semaphore *semaphore)
+{
+ int error;
+
+ error = semaphore_wait_slow_common(semaphore, false, 0);
+ assert(!error);
+}
+
+int
+semaphore_timedwait_slow(struct semaphore *semaphore, uint64_t ticks)
+{
+ return semaphore_wait_slow_common(semaphore, true, ticks);
}
void
diff --git a/kern/semaphore.h b/kern/semaphore.h
index e08927ec..e1acbf21 100644
--- a/kern/semaphore.h
+++ b/kern/semaphore.h
@@ -33,6 +33,7 @@
#define _KERN_SEMAPHORE_H
#include <assert.h>
+#include <stdint.h>
#include <kern/atomic.h>
#include <kern/error.h>
@@ -93,6 +94,20 @@ semaphore_wait(struct semaphore *semaphore)
}
}
+static inline int
+semaphore_timedwait(struct semaphore *semaphore, uint64_t ticks)
+{
+ unsigned int prev;
+
+ prev = semaphore_dec(semaphore);
+
+ if (unlikely(prev == 0)) {
+ return semaphore_timedwait_slow(semaphore, ticks);
+ }
+
+ return 0;
+}
+
/*
* Unlock a semaphore.
*
diff --git a/kern/semaphore_i.h b/kern/semaphore_i.h
index acd7cd48..6e79b137 100644
--- a/kern/semaphore_i.h
+++ b/kern/semaphore_i.h
@@ -19,6 +19,7 @@
#define _KERN_SEMAPHORE_I_H
#include <assert.h>
+#include <stdint.h>
#include <kern/atomic.h>
@@ -56,6 +57,8 @@ semaphore_inc(struct semaphore *semaphore)
void semaphore_wait_slow(struct semaphore *semaphore);
+int semaphore_timedwait_slow(struct semaphore *semaphore, uint64_t ticks);
+
void semaphore_post_slow(struct semaphore *semaphore);
#endif /* _KERN_SEMAPHORE_I_H */
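
A matching sketch for the semaphore path; the semaphore is assumed to have been initialized elsewhere and the 10 ms timeout is arbitrary:

    uint64_t ticks;
    int error;

    ticks = clock_get_time() + clock_ticks_from_ms(10);
    error = semaphore_timedwait(&sem, ticks);

    if (error == ERROR_TIMEDOUT) {
        /* the semaphore was not posted before the deadline */
    }
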
diff --git a/kern/sleepq.c b/kern/sleepq.c
index 99282d97..170b1a9f 100644
--- a/kern/sleepq.c
+++ b/kern/sleepq.c
@@ -38,19 +38,28 @@ struct sleepq_bucket {
struct list list;
};
+struct sleepq_waiter {
+ struct list node;
+ struct thread *thread;
+ bool pending_wakeup;
+};
+
+/*
+ * Waiters are queued in FIFO order and inserted at the head of the
+ * list of waiters. The pointer to the "oldest" waiter is used as
+ * a marker between threads waiting for a signal/broadcast (from the
+ * beginning up to and including the oldest waiter) and threads pending
+ * for wake-up (all the following threads up to the end of the list).
+ */
struct sleepq {
- struct sleepq_bucket *bucket;
+ alignas(CPU_L1_SIZE) struct sleepq_bucket *bucket;
struct list node;
const void *sync_obj;
struct list waiters;
+ struct sleepq_waiter *oldest_waiter;
struct sleepq *next_free;
};
-struct sleepq_waiter {
- struct list node;
- struct thread *thread;
-};
-
#define SLEEPQ_HTABLE_SIZE 128
#define SLEEPQ_COND_HTABLE_SIZE 64
@@ -76,11 +85,28 @@ static void
sleepq_waiter_init(struct sleepq_waiter *waiter, struct thread *thread)
{
waiter->thread = thread;
+ waiter->pending_wakeup = false;
+}
+
+static bool
+sleepq_waiter_pending_wakeup(const struct sleepq_waiter *waiter)
+{
+ return waiter->pending_wakeup;
+}
+
+static void
+sleepq_waiter_set_pending_wakeup(struct sleepq_waiter *waiter)
+{
+ waiter->pending_wakeup = true;
}
static void
sleepq_waiter_wakeup(struct sleepq_waiter *waiter)
{
+ if (!sleepq_waiter_pending_wakeup(waiter)) {
+ return;
+ }
+
thread_wakeup(waiter->thread);
}
@@ -90,6 +116,7 @@ sleepq_assert_init_state(const struct sleepq *sleepq)
assert(sleepq->bucket == NULL);
assert(sleepq->sync_obj == NULL);
assert(list_empty(&sleepq->waiters));
+ assert(sleepq->oldest_waiter == NULL);
assert(sleepq->next_free == NULL);
}
@@ -190,6 +217,7 @@ sleepq_ctor(void *ptr)
sleepq->bucket = NULL;
sleepq->sync_obj = NULL;
list_init(&sleepq->waiters);
+ sleepq->oldest_waiter = NULL;
sleepq->next_free = NULL;
}
@@ -367,15 +395,38 @@ sleepq_return(struct sleepq *sleepq, unsigned long flags)
}
static void
+sleepq_shift_oldest_waiter(struct sleepq *sleepq)
+{
+ struct list *node;
+
+ assert(sleepq->oldest_waiter != NULL);
+
+ node = list_prev(&sleepq->oldest_waiter->node);
+
+ if (list_end(&sleepq->waiters, node)) {
+ sleepq->oldest_waiter = NULL;
+ } else {
+ sleepq->oldest_waiter = list_entry(node, struct sleepq_waiter, node);
+ }
+}
+
+static void
sleepq_add_waiter(struct sleepq *sleepq, struct sleepq_waiter *waiter)
{
list_insert_head(&sleepq->waiters, &waiter->node);
+
+ if (sleepq->oldest_waiter == NULL) {
+ sleepq->oldest_waiter = waiter;
+ }
}
static void
sleepq_remove_waiter(struct sleepq *sleepq, struct sleepq_waiter *waiter)
{
- (void)sleepq;
+ if (sleepq->oldest_waiter == waiter) {
+ sleepq_shift_oldest_waiter(sleepq);
+ }
+
list_remove(&waiter->node);
}
@@ -385,19 +436,48 @@ sleepq_empty(const struct sleepq *sleepq)
return list_empty(&sleepq->waiters);
}
-void
-sleepq_wait(struct sleepq *sleepq, const char *wchan)
+static int
+sleepq_wait_common(struct sleepq *sleepq, const char *wchan,
+ bool timed, uint64_t ticks)
{
struct sleepq_waiter waiter;
struct thread *thread;
+ int error;
thread = thread_self();
sleepq_waiter_init(&waiter, thread);
sleepq_add_waiter(sleepq, &waiter);
- thread_sleep(&sleepq->bucket->lock, sleepq->sync_obj, wchan);
+ if (!timed) {
+ thread_sleep(&sleepq->bucket->lock, sleepq->sync_obj, wchan);
+ error = 0;
+ } else {
+ error = thread_timedsleep(&sleepq->bucket->lock, sleepq->sync_obj,
+ wchan, ticks);
+
+ if (error && sleepq_waiter_pending_wakeup(&waiter)) {
+ error = 0;
+ }
+ }
sleepq_remove_waiter(sleepq, &waiter);
+
+ return error;
+}
+
+void
+sleepq_wait(struct sleepq *sleepq, const char *wchan)
+{
+ int error;
+
+ error = sleepq_wait_common(sleepq, wchan, false, 0);
+ assert(!error);
+}
+
+int
+sleepq_timedwait(struct sleepq *sleepq, const char *wchan, uint64_t ticks)
+{
+ return sleepq_wait_common(sleepq, wchan, true, ticks);
}
void
@@ -405,10 +485,55 @@ sleepq_signal(struct sleepq *sleepq)
{
struct sleepq_waiter *waiter;
- if (sleepq_empty(sleepq)) {
+ if (list_empty(&sleepq->waiters)) {
return;
}
waiter = list_last_entry(&sleepq->waiters, struct sleepq_waiter, node);
+ sleepq_waiter_set_pending_wakeup(waiter);
+ sleepq_waiter_wakeup(waiter);
+}
+
+static void
+sleepq_wakeup_common(struct sleepq *sleepq)
+{
+ struct sleepq_waiter *waiter;
+
+ assert(!list_empty(&sleepq->waiters));
+
+ waiter = list_last_entry(&sleepq->waiters, struct sleepq_waiter, node);
sleepq_waiter_wakeup(waiter);
}
+
+void
+sleepq_broadcast(struct sleepq *sleepq)
+{
+ struct sleepq_waiter *waiter;
+
+ if (sleepq->oldest_waiter == NULL) {
+ goto out;
+ }
+
+ list_for_each_entry(&sleepq->waiters, waiter, node) {
+ sleepq_waiter_set_pending_wakeup(waiter);
+
+ if (waiter == sleepq->oldest_waiter) {
+ break;
+ }
+ }
+
+ sleepq->oldest_waiter = NULL;
+
+out:
+ sleepq_wakeup_common(sleepq);
+}
+
+void
+sleepq_wakeup(struct sleepq *sleepq)
+{
+ if (list_empty(&sleepq->waiters)) {
+ return;
+ }
+
+ sleepq_wakeup_common(sleepq);
+}
diff --git a/kern/sleepq.h b/kern/sleepq.h
index 3de765a3..827d0436 100644
--- a/kern/sleepq.h
+++ b/kern/sleepq.h
@@ -26,17 +26,13 @@
* the associated mutex, at which point two sleep queues are locked.
* Handling condition variable sleep queues slightly differently
* allows preventing deadlocks while keeping overall complexity low.
- *
- * In addition, despite being used to implement condition variables,
- * this implementation doesn't provide a broadcast call. The rationale
- * is to force users to implement "chained waking" in order to avoid
- * the thundering herd effect.
*/
#ifndef _KERN_SLEEPQ_H
#define _KERN_SLEEPQ_H
#include <stdbool.h>
+#include <stdint.h>
#include <kern/init.h>
@@ -98,7 +94,7 @@ void sleepq_return(struct sleepq *sleepq, unsigned long flags);
bool sleepq_empty(const struct sleepq *sleepq);
/*
- * Wait for a wake up on the given sleep queue.
+ * Wait for a wake-up on the given sleep queue.
*
* The sleep queue must be lent when calling this function. It is
* released and later reacquired before returning from this function.
@@ -110,8 +106,13 @@ bool sleepq_empty(const struct sleepq *sleepq);
* the queue, the queue is not immediately considered empty.
*
* Threads are queued in FIFO order.
+ *
+ * When bounding the duration of the wait, the caller must pass an absolute
+ * time in ticks, and ERROR_TIMEDOUT is returned if that time is reached
+ * before the sleep queue is signalled.
*/
void sleepq_wait(struct sleepq *sleepq, const char *wchan);
+int sleepq_timedwait(struct sleepq *sleepq, const char *wchan, uint64_t ticks);
/*
* Wake up a thread waiting on the given sleep queue, if any.
@@ -125,10 +126,26 @@ void sleepq_wait(struct sleepq *sleepq, const char *wchan);
*
* Threads are queued in FIFO order, which means signalling a sleep
* queue multiple times always awakens the same thread, regardless
- * of new waiters, as long as that first thread didn't reacquire the
+ * of new waiters, as long as that thread didn't reacquire the
* sleep queue.
+ *
+ * A broadcast differs only by also making all currently waiting threads
+ * pending for wake-up. As with sleepq_signal, a single thread may be
+ * awakened. The rationale is to force users to implement "chained waking"
+ * in order to avoid the thundering herd effect.
*/
void sleepq_signal(struct sleepq *sleepq);
+void sleepq_broadcast(struct sleepq *sleepq);
+
+/*
+ * Wake up a pending thread.
+ *
+ * This function may only wake up a thread pending for wake-up after a
+ * broadcast. It is used to chain wake-ups to avoid the thundering herd
+ * effect. If there are no threads pending for wake-up, this function
+ * does nothing.
+ */
+void sleepq_wakeup(struct sleepq *sleepq);
/*
* This init operation provides :
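
To make the broadcast/wakeup split above concrete, here is a rough sketch of the intended chained-waking pattern at the sleep queue level; sync_obj is a hypothetical synchronization object:

    struct sleepq *sleepq;
    unsigned long flags;

    /* Waking side: mark every current waiter as pending, wake only one. */
    sleepq = sleepq_acquire(sync_obj, false, &flags);

    if (sleepq != NULL) {
        sleepq_broadcast(sleepq);
        sleepq_release(sleepq, flags);
    }

    /* Each awakened thread then relays the wake-up, typically once it has
       reacquired whatever resource it was waiting for, so that pending
       threads run one at a time instead of stampeding. */
    sleepq = sleepq_acquire(sync_obj, false, &flags);

    if (sleepq != NULL) {
        sleepq_wakeup(sleepq);  /* does nothing if no thread is pending */
        sleepq_release(sleepq, flags);
    }
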
diff --git a/kern/thread.c b/kern/thread.c
index fba95e06..e4d8f7cf 100644
--- a/kern/thread.c
+++ b/kern/thread.c
@@ -91,6 +91,7 @@
#include <string.h>
#include <kern/atomic.h>
+#include <kern/clock.h>
#include <kern/condition.h>
#include <kern/cpumap.h>
#include <kern/error.h>
@@ -108,6 +109,7 @@
#include <kern/syscnt.h>
#include <kern/task.h>
#include <kern/thread.h>
+#include <kern/timer.h>
#include <kern/turnstile.h>
#include <kern/work.h>
#include <machine/cpu.h>
@@ -160,7 +162,7 @@
/*
* Default time slice for real-time round-robin scheduling.
*/
-#define THREAD_DEFAULT_RR_TIME_SLICE (THREAD_TICK_FREQ / 10)
+#define THREAD_DEFAULT_RR_TIME_SLICE (CLOCK_FREQ / 10)
/*
* Maximum number of threads which can be pulled from a remote run queue
@@ -171,7 +173,7 @@
/*
* Delay (in ticks) between two balance attempts when a run queue is idle.
*/
-#define THREAD_IDLE_BALANCE_TICKS (THREAD_TICK_FREQ / 2)
+#define THREAD_IDLE_BALANCE_TICKS (CLOCK_FREQ / 2)
/*
* Run queue properties for real-time threads.
@@ -191,7 +193,7 @@ struct thread_rt_runq {
/*
* Round slice base unit for fair-scheduling threads.
*/
-#define THREAD_FS_ROUND_SLICE_BASE (THREAD_TICK_FREQ / 10)
+#define THREAD_FS_ROUND_SLICE_BASE (CLOCK_FREQ / 10)
/*
* Group of threads sharing the same weight.
@@ -257,7 +259,6 @@ struct thread_runq {
unsigned int idle_balance_ticks;
struct syscnt sc_schedule_intrs;
- struct syscnt sc_tick_intrs;
struct syscnt sc_boosts;
};
@@ -451,8 +452,6 @@ thread_runq_init(struct thread_runq *runq, unsigned int cpu,
runq->idle_balance_ticks = (unsigned int)-1;
snprintf(name, sizeof(name), "thread_schedule_intrs/%u", cpu);
syscnt_register(&runq->sc_schedule_intrs, name);
- snprintf(name, sizeof(name), "thread_tick_intrs/%u", cpu);
- syscnt_register(&runq->sc_tick_intrs, name);
snprintf(name, sizeof(name), "thread_boosts/%u", cpu);
syscnt_register(&runq->sc_boosts, name);
}
@@ -1862,7 +1861,7 @@ thread_lock_runq(struct thread *thread, unsigned long *flags)
struct thread_runq *runq;
for (;;) {
- runq = thread->runq;
+ runq = thread->runq; /* TODO Atomic access */
spinlock_lock_intr_save(&runq->lock, flags);
@@ -2421,17 +2420,94 @@ thread_join(struct thread *thread)
thread_join_common(thread);
}
-void
-thread_sleep(struct spinlock *interlock, const void *wchan_addr,
- const char *wchan_desc)
+static int
+thread_wakeup_common(struct thread *thread, int error)
{
struct thread_runq *runq;
+ unsigned long flags;
+
+ if ((thread == NULL) || (thread == thread_self())) {
+ return ERROR_INVAL;
+ }
+
+ /*
+ * There is at most one reference on threads that were never dispatched,
+ * in which case there is no need to lock anything.
+ */
+ if (thread->runq == NULL) {
+ assert(thread->state != THREAD_RUNNING);
+ thread_clear_wchan(thread);
+ thread->state = THREAD_RUNNING;
+ } else {
+ runq = thread_lock_runq(thread, &flags);
+
+ if (thread->state == THREAD_RUNNING) {
+ thread_unlock_runq(runq, flags);
+ return ERROR_INVAL;
+ }
+
+ thread_clear_wchan(thread);
+ thread->state = THREAD_RUNNING;
+ thread_unlock_runq(runq, flags);
+ }
+
+ thread_preempt_disable();
+ cpu_intr_save(&flags);
+
+ if (!thread->pinned) {
+ runq = thread_get_real_sched_ops(thread)->select_runq(thread);
+ } else {
+ runq = thread->runq;
+ spinlock_lock(&runq->lock);
+ }
+
+ thread->wakeup_error = error;
+ thread_runq_wakeup(runq, thread);
+ spinlock_unlock(&runq->lock);
+ cpu_intr_restore(flags);
+ thread_preempt_enable();
+
+ return 0;
+}
+
+int
+thread_wakeup(struct thread *thread)
+{
+ return thread_wakeup_common(thread, 0);
+}
+
+struct thread_timeout_waiter {
+ struct thread *thread;
+ struct timer timer;
+};
+
+static void
+thread_timeout(struct timer *timer)
+{
+ struct thread_timeout_waiter *waiter;
+
+ waiter = structof(timer, struct thread_timeout_waiter, timer);
+ thread_wakeup_common(waiter->thread, ERROR_TIMEDOUT);
+}
+
+static int
+thread_sleep_common(struct spinlock *interlock, const void *wchan_addr,
+ const char *wchan_desc, bool timed, uint64_t ticks)
+{
+ struct thread_timeout_waiter waiter;
+ struct thread_runq *runq;
struct thread *thread;
unsigned long flags;
thread = thread_self();
assert(thread->preempt == 1);
+ if (timed) {
+ waiter.thread = thread;
+ timer_init(&waiter.timer, thread_timeout, TIMER_INTR);
+ timer_schedule(&waiter.timer, ticks);
+ }
+
runq = thread_runq_local();
spinlock_lock_intr_save(&runq->lock, &flags);
@@ -2448,58 +2524,49 @@ thread_sleep(struct spinlock *interlock, const void *wchan_addr,
spinlock_unlock_intr_restore(&runq->lock, flags);
+ if (timed) {
+ timer_cancel(&waiter.timer);
+ }
+
if (interlock != NULL) {
spinlock_lock(interlock);
thread_preempt_enable_no_resched();
}
assert(thread->preempt == 1);
+
+ return thread->wakeup_error;
}
void
-thread_wakeup(struct thread *thread)
+thread_sleep(struct spinlock *interlock, const void *wchan_addr,
+ const char *wchan_desc)
{
- struct thread_runq *runq;
- unsigned long flags;
-
- if ((thread == NULL) || (thread == thread_self())) {
- return;
- }
-
- /*
- * There is at most one reference on threads that were never dispatched,
- * in which case there is no need to lock anything.
- */
- if (thread->runq == NULL) {
- assert(thread->state != THREAD_RUNNING);
- thread_clear_wchan(thread);
- thread->state = THREAD_RUNNING;
- } else {
- runq = thread_lock_runq(thread, &flags);
+ int error;
- if (thread->state == THREAD_RUNNING) {
- thread_unlock_runq(runq, flags);
- return;
- }
+ error = thread_sleep_common(interlock, wchan_addr, wchan_desc, false, 0);
+ assert(!error);
+}
- thread_clear_wchan(thread);
- thread->state = THREAD_RUNNING;
- thread_unlock_runq(runq, flags);
- }
+int
+thread_timedsleep(struct spinlock *interlock, const void *wchan_addr,
+ const char *wchan_desc, uint64_t ticks)
+{
+ return thread_sleep_common(interlock, wchan_addr, wchan_desc, true, ticks);
+}
+void
+thread_delay(uint64_t ticks, bool absolute)
+{
thread_preempt_disable();
- cpu_intr_save(&flags);
- if (!thread->pinned) {
- runq = thread_get_real_sched_ops(thread)->select_runq(thread);
- } else {
- runq = thread->runq;
- spinlock_lock(&runq->lock);
+ if (!absolute) {
+ /* Add a tick to avoid quantization errors */
+ ticks += clock_get_time() + 1;
}
- thread_runq_wakeup(runq, thread);
- spinlock_unlock(&runq->lock);
- cpu_intr_restore(flags);
+ thread_timedsleep(NULL, thread_self(), "delay", ticks);
+
thread_preempt_enable();
}
@@ -2570,7 +2637,7 @@ thread_schedule_intr(void)
}
void
-thread_tick_intr(void)
+thread_report_periodic_event(void)
{
const struct thread_sched_ops *ops;
struct thread_runq *runq;
@@ -2579,10 +2646,6 @@ thread_tick_intr(void)
thread_assert_interrupted();
runq = thread_runq_local();
- syscnt_inc(&runq->sc_tick_intrs);
- llsync_report_periodic_event();
- sref_report_periodic_event();
- work_report_periodic_event();
thread = thread_self();
spinlock_lock(&runq->lock);
diff --git a/kern/thread.h b/kern/thread.h
index 1c052364..a3f2670d 100644
--- a/kern/thread.h
+++ b/kern/thread.h
@@ -36,6 +36,7 @@
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
+#include <stdint.h>
#include <stdnoreturn.h>
#include <kern/atomic.h>
@@ -48,14 +49,6 @@
#include <machine/tcb.h>
/*
- * Scheduler tick frequency.
- *
- * The selected value of 200 translates to a period of 5ms, small enough to
- * provide low latency, and is practical as both a dividend and divisor.
- */
-#define THREAD_TICK_FREQ 200
-
-/*
* Thread structure.
*/
struct thread;
@@ -218,18 +211,29 @@ void thread_join(struct thread *thread);
* address should refer to a relevant synchronization object, normally
* containing the interlock, but not necessarily.
*
+ * When bounding the duration of the sleep, the caller must pass an absolute
+ * time in ticks, and ERROR_TIMEDOUT is returned if that time is reached
+ * before the thread is awakened.
+ *
* Implies a memory barrier.
*/
void thread_sleep(struct spinlock *interlock, const void *wchan_addr,
const char *wchan_desc);
+int thread_timedsleep(struct spinlock *interlock, const void *wchan_addr,
+ const char *wchan_desc, uint64_t ticks);
/*
* Schedule a thread for execution on a processor.
*
- * No action is performed if the target thread is NULL, the calling thread,
- * or already in the running state.
+ * If the target thread is NULL, the calling thread, or already in the
+ * running state, no action is performed and ERROR_INVAL is returned.
*/
-void thread_wakeup(struct thread *thread);
+int thread_wakeup(struct thread *thread);
+
+/*
+ * Suspend execution of the calling thread.
+ */
+void thread_delay(uint64_t ticks, bool absolute);
/*
* Start running threads on the local processor.
@@ -253,10 +257,11 @@ void thread_yield(void);
void thread_schedule_intr(void);
/*
- * Report a periodic timer interrupt on the thread currently running on
- * the local processor.
+ * Report a periodic event on the current processor.
+ *
+ * Interrupts and preemption must be disabled when calling this function.
*/
-void thread_tick_intr(void);
+void thread_report_periodic_event(void);
/*
* Set thread scheduling parameters.
@@ -647,6 +652,7 @@ thread_intr_leave(void)
}
}
+/* TODO Use in interrupt handlers instead of manual interrupt/preemption checks */
static inline void
thread_assert_interrupted(void)
{
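
Two small usage sketches for the thread-level interfaces declared above; the 50 ms values are arbitrary:

    /* Sleep for roughly 50 ms, relative to the current time; thread_delay
       internally adds a tick to guard against quantization errors. */
    thread_delay(clock_ticks_from_ms(50), false);

    /* Or sleep until an absolute time. */
    thread_delay(clock_get_time() + clock_ticks_from_ms(50), true);
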
diff --git a/kern/thread_i.h b/kern/thread_i.h
index cd349771..9ff4b764 100644
--- a/kern/thread_i.h
+++ b/kern/thread_i.h
@@ -107,6 +107,7 @@ struct thread {
bool in_runq; /* (r) */
const void *wchan_addr; /* (r) */
const char *wchan_desc; /* (r) */
+ int wakeup_error; /* (r) */
unsigned short state; /* (r) */
/* Sleep queue available for lending */
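
Before the timer implementation below, a minimal sketch of the timer interface as used by thread.c above; the my_* names are hypothetical and the interrupt-context remark is an assumption based on the TIMER_INTR flag:

    static void
    my_timeout(struct timer *timer)
    {
        (void)timer;
        /* with TIMER_INTR, the callback presumably runs from interrupt
           context, like thread_timeout() above */
    }

    static void
    my_bounded_operation(void)
    {
        struct timer timer;

        timer_init(&timer, my_timeout, TIMER_INTR);
        timer_schedule(&timer, clock_get_time() + clock_ticks_from_ms(5));

        /* ... do the work the timeout protects ... */

        timer_cancel(&timer);   /* mirrors the unconditional cancel done in
                                   thread_sleep_common() above */
    }
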
diff --git a/kern/timer.c b/kern/timer.c
new file mode 100644
index 00000000..25f59f24
--- /dev/null
+++ b/kern/timer.c
@@ -0,0 +1,536 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation is based on "Hashed and Hierarchical Timing Wheels:
+ * Efficient Data Structures for Implementing a Timer Facility" by George
+ * Varghese and Tony Lauck. Specifically, it implements scheme 6.1.2.
+ */
+
+#include <assert.h>
+#include <stdalign.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <kern/clock.h>
+#include <kern/error.h>
+#include <kern/init.h>
+#include <kern/hlist.h>
+#include <kern/macros.h>
+#include <kern/panic.h>
+#include <kern/percpu.h>
+#include <kern/spinlock.h>
+#include <kern/thread.h>
+#include <kern/timer.h>
+#include <kern/timer_i.h>
+#include <kern/work.h>
+#include <machine/boot.h>
+#include <machine/cpu.h>
+
+/*
+ * Timer states.
+ */
+#define TIMER_TS_READY 1
+#define TIMER_TS_SCHEDULED 2
+#define TIMER_TS_RUNNING 3
+#define TIMER_TS_DONE 4
+
+/*
+ * Timer flags.
+ */
+#define TIMER_TF_DETACHED 0x1
+#define TIMER_TF_INTR 0x2
+#define TIMER_TF_HIGH_PRIO 0x4
+#define TIMER_TF_CANCELED 0x8
+
+#define TIMER_HTABLE_SIZE 2048
+
+#if !ISP2(TIMER_HTABLE_SIZE)
+#error "hash table size must be a power of two"
+#endif /* !ISP2(TIMER_HTABLE_SIZE) */
+
+#define TIMER_HTABLE_MASK (TIMER_HTABLE_SIZE - 1)
+
+struct timer_bucket {
+ struct hlist timers;
+};
+
+/*
+ * Per-processor timer data. The hash table bucket matching the last_time
+ * member has already been processed, and the next periodic event resumes
+ * from the next bucket.
+ *
+ * Locking order: interrupts -> timer_cpu_data.
+ */
+struct timer_cpu_data {
+ unsigned int cpu;
+ struct spinlock lock;
+ uint64_t last_time;
+ struct timer_bucket htable[TIMER_HTABLE_SIZE];
+};
+
+static struct timer_cpu_data timer_cpu_data __percpu;
+
+static struct timer_cpu_data *
+timer_lock_cpu_data(struct timer *timer, unsigned long *flagsp)
+{
+ struct timer_cpu_data *cpu_data;
+
+ cpu_data = percpu_ptr(timer_cpu_data, timer->cpu);
+ spinlock_lock_intr_save(&cpu_data->lock, flagsp);
+ return cpu_data;
+}
+
+static void
+timer_unlock_cpu_data(struct timer_cpu_data *cpu_data, unsigned long flags)
+{
+ spinlock_unlock_intr_restore(&cpu_data->lock, flags);
+}
+
+/*
+ * Timer state functions.
+ */
+
+static bool
+timer_ready(const struct timer *timer)
+{
+ return timer->state == TIMER_TS_READY;
+}
+
+static void
+timer_set_ready(struct timer *timer)
+{
+ timer->state = TIMER_TS_READY;
+}
+
+static bool
+timer_scheduled(const struct timer *timer)
+{
+ return timer->state == TIMER_TS_SCHEDULED;
+}
+
+static void
+timer_set_scheduled(struct timer *timer, unsigned int cpu)
+{
+ timer->cpu = cpu;
+ timer->state = TIMER_TS_SCHEDULED;
+}
+
+static bool
+timer_running(const struct timer *timer)
+{
+ return timer->state == TIMER_TS_RUNNING;
+}
+
+static void
+timer_set_running(struct timer *timer)
+{
+ timer->state = TIMER_TS_RUNNING;
+}
+
+static bool
+timer_done(const struct timer *timer)
+{
+ return timer->state == TIMER_TS_DONE;
+}
+
+static void
+timer_set_done(struct timer *timer)
+{
+ timer->state = TIMER_TS_DONE;
+}
+
+/*
+ * Timer flags functions.
+ */
+
+static bool
+timer_detached(const struct timer *timer)
+{
+ return timer->flags & TIMER_TF_DETACHED;
+}
+
+static void
+timer_set_detached(struct timer *timer)
+{
+ timer->flags |= TIMER_TF_DETACHED;
+}
+
+static bool
+timer_is_intr(const struct timer *timer)
+{
+ return timer->flags & TIMER_TF_INTR;
+}
+
+static void
+timer_set_intr(struct timer *timer)
+{
+ timer->flags |= TIMER_TF_INTR;
+}
+
+static bool
+timer_is_high_prio(const struct timer *timer)
+{
+ return timer->flags & TIMER_TF_HIGH_PRIO;
+}
+
+static void
+timer_set_high_prio(struct timer *timer)
+{
+ timer->flags |= TIMER_TF_HIGH_PRIO;
+}
+
+static bool
+timer_canceled(const struct timer *timer)
+{
+ return timer->flags & TIMER_TF_CANCELED;
+}
+
+static void
+timer_set_canceled(struct timer *timer)
+{
+ timer->flags |= TIMER_TF_CANCELED;
+}
+
+static void
+timer_set_time(struct timer *timer, uint64_t ticks)
+{
+ timer->ticks = ticks;
+}
+
+static bool
+timer_occurred(const struct timer *timer, uint64_t ref)
+{
+ return clock_time_occurred(timer->ticks, ref);
+}
+
+static uintptr_t
+timer_hash(uint64_t ticks)
+{
+ return ticks;
+}
+
+static void
+timer_run(struct timer *timer)
+{
+ struct timer_cpu_data *cpu_data;
+ unsigned long cpu_flags;
+
+ assert(timer_running(timer));
+
+ timer->fn(timer);
+
+ if (timer_detached(timer)) {
+ return;
+ }
+
+ cpu_data = timer_lock_cpu_data(timer, &cpu_flags);
+
+ /*
+ * Once the handler has returned, the timer may have been:
+ *  - rescheduled by the handler
+ *  - canceled by another thread
+ *  - neither of the above
+ *
+ * If the handler didn't reschedule the timer, or if the timer was
+ * canceled, set the state to done and wake up the joiner, if any.
+ *
+ * If the handler rescheduled the timer, there is nothing to do. The
+ * same applies if the timer was canceled after being rescheduled by
+ * the handler (in that case, cancellation doesn't wait for a signal).
+ * Both cases are identified by the timer state no longer being
+ * running.
+ */
+
+ if (timer_running(timer)) {
+ timer_set_done(timer);
+ thread_wakeup(timer->joiner);
+ }
+
+ timer_unlock_cpu_data(cpu_data, cpu_flags);
+}
+
+static void
+timer_run_work(struct work *work)
+{
+ struct timer *timer;
+
+ timer = structof(work, struct timer, work);
+ timer_run(timer);
+}
+
+static void
+timer_process(struct timer *timer)
+{
+ int work_flags;
+
+ if (timer_is_intr(timer)) {
+ timer_run(timer);
+ return;
+ }
+
+ if (timer_is_high_prio(timer)) {
+ work_flags = WORK_HIGHPRIO;
+ } else {
+ work_flags = 0;
+ }
+
+ work_init(&timer->work, timer_run_work);
+ work_schedule(&timer->work, work_flags);
+}
+
+static void
+timer_bucket_init(struct timer_bucket *bucket)
+{
+ hlist_init(&bucket->timers);
+}
+
+static void
+timer_bucket_add(struct timer_bucket *bucket, struct timer *timer)
+{
+ hlist_insert_head(&bucket->timers, &timer->node);
+}
+
+static void
+timer_bucket_remove(struct timer_bucket *bucket, struct timer *timer)
+{
+ (void)bucket;
+ hlist_remove(&timer->node);
+}
+
+static void
+timer_cpu_data_init(struct timer_cpu_data *cpu_data, unsigned int cpu)
+{
+ cpu_data->cpu = cpu;
+ spinlock_init(&cpu_data->lock);
+
+ /* See periodic event handling */
+ cpu_data->last_time = clock_get_time() - 1;
+
+ for (size_t i = 0; i < ARRAY_SIZE(cpu_data->htable); i++) {
+ timer_bucket_init(&cpu_data->htable[i]);
+ }
+}
+
+static struct timer_cpu_data *
+timer_cpu_data_acquire_local(unsigned long *flags)
+{
+ struct timer_cpu_data *cpu_data;
+
+ thread_pin();
+ cpu_data = cpu_local_ptr(timer_cpu_data);
+ spinlock_lock_intr_save(&cpu_data->lock, flags);
+ return cpu_data;
+}
+
+static void
+timer_cpu_data_release_local(struct timer_cpu_data *cpu_data,
+ unsigned long flags)
+{
+ spinlock_unlock_intr_restore(&cpu_data->lock, flags);
+ thread_unpin();
+}
+
+static struct timer_bucket *
+timer_cpu_data_get_bucket(struct timer_cpu_data *cpu_data, uint64_t ticks)
+{
+ uintptr_t index;
+
+ index = timer_hash(ticks) & TIMER_HTABLE_MASK;
+ assert(index < ARRAY_SIZE(cpu_data->htable));
+ return &cpu_data->htable[index];
+}
+
+static void
+timer_cpu_data_add(struct timer_cpu_data *cpu_data, struct timer *timer)
+{
+ struct timer_bucket *bucket;
+
+ assert(timer_ready(timer));
+
+ bucket = timer_cpu_data_get_bucket(cpu_data, timer->ticks);
+ timer_bucket_add(bucket, timer);
+}
+
+static void
+timer_cpu_data_remove(struct timer_cpu_data *cpu_data, struct timer *timer)
+{
+ struct timer_bucket *bucket;
+
+ assert(timer_scheduled(timer));
+
+ bucket = timer_cpu_data_get_bucket(cpu_data, timer->ticks);
+ timer_bucket_remove(bucket, timer);
+}
+
+static void
+timer_bucket_filter(struct timer_bucket *bucket, uint64_t now,
+ struct hlist *timers)
+{
+ struct timer *timer, *tmp;
+
+ hlist_for_each_entry_safe(&bucket->timers, timer, tmp, node) {
+ assert(timer_scheduled(timer));
+
+ if (!timer_occurred(timer, now)) {
+ continue;
+ }
+
+ hlist_remove(&timer->node);
+ timer_set_running(timer);
+ hlist_insert_head(timers, &timer->node);
+ }
+}
+
+static int __init
+timer_setup(void)
+{
+ for (unsigned int cpu = 0; cpu < cpu_count(); cpu++) {
+ timer_cpu_data_init(percpu_ptr(timer_cpu_data, cpu), cpu);
+ }
+
+ return 0;
+}
+
+INIT_OP_DEFINE(timer_setup,
+ INIT_OP_DEP(boot_setup_intr, true),
+ INIT_OP_DEP(cpu_mp_probe, true));
+
+void
+timer_init(struct timer *timer, timer_fn_t fn, int flags)
+{
+ timer->fn = fn;
+ timer->state = TIMER_TS_READY;
+ timer->flags = 0;
+ timer->joiner = NULL;
+
+ if (flags & TIMER_DETACHED) {
+ timer_set_detached(timer);
+ }
+
+ if (flags & TIMER_INTR) {
+ timer_set_intr(timer);
+ } else if (flags & TIMER_HIGH_PRIO) {
+ timer_set_high_prio(timer);
+ }
+}
+
+void
+timer_schedule(struct timer *timer, uint64_t ticks)
+{
+ struct timer_cpu_data *cpu_data;
+ unsigned long cpu_flags;
+
+ cpu_data = timer_cpu_data_acquire_local(&cpu_flags);
+
+ if (timer_canceled(timer)) {
+ goto out;
+ }
+
+ /*
+ * If called from the handler, the timer is running. If rescheduled
+ * after completion, it's done.
+ */
+ if (timer_running(timer) || timer_done(timer)) {
+ timer_set_ready(timer);
+ }
+
+ timer_set_time(timer, ticks);
+
+ if (timer_occurred(timer, cpu_data->last_time)) {
+ ticks = cpu_data->last_time + 1;
+ }
+
+ timer_cpu_data_add(cpu_data, timer);
+ timer_set_scheduled(timer, cpu_data->cpu);
+
+out:
+ timer_cpu_data_release_local(cpu_data, cpu_flags);
+}
+
+void
+timer_cancel(struct timer *timer)
+{
+ struct timer_cpu_data *cpu_data;
+ unsigned long cpu_flags;
+
+ assert(!timer_detached(timer));
+
+ cpu_data = timer_lock_cpu_data(timer, &cpu_flags);
+
+ assert(timer->joiner == NULL);
+
+ timer_set_canceled(timer);
+
+ if (timer_scheduled(timer)) {
+ timer_cpu_data_remove(cpu_data, timer);
+ } else {
+ timer->joiner = thread_self();
+
+ while (!timer_done(timer)) {
+ if (timer_is_intr(timer)) {
+ timer_unlock_cpu_data(cpu_data, cpu_flags);
+ cpu_pause();
+ cpu_data = timer_lock_cpu_data(timer, &cpu_flags);
+ } else {
+ thread_sleep(&cpu_data->lock, timer, "tmr_cncl");
+ }
+ }
+
+ assert(timer_done(timer));
+
+ timer->joiner = NULL;
+ }
+
+ timer_set_ready(timer);
+
+ timer_unlock_cpu_data(cpu_data, cpu_flags);
+}
+
+void
+timer_report_periodic_event(void)
+{
+ struct timer_cpu_data *cpu_data;
+ struct timer_bucket *bucket;
+ struct timer *timer;
+ struct hlist timers;
+ uint64_t ticks, now;
+
+ assert(!cpu_intr_enabled());
+ assert(!thread_preempt_enabled());
+
+ now = clock_get_time();
+ hlist_init(&timers);
+ cpu_data = cpu_local_ptr(timer_cpu_data);
+
+ spinlock_lock(&cpu_data->lock);
+
+ for (ticks = cpu_data->last_time + 1;
+ clock_time_occurred(ticks, now);
+ ticks++) {
+ bucket = timer_cpu_data_get_bucket(cpu_data, ticks);
+ timer_bucket_filter(bucket, now, &timers);
+ }
+
+ cpu_data->last_time = now;
+
+ spinlock_unlock(&cpu_data->lock);
+
+ while (!hlist_empty(&timers)) {
+ timer = hlist_first_entry(&timers, struct timer, node);
+ hlist_remove(&timer->node);
+ timer_process(timer);
+ }
+}
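
A worked example of the bucket selection above: timer_cpu_data_get_bucket()
masks the hash value with TIMER_HTABLE_MASK, which is equivalent to a modulo
because TIMER_HTABLE_SIZE is a power of two. For a timer scheduled, say, at
tick 5000:

    index = timer_hash(ticks) & TIMER_HTABLE_MASK
          = 5000 & 2047
          = 904

Timers expiring at ticks 5000 and 7048 (5000 + 2048) therefore share bucket
904, which is why timer_bucket_filter() rechecks timer_occurred() against the
current time instead of expiring a whole bucket, and why
timer_report_periodic_event() walks one tick, and thus one bucket, at a time
from last_time + 1 up to the current time.
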
diff --git a/kern/timer.h b/kern/timer.h
new file mode 100644
index 00000000..46616686
--- /dev/null
+++ b/kern/timer.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Low resolution timer system.
+ */
+
+#ifndef _KERN_TIMER_H
+#define _KERN_TIMER_H
+
+#include <stdint.h>
+
+#include <kern/init.h>
+
+/*
+ * Scheduling flags.
+ */
+#define TIMER_DETACHED 0x1 /* Timer completion isn't synchronized */
+#define TIMER_INTR 0x2 /* Handler is run from interrupt context */
+#define TIMER_HIGH_PRIO 0x4 /* Handler is run in high priority thread */
+
+struct timer;
+
+/*
+ * Type for timer functions.
+ */
+typedef void (*timer_fn_t)(struct timer *);
+
+#include <kern/timer_i.h>
+
+/*
+ * Return the absolute expiration time of the timer, in ticks.
+ */
+static inline uint64_t
+timer_get_time(const struct timer *timer)
+{
+ return timer->ticks; /* TODO atomic */
+}
+
+/*
+ * Initialize a timer.
+ *
+ * Timers that are responsible for releasing their own resources must
+ * be detached.
+ */
+void timer_init(struct timer *timer, timer_fn_t fn, int flags);
+
+/*
+ * Schedule a timer.
+ *
+ * The time of expiration is an absolute time in ticks.
+ *
+ * Timers may safely be rescheduled after completion. Periodic timers are
+ * implemented by rescheduling from the handler.
+ *
+ * If the timer has been canceled, this function does nothing. A
+ * canceled timer must be reinitialized before being scheduled again.
+ */
+void timer_schedule(struct timer *timer, uint64_t ticks);
+
+/*
+ * Cancel a timer.
+ *
+ * The given timer must not be detached.
+ *
+ * If the timer has already expired, this function waits until the timer
+ * function completes, or returns immediately if the function has already
+ * completed.
+ *
+ * This function may safely be called from the timer handler, but not on
+ * the current timer. Canceling a timer from the handler is achieved by
+ * simply not rescheduling it.
+ */
+void timer_cancel(struct timer *timer);
+
+/*
+ * Report a periodic event on the current processor.
+ *
+ * Interrupts and preemption must be disabled when calling this function.
+ */
+void timer_report_periodic_event(void);
+
+/*
+ * This init operation provides:
+ * - timer initialization and scheduling
+ * - module fully initialized
+ */
+INIT_OP_DECLARE(timer_setup);
+
+#endif /* _KERN_TIMER_H */
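
As the timer_schedule() comment notes, periodic timers are built by
rescheduling from the handler. A minimal sketch, assuming a hypothetical
100-tick heartbeat; only timer_init(), timer_schedule(), timer_get_time() and
clock_get_time() come from this patch:

    #include <kern/clock.h>
    #include <kern/timer.h>

    #define HEARTBEAT_INTERVAL 100  /* hypothetical period, in ticks */

    static struct timer heartbeat_timer;
    static unsigned long heartbeat_count;

    /* Handler: do the periodic work, then reschedule one period later. */
    static void
    heartbeat_fn(struct timer *timer)
    {
        heartbeat_count++;
        timer_schedule(timer, timer_get_time(timer) + HEARTBEAT_INTERVAL);
    }

    static void
    heartbeat_start(void)
    {
        timer_init(&heartbeat_timer, heartbeat_fn, 0);
        timer_schedule(&heartbeat_timer, clock_get_time() + HEARTBEAT_INTERVAL);
    }

With flags 0, the handler runs from a work queue thread; TIMER_INTR would run
it directly from interrupt context instead. Rescheduling relative to
timer_get_time() rather than the current time avoids drift. Such a timer is
stopped with timer_cancel() from another thread: cancellation marks the timer
canceled so that a concurrent reschedule from the handler is ignored, and the
timer must be reinitialized before it can be scheduled again.
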
diff --git a/kern/timer_i.h b/kern/timer_i.h
new file mode 100644
index 00000000..4ed01f22
--- /dev/null
+++ b/kern/timer_i.h
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _KERN_TIMER_I_H
+#define _KERN_TIMER_I_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <kern/hlist.h>
+#include <kern/work.h>
+
+struct timer {
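+ /*
+ * A timer is either linked into a hash table bucket (node member) or
+ * queued as deferred work (work member), never both at once, hence
+ * the union.
+ */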
+ union {
+ struct hlist_node node;
+ struct work work;
+ };
+
+ uint64_t ticks;
+ timer_fn_t fn;
+ unsigned int cpu;
+ unsigned short state;
+ unsigned short flags;
+ struct thread *joiner;
+};
+
+#endif /* _KERN_TIMER_I_H */
diff --git a/kern/turnstile.c b/kern/turnstile.c
index 1e667773..e59a7f3f 100644
--- a/kern/turnstile.c
+++ b/kern/turnstile.c
@@ -196,7 +196,6 @@ static void
turnstile_remove_waiter(struct turnstile *turnstile,
struct turnstile_waiter *waiter)
{
- assert(turnstile_waiter_awaken(waiter));
plist_remove(&turnstile->waiters, &waiter->node);
turnstile_update_top_waiter(turnstile);
}
@@ -283,6 +282,9 @@ turnstile_td_disown(struct turnstile_td *td, struct turnstile *turnstile)
turnstile->owner = NULL;
}
+/*
+ * A turnstile must be "reowned" whenever its top waiter has changed.
+ */
static void
turnstile_td_reown(struct turnstile_td *td, struct turnstile *turnstile)
{
@@ -407,7 +409,6 @@ turnstile_assert_init_state(const struct turnstile *turnstile)
assert(plist_empty(&turnstile->waiters));
assert(turnstile->next_free == NULL);
assert(turnstile->top_waiter == NULL);
- assert(turnstile->owner == NULL);
}
static void
@@ -675,7 +676,11 @@ turnstile_update_owner(struct turnstile *turnstile, struct thread *owner)
thread_ref(owner);
spinlock_lock(&td->lock);
- if (turnstile->owner == NULL) {
+ if (turnstile_empty(turnstile)) {
+ if (turnstile->owner != NULL) {
+ turnstile_td_disown(td, turnstile);
+ }
+ } else if (turnstile->owner == NULL) {
turnstile_td_own(td, turnstile);
} else {
turnstile_td_reown(td, turnstile);
@@ -688,14 +693,16 @@ turnstile_update_owner(struct turnstile *turnstile, struct thread *owner)
spinlock_lock(&turnstile->bucket->lock);
}
-void
-turnstile_wait(struct turnstile *turnstile, const char *wchan,
- struct thread *owner)
+static int
+turnstile_wait_common(struct turnstile *turnstile, const char *wchan,
+ struct thread *owner, bool timed, uint64_t ticks)
{
struct turnstile_waiter waiter;
struct turnstile_td *td;
struct thread *thread;
+ int error;
+ error = 0;
thread = thread_self();
assert(thread != owner);
@@ -718,9 +725,25 @@ turnstile_wait(struct turnstile *turnstile, const char *wchan,
for (;;) {
if (!turnstile_waiter_awaken(&waiter)) {
- thread_sleep(&turnstile->bucket->lock, turnstile->sync_obj, wchan);
+ if (!timed) {
+ thread_sleep(&turnstile->bucket->lock,
+ turnstile->sync_obj, wchan);
+ } else {
+ error = thread_timedsleep(&turnstile->bucket->lock,
+ turnstile->sync_obj, wchan, ticks);
+
+ if (error) {
+ if (turnstile_waiter_awaken(&waiter)) {
+ error = 0;
+ } else {
+ break;
+ }
+ }
+ }
}
+ assert(turnstile_waiter_awaken(&waiter));
+
/*
* The real priority of a thread may change between waking up
* and reacquiring the turnstile.
@@ -738,6 +761,30 @@ turnstile_wait(struct turnstile *turnstile, const char *wchan,
turnstile_td_set_waiter(td, NULL);
turnstile_remove_waiter(turnstile, &waiter);
spinlock_unlock(&td->lock);
+
+ if (error && (turnstile->owner != NULL)) {
+ /* This function temporarily unlocks the turnstile */
+ turnstile_update_owner(turnstile, turnstile->owner);
+ }
+
+ return error;
+}
+
+void
+turnstile_wait(struct turnstile *turnstile, const char *wchan,
+ struct thread *owner)
+{
+ int error;
+
+ error = turnstile_wait_common(turnstile, wchan, owner, false, 0);
+ assert(!error);
+}
+
+int
+turnstile_timedwait(struct turnstile *turnstile, const char *wchan,
+ struct thread *owner, uint64_t ticks)
+{
+ return turnstile_wait_common(turnstile, wchan, owner, true, ticks);
}
void
@@ -783,6 +830,10 @@ turnstile_disown(struct turnstile *turnstile)
struct turnstile_td *td;
struct thread *owner;
+ if (turnstile->owner == NULL) {
+ return;
+ }
+
owner = thread_self();
assert(turnstile->owner == owner);
assert(!turnstile_empty(turnstile));
diff --git a/kern/turnstile.h b/kern/turnstile.h
index 0473a4c6..e7b4a5e3 100644
--- a/kern/turnstile.h
+++ b/kern/turnstile.h
@@ -28,6 +28,7 @@
#include <stdbool.h>
#include <stddef.h>
+#include <stdint.h>
#include <kern/init.h>
#include <kern/plist.h>
@@ -157,9 +158,17 @@ bool turnstile_empty(const struct turnstile *turnstile);
* the associated synchronization object. The priority of the caller
* is propagated to the chain of turnstiles and owners as necessary
* to prevent unbounded priority inversion.
+ *
+ * When bounding the duration of the wait, the caller must pass an absolute
+ * time in ticks, and ERROR_TIMEDOUT is returned if that time is reached
+ * before the turnstile is signalled. In addition, if a timeout occurs,
+ * the calling thread temporarily releases the turnstile before returning,
+ * causing other threads to consider the turnstile as empty.
*/
void turnstile_wait(struct turnstile *turnstile, const char *wchan,
struct thread *owner);
+int turnstile_timedwait(struct turnstile *turnstile, const char *wchan,
+ struct thread *owner, uint64_t ticks);
/*
* Wake up a thread waiting on the given turnstile, if any.
@@ -175,8 +184,7 @@ void turnstile_signal(struct turnstile *turnstile);
* Own/disown a turnstile.
*
* The turnstile must be lent when taking ownership, acquired when
- * releasing it. Owning has no effect on empty turnstiles.
- * Conversely, an empty turnstile cannot be disowned.
+ * releasing it.
*
* Ownership must be updated atomically with regard to the ownership
* of the associated synchronization object.
diff --git a/kern/work.h b/kern/work.h
index a8df1f7e..2d7bd62f 100644
--- a/kern/work.h
+++ b/kern/work.h
@@ -46,6 +46,8 @@ typedef void (*work_fn_t)(struct work *);
* This structure should be embedded in objects related to the work. It
* stores the work function and is passed to it as its only parameter.
* The function can then find the containing object with the structof macro.
+ *
+ * TODO Make private.
*/
struct work {
struct work *next;
diff --git a/kern/xcall.c b/kern/xcall.c
index 44bd41f5..b5ed4b24 100644
--- a/kern/xcall.c
+++ b/kern/xcall.c
@@ -133,6 +133,7 @@ xcall_call(xcall_fn_t fn, void *arg, unsigned int cpu)
thread_preempt_disable();
+ /* TODO Fix to match interrupt context semantics */
if (cpu == cpu_id()) {
unsigned long flags;