Port to QEMU netduino2 (cortex-m3)

author: Richard Braun <rbraun@sceen.net> 2018-01-23 21:24:31 +0100
committer: Richard Braun <rbraun@sceen.net> 2018-01-23 21:26:31 +0100
commit: 4778a84feb6c53e08fd2f15e33f2d1df64c0737f (patch)
tree: 7841ca102a5c041b5dd7e448e36af7065d81ed2d
parent: 06844a6997166e5845b4ef7dfbccf5aac3a6a352 (diff)
25 files changed, 797 insertions, 1390 deletions
diff --git a/Makefile b/Makefile
index 7a970ec..eec3c9a 100644
--- a/Makefile
+++ b/Makefile
@@ -8,7 +8,7 @@ VERSION = 1.0
 #
 # Here is an example of overriding the compiler :
 # $ make CC=clang
-CC = gcc
+CC = arm-none-eabi-gcc
 
 # C preprocessor flags.
 #
@@ -16,9 +16,6 @@ CC = gcc
 # non free, and as a result, the gcc-doc package is part of the non-free
 # components.
 
-# Generate code for a 32-bits environment.
-X1_CPPFLAGS = -m32
-
 # Do not search the standard system directories for header files.
 # The kernel is a free standing environment, where no host library can
 # work, at least not without (usually heavy) integration work.
@@ -80,14 +77,19 @@ X1_CFLAGS += -Wno-unneeded-internal-declaration
 # Set the language as C99 with GNU extensions.
 X1_CFLAGS += -std=gnu99
 
-# Build with optimizations as specified by the -O2 option.
-X1_CFLAGS += -O2
+# Build with size optimizations as specified by the -Os option.
+X1_CFLAGS += -Os
 
 # Include debugging symbols, giving inspection tools a lot more debugging
 # data to work with, e.g. allowing them to translate between addresses and
 # source locations.
 X1_CFLAGS += -g
 
+X1_CFLAGS += -mcpu=cortex-m3
+X1_CFLAGS += -mthumb
+
+X1_CFLAGS += -fsigned-char
+
 # Target a free standing environment as defined by C99.
 #
 # This option tells the compiler that it may not assume a hosted environment,
@@ -125,9 +127,6 @@ X1_CFLAGS += -fno-strict-aliasing
 # global variable are made, this option will make the link fail.
 X1_CFLAGS += -fno-common
 
-# Disable all extended intruction sets that require special kernel support.
-X1_CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mno-avx
-
 # Append user-provided compiler flags, if any.
 #
 # Here are some examples :
@@ -147,9 +146,6 @@ X1_CFLAGS += $(CFLAGS)
 # the GNU FDL license is considered non free, and as a result, the gcc-doc
 # package is part of the non-free components.
 
-# Link for a 32-bits environment.
-X1_LDFLAGS = -m32
-
 # Build a static executable, with no shared library.
 X1_LDFLAGS += -static
 
@@ -178,22 +174,20 @@ LIBS = -lgcc
 BINARY = x1
 
 SOURCES = \
-	src/boot_asm.S \
 	src/boot.c \
+	src/boot_asm.S \
 	src/condvar.c \
 	src/cpu.c \
 	src/cpu_asm.S \
-	src/i8254.c \
-	src/i8259.c \
-	src/io_asm.S \
 	src/main.c \
 	src/mem.c \
 	src/mutex.c \
+	src/nvic.c \
 	src/panic.c \
 	src/stdio.c \
 	src/string.c \
 	src/sw.c \
-	src/thread_asm.S \
+	src/systick.c \
 	src/thread.c \
 	src/timer.c \
 	src/uart.c
diff --git a/lib/macros.h b/lib/macros.h
index 6369599..d478f67 100644
--- a/lib/macros.h
+++ b/lib/macros.h
@@ -33,7 +33,9 @@
 #error "GCC 4+ required"
 #endif
 
+#ifndef __ASSEMBLER__
 #include <stddef.h>
+#endif
 
 #define MACRO_BEGIN         ({
 #define MACRO_END           })
diff --git a/qemu.sh b/qemu.sh
index 2466cf3..f9e7930 100755
--- a/qemu.sh
+++ b/qemu.sh
@@ -2,8 +2,6 @@
 
 # Start the QEMU emulator with options doing the following :
 #  - GDB remote access on the local TCP port 1234
-#  - 64MB of physical memory (RAM)
-#  - No video device (automatically sets the first serial port as the console)
 #
 # In order to dump all exceptions and interrupts to a log file, you may add
 # the following options :
@@ -12,8 +10,9 @@
 #
 # Note that these debugging options do not work when KVM is enabled.
 
-qemu-system-i386 \
+qemu-system-arm \
+        -M netduino2 \
+        -cpu cortex-m3 \
         -gdb tcp::1234 \
-        -m 64 \
-        -nographic \
-        -kernel x1
+        -monitor stdio \
+        -d guest_errors -kernel x1
diff --git a/src/io.h b/src/asm.h
index 19aa719..ca7c925 100644
--- a/src/io.h
+++ b/src/asm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2018 Richard Braun.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -18,30 +18,32 @@
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
- *
- *
- * I/O ports access.
- *
- * The x86 architecture is special in that, in addition to the physical memory
- * address space, it also has an I/O port space. Most modern processors use
- * the physical memory address space to access memory-mapped device memory and
- * registers, and that's also the case on x86, but the I/O port space is also
- * used for this purpose, at least for some legacy devices.
  */
 
-#ifndef _IO_H
-#define _IO_H
+#ifndef _ASM_H
+#define _ASM_H
 
-#include <stdint.h>
+#ifndef __ASSEMBLER__
+#error "this header may only be included from assembly source files"
+#endif
 
 /*
- * Read a byte from an I/O port.
+ * Select the unified ARM/Thumb syntax, required to assemble Thumb-2 code.
+ *
+ * See https://sourceware.org/binutils/docs/as/ARM_002dInstruction_002dSet.html#ARM_002dInstruction_002dSet
  */
-uint8_t io_read(uint16_t port);
+.syntax unified
 
 /*
- * Write a byte to an I/O port.
+ * The .thumb_func directive tells the assembler to mark the next symbol as
+ * a Thumb function, so that references to its address have bit 0 set.
+ *
+ * The .global directive tells the assembler to make the symbol global,
+ * i.e. to make it visible to other compilation units.
  */
-void io_write(uint16_t port, uint8_t byte);
+#define ASM_FUNC(x)     \
+.thumb_func;            \
+.global x;              \
+x:
 
-#endif /* _IO_H */
+#endif /* _ASM_H */
diff --git a/src/boot.c b/src/boot.c
index ac08818..f17349c 100644
--- a/src/boot.c
+++ b/src/boot.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
  * Copyright (c) 2017 Jerko Lenstra.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -22,22 +22,34 @@
  */
 
 #include <stdint.h>
+#include <string.h>
 
 #include <lib/macros.h>
 
 #include "boot.h"
+#include "cpu.h"
+#include "main.h"
 
-/*
- * This is the boot stack, used by the boot code to set the value of
- * the ESP register very early once control is passed to the kernel.
- *
- * It is aligned to 4 bytes to comply with the System V Intel 386 ABI [1].
- * While not strictly required since x86 supports unaligned accesses,
- * aligned accesses are faster, and the compiler generates instructions
- * accessing the stack that assume it's aligned.
- *
- * See the assembly code at the boot_start label in boot_asm.S.
- *
- * [1] http://www.sco.com/developers/devspecs/abi386-4.pdf
- */
-uint8_t boot_stack[BOOT_STACK_SIZE] __aligned(4);
+extern char _lma_data_addr;
+extern char _data_start;
+extern char _data_end;
+extern char _bss_start;
+extern char _bss_end;
+
+void boot_main(void);
+
+uint8_t boot_stack[BOOT_STACK_SIZE] __aligned(CPU_STACK_ALIGN);
+
+static void
+boot_copy_data(void)
+{
+    memcpy(&_data_start, &_lma_data_addr, &_data_end - &_data_start);
+}
+
+void
+boot_main(void)
+{
+    cpu_intr_disable();
+    boot_copy_data();
+    main();
+}
diff --git a/src/boot.h b/src/boot.h
index d813605..27cd8e1 100644
--- a/src/boot.h
+++ b/src/boot.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
  * Copyright (c) 2017 Jerko Lenstra.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -24,11 +24,32 @@
 #ifndef _BOOT_H
 #define _BOOT_H
 
+#include <lib/macros.h>
+
+#include "cpu.h"
+
 /*
  * The size of the boot stack.
  *
  * See the boot_stack variable in boot.c.
  */
-#define BOOT_STACK_SIZE 4096
+#define BOOT_STACK_SIZE 512
+
+#if !P2ALIGNED(BOOT_STACK_SIZE, CPU_STACK_ALIGN)
+#error "misaligned boot stack"
+#endif
+
+#ifndef __ASSEMBLER__
+
+#include <stdint.h>
+
+extern uint8_t boot_stack[BOOT_STACK_SIZE];
+
+/*
+ * Entry point.
+ */
+void boot_start(void);
+
+#endif /* __ASSEMBLER__ */
 
 #endif /* _BOOT_H */
diff --git a/src/boot_asm.S b/src/boot_asm.S
index 018f096..17823d6 100644
--- a/src/boot_asm.S
+++ b/src/boot_asm.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
  * Copyright (c) 2017 Jerko Lenstra.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -21,47 +21,9 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include "asm.h"
 #include "boot.h"
-
-/*
- * These are values used in the OS image header, as defined by the multiboot
- * specification.
- *
- * See https://www.gnu.org/software/grub/manual/multiboot/multiboot.html.
- */
-#define BOOT_HDR_MAGIC  0x1BADB002
-#define BOOT_HDR_CHECK  0x2BADB002
-#define BOOT_HDR_FLAGS  0x0
-
-/*
- * The .section directive tells the assembler which section the following
- * instructions should go into.
- *
- * The "a" flag makes the section allocatable, meaning memory will be
- * allocated for that section at load time.
- *
- * See https://sourceware.org/binutils/docs-2.29/as/Section.html#Section.
- */
-.section .hdr, "a"
-
-/* Generate code for i386 */
-.code32
-
-/*
- * The .int directive is used to emit verbatim machine words. Here, the
- * third word is the checksum of the first two, defined as "a 32-bit
- * unsigned value which, when added to the other magic fields (i.e.
- * ‘magic’ and ‘flags’), must have a 32-bit unsigned sum of zero".
- * Intuitively, adding the two first words and making the result negative
- * gives a value that, when added to the other fields, gives 0, despite
- * the word being unsigned. This trick works because values use two's
- * complement representation.
- *
- * See https://en.wikipedia.org/wiki/Two%27s_complement.
- */
-.int BOOT_HDR_MAGIC
-.int BOOT_HDR_FLAGS
-.int -(BOOT_HDR_FLAGS + BOOT_HDR_MAGIC)
+#include "cpu.h"
 
 /*
  * Put the following instructions into the .text section, which is
@@ -74,23 +36,36 @@
  * be run when control is passed to the kernel. The address of this symbol
  * is what the following command returns :
  *   readelf -aW x1 | grep "Entry point"
+ */
+ASM_FUNC(boot_start)
+  mov %r0, %r13                     /* load boot stack in PSP */
+  msr psp, %r0
+  mov %r0, $2                       /* use PSP as stack pointer */
+  msr control, %r0
+  isb                               /* force CONTROL register evaluation,
+                                       i.e. R13 is now PSP */
+  ldr %r0, =cpu_exc_stack           /* load exception stack in MSP */
+  add %r0, %r0, CPU_EXC_STACK_SIZE
+  msr msp, %r0
+  bl boot_clear_bss                 /* boot_clear_bss() */
+  b boot_main                       /* goto boot_main */
+
+/*
+ * Clear the .bss section.
  *
- * The .global directive tells the assembler to make the symbol global,
- * i.e. to make it visible to other compilation units.
- *
- * When this code is run, the machine state should comply with what the
- * multiboot specification defines.
+ * The stack, being part of the .bss section, must not be used to save
+ * data, since that data will be cleared.
  */
-.global boot_start
-boot_start:
-  cmp $BOOT_HDR_CHECK, %eax     /* Compare EAX against the expected value */
-  jne .                         /* If not equal, jump to the current address.
-                                   This is an infinite loop. */
-  mov $boot_stack, %esp         /* Set up a stack */
-  add $BOOT_STACK_SIZE, %esp    /* On x86, stacks grow downwards, so start
-                                   at the top */
-  jmp main                      /* Jump to the C main function */
+ASM_FUNC(boot_clear_bss)            /* void                             */
+  mov %r0, #0                       /* boot_clear_bss(void)             */
+  ldr %r1, =_bss_start              /* {                                */
+  ldr %r2, =_bss_end                /*     uint32_t *r1 = &_bss_start;  */
+                                    /*     uint32_t *r2 = &_bss_end;    */
 
-loop:
-  hlt                           /* Never reached, for safety */
-  jmp loop
+1:
+  cmp %r1, %r2                      /*     while (r1 < r2) {            */
+  it hs                             /*         *r1 = 0;                 */
+  bxhs %r14                         /*         r1++;                    */
+  str %r0, [%r1]                    /*     }                            */
+  add %r1, %r1, #4                  /* }                                */
+  b 1b
diff --git a/src/cpu.c b/src/cpu.c
index 325aab5..62251e1 100644
--- a/src/cpu.c
+++ b/src/cpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -18,10 +18,6 @@
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
- *
- *
- * All references to the Intel 64 and IA-32 Architecture Software Developer's
- * Manual are valid for order number: 325462-061US, December 2016.
  */
 
 #include <assert.h>
@@ -32,88 +28,41 @@
 
 #include <lib/macros.h>
 
+#include "boot.h"
 #include "cpu.h"
-#include "i8259.h"
+#include "nvic.h"
 #include "thread.h"
+#include "timer.h"
 
 /*
- * Segment flags.
- *
- * See Intel 64 and IA-32 Architecture Software Developer's Manual, Volume 3
- * System Programming Guide :
- *  - 3.4.5 Segment Descriptors
- *  - 3.5 System Descriptor Types
+ * xPSR register bits.
  */
-#define CPU_SEG_DATA_RW         0x00000200
-#define CPU_SEG_CODE_RX         0x00000900
-#define CPU_SEG_S               0x00001000
-#define CPU_SEG_P               0x00008000
-#define CPU_SEG_DB              0x00400000
-#define CPU_SEG_G               0x00800000
-
-#define CPU_IDT_SIZE (CPU_IDT_VECT_IRQ_BASE + I8259_NR_IRQ_VECTORS)
+#define CPU_PSR_8BYTE_STACK_ALIGN   0x00000200
+#define CPU_PSR_THUMB               0x01000000
 
 /*
- * Segment descriptor.
- *
- * These entries are found in the GDT and IDT tables (described below).
- * When loading a segment register, the value of the register is a
- * segment selector, which is an index (in bytes) along with flags.
- *
- * See Intel 64 and IA-32 Architecture Software Developer's Manual,
- * Volume 3 System Programming Guide :
- *  - 3.4.2 Segment Selectors
- *  - 3.4.5 Segment Descriptors
+ * Declarations for C/assembly functions that are global so that they can
+ * be shared between cpu.c and cpu_asm.S, but are considered private to
+ * the cpu module.
  */
-struct cpu_seg_desc {
-    uint32_t low;
-    uint32_t high;
-};
+void cpu_exc_main(void);
+void cpu_exc_svcall(void);
+void cpu_exc_pendsv(void);
+void cpu_irq_main(void);
 
 /*
- * A pseudo descriptor is an operand for the LGDT/LIDT instructions.
- *
- * See Intel 64 and IA-32 Architecture Software Developer's Manual,
- * Volume 3 System Programming Guide, 3.5.1 Segment Descriptor Tables,
- * Figure 3-11 Pseudo-Descriptor Formats.
- *
- * This structure is packed to prevent any holes between limit and base.
+ * Exception vector table.
  */
-struct cpu_pseudo_desc {
-    uint16_t limit;
-    uint32_t base;
-} __packed;
-
-/*
- * These segment descriptor tables are the Global Descriptor Table (GDT)
- * and the Interrupt Descriptor Table (IDT) respectively. The GDT was
- * historically used to create segments. Segmentation could be used to run
- * multiple instances of the same program at different locations in memory,
- * by changing the base address of segments. It could implement a simple
- * form of memory protection by restricting the length of segments. With
- * modern virtual memory based entirely on paging, segmentation has become
- * obsolete, and all modern systems use a flat memory model, where all
- * segments span the entire physical space. Segments may still be used to
- * provide per-processor or per-thread variables (e.g. this is how TLS,
- * thread-local storage, is implemented).
- *
- * The IDT is used for exception and interrupt handling, collectively known
- * as interrupts. Here, "exception" refers to interrupts originating from
- * the CPU such as a division by zero exception, whereas "IRQ" refers to
- * interrupts raised by external devices. These terms are often used
- * interchangeably. What's important to keep in mind is that interrupts
- * divert the flow of execution of the processor. The IDT tells the processor
- * where to branch when an interrupt occurs.
- *
- * The GDT and IDT should be 8-byte aligned for best performance.
- *
- * See Intel 64 and IA-32 Architecture Software Developer's Manual, Volume 3
- * System Programming Guide :
- *  - 3.5.1 Segment Descriptor Tables (GDT)
- *  - 6.10 Interrupt Descriptor Table (IDT)
- */
-static struct cpu_seg_desc cpu_gdt[CPU_GDT_SIZE] __aligned(8);
-static struct cpu_seg_desc cpu_idt[CPU_IDT_SIZE] __aligned(8);
+static const void *cpu_vector_table[] __used __section(".vectors") = {
+    [0]                                     = &boot_stack[ARRAY_SIZE(boot_stack)],
+    [CPU_EXC_RESET]                         = boot_start,
+    [CPU_EXC_NMI ... CPU_EXC_USAGEFAULT]    = cpu_exc_main,
+    [CPU_EXC_SVCALL]                        = cpu_exc_svcall,
+    [CPU_EXC_DEBUGMONITOR]                  = cpu_exc_main,
+    [CPU_EXC_PENDSV]                        = cpu_exc_pendsv,
+    [CPU_EXC_SYSTICK]                       = cpu_exc_main,
+    [CPU_EXC_IRQ_BASE ... CPU_EXC_IRQ_MAX]  = cpu_irq_main,
+};
 
 /*
  * Handler for external interrupt requests.
@@ -128,96 +77,36 @@ struct cpu_irq_handler {
  *
  * Interrupts and preemption must be disabled when accessing the handlers.
  */
-static struct cpu_irq_handler cpu_irq_handlers[I8259_NR_IRQ_VECTORS];
+static struct cpu_irq_handler cpu_irq_handlers[CPU_NR_IRQS];
 
 /*
- * The interrupt frame is the stack content forged by interrupt handlers.
+ * The exception frame is the stack content forged by exception handlers.
  * They store the data needed to restore the processor to its state prior
- * to the interrupt.
+ * to the exception.
  */
-struct cpu_intr_frame {
-    /* These members are pushed by the low level ISRs */
-    uint32_t edi;
-    uint32_t esi;
-    uint32_t ebp;
-    uint32_t edx;
-    uint32_t ecx;
-    uint32_t ebx;
-    uint32_t eax;
-    uint32_t vector;
-
-    /*
-     * This member may be pushed by either the CPU or the low level ISRs
-     * for exceptions/interrupts that don't emit such an error code.
-     */
-    uint32_t error;
+struct cpu_exc_frame {
+    /* These members are pushed by cpu_exc_pendsv() */
+    uint32_t r4;
+    uint32_t r5;
+    uint32_t r6;
+    uint32_t r7;
+    uint32_t r8;
+    uint32_t r9;
+    uint32_t r10;
+    uint32_t r11;
 
     /* These members are automatically pushed by the CPU */
-    uint32_t eip;
-    uint32_t cs;
-    uint32_t eflags;
+    uint32_t r0;
+    uint32_t r1;
+    uint32_t r2;
+    uint32_t r3;
+    uint32_t r12;
+    uint32_t r14;
+    uint32_t r15;
+    uint32_t psr;
 };
 
-/*
- * Declarations for C/assembly functions that are global so that they can
- * be shared between cpu.c and cpu_asm.S, but are considered private to
- * the cpu module.
- */
-uint32_t cpu_get_eflags(void);
-void cpu_set_eflags(uint32_t eflags);
-void cpu_load_gdt(const struct cpu_pseudo_desc *desc);
-void cpu_load_idt(const struct cpu_pseudo_desc *desc);
-void cpu_intr_main(const struct cpu_intr_frame *frame);
-
-/*
- * Low level interrupt service routines.
- *
- * These are the addresses where the CPU directly branches to when an
- * interrupt is received.
- */
-void cpu_isr_divide_error(void);
-void cpu_isr_general_protection(void);
-void cpu_isr_32(void);
-void cpu_isr_33(void);
-void cpu_isr_34(void);
-void cpu_isr_35(void);
-void cpu_isr_36(void);
-void cpu_isr_37(void);
-void cpu_isr_38(void);
-void cpu_isr_39(void);
-void cpu_isr_40(void);
-void cpu_isr_41(void);
-void cpu_isr_42(void);
-void cpu_isr_43(void);
-void cpu_isr_44(void);
-void cpu_isr_45(void);
-void cpu_isr_46(void);
-void cpu_isr_47(void);
-
-uint32_t
-cpu_intr_save(void)
-{
-    uint32_t eflags;
-
-    eflags = cpu_get_eflags();
-    cpu_intr_disable();
-    return eflags;
-}
-
-void
-cpu_intr_restore(uint32_t eflags)
-{
-    cpu_set_eflags(eflags);
-}
-
-bool
-cpu_intr_enabled(void)
-{
-    uint32_t eflags;
-
-    eflags = cpu_get_eflags();
-    return eflags & CPU_EFL_IF;
-}
+uint8_t cpu_exc_stack[CPU_EXC_STACK_SIZE] __aligned(CPU_STACK_ALIGN);
 
 void
 cpu_halt(void)
@@ -230,97 +119,13 @@ cpu_halt(void)
 }
 
 static void
-cpu_default_intr_handler(void)
-{
-    printf("cpu: error: unhandled interrupt\n");
-    cpu_halt();
-}
-
-static void
-cpu_seg_desc_init_null(struct cpu_seg_desc *desc)
-{
-    desc->low = 0;
-    desc->high = 0;
-}
-
-static void
-cpu_seg_desc_init_code(struct cpu_seg_desc *desc)
-{
-    /*
-     * Base: 0
-     * Limit: 0xffffffff
-     * Privilege level: 0 (most privileged)
-     */
-    desc->low = 0xffff;
-    desc->high = CPU_SEG_G
-                 | CPU_SEG_DB
-                 | (0xf << 16)
-                 | CPU_SEG_P
-                 | CPU_SEG_S
-                 | CPU_SEG_CODE_RX;
-}
-
-static void
-cpu_seg_desc_init_data(struct cpu_seg_desc *desc)
-{
-    /*
-     * Base: 0
-     * Limit: 0xffffffff
-     * Privilege level: 0 (most privileged)
-     */
-    desc->low = 0xffff;
-    desc->high = CPU_SEG_G
-                 | CPU_SEG_DB
-                 | (0xf << 16)
-                 | CPU_SEG_P
-                 | CPU_SEG_S
-                 | CPU_SEG_DATA_RW;
-}
-
-static void
-cpu_seg_desc_init_intr_gate(struct cpu_seg_desc *desc,
-                            void (*handler)(void))
-{
-    desc->low = (CPU_GDT_SEL_CODE << 16)
-                | (((uint32_t)handler) & 0xffff);
-    desc->high = (((uint32_t)handler) & 0xffff0000)
-                 | CPU_SEG_P
-                 | 0xe00;
-}
-
-static void
-cpu_pseudo_desc_init(struct cpu_pseudo_desc *desc,
-                     const void *addr, size_t size)
-{
-    assert(size <= 0x10000);
-    desc->limit = size - 1;
-    desc->base = (uint32_t)addr;
-}
-
-static struct cpu_seg_desc *
-cpu_get_gdt_entry(size_t selector)
-{
-    size_t index;
-
-    /*
-     * The first 3 bits are the TI and RPL bits
-     *
-     * See Intel 64 and IA-32 Architecture Software Developer's Manual,
-     * Volume 3 System Programming Guide, 3.4.2 Segment Selectors.
-     */
-    index = selector >> 3;
-    assert(index < ARRAY_SIZE(cpu_gdt));
-    return &cpu_gdt[index];
-}
-
-static void
 cpu_irq_handler_init(struct cpu_irq_handler *handler)
 {
     handler->fn = NULL;
 }
 
 static struct cpu_irq_handler *
-cpu_lookup_irq_handler(unsigned int irq)
+cpu_get_irq_handler(unsigned int irq)
 {
     assert(irq < ARRAY_SIZE(cpu_irq_handlers));
     return &cpu_irq_handlers[irq];
@@ -335,182 +140,122 @@ cpu_irq_handler_set_fn(struct cpu_irq_handler *handler,
     handler->arg = arg;
 }
 
-static void
-cpu_setup_gdt(void)
+static inline uint32_t
+cpu_read_ipsr(void)
 {
-    struct cpu_pseudo_desc pseudo_desc;
-
-    cpu_seg_desc_init_null(cpu_get_gdt_entry(CPU_GDT_SEL_NULL));
-    cpu_seg_desc_init_code(cpu_get_gdt_entry(CPU_GDT_SEL_CODE));
-    cpu_seg_desc_init_data(cpu_get_gdt_entry(CPU_GDT_SEL_DATA));
+    uint32_t vector;
 
-    cpu_pseudo_desc_init(&pseudo_desc, cpu_gdt, sizeof(cpu_gdt));
-    cpu_load_gdt(&pseudo_desc);
+    asm volatile("mrs %0, ipsr" : "=r" (vector));
+    return vector;
 }
 
-static void
-cpu_setup_idt(void)
+void
+cpu_exc_main(void)
 {
-    struct cpu_pseudo_desc pseudo_desc;
+    uint32_t vector, primask;
 
-    for (size_t i = 0; i < ARRAY_SIZE(cpu_irq_handlers); i++) {
-        cpu_irq_handler_init(cpu_lookup_irq_handler(i));
-    }
+    vector = cpu_read_ipsr();
 
-    for (size_t i = 0; i < ARRAY_SIZE(cpu_idt); i++) {
-        cpu_seg_desc_init_intr_gate(&cpu_idt[i], cpu_default_intr_handler);
-    }
+    assert(vector < CPU_EXC_IRQ_BASE);
 
-    cpu_seg_desc_init_intr_gate(&cpu_idt[CPU_IDT_VECT_DIV],
-                                cpu_isr_divide_error);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[CPU_IDT_VECT_GP],
-                                cpu_isr_general_protection);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[32], cpu_isr_32);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[33], cpu_isr_33);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[34], cpu_isr_34);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[35], cpu_isr_35);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[36], cpu_isr_36);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[37], cpu_isr_37);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[38], cpu_isr_38);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[39], cpu_isr_39);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[40], cpu_isr_40);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[41], cpu_isr_41);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[42], cpu_isr_42);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[43], cpu_isr_43);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[44], cpu_isr_44);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[45], cpu_isr_45);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[46], cpu_isr_46);
-    cpu_seg_desc_init_intr_gate(&cpu_idt[47], cpu_isr_47);
-
-    cpu_pseudo_desc_init(&pseudo_desc, cpu_idt, sizeof(cpu_idt));
-    cpu_load_idt(&pseudo_desc);
-}
-
-static void
-cpu_print_frame(const struct cpu_intr_frame *frame)
-{
-    printf("cpu: vector: %-8x eip: %08x eax: %08x ebx: %08x\n"
-           "cpu:  error: %-8x esp: %08x ecx: %08x edx: %08x\n"
-           "cpu: eflags: %08x ebp: %08x esi: %08x edi: %08x\n",
-           (unsigned int)frame->vector, (unsigned int)frame->eip,
-           (unsigned int)frame->eax, (unsigned int)frame->ebx,
-           (unsigned int)frame->error, (unsigned int)(frame + 1),
-           (unsigned int)frame->ecx, (unsigned int)frame->edx,
-           (unsigned int)frame->eflags, (unsigned int)frame->ebp,
-           (unsigned int)frame->esi, (unsigned int)frame->edi);
-}
+    /*
+     * Interrupt handlers may call functions that may in turn yield the
+     * processor. When running in interrupt context, as opposed to thread
+     * context, there is no way to yield the processor, because the context
+     * isn't saved into a scheduled structure, which is what threads are
+     * for. As a result, disable preemption to prevent an invalid context
+     * switch.
+     */
+    primask = thread_preempt_disable_intr_save();
 
-static void
-cpu_exc_main(const struct cpu_intr_frame *frame)
-{
-    printf("cpu: exception:\n");
-    cpu_print_frame(frame);
-
-    switch (frame->vector)
-    {
-    case CPU_IDT_VECT_DIV:
-        panic("cpu: divide error");
-    case CPU_IDT_VECT_GP:
-        panic("cpu: general protection fault");
+    switch (vector) {
+    case CPU_EXC_SYSTICK:
+        thread_report_tick();
+        timer_report_tick();
+        break;
     default:
-        cpu_default_intr_handler();
+        printf("cpu: error: unhandled exception:%lu\n", (unsigned long)vector);
+        cpu_halt();
     }
+
+    thread_preempt_enable_intr_restore(primask);
 }
 
 void
-cpu_intr_main(const struct cpu_intr_frame *frame)
+cpu_irq_main(void)
 {
     struct cpu_irq_handler *handler;
+    uint32_t primask;
     unsigned int irq;
 
-    assert(!cpu_intr_enabled());
-    assert(frame->vector < ARRAY_SIZE(cpu_idt));
+    irq = cpu_read_ipsr() - CPU_EXC_IRQ_BASE;
 
-    /*
-     * Interrupt handlers may call functions that may in turn yield the
-     * processor. When running in interrupt context, as opposed to thread
-     * context, there is no way to yield the processor, because the context
-     * isn't saved into a scheduled structure, which is what threads are
-     * for. As a result, disable preemption to prevent an invalid context
-     * switch.
-     */
-    thread_preempt_disable();
-
-    if (frame->vector < CPU_IDT_VECT_IRQ_BASE) {
-        cpu_exc_main(frame);
-    } else {
-        irq = frame->vector - CPU_IDT_VECT_IRQ_BASE;
-
-        /*
-         * Acknowledge the IRQ as early as possible to allow another one to
-         * be raised.
-         */
-        i8259_irq_eoi(irq);
-
-        handler = cpu_lookup_irq_handler(irq);
-
-        if (!handler || !handler->fn) {
-            printf("cpu: error: invalid handler for irq %u\n", irq);
-        } else {
-            handler->fn(handler->arg);
-        }
+    primask = thread_preempt_disable_intr_save();
+
+    handler = cpu_get_irq_handler(irq);
+
+    if (!handler || !handler->fn) {
+        panic("cpu: error: invalid handler for irq %u", irq);
     }
 
-    /*
-     * On entry, preemption could have been either enabled or disabled.
-     * If it was enabled, this call will reenable it. As a side effect,
-     * it will check if the current thread was marked for yielding, e.g.
-     * because the interrupt handler has awaken a higher priority thread,
-     * in which case a context switch is triggerred. Such context switches
-     * are called involuntary.
-     *
-     * Here is what the stack looks like when such a context switch occurs :
-     *
-     * |                                 | Stack grows down.
-     * |                                 |
-     * | stack of the interrupted thread |
-     * |                                 |
-     * +---------------------------------+ <- interrupt occurs
-     * |                                 |
-     * | struct cpu_intr_frame           |
-     * |                                 |
-     * +---------------------------------+
-     * |                                 |
-     * | cpu_intr_main stack frame       |
-     * |                                 |
-     * +---------------------------------+
-     * |                                 |
-     * | thread function stack frames    |
-     * |                                 |
-     * +---------------------------------+
-     * |                                 |
-     * | thread context on switch        | See thread_switch_context in
-     * |                                 | thread_asm.S.
-     * +---------------------------------+
-     */
-    thread_preempt_enable();
+    handler->fn(handler->arg);
+
+    thread_preempt_enable_intr_restore(primask);
 }
 
 void
 cpu_irq_register(unsigned int irq, cpu_irq_handler_fn_t fn, void *arg)
 {
     struct cpu_irq_handler *handler;
-    uint32_t eflags;
+    uint32_t primask;
 
-    thread_preempt_disable();
-    eflags = cpu_intr_save();
+    primask = thread_preempt_disable_intr_save();
 
-    handler = cpu_lookup_irq_handler(irq);
+    handler = cpu_get_irq_handler(irq);
     cpu_irq_handler_set_fn(handler, fn, arg);
-    i8259_irq_enable(irq);
+    nvic_irq_enable(irq);
 
-    thread_preempt_enable();
-    cpu_intr_restore(eflags);
+    thread_preempt_enable_intr_restore(primask);
+}
+
+void *
+cpu_stack_forge(void *stack, size_t size, thread_fn_t fn, void *arg)
+{
+    struct cpu_exc_frame *frame;
+
+    assert(P2ALIGNED((uintptr_t)stack, CPU_STACK_ALIGN));
+
+    if (size <= sizeof(*frame)) {
+        panic("cpu: error: stack too small");
+    }
+
+    frame = stack + size;
+    frame--;
+
+    frame->r4  = 4;
+    frame->r5  = 5;
+    frame->r6  = 6;
+    frame->r7  = 7;
+    frame->r8  = 8;
+    frame->r9  = 9;
+    frame->r10 = 10;
+    frame->r11 = 11;
+    frame->r0  = (uint32_t)fn;
+    frame->r1  = (uint32_t)arg;
+    frame->r2  = 2;
+    frame->r3  = 3;
+    frame->r12 = 12;
+    frame->r14 = 0;
+    frame->r15 = (uint32_t)thread_main & ~1; /* Must be halfword aligned */
+    frame->psr = CPU_PSR_THUMB;
+
+    return frame;
 }
 
 void
 cpu_setup(void)
 {
-    cpu_setup_gdt();
-    cpu_setup_idt();
+    for (size_t i = 0; i < ARRAY_SIZE(cpu_irq_handlers); i++) {
+        cpu_irq_handler_init(cpu_get_irq_handler(i));
+    }
 }
diff --git a/src/cpu.h b/src/cpu.h
index bedf0f0..69d7921 100644
--- a/src/cpu.h
+++ b/src/cpu.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -24,54 +24,55 @@
  *
  * The main functionality of this module is to provide interrupt control,
  * and registration of IRQ handlers.
- *
- * See the i8259 module.
  */
 
 #ifndef _CPU_H
 #define _CPU_H
 
-/*
- * EFLAGS register flags.
- *
- * See Intel 64 and IA-32 Architecture Software Developer's Manual, Volume 3
- * System Programming Guide, 2.3 System Flags and Fields in The EFLAGS Register.
- */
-#define CPU_EFL_IF      0x200   /* Enable maskable hardware interrupts */
+#include <lib/macros.h>
 
-/*
- * GDT segment descriptor indexes, in bytes.
- *
- * See Intel 64 and IA-32 Architecture Software Developer's Manual, Volume 3
- * System Programming Guide, 3.4.1 Segment Descriptor Tables.
- */
-#define CPU_GDT_SEL_NULL    0x00
-#define CPU_GDT_SEL_CODE    0x08
-#define CPU_GDT_SEL_DATA    0x10
-#define CPU_GDT_SIZE        3
+#define CPU_STACK_ALIGN 8
+
+#define CPU_EXC_STACK_SIZE 4096
+
+#if !P2ALIGNED(CPU_EXC_STACK_SIZE, CPU_STACK_ALIGN)
+#error "misaligned exception stack"
+#endif
+
+#define CPU_EXC_RESET           1
+#define CPU_EXC_NMI             2
+#define CPU_EXC_HARDFAULT       3
+#define CPU_EXC_MEMMANAGE       4
+#define CPU_EXC_BUSFAULT        5
+#define CPU_EXC_USAGEFAULT      6
+#define CPU_EXC_SVCALL          11
+#define CPU_EXC_DEBUGMONITOR    12
+#define CPU_EXC_PENDSV          14
+#define CPU_EXC_SYSTICK         15
+#define CPU_EXC_IRQ_BASE        16
+#define CPU_EXC_IRQ_MAX         255
+#define CPU_NR_EXCEPTIONS       (CPU_EXC_IRQ_MAX + 1)
+#define CPU_NR_IRQS             (CPU_NR_EXCEPTIONS - CPU_EXC_IRQ_BASE)
 
 /*
- * IDT segment descriptor indexes (exception and interrupt vectors).
- *
- * There are actually a lot more potential exceptions on x86. This list
- * only includes vectors that are handled by the implementation.
- *
- * See Intel 64 and IA-32 Architecture Software Developer's Manual, Volume 3
- * System Programming Guide, 6.3 Sources of Interrupts.
+ * PRIMASK register bits.
  */
-#define CPU_IDT_VECT_DIV            0   /* Divide error */
-#define CPU_IDT_VECT_GP             13  /* General protection fault */
-#define CPU_IDT_VECT_IRQ_BASE       32  /* Base vector for external IRQs */
+#define CPU_PRIMASK_I           0x1
 
 /*
- * Preprocessor declarations may be included by assembly source files, but
- * C declarations may not.
+ * Memory mapped processor registers.
  */
+#define CPU_REG_ICSR            0xe000ed04
+
+#define CPU_ICSR_PENDSVSET      0x10000000
+
 #ifndef __ASSEMBLER__
 
 #include <stdbool.h>
 #include <stdint.h>
 
+#include "thread.h"
+
 /*
  * Type for IRQ handler functions.
  *
@@ -79,37 +80,109 @@
  */
 typedef void (*cpu_irq_handler_fn_t)(void *arg);
 
+static inline void
+cpu_inst_barrier(void)
+{
+    asm volatile("isb" : : : "memory");     /* clobber = compiler barrier */
+}
+
+static inline uint32_t
+cpu_read_primask(void)
+{
+    uint32_t primask;   /* raw register value, test with CPU_PRIMASK_I */
+
+    asm volatile("mrs %0, primask" : "=r" (primask) : : "memory");
+    return primask;     /* the clobber above makes the read a barrier */
+}
+
 /*
  * Enable/disable interrupts.
  *
  * These functions imply a compiler barrier.
  * See thread_preempt_disable() in thread.c.
  */
-void cpu_intr_enable(void);
-void cpu_intr_disable(void);
+static inline void
+cpu_intr_disable(void)
+{
+    /*
+     * The cpsid instruction is self-synchronizing and doesn't require
+     * an instruction barrier.
+     */
+    asm volatile("cpsid i" : : : "memory");     /* clobber = compiler barrier */
+}
+
+static inline void
+cpu_intr_enable(void)
+{
+    /*
+     * The cpsie instruction isn't self-synchronizing. If pending interrupts
+     * must be processed immediately, add an instruction barrier after.
+     */
+    asm volatile("cpsie i" : : : "memory");     /* clobber = compiler barrier */
+}
 
 /*
- * Disable/restore interrupts.
+ * Save and disable/restore interrupts.
  *
  * Calls to these functions can safely nest.
  *
  * These functions imply a compiler barrier.
  * See thread_preempt_disable() in thread.c.
  */
-uint32_t cpu_intr_save(void);
-void cpu_intr_restore(uint32_t eflags);
+static inline uint32_t
+cpu_intr_save(void)
+{
+    uint32_t primask;
+
+    primask = cpu_read_primask();   /* capture the state before masking */
+    cpu_intr_disable();
+    return primask;                 /* argument for cpu_intr_restore() */
+}
+
+static inline void
+cpu_intr_restore(uint32_t primask)  /* value obtained from cpu_intr_save() */
+{
+    asm volatile("msr primask, %0" : : "r" (primask) : "memory");
+}
 
 /*
  * Return true if interrupts are enabled.
  *
  * Implies a compiler barrier.
  */
-bool cpu_intr_enabled(void);
+static inline bool
+cpu_intr_enabled(void)
+{
+    uint32_t primask;
+
+    primask = cpu_read_primask();
+    return !(primask & CPU_PRIMASK_I);  /* true when the I bit is clear */
+}
 
 /*
  * Enter an idle state until the next interrupt.
  */
-void cpu_idle(void);
+static inline void
+cpu_idle(void)
+{
+    asm volatile("wfi" : : : "memory");     /* clobber = compiler barrier */
+}
+
+static inline void
+cpu_raise_svcall(void)
+{
+    asm volatile("svc $0" : : : "memory");  /* svc with immediate operand 0 */
+}
+
+static inline void
+cpu_raise_pendsv(void)
+{
+    volatile uint32_t *icsr;
+
+    icsr = (void *)CPU_REG_ICSR;
+    *icsr = CPU_ICSR_PENDSVSET;     /* plain store, no read-modify-write */
+    cpu_inst_barrier();             /* issued right after the store */
+}
 
 /*
  * Completely halt execution on the processor.
@@ -126,6 +199,8 @@ void cpu_halt(void) __attribute__((noreturn));
  */
 void cpu_irq_register(unsigned int irq, cpu_irq_handler_fn_t fn, void *arg);
 
+void * cpu_stack_forge(void *stack, size_t size, thread_fn_t fn, void *arg);
+
 /*
  * Initialize the cpu module.
  */
diff --git a/src/cpu_asm.S b/src/cpu_asm.S
index e14fb1f..b14df47 100644
--- a/src/cpu_asm.S
+++ b/src/cpu_asm.S
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -20,161 +20,34 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include "asm.h"
+#include "boot.h"
 #include "cpu.h"
 
-.section .text
-.code32
+.section .text, "ax"
 
-.global cpu_get_eflags
-cpu_get_eflags:
-  pushf
-  pop %eax
-  ret
-
-.global cpu_set_eflags
-cpu_set_eflags:
-  mov 4(%esp), %eax
-  push %eax
-  popf
-  ret
-
-.global cpu_intr_enable
-cpu_intr_enable:
-  sti
-  ret
-
-.global cpu_intr_disable
-cpu_intr_disable:
-  cli
-  ret
-
-.global cpu_idle
-cpu_idle:
-  hlt
-  ret
-
-.global cpu_load_gdt
-cpu_load_gdt:
-  mov 4(%esp), %eax             /* eax = &desc */
-  lgdt (%eax)                   /* lgdt(*eax) */
-
-  mov $CPU_GDT_SEL_DATA, %eax
-  mov %eax, %ds
-  mov %eax, %es
-  mov %eax, %fs
-  mov %eax, %gs
-  mov %eax, %ss
-
-  /*
-   * The code segment register cannot directly be written to, and is instead
-   * modified by performing a long jump.
-   *
-   * See Intel 64 and IA-32 Architecture Software Developer's Manual :
-   *  - Volume 2 Instruction Set Reference
-   *    - 3.2 Instructions (A-L)
-   *      - JMP
-   *        - Far Jumps in Protected Mode
-   *  - Volume 3 System Programming Guide:
-   *    - 3.4.3 Segment Registers
-   */
-  ljmp $CPU_GDT_SEL_CODE, $1f
-
-1:
-  ret
-
-.global cpu_load_idt
-cpu_load_idt:
-  mov 4(%esp), %eax             /* eax = &desc */
-  lidt (%eax)                   /* lidt(*eax) */
-  ret
-
-/*
- * See struct cpu_intr_frame in cpu.c.
- */
-.macro CPU_INTR_STORE_REGISTERS
-  push %edi
-  push %esi
-  push %ebp
-  push %edx
-  push %ecx
-  push %ebx
-  push %eax
+.macro CPU_EXC_STORE_REGISTERS
+  stmfd %r0!, {%r4-%r11}        /* push R4-R11 at R0, R0 is written back */
 .endm
 
-.macro CPU_INTR_LOAD_REGISTERS
-  pop %eax
-  pop %ebx
-  pop %ecx
-  pop %edx
-  pop %ebp
-  pop %esi
-  pop %edi
+.macro CPU_EXC_LOAD_REGISTERS
+  ldmfd %r0!, {%r4-%r11}        /* pop R4-R11 from R0, R0 is written back */
 .endm
 
-/*
- * Some interrupts push an error code, and some don't.
- * Have a single interrupt frame layout by pushing a dummy error code.
- */
-#define CPU_INTR(vector, name)          \
-.global name;                           \
-name:                                   \
-  pushl $0;                             \
-  pushl $(vector);                      \
-  jmp cpu_intr_common
-
-#define CPU_INTR_ERROR(vector, name)    \
-.global name;                           \
-name:                                   \
-  pushl $(vector);                      \
-  jmp cpu_intr_common
-
-/*
- * This is the first common low level entry point for all exceptions and
- * interrupts. When reached, the stack contains the registers automatically
- * pushed by the processor, an error code and the vector. It's important
- * to note that the stack pointer still points to the stack of the thread
- * running when the interrupt occurs. Actually, in this implementation,
- * the entire interrupt handler borrows the stack of the interrupted thread.
- *
- * This is dangerous because the stack must then be large enough for both
- * the largest call chain of the interrupted thread as well as the largest
- * call chain of any interrupt handler. On some implementations, especially
- * those with very demanding real-time constraints, interrupt handling may
- * nest to avoid waiting for the current interrupt to be serviced before
- * starting handling a higher priority one, leading to even larger stack
- * needs. This is why many operating systems dedicate a separate stack for
- * interrupt handling.
- */
-cpu_intr_common:
-  CPU_INTR_STORE_REGISTERS
-  push %esp                 /* push the address of the interrupt frame */
-  call cpu_intr_main        /* cpu_intr_main(frame) */
-  add $4, %esp              /* restore the stack pointer */
-  CPU_INTR_LOAD_REGISTERS
-  add $8, %esp              /* skip vector and error */
-  iret                      /* return from interrupt */
-
-CPU_INTR(CPU_IDT_VECT_DIV, cpu_isr_divide_error)
-CPU_INTR_ERROR(CPU_IDT_VECT_GP, cpu_isr_general_protection)
-
-/*
- * XXX There must be as many low level ISRs as there are possible IRQ vectors.
- *
- * See the i8259 module.
- */
-CPU_INTR(32, cpu_isr_32)
-CPU_INTR(33, cpu_isr_33)
-CPU_INTR(34, cpu_isr_34)
-CPU_INTR(35, cpu_isr_35)
-CPU_INTR(36, cpu_isr_36)
-CPU_INTR(37, cpu_isr_37)
-CPU_INTR(38, cpu_isr_38)
-CPU_INTR(39, cpu_isr_39)
-CPU_INTR(40, cpu_isr_40)
-CPU_INTR(41, cpu_isr_41)
-CPU_INTR(42, cpu_isr_42)
-CPU_INTR(43, cpu_isr_43)
-CPU_INTR(44, cpu_isr_44)
-CPU_INTR(45, cpu_isr_45)
-CPU_INTR(46, cpu_isr_46)
-CPU_INTR(47, cpu_isr_47)
+ASM_FUNC(cpu_exc_svcall)
+  stmfd %r13!, {%r14}           /* save R14 */
+  bl thread_yield_from_svcall   /* thread_yield_from_svcall() */
+  CPU_EXC_LOAD_REGISTERS        /* pop R4-R11 from the stack left in R0 */
+  msr psp, %r0                  /* reload stack in case of context switch */
+  ldmfd %r13!, {%r14}           /* restore R14 */
+  bx %r14                       /* return from exception */
+
+ASM_FUNC(cpu_exc_pendsv)
+  stmfd %r13!, {%r14}           /* save R14 */
+  mrs %r0, psp                  /* R0 = process stack pointer (PSP) */
+  CPU_EXC_STORE_REGISTERS       /* push R4-R11 at R0, now argument 0 */
+  bl thread_yield_from_pendsv   /* thread_yield_from_pendsv(sp) */
+  CPU_EXC_LOAD_REGISTERS        /* pop R4-R11 from the stack left in R0 */
+  msr psp, %r0                  /* reload stack in case of context switch */
+  ldmfd %r13!, {%r14}           /* restore R14 */
+  bx %r14                       /* return from exception */
diff --git a/src/i8254.c b/src/i8254.c
deleted file mode 100644
index fc4f48d..0000000
--- a/src/i8254.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Copyright (c) 2017 Richard Braun.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <lib/macros.h>
-
-#include "cpu.h"
-#include "i8254.h"
-#include "io.h"
-#include "thread.h"
-
-#define I8254_FREQ                  1193182
-
-#define I8254_PORT_CHANNEL0         0x40
-#define I8254_PORT_MODE             0x43
-
-#define I8254_CONTROL_BINARY        0x00
-#define I8254_CONTROL_RATE_GEN      0x04
-#define I8254_CONTROL_RW_LSB        0x10
-#define I8254_CONTROL_RW_MSB        0x20
-#define I8254_CONTROL_COUNTER0      0x00
-
-#define I8254_INITIAL_COUNT         DIV_CEIL(I8254_FREQ, THREAD_SCHED_FREQ)
-
-#define I8254_IRQ                   0
-
-static void
-i8254_irq_handler(void *arg)
-{
-    (void)arg;
-    thread_report_tick();
-}
-
-void
-i8254_setup(void)
-{
-    uint16_t value;
-
-    /*
-     * Program the timer to raise an interrupt at the scheduling frequency.
-     */
-
-    io_write(I8254_PORT_MODE, I8254_CONTROL_COUNTER0
-                              | I8254_CONTROL_RW_MSB
-                              | I8254_CONTROL_RW_LSB
-                              | I8254_CONTROL_RATE_GEN
-                              | I8254_CONTROL_BINARY);
-
-    value = I8254_INITIAL_COUNT;
-    io_write(I8254_PORT_CHANNEL0, value & 0xff);
-    io_write(I8254_PORT_CHANNEL0, value >> 8);
-
-    cpu_irq_register(I8254_IRQ, i8254_irq_handler, NULL);
-}
diff --git a/src/i8259.c b/src/i8259.c
deleted file mode 100644
index aed7068..0000000
--- a/src/i8259.c
+++ /dev/null
@@ -1,257 +0,0 @@
-/*
- * Copyright (c) 2017 Richard Braun.
- * Copyright (c) 2017 Jerko Lenstra.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- *
- *
- * IRQ means Interrupt ReQuest. They're used by external hardware to signal
- * the CPU, and in turn the OS, that an external event has happened and
- * requires processing. The usual model is shown in the image at
- * https://upload.wikimedia.org/wikipedia/commons/thumb/1/15/PIC_Hardware_interrupt_path.svg/300px-PIC_Hardware_interrupt_path.svg.png.
- *
- * This driver implements IRQ handling on the Intel 8259 PIC. The IBM PC/AT
- * actually uses 2 of these PICs for external interrupt handling, as shown
- * in https://masherz.files.wordpress.com/2010/08/217.jpg. The public
- * interface completely hides this detail and considers all given IRQs
- * as logical indexes, used to find the corresponding PIC (master or slave)
- * and the local IRQ on that PIC.
- *
- * 8259 datasheet :
- *   https://pdos.csail.mit.edu/6.828/2010/readings/hardware/8259A.pdf
- */
-
-#include <assert.h>
-#include <errno.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <stdint.h>
-
-#include <lib/macros.h>
-
-#include "cpu.h"
-#include "i8259.h"
-#include "io.h"
-
-#define I8259_IRQ_CASCADE   2        /* IRQ used for cascading on the master */
-#define I8259_NR_IRQS       8
-
-/*
- * Initialization Control Word 1 bits.
- */
-#define I8259_ICW1_ICW4     0x01    /* State that a 4th ICW will be sent */
-#define I8259_ICW1_INIT     0x10    /* This bit must be set */
-
-/*
- * Initialization Control Word 4 bits.
- */
-#define I8259_ICW4_8086     0x01    /* 8086 mode, as x86 is still compatible
-                                       with the old 8086 processor */
-
-#define I8259_OCW2_EOI      0x20    /* End of interrupt control word */
-
-enum {
-    I8259_PIC_ID_MASTER,
-    I8259_PIC_ID_SLAVE,
-    I8259_NR_PICS
-};
-
-/*
- * Intel 8259 programmable interrupt controller.
- */
-struct i8259_pic {
-    uint16_t cmd_port;      /* Command I/O port of the PIC */
-    uint16_t data_port;     /* Data I/O port of the PIC */
-    uint8_t imr;            /* Cached value of the IMR register */
-    bool master;            /* True if this PIC is the master */
-};
-
-/*
- * Static instances of PIC objects.
- */
-static struct i8259_pic i8259_pics[] = {
-    [I8259_PIC_ID_MASTER] = {
-        .cmd_port = 0x20,
-        .data_port = 0x21,
-        .imr = 0xff,
-        .master = true,
-    },
-    [I8259_PIC_ID_SLAVE] = {
-        .cmd_port = 0xa0,
-        .data_port = 0xa1,
-        .imr = 0xff,
-        .master = false,
-    },
-};
-
-static struct i8259_pic *
-i8259_get_pic(unsigned int id)
-{
-    assert(id < ARRAY_SIZE(i8259_pics));
-    return &i8259_pics[id];
-}
-
-static int
-i8259_convert_global_irq(unsigned int irq, struct i8259_pic **pic,
-                         unsigned int *local_irq)
-{
-    int error;
-
-    if (irq < I8259_NR_IRQS) {
-        *pic = i8259_get_pic(I8259_PIC_ID_MASTER);
-
-        if (local_irq) {
-            *local_irq = irq;
-        }
-
-        error = 0;
-    } else if (irq < (I8259_NR_IRQS * I8259_NR_PICS)) {
-        *pic = i8259_get_pic(I8259_PIC_ID_SLAVE);
-
-        if (local_irq) {
-            *local_irq = irq - I8259_NR_IRQS;
-        }
-
-        error = 0;
-    } else {
-        *local_irq = 0;
-        error = EINVAL;
-    }
-
-    return error;
-}
-
-static void
-i8259_pic_write_cmd(const struct i8259_pic *pic, uint8_t byte)
-{
-    io_write(pic->cmd_port, byte);
-}
-
-static void
-i8259_pic_write_data(const struct i8259_pic *pic, uint8_t byte)
-{
-    io_write(pic->data_port, byte);
-}
-
-static void
-i8259_pic_apply_imr(const struct i8259_pic *pic)
-{
-    io_write(pic->data_port, pic->imr);
-}
-
-static void
-i8259_pic_enable_irq(struct i8259_pic *pic, unsigned int irq)
-{
-    assert(irq < I8259_NR_IRQS);
-
-    pic->imr &= ~(1 << irq);
-    i8259_pic_apply_imr(pic);
-}
-
-static void
-i8259_pic_disable_irq(struct i8259_pic *pic, unsigned int irq)
-{
-    assert(irq < I8259_NR_IRQS);
-
-    pic->imr |= (1 << irq);
-    i8259_pic_apply_imr(pic);
-}
-
-static void
-i8259_pic_eoi(struct i8259_pic *pic)
-{
-    io_write(pic->cmd_port, I8259_OCW2_EOI);
-}
-
-void
-i8259_setup(void)
-{
-    struct i8259_pic *master, *slave;
-
-    master = i8259_get_pic(I8259_PIC_ID_MASTER);
-    slave = i8259_get_pic(I8259_PIC_ID_SLAVE);
-
-    i8259_pic_write_cmd(master, I8259_ICW1_INIT | I8259_ICW1_ICW4);
-    i8259_pic_write_cmd(slave, I8259_ICW1_INIT | I8259_ICW1_ICW4);
-    i8259_pic_write_data(master, CPU_IDT_VECT_IRQ_BASE);
-    i8259_pic_write_data(slave, CPU_IDT_VECT_IRQ_BASE + I8259_NR_IRQS);
-    i8259_pic_write_data(master, 1 << I8259_IRQ_CASCADE);
-    i8259_pic_write_data(slave, I8259_IRQ_CASCADE);
-    i8259_pic_write_data(master, I8259_ICW4_8086);
-    i8259_pic_write_data(slave, I8259_ICW4_8086);
-
-    i8259_pic_enable_irq(master, I8259_IRQ_CASCADE);
-    i8259_pic_apply_imr(master);
-    i8259_pic_apply_imr(slave);
-}
-
-void
-i8259_irq_enable(unsigned int irq)
-{
-    struct i8259_pic *pic;
-    unsigned int local_irq;
-    int error;
-
-    error = i8259_convert_global_irq(irq, &pic, &local_irq);
-    assert(!error);
-    i8259_pic_enable_irq(pic, local_irq);
-}
-
-void
-i8259_irq_disable(unsigned int irq)
-{
-    struct i8259_pic *pic;
-    unsigned int local_irq;
-    int error;
-
-    error = i8259_convert_global_irq(irq, &pic, &local_irq);
-    assert(!error);
-    i8259_pic_disable_irq(pic, local_irq);
-}
-
-void
-i8259_irq_eoi(unsigned int irq)
-{
-    struct i8259_pic *pic;
-    int error;
-
-    assert(!cpu_intr_enabled());
-
-    error = i8259_convert_global_irq(irq, &pic, NULL);
-    assert(!error);
-
-    if (!pic->master) {
-        /*
-         * The order in which EOI messages are sent (master then slave or the
-         * reverse) is irrelevant :
-         *  - If the slave is sent the EOI message first, it may raise another
-         *    interrupt right away, in which case it will be pending at the
-         *    master until the latter is sent the EOI message too.
-         *  - If the master is sent the EOI message first, it may raise another
-         *    interrupt right away, in which case it will be pending at the
-         *    processor until interrupts are reenabled, assuming that this
-         *    function is called with interrupts disabled, and that interrupts
-         *    remain disabled until control is returned to the interrupted
-         *    thread.
-         */
-        i8259_pic_eoi(i8259_get_pic(I8259_PIC_ID_MASTER));
-    }
-
-    i8259_pic_eoi(pic);
-}
diff --git a/src/kernel.lds b/src/kernel.lds
index f4f209a..3872a87 100644
--- a/src/kernel.lds
+++ b/src/kernel.lds
@@ -29,7 +29,8 @@ ENTRY(boot_start)
  */
 MEMORY
 {
-    RAM : ORIGIN = 1M, LENGTH = 63M
+    FLASH : ORIGIN = 0x0,        LENGTH = 0x100000
+    RAM   : ORIGIN = 0x20000000, LENGTH = 0x20000
 }
 
 /*
@@ -50,7 +51,7 @@ MEMORY
  */
 PHDRS
 {
-    hdr     PT_LOAD FLAGS(4);
+    vectors PT_LOAD FLAGS(5);
     text    PT_LOAD FLAGS(5);
     data    PT_LOAD FLAGS(6);
 }
@@ -84,22 +85,28 @@ PHDRS
  */
 SECTIONS
 {
-    .hdr : {
-        *(.hdr)
-    } > RAM : hdr
+    .vectors : {
+        *(.vectors)
+    } > FLASH : vectors
 
     .text : {
         *(.text*)
-    } > RAM : text
+        *(.rodata*)
+    } > FLASH : text
+
+    _lma_data_addr = LOADADDR(.data);   /* real load address, even if padded */
 
     .data : {
-        *(.rodata*)
+        _data_start = .;
         *(.data*)
-    } > RAM : data
+        _data_end = .;
+    } > RAM AT > FLASH : data
 
     .bss : {
+        _bss_start = .;
         *(.bss)
-    } > RAM : data
+        _bss_end = .;
+    } > RAM AT > FLASH : data
 
     /*
      * The .eh_frame section is used by DWARF tools to unwind the stack,
@@ -109,5 +116,6 @@ SECTIONS
      */
     /DISCARD/ : {
         *(.eh_frame)
+        *(.ARM*)
     }
 }
diff --git a/src/main.c b/src/main.c
index 839e273..aa64234 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -20,29 +20,22 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include <stdbool.h>
 #include <stdio.h>
 
 #include <lib/macros.h>
 #include <lib/shell.h>
 
 #include "cpu.h"
-#include "i8254.h"
-#include "i8259.h"
+#include "main.h"
 #include "mem.h"
 #include "panic.h"
 #include "sw.h"
+#include "systick.h"
 #include "thread.h"
 #include "timer.h"
 #include "uart.h"
 
 /*
- * XXX The Clang compiler apparently doesn't like the lack of prototype for
- * the main function in free standing mode.
- */
-void main(void);
-
-/*
  * This function is the main entry point for C code. It's called from
  * assembly code in the boot module, very soon after control is passed
  * to the kernel.
@@ -52,9 +45,8 @@ main(void)
 {
     thread_bootstrap();
     cpu_setup();
-    i8259_setup();
-    i8254_setup();
     uart_setup();
+    systick_setup();
     mem_setup();
     thread_setup();
     timer_setup();
diff --git a/src/i8254.h b/src/main.h
index c70ded9..9a98d0d 100644
--- a/src/i8254.h
+++ b/src/main.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2018 Richard Braun.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -18,17 +18,11 @@
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
- *
- *
- * Intel 8254 programmable interval timer (PIT) driver.
  */
 
-#ifndef _I8254_H
-#define _I8254_H
+#ifndef _MAIN_H
+#define _MAIN_H
 
-/*
- * Initialize the i8254 module.
- */
-void i8254_setup(void);
+void main(void);
 
-#endif /* _I8254_H */
+#endif /* _MAIN_H */
diff --git a/src/mem.c b/src/mem.c
index 572f2be..185da4f 100644
--- a/src/mem.c
+++ b/src/mem.c
@@ -144,7 +144,7 @@
  * the heap data, or any other statically allocated uninitialized data, in
  * the kernel image file.
  */
-#define MEM_HEAP_SIZE       (32 * 1024 * 1024)
+#define MEM_HEAP_SIZE       (64 * 1024)
 
 /*
  * Alignment required on addresses returned by mem_alloc().
diff --git a/src/thread_asm.S b/src/nvic.c
index 0327fce..865abf3 100644
--- a/src/thread_asm.S
+++ b/src/nvic.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2018 Richard Braun.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -20,32 +20,57 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-.section .text
-.code32
+#include <stdint.h>
 
-.global thread_load_context
-thread_load_context:
-  mov 4(%esp), %eax             /* Load the thread address */
-  mov (%eax), %esp              /* Switch to the thread stack */
-  jmp thread_restore_context
+#include "cpu.h"
+#include "nvic.h"
 
-.global thread_switch_context
-thread_switch_context:
-  mov 4(%esp), %eax             /* Get prev thread address */
-  mov 8(%esp), %ecx             /* Get next thread address */
+#define NVIC_BASE_ADDR 0xe000e100
 
-  push %ebp                     /* Save registers owned by the caller */
-  push %ebx
-  push %edi
-  push %esi
+struct nvic_regs {
+    /* Banks sit 0x80 bytes apart, hence the padding after each array. */
+    uint32_t iser[16], reserved0[16];   /* 0x000: set-enable */
+    uint32_t icer[16], reserved1[16];   /* 0x080: clear-enable */
+    uint32_t ispr[16], reserved2[16];   /* 0x100: set-pending */
+    uint32_t icpr[16], reserved3[16];   /* 0x180: clear-pending */
+    uint32_t iabr[16], reserved4[48];   /* 0x200: active bits */
+    uint32_t ipr[124];                  /* 0x300: priorities */
+};
 
-  mov %esp, (%eax)              /* Save prev thread stack pointer */
-  mov (%ecx), %esp              /* Switch to the stack of the next thread */
+static volatile struct nvic_regs *nvic_regs = (void *)NVIC_BASE_ADDR;
 
-thread_restore_context:
-  pop %esi
-  pop %edi
-  pop %ebx
-  pop %ebp
+static void
+nvic_get_dest(unsigned int irq, volatile uint32_t *array,
+              volatile uint32_t **reg, uint32_t *mask)
+{
+    *reg = &array[irq / 32];              /* 32 IRQs per register */
+    *mask = (UINT32_C(1) << (irq % 32));  /* unsigned, so bit 31 is fine */
+}
 
-  ret
+/* Enable the given IRQ by writing its bit to the NVIC ISER bank. */
+void
+nvic_irq_enable(unsigned int irq)
+{
+    volatile uint32_t *reg;
+    uint32_t mask, primask;
+
+    nvic_get_dest(irq, nvic_regs->iser, &reg, &mask);
+
+    primask = cpu_intr_save();
+    *reg = mask;    /* write-1-to-set: zero bits leave other IRQs alone */
+    cpu_intr_restore(primask);
+}
+
+/* Disable the given IRQ by writing its bit to the NVIC ICER bank. */
+void
+nvic_irq_disable(unsigned int irq)
+{
+    volatile uint32_t *reg;
+    uint32_t mask, primask;
+
+    nvic_get_dest(irq, nvic_regs->icer, &reg, &mask);
+
+    primask = cpu_intr_save();
+    *reg = mask;    /* write-1-to-clear: never OR in the value read back */
+    cpu_intr_restore(primask);
+}
diff --git a/src/io_asm.S b/src/nvic.h
index adf0c01..48b4f6a 100644
--- a/src/io_asm.S
+++ b/src/nvic.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2018 Richard Braun.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -20,18 +20,11 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-.section .text
-.code32
+#ifndef _NVIC_H
+#define _NVIC_H
 
-.global io_read
-io_read:
- mov 4(%esp), %edx              /* edx = port */
- in %dx, %al                    /* al = in(dx) */
- ret
+void nvic_irq_enable(unsigned int irq);
 
-.global io_write
-io_write:
- mov 8(%esp), %eax              /* eax = byte */
- mov 4(%esp), %edx              /* edx = port */
- out %al, %dx                   /* out(al, dx) */
- ret
+void nvic_irq_disable(unsigned int irq);
+
+#endif /* _NVIC_H */
diff --git a/src/stdio.c b/src/stdio.c
index 6aee7b3..2591971 100644
--- a/src/stdio.c
+++ b/src/stdio.c
@@ -24,7 +24,6 @@
 #include <stdint.h>
 #include <stdio.h>
 
-#include "cpu.h"
 #include "thread.h"
 #include "uart.h"
 
@@ -68,11 +67,10 @@ printf(const char *format, ...)
 int
 vprintf(const char *format, va_list ap)
 {
-    uint32_t eflags;
+    uint32_t primask;
     int length;
 
-    thread_preempt_disable();
-    eflags = cpu_intr_save();
+    primask = thread_preempt_disable_intr_save();
 
     length = vsnprintf(printf_buffer, sizeof(printf_buffer), format, ap);
 
@@ -80,8 +78,7 @@ vprintf(const char *format, va_list ap)
         uart_write((uint8_t)*ptr);
     }
 
-    cpu_intr_restore(eflags);
-    thread_preempt_enable();
+    thread_preempt_enable_intr_restore(primask);
 
     return length;
 }
diff --git a/src/systick.c b/src/systick.c
new file mode 100644
index 0000000..1307d98
--- /dev/null
+++ b/src/systick.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2018 Richard Braun.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#include "panic.h"
+#include "systick.h"
+#include "thread.h"
+
+#define SYSTICK_BASE_ADDR 0xe000e010
+
+#define SYSTICK_CSR_ENABLE  0x1
+#define SYSTICK_CSR_TICKINT 0x2
+
+#define SYSTICK_CALIB_NOREF         0x80000000
+#define SYSTICK_CALIB_SKEW          0x40000000
+#define SYSTICK_CALIB_TENMS_MASK    0x00ffffff
+
+struct systick_regs {
+    uint32_t csr;       /* takes SYSTICK_CSR_* bits */
+    uint32_t rvr;       /* loaded with the tick period by systick_setup() */
+    uint32_t cvr;       /* zeroed by systick_setup() */
+    uint32_t calib;     /* decoded with SYSTICK_CALIB_* */
+};
+
+static volatile struct systick_regs *systick_regs = (void *)SYSTICK_BASE_ADDR;
+
+static void
+systick_check_calib(void)
+{
+    uint32_t calib;
+
+    calib = systick_regs->calib;    /* one read, all tests use this copy */
+
+    if ((calib & SYSTICK_CALIB_NOREF)
+        || (calib & SYSTICK_CALIB_SKEW)
+        || (calib & SYSTICK_CALIB_TENMS_MASK) == 0) {
+        panic("systick: unusable");     /* any single condition is fatal */
+    }
+}
+
+void
+systick_setup(void)
+{
+    uint32_t tenms, counter;
+
+    systick_check_calib();  /* panics on NOREF, SKEW or a zero TENMS */
+
+    tenms = systick_regs->calib & SYSTICK_CALIB_TENMS_MASK;
+    counter = (tenms * 100) / THREAD_SCHED_FREQ;    /* presumably ticks/period */
+    systick_regs->rvr = counter;
+    systick_regs->cvr = 0;
+    systick_regs->csr = (SYSTICK_CSR_TICKINT | SYSTICK_CSR_ENABLE);
+}
diff --git a/src/i8259.h b/src/systick.h
index 102d77b..70b3dcc 100644
--- a/src/i8259.h
+++ b/src/systick.h
@@ -1,6 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
- * Copyright (c) 2017 Jerko Lenstra.
+ * Copyright (c) 2018 Richard Braun.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -19,39 +18,11 @@
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
- *
- *
- * Intel 8259 programmable interrupt controller (PIC) driver.
  */
 
-#ifndef _I8259_H
-#define _I8259_H
-
-/*
- * Range of vectors used for IRQ handling, 8 per PIC.
- */
-#define I8259_NR_IRQ_VECTORS 16
+#ifndef _SYSTICK_H
+#define _SYSTICK_H
 
-/*
- * Initialize the i8259 module.
- */
-void i8259_setup(void);
-
-/*
- * Enable an IRQ line on the PIC.
- */
-void i8259_irq_enable(unsigned int irq);
-
-/*
- * Disable an IRQ line on the PIC.
- */
-void i8259_irq_disable(unsigned int irq);
-
-/*
- * Report an end of interrupt.
- *
- * This function must be called with interrupts disabled.
- */
-void i8259_irq_eoi(unsigned int irq);
+void systick_setup(void); /* Initialize the systick module, panics if unusable */
 
-#endif /* _I8259_H */
+#endif /* _SYSTICK_H */
diff --git a/src/thread.c b/src/thread.c
index 30f5541..6687191 100644
--- a/src/thread.c
+++ b/src/thread.c
@@ -35,24 +35,6 @@
 #include "cpu.h"
 #include "panic.h"
 #include "thread.h"
-#include "timer.h"
-
-/*
- * The compiler expects the stack pointer to be properly aligned when a
- * function is called, and maintains this alignment across a call chain.
- * The constraints are similar to the return value of malloc().
- * See the description of mem_alloc() in mem.h.
- *
- * Note that modern compilers expect the stack to be 16-byte aligned
- * even on 32-bits i386 processors, to cope with SSE instructions which
- * don't support unaligned accesses (see a revised version of the System V
- * Intel386 ABI [1] for more details). Since all floating point support is
- * disabled when building the kernel, this requirement can be safely ignored
- * and the legacy 4-byte alignment used instead.
- *
- * [1] https://www.uclibc.org/docs/psABI-i386.pdf
- */
-#define THREAD_STACK_ALIGN 4
 
 /*
  * List of threads sharing the same priority.
@@ -146,15 +128,6 @@ static struct thread_runq thread_runq;
 static struct thread thread_dummy;
 
 /*
- * Declarations for C/assembly functions that are global so that they can
- * be shared between thread.c and thread_asm.S, but are considered private to
- * the thread module.
- */
-void thread_load_context(struct thread *thread) __attribute__((noreturn));
-void thread_switch_context(struct thread *prev, struct thread *next);
-void thread_main(thread_fn_t fn, void *arg);
-
-/*
  * Function implementing the idle thread.
  */
 static void
@@ -167,12 +140,6 @@ thread_idle(void *arg)
     }
 }
 
-static bool
-thread_scheduler_locked(void)
-{
-    return !cpu_intr_enabled() && !thread_preempt_enabled();
-}
-
 static void
 thread_list_init(struct thread_list *list)
 {
@@ -186,7 +153,13 @@ thread_list_remove(struct thread *thread)
 }
 
 static void
-thread_list_enqueue(struct thread_list *list, struct thread *thread)
+thread_list_enqueue_head(struct thread_list *list, struct thread *thread)
+{
+    list_insert_head(&list->threads, &thread->node);
+}
+
+static void
+thread_list_enqueue_tail(struct thread_list *list, struct thread *thread)
 {
     list_insert_tail(&list->threads, &thread->node);
 }
@@ -202,12 +175,30 @@ thread_list_dequeue(struct thread_list *list)
 }
 
 static bool
-thread_list_empty(struct thread_list *list)
+thread_list_empty(const struct thread_list *list)
 {
     return list_empty(&list->threads);
 }
 
 static bool
+thread_list_singular(const struct thread_list *list)
+{
+    return list_singular(&list->threads);
+}
+
+static void *
+thread_get_stack_pointer(const struct thread *thread)
+{
+    return thread->sp; /* As stored by thread_init() or the setter below */
+}
+
+static void
+thread_set_stack_pointer(struct thread *thread, void *sp)
+{
+    thread->sp = sp; /* Read back through thread_get_stack_pointer() */
+}
+
+static bool
 thread_is_running(const struct thread *thread)
 {
     return thread->state == THREAD_STATE_RUNNING;
@@ -316,7 +307,7 @@ thread_runq_put_prev(struct thread_runq *runq, struct thread *thread)
     }
 
     list = thread_runq_get_list(runq, thread_get_priority(thread));
-    thread_list_enqueue(list, thread);
+    thread_list_enqueue_tail(list, thread);
 }
 
 static struct thread *
@@ -362,11 +353,12 @@ thread_runq_add(struct thread_runq *runq, struct thread *thread)
 {
     struct thread_list *list;
 
-    assert(thread_scheduler_locked());
+    assert(!cpu_intr_enabled());
+    assert(!thread_preempt_enabled());
     assert(thread_is_running(thread));
 
     list = thread_runq_get_list(runq, thread_get_priority(thread));
-    thread_list_enqueue(list, thread);
+    thread_list_enqueue_head(list, thread);
 
     runq->nr_threads++;
     assert(runq->nr_threads != 0);
@@ -386,50 +378,53 @@ thread_runq_remove(struct thread_runq *runq, struct thread *thread)
     thread_list_remove(thread);
 }
 
+static struct thread *
+thread_runq_schedule_from_pendsv(struct thread_runq *runq)
+{
+    struct thread *thread; /* Thread that was current on entry */
+
+    thread = thread_runq_get_current(runq);
+
+    assert(!cpu_intr_enabled());
+    assert(runq->preempt_level == 1);
+
+    thread_runq_put_prev(runq, thread); /* Hand the outgoing thread back */
+
+    if (!thread_is_running(thread)) { /* Not running any more: dequeue it */
+        thread_runq_remove(runq, thread);
+    }
+
+    return thread_runq_get_next(runq); /* Thread the caller switches to */
+}
+
 static void
 thread_runq_schedule(struct thread_runq *runq)
 {
-    struct thread *prev, *next;
+    assert(!cpu_intr_enabled());
+    assert(runq->preempt_level == 1);
 
-    prev = thread_runq_get_current(runq);
+    thread_runq_clear_yield(runq);
+}
 
-    assert(thread_scheduler_locked());
-    assert(runq->preempt_level == 1);
+static void
+thread_runq_tick(struct thread_runq *runq) /* Per-tick yield decision */
+{
+    struct thread_list *list;
 
-    thread_runq_put_prev(runq, prev);
+    assert(!cpu_intr_enabled());
+    assert(!thread_preempt_enabled());
 
-    if (!thread_is_running(prev)) {
-        thread_runq_remove(runq, prev);
+    if (runq->current == runq->idle) { /* No yield request for the idle thread */
+        return;
     }
 
-    next = thread_runq_get_next(runq);
+    list = thread_runq_get_list(runq, runq->current->priority);
 
-    if (prev != next) {
-        /*
-         * When switching context, it is extremely important that no
-         * data access generated by the compiler "leak" across the switch.
-         * All operations (i.e. side effects) started before the switch
-         * should complete before the switch, and all operations starting
-         * after the switch should start after the switch.
-         *
-         * This is what allows a thread waiting for an event to reliably
-         * "see" that event after another thread or interrupt handler has
-         * triggered it.
-         *
-         * This requires a barrier, and, since this is a single-processor
-         * scheduler, a compiler barrier (as opposed to memory barriers)
-         * is enough. But there is no such barrier here. The reason is that
-         * the context switch is implemented in assembly, and the compiler
-         * is unable to understand what the assembly code does. As a result,
-         * even with aggressive optimizations enabled, the compiler has to
-         * assume that memory may have changed in completely unexpected ways,
-         * which is equivalent to the inline assembly expression used to
-         * implement compiler barriers with GCC (see barrier() in macros.h).
-         *
-         * See thread_preempt_disable() for a description of compiler barriers.
-         */
-        thread_switch_context(prev, next);
+    if (thread_list_singular(list)) { /* Presumably: no peer at this priority */
+        return;
     }
+
+    thread_runq_set_yield(runq); /* Flag the run queue passed by the caller */
 }
 
 static void
@@ -440,6 +435,12 @@ thread_yield_if_needed(void)
     }
 }
 
+static unsigned int
+thread_preempt_level(void) /* 0 is what thread_preempt_enabled() tests for */
+{
+    return thread_runq_get_preempt_level(&thread_runq);
+}
+
 void
 thread_preempt_disable(void)
 {
@@ -506,20 +507,14 @@ thread_preempt_enable(void)
     thread_yield_if_needed();
 }
 
-static unsigned int
-thread_preempt_level(void)
-{
-    return thread_runq_get_preempt_level(&thread_runq);
-}
-
 bool
 thread_preempt_enabled(void)
 {
     return thread_preempt_level() == 0;
 }
 
-static uint32_t
-thread_lock_scheduler(void)
+uint32_t
+thread_preempt_disable_intr_save(void)
 {
     /*
      * When disabling both preemption and interrupts, it is best to do it in
@@ -534,28 +529,37 @@ thread_lock_scheduler(void)
 }
 
 static void
-thread_unlock_scheduler(uint32_t eflags, bool yield)
+thread_preempt_enable_no_yield_intr_restore(uint32_t primask)
 {
-    cpu_intr_restore(eflags);
+    cpu_intr_restore(primask);
+    thread_preempt_enable_no_yield();
+}
 
-    if (yield) {
-        thread_preempt_enable();
-    } else {
-        thread_preempt_enable_no_yield();
-    }
+void
+thread_preempt_enable_intr_restore(uint32_t primask)
+{
+    /*
+     * A PendSV exception may only be raised if the preemption level goes
+     * back to 0, making it safe to reenable interrupts before.
+     */
+    cpu_intr_restore(primask);
+    thread_preempt_enable();
 }
 
 void
 thread_enable_scheduler(void)
 {
-    struct thread *thread;
-
+    assert(!cpu_intr_enabled());
     assert(thread_preempt_level() == 1);
 
-    thread = thread_runq_get_next(&thread_runq);
-    thread_load_context(thread);
+    thread_runq_get_next(&thread_runq);
+
+    cpu_intr_enable();
+
+    /* Load the first thread through an SVCall exception */
+    cpu_raise_svcall();
 
-    /* Never reached */
+    panic("thread: error: unable to load first thread");
 }
 
 void
@@ -563,11 +567,8 @@ thread_main(thread_fn_t fn, void *arg)
 {
     assert(fn);
 
-    assert(!cpu_intr_enabled());
-    assert(thread_preempt_level() == 1);
-
-    cpu_intr_enable();
-    thread_preempt_enable();
+    assert(cpu_intr_enabled());
+    assert(thread_preempt_enabled());
 
     fn(arg);
 
@@ -587,83 +588,17 @@ thread_set_name(struct thread *thread, const char *name)
 }
 
 static void
-thread_stack_push(uint32_t **stackp, size_t *stack_sizep, uint32_t word)
-{
-    uint32_t *stack;
-    size_t stack_size;
-
-    stack = *stackp;
-    stack_size = *stack_sizep;
-    assert(stack_size >= sizeof(word));
-    stack--;
-    stack_size -= sizeof(word);
-    *stack = word;
-    *stackp = stack;
-    *stack_sizep = stack_size;
-}
-
-static void *
-thread_stack_forge(char *stack_addr, size_t stack_size,
-                   thread_fn_t fn, void *arg)
-{
-    uint32_t *stack;
-
-    stack = (uint32_t *)(stack_addr + stack_size);
-
-    /*
-     * This part of the stack makes context restoration "return" to
-     * thread_main() as if it were called from address 0 (which stops
-     * backtracing when using a debugger).
-     *
-     * This is how an assembly call to thread_main() looks like, according
-     * to the ABI (System V Intel 386 ABI [1]) :
-     *  push arg
-     *  push fn
-     *  call thread_main
-     *
-     * Remember that the call instruction pushes the return address on the
-     * stack.
-     *
-     * [1] http://www.sco.com/developers/devspecs/abi386-4.pdf
-     */
-    thread_stack_push(&stack, &stack_size, (uint32_t)arg); /* 2nd argument */
-    thread_stack_push(&stack, &stack_size, (uint32_t)fn);  /* 1st argument */
-    thread_stack_push(&stack, &stack_size, (uint32_t)0);   /* Return address */
-    thread_stack_push(&stack, &stack_size, (uint32_t)thread_main);
-
-    /*
-     * This part of the stack contains the registers that should be restored.
-     * The selection of the registers to save is made according to the
-     * ABI, which specifies which registers are owned by the caller, and
-     * which are owned by the callee. Since, in all cases, switching context
-     * is achieved by calling the thread_switch_context() function, it
-     * is safe to rely on the ABI for this selection. Naturally, the
-     * registers that must be saved are those owned by the caller, since
-     * the compiler assumes all registers owned by the callee may have
-     * changed on return. See the System V Intel386 ABI "Registers and the
-     * Stack Frame".
-     *
-     * For debugging purposes, a complete save of all the registers may be
-     * performed instead, allowing precise inspection of the state of a
-     * thread not currently running on the processor.
-     *
-     * It is recommended to read the assembly code at the thread_restore_context
-     * label in thread_asm.S to better understand this stack frame.
-     */
-    thread_stack_push(&stack, &stack_size, 0);              /* EBP */
-    thread_stack_push(&stack, &stack_size, 0);              /* EBX */
-    thread_stack_push(&stack, &stack_size, 0);              /* EDI */
-    thread_stack_push(&stack, &stack_size, 0);              /* ESI */
-
-    return stack;
-}
-
-static void
 thread_init(struct thread *thread, thread_fn_t fn, void *arg,
             const char *name, char *stack, size_t stack_size,
             unsigned int priority)
 {
-    assert(P2ALIGNED((uintptr_t)stack, THREAD_STACK_ALIGN));
+    if (!P2ALIGNED((uint32_t)stack, CPU_STACK_ALIGN)) {
+        char *aligned_stack;
+
+        aligned_stack = (char *)(P2ALIGN((uintptr_t)stack, CPU_STACK_ALIGN));
+        stack_size -= (stack - aligned_stack);
+        stack = aligned_stack;
+    }
 
     /*
      * New threads are created in a state that is similar to preempted threads,
@@ -680,7 +615,7 @@ thread_init(struct thread *thread, thread_fn_t fn, void *arg,
      */
 
     if (stack) {
-        thread->sp = thread_stack_forge(stack, stack_size, fn, arg);
+        thread->sp = cpu_stack_forge(stack, stack_size, fn, arg);
     }
 
     thread->state = THREAD_STATE_RUNNING;
@@ -695,7 +630,7 @@ thread_create(struct thread **threadp, thread_fn_t fn, void *arg,
               const char *name, size_t stack_size, unsigned int priority)
 {
     struct thread *thread;
-    uint32_t eflags;
+    uint32_t primask;
     void *stack;
 
     assert(fn);
@@ -706,8 +641,8 @@ thread_create(struct thread **threadp, thread_fn_t fn, void *arg,
         return ENOMEM;
     }
 
-    if (stack_size < THREAD_STACK_MIN_SIZE) {
-        stack_size = THREAD_STACK_MIN_SIZE;
+    if (stack_size < THREAD_MIN_STACK_SIZE) {
+        stack_size = THREAD_MIN_STACK_SIZE;
     }
 
     stack = malloc(stack_size);
@@ -719,9 +654,9 @@ thread_create(struct thread **threadp, thread_fn_t fn, void *arg,
 
     thread_init(thread, fn, arg, name, stack, stack_size, priority);
 
-    eflags = thread_lock_scheduler();
+    primask = thread_preempt_disable_intr_save();
     thread_runq_add(&thread_runq, thread);
-    thread_unlock_scheduler(eflags, true);
+    thread_preempt_enable_intr_restore(primask);
 
     if (threadp) {
         *threadp = thread;
@@ -743,26 +678,32 @@ void
 thread_exit(void)
 {
     struct thread *thread;
+    uint32_t primask;
 
     thread = thread_self();
 
     assert(thread_preempt_enabled());
 
-    thread_lock_scheduler();
+    primask = thread_preempt_disable_intr_save();
+
     assert(thread_is_running(thread));
     thread_set_dead(thread);
     thread_wakeup(thread->joiner);
     thread_runq_schedule(&thread_runq);
 
+    thread_preempt_enable_intr_restore(primask);
+
+    cpu_raise_pendsv();
+
     panic("thread: error: dead thread walking");
 }
 
 void
 thread_join(struct thread *thread)
 {
-    uint32_t eflags;
+    uint32_t primask;
 
-    eflags = thread_lock_scheduler();
+    primask = thread_preempt_disable_intr_save();
 
     thread->joiner = thread_self();
 
@@ -770,7 +711,7 @@ thread_join(struct thread *thread)
         thread_sleep();
     }
 
-    thread_unlock_scheduler(eflags, true);
+    thread_preempt_enable_intr_restore(primask);
 
     thread_destroy(thread);
 }
@@ -793,14 +734,14 @@ thread_create_idle(void)
         panic("thread: unable to allocate idle thread");
     }
 
-    stack = malloc(THREAD_STACK_MIN_SIZE);
+    stack = malloc(THREAD_MIN_STACK_SIZE);
 
     if (!stack) {
         panic("thread: unable to allocate idle thread stack");
     }
 
     thread_init(idle, thread_idle, NULL, "idle",
-                stack, THREAD_STACK_MIN_SIZE, THREAD_IDLE_PRIORITY);
+                stack, THREAD_MIN_STACK_SIZE, THREAD_IDLE_PRIORITY);
     return idle;
 }
 
@@ -844,44 +785,78 @@ thread_setup(void)
 void
 thread_yield(void)
 {
-    uint32_t eflags;
+    uint32_t primask;
 
     if (!thread_preempt_enabled()) {
         return;
     }
 
-    eflags = thread_lock_scheduler();
-    thread_runq_clear_yield(&thread_runq);
+    primask = thread_preempt_disable_intr_save();
     thread_runq_schedule(&thread_runq);
-    thread_unlock_scheduler(eflags, false);
+    thread_preempt_enable_no_yield_intr_restore(primask);
+
+    cpu_raise_pendsv();
+}
+
+void *
+thread_yield_from_svcall(void) /* Returns the sp saved for the current thread */
+{
+    thread_preempt_enable_no_yield(); /* Presumably undoes the boot-time level */
+    return thread_get_stack_pointer(thread_self());
+}
+
+void *
+thread_yield_from_pendsv(void *prev_sp) /* Returns the sp of the next thread */
+{
+    struct thread *thread; /* Thread chosen to run next */
+    uint32_t primask;
+
+    primask = thread_preempt_disable_intr_save();
+
+    thread_set_stack_pointer(thread_self(), prev_sp); /* Save outgoing sp */
+    thread = thread_runq_schedule_from_pendsv(&thread_runq);
+    thread_preempt_enable_intr_restore(primask);
+
+    return thread_get_stack_pointer(thread);
+}
 
 void
 thread_sleep(void)
 {
     struct thread *thread;
-    uint32_t eflags;
+    uint32_t primask;
 
     thread = thread_self();
 
-    eflags = cpu_intr_save();
+    primask = cpu_intr_save();
+
     assert(thread_is_running(thread));
     thread_set_sleeping(thread);
     thread_runq_schedule(&thread_runq);
+
+    thread_preempt_enable();
+    cpu_intr_enable();
+
+    cpu_raise_pendsv();
+
+    cpu_intr_disable();
+    thread_preempt_disable();
+
     assert(thread_is_running(thread));
-    cpu_intr_restore(eflags);
+
+    cpu_intr_restore(primask);
 }
 
 void
 thread_wakeup(struct thread *thread)
 {
-    uint32_t eflags;
+    uint32_t primask;
 
     if (!thread || (thread == thread_self())) {
         return;
     }
 
-    eflags = thread_lock_scheduler();
+    primask = thread_preempt_disable_intr_save();
 
     if (!thread_is_running(thread)) {
         assert(!thread_is_dead(thread));
@@ -889,14 +864,11 @@ thread_wakeup(struct thread *thread)
         thread_runq_add(&thread_runq, thread);
     }
 
-    thread_unlock_scheduler(eflags, true);
+    thread_preempt_enable_intr_restore(primask);
 }
 
 void
 thread_report_tick(void)
 {
-    assert(thread_scheduler_locked());
-
-    thread_runq_set_yield(&thread_runq);
-    timer_report_tick();
+    thread_runq_tick(&thread_runq);
 }
diff --git a/src/thread.h b/src/thread.h
index 78f87da..13c4810 100644
--- a/src/thread.h
+++ b/src/thread.h
@@ -85,6 +85,7 @@
 
 #include <stdbool.h>
 #include <stddef.h>
+#include <stdint.h>
 
 /*
  * The scheduling frequency is the rate at which the clock used for scheduling
@@ -105,7 +106,7 @@
 /*
  * Minimum size of thread stacks.
  */
-#define THREAD_STACK_MIN_SIZE 512
+#define THREAD_MIN_STACK_SIZE 512
 
 /*
  * Total number of thread priorities.
@@ -208,6 +209,9 @@ const char * thread_name(const struct thread *thread);
  */
 void thread_yield(void);
 
+void * thread_yield_from_svcall(void);          /* Returns current thread sp */
+void * thread_yield_from_pendsv(void *prev_sp); /* Saves prev_sp, returns next sp */
+
 /*
  * Make the calling thread sleep until awaken.
  *
@@ -312,6 +316,12 @@ void thread_preempt_disable(void);
 bool thread_preempt_enabled(void);
 
 /*
+ * Preemption control functions which also disable interrupts.
+ */
+uint32_t thread_preempt_disable_intr_save(void);
+void thread_preempt_enable_intr_restore(uint32_t primask);
+
+/*
  * Report a tick.
  *
  * This function must be called from interrupt context.
@@ -319,6 +329,11 @@ bool thread_preempt_enabled(void);
 void thread_report_tick(void);
 
 /*
+ * Entry point for new threads.
+ */
+void thread_main(thread_fn_t fn, void *arg);
+
+/*
  * Enable the scheduler.
  */
 void thread_enable_scheduler(void) __attribute__((noreturn));
diff --git a/src/timer.c b/src/timer.c
index a9fc6f7..a4d2045 100644
--- a/src/timer.c
+++ b/src/timer.c
@@ -140,7 +140,7 @@ static void
 timer_process_list(unsigned long now)
 {
     struct timer *timer;
-    uint32_t eflags;
+    uint32_t primask;
 
     mutex_lock(&timer_mutex);
 
@@ -160,7 +160,7 @@ timer_process_list(unsigned long now)
         mutex_lock(&timer_mutex);
     }
 
-    eflags = cpu_intr_save();
+    primask = cpu_intr_save();
 
     timer_list_empty = list_empty(&timer_list);
 
@@ -169,7 +169,7 @@ timer_process_list(unsigned long now)
         timer_wakeup_ticks = timer->ticks;
     }
 
-    cpu_intr_restore(eflags);
+    cpu_intr_restore(primask);
 
     mutex_unlock(&timer_mutex);
 }
@@ -178,13 +178,12 @@ static void
 timer_run(void *arg)
 {
     unsigned long now;
-    uint32_t eflags;
+    uint32_t primask;
 
     (void)arg;
 
     for (;;) {
-        thread_preempt_disable();
-        eflags = cpu_intr_save();
+        primask = thread_preempt_disable_intr_save();
 
         for (;;) {
             now = timer_ticks;
@@ -196,8 +195,7 @@ timer_run(void *arg)
             thread_sleep();
         }
 
-        cpu_intr_restore(eflags);
-        thread_preempt_enable();
+        thread_preempt_enable_intr_restore(primask);
 
         timer_process_list(now);
     }
@@ -215,7 +213,8 @@ timer_setup(void)
     mutex_init(&timer_mutex);
 
     error = thread_create(&timer_thread, timer_run, NULL,
-                          "timer", TIMER_STACK_SIZE, THREAD_MAX_PRIORITY);
+                          "timer", TIMER_STACK_SIZE, THREAD_MIN_PRIORITY);
+                          //"timer", TIMER_STACK_SIZE, THREAD_MAX_PRIORITY);
 
     if (error) {
         panic("timer: unable to create thread");
@@ -226,11 +225,11 @@ unsigned long
 timer_now(void)
 {
     unsigned long ticks;
-    uint32_t eflags;
+    uint32_t primask;
 
-    eflags = cpu_intr_save();
+    primask = cpu_intr_save();
     ticks = timer_ticks;
-    cpu_intr_restore(eflags);
+    cpu_intr_restore(primask);
 
     return ticks;
 }
@@ -259,7 +258,7 @@ void
 timer_schedule(struct timer *timer, unsigned long ticks)
 {
     struct timer *tmp;
-    uint32_t eflags;
+    uint32_t primask;
 
     mutex_lock(&timer_mutex);
 
@@ -302,10 +301,10 @@ timer_schedule(struct timer *timer, unsigned long ticks)
      * By holding the mutex while clearing the list empty flag, potential
      * spurious wake-ups are completely avoided.
      */
-    eflags = cpu_intr_save();
+    primask = cpu_intr_save();
     timer_list_empty = false;
     timer_wakeup_ticks = timer->ticks;
-    cpu_intr_restore(eflags);
+    cpu_intr_restore(primask);
 
     mutex_unlock(&timer_mutex);
 }
diff --git a/src/uart.c b/src/uart.c
index 8697874..456c6da 100644
--- a/src/uart.c
+++ b/src/uart.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
  * Copyright (c) 2017 Jerko Lenstra.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -21,7 +21,6 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include <assert.h>
 #include <errno.h>
 #include <stdbool.h>
 #include <stddef.h>
@@ -32,34 +31,19 @@
 #include <lib/macros.h>
 
 #include "cpu.h"
-#include "io.h"
 #include "uart.h"
 #include "thread.h"
 
-#define UART_BAUD_RATE          115200
+#define UART_USART1_ADDR        0x40011000
+#define UART_USART1_IRQ         37
 
-#define UART_CLOCK              115200
-#define UART_DIVISOR            (UART_CLOCK / UART_BAUD_RATE)
+#define UART_SR_RXNE            0x00000020
+#define UART_SR_TXE             0x00000080
 
-#define UART_IRQ                4
-
-#define UART_IER_DATA           0x1
-
-#define UART_LCR_8BITS          0x3
-#define UART_LCR_STOP1          0
-#define UART_LCR_PARITY_NONE    0
-#define UART_LCR_DLAB           0x80
-
-#define UART_LSR_DATA_READY     0x01
-#define UART_LSR_TX_EMPTY       0x20
-
-#define UART_COM1_PORT          0x3F8
-#define UART_REG_DAT            0
-#define UART_REG_DIVL           0
-#define UART_REG_IER            1
-#define UART_REG_DIVH           1
-#define UART_REG_LCR            3
-#define UART_REG_LSR            5
+#define UART_CR1_RE             0x00000004
+#define UART_CR1_TE             0x00000008
+#define UART_CR1_RXNEIE         0x00000020
+#define UART_CR1_UE             0x00002000
 
 #define UART_BUFFER_SIZE        16
 
@@ -67,6 +51,18 @@
 #error "invalid buffer size"
 #endif
 
+struct uart_regs {
+    uint32_t sr;    /* Status, tested with the UART_SR_* bits */
+    uint32_t dr;    /* Data, read on receive and written on transmit */
+    uint32_t brr;   /* Baud rate */
+    uint32_t cr1;   /* Control 1, takes the UART_CR1_* bits */
+    uint32_t cr2;   /* Control 2 */
+    uint32_t cr3;   /* Control 3 */
+    uint32_t gtpr;  /* Guard time and prescaler */
+};
+
+static volatile struct uart_regs *uart_usart1_regs;
+
 /*
  * Data shared between threads and the interrupt handler.
  *
@@ -79,24 +75,24 @@ static struct thread *uart_waiter;
 static void
 uart_irq_handler(void *arg)
 {
-    uint8_t byte;
-    int error;
+    uint32_t reg;
     bool spurious;
+    int error;
 
     (void)arg;
 
     spurious = true;
 
     for (;;) {
-        byte = io_read(UART_COM1_PORT + UART_REG_LSR);
+        reg = uart_usart1_regs->sr;
 
-        if (!(byte & UART_LSR_DATA_READY)) {
+        if (!(reg & UART_SR_RXNE)) {
             break;
         }
 
         spurious = false;
-        byte = io_read(UART_COM1_PORT + UART_REG_DAT);
-        error = cbuf_pushb(&uart_cbuf, byte, false);
+        reg = uart_usart1_regs->dr;
+        error = cbuf_pushb(&uart_cbuf, (uint8_t)reg, false);
 
         if (error) {
             printf("uart: error: buffer full\n");
@@ -114,35 +110,40 @@ uart_setup(void)
 {
     cbuf_init(&uart_cbuf, uart_buffer, sizeof(uart_buffer));
 
-    io_write(UART_COM1_PORT + UART_REG_LCR, UART_LCR_DLAB);
-    io_write(UART_COM1_PORT + UART_REG_DIVL, UART_DIVISOR);
-    io_write(UART_COM1_PORT + UART_REG_DIVH, UART_DIVISOR >> 8);
-    io_write(UART_COM1_PORT + UART_REG_LCR, UART_LCR_8BITS | UART_LCR_STOP1
-                                            | UART_LCR_PARITY_NONE);
-    io_write(UART_COM1_PORT + UART_REG_IER, UART_IER_DATA);
+    uart_usart1_regs = (void *)UART_USART1_ADDR;
+    uart_usart1_regs->cr1 |= UART_CR1_UE
+                            | UART_CR1_RXNEIE
+                            | UART_CR1_TE
+                            | UART_CR1_RE;
 
-    cpu_irq_register(UART_IRQ, uart_irq_handler, NULL);
+    cpu_irq_register(UART_USART1_IRQ, uart_irq_handler, NULL);
 }
 
 static void
 uart_tx_wait(void)
 {
-    uint8_t byte;
+    /*
+     * XXX The QEMU stm32f2xx_usart driver doesn't seem to correctly emulate
+     * the UART_SR_TXE bit.
+     */
+#if 0
+    uint32_t sr;
 
     for (;;) {
-        byte = io_read(UART_COM1_PORT + UART_REG_LSR);
+        sr = uart_usart1_regs->sr;
 
-        if (byte & UART_LSR_TX_EMPTY) {
+        if (sr & UART_SR_TXE) {
             break;
         }
     }
+#endif
 }
 
 static void
 uart_write_byte(uint8_t byte)
 {
     uart_tx_wait();
-    io_write(UART_COM1_PORT + UART_REG_DAT, byte);
+    uart_usart1_regs->dr = byte;
 }
 
 void
@@ -158,10 +159,10 @@ uart_write(uint8_t byte)
 int
 uart_read(uint8_t *byte)
 {
-    int eflags, error;
+    uint32_t primask; /* Same type as thread_preempt_disable_intr_save() */
+    int error;
 
-    thread_preempt_disable();
-    eflags = cpu_intr_save();
+    primask = thread_preempt_disable_intr_save();
 
     if (uart_waiter) {
         error = EBUSY;
@@ -183,8 +183,7 @@ uart_read(uint8_t *byte)
     error = 0;
 
 out:
-    cpu_intr_restore(eflags);
-    thread_preempt_enable();
+    thread_preempt_enable_intr_restore(primask);
 
     return error;
 }
author	Richard Braun <rbraun@sceen.net>	2018-01-23 21:24:31 +0100
committer	Richard Braun <rbraun@sceen.net>	2018-01-23 21:26:31 +0100
commit	4778a84feb6c53e08fd2f15e33f2d1df64c0737f (patch)
tree	7841ca102a5c041b5dd7e448e36af7065d81ed2d
parent	06844a6997166e5845b4ef7dfbccf5aac3a6a352 (diff)