Diffstat (limited to 'kern')
-rw-r--r--   kern/spinlock.c        | 405
-rw-r--r--   kern/spinlock.h        |  19
-rw-r--r--   kern/spinlock_i.h      |  38
-rw-r--r--   kern/spinlock_types.h  |   6
4 files changed, 235 insertions, 233 deletions
diff --git a/kern/spinlock.c b/kern/spinlock.c
index 5ba91169..58eb74a6 100644
--- a/kern/spinlock.c
+++ b/kern/spinlock.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -19,54 +19,46 @@
  * Synchronization on Shared-Memory Multiprocessors" by John M. Mellor-Crummey
  * and Michael L. Scott, which describes MCS locks, among other algorithms.
  *
- * In order to avoid the need to allocate a qnode for every spin lock
- * currently held, and also to keep the size of locks to a single 32-bits
- * word, this module actually uses a variant of the MCS locks. The
- * differences are presented below.
+ * Here are additional issues this module solves that require modifications
+ * to the original MCS algorithm :
+ *  - There must not be any limit on the number of spin locks a thread may
+ *    hold, and spinlocks must not need dynamic memory allocation.
+ *  - Unlocking a spin lock must be a non-blocking operation. Without
+ *    this requirement, a locking operation may be interrupted in the
+ *    middle of a hand-off sequence, preventing the unlock operation
+ *    from completing, potentially causing tricky deadlocks.
+ *  - Spin lock storage must not exceed 32 bits.
  *
- * First, the lock owner is never part of the lock queue. This makes it
- * possible to use a qnode only during the lock operation, not after.
- * This means a single qnode per execution context is required even when
- * holding multiple spin locks simultaneously. In order to achieve that,
- * a spin lock not only refers to the last waiter, but also to the first,
- * which is the owner's qnode->next in the original algorithm.
+ * In order to solve these issues, the lock owner is never part of the
+ * lock queue. This makes it possible to use a qnode only during the lock
+ * operation, not after. This means a single qnode per execution context
+ * is required even when holding multiple spin locks simultaneously.
  *
- * Next, instead of two pointers, the lock is a single word storing
- * compressed references to both the first and last waiters. Those
- * references are integers called QIDs, for qnode IDs. They can be
- * broken down into 3 parts :
- *  - the lock bit
- *  - the execution context
- *  - the target CPU ID
+ * In addition, instead of making the owner perform a hand-off sequence
+ * to unblock the first waiter when unlocking, the latter directly spins
+ * on the lock word, and is the one performing the hand-off sequence with
+ * the second waiter. As a side effect, this also optimizes spinning for
+ * the common case of a single waiter.
  *
- * The layout of a QID is carefully crafted to match the lock values
- * expected by the fast paths. Without contention, a QID value must
- * be SPINLOCK_UNLOCKED when a lock isn't held, and SPINLOCK_LOCKED
- * when it is. Besides, without contention, the reference to the first
- * waiter is logically NULL so that the whole lock value matches one
- * of SPINLOCK_UNLOCKED or SPINLOCK_LOCKED. This means that the values
- * of the execution context and the CPU ID must be 0 in the absence of
- * contention. In the presence of contention, the execution context and
- * CPU ID are used to uniquely identify a statically allocated qnode,
- * and fast paths operations fail. The lock operation must make sure
- * that the lock value is restored to SPINLOCK_LOCKED if there is no
- * more contention, an operation called downgrading.
+ * When a lock is held, the lock bit is set, and when a lock is contended
+ * the contended bit is set. When contended, the lock word also contains
+ * a compressed reference to the last waiter. That reference is called a
+ * QID (for qnode ID). It is structured into two parts :
+ *  - the execution context
+ *  - the CPU ID
  *
- * In order to enforce correct visibility of the critical section,
- * acquire and release atomic operations are used when accessing a
- * qnode lock. Similarly, acquire and release semantics are also used
- * when accessing the lock word which stores the first and last QIDs,
- * so that the memory operations on the qnodes referenced by those QIDs
- * are correctly enforced. Accessing the next QID in a qnode however
- * is performed with no memory order constraint, because the qnodes
- * referenced by those QID are never accessed unless they are the first
- * or the last, cases which do enforce ordering.
+ * The QID is used to uniquely identify a statically allocated qnode,
+ * The lock operation must make sure that the lock value is restored
+ * to SPINLOCK_LOCKED if there is no more contention, an operation
+ * called downgrading.
  */

 #include <assert.h>
 #include <errno.h>
+#include <limits.h>
 #include <stdalign.h>
 #include <stddef.h>
+#include <stdint.h>

 #include <kern/atomic.h>
 #include <kern/init.h>
@@ -78,262 +70,277 @@
 #include <kern/thread.h>
 #include <machine/cpu.h>

-#define SPINLOCK_QID_LOCK_BIT 1
+#define SPINLOCK_CONTENDED 0x2

-#define SPINLOCK_QID_CTX_BITS 2
-#define SPINLOCK_QID_CTX_SHIFT SPINLOCK_QID_LOCK_BIT
-#define SPINLOCK_QID_CTX_MAX (1 << SPINLOCK_QID_CTX_BITS)
-#define SPINLOCK_QID_CTX_MASK (SPINLOCK_QID_CTX_MAX - 1)
+#define SPINLOCK_LOCKED_BITS 1
+#define SPINLOCK_CONTENDED_BITS 1

-#define SPINLOCK_QID_CPU_BITS 13
-#define SPINLOCK_QID_CPU_SHIFT (SPINLOCK_QID_CTX_BITS \
-                                + SPINLOCK_QID_CTX_SHIFT)
-#define SPINLOCK_QID_CPU_MAX (1 << SPINLOCK_QID_CPU_BITS)
-#define SPINLOCK_QID_CPU_MASK (SPINLOCK_QID_CPU_MAX - 1)
+#define SPINLOCK_QID_SHIFT (SPINLOCK_CONTENDED_BITS \
+                            + SPINLOCK_LOCKED_BITS)

-#define SPINLOCK_QID_BITS (SPINLOCK_QID_CPU_BITS \
-                           + SPINLOCK_QID_CTX_BITS \
-                           + SPINLOCK_QID_LOCK_BIT)
-#define SPINLOCK_QID_MAX (1 << SPINLOCK_QID_BITS)
-#define SPINLOCK_QID_MASK (SPINLOCK_QID_MAX - 1)
+#define SPINLOCK_QID_CTX_BITS 1
+#define SPINLOCK_QID_CTX_SHIFT 0
+#define SPINLOCK_QID_CTX_MASK ((1U << SPINLOCK_QID_CTX_BITS) - 1)

-#define SPINLOCK_QID_MAX_BITS 16
+#define SPINLOCK_QID_CPU_BITS 29
+#define SPINLOCK_QID_CPU_SHIFT (SPINLOCK_QID_CTX_SHIFT \
+                                + SPINLOCK_QID_CTX_BITS)
+#define SPINLOCK_QID_CPU_MASK ((1U << SPINLOCK_QID_CPU_BITS) - 1)

-#define SPINLOCK_QID_NULL SPINLOCK_UNLOCKED
-#define SPINLOCK_QID_LOCKED SPINLOCK_LOCKED
-
-#if SPINLOCK_QID_BITS > SPINLOCK_QID_MAX_BITS
-#error "spinlock qid too large"
-#endif
+#define SPINLOCK_BITS (SPINLOCK_QID_CPU_BITS \
+                       + SPINLOCK_QID_CTX_BITS \
+                       + SPINLOCK_CONTENDED_BITS \
+                       + SPINLOCK_LOCKED_BITS)

-#if CONFIG_MAX_CPUS > (1 << SPINLOCK_QID_CPU_BITS)
+#if CONFIG_MAX_CPUS > (1U << SPINLOCK_QID_CPU_BITS)
 #error "maximum number of supported processors too large"
 #endif

+static_assert(SPINLOCK_BITS <= (CHAR_BIT * sizeof(uint32_t)),
+              "spinlock too large");
+
 struct spinlock_qnode {
-    unsigned int next_qid;
+    alignas(CPU_L1_SIZE) struct spinlock_qnode *next;
     bool locked;
 };

-#define SPINLOCK_CTX_INVALID 0
-#define SPINLOCK_CTX_THREAD 1
-#define SPINLOCK_CTX_INTR 2
-#define SPINLOCK_CTX_NMI 3
-#define SPINLOCK_NR_CTXS 4
-
-#if SPINLOCK_CTX_INVALID != 0
-#error "the invalid context value must be 0"
-#endif
+/* TODO NMI support */
+enum {
+    SPINLOCK_CTX_THREAD,
+    SPINLOCK_CTX_INTR,
+    SPINLOCK_NR_CTXS
+};

-#if SPINLOCK_NR_CTXS > SPINLOCK_QID_CTX_MAX
-#error "maximum number of contexts too large"
-#endif
+static_assert(SPINLOCK_NR_CTXS <= (SPINLOCK_QID_CTX_MASK + 1),
+              "maximum number of contexts too large");

 struct spinlock_cpu_data {
-    alignas(CPU_L1_SIZE) struct spinlock_qnode qnodes[SPINLOCK_NR_CTXS - 1];
+    struct spinlock_qnode qnodes[SPINLOCK_NR_CTXS];
 };

 static struct spinlock_cpu_data spinlock_cpu_data __percpu;

-void
-spinlock_init(struct spinlock *lock)
+static struct spinlock_qnode *
+spinlock_cpu_data_get_qnode(struct spinlock_cpu_data *cpu_data,
+                            unsigned int ctx)
 {
-    lock->value = SPINLOCK_UNLOCKED;
+    assert(ctx < ARRAY_SIZE(cpu_data->qnodes));
+    return &cpu_data->qnodes[ctx];
+}

-#ifdef SPINLOCK_TRACK_OWNER
-    lock->owner = NULL;
-#endif /* SPINLOCK_TRACK_OWNER */
+static uint32_t
+spinlock_qid_build(unsigned int ctx, unsigned int cpu)
+{
+    assert(ctx <= SPINLOCK_QID_CTX_MASK);
+    assert(cpu <= SPINLOCK_QID_CPU_MASK);
+
+    return (cpu << SPINLOCK_QID_CPU_SHIFT) | (ctx << SPINLOCK_QID_CTX_SHIFT);
 }

 static unsigned int
-spinlock_qid2ctx(unsigned int qid)
+spinlock_qid_ctx(uint32_t qid)
 {
     return (qid >> SPINLOCK_QID_CTX_SHIFT) & SPINLOCK_QID_CTX_MASK;
 }

 static unsigned int
-spinlock_qid2cpu(unsigned int qid)
+spinlock_qid_cpu(uint32_t qid)
 {
     return (qid >> SPINLOCK_QID_CPU_SHIFT) & SPINLOCK_QID_CPU_MASK;
 }

-static unsigned int
-spinlock_build_qid(unsigned int cpu, unsigned int ctx)
+void
+spinlock_init(struct spinlock *lock)
 {
-    return (cpu << SPINLOCK_QID_CPU_SHIFT)
-           | (ctx << SPINLOCK_QID_CTX_SHIFT)
-           | SPINLOCK_QID_LOCK_BIT;
+    lock->value = SPINLOCK_UNLOCKED;
+
+#ifdef SPINLOCK_TRACK_OWNER
+    lock->owner = NULL;
+#endif /* SPINLOCK_TRACK_OWNER */
 }

 static void
-spinlock_get_local_qnode(struct spinlock_qnode **qnodep, unsigned int *qidp)
+spinlock_qnode_init(struct spinlock_qnode *qnode)
 {
-    struct spinlock_cpu_data *cpu_data;
-    unsigned int ctx;
-
-    cpu_data = cpu_local_ptr(spinlock_cpu_data);
-
-    /* TODO NMI support */
-    ctx = thread_interrupted() ? SPINLOCK_CTX_INTR : SPINLOCK_CTX_THREAD;
-    *qnodep = &cpu_data->qnodes[ctx - 1];
-    *qidp = spinlock_build_qid(cpu_id(), ctx);
+    qnode->next = NULL;
 }

 static struct spinlock_qnode *
-spinlock_get_remote_qnode(unsigned int qid)
+spinlock_qnode_wait_next(const struct spinlock_qnode *qnode)
 {
-    struct spinlock_cpu_data *cpu_data;
-    unsigned int ctx, cpu;
+    struct spinlock_qnode *next;

-    ctx = spinlock_qid2ctx(qid);
+    for (;;) {
+        next = atomic_load(&qnode->next, ATOMIC_ACQUIRE);

-    if (ctx == SPINLOCK_CTX_INVALID) {
-        return NULL;
-    }
+        if (next) {
+            break;
+        }

-    ctx--;
-    assert(ctx < ARRAY_SIZE(cpu_data->qnodes));
+        cpu_pause();
+    }

-    cpu = spinlock_qid2cpu(qid);
-    cpu_data = percpu_ptr(spinlock_cpu_data, cpu);
-    return &cpu_data->qnodes[ctx];
+    return next;
 }

 static void
-spinlock_store_first_qid(struct spinlock *lock, unsigned int newqid)
+spinlock_qnode_set_next(struct spinlock_qnode *qnode, struct spinlock_qnode *next)
 {
-    unsigned int oldval, newval, prev;
-
-    assert(newqid < SPINLOCK_QID_MAX);
-
-    newqid <<= SPINLOCK_QID_MAX_BITS;
-
-    do {
-        oldval = atomic_load(&lock->value, ATOMIC_RELAXED);
-        newval = newqid | (oldval & SPINLOCK_QID_MASK);
-        prev = atomic_cas_release(&lock->value, oldval, newval);
-    } while (prev != oldval);
+    assert(next);
+    atomic_store(&qnode->next, next, ATOMIC_RELEASE);
 }

-static unsigned int
-spinlock_load_first_qid(const struct spinlock *lock)
+static void
+spinlock_qnode_set_locked(struct spinlock_qnode *qnode)
 {
-    unsigned int value;
-
-    value = atomic_load_acquire(&lock->value);
-    return (value >> SPINLOCK_QID_MAX_BITS) & SPINLOCK_QID_MASK;
+    qnode->locked = true;
 }

-static unsigned int
-spinlock_swap_last_qid(struct spinlock *lock, unsigned int newqid)
+static void
+spinlock_qnode_wait_locked(const struct spinlock_qnode *qnode)
 {
-    unsigned int oldval, newval, prev;
+    bool locked;

-    assert(newqid < SPINLOCK_QID_MAX);
+    for (;;) {
+        locked = atomic_load(&qnode->locked, ATOMIC_ACQUIRE);

-    do {
-        oldval = atomic_load(&lock->value, ATOMIC_RELAXED);
-        newval = (oldval & (SPINLOCK_QID_MASK << SPINLOCK_QID_MAX_BITS))
-                 | newqid;
-        prev = atomic_cas_acq_rel(&lock->value, oldval, newval);
-    } while (prev != oldval);
+        if (!locked) {
+            break;
+        }

-    return prev & SPINLOCK_QID_MASK;
+        cpu_pause();
+    }
 }

-static unsigned int
-spinlock_try_downgrade(struct spinlock *lock, unsigned int oldqid)
+static void
+spinlock_qnode_clear_locked(struct spinlock_qnode *qnode)
 {
-    unsigned int prev;
-
-    prev = atomic_cas(&lock->value, oldqid,
-                      SPINLOCK_QID_LOCKED, ATOMIC_RELAXED);
-
-    assert((prev >> SPINLOCK_QID_MAX_BITS) == 0);
-    assert(prev != SPINLOCK_QID_NULL);
+    atomic_store(&qnode->locked, false, ATOMIC_RELEASE);
+}

-    if (prev != oldqid) {
-        return EBUSY;
-    }
+static void
+spinlock_get_local_qnode(struct spinlock_qnode **qnode, uint32_t *qid)
+{
+    struct spinlock_cpu_data *cpu_data;
+    unsigned int ctx;

-    return 0;
+    cpu_data = cpu_local_ptr(spinlock_cpu_data);
+    ctx = thread_interrupted() ? SPINLOCK_CTX_INTR : SPINLOCK_CTX_THREAD;
+    *qnode = spinlock_cpu_data_get_qnode(cpu_data, ctx);
+    *qid = spinlock_qid_build(ctx, cpu_id());
 }

-void
-spinlock_lock_slow(struct spinlock *lock)
+static uint32_t
+spinlock_enqueue(struct spinlock *lock, uint32_t qid)
 {
-    struct spinlock_qnode *qnode, *prev_qnode;
-    unsigned int qid, prev_qid, next_qid, ctx;
-    bool locked;
-    int error;
+    uint32_t old_value, new_value, prev, next;

-    spinlock_get_local_qnode(&qnode, &qid);
-    qnode->next_qid = SPINLOCK_QID_NULL;
+    next = (qid << SPINLOCK_QID_SHIFT) | SPINLOCK_CONTENDED;

-    prev_qid = spinlock_swap_last_qid(lock, qid);
-
-    if (prev_qid != SPINLOCK_QID_NULL) {
-        qnode->locked = true;
-        ctx = spinlock_qid2ctx(prev_qid);
+    for (;;) {
+        old_value = atomic_load(&lock->value, ATOMIC_RELAXED);
+        new_value = next | (old_value & SPINLOCK_LOCKED);
+        prev = atomic_cas(&lock->value, old_value, new_value, ATOMIC_RELEASE);

-        if (ctx == SPINLOCK_CTX_INVALID) {
-            spinlock_store_first_qid(lock, qid);
-        } else {
-            prev_qnode = spinlock_get_remote_qnode(prev_qid);
-            atomic_store(&prev_qnode->next_qid, qid, ATOMIC_RELAXED);
+        if (prev == old_value) {
+            break;
         }

-        for (;;) {
-            locked = atomic_load_acquire(&qnode->locked);
+        cpu_pause();
+    }

-            if (!locked) {
-                break;
-            }
+    return prev;
+}

-            cpu_pause();
-        }
+static struct spinlock_qnode *
+spinlock_get_remote_qnode(uint32_t qid)
+{
+    struct spinlock_cpu_data *cpu_data;
+    unsigned int ctx, cpu;

-        spinlock_store_first_qid(lock, SPINLOCK_QID_NULL);
-    }
+    /* This fence synchronizes with queueing */
+    atomic_fence_acquire();

-    spinlock_own(lock);
-    error = spinlock_try_downgrade(lock, qid);
+    ctx = spinlock_qid_ctx(qid);
+    cpu = spinlock_qid_cpu(qid);
+    cpu_data = percpu_ptr(spinlock_cpu_data, cpu);
+    return spinlock_cpu_data_get_qnode(cpu_data, ctx);
+}

-    if (!error) {
-        return;
-    }
+static void
+spinlock_set_locked(struct spinlock *lock)
+{
+    atomic_or(&lock->value, SPINLOCK_LOCKED, ATOMIC_RELAXED);
+}
+
+static void
+spinlock_wait_locked(const struct spinlock *lock)
+{
+    uint32_t value;

     for (;;) {
-        next_qid = atomic_load(&qnode->next_qid, ATOMIC_RELAXED);
+        value = atomic_load(&lock->value, ATOMIC_ACQUIRE);

-        if (next_qid != SPINLOCK_QID_NULL) {
+        if (!(value & SPINLOCK_LOCKED)) {
             break;
         }

         cpu_pause();
     }
+}
+
+static int
+spinlock_downgrade(struct spinlock *lock, uint32_t qid)
+{
+    uint32_t value, prev;
+
+    value = (qid << SPINLOCK_QID_SHIFT) | SPINLOCK_CONTENDED;
+    prev = atomic_cas(&lock->value, value, SPINLOCK_LOCKED, ATOMIC_RELAXED);
+    assert(prev & SPINLOCK_CONTENDED);
+
+    if (prev != value) {
+        return EBUSY;
+    }

-    spinlock_store_first_qid(lock, next_qid);
+    return 0;
 }

 void
-spinlock_unlock_slow(struct spinlock *lock)
+spinlock_lock_slow(struct spinlock *lock)
 {
-    struct spinlock_qnode *next_qnode;
-    unsigned int next_qid;
+    struct spinlock_qnode *qnode, *prev_qnode, *next_qnode;
+    uint32_t prev, qid;
+    int error;

-    for (;;) {
-        next_qid = spinlock_load_first_qid(lock);
+    spinlock_get_local_qnode(&qnode, &qid);
+    spinlock_qnode_init(qnode);

-        if (next_qid != SPINLOCK_QID_NULL) {
-            break;
-        }
+    prev = spinlock_enqueue(lock, qid);

-        cpu_pause();
+    if (prev & SPINLOCK_CONTENDED) {
+        prev_qnode = spinlock_get_remote_qnode(prev >> SPINLOCK_QID_SHIFT);
+        spinlock_qnode_set_locked(qnode);
+        spinlock_qnode_set_next(prev_qnode, qnode);
+        spinlock_qnode_wait_locked(qnode);
+    }
+
+    /*
+     * If uncontended, the previous lock value could be used to check whether
+     * the lock bit was also cleared, but this wait operation also enforces
+     * acquire ordering.
+     */
+    spinlock_wait_locked(lock);
+
+    spinlock_own(lock);
+    error = spinlock_downgrade(lock, qid);
+
+    if (!error) {
+        return;
     }

-    next_qnode = spinlock_get_remote_qnode(next_qid);
-    atomic_store_release(&next_qnode->locked, false);
+    spinlock_set_locked(lock);
+    next_qnode = spinlock_qnode_wait_next(qnode);
+    spinlock_qnode_clear_locked(next_qnode);
 }

 static int __init
diff --git a/kern/spinlock.h b/kern/spinlock.h
index d88d535e..f9e74f56 100644
--- a/kern/spinlock.h
+++ b/kern/spinlock.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2017 Richard Braun.
+ * Copyright (c) 2012-2018 Richard Braun.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -26,6 +26,11 @@
 #ifndef KERN_SPINLOCK_H
 #define KERN_SPINLOCK_H

+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <kern/atomic.h>
 #include <kern/init.h>
 #include <kern/macros.h>
 #include <kern/spinlock_i.h>
@@ -34,7 +39,17 @@

 struct spinlock;

-#define spinlock_assert_locked(lock) assert((lock)->value != SPINLOCK_UNLOCKED)
+/* TODO Remove, let users do it instead */
+#define spinlock_assert_locked(lock) assert(spinlock_locked(lock))
+
+static inline bool
+spinlock_locked(const struct spinlock *lock)
+{
+    uint32_t value;
+
+    value = atomic_load(&lock->value, ATOMIC_RELAXED);
+    return value != SPINLOCK_UNLOCKED;
+}

 #ifdef SPINLOCK_TRACK_OWNER
diff --git a/kern/spinlock_i.h b/kern/spinlock_i.h
index cd1bd368..da1233b4 100644
--- a/kern/spinlock_i.h
+++ b/kern/spinlock_i.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2017 Richard Braun.
+ * Copyright (c) 2012-2018 Richard Braun.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -30,12 +30,12 @@
 #include <machine/cpu.h>

 /*
- * Non-contended lock values.
+ * Uncontended lock values.
  *
- * Any other lock value implies a contended lock.
+ * Any other value implies a contended lock.
  */
-#define SPINLOCK_UNLOCKED 0
-#define SPINLOCK_LOCKED 1
+#define SPINLOCK_UNLOCKED 0x0
+#define SPINLOCK_LOCKED 0x1

 #ifdef SPINLOCK_TRACK_OWNER

@@ -63,7 +63,7 @@ spinlock_disown(struct spinlock *lock)
 static inline int
 spinlock_lock_fast(struct spinlock *lock)
 {
-    unsigned int prev;
+    uint32_t prev;

     prev = atomic_cas_acquire(&lock->value, SPINLOCK_UNLOCKED, SPINLOCK_LOCKED);

@@ -75,25 +75,8 @@ spinlock_lock_fast(struct spinlock *lock)
     return 0;
 }

-static inline int
-spinlock_unlock_fast(struct spinlock *lock)
-{
-    unsigned int prev;
-
-    spinlock_disown(lock);
-    prev = atomic_cas_release(&lock->value, SPINLOCK_LOCKED, SPINLOCK_UNLOCKED);
-
-    if (unlikely(prev != SPINLOCK_LOCKED)) {
-        return EBUSY;
-    }
-
-    return 0;
-}
-
 void spinlock_lock_slow(struct spinlock *lock);

-void spinlock_unlock_slow(struct spinlock *lock);
-
 static inline void
 spinlock_lock_common(struct spinlock *lock)
 {
@@ -109,13 +92,8 @@ spinlock_lock_common(struct spinlock *lock)
 static inline void
 spinlock_unlock_common(struct spinlock *lock)
 {
-    int error;
-
-    error = spinlock_unlock_fast(lock);
-
-    if (unlikely(error)) {
-        spinlock_unlock_slow(lock);
-    }
+    spinlock_disown(lock);
+    atomic_and(&lock->value, ~SPINLOCK_LOCKED, ATOMIC_RELEASE);
 }

 #endif /* KERN_SPINLOCK_I_H */
diff --git a/kern/spinlock_types.h b/kern/spinlock_types.h
index 301b8933..fd160a5f 100644
--- a/kern/spinlock_types.h
+++ b/kern/spinlock_types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -21,6 +21,8 @@
 #ifndef KERN_SPINLOCK_TYPES_H
 #define KERN_SPINLOCK_TYPES_H

+#include <stdint.h>
+
 #ifdef CONFIG_SPINLOCK_DEBUG
 #define SPINLOCK_TRACK_OWNER
 #endif
@@ -28,7 +30,7 @@
 struct thread;

 struct spinlock {
-    unsigned int value;
+    uint32_t value;

 #ifdef SPINLOCK_TRACK_OWNER
     struct thread *owner;
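For readers who want to see the new lock word layout in isolation, here is a small standalone sketch. It is not part of the patch: the macro names are shortened, hypothetical stand-ins for the SPINLOCK_* definitions above, and it only encodes and decodes a contended lock word (lock bit 0x1, contended bit 0x2, QID stored from bit 2 with 1 execution context bit and 29 CPU ID bits) rather than implementing any locking.

/*
 * Illustrative sketch only (not part of the patch): it mirrors the bit
 * layout from this commit using shortened, hypothetical macro names.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define LOCKED        0x1u
#define CONTENDED     0x2u
#define QID_SHIFT     2u                 /* lock bit + contended bit */
#define QID_CTX_BITS  1u
#define QID_CTX_MASK  ((1u << QID_CTX_BITS) - 1)
#define QID_CPU_SHIFT QID_CTX_BITS
#define QID_CPU_MASK  ((1u << 29) - 1)

static uint32_t
qid_build(unsigned int ctx, unsigned int cpu)
{
    assert(ctx <= QID_CTX_MASK);
    assert(cpu <= QID_CPU_MASK);
    return ((uint32_t)cpu << QID_CPU_SHIFT) | ctx;
}

int
main(void)
{
    /* Contended lock word: held, last waiter on CPU 5 in interrupt context (1). */
    uint32_t qid = qid_build(1, 5);
    uint32_t word = (qid << QID_SHIFT) | CONTENDED | LOCKED;

    printf("lock word  : 0x%08x\n", word);
    printf("locked     : %u\n", word & LOCKED);
    printf("contended  : %u\n", (word & CONTENDED) >> 1);
    printf("waiter ctx : %u\n", (word >> QID_SHIFT) & QID_CTX_MASK);
    printf("waiter cpu : %u\n", (word >> (QID_SHIFT + QID_CPU_SHIFT)) & QID_CPU_MASK);
    return 0;
}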
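The diff to spinlock_i.h also shows why spinlock_unlock_fast() and spinlock_unlock_slow() disappear: with the first waiter spinning on the lock word itself, unlocking reduces to clearing the lock bit with a release atomic AND, while the lock fast path remains an acquire CAS from SPINLOCK_UNLOCKED to SPINLOCK_LOCKED. The sketch below illustrates those two uncontended paths only; it assumes plain C11 <stdatomic.h> and a hypothetical struct lock in place of the kernel's kern/atomic.h wrappers and struct spinlock.

/*
 * Illustrative sketch, not kernel code: C11 atomics stand in for the
 * kern/atomic.h wrappers used by the patch.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define LOCKED 0x1u

struct lock {
    _Atomic uint32_t value;
};

static bool
lock_fast(struct lock *l)
{
    uint32_t expected = 0;

    /* Acquire CAS 0 -> 1, succeeds only when the word is fully uncontended. */
    return atomic_compare_exchange_strong_explicit(&l->value, &expected, LOCKED,
                                                   memory_order_acquire,
                                                   memory_order_relaxed);
}

static void
unlock(struct lock *l)
{
    /* Clear only the lock bit; a waiter reference, if any, is left intact. */
    atomic_fetch_and_explicit(&l->value, ~LOCKED, memory_order_release);
}

int
main(void)
{
    struct lock l = { .value = 0 };

    if (lock_fast(&l)) {
        printf("acquired, word=0x%x\n", (unsigned)atomic_load(&l.value));
        unlock(&l);
        printf("released, word=0x%x\n", (unsigned)atomic_load(&l.value));
    }

    return 0;
}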