Diffstat (limited to 'kern')
-rw-r--r--   kern/spinlock.c        | 405
-rw-r--r--   kern/spinlock.h        |  19
-rw-r--r--   kern/spinlock_i.h      |  38
-rw-r--r--   kern/spinlock_types.h  |   6
4 files changed, 235 insertions, 233 deletions
diff --git a/kern/spinlock.c b/kern/spinlock.c
index 5ba91169..58eb74a6 100644
--- a/kern/spinlock.c
+++ b/kern/spinlock.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -19,54 +19,46 @@
  * Synchronization on Shared-Memory Multiprocessors" by John M. Mellor-Crummey
  * and Michael L. Scott, which describes MCS locks, among other algorithms.
  *
- * In order to avoid the need to allocate a qnode for every spin lock
- * currently held, and also to keep the size of locks to a single 32-bits
- * word, this module actually uses a variant of the MCS locks. The
- * differences are presented below.
+ * Here are additional issues this module solves that require modifications
+ * to the original MCS algorithm :
+ *  - There must not be any limit on the number of spin locks a thread may
+ *    hold, and spinlocks must not need dynamic memory allocation.
+ *  - Unlocking a spin lock must be a non-blocking operation. Without
+ *    this requirement, a locking operation may be interrupted in the
+ *    middle of a hand-off sequence, preventing the unlock operation
+ *    from completing, potentially causing tricky deadlocks.
+ *  - Spin lock storage must not exceed 32 bits.
  *
- * First, the lock owner is never part of the lock queue. This makes it
- * possible to use a qnode only during the lock operation, not after.
- * This means a single qnode per execution context is required even when
- * holding multiple spin locks simultaneously. In order to achieve that,
- * a spin lock not only refers to the last waiter, but also to the first,
- * which is the owner's qnode->next in the original algorithm.
+ * In order to solve these issues, the lock owner is never part of the
+ * lock queue. This makes it possible to use a qnode only during the lock
+ * operation, not after. This means a single qnode per execution context
+ * is required even when holding multiple spin locks simultaneously.
  *
- * Next, instead of two pointers, the lock is a single word storing
- * compressed references to both the first and last waiters. Those
- * references are integers called QIDs, for qnode IDs. They can be
- * broken down into 3 parts :
- *  - the lock bit
- *  - the execution context
- *  - the target CPU ID
+ * In addition, instead of making the owner perform a hand-off sequence
+ * to unblock the first waiter when unlocking, the latter directly spins
+ * on the lock word, and is the one performing the hand-off sequence with
+ * the second waiter. As a side effect, this also optimizes spinning for
+ * the common case of a single waiter.
  *
- * The layout of a QID is carefully crafted to match the lock values
- * expected by the fast paths. Without contention, a QID value must
- * be SPINLOCK_UNLOCKED when a lock isn't held, and SPINLOCK_LOCKED
- * when it is. Besides, without contention, the reference to the first
- * waiter is logically NULL so that the whole lock value matches one
- * of SPINLOCK_UNLOCKED or SPINLOCK_LOCKED. This means that the values
- * of the execution context and the CPU ID must be 0 in the absence of
- * contention. In the presence of contention, the execution context and
- * CPU ID are used to uniquely identify a statically allocated qnode,
- * and fast paths operations fail. The lock operation must make sure
- * that the lock value is restored to SPINLOCK_LOCKED if there is no
- * more contention, an operation called downgrading.
+ * When a lock is held, the lock bit is set, and when a lock is contended
+ * the contended bit is set. When contended, the lock word also contains
+ * a compressed reference to the last waiter. That reference is called a
+ * QID (for qnode ID). It is structured into two parts :
+ *  - the execution context
+ *  - the CPU ID
  *
- * In order to enforce correct visibility of the critical section,
- * acquire and release atomic operations are used when accessing a
- * qnode lock. Similarly, acquire and release semantics are also used
- * when accessing the lock word which stores the first and last QIDs,
- * so that the memory operations on the qnodes referenced by those QIDs
- * are correctly enforced. Accessing the next QID in a qnode however
- * is performed with no memory order constraint, because the qnodes
- * referenced by those QID are never accessed unless they are the first
- * or the last, cases which do enforce ordering.
+ * The QID is used to uniquely identify a statically allocated qnode,
+ * The lock operation must make sure that the lock value is restored
+ * to SPINLOCK_LOCKED if there is no more contention, an operation
+ * called downgrading.
  */

 #include <assert.h>
 #include <errno.h>
+#include <limits.h>
 #include <stdalign.h>
 #include <stddef.h>
+#include <stdint.h>

 #include <kern/atomic.h>
 #include <kern/init.h>
@@ -78,262 +70,277 @@
 #include <kern/thread.h>
 #include <machine/cpu.h>

-#define SPINLOCK_QID_LOCK_BIT 1
+#define SPINLOCK_CONTENDED 0x2

-#define SPINLOCK_QID_CTX_BITS 2
-#define SPINLOCK_QID_CTX_SHIFT SPINLOCK_QID_LOCK_BIT
-#define SPINLOCK_QID_CTX_MAX (1 << SPINLOCK_QID_CTX_BITS)
-#define SPINLOCK_QID_CTX_MASK (SPINLOCK_QID_CTX_MAX - 1)
+#define SPINLOCK_LOCKED_BITS 1
+#define SPINLOCK_CONTENDED_BITS 1

-#define SPINLOCK_QID_CPU_BITS 13
-#define SPINLOCK_QID_CPU_SHIFT (SPINLOCK_QID_CTX_BITS \
-                                + SPINLOCK_QID_CTX_SHIFT)
-#define SPINLOCK_QID_CPU_MAX (1 << SPINLOCK_QID_CPU_BITS)
-#define SPINLOCK_QID_CPU_MASK (SPINLOCK_QID_CPU_MAX - 1)
+#define SPINLOCK_QID_SHIFT (SPINLOCK_CONTENDED_BITS \
+                            + SPINLOCK_LOCKED_BITS)

-#define SPINLOCK_QID_BITS (SPINLOCK_QID_CPU_BITS \
-                           + SPINLOCK_QID_CTX_BITS \
-                           + SPINLOCK_QID_LOCK_BIT)
-#define SPINLOCK_QID_MAX (1 << SPINLOCK_QID_BITS)
-#define SPINLOCK_QID_MASK (SPINLOCK_QID_MAX - 1)
+#define SPINLOCK_QID_CTX_BITS 1
+#define SPINLOCK_QID_CTX_SHIFT 0
+#define SPINLOCK_QID_CTX_MASK ((1U << SPINLOCK_QID_CTX_BITS) - 1)

-#define SPINLOCK_QID_MAX_BITS 16
+#define SPINLOCK_QID_CPU_BITS 29
+#define SPINLOCK_QID_CPU_SHIFT (SPINLOCK_QID_CTX_SHIFT \
+                                + SPINLOCK_QID_CTX_BITS)
+#define SPINLOCK_QID_CPU_MASK ((1U << SPINLOCK_QID_CPU_BITS) - 1)

-#define SPINLOCK_QID_NULL SPINLOCK_UNLOCKED
-#define SPINLOCK_QID_LOCKED SPINLOCK_LOCKED
-
-#if SPINLOCK_QID_BITS > SPINLOCK_QID_MAX_BITS
-#error "spinlock qid too large"
-#endif
+#define SPINLOCK_BITS (SPINLOCK_QID_CPU_BITS \
+                       + SPINLOCK_QID_CTX_BITS \
+                       + SPINLOCK_CONTENDED_BITS \
+                       + SPINLOCK_LOCKED_BITS)

-#if CONFIG_MAX_CPUS > (1 << SPINLOCK_QID_CPU_BITS)
+#if CONFIG_MAX_CPUS > (1U << SPINLOCK_QID_CPU_BITS)
 #error "maximum number of supported processors too large"
 #endif

+static_assert(SPINLOCK_BITS <= (CHAR_BIT * sizeof(uint32_t)),
+              "spinlock too large");
+
 struct spinlock_qnode {
-    unsigned int next_qid;
+    alignas(CPU_L1_SIZE) struct spinlock_qnode *next;
     bool locked;
 };

-#define SPINLOCK_CTX_INVALID 0
-#define SPINLOCK_CTX_THREAD 1
-#define SPINLOCK_CTX_INTR 2
-#define SPINLOCK_CTX_NMI 3
-#define SPINLOCK_NR_CTXS 4
-
-#if SPINLOCK_CTX_INVALID != 0
-#error "the invalid context value must be 0"
-#endif
+/* TODO NMI support */
+enum {
+    SPINLOCK_CTX_THREAD,
+    SPINLOCK_CTX_INTR,
+    SPINLOCK_NR_CTXS
+};

-#if SPINLOCK_NR_CTXS > SPINLOCK_QID_CTX_MAX
-#error "maximum number of contexts too large"
-#endif
+static_assert(SPINLOCK_NR_CTXS <= (SPINLOCK_QID_CTX_MASK + 1),
+              "maximum number of contexts too large");

 struct spinlock_cpu_data {
-    alignas(CPU_L1_SIZE) struct spinlock_qnode qnodes[SPINLOCK_NR_CTXS - 1];
+    struct spinlock_qnode qnodes[SPINLOCK_NR_CTXS];
 };

 static struct spinlock_cpu_data spinlock_cpu_data __percpu;

-void
-spinlock_init(struct spinlock *lock)
+static struct spinlock_qnode *
+spinlock_cpu_data_get_qnode(struct spinlock_cpu_data *cpu_data,
+                            unsigned int ctx)
 {
-    lock->value = SPINLOCK_UNLOCKED;
+    assert(ctx < ARRAY_SIZE(cpu_data->qnodes));
+    return &cpu_data->qnodes[ctx];
+}

-#ifdef SPINLOCK_TRACK_OWNER
-    lock->owner = NULL;
-#endif /* SPINLOCK_TRACK_OWNER */
+static uint32_t
+spinlock_qid_build(unsigned int ctx, unsigned int cpu)
+{
+    assert(ctx <= SPINLOCK_QID_CTX_MASK);
+    assert(cpu <= SPINLOCK_QID_CPU_MASK);
+
+    return (cpu << SPINLOCK_QID_CPU_SHIFT) | (ctx << SPINLOCK_QID_CTX_SHIFT);
 }

 static unsigned int
-spinlock_qid2ctx(unsigned int qid)
+spinlock_qid_ctx(uint32_t qid)
 {
     return (qid >> SPINLOCK_QID_CTX_SHIFT) & SPINLOCK_QID_CTX_MASK;
 }

 static unsigned int
-spinlock_qid2cpu(unsigned int qid)
+spinlock_qid_cpu(uint32_t qid)
 {
     return (qid >> SPINLOCK_QID_CPU_SHIFT) & SPINLOCK_QID_CPU_MASK;
 }

-static unsigned int
-spinlock_build_qid(unsigned int cpu, unsigned int ctx)
+void
+spinlock_init(struct spinlock *lock)
 {
-    return (cpu << SPINLOCK_QID_CPU_SHIFT)
-           | (ctx << SPINLOCK_QID_CTX_SHIFT)
-           | SPINLOCK_QID_LOCK_BIT;
+    lock->value = SPINLOCK_UNLOCKED;
+
+#ifdef SPINLOCK_TRACK_OWNER
+    lock->owner = NULL;
+#endif /* SPINLOCK_TRACK_OWNER */
 }

 static void
-spinlock_get_local_qnode(struct spinlock_qnode **qnodep, unsigned int *qidp)
+spinlock_qnode_init(struct spinlock_qnode *qnode)
 {
-    struct spinlock_cpu_data *cpu_data;
-    unsigned int ctx;
-
-    cpu_data = cpu_local_ptr(spinlock_cpu_data);
-
-    /* TODO NMI support */
-    ctx = thread_interrupted() ? SPINLOCK_CTX_INTR : SPINLOCK_CTX_THREAD;
-    *qnodep = &cpu_data->qnodes[ctx - 1];
-    *qidp = spinlock_build_qid(cpu_id(), ctx);
+    qnode->next = NULL;
 }

 static struct spinlock_qnode *
-spinlock_get_remote_qnode(unsigned int qid)
+spinlock_qnode_wait_next(const struct spinlock_qnode *qnode)
 {
-    struct spinlock_cpu_data *cpu_data;
-    unsigned int ctx, cpu;
+    struct spinlock_qnode *next;

-    ctx = spinlock_qid2ctx(qid);
+    for (;;) {
+        next = atomic_load(&qnode->next, ATOMIC_ACQUIRE);

-    if (ctx == SPINLOCK_CTX_INVALID) {
-        return NULL;
-    }
+        if (next) {
+            break;
+        }

-    ctx--;
-    assert(ctx < ARRAY_SIZE(cpu_data->qnodes));
+        cpu_pause();
+    }

-    cpu = spinlock_qid2cpu(qid);
-    cpu_data = percpu_ptr(spinlock_cpu_data, cpu);
-    return &cpu_data->qnodes[ctx];
+    return next;
 }

 static void
-spinlock_store_first_qid(struct spinlock *lock, unsigned int newqid)
+spinlock_qnode_set_next(struct spinlock_qnode *qnode, struct spinlock_qnode *next)
 {
-    unsigned int oldval, newval, prev;
-
-    assert(newqid < SPINLOCK_QID_MAX);
-
-    newqid <<= SPINLOCK_QID_MAX_BITS;
-
-    do {
-        oldval = atomic_load(&lock->value, ATOMIC_RELAXED);
-        newval = newqid | (oldval & SPINLOCK_QID_MASK);
-        prev = atomic_cas_release(&lock->value, oldval, newval);
-    } while (prev != oldval);
+    assert(next);
+    atomic_store(&qnode->next, next, ATOMIC_RELEASE);
 }

-static unsigned int
-spinlock_load_first_qid(const struct spinlock *lock)
+static void
+spinlock_qnode_set_locked(struct spinlock_qnode *qnode)
 {
-    unsigned int value;
-
-    value = atomic_load_acquire(&lock->value);
-    return (value >> SPINLOCK_QID_MAX_BITS) & SPINLOCK_QID_MASK;
+    qnode->locked = true;
 }

-static unsigned int
-spinlock_swap_last_qid(struct spinlock *lock, unsigned int newqid)
+static void
+spinlock_qnode_wait_locked(const struct spinlock_qnode *qnode)
 {
-    unsigned int oldval, newval, prev;
+    bool locked;

-    assert(newqid < SPINLOCK_QID_MAX);
+    for (;;) {
+        locked = atomic_load(&qnode->locked, ATOMIC_ACQUIRE);

-    do {
-        oldval = atomic_load(&lock->value, ATOMIC_RELAXED);
-        newval = (oldval & (SPINLOCK_QID_MASK << SPINLOCK_QID_MAX_BITS))
-                 | newqid;
-        prev = atomic_cas_acq_rel(&lock->value, oldval, newval);
-    } while (prev != oldval);
+        if (!locked) {
+            break;
+        }

-    return prev & SPINLOCK_QID_MASK;
+        cpu_pause();
+    }
 }

-static unsigned int
-spinlock_try_downgrade(struct spinlock *lock, unsigned int oldqid)
+static void
+spinlock_qnode_clear_locked(struct spinlock_qnode *qnode)
 {
-    unsigned int prev;
-
-    prev = atomic_cas(&lock->value, oldqid,
-                      SPINLOCK_QID_LOCKED, ATOMIC_RELAXED);
-
-    assert((prev >> SPINLOCK_QID_MAX_BITS) == 0);
-    assert(prev != SPINLOCK_QID_NULL);
+    atomic_store(&qnode->locked, false, ATOMIC_RELEASE);
+}

-    if (prev != oldqid) {
-        return EBUSY;
-    }
+static void
+spinlock_get_local_qnode(struct spinlock_qnode **qnode, uint32_t *qid)
+{
+    struct spinlock_cpu_data *cpu_data;
+    unsigned int ctx;

-    return 0;
+    cpu_data = cpu_local_ptr(spinlock_cpu_data);
+    ctx = thread_interrupted() ? SPINLOCK_CTX_INTR : SPINLOCK_CTX_THREAD;
+    *qnode = spinlock_cpu_data_get_qnode(cpu_data, ctx);
+    *qid = spinlock_qid_build(ctx, cpu_id());
 }

-void
-spinlock_lock_slow(struct spinlock *lock)
+static uint32_t
+spinlock_enqueue(struct spinlock *lock, uint32_t qid)
 {
-    struct spinlock_qnode *qnode, *prev_qnode;
-    unsigned int qid, prev_qid, next_qid, ctx;
-    bool locked;
-    int error;
+    uint32_t old_value, new_value, prev, next;

-    spinlock_get_local_qnode(&qnode, &qid);
-    qnode->next_qid = SPINLOCK_QID_NULL;
+    next = (qid << SPINLOCK_QID_SHIFT) | SPINLOCK_CONTENDED;

-    prev_qid = spinlock_swap_last_qid(lock, qid);
-
-    if (prev_qid != SPINLOCK_QID_NULL) {
-        qnode->locked = true;
-        ctx = spinlock_qid2ctx(prev_qid);
+    for (;;) {
+        old_value = atomic_load(&lock->value, ATOMIC_RELAXED);
+        new_value = next | (old_value & SPINLOCK_LOCKED);
+        prev = atomic_cas(&lock->value, old_value, new_value, ATOMIC_RELEASE);

-        if (ctx == SPINLOCK_CTX_INVALID) {
-            spinlock_store_first_qid(lock, qid);
-        } else {
-            prev_qnode = spinlock_get_remote_qnode(prev_qid);
-            atomic_store(&prev_qnode->next_qid, qid, ATOMIC_RELAXED);
+        if (prev == old_value) {
+            break;
         }

-        for (;;) {
-            locked = atomic_load_acquire(&qnode->locked);
+        cpu_pause();
+    }

-            if (!locked) {
-                break;
-            }
+    return prev;
+}

-            cpu_pause();
-        }
+static struct spinlock_qnode *
+spinlock_get_remote_qnode(uint32_t qid)
+{
+    struct spinlock_cpu_data *cpu_data;
+    unsigned int ctx, cpu;

-        spinlock_store_first_qid(lock, SPINLOCK_QID_NULL);
-    }
+    /* This fence synchronizes with queueing */
+    atomic_fence_acquire();

-    spinlock_own(lock);
-    error = spinlock_try_downgrade(lock, qid);
+    ctx = spinlock_qid_ctx(qid);
+    cpu = spinlock_qid_cpu(qid);
+    cpu_data = percpu_ptr(spinlock_cpu_data, cpu);
+    return spinlock_cpu_data_get_qnode(cpu_data, ctx);
+}

-    if (!error) {
-        return;
-    }
+static void
+spinlock_set_locked(struct spinlock *lock)
+{
+    atomic_or(&lock->value, SPINLOCK_LOCKED, ATOMIC_RELAXED);
+}
+
+static void
+spinlock_wait_locked(const struct spinlock *lock)
+{
+    uint32_t value;

     for (;;) {
-        next_qid = atomic_load(&qnode->next_qid, ATOMIC_RELAXED);
+        value = atomic_load(&lock->value, ATOMIC_ACQUIRE);

-        if (next_qid != SPINLOCK_QID_NULL) {
+        if (!(value & SPINLOCK_LOCKED)) {
             break;
         }

         cpu_pause();
     }
+}
+
+static int
+spinlock_downgrade(struct spinlock *lock, uint32_t qid)
+{
+    uint32_t value, prev;
+
+    value = (qid << SPINLOCK_QID_SHIFT) | SPINLOCK_CONTENDED;
+    prev = atomic_cas(&lock->value, value, SPINLOCK_LOCKED, ATOMIC_RELAXED);
+    assert(prev & SPINLOCK_CONTENDED);
+
+    if (prev != value) {
+        return EBUSY;
+    }

-    spinlock_store_first_qid(lock, next_qid);
+    return 0;
 }

 void
-spinlock_unlock_slow(struct spinlock *lock)
+spinlock_lock_slow(struct spinlock *lock)
 {
-    struct spinlock_qnode *next_qnode;
-    unsigned int next_qid;
+    struct spinlock_qnode *qnode, *prev_qnode, *next_qnode;
+    uint32_t prev, qid;
+    int error;

-    for (;;) {
-        next_qid = spinlock_load_first_qid(lock);
+    spinlock_get_local_qnode(&qnode, &qid);
+    spinlock_qnode_init(qnode);

-        if (next_qid != SPINLOCK_QID_NULL) {
-            break;
-        }
+    prev = spinlock_enqueue(lock, qid);

-        cpu_pause();
+    if (prev & SPINLOCK_CONTENDED) {
+        prev_qnode = spinlock_get_remote_qnode(prev >> SPINLOCK_QID_SHIFT);
+        spinlock_qnode_set_locked(qnode);
+        spinlock_qnode_set_next(prev_qnode, qnode);
+        spinlock_qnode_wait_locked(qnode);
+    }
+
+    /*
+     * If uncontended, the previous lock value could be used to check whether
+     * the lock bit was also cleared, but this wait operation also enforces
+     * acquire ordering.
+     */
+    spinlock_wait_locked(lock);
+
+    spinlock_own(lock);
+    error = spinlock_downgrade(lock, qid);
+
+    if (!error) {
+        return;
     }

-    next_qnode = spinlock_get_remote_qnode(next_qid);
-    atomic_store_release(&next_qnode->locked, false);
+    spinlock_set_locked(lock);
+    next_qnode = spinlock_qnode_wait_next(qnode);
+    spinlock_qnode_clear_locked(next_qnode);
 }

 static int __init
diff --git a/kern/spinlock.h b/kern/spinlock.h
index d88d535e..f9e74f56 100644
--- a/kern/spinlock.h
+++ b/kern/spinlock.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2017 Richard Braun.
+ * Copyright (c) 2012-2018 Richard Braun.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -26,6 +26,11 @@
 #ifndef KERN_SPINLOCK_H
 #define KERN_SPINLOCK_H

+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <kern/atomic.h>
 #include <kern/init.h>
 #include <kern/macros.h>
 #include <kern/spinlock_i.h>
@@ -34,7 +39,17 @@

 struct spinlock;

-#define spinlock_assert_locked(lock) assert((lock)->value != SPINLOCK_UNLOCKED)
+/* TODO Remove, let users do it instead */
+#define spinlock_assert_locked(lock) assert(spinlock_locked(lock))
+
+static inline bool
+spinlock_locked(const struct spinlock *lock)
+{
+    uint32_t value;
+
+    value = atomic_load(&lock->value, ATOMIC_RELAXED);
+    return value != SPINLOCK_UNLOCKED;
+}

 #ifdef SPINLOCK_TRACK_OWNER
diff --git a/kern/spinlock_i.h b/kern/spinlock_i.h
index cd1bd368..da1233b4 100644
--- a/kern/spinlock_i.h
+++ b/kern/spinlock_i.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012-2017 Richard Braun.
+ * Copyright (c) 2012-2018 Richard Braun.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -30,12 +30,12 @@
 #include <machine/cpu.h>

 /*
- * Non-contended lock values.
+ * Uncontended lock values.
  *
- * Any other lock value implies a contended lock.
+ * Any other value implies a contended lock.
  */
-#define SPINLOCK_UNLOCKED 0
-#define SPINLOCK_LOCKED 1
+#define SPINLOCK_UNLOCKED 0x0
+#define SPINLOCK_LOCKED 0x1

 #ifdef SPINLOCK_TRACK_OWNER

@@ -63,7 +63,7 @@ spinlock_disown(struct spinlock *lock)
 static inline int
 spinlock_lock_fast(struct spinlock *lock)
 {
-    unsigned int prev;
+    uint32_t prev;

     prev = atomic_cas_acquire(&lock->value, SPINLOCK_UNLOCKED, SPINLOCK_LOCKED);

@@ -75,25 +75,8 @@ spinlock_lock_fast(struct spinlock *lock)
     return 0;
 }

-static inline int
-spinlock_unlock_fast(struct spinlock *lock)
-{
-    unsigned int prev;
-
-    spinlock_disown(lock);
-    prev = atomic_cas_release(&lock->value, SPINLOCK_LOCKED, SPINLOCK_UNLOCKED);
-
-    if (unlikely(prev != SPINLOCK_LOCKED)) {
-        return EBUSY;
-    }
-
-    return 0;
-}
-
 void spinlock_lock_slow(struct spinlock *lock);

-void spinlock_unlock_slow(struct spinlock *lock);
-
 static inline void
 spinlock_lock_common(struct spinlock *lock)
 {
@@ -109,13 +92,8 @@ spinlock_lock_common(struct spinlock *lock)
 static inline void
 spinlock_unlock_common(struct spinlock *lock)
 {
-    int error;
-
-    error = spinlock_unlock_fast(lock);
-
-    if (unlikely(error)) {
-        spinlock_unlock_slow(lock);
-    }
+    spinlock_disown(lock);
+    atomic_and(&lock->value, ~SPINLOCK_LOCKED, ATOMIC_RELEASE);
 }

 #endif /* KERN_SPINLOCK_I_H */
diff --git a/kern/spinlock_types.h b/kern/spinlock_types.h
index 301b8933..fd160a5f 100644
--- a/kern/spinlock_types.h
+++ b/kern/spinlock_types.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017-2018 Richard Braun.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -21,6 +21,8 @@
 #ifndef KERN_SPINLOCK_TYPES_H
 #define KERN_SPINLOCK_TYPES_H

+#include <stdint.h>
+
 #ifdef CONFIG_SPINLOCK_DEBUG
 #define SPINLOCK_TRACK_OWNER
 #endif
@@ -28,7 +30,7 @@
 struct thread;

 struct spinlock {
-    unsigned int value;
+    uint32_t value;

 #ifdef SPINLOCK_TRACK_OWNER
     struct thread *owner;
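For readers who want to see the new lock word layout in isolation, here is a small standalone sketch. It is not part of the patch: the macro names are shortened, hypothetical stand-ins for the SPINLOCK_* definitions above, and it only encodes and decodes a contended lock word (lock bit 0x1, contended bit 0x2, QID stored from bit 2 with 1 execution context bit and 29 CPU ID bits) rather than implementing any locking.

/*
 * Illustrative sketch only (not part of the patch): it mirrors the bit
 * layout from this commit using shortened, hypothetical macro names.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define LOCKED        0x1u
#define CONTENDED     0x2u
#define QID_SHIFT     2u                 /* lock bit + contended bit */
#define QID_CTX_BITS  1u
#define QID_CTX_MASK  ((1u << QID_CTX_BITS) - 1)
#define QID_CPU_SHIFT QID_CTX_BITS
#define QID_CPU_MASK  ((1u << 29) - 1)

static uint32_t
qid_build(unsigned int ctx, unsigned int cpu)
{
    assert(ctx <= QID_CTX_MASK);
    assert(cpu <= QID_CPU_MASK);
    return ((uint32_t)cpu << QID_CPU_SHIFT) | ctx;
}

int
main(void)
{
    /* Contended lock word: held, last waiter on CPU 5 in interrupt context (1). */
    uint32_t qid = qid_build(1, 5);
    uint32_t word = (qid << QID_SHIFT) | CONTENDED | LOCKED;

    printf("lock word  : 0x%08x\n", word);
    printf("locked     : %u\n", word & LOCKED);
    printf("contended  : %u\n", (word & CONTENDED) >> 1);
    printf("waiter ctx : %u\n", (word >> QID_SHIFT) & QID_CTX_MASK);
    printf("waiter cpu : %u\n", (word >> (QID_SHIFT + QID_CPU_SHIFT)) & QID_CPU_MASK);
    return 0;
}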
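The diff to spinlock_i.h also shows why spinlock_unlock_fast() and spinlock_unlock_slow() disappear: with the first waiter spinning on the lock word itself, unlocking reduces to clearing the lock bit with a release atomic AND, while the lock fast path remains an acquire CAS from SPINLOCK_UNLOCKED to SPINLOCK_LOCKED. The sketch below illustrates those two uncontended paths only; it assumes plain C11 <stdatomic.h> and a hypothetical struct lock in place of the kernel's kern/atomic.h wrappers and struct spinlock.

/*
 * Illustrative sketch, not kernel code: C11 atomics stand in for the
 * kern/atomic.h wrappers used by the patch.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define LOCKED 0x1u

struct lock {
    _Atomic uint32_t value;
};

static bool
lock_fast(struct lock *l)
{
    uint32_t expected = 0;

    /* Acquire CAS 0 -> 1, succeeds only when the word is fully uncontended. */
    return atomic_compare_exchange_strong_explicit(&l->value, &expected, LOCKED,
                                                   memory_order_acquire,
                                                   memory_order_relaxed);
}

static void
unlock(struct lock *l)
{
    /* Clear only the lock bit; a waiter reference, if any, is left intact. */
    atomic_fetch_and_explicit(&l->value, ~LOCKED, memory_order_release);
}

int
main(void)
{
    struct lock l = { .value = 0 };

    if (lock_fast(&l)) {
        printf("acquired, word=0x%x\n", (unsigned)atomic_load(&l.value));
        unlock(&l);
        printf("released, word=0x%x\n", (unsigned)atomic_load(&l.value));
    }

    return 0;
}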