author    | Richard Braun <rbraun@sceen.net> | 2017-04-03 22:53:21 +0200
committer | Richard Braun <rbraun@sceen.net> | 2017-04-03 23:01:07 +0200
commit    | 7933e1b8c746bcc9e972a8103a5a9b3a6db1c794 (patch)
tree      | a9a0f83c77a0eb6c4fb11f8befa368b19258b216
parent    | 15c64dfeffe5107f8e8af89a994f8ea050505dce (diff)
kern/spinlock: new fair and scalable implementation
This new implementation, based on MCS locks, provides rigorous fairness
(lock acquisition is time-bounded and depends only on the number of
contending processors) and excellent scalability.
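For readers new to the technique, the block below is a minimal, illustrative MCS lock in plain C, using GCC/Clang __atomic builtins. The names (mcs_lock, mcs_node) are hypothetical and this is not the x15 code: as the kern/spinlock.c header in the diff below explains, the commit's variant keeps the owner out of the queue and compresses the queue references into a single 32-bit word of QIDs. A usage sketch of the new spinlock API follows the diff further below.

/*
 * Minimal, illustrative MCS lock (not the x15 implementation): each waiter
 * spins on its own node, and the holder hands the lock off to its successor,
 * which is what yields FIFO fairness and purely local spinning.
 */

#include <stdbool.h>
#include <stddef.h>

struct mcs_node {
    struct mcs_node *next;
    bool locked;
};

struct mcs_lock {
    struct mcs_node *tail;  /* last waiter, or NULL if the lock is free */
};

static void
mcs_lock_acquire(struct mcs_lock *lock, struct mcs_node *node)
{
    struct mcs_node *prev;

    node->next = NULL;

    /* Atomically enqueue ourselves at the tail of the waiter queue. */
    prev = __atomic_exchange_n(&lock->tail, node, __ATOMIC_ACQ_REL);

    if (prev == NULL) {
        return;             /* the lock was free, we now own it */
    }

    /* Link behind the previous waiter and spin on our own flag only. */
    node->locked = true;
    __atomic_store_n(&prev->next, node, __ATOMIC_RELEASE);

    while (__atomic_load_n(&node->locked, __ATOMIC_ACQUIRE)) {
        /* a cpu_pause()-style hint would go here */
    }
}

static void
mcs_lock_release(struct mcs_lock *lock, struct mcs_node *node)
{
    struct mcs_node *next, *expected;

    next = __atomic_load_n(&node->next, __ATOMIC_ACQUIRE);

    if (next == NULL) {
        /* No known successor: try to reset the tail to NULL. */
        expected = node;

        if (__atomic_compare_exchange_n(&lock->tail, &expected, NULL, false,
                                        __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE)) {
            return;
        }

        /* A successor is enqueueing itself; wait for the link to appear. */
        do {
            next = __atomic_load_n(&node->next, __ATOMIC_ACQUIRE);
        } while (next == NULL);
    }

    /* Hand the lock off to the successor. */
    __atomic_store_n(&next->locked, false, __ATOMIC_RELEASE);
}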
-rw-r--r-- | Makefrag.am           |   1
-rw-r--r-- | kern/spinlock.c       | 322
-rw-r--r-- | kern/spinlock.h       |  84
-rw-r--r-- | kern/spinlock_i.h     |  65
-rw-r--r-- | kern/spinlock_types.h |   2
5 files changed, 439 insertions, 35 deletions
diff --git a/Makefrag.am b/Makefrag.am
index d99e9597..4cecd3f9 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -62,6 +62,7 @@ x15_SOURCES += \
         kern/semaphore_i.h \
         kern/sleepq.c \
         kern/sleepq.h \
+        kern/spinlock.c \
         kern/spinlock.h \
         kern/spinlock_i.h \
         kern/spinlock_types.h \
diff --git a/kern/spinlock.c b/kern/spinlock.c
new file mode 100644
index 00000000..948d93cf
--- /dev/null
+++ b/kern/spinlock.c
@@ -0,0 +1,322 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation is based on the paper "Algorithms for Scalable
+ * Synchronization on Shared-Memory Multiprocessors" by John M. Mellor-Crummey
+ * and Michael L. Scott, which describes MCS locks, among other algorithms.
+ *
+ * In order to avoid the need to allocate a qnode for every spin lock
+ * currently held, and also to keep the size of locks to a single 32-bit
+ * word, this module actually uses a variant of the MCS locks. The
+ * differences are presented below.
+ *
+ * First, the lock owner is never part of the lock queue. This makes it
+ * possible to use a qnode only during the lock operation, not after.
+ * This means a single qnode per execution context is required even when
+ * holding multiple spin locks simultaneously. In order to achieve that,
+ * a spin lock not only refers to the last waiter, but also to the first,
+ * which is the owner's qnode->next in the original algorithm.
+ *
+ * Next, instead of two pointers, the lock is a single word storing
+ * compressed references to both the first and last waiters. Those
+ * references are integers called QIDs, for qnode IDs. They can be
+ * broken down into 3 parts:
+ *  - the lock bit
+ *  - the execution context
+ *  - the target CPU ID
+ *
+ * The layout of a QID is carefully crafted to match the lock values
+ * expected by the fast paths. Without contention, a QID value must
+ * be SPINLOCK_UNLOCKED when a lock isn't held, and SPINLOCK_LOCKED
+ * when it is. Besides, without contention, the reference to the first
+ * waiter is logically NULL so that the whole lock value matches one
+ * of SPINLOCK_UNLOCKED or SPINLOCK_LOCKED. This means that the values
+ * of the execution context and the CPU ID must be 0 in the absence of
+ * contention. In the presence of contention, the execution context and
+ * CPU ID are used to uniquely identify a statically allocated qnode,
+ * and fast path operations fail. The lock operation must make sure
+ * that the lock value is restored to SPINLOCK_LOCKED if there is no
+ * more contention, an operation called downgrading.
+ */
+
+#include <stddef.h>
+
+#include <kern/assert.h>
+#include <kern/error.h>
+#include <kern/macros.h>
+#include <kern/percpu.h>
+#include <kern/spinlock.h>
+#include <kern/spinlock_i.h>
+#include <kern/spinlock_types.h>
+#include <kern/thread.h>
+#include <machine/atomic.h>
+#include <machine/cpu.h>
+#include <machine/mb.h>
+
+#define SPINLOCK_QID_LOCK_BIT       1
+
+#define SPINLOCK_QID_CTX_BITS       2
+#define SPINLOCK_QID_CTX_SHIFT      SPINLOCK_QID_LOCK_BIT
+#define SPINLOCK_QID_CTX_MAX        (1 << SPINLOCK_QID_CTX_BITS)
+#define SPINLOCK_QID_CTX_MASK       (SPINLOCK_QID_CTX_MAX - 1)
+
+#define SPINLOCK_QID_CPU_BITS       13
+#define SPINLOCK_QID_CPU_SHIFT      (SPINLOCK_QID_CTX_BITS \
+                                     + SPINLOCK_QID_CTX_SHIFT)
+#define SPINLOCK_QID_CPU_MAX        (1 << SPINLOCK_QID_CPU_BITS)
+#define SPINLOCK_QID_CPU_MASK       (SPINLOCK_QID_CPU_MAX - 1)
+
+#define SPINLOCK_QID_BITS           (SPINLOCK_QID_CPU_BITS \
+                                     + SPINLOCK_QID_CTX_BITS \
+                                     + SPINLOCK_QID_LOCK_BIT)
+#define SPINLOCK_QID_MAX            (1 << SPINLOCK_QID_BITS)
+#define SPINLOCK_QID_MASK           (SPINLOCK_QID_MAX - 1)
+
+#define SPINLOCK_QID_MAX_BITS       16
+
+#define SPINLOCK_QID_NULL           SPINLOCK_UNLOCKED
+#define SPINLOCK_QID_LOCKED         SPINLOCK_LOCKED
+
+#if SPINLOCK_QID_BITS > SPINLOCK_QID_MAX_BITS
+#error spinlock qid too large
+#endif
+
+#if X15_MAX_CPUS > (1 << SPINLOCK_QID_CPU_BITS)
+#error maximum number of supported processors too large
+#endif
+
+struct spinlock_qnode {
+    unsigned int next_qid;
+    bool locked;
+};
+
+#define SPINLOCK_CTX_INVALID    0
+#define SPINLOCK_CTX_THREAD     1
+#define SPINLOCK_CTX_INTR       2
+#define SPINLOCK_CTX_NMI        3
+#define SPINLOCK_NR_CTXS        4
+
+#if SPINLOCK_CTX_INVALID != 0
+#error the invalid context value must be 0
+#endif
+
+#if SPINLOCK_NR_CTXS > SPINLOCK_QID_CTX_MAX
+#error maximum number of contexts too large
+#endif
+
+struct spinlock_cpu_data {
+    struct spinlock_qnode qnodes[SPINLOCK_NR_CTXS - 1] __aligned(CPU_L1_SIZE);
+};
+
+static struct spinlock_cpu_data spinlock_cpu_data __percpu;
+
+void
+spinlock_init(struct spinlock *lock)
+{
+    lock->value = SPINLOCK_UNLOCKED;
+}
+
+static unsigned int
+spinlock_qid2ctx(unsigned int qid)
+{
+    return (qid >> SPINLOCK_QID_CTX_SHIFT) & SPINLOCK_QID_CTX_MASK;
+}
+
+static unsigned int
+spinlock_qid2cpu(unsigned int qid)
+{
+    return (qid >> SPINLOCK_QID_CPU_SHIFT) & SPINLOCK_QID_CPU_MASK;
+}
+
+static unsigned int
+spinlock_build_qid(unsigned int cpu, unsigned int ctx)
+{
+    return (cpu << SPINLOCK_QID_CPU_SHIFT)
+           | (ctx << SPINLOCK_QID_CTX_SHIFT)
+           | SPINLOCK_QID_LOCK_BIT;
+}
+
+static void
+spinlock_get_local_qnode(struct spinlock_qnode **qnodep, unsigned int *qidp)
+{
+    struct spinlock_cpu_data *cpu_data;
+    unsigned int ctx;
+
+    cpu_data = cpu_local_ptr(spinlock_cpu_data);
+
+    /* TODO NMI support */
+    ctx = thread_interrupted() ? SPINLOCK_CTX_INTR : SPINLOCK_CTX_THREAD;
+    *qnodep = &cpu_data->qnodes[ctx - 1];
+    *qidp = spinlock_build_qid(cpu_id(), ctx);
+}
+
+static struct spinlock_qnode *
+spinlock_get_remote_qnode(unsigned int qid)
+{
+    struct spinlock_cpu_data *cpu_data;
+    unsigned int ctx, cpu;
+
+    ctx = spinlock_qid2ctx(qid);
+
+    if (ctx == SPINLOCK_CTX_INVALID) {
+        return NULL;
+    }
+
+    ctx--;
+    assert(ctx < ARRAY_SIZE(cpu_data->qnodes));
+
+    cpu = spinlock_qid2cpu(qid);
+    cpu_data = percpu_ptr(spinlock_cpu_data, cpu);
+    return &cpu_data->qnodes[ctx];
+}
+
+static void
+spinlock_store_first_qid(struct spinlock *lock, unsigned int newqid)
+{
+    unsigned int oldval, newval, prev;
+
+    assert(newqid < SPINLOCK_QID_MAX);
+
+    newqid <<= SPINLOCK_QID_MAX_BITS;
+
+    do {
+        oldval = read_once(lock->value);
+        newval = newqid | (oldval & SPINLOCK_QID_MASK);
+        prev = atomic_cas_uint(&lock->value, oldval, newval);
+    } while (prev != oldval);
+}
+
+static unsigned int
+spinlock_load_first_qid(const struct spinlock *lock)
+{
+    unsigned int value;
+
+    value = read_once(lock->value);
+    return (value >> SPINLOCK_QID_MAX_BITS) & SPINLOCK_QID_MASK;
+}
+
+static unsigned int
+spinlock_swap_last_qid(struct spinlock *lock, unsigned int newqid)
+{
+    unsigned int oldval, newval, prev;
+
+    assert(newqid < SPINLOCK_QID_MAX);
+
+    do {
+        oldval = read_once(lock->value);
+        newval = (oldval & (SPINLOCK_QID_MASK << SPINLOCK_QID_MAX_BITS))
+                 | newqid;
+        prev = atomic_cas_uint(&lock->value, oldval, newval);
+    } while (prev != oldval);
+
+    return prev & SPINLOCK_QID_MASK;
+}
+
+static unsigned int
+spinlock_try_downgrade(struct spinlock *lock, unsigned int oldqid)
+{
+    unsigned int prev;
+
+    prev = atomic_cas_uint(&lock->value, oldqid, SPINLOCK_QID_LOCKED);
+
+    assert((prev >> SPINLOCK_QID_MAX_BITS) == 0);
+    assert(prev != SPINLOCK_QID_NULL);
+
+    if (prev != oldqid) {
+        return ERROR_BUSY;
+    }
+
+    return 0;
+}
+
+void
+spinlock_lock_slow(struct spinlock *lock)
+{
+    struct spinlock_qnode *qnode, *prev_qnode;
+    unsigned int qid, prev_qid, next_qid, ctx;
+    bool locked;
+    int error;
+
+    spinlock_get_local_qnode(&qnode, &qid);
+    qnode->next_qid = SPINLOCK_QID_NULL;
+
+    prev_qid = spinlock_swap_last_qid(lock, qid);
+
+    if (prev_qid != SPINLOCK_QID_NULL) {
+        qnode->locked = true;
+        ctx = spinlock_qid2ctx(prev_qid);
+
+        if (ctx == SPINLOCK_CTX_INVALID) {
+            spinlock_store_first_qid(lock, qid);
+        } else {
+            prev_qnode = spinlock_get_remote_qnode(prev_qid);
+            write_once(prev_qnode->next_qid, qid);
+        }
+
+        for (;;) {
+            locked = read_once(qnode->locked);
+            mb_load();
+
+            if (!locked) {
+                break;
+            }
+
+            cpu_pause();
+        }
+
+        spinlock_store_first_qid(lock, SPINLOCK_QID_NULL);
+    }
+
+    error = spinlock_try_downgrade(lock, qid);
+
+    if (!error) {
+        return;
+    }
+
+    for (;;) {
+        next_qid = read_once(qnode->next_qid);
+
+        if (next_qid != SPINLOCK_QID_NULL) {
+            break;
+        }
+
+        cpu_pause();
+    }
+
+    spinlock_store_first_qid(lock, next_qid);
+}
+
+void
+spinlock_unlock_slow(struct spinlock *lock)
+{
+    struct spinlock_qnode *next_qnode;
+    unsigned int next_qid;
+
+    for (;;) {
+        next_qid = spinlock_load_first_qid(lock);
+
+        if (next_qid != SPINLOCK_QID_NULL) {
+            break;
+        }
+
+        cpu_pause();
+    }
+
+    mb_store();
+    next_qnode = spinlock_get_remote_qnode(next_qid);
+    write_once(next_qnode->locked, false);
+}
diff --git a/kern/spinlock.h b/kern/spinlock.h
index b165e759..62974922 100644
--- a/kern/spinlock.h
+++ b/kern/spinlock.h
@@ -15,16 +15,17 @@
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  *
  *
- * Spin lock.
+ * Spin locks.
  *
  * Critical sections built with spin locks run with preemption disabled.
+ *
+ * This module provides fair spin locks which guarantee time-bounded lock
+ * acquisition depending only on the number of contending processors.
  */
 
 #ifndef _KERN_SPINLOCK_H
 #define _KERN_SPINLOCK_H
 
-#include <kern/assert.h>
-#include <kern/macros.h>
 #include <kern/spinlock_i.h>
 #include <kern/spinlock_types.h>
 #include <kern/thread.h>
@@ -32,21 +33,27 @@
 
 struct spinlock;
 
-static inline void
-spinlock_init(struct spinlock *lock)
-{
-    lock->locked = 0;
-}
+#define spinlock_assert_locked(lock) assert((lock)->value != SPINLOCK_UNLOCKED)
 
-#define spinlock_assert_locked(lock) assert((lock)->locked)
+/*
+ * Initialize a spin lock.
+ */
+void spinlock_init(struct spinlock *lock);
 
+/*
+ * Attempt to lock the given spin lock.
+ *
+ * Return 0 on success, ERROR_BUSY if the spin lock is already locked.
+ *
+ * Preemption is disabled on success.
+ */
 static inline int
 spinlock_trylock(struct spinlock *lock)
 {
     int error;
 
     thread_preempt_disable();
-    error = spinlock_tryacquire(lock);
+    error = spinlock_lock_fast(lock);
 
     if (error) {
         thread_preempt_enable();
@@ -55,17 +62,35 @@ spinlock_trylock(struct spinlock *lock)
     return error;
 }
 
+/*
+ * Lock a spin lock.
+ *
+ * If the spin lock is already locked, the calling thread spins until the
+ * spin lock is unlocked.
+ *
+ * A spin lock can only be locked once.
+ *
+ * This function disables preemption.
+ */
 static inline void
 spinlock_lock(struct spinlock *lock)
 {
     thread_preempt_disable();
-    spinlock_acquire(lock);
+    spinlock_lock_common(lock);
 }
 
+/*
+ * Unlock a spin lock.
+ *
+ * The spin lock must be locked, and must have been locked on the same
+ * processor it is unlocked on.
+ *
+ * This function may reenable preemption.
+ */
 static inline void
 spinlock_unlock(struct spinlock *lock)
 {
-    spinlock_release(lock);
+    spinlock_unlock_common(lock);
     thread_preempt_enable();
 }
 
@@ -74,6 +99,15 @@ spinlock_unlock(struct spinlock *lock)
  * critical sections.
  */
 
+/*
+ * Attempt to lock the given spin lock.
+ *
+ * Return 0 on success, ERROR_BUSY if the spin lock is already locked.
+ *
+ * Preemption and interrupts are disabled on success, in which case the
+ * flags passed by the caller are filled with the previous value of the
+ * CPU flags.
+ */
 static inline int
 spinlock_trylock_intr_save(struct spinlock *lock, unsigned long *flags)
 {
@@ -81,7 +115,7 @@ spinlock_trylock_intr_save(struct spinlock *lock, unsigned long *flags)
 
     thread_preempt_disable();
     cpu_intr_save(flags);
-    error = spinlock_tryacquire(lock);
+    error = spinlock_lock_fast(lock);
 
     if (error) {
         cpu_intr_restore(*flags);
@@ -91,18 +125,38 @@
     return error;
 }
 
+/*
+ * Lock a spin lock.
+ *
+ * If the spin lock is already locked, the calling thread spins until the
+ * spin lock is unlocked.
+ *
+ * A spin lock can only be locked once.
+ *
+ * This function disables preemption and interrupts. The flags passed by
+ * the caller are filled with the previous value of the CPU flags.
+ */
 static inline void
 spinlock_lock_intr_save(struct spinlock *lock, unsigned long *flags)
 {
     thread_preempt_disable();
     cpu_intr_save(flags);
-    spinlock_acquire(lock);
+    spinlock_lock_common(lock);
 }
 
+/*
+ * Unlock a spin lock.
+ *
+ * The spin lock must be locked, and must have been locked on the same
+ * processor it is unlocked on.
+ *
+ * This function may reenable preemption and interrupts, using the given
+ * flags which must have been obtained with a lock or trylock operation.
+ */
 static inline void
 spinlock_unlock_intr_restore(struct spinlock *lock, unsigned long flags)
 {
-    spinlock_release(lock);
+    spinlock_unlock_common(lock);
     cpu_intr_restore(flags);
     thread_preempt_enable();
 }
diff --git a/kern/spinlock_i.h b/kern/spinlock_i.h
index 88cb89e4..69de0b01 100644
--- a/kern/spinlock_i.h
+++ b/kern/spinlock_i.h
@@ -18,49 +18,76 @@
 #ifndef _KERN_SPINLOCK_I_H
 #define _KERN_SPINLOCK_I_H
 
-#include <kern/assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
 #include <kern/error.h>
 #include <kern/spinlock_types.h>
 #include <machine/atomic.h>
 #include <machine/cpu.h>
 
+/*
+ * Non-contended lock values.
+ *
+ * Any other lock value implies a contended lock.
+ */
+#define SPINLOCK_UNLOCKED   0
+#define SPINLOCK_LOCKED     1
+
 static inline int
-spinlock_tryacquire(struct spinlock *lock)
+spinlock_lock_fast(struct spinlock *lock)
 {
-    unsigned int state;
+    unsigned int prev;
 
-    state = atomic_swap_uint(&lock->locked, 1);
+    prev = atomic_cas_uint(&lock->value, SPINLOCK_UNLOCKED, SPINLOCK_LOCKED);
 
-    if (state == 0) {
-        return 0;
+    if (prev != SPINLOCK_UNLOCKED) {
+        return ERROR_BUSY;
     }
 
-    return ERROR_BUSY;
+    return 0;
 }
 
+static inline int
+spinlock_unlock_fast(struct spinlock *lock)
+{
+    unsigned int prev;
+
+    prev = atomic_cas_uint(&lock->value, SPINLOCK_LOCKED, SPINLOCK_UNLOCKED);
+
+    if (prev != SPINLOCK_LOCKED) {
+        return ERROR_BUSY;
+    }
+
+    return 0;
+}
+
+void spinlock_lock_slow(struct spinlock *lock);
+
+void spinlock_unlock_slow(struct spinlock *lock);
+
 static inline void
-spinlock_acquire(struct spinlock *lock)
+spinlock_lock_common(struct spinlock *lock)
 {
     int error;
 
-    for (;;) {
-        error = spinlock_tryacquire(lock);
+    error = spinlock_lock_fast(lock);
 
-        if (!error) {
-            break;
-        }
-
-        cpu_pause();
+    if (error) {
+        spinlock_lock_slow(lock);
     }
 }
 
 static inline void
-spinlock_release(struct spinlock *lock)
+spinlock_unlock_common(struct spinlock *lock)
 {
-    unsigned int locked;
+    int error;
 
-    locked = atomic_swap_uint(&lock->locked, 0);
-    assert(locked);
+    error = spinlock_unlock_fast(lock);
+
+    if (error) {
+        spinlock_unlock_slow(lock);
+    }
 }
 
 #endif /* _KERN_SPINLOCK_I_H */
diff --git a/kern/spinlock_types.h b/kern/spinlock_types.h
index f5a03866..7dbca377 100644
--- a/kern/spinlock_types.h
+++ b/kern/spinlock_types.h
@@ -22,7 +22,7 @@
 #define _KERN_SPINLOCK_TYPES_H
 
 struct spinlock {
-    unsigned int locked;
+    unsigned int value;
 };
 
 #endif /* _KERN_SPINLOCK_TYPES_H */
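As a usage note, here is a short sketch of how client code would use the API added above, based only on the function comments introduced in kern/spinlock.h; the counter structure and its functions are hypothetical examples, not part of the commit.

/* Hypothetical object protected by a spin lock. */

#include <kern/spinlock.h>

struct counter {
    struct spinlock lock;
    unsigned long value;
};

static void
counter_init(struct counter *counter)
{
    spinlock_init(&counter->lock);
    counter->value = 0;
}

/* Thread context: preemption is disabled while the lock is held. */
static void
counter_inc(struct counter *counter)
{
    spinlock_lock(&counter->lock);
    counter->value++;
    spinlock_unlock(&counter->lock);
}

/*
 * Data also accessed from interrupt context: use the _intr_save variants
 * so that interrupts are disabled as well and restored on unlock.
 */
static unsigned long
counter_read_intr(struct counter *counter)
{
    unsigned long flags, value;

    spinlock_lock_intr_save(&counter->lock, &flags);
    value = counter->value;
    spinlock_unlock_intr_restore(&counter->lock, flags);

    return value;
}

The _intr_save variants are the ones to use when the same lock can be taken from interrupt context, since plain spinlock_lock only disables preemption.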