/*
 * Copyright (c) 2017-2018 Richard Braun.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 *
 * This implementation is based on the paper "Algorithms for Scalable
 * Synchronization on Shared-Memory Multiprocessors" by John M. Mellor-Crummey
 * and Michael L. Scott, which describes MCS locks, among other algorithms.
 *
 * Here are additional issues this module solves that require modifications
 * to the original MCS algorithm :
 *  - There must not be any limit on the number of spin locks a thread may
 *    hold, and spin locks must not need dynamic memory allocation.
 *  - Unlocking a spin lock must be a non-blocking operation. Without
 *    this requirement, a locking operation may be interrupted in the
 *    middle of a hand-off sequence, preventing the unlock operation
 *    from completing, potentially causing tricky deadlocks.
 *  - Spin lock storage must not exceed 32 bits.
 *
 * In order to solve these issues, the lock owner is never part of the
 * lock queue. This makes it possible to use a qnode only during the lock
 * operation, not after. This means a single qnode per execution context
 * is sufficient, even when holding multiple spin locks simultaneously.
 *
 * In addition, instead of making the owner perform a hand-off sequence
 * to unblock the first waiter when unlocking, the latter directly spins
 * on the lock word, and is the one performing the hand-off sequence with
 * the second waiter. As a side effect, this also optimizes spinning for
 * the common case of a single waiter.
 *
 * When a lock is held, the lock bit is set, and when a lock is contended,
 * the contended bit is set. When contended, the lock word also contains
 * a compressed reference to the last waiter. That reference is called a
 * QID (for qnode ID). It is structured into two parts :
 *  - the execution context
 *  - the CPU ID
 *
 * The QID is used to uniquely identify a statically allocated qnode.
 * The resulting lock word layout is illustrated below.
 *
 * The lock operation must make sure that the lock value is restored
 * to SPINLOCK_LOCKED if there is no more contention, an operation
 * called downgrading.
 */
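/*
 * Illustration of the lock word layout described above, derived from the
 * macro definitions below. SPINLOCK_LOCKED is assumed to be 0x1, as implied
 * by the bit layout (it is provided by a header, not defined here) :
 *
 *   31                             3   2     1     0
 *  +--------------------------------+-----+-----+-----+
 *  |            QID CPU ID          | QID |  C  |  L  |
 *  |                                | ctx |     |     |
 *  +--------------------------------+-----+-----+-----+
 *
 *  L : lock bit (SPINLOCK_LOCKED, assumed 0x1)
 *  C : contended bit (SPINLOCK_CONTENDED, 0x2)
 *
 * For example, a waiter running in interrupt context (SPINLOCK_CTX_INTR = 1)
 * on CPU 5 builds the QID (5 << 1) | 1 = 0xb, and enqueueing it on a lock
 * that is held but not yet contended produces the lock word
 * (0xb << 2) | 0x2 | 0x1 = 0x2f.
 */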
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdalign.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <kern/atomic.h>
#include <kern/init.h>
#include <kern/macros.h>
#include <kern/percpu.h>
#include <kern/spinlock.h>
#include <kern/spinlock_i.h>
#include <kern/thread.h>
#include <machine/cpu.h>

#define SPINLOCK_CONTENDED          0x2

#define SPINLOCK_LOCKED_BITS        1
#define SPINLOCK_CONTENDED_BITS     1

#define SPINLOCK_QID_SHIFT          (SPINLOCK_CONTENDED_BITS \
                                     + SPINLOCK_LOCKED_BITS)

#define SPINLOCK_QID_CTX_BITS       1
#define SPINLOCK_QID_CTX_SHIFT      0
#define SPINLOCK_QID_CTX_MASK       ((1U << SPINLOCK_QID_CTX_BITS) - 1)

#define SPINLOCK_QID_CPU_BITS       29
#define SPINLOCK_QID_CPU_SHIFT      (SPINLOCK_QID_CTX_SHIFT \
                                     + SPINLOCK_QID_CTX_BITS)
#define SPINLOCK_QID_CPU_MASK       ((1U << SPINLOCK_QID_CPU_BITS) - 1)

#define SPINLOCK_BITS               (SPINLOCK_QID_CPU_BITS \
                                     + SPINLOCK_QID_CTX_BITS \
                                     + SPINLOCK_CONTENDED_BITS \
                                     + SPINLOCK_LOCKED_BITS)

#if CONFIG_MAX_CPUS > (1U << SPINLOCK_QID_CPU_BITS)
#error "maximum number of supported processors too large"
#endif

static_assert(SPINLOCK_BITS <= (CHAR_BIT * sizeof(uint32_t)),
              "spinlock too large");

struct spinlock_qnode {
    alignas(CPU_L1_SIZE) struct spinlock_qnode *next;
    bool locked;
};

/* TODO NMI support */
enum {
    SPINLOCK_CTX_THREAD,
    SPINLOCK_CTX_INTR,
    SPINLOCK_NR_CTXS
};

static_assert(SPINLOCK_NR_CTXS <= (SPINLOCK_QID_CTX_MASK + 1),
              "maximum number of contexts too large");

struct spinlock_cpu_data {
    struct spinlock_qnode qnodes[SPINLOCK_NR_CTXS];
};

static struct spinlock_cpu_data spinlock_cpu_data __percpu;

static struct spinlock_qnode *
spinlock_cpu_data_get_qnode(struct spinlock_cpu_data *cpu_data,
                            unsigned int ctx)
{
    assert(ctx < ARRAY_SIZE(cpu_data->qnodes));
    return &cpu_data->qnodes[ctx];
}

static uint32_t
spinlock_qid_build(unsigned int ctx, unsigned int cpu)
{
    assert(ctx <= SPINLOCK_QID_CTX_MASK);
    assert(cpu <= SPINLOCK_QID_CPU_MASK);

    return (cpu << SPINLOCK_QID_CPU_SHIFT) | (ctx << SPINLOCK_QID_CTX_SHIFT);
}

static unsigned int
spinlock_qid_ctx(uint32_t qid)
{
    return (qid >> SPINLOCK_QID_CTX_SHIFT) & SPINLOCK_QID_CTX_MASK;
}

static unsigned int
spinlock_qid_cpu(uint32_t qid)
{
    return (qid >> SPINLOCK_QID_CPU_SHIFT) & SPINLOCK_QID_CPU_MASK;
}

void
spinlock_init(struct spinlock *lock)
{
    lock->value = SPINLOCK_UNLOCKED;

#ifdef SPINLOCK_TRACK_OWNER
    lock->owner = NULL;
#endif /* SPINLOCK_TRACK_OWNER */
}

static void
spinlock_qnode_init(struct spinlock_qnode *qnode)
{
    qnode->next = NULL;
}

static struct spinlock_qnode *
spinlock_qnode_wait_next(const struct spinlock_qnode *qnode)
{
    struct spinlock_qnode *next;

    for (;;) {
        next = atomic_load(&qnode->next, ATOMIC_ACQUIRE);

        if (next) {
            break;
        }

        cpu_pause();
    }

    return next;
}

static void
spinlock_qnode_set_next(struct spinlock_qnode *qnode,
                        struct spinlock_qnode *next)
{
    assert(next);
    atomic_store(&qnode->next, next, ATOMIC_RELEASE);
}

static void
spinlock_qnode_set_locked(struct spinlock_qnode *qnode)
{
    qnode->locked = true;
}

static void
spinlock_qnode_wait_locked(const struct spinlock_qnode *qnode)
{
    bool locked;

    for (;;) {
        locked = atomic_load(&qnode->locked, ATOMIC_ACQUIRE);

        if (!locked) {
            break;
        }

        cpu_pause();
    }
}

static void
spinlock_qnode_clear_locked(struct spinlock_qnode *qnode)
{
    atomic_store(&qnode->locked, false, ATOMIC_RELEASE);
}

static void
spinlock_get_local_qnode(struct spinlock_qnode **qnode, uint32_t *qid)
{
    struct spinlock_cpu_data *cpu_data;
    unsigned int ctx;

    cpu_data = cpu_local_ptr(spinlock_cpu_data);
    ctx = thread_interrupted() ?
          SPINLOCK_CTX_INTR : SPINLOCK_CTX_THREAD;
    *qnode = spinlock_cpu_data_get_qnode(cpu_data, ctx);
    *qid = spinlock_qid_build(ctx, cpu_id());
}

static uint32_t
spinlock_enqueue(struct spinlock *lock, uint32_t qid)
{
    uint32_t old_value, new_value, prev, next;

    next = (qid << SPINLOCK_QID_SHIFT) | SPINLOCK_CONTENDED;

    for (;;) {
        old_value = atomic_load(&lock->value, ATOMIC_RELAXED);
        new_value = next | (old_value & SPINLOCK_LOCKED);
        prev = atomic_cas(&lock->value, old_value, new_value, ATOMIC_RELEASE);

        if (prev == old_value) {
            break;
        }

        cpu_pause();
    }

    return prev;
}

static struct spinlock_qnode *
spinlock_get_remote_qnode(uint32_t qid)
{
    struct spinlock_cpu_data *cpu_data;
    unsigned int ctx, cpu;

    /* This fence synchronizes with queueing */
    atomic_fence(ATOMIC_ACQUIRE);

    ctx = spinlock_qid_ctx(qid);
    cpu = spinlock_qid_cpu(qid);
    cpu_data = percpu_ptr(spinlock_cpu_data, cpu);
    return spinlock_cpu_data_get_qnode(cpu_data, ctx);
}

static void
spinlock_set_locked(struct spinlock *lock)
{
    atomic_or(&lock->value, SPINLOCK_LOCKED, ATOMIC_RELAXED);
}

static void
spinlock_wait_locked(const struct spinlock *lock)
{
    uint32_t value;

    for (;;) {
        value = atomic_load(&lock->value, ATOMIC_ACQUIRE);

        if (!(value & SPINLOCK_LOCKED)) {
            break;
        }

        cpu_pause();
    }
}

static int
spinlock_downgrade(struct spinlock *lock, uint32_t qid)
{
    uint32_t value, prev;

    value = (qid << SPINLOCK_QID_SHIFT) | SPINLOCK_CONTENDED;
    prev = atomic_cas(&lock->value, value, SPINLOCK_LOCKED, ATOMIC_RELAXED);
    assert(prev & SPINLOCK_CONTENDED);

    if (prev != value) {
        return EBUSY;
    }

    return 0;
}

void
spinlock_lock_slow(struct spinlock *lock)
{
    struct spinlock_qnode *qnode, *prev_qnode, *next_qnode;
    uint32_t prev, qid;
    int error;

    spinlock_get_local_qnode(&qnode, &qid);
    spinlock_qnode_init(qnode);

    prev = spinlock_enqueue(lock, qid);

    if (prev & SPINLOCK_CONTENDED) {
        prev_qnode = spinlock_get_remote_qnode(prev >> SPINLOCK_QID_SHIFT);
        spinlock_qnode_set_locked(qnode);
        spinlock_qnode_set_next(prev_qnode, qnode);
        spinlock_qnode_wait_locked(qnode);
    }

    /*
     * If uncontended, the previous lock value could be used to check whether
     * the lock bit was also cleared, but this wait operation also enforces
     * acquire ordering.
     */
    spinlock_wait_locked(lock);

    spinlock_own(lock);
    error = spinlock_downgrade(lock, qid);

    if (!error) {
        return;
    }

    spinlock_set_locked(lock);
    next_qnode = spinlock_qnode_wait_next(qnode);
    spinlock_qnode_clear_locked(next_qnode);
}

static int __init
spinlock_setup(void)
{
    return 0;
}

INIT_OP_DEFINE(spinlock_setup,
               INIT_OP_DEP(thread_setup_booter, true));
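/*
 * Example usage sketch. spinlock_init() is defined above; spinlock_lock()
 * and spinlock_unlock() are assumed to be the fast-path wrappers declared
 * in the spinlock header, with spinlock_lock() falling back to
 * spinlock_lock_slow() when the uncontended acquisition fails :
 *
 *   static struct spinlock example_lock;
 *
 *   spinlock_init(&example_lock);
 *
 *   spinlock_lock(&example_lock);
 *   ... critical section ...
 *   spinlock_unlock(&example_lock);
 */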