/*
* Copyright (c) 2017 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*
*
* This implementation is based on "Solaris(tm) Internals: Solaris 10 and
* OpenSolaris Kernel Architecture, Second Edition" by Richard McDougall
* and Jim Mauro. See "Part Six: Platform Specifics, Chapter 17: Locking
* and Synchronization, Section 7 Turnstiles and Priority Inheritance".
*
* The differences are outlined below.
*
* This implementation doesn't support read/write locks, only mutual
* exclusion, because ownership doesn't apply well to read/write locks.
*
* Instead of using an external sleep queue object, this module implements
* that functionality itself. The reasons behind this decision are :
* - the use of expensive priority lists used to queue threads, that
* a simpler sleep queue implementation shouldn't use
* - increased coupling with the scheduler
*
* Locking order : bucket (turnstile) -> turnstile_td -> thread_runq
*
* This order allows changing the owner of a turnstile without unlocking it
* which is important because a turnstile is normally used to synchronize
* access to the owner. Unlocking a turnstile would allow the owner to
* change and make complex transient states visible. The drawback is that
* a thread cannot be requeued atomically when its priority is changed.
* That deferred requeue is done during priority propagation.
*
* TODO Analyse hash parameters.
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/*
* Locking keys :
* (b) bucket
*/
struct turnstile_bucket {
alignas(CPU_L1_SIZE) struct spinlock lock;
struct hlist turnstiles; /* (b) */
};
/*
* Adding/removing waiters to/from a turnstile are performed while
* holding both the waiter's thread data and the turnstile locks.
*
* Changing the owner of a turnstile and linking/unlinking the turnstile
* into/from the owner's list of owned turnstiles are done atomically,
* while holding both the owner's thread data and the turnstile locks.
*
* Locking keys :
* (b) bucket
* (t) turnstile_td
*
* (*) A turnstile is referenced in thread data after/before being
* added/removed to/from its bucket. Referencing a turnstile in
* thread data requires holding the thread data lock. This implies
* that, while holding the thread data lock, if the referenced
* turnstile isn't NULL, the bucket pointer is also not NULL and
* stable.
*/
struct turnstile {
struct turnstile_bucket *bucket; /* (b*) */
struct hlist_node node; /* (b) */
const void *sync_obj; /* (b) */
struct plist waiters; /* (b,t) */
struct turnstile *next_free; /* (b) */
struct turnstile_waiter *top_waiter; /* (b,t) */
struct thread *owner; /* (b,t) */
struct plist_node td_node; /* (t) */
};
/*
* Locking keys :
* (b) bucket
* (t) turnstile_td
*/
struct turnstile_waiter {
struct plist_node node; /* (b,t) */
struct thread *thread; /* (b,t) */
bool awaken; /* (b) */
};
#define TURNSTILE_HTABLE_SIZE 128
#if !ISP2(TURNSTILE_HTABLE_SIZE)
#error "hash table size must be a power of two"
#endif /* !ISP2(TURNSTILE_HTABLE_SIZE) */
#define TURNSTILE_HTABLE_MASK (TURNSTILE_HTABLE_SIZE - 1)
static struct turnstile_bucket turnstile_htable[TURNSTILE_HTABLE_SIZE];
static struct kmem_cache turnstile_cache;
static uintptr_t
turnstile_hash(const void *addr)
{
return ((uintptr_t)addr >> 8) ^ (uintptr_t)addr;
}
static void
turnstile_waiter_init(struct turnstile_waiter *waiter, struct thread *thread)
{
plist_node_init(&waiter->node, thread_real_global_priority(thread));
waiter->thread = thread;
waiter->awaken = false;
}
static unsigned int
turnstile_waiter_priority(const struct turnstile_waiter *waiter)
{
return plist_node_priority(&waiter->node);
}
static bool
turnstile_waiter_awaken(const struct turnstile_waiter *waiter)
{
return waiter->awaken;
}
static void
turnstile_waiter_set_awaken(struct turnstile_waiter *waiter)
{
waiter->awaken = true;
}
static void
turnstile_waiter_clear_awaken(struct turnstile_waiter *waiter)
{
waiter->awaken = false;
}
static void
turnstile_waiter_wakeup(struct turnstile_waiter *waiter)
{
if (turnstile_waiter_awaken(waiter)) {
return;
}
thread_wakeup(waiter->thread);
turnstile_waiter_set_awaken(waiter);
}
static void
turnstile_update_top_waiter(struct turnstile *turnstile)
{
if (turnstile_empty(turnstile)) {
turnstile->top_waiter = NULL;
return;
}
turnstile->top_waiter = plist_last_entry(&turnstile->waiters,
struct turnstile_waiter, node);
}
static void
turnstile_add_waiter(struct turnstile *turnstile,
struct turnstile_waiter *waiter)
{
assert(!turnstile_waiter_awaken(waiter));
plist_add(&turnstile->waiters, &waiter->node);
turnstile_update_top_waiter(turnstile);
}
static void
turnstile_remove_waiter(struct turnstile *turnstile,
struct turnstile_waiter *waiter)
{
plist_remove(&turnstile->waiters, &waiter->node);
turnstile_update_top_waiter(turnstile);
}
static void
turnstile_requeue_waiter(struct turnstile *turnstile,
struct turnstile_waiter *waiter)
{
unsigned int global_priority;
global_priority = thread_real_global_priority(waiter->thread);
assert(global_priority != plist_node_priority(&waiter->node));
plist_remove(&turnstile->waiters, &waiter->node);
plist_node_set_priority(&waiter->node, global_priority);
plist_add(&turnstile->waiters, &waiter->node);
turnstile_update_top_waiter(turnstile);
}
static void
turnstile_td_set_waiter(struct turnstile_td *td,
struct turnstile_waiter *waiter)
{
td->waiter = waiter;
}
static struct turnstile_waiter *
turnstile_td_get_waiter(const struct turnstile_td *td)
{
return td->waiter;
}
static void
turnstile_td_update_top_priority(struct turnstile_td *td)
{
struct turnstile_waiter *top_waiter;
struct turnstile *top_turnstile;
if (plist_empty(&td->owned_turnstiles)) {
td->top_global_priority = 0;
return;
}
top_turnstile = plist_last_entry(&td->owned_turnstiles,
struct turnstile, td_node);
top_waiter = top_turnstile->top_waiter;
if (top_waiter == NULL) {
td->top_global_priority = 0;
} else {
td->top_global_priority = turnstile_waiter_priority(top_waiter);
td->top_sched_policy = thread_real_sched_policy(top_waiter->thread);
td->top_priority = thread_real_priority(top_waiter->thread);
}
}
static void
turnstile_td_own(struct turnstile_td *td, struct turnstile *turnstile)
{
struct turnstile_waiter *top_waiter;
unsigned int top_priority;
assert(turnstile->owner == NULL);
top_waiter = turnstile->top_waiter;
assert(top_waiter != NULL);
top_priority = thread_real_global_priority(top_waiter->thread);
plist_node_init(&turnstile->td_node, top_priority);
plist_add(&td->owned_turnstiles, &turnstile->td_node);
turnstile_td_update_top_priority(td);
turnstile->owner = structof(td, struct thread, turnstile_td);
}
static void
turnstile_td_disown(struct turnstile_td *td, struct turnstile *turnstile)
{
assert(turnstile->owner == structof(td, struct thread, turnstile_td));
assert(!plist_node_unlinked(&turnstile->td_node));
plist_remove(&td->owned_turnstiles, &turnstile->td_node);
turnstile_td_update_top_priority(td);
turnstile->owner = NULL;
}
/*
* A turnstile must be "reowned" whenever its top waiter has changed.
*/
static void
turnstile_td_reown(struct turnstile_td *td, struct turnstile *turnstile)
{
assert(turnstile->owner == structof(td, struct thread, turnstile_td));
assert(!plist_node_unlinked(&turnstile->td_node));
plist_remove(&td->owned_turnstiles, &turnstile->td_node);
turnstile->owner = NULL;
turnstile_td_own(td, turnstile);
}
/*
* The caller must hold the turnstile thread data lock and no turnstile
* locks when calling this function. The thread data are unlocked on return.
*
* In addition, this function drops a reference on the thread associated
* with the given thread data.
*/
static void
turnstile_td_propagate_priority_loop(struct turnstile_td *td)
{
unsigned int user_priority, real_priority, top_priority;
struct turnstile_waiter *waiter;
struct turnstile *turnstile;
struct thread *thread;
unsigned short priority;
unsigned char policy;
int error;
/*
* At the very least, this function must make sure that :
* - the given thread has its intended priority, which is the
* highest among its own and all the waiters in the turnstiles
* it owns, and
* - the thread is at its intended position in the turnstile it's
* waiting on, if any.
*/
for (;;) {
thread = structof(td, struct thread, turnstile_td);
user_priority = thread_user_global_priority(thread);
real_priority = thread_real_global_priority(thread);
top_priority = td->top_global_priority;
if (top_priority > user_priority) {
policy = td->top_sched_policy;
priority = td->top_priority;
} else {
top_priority = user_priority;
policy = thread_user_sched_policy(thread);
priority = thread_user_priority(thread);
}
if (top_priority != real_priority) {
thread_pi_setscheduler(thread, policy, priority);
}
waiter = turnstile_td_get_waiter(td);
if ((waiter == NULL)
|| (top_priority == turnstile_waiter_priority(waiter))) {
spinlock_unlock(&td->lock);
thread_unref(thread);
return;
}
turnstile = turnstile_td_get_turnstile(td);
assert(turnstile != NULL);
error = spinlock_trylock(&turnstile->bucket->lock);
if (error) {
spinlock_unlock(&td->lock);
spinlock_lock(&td->lock);
continue;
}
/*
* This couldn't be done while changing the thread's priority
* because of locking restrictions. Do it now.
*/
turnstile_requeue_waiter(turnstile, waiter);
spinlock_unlock(&td->lock);
thread_unref(thread);
thread = turnstile->owner;
if (thread == NULL) {
break;
}
td = thread_turnstile_td(thread);
thread_ref(thread);
spinlock_lock(&td->lock);
turnstile_td_reown(td, turnstile);
spinlock_unlock(&turnstile->bucket->lock);
}
spinlock_unlock(&turnstile->bucket->lock);
}
void
turnstile_td_propagate_priority(struct turnstile_td *td)
{
struct thread *thread;
thread = structof(td, struct thread, turnstile_td);
thread_ref(thread);
spinlock_lock(&td->lock);
turnstile_td_propagate_priority_loop(td);
}
static void
turnstile_assert_init_state(const struct turnstile *turnstile)
{
assert(turnstile->bucket == NULL);
assert(turnstile->sync_obj == NULL);
assert(plist_empty(&turnstile->waiters));
assert(turnstile->next_free == NULL);
assert(turnstile->top_waiter == NULL);
}
static void
turnstile_use(struct turnstile *turnstile, const void *sync_obj)
{
assert(turnstile->sync_obj == NULL);
turnstile->sync_obj = sync_obj;
}
static void
turnstile_unuse(struct turnstile *turnstile)
{
assert(turnstile->sync_obj != NULL);
turnstile->sync_obj = NULL;
}
static bool
turnstile_in_use(const struct turnstile *turnstile)
{
return turnstile->sync_obj != NULL;
}
static bool
turnstile_in_use_by(const struct turnstile *turnstile, const void *sync_obj)
{
return turnstile->sync_obj == sync_obj;
}
static void
turnstile_bucket_init(struct turnstile_bucket *bucket)
{
spinlock_init(&bucket->lock);
hlist_init(&bucket->turnstiles);
}
static struct turnstile_bucket *
turnstile_bucket_get(const void *sync_obj)
{
uintptr_t index;
index = turnstile_hash(sync_obj) & TURNSTILE_HTABLE_MASK;
assert(index < ARRAY_SIZE(turnstile_htable));
return &turnstile_htable[index];
}
static void
turnstile_bucket_add(struct turnstile_bucket *bucket,
struct turnstile *turnstile)
{
assert(turnstile->bucket == NULL);
turnstile->bucket = bucket;
hlist_insert_head(&bucket->turnstiles, &turnstile->node);
}
static void
turnstile_bucket_remove(struct turnstile_bucket *bucket,
struct turnstile *turnstile)
{
assert(turnstile->bucket == bucket);
turnstile->bucket = NULL;
hlist_remove(&turnstile->node);
}
static struct turnstile *
turnstile_bucket_lookup(const struct turnstile_bucket *bucket,
const void *sync_obj)
{
struct turnstile *turnstile;
hlist_for_each_entry(&bucket->turnstiles, turnstile, node) {
if (turnstile_in_use_by(turnstile, sync_obj)) {
return turnstile;
}
}
return NULL;
}
static void
turnstile_ctor(void *ptr)
{
struct turnstile *turnstile;
turnstile = ptr;
turnstile->bucket = NULL;
turnstile->sync_obj = NULL;
plist_init(&turnstile->waiters);
turnstile->next_free = NULL;
turnstile->top_waiter = NULL;
turnstile->owner = NULL;
}
static int __init
turnstile_setup(void)
{
unsigned int i;
for (i = 0; i < ARRAY_SIZE(turnstile_htable); i++) {
turnstile_bucket_init(&turnstile_htable[i]);
}
kmem_cache_init(&turnstile_cache, "turnstile", sizeof(struct turnstile),
CPU_L1_SIZE, turnstile_ctor, 0);
return 0;
}
INIT_OP_DEFINE(turnstile_setup,
INIT_OP_DEP(kmem_setup, true));
struct turnstile *
turnstile_create(void)
{
struct turnstile *turnstile;
turnstile = kmem_cache_alloc(&turnstile_cache);
if (turnstile == NULL) {
return NULL;
}
turnstile_assert_init_state(turnstile);
return turnstile;
}
void
turnstile_destroy(struct turnstile *turnstile)
{
turnstile_assert_init_state(turnstile);
kmem_cache_free(&turnstile_cache, turnstile);
}
struct turnstile *
turnstile_acquire(const void *sync_obj)
{
struct turnstile_bucket *bucket;
struct turnstile *turnstile;
assert(sync_obj != NULL);
bucket = turnstile_bucket_get(sync_obj);
spinlock_lock(&bucket->lock);
turnstile = turnstile_bucket_lookup(bucket, sync_obj);
if (turnstile == NULL) {
spinlock_unlock(&bucket->lock);
return NULL;
}
return turnstile;
}
void
turnstile_release(struct turnstile *turnstile)
{
spinlock_unlock(&turnstile->bucket->lock);
}
static void
turnstile_push_free(struct turnstile *turnstile,
struct turnstile *free_turnstile)
{
assert(free_turnstile->next_free == NULL);
free_turnstile->next_free = turnstile->next_free;
turnstile->next_free = free_turnstile;
}
static struct turnstile *
turnstile_pop_free(struct turnstile *turnstile)
{
struct turnstile *free_turnstile;
free_turnstile = turnstile->next_free;
if (free_turnstile == NULL) {
return NULL;
}
turnstile->next_free = free_turnstile->next_free;
free_turnstile->next_free = NULL;
return free_turnstile;
}
struct turnstile *
turnstile_lend(const void *sync_obj)
{
struct turnstile_bucket *bucket;
struct turnstile *turnstile, *prev;
struct turnstile_td *td;
assert(sync_obj != NULL);
turnstile = thread_turnstile_lend();
turnstile_assert_init_state(turnstile);
td = thread_turnstile_td(thread_self());
bucket = turnstile_bucket_get(sync_obj);
spinlock_lock(&bucket->lock);
prev = turnstile_bucket_lookup(bucket, sync_obj);
if (prev == NULL) {
turnstile_use(turnstile, sync_obj);
turnstile_bucket_add(bucket, turnstile);
} else {
turnstile_push_free(prev, turnstile);
turnstile = prev;
}
spinlock_lock(&td->lock);
turnstile_td_set_turnstile(td, turnstile);
spinlock_unlock(&td->lock);
return turnstile;
}
void
turnstile_return(struct turnstile *turnstile)
{
struct turnstile_bucket *bucket;
struct turnstile *free_turnstile;
struct turnstile_td *td;
assert(turnstile_in_use(turnstile));
td = thread_turnstile_td(thread_self());
spinlock_lock(&td->lock);
turnstile_td_set_turnstile(td, NULL);
spinlock_unlock(&td->lock);
bucket = turnstile->bucket;
free_turnstile = turnstile_pop_free(turnstile);
if (free_turnstile == NULL) {
turnstile_bucket_remove(bucket, turnstile);
turnstile_unuse(turnstile);
free_turnstile = turnstile;
}
spinlock_unlock(&bucket->lock);
turnstile_assert_init_state(free_turnstile);
thread_turnstile_return(free_turnstile);
}
bool
turnstile_empty(const struct turnstile *turnstile)
{
return plist_empty(&turnstile->waiters);
}
static void
turnstile_update_owner(struct turnstile *turnstile, struct thread *owner)
{
struct turnstile_td *td;
assert(owner != NULL);
assert((turnstile->owner == NULL) || (turnstile->owner == owner));
td = thread_turnstile_td(owner);
thread_ref(owner);
spinlock_lock(&td->lock);
if (turnstile_empty(turnstile)) {
if (turnstile->owner != NULL) {
turnstile_td_disown(td, turnstile);
}
} else if (turnstile->owner == NULL) {
turnstile_td_own(td, turnstile);
} else {
turnstile_td_reown(td, turnstile);
}
spinlock_unlock(&turnstile->bucket->lock);
turnstile_td_propagate_priority_loop(td);
spinlock_lock(&turnstile->bucket->lock);
}
static int
turnstile_wait_common(struct turnstile *turnstile, const char *wchan,
struct thread *owner, bool timed, uint64_t ticks)
{
struct turnstile_waiter waiter;
struct turnstile_td *td;
struct thread *thread;
int error;
error = 0;
thread = thread_self();
assert(thread != owner);
td = thread_turnstile_td(thread);
spinlock_lock(&td->lock);
turnstile_waiter_init(&waiter, thread);
turnstile_add_waiter(turnstile, &waiter);
turnstile_td_set_waiter(td, &waiter);
spinlock_unlock(&td->lock);
if (owner == NULL) {
if (turnstile->top_waiter == &waiter) {
turnstile_waiter_set_awaken(&waiter);
}
} else {
/* This function temporarily unlocks the turnstile */
turnstile_update_owner(turnstile, owner);
}
for (;;) {
if (!turnstile_waiter_awaken(&waiter)) {
if (!timed) {
thread_sleep(&turnstile->bucket->lock,
turnstile->sync_obj, wchan);
} else {
error = thread_timedsleep(&turnstile->bucket->lock,
turnstile->sync_obj, wchan, ticks);
if (error) {
if (turnstile_waiter_awaken(&waiter)) {
error = 0;
} else {
break;
}
}
}
}
/* Handle spurious wakeups */
if (!turnstile_waiter_awaken(&waiter)) {
continue;
}
/*
* The real priority of a thread may change between waking up
* and reacquiring the turnstile.
*/
if (turnstile->top_waiter == &waiter) {
break;
}
/* Otherwise, make sure the new top waiter is awaken */
turnstile_waiter_wakeup(turnstile->top_waiter);
turnstile_waiter_clear_awaken(&waiter);
}
spinlock_lock(&td->lock);
turnstile_td_set_waiter(td, NULL);
turnstile_remove_waiter(turnstile, &waiter);
spinlock_unlock(&td->lock);
if (error && (turnstile->owner != NULL)) {
/* This function temporarily unlocks the turnstile */
turnstile_update_owner(turnstile, turnstile->owner);
}
return error;
}
void
turnstile_wait(struct turnstile *turnstile, const char *wchan,
struct thread *owner)
{
int error;
error = turnstile_wait_common(turnstile, wchan, owner, false, 0);
assert(!error);
}
int
turnstile_timedwait(struct turnstile *turnstile, const char *wchan,
struct thread *owner, uint64_t ticks)
{
return turnstile_wait_common(turnstile, wchan, owner, true, ticks);
}
void
turnstile_signal(struct turnstile *turnstile)
{
struct turnstile_waiter *waiter;
if (turnstile_empty(turnstile)) {
return;
}
waiter = plist_last_entry(&turnstile->waiters,
struct turnstile_waiter, node);
turnstile_waiter_wakeup(waiter);
}
void
turnstile_own(struct turnstile *turnstile)
{
struct turnstile_td *td;
struct thread *owner;
unsigned int top_priority;
assert(turnstile->owner == NULL);
if (turnstile_empty(turnstile)) {
return;
}
owner = thread_self();
top_priority = turnstile_waiter_priority(turnstile->top_waiter);
assert(thread_real_global_priority(owner) >= top_priority);
td = thread_turnstile_td(owner);
spinlock_lock(&td->lock);
turnstile_td_own(td, turnstile);
spinlock_unlock(&td->lock);
}
void
turnstile_disown(struct turnstile *turnstile)
{
struct turnstile_td *td;
struct thread *owner;
if (turnstile->owner == NULL) {
return;
}
owner = thread_self();
assert(turnstile->owner == owner);
assert(!turnstile_empty(turnstile));
td = thread_turnstile_td(owner);
spinlock_lock(&td->lock);
turnstile_td_disown(td, turnstile);
spinlock_unlock(&td->lock);
}