/*
* Copyright (c) 2011-2018 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see .
*
* Upstream site with license notes :
* http://git.sceen.net/rbraun/librbraun.git/
*/
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
/*
* Mask applied on an entry to obtain its address.
*/
#define RDXTREE_ENTRY_ADDR_MASK (~0x3UL)
/*
* Global properties used to shape radix trees.
*/
#define RDXTREE_RADIX 6
#define RDXTREE_RADIX_SIZE (1UL << RDXTREE_RADIX)
#define RDXTREE_RADIX_MASK (RDXTREE_RADIX_SIZE - 1)
#if RDXTREE_RADIX < 6
typedef unsigned long rdxtree_bm_t;
#define rdxtree_ffs(x) __builtin_ffsl(x)
#elif RDXTREE_RADIX == 6 /* RDXTREE_RADIX < 6 */
typedef unsigned long long rdxtree_bm_t;
#define rdxtree_ffs(x) __builtin_ffsll(x)
#else /* RDXTREE_RADIX < 6 */
#error "radix too high"
#endif /* RDXTREE_RADIX < 6 */
/*
* Allocation bitmap size in bits.
*/
#define RDXTREE_BM_SIZE (sizeof(rdxtree_bm_t) * CHAR_BIT)
/*
* Empty/full allocation bitmap words.
*/
#define RDXTREE_BM_EMPTY ((rdxtree_bm_t)0)
#define RDXTREE_BM_FULL \
((~(rdxtree_bm_t)0) >> (RDXTREE_BM_SIZE - RDXTREE_RADIX_SIZE))
/*
* Radix tree node.
*
* The height of a tree is the number of nodes to traverse until stored
* pointers are reached. A height of 0 means the entries of a node (or the
* tree root) directly point to stored pointers.
*
* The index is valid if and only if the parent isn't NULL.
*
* Concerning the allocation bitmap, a bit is set when the node it denotes,
* or one of its children, can be used to allocate an entry. Conversely, a bit
* is clear when the matching node and all of its children have no free entry.
*
* In order to support safe lockless lookups, in particular during a resize,
* each node includes the height of its subtree, which is invariant during
* the entire node lifetime. Since the tree height does vary, it can't be
* used to determine whether the tree root is a node or a stored pointer.
* This implementation assumes that all nodes and stored pointers are at least
* 4-byte aligned, and uses the least significant bit of entries to indicate
* the pointer type. This bit is set for internal nodes, and clear for stored
* pointers so that they can be accessed from slots without conversion.
*/
struct rdxtree_node {
union {
struct {
struct rdxtree_node *parent;
unsigned short index;
};
/* Deferred destruction when unlinked */
struct work work;
};
unsigned short height;
unsigned short nr_entries;
rdxtree_bm_t alloc_bm;
void *entries[RDXTREE_RADIX_SIZE];
};
static struct kmem_cache rdxtree_node_cache;
static inline void
rdxtree_assert_alignment(const void *ptr)
{
assert(((uintptr_t)ptr & ~RDXTREE_ENTRY_ADDR_MASK) == 0);
(void)ptr;
}
static inline void *
rdxtree_entry_addr(void *entry)
{
return (void *)((uintptr_t)entry & RDXTREE_ENTRY_ADDR_MASK);
}
static inline bool
rdxtree_entry_is_node(const void *entry)
{
return ((uintptr_t)entry & 1) != 0;
}
static inline void *
rdxtree_node_to_entry(struct rdxtree_node *node)
{
return (void *)((uintptr_t)node | 1);
}
static void
rdxtree_node_ctor(void *buf)
{
struct rdxtree_node *node;
node = buf;
node->nr_entries = 0;
node->alloc_bm = RDXTREE_BM_FULL;
memset(node->entries, 0, sizeof(node->entries));
}
static int
rdxtree_node_create(struct rdxtree_node **nodep, unsigned short height)
{
struct rdxtree_node *node;
node = kmem_cache_alloc(&rdxtree_node_cache);
if (node == NULL) {
return ENOMEM;
}
rdxtree_assert_alignment(node);
node->parent = NULL;
node->height = height;
*nodep = node;
return 0;
}
static void
rdxtree_node_destroy(struct rdxtree_node *node)
{
/* See rdxtree_shrink() */
if (node->nr_entries != 0) {
assert(node->nr_entries == 1);
assert(node->entries[0] != NULL);
node->entries[0] = NULL;
node->nr_entries = 0;
node->alloc_bm = RDXTREE_BM_FULL;
}
kmem_cache_free(&rdxtree_node_cache, node);
}
static void
rdxtree_node_destroy_deferred(struct work *work)
{
struct rdxtree_node *node;
node = structof(work, struct rdxtree_node, work);
rdxtree_node_destroy(node);
}
static void
rdxtree_node_schedule_destruction(struct rdxtree_node *node)
{
assert(node->parent == NULL);
work_init(&node->work, rdxtree_node_destroy_deferred);
rcu_defer(&node->work);
}
static inline void
rdxtree_node_link(struct rdxtree_node *node, struct rdxtree_node *parent,
unsigned short index)
{
node->parent = parent;
node->index = index;
}
static inline void
rdxtree_node_unlink(struct rdxtree_node *node)
{
assert(node->parent != NULL);
node->parent = NULL;
}
static inline bool
rdxtree_node_full(struct rdxtree_node *node)
{
return (node->nr_entries == ARRAY_SIZE(node->entries));
}
static inline bool
rdxtree_node_empty(struct rdxtree_node *node)
{
return (node->nr_entries == 0);
}
static inline void
rdxtree_node_insert(struct rdxtree_node *node, unsigned short index,
void *entry)
{
assert(index < ARRAY_SIZE(node->entries));
assert(node->entries[index] == NULL);
node->nr_entries++;
rcu_store_ptr(node->entries[index], entry);
}
static inline void
rdxtree_node_insert_node(struct rdxtree_node *node, unsigned short index,
struct rdxtree_node *child)
{
rdxtree_node_insert(node, index, rdxtree_node_to_entry(child));
}
static inline void
rdxtree_node_remove(struct rdxtree_node *node, unsigned short index)
{
assert(index < ARRAY_SIZE(node->entries));
assert(node->entries[index] != NULL);
node->nr_entries--;
rcu_store_ptr(node->entries[index], NULL);
}
static inline void *
rdxtree_node_find(struct rdxtree_node *node, unsigned short *indexp)
{
unsigned short index;
void *ptr;
index = *indexp;
while (index < ARRAY_SIZE(node->entries)) {
ptr = rdxtree_entry_addr(rcu_load_ptr(node->entries[index]));
if (ptr != NULL) {
*indexp = index;
return ptr;
}
index++;
}
return NULL;
}
static inline void
rdxtree_node_bm_set(struct rdxtree_node *node, unsigned short index)
{
node->alloc_bm |= (rdxtree_bm_t)1 << index;
}
static inline void
rdxtree_node_bm_clear(struct rdxtree_node *node, unsigned short index)
{
node->alloc_bm &= ~((rdxtree_bm_t)1 << index);
}
static inline bool
rdxtree_node_bm_is_set(struct rdxtree_node *node, unsigned short index)
{
return (node->alloc_bm & ((rdxtree_bm_t)1 << index));
}
static inline bool
rdxtree_node_bm_empty(struct rdxtree_node *node)
{
return (node->alloc_bm == RDXTREE_BM_EMPTY);
}
static inline unsigned short
rdxtree_node_bm_first(struct rdxtree_node *node)
{
return rdxtree_ffs(node->alloc_bm) - 1;
}
static inline rdxtree_key_t
rdxtree_max_key(unsigned short height)
{
size_t shift;
shift = RDXTREE_RADIX * height;
if (likely(shift < (sizeof(rdxtree_key_t) * CHAR_BIT))) {
return ((rdxtree_key_t)1 << shift) - 1;
} else {
return ~((rdxtree_key_t)0);
}
}
static inline bool
rdxtree_key_alloc_enabled(const struct rdxtree *tree)
{
return (tree->flags & RDXTREE_KEY_ALLOC);
}
static void
rdxtree_shrink(struct rdxtree *tree)
{
struct rdxtree_node *node;
void *entry;
while (tree->height > 0) {
node = rdxtree_entry_addr(tree->root);
if (node->nr_entries != 1) {
break;
}
entry = node->entries[0];
if (entry == NULL) {
break;
}
tree->height--;
if (tree->height > 0) {
rdxtree_node_unlink(rdxtree_entry_addr(entry));
}
rcu_store_ptr(tree->root, entry);
/*
* There is still one valid entry (the first one) in this node. It
* must remain valid as long as read-side references can exist so
* that concurrent lookups can find the rest of the tree. Therefore,
* this entry isn't reset before node destruction.
*/
rdxtree_node_schedule_destruction(node);
}
}
static int
rdxtree_grow(struct rdxtree *tree, rdxtree_key_t key)
{
struct rdxtree_node *root, *node;
unsigned short new_height;
int error;
new_height = tree->height + 1;
while (key > rdxtree_max_key(new_height)) {
new_height++;
}
if (tree->root == NULL) {
tree->height = new_height;
return 0;
}
root = rdxtree_entry_addr(tree->root);
do {
error = rdxtree_node_create(&node, tree->height);
if (error) {
rdxtree_shrink(tree);
return error;
}
if (tree->height == 0) {
if (rdxtree_key_alloc_enabled(tree)) {
rdxtree_node_bm_clear(node, 0);
}
} else {
rdxtree_node_link(root, node, 0);
if (rdxtree_key_alloc_enabled(tree)
&& rdxtree_node_bm_empty(root)) {
rdxtree_node_bm_clear(node, 0);
}
}
rdxtree_node_insert(node, 0, tree->root);
tree->height++;
rcu_store_ptr(tree->root, rdxtree_node_to_entry(node));
root = node;
} while (new_height > tree->height);
return 0;
}
static void
rdxtree_cleanup(struct rdxtree *tree, struct rdxtree_node *node)
{
struct rdxtree_node *prev;
for (;;) {
if (likely(!rdxtree_node_empty(node))) {
if (unlikely(node->parent == NULL)) {
rdxtree_shrink(tree);
}
break;
}
if (node->parent == NULL) {
tree->height = 0;
rcu_store_ptr(tree->root, NULL);
rdxtree_node_schedule_destruction(node);
break;
}
prev = node;
node = node->parent;
rdxtree_node_unlink(prev);
rdxtree_node_remove(node, prev->index);
rdxtree_node_schedule_destruction(prev);
}
}
static void
rdxtree_insert_bm_clear(struct rdxtree_node *node, unsigned short index)
{
for (;;) {
rdxtree_node_bm_clear(node, index);
if (!rdxtree_node_full(node) || (node->parent == NULL)) {
break;
}
index = node->index;
node = node->parent;
}
}
int
rdxtree_insert_common(struct rdxtree *tree, rdxtree_key_t key,
void *ptr, void ***slotp)
{
struct rdxtree_node *node, *prev;
unsigned short height, shift;
unsigned short index = 0; /* GCC */
int error;
assert(ptr != NULL);
rdxtree_assert_alignment(ptr);
if (unlikely(key > rdxtree_max_key(tree->height))) {
error = rdxtree_grow(tree, key);
if (error) {
return error;
}
}
height = tree->height;
if (unlikely(height == 0)) {
if (tree->root != NULL) {
return EBUSY;
}
rcu_store_ptr(tree->root, ptr);
if (slotp != NULL) {
*slotp = &tree->root;
}
return 0;
}
node = rdxtree_entry_addr(tree->root);
shift = (height - 1) * RDXTREE_RADIX;
prev = NULL;
do {
if (node == NULL) {
error = rdxtree_node_create(&node, height - 1);
if (error) {
if (prev == NULL) {
tree->height = 0;
} else {
rdxtree_cleanup(tree, prev);
}
return error;
}
if (prev == NULL) {
rcu_store_ptr(tree->root, rdxtree_node_to_entry(node));
} else {
rdxtree_node_link(node, prev, index);
rdxtree_node_insert_node(prev, index, node);
}
}
prev = node;
index = (unsigned short)(key >> shift) & RDXTREE_RADIX_MASK;
node = rdxtree_entry_addr(prev->entries[index]);
shift -= RDXTREE_RADIX;
height--;
} while (height > 0);
if (unlikely(node != NULL)) {
return EBUSY;
}
rdxtree_node_insert(prev, index, ptr);
if (rdxtree_key_alloc_enabled(tree)) {
rdxtree_insert_bm_clear(prev, index);
}
if (slotp != NULL) {
*slotp = &prev->entries[index];
}
return 0;
}
int
rdxtree_insert_alloc_common(struct rdxtree *tree, void *ptr,
rdxtree_key_t *keyp, void ***slotp)
{
struct rdxtree_node *node, *prev;
unsigned short height, shift;
unsigned short index = 0; /* GCC */
rdxtree_key_t key;
int error;
assert(rdxtree_key_alloc_enabled(tree));
assert(ptr != NULL);
rdxtree_assert_alignment(ptr);
height = tree->height;
if (unlikely(height == 0)) {
if (tree->root == NULL) {
rcu_store_ptr(tree->root, ptr);
*keyp = 0;
if (slotp != NULL) {
*slotp = &tree->root;
}
return 0;
}
goto grow;
}
node = rdxtree_entry_addr(tree->root);
key = 0;
shift = (height - 1) * RDXTREE_RADIX;
prev = NULL;
do {
if (node == NULL) {
error = rdxtree_node_create(&node, height - 1);
if (error) {
rdxtree_cleanup(tree, prev);
return error;
}
rdxtree_node_link(node, prev, index);
rdxtree_node_insert_node(prev, index, node);
}
prev = node;
index = rdxtree_node_bm_first(node);
if (index == (unsigned short)-1) {
goto grow;
}
key |= (rdxtree_key_t)index << shift;
node = rdxtree_entry_addr(node->entries[index]);
shift -= RDXTREE_RADIX;
height--;
} while (height > 0);
rdxtree_node_insert(prev, index, ptr);
rdxtree_insert_bm_clear(prev, index);
if (slotp != NULL) {
*slotp = &prev->entries[index];
}
goto out;
grow:
key = rdxtree_max_key(height) + 1;
error = rdxtree_insert_common(tree, key, ptr, slotp);
if (error) {
return error;
}
out:
*keyp = key;
return 0;
}
static void
rdxtree_remove_bm_set(struct rdxtree_node *node, unsigned short index)
{
do {
rdxtree_node_bm_set(node, index);
if (node->parent == NULL) {
break;
}
index = node->index;
node = node->parent;
} while (!rdxtree_node_bm_is_set(node, index));
}
void *
rdxtree_remove(struct rdxtree *tree, rdxtree_key_t key)
{
struct rdxtree_node *node, *prev;
unsigned short height, shift, index;
height = tree->height;
if (unlikely(key > rdxtree_max_key(height))) {
return NULL;
}
node = rdxtree_entry_addr(tree->root);
if (unlikely(height == 0)) {
rcu_store_ptr(tree->root, NULL);
return node;
}
shift = (height - 1) * RDXTREE_RADIX;
do {
if (node == NULL) {
return NULL;
}
prev = node;
index = (unsigned short)(key >> shift) & RDXTREE_RADIX_MASK;
node = rdxtree_entry_addr(node->entries[index]);
shift -= RDXTREE_RADIX;
height--;
} while (height > 0);
if (node == NULL) {
return NULL;
}
if (rdxtree_key_alloc_enabled(tree)) {
rdxtree_remove_bm_set(prev, index);
}
rdxtree_node_remove(prev, index);
rdxtree_cleanup(tree, prev);
return node;
}
void *
rdxtree_lookup_common(const struct rdxtree *tree, rdxtree_key_t key,
bool get_slot)
{
struct rdxtree_node *node, *prev;
unsigned short height, shift, index;
void *entry;
entry = rcu_load_ptr(tree->root);
if (entry == NULL) {
node = NULL;
height = 0;
} else {
node = rdxtree_entry_addr(entry);
height = rdxtree_entry_is_node(entry) ? node->height + 1 : 0;
}
if (key > rdxtree_max_key(height)) {
return NULL;
}
if (height == 0) {
if (node == NULL) {
return NULL;
}
return get_slot ? (void *)&tree->root : node;
}
shift = (height - 1) * RDXTREE_RADIX;
do {
if (node == NULL) {
return NULL;
}
prev = node;
index = (unsigned short)(key >> shift) & RDXTREE_RADIX_MASK;
entry = rcu_load_ptr(node->entries[index]);
node = rdxtree_entry_addr(entry);
shift -= RDXTREE_RADIX;
height--;
} while (height > 0);
if (node == NULL) {
return NULL;
}
return get_slot ? (void *)&prev->entries[index] : node;
}
void *
rdxtree_replace_slot(void **slot, void *ptr)
{
void *old;
assert(ptr != NULL);
rdxtree_assert_alignment(ptr);
old = *slot;
assert(old != NULL);
rdxtree_assert_alignment(old);
rcu_store_ptr(*slot, ptr);
return old;
}
static void *
rdxtree_walk_next(struct rdxtree *tree, struct rdxtree_iter *iter)
{
struct rdxtree_node *root, *node, *prev;
unsigned short height, shift, index, orig_index;
rdxtree_key_t key;
void *entry;
entry = rcu_load_ptr(tree->root);
if (entry == NULL) {
return NULL;
}
if (!rdxtree_entry_is_node(entry)) {
if (iter->key != (rdxtree_key_t)-1) {
return NULL;
} else {
iter->key = 0;
return rdxtree_entry_addr(entry);
}
}
key = iter->key + 1;
if ((key == 0) && (iter->node != NULL)) {
return NULL;
}
root = rdxtree_entry_addr(entry);
restart:
node = root;
height = root->height + 1;
if (key > rdxtree_max_key(height)) {
return NULL;
}
shift = (height - 1) * RDXTREE_RADIX;
do {
prev = node;
index = (key >> shift) & RDXTREE_RADIX_MASK;
orig_index = index;
node = rdxtree_node_find(node, &index);
if (node == NULL) {
shift += RDXTREE_RADIX;
key = ((key >> shift) + 1) << shift;
if (key == 0) {
return NULL;
}
goto restart;
}
if (orig_index != index) {
key = ((key >> shift) + (index - orig_index)) << shift;
}
shift -= RDXTREE_RADIX;
height--;
} while (height > 0);
iter->node = prev;
iter->key = key;
return node;
}
void *
rdxtree_walk(struct rdxtree *tree, struct rdxtree_iter *iter)
{
unsigned short index, orig_index;
void *ptr;
if (iter->node == NULL) {
return rdxtree_walk_next(tree, iter);
}
index = (iter->key + 1) & RDXTREE_RADIX_MASK;
if (index != 0) {
orig_index = index;
ptr = rdxtree_node_find(iter->node, &index);
if (ptr != NULL) {
iter->key += (index - orig_index) + 1;
return ptr;
}
}
return rdxtree_walk_next(tree, iter);
}
void
rdxtree_remove_all(struct rdxtree *tree)
{
struct rdxtree_node *node, *parent;
struct rdxtree_iter iter;
if (tree->height == 0) {
if (tree->root != NULL) {
rcu_store_ptr(tree->root, NULL);
}
return;
}
for (;;) {
rdxtree_iter_init(&iter);
rdxtree_walk_next(tree, &iter);
if (iter.node == NULL) {
break;
}
node = iter.node;
parent = node->parent;
if (parent == NULL) {
rdxtree_init(tree, tree->flags);
} else {
if (rdxtree_key_alloc_enabled(tree)) {
rdxtree_remove_bm_set(parent, node->index);
}
rdxtree_node_remove(parent, node->index);
rdxtree_cleanup(tree, parent);
node->parent = NULL;
}
rdxtree_node_schedule_destruction(node);
}
}
static int __init
rdxtree_setup(void)
{
kmem_cache_init(&rdxtree_node_cache, "rdxtree_node",
sizeof(struct rdxtree_node), 0,
rdxtree_node_ctor, KMEM_CACHE_PAGE_ONLY);
return 0;
}
INIT_OP_DEFINE(rdxtree_setup,
INIT_OP_DEP(kmem_bootstrap, true),
INIT_OP_DEP(rcu_bootstrap, true));