author    | Richard Braun <rbraun@sceen.net> | 2012-12-07 20:29:45 +0100
committer | Richard Braun <rbraun@sceen.net> | 2012-12-07 20:29:45 +0100
commit    | 80f72c03069a8347ba43e127b21ffdc7d33823d3 (patch)
tree      | 05ee662ff67babd4b875121214f06dc06f88aa4f
parent    | 4da4afcdd6a7e44205a0ba0fd609c98c75cf1826 (diff)
kern/kmem: rework buffer-to-slab lookup
Instead of using a red-black tree, rely on the VM system to store
kmem-specific private data in the physical page descriptors.
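
The scheme is easy to see in miniature. The sketch below is a standalone model, not the kernel code: the toy_* helpers, the flat page_descs array, the fixed PHYS_BASE offset and the main() driver are invented for illustration, and only the slab_priv field plus the register/lookup pattern mirror what vm_kmem_lookup_page(), kmem_slab_vmref() and kmem_cache_free_to_slab() do in the diff below. Each physical page descriptor carries one private pointer; creating a slab tags the descriptors of its backing pages, and freeing a buffer recovers its slab by translating the buffer's address to its page descriptor, with no tree search and no per-cache bookkeeping.

```c
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE   4096UL
#define NR_PAGES    16UL
#define PHYS_BASE   0x100000UL  /* toy: kernel VAs map to PAs at this fixed offset */

/* Toy physical page descriptor; slab_priv is the pointer the patch adds. */
struct vm_page {
    void *slab_priv;
};

struct kmem_slab {
    unsigned long addr;         /* first buffer address (may include a color offset) */
    size_t slab_size;
};

/* One descriptor per toy physical page, as vm_phys provides in the kernel. */
static struct vm_page page_descs[NR_PAGES];

/* Toy VA-to-PA translation standing in for the pmap walk. */
static unsigned long
toy_kextract(unsigned long va)
{
    return (va / PAGE_SIZE < NR_PAGES) ? (va + PHYS_BASE) : 0;
}

/* Same shape as vm_kmem_lookup_page(): VA -> PA -> page descriptor. */
static struct vm_page *
toy_lookup_page(unsigned long va)
{
    unsigned long pa;

    pa = toy_kextract(va);

    if (pa == 0)
        return NULL;

    return &page_descs[(pa - PHYS_BASE) / PAGE_SIZE];
}

/* Same role as kmem_slab_vmref(): tag every backing page with its slab. */
static void
toy_slab_register(struct kmem_slab *slab)
{
    unsigned long va, end;

    va = slab->addr & ~(PAGE_SIZE - 1);
    end = va + slab->slab_size;

    do {
        struct vm_page *page = toy_lookup_page(va);

        assert((page != NULL) && (page->slab_priv == NULL));
        page->slab_priv = slab;
        va += PAGE_SIZE;
    } while (va < end);
}

/* Buffer-to-slab conversion on the free path, replacing the rbtree lookup. */
static struct kmem_slab *
toy_slab_from_buf(unsigned long buf)
{
    struct vm_page *page = toy_lookup_page(buf);

    assert(page != NULL);
    return page->slab_priv;
}

int
main(void)
{
    static struct kmem_slab slab = {
        .addr = 2 * PAGE_SIZE + 64,     /* buffers start past a small color offset */
        .slab_size = 2 * PAGE_SIZE,
    };

    toy_slab_register(&slab);
    assert(toy_slab_from_buf(2 * PAGE_SIZE + 128) == &slab);
    assert(toy_slab_from_buf(3 * PAGE_SIZE + 8) == &slab);
    printf("buffer-to-slab lookup resolved without a tree\n");
    return 0;
}
```

Both lookups resolve to the same slab because every backing page was tagged at creation time, which is exactly the property kmem_slab_vmref() establishes before a slab is put on a cache's free list.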
-rw-r--r-- | kern/kmem.c  | 142
-rw-r--r-- | kern/kmem.h  |   7
-rw-r--r-- | vm/vm_kmem.c |  13
-rw-r--r-- | vm/vm_kmem.h |   6
-rw-r--r-- | vm/vm_page.h |   1
-rw-r--r-- | vm/vm_phys.c |   1
6 files changed, 86 insertions, 84 deletions
diff --git a/kern/kmem.c b/kern/kmem.c
index 459c61d4..3ad4d638 100644
--- a/kern/kmem.c
+++ b/kern/kmem.c
@@ -24,21 +24,15 @@
  * differences are outlined below.
  *
  * The per-cache self-scaling hash table for buffer-to-bufctl conversion,
- * described in 3.2.3 "Slab Layout for Large Objects", has been replaced by
- * a red-black tree storing slabs, sorted by address. The use of a
- * self-balancing tree for buffer-to-slab conversions provides a few advantages
- * over a hash table. Unlike a hash table, a BST provides a "lookup nearest"
- * operation, so obtaining the slab data (whether it is embedded in the slab or
- * off slab) from a buffer address simply consists of a "lookup nearest towards
- * 0" tree search. Storing slabs instead of buffers also considerably reduces
- * the number of elements to retain. Finally, a self-balancing tree is a true
- * self-scaling data structure, whereas a hash table requires periodic
- * maintenance and complete resizing, which is expensive. The only drawback is
- * that releasing a buffer to the slab layer takes logarithmic time instead of
- * constant time. But as the data set size is kept reasonable (because slabs
- * are stored instead of buffers) and because the CPU pool layer services most
- * requests, avoiding many accesses to the slab layer, it is considered an
- * acceptable tradeoff.
+ * described in 3.2.3 "Slab Layout for Large Objects", has been replaced with
+ * a constant time buffer-to-slab lookup that relies on the VM system. When
+ * slabs are created, their backing virtual pages are mapped to physical pages
+ * that are never aliased by other virtual addresses (unless explicitly by
+ * other kernel code). The lookup operation provides the associated physical
+ * page descriptor which can store data private to this allocator. The main
+ * drawback of this method is that it needs to walk the low level page tables,
+ * but it's expected that these have already been cached by the CPU due to
+ * prior access by the allocator user, depending on the hardware properties.
  *
  * This implementation uses per-CPU pools of objects, which service most
  * allocation requests. These pools act as caches (but are named differently
@@ -57,13 +51,13 @@
 #include <kern/panic.h>
 #include <kern/param.h>
 #include <kern/printk.h>
-#include <kern/rbtree.h>
 #include <kern/sprintf.h>
 #include <kern/stddef.h>
 #include <kern/stdint.h>
 #include <kern/string.h>
 #include <machine/cpu.h>
 #include <vm/vm_kmem.h>
+#include <vm/vm_page.h>
 
 /*
  * Minimum required alignment.
@@ -284,8 +278,7 @@ kmem_slab_create(struct kmem_cache *cache, size_t color)
         slab = (struct kmem_slab *)(slab_buf + cache->slab_size) - 1;
     }
 
-    list_node_init(&slab->list_node);
-    rbtree_node_init(&slab->tree_node);
+    list_node_init(&slab->node);
     slab->nr_refs = 0;
     slab->first_free = NULL;
     slab->addr = slab_buf + color;
@@ -305,34 +298,34 @@ kmem_slab_create(struct kmem_cache *cache, size_t color)
     return slab;
 }
 
-static inline int
-kmem_slab_use_tree(int flags)
+static inline unsigned long
+kmem_slab_buf(const struct kmem_slab *slab)
 {
-    return !(flags & KMEM_CF_DIRECT) || (flags & KMEM_CF_VERIFY);
+    return P2ALIGN((unsigned long)slab->addr, PAGE_SIZE);
 }
 
-static inline int
-kmem_slab_cmp_lookup(const void *addr, const struct rbtree_node *node)
+static void
+kmem_slab_vmref(struct kmem_slab *slab, size_t size)
 {
-    struct kmem_slab *slab;
+    struct vm_page *page;
+    unsigned long va, end;
 
-    slab = rbtree_entry(node, struct kmem_slab, tree_node);
+    va = kmem_slab_buf(slab);
+    end = va + size;
 
-    if (addr == slab->addr)
-        return 0;
-    else if (addr < slab->addr)
-        return -1;
-    else
-        return 1;
+    do {
+        page = vm_kmem_lookup_page(va);
+        assert(page != NULL);
+        assert(page->slab_priv == NULL);
+        page->slab_priv = slab;
+        va += PAGE_SIZE;
+    } while (va < end);
 }
 
 static inline int
-kmem_slab_cmp_insert(const struct rbtree_node *a, const struct rbtree_node *b)
+kmem_slab_lookup_needed(int flags)
 {
-    struct kmem_slab *slab;
-
-    slab = rbtree_entry(a, struct kmem_slab, tree_node);
-    return kmem_slab_cmp_lookup(slab->addr, b);
+    return !(flags & KMEM_CF_DIRECT) || (flags & KMEM_CF_VERIFY);
 }
 
 static void
@@ -558,7 +551,6 @@ kmem_cache_init(struct kmem_cache *cache, const char *name, size_t obj_size,
     list_node_init(&cache->node);
     list_init(&cache->partial_slabs);
     list_init(&cache->free_slabs);
-    rbtree_init(&cache->active_slabs);
     cache->obj_size = obj_size;
     cache->align = align;
     cache->buf_size = buf_size;
@@ -633,10 +625,13 @@ kmem_cache_grow(struct kmem_cache *cache)
     /* mutex_lock(&cache->mutex); */
 
     if (slab != NULL) {
-        list_insert_tail(&cache->free_slabs, &slab->list_node);
+        list_insert_tail(&cache->free_slabs, &slab->node);
         cache->nr_bufs += cache->bufs_per_slab;
         cache->nr_slabs++;
         cache->nr_free_slabs++;
+
+        if (kmem_slab_lookup_needed(cache->flags))
+            kmem_slab_vmref(slab, cache->slab_size);
     }
 
     /*
@@ -662,10 +657,9 @@ kmem_cache_alloc_from_slab(struct kmem_cache *cache)
     union kmem_bufctl *bufctl;
 
     if (!list_empty(&cache->partial_slabs))
-        slab = list_first_entry(&cache->partial_slabs, struct kmem_slab,
-                                list_node);
+        slab = list_first_entry(&cache->partial_slabs, struct kmem_slab, node);
     else if (!list_empty(&cache->free_slabs))
-        slab = list_first_entry(&cache->free_slabs, struct kmem_slab, list_node);
+        slab = list_first_entry(&cache->free_slabs, struct kmem_slab, node);
     else
         return NULL;
 
@@ -679,7 +673,7 @@ kmem_cache_alloc_from_slab(struct kmem_cache *cache)
      * The slab has become complete.
      */
     if (slab->nr_refs == cache->bufs_per_slab) {
-        list_remove(&slab->list_node);
+        list_remove(&slab->node);
 
         if (slab->nr_refs == 1)
             cache->nr_free_slabs--;
@@ -687,8 +681,8 @@ kmem_cache_alloc_from_slab(struct kmem_cache *cache)
         /*
          * The slab has become partial.
          */
-        list_remove(&slab->list_node);
-        list_insert_tail(&cache->partial_slabs, &slab->list_node);
+        list_remove(&slab->node);
+        list_insert_tail(&cache->partial_slabs, &slab->node);
         cache->nr_free_slabs--;
     } else if (!list_singular(&cache->partial_slabs)) {
         struct list *node;
@@ -701,10 +695,10 @@ kmem_cache_alloc_from_slab(struct kmem_cache *cache)
 
         assert(slab->nr_refs > 1);
 
-        for (node = list_prev(&slab->list_node);
+        for (node = list_prev(&slab->node);
              !list_end(&cache->partial_slabs, node);
              node = list_prev(node)) {
-            tmp = list_entry(node, struct kmem_slab, list_node);
+            tmp = list_entry(node, struct kmem_slab, node);
 
             if (tmp->nr_refs >= slab->nr_refs)
                 break;
@@ -714,16 +708,12 @@ kmem_cache_alloc_from_slab(struct kmem_cache *cache)
          * If the direct neighbor was found, the list is already sorted.
          * If no slab was found, the slab is inserted at the head of the list.
          */
-        if (node != list_prev(&slab->list_node)) {
-            list_remove(&slab->list_node);
-            list_insert_after(node, &slab->list_node);
+        if (node != list_prev(&slab->node)) {
+            list_remove(&slab->node);
+            list_insert_after(node, &slab->node);
         }
     }
 
-    if ((slab->nr_refs == 1) && kmem_slab_use_tree(cache->flags))
-        rbtree_insert(&cache->active_slabs, &slab->tree_node,
-                      kmem_slab_cmp_insert);
-
     return kmem_bufctl_to_buf(bufctl, cache);
 }
 
@@ -743,14 +733,14 @@ kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf)
         slab = (struct kmem_slab *)P2END((unsigned long)buf, cache->slab_size)
                - 1;
     } else {
-        struct rbtree_node *node;
-
-        node = rbtree_lookup_nearest(&cache->active_slabs, buf,
-                                     kmem_slab_cmp_lookup, RBTREE_LEFT);
-        assert(node != NULL);
-        slab = rbtree_entry(node, struct kmem_slab, tree_node);
-        assert((unsigned long)buf < (P2ALIGN((unsigned long)slab->addr
-               + cache->slab_size, PAGE_SIZE)));
+        struct vm_page *page;
+
+        page = vm_kmem_lookup_page((unsigned long)buf);
+        assert(page != NULL);
+        slab = page->slab_priv;
+        assert(slab != NULL);
+        assert((unsigned long)buf >= kmem_slab_buf(slab));
+        assert((unsigned long)buf < (kmem_slab_buf(slab) + cache->slab_size));
     }
 
     assert(slab->nr_refs >= 1);
@@ -765,22 +755,19 @@ kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf)
      * The slab has become free.
      */
     if (slab->nr_refs == 0) {
-        if (kmem_slab_use_tree(cache->flags))
-            rbtree_remove(&cache->active_slabs, &slab->tree_node);
-
         /*
          * The slab was partial.
         */
         if (cache->bufs_per_slab > 1)
-            list_remove(&slab->list_node);
+            list_remove(&slab->node);
 
-        list_insert_tail(&cache->free_slabs, &slab->list_node);
+        list_insert_tail(&cache->free_slabs, &slab->node);
         cache->nr_free_slabs++;
     } else if (slab->nr_refs == (cache->bufs_per_slab - 1)) {
         /*
         * The slab has become partial.
         */
-        list_insert(&cache->partial_slabs, &slab->list_node);
+        list_insert(&cache->partial_slabs, &slab->node);
     } else if (!list_singular(&cache->partial_slabs)) {
         struct list *node;
         struct kmem_slab *tmp;
@@ -792,10 +779,10 @@ kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf)
 
         assert(slab->nr_refs > 0);
 
-        for (node = list_next(&slab->list_node);
+        for (node = list_next(&slab->node);
              !list_end(&cache->partial_slabs, node);
             node = list_next(node)) {
-            tmp = list_entry(node, struct kmem_slab, list_node);
+            tmp = list_entry(node, struct kmem_slab, node);
 
             if (tmp->nr_refs <= slab->nr_refs)
                 break;
@@ -805,9 +792,9 @@ kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf)
          * If the direct neighbor was found, the list is already sorted.
          * If no slab was found, the slab is inserted at the tail of the list.
          */
-        if (node != list_next(&slab->list_node)) {
-            list_remove(&slab->list_node);
-            list_insert_before(node, &slab->list_node);
+        if (node != list_next(&slab->node)) {
+            list_remove(&slab->node);
+            list_insert_before(node, &slab->node);
         }
     }
 }
@@ -911,22 +898,19 @@ slab_alloc:
 static void
 kmem_cache_free_verify(struct kmem_cache *cache, void *buf)
 {
-    struct rbtree_node *node;
     struct kmem_buftag *buftag;
     struct kmem_slab *slab;
     union kmem_bufctl *bufctl;
+    struct vm_page *page;
     unsigned char *redzone_byte;
     unsigned long slabend;
 
-    /* mutex_lock(&cache->mutex); */
-    node = rbtree_lookup_nearest(&cache->active_slabs, buf,
-                                 kmem_slab_cmp_lookup, RBTREE_LEFT);
-    /* mutex_unlock(&cache->mutex); */
+    page = vm_kmem_lookup_page((unsigned long)buf);
 
-    if (node == NULL)
+    if (page == NULL)
         kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);
 
-    slab = rbtree_entry(node, struct kmem_slab, tree_node);
+    slab = page->slab_priv;
     slabend = P2ALIGN((unsigned long)slab->addr + cache->slab_size, PAGE_SIZE);
 
     if ((unsigned long)buf >= slabend)
diff --git a/kern/kmem.h b/kern/kmem.h
index 025afb21..b01c8f3a 100644
--- a/kern/kmem.h
+++ b/kern/kmem.h
@@ -23,7 +23,6 @@
 
 #include <kern/list.h>
 #include <kern/param.h>
-#include <kern/rbtree.h>
 #include <kern/stddef.h>
 
 /*
@@ -145,8 +144,7 @@ struct kmem_buftag {
  * it describes.
  */
 struct kmem_slab {
-    struct list list_node;
-    struct rbtree_node tree_node;
+    struct list node;
     unsigned long nr_refs;
     union kmem_bufctl *first_free;
     void *addr;
@@ -185,7 +183,7 @@ typedef void (*kmem_slab_free_fn_t)(unsigned long, size_t);
 #define KMEM_CF_NO_CPU_POOL     0x1 /* CPU pool layer disabled */
 #define KMEM_CF_SLAB_EXTERNAL   0x2 /* Slab data is off slab */
 #define KMEM_CF_VERIFY          0x4 /* Debugging facilities enabled */
-#define KMEM_CF_DIRECT          0x8 /* No buf-to-slab tree lookup */
+#define KMEM_CF_DIRECT          0x8 /* Quick buf-to-slab lookup */
 
 /*
  * Cache of objects.
@@ -209,7 +207,6 @@ struct kmem_cache {
     struct list node;   /* Cache list linkage */
     struct list partial_slabs;
     struct list free_slabs;
-    struct rbtree active_slabs;
     int flags;
     size_t obj_size;    /* User-provided size */
     size_t align;
diff --git a/vm/vm_kmem.c b/vm/vm_kmem.c
index d51b2ced..5cc7374a 100644
--- a/vm/vm_kmem.c
+++ b/vm/vm_kmem.c
@@ -80,6 +80,19 @@ vm_kmem_boot_space(unsigned long *start, unsigned long *end)
     *end = vm_kmem_boot_start;
 }
 
+struct vm_page *
+vm_kmem_lookup_page(unsigned long va)
+{
+    phys_addr_t pa;
+
+    pa = pmap_kextract(va);
+
+    if (pa == 0)
+        return NULL;
+
+    return vm_phys_lookup_page(pa);
+}
+
 static int
 vm_kmem_alloc_check(size_t size)
 {
diff --git a/vm/vm_kmem.h b/vm/vm_kmem.h
index 6ecf8e17..d0da49f3 100644
--- a/vm/vm_kmem.h
+++ b/vm/vm_kmem.h
@@ -52,6 +52,12 @@ unsigned long vm_kmem_bootalloc(size_t size);
 void vm_kmem_boot_space(unsigned long *start, unsigned long *end);
 
 /*
+ * Return the page descriptor for the physical page mapped at va in kernel
+ * space. The given address must be mapped and valid.
+ */
+struct vm_page * vm_kmem_lookup_page(unsigned long va);
+
+/*
  * Allocate memory from the kernel map.
  */
 unsigned long vm_kmem_alloc(size_t size);
diff --git a/vm/vm_page.h b/vm/vm_page.h
index e5ad8fc5..205f3d2f 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -42,6 +42,7 @@ struct vm_page {
     unsigned short seg_index;
     unsigned short order;
     phys_addr_t phys_addr;
+    void *slab_priv;
 };
 
 static inline phys_addr_t
diff --git a/vm/vm_phys.c b/vm/vm_phys.c
index 14abf90e..cba286a2 100644
--- a/vm/vm_phys.c
+++ b/vm/vm_phys.c
@@ -155,6 +155,7 @@ vm_phys_init_page(struct vm_page *page, unsigned short seg_index,
     page->seg_index = seg_index;
     page->order = order;
     page->phys_addr = pa;
+    page->slab_priv = NULL;
 }
 
 static void __init
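
A closing note on the path the patch leaves untouched: caches with KMEM_CF_DIRECT convert a buffer to its slab by address arithmetic alone, which is why kmem_slab_lookup_needed() only tags pages for caches lacking that flag or running with KMEM_CF_VERIFY. The small check below restates the two power-of-two helpers locally with the rounding behaviour that path relies on; it is an illustration, not the kernel's own macro definitions, which may be spelled differently.

```c
#include <assert.h>
#include <stdio.h>

/* Local helpers for the example only. */
#define PAGE_SIZE       4096UL
#define P2ALIGN(x, a)   ((x) & ~((a) - 1))      /* round down to a power of two */
#define P2END(x, a)     (P2ALIGN(x, a) + (a))   /* end of the aligned block holding x */

int
main(void)
{
    unsigned long slab_size = PAGE_SIZE;        /* one page per slab */
    unsigned long buf = 0x2340UL;               /* some buffer inside the slab */

    /* kmem_slab_buf(): first byte of the pages backing the buffer. */
    assert(P2ALIGN(buf, PAGE_SIZE) == 0x2000UL);

    /*
     * KMEM_CF_DIRECT free path: the embedded slab header sits just before
     * the end of the slab, so it is reachable by arithmetic alone.
     */
    assert(P2END(buf, slab_size) == 0x3000UL);

    printf("direct buffer-to-slab conversion needs no lookup\n");
    return 0;
}
```

This is the quick conversion the updated KMEM_CF_DIRECT comment in kern/kmem.h refers to; the page-descriptor lookup is only taken on the other branch of kmem_cache_free_to_slab().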