author     Richard Braun <rbraun@sceen.net>  2017-01-11 21:31:53 +0100
committer  Richard Braun <rbraun@sceen.net>  2017-01-11 21:31:53 +0100
commit     135f428f0a50eb9988f0b40a60357dfedbcc7f18
tree       38eb76350879b55227295a2fa31c7bdaffae08f9
parent     0a7bb2b9e2441cd0610a0687f39a38b5c66a6f46
kern/kmem: rework slab allocation
Allocating slabs from the page allocator only is likely to cause
fragmentation. Instead, allocate larger-than-page slabs from
kernel virtual memory, and page-sized slabs from the page allocator.
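The patch implements this split with a pair of helpers, kmem_pagealloc() and
kmem_pagefree(), which pick the backend from the allocation size. A simplified
sketch of the allocation side, condensed from the diff below:

    /*
     * Condensed from the kmem_pagealloc() helper added by this patch:
     * page-sized requests are served by the physical page allocator
     * (direct-mapped memory), larger ones by kernel virtual memory.
     */
    static void *
    kmem_pagealloc(size_t size)
    {
        if (size > PAGE_SIZE) {
            return vm_kmem_alloc(size);
        } else {
            struct vm_page *page;

            page = vm_page_alloc(vm_page_order(size), VM_PAGE_SEL_DIRECTMAP,
                                 VM_PAGE_KMEM);
            return (page == NULL) ? NULL : vm_page_direct_ptr(page);
        }
    }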
 kern/kmem.c   | 328
 kern/kmem.h   |   7
 kern/kmem_i.h |   4
 vm/vm_map.c   |   3
 vm/vm_page.h  |   3
 5 files changed, 209 insertions, 136 deletions
diff --git a/kern/kmem.c b/kern/kmem.c
index 59bb99d4..bda5dcbf 100644
--- a/kern/kmem.c
+++ b/kern/kmem.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2014 Richard Braun.
+ * Copyright (c) 2010-2017 Richard Braun.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -25,11 +25,11 @@
  *
  * The per-cache self-scaling hash table for buffer-to-bufctl conversion,
  * described in 3.2.3 "Slab Layout for Large Objects", has been replaced with
- * a constant time buffer-to-slab lookup that relies on the VM system. Slabs
- * are allocated from the direct mapping of physical memory, which enables
- * the retrieval of physical addresses backing slabs with a simple shift.
- * Physical addresses are then used to find page descriptors, which store
- * data private to this allocator.
+ * a constant time buffer-to-slab lookup that relies on the VM system.
+ *
+ * Slabs are allocated from the physical page allocator if they're page-sized,
+ * and from kernel virtual memory if they're bigger, in order to prevent
+ * physical memory fragmentation from making slab allocations fail.
  *
  * This implementation uses per-CPU pools of objects, which service most
  * allocation requests. These pools act as caches (but are named differently
@@ -41,6 +41,8 @@
  * TODO Rework the CPU pool layer to use the SLQB algorithm by Nick Piggin.
  */
 
+#include <stdbool.h>
+
 #include <kern/assert.h>
 #include <kern/init.h>
 #include <kern/limits.h>
@@ -59,6 +61,8 @@
 #include <kern/string.h>
 #include <kern/thread.h>
 #include <machine/cpu.h>
+#include <machine/pmap.h>
+#include <vm/vm_kmem.h>
 #include <vm/vm_page.h>
 
 /*
@@ -226,6 +230,45 @@ kmem_bufctl_to_buf(union kmem_bufctl *bufctl, struct kmem_cache *cache)
     return (void *)bufctl - cache->bufctl_dist;
 }
 
+static inline bool
+kmem_pagealloc_virtual(size_t size)
+{
+    return (size > PAGE_SIZE);
+}
+
+static void *
+kmem_pagealloc(size_t size)
+{
+    if (kmem_pagealloc_virtual(size)) {
+        return vm_kmem_alloc(size);
+    } else {
+        struct vm_page *page;
+
+        page = vm_page_alloc(vm_page_order(size), VM_PAGE_SEL_DIRECTMAP,
+                             VM_PAGE_KMEM);
+
+        if (page == NULL) {
+            return NULL;
+        }
+
+        return vm_page_direct_ptr(page);
+    }
+}
+
+static void
+kmem_pagefree(void *ptr, size_t size)
+{
+    if (kmem_pagealloc_virtual(size)) {
+        vm_kmem_free(ptr, size);
+    } else {
+        struct vm_page *page;
+
+        page = vm_page_lookup(vm_page_direct_pa((unsigned long)ptr));
+        assert(page != NULL);
+        vm_page_free(page, vm_page_order(size));
+    }
+}
+
 static void
 kmem_slab_create_verify(struct kmem_slab *slab, struct kmem_cache *cache)
 {
@@ -254,27 +297,23 @@ kmem_slab_create_verify(struct kmem_slab *slab, struct kmem_cache *cache)
 static struct kmem_slab *
 kmem_slab_create(struct kmem_cache *cache, size_t color)
 {
-    struct vm_page *page;
     struct kmem_slab *slab;
     union kmem_bufctl *bufctl;
     size_t buf_size;
     unsigned long buffers;
     void *slab_buf;
 
-    page = vm_page_alloc(cache->slab_order, VM_PAGE_SEL_DIRECTMAP,
-                         VM_PAGE_KMEM);
+    slab_buf = kmem_pagealloc(cache->slab_size);
 
-    if (page == NULL) {
+    if (slab_buf == NULL) {
         return NULL;
     }
 
-    slab_buf = vm_page_direct_ptr(page);
-
     if (cache->flags & KMEM_CF_SLAB_EXTERNAL) {
         slab = kmem_cache_alloc(&kmem_slab_cache);
 
         if (slab == NULL) {
-            vm_page_free(page, cache->slab_order);
+            kmem_pagefree(slab_buf, cache->slab_size);
             return NULL;
         }
     } else {
@@ -309,30 +348,6 @@ kmem_slab_buf(const struct kmem_slab *slab)
 }
 
 static void
-kmem_slab_vmref(struct kmem_slab *slab, size_t size)
-{
-    struct vm_page *page;
-    unsigned long va, end;
-
-    va = kmem_slab_buf(slab);
-    end = va + size;
-
-    do {
-        page = vm_page_lookup(vm_page_direct_pa(va));
-        assert(page != NULL);
-        assert(page->slab_priv == NULL);
-        page->slab_priv = slab;
-        va += PAGE_SIZE;
-    } while (va < end);
-}
-
-static inline int
-kmem_slab_lookup_needed(int flags)
-{
-    return !(flags & KMEM_CF_DIRECT) || (flags & KMEM_CF_VERIFY);
-}
-
-static void
 kmem_cpu_pool_init(struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache)
 {
     mutex_init(&cpu_pool->lock);
@@ -457,83 +472,72 @@ kmem_cache_error(struct kmem_cache *cache, void *buf, int error, void *arg)
 }
 
 /*
- * Compute an appropriate slab size for the given cache.
+ * Compute properties such as slab size for the given cache.
  *
  * Once the slab size is known, this function sets the related properties
- * (buffers per slab and maximum color). It can also set the KMEM_CF_DIRECT
- * and/or KMEM_CF_SLAB_EXTERNAL flags depending on the resulting layout.
+ * (buffers per slab and maximum color). It can also set some KMEM_CF_xxx
+ * flags depending on the resulting layout.
  */
 static void
-kmem_cache_compute_sizes(struct kmem_cache *cache, int flags)
+kmem_cache_compute_properties(struct kmem_cache *cache, int flags)
 {
-    size_t i, buffers, buf_size, slab_size, free_slab_size;
-    size_t waste, waste_min, optimal_size = optimal_size;
-    int embed, optimal_embed = optimal_embed;
-    unsigned int slab_order, optimal_order = optimal_order;
-
-    buf_size = cache->buf_size;
+    size_t size, waste;
+    int embed;
 
-    if (buf_size < KMEM_BUF_SIZE_THRESHOLD) {
+    if (cache->buf_size < KMEM_BUF_SIZE_THRESHOLD) {
         flags |= KMEM_CACHE_NOOFFSLAB;
     }
 
-    i = 0;
-    waste_min = (size_t)-1;
-
-    do {
-        i++;
-
-        slab_order = vm_page_order(i * buf_size);
-        slab_size = PAGE_SIZE << slab_order;
-        free_slab_size = slab_size;
+    cache->slab_size = PAGE_SIZE;
 
+    for (;;) {
         if (flags & KMEM_CACHE_NOOFFSLAB) {
-            free_slab_size -= sizeof(struct kmem_slab);
+            embed = 1;
+        } else {
+            waste = cache->slab_size % cache->buf_size;
+            embed = (sizeof(struct kmem_slab) <= waste);
         }
 
-        buffers = free_slab_size / buf_size;
-        waste = free_slab_size % buf_size;
+        size = cache->slab_size;
 
-        if (buffers > i) {
-            i = buffers;
+        if (embed) {
+            size -= sizeof(struct kmem_slab);
         }
 
-        if (flags & KMEM_CACHE_NOOFFSLAB) {
-            embed = 1;
-        } else if (sizeof(struct kmem_slab) <= waste) {
-            embed = 1;
-            waste -= sizeof(struct kmem_slab);
-        } else {
-            embed = 0;
+        if (size >= cache->buf_size) {
+            break;
         }
 
-        if (waste <= waste_min) {
-            waste_min = waste;
-            optimal_order = slab_order;
-            optimal_size = slab_size;
-            optimal_embed = embed;
-        }
-    } while ((buffers < KMEM_MIN_BUFS_PER_SLAB)
-             && (slab_size < KMEM_SLAB_SIZE_THRESHOLD));
+        cache->slab_size += PAGE_SIZE;
+    }
 
-    assert(!(flags & KMEM_CACHE_NOOFFSLAB) || optimal_embed);
+    /*
+     * A user may force page allocation in order to guarantee that virtual
+     * memory isn't used. This is normally done for objects that are used
+     * to implement virtual memory and avoid circular dependencies.
+     *
+     * When forcing the use of direct page allocation, only allow single
+     * page allocations in order to completely prevent physical memory
+     * fragmentation from making slab allocations fail.
+     */
+    if ((flags & KMEM_CACHE_PAGE_ONLY) && (cache->slab_size != PAGE_SIZE)) {
+        panic("kmem: unable to guarantee page allocation");
+    }
 
-    cache->slab_order = optimal_order;
-    cache->slab_size = optimal_size;
-    slab_size = cache->slab_size -
-                (optimal_embed ? sizeof(struct kmem_slab) : 0);
-    cache->bufs_per_slab = slab_size / buf_size;
-    cache->color_max = slab_size % buf_size;
+    cache->bufs_per_slab = size / cache->buf_size;
+    cache->color_max = size % cache->buf_size;
 
+    /*
+     * Make sure the first page of a slab buffer can be found from the
+     * address of the first object.
+     *
+     * See kmem_slab_buf().
+     */
     if (cache->color_max >= PAGE_SIZE) {
-        cache->color_max = PAGE_SIZE - 1;
+        cache->color_max = 0;
     }
 
-    if (optimal_embed) {
-        if (cache->slab_size == PAGE_SIZE) {
-            cache->flags |= KMEM_CF_DIRECT;
-        }
-    } else {
+    if (!embed) {
         cache->flags |= KMEM_CF_SLAB_EXTERNAL;
     }
 }
@@ -545,6 +549,7 @@ kmem_cache_init(struct kmem_cache *cache, const char *name, size_t obj_size,
     struct kmem_cpu_pool_type *cpu_pool_type;
     size_t i, buf_size;
 
+#define KMEM_VERIFY
 #ifdef KMEM_VERIFY
     cache->flags = KMEM_CF_VERIFY;
 #else /* KMEM_CF_VERIFY */
@@ -592,7 +597,7 @@ kmem_cache_init(struct kmem_cache *cache, const char *name, size_t obj_size,
         cache->buf_size = buf_size;
     }
 
-    kmem_cache_compute_sizes(cache, flags);
+    kmem_cache_compute_properties(cache, flags);
 
     for (cpu_pool_type = kmem_cpu_pool_types;
          buf_size <= cpu_pool_type->buf_size;
@@ -615,6 +620,100 @@ kmem_cache_empty(struct kmem_cache *cache)
     return cache->nr_objs == cache->nr_bufs;
 }
 
+static struct kmem_slab *
+kmem_cache_buf_to_slab(const struct kmem_cache *cache, void *buf)
+{
+    if ((cache->flags & KMEM_CF_SLAB_EXTERNAL)
+        || (cache->slab_size != PAGE_SIZE)) {
+        return NULL;
+    }
+
+    return (struct kmem_slab *)vm_page_end((unsigned long)buf) - 1;
+}
+
+static inline bool
+kmem_cache_registration_required(const struct kmem_cache *cache)
+{
+    return ((cache->flags & KMEM_CF_SLAB_EXTERNAL)
+            || (cache->flags & KMEM_CF_VERIFY)
+            || (cache->slab_size != PAGE_SIZE));
+}
+
+static void
+kmem_cache_register(struct kmem_cache *cache, struct kmem_slab *slab)
+{
+    struct vm_page *page;
+    unsigned long va, end;
+    phys_addr_t pa;
+    bool virtual;
+    int error;
+
+    assert(kmem_cache_registration_required(cache));
+    assert(slab->nr_refs == 0);
+
+    virtual = kmem_pagealloc_virtual(cache->slab_size);
+
+    for (va = kmem_slab_buf(slab), end = va + cache->slab_size;
+         va < end;
+         va += PAGE_SIZE) {
+        if (virtual) {
+            error = pmap_kextract(va, &pa);
+            assert(!error);
+        } else {
+            pa = vm_page_direct_pa(va);
+        }
+
+        page = vm_page_lookup(pa);
+        assert(page != NULL);
+        assert((virtual && vm_page_type(page) == VM_PAGE_KERNEL)
+               || (!virtual && vm_page_type(page) == VM_PAGE_KMEM));
+        assert(page->slab_priv == NULL);
+        page->slab_priv = slab;
+    }
+}
+
+static struct kmem_slab *
+kmem_cache_lookup(struct kmem_cache *cache, void *buf)
+{
+    struct kmem_slab *slab;
+    struct vm_page *page;
+    unsigned long va;
+    phys_addr_t pa;
+    bool virtual;
+    int error;
+
+    assert(kmem_cache_registration_required(cache));
+
+    virtual = kmem_pagealloc_virtual(cache->slab_size);
+    va = (unsigned long)buf;
+
+    if (virtual) {
+        error = pmap_kextract(va, &pa);
+
+        if (error) {
+            return NULL;
+        }
+    } else {
+        pa = vm_page_direct_pa(va);
+    }
+
+    page = vm_page_lookup(pa);
+
+    if (page == NULL) {
+        return NULL;
+    }
+
+    if ((virtual && (vm_page_type(page) != VM_PAGE_KERNEL))
+        || (!virtual && (vm_page_type(page) != VM_PAGE_KMEM))) {
+        return NULL;
+    }
+
+    slab = page->slab_priv;
+    assert((unsigned long)buf >= kmem_slab_buf(slab));
+    assert((unsigned long)buf < (kmem_slab_buf(slab) + cache->slab_size));
+    return slab;
+}
+
 static int
 kmem_cache_grow(struct kmem_cache *cache)
 {
@@ -648,8 +747,8 @@ kmem_cache_grow(struct kmem_cache *cache)
         cache->nr_slabs++;
         cache->nr_free_slabs++;
 
-        if (kmem_slab_lookup_needed(cache->flags)) {
-            kmem_slab_vmref(slab, cache->slab_size);
+        if (kmem_cache_registration_required(cache)) {
+            kmem_cache_register(cache, slab);
         }
     }
 
@@ -720,19 +819,11 @@ kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf)
     struct kmem_slab *slab;
     union kmem_bufctl *bufctl;
 
-    if (cache->flags & KMEM_CF_DIRECT) {
-        assert(cache->slab_size == PAGE_SIZE);
-        slab = (struct kmem_slab *)P2END((unsigned long)buf, cache->slab_size)
-               - 1;
-    } else {
-        struct vm_page *page;
+    slab = kmem_cache_buf_to_slab(cache, buf);
 
-        page = vm_page_lookup(vm_page_direct_pa((unsigned long)buf));
-        assert(page != NULL);
-        slab = page->slab_priv;
+    if (slab == NULL) {
+        slab = kmem_cache_lookup(cache, buf);
         assert(slab != NULL);
-        assert((unsigned long)buf >= kmem_slab_buf(slab));
-        assert((unsigned long)buf < (kmem_slab_buf(slab) + cache->slab_size));
     }
 
     assert(slab->nr_refs >= 1);
@@ -746,6 +837,7 @@ kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf)
     if (slab->nr_refs == 0) {
         /* The slab has become free */
 
+        /* If it was partial, remove it from its list */
         if (cache->bufs_per_slab != 1) {
             list_remove(&slab->node);
         }
@@ -872,17 +964,10 @@ kmem_cache_free_verify(struct kmem_cache *cache, void *buf)
     struct kmem_buftag *buftag;
     struct kmem_slab *slab;
     union kmem_bufctl *bufctl;
-    struct vm_page *page;
     unsigned char *redzone_byte;
    unsigned long slabend;
 
-    page = vm_page_lookup(vm_page_direct_pa((unsigned long)buf));
-
-    if (page == NULL) {
-        kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);
-    }
-
-    slab = page->slab_priv;
+    slab = kmem_cache_lookup(cache, buf);
 
     if (slab == NULL) {
        kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);
     }
@@ -1020,8 +1105,7 @@ kmem_cache_info(struct kmem_cache *cache)
         return;
     }
 
-    snprintf(flags_str, sizeof(flags_str), "%s%s%s",
-             (cache->flags & KMEM_CF_DIRECT) ? " DIRECT" : "",
+    snprintf(flags_str, sizeof(flags_str), "%s%s",
              (cache->flags & KMEM_CF_SLAB_EXTERNAL) ? " SLAB_EXTERNAL" : "",
             (cache->flags & KMEM_CF_VERIFY) ? " VERIFY" : "");
 
@@ -1130,16 +1214,7 @@ kmem_alloc(size_t size)
             kmem_alloc_verify(cache, buf, size);
         }
     } else {
-        struct vm_page *page;
-
-        page = vm_page_alloc(vm_page_order(size), VM_PAGE_SEL_DIRECTMAP,
-                             VM_PAGE_KERNEL);
-
-        if (page == NULL) {
-            return NULL;
-        }
-
-        buf = vm_page_direct_ptr(page);
+        buf = kmem_pagealloc(size);
     }
 
     return buf;
@@ -1201,10 +1276,7 @@ kmem_free(void *ptr, size_t size)
 
         kmem_cache_free(cache, ptr);
     } else {
-        struct vm_page *page;
-
-        page = vm_page_lookup(vm_page_direct_pa((unsigned long)ptr));
-        vm_page_free(page, vm_page_order(size));
+        kmem_pagefree(ptr, size);
     }
 }
 
diff --git a/kern/kmem.h b/kern/kmem.h
index 3b2b49ae..b1861557 100644
--- a/kern/kmem.h
+++ b/kern/kmem.h
@@ -46,13 +46,14 @@ typedef void (*kmem_ctor_fn_t)(void *);
  * Cache creation flags.
  */
 #define KMEM_CACHE_NOOFFSLAB    0x1 /* Don't allocate external slab data */
-#define KMEM_CACHE_VERIFY       0x2 /* Use debugging facilities */
+#define KMEM_CACHE_PAGE_ONLY    0x2 /* Allocate slabs from the page allocator */
+#define KMEM_CACHE_VERIFY       0x4 /* Use debugging facilities */
 
 /*
  * Initialize a cache.
  *
- * If a slab allocation/free function pointer is NULL, the default backend
- * (vm_kmem on the kernel map) is used for the allocation/free action.
+ * Slabs may be allocated either from the page allocator or from kernel
+ * virtual memory, unless KMEM_CACHE_PAGE_ONLY is set.
  */
 void kmem_cache_init(struct kmem_cache *cache, const char *name,
                      size_t obj_size, size_t align, kmem_ctor_fn_t ctor,
diff --git a/kern/kmem_i.h b/kern/kmem_i.h
index 9a0973ba..08b11c54 100644
--- a/kern/kmem_i.h
+++ b/kern/kmem_i.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010-2014 Richard Braun.
+ * Copyright (c) 2010-2017 Richard Braun.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -160,7 +160,6 @@ struct kmem_slab {
  */
 #define KMEM_CF_SLAB_EXTERNAL   0x1 /* Slab data is off slab */
 #define KMEM_CF_VERIFY          0x2 /* Debugging facilities enabled */
-#define KMEM_CF_DIRECT          0x4 /* Quick buf-to-slab lookup */
 
 /*
  * Cache of objects.
@@ -182,7 +181,6 @@ struct kmem_cache {
     size_t align;
     size_t buf_size;            /* Aligned object size */
     size_t bufctl_dist;         /* Distance from buffer to bufctl */
-    unsigned int slab_order;
     size_t slab_size;
     size_t color;
     size_t color_max;
diff --git a/vm/vm_map.c b/vm/vm_map.c
index 2c8c31a4..78ff2cc3 100644
--- a/vm/vm_map.c
+++ b/vm/vm_map.c
@@ -720,7 +720,8 @@ vm_map_setup(void)
     vm_map_init(kernel_map, kernel_pmap,
                 VM_MIN_KMEM_ADDRESS, VM_MAX_KMEM_ADDRESS);
     kmem_cache_init(&vm_map_entry_cache, "vm_map_entry",
-                    sizeof(struct vm_map_entry), 0, NULL, 0);
+                    sizeof(struct vm_map_entry), 0, NULL,
+                    KMEM_CACHE_PAGE_ONLY);
     kmem_cache_init(&vm_map_cache, "vm_map", sizeof(struct vm_map),
                     0, NULL, 0);
 }
diff --git a/vm/vm_page.h b/vm/vm_page.h
index 718d64aa..097bcc6b 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -39,6 +39,7 @@
 #define vm_page_ptoa(page)      ((page) << PAGE_SHIFT)
 #define vm_page_trunc(addr)     P2ALIGN(addr, PAGE_SIZE)
 #define vm_page_round(addr)     P2ROUND(addr, PAGE_SIZE)
+#define vm_page_end(addr)       P2END(addr, PAGE_SIZE)
 #define vm_page_aligned(addr)   P2ALIGNED(addr, PAGE_SIZE)
 
 /*
@@ -62,7 +63,7 @@
 #define VM_PAGE_RESERVED    1   /* Page reserved at boot time */
 #define VM_PAGE_TABLE       2   /* Page is part of the page table */
 #define VM_PAGE_PMAP        3   /* Page stores pmap-specific data */
-#define VM_PAGE_KMEM        4   /* Page is part of a kmem slab */
+#define VM_PAGE_KMEM        4   /* Page is a direct-mapped kmem slab */
 #define VM_PAGE_OBJECT      5   /* Page is part of a VM object */
 #define VM_PAGE_KERNEL      6   /* Type for generic kernel allocations */
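Usage note: caches that are themselves needed to implement kernel virtual
memory must not allocate their slabs from it, or growing the cache could
recurse into the map code. The patch handles this with the new
KMEM_CACHE_PAGE_ONLY flag, as shown for the vm_map_entry cache in vm/vm_map.c
above:

    /*
     * From vm/vm_map.c in this patch: restrict the vm_map_entry cache to
     * single-page, direct-mapped slabs, so that refilling it never depends
     * on kernel virtual memory (which would itself require new map entries).
     */
    kmem_cache_init(&vm_map_entry_cache, "vm_map_entry",
                    sizeof(struct vm_map_entry), 0, NULL,
                    KMEM_CACHE_PAGE_ONLY);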