author     Richard Braun <rbraun@sceen.net>  2017-01-11 21:31:53 +0100
committer  Richard Braun <rbraun@sceen.net>  2017-01-11 21:31:53 +0100
commit     135f428f0a50eb9988f0b40a60357dfedbcc7f18
tree       38eb76350879b55227295a2fa31c7bdaffae08f9
parent     0a7bb2b9e2441cd0610a0687f39a38b5c66a6f46
kern/kmem: rework slab allocation
Allocating slabs from the page allocator only makes multi-page slab allocations vulnerable to physical memory fragmentation. Instead, allocate larger-than-page slabs from kernel virtual memory, and page-sized slabs from the page allocator.
-rw-r--r--  kern/kmem.c    328
-rw-r--r--  kern/kmem.h      7
-rw-r--r--  kern/kmem_i.h    4
-rw-r--r--  vm/vm_map.c      3
-rw-r--r--  vm/vm_page.h     3
5 files changed, 209 insertions, 136 deletions
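
As a rough, standalone restatement of the policy in the commit message (this is not code from the patch; PAGE_SIZE is assumed to be 4096 here and the function name is made up):

#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE 4096 /* assumed page size for this sketch */

/*
 * Larger-than-page slabs are backed by kernel virtual memory, where
 * physically discontiguous pages can sit behind a contiguous virtual
 * range, so physical fragmentation cannot make the allocation fail.
 * Page-sized slabs keep using the cheaper direct-mapped physical page
 * allocator.
 */
static const char *
slab_backend(size_t slab_size)
{
    return (slab_size > PAGE_SIZE) ? "kernel virtual memory" : "page allocator";
}

int
main(void)
{
    printf("4096-byte slab  -> %s\n", slab_backend(4096));
    printf("16384-byte slab -> %s\n", slab_backend(16384));
    return 0;
}

The real helpers introduced below, kmem_pagealloc() and kmem_pagefree(), apply the same size test through kmem_pagealloc_virtual().
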
diff --git a/kern/kmem.c b/kern/kmem.c
index 59bb99d4..bda5dcbf 100644
--- a/kern/kmem.c
+++ b/kern/kmem.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010-2014 Richard Braun.
+ * Copyright (c) 2010-2017 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -25,11 +25,11 @@
*
* The per-cache self-scaling hash table for buffer-to-bufctl conversion,
* described in 3.2.3 "Slab Layout for Large Objects", has been replaced with
- * a constant time buffer-to-slab lookup that relies on the VM system. Slabs
- * are allocated from the direct mapping of physical memory, which enables
- * the retrieval of physical addresses backing slabs with a simple shift.
- * Physical addresses are then used to find page descriptors, which store
- * data private to this allocator.
+ * a constant time buffer-to-slab lookup that relies on the VM system.
+ *
+ * Slabs are allocated from the physical page allocator if they're page-sized,
+ * and from kernel virtual memory if they're bigger, in order to prevent
+ * physical memory fragmentation from making slab allocations fail.
*
* This implementation uses per-CPU pools of objects, which service most
* allocation requests. These pools act as caches (but are named differently
@@ -41,6 +41,8 @@
* TODO Rework the CPU pool layer to use the SLQB algorithm by Nick Piggin.
*/
+#include <stdbool.h>
+
#include <kern/assert.h>
#include <kern/init.h>
#include <kern/limits.h>
@@ -59,6 +61,8 @@
#include <kern/string.h>
#include <kern/thread.h>
#include <machine/cpu.h>
+#include <machine/pmap.h>
+#include <vm/vm_kmem.h>
#include <vm/vm_page.h>
/*
@@ -226,6 +230,45 @@ kmem_bufctl_to_buf(union kmem_bufctl *bufctl, struct kmem_cache *cache)
return (void *)bufctl - cache->bufctl_dist;
}
+static inline bool
+kmem_pagealloc_virtual(size_t size)
+{
+ return (size > PAGE_SIZE);
+}
+
+static void *
+kmem_pagealloc(size_t size)
+{
+ if (kmem_pagealloc_virtual(size)) {
+ return vm_kmem_alloc(size);
+ } else {
+ struct vm_page *page;
+
+ page = vm_page_alloc(vm_page_order(size), VM_PAGE_SEL_DIRECTMAP,
+ VM_PAGE_KMEM);
+
+ if (page == NULL) {
+ return NULL;
+ }
+
+ return vm_page_direct_ptr(page);
+ }
+}
+
+static void
+kmem_pagefree(void *ptr, size_t size)
+{
+ if (kmem_pagealloc_virtual(size)) {
+ vm_kmem_free(ptr, size);
+ } else {
+ struct vm_page *page;
+
+ page = vm_page_lookup(vm_page_direct_pa((unsigned long)ptr));
+ assert(page != NULL);
+ vm_page_free(page, vm_page_order(size));
+ }
+}
+
static void
kmem_slab_create_verify(struct kmem_slab *slab, struct kmem_cache *cache)
{
@@ -254,27 +297,23 @@ kmem_slab_create_verify(struct kmem_slab *slab, struct kmem_cache *cache)
static struct kmem_slab *
kmem_slab_create(struct kmem_cache *cache, size_t color)
{
- struct vm_page *page;
struct kmem_slab *slab;
union kmem_bufctl *bufctl;
size_t buf_size;
unsigned long buffers;
void *slab_buf;
- page = vm_page_alloc(cache->slab_order, VM_PAGE_SEL_DIRECTMAP,
- VM_PAGE_KMEM);
+ slab_buf = kmem_pagealloc(cache->slab_size);
- if (page == NULL) {
+ if (slab_buf == NULL) {
return NULL;
}
- slab_buf = vm_page_direct_ptr(page);
-
if (cache->flags & KMEM_CF_SLAB_EXTERNAL) {
slab = kmem_cache_alloc(&kmem_slab_cache);
if (slab == NULL) {
- vm_page_free(page, cache->slab_order);
+ kmem_pagefree(slab_buf, cache->slab_size);
return NULL;
}
} else {
@@ -309,30 +348,6 @@ kmem_slab_buf(const struct kmem_slab *slab)
}
static void
-kmem_slab_vmref(struct kmem_slab *slab, size_t size)
-{
- struct vm_page *page;
- unsigned long va, end;
-
- va = kmem_slab_buf(slab);
- end = va + size;
-
- do {
- page = vm_page_lookup(vm_page_direct_pa(va));
- assert(page != NULL);
- assert(page->slab_priv == NULL);
- page->slab_priv = slab;
- va += PAGE_SIZE;
- } while (va < end);
-}
-
-static inline int
-kmem_slab_lookup_needed(int flags)
-{
- return !(flags & KMEM_CF_DIRECT) || (flags & KMEM_CF_VERIFY);
-}
-
-static void
kmem_cpu_pool_init(struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache)
{
mutex_init(&cpu_pool->lock);
@@ -457,83 +472,72 @@ kmem_cache_error(struct kmem_cache *cache, void *buf, int error, void *arg)
}
/*
- * Compute an appropriate slab size for the given cache.
+ * Compute properties such as slab size for the given cache.
*
* Once the slab size is known, this function sets the related properties
- * (buffers per slab and maximum color). It can also set the KMEM_CF_DIRECT
- * and/or KMEM_CF_SLAB_EXTERNAL flags depending on the resulting layout.
+ * (buffers per slab and maximum color). It can also set some KMEM_CF_xxx
+ * flags depending on the resulting layout.
*/
static void
-kmem_cache_compute_sizes(struct kmem_cache *cache, int flags)
+kmem_cache_compute_properties(struct kmem_cache *cache, int flags)
{
- size_t i, buffers, buf_size, slab_size, free_slab_size;
- size_t waste, waste_min, optimal_size = optimal_size;
- int embed, optimal_embed = optimal_embed;
- unsigned int slab_order, optimal_order = optimal_order;
-
- buf_size = cache->buf_size;
+ size_t size, waste;
+ int embed;
- if (buf_size < KMEM_BUF_SIZE_THRESHOLD) {
+ if (cache->buf_size < KMEM_BUF_SIZE_THRESHOLD) {
flags |= KMEM_CACHE_NOOFFSLAB;
}
- i = 0;
- waste_min = (size_t)-1;
-
- do {
- i++;
-
- slab_order = vm_page_order(i * buf_size);
- slab_size = PAGE_SIZE << slab_order;
- free_slab_size = slab_size;
+ cache->slab_size = PAGE_SIZE;
+ for (;;) {
if (flags & KMEM_CACHE_NOOFFSLAB) {
- free_slab_size -= sizeof(struct kmem_slab);
+ embed = 1;
+ } else {
+ waste = cache->slab_size % cache->buf_size;
+ embed = (sizeof(struct kmem_slab) <= waste);
}
- buffers = free_slab_size / buf_size;
- waste = free_slab_size % buf_size;
+ size = cache->slab_size;
- if (buffers > i) {
- i = buffers;
+ if (embed) {
+ size -= sizeof(struct kmem_slab);
}
- if (flags & KMEM_CACHE_NOOFFSLAB) {
- embed = 1;
- } else if (sizeof(struct kmem_slab) <= waste) {
- embed = 1;
- waste -= sizeof(struct kmem_slab);
- } else {
- embed = 0;
+ if (size >= cache->buf_size) {
+ break;
}
- if (waste <= waste_min) {
- waste_min = waste;
- optimal_order = slab_order;
- optimal_size = slab_size;
- optimal_embed = embed;
- }
- } while ((buffers < KMEM_MIN_BUFS_PER_SLAB)
- && (slab_size < KMEM_SLAB_SIZE_THRESHOLD));
+ cache->slab_size += PAGE_SIZE;
+ }
- assert(!(flags & KMEM_CACHE_NOOFFSLAB) || optimal_embed);
+ /*
+ * A user may force page allocation in order to guarantee that virtual
+ * memory isn't used. This is normally done for objects that implement the
+ * virtual memory system itself, in order to avoid circular dependencies.
+ *
+ * When forcing the use of direct page allocation, only allow single
+ * page allocations in order to completely prevent physical memory
+ * fragmentation from making slab allocations fail.
+ */
+ if ((flags & KMEM_CACHE_PAGE_ONLY) && (cache->slab_size != PAGE_SIZE)) {
+ panic("kmem: unable to guarantee page allocation");
+ }
- cache->slab_order = optimal_order;
- cache->slab_size = optimal_size;
- slab_size = cache->slab_size
- - (optimal_embed ? sizeof(struct kmem_slab) : 0);
- cache->bufs_per_slab = slab_size / buf_size;
- cache->color_max = slab_size % buf_size;
+ cache->bufs_per_slab = size / cache->buf_size;
+ cache->color_max = size % cache->buf_size;
+ /*
+ * Make sure the first page of a slab buffer can be found from the
+ * address of the first object.
+ *
+ * See kmem_slab_buf().
+ */
if (cache->color_max >= PAGE_SIZE) {
- cache->color_max = PAGE_SIZE - 1;
+ cache->color_max = 0;
}
- if (optimal_embed) {
- if (cache->slab_size == PAGE_SIZE) {
- cache->flags |= KMEM_CF_DIRECT;
- }
- } else {
+ if (!embed) {
cache->flags |= KMEM_CF_SLAB_EXTERNAL;
}
}
@@ -545,6 +549,7 @@ kmem_cache_init(struct kmem_cache *cache, const char *name, size_t obj_size,
struct kmem_cpu_pool_type *cpu_pool_type;
size_t i, buf_size;
#ifdef KMEM_VERIFY
cache->flags = KMEM_CF_VERIFY;
#else /* KMEM_CF_VERIFY */
@@ -592,7 +597,7 @@ kmem_cache_init(struct kmem_cache *cache, const char *name, size_t obj_size,
cache->buf_size = buf_size;
}
- kmem_cache_compute_sizes(cache, flags);
+ kmem_cache_compute_properties(cache, flags);
for (cpu_pool_type = kmem_cpu_pool_types;
buf_size <= cpu_pool_type->buf_size;
@@ -615,6 +620,100 @@ kmem_cache_empty(struct kmem_cache *cache)
return cache->nr_objs == cache->nr_bufs;
}
+static struct kmem_slab *
+kmem_cache_buf_to_slab(const struct kmem_cache *cache, void *buf)
+{
+ if ((cache->flags & KMEM_CF_SLAB_EXTERNAL)
+ || (cache->slab_size != PAGE_SIZE)) {
+ return NULL;
+ }
+
+ return (struct kmem_slab *)vm_page_end((unsigned long)buf) - 1;
+}
+
+static inline bool
+kmem_cache_registration_required(const struct kmem_cache *cache)
+{
+ return ((cache->flags & KMEM_CF_SLAB_EXTERNAL)
+ || (cache->flags & KMEM_CF_VERIFY)
+ || (cache->slab_size != PAGE_SIZE));
+}
+
+static void
+kmem_cache_register(struct kmem_cache *cache, struct kmem_slab *slab)
+{
+ struct vm_page *page;
+ unsigned long va, end;
+ phys_addr_t pa;
+ bool virtual;
+ int error;
+
+ assert(kmem_cache_registration_required(cache));
+ assert(slab->nr_refs == 0);
+
+ virtual = kmem_pagealloc_virtual(cache->slab_size);
+
+ for (va = kmem_slab_buf(slab), end = va + cache->slab_size;
+ va < end;
+ va += PAGE_SIZE) {
+ if (virtual) {
+ error = pmap_kextract(va, &pa);
+ assert(!error);
+ } else {
+ pa = vm_page_direct_pa(va);
+ }
+
+ page = vm_page_lookup(pa);
+ assert(page != NULL);
+ assert((virtual && vm_page_type(page) == VM_PAGE_KERNEL)
+ || (!virtual && vm_page_type(page) == VM_PAGE_KMEM));
+ assert(page->slab_priv == NULL);
+ page->slab_priv = slab;
+ }
+}
+
+static struct kmem_slab *
+kmem_cache_lookup(struct kmem_cache *cache, void *buf)
+{
+ struct kmem_slab *slab;
+ struct vm_page *page;
+ unsigned long va;
+ phys_addr_t pa;
+ bool virtual;
+ int error;
+
+ assert(kmem_cache_registration_required(cache));
+
+ virtual = kmem_pagealloc_virtual(cache->slab_size);
+ va = (unsigned long)buf;
+
+ if (virtual) {
+ error = pmap_kextract(va, &pa);
+
+ if (error) {
+ return NULL;
+ }
+ } else {
+ pa = vm_page_direct_pa(va);
+ }
+
+ page = vm_page_lookup(pa);
+
+ if (page == NULL) {
+ return NULL;
+ }
+
+ if ((virtual && (vm_page_type(page) != VM_PAGE_KERNEL))
+ || (!virtual && (vm_page_type(page) != VM_PAGE_KMEM))) {
+ return NULL;
+ }
+
+ slab = page->slab_priv;
+ assert((unsigned long)buf >= kmem_slab_buf(slab));
+ assert((unsigned long)buf < (kmem_slab_buf(slab) + cache->slab_size));
+ return slab;
+}
+
static int
kmem_cache_grow(struct kmem_cache *cache)
{
@@ -648,8 +747,8 @@ kmem_cache_grow(struct kmem_cache *cache)
cache->nr_slabs++;
cache->nr_free_slabs++;
- if (kmem_slab_lookup_needed(cache->flags)) {
- kmem_slab_vmref(slab, cache->slab_size);
+ if (kmem_cache_registration_required(cache)) {
+ kmem_cache_register(cache, slab);
}
}
@@ -720,19 +819,11 @@ kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf)
struct kmem_slab *slab;
union kmem_bufctl *bufctl;
- if (cache->flags & KMEM_CF_DIRECT) {
- assert(cache->slab_size == PAGE_SIZE);
- slab = (struct kmem_slab *)P2END((unsigned long)buf, cache->slab_size)
- - 1;
- } else {
- struct vm_page *page;
+ slab = kmem_cache_buf_to_slab(cache, buf);
- page = vm_page_lookup(vm_page_direct_pa((unsigned long)buf));
- assert(page != NULL);
- slab = page->slab_priv;
+ if (slab == NULL) {
+ slab = kmem_cache_lookup(cache, buf);
assert(slab != NULL);
- assert((unsigned long)buf >= kmem_slab_buf(slab));
- assert((unsigned long)buf < (kmem_slab_buf(slab) + cache->slab_size));
}
assert(slab->nr_refs >= 1);
@@ -746,6 +837,7 @@ kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf)
if (slab->nr_refs == 0) {
/* The slab has become free */
+ /* If it was partial, remove it from its list */
if (cache->bufs_per_slab != 1) {
list_remove(&slab->node);
}
@@ -872,17 +964,10 @@ kmem_cache_free_verify(struct kmem_cache *cache, void *buf)
struct kmem_buftag *buftag;
struct kmem_slab *slab;
union kmem_bufctl *bufctl;
- struct vm_page *page;
unsigned char *redzone_byte;
unsigned long slabend;
- page = vm_page_lookup(vm_page_direct_pa((unsigned long)buf));
-
- if (page == NULL) {
- kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);
- }
-
- slab = page->slab_priv;
+ slab = kmem_cache_lookup(cache, buf);
if (slab == NULL) {
kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL);
@@ -1020,8 +1105,7 @@ kmem_cache_info(struct kmem_cache *cache)
return;
}
- snprintf(flags_str, sizeof(flags_str), "%s%s%s",
- (cache->flags & KMEM_CF_DIRECT) ? " DIRECT" : "",
+ snprintf(flags_str, sizeof(flags_str), "%s%s",
(cache->flags & KMEM_CF_SLAB_EXTERNAL) ? " SLAB_EXTERNAL" : "",
(cache->flags & KMEM_CF_VERIFY) ? " VERIFY" : "");
@@ -1130,16 +1214,7 @@ kmem_alloc(size_t size)
kmem_alloc_verify(cache, buf, size);
}
} else {
- struct vm_page *page;
-
- page = vm_page_alloc(vm_page_order(size), VM_PAGE_SEL_DIRECTMAP,
- VM_PAGE_KERNEL);
-
- if (page == NULL) {
- return NULL;
- }
-
- buf = vm_page_direct_ptr(page);
+ buf = kmem_pagealloc(size);
}
return buf;
@@ -1201,10 +1276,7 @@ kmem_free(void *ptr, size_t size)
kmem_cache_free(cache, ptr);
} else {
- struct vm_page *page;
-
- page = vm_page_lookup(vm_page_direct_pa((unsigned long)ptr));
- vm_page_free(page, vm_page_order(size));
+ kmem_pagefree(ptr, size);
}
}
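
The trickiest part of the kern/kmem.c changes is the new sizing loop in kmem_cache_compute_properties(). The following standalone sketch restates that loop outside the kernel so the arithmetic can be checked in isolation: the slab grows one page at a time until at least one buffer fits, and the slab header is embedded at the end of the slab whenever the leftover space can hold it. The page size and header size are assumed values, and the KMEM_CACHE_NOOFFSLAB / KMEM_CACHE_PAGE_ONLY special cases are omitted.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE        4096  /* assumed page size */
#define SLAB_HEADER_SIZE   64  /* stand-in for sizeof(struct kmem_slab) */

struct slab_layout {
    size_t slab_size;     /* total slab size, a multiple of PAGE_SIZE */
    size_t bufs_per_slab; /* buffers that fit once the header is accounted for */
    size_t color_max;     /* leftover bytes available for cache coloring */
    bool   embed;         /* is the slab header embedded at the end of the slab? */
};

/*
 * Mirrors the loop in kmem_cache_compute_properties(): grow the slab one
 * page at a time until at least one buffer fits. The header is embedded
 * on-slab when the waste left after packing buffers can hold it.
 */
static struct slab_layout
compute_layout(size_t buf_size)
{
    struct slab_layout l = { .slab_size = PAGE_SIZE };
    size_t size;

    for (;;) {
        size_t waste = l.slab_size % buf_size;
        l.embed = (SLAB_HEADER_SIZE <= waste);

        size = l.slab_size;

        if (l.embed) {
            size -= SLAB_HEADER_SIZE;
        }

        if (size >= buf_size) {
            break;
        }

        l.slab_size += PAGE_SIZE;
    }

    l.bufs_per_slab = size / buf_size;
    l.color_max = size % buf_size;
    return l;
}

int
main(void)
{
    /* 4096 % 192 = 64, so the header embeds and (4096 - 64) / 192 = 21
       buffers fit in a single-page slab. */
    struct slab_layout a = compute_layout(192);

    /* One page is too small for a 6000-byte buffer, so the slab grows to
       8192 bytes; 8192 % 6000 = 2192 >= 64, the header embeds, 1 buffer fits. */
    struct slab_layout b = compute_layout(6000);

    printf("192:  slab=%zu bufs=%zu color_max=%zu embed=%d\n",
           a.slab_size, a.bufs_per_slab, a.color_max, a.embed);
    printf("6000: slab=%zu bufs=%zu color_max=%zu embed=%d\n",
           b.slab_size, b.bufs_per_slab, b.color_max, b.embed);
    return 0;
}

With these assumed sizes, a 192-byte buffer packs 21 objects into a single-page slab, while a 6000-byte buffer forces a two-page slab holding a single object, which is exactly the kind of slab that now comes from kernel virtual memory.
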
diff --git a/kern/kmem.h b/kern/kmem.h
index 3b2b49ae..b1861557 100644
--- a/kern/kmem.h
+++ b/kern/kmem.h
@@ -46,13 +46,14 @@ typedef void (*kmem_ctor_fn_t)(void *);
* Cache creation flags.
*/
#define KMEM_CACHE_NOOFFSLAB 0x1 /* Don't allocate external slab data */
-#define KMEM_CACHE_VERIFY 0x2 /* Use debugging facilities */
+#define KMEM_CACHE_PAGE_ONLY 0x2 /* Allocate slabs from the page allocator */
+#define KMEM_CACHE_VERIFY 0x4 /* Use debugging facilities */
/*
* Initialize a cache.
*
- * If a slab allocation/free function pointer is NULL, the default backend
- * (vm_kmem on the kernel map) is used for the allocation/free action.
+ * Slabs may be allocated either from the page allocator or from kernel
+ * virtual memory, unless KMEM_CACHE_PAGE_ONLY is set.
*/
void kmem_cache_init(struct kmem_cache *cache, const char *name,
size_t obj_size, size_t align, kmem_ctor_fn_t ctor,
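
As a usage note, these creation flags are bit flags and may be combined. The fragment below sketches how a cache for a VM-internal structure could opt into KMEM_CACHE_PAGE_ONLY; the structure and function names are hypothetical, only the kmem_cache_init() signature and the flag come from the header above.

#include <kern/kmem.h>

/*
 * Hypothetical VM-internal object. Any structure that is itself part of
 * the virtual memory implementation, and therefore must not depend on
 * kernel virtual memory for its own allocation, would follow this pattern.
 */
struct vm_foo {
    unsigned long start;
    unsigned long end;
};

static struct kmem_cache vm_foo_cache;

static void
vm_foo_setup(void)
{
    /*
     * KMEM_CACHE_PAGE_ONLY restricts slabs to the physical page allocator;
     * kmem_cache_init() panics if a single object cannot be served from a
     * page-sized slab under that constraint.
     */
    kmem_cache_init(&vm_foo_cache, "vm_foo", sizeof(struct vm_foo),
                    0, NULL, KMEM_CACHE_PAGE_ONLY);
}

This mirrors the vm_map_entry cache change in vm/vm_map.c further down.
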
diff --git a/kern/kmem_i.h b/kern/kmem_i.h
index 9a0973ba..08b11c54 100644
--- a/kern/kmem_i.h
+++ b/kern/kmem_i.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2010-2014 Richard Braun.
+ * Copyright (c) 2010-2017 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -160,7 +160,6 @@ struct kmem_slab {
*/
#define KMEM_CF_SLAB_EXTERNAL 0x1 /* Slab data is off slab */
#define KMEM_CF_VERIFY 0x2 /* Debugging facilities enabled */
-#define KMEM_CF_DIRECT 0x4 /* Quick buf-to-slab lookup */
/*
* Cache of objects.
@@ -182,7 +181,6 @@ struct kmem_cache {
size_t align;
size_t buf_size; /* Aligned object size */
size_t bufctl_dist; /* Distance from buffer to bufctl */
- unsigned int slab_order;
size_t slab_size;
size_t color;
size_t color_max;
diff --git a/vm/vm_map.c b/vm/vm_map.c
index 2c8c31a4..78ff2cc3 100644
--- a/vm/vm_map.c
+++ b/vm/vm_map.c
@@ -720,7 +720,8 @@ vm_map_setup(void)
vm_map_init(kernel_map, kernel_pmap,
VM_MIN_KMEM_ADDRESS, VM_MAX_KMEM_ADDRESS);
kmem_cache_init(&vm_map_entry_cache, "vm_map_entry",
- sizeof(struct vm_map_entry), 0, NULL, 0);
+ sizeof(struct vm_map_entry), 0, NULL,
+ KMEM_CACHE_PAGE_ONLY);
kmem_cache_init(&vm_map_cache, "vm_map", sizeof(struct vm_map),
0, NULL, 0);
}
diff --git a/vm/vm_page.h b/vm/vm_page.h
index 718d64aa..097bcc6b 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -39,6 +39,7 @@
#define vm_page_ptoa(page) ((page) << PAGE_SHIFT)
#define vm_page_trunc(addr) P2ALIGN(addr, PAGE_SIZE)
#define vm_page_round(addr) P2ROUND(addr, PAGE_SIZE)
+#define vm_page_end(addr) P2END(addr, PAGE_SIZE)
#define vm_page_aligned(addr) P2ALIGNED(addr, PAGE_SIZE)
/*
@@ -62,7 +63,7 @@
#define VM_PAGE_RESERVED 1 /* Page reserved at boot time */
#define VM_PAGE_TABLE 2 /* Page is part of the page table */
#define VM_PAGE_PMAP 3 /* Page stores pmap-specific data */
-#define VM_PAGE_KMEM 4 /* Page is part of a kmem slab */
+#define VM_PAGE_KMEM 4 /* Page is a direct-mapped kmem slab */
#define VM_PAGE_OBJECT 5 /* Page is part of a VM object */
#define VM_PAGE_KERNEL 6 /* Type for generic kernel allocations */
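
Finally, the new vm_page_end() macro (a thin wrapper over P2END) is what makes the constant-time buffer-to-slab conversion in kmem_cache_buf_to_slab() possible: for a page-sized slab with an embedded header, the struct kmem_slab sits at the very end of the page, so rounding any buffer address up to the end of its page and stepping back one header recovers the slab. The standalone sketch below reproduces just that arithmetic; the 4096-byte page, the header layout and the helper names are assumptions, not kernel code.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096UL /* assumed page size */

/* Stand-in for the kernel's struct kmem_slab header. */
struct kmem_slab_hdr {
    void *first_free;
    unsigned long nr_refs;
};

/*
 * P2END(addr, PAGE_SIZE): round addr up to the end of its page, i.e. the
 * address of the first byte of the next page. vm_page_end() wraps this.
 */
static inline uintptr_t
page_end(uintptr_t addr)
{
    return (addr + PAGE_SIZE) & ~((uintptr_t)PAGE_SIZE - 1);
}

/*
 * For a page-sized slab whose header is embedded at the end of the page,
 * any buffer address inside the slab leads back to the header with pure
 * pointer arithmetic, no page table walk or registration lookup needed.
 */
static inline struct kmem_slab_hdr *
buf_to_slab(void *buf)
{
    return (struct kmem_slab_hdr *)page_end((uintptr_t)buf) - 1;
}

int
main(void)
{
    /* Simulate a page-aligned, page-sized slab. */
    static _Alignas(4096) unsigned char slab_page[PAGE_SIZE];
    struct kmem_slab_hdr *hdr =
        (struct kmem_slab_hdr *)(slab_page + PAGE_SIZE) - 1;

    /* Any object address inside the page maps back to the same header. */
    assert(buf_to_slab(slab_page) == hdr);
    assert(buf_to_slab(slab_page + 1000) == hdr);
    printf("header at %p recovered from buffer addresses\n", (void *)hdr);
    return 0;
}
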