/* * Copyright (c) 2010-2017 Richard Braun. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . * * * This allocator is based on the paper "The Slab Allocator: An Object-Caching * Kernel Memory Allocator" by Jeff Bonwick. * * It allows the allocation of objects (i.e. fixed-size typed buffers) from * caches and is efficient in both space and time. This implementation follows * many of the indications from the paper mentioned. The most notable * differences are outlined below. * * The per-cache self-scaling hash table for buffer-to-bufctl conversion, * described in 3.2.3 "Slab Layout for Large Objects", has been replaced with * a constant time buffer-to-slab lookup that relies on the VM system. * * Slabs are allocated from the physical page allocator if they're page-sized, * and from kernel virtual memory if they're bigger, in order to prevent * physical memory fragmentation from making slab allocations fail. * * This implementation uses per-CPU pools of objects, which service most * allocation requests. These pools act as caches (but are named differently * to avoid confusion with CPU caches) that reduce contention on multiprocessor * systems. When a pool is empty and cannot provide an object, it is filled by * transferring multiple objects from the slab layer. The symmetric case is * handled likewise. * * TODO Rework the CPU pool layer to use the SLQB algorithm by Nick Piggin. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Minimum required alignment. */ #define KMEM_ALIGN_MIN 8 /* * Minimum number of buffers per slab. * * This value is ignored when the slab size exceeds a threshold. */ #define KMEM_MIN_BUFS_PER_SLAB 8 /* * Special slab size beyond which the minimum number of buffers per slab is * ignored when computing the slab size of a cache. */ #define KMEM_SLAB_SIZE_THRESHOLD (8 * PAGE_SIZE) /* * Special buffer size under which slab data is unconditionally allocated * from its associated slab. */ #define KMEM_BUF_SIZE_THRESHOLD (PAGE_SIZE / 8) /* * The transfer size of a CPU pool is computed by dividing the pool size by * this value. */ #define KMEM_CPU_POOL_TRANSFER_RATIO 2 /* * Logarithm of the size of the smallest general cache. */ #define KMEM_CACHES_FIRST_ORDER 5 /* * Number of caches backing general purpose allocations. */ #define KMEM_NR_MEM_CACHES 13 /* * Options for kmem_cache_alloc_verify(). */ #define KMEM_AV_NOCONSTRUCT 0 #define KMEM_AV_CONSTRUCT 1 /* * Error codes for kmem_cache_error(). */ #define KMEM_ERR_INVALID 0 /* Invalid address being freed */ #define KMEM_ERR_DOUBLEFREE 1 /* Freeing already free address */ #define KMEM_ERR_BUFTAG 2 /* Invalid buftag content */ #define KMEM_ERR_MODIFIED 3 /* Buffer modified while free */ #define KMEM_ERR_REDZONE 4 /* Redzone violation */ /* * Available CPU pool types. 
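 *
 * For example, with the values below, a cache whose buffers are 1 KiB large
 * selects the 256-byte entry and therefore uses 64-object per-CPU arrays,
 * while a cache with buffers larger than 32 KiB selects the first entry and
 * uses single-object arrays.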
* * For each entry, the CPU pool size applies from the entry buf_size * (excluded) up to (and including) the buf_size of the preceding entry. * * See struct kmem_cpu_pool_type for a description of the values. */ static struct kmem_cpu_pool_type kmem_cpu_pool_types[] __read_mostly = { { 32768, 1, 0, NULL }, { 4096, 8, CPU_L1_SIZE, NULL }, { 256, 64, CPU_L1_SIZE, NULL }, { 0, 128, CPU_L1_SIZE, NULL } }; /* * Caches where CPU pool arrays are allocated from. */ static struct kmem_cache kmem_cpu_array_caches[ARRAY_SIZE(kmem_cpu_pool_types)]; /* * Cache for off slab data. */ static struct kmem_cache kmem_slab_cache; /* * General caches array. */ static struct kmem_cache kmem_caches[KMEM_NR_MEM_CACHES]; /* * List of all caches managed by the allocator. */ static struct list kmem_cache_list; static struct mutex kmem_cache_list_lock; static void kmem_cache_error(struct kmem_cache *cache, void *buf, int error, void *arg); static void * kmem_cache_alloc_from_slab(struct kmem_cache *cache); static void kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf); static void * kmem_buf_verify_bytes(void *buf, void *pattern, size_t size) { char *ptr, *pattern_ptr, *end; end = buf + size; for (ptr = buf, pattern_ptr = pattern; ptr < end; ptr++, pattern_ptr++) { if (*ptr != *pattern_ptr) { return ptr; } } return NULL; } static void kmem_buf_fill(void *buf, uint64_t pattern, size_t size) { uint64_t *ptr, *end; assert(P2ALIGNED((uintptr_t)buf, sizeof(uint64_t))); assert(P2ALIGNED(size, sizeof(uint64_t))); end = buf + size; for (ptr = buf; ptr < end; ptr++) { *ptr = pattern; } } static void * kmem_buf_verify_fill(void *buf, uint64_t old, uint64_t new, size_t size) { uint64_t *ptr, *end; assert(P2ALIGNED((uintptr_t)buf, sizeof(uint64_t))); assert(P2ALIGNED(size, sizeof(uint64_t))); end = buf + size; for (ptr = buf; ptr < end; ptr++) { if (*ptr != old) { return kmem_buf_verify_bytes(ptr, &old, sizeof(old)); } *ptr = new; } return NULL; } static inline union kmem_bufctl * kmem_buf_to_bufctl(void *buf, struct kmem_cache *cache) { return (union kmem_bufctl *)(buf + cache->bufctl_dist); } static inline struct kmem_buftag * kmem_buf_to_buftag(void *buf, struct kmem_cache *cache) { return (struct kmem_buftag *)(buf + cache->buftag_dist); } static inline void * kmem_bufctl_to_buf(union kmem_bufctl *bufctl, struct kmem_cache *cache) { return (void *)bufctl - cache->bufctl_dist; } static inline bool kmem_pagealloc_is_virtual(size_t size) { return (size > PAGE_SIZE); } static void * kmem_pagealloc(size_t size) { if (kmem_pagealloc_is_virtual(size)) { return vm_kmem_alloc(size); } else { struct vm_page *page; page = vm_page_alloc(vm_page_order(size), VM_PAGE_SEL_DIRECTMAP, VM_PAGE_KMEM); if (page == NULL) { return NULL; } return vm_page_direct_ptr(page); } } static void kmem_pagefree(void *ptr, size_t size) { if (kmem_pagealloc_is_virtual(size)) { vm_kmem_free(ptr, size); } else { struct vm_page *page; page = vm_page_lookup(vm_page_direct_pa((uintptr_t)ptr)); assert(page != NULL); vm_page_free(page, vm_page_order(size)); } } static void kmem_slab_create_verify(struct kmem_slab *slab, struct kmem_cache *cache) { struct kmem_buftag *buftag; unsigned long buffers; size_t buf_size; void *buf; buf_size = cache->buf_size; buf = slab->addr; buftag = kmem_buf_to_buftag(buf, cache); for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) { kmem_buf_fill(buf, KMEM_FREE_PATTERN, cache->bufctl_dist); buftag->state = KMEM_BUFTAG_FREE; buf += buf_size; buftag = kmem_buf_to_buftag(buf, cache); } } /* * Create an empty slab for a 
cache. * * The caller must drop all locks before calling this function. */ static struct kmem_slab * kmem_slab_create(struct kmem_cache *cache, size_t color) { struct kmem_slab *slab; union kmem_bufctl *bufctl; size_t buf_size; unsigned long buffers; void *slab_buf; slab_buf = kmem_pagealloc(cache->slab_size); if (slab_buf == NULL) { return NULL; } if (cache->flags & KMEM_CF_SLAB_EXTERNAL) { slab = kmem_cache_alloc(&kmem_slab_cache); if (slab == NULL) { kmem_pagefree(slab_buf, cache->slab_size); return NULL; } } else { slab = (struct kmem_slab *)(slab_buf + cache->slab_size) - 1; } list_node_init(&slab->node); slab->nr_refs = 0; slab->first_free = NULL; slab->addr = slab_buf + color; buf_size = cache->buf_size; bufctl = kmem_buf_to_bufctl(slab->addr, cache); for (buffers = cache->bufs_per_slab; buffers != 0; buffers--) { bufctl->next = slab->first_free; slab->first_free = bufctl; bufctl = (union kmem_bufctl *)((void *)bufctl + buf_size); } if (cache->flags & KMEM_CF_VERIFY) { kmem_slab_create_verify(slab, cache); } return slab; } static inline uintptr_t kmem_slab_buf(const struct kmem_slab *slab) { return P2ALIGN((uintptr_t)slab->addr, PAGE_SIZE); } static void kmem_cpu_pool_init(struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache) { mutex_init(&cpu_pool->lock); cpu_pool->flags = cache->flags; cpu_pool->size = 0; cpu_pool->transfer_size = 0; cpu_pool->nr_objs = 0; cpu_pool->array = NULL; } static inline struct kmem_cpu_pool * kmem_cpu_pool_get(struct kmem_cache *cache) { return &cache->cpu_pools[cpu_id()]; } static inline void kmem_cpu_pool_build(struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache, void **array) { cpu_pool->size = cache->cpu_pool_type->array_size; cpu_pool->transfer_size = (cpu_pool->size + KMEM_CPU_POOL_TRANSFER_RATIO - 1) / KMEM_CPU_POOL_TRANSFER_RATIO; cpu_pool->array = array; } static inline void * kmem_cpu_pool_pop(struct kmem_cpu_pool *cpu_pool) { cpu_pool->nr_objs--; return cpu_pool->array[cpu_pool->nr_objs]; } static inline void kmem_cpu_pool_push(struct kmem_cpu_pool *cpu_pool, void *obj) { cpu_pool->array[cpu_pool->nr_objs] = obj; cpu_pool->nr_objs++; } static int kmem_cpu_pool_fill(struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache) { kmem_ctor_fn_t ctor; void *buf; int i; ctor = (cpu_pool->flags & KMEM_CF_VERIFY) ? 
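    /*
     * In verify mode, pooled objects are kept unconstructed so that their
     * free pattern can be checked; kmem_cache_alloc_verify() runs the
     * constructor when an object is actually handed out.
     */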
NULL : cache->ctor; mutex_lock(&cache->lock); for (i = 0; i < cpu_pool->transfer_size; i++) { buf = kmem_cache_alloc_from_slab(cache); if (buf == NULL) { break; } if (ctor != NULL) { ctor(buf); } kmem_cpu_pool_push(cpu_pool, buf); } mutex_unlock(&cache->lock); return i; } static void kmem_cpu_pool_drain(struct kmem_cpu_pool *cpu_pool, struct kmem_cache *cache) { void *obj; int i; mutex_lock(&cache->lock); for (i = cpu_pool->transfer_size; i > 0; i--) { obj = kmem_cpu_pool_pop(cpu_pool); kmem_cache_free_to_slab(cache, obj); } mutex_unlock(&cache->lock); } static void kmem_cache_error(struct kmem_cache *cache, void *buf, int error, void *arg) { struct kmem_buftag *buftag; printf("kmem: error: cache: %s, buffer: %p\n", cache->name, buf); switch(error) { case KMEM_ERR_INVALID: panic("kmem: freeing invalid address"); break; case KMEM_ERR_DOUBLEFREE: panic("kmem: attempting to free the same address twice"); break; case KMEM_ERR_BUFTAG: buftag = arg; panic("kmem: invalid buftag content, buftag state: %p", (void *)buftag->state); break; case KMEM_ERR_MODIFIED: panic("kmem: free buffer modified, fault address: %p, " "offset in buffer: %td", arg, arg - buf); break; case KMEM_ERR_REDZONE: panic("kmem: write beyond end of buffer, fault address: %p, " "offset in buffer: %td", arg, arg - buf); break; default: panic("kmem: unknown error"); } /* * Never reached. */ } /* * Compute properties such as slab size for the given cache. * * Once the slab size is known, this function sets the related properties * (buffers per slab and maximum color). It can also set some KMEM_CF_xxx * flags depending on the resulting layout. */ static void kmem_cache_compute_properties(struct kmem_cache *cache, int flags) { size_t size, waste; int embed; if (cache->buf_size < KMEM_BUF_SIZE_THRESHOLD) { flags |= KMEM_CACHE_NOOFFSLAB; } cache->slab_size = PAGE_SIZE; for (;;) { if (flags & KMEM_CACHE_NOOFFSLAB) { embed = 1; } else { waste = cache->slab_size % cache->buf_size; embed = (sizeof(struct kmem_slab) <= waste); } size = cache->slab_size; if (embed) { size -= sizeof(struct kmem_slab); } if (size >= cache->buf_size) { break; } cache->slab_size += PAGE_SIZE; } /* * A user may force page allocation in order to guarantee that virtual * memory isn't used. This is normally done for objects that are used * to implement virtual memory and avoid circular dependencies. * * When forcing the use of direct page allocation, only allow single * page allocations in order to completely prevent physical memory * fragmentation from making slab allocations fail. */ if ((flags & KMEM_CACHE_PAGE_ONLY) && (cache->slab_size != PAGE_SIZE)) { panic("kmem: unable to guarantee page allocation"); } cache->bufs_per_slab = size / cache->buf_size; cache->color_max = size % cache->buf_size; /* * Make sure the first page of a slab buffer can be found from the * address of the first object. * * See kmem_slab_buf(). 
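 *
 * If colors could reach PAGE_SIZE or more, the first object of a slab could
 * start on a page other than the first page of the underlying allocation,
 * and aligning its address down to a page boundary would no longer yield
 * that first page; the maximum color is therefore reset to zero in that
 * case.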
*/ if (cache->color_max >= PAGE_SIZE) { cache->color_max = 0; } if (!embed) { cache->flags |= KMEM_CF_SLAB_EXTERNAL; } } void kmem_cache_init(struct kmem_cache *cache, const char *name, size_t obj_size, size_t align, kmem_ctor_fn_t ctor, int flags) { struct kmem_cpu_pool_type *cpu_pool_type; size_t i, buf_size; #ifdef CONFIG_KMEM_DEBUG cache->flags = KMEM_CF_VERIFY; #else /* CONFIG_KMEM_DEBUG */ cache->flags = 0; #endif /* CONFIG_KMEM_DEBUG */ if (flags & KMEM_CACHE_VERIFY) { cache->flags |= KMEM_CF_VERIFY; } if (align < KMEM_ALIGN_MIN) { align = KMEM_ALIGN_MIN; } assert(obj_size > 0); assert(ISP2(align)); assert(align < PAGE_SIZE); buf_size = P2ROUND(obj_size, align); mutex_init(&cache->lock); list_node_init(&cache->node); list_init(&cache->partial_slabs); list_init(&cache->free_slabs); cache->obj_size = obj_size; cache->align = align; cache->buf_size = buf_size; cache->bufctl_dist = buf_size - sizeof(union kmem_bufctl); cache->color = 0; cache->nr_objs = 0; cache->nr_bufs = 0; cache->nr_slabs = 0; cache->nr_free_slabs = 0; cache->ctor = ctor; strlcpy(cache->name, name, sizeof(cache->name)); cache->buftag_dist = 0; cache->redzone_pad = 0; if (cache->flags & KMEM_CF_VERIFY) { cache->bufctl_dist = buf_size; cache->buftag_dist = cache->bufctl_dist + sizeof(union kmem_bufctl); cache->redzone_pad = cache->bufctl_dist - cache->obj_size; buf_size += sizeof(union kmem_bufctl) + sizeof(struct kmem_buftag); buf_size = P2ROUND(buf_size, align); cache->buf_size = buf_size; } kmem_cache_compute_properties(cache, flags); for (cpu_pool_type = kmem_cpu_pool_types; buf_size <= cpu_pool_type->buf_size; cpu_pool_type++); cache->cpu_pool_type = cpu_pool_type; for (i = 0; i < ARRAY_SIZE(cache->cpu_pools); i++) { kmem_cpu_pool_init(&cache->cpu_pools[i], cache); } mutex_lock(&kmem_cache_list_lock); list_insert_tail(&kmem_cache_list, &cache->node); mutex_unlock(&kmem_cache_list_lock); } static inline int kmem_cache_empty(struct kmem_cache *cache) { return cache->nr_objs == cache->nr_bufs; } static struct kmem_slab * kmem_cache_buf_to_slab(const struct kmem_cache *cache, void *buf) { if ((cache->flags & KMEM_CF_SLAB_EXTERNAL) || (cache->slab_size != PAGE_SIZE)) { return NULL; } return (struct kmem_slab *)vm_page_end((uintptr_t)buf) - 1; } static inline bool kmem_cache_registration_required(const struct kmem_cache *cache) { return ((cache->flags & KMEM_CF_SLAB_EXTERNAL) || (cache->flags & KMEM_CF_VERIFY) || (cache->slab_size != PAGE_SIZE)); } static void kmem_cache_register(struct kmem_cache *cache, struct kmem_slab *slab) { struct vm_page *page; uintptr_t va, end; phys_addr_t pa; bool virtual; int error; assert(kmem_cache_registration_required(cache)); assert(slab->nr_refs == 0); virtual = kmem_pagealloc_is_virtual(cache->slab_size); for (va = kmem_slab_buf(slab), end = va + cache->slab_size; va < end; va += PAGE_SIZE) { if (virtual) { error = pmap_kextract(va, &pa); assert(!error); } else { pa = vm_page_direct_pa(va); } page = vm_page_lookup(pa); assert(page != NULL); assert((virtual && vm_page_type(page) == VM_PAGE_KERNEL) || (!virtual && vm_page_type(page) == VM_PAGE_KMEM)); assert(vm_page_get_priv(page) == NULL); vm_page_set_priv(page, slab); } } static struct kmem_slab * kmem_cache_lookup(struct kmem_cache *cache, void *buf) { struct kmem_slab *slab; struct vm_page *page; uintptr_t va; phys_addr_t pa; bool virtual; int error; assert(kmem_cache_registration_required(cache)); virtual = kmem_pagealloc_is_virtual(cache->slab_size); va = (uintptr_t)buf; if (virtual) { error = pmap_kextract(va, &pa); if (error) { 
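            /* The address has no kernel mapping, so it cannot be part of a slab. */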
return NULL; } } else { pa = vm_page_direct_pa(va); } page = vm_page_lookup(pa); if (page == NULL) { return NULL; } if ((virtual && (vm_page_type(page) != VM_PAGE_KERNEL)) || (!virtual && (vm_page_type(page) != VM_PAGE_KMEM))) { return NULL; } slab = vm_page_get_priv(page); assert((uintptr_t)buf >= kmem_slab_buf(slab)); assert((uintptr_t)buf < (kmem_slab_buf(slab) + cache->slab_size)); return slab; } static int kmem_cache_grow(struct kmem_cache *cache) { struct kmem_slab *slab; size_t color; int empty; mutex_lock(&cache->lock); if (!kmem_cache_empty(cache)) { mutex_unlock(&cache->lock); return 1; } color = cache->color; cache->color += cache->align; if (cache->color > cache->color_max) { cache->color = 0; } mutex_unlock(&cache->lock); slab = kmem_slab_create(cache, color); mutex_lock(&cache->lock); if (slab != NULL) { list_insert_head(&cache->free_slabs, &slab->node); cache->nr_bufs += cache->bufs_per_slab; cache->nr_slabs++; cache->nr_free_slabs++; if (kmem_cache_registration_required(cache)) { kmem_cache_register(cache, slab); } } /* * Even if our slab creation failed, another thread might have succeeded * in growing the cache. */ empty = kmem_cache_empty(cache); mutex_unlock(&cache->lock); return !empty; } /* * Allocate a raw (unconstructed) buffer from the slab layer of a cache. * * The cache must be locked before calling this function. */ static void * kmem_cache_alloc_from_slab(struct kmem_cache *cache) { struct kmem_slab *slab; union kmem_bufctl *bufctl; if (!list_empty(&cache->partial_slabs)) { slab = list_first_entry(&cache->partial_slabs, struct kmem_slab, node); } else if (!list_empty(&cache->free_slabs)) { slab = list_first_entry(&cache->free_slabs, struct kmem_slab, node); } else { return NULL; } bufctl = slab->first_free; assert(bufctl != NULL); slab->first_free = bufctl->next; slab->nr_refs++; cache->nr_objs++; if (slab->nr_refs == cache->bufs_per_slab) { /* The slab has become complete */ list_remove(&slab->node); if (slab->nr_refs == 1) { cache->nr_free_slabs--; } } else if (slab->nr_refs == 1) { /* * The slab has become partial. Insert the new slab at the end of * the list to reduce fragmentation. */ list_remove(&slab->node); list_insert_tail(&cache->partial_slabs, &slab->node); cache->nr_free_slabs--; } return kmem_bufctl_to_buf(bufctl, cache); } /* * Release a buffer to the slab layer of a cache. * * The cache must be locked before calling this function. 
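 *
 * Releasing a buffer may relink its slab: a previously complete slab (which
 * is in no list) is inserted at the head of the partial slabs list, and a
 * slab whose last buffer is released is moved to the free slabs list.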
*/ static void kmem_cache_free_to_slab(struct kmem_cache *cache, void *buf) { struct kmem_slab *slab; union kmem_bufctl *bufctl; slab = kmem_cache_buf_to_slab(cache, buf); if (slab == NULL) { slab = kmem_cache_lookup(cache, buf); assert(slab != NULL); } assert(slab->nr_refs >= 1); assert(slab->nr_refs <= cache->bufs_per_slab); bufctl = kmem_buf_to_bufctl(buf, cache); bufctl->next = slab->first_free; slab->first_free = bufctl; slab->nr_refs--; cache->nr_objs--; if (slab->nr_refs == 0) { /* The slab has become free */ /* If it was partial, remove it from its list */ if (cache->bufs_per_slab != 1) { list_remove(&slab->node); } list_insert_head(&cache->free_slabs, &slab->node); cache->nr_free_slabs++; } else if (slab->nr_refs == (cache->bufs_per_slab - 1)) { /* The slab has become partial */ list_insert_head(&cache->partial_slabs, &slab->node); } } static void kmem_cache_alloc_verify(struct kmem_cache *cache, void *buf, int construct) { struct kmem_buftag *buftag; union kmem_bufctl *bufctl; void *addr; buftag = kmem_buf_to_buftag(buf, cache); if (buftag->state != KMEM_BUFTAG_FREE) { kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag); } addr = kmem_buf_verify_fill(buf, KMEM_FREE_PATTERN, KMEM_UNINIT_PATTERN, cache->bufctl_dist); if (addr != NULL) { kmem_cache_error(cache, buf, KMEM_ERR_MODIFIED, addr); } addr = buf + cache->obj_size; memset(addr, KMEM_REDZONE_BYTE, cache->redzone_pad); bufctl = kmem_buf_to_bufctl(buf, cache); bufctl->redzone = KMEM_REDZONE_WORD; buftag->state = KMEM_BUFTAG_ALLOC; if (construct && (cache->ctor != NULL)) { cache->ctor(buf); } } void * kmem_cache_alloc(struct kmem_cache *cache) { struct kmem_cpu_pool *cpu_pool; int filled, verify; void *buf; thread_pin(); cpu_pool = kmem_cpu_pool_get(cache); mutex_lock(&cpu_pool->lock); fast_alloc: if (likely(cpu_pool->nr_objs > 0)) { buf = kmem_cpu_pool_pop(cpu_pool); verify = (cpu_pool->flags & KMEM_CF_VERIFY); mutex_unlock(&cpu_pool->lock); thread_unpin(); if (verify) { kmem_cache_alloc_verify(cache, buf, KMEM_AV_CONSTRUCT); } return buf; } if (cpu_pool->array != NULL) { filled = kmem_cpu_pool_fill(cpu_pool, cache); if (!filled) { mutex_unlock(&cpu_pool->lock); thread_unpin(); filled = kmem_cache_grow(cache); if (!filled) { return NULL; } thread_pin(); cpu_pool = kmem_cpu_pool_get(cache); mutex_lock(&cpu_pool->lock); } goto fast_alloc; } mutex_unlock(&cpu_pool->lock); thread_unpin(); slab_alloc: mutex_lock(&cache->lock); buf = kmem_cache_alloc_from_slab(cache); mutex_unlock(&cache->lock); if (buf == NULL) { filled = kmem_cache_grow(cache); if (!filled) { return NULL; } goto slab_alloc; } if (cache->flags & KMEM_CF_VERIFY) { kmem_cache_alloc_verify(cache, buf, KMEM_AV_NOCONSTRUCT); } if (cache->ctor != NULL) { cache->ctor(buf); } return buf; } static void kmem_cache_free_verify(struct kmem_cache *cache, void *buf) { struct kmem_buftag *buftag; struct kmem_slab *slab; union kmem_bufctl *bufctl; unsigned char *redzone_byte; uintptr_t slabend; slab = kmem_cache_lookup(cache, buf); if (slab == NULL) { kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL); } slabend = P2ALIGN((uintptr_t)slab->addr + cache->slab_size, PAGE_SIZE); if ((uintptr_t)buf >= slabend) { kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL); } if ((((uintptr_t)buf - (uintptr_t)slab->addr) % cache->buf_size) != 0) { kmem_cache_error(cache, buf, KMEM_ERR_INVALID, NULL); } /* * As the buffer address is valid, accessing its buftag is safe. 
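 *
 * The buftag state tells whether the buffer is currently allocated, and the
 * redzone bytes stored after the object reveal writes past its end.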
*/ buftag = kmem_buf_to_buftag(buf, cache); if (buftag->state != KMEM_BUFTAG_ALLOC) { if (buftag->state == KMEM_BUFTAG_FREE) { kmem_cache_error(cache, buf, KMEM_ERR_DOUBLEFREE, NULL); } else { kmem_cache_error(cache, buf, KMEM_ERR_BUFTAG, buftag); } } redzone_byte = buf + cache->obj_size; bufctl = kmem_buf_to_bufctl(buf, cache); while (redzone_byte < (unsigned char *)bufctl) { if (*redzone_byte != KMEM_REDZONE_BYTE) { kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte); } redzone_byte++; } if (bufctl->redzone != KMEM_REDZONE_WORD) { unsigned long word; word = KMEM_REDZONE_WORD; redzone_byte = kmem_buf_verify_bytes(&bufctl->redzone, &word, sizeof(bufctl->redzone)); kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte); } kmem_buf_fill(buf, KMEM_FREE_PATTERN, cache->bufctl_dist); buftag->state = KMEM_BUFTAG_FREE; } void kmem_cache_free(struct kmem_cache *cache, void *obj) { struct kmem_cpu_pool *cpu_pool; void **array; thread_pin(); cpu_pool = kmem_cpu_pool_get(cache); if (cpu_pool->flags & KMEM_CF_VERIFY) { thread_unpin(); kmem_cache_free_verify(cache, obj); thread_pin(); cpu_pool = kmem_cpu_pool_get(cache); } mutex_lock(&cpu_pool->lock); fast_free: if (likely(cpu_pool->nr_objs < cpu_pool->size)) { kmem_cpu_pool_push(cpu_pool, obj); mutex_unlock(&cpu_pool->lock); thread_unpin(); return; } if (cpu_pool->array != NULL) { kmem_cpu_pool_drain(cpu_pool, cache); goto fast_free; } mutex_unlock(&cpu_pool->lock); array = kmem_cache_alloc(cache->cpu_pool_type->array_cache); if (array != NULL) { mutex_lock(&cpu_pool->lock); /* * Another thread may have built the CPU pool while the lock was * dropped. */ if (cpu_pool->array != NULL) { mutex_unlock(&cpu_pool->lock); thread_unpin(); kmem_cache_free(cache->cpu_pool_type->array_cache, array); thread_pin(); cpu_pool = kmem_cpu_pool_get(cache); mutex_lock(&cpu_pool->lock); goto fast_free; } kmem_cpu_pool_build(cpu_pool, cache, array); goto fast_free; } thread_unpin(); mutex_lock(&cache->lock); kmem_cache_free_to_slab(cache, obj); mutex_unlock(&cache->lock); } void kmem_cache_info(struct kmem_cache *cache) { char flags_str[64]; snprintf(flags_str, sizeof(flags_str), "%s%s", (cache->flags & KMEM_CF_SLAB_EXTERNAL) ? " SLAB_EXTERNAL" : "", (cache->flags & KMEM_CF_VERIFY) ? 
" VERIFY" : ""); mutex_lock(&cache->lock); printf("kmem: flags: 0x%x%s\n" "kmem: obj_size: %zu\n" "kmem: align: %zu\n" "kmem: buf_size: %zu\n" "kmem: bufctl_dist: %zu\n" "kmem: slab_size: %zu\n" "kmem: color_max: %zu\n" "kmem: bufs_per_slab: %lu\n" "kmem: nr_objs: %lu\n" "kmem: nr_bufs: %lu\n" "kmem: nr_slabs: %lu\n" "kmem: nr_free_slabs: %lu\n" "kmem: buftag_dist: %zu\n" "kmem: redzone_pad: %zu\n" "kmem: cpu_pool_size: %d\n", cache->flags, flags_str, cache->obj_size, cache->align, cache->buf_size, cache->bufctl_dist, cache->slab_size, cache->color_max, cache->bufs_per_slab, cache->nr_objs, cache->nr_bufs, cache->nr_slabs, cache->nr_free_slabs, cache->buftag_dist, cache->redzone_pad, cache->cpu_pool_type->array_size); mutex_unlock(&cache->lock); } #ifdef CONFIG_SHELL static struct kmem_cache * kmem_lookup_cache(const char *name) { struct kmem_cache *cache; mutex_lock(&kmem_cache_list_lock); list_for_each_entry(&kmem_cache_list, cache, node) { if (strcmp(cache->name, name) == 0) { goto out; } } cache = NULL; out: mutex_unlock(&kmem_cache_list_lock); return cache; } static void kmem_shell_info(int argc, char **argv) { struct kmem_cache *cache; if (argc < 2) { kmem_info(); } else { cache = kmem_lookup_cache(argv[1]); if (cache == NULL) { printf("kmem: info: invalid argument\n"); return; } kmem_cache_info(cache); } } static struct shell_cmd kmem_shell_cmds[] = { SHELL_CMD_INITIALIZER("kmem_info", kmem_shell_info, "kmem_info []", "display information about kernel memory and caches"), }; static int __init kmem_setup_shell(void) { SHELL_REGISTER_CMDS(kmem_shell_cmds); return 0; } INIT_OP_DEFINE(kmem_setup_shell, INIT_OP_DEP(kmem_setup, true), INIT_OP_DEP(printf_setup, true), INIT_OP_DEP(shell_setup, true), INIT_OP_DEP(thread_setup, true)); #endif /* CONFIG_SHELL */ static int __init kmem_bootstrap(void) { struct kmem_cpu_pool_type *cpu_pool_type; char name[KMEM_NAME_SIZE]; size_t i, size; /* Make sure a bufctl can always be stored in a buffer */ assert(sizeof(union kmem_bufctl) <= KMEM_ALIGN_MIN); list_init(&kmem_cache_list); mutex_init(&kmem_cache_list_lock); for (i = 0; i < ARRAY_SIZE(kmem_cpu_pool_types); i++) { cpu_pool_type = &kmem_cpu_pool_types[i]; cpu_pool_type->array_cache = &kmem_cpu_array_caches[i]; sprintf(name, "kmem_cpu_array_%d", cpu_pool_type->array_size); size = sizeof(void *) * cpu_pool_type->array_size; kmem_cache_init(cpu_pool_type->array_cache, name, size, cpu_pool_type->array_align, NULL, 0); } /* * Prevent off slab data for the slab cache to avoid infinite recursion. 
*/ kmem_cache_init(&kmem_slab_cache, "kmem_slab", sizeof(struct kmem_slab), 0, NULL, KMEM_CACHE_NOOFFSLAB); size = 1 << KMEM_CACHES_FIRST_ORDER; for (i = 0; i < ARRAY_SIZE(kmem_caches); i++) { sprintf(name, "kmem_%zu", size); kmem_cache_init(&kmem_caches[i], name, size, 0, NULL, 0); size <<= 1; } return 0; } INIT_OP_DEFINE(kmem_bootstrap, INIT_OP_DEP(thread_bootstrap, true), INIT_OP_DEP(vm_page_setup, true)); static int __init kmem_setup(void) { return 0; } INIT_OP_DEFINE(kmem_setup, INIT_OP_DEP(kmem_bootstrap, true), INIT_OP_DEP(vm_kmem_setup, true)); static inline size_t kmem_get_index(unsigned long size) { return log2_order(size) - KMEM_CACHES_FIRST_ORDER; } static void kmem_alloc_verify(struct kmem_cache *cache, void *buf, size_t size) { size_t redzone_size; void *redzone; assert(size <= cache->obj_size); redzone = buf + size; redzone_size = cache->obj_size - size; memset(redzone, KMEM_REDZONE_BYTE, redzone_size); } void * kmem_alloc(size_t size) { size_t index; void *buf; if (size == 0) { return NULL; } index = kmem_get_index(size); if (index < ARRAY_SIZE(kmem_caches)) { struct kmem_cache *cache; cache = &kmem_caches[index]; buf = kmem_cache_alloc(cache); if ((buf != NULL) && (cache->flags & KMEM_CF_VERIFY)) { kmem_alloc_verify(cache, buf, size); } } else { buf = kmem_pagealloc(size); } return buf; } void * kmem_zalloc(size_t size) { void *ptr; ptr = kmem_alloc(size); if (ptr == NULL) { return NULL; } memset(ptr, 0, size); return ptr; } static void kmem_free_verify(struct kmem_cache *cache, void *buf, size_t size) { unsigned char *redzone_byte, *redzone_end; assert(size <= cache->obj_size); redzone_byte = buf + size; redzone_end = buf + cache->obj_size; while (redzone_byte < redzone_end) { if (*redzone_byte != KMEM_REDZONE_BYTE) { kmem_cache_error(cache, buf, KMEM_ERR_REDZONE, redzone_byte); } redzone_byte++; } } void kmem_free(void *ptr, size_t size) { size_t index; if ((ptr == NULL) || (size == 0)) { return; } index = kmem_get_index(size); if (index < ARRAY_SIZE(kmem_caches)) { struct kmem_cache *cache; cache = &kmem_caches[index]; if (cache->flags & KMEM_CF_VERIFY) { kmem_free_verify(cache, ptr, size); } kmem_cache_free(cache, ptr); } else { kmem_pagefree(ptr, size); } } void kmem_info(void) { size_t total_reclaim, total_reclaim_physical, total_reclaim_virtual; size_t total, total_physical, total_virtual; size_t mem_usage, mem_reclaim; struct kmem_cache *cache; total = 0; total_physical = 0; total_virtual = 0; total_reclaim = 0; total_reclaim_physical = 0; total_reclaim_virtual = 0; printf("kmem: cache obj slab bufs objs bufs " " total reclaimable\n" "kmem: name size size /slab usage count " " memory memory\n"); mutex_lock(&kmem_cache_list_lock); list_for_each_entry(&kmem_cache_list, cache, node) { mutex_lock(&cache->lock); mem_usage = (cache->nr_slabs * cache->slab_size) >> 10; mem_reclaim = (cache->nr_free_slabs * cache->slab_size) >> 10; total += mem_usage; total_reclaim += mem_reclaim; if (kmem_pagealloc_is_virtual(cache->slab_size)) { total_virtual += mem_usage; total_reclaim_virtual += mem_reclaim; } else { total_physical += mem_usage; total_reclaim_physical += mem_reclaim; } printf("kmem: %-19s %6zu %3zuk %4lu %6lu %6lu %7zuk %10zuk\n", cache->name, cache->obj_size, cache->slab_size >> 10, cache->bufs_per_slab, cache->nr_objs, cache->nr_bufs, mem_usage, mem_reclaim); mutex_unlock(&cache->lock); } mutex_unlock(&kmem_cache_list_lock); printf("total: %zuk (phys: %zuk virt: %zuk), " "reclaim: %zuk (phys: %zuk virt: %zuk)\n", total, total_physical, total_virtual, total_reclaim, 
total_reclaim_physical, total_reclaim_virtual); }
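
/*
 * Example usage (illustrative sketch only, not part of the allocator): a
 * hypothetical client creating a dedicated cache for its objects. The
 * struct foo, foo_ctor, foo_setup, foo_create and foo_destroy names are
 * made up for this example.
 *
 *   struct foo {
 *       unsigned long id;
 *       char name[16];
 *   };
 *
 *   static struct kmem_cache foo_cache;
 *
 *   static void
 *   foo_ctor(void *buf)
 *   {
 *       struct foo *foo = buf;
 *
 *       foo->id = 0;
 *   }
 *
 *   static void
 *   foo_setup(void)
 *   {
 *       kmem_cache_init(&foo_cache, "foo", sizeof(struct foo), 0,
 *                       foo_ctor, 0);
 *   }
 *
 *   struct foo *
 *   foo_create(void)
 *   {
 *       return kmem_cache_alloc(&foo_cache);
 *   }
 *
 *   void
 *   foo_destroy(struct foo *foo)
 *   {
 *       kmem_cache_free(&foo_cache, foo);
 *   }
 *
 * General purpose allocations of arbitrary size go through kmem_alloc() or
 * kmem_zalloc() instead, and are released with kmem_free(), which requires
 * the original allocation size.
 */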