/*
 * Copyright (c) 2017 Richard Braun.
 * Copyright (c) 2017 Jerko Lenstra.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 *
 * Algorithm
 * ---------
 * This allocator is based on "The Art of Computer Programming" by
 * Donald Knuth. The algorithm itself is described in :
 *  - Volume 1 - Fundamental Algorithms
 *  - Chapter 2 - Information Structures
 *  - 2.5 Dynamic Storage Allocation
 *  - Algorithm A (First-fit method)
 *  - Algorithm C (Liberation with boundary tags)
 *
 * The point of a memory allocator is to manage memory in terms of allocation
 * and liberation requests. Allocation finds and reserves memory for a user,
 * whereas liberation makes that memory available again for future
 * allocations. Memory is not created; it must be available from the start,
 * and the allocator merely tracks its usage with metadata, themselves
 * allocated from that same memory.
 *
 * Data structures
 * ---------------
 * Here is a somewhat graphical representation of how memory is organized in
 * this implementation :
 *
 *      allocated block               free block
 * +------+-----------------+  +------+-----------------+
 * | size | allocation flag |  | size | allocation flag |  <- header boundary
 * +------+-----------------+  +------+-----------------+     tag
 * |                        |  | free list node (prev   |  <- payload or
 * .        payload         .  | and next pointers)     |     free list node
 * .                        .  +------------------------+
 * .                        .  .                        .
 * .                        .  .                        .
 * .                        .  .                        .
 * +------+-----------------+  +------+-----------------+
 * | size | allocation flag |  | size | allocation flag |  <- footer boundary
 * +------+-----------------+  +------+-----------------+     tag
 *
 * Here is a view of multiple contiguous blocks :
 *
 * +------+-----------------+ <--+
 * | size | allocation flag |    |
 * +------+-----------------+    +- single block
 * |                        |    |
 * .        payload         .    |
 * .                        .    |
 * .                        .    |
 * +------+-----------------+    |
 * | size | allocation flag |    |
 * +------+-----------------+ <--+
 * +------+-----------------+
 * | size | allocation flag |
 * +------+-----------------+
 * |                        |
 * .        payload         .
 * .                        .
 * .                        .
 * +------+-----------------+
 * | size | allocation flag |
 * +------+-----------------+
 * +------+-----------------+
 * | size | allocation flag |
 * +------+-----------------+
 * |                        |
 * .        payload         .
 * .                        .
 * .                        .
 * +------+-----------------+
 * | size | allocation flag |
 * +------+-----------------+
 *
 * The reason for the footer boundary tag is merging on liberation. When
 * called, the mem_free() function is given a pointer to a payload. Since
 * the size of a boundary tag is fixed, the address of the whole block can
 * easily be computed. In order to reduce fragmentation, i.e. a state where
 * free blocks are all small, preventing the allocation of bigger blocks,
 * the allocator attempts to merge neighboring free blocks. Obtaining the
 * address of the next block is achieved by simply adding the size of the
 * current block, stored in its boundary tag, to the address of the block.
 * But without a footer boundary tag, finding the address of the previous
 * block would be computationally expensive.
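 *
 * As an illustration (a sketch only, using names defined later in this
 * file), assuming a boundary tag is a single size_t, the footer boundary
 * tag of the previous block is located immediately before the header of
 * the current one, so that the previous block can be reached with :
 *
 * size = ((size_t *)block)[-1] & MEM_BTAG_SIZE_MASK;
 * prev = (struct mem_block *)((char *)block - size);
 *
 * This is essentially what mem_block_prev() does below.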
 *
 *
 * Alignment
 * ---------
 * The word "aligned" and references to "alignment" in general can be found
 * throughout the documentation of this module. Alignment is the property
 * of a value, usually an address, of being a multiple of a size. Such a
 * value is said to be "size-byte aligned", or "aligned on a size byte
 * boundary". Common sizes include the processor word or cache line sizes.
 * For example, the x86 architecture is 32-bit, making the word size
 * 4 bytes. Addresses such as 0, 4, 8, 12, 512 and 516 are 4-byte aligned,
 * whereas 1, 2, 3, 511, 513, 514 and 515 aren't.
 *
 *
 * Pointers
 * --------
 * The code in this module makes extensive use of pointer arithmetic and
 * conversion between pointer types. It's important to keep in mind that,
 * in C, the void pointer is meant as a generic reference to objects of
 * any type. As a result, any pointer can be assigned to a void pointer
 * without explicit casting, and a void pointer may likewise be assigned
 * to any other pointer.
 */

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#include <lib/list.h>
#include <lib/macros.h>

#include "mem.h"
#include "mutex.h"

/*
 * Total size of the backing storage heap.
 *
 * Note that the resulting kernel image file remains much smaller than this.
 * The reason is that the heap is defined as uninitialized data, which are
 * allocated out of the bss section by default. The bss section is filled
 * with zeroes when the kernel image is loaded by the boot loader, as
 * mandated by the ELF specification, which means there is no need to store
 * the heap data, or any other statically allocated uninitialized data, in
 * the kernel image file.
 */
#define MEM_HEAP_SIZE (32 * 1024 * 1024)

/*
 * Alignment required on addresses returned by mem_alloc().
 *
 * See the description of mem_alloc() in the public header.
 */
#define MEM_ALIGN 4

/*
 * Minimum size of a block.
 *
 * When free, the payload of a block is used as storage for the free list
 * node.
 */
#define MEM_BLOCK_MIN_SIZE P2ROUND(((sizeof(struct mem_btag) * 2) \
                                    + sizeof(struct mem_free_node)), MEM_ALIGN)

/*
 * The heap itself must be aligned, so that the first block is also aligned.
 * Assuming all blocks have an aligned size, the last block must also end on
 * an aligned address.
 *
 * This kind of check increases safety and robustness when changing
 * compile-time parameters such as the heap size declared above.
 *
 * Another relevant check would be to make sure the heap is large enough
 * to contain at least one block, but since the minimum block size is
 * defined using the C sizeof operator, unknown to the C preprocessor,
 * this check requires C11 static assertions.
 */
#if !P2ALIGNED(MEM_HEAP_SIZE, MEM_ALIGN)
#error "invalid heap size"
#endif
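
/*
 * For reference, here is a sketch of how the check mentioned above could
 * be expressed with a C11 compiler (illustrative only; it would have to
 * appear after the structure definitions below so that the sizeof
 * expressions are valid) :
 *
 * _Static_assert(MEM_HEAP_SIZE >= MEM_BLOCK_MIN_SIZE,
 *                "heap too small to contain a single block");
 */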

/*
 * Masks applied on boundary tags to extract the size and the allocation
 * flag.
 */
#define MEM_BTAG_ALLOCATED_MASK ((size_t)0x1)
#define MEM_BTAG_SIZE_MASK      (~MEM_BTAG_ALLOCATED_MASK)

/*
 * Boundary tag.
 *
 * Since the alignment constraint specified for mem_alloc() applies to
 * payloads, not blocks, it's important that the size of a boundary tag
 * also be aligned. This is a check that would best be performed with C11
 * static assertions.
 *
 * In addition, the alignment constraint implies that the least significant
 * bit is always 0. Therefore, this bit is used to store the allocation flag.
 */
struct mem_btag {
    size_t value;
};

/*
 * Memory block.
 *
 * Note the use of a C99 flexible array member. This construction enables
 * the implementation to directly access the payload without pointer
 * arithmetic or casting, and is one of the preferred ways to deal with
 * headers.
 */
struct mem_block {
    struct mem_btag btag;
    char payload[];
};

/*
 * Free list node.
 *
 * This structure is used as the payload of free blocks.
 */
struct mem_free_node {
    struct list node;
};

/*
 * List of free nodes.
 *
 * Here is an example of a TODO entry, a convention used to record pending
 * tasks directly in the source code :
 *
 * TODO Statistics counters.
 */
struct mem_free_list {
    struct list free_nodes;
};

/*
 * Memory heap.
 *
 * This allocator uses a single heap initialized with a single large free
 * block. The allocator could be extended to support multiple heaps, each
 * initialized with a single free block, forming an initial free list of
 * more than one block.
 *
 * The heap must be correctly aligned, so that the first block is
 * correctly aligned.
 */
static char mem_heap[MEM_HEAP_SIZE] __aligned(MEM_ALIGN);

/*
 * The unique free list.
 *
 * In order to improve allocation speed, multiple lists may be used, each
 * for a specific size range. Such lists are called segregated free lists
 * and enable more allocation policies, such as instant-fit.
 */
static struct mem_free_list mem_free_list;

/*
 * Global mutex used to serialize access to allocation data.
 */
static struct mutex mem_mutex;

static bool
mem_aligned(size_t value)
{
    return P2ALIGNED(value, MEM_ALIGN);
}

static void *
mem_heap_end(void)
{
    return &mem_heap[sizeof(mem_heap)];
}

static bool
mem_btag_allocated(const struct mem_btag *btag)
{
    return btag->value & MEM_BTAG_ALLOCATED_MASK;
}

static void
mem_btag_set_allocated(struct mem_btag *btag)
{
    btag->value |= MEM_BTAG_ALLOCATED_MASK;
}

static void
mem_btag_clear_allocated(struct mem_btag *btag)
{
    btag->value &= ~MEM_BTAG_ALLOCATED_MASK;
}

static size_t
mem_btag_size(const struct mem_btag *btag)
{
    return btag->value & MEM_BTAG_SIZE_MASK;
}

static void
mem_btag_init(struct mem_btag *btag, size_t size)
{
    btag->value = size;
    mem_btag_set_allocated(btag);
}

static size_t
mem_block_size(const struct mem_block *block)
{
    return mem_btag_size(&block->btag);
}

static struct mem_block *
mem_block_from_payload(void *payload)
{
    size_t offset;

    /*
     * Here, payload refers to the payload member in struct mem_block, not
     * the local variable.
     */
    offset = offsetof(struct mem_block, payload);

    /*
     * Always keep pointer arithmetic in mind !
     *
     * The rule is fairly simple : whenever arithmetic operators are used
     * on pointers, the operation is scaled on the type size, so that e.g.
     * adding 1 means pointing to the next element. A good way to remember
     * this is that pointers can be used as arrays, making both these
     * expressions equivalent :
     *  - &ptr[1]
     *  - ptr + 1
     *
     * As a result, when counting in bytes and not in objects, it is
     * necessary to cast into a suitable pointer type. The char * type
     * is specifically meant for this, as C99 mandates that sizeof(char)
     * be 1 (6.5.3.4 The sizeof operator).
     *
     * As a side note, a GNU extension allows using pointer arithmetic on
     * void pointers, where the "size of void" is 1, turning pointer
     * arithmetic on void pointers into byte arithmetic, allowing this
     * expression to be written as :
     *
     * return payload - offset;
     *
     * See https://gcc.gnu.org/onlinedocs/gcc-6.4.0/gcc/Pointer-Arith.html
     */
    return (struct mem_block *)((char *)payload - offset);
}
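
/*
 * Illustration of the relation between a block and its payload (a sketch;
 * mem_block_payload() is defined further below) :
 *
 * void *ptr = mem_block_payload(block);
 * assert(mem_block_from_payload(ptr) == block);
 *
 * i.e. mem_block_from_payload() is the exact inverse of
 * mem_block_payload().
 */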

static void *
mem_block_end(const struct mem_block *block)
{
    /* See mem_block_from_payload() */
    return (struct mem_block *)((char *)block + mem_block_size(block));
}

static struct mem_btag *
mem_block_header_btag(struct mem_block *block)
{
    return &block->btag;
}

static struct mem_btag *
mem_block_footer_btag(struct mem_block *block)
{
    struct mem_btag *btag;

    btag = mem_block_end(block);

    /*
     * See ISO/IEC 9899:1999, 6.5.2.1 "Array subscripting" :
     * "A postfix expression followed by an expression in square brackets []
     * is a subscripted designation of an element of an array object. The
     * definition of the subscript operator [] is that E1[E2] is identical
     * to (*((E1)+(E2)))".
     *
     * This, by the way, implies the following equivalent expressions :
     *  - &btag[-1]
     *  - &(-1)[btag]
     *  - &*(btag - 1)
     *  - btag - 1
     */
    return &btag[-1];
}

static struct mem_block *
mem_block_prev(struct mem_block *block)
{
    struct mem_btag *btag;

    if ((char *)block == mem_heap) {
        return NULL;
    }

    btag = mem_block_header_btag(block);
    return (struct mem_block *)((char *)block - mem_btag_size(&btag[-1]));
}

static struct mem_block *
mem_block_next(struct mem_block *block)
{
    block = mem_block_end(block);

    if ((void *)block == mem_heap_end()) {
        return NULL;
    }

    return block;
}

static bool
mem_block_allocated(struct mem_block *block)
{
    return mem_btag_allocated(mem_block_header_btag(block));
}

static void
mem_block_set_allocated(struct mem_block *block)
{
    mem_btag_set_allocated(mem_block_header_btag(block));
    mem_btag_set_allocated(mem_block_footer_btag(block));
}

static void
mem_block_clear_allocated(struct mem_block *block)
{
    mem_btag_clear_allocated(mem_block_header_btag(block));
    mem_btag_clear_allocated(mem_block_footer_btag(block));
}

static void
mem_block_init(struct mem_block *block, size_t size)
{
    mem_btag_init(mem_block_header_btag(block), size);
    mem_btag_init(mem_block_footer_btag(block), size);
}

static void *
mem_block_payload(struct mem_block *block)
{
    return block->payload;
}

static struct mem_free_node *
mem_block_get_free_node(struct mem_block *block)
{
    assert(!mem_block_allocated(block));
    return mem_block_payload(block);
}

static bool
mem_block_inside_heap(const struct mem_block *block)
{
    void *heap_end;

    heap_end = mem_heap_end();

    return (((char *)block >= mem_heap)
            && ((void *)block->payload < heap_end)
            && ((void *)mem_block_end(block) <= heap_end));
}
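
/*
 * The list functions used below (list_init(), list_insert_head(),
 * list_remove() and list_for_each_entry()) come from the list header
 * included above. They are assumed to implement an intrusive
 * doubly-linked list, presumably built around something like :
 *
 * struct list {
 *     struct list *prev;
 *     struct list *next;
 * };
 *
 * where nodes are embedded into their owner structures, here
 * struct mem_free_node.
 */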

static void
mem_free_list_add(struct mem_free_list *list, struct mem_block *block)
{
    struct mem_free_node *free_node;

    assert(mem_block_allocated(block));

    mem_block_clear_allocated(block);
    free_node = mem_block_get_free_node(block);

    /*
     * Free blocks may be added at either the head or the tail of a list.
     * In this case, it's normally better to add at the head, because the
     * first-fit algorithm implementation starts from the beginning. This
     * means there is a good chance that a recently freed block may "soon"
     * be allocated again. Since it's likely that this block was accessed
     * before it was freed, there is a good chance that (part of) its
     * memory is still in the processor cache, potentially increasing the
     * chances of cache hits and saving a few expensive accesses from the
     * processor to memory. This is an example of inexpensive
     * micro-optimization.
     */
    list_insert_head(&list->free_nodes, &free_node->node);
}

static void
mem_free_list_remove(struct mem_free_list *list, struct mem_block *block)
{
    struct mem_free_node *free_node;

    (void)list;

    assert(!mem_block_allocated(block));

    free_node = mem_block_get_free_node(block);
    list_remove(&free_node->node);
    mem_block_set_allocated(block);
}

static struct mem_block *
mem_free_list_find(struct mem_free_list *list, size_t size)
{
    struct mem_free_node *free_node;
    struct mem_block *block;

    /*
     * The algorithmic complexity of this operation is O(n) [1], which
     * basically means that the maximum number of steps, and therefore the
     * time, required for the operation to complete grows with the number
     * of elements in the list. This is one of the main reasons why memory
     * allocation is generally avoided in interrupt handlers and real-time
     * applications. Special allocators, with guaranteed constant time or
     * a fixed and known worst case time, may be used in these cases.
     *
     * [1] https://en.wikipedia.org/wiki/Big_O_notation
     */
    list_for_each_entry(&list->free_nodes, free_node, node) {
        block = mem_block_from_payload(free_node);

        if (mem_block_size(block) >= size) {
            return block;
        }
    }

    return NULL;
}

static void
mem_free_list_init(struct mem_free_list *list)
{
    list_init(&list->free_nodes);
}

static bool
mem_block_inside(struct mem_block *block, void *addr)
{
    return (addr >= (void *)block) && (addr < mem_block_end(block));
}

static bool
mem_block_overlap(struct mem_block *block1, struct mem_block *block2)
{
    return mem_block_inside(block1, block2)
           || mem_block_inside(block2, block1);
}

static struct mem_block *
mem_block_split(struct mem_block *block, size_t size)
{
    struct mem_block *block2;
    size_t total_size;

    assert(mem_block_allocated(block));
    assert(mem_aligned(size));

    if (mem_block_size(block) < (size + MEM_BLOCK_MIN_SIZE)) {
        return NULL;
    }

    total_size = mem_block_size(block);
    mem_block_init(block, size);
    block2 = mem_block_end(block);
    mem_block_init(block2, total_size - size);

    return block2;
}

static struct mem_block *
mem_block_merge(struct mem_block *block1, struct mem_block *block2)
{
    size_t size;

    assert(!mem_block_overlap(block1, block2));

    if (mem_block_allocated(block1) || mem_block_allocated(block2)) {
        return NULL;
    }

    mem_free_list_remove(&mem_free_list, block1);
    mem_free_list_remove(&mem_free_list, block2);
    size = mem_block_size(block1) + mem_block_size(block2);

    if (block1 > block2) {
        block1 = block2;
    }

    mem_block_init(block1, size);
    mem_free_list_add(&mem_free_list, block1);

    return block1;
}

void
mem_setup(void)
{
    struct mem_block *block;

    block = (struct mem_block *)mem_heap;
    mem_block_init(block, sizeof(mem_heap));
    mem_free_list_init(&mem_free_list);
    mem_free_list_add(&mem_free_list, block);
    mutex_init(&mem_mutex);
}

static size_t
mem_convert_to_block_size(size_t size)
{
    /*
     * Make sure all blocks have a correctly aligned size. That, and the
     * fact that the heap address is also aligned, means all block
     * addresses are aligned.
     */
    size = P2ROUND(size, MEM_ALIGN);
    size += sizeof(struct mem_btag) * 2;

    if (size < MEM_BLOCK_MIN_SIZE) {
        size = MEM_BLOCK_MIN_SIZE;
    }

    return size;
}
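
/*
 * As a worked example, assuming a 32-bit machine where
 * sizeof(struct mem_btag) is 4 and sizeof(struct mem_free_node) is 8
 * (two pointers), a 5-byte request passed to mem_convert_to_block_size()
 * above is first rounded up to 8 bytes, then 8 bytes of boundary tags are
 * added, yielding a 16-byte block, which is also MEM_BLOCK_MIN_SIZE under
 * these assumptions.
 */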

void *
mem_alloc(size_t size)
{
    struct mem_block *block, *block2;
    void *ptr;

    if (size == 0) {
        return NULL;
    }

    size = mem_convert_to_block_size(size);

    mutex_lock(&mem_mutex);

    block = mem_free_list_find(&mem_free_list, size);

    if (block == NULL) {
        mutex_unlock(&mem_mutex);
        return NULL;
    }

    mem_free_list_remove(&mem_free_list, block);
    block2 = mem_block_split(block, size);

    if (block2 != NULL) {
        mem_free_list_add(&mem_free_list, block2);
    }

    mutex_unlock(&mem_mutex);

    ptr = mem_block_payload(block);
    assert(mem_aligned((uintptr_t)ptr));
    return ptr;
}

void
mem_free(void *ptr)
{
    struct mem_block *block, *tmp;

    if (!ptr) {
        return;
    }

    assert(mem_aligned((uintptr_t)ptr));

    block = mem_block_from_payload(ptr);
    assert(mem_block_inside_heap(block));

    mutex_lock(&mem_mutex);

    mem_free_list_add(&mem_free_list, block);

    tmp = mem_block_prev(block);

    if (tmp) {
        tmp = mem_block_merge(block, tmp);

        if (tmp) {
            block = tmp;
        }
    }

    tmp = mem_block_next(block);

    if (tmp) {
        mem_block_merge(block, tmp);
    }

    mutex_unlock(&mem_mutex);
}
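
/*
 * Example usage (a sketch; mem_setup() must be called once, early, before
 * any allocation) :
 *
 * void *ptr;
 *
 * mem_setup();
 *
 * ptr = mem_alloc(100);
 *
 * if (ptr != NULL) {
 *     ... use the 100-byte, MEM_ALIGN-byte aligned buffer ...
 *     mem_free(ptr);
 * }
 */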