diff options
Diffstat (limited to 'src/mem.c')
-rw-r--r-- | src/mem.c | 692 |
1 files changed, 692 insertions, 0 deletions
diff --git a/src/mem.c b/src/mem.c new file mode 100644 index 0000000..572f2be --- /dev/null +++ b/src/mem.c @@ -0,0 +1,692 @@ +/* + * Copyright (c) 2017 Richard Braun. + * Copyright (c) 2017 Jerko Lenstra. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * + * Algorithm + * --------- + * This allocator is based on "The Art of Computer Programming" by Donald Knuth. + * The algorithm itself is described in : + * - Volume 1 - Fundamental Algorithms + * - Chapter 2 – Information Structures + * - 2.5 Dynamic Storage + * - Algorithm A (First-fit method) + * - Algorithm C (Liberation with boundary tags) + * + * The point of a memory allocator is to manage memory in terms of allocation + * and liberation requests. Allocation finds and reserves memory for a user, + * whereas liberation makes that memory available again for future allocations. + * Memory is not created, it must be available from the start, and the allocator + * simply tracks its usage with metadata, allocated from memory itself. + * + * Data structures + * --------------- + * Here is a somewhat graphical representation of how memory is organized in + * this implementation : + * + * allocated block free block + * +------+-----------------+ +------+-----------------+ + * | size | allocation flag | | size | allocation flag | <- header boundary + * +------+-----------------+ +------+-----------------+ tag + * | | | free list node (prev | <- payload or + * . payload . | and next pointers) | free list node + * . . +------------------------+ + * . . . . + * . . . . + * . . . . + * +------+-----------------+ +------+-----------------+ + * | size | allocation flag | | size | allocation flag | <- footer boundary + * +------+-----------------+ +------+-----------------+ tag + * + * Here is a view of multiple contiguous blocks : + * + * +------+-----------------+ <--+ + * | size | allocation flag | | + * +------+-----------------+ +- single block + * | | | + * . payload . | + * . . | + * . . | + * +------+-----------------+ | + * | size | allocation flag | | + * +------+-----------------+ <--+ + * +------+-----------------+ + * | size | allocation flag | + * +------+-----------------+ + * | | + * . payload . + * . . + * . . + * +------+-----------------+ + * | size | allocation flag | + * +------+-----------------+ + * +------+-----------------+ + * | size | allocation flag | + * +------+-----------------+ + * | | + * . payload . + * . . + * . . + * +------+-----------------+ + * | size | allocation flag | + * +------+-----------------+ + * + * The reason for the footer boundary tag is merging on liberation. When + * called, the mem_free() function is given a pointer to a payload. Since + * the size of a boundary tag is fixed, the address of the whole block + * can easily be computed. In order to reduce fragmentation, i.e. a state + * where all free blocks are small and prevent allocating bigger blocks, + * the allocator attempts to merge neighbor free blocks. Obtaining the + * address of the next block is easily achieved by simply adding the size + * of the current block, stored in the boundary tag, to the address of + * the block. But without a footer boundary tag, finding the address of + * the previous block is computationally expensive. + * + * Alignment + * --------- + * The word "aligned" and references to "alignment" in general can be + * found throughout the documentation of this module. Alignment is a + * property of a value, usually an address, to be a multiple of a size. + * This value is said to be "size-byte aligned", or "aligned on a size + * byte boundary". Common sizes include the processor word or cache line + * sizes. For example, the x86 architecture is 32-bits, making the word + * size 4 bytes. Addresses such as 0, 4, 8, 12, 512 and 516 are 4-byte + * aligned, whereas 1, 2, 3, 511, 513, 514 and 515 aren't. + * + * + * Pointers + * -------- + * The code in this module makes extensive use of pointer arithmetic and + * conversion between pointer types. It's important to keep in mind that, + * in C, the void pointer is meant as a generic reference to objects of + * any type. As a result, any pointer can be assigned to a void pointer + * without explicit casting, and a void pointer may be assigned to any + * pointer as well. + */ + +#include <assert.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include <lib/list.h> +#include <lib/macros.h> + +#include "mem.h" +#include "mutex.h" + +/* + * Total size of the backing storage heap. + * + * Note that the resulting kernel image file remains much smaller than this. + * The reason is because the heap is defined as uninitialized data, which are + * allocated out of the bss section by default. The bss section is filled + * with zeroes when the kernel image is loaded by the boot loader, as + * mandated by the ELF specification, which means there is no need to store + * the heap data, or any other statically allocated uninitialized data, in + * the kernel image file. + */ +#define MEM_HEAP_SIZE (32 * 1024 * 1024) + +/* + * Alignment required on addresses returned by mem_alloc(). + * + * See the description of mem_alloc() in the public header. + */ +#define MEM_ALIGN 4 + +/* + * Minimum size of a block. + * + * When free, the payload of a block is used as storage for the free list node. + */ +#define MEM_BLOCK_MIN_SIZE P2ROUND(((sizeof(struct mem_btag) * 2) \ + + sizeof(struct mem_free_node)), MEM_ALIGN) + +/* + * The heap itself must be aligned, so that the first block is also aligned. + * Assuming all blocks have an aligned size, the last block must also end on + * an aligned address. + * + * This kind of check increases safety and robustness when changing + * compile-time parameters such as the heap size declared above. + * + * Another relevant check would be to make sure the heap is large enough + * to contain at least one block, but since the minimum block size is + * defined using the C sizeof operator, unknown to the C preprocessor, + * this check requires C11 static assertions. + */ +#if !P2ALIGNED(MEM_HEAP_SIZE, MEM_ALIGN) +#error "invalid heap size" +#endif + +/* + * Masks applied on boundary tags to extract the size and the allocation flag. + */ +#define MEM_BTAG_ALLOCATED_MASK ((size_t)0x1) +#define MEM_BTAG_SIZE_MASK (~MEM_BTAG_ALLOCATED_MASK) + +/* + * Boundary tag. + * + * Since the alignment constraint specified for mem_alloc() applies to + * payloads, not blocks, it's important that boundary tags also be aligned. + * This is a check that would best be performed with C11 static assertions. + * + * In addition, the alignment constraint implies that the least significant + * bit is always 0. Therefore, this bit is used to store the allocation flag. + */ +struct mem_btag { + size_t value; +}; + +/* + * Memory block. + * + * Note the use of a C99 flexible array member. This construction enables + * the implementation to directly access the payload without pointer + * arithmetic or casting, and is one of the preferred ways to deal with + * headers. + */ +struct mem_block { + struct mem_btag btag; + char payload[]; +}; + +/* + * Free list node. + * + * This structure is used as the payload of free blocks. + */ +struct mem_free_node { + struct list node; +}; + +/* + * List of free nodes. + * + * Here is an example of a TODO entry, a method used to store and retrieve + * pending tasks using source code only : + * + * TODO Statistics counters. + */ +struct mem_free_list { + struct list free_nodes; +}; + +/* + * Memory heap. + * + * This allocator uses a single heap initialized with a single large free + * block. The allocator could be extended to support multiple heaps, each + * initialized with a single free block, forming an initial free list of + * more than one block. + * + * The heap must be correctly aligned, so that the first block is + * correctly aligned. + */ +static char mem_heap[MEM_HEAP_SIZE] __aligned(MEM_ALIGN); + +/* + * The unique free list. + * + * In order to improve allocation speed, multiple lists may be used, each + * for a specific size range. Such lists are called segregated free lists + * and enable more allocation policies, such as instant-fit. + */ +static struct mem_free_list mem_free_list; + +/* + * Global mutex used to serialize access to allocation data. + */ +static struct mutex mem_mutex; + +static bool +mem_aligned(size_t value) +{ + return P2ALIGNED(value, MEM_ALIGN); +} + +static void * +mem_heap_end(void) +{ + return &mem_heap[sizeof(mem_heap)]; +} + +static bool +mem_btag_allocated(const struct mem_btag *btag) +{ + return btag->value & MEM_BTAG_ALLOCATED_MASK; +} + +static void +mem_btag_set_allocated(struct mem_btag *btag) +{ + btag->value |= MEM_BTAG_ALLOCATED_MASK; +} + +static void +mem_btag_clear_allocated(struct mem_btag *btag) +{ + btag->value &= ~MEM_BTAG_ALLOCATED_MASK; +} + +static size_t +mem_btag_size(const struct mem_btag *btag) +{ + return btag->value & MEM_BTAG_SIZE_MASK; +} + +static void +mem_btag_init(struct mem_btag *btag, size_t size) +{ + btag->value = size; + mem_btag_set_allocated(btag); +} + +static size_t +mem_block_size(const struct mem_block *block) +{ + return mem_btag_size(&block->btag); +} + +static struct mem_block * +mem_block_from_payload(void *payload) +{ + size_t offset; + + /* + * Here, payload refers to the payload member in struct mem_block, not + * the local variable. + */ + offset = offsetof(struct mem_block, payload); + + /* + * Always keep pointer arithmetic in mind ! + * + * The rule is fairly simple : whenever arithmetic operators are used + * on pointers, the operation is scaled on the type size, so that e.g. + * adding 1 means pointing to the next element. A good way to remember + * this is to remember that pointers can be used as arrays, so that + * both these expressions are equivalent : + * - &ptr[1] + * - ptr + 1 + * + * As a result, when counting in bytes and not in objects, it is + * necessary to cast into a suitable pointer type. The char * type + * is specifically meant for this as C99 mandates that sizeof(char) be 1 + * (6.5.3.4 The sizeof operator). + * + * As a side note, a GNU extension allows using pointer arithmetic on + * void pointers, where the "size of void" is 1, turning pointer + * arithmetic on void pointers into byte arithmetic, allowing this + * expression to be written as : + * + * return payload - offset; + * + * See https://gcc.gnu.org/onlinedocs/gcc-6.4.0/gcc/Pointer-Arith.html + */ + return (struct mem_block *)((char *)payload - offset); +} + +static void * +mem_block_end(const struct mem_block *block) +{ + /* See mem_block_from_payload() */ + return (struct mem_block *)((char *)block + mem_block_size(block)); +} + +static struct mem_btag * +mem_block_header_btag(struct mem_block *block) +{ + return &block->btag; +} + +static struct mem_btag * +mem_block_footer_btag(struct mem_block *block) +{ + struct mem_btag *btag; + + btag = mem_block_end(block); + + /* + * See ISO/IEC 9899:1999, 6.5.2.1 "Array subscripting" : + * "A postfix expression followed by an expression in square brackets [] + * is a subscripted designation of an element of an array object. The + * definition of the subscript operator [] is that E1[E2] is identical to + * (*((E1)+(E2)))". + * + * This, by the way, implies the following equivalent expressions : + * - &btag[-1] + * - &(-1)[btag] + * - &*(btag - 1) + * - btag - 1 + */ + return &btag[-1]; +} + +static struct mem_block * +mem_block_prev(struct mem_block *block) +{ + struct mem_btag *btag; + + if ((char *)block == mem_heap) { + return NULL; + } + + btag = mem_block_header_btag(block); + return (struct mem_block *)((char *)block - mem_btag_size(&btag[-1])); +} + +static struct mem_block * +mem_block_next(struct mem_block *block) +{ + block = mem_block_end(block); + + if ((void *)block == mem_heap_end()) { + return NULL; + } + + return block; +} + +static bool +mem_block_allocated(struct mem_block *block) +{ + return mem_btag_allocated(mem_block_header_btag(block)); +} + +static void +mem_block_set_allocated(struct mem_block *block) +{ + mem_btag_set_allocated(mem_block_header_btag(block)); + mem_btag_set_allocated(mem_block_footer_btag(block)); +} + +static void +mem_block_clear_allocated(struct mem_block *block) +{ + mem_btag_clear_allocated(mem_block_header_btag(block)); + mem_btag_clear_allocated(mem_block_footer_btag(block)); +} + +static void +mem_block_init(struct mem_block *block, size_t size) +{ + mem_btag_init(mem_block_header_btag(block), size); + mem_btag_init(mem_block_footer_btag(block), size); +} + +static void * +mem_block_payload(struct mem_block *block) +{ + return block->payload; +} + +static struct mem_free_node * +mem_block_get_free_node(struct mem_block *block) +{ + assert(!mem_block_allocated(block)); + return mem_block_payload(block); +} + +static bool +mem_block_inside_heap(const struct mem_block *block) +{ + void *heap_end; + + heap_end = mem_heap_end(); + return (((char *)block >= mem_heap) + && ((void *)block->payload < heap_end) + && ((void *)mem_block_end(block) <= heap_end)); +} + +static void +mem_free_list_add(struct mem_free_list *list, struct mem_block *block) +{ + struct mem_free_node *free_node; + + assert(mem_block_allocated(block)); + + mem_block_clear_allocated(block); + free_node = mem_block_get_free_node(block); + + /* + * Free blocks may be added at either the head or the tail of a list. + * In this case, it's normally better to add at the head, because the + * first-fit algorithm implementation starts from the beginning. This + * means there is a good chance that a block recently freed may "soon" + * be allocated again. Since it's likely that this block was accessed + * before it was freed, there is a good chance that (part of) its memory + * is still in the processor cache, potentially increasing the chances + * of cache hits and saving a few expensive accesses from the processor + * to memory. This is an example of inexpensive micro-optimization. + */ + list_insert_head(&list->free_nodes, &free_node->node); +} + +static void +mem_free_list_remove(struct mem_free_list *list, struct mem_block *block) +{ + struct mem_free_node *free_node; + + (void)list; + + assert(!mem_block_allocated(block)); + + free_node = mem_block_get_free_node(block); + list_remove(&free_node->node); + mem_block_set_allocated(block); +} + +static struct mem_block * +mem_free_list_find(struct mem_free_list *list, size_t size) +{ + struct mem_free_node *free_node; + struct mem_block *block; + + /* + * The algorithmic complexity of this operation is O(n) [1] which + * basically means the maximum number of steps, and time, for the + * operation to complete depend on the number of elements in the list. + * This is one of the main reasons why memory allocation is generally + * avoided in interrupt handlers and real-time applications. Special + * allocators with guaranteed constant time or a fixed and known worst + * case time, may be used in these cases. + * + * [1] https://en.wikipedia.org/wiki/Big_O_notation + */ + list_for_each_entry(&list->free_nodes, free_node, node) { + block = mem_block_from_payload(free_node); + + if (mem_block_size(block) >= size) { + return block; + } + } + + return NULL; +} + +static void +mem_free_list_init(struct mem_free_list *list) +{ + list_init(&list->free_nodes); +} + +static bool +mem_block_inside(struct mem_block *block, void *addr) +{ + return (addr >= (void *)block) && (addr < mem_block_end(block)); +} + +static bool +mem_block_overlap(struct mem_block *block1, struct mem_block *block2) +{ + return mem_block_inside(block1, block2) + || mem_block_inside(block2, block1); +} + +static struct mem_block * +mem_block_split(struct mem_block *block, size_t size) +{ + struct mem_block *block2; + size_t total_size; + + assert(mem_block_allocated(block)); + assert(mem_aligned(size)); + + if (mem_block_size(block) < (size + MEM_BLOCK_MIN_SIZE)) { + return NULL; + } + + total_size = mem_block_size(block); + mem_block_init(block, size); + block2 = mem_block_end(block); + mem_block_init(block2, total_size - size); + + return block2; +} + +static struct mem_block * +mem_block_merge(struct mem_block *block1, struct mem_block *block2) +{ + size_t size; + + assert(!mem_block_overlap(block1, block2)); + + if (mem_block_allocated(block1) || mem_block_allocated(block2)) { + return NULL; + } + + mem_free_list_remove(&mem_free_list, block1); + mem_free_list_remove(&mem_free_list, block2); + size = mem_block_size(block1) + mem_block_size(block2); + + if (block1 > block2) { + block1 = block2; + } + + mem_block_init(block1, size); + mem_free_list_add(&mem_free_list, block1); + return block1; +} + +void +mem_setup(void) +{ + struct mem_block *block; + + block = (struct mem_block *)mem_heap; + mem_block_init(block, sizeof(mem_heap)); + mem_free_list_init(&mem_free_list); + mem_free_list_add(&mem_free_list, block); + mutex_init(&mem_mutex); +} + +static size_t +mem_convert_to_block_size(size_t size) +{ + /* + * Make sure all blocks have a correctly aligned size. That, and the fact + * that the heap address is also aligned, means all block addresses are + * aligned. + */ + size = P2ROUND(size, MEM_ALIGN); + size += sizeof(struct mem_btag) * 2; + + if (size < MEM_BLOCK_MIN_SIZE) { + size = MEM_BLOCK_MIN_SIZE; + } + + return size; +} + +void * +mem_alloc(size_t size) +{ + struct mem_block *block, *block2; + void *ptr; + + if (size == 0) { + return NULL; + } + + size = mem_convert_to_block_size(size); + + mutex_lock(&mem_mutex); + + block = mem_free_list_find(&mem_free_list, size); + + if (block == NULL) { + mutex_unlock(&mem_mutex); + return NULL; + } + + mem_free_list_remove(&mem_free_list, block); + block2 = mem_block_split(block, size); + + if (block2 != NULL) { + mem_free_list_add(&mem_free_list, block2); + } + + mutex_unlock(&mem_mutex); + + ptr = mem_block_payload(block); + assert(mem_aligned((uintptr_t)ptr)); + return ptr; +} + +void +mem_free(void *ptr) +{ + struct mem_block *block, *tmp; + + if (!ptr) { + return; + } + + assert(mem_aligned((uintptr_t)ptr)); + + block = mem_block_from_payload(ptr); + assert(mem_block_inside_heap(block)); + + mutex_lock(&mem_mutex); + + mem_free_list_add(&mem_free_list, block); + + tmp = mem_block_prev(block); + + if (tmp) { + tmp = mem_block_merge(block, tmp); + + if (tmp) { + block = tmp; + } + } + + tmp = mem_block_next(block); + + if (tmp) { + mem_block_merge(block, tmp); + } + + mutex_unlock(&mem_mutex); +} |