Diffstat (limited to 'src/mem.c')
-rw-r--r--  src/mem.c  692
1 file changed, 692 insertions, 0 deletions
diff --git a/src/mem.c b/src/mem.c
new file mode 100644
index 0000000..572f2be
--- /dev/null
+++ b/src/mem.c
@@ -0,0 +1,692 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ * Copyright (c) 2017 Jerko Lenstra.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ *
+ * Algorithm
+ * ---------
+ * This allocator is based on "The Art of Computer Programming" by Donald Knuth.
+ * The algorithm itself is described in :
+ * - Volume 1 - Fundamental Algorithms
+ * - Chapter 2 - Information Structures
+ * - 2.5 Dynamic Storage Allocation
+ * - Algorithm A (First-fit method)
+ * - Algorithm C (Liberation with boundary tags)
+ *
+ * The point of a memory allocator is to manage memory in terms of allocation
+ * and liberation requests. Allocation finds and reserves memory for a user,
+ * whereas liberation makes that memory available again for future allocations.
+ * Memory is not created; it must be available from the start, and the allocator
+ * simply tracks its usage with metadata, allocated from memory itself.
+ *
+ * Data structures
+ * ---------------
+ * Here is a somewhat graphical representation of how memory is organized in
+ * this implementation :
+ *
+ * allocated block free block
+ * +------+-----------------+ +------+-----------------+
+ * | size | allocation flag | | size | allocation flag | <- header boundary
+ * +------+-----------------+ +------+-----------------+ tag
+ * | | | free list node (prev | <- payload or
+ * . payload . | and next pointers) | free list node
+ * . . +------------------------+
+ * . . . .
+ * . . . .
+ * . . . .
+ * +------+-----------------+ +------+-----------------+
+ * | size | allocation flag | | size | allocation flag | <- footer boundary
+ * +------+-----------------+ +------+-----------------+ tag
+ *
+ * Here is a view of multiple contiguous blocks :
+ *
+ * +------+-----------------+ <--+
+ * | size | allocation flag | |
+ * +------+-----------------+ +- single block
+ * | | |
+ * . payload . |
+ * . . |
+ * . . |
+ * +------+-----------------+ |
+ * | size | allocation flag | |
+ * +------+-----------------+ <--+
+ * +------+-----------------+
+ * | size | allocation flag |
+ * +------+-----------------+
+ * | |
+ * . payload .
+ * . .
+ * . .
+ * +------+-----------------+
+ * | size | allocation flag |
+ * +------+-----------------+
+ * +------+-----------------+
+ * | size | allocation flag |
+ * +------+-----------------+
+ * | |
+ * . payload .
+ * . .
+ * . .
+ * +------+-----------------+
+ * | size | allocation flag |
+ * +------+-----------------+
+ *
+ * The reason for the footer boundary tag is merging on liberation. When
+ * called, the mem_free() function is given a pointer to a payload. Since
+ * the size of a boundary tag is fixed, the address of the whole block
+ * can easily be computed. In order to reduce fragmentation, i.e. a state
+ * where free blocks are all small, preventing larger allocations, the
+ * allocator attempts to merge neighboring free blocks. Obtaining the
+ * address of the next block is easily achieved by simply adding the size
+ * of the current block, stored in the boundary tag, to the address of
+ * the block. But without a footer boundary tag, finding the address of
+ * the previous block is computationally expensive.
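 + *
 + * With the footer tag, the previous block becomes reachable in constant
 + * time, as sketched here (mirroring what mem_block_prev() does further
 + * down) :
 + *
 + *   footer = (struct mem_btag *)block - 1;      the previous footer
 + *   prev = (char *)block - mem_btag_size(footer);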
+ *
+ * Alignment
+ * ---------
+ * The word "aligned" and references to "alignment" in general can be
+ * found throughout the documentation of this module. Alignment is the
+ * property of a value, usually an address, of being a multiple of a
+ * size. Such a value is said to be "size-byte aligned", or "aligned on
+ * a size byte boundary". Common sizes include the processor word or
+ * cache line sizes. For example, the 32-bit x86 architecture has a
+ * 4-byte word size. Addresses such as 0, 4, 8, 12, 512 and 516 are
+ * 4-byte aligned, whereas 1, 2, 3, 511, 513, 514 and 515 aren't.
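 + *
 + * With power-of-two alignments, such checks reduce to simple bit
 + * masking, in the spirit of the P2ALIGNED and P2ROUND macros from
 + * <lib/macros.h> used below (illustrative definitions, not necessarily
 + * the actual ones) :
 + *
 + *   P2ALIGNED(x, a) -> ((x) & ((a) - 1)) == 0
 + *   P2ROUND(x, a)   -> ((x) + ((a) - 1)) & ~((a) - 1)
 + *
 + * e.g. P2ROUND(5, 4) is 8, and P2ALIGNED(8, 4) holds.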
+ *
+ *
+ * Pointers
+ * --------
+ * The code in this module makes extensive use of pointer arithmetic and
+ * conversion between pointer types. It's important to keep in mind that,
+ * in C, the void pointer is meant as a generic reference to objects of
+ * any type. As a result, any pointer can be assigned to a void pointer
+ * without explicit casting, and a void pointer may be assigned to any
+ * pointer as well.
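 + *
 + * For example, both of these assignments are valid C, without casts :
 + *
 + *   int object;
 + *   void *ptr = &object;      int * to void *, implicit
 + *   int *iptr = ptr;          void * back to int *, implicit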
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <lib/list.h>
+#include <lib/macros.h>
+
+#include "mem.h"
+#include "mutex.h"
+
+/*
+ * Total size of the backing storage heap.
+ *
+ * Note that the resulting kernel image file remains much smaller than this.
+ * This is because the heap is defined as uninitialized data, which are
+ * allocated out of the bss section by default. The bss section is filled
+ * with zeroes when the kernel image is loaded by the boot loader, as
+ * mandated by the ELF specification, which means there is no need to store
+ * the heap data, or any other statically allocated uninitialized data, in
+ * the kernel image file.
+ */
+#define MEM_HEAP_SIZE (32 * 1024 * 1024)
+
+/*
+ * Alignment required on addresses returned by mem_alloc().
+ *
+ * See the description of mem_alloc() in the public header.
+ */
+#define MEM_ALIGN 4
+
+/*
+ * Minimum size of a block.
+ *
+ * When free, the payload of a block is used as storage for the free list node.
+ */
+#define MEM_BLOCK_MIN_SIZE P2ROUND(((sizeof(struct mem_btag) * 2) \
+ + sizeof(struct mem_free_node)), MEM_ALIGN)
+
+/*
+ * The heap itself must be aligned, so that the first block is also aligned.
+ * Assuming all blocks have an aligned size, the last block must also end on
+ * an aligned address.
+ *
+ * This kind of check increases safety and robustness when changing
+ * compile-time parameters such as the heap size declared above.
+ *
+ * Another relevant check would be to make sure the heap is large enough
+ * to contain at least one block, but since the minimum block size is
+ * defined using the C sizeof operator, unknown to the C preprocessor,
+ * this check requires C11 static assertions.
+ */
+#if !P2ALIGNED(MEM_HEAP_SIZE, MEM_ALIGN)
+#error "invalid heap size"
+#endif
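+
+/*
+ * For reference, the heap size check mentioned above could be written
+ * as a C11 static assertion; a sketch only, not enabled since this
+ * module sticks to C99 constructs :
+ *
+ *   _Static_assert(MEM_HEAP_SIZE >= MEM_BLOCK_MIN_SIZE,
+ *                  "heap too small for a single block");
+ */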
+
+/*
+ * Masks applied on boundary tags to extract the size and the allocation flag.
+ */
+#define MEM_BTAG_ALLOCATED_MASK ((size_t)0x1)
+#define MEM_BTAG_SIZE_MASK (~MEM_BTAG_ALLOCATED_MASK)
+
+/*
+ * Boundary tag.
+ *
+ * Since the alignment constraint specified for mem_alloc() applies to
+ * payloads, not blocks, it's important that the size of boundary tags
+ * also be aligned. This is a check that would best be performed with
+ * C11 static assertions.
+ *
+ * In addition, the alignment constraint implies that the least significant
+ * bit is always 0. Therefore, this bit is used to store the allocation flag.
+ */
+struct mem_btag {
+ size_t value;
+};
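+
+/*
+ * As an example of the encoding, with MEM_ALIGN being 4, an allocated
+ * block of size 32 has a boundary tag value of 33 (0x21) : the upper
+ * bits encode the size, the least significant bit the allocation flag.
+ */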
+
+/*
+ * Memory block.
+ *
+ * Note the use of a C99 flexible array member. This construction enables
+ * the implementation to directly access the payload without pointer
+ * arithmetic or casting, and is one of the preferred ways to deal with
+ * headers.
+ */
+struct mem_block {
+ struct mem_btag btag;
+ char payload[];
+};
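+
+/*
+ * Note that the flexible array member contributes nothing to the size
+ * of the structure, i.e. sizeof(struct mem_block) here equals
+ * sizeof(struct mem_btag), the size of the header boundary tag alone.
+ */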
+
+/*
+ * Free list node.
+ *
+ * This structure is used as the payload of free blocks.
+ */
+struct mem_free_node {
+ struct list node;
+};
+
+/*
+ * List of free nodes.
+ *
+ * Here is an example of a TODO entry, a convention for keeping track of
+ * pending tasks directly in the source code :
+ *
+ * TODO Statistics counters.
+ */
+struct mem_free_list {
+ struct list free_nodes;
+};
+
+/*
+ * Memory heap.
+ *
+ * This allocator uses a single heap initialized with a single large free
+ * block. The allocator could be extended to support multiple heaps, each
+ * initialized with a single free block, forming an initial free list of
+ * more than one block.
+ *
+ * The heap must be correctly aligned, so that the first block is
+ * correctly aligned.
+ */
+static char mem_heap[MEM_HEAP_SIZE] __aligned(MEM_ALIGN);
+
+/*
+ * The unique free list.
+ *
+ * In order to improve allocation speed, multiple lists may be used, each
+ * for a specific size range. Such lists are called segregated free lists
+ * and enable more allocation policies, such as instant-fit.
+ */
+static struct mem_free_list mem_free_list;
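+
+/*
+ * A segregated variant, not implemented here, could be declared as
+ * shown below, with a hypothetical MEM_NR_SIZE_CLASSES parameter :
+ *
+ *   static struct mem_free_list mem_free_lists[MEM_NR_SIZE_CLASSES];
+ *
+ * where allocation first selects the list matching the request size
+ * class, enabling policies such as instant-fit.
+ */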
+
+/*
+ * Global mutex used to serialize access to allocation data.
+ */
+static struct mutex mem_mutex;
+
+static bool
+mem_aligned(size_t value)
+{
+ return P2ALIGNED(value, MEM_ALIGN);
+}
+
+static void *
+mem_heap_end(void)
+{
+ return &mem_heap[sizeof(mem_heap)];
+}
+
+static bool
+mem_btag_allocated(const struct mem_btag *btag)
+{
+ return btag->value & MEM_BTAG_ALLOCATED_MASK;
+}
+
+static void
+mem_btag_set_allocated(struct mem_btag *btag)
+{
+ btag->value |= MEM_BTAG_ALLOCATED_MASK;
+}
+
+static void
+mem_btag_clear_allocated(struct mem_btag *btag)
+{
+ btag->value &= ~MEM_BTAG_ALLOCATED_MASK;
+}
+
+static size_t
+mem_btag_size(const struct mem_btag *btag)
+{
+ return btag->value & MEM_BTAG_SIZE_MASK;
+}
+
+static void
+mem_btag_init(struct mem_btag *btag, size_t size)
+{
+ btag->value = size;
+ mem_btag_set_allocated(btag);
+}
+
+static size_t
+mem_block_size(const struct mem_block *block)
+{
+ return mem_btag_size(&block->btag);
+}
+
+static struct mem_block *
+mem_block_from_payload(void *payload)
+{
+ size_t offset;
+
+ /*
+ * Here, payload refers to the payload member in struct mem_block, not
+ * the local variable.
+ */
+ offset = offsetof(struct mem_block, payload);
+
+ /*
+ * Always keep pointer arithmetic in mind !
+ *
 + * The rule is fairly simple : whenever arithmetic operators are used
 + * on pointers, the operation is scaled by the size of the pointed-to
 + * type, so that e.g. adding 1 means pointing to the next element.
 + * A good way to remember this is that pointers can be used as arrays,
 + * making both these expressions equivalent :
+ * - &ptr[1]
+ * - ptr + 1
+ *
+ * As a result, when counting in bytes and not in objects, it is
+ * necessary to cast into a suitable pointer type. The char * type
+ * is specifically meant for this as C99 mandates that sizeof(char) be 1
+ * (6.5.3.4 The sizeof operator).
+ *
+ * As a side note, a GNU extension allows using pointer arithmetic on
+ * void pointers, where the "size of void" is 1, turning pointer
+ * arithmetic on void pointers into byte arithmetic, allowing this
+ * expression to be written as :
+ *
+ * return payload - offset;
+ *
+ * See https://gcc.gnu.org/onlinedocs/gcc-6.4.0/gcc/Pointer-Arith.html
+ */
+ return (struct mem_block *)((char *)payload - offset);
+}
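+
+/*
+ * For example, with a 4-byte struct mem_btag, a payload at address
+ * 0x1004 yields a block address of 0x1000.
+ */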
+
+static void *
+mem_block_end(const struct mem_block *block)
+{
+ /* See mem_block_from_payload() */
+ return (struct mem_block *)((char *)block + mem_block_size(block));
+}
+
+static struct mem_btag *
+mem_block_header_btag(struct mem_block *block)
+{
+ return &block->btag;
+}
+
+static struct mem_btag *
+mem_block_footer_btag(struct mem_block *block)
+{
+ struct mem_btag *btag;
+
+ btag = mem_block_end(block);
+
+ /*
+ * See ISO/IEC 9899:1999, 6.5.2.1 "Array subscripting" :
+ * "A postfix expression followed by an expression in square brackets []
+ * is a subscripted designation of an element of an array object. The
+ * definition of the subscript operator [] is that E1[E2] is identical to
+ * (*((E1)+(E2)))".
+ *
+ * This, by the way, implies the following equivalent expressions :
+ * - &btag[-1]
+ * - &(-1)[btag]
+ * - &*(btag - 1)
+ * - btag - 1
+ */
+ return &btag[-1];
+}
+
+static struct mem_block *
+mem_block_prev(struct mem_block *block)
+{
+ struct mem_btag *btag;
+
+ if ((char *)block == mem_heap) {
+ return NULL;
+ }
+
+ btag = mem_block_header_btag(block);
+ return (struct mem_block *)((char *)block - mem_btag_size(&btag[-1]));
+}
+
+static struct mem_block *
+mem_block_next(struct mem_block *block)
+{
+ block = mem_block_end(block);
+
+ if ((void *)block == mem_heap_end()) {
+ return NULL;
+ }
+
+ return block;
+}
+
+static bool
+mem_block_allocated(struct mem_block *block)
+{
+ return mem_btag_allocated(mem_block_header_btag(block));
+}
+
+static void
+mem_block_set_allocated(struct mem_block *block)
+{
+ mem_btag_set_allocated(mem_block_header_btag(block));
+ mem_btag_set_allocated(mem_block_footer_btag(block));
+}
+
+static void
+mem_block_clear_allocated(struct mem_block *block)
+{
+ mem_btag_clear_allocated(mem_block_header_btag(block));
+ mem_btag_clear_allocated(mem_block_footer_btag(block));
+}
+
+static void
+mem_block_init(struct mem_block *block, size_t size)
+{
+ mem_btag_init(mem_block_header_btag(block), size);
+ mem_btag_init(mem_block_footer_btag(block), size);
+}
+
+static void *
+mem_block_payload(struct mem_block *block)
+{
+ return block->payload;
+}
+
+static struct mem_free_node *
+mem_block_get_free_node(struct mem_block *block)
+{
+ assert(!mem_block_allocated(block));
+ return mem_block_payload(block);
+}
+
+static bool
+mem_block_inside_heap(const struct mem_block *block)
+{
+ void *heap_end;
+
+ heap_end = mem_heap_end();
+ return (((char *)block >= mem_heap)
+ && ((void *)block->payload < heap_end)
+ && ((void *)mem_block_end(block) <= heap_end));
+}
+
+static void
+mem_free_list_add(struct mem_free_list *list, struct mem_block *block)
+{
+ struct mem_free_node *free_node;
+
+ assert(mem_block_allocated(block));
+
+ mem_block_clear_allocated(block);
+ free_node = mem_block_get_free_node(block);
+
+ /*
+ * Free blocks may be added at either the head or the tail of a list.
+ * In this case, it's normally better to add at the head, because the
+ * first-fit algorithm implementation starts from the beginning. This
+ * means there is a good chance that a block recently freed may "soon"
+ * be allocated again. Since it's likely that this block was accessed
+ * before it was freed, there is a good chance that (part of) its memory
+ * is still in the processor cache, potentially increasing the chances
+ * of cache hits and saving a few expensive accesses from the processor
+ * to memory. This is an example of inexpensive micro-optimization.
+ */
+ list_insert_head(&list->free_nodes, &free_node->node);
+}
+
+static void
+mem_free_list_remove(struct mem_free_list *list, struct mem_block *block)
+{
+ struct mem_free_node *free_node;
+
+ (void)list;
+
+ assert(!mem_block_allocated(block));
+
+ free_node = mem_block_get_free_node(block);
+ list_remove(&free_node->node);
+ mem_block_set_allocated(block);
+}
+
+static struct mem_block *
+mem_free_list_find(struct mem_free_list *list, size_t size)
+{
+ struct mem_free_node *free_node;
+ struct mem_block *block;
+
+ /*
 + * The algorithmic complexity of this operation is O(n) [1], which
 + * means the maximum number of steps, and thus the time, needed for
 + * the operation to complete depends on the number of elements in the
 + * list. This is one of the main reasons why memory allocation is
 + * generally avoided in interrupt handlers and real-time applications.
 + * Special allocators with guaranteed constant time, or a fixed and
 + * known worst-case time, may be used in these cases.
+ *
+ * [1] https://en.wikipedia.org/wiki/Big_O_notation
+ */
+ list_for_each_entry(&list->free_nodes, free_node, node) {
+ block = mem_block_from_payload(free_node);
+
+ if (mem_block_size(block) >= size) {
+ return block;
+ }
+ }
+
+ return NULL;
+}
+
+static void
+mem_free_list_init(struct mem_free_list *list)
+{
+ list_init(&list->free_nodes);
+}
+
+static bool
+mem_block_inside(struct mem_block *block, void *addr)
+{
+ return (addr >= (void *)block) && (addr < mem_block_end(block));
+}
+
+static bool
+mem_block_overlap(struct mem_block *block1, struct mem_block *block2)
+{
+ return mem_block_inside(block1, block2)
+ || mem_block_inside(block2, block1);
+}
+
+static struct mem_block *
+mem_block_split(struct mem_block *block, size_t size)
+{
+ struct mem_block *block2;
+ size_t total_size;
+
+ assert(mem_block_allocated(block));
+ assert(mem_aligned(size));
+
+ if (mem_block_size(block) < (size + MEM_BLOCK_MIN_SIZE)) {
+ return NULL;
+ }
+
+ total_size = mem_block_size(block);
+ mem_block_init(block, size);
+ block2 = mem_block_end(block);
+ mem_block_init(block2, total_size - size);
+
+ return block2;
+}
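+
+/*
+ * For example, splitting a 64-byte block (boundary tags included) to
+ * satisfy a request converted to a 24-byte block size yields a 24-byte
+ * block and a 40-byte remainder, assuming 40 bytes is at least
+ * MEM_BLOCK_MIN_SIZE.
+ */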
+
+static struct mem_block *
+mem_block_merge(struct mem_block *block1, struct mem_block *block2)
+{
+ size_t size;
+
+ assert(!mem_block_overlap(block1, block2));
+
+ if (mem_block_allocated(block1) || mem_block_allocated(block2)) {
+ return NULL;
+ }
+
+ mem_free_list_remove(&mem_free_list, block1);
+ mem_free_list_remove(&mem_free_list, block2);
+ size = mem_block_size(block1) + mem_block_size(block2);
+
+ if (block1 > block2) {
+ block1 = block2;
+ }
+
+ mem_block_init(block1, size);
+ mem_free_list_add(&mem_free_list, block1);
+ return block1;
+}
+
+void
+mem_setup(void)
+{
+ struct mem_block *block;
+
+ block = (struct mem_block *)mem_heap;
+ mem_block_init(block, sizeof(mem_heap));
+ mem_free_list_init(&mem_free_list);
+ mem_free_list_add(&mem_free_list, block);
+ mutex_init(&mem_mutex);
+}
+
+static size_t
+mem_convert_to_block_size(size_t size)
+{
+ /*
+ * Make sure all blocks have a correctly aligned size. That, and the fact
+ * that the heap address is also aligned, means all block addresses are
+ * aligned.
+ */
+ size = P2ROUND(size, MEM_ALIGN);
+ size += sizeof(struct mem_btag) * 2;
+
+ if (size < MEM_BLOCK_MIN_SIZE) {
+ size = MEM_BLOCK_MIN_SIZE;
+ }
+
+ return size;
+}
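+
+/*
+ * For example, assuming 4-byte boundary tags, a 10-byte allocation
+ * request is rounded up to 12 bytes, then grows to 20 bytes once both
+ * boundary tags are accounted for, and would finally be raised to
+ * MEM_BLOCK_MIN_SIZE if it were still below it.
+ */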
+
+void *
+mem_alloc(size_t size)
+{
+ struct mem_block *block, *block2;
+ void *ptr;
+
+ if (size == 0) {
+ return NULL;
+ }
+
+ size = mem_convert_to_block_size(size);
+
+ mutex_lock(&mem_mutex);
+
+ block = mem_free_list_find(&mem_free_list, size);
+
+ if (block == NULL) {
+ mutex_unlock(&mem_mutex);
+ return NULL;
+ }
+
+ mem_free_list_remove(&mem_free_list, block);
+ block2 = mem_block_split(block, size);
+
+ if (block2 != NULL) {
+ mem_free_list_add(&mem_free_list, block2);
+ }
+
+ mutex_unlock(&mem_mutex);
+
+ ptr = mem_block_payload(block);
+ assert(mem_aligned((uintptr_t)ptr));
+ return ptr;
+}
+
+void
+mem_free(void *ptr)
+{
+ struct mem_block *block, *tmp;
+
+ if (!ptr) {
+ return;
+ }
+
+ assert(mem_aligned((uintptr_t)ptr));
+
+ block = mem_block_from_payload(ptr);
+ assert(mem_block_inside_heap(block));
+
+ mutex_lock(&mem_mutex);
+
+ mem_free_list_add(&mem_free_list, block);
+
+ tmp = mem_block_prev(block);
+
+ if (tmp) {
+ tmp = mem_block_merge(block, tmp);
+
+ if (tmp) {
+ block = tmp;
+ }
+ }
+
+ tmp = mem_block_next(block);
+
+ if (tmp) {
+ mem_block_merge(block, tmp);
+ }
+
+ mutex_unlock(&mem_mutex);
+}
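+
+/*
+ * Typical usage sketch, illustrative only :
+ *
+ *   mem_setup();
+ *
+ *   void *ptr = mem_alloc(100);
+ *
+ *   if (ptr != NULL) {
+ *       ... use the 4-byte aligned, at least 100-byte payload ...
+ *       mem_free(ptr);
+ *   }
+ */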