Diffstat (limited to 'vm/vm_phys.c')
-rw-r--r-- | vm/vm_phys.c | 625
1 file changed, 625 insertions, 0 deletions
diff --git a/vm/vm_phys.c b/vm/vm_phys.c
new file mode 100644
index 00000000..3e8a70f0
--- /dev/null
+++ b/vm/vm_phys.c
@@ -0,0 +1,625 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation uses the binary buddy system to manage its heap.
+ * Descriptions of the buddy system can be found in the following works :
+ * - "UNIX Internals: The New Frontiers", by Uresh Vahalia.
+ * - "Dynamic Storage Allocation: A Survey and Critical Review",
+ *   by Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles.
+ *
+ * In addition, this allocator uses per-cpu pools of pages for order 0
+ * (i.e. single page) allocations. These pools act as caches (but are named
+ * differently to avoid confusion with CPU caches) that reduce contention on
+ * multiprocessor systems. When a pool is empty and cannot provide a page,
+ * it is filled by transferring multiple pages from the backend buddy system.
+ * The symmetric case is handled likewise.
+ */
+
+#include <kern/init.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/printk.h>
+#include <kern/types.h>
+#include <lib/assert.h>
+#include <lib/list.h>
+#include <lib/macros.h>
+#include <lib/sprintf.h>
+#include <lib/stddef.h>
+#include <lib/string.h>
+#include <machine/cpu.h>
+#include <vm/vm_kmem.h>
+#include <vm/vm_page.h>
+#include <vm/vm_phys.h>
+
+/*
+ * Number of free block lists per segment.
+ */
+#define VM_PHYS_NR_FREE_LISTS 11
+
+/*
+ * The size of a CPU pool is computed by dividing the number of pages in its
+ * containing segment by this value.
+ */
+#define VM_PHYS_CPU_POOL_RATIO 1024
+
+/*
+ * Maximum number of pages in a CPU pool.
+ */
+#define VM_PHYS_CPU_POOL_MAX_SIZE 128
+
+/*
+ * The transfer size of a CPU pool is computed by dividing the pool size by
+ * this value.
+ */
+#define VM_PHYS_CPU_POOL_TRANSFER_RATIO 2
+
+/*
+ * Per-processor cache of pages.
+ */
+struct vm_phys_cpu_pool {
+    /* struct mutex mutex; */
+    int size;
+    int transfer_size;
+    int nr_pages;
+    struct list pages;
+};
+
+/*
+ * Special order value.
+ *
+ * When a page is free, its order is the index of its free list.
+ */
+#define VM_PHYS_ORDER_ALLOCATED VM_PHYS_NR_FREE_LISTS
+
+/*
+ * Doubly-linked list of free blocks.
+ */
+struct vm_phys_free_list {
+    unsigned long size;
+    struct list blocks;
+};
+
+/*
+ * Segment name buffer size.
+ */
+#define VM_PHYS_NAME_SIZE 16
+
+/*
+ * Segment of contiguous memory.
+ */
+struct vm_phys_seg {
+    struct vm_phys_cpu_pool cpu_pools[MAX_CPUS];
+
+    struct list node;
+    vm_phys_t start;
+    vm_phys_t end;
+    struct vm_page *pages;
+    struct vm_page *pages_end;
+    /* struct mutex mutex; */
+    struct vm_phys_free_list free_lists[VM_PHYS_NR_FREE_LISTS];
+    unsigned long nr_free_pages;
+    char name[VM_PHYS_NAME_SIZE];
+};
+
+/*
+ * Bootstrap information about a segment.
+ */
+struct vm_phys_boot_seg {
+    vm_phys_t avail_start;
+    vm_phys_t avail_end;
+};
+
+int vm_phys_ready;
+
+/*
+ * Segment lists, ordered by priority (higher priority lists have lower
+ * numerical priorities).
+ */
+static struct list vm_phys_seg_lists[VM_NR_PHYS_SEGLIST];
+
+/*
+ * Segment table.
+ */
+static struct vm_phys_seg vm_phys_segs[VM_MAX_PHYS_SEG];
+
+/*
+ * Bootstrap segment table.
+ */
+static struct vm_phys_boot_seg vm_phys_boot_segs[VM_MAX_PHYS_SEG] __initdata;
+
+/*
+ * Number of loaded segments.
+ */
+static unsigned int vm_phys_segs_size;
+
+static int vm_phys_load_initialized __initdata = 0;
+
+static void __init
+vm_phys_init_page(struct vm_page *page, unsigned short seg_index,
+                  unsigned short order, vm_phys_t pa)
+{
+    page->seg_index = seg_index;
+    page->order = order;
+    page->phys_addr = pa;
+}
+
+static void __init
+vm_phys_free_list_init(struct vm_phys_free_list *free_list)
+{
+    free_list->size = 0;
+    list_init(&free_list->blocks);
+}
+
+static inline void
+vm_phys_free_list_insert(struct vm_phys_free_list *free_list,
+                         struct vm_page *page)
+{
+    assert(page->order == VM_PHYS_ORDER_ALLOCATED);
+
+    free_list->size++;
+    list_insert(&free_list->blocks, &page->node);
+}
+
+static inline void
+vm_phys_free_list_remove(struct vm_phys_free_list *free_list,
+                         struct vm_page *page)
+{
+    assert(free_list->size != 0);
+    assert(!list_empty(&free_list->blocks));
+    assert(page->order < VM_PHYS_NR_FREE_LISTS);
+
+    free_list->size--;
+    list_remove(&page->node);
+}
+
+static struct vm_page *
+vm_phys_seg_alloc_from_buddy(struct vm_phys_seg *seg, unsigned int order)
+{
+    struct vm_phys_free_list *free_list;
+    struct vm_page *page, *buddy;
+    unsigned int i;
+
+    assert(order < VM_PHYS_NR_FREE_LISTS);
+
+    for (i = order; i < VM_PHYS_NR_FREE_LISTS; i++) {
+        free_list = &seg->free_lists[i];
+
+        if (free_list->size != 0)
+            break;
+    }
+
+    if (i == VM_PHYS_NR_FREE_LISTS)
+        return NULL;
+
+    page = list_first_entry(&free_list->blocks, struct vm_page, node);
+    vm_phys_free_list_remove(free_list, page);
+    page->order = VM_PHYS_ORDER_ALLOCATED;
+
+    while (i > order) {
+        i--;
+        buddy = &page[1 << i];
+        vm_phys_free_list_insert(&seg->free_lists[i], buddy);
+        buddy->order = i;
+    }
+
+    seg->nr_free_pages -= (1 << order);
+    return page;
+}
+
+static void
+vm_phys_seg_free_to_buddy(struct vm_phys_seg *seg, struct vm_page *page,
+                          unsigned int order)
+{
+    struct vm_page *buddy;
+    vm_phys_t pa, buddy_pa;
+    unsigned int nr_pages;
+
+    assert(page >= seg->pages);
+    assert(page < seg->pages_end);
+    assert(page->order == VM_PHYS_ORDER_ALLOCATED);
+    assert(order < VM_PHYS_NR_FREE_LISTS);
+
+    nr_pages = (1 << order);
+    pa = page->phys_addr;
+
+    while (order < (VM_PHYS_NR_FREE_LISTS - 1)) {
+        buddy_pa = pa ^ vm_page_ptoa(1 << order);
+
+        if ((buddy_pa < seg->start) || (buddy_pa >= seg->end))
+            break;
+
+        buddy = &seg->pages[vm_page_atop(buddy_pa - seg->start)];
+
+        if (buddy->order != order)
+            break;
+
+        vm_phys_free_list_remove(&seg->free_lists[order], buddy);
+        buddy->order = VM_PHYS_ORDER_ALLOCATED;
+        order++;
+        pa &= -vm_page_ptoa(1 << order);
+        page = &seg->pages[vm_page_atop(pa - seg->start)];
+    }
+
+    vm_phys_free_list_insert(&seg->free_lists[order], page);
+    page->order = order;
+    seg->nr_free_pages += nr_pages;
+}
+
+static void __init
+vm_phys_cpu_pool_init(struct vm_phys_cpu_pool *cpu_pool, int size)
+{
+    cpu_pool->size = size;
+    cpu_pool->transfer_size = (size + VM_PHYS_CPU_POOL_TRANSFER_RATIO - 1)
+                              / VM_PHYS_CPU_POOL_TRANSFER_RATIO;
+    cpu_pool->nr_pages = 0;
+    list_init(&cpu_pool->pages);
+}
+
+static inline struct vm_phys_cpu_pool *
+vm_phys_cpu_pool_get(struct vm_phys_seg *seg)
+{
+    return &seg->cpu_pools[cpu_id()];
+}
+
+static inline struct vm_page *
+vm_phys_cpu_pool_pop(struct vm_phys_cpu_pool *cpu_pool)
+{
+    struct vm_page *page;
+
+    assert(cpu_pool->nr_pages != 0);
+    cpu_pool->nr_pages--;
+    page = list_first_entry(&cpu_pool->pages, struct vm_page, node);
+    list_remove(&page->node);
+    return page;
+}
+
+static inline void
+vm_phys_cpu_pool_push(struct vm_phys_cpu_pool *cpu_pool, struct vm_page *page)
+{
+    assert(cpu_pool->nr_pages < cpu_pool->size);
+    cpu_pool->nr_pages++;
+    list_insert(&cpu_pool->pages, &page->node);
+}
+
+static int
+vm_phys_cpu_pool_fill(struct vm_phys_cpu_pool *cpu_pool,
+                      struct vm_phys_seg *seg)
+{
+    struct vm_page *page;
+    int i;
+
+    assert(cpu_pool->nr_pages == 0);
+
+    /* mutex_lock(&seg->mutex); */
+
+    for (i = 0; i < cpu_pool->transfer_size; i++) {
+        page = vm_phys_seg_alloc_from_buddy(seg, 0);
+
+        if (page == NULL)
+            break;
+
+        vm_phys_cpu_pool_push(cpu_pool, page);
+    }
+
+    /* mutex_unlock(&seg->mutex); */
+
+    return i;
+}
+
+static void
+vm_phys_cpu_pool_drain(struct vm_phys_cpu_pool *cpu_pool,
+                       struct vm_phys_seg *seg)
+{
+    struct vm_page *page;
+    int i;
+
+    assert(cpu_pool->nr_pages == cpu_pool->size);
+
+    /* mutex_lock(&seg->mutex); */
+
+    for (i = cpu_pool->transfer_size; i > 0; i--) {
+        page = vm_phys_cpu_pool_pop(cpu_pool);
+        vm_phys_seg_free_to_buddy(seg, page, 0);
+    }
+
+    /* mutex_unlock(&seg->mutex); */
+}
+
+static inline vm_phys_t __init
+vm_phys_seg_size(struct vm_phys_seg *seg)
+{
+    return seg->end - seg->start;
+}
+
+static int __init
+vm_phys_seg_compute_pool_size(struct vm_phys_seg *seg)
+{
+    vm_phys_t size;
+
+    size = vm_page_atop(vm_phys_seg_size(seg)) / VM_PHYS_CPU_POOL_RATIO;
+
+    if (size == 0)
+        size = 1;
+    else if (size > VM_PHYS_CPU_POOL_MAX_SIZE)
+        size = VM_PHYS_CPU_POOL_MAX_SIZE;
+
+    return size;
+}
+
+static void __init
+vm_phys_seg_init(struct vm_phys_seg *seg, struct vm_page *pages)
+{
+    vm_phys_t pa;
+    int pool_size;
+    unsigned int i;
+
+    pool_size = vm_phys_seg_compute_pool_size(seg);
+
+    for (i = 0; i < ARRAY_SIZE(seg->cpu_pools); i++)
+        vm_phys_cpu_pool_init(&seg->cpu_pools[i], pool_size);
+
+    seg->pages = pages;
+    seg->pages_end = pages + vm_page_atop(vm_phys_seg_size(seg));
+    /* mutex_init(&seg->mutex); */
+
+    for (i = 0; i < ARRAY_SIZE(seg->free_lists); i++)
+        vm_phys_free_list_init(&seg->free_lists[i]);
+
+    seg->nr_free_pages = 0;
+    i = seg - vm_phys_segs;
+
+    for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE)
+        vm_phys_init_page(&pages[vm_page_atop(pa - seg->start)], i,
+                          VM_PHYS_ORDER_ALLOCATED, pa);
+}
+
+static struct vm_page *
+vm_phys_seg_alloc(struct vm_phys_seg *seg, unsigned int order)
+{
+    struct vm_phys_cpu_pool *cpu_pool;
+    struct vm_page *page;
+    int filled;
+
+    assert(order < VM_PHYS_NR_FREE_LISTS);
+
+    if (order == 0) {
+        cpu_pool = vm_phys_cpu_pool_get(seg);
+
+        /* mutex_lock(&cpu_pool->mutex); */
+
+        if (cpu_pool->nr_pages == 0) {
+            filled = vm_phys_cpu_pool_fill(cpu_pool, seg);
+
+            if (!filled) {
+                /* mutex_unlock(&cpu_pool->mutex); */
+                return NULL;
+            }
+        }
+
+        page = vm_phys_cpu_pool_pop(cpu_pool);
+        /* mutex_unlock(&cpu_pool->mutex); */
+    } else {
+        /* mutex_lock(&seg->mutex); */
+        page = vm_phys_seg_alloc_from_buddy(seg, order);
+        /* mutex_unlock(&seg->mutex); */
+    }
+
+    return page;
+}
+
+static void
+vm_phys_seg_free(struct vm_phys_seg *seg, struct vm_page *page,
+                 unsigned int order)
+{
+    struct vm_phys_cpu_pool *cpu_pool;
+
+    assert(order < VM_PHYS_NR_FREE_LISTS);
+
+    if (order == 0) {
+        cpu_pool = vm_phys_cpu_pool_get(seg);
+
+        /* mutex_lock(&cpu_pool->mutex); */
+
+        if (cpu_pool->nr_pages == cpu_pool->size)
+            vm_phys_cpu_pool_drain(cpu_pool, seg);
+
+        vm_phys_cpu_pool_push(cpu_pool, page);
+        /* mutex_unlock(&cpu_pool->mutex); */
+    } else {
+        /* mutex_lock(&seg->mutex); */
+        vm_phys_seg_free_to_buddy(seg, page, order);
+        /* mutex_unlock(&seg->mutex); */
+    }
+}
+
+void __init
+vm_phys_load(const char *name, vm_phys_t start, vm_phys_t end,
+             vm_phys_t avail_start, vm_phys_t avail_end,
+             unsigned int seglist_prio)
+{
+    struct vm_phys_boot_seg *boot_seg;
+    struct vm_phys_seg *seg;
+    struct list *seg_list;
+    unsigned int i;
+
+    assert(name != NULL);
+    assert(start < end);
+    assert(seglist_prio < ARRAY_SIZE(vm_phys_seg_lists));
+
+    if (!vm_phys_load_initialized) {
+        for (i = 0; i < ARRAY_SIZE(vm_phys_seg_lists); i++)
+            list_init(&vm_phys_seg_lists[i]);
+
+        vm_phys_segs_size = 0;
+        vm_phys_load_initialized = 1;
+    }
+
+    if (vm_phys_segs_size >= ARRAY_SIZE(vm_phys_segs))
+        panic("vm_phys: too many physical segments");
+
+    seg_list = &vm_phys_seg_lists[seglist_prio];
+    seg = &vm_phys_segs[vm_phys_segs_size];
+    boot_seg = &vm_phys_boot_segs[vm_phys_segs_size];
+
+    list_insert_tail(seg_list, &seg->node);
+    seg->start = start;
+    seg->end = end;
+    strcpy(seg->name, name); /* TODO: strlcpy */
+    boot_seg->avail_start = avail_start;
+    boot_seg->avail_end = avail_end;
+
+    vm_phys_segs_size++;
+}
+
+vm_phys_t __init
+vm_phys_bootalloc(void)
+{
+    struct vm_phys_boot_seg *boot_seg;
+    struct vm_phys_seg *seg;
+    struct list *seg_list;
+    vm_phys_t pa;
+
+    for (seg_list = &vm_phys_seg_lists[ARRAY_SIZE(vm_phys_seg_lists) - 1];
+         seg_list >= vm_phys_seg_lists;
+         seg_list--)
+        list_for_each_entry(seg_list, seg, node) {
+            boot_seg = &vm_phys_boot_segs[seg - vm_phys_segs];
+
+            if ((boot_seg->avail_end - boot_seg->avail_start) > 1) {
+                pa = boot_seg->avail_start;
+                boot_seg->avail_start += PAGE_SIZE;
+                return pa;
+            }
+        }
+
+    panic("vm_phys: no physical memory available");
+}
+
+void __init
+vm_phys_setup(void)
+{
+    struct vm_phys_boot_seg *boot_seg;
+    struct vm_phys_seg *seg;
+    struct vm_page *map, *start, *end;
+    size_t pages, map_size;
+    unsigned int i;
+
+    /*
+     * Compute the memory map size.
+     */
+    pages = 0;
+
+    for (i = 0; i < vm_phys_segs_size; i++)
+        pages += vm_page_atop(vm_phys_seg_size(&vm_phys_segs[i]));
+
+    map_size = P2ROUND(pages * sizeof(struct vm_page), PAGE_SIZE);
+    printk("vm_phys: page table size: %u entries (%uk)\n", pages,
+           map_size >> 10);
+    map = (struct vm_page *)vm_kmem_bootalloc(map_size);
+
+    /*
+     * Initialize the segments, associating them to the memory map. When
+     * the segments are initialized, all their pages are set allocated,
+     * with a block size of one (order 0). They are then released, which
+     * populates the free lists.
+     */
+    for (i = 0; i < vm_phys_segs_size; i++) {
+        seg = &vm_phys_segs[i];
+        boot_seg = &vm_phys_boot_segs[i];
+        vm_phys_seg_init(seg, map);
+
+        start = seg->pages + vm_page_atop(boot_seg->avail_start - seg->start);
+        end = seg->pages + vm_page_atop(boot_seg->avail_end - seg->start);
+
+        while (start < end) {
+            vm_phys_seg_free_to_buddy(seg, start, 0);
+            start++;
+        }
+
+        map += vm_page_atop(vm_phys_seg_size(seg));
+    }
+
+    vm_phys_ready = 1;
+}
+
+void __init
+vm_phys_manage(struct vm_page *page)
+{
+    assert(page->seg_index < ARRAY_SIZE(vm_phys_segs));
+
+    vm_phys_seg_free_to_buddy(&vm_phys_segs[page->seg_index], page, 0);
+}
+
+struct vm_page *
+vm_phys_lookup_page(vm_phys_t pa)
+{
+    struct vm_phys_seg *seg;
+    unsigned int i;
+
+    for (i = 0; i < vm_phys_segs_size; i++) {
+        seg = &vm_phys_segs[i];
+
+        if ((pa >= seg->start) && (pa < seg->end))
+            return &seg->pages[vm_page_atop(pa - seg->start)];
+    }
+
+    return NULL;
+}
+
+struct vm_page *
+vm_phys_alloc(unsigned int order)
+{
+    struct vm_phys_seg *seg;
+    struct list *seg_list;
+    struct vm_page *page;
+
+    for (seg_list = &vm_phys_seg_lists[ARRAY_SIZE(vm_phys_seg_lists) - 1];
+         seg_list >= vm_phys_seg_lists;
+         seg_list--)
+        list_for_each_entry(seg_list, seg, node) {
+            page = vm_phys_seg_alloc(seg, order);
+
+            if (page != NULL)
+                return page;
+        }
+
+    return NULL;
+}
+
+void
+vm_phys_free(struct vm_page *page, unsigned int order)
+{
+    assert(page->seg_index < ARRAY_SIZE(vm_phys_segs));
+
+    vm_phys_seg_free(&vm_phys_segs[page->seg_index], page, order);
+}
+
+void
+vm_phys_info(void)
+{
+    struct vm_phys_seg *seg;
+    unsigned long pages;
+    unsigned int i;
+
+    for (i = 0; i < vm_phys_segs_size; i++) {
+        seg = &vm_phys_segs[i];
+        pages = (unsigned long)(seg->pages_end - seg->pages);
+        printk("vm_phys: %s: pages: %lu (%luM), free: %lu (%luM)\n", seg->name,
+               pages, pages >> (20 - PAGE_SHIFT), seg->nr_free_pages,
+               seg->nr_free_pages >> (20 - PAGE_SHIFT));
+    }
+}
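Editor's note: the merge loop in vm_phys_seg_free_to_buddy() rests on two pieces of address arithmetic that the header comment only hints at: the buddy of a block of 2^order pages is found by toggling the bit of weight `order` in the physical address (pa ^ vm_page_ptoa(1 << order)), and after a merge the combined block is realigned with pa &= -vm_page_ptoa(1 << order). The standalone sketch below exercises only that arithmetic outside the kernel; PAGE_SIZE and the ptoa/atop helpers are simplified stand-ins for the kernel's own definitions, and the sample frame numbers are made up.

/* Standalone illustration of the buddy address arithmetic used by
 * vm_phys_seg_free_to_buddy(). Not kernel code: PAGE_SIZE and the
 * ptoa/atop helpers below are simplified stand-ins. */
#include <stdio.h>

#define PAGE_SIZE 4096UL

/* page frames <-> physical addresses, analogous to vm_page_ptoa/vm_page_atop */
static unsigned long ptoa(unsigned long nr_pages) { return nr_pages * PAGE_SIZE; }
static unsigned long atop(unsigned long addr)     { return addr / PAGE_SIZE; }

int main(void)
{
    unsigned long pa = ptoa(3);     /* a free order-0 block starting at frame 3 */
    unsigned int order = 0;

    /* The buddy of a 2^order block is found by toggling the bit of
     * weight `order` in the frame number. */
    unsigned long buddy_pa = pa ^ ptoa(1UL << order);
    printf("frame %lu, order %u -> buddy is frame %lu\n",
           atop(pa), order, atop(buddy_pa));            /* prints frame 2 */

    /* After merging with its buddy, the combined order-1 block starts at
     * the address realigned down to the new block size. */
    order++;
    pa &= -ptoa(1UL << order);
    printf("merged block: order %u, starts at frame %lu\n",
           order, atop(pa));                            /* prints frame 2 */

    return 0;
}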
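Editor's note: for context, the interface this file exports is expected to be driven roughly as follows during early boot, based on the definitions above: segments are registered with vm_phys_load(), early single-page allocations come from vm_phys_bootalloc(), vm_phys_setup() then builds the vm_page map and populates the free lists, after which vm_phys_alloc()/vm_phys_free() serve regular requests. The caller below is hypothetical and not part of the repository; the segment name, addresses and priority value are invented for illustration.

/* Hypothetical boot-time caller of the vm_phys interface defined in this
 * file. Segment boundaries and the segment list priority are invented. */
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

void example_vm_bootstrap(void)
{
    struct vm_page *page;
    vm_phys_t pa;

    /* Register one segment of physical memory; avail_start/avail_end
     * delimit the part not already used by the kernel image. */
    vm_phys_load("ram", 0x0, 0x8000000, 0x400000, 0x8000000, 0);

    /* Before vm_phys_setup() runs, early code may grab single pages
     * directly from the registered segments. */
    pa = vm_phys_bootalloc();
    (void)pa;

    /* Build the vm_page array and populate the free lists. */
    vm_phys_setup();

    /* From here on, regular buddy allocations are available. */
    page = vm_phys_alloc(0);    /* one order-0 page */

    if (page != NULL)
        vm_phys_free(page, 0);
}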