-rw-r--r--   Makefrag.am               |   3
-rw-r--r--   arch/x86/machine/pmap.h   |   1
-rw-r--r--   kern/llsync.c             |   6
-rw-r--r--   kern/rdxtree.c            |   2
-rw-r--r--   kern/rdxtree.h            |  10
-rw-r--r--   kern/sref.c               |   1
-rw-r--r--   vm/vm_kmem.c              |  89
-rw-r--r--   vm/vm_kmem.h              |   5
-rw-r--r--   vm/vm_object.c            | 142
-rw-r--r--   vm/vm_object.h            |  78
-rw-r--r--   vm/vm_object_types.h      |  36
-rw-r--r--   vm/vm_page.c              |  23
-rw-r--r--   vm/vm_page.h              |  79
-rw-r--r--   vm/vm_setup.c             |   6
14 files changed, 448 insertions, 33 deletions
diff --git a/Makefrag.am b/Makefrag.am
index 8a966b25..47b42958 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -117,6 +117,9 @@ x15_SOURCES += \
         vm/vm_kmem.h \
         vm/vm_map.c \
         vm/vm_map.h \
+        vm/vm_object.c \
+        vm/vm_object.h \
+        vm/vm_object_types.h \
         vm/vm_page.c \
         vm/vm_page.h \
         vm/vm_prot.h \
diff --git a/arch/x86/machine/pmap.h b/arch/x86/machine/pmap.h
index 2b23126d..84042583 100644
--- a/arch/x86/machine/pmap.h
+++ b/arch/x86/machine/pmap.h
@@ -16,6 +16,7 @@
  *
  *
  * TODO Comment.
+ * TODO Rename MIN/MAX to START/END.
  */
 
 #ifndef _X86_PMAP_H
diff --git a/kern/llsync.c b/kern/llsync.c
index e3398693..8ba87b67 100644
--- a/kern/llsync.c
+++ b/kern/llsync.c
@@ -102,8 +102,6 @@ llsync_setup(void)
         cpu_data = percpu_ptr(llsync_cpu_data, i);
         work_queue_init(&cpu_data->queue0);
     }
-
-    llsync_is_ready = true;
 }
 
 static void
@@ -181,6 +179,10 @@ llsync_register(void)
     unsigned long flags;
     unsigned int cpu;
 
+    if (!llsync_is_ready) {
+        llsync_is_ready = true;
+    }
+
     cpu = cpu_id();
     cpu_data = llsync_get_cpu_data();
diff --git a/kern/rdxtree.c b/kern/rdxtree.c
index 9d124393..77f6a68a 100644
--- a/kern/rdxtree.c
+++ b/kern/rdxtree.c
@@ -900,5 +900,5 @@ rdxtree_setup(void)
 {
     kmem_cache_init(&rdxtree_node_cache, "rdxtree_node",
                     sizeof(struct rdxtree_node), 0,
-                    rdxtree_node_ctor, 0);
+                    rdxtree_node_ctor, KMEM_CACHE_PAGE_ONLY);
 }
diff --git a/kern/rdxtree.h b/kern/rdxtree.h
index e3a2ba06..a30512cf 100644
--- a/kern/rdxtree.h
+++ b/kern/rdxtree.h
@@ -29,7 +29,9 @@
 #include <stddef.h>
 #include <stdint.h>
 
-typedef uint32_t rdxtree_key_t;
+#include <kern/llsync.h>
+
+typedef uint64_t rdxtree_key_t;
 
 /*
  * Radix tree initialization flags.
@@ -154,6 +156,12 @@ rdxtree_lookup_slot(const struct rdxtree *tree, rdxtree_key_t key)
     return rdxtree_lookup_common(tree, key, 1);
 }
 
+static inline void *
+rdxtree_load_slot(void **slot)
+{
+    return llsync_read_ptr(*slot);
+}
+
 /*
  * Replace a pointer in a tree.
  *
diff --git a/kern/sref.c b/kern/sref.c
index 7c4f14e1..7167efff 100644
--- a/kern/sref.c
+++ b/kern/sref.c
@@ -288,6 +288,7 @@ sref_weakref_tryget(struct sref_weakref *weakref)
 {
     uintptr_t addr, oldval, newval;
 
+    /* TODO Review */
     do {
         addr = atomic_load(&weakref->addr, ATOMIC_RELAXED);
         newval = addr & SREF_WEAKREF_MASK;
diff --git a/vm/vm_kmem.c b/vm/vm_kmem.c
index 466019ae..da372b00 100644
--- a/vm/vm_kmem.c
+++ b/vm/vm_kmem.c
@@ -13,9 +13,6 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- *
- * TODO Rework so that pmap update errors can be handled.
  */
 
 #include <assert.h>
@@ -33,6 +30,7 @@
 #include <vm/vm_inherit.h>
 #include <vm/vm_kmem.h>
 #include <vm/vm_map.h>
+#include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_prot.h>
 
@@ -42,6 +40,24 @@
 static struct vm_map kernel_map_store;
 struct vm_map *kernel_map __read_mostly = &kernel_map_store;
 
+static struct vm_object vm_kmem_kernel_object;
+
+static uint64_t
+vm_kmem_offset(uintptr_t va)
+{
+    assert(va >= PMAP_MIN_KMEM_ADDRESS);
+    return va - PMAP_MIN_KMEM_ADDRESS;
+}
+
+void __init
+vm_kmem_setup(void)
+{
+    uint64_t size;
+
+    size = vm_kmem_offset(PMAP_MAX_KMEM_ADDRESS);
+    vm_object_init(&vm_kmem_kernel_object, size);
+}
+
 static int
 vm_kmem_alloc_check(size_t size)
 {
@@ -98,6 +114,7 @@ vm_kmem_alloc(size_t size)
 {
     struct vm_page *page;
     uintptr_t va, start, end;
+    int error;
 
     size = vm_page_round(size);
     va = (uintptr_t)vm_kmem_alloc_va(size);
@@ -110,30 +127,38 @@ vm_kmem_alloc(size_t size)
         page = vm_page_alloc(0, VM_PAGE_SEL_HIGHMEM, VM_PAGE_KERNEL);
 
         if (page == NULL) {
-            goto error_page;
+            goto error;
         }
 
-        pmap_enter(kernel_pmap, start, vm_page_to_pa(page),
-                   VM_PROT_READ | VM_PROT_WRITE, PMAP_PEF_GLOBAL);
-    }
+        /*
+         * The page becomes managed by the object and is freed in case
+         * of failure.
+         */
+        error = vm_object_insert(&vm_kmem_kernel_object, page,
+                                 vm_kmem_offset(start));
 
-    pmap_update(kernel_pmap);
-    return (void *)va;
+        if (error) {
+            goto error;
+        }
 
-error_page:
-    size = start - va;
+        error = pmap_enter(kernel_pmap, start, vm_page_to_pa(page),
+                           VM_PROT_READ | VM_PROT_WRITE, PMAP_PEF_GLOBAL);
 
-    if (size != 0) {
-        pmap_update(kernel_pmap);
-        vm_kmem_free((void *)va, size);
+        if (error || (start - va == vm_page_ptoa(1000))) {
+            goto error;
+        }
     }
 
-    size = end - start;
+    error = pmap_update(kernel_pmap);
 
-    if (size != 0) {
-        vm_kmem_free_va((void *)start, size);
+    if (error) {
+        goto error;
     }
 
+    return (void *)va;
+
+error:
+    vm_kmem_free((void *)va, size);
     return NULL;
 }
@@ -141,10 +166,7 @@ void
 vm_kmem_free(void *addr, size_t size)
 {
     const struct cpumap *cpumap;
-    struct vm_page *page;
     uintptr_t va, end;
-    phys_addr_t pa;
-    int error;
 
     va = (uintptr_t)addr;
     size = vm_page_round(size);
@@ -152,16 +174,14 @@ vm_kmem_free(void *addr, size_t size)
     cpumap = cpumap_all();
 
     while (va < end) {
-        error = pmap_kextract(va, &pa);
-        assert(!error);
         pmap_remove(kernel_pmap, va, cpumap);
-        page = vm_page_lookup(pa);
-        assert(page != NULL);
-        vm_page_free(page, 0);
         va += PAGE_SIZE;
     }
 
     pmap_update(kernel_pmap);
+    vm_object_remove(&vm_kmem_kernel_object,
+                     vm_kmem_offset((uintptr_t)addr),
+                     vm_kmem_offset(end));
     vm_kmem_free_va(addr, size);
 }
 
@@ -172,6 +192,7 @@ vm_kmem_map_pa(phys_addr_t pa, size_t size,
     uintptr_t offset, map_va;
     size_t map_size;
     phys_addr_t start;
+    int error;
 
     start = vm_page_trunc(pa);
     map_size = vm_page_round(pa + size) - start;
@@ -182,11 +203,19 @@
     }
 
     for (offset = 0; offset < map_size; offset += PAGE_SIZE) {
-        pmap_enter(kernel_pmap, map_va + offset, start + offset,
-                   VM_PROT_READ | VM_PROT_WRITE, PMAP_PEF_GLOBAL);
+        error = pmap_enter(kernel_pmap, map_va + offset, start + offset,
+                           VM_PROT_READ | VM_PROT_WRITE, PMAP_PEF_GLOBAL);
+
+        if (error) {
+            goto error;
+        }
     }
 
-    pmap_update(kernel_pmap);
+    error = pmap_update(kernel_pmap);
+
+    if (error) {
+        goto error;
+    }
 
     if (map_vap != NULL) {
         *map_vap = map_va;
@@ -197,6 +226,10 @@
     }
 
     return (void *)(map_va + (uintptr_t)(pa & PAGE_MASK));
+
+error:
+    vm_kmem_unmap_pa(map_va, map_size);
+    return NULL;
 }
 
 void
diff --git a/vm/vm_kmem.h b/vm/vm_kmem.h
index 3628a690..d845c1d8 100644
--- a/vm/vm_kmem.h
+++ b/vm/vm_kmem.h
@@ -45,6 +45,11 @@ extern char _end;
 extern struct vm_map *kernel_map;
 
 /*
+ * Initialize the vm_kmem module.
+ */
+void vm_kmem_setup(void);
+
+/*
  * Allocate pure virtual kernel pages.
  *
  * The caller is reponsible for taking care of the underlying physical memory.
diff --git a/vm/vm_object.c b/vm/vm_object.c
new file mode 100644
index 00000000..874b26f5
--- /dev/null
+++ b/vm/vm_object.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation is based on the paper "A lockless pagecache in Linux"
+ * by Nick Piggin. It allows looking up pages without contention on VM objects.
+ */
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <kern/init.h>
+#include <kern/llsync.h>
+#include <kern/mutex.h>
+#include <kern/rdxtree.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <machine/page.h>
+
+void __init
+vm_object_setup(void)
+{
+}
+
+void __init
+vm_object_init(struct vm_object *object, uint64_t size)
+{
+    assert(vm_page_aligned(size));
+
+    mutex_init(&object->lock);
+    rdxtree_init(&object->pages, 0);
+    object->size = size;
+    object->nr_pages = 0;
+}
+
+int
+vm_object_insert(struct vm_object *object, struct vm_page *page,
+                 uint64_t offset)
+{
+    int error;
+
+    assert(vm_page_aligned(offset));
+
+    /*
+     * The page may have no references. Add one before publishing
+     * so that concurrent lookups succeed.
+     */
+    vm_page_ref(page);
+
+    mutex_lock(&object->lock);
+
+    if (offset >= object->size) {
+        error = ERROR_INVAL;
+        goto error;
+    }
+
+    error = rdxtree_insert(&object->pages, vm_page_atop(offset), page);
+
+    if (error) {
+        goto error;
+    }
+
+    vm_page_link(page, object, offset);
+    object->nr_pages++;
+    assert(object->nr_pages != 0);
+
+    mutex_unlock(&object->lock);
+
+    return 0;
+
+error:
+    mutex_unlock(&object->lock);
+
+    vm_page_unref(page);
+
+    return error;
+}
+
+void
+vm_object_remove(struct vm_object *object, uint64_t start, uint64_t end)
+{
+    struct vm_page *page;
+    uint64_t offset;
+
+    assert(vm_page_aligned(start));
+    assert(vm_page_aligned(end));
+    assert(start <= end);
+
+    mutex_lock(&object->lock);
+
+    for (offset = start; offset < end; offset += PAGE_SIZE) {
+        page = rdxtree_remove(&object->pages, vm_page_atop(offset));
+
+        if (page == NULL) {
+            continue;
+        }
+
+        vm_page_unlink(page);
+        vm_page_unref(page);
+        assert(object->nr_pages != 0);
+        object->nr_pages--;
+    }
+
+    mutex_unlock(&object->lock);
+}
+
+struct vm_page *
+vm_object_lookup(struct vm_object *object, uint64_t offset)
+{
+    struct vm_page *page;
+    int error;
+
+    llsync_read_enter();
+
+    do {
+        page = rdxtree_lookup(&object->pages, vm_page_atop(offset));
+
+        if (page == NULL) {
+            break;
+        }
+
+        error = vm_page_tryref(page);
+    } while (error);
+
+    llsync_read_exit();
+
+    return page;
+}
diff --git a/vm/vm_object.h b/vm/vm_object.h
new file mode 100644
index 00000000..046937f2
--- /dev/null
+++ b/vm/vm_object.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Virtual memory object.
+ *
+ * The purpose of VM objects is to track pages that are resident in
+ * physical memory. They collectively form the page cache.
+ */
+
+#ifndef _VM_OBJECT_H
+#define _VM_OBJECT_H
+
+#include <stdint.h>
+
+#include <kern/rdxtree.h>
+#include <vm/vm_object_types.h>
+#include <vm/vm_page.h>
+
+struct vm_object;
+
+/*
+ * Initialize the vm_object module.
+ */
+void vm_object_setup(void);
+
+/*
+ * Initialize a VM object.
+ */
+void vm_object_init(struct vm_object *object, uint64_t size);
+
+/*
+ * Insert a page into a VM object.
+ *
+ * The offset must be page-aligned.
+ *
+ * The page becomes managed, and gains a reference. If successful,
+ * the reference is kept. Otherwise it's dropped. If the page had
+ * no references on entry, and a failure occurs, the page is freed.
+ */
+int vm_object_insert(struct vm_object *object, struct vm_page *page,
+                     uint64_t offset);
+
+/*
+ * Remove pages from a VM object.
+ *
+ * The range boundaries must be page-aligned.
+ *
+ * Holes in the given range are silently skipped. Pages that are removed
+ * become unmanaged and lose a reference.
+ */
+void vm_object_remove(struct vm_object *object, uint64_t start, uint64_t end);
+
+/*
+ * Look up a page in a VM object.
+ *
+ * The offset must be page-aligned.
+ *
+ * If successful, the returned page gains a reference. Note that, if a valid
+ * page is returned, it may already have been removed from the object, or
+ * moved at a different offset.
+ */
+struct vm_page * vm_object_lookup(struct vm_object *object, uint64_t offset);
+
+#endif /* _VM_OBJECT_H */
diff --git a/vm/vm_object_types.h b/vm/vm_object_types.h
new file mode 100644
index 00000000..4026002e
--- /dev/null
+++ b/vm/vm_object_types.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definition used to avoid inclusion circular dependencies.
+ */
+
+#ifndef _VM_OBJECT_TYPES_H
+#define _VM_OBJECT_TYPES_H
+
+#include <stdint.h>
+
+#include <kern/mutex.h>
+#include <kern/rdxtree.h>
+
+struct vm_object {
+    struct mutex lock;
+    struct rdxtree pages;
+    uint64_t size;
+    unsigned long nr_pages;
+};
+
+#endif /* _VM_OBJECT_TYPES_H */
diff --git a/vm/vm_page.c b/vm/vm_page.c
index 00674499..950d04a1 100644
--- a/vm/vm_page.c
+++ b/vm/vm_page.c
@@ -167,6 +167,10 @@ vm_page_init(struct vm_page *page, unsigned short zone_index, phys_addr_t pa)
     page->zone_index = zone_index;
     page->order = VM_PAGE_ORDER_UNLISTED;
     page->phys_addr = pa;
+
+    page->nr_refs = 0;
+
+    page->object = NULL;
 }
 
 void
@@ -740,6 +744,7 @@ vm_page_setup(void)
     SHELL_REGISTER_CMDS(vm_page_shell_cmds);
 }
 
+/* TODO Rename to avoid confusion with "managed pages" */
 void __init
 vm_page_manage(struct vm_page *page)
 {
@@ -767,6 +772,22 @@ vm_page_lookup(phys_addr_t pa)
     return NULL;
 }
 
+static bool
+vm_page_block_referenced(const struct vm_page *page, unsigned int order)
+{
+    unsigned int i, nr_pages;
+
+    nr_pages = 1 << order;
+
+    for (i = 0; i < nr_pages; i++) {
+        if (vm_page_referenced(&page[i])) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
 struct vm_page *
 vm_page_alloc(unsigned int order, unsigned int selector, unsigned short type)
 {
@@ -777,6 +798,7 @@ vm_page_alloc(unsigned int order, unsigned int selector, unsigned short type)
         page = vm_page_zone_alloc(&vm_page_zones[i], order, type);
 
         if (page != NULL) {
+            assert(!vm_page_block_referenced(page, order));
             return page;
         }
     }
@@ -788,6 +810,7 @@ void
 vm_page_free(struct vm_page *page, unsigned int order)
 {
     assert(page->zone_index < ARRAY_SIZE(vm_page_zones));
+    assert(!vm_page_block_referenced(page, order));
 
     vm_page_zone_free(&vm_page_zones[page->zone_index], page, order);
 }
diff --git a/vm/vm_page.h b/vm/vm_page.h
index 026c1180..ea267db8 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -16,15 +16,20 @@
  *
  *
  * Physical page management.
+ *
+ * A page is said to be managed if it's linked to a VM object, in which
+ * case there is at least one reference to it.
  */
 
 #ifndef _VM_VM_PAGE_H
 #define _VM_VM_PAGE_H
 
 #include <assert.h>
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
 
+#include <kern/atomic.h>
 #include <kern/list.h>
 #include <kern/log2.h>
 #include <kern/macros.h>
@@ -32,11 +37,14 @@
 #include <machine/pmap.h>
 #include <machine/pmem.h>
 #include <machine/types.h>
+#include <vm/vm_object_types.h>
 
 /*
  * Address/page conversion and rounding macros (not inline functions to
  * be easily usable on both virtual and physical addresses, which may not
  * have the same type size).
+ *
+ * TODO Rename btop and ptob.
  */
 #define vm_page_atop(addr)      ((addr) >> PAGE_SHIFT)
 #define vm_page_ptoa(page)      ((page) << PAGE_SHIFT)
@@ -80,6 +88,12 @@ struct vm_page {
     unsigned short order;
     phys_addr_t phys_addr;
     void *priv;
+
+    unsigned int nr_refs;
+
+    /* VM object back reference */
+    struct vm_object *object;
+    uint64_t offset;
 };
 
 static inline unsigned short
@@ -139,6 +153,21 @@ vm_page_get_priv(const struct vm_page *page)
     return page->priv;
 }
 
+static inline void
+vm_page_link(struct vm_page *page, struct vm_object *object, uint64_t offset)
+{
+    assert(object != NULL);
+    page->object = object;
+    page->offset = offset;
+}
+
+static inline void
+vm_page_unlink(struct vm_page *page)
+{
+    assert(page->object != NULL);
+    page->object = NULL;
+}
+
 /*
  * Load physical memory into the vm_page module at boot time.
  *
@@ -193,12 +222,16 @@ struct vm_page * vm_page_lookup(phys_addr_t pa);
  *
  * The selector is used to determine the zones from which allocation can
  * be attempted.
+ *
+ * If successful, the returned pages have no references.
  */
 struct vm_page * vm_page_alloc(unsigned int order, unsigned int selector,
                                unsigned short type);
 
 /*
  * Release a block of 2^order physical pages.
+ *
+ * The pages must have no references.
  */
 void vm_page_free(struct vm_page *page, unsigned int order);
 
@@ -212,4 +245,50 @@ const char * vm_page_zone_name(unsigned int zone_index);
  */
 void vm_page_log_info(void);
 
+static inline bool
+vm_page_referenced(const struct vm_page *page)
+{
+    return atomic_load(&page->nr_refs, ATOMIC_RELAXED) != 0;
+}
+
+static inline void
+vm_page_ref(struct vm_page *page)
+{
+    unsigned int nr_refs;
+
+    nr_refs = atomic_fetch_add(&page->nr_refs, 1, ATOMIC_RELAXED);
+    assert(nr_refs != (unsigned int)-1);
+}
+
+static inline void
+vm_page_unref(struct vm_page *page)
+{
+    unsigned int nr_refs;
+
+    nr_refs = atomic_fetch_sub_acq_rel(&page->nr_refs, 1);
+    assert(nr_refs != 0);
+
+    if (nr_refs == 1) {
+        vm_page_free(page, 0);
+    }
+}
+
+static inline int
+vm_page_tryref(struct vm_page *page)
+{
+    unsigned int nr_refs, prev;
+
+    do {
+        nr_refs = atomic_load(&page->nr_refs, ATOMIC_RELAXED);
+
+        if (nr_refs == 0) {
+            return ERROR_AGAIN;
+        }
+
+        prev = atomic_cas_acquire(&page->nr_refs, nr_refs, nr_refs + 1);
+    } while (prev != nr_refs);
+
+    return 0;
+}
+
 #endif /* _VM_VM_PAGE_H */
diff --git a/vm/vm_setup.c b/vm/vm_setup.c
index 0957815f..9fb9c52d 100644
--- a/vm/vm_setup.c
+++ b/vm/vm_setup.c
@@ -20,7 +20,9 @@
 #include <kern/rdxtree.h>
 #include <kern/percpu.h>
 #include <machine/pmap.h>
+#include <vm/vm_kmem.h>
 #include <vm/vm_map.h>
+#include <vm/vm_object.h>
 #include <vm/vm_page.h>
 #include <vm/vm_setup.h>
 
@@ -29,8 +31,10 @@ vm_setup(void)
 {
     vm_page_setup();
     kmem_setup();
-    vm_map_setup();
     rdxtree_setup();
+    vm_object_setup();
+    vm_map_setup();
+    vm_kmem_setup();
    pmap_setup();
     percpu_setup();
 }
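
Usage sketch (not part of the patch): the fragment below shows how a kernel-side
caller could drive the new VM object interface, mirroring the way vm_kmem_alloc()
and vm_kmem_free() use it in the diff above. The object name, its size, the offset
and the ERROR_NOMEM return value from kern/error.h are assumptions made for
illustration; only the vm_object_* and vm_page_* calls come from this change.

/*
 * Illustrative sketch, not part of the commit. Assumed names: example_object,
 * its 1024-page size, the offset, and ERROR_NOMEM.
 */

#include <stdint.h>

#include <kern/error.h>
#include <machine/page.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

static struct vm_object example_object;

static int
vm_object_example(void)
{
    struct vm_page *page;
    uint64_t offset;
    int error;

    /* The object size must be page-aligned. */
    vm_object_init(&example_object, 1024 * PAGE_SIZE);

    offset = 0; /* hypothetical page-aligned offset */

    /* Allocate one unreferenced page and publish it in the object. */
    page = vm_page_alloc(0, VM_PAGE_SEL_HIGHMEM, VM_PAGE_KERNEL);

    if (page == NULL) {
        return ERROR_NOMEM;
    }

    /*
     * vm_object_insert() adds a reference before publishing the page;
     * on failure it drops that reference, which frees the page here
     * since it had no other reference.
     */
    error = vm_object_insert(&example_object, page, offset);

    if (error) {
        return error;
    }

    /* Lockless lookup: a non-NULL result carries an extra reference. */
    page = vm_object_lookup(&example_object, offset);

    if (page != NULL) {
        /* ... use the page ... */
        vm_page_unref(page);
    }

    /*
     * Removing the range drops the object's reference; the last
     * vm_page_unref() then frees the page.
     */
    vm_object_remove(&example_object, offset, offset + PAGE_SIZE);
    return 0;
}

The key property, following the Nick Piggin lockless pagecache design cited in
vm_object.c, is that vm_object_lookup() takes no object lock: it runs inside an
llsync read-side section and uses vm_page_tryref(), which refuses to revive a page
whose reference count has already dropped to zero.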