-rw-r--r--  Makefrag.am                3
-rw-r--r--  arch/x86/machine/pmap.h    1
-rw-r--r--  kern/llsync.c              6
-rw-r--r--  kern/rdxtree.c             2
-rw-r--r--  kern/rdxtree.h            10
-rw-r--r--  kern/sref.c                1
-rw-r--r--  vm/vm_kmem.c              89
-rw-r--r--  vm/vm_kmem.h               5
-rw-r--r--  vm/vm_object.c           142
-rw-r--r--  vm/vm_object.h            78
-rw-r--r--  vm/vm_object_types.h      36
-rw-r--r--  vm/vm_page.c              23
-rw-r--r--  vm/vm_page.h              79
-rw-r--r--  vm/vm_setup.c              6
14 files changed, 448 insertions(+), 33 deletions(-)
diff --git a/Makefrag.am b/Makefrag.am
index 8a966b25..47b42958 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -117,6 +117,9 @@ x15_SOURCES += \
vm/vm_kmem.h \
vm/vm_map.c \
vm/vm_map.h \
+ vm/vm_object.c \
+ vm/vm_object.h \
+ vm/vm_object_types.h \
vm/vm_page.c \
vm/vm_page.h \
vm/vm_prot.h \
diff --git a/arch/x86/machine/pmap.h b/arch/x86/machine/pmap.h
index 2b23126d..84042583 100644
--- a/arch/x86/machine/pmap.h
+++ b/arch/x86/machine/pmap.h
@@ -16,6 +16,7 @@
*
*
* TODO Comment.
+ * TODO Rename MIN/MAX to START/END.
*/
#ifndef _X86_PMAP_H
diff --git a/kern/llsync.c b/kern/llsync.c
index e3398693..8ba87b67 100644
--- a/kern/llsync.c
+++ b/kern/llsync.c
@@ -102,8 +102,6 @@ llsync_setup(void)
cpu_data = percpu_ptr(llsync_cpu_data, i);
work_queue_init(&cpu_data->queue0);
}
-
- llsync_is_ready = true;
}
static void
@@ -181,6 +179,10 @@ llsync_register(void)
unsigned long flags;
unsigned int cpu;
+ if (!llsync_is_ready) {
+ llsync_is_ready = true;
+ }
+
cpu = cpu_id();
cpu_data = llsync_get_cpu_data();
diff --git a/kern/rdxtree.c b/kern/rdxtree.c
index 9d124393..77f6a68a 100644
--- a/kern/rdxtree.c
+++ b/kern/rdxtree.c
@@ -900,5 +900,5 @@ rdxtree_setup(void)
{
kmem_cache_init(&rdxtree_node_cache, "rdxtree_node",
sizeof(struct rdxtree_node), 0,
- rdxtree_node_ctor, 0);
+ rdxtree_node_ctor, KMEM_CACHE_PAGE_ONLY);
}
diff --git a/kern/rdxtree.h b/kern/rdxtree.h
index e3a2ba06..a30512cf 100644
--- a/kern/rdxtree.h
+++ b/kern/rdxtree.h
@@ -29,7 +29,9 @@
#include <stddef.h>
#include <stdint.h>
-typedef uint32_t rdxtree_key_t;
+#include <kern/llsync.h>
+
+typedef uint64_t rdxtree_key_t;
/*
* Radix tree initialization flags.
@@ -154,6 +156,12 @@ rdxtree_lookup_slot(const struct rdxtree *tree, rdxtree_key_t key)
return rdxtree_lookup_common(tree, key, 1);
}
+static inline void *
+rdxtree_load_slot(void **slot)
+{
+ return llsync_read_ptr(*slot);
+}
+
/*
* Replace a pointer in a tree.
*
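
The new rdxtree_load_slot() pairs with rdxtree_lookup_slot() for lockless
readers: the slot is looked up once, then its current contents are read
through llsync_read_ptr() inside a read-side critical section. Below is a
minimal sketch of the intended pattern, not part of the patch; the function
name is made up, and a real reader that keeps using the looked-up object must
pin it (for instance with a reference count, as vm_object does with
vm_page_tryref()) before leaving the read-side section.

    /* Sketch only: checks whether a key currently has a non-NULL entry. */
    static bool
    example_key_present(const struct rdxtree *tree, rdxtree_key_t key)
    {
        void **slot;
        bool present;

        llsync_read_enter();

        slot = rdxtree_lookup_slot(tree, key);
        present = (slot != NULL) && (rdxtree_load_slot(slot) != NULL);

        llsync_read_exit();

        return present;
    }
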
diff --git a/kern/sref.c b/kern/sref.c
index 7c4f14e1..7167efff 100644
--- a/kern/sref.c
+++ b/kern/sref.c
@@ -288,6 +288,7 @@ sref_weakref_tryget(struct sref_weakref *weakref)
{
uintptr_t addr, oldval, newval;
+ /* TODO Review */
do {
addr = atomic_load(&weakref->addr, ATOMIC_RELAXED);
newval = addr & SREF_WEAKREF_MASK;
diff --git a/vm/vm_kmem.c b/vm/vm_kmem.c
index 466019ae..da372b00 100644
--- a/vm/vm_kmem.c
+++ b/vm/vm_kmem.c
@@ -13,9 +13,6 @@
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- *
- * TODO Rework so that pmap update errors can be handled.
*/
#include <assert.h>
@@ -33,6 +30,7 @@
#include <vm/vm_inherit.h>
#include <vm/vm_kmem.h>
#include <vm/vm_map.h>
+#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_prot.h>
@@ -42,6 +40,24 @@
static struct vm_map kernel_map_store;
struct vm_map *kernel_map __read_mostly = &kernel_map_store;
+static struct vm_object vm_kmem_kernel_object;
+
+static uint64_t
+vm_kmem_offset(uintptr_t va)
+{
+ assert(va >= PMAP_MIN_KMEM_ADDRESS);
+ return va - PMAP_MIN_KMEM_ADDRESS;
+}
+
+void __init
+vm_kmem_setup(void)
+{
+ uint64_t size;
+
+ size = vm_kmem_offset(PMAP_MAX_KMEM_ADDRESS);
+ vm_object_init(&vm_kmem_kernel_object, size);
+}
+
static int
vm_kmem_alloc_check(size_t size)
{
@@ -98,6 +114,7 @@ vm_kmem_alloc(size_t size)
{
struct vm_page *page;
uintptr_t va, start, end;
+ int error;
size = vm_page_round(size);
va = (uintptr_t)vm_kmem_alloc_va(size);
@@ -110,30 +127,38 @@ vm_kmem_alloc(size_t size)
page = vm_page_alloc(0, VM_PAGE_SEL_HIGHMEM, VM_PAGE_KERNEL);
if (page == NULL) {
- goto error_page;
+ goto error;
}
- pmap_enter(kernel_pmap, start, vm_page_to_pa(page),
- VM_PROT_READ | VM_PROT_WRITE, PMAP_PEF_GLOBAL);
- }
+ /*
+ * The page becomes managed by the object and is freed in case
+ * of failure.
+ */
+ error = vm_object_insert(&vm_kmem_kernel_object, page,
+ vm_kmem_offset(start));
- pmap_update(kernel_pmap);
- return (void *)va;
+ if (error) {
+ goto error;
+ }
-error_page:
- size = start - va;
+ error = pmap_enter(kernel_pmap, start, vm_page_to_pa(page),
+ VM_PROT_READ | VM_PROT_WRITE, PMAP_PEF_GLOBAL);
- if (size != 0) {
- pmap_update(kernel_pmap);
- vm_kmem_free((void *)va, size);
+ if (error) {
+ goto error;
+ }
}
- size = end - start;
+ error = pmap_update(kernel_pmap);
- if (size != 0) {
- vm_kmem_free_va((void *)start, size);
+ if (error) {
+ goto error;
}
+ return (void *)va;
+
+error:
+ vm_kmem_free((void *)va, size);
return NULL;
}
@@ -141,10 +166,7 @@ void
vm_kmem_free(void *addr, size_t size)
{
const struct cpumap *cpumap;
- struct vm_page *page;
uintptr_t va, end;
- phys_addr_t pa;
- int error;
va = (uintptr_t)addr;
size = vm_page_round(size);
@@ -152,16 +174,14 @@ vm_kmem_free(void *addr, size_t size)
cpumap = cpumap_all();
while (va < end) {
- error = pmap_kextract(va, &pa);
- assert(!error);
pmap_remove(kernel_pmap, va, cpumap);
- page = vm_page_lookup(pa);
- assert(page != NULL);
- vm_page_free(page, 0);
va += PAGE_SIZE;
}
pmap_update(kernel_pmap);
+ vm_object_remove(&vm_kmem_kernel_object,
+ vm_kmem_offset((uintptr_t)addr),
+ vm_kmem_offset(end));
vm_kmem_free_va(addr, size);
}
@@ -172,6 +192,7 @@ vm_kmem_map_pa(phys_addr_t pa, size_t size,
uintptr_t offset, map_va;
size_t map_size;
phys_addr_t start;
+ int error;
start = vm_page_trunc(pa);
map_size = vm_page_round(pa + size) - start;
@@ -182,11 +203,19 @@ vm_kmem_map_pa(phys_addr_t pa, size_t size,
}
for (offset = 0; offset < map_size; offset += PAGE_SIZE) {
- pmap_enter(kernel_pmap, map_va + offset, start + offset,
- VM_PROT_READ | VM_PROT_WRITE, PMAP_PEF_GLOBAL);
+ error = pmap_enter(kernel_pmap, map_va + offset, start + offset,
+ VM_PROT_READ | VM_PROT_WRITE, PMAP_PEF_GLOBAL);
+
+ if (error) {
+ goto error;
+ }
}
- pmap_update(kernel_pmap);
+ error = pmap_update(kernel_pmap);
+
+ if (error) {
+ goto error;
+ }
if (map_vap != NULL) {
*map_vap = map_va;
@@ -197,6 +226,10 @@ vm_kmem_map_pa(phys_addr_t pa, size_t size,
}
return (void *)(map_va + (uintptr_t)(pa & PAGE_MASK));
+
+error:
+ vm_kmem_unmap_pa(map_va, map_size);
+ return NULL;
}
void
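
From a caller's perspective, the reworked error handling doesn't change the
vm_kmem contract: vm_kmem_alloc() still returns mapped, wired kernel memory
or NULL, and vm_kmem_free() releases the mappings, the backing pages (now
tracked by the kernel VM object) and the virtual range. A minimal sketch,
with a hypothetical caller and page count:

    /* Sketch only: not part of the patch. */
    static void
    example_use_wired_buffer(size_t nr_pages)
    {
        void *buf;

        buf = vm_kmem_alloc(nr_pages * PAGE_SIZE);

        if (buf == NULL) {
            /* Nothing to undo: the error path cleaned up after itself. */
            return;
        }

        /* ... use buf ... */

        vm_kmem_free(buf, nr_pages * PAGE_SIZE);
    }
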
diff --git a/vm/vm_kmem.h b/vm/vm_kmem.h
index 3628a690..d845c1d8 100644
--- a/vm/vm_kmem.h
+++ b/vm/vm_kmem.h
@@ -45,6 +45,11 @@ extern char _end;
extern struct vm_map *kernel_map;
/*
+ * Initialize the vm_kmem module.
+ */
+void vm_kmem_setup(void);
+
+/*
* Allocate pure virtual kernel pages.
*
* The caller is responsible for taking care of the underlying physical memory.
diff --git a/vm/vm_object.c b/vm/vm_object.c
new file mode 100644
index 00000000..874b26f5
--- /dev/null
+++ b/vm/vm_object.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation is based on the paper "A lockless pagecache in Linux"
+ * by Nick Piggin. It allows looking up pages without contention on VM objects.
+ */
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <kern/error.h>
+#include <kern/init.h>
+#include <kern/llsync.h>
+#include <kern/mutex.h>
+#include <kern/rdxtree.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <machine/page.h>
+
+void __init
+vm_object_setup(void)
+{
+}
+
+void __init
+vm_object_init(struct vm_object *object, uint64_t size)
+{
+ assert(vm_page_aligned(size));
+
+ mutex_init(&object->lock);
+ rdxtree_init(&object->pages, 0);
+ object->size = size;
+ object->nr_pages = 0;
+}
+
+int
+vm_object_insert(struct vm_object *object, struct vm_page *page,
+ uint64_t offset)
+{
+ int error;
+
+ assert(vm_page_aligned(offset));
+
+ /*
+ * The page may have no references. Add one before publishing
+ * so that concurrent lookups succeed.
+ */
+ vm_page_ref(page);
+
+ mutex_lock(&object->lock);
+
+ if (offset >= object->size) {
+ error = ERROR_INVAL;
+ goto error;
+ }
+
+ error = rdxtree_insert(&object->pages, vm_page_atop(offset), page);
+
+ if (error) {
+ goto error;
+ }
+
+ vm_page_link(page, object, offset);
+ object->nr_pages++;
+ assert(object->nr_pages != 0);
+
+ mutex_unlock(&object->lock);
+
+ return 0;
+
+error:
+ mutex_unlock(&object->lock);
+
+ vm_page_unref(page);
+
+ return error;
+}
+
+void
+vm_object_remove(struct vm_object *object, uint64_t start, uint64_t end)
+{
+ struct vm_page *page;
+ uint64_t offset;
+
+ assert(vm_page_aligned(start));
+ assert(vm_page_aligned(end));
+ assert(start <= end);
+
+ mutex_lock(&object->lock);
+
+ for (offset = start; offset < end; offset += PAGE_SIZE) {
+ page = rdxtree_remove(&object->pages, vm_page_atop(offset));
+
+ if (page == NULL) {
+ continue;
+ }
+
+ vm_page_unlink(page);
+ vm_page_unref(page);
+ assert(object->nr_pages != 0);
+ object->nr_pages--;
+ }
+
+ mutex_unlock(&object->lock);
+}
+
+struct vm_page *
+vm_object_lookup(struct vm_object *object, uint64_t offset)
+{
+ struct vm_page *page;
+ int error;
+
+ llsync_read_enter();
+
+ do {
+ page = rdxtree_lookup(&object->pages, vm_page_atop(offset));
+
+ if (page == NULL) {
+ break;
+ }
+
+ error = vm_page_tryref(page);
+ } while (error);
+
+ llsync_read_exit();
+
+ return page;
+}
diff --git a/vm/vm_object.h b/vm/vm_object.h
new file mode 100644
index 00000000..046937f2
--- /dev/null
+++ b/vm/vm_object.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Virtual memory object.
+ *
+ * The purpose of VM objects is to track pages that are resident in
+ * physical memory. They collectively form the page cache.
+ */
+
+#ifndef _VM_OBJECT_H
+#define _VM_OBJECT_H
+
+#include <stdint.h>
+
+#include <kern/rdxtree.h>
+#include <vm/vm_object_types.h>
+#include <vm/vm_page.h>
+
+struct vm_object;
+
+/*
+ * Initialize the vm_object module.
+ */
+void vm_object_setup(void);
+
+/*
+ * Initialize a VM object.
+ */
+void vm_object_init(struct vm_object *object, uint64_t size);
+
+/*
+ * Insert a page into a VM object.
+ *
+ * The offset must be page-aligned.
+ *
+ * The page becomes managed, and gains a reference. If successful,
+ * the reference is kept. Otherwise it's dropped. If the page had
+ * no references on entry, and a failure occurs, the page is freed.
+ */
+int vm_object_insert(struct vm_object *object, struct vm_page *page,
+ uint64_t offset);
+
+/*
+ * Remove pages from a VM object.
+ *
+ * The range boundaries must be page-aligned.
+ *
+ * Holes in the given range are silently skipped. Pages that are removed
+ * become unmanaged and lose a reference.
+ */
+void vm_object_remove(struct vm_object *object, uint64_t start, uint64_t end);
+
+/*
+ * Look up a page in a VM object.
+ *
+ * The offset must be page-aligned.
+ *
+ * If successful, the returned page gains a reference. Note that, if a valid
+ * page is returned, it may already have been removed from the object, or
+ * moved to a different offset.
+ */
+struct vm_page * vm_object_lookup(struct vm_object *object, uint64_t offset);
+
+#endif /* _VM_OBJECT_H */
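
A short usage sketch may help clarify the reference protocol described above.
It is not part of the patch; the object, its size and the offsets are
hypothetical, error handling is reduced to early returns, and initialization
is assumed to happen at boot time as vm_object_init() requires.

    static struct vm_object example_object;

    static void
    example_object_usage(void)
    {
        struct vm_page *page, *lookup;
        int error;

        /* Hypothetical 16-page object; the size must be page-aligned. */
        vm_object_init(&example_object, 16 * PAGE_SIZE);

        page = vm_page_alloc(0, VM_PAGE_SEL_HIGHMEM, VM_PAGE_KERNEL);

        if (page == NULL) {
            return;
        }

        /* The page has no reference yet: on failure, insertion frees it. */
        error = vm_object_insert(&example_object, page, 0);

        if (error) {
            return;
        }

        /* A successful lookup returns the page with an extra reference. */
        lookup = vm_object_lookup(&example_object, 0);

        if (lookup != NULL) {
            /* ... use the page ... */
            vm_page_unref(lookup);
        }

        /* Drops the reference taken at insertion; the page gets freed. */
        vm_object_remove(&example_object, 0, PAGE_SIZE);
    }
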
diff --git a/vm/vm_object_types.h b/vm/vm_object_types.h
new file mode 100644
index 00000000..4026002e
--- /dev/null
+++ b/vm/vm_object_types.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definition used to avoid circular inclusion dependencies.
+ */
+
+#ifndef _VM_OBJECT_TYPES_H
+#define _VM_OBJECT_TYPES_H
+
+#include <stdint.h>
+
+#include <kern/mutex.h>
+#include <kern/rdxtree.h>
+
+struct vm_object {
+ struct mutex lock;
+ struct rdxtree pages;
+ uint64_t size;
+ unsigned long nr_pages;
+};
+
+#endif /* _VM_OBJECT_TYPES_H */
diff --git a/vm/vm_page.c b/vm/vm_page.c
index 00674499..950d04a1 100644
--- a/vm/vm_page.c
+++ b/vm/vm_page.c
@@ -167,6 +167,10 @@ vm_page_init(struct vm_page *page, unsigned short zone_index, phys_addr_t pa)
page->zone_index = zone_index;
page->order = VM_PAGE_ORDER_UNLISTED;
page->phys_addr = pa;
+
+ page->nr_refs = 0;
+
+ page->object = NULL;
}
void
@@ -740,6 +744,7 @@ vm_page_setup(void)
SHELL_REGISTER_CMDS(vm_page_shell_cmds);
}
+/* TODO Rename to avoid confusion with "managed pages" */
void __init
vm_page_manage(struct vm_page *page)
{
@@ -767,6 +772,22 @@ vm_page_lookup(phys_addr_t pa)
return NULL;
}
+static bool
+vm_page_block_referenced(const struct vm_page *page, unsigned int order)
+{
+ unsigned int i, nr_pages;
+
+ nr_pages = 1 << order;
+
+ for (i = 0; i < nr_pages; i++) {
+ if (vm_page_referenced(&page[i])) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
struct vm_page *
vm_page_alloc(unsigned int order, unsigned int selector, unsigned short type)
{
@@ -777,6 +798,7 @@ vm_page_alloc(unsigned int order, unsigned int selector, unsigned short type)
page = vm_page_zone_alloc(&vm_page_zones[i], order, type);
if (page != NULL) {
+ assert(!vm_page_block_referenced(page, order));
return page;
}
}
@@ -788,6 +810,7 @@ void
vm_page_free(struct vm_page *page, unsigned int order)
{
assert(page->zone_index < ARRAY_SIZE(vm_page_zones));
+ assert(!vm_page_block_referenced(page, order));
vm_page_zone_free(&vm_page_zones[page->zone_index], page, order);
}
diff --git a/vm/vm_page.h b/vm/vm_page.h
index 026c1180..ea267db8 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -16,15 +16,20 @@
*
*
* Physical page management.
+ *
+ * A page is said to be managed if it's linked to a VM object, in which
+ * case there is at least one reference to it.
*/
#ifndef _VM_VM_PAGE_H
#define _VM_VM_PAGE_H
#include <assert.h>
+#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
+#include <kern/atomic.h>
+#include <kern/error.h>
#include <kern/list.h>
#include <kern/log2.h>
#include <kern/macros.h>
@@ -32,11 +37,14 @@
#include <machine/pmap.h>
#include <machine/pmem.h>
#include <machine/types.h>
+#include <vm/vm_object_types.h>
/*
* Address/page conversion and rounding macros (not inline functions to
* be easily usable on both virtual and physical addresses, which may not
* have the same type size).
+ *
+ * TODO Rename btop and ptob.
*/
#define vm_page_atop(addr) ((addr) >> PAGE_SHIFT)
#define vm_page_ptoa(page) ((page) << PAGE_SHIFT)
@@ -80,6 +88,12 @@ struct vm_page {
unsigned short order;
phys_addr_t phys_addr;
void *priv;
+
+ unsigned int nr_refs;
+
+ /* VM object back reference */
+ struct vm_object *object;
+ uint64_t offset;
};
static inline unsigned short
@@ -139,6 +153,21 @@ vm_page_get_priv(const struct vm_page *page)
return page->priv;
}
+static inline void
+vm_page_link(struct vm_page *page, struct vm_object *object, uint64_t offset)
+{
+ assert(object != NULL);
+ page->object = object;
+ page->offset = offset;
+}
+
+static inline void
+vm_page_unlink(struct vm_page *page)
+{
+ assert(page->object != NULL);
+ page->object = NULL;
+}
+
/*
* Load physical memory into the vm_page module at boot time.
*
@@ -193,12 +222,16 @@ struct vm_page * vm_page_lookup(phys_addr_t pa);
*
* The selector is used to determine the zones from which allocation can
* be attempted.
+ *
+ * If successful, the returned pages have no references.
*/
struct vm_page * vm_page_alloc(unsigned int order, unsigned int selector,
unsigned short type);
/*
* Release a block of 2^order physical pages.
+ *
+ * The pages must have no references.
*/
void vm_page_free(struct vm_page *page, unsigned int order);
@@ -212,4 +245,50 @@ const char * vm_page_zone_name(unsigned int zone_index);
*/
void vm_page_log_info(void);
+static inline bool
+vm_page_referenced(const struct vm_page *page)
+{
+ return atomic_load(&page->nr_refs, ATOMIC_RELAXED) != 0;
+}
+
+static inline void
+vm_page_ref(struct vm_page *page)
+{
+ unsigned int nr_refs;
+
+ nr_refs = atomic_fetch_add(&page->nr_refs, 1, ATOMIC_RELAXED);
+ assert(nr_refs != (unsigned int)-1);
+}
+
+static inline void
+vm_page_unref(struct vm_page *page)
+{
+ unsigned int nr_refs;
+
+ nr_refs = atomic_fetch_sub_acq_rel(&page->nr_refs, 1);
+ assert(nr_refs != 0);
+
+ if (nr_refs == 1) {
+ vm_page_free(page, 0);
+ }
+}
+
+static inline int
+vm_page_tryref(struct vm_page *page)
+{
+ unsigned int nr_refs, prev;
+
+ do {
+ nr_refs = atomic_load(&page->nr_refs, ATOMIC_RELAXED);
+
+ if (nr_refs == 0) {
+ return ERROR_AGAIN;
+ }
+
+ prev = atomic_cas_acquire(&page->nr_refs, nr_refs, nr_refs + 1);
+ } while (prev != nr_refs);
+
+ return 0;
+}
+
#endif /* _VM_VM_PAGE_H */
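
The contract behind vm_page_tryref() is what makes the lockless object lookup
safe: it only succeeds while the count is non-zero, so a page whose last
reference has been dropped (and which may already be on its way through
vm_page_free()) is never resurrected. A hedged sketch of a reader, assuming
the page was found through a structure that is only reclaimed after a grace
period:

    /* Sketch only: not part of the patch. */
    static bool
    example_try_use(struct vm_page *page)
    {
        if (vm_page_tryref(page) != 0) {
            /* The count already reached 0: the page is being freed. */
            return false;
        }

        /* ... the page is pinned and safe to use here ... */

        vm_page_unref(page);
        return true;
    }
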
diff --git a/vm/vm_setup.c b/vm/vm_setup.c
index 0957815f..9fb9c52d 100644
--- a/vm/vm_setup.c
+++ b/vm/vm_setup.c
@@ -20,7 +20,9 @@
#include <kern/rdxtree.h>
#include <kern/percpu.h>
#include <machine/pmap.h>
+#include <vm/vm_kmem.h>
#include <vm/vm_map.h>
+#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_setup.h>
@@ -29,8 +31,10 @@ vm_setup(void)
{
vm_page_setup();
kmem_setup();
- vm_map_setup();
rdxtree_setup();
+ vm_object_setup();
+ vm_map_setup();
+ vm_kmem_setup();
pmap_setup();
percpu_setup();
}