authorRichard Braun <rbraun@sceen.net>2017-07-02 12:07:46 +0200
committerRichard Braun <rbraun@sceen.net>2017-07-02 14:37:55 +0200
commit62142bef38dee8cc8878290a8775da2ae8c1d86d (patch)
tree799d1538ccde20ed9298931752416f7d5eab8533
parenta822e80fe2b15cb93d01388b2d2a9f944ca72372 (diff)
vm/vm_object: new module
Start a very simple VM object implementation for page tracking only. The locking protocol is still not well defined, especially for pages. The only purpose of the current code is to allow the kernel virtual memory interface to release pages on physical mapping creation errors.
-rw-r--r--Makefrag.am3
-rw-r--r--vm/vm_object.c142
-rw-r--r--vm/vm_object.h78
-rw-r--r--vm/vm_object_types.h36
-rw-r--r--vm/vm_page.c23
-rw-r--r--vm/vm_page.h79
-rw-r--r--vm/vm_setup.c4
7 files changed, 364 insertions, 1 deletions
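The immediate consumer, per the commit message, is the kernel virtual memory interface, which needs a way to release pages when a physical mapping fails part-way through a range. Below is a minimal sketch of that error path using only the vm_object/vm_page functions added by this commit; populate_object() is a hypothetical helper, the selector/type arguments are placeholders, and ERROR_NOMEM is assumed to come from kern/error.h rather than shown in this diff.

static int
populate_object(struct vm_object *object, uint64_t start, uint64_t end,
                unsigned int selector, unsigned short type)
{
    struct vm_page *page;
    uint64_t offset;
    int error;

    for (offset = start; offset < end; offset += PAGE_SIZE) {
        /* Pages come out of the allocator with no references */
        page = vm_page_alloc(0, selector, type);

        if (page == NULL) {
            error = ERROR_NOMEM;
            goto error;
        }

        /*
         * On success the object keeps the reference it added; on failure
         * that reference is dropped and, since it was the only one, the
         * page is freed (see the vm_object_insert() contract below).
         */
        error = vm_object_insert(object, page, offset);

        if (error) {
            goto error;
        }
    }

    return 0;

error:
    /* Back out everything inserted so far; holes are silently skipped */
    vm_object_remove(object, start, offset);
    return error;
}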
diff --git a/Makefrag.am b/Makefrag.am
index 886ec1f5..025c0bf1 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -115,6 +115,9 @@ x15_SOURCES += \
vm/vm_kmem.h \
vm/vm_map.c \
vm/vm_map.h \
+ vm/vm_object.c \
+ vm/vm_object.h \
+ vm/vm_object_types.h \
vm/vm_page.c \
vm/vm_page.h \
vm/vm_prot.h \
diff --git a/vm/vm_object.c b/vm/vm_object.c
new file mode 100644
index 00000000..874b26f5
--- /dev/null
+++ b/vm/vm_object.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation is based on the paper "A lockless pagecache in Linux"
+ * by Nick Piggin. It allows looking up pages without contention on VM objects.
+ */
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <kern/init.h>
+#include <kern/llsync.h>
+#include <kern/mutex.h>
+#include <kern/rdxtree.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <machine/page.h>
+
+void __init
+vm_object_setup(void)
+{
+}
+
+void __init
+vm_object_init(struct vm_object *object, uint64_t size)
+{
+ assert(vm_page_aligned(size));
+
+ mutex_init(&object->lock);
+ rdxtree_init(&object->pages, 0);
+ object->size = size;
+ object->nr_pages = 0;
+}
+
+int
+vm_object_insert(struct vm_object *object, struct vm_page *page,
+ uint64_t offset)
+{
+ int error;
+
+ assert(vm_page_aligned(offset));
+
+ /*
+ * The page may have no references. Add one before publishing
+ * so that concurrent lookups succeed.
+ */
+ vm_page_ref(page);
+
+ mutex_lock(&object->lock);
+
+ if (offset >= object->size) {
+ error = ERROR_INVAL;
+ goto error;
+ }
+
+ error = rdxtree_insert(&object->pages, vm_page_atop(offset), page);
+
+ if (error) {
+ goto error;
+ }
+
+ vm_page_link(page, object, offset);
+ object->nr_pages++;
+ assert(object->nr_pages != 0);
+
+ mutex_unlock(&object->lock);
+
+ return 0;
+
+error:
+ mutex_unlock(&object->lock);
+
+ vm_page_unref(page);
+
+ return error;
+}
+
+void
+vm_object_remove(struct vm_object *object, uint64_t start, uint64_t end)
+{
+ struct vm_page *page;
+ uint64_t offset;
+
+ assert(vm_page_aligned(start));
+ assert(vm_page_aligned(end));
+ assert(start <= end);
+
+ mutex_lock(&object->lock);
+
+ for (offset = start; offset < end; offset += PAGE_SIZE) {
+ page = rdxtree_remove(&object->pages, vm_page_atop(offset));
+
+ if (page == NULL) {
+ continue;
+ }
+
+ vm_page_unlink(page);
+ vm_page_unref(page);
+ assert(object->nr_pages != 0);
+ object->nr_pages--;
+ }
+
+ mutex_unlock(&object->lock);
+}
+
+struct vm_page *
+vm_object_lookup(struct vm_object *object, uint64_t offset)
+{
+ struct vm_page *page;
+ int error;
+
+ llsync_read_enter();
+
+ do {
+ page = rdxtree_lookup(&object->pages, vm_page_atop(offset));
+
+ if (page == NULL) {
+ break;
+ }
+
+ error = vm_page_tryref(page);
+ } while (error);
+
+ llsync_read_exit();
+
+ return page;
+}
diff --git a/vm/vm_object.h b/vm/vm_object.h
new file mode 100644
index 00000000..046937f2
--- /dev/null
+++ b/vm/vm_object.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Virtual memory object.
+ *
+ * The purpose of VM objects is to track pages that are resident in
+ * physical memory. They collectively form the page cache.
+ */
+
+#ifndef _VM_OBJECT_H
+#define _VM_OBJECT_H
+
+#include <stdint.h>
+
+#include <kern/rdxtree.h>
+#include <vm/vm_object_types.h>
+#include <vm/vm_page.h>
+
+struct vm_object;
+
+/*
+ * Initialize the vm_object module.
+ */
+void vm_object_setup(void);
+
+/*
+ * Initialize a VM object.
+ */
+void vm_object_init(struct vm_object *object, uint64_t size);
+
+/*
+ * Insert a page into a VM object.
+ *
+ * The offset must be page-aligned.
+ *
+ * The page becomes managed, and gains a reference. If successful,
+ * the reference is kept. Otherwise it's dropped. If the page had
+ * no references on entry, and a failure occurs, the page is freed.
+ */
+int vm_object_insert(struct vm_object *object, struct vm_page *page,
+ uint64_t offset);
+
+/*
+ * Remove pages from a VM object.
+ *
+ * The range boundaries must be page-aligned.
+ *
+ * Holes in the given range are silently skipped. Pages that are removed
+ * become unmanaged and lose a reference.
+ */
+void vm_object_remove(struct vm_object *object, uint64_t start, uint64_t end);
+
+/*
+ * Look up a page in a VM object.
+ *
+ * The offset must be page-aligned.
+ *
+ * If successful, the returned page gains a reference. Note that, if a valid
+ * page is returned, it may already have been removed from the object, or
+ * moved to a different offset.
+ */
+struct vm_page * vm_object_lookup(struct vm_object *object, uint64_t offset);
+
+#endif /* _VM_OBJECT_H */
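Because lookups run under llsync rather than the object lock, the caveat above matters in practice: a page returned by vm_object_lookup() may already have been removed or moved. A hedged sketch of how a caller might revalidate the back reference under the object lock follows; lookup_stable() is a hypothetical helper, and this revalidation policy is an assumption on my part, since the commit message explicitly leaves the locking protocol undefined.

static struct vm_page *
lookup_stable(struct vm_object *object, uint64_t offset)
{
    struct vm_page *page;

    /* Gains a reference on success, so the page can't be freed under us */
    page = vm_object_lookup(object, offset);

    if (page == NULL) {
        return NULL;
    }

    mutex_lock(&object->lock);

    if ((page->object != object) || (page->offset != offset)) {
        /* Removed or moved since the lockless lookup */
        mutex_unlock(&object->lock);
        vm_page_unref(page);
        return NULL;
    }

    mutex_unlock(&object->lock);
    return page;
}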
diff --git a/vm/vm_object_types.h b/vm/vm_object_types.h
new file mode 100644
index 00000000..4026002e
--- /dev/null
+++ b/vm/vm_object_types.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definition used to avoid circular inclusion dependencies.
+ */
+
+#ifndef _VM_OBJECT_TYPES_H
+#define _VM_OBJECT_TYPES_H
+
+#include <stdint.h>
+
+#include <kern/mutex.h>
+#include <kern/rdxtree.h>
+
+struct vm_object {
+ struct mutex lock;
+ struct rdxtree pages;
+ uint64_t size;
+ unsigned long nr_pages;
+};
+
+#endif /* _VM_OBJECT_TYPES_H */
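The split exists because vm_page.h needs the vm_object back-reference type while vm_object.h includes vm_page.h; routing both through this small header keeps the include graph acyclic. As added by this commit:

    vm_object.h        -> vm_object_types.h, vm_page.h
    vm_page.h          -> vm_object_types.h
    vm_object_types.h  -> kern/mutex.h, kern/rdxtree.h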
diff --git a/vm/vm_page.c b/vm/vm_page.c
index 00674499..950d04a1 100644
--- a/vm/vm_page.c
+++ b/vm/vm_page.c
@@ -167,6 +167,10 @@ vm_page_init(struct vm_page *page, unsigned short zone_index, phys_addr_t pa)
page->zone_index = zone_index;
page->order = VM_PAGE_ORDER_UNLISTED;
page->phys_addr = pa;
+
+ page->nr_refs = 0;
+
+ page->object = NULL;
}
void
@@ -740,6 +744,7 @@ vm_page_setup(void)
SHELL_REGISTER_CMDS(vm_page_shell_cmds);
}
+/* TODO Rename to avoid confusion with "managed pages" */
void __init
vm_page_manage(struct vm_page *page)
{
@@ -767,6 +772,22 @@ vm_page_lookup(phys_addr_t pa)
return NULL;
}
+static bool
+vm_page_block_referenced(const struct vm_page *page, unsigned int order)
+{
+ unsigned int i, nr_pages;
+
+ nr_pages = 1 << order;
+
+ for (i = 0; i < nr_pages; i++) {
+ if (vm_page_referenced(&page[i])) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
struct vm_page *
vm_page_alloc(unsigned int order, unsigned int selector, unsigned short type)
{
@@ -777,6 +798,7 @@ vm_page_alloc(unsigned int order, unsigned int selector, unsigned short type)
page = vm_page_zone_alloc(&vm_page_zones[i], order, type);
if (page != NULL) {
+ assert(!vm_page_block_referenced(page, order));
return page;
}
}
@@ -788,6 +810,7 @@ void
vm_page_free(struct vm_page *page, unsigned int order)
{
assert(page->zone_index < ARRAY_SIZE(vm_page_zones));
+ assert(!vm_page_block_referenced(page, order));
vm_page_zone_free(&vm_page_zones[page->zone_index], page, order);
}
diff --git a/vm/vm_page.h b/vm/vm_page.h
index 026c1180..ea267db8 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -16,15 +16,20 @@
*
*
* Physical page management.
+ *
+ * A page is said to be managed if it's linked to a VM object, in which
+ * case there is at least one reference to it.
*/
#ifndef _VM_VM_PAGE_H
#define _VM_VM_PAGE_H
#include <assert.h>
+#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
+#include <kern/atomic.h>
#include <kern/list.h>
#include <kern/log2.h>
#include <kern/macros.h>
@@ -32,11 +37,14 @@
#include <machine/pmap.h>
#include <machine/pmem.h>
#include <machine/types.h>
+#include <vm/vm_object_types.h>
/*
* Address/page conversion and rounding macros (not inline functions to
* be easily usable on both virtual and physical addresses, which may not
* have the same type size).
+ *
+ * TODO Rename btop and ptob.
*/
#define vm_page_atop(addr) ((addr) >> PAGE_SHIFT)
#define vm_page_ptoa(page) ((page) << PAGE_SHIFT)
@@ -80,6 +88,12 @@ struct vm_page {
unsigned short order;
phys_addr_t phys_addr;
void *priv;
+
+ unsigned int nr_refs;
+
+ /* VM object back reference */
+ struct vm_object *object;
+ uint64_t offset;
};
static inline unsigned short
@@ -139,6 +153,21 @@ vm_page_get_priv(const struct vm_page *page)
return page->priv;
}
+static inline void
+vm_page_link(struct vm_page *page, struct vm_object *object, uint64_t offset)
+{
+ assert(object != NULL);
+ page->object = object;
+ page->offset = offset;
+}
+
+static inline void
+vm_page_unlink(struct vm_page *page)
+{
+ assert(page->object != NULL);
+ page->object = NULL;
+}
+
/*
* Load physical memory into the vm_page module at boot time.
*
@@ -193,12 +222,16 @@ struct vm_page * vm_page_lookup(phys_addr_t pa);
*
* The selector is used to determine the zones from which allocation can
* be attempted.
+ *
+ * If successful, the returned pages have no references.
*/
struct vm_page * vm_page_alloc(unsigned int order, unsigned int selector,
unsigned short type);
/*
* Release a block of 2^order physical pages.
+ *
+ * The pages must have no references.
*/
void vm_page_free(struct vm_page *page, unsigned int order);
@@ -212,4 +245,50 @@ const char * vm_page_zone_name(unsigned int zone_index);
*/
void vm_page_log_info(void);
+static inline bool
+vm_page_referenced(const struct vm_page *page)
+{
+ return atomic_load(&page->nr_refs, ATOMIC_RELAXED) != 0;
+}
+
+static inline void
+vm_page_ref(struct vm_page *page)
+{
+ unsigned int nr_refs;
+
+ nr_refs = atomic_fetch_add(&page->nr_refs, 1, ATOMIC_RELAXED);
+ assert(nr_refs != (unsigned int)-1);
+}
+
+static inline void
+vm_page_unref(struct vm_page *page)
+{
+ unsigned int nr_refs;
+
+ nr_refs = atomic_fetch_sub_acq_rel(&page->nr_refs, 1);
+ assert(nr_refs != 0);
+
+ if (nr_refs == 1) {
+ vm_page_free(page, 0);
+ }
+}
+
+static inline int
+vm_page_tryref(struct vm_page *page)
+{
+ unsigned int nr_refs, prev;
+
+ do {
+ nr_refs = atomic_load(&page->nr_refs, ATOMIC_RELAXED);
+
+ if (nr_refs == 0) {
+ return ERROR_AGAIN;
+ }
+
+ prev = atomic_cas_acquire(&page->nr_refs, nr_refs, nr_refs + 1);
+ } while (prev != nr_refs);
+
+ return 0;
+}
+
#endif /* _VM_VM_PAGE_H */
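The reference lifecycle these primitives define: pages leave vm_page_alloc() with no references, vm_object_insert() adds the first one, and vm_page_unref() frees the page when the last reference is dropped, while vm_page_tryref() refuses to take a reference on a page whose count has already reached zero. A small hedged sketch; page_lifecycle_example() is a hypothetical illustration and the selector/type arguments are placeholders.

static void
page_lifecycle_example(unsigned int selector, unsigned short type)
{
    struct vm_page *page;

    page = vm_page_alloc(0, selector, type);    /* nr_refs == 0 */

    if (page == NULL) {
        return;
    }

    vm_page_ref(page);                          /* nr_refs == 1 */

    /* vm_page_tryref() would succeed here, since nr_refs is nonzero */

    vm_page_unref(page);                        /* last reference: the page is freed */

    /*
     * After the final unref, nr_refs is back to 0, so a concurrent
     * vm_page_tryref() fails with ERROR_AGAIN instead of resurrecting
     * the page; this is what the lockless vm_object_lookup() relies on.
     */
}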
diff --git a/vm/vm_setup.c b/vm/vm_setup.c
index 0957815f..77f5fdb8 100644
--- a/vm/vm_setup.c
+++ b/vm/vm_setup.c
@@ -21,6 +21,7 @@
#include <kern/percpu.h>
#include <machine/pmap.h>
#include <vm/vm_map.h>
+#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_setup.h>
@@ -29,8 +30,9 @@ vm_setup(void)
{
vm_page_setup();
kmem_setup();
- vm_map_setup();
rdxtree_setup();
+ vm_object_setup();
+ vm_map_setup();
pmap_setup();
percpu_setup();
}