summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefrag.am3
-rw-r--r--vm/vm_object.c142
-rw-r--r--vm/vm_object.h78
-rw-r--r--vm/vm_object_types.h36
-rw-r--r--vm/vm_page.c23
-rw-r--r--vm/vm_page.h79
-rw-r--r--vm/vm_setup.c4
7 files changed, 364 insertions, 1 deletions
diff --git a/Makefrag.am b/Makefrag.am
index 886ec1f5..025c0bf1 100644
--- a/Makefrag.am
+++ b/Makefrag.am
@@ -115,6 +115,9 @@ x15_SOURCES += \
vm/vm_kmem.h \
vm/vm_map.c \
vm/vm_map.h \
+ vm/vm_object.c \
+ vm/vm_object.h \
+ vm/vm_object_types.h \
vm/vm_page.c \
vm/vm_page.h \
vm/vm_prot.h \
diff --git a/vm/vm_object.c b/vm/vm_object.c
new file mode 100644
index 00000000..874b26f5
--- /dev/null
+++ b/vm/vm_object.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation is based on the paper "A lockless pagecache in Linux"
+ * by Nick Piggin. It allows looking up pages without contention on VM objects.
+ */
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#include <kern/init.h>
+#include <kern/llsync.h>
+#include <kern/mutex.h>
+#include <kern/rdxtree.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <machine/page.h>
+
+/*
+ * Initialize the vm_object module at boot time.
+ *
+ * Intentionally empty: objects are initialized individually with
+ * vm_object_init(), and the radix tree implementation is set up by
+ * rdxtree_setup(), which runs before this function (see vm_setup()).
+ */
+void __init
+vm_object_setup(void)
+{
+}
+
+/*
+ * Initialize an object able to cache pages for offsets in [0, size).
+ *
+ * The size must be page-aligned. The object starts out empty.
+ *
+ * NOTE(review): this function is tagged __init (boot-time-only text);
+ * confirm objects are never initialized after boot, otherwise the tag
+ * should be dropped.
+ */
+void __init
+vm_object_init(struct vm_object *object, uint64_t size)
+{
+ assert(vm_page_aligned(size));
+
+ mutex_init(&object->lock);
+ rdxtree_init(&object->pages, 0);
+ object->size = size;
+ object->nr_pages = 0;
+}
+
+/*
+ * Insert a page into an object at the given page-aligned offset.
+ *
+ * Returns 0 on success, ERROR_INVAL if the offset lies beyond the object
+ * size, or the error reported by rdxtree_insert() (presumably memory
+ * shortage or an already occupied slot — TODO confirm against the
+ * rdxtree interface).
+ *
+ * On success, the page keeps the reference acquired below and is linked
+ * to the object. On failure, that reference is dropped again, which
+ * frees the page if it had no other references (see vm_page_unref()).
+ */
+int
+vm_object_insert(struct vm_object *object, struct vm_page *page,
+ uint64_t offset)
+{
+ int error;
+
+ assert(vm_page_aligned(offset));
+
+ /*
+ * The page may have no references. Add one before publishing
+ * so that concurrent lookups succeed.
+ */
+ vm_page_ref(page);
+
+ mutex_lock(&object->lock);
+
+ if (offset >= object->size) {
+ error = ERROR_INVAL;
+ goto error;
+ }
+
+ error = rdxtree_insert(&object->pages, vm_page_atop(offset), page);
+
+ if (error) {
+ goto error;
+ }
+
+ /* Publish the back reference while still holding the object lock */
+ vm_page_link(page, object, offset);
+ object->nr_pages++;
+ /* Wrap-around check: the counter must not have overflowed to 0 */
+ assert(object->nr_pages != 0);
+
+ mutex_unlock(&object->lock);
+
+ return 0;
+
+error:
+ mutex_unlock(&object->lock);
+
+ vm_page_unref(page);
+
+ return error;
+}
+
+/*
+ * Remove the pages in the page-aligned range [start, end) from an object.
+ *
+ * Offsets with no resident page are silently skipped. Each removed page
+ * is unlinked from the object and loses the reference taken at insertion
+ * time, which may immediately free it.
+ */
+void
+vm_object_remove(struct vm_object *object, uint64_t start, uint64_t end)
+{
+ struct vm_page *page;
+ uint64_t offset;
+
+ assert(vm_page_aligned(start));
+ assert(vm_page_aligned(end));
+ assert(start <= end);
+
+ mutex_lock(&object->lock);
+
+ for (offset = start; offset < end; offset += PAGE_SIZE) {
+ page = rdxtree_remove(&object->pages, vm_page_atop(offset));
+
+ /* Hole in the range : nothing resident at this offset */
+ if (page == NULL) {
+ continue;
+ }
+
+ vm_page_unlink(page);
+ vm_page_unref(page);
+ assert(object->nr_pages != 0);
+ object->nr_pages--;
+ }
+
+ mutex_unlock(&object->lock);
+}
+
+/*
+ * Lockless lookup of the page at the given offset, NULL if none.
+ *
+ * The tree is walked inside an llsync read-side critical section, so the
+ * object lock is never taken (this is the lockless pagecache scheme
+ * described in the file header). vm_page_tryref() fails if the page's
+ * reference counter concurrently dropped to zero, i.e. the page is being
+ * destroyed; in that case the lookup is simply retried, as the slot may
+ * by then contain NULL or a different page.
+ *
+ * On success, the returned page carries a new reference for the caller.
+ * The page may nonetheless already have been removed from the object by
+ * the time the caller examines it.
+ */
+struct vm_page *
+vm_object_lookup(struct vm_object *object, uint64_t offset)
+{
+ struct vm_page *page;
+ int error;
+
+ llsync_read_enter();
+
+ do {
+ page = rdxtree_lookup(&object->pages, vm_page_atop(offset));
+
+ if (page == NULL) {
+ break;
+ }
+
+ /* Fails with ERROR_AGAIN when the counter already reached zero */
+ error = vm_page_tryref(page);
+ } while (error);
+
+ llsync_read_exit();
+
+ return page;
+}
diff --git a/vm/vm_object.h b/vm/vm_object.h
new file mode 100644
index 00000000..046937f2
--- /dev/null
+++ b/vm/vm_object.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Virtual memory object.
+ *
+ * The purpose of VM objects is to track pages that are resident in
+ * physical memory. They collectively form the page cache.
+ */
+
+#ifndef _VM_OBJECT_H
+#define _VM_OBJECT_H
+
+#include <stdint.h>
+
+#include <kern/rdxtree.h>
+#include <vm/vm_object_types.h>
+#include <vm/vm_page.h>
+
+struct vm_object;
+
+/*
+ * Initialize the vm_object module.
+ */
+void vm_object_setup(void);
+
+/*
+ * Initialize a VM object.
+ */
+void vm_object_init(struct vm_object *object, uint64_t size);
+
+/*
+ * Insert a page into a VM object.
+ *
+ * The offset must be page-aligned.
+ *
+ * The page becomes managed, and gains a reference. If successful,
+ * the reference is kept. Otherwise it's dropped. If the page had
+ * no references on entry, and a failure occurs, the page is freed.
+ */
+int vm_object_insert(struct vm_object *object, struct vm_page *page,
+ uint64_t offset);
+
+/*
+ * Remove pages from a VM object.
+ *
+ * The range boundaries must be page-aligned.
+ *
+ * Holes in the given range are silently skipped. Pages that are removed
+ * become unmanaged and lose a reference.
+ */
+void vm_object_remove(struct vm_object *object, uint64_t start, uint64_t end);
+
+/*
+ * Look up a page in a VM object.
+ *
+ * The offset must be page-aligned.
+ *
+ * If successful, the returned page gains a reference. Note that, if a valid
+ * page is returned, it may already have been removed from the object, or
+ * moved to a different offset.
+ */
+struct vm_page * vm_object_lookup(struct vm_object *object, uint64_t offset);
+
+#endif /* _VM_OBJECT_H */
diff --git a/vm/vm_object_types.h b/vm/vm_object_types.h
new file mode 100644
index 00000000..4026002e
--- /dev/null
+++ b/vm/vm_object_types.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2017 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Isolated type definition used to avoid inclusion circular dependencies.
+ */
+
+#ifndef _VM_OBJECT_TYPES_H
+#define _VM_OBJECT_TYPES_H
+
+#include <stdint.h>
+
+#include <kern/mutex.h>
+#include <kern/rdxtree.h>
+
+/*
+ * Virtual memory object: tracks the pages resident in physical memory
+ * for a range of page-aligned offsets.
+ */
+struct vm_object {
+ struct mutex lock; /* Serializes insertions and removals; lookups are lockless */
+ struct rdxtree pages; /* Resident pages, keyed by page index (offset >> PAGE_SHIFT) */
+ uint64_t size; /* Object size in bytes, page-aligned */
+ unsigned long nr_pages; /* Number of resident pages, protected by the lock */
+};
+
+#endif /* _VM_OBJECT_TYPES_H */
diff --git a/vm/vm_page.c b/vm/vm_page.c
index 00674499..950d04a1 100644
--- a/vm/vm_page.c
+++ b/vm/vm_page.c
@@ -167,6 +167,10 @@ vm_page_init(struct vm_page *page, unsigned short zone_index, phys_addr_t pa)
page->zone_index = zone_index;
page->order = VM_PAGE_ORDER_UNLISTED;
page->phys_addr = pa;
+
+ page->nr_refs = 0;
+
+ page->object = NULL;
}
void
@@ -740,6 +744,7 @@ vm_page_setup(void)
SHELL_REGISTER_CMDS(vm_page_shell_cmds);
}
+/* TODO Rename to avoid confusion with "managed pages" */
void __init
vm_page_manage(struct vm_page *page)
{
@@ -767,6 +772,22 @@ vm_page_lookup(phys_addr_t pa)
return NULL;
}
+/*
+ * Return true if any page of a 2^order block of contiguous pages has a
+ * non-zero reference counter. Used to assert that blocks entering and
+ * leaving the allocator carry no references.
+ */
+static bool
+vm_page_block_referenced(const struct vm_page *page, unsigned int order)
+{
+ unsigned int i, nr_pages;
+
+ nr_pages = 1 << order;
+
+ for (i = 0; i < nr_pages; i++) {
+ if (vm_page_referenced(&page[i])) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
struct vm_page *
vm_page_alloc(unsigned int order, unsigned int selector, unsigned short type)
{
@@ -777,6 +798,7 @@ vm_page_alloc(unsigned int order, unsigned int selector, unsigned short type)
page = vm_page_zone_alloc(&vm_page_zones[i], order, type);
if (page != NULL) {
+ assert(!vm_page_block_referenced(page, order));
return page;
}
}
@@ -788,6 +810,7 @@ void
vm_page_free(struct vm_page *page, unsigned int order)
{
assert(page->zone_index < ARRAY_SIZE(vm_page_zones));
+ assert(!vm_page_block_referenced(page, order));
vm_page_zone_free(&vm_page_zones[page->zone_index], page, order);
}
diff --git a/vm/vm_page.h b/vm/vm_page.h
index 026c1180..ea267db8 100644
--- a/vm/vm_page.h
+++ b/vm/vm_page.h
@@ -16,15 +16,20 @@
*
*
* Physical page management.
+ *
+ * A page is said to be managed if it's linked to a VM object, in which
+ * case there is at least one reference to it.
*/
#ifndef _VM_VM_PAGE_H
#define _VM_VM_PAGE_H
#include <assert.h>
+#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
+#include <kern/atomic.h>
#include <kern/list.h>
#include <kern/log2.h>
#include <kern/macros.h>
@@ -32,11 +37,14 @@
#include <machine/pmap.h>
#include <machine/pmem.h>
#include <machine/types.h>
+#include <vm/vm_object_types.h>
/*
* Address/page conversion and rounding macros (not inline functions to
* be easily usable on both virtual and physical addresses, which may not
* have the same type size).
+ *
+ * TODO Rename btop and ptob.
*/
#define vm_page_atop(addr) ((addr) >> PAGE_SHIFT)
#define vm_page_ptoa(page) ((page) << PAGE_SHIFT)
@@ -80,6 +88,12 @@ struct vm_page {
unsigned short order;
phys_addr_t phys_addr;
void *priv;
+
+ unsigned int nr_refs;
+
+ /* VM object back reference */
+ struct vm_object *object;
+ uint64_t offset;
};
static inline unsigned short
@@ -139,6 +153,21 @@ vm_page_get_priv(const struct vm_page *page)
return page->priv;
}
+/*
+ * Make a page managed by installing its VM object back reference.
+ *
+ * The fields are written non-atomically; the caller is expected to hold
+ * the object lock (as vm_object_insert() does — confirm for new callers).
+ */
+static inline void
+vm_page_link(struct vm_page *page, struct vm_object *object, uint64_t offset)
+{
+ assert(object != NULL);
+ page->object = object;
+ page->offset = offset;
+}
+
+/*
+ * Detach a page from its VM object, making it unmanaged again.
+ *
+ * The offset field is deliberately left untouched; it is meaningless
+ * while the object back reference is NULL.
+ */
+static inline void
+vm_page_unlink(struct vm_page *page)
+{
+ assert(page->object != NULL);
+ page->object = NULL;
+}
+
/*
* Load physical memory into the vm_page module at boot time.
*
@@ -193,12 +222,16 @@ struct vm_page * vm_page_lookup(phys_addr_t pa);
*
* The selector is used to determine the zones from which allocation can
* be attempted.
+ *
+ * If successful, the returned pages have no references.
*/
struct vm_page * vm_page_alloc(unsigned int order, unsigned int selector,
unsigned short type);
/*
* Release a block of 2^order physical pages.
+ *
+ * The pages must have no references.
*/
void vm_page_free(struct vm_page *page, unsigned int order);
@@ -212,4 +245,50 @@ const char * vm_page_zone_name(unsigned int zone_index);
*/
void vm_page_log_info(void);
+/*
+ * Return true if the page's reference counter is non-zero.
+ *
+ * Relaxed load: this is a snapshot only, suitable for assertions, not
+ * for synchronization decisions.
+ */
+static inline bool
+vm_page_referenced(const struct vm_page *page)
+{
+ return atomic_load(&page->nr_refs, ATOMIC_RELAXED) != 0;
+}
+
+/*
+ * Acquire an additional reference on a page.
+ *
+ * Relaxed ordering: presumably the caller already holds a reference or
+ * otherwise keeps the page alive (vm_object_insert relies on this).
+ * Asserts the counter didn't wrap around from its maximum value.
+ */
+static inline void
+vm_page_ref(struct vm_page *page)
+{
+ unsigned int nr_refs;
+
+ nr_refs = atomic_fetch_add(&page->nr_refs, 1, ATOMIC_RELAXED);
+ assert(nr_refs != (unsigned int)-1);
+}
+
+/*
+ * Release a reference on a page, freeing it when the last reference is
+ * dropped.
+ *
+ * The acquire-release fetch-and-sub orders all prior accesses to the
+ * page before the free. Note that only an order-0 (single) page is
+ * freed here; reference counting is per page, not per block.
+ */
+static inline void
+vm_page_unref(struct vm_page *page)
+{
+ unsigned int nr_refs;
+
+ nr_refs = atomic_fetch_sub_acq_rel(&page->nr_refs, 1);
+ assert(nr_refs != 0);
+
+ if (nr_refs == 1) {
+ vm_page_free(page, 0);
+ }
+}
+
+/*
+ * Attempt to acquire a reference on a page that may concurrently be
+ * released.
+ *
+ * Returns 0 on success, ERROR_AGAIN if the counter was observed at zero,
+ * meaning the page is being (or has been) freed and must not be revived.
+ * The load/compare-and-swap loop ensures the counter is only ever
+ * incremented from a non-zero value; acquire ordering on the successful
+ * CAS pairs with the release in vm_page_unref().
+ */
+static inline int
+vm_page_tryref(struct vm_page *page)
+{
+ unsigned int nr_refs, prev;
+
+ do {
+ nr_refs = atomic_load(&page->nr_refs, ATOMIC_RELAXED);
+
+ if (nr_refs == 0) {
+ return ERROR_AGAIN;
+ }
+
+ prev = atomic_cas_acquire(&page->nr_refs, nr_refs, nr_refs + 1);
+ } while (prev != nr_refs);
+
+ return 0;
+}
+
#endif /* _VM_VM_PAGE_H */
diff --git a/vm/vm_setup.c b/vm/vm_setup.c
index 0957815f..77f5fdb8 100644
--- a/vm/vm_setup.c
+++ b/vm/vm_setup.c
@@ -21,6 +21,7 @@
#include <kern/percpu.h>
#include <machine/pmap.h>
#include <vm/vm_map.h>
+#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_setup.h>
@@ -29,8 +30,9 @@ vm_setup(void)
{
vm_page_setup();
kmem_setup();
- vm_map_setup();
rdxtree_setup();
+ vm_object_setup();
+ vm_map_setup();
pmap_setup();
percpu_setup();
}