author     Richard Braun <rbraun@sceen.net>   2012-09-30 19:31:58 +0200
committer  Richard Braun <rbraun@sceen.net>   2012-09-30 19:31:58 +0200
commit     69504fc63720b4bf2677d6074285b82256bc9b83 (patch)
tree       47fad139526df60554e3fd26a7b8b1577f29d2d0 /vm
Initial commit
Diffstat (limited to 'vm')
-rw-r--r--  vm/vm_inherit.h    29
-rw-r--r--  vm/vm_kmem.c      220
-rw-r--r--  vm/vm_kmem.h       86
-rw-r--r--  vm/vm_map.c       644
-rw-r--r--  vm/vm_map.h       141
-rw-r--r--  vm/vm_page.h       53
-rw-r--r--  vm/vm_phys.c      625
-rw-r--r--  vm/vm_phys.h       87
-rw-r--r--  vm/vm_prot.h       31
-rw-r--r--  vm/vm_setup.c      33
-rw-r--r--  vm/vm_setup.h      28
11 files changed, 1977 insertions(+), 0 deletions(-)
diff --git a/vm/vm_inherit.h b/vm/vm_inherit.h
new file mode 100644
index 00000000..b50d490d
--- /dev/null
+++ b/vm/vm_inherit.h
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _VM_VM_INHERIT_H
+#define _VM_VM_INHERIT_H
+
+/*
+ * Inheritance flags.
+ */
+#define VM_INHERIT_SHARE 0
+#define VM_INHERIT_COPY 1
+#define VM_INHERIT_NONE 2
+#define VM_INHERIT_DEFAULT VM_INHERIT_COPY
+
+#endif /* _VM_VM_INHERIT_H */
diff --git a/vm/vm_kmem.c b/vm/vm_kmem.c
new file mode 100644
index 00000000..c9fd4027
--- /dev/null
+++ b/vm/vm_kmem.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/init.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/types.h>
+#include <lib/assert.h>
+#include <lib/stddef.h>
+#include <machine/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kmem.h>
+#include <vm/vm_page.h>
+#include <vm/vm_phys.h>
+
+/*
+ * Kernel map and storage.
+ */
+static struct vm_map kernel_map_store;
+struct vm_map *kernel_map = &kernel_map_store;
+
+/*
+ * Heap boundaries during bootstrap.
+ */
+static unsigned long vm_kmem_boot_start __initdata;
+static unsigned long vm_kmem_boot_end __initdata;
+
+void __init
+vm_kmem_setup(void)
+{
+ pmap_virtual_space(&vm_kmem_boot_start, &vm_kmem_boot_end);
+ assert(vm_page_aligned(vm_kmem_boot_start));
+ assert(vm_page_aligned(vm_kmem_boot_end));
+}
+
+unsigned long __init
+vm_kmem_bootalloc(size_t size)
+{
+ unsigned long start, va;
+ vm_phys_t pa;
+
+ assert(size > 0);
+
+ size = vm_page_round(size);
+
+ if ((vm_kmem_boot_end - vm_kmem_boot_start) < size)
+ panic("vm_kmem: no virtual space available");
+
+ start = vm_kmem_boot_start;
+ vm_kmem_boot_start += size;
+
+ if (pmap_klimit < vm_kmem_boot_start)
+ pmap_growkernel(vm_kmem_boot_start);
+
+ for (va = start; va < vm_kmem_boot_start; va += PAGE_SIZE) {
+ pa = vm_phys_bootalloc();
+ pmap_kenter(va, pa);
+ }
+
+ return start;
+}
+
+void __init
+vm_kmem_boot_space(unsigned long *start, unsigned long *end)
+{
+ *start = VM_MIN_KERNEL_ADDRESS;
+ *end = vm_kmem_boot_start;
+}
+
+static int
+vm_kmem_alloc_check(size_t size)
+{
+ if (size == 0)
+ return -1;
+
+ return 0;
+}
+
+static int
+vm_kmem_free_check(unsigned long addr, size_t size)
+{
+ if (!vm_page_aligned(addr))
+ return -1;
+
+ return vm_kmem_alloc_check(size);
+}
+
+static unsigned long
+vm_kmem_alloc_va(size_t size)
+{
+ unsigned long va;
+ int error, flags;
+
+ size = vm_page_round(size);
+
+ va = 0;
+ flags = VM_MAP_PROT_ALL | VM_MAP_MAX_PROT_ALL | VM_MAP_INHERIT_NONE
+ | VM_MAP_ADVISE_NORMAL;
+ error = vm_map_enter(kernel_map, NULL, 0, &va, size, 0, flags);
+
+ if (error)
+ return 0;
+
+ return va;
+}
+
+static void
+vm_kmem_free_va(unsigned long addr, size_t size)
+{
+ unsigned long end;
+
+ end = addr + vm_page_round(size);
+ pmap_kremove(addr, end);
+ vm_map_remove(kernel_map, addr, end);
+}
+
+unsigned long
+vm_kmem_alloc(size_t size)
+{
+ struct vm_page *page;
+ unsigned long va, start, end;
+
+ assert(vm_kmem_alloc_check(size) == 0);
+
+ va = vm_kmem_alloc_va(size);
+
+ if (va == 0)
+ return 0;
+
+ for (start = va, end = va + size; start < end; start += PAGE_SIZE) {
+ page = vm_phys_alloc(0);
+
+ if (page == NULL)
+ goto error_page;
+
+ pmap_kenter(start, vm_page_to_pa(page));
+ }
+
+ return va;
+
+error_page:
+ vm_kmem_free(va, size);
+ return 0;
+}
+
+void
+vm_kmem_free(unsigned long addr, size_t size)
+{
+ struct vm_page *page;
+ unsigned long va, end;
+ vm_phys_t pa;
+
+ assert(vm_kmem_free_check(addr, size) == 0);
+
+ size = vm_page_round(size);
+ end = addr + size;
+
+ for (va = addr; va < end; va += PAGE_SIZE) {
+ pa = pmap_kextract(va);
+
+ if (pa == 0)
+ continue;
+
+ page = vm_phys_lookup_page(pa);
+ assert(page != NULL);
+ vm_phys_free(page, 0);
+ }
+
+ vm_kmem_free_va(addr, size);
+}
+
+void *
+vm_kmem_map_pa(vm_phys_t addr, size_t size, unsigned long *map_addrp,
+ size_t *map_sizep)
+{
+ unsigned long offset, map_addr;
+ size_t map_size;
+ vm_phys_t start;
+
+ assert(vm_kmem_alloc_check(size) == 0);
+
+ start = vm_page_trunc(addr);
+ map_size = vm_page_round(addr + size) - start;
+ map_addr = vm_kmem_alloc_va(map_size);
+
+ if (map_addr == 0)
+ return NULL;
+
+ for (offset = 0; offset < map_size; offset += PAGE_SIZE)
+ pmap_kenter(map_addr + offset, start + offset);
+
+ if (map_addrp != NULL)
+ *map_addrp = map_addr;
+
+ if (map_sizep != NULL)
+ *map_sizep = map_size;
+
+ return (void *)(map_addr + (unsigned long)(addr & PAGE_MASK));
+}
+
+void
+vm_kmem_unmap_pa(unsigned long map_addr, size_t map_size)
+{
+ assert(vm_kmem_free_check(map_addr, map_size) == 0);
+ vm_kmem_free_va(map_addr, map_size);
+}
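
A minimal usage sketch of vm_kmem_alloc()/vm_kmem_free() as defined above, assuming a caller that runs once vm_setup() has completed; the buffer size and its use are hypothetical.

#include <kern/param.h>
#include <vm/vm_kmem.h>

/*
 * Hypothetical consumer: allocate a page-backed, wired scratch buffer from
 * the kernel map, use it, then release both the physical pages and the
 * virtual range.
 */
static void
scratch_buffer_example(void)
{
    unsigned long va;
    size_t size;
    int *word;

    size = 2 * PAGE_SIZE;       /* rounded to whole pages internally anyway */
    va = vm_kmem_alloc(size);

    if (va == 0)                /* allocation failure is reported as 0 */
        return;

    word = (int *)va;           /* the range is mapped and backed by RAM */
    *word = 42;

    vm_kmem_free(va, size);
}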
diff --git a/vm/vm_kmem.h b/vm/vm_kmem.h
new file mode 100644
index 00000000..e23ab9bd
--- /dev/null
+++ b/vm/vm_kmem.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _VM_VM_KMEM_H
+#define _VM_VM_KMEM_H
+
+#include <kern/types.h>
+
+/*
+ * Special kernel addresses.
+ */
+extern char _text;
+extern char _rodata;
+extern char _data;
+extern char _bss;
+extern char _end;
+
+/*
+ * The kernel map.
+ */
+extern struct vm_map *kernel_map;
+
+/*
+ * Initialize the vm_kmem module.
+ */
+void vm_kmem_setup(void);
+
+/*
+ * Early kernel memory allocator.
+ *
+ * The main purpose of this function is to allow the allocation of the
+ * physical page table.
+ */
+unsigned long vm_kmem_bootalloc(size_t size);
+
+/*
+ * Return the range of initial virtual memory used by the kernel.
+ */
+void vm_kmem_boot_space(unsigned long *start, unsigned long *end);
+
+/*
+ * Allocate memory from the kernel map.
+ */
+unsigned long vm_kmem_alloc(size_t size);
+
+/*
+ * Release memory back to the kernel map.
+ */
+void vm_kmem_free(unsigned long addr, size_t size);
+
+/*
+ * Map physical memory in a kernel map.
+ *
+ * Return the address at which the mapped memory can be accessed. If map_addrp
+ * and/or map_sizep aren't NULL, they are updated to the address and size of
+ * the mapping created.
+ *
+ * This is a convenience function for modules that must map arbitrary regions
+ * of physical memory, and as such, it doesn't expect a page-aligned input range.
+ *
+ * TODO When mapping attributes are implemented, make this function disable
+ * caching on the mapping.
+ */
+void * vm_kmem_map_pa(vm_phys_t addr, size_t size, unsigned long *map_addrp,
+ size_t *map_sizep);
+
+/*
+ * Unmap physical memory from a kernel map.
+ */
+void vm_kmem_unmap_pa(unsigned long map_addr, size_t map_size);
+
+#endif /* _VM_VM_KMEM_H */
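
A hedged sketch of the vm_kmem_map_pa()/vm_kmem_unmap_pa() pair documented above; the physical range being inspected is purely hypothetical.

#include <lib/stddef.h>
#include <vm/vm_kmem.h>

/*
 * Hypothetical example: temporarily map a physical range (say, a firmware
 * table discovered at boot) so it can be read through the kernel map.
 */
static void
inspect_phys_range(vm_phys_t table_pa, size_t table_size)
{
    unsigned long map_addr;
    size_t map_size;
    const void *table;

    table = vm_kmem_map_pa(table_pa, table_size, &map_addr, &map_size);

    if (table == NULL)
        return;

    /* ... read the table through the returned pointer ... */

    /* Unmapping uses the page-aligned range reported by the call. */
    vm_kmem_unmap_pa(map_addr, map_size);
}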
diff --git a/vm/vm_map.c b/vm/vm_map.c
new file mode 100644
index 00000000..4030cdc6
--- /dev/null
+++ b/vm/vm_map.c
@@ -0,0 +1,644 @@
+/*
+ * Copyright (c) 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * XXX This module is far from complete. It just provides the basic support
+ * needed for kernel allocation.
+ */
+
+#include <kern/error.h>
+#include <kern/init.h>
+#include <kern/kmem.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/printk.h>
+#include <lib/assert.h>
+#include <lib/list.h>
+#include <lib/macros.h>
+#include <lib/rbtree.h>
+#include <lib/stddef.h>
+#include <lib/stdint.h>
+#include <machine/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kmem.h>
+#include <vm/vm_page.h>
+#include <vm/vm_phys.h>
+
+/*
+ * Special threshold which disables the use of the free area cache address.
+ */
+#define VM_MAP_NO_FIND_CACHE (~(size_t)0)
+
+/*
+ * Mapping request.
+ *
+ * Most members are input parameters from a call to e.g. vm_map_enter(). The
+ * start member is also an output argument. The next member is used internally
+ * by the mapping functions.
+ */
+struct vm_map_request {
+ struct vm_object *object;
+ unsigned long offset;
+ unsigned long start;
+ size_t size;
+ size_t align;
+ int flags;
+ struct vm_map_entry *next;
+};
+
+/*
+ * Statically allocated map entry for the first kernel map entry.
+ */
+static struct vm_map_entry vm_map_kernel_entry;
+
+/*
+ * Statically allocated map entry for the kernel map entry allocator.
+ *
+ * The purpose of this entry is to reserve virtual space for the kernel map
+ * entries (those used in the kernel map). The reason is to avoid recursion,
+ * as normal map entries are allocated from the kernel map (like any other
+ * normal kernel object).
+ */
+static struct vm_map_entry vm_map_kentry_entry;
+
+/*
+ * Cache for the map entries used in the kernel map.
+ */
+static struct kmem_cache vm_map_kentry_cache;
+
+/*
+ * Cache for normal map entries.
+ */
+static struct kmem_cache vm_map_entry_cache;
+
+/*
+ * Address of the next free page available for kernel map entry allocation.
+ */
+static unsigned long vm_map_kentry_free;
+
+/*
+ * Allocate pages for the kernel map entry cache.
+ */
+static unsigned long
+vm_map_kentry_pagealloc(size_t size)
+{
+ struct vm_page *page;
+ unsigned long addr, va;
+
+ assert(size > 0);
+ assert(vm_page_aligned(size));
+
+ if ((vm_map_kentry_entry.end - vm_map_kentry_free) < size)
+ panic("vm_map: kentry cache pages exhausted");
+
+ addr = vm_map_kentry_free;
+ vm_map_kentry_free += size;
+
+ for (va = addr; va < vm_map_kentry_free; va += PAGE_SIZE) {
+ page = vm_phys_alloc(0);
+
+ if (page == NULL)
+ panic("vm_map: no physical page for kentry cache");
+
+ pmap_kenter(va, vm_page_to_pa(page));
+ }
+
+ return addr;
+}
+
+static inline struct kmem_cache *
+vm_map_entry_select_cache(const struct vm_map *map)
+{
+ return (map == kernel_map) ? &vm_map_kentry_cache : &vm_map_entry_cache;
+}
+
+static struct vm_map_entry *
+vm_map_entry_create(const struct vm_map *map)
+{
+ struct vm_map_entry *entry;
+
+ entry = kmem_cache_alloc(vm_map_entry_select_cache(map));
+
+ if (entry == NULL)
+ panic("vm_map: can't create map entry");
+
+ return entry;
+}
+
+static void
+vm_map_entry_destroy(struct vm_map_entry *entry, const struct vm_map *map)
+{
+ kmem_cache_free(vm_map_entry_select_cache(map), entry);
+}
+
+static inline int
+vm_map_entry_cmp_lookup(unsigned long addr, const struct rbtree_node *node)
+{
+ struct vm_map_entry *entry;
+
+ entry = rbtree_entry(node, struct vm_map_entry, tree_node);
+
+ if (addr >= entry->end)
+ return 1;
+
+ if (addr >= entry->start)
+ return 0;
+
+ return -1;
+}
+
+static inline int
+vm_map_entry_cmp_insert(const struct rbtree_node *a,
+ const struct rbtree_node *b)
+{
+ struct vm_map_entry *entry;
+
+ entry = rbtree_entry(a, struct vm_map_entry, tree_node);
+ return vm_map_entry_cmp_lookup(entry->start, b);
+}
+
+static inline int
+vm_map_get_protection(int flags)
+{
+ return flags & VM_MAP_PROT_MASK;
+}
+
+static inline int
+vm_map_get_max_protection(int flags)
+{
+ return (flags & VM_MAP_MAX_PROT_MASK) >> 4;
+}
+
+#ifndef NDEBUG
+static void
+vm_map_request_assert_valid(const struct vm_map_request *request)
+{
+ int prot, max_prot;
+
+ assert((request->object != NULL) || (request->offset == 0));
+ assert(vm_page_aligned(request->offset));
+ assert(vm_page_aligned(request->start));
+ assert(request->size > 0);
+ assert(vm_page_aligned(request->size));
+ assert((request->start + request->size) > request->start);
+ assert((request->align == 0) || (request->align >= PAGE_SIZE));
+ assert(ISP2(request->align));
+
+ prot = vm_map_get_protection(request->flags);
+ max_prot = vm_map_get_max_protection(request->flags);
+ assert((prot & max_prot) == prot);
+ assert(__builtin_popcount(request->flags & VM_MAP_INHERIT_MASK) == 1);
+ assert(__builtin_popcount(request->flags & VM_MAP_ADVISE_MASK) == 1);
+ assert(!(request->flags & VM_MAP_FIXED)
+ || (request->align == 0)
+ || P2ALIGNED(request->start, request->align));
+}
+#else /* NDEBUG */
+#define vm_map_request_assert_valid(request)
+#endif /* NDEBUG */
+
+/*
+ * Look up an entry in a map.
+ *
+ * This function returns the entry which is closest to the given address
+ * such that addr < entry->end (i.e. either containing or after the requested
+ * address), or NULL if there is no such entry.
+ */
+static struct vm_map_entry *
+vm_map_lookup_nearest(struct vm_map *map, unsigned long addr)
+{
+ struct vm_map_entry *entry;
+ struct rbtree_node *node;
+
+ assert(vm_page_aligned(addr));
+
+ entry = map->lookup_cache;
+
+ if ((entry != NULL) && (addr >= entry->start) && (addr < entry->end))
+ return entry;
+
+ node = rbtree_lookup_nearest(&map->entry_tree, addr,
+ vm_map_entry_cmp_lookup, RBTREE_RIGHT);
+
+ if (node != NULL) {
+ entry = rbtree_entry(node, struct vm_map_entry, tree_node);
+ assert(addr < entry->end);
+ map->lookup_cache = entry;
+ return entry;
+ }
+
+ return NULL;
+}
+
+static void
+vm_map_reset_find_cache(struct vm_map *map)
+{
+ map->find_cache = 0;
+ map->find_cache_threshold = VM_MAP_NO_FIND_CACHE;
+}
+
+static int
+vm_map_find_fixed(struct vm_map *map, struct vm_map_request *request)
+{
+ struct vm_map_entry *next;
+ unsigned long start;
+ size_t size;
+
+ start = request->start;
+ size = request->size;
+
+ if ((start < map->start) || (start + size) > map->end)
+ return ERROR_NOMEM;
+
+ next = vm_map_lookup_nearest(map, start);
+
+ if (next == NULL) {
+ if ((map->end - start) < size)
+ return ERROR_NOMEM;
+
+ request->next = NULL;
+ return 0;
+ }
+
+ if ((start >= next->start) || ((next->start - start) < size))
+ return ERROR_NOMEM;
+
+ request->next = next;
+ return 0;
+}
+
+static int
+vm_map_find_avail(struct vm_map *map, struct vm_map_request *request)
+{
+ struct vm_map_entry *next;
+ struct list *node;
+ unsigned long base, start;
+ size_t size, space;
+ int error;
+
+ /* If there is a hint, try there */
+ if (request->start != 0) {
+ error = vm_map_find_fixed(map, request);
+
+ if (!error)
+ return 0;
+ }
+
+ size = request->size;
+
+ if (size > map->find_cache_threshold)
+ base = map->find_cache;
+ else {
+ base = map->start;
+
+ /*
+ * Searching from the start of the map means that an area whose size
+ * matches the threshold (or a smaller one) may be selected, which would
+ * make the threshold stale. Reset it.
+ */
+ map->find_cache_threshold = 0;
+ }
+
+retry:
+ start = base;
+ next = vm_map_lookup_nearest(map, start);
+
+ for (;;) {
+ assert(start <= map->end);
+
+ /*
+ * The end of the map has been reached, and no space could be found.
+ * If the search didn't start at map->start, retry from there in case
+ * space is available below the previous start address.
+ */
+ if ((map->end - start) < size) {
+ if (base != map->start) {
+ base = map->start;
+ map->find_cache_threshold = 0;
+ goto retry;
+ }
+
+ return ERROR_NOMEM;
+ }
+
+ if (next == NULL)
+ space = map->end - start;
+ else if (start >= next->start)
+ space = 0;
+ else
+ space = next->start - start;
+
+ if (space >= size) {
+ map->find_cache = start + size;
+ request->start = start;
+ request->next = next;
+ return 0;
+ }
+
+ if (space > map->find_cache_threshold)
+ map->find_cache_threshold = space;
+
+ start = next->end;
+ node = list_next(&next->list_node);
+
+ if (list_end(&map->entry_list, node))
+ next = NULL;
+ else
+ next = list_entry(node, struct vm_map_entry, list_node);
+ }
+}
+
+static void
+vm_map_link(struct vm_map *map, struct vm_map_entry *entry,
+ struct vm_map_entry *prev, struct vm_map_entry *next)
+{
+ assert((prev == NULL) || (next == NULL));
+
+ if ((prev == NULL) && (next == NULL))
+ list_insert_tail(&map->entry_list, &entry->list_node);
+ else if (prev == NULL)
+ list_insert_before(&next->list_node, &entry->list_node);
+ else
+ list_insert_after(&prev->list_node, &entry->list_node);
+
+ rbtree_insert(&map->entry_tree, &entry->tree_node, vm_map_entry_cmp_insert);
+ map->nr_entries++;
+}
+
+static void
+vm_map_unlink(struct vm_map *map, struct vm_map_entry *entry)
+{
+ list_remove(&entry->list_node);
+ rbtree_remove(&map->entry_tree, &entry->tree_node);
+ map->nr_entries--;
+}
+
+/*
+ * Check mapping parameters, find a suitable area of virtual memory, and
+ * prepare the mapping request for that region.
+ */
+static int
+vm_map_prepare(struct vm_map *map, struct vm_object *object, unsigned long offset,
+ unsigned long start, size_t size, size_t align, int flags,
+ struct vm_map_request *request)
+{
+ int error;
+
+ request->object = object;
+ request->offset = offset;
+ request->start = start;
+ request->size = size;
+ request->align = align;
+ request->flags = flags;
+ vm_map_request_assert_valid(request);
+
+ if (flags & VM_MAP_FIXED)
+ error = vm_map_find_fixed(map, request);
+ else
+ error = vm_map_find_avail(map, request);
+
+ return error;
+}
+
+/*
+ * Convert a prepared mapping request into an entry in the given map.
+ *
+ * If entry is NULL, a map entry is allocated for the mapping.
+ */
+static int
+vm_map_insert(struct vm_map *map, struct vm_map_entry *entry,
+ const struct vm_map_request *request)
+{
+ /* TODO: merge/extend request with neighbors */
+
+ if (entry == NULL)
+ entry = vm_map_entry_create(map);
+
+ entry->start = request->start;
+ entry->end = request->start + request->size;
+ entry->object = request->object;
+ entry->offset = request->offset;
+ entry->flags = request->flags & VM_MAP_ENTRY_MASK;
+ vm_map_link(map, entry, NULL, request->next);
+ map->size += request->size;
+
+ if ((map == kernel_map) && (pmap_klimit < entry->end))
+ pmap_growkernel(entry->end);
+
+ return 0;
+}
+
+int
+vm_map_enter(struct vm_map *map, struct vm_object *object, uint64_t offset,
+ unsigned long *startp, size_t size, size_t align, int flags)
+{
+ struct vm_map_request request;
+ int error;
+
+ error = vm_map_prepare(map, object, offset, *startp, size, align, flags,
+ &request);
+
+ if (error)
+ goto error_enter;
+
+ error = vm_map_insert(map, NULL, &request);
+
+ if (error)
+ goto error_enter;
+
+ *startp = request.start;
+ return 0;
+
+error_enter:
+ vm_map_reset_find_cache(map);
+ return error;
+}
+
+static void
+vm_map_split_entries(struct vm_map_entry *prev, struct vm_map_entry *next,
+ unsigned long split_addr)
+{
+ unsigned long diff;
+
+ assert(prev->start < split_addr);
+ assert(split_addr < prev->end);
+
+ diff = split_addr - prev->start;
+ prev->end = split_addr;
+ next->start = split_addr;
+
+ if (next->object != NULL)
+ next->offset += diff;
+}
+
+static void
+vm_map_clip_start(struct vm_map *map, struct vm_map_entry *entry,
+ unsigned long start)
+{
+ struct vm_map_entry *new_entry;
+
+ if (entry->start >= start)
+ return;
+
+ new_entry = vm_map_entry_create(map);
+ *new_entry = *entry;
+ vm_map_split_entries(new_entry, entry, start);
+ vm_map_link(map, new_entry, NULL, entry);
+}
+
+static void
+vm_map_clip_end(struct vm_map *map, struct vm_map_entry *entry,
+ unsigned long end)
+{
+ struct vm_map_entry *new_entry;
+
+ if (entry->end <= end)
+ return;
+
+ new_entry = vm_map_entry_create(map);
+ *new_entry = *entry;
+ vm_map_split_entries(entry, new_entry, end);
+ vm_map_link(map, new_entry, entry, NULL);
+}
+
+void
+vm_map_remove(struct vm_map *map, unsigned long start, unsigned long end)
+{
+ struct vm_map_entry *entry;
+ struct list *node;
+
+ assert(start >= map->start);
+ assert(end <= map->end);
+ assert(start < end);
+
+ entry = vm_map_lookup_nearest(map, start);
+
+ if (entry == NULL)
+ return;
+
+ vm_map_clip_start(map, entry, start);
+
+ while (!list_end(&map->entry_list, &entry->list_node)
+ && (entry->start < end)) {
+ vm_map_clip_end(map, entry, end);
+ map->size -= entry->end - entry->start;
+ node = list_next(&entry->list_node);
+ vm_map_unlink(map, entry);
+ vm_map_entry_destroy(entry, map);
+ entry = list_entry(node, struct vm_map_entry, list_node);
+ }
+
+ vm_map_reset_find_cache(map);
+}
+
+void
+vm_map_init(struct vm_map *map, struct pmap *pmap, unsigned long start,
+ unsigned long end)
+{
+ assert(vm_page_aligned(start));
+ assert(vm_page_aligned(end));
+
+ list_init(&map->entry_list);
+ rbtree_init(&map->entry_tree);
+ map->nr_entries = 0;
+ map->start = start;
+ map->end = end;
+ map->size = 0;
+ map->lookup_cache = NULL;
+ vm_map_reset_find_cache(map);
+ map->pmap = pmap;
+}
+
+void __init
+vm_map_bootstrap(void)
+{
+ struct vm_map_request request;
+ unsigned long start, end;
+ int error, flags;
+
+ vm_map_init(kernel_map, kernel_pmap, VM_MIN_KERNEL_ADDRESS,
+ VM_MAX_KERNEL_ADDRESS);
+
+ /*
+ * Create the initial kernel mapping. This reserves memory for at least
+ * the kernel image and the physical page table.
+ */
+ vm_kmem_boot_space(&start, &end);
+ flags = VM_MAP_PROT_ALL | VM_MAP_MAX_PROT_ALL | VM_MAP_INHERIT_NONE
+ | VM_MAP_ADVISE_NORMAL | VM_MAP_NOMERGE | VM_MAP_FIXED;
+ error = vm_map_prepare(kernel_map, NULL, 0, start, end - start, 0, flags,
+ &request);
+
+ if (error)
+ panic("vm_map: can't map initial kernel mapping");
+
+ error = vm_map_insert(kernel_map, &vm_map_kernel_entry, &request);
+ assert(!error);
+
+ /* Create the kentry mapping */
+ flags = VM_MAP_PROT_ALL | VM_MAP_MAX_PROT_ALL | VM_MAP_INHERIT_NONE
+ | VM_MAP_ADVISE_NORMAL | VM_MAP_NOMERGE;
+ error = vm_map_prepare(kernel_map, NULL, 0, 0, VM_MAP_KENTRY_SIZE, 0,
+ flags, &request);
+
+ if (error)
+ panic("vm_map: kentry mapping setup failed");
+
+ error = vm_map_insert(kernel_map, &vm_map_kentry_entry, &request);
+ assert(!error);
+
+ vm_map_kentry_free = vm_map_kentry_entry.start;
+
+ flags = KMEM_CACHE_NOCPUPOOL | KMEM_CACHE_NOOFFSLAB | KMEM_CACHE_NORECLAIM;
+ kmem_cache_init(&vm_map_kentry_cache, "vm_map_kentry",
+ sizeof(struct vm_map_entry), 0, NULL,
+ vm_map_kentry_pagealloc, NULL, flags);
+}
+
+void __init
+vm_map_setup(void)
+{
+ kmem_cache_init(&vm_map_entry_cache, "vm_map_entry",
+ sizeof(struct vm_map_entry), 0, NULL, NULL, NULL, 0);
+}
+
+void
+vm_map_info(struct vm_map *map)
+{
+ struct vm_map_entry *entry;
+ const char *type, *name;
+
+ if (map == kernel_map)
+ name = "kernel map";
+ else
+ name = "map";
+
+ printk("vm_map: %s: %08lx-%08lx\n", name, map->start, map->end);
+ printk("vm_map: start end size offset flags type\n");
+
+ list_for_each_entry(&map->entry_list, entry, list_node) {
+ if (entry->object == NULL)
+ type = "null";
+ else
+ type = "object";
+
+ printk("vm_map: %08lx %08lx %8luk %08llx %08x %s\n", entry->start,
+ entry->end, (entry->end - entry->start) >> 10, entry->offset,
+ entry->flags, type);
+ }
+
+ printk("vm_map: total: %uk\n", map->size >> 10);
+}
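
For reference, a hedged sketch of the vm_map_enter()/vm_map_remove() pair on the kernel map, mirroring what vm_kmem_alloc_va() does above; the size is a hypothetical value and no object backs the range.

#include <kern/param.h>
#include <lib/stddef.h>
#include <vm/vm_kmem.h>
#include <vm/vm_map.h>

/*
 * Hypothetical example: reserve virtual space in the kernel map, letting the
 * allocator pick the start address (hint of 0, no VM_MAP_FIXED), then remove
 * the mapping again.
 */
static void
kernel_va_reservation_example(void)
{
    unsigned long start;
    size_t size;
    int error, flags;

    size = 4 * PAGE_SIZE;
    start = 0;          /* in/out: 0 means "find a suitable address" */
    flags = VM_MAP_PROT_ALL | VM_MAP_MAX_PROT_ALL | VM_MAP_INHERIT_NONE
            | VM_MAP_ADVISE_NORMAL;
    error = vm_map_enter(kernel_map, NULL, 0, &start, size, 0, flags);

    if (error)
        return;

    vm_map_remove(kernel_map, start, start + size);
}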
diff --git a/vm/vm_map.h b/vm/vm_map.h
new file mode 100644
index 00000000..7231b6a3
--- /dev/null
+++ b/vm/vm_map.h
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Virtual memory map management.
+ */
+
+#ifndef _VM_VM_MAP_H
+#define _VM_VM_MAP_H
+
+#include <lib/list.h>
+#include <lib/rbtree.h>
+#include <lib/stdint.h>
+#include <machine/pmap.h>
+
+/*
+ * Mapping flags and masks.
+ *
+ * All these flags can be used when creating a mapping. Most of them are
+ * also used as map entry flags.
+ */
+#define VM_MAP_PROT_READ 0x00001
+#define VM_MAP_PROT_WRITE 0x00002
+#define VM_MAP_PROT_EXEC 0x00004
+#define VM_MAP_PROT_ALL (VM_MAP_PROT_READ \
+ | VM_MAP_PROT_WRITE \
+ | VM_MAP_PROT_EXEC)
+#define VM_MAP_PROT_MASK VM_MAP_PROT_ALL
+
+#define VM_MAP_MAX_PROT_READ (VM_MAP_PROT_READ << 4)
+#define VM_MAP_MAX_PROT_WRITE (VM_MAP_PROT_WRITE << 4)
+#define VM_MAP_MAX_PROT_EXEC (VM_MAP_PROT_EXEC << 4)
+#define VM_MAP_MAX_PROT_ALL (VM_MAP_MAX_PROT_READ \
+ | VM_MAP_MAX_PROT_WRITE \
+ | VM_MAP_MAX_PROT_EXEC)
+#define VM_MAP_MAX_PROT_MASK VM_MAP_MAX_PROT_ALL
+
+#define VM_MAP_INHERIT_SHARE 0x00100
+#define VM_MAP_INHERIT_COPY 0x00200
+#define VM_MAP_INHERIT_NONE 0x00400
+#define VM_MAP_INHERIT_MASK (VM_MAP_INHERIT_SHARE \
+ | VM_MAP_INHERIT_COPY \
+ | VM_MAP_INHERIT_NONE)
+
+#define VM_MAP_ADVISE_NORMAL 0x01000
+#define VM_MAP_ADVISE_RAND 0x02000
+#define VM_MAP_ADVISE_SEQ 0x04000
+#define VM_MAP_ADVISE_MASK (VM_MAP_ADVISE_NORMAL \
+ | VM_MAP_ADVISE_RAND \
+ | VM_MAP_ADVISE_SEQ)
+
+#define VM_MAP_NOMERGE 0x10000
+#define VM_MAP_FIXED 0x20000 /* Not an entry flag */
+
+/*
+ * Flags that can be used as map entry flags.
+ */
+#define VM_MAP_ENTRY_MASK (VM_MAP_PROT_MASK \
+ | VM_MAP_MAX_PROT_MASK \
+ | VM_MAP_INHERIT_MASK \
+ | VM_MAP_ADVISE_MASK \
+ | VM_MAP_NOMERGE)
+
+/*
+ * Memory range descriptor.
+ */
+struct vm_map_entry {
+ struct list list_node;
+ struct rbtree_node tree_node;
+ unsigned long start;
+ unsigned long end;
+ struct vm_object *object;
+ uint64_t offset;
+ int flags;
+};
+
+/*
+ * Memory map.
+ */
+struct vm_map {
+ struct list entry_list;
+ struct rbtree entry_tree;
+ unsigned int nr_entries;
+ unsigned long start;
+ unsigned long end;
+ size_t size;
+ struct vm_map_entry *lookup_cache;
+ unsigned long find_cache;
+ size_t find_cache_threshold;
+ struct pmap *pmap;
+};
+
+/*
+ * Create a virtual mapping.
+ */
+int vm_map_enter(struct vm_map *map, struct vm_object *object, uint64_t offset,
+ unsigned long *startp, size_t size, size_t align, int flags);
+
+/*
+ * Remove mappings from start to end.
+ */
+void vm_map_remove(struct vm_map *map, unsigned long start, unsigned long end);
+
+/*
+ * Initialize a VM map.
+ */
+void vm_map_init(struct vm_map *map, struct pmap *pmap, unsigned long start,
+ unsigned long end);
+
+/*
+ * Early initialization of the vm_map module.
+ *
+ * This function creates the kernel map and the kentry cache, making it
+ * possible to map kernel memory.
+ */
+void vm_map_bootstrap(void);
+
+/*
+ * Set up the vm_map module.
+ */
+void vm_map_setup(void);
+
+/*
+ * Display information about a memory map.
+ */
+void vm_map_info(struct vm_map *map);
+
+#endif /* _VM_VM_MAP_H */
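
A short sketch of the flag-word layout above: the maximum-protection bits are the protection bits shifted left by four, which is how vm_map.c decodes them. The helper name here is hypothetical.

#include <vm/vm_map.h>

/*
 * Hypothetical decoding helper mirroring the layout above: protection in the
 * low nibble, maximum protection in the next one.
 */
static inline void
vm_map_flags_example(int flags, int *protp, int *max_protp)
{
    *protp = flags & VM_MAP_PROT_MASK;
    *max_protp = (flags & VM_MAP_MAX_PROT_MASK) >> 4;
}

/*
 * E.g. VM_MAP_PROT_READ | VM_MAP_MAX_PROT_ALL | VM_MAP_INHERIT_NONE
 * | VM_MAP_ADVISE_NORMAL decodes to prot == VM_MAP_PROT_READ and
 * max_prot == VM_MAP_PROT_ALL.
 */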
diff --git a/vm/vm_page.h b/vm/vm_page.h
new file mode 100644
index 00000000..641cbccf
--- /dev/null
+++ b/vm/vm_page.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2010, 2011 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _VM_VM_PAGE_H
+#define _VM_VM_PAGE_H
+
+#include <lib/list.h>
+#include <lib/macros.h>
+#include <kern/param.h>
+#include <kern/types.h>
+
+/*
+ * Address/page conversion and rounding macros (macros rather than inline
+ * functions so they can be used on both virtual and physical addresses,
+ * which may not have the same width).
+ */
+#define vm_page_atop(addr) ((addr) >> PAGE_SHIFT)
+#define vm_page_ptoa(page) ((page) << PAGE_SHIFT)
+#define vm_page_trunc(addr) P2ALIGN(addr, PAGE_SIZE)
+#define vm_page_round(addr) P2ROUND(addr, PAGE_SIZE)
+#define vm_page_aligned(addr) P2ALIGNED(addr, PAGE_SIZE)
+
+/*
+ * Physical page descriptor.
+ */
+struct vm_page {
+ struct list node;
+ unsigned short seg_index;
+ unsigned short order;
+ vm_phys_t phys_addr;
+};
+
+static inline vm_phys_t
+vm_page_to_pa(const struct vm_page *page)
+{
+ return page->phys_addr;
+}
+
+#endif /* _VM_VM_PAGE_H */
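
A worked sketch of the conversion macros above, assuming the common PAGE_SHIFT of 12 (PAGE_SIZE == 4096) from kern/param.h; the guard keeps the example inert on other configurations.

#include <kern/param.h>
#include <lib/assert.h>
#include <vm/vm_page.h>

static void
vm_page_macro_example(void)
{
#if PAGE_SHIFT == 12
    assert(vm_page_trunc(0x12345UL) == 0x12000UL);  /* round down to a page */
    assert(vm_page_round(0x12345UL) == 0x13000UL);  /* round up to a page */
    assert(!vm_page_aligned(0x12345UL));
    assert(vm_page_atop(0x13000UL) == 0x13UL);      /* byte address -> page index */
    assert(vm_page_ptoa(0x13UL) == 0x13000UL);      /* page index -> byte address */
#endif
}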
diff --git a/vm/vm_phys.c b/vm/vm_phys.c
new file mode 100644
index 00000000..3e8a70f0
--- /dev/null
+++ b/vm/vm_phys.c
@@ -0,0 +1,625 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * This implementation uses the binary buddy system to manage its heap.
+ * Descriptions of the buddy system can be found in the following works:
+ * - "UNIX Internals: The New Frontiers", by Uresh Vahalia.
+ * - "Dynamic Storage Allocation: A Survey and Critical Review",
+ * by Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles.
+ *
+ * In addition, this allocator uses per-cpu pools of pages for order 0
+ * (i.e. single page) allocations. These pools act as caches (but are named
+ * differently to avoid confusion with CPU caches) that reduce contention on
+ * multiprocessor systems. When a pool is empty and cannot provide a page,
+ * it is filled by transferring multiple pages from the backend buddy system.
+ * The symmetric case is handled likewise.
+ */
+
+#include <kern/init.h>
+#include <kern/panic.h>
+#include <kern/param.h>
+#include <kern/printk.h>
+#include <kern/types.h>
+#include <lib/assert.h>
+#include <lib/list.h>
+#include <lib/macros.h>
+#include <lib/sprintf.h>
+#include <lib/stddef.h>
+#include <lib/string.h>
+#include <machine/cpu.h>
+#include <vm/vm_kmem.h>
+#include <vm/vm_page.h>
+#include <vm/vm_phys.h>
+
+/*
+ * Number of free block lists per segment.
+ */
+#define VM_PHYS_NR_FREE_LISTS 11
+
+/*
+ * The size of a CPU pool is computed by dividing the number of pages in its
+ * containing segment by this value.
+ */
+#define VM_PHYS_CPU_POOL_RATIO 1024
+
+/*
+ * Maximum number of pages in a CPU pool.
+ */
+#define VM_PHYS_CPU_POOL_MAX_SIZE 128
+
+/*
+ * The transfer size of a CPU pool is computed by dividing the pool size by
+ * this value.
+ */
+#define VM_PHYS_CPU_POOL_TRANSFER_RATIO 2
+
+/*
+ * Per-processor cache of pages.
+ */
+struct vm_phys_cpu_pool {
+ /* struct mutex mutex; */
+ int size;
+ int transfer_size;
+ int nr_pages;
+ struct list pages;
+};
+
+/*
+ * Special order value.
+ *
+ * When a page is free, its order is the index of its free list.
+ */
+#define VM_PHYS_ORDER_ALLOCATED VM_PHYS_NR_FREE_LISTS
+
+/*
+ * Doubly-linked list of free blocks.
+ */
+struct vm_phys_free_list {
+ unsigned long size;
+ struct list blocks;
+};
+
+/*
+ * Segment name buffer size.
+ */
+#define VM_PHYS_NAME_SIZE 16
+
+/*
+ * Segment of contiguous memory.
+ */
+struct vm_phys_seg {
+ struct vm_phys_cpu_pool cpu_pools[MAX_CPUS];
+
+ struct list node;
+ vm_phys_t start;
+ vm_phys_t end;
+ struct vm_page *pages;
+ struct vm_page *pages_end;
+ /* struct mutex mutex; */
+ struct vm_phys_free_list free_lists[VM_PHYS_NR_FREE_LISTS];
+ unsigned long nr_free_pages;
+ char name[VM_PHYS_NAME_SIZE];
+};
+
+/*
+ * Bootstrap information about a segment.
+ */
+struct vm_phys_boot_seg {
+ vm_phys_t avail_start;
+ vm_phys_t avail_end;
+};
+
+int vm_phys_ready;
+
+/*
+ * Segment lists, ordered by priority (higher priority lists have lower
+ * numerical priorities).
+ */
+static struct list vm_phys_seg_lists[VM_NR_PHYS_SEGLIST];
+
+/*
+ * Segment table.
+ */
+static struct vm_phys_seg vm_phys_segs[VM_MAX_PHYS_SEG];
+
+/*
+ * Bootstrap segment table.
+ */
+static struct vm_phys_boot_seg vm_phys_boot_segs[VM_MAX_PHYS_SEG] __initdata;
+
+/*
+ * Number of loaded segments.
+ */
+static unsigned int vm_phys_segs_size;
+
+static int vm_phys_load_initialized __initdata = 0;
+
+static void __init
+vm_phys_init_page(struct vm_page *page, unsigned short seg_index,
+ unsigned short order, vm_phys_t pa)
+{
+ page->seg_index = seg_index;
+ page->order = order;
+ page->phys_addr = pa;
+}
+
+static void __init
+vm_phys_free_list_init(struct vm_phys_free_list *free_list)
+{
+ free_list->size = 0;
+ list_init(&free_list->blocks);
+}
+
+static inline void
+vm_phys_free_list_insert(struct vm_phys_free_list *free_list,
+ struct vm_page *page)
+{
+ assert(page->order == VM_PHYS_ORDER_ALLOCATED);
+
+ free_list->size++;
+ list_insert(&free_list->blocks, &page->node);
+}
+
+static inline void
+vm_phys_free_list_remove(struct vm_phys_free_list *free_list,
+ struct vm_page *page)
+{
+ assert(free_list->size != 0);
+ assert(!list_empty(&free_list->blocks));
+ assert(page->order < VM_PHYS_NR_FREE_LISTS);
+
+ free_list->size--;
+ list_remove(&page->node);
+}
+
+static struct vm_page *
+vm_phys_seg_alloc_from_buddy(struct vm_phys_seg *seg, unsigned int order)
+{
+ struct vm_phys_free_list *free_list;
+ struct vm_page *page, *buddy;
+ unsigned int i;
+
+ assert(order < VM_PHYS_NR_FREE_LISTS);
+
+ for (i = order; i < VM_PHYS_NR_FREE_LISTS; i++) {
+ free_list = &seg->free_lists[i];
+
+ if (free_list->size != 0)
+ break;
+ }
+
+ if (i == VM_PHYS_NR_FREE_LISTS)
+ return NULL;
+
+ page = list_first_entry(&free_list->blocks, struct vm_page, node);
+ vm_phys_free_list_remove(free_list, page);
+ page->order = VM_PHYS_ORDER_ALLOCATED;
+
+ while (i > order) {
+ i--;
+ buddy = &page[1 << i];
+ vm_phys_free_list_insert(&seg->free_lists[i], buddy);
+ buddy->order = i;
+ }
+
+ seg->nr_free_pages -= (1 << order);
+ return page;
+}
+
+static void
+vm_phys_seg_free_to_buddy(struct vm_phys_seg *seg, struct vm_page *page,
+ unsigned int order)
+{
+ struct vm_page *buddy;
+ vm_phys_t pa, buddy_pa;
+ unsigned int nr_pages;
+
+ assert(page >= seg->pages);
+ assert(page < seg->pages_end);
+ assert(page->order == VM_PHYS_ORDER_ALLOCATED);
+ assert(order < VM_PHYS_NR_FREE_LISTS);
+
+ nr_pages = (1 << order);
+ pa = page->phys_addr;
+
+ while (order < (VM_PHYS_NR_FREE_LISTS - 1)) {
+ buddy_pa = pa ^ vm_page_ptoa(1 << order);
+
+ if ((buddy_pa < seg->start) || (buddy_pa >= seg->end))
+ break;
+
+ buddy = &seg->pages[vm_page_atop(buddy_pa - seg->start)];
+
+ if (buddy->order != order)
+ break;
+
+ vm_phys_free_list_remove(&seg->free_lists[order], buddy);
+ buddy->order = VM_PHYS_ORDER_ALLOCATED;
+ order++;
+ pa &= -vm_page_ptoa(1 << order);
+ page = &seg->pages[vm_page_atop(pa - seg->start)];
+ }
+
+ vm_phys_free_list_insert(&seg->free_lists[order], page);
+ page->order = order;
+ seg->nr_free_pages += nr_pages;
+}
+
+static void __init
+vm_phys_cpu_pool_init(struct vm_phys_cpu_pool *cpu_pool, int size)
+{
+ cpu_pool->size = size;
+ cpu_pool->transfer_size = (size + VM_PHYS_CPU_POOL_TRANSFER_RATIO - 1)
+ / VM_PHYS_CPU_POOL_TRANSFER_RATIO;
+ cpu_pool->nr_pages = 0;
+ list_init(&cpu_pool->pages);
+}
+
+static inline struct vm_phys_cpu_pool *
+vm_phys_cpu_pool_get(struct vm_phys_seg *seg)
+{
+ return &seg->cpu_pools[cpu_id()];
+}
+
+static inline struct vm_page *
+vm_phys_cpu_pool_pop(struct vm_phys_cpu_pool *cpu_pool)
+{
+ struct vm_page *page;
+
+ assert(cpu_pool->nr_pages != 0);
+ cpu_pool->nr_pages--;
+ page = list_first_entry(&cpu_pool->pages, struct vm_page, node);
+ list_remove(&page->node);
+ return page;
+}
+
+static inline void
+vm_phys_cpu_pool_push(struct vm_phys_cpu_pool *cpu_pool, struct vm_page *page)
+{
+ assert(cpu_pool->nr_pages < cpu_pool->size);
+ cpu_pool->nr_pages++;
+ list_insert(&cpu_pool->pages, &page->node);
+}
+
+static int
+vm_phys_cpu_pool_fill(struct vm_phys_cpu_pool *cpu_pool,
+ struct vm_phys_seg *seg)
+{
+ struct vm_page *page;
+ int i;
+
+ assert(cpu_pool->nr_pages == 0);
+
+ /* mutex_lock(&seg->mutex); */
+
+ for (i = 0; i < cpu_pool->transfer_size; i++) {
+ page = vm_phys_seg_alloc_from_buddy(seg, 0);
+
+ if (page == NULL)
+ break;
+
+ vm_phys_cpu_pool_push(cpu_pool, page);
+ }
+
+ /* mutex_unlock(&seg->mutex); */
+
+ return i;
+}
+
+static void
+vm_phys_cpu_pool_drain(struct vm_phys_cpu_pool *cpu_pool,
+ struct vm_phys_seg *seg)
+{
+ struct vm_page *page;
+ int i;
+
+ assert(cpu_pool->nr_pages == cpu_pool->size);
+
+ /* mutex_lock(&seg->mutex); */
+
+ for (i = cpu_pool->transfer_size; i > 0; i--) {
+ page = vm_phys_cpu_pool_pop(cpu_pool);
+ vm_phys_seg_free_to_buddy(seg, page, 0);
+ }
+
+ /* mutex_unlock(&seg->mutex); */
+}
+
+static inline vm_phys_t __init
+vm_phys_seg_size(struct vm_phys_seg *seg)
+{
+ return seg->end - seg->start;
+}
+
+static int __init
+vm_phys_seg_compute_pool_size(struct vm_phys_seg *seg)
+{
+ vm_phys_t size;
+
+ size = vm_page_atop(vm_phys_seg_size(seg)) / VM_PHYS_CPU_POOL_RATIO;
+
+ if (size == 0)
+ size = 1;
+ else if (size > VM_PHYS_CPU_POOL_MAX_SIZE)
+ size = VM_PHYS_CPU_POOL_MAX_SIZE;
+
+ return size;
+}
+
+static void __init
+vm_phys_seg_init(struct vm_phys_seg *seg, struct vm_page *pages)
+{
+ vm_phys_t pa;
+ int pool_size;
+ unsigned int i;
+
+ pool_size = vm_phys_seg_compute_pool_size(seg);
+
+ for (i = 0; i < ARRAY_SIZE(seg->cpu_pools); i++)
+ vm_phys_cpu_pool_init(&seg->cpu_pools[i], pool_size);
+
+ seg->pages = pages;
+ seg->pages_end = pages + vm_page_atop(vm_phys_seg_size(seg));
+ /* mutex_init(&seg->mutex); */
+
+ for (i = 0; i < ARRAY_SIZE(seg->free_lists); i++)
+ vm_phys_free_list_init(&seg->free_lists[i]);
+
+ seg->nr_free_pages = 0;
+ i = seg - vm_phys_segs;
+
+ for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE)
+ vm_phys_init_page(&pages[vm_page_atop(pa - seg->start)], i,
+ VM_PHYS_ORDER_ALLOCATED, pa);
+}
+
+static struct vm_page *
+vm_phys_seg_alloc(struct vm_phys_seg *seg, unsigned int order)
+{
+ struct vm_phys_cpu_pool *cpu_pool;
+ struct vm_page *page;
+ int filled;
+
+ assert(order < VM_PHYS_NR_FREE_LISTS);
+
+ if (order == 0) {
+ cpu_pool = vm_phys_cpu_pool_get(seg);
+
+ /* mutex_lock(&cpu_pool->mutex); */
+
+ if (cpu_pool->nr_pages == 0) {
+ filled = vm_phys_cpu_pool_fill(cpu_pool, seg);
+
+ if (!filled) {
+ /* mutex_unlock(&cpu_pool->mutex); */
+ return NULL;
+ }
+ }
+
+ page = vm_phys_cpu_pool_pop(cpu_pool);
+ /* mutex_unlock(&cpu_pool->mutex); */
+ } else {
+ /* mutex_lock(&seg->mutex); */
+ page = vm_phys_seg_alloc_from_buddy(seg, order);
+ /* mutex_unlock(&seg->mutex); */
+ }
+
+ return page;
+}
+
+static void
+vm_phys_seg_free(struct vm_phys_seg *seg, struct vm_page *page,
+ unsigned int order)
+{
+ struct vm_phys_cpu_pool *cpu_pool;
+
+ assert(order < VM_PHYS_NR_FREE_LISTS);
+
+ if (order == 0) {
+ cpu_pool = vm_phys_cpu_pool_get(seg);
+
+ /* mutex_lock(&cpu_pool->mutex); */
+
+ if (cpu_pool->nr_pages == cpu_pool->size)
+ vm_phys_cpu_pool_drain(cpu_pool, seg);
+
+ vm_phys_cpu_pool_push(cpu_pool, page);
+ /* mutex_unlock(&cpu_pool->mutex); */
+ } else {
+ /* mutex_lock(&seg->mutex); */
+ vm_phys_seg_free_to_buddy(seg, page, order);
+ /* mutex_unlock(&seg->mutex); */
+ }
+}
+
+void __init
+vm_phys_load(const char *name, vm_phys_t start, vm_phys_t end,
+ vm_phys_t avail_start, vm_phys_t avail_end,
+ unsigned int seglist_prio)
+{
+ struct vm_phys_boot_seg *boot_seg;
+ struct vm_phys_seg *seg;
+ struct list *seg_list;
+ unsigned int i;
+
+ assert(name != NULL);
+ assert(start < end);
+ assert(seglist_prio < ARRAY_SIZE(vm_phys_seg_lists));
+
+ if (!vm_phys_load_initialized) {
+ for (i = 0; i < ARRAY_SIZE(vm_phys_seg_lists); i++)
+ list_init(&vm_phys_seg_lists[i]);
+
+ vm_phys_segs_size = 0;
+ vm_phys_load_initialized = 1;
+ }
+
+ if (vm_phys_segs_size >= ARRAY_SIZE(vm_phys_segs))
+ panic("vm_phys: too many physical segments");
+
+ seg_list = &vm_phys_seg_lists[seglist_prio];
+ seg = &vm_phys_segs[vm_phys_segs_size];
+ boot_seg = &vm_phys_boot_segs[vm_phys_segs_size];
+
+ list_insert_tail(seg_list, &seg->node);
+ seg->start = start;
+ seg->end = end;
+ strcpy(seg->name, name); /* TODO: strlcpy */
+ boot_seg->avail_start = avail_start;
+ boot_seg->avail_end = avail_end;
+
+ vm_phys_segs_size++;
+}
+
+vm_phys_t __init
+vm_phys_bootalloc(void)
+{
+ struct vm_phys_boot_seg *boot_seg;
+ struct vm_phys_seg *seg;
+ struct list *seg_list;
+ vm_phys_t pa;
+
+ for (seg_list = &vm_phys_seg_lists[ARRAY_SIZE(vm_phys_seg_lists) - 1];
+ seg_list >= vm_phys_seg_lists;
+ seg_list--)
+ list_for_each_entry(seg_list, seg, node) {
+ boot_seg = &vm_phys_boot_segs[seg - vm_phys_segs];
+
+ if ((boot_seg->avail_end - boot_seg->avail_start) > 1) {
+ pa = boot_seg->avail_start;
+ boot_seg->avail_start += PAGE_SIZE;
+ return pa;
+ }
+ }
+
+ panic("vm_phys: no physical memory available");
+}
+
+void __init
+vm_phys_setup(void)
+{
+ struct vm_phys_boot_seg *boot_seg;
+ struct vm_phys_seg *seg;
+ struct vm_page *map, *start, *end;
+ size_t pages, map_size;
+ unsigned int i;
+
+ /*
+ * Compute the memory map size.
+ */
+ pages = 0;
+
+ for (i = 0; i < vm_phys_segs_size; i++)
+ pages += vm_page_atop(vm_phys_seg_size(&vm_phys_segs[i]));
+
+ map_size = P2ROUND(pages * sizeof(struct vm_page), PAGE_SIZE);
+ printk("vm_phys: page table size: %u entries (%uk)\n", pages,
+ map_size >> 10);
+ map = (struct vm_page *)vm_kmem_bootalloc(map_size);
+
+ /*
+ * Initialize the segments, associating them to the memory map. When
+ * the segments are initialized, all their pages are set allocated,
+ * with a block size of one (order 0). They are then released, which
+ * populates the free lists.
+ */
+ for (i = 0; i < vm_phys_segs_size; i++) {
+ seg = &vm_phys_segs[i];
+ boot_seg = &vm_phys_boot_segs[i];
+ vm_phys_seg_init(seg, map);
+
+ start = seg->pages + vm_page_atop(boot_seg->avail_start - seg->start);
+ end = seg->pages + vm_page_atop(boot_seg->avail_end - seg->start);
+
+ while (start < end) {
+ vm_phys_seg_free_to_buddy(seg, start, 0);
+ start++;
+ }
+
+ map += vm_page_atop(vm_phys_seg_size(seg));
+ }
+
+ vm_phys_ready = 1;
+}
+
+void __init
+vm_phys_manage(struct vm_page *page)
+{
+ assert(page->seg_index < ARRAY_SIZE(vm_phys_segs));
+
+ vm_phys_seg_free_to_buddy(&vm_phys_segs[page->seg_index], page, 0);
+}
+
+struct vm_page *
+vm_phys_lookup_page(vm_phys_t pa)
+{
+ struct vm_phys_seg *seg;
+ unsigned int i;
+
+ for (i = 0; i < vm_phys_segs_size; i++) {
+ seg = &vm_phys_segs[i];
+
+ if ((pa >= seg->start) && (pa < seg->end))
+ return &seg->pages[vm_page_atop(pa - seg->start)];
+ }
+
+ return NULL;
+}
+
+struct vm_page *
+vm_phys_alloc(unsigned int order)
+{
+ struct vm_phys_seg *seg;
+ struct list *seg_list;
+ struct vm_page *page;
+
+ for (seg_list = &vm_phys_seg_lists[ARRAY_SIZE(vm_phys_seg_lists) - 1];
+ seg_list >= vm_phys_seg_lists;
+ seg_list--)
+ list_for_each_entry(seg_list, seg, node) {
+ page = vm_phys_seg_alloc(seg, order);
+
+ if (page != NULL)
+ return page;
+ }
+
+ return NULL;
+}
+
+void
+vm_phys_free(struct vm_page *page, unsigned int order)
+{
+ assert(page->seg_index < ARRAY_SIZE(vm_phys_segs));
+
+ vm_phys_seg_free(&vm_phys_segs[page->seg_index], page, order);
+}
+
+void
+vm_phys_info(void)
+{
+ struct vm_phys_seg *seg;
+ unsigned long pages;
+ unsigned int i;
+
+ for (i = 0; i < vm_phys_segs_size; i++) {
+ seg = &vm_phys_segs[i];
+ pages = (unsigned long)(seg->pages_end - seg->pages);
+ printk("vm_phys: %s: pages: %lu (%luM), free: %lu (%luM)\n", seg->name,
+ pages, pages >> (20 - PAGE_SHIFT), seg->nr_free_pages,
+ seg->nr_free_pages >> (20 - PAGE_SHIFT));
+ }
+}
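
A small sketch of the buddy arithmetic used by vm_phys_seg_free_to_buddy() above: the buddy of a 2^order block is found by flipping one address bit, and a merged pair is aligned down to the next order. The numeric values are only illustrative and assume PAGE_SIZE == 4096.

#include <kern/types.h>
#include <vm/vm_page.h>

/*
 * Same computation as in vm_phys_seg_free_to_buddy():
 *   buddy_pa = pa ^ vm_page_ptoa(1 << order);
 *
 * With PAGE_SIZE == 4096, the order-0 page at 0x5000 has its buddy at
 * 0x5000 ^ 0x1000 == 0x4000; if both are free, they merge into an order-1
 * block at 0x4000 (pa &= -vm_page_ptoa(1 << 1)).
 */
static vm_phys_t
buddy_of(vm_phys_t pa, unsigned int order)
{
    return pa ^ vm_page_ptoa((vm_phys_t)1 << order);
}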
diff --git a/vm/vm_phys.h b/vm/vm_phys.h
new file mode 100644
index 00000000..a5a7d32f
--- /dev/null
+++ b/vm/vm_phys.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2010, 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * Physical page allocator.
+ */
+
+#ifndef _VM_VM_PHYS_H
+#define _VM_VM_PHYS_H
+
+#include <kern/types.h>
+#include <vm/vm_page.h>
+
+/*
+ * True if the vm_phys module is completely initialized, false otherwise
+ * (in which case only vm_phys_bootalloc() can be used for allocations).
+ */
+extern int vm_phys_ready;
+
+/*
+ * Load physical memory into the vm_phys module at boot time.
+ *
+ * The avail_start and avail_end parameters are used to maintain a simple
+ * heap for bootstrap allocations.
+ */
+void vm_phys_load(const char *name, vm_phys_t start, vm_phys_t end,
+ vm_phys_t avail_start, vm_phys_t avail_end,
+ unsigned int seglist_prio);
+
+/*
+ * Allocate one physical page.
+ *
+ * This function is used to allocate physical memory at boot time, before the
+ * vm_phys module is ready, but after the physical memory has been loaded.
+ */
+vm_phys_t vm_phys_bootalloc(void);
+
+/*
+ * Set up the vm_phys module.
+ *
+ * Once this function returns, the vm_phys module is ready, and normal
+ * allocation functions can be used.
+ */
+void vm_phys_setup(void);
+
+/*
+ * Make the given page managed by the vm_phys module.
+ *
+ * If additional memory can be made usable after the VM system is initialized,
+ * it should be reported through this function.
+ */
+void vm_phys_manage(struct vm_page *page);
+
+/*
+ * Return the page descriptor for the given physical address.
+ */
+struct vm_page * vm_phys_lookup_page(vm_phys_t pa);
+
+/*
+ * Allocate a block of 2^order physical pages.
+ */
+struct vm_page * vm_phys_alloc(unsigned int order);
+
+/*
+ * Release a block of 2^order physical pages.
+ */
+void vm_phys_free(struct vm_page *page, unsigned int order);
+
+/*
+ * Display internal information about the module.
+ */
+void vm_phys_info(void);
+
+#endif /* _VM_VM_PHYS_H */
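
A hedged sketch of the allocation interface above: grab a block of 2^order physical pages, convert it to a physical address, then release it. The order and the eventual use of the block are hypothetical.

#include <lib/stddef.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>

/*
 * Hypothetical example: allocate an order-2 block (4 contiguous physical
 * pages), look at its physical address, then give it back.
 */
static void
phys_alloc_example(void)
{
    struct vm_page *page;
    vm_phys_t pa;

    page = vm_phys_alloc(2);

    if (page == NULL)
        return;

    pa = vm_page_to_pa(page);   /* address of the first page in the block */
    (void)pa;

    vm_phys_free(page, 2);      /* order must match the allocation */
}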
diff --git a/vm/vm_prot.h b/vm/vm_prot.h
new file mode 100644
index 00000000..27738c72
--- /dev/null
+++ b/vm/vm_prot.h
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2010, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _VM_VM_PROT_H
+#define _VM_VM_PROT_H
+
+/*
+ * Protection flags.
+ */
+#define VM_PROT_NONE 0
+#define VM_PROT_READ 1
+#define VM_PROT_WRITE 2
+#define VM_PROT_EXECUTE 4
+#define VM_PROT_DEFAULT (VM_PROT_READ | VM_PROT_WRITE)
+#define VM_PROT_ALL (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)
+
+#endif /* _VM_VM_PROT_H */
diff --git a/vm/vm_setup.c b/vm/vm_setup.c
new file mode 100644
index 00000000..33b07257
--- /dev/null
+++ b/vm/vm_setup.c
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2011, 2012 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <kern/kmem.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kmem.h>
+#include <vm/vm_phys.h>
+#include <vm/vm_setup.h>
+
+void
+vm_setup(void)
+{
+ vm_kmem_setup();
+ vm_phys_setup();
+ kmem_bootstrap();
+ vm_map_bootstrap();
+ kmem_setup();
+ vm_map_setup();
+}
diff --git a/vm/vm_setup.h b/vm/vm_setup.h
new file mode 100644
index 00000000..f52ddb24
--- /dev/null
+++ b/vm/vm_setup.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2011 Richard Braun.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _VM_VM_SETUP_H
+#define _VM_VM_SETUP_H
+
+/*
+ * Set up the VM system.
+ *
+ * This function also initializes the kmem (kernel memory) allocator.
+ */
+void vm_setup(void);
+
+#endif /* _VM_VM_SETUP_H */