/*
* Copyright (c) 2010, 2011, 2012 Richard Braun.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*
* This implementation uses the binary buddy system to manage its heap.
 * Descriptions of the buddy system can be found in the following works:
* - "UNIX Internals: The New Frontiers", by Uresh Vahalia.
* - "Dynamic Storage Allocation: A Survey and Critical Review",
* by Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles.
*
* In addition, this allocator uses per-CPU pools of pages for order 0
* (i.e. single page) allocations. These pools act as caches (but are named
* differently to avoid confusion with CPU caches) that reduce contention on
* multiprocessor systems. When a pool is empty and cannot provide a page,
* it is filled by transferring multiple pages from the backend buddy system.
 * Symmetrically, when a pool is full, a batch of pages is transferred back
 * to the buddy system.
*/
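/*
 * Illustrative sketch of how this interface is meant to be used; the
 * addresses and indexes below are hypothetical examples, not values taken
 * from a real machine:
 *
 *   // At bootstrap, describe each segment of physical memory, then set up
 *   // the allocator once all segments are loaded.
 *   vm_phys_load("ram", 0x100000, 0x10000000, 0x400000, 0x10000000, 0, 0);
 *   vm_phys_setup();
 *
 *   // At runtime, allocate and release blocks of 2^order contiguous pages.
 *   struct vm_page *page = vm_phys_alloc(0);
 *
 *   if (page != NULL)
 *       vm_phys_free(page, 0);
 */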
#include <kern/assert.h>
#include <kern/init.h>
#include <kern/list.h>
#include <kern/macros.h>
#include <kern/mutex.h>
#include <kern/panic.h>
#include <kern/param.h>
#include <kern/printk.h>
#include <kern/stddef.h>
#include <kern/string.h>
#include <kern/types.h>
#include <machine/cpu.h>
#include <vm/vm_kmem.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
/*
* Number of free block lists per segment.
*/
#define VM_PHYS_NR_FREE_LISTS 11
/*
* The size of a CPU pool is computed by dividing the number of pages in its
* containing segment by this value.
*/
#define VM_PHYS_CPU_POOL_RATIO 1024
/*
* Maximum number of pages in a CPU pool.
*/
#define VM_PHYS_CPU_POOL_MAX_SIZE 128
/*
* The transfer size of a CPU pool is computed by dividing the pool size by
* this value.
*/
#define VM_PHYS_CPU_POOL_TRANSFER_RATIO 2
/*
* Per-processor cache of pages.
*/
struct vm_phys_cpu_pool {
struct mutex lock;
int size;
int transfer_size;
int nr_pages;
struct list pages;
};
/*
 * Special order value for pages that aren't linked in any free list, i.e.
 * allocated pages.
 *
 * When a page is free, its order is the index of its free list.
*/
#define VM_PHYS_ORDER_ALLOCATED VM_PHYS_NR_FREE_LISTS
/*
* Doubly-linked list of free blocks.
*/
struct vm_phys_free_list {
unsigned long size;
struct list blocks;
};
/*
* Segment name buffer size.
*/
#define VM_PHYS_NAME_SIZE 16
/*
* Segment of contiguous memory.
*/
struct vm_phys_seg {
struct vm_phys_cpu_pool cpu_pools[MAX_CPUS];
struct list node;
phys_addr_t start;
phys_addr_t end;
struct vm_page *pages;
struct vm_page *pages_end;
struct mutex lock;
struct vm_phys_free_list free_lists[VM_PHYS_NR_FREE_LISTS];
unsigned long nr_free_pages;
char name[VM_PHYS_NAME_SIZE];
};
/*
* Bootstrap information about a segment.
*/
struct vm_phys_boot_seg {
phys_addr_t avail_start;
phys_addr_t avail_end;
};
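/*
 * True once the physical allocator is fully operational, i.e. once
 * vm_phys_setup() has run.
 */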
int vm_phys_ready;
/*
* Segment lists, ordered by priority.
*/
static struct list vm_phys_seg_lists[VM_NR_PHYS_SEGLIST];
/*
* Segment table.
*/
static struct vm_phys_seg vm_phys_segs[VM_MAX_PHYS_SEG];
/*
* Bootstrap segment table.
*/
static struct vm_phys_boot_seg vm_phys_boot_segs[VM_MAX_PHYS_SEG] __initdata;
/*
* Number of loaded segments.
*/
static unsigned int vm_phys_segs_size;
static int vm_phys_load_initialized __initdata = 0;
static void __init
vm_phys_init_page(struct vm_page *page, unsigned short seg_index,
unsigned short order, phys_addr_t pa)
{
page->seg_index = seg_index;
page->order = order;
page->object = NULL;
page->offset = 0;
page->phys_addr = pa;
}
static void __init
vm_phys_free_list_init(struct vm_phys_free_list *free_list)
{
free_list->size = 0;
list_init(&free_list->blocks);
}
static inline void
vm_phys_free_list_insert(struct vm_phys_free_list *free_list,
struct vm_page *page)
{
assert(page->order == VM_PHYS_ORDER_ALLOCATED);
free_list->size++;
list_insert_head(&free_list->blocks, &page->node);
}
static inline void
vm_phys_free_list_remove(struct vm_phys_free_list *free_list,
struct vm_page *page)
{
assert(free_list->size != 0);
assert(!list_empty(&free_list->blocks));
assert(page->order < VM_PHYS_NR_FREE_LISTS);
free_list->size--;
list_remove(&page->node);
}
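/*
 * Allocate a block of 2^order pages from the free lists of a segment.
 *
 * The caller must have exclusive access to the segment free lists. If no
 * block of the requested order is available, a larger block is repeatedly
 * split in two, the unused upper halves being returned to the lower order
 * free lists.
 */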
static struct vm_page *
vm_phys_seg_alloc_from_buddy(struct vm_phys_seg *seg, unsigned int order)
{
struct vm_phys_free_list *free_list;
struct vm_page *page, *buddy;
unsigned int i;
assert(order < VM_PHYS_NR_FREE_LISTS);
for (i = order; i < VM_PHYS_NR_FREE_LISTS; i++) {
free_list = &seg->free_lists[i];
if (free_list->size != 0)
break;
}
if (i == VM_PHYS_NR_FREE_LISTS)
return NULL;
page = list_first_entry(&free_list->blocks, struct vm_page, node);
vm_phys_free_list_remove(free_list, page);
page->order = VM_PHYS_ORDER_ALLOCATED;
while (i > order) {
i--;
buddy = &page[1 << i];
vm_phys_free_list_insert(&seg->free_lists[i], buddy);
buddy->order = i;
}
seg->nr_free_pages -= (1 << order);
return page;
}
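/*
 * Release a block of 2^order pages into the free lists of a segment,
 * coalescing it with its buddies whenever possible.
 *
 * The caller must have exclusive access to the segment free lists. A buddy
 * is located by toggling the bit of the physical address that corresponds
 * to the current order, e.g. assuming 4 KiB pages, the order 0 buddy of the
 * page at 0x3000 is at 0x2000, and merging them yields an order 1 block
 * at 0x2000.
 */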
static void
vm_phys_seg_free_to_buddy(struct vm_phys_seg *seg, struct vm_page *page,
unsigned int order)
{
struct vm_page *buddy;
phys_addr_t pa, buddy_pa;
unsigned int nr_pages;
assert(page >= seg->pages);
assert(page < seg->pages_end);
assert(page->order == VM_PHYS_ORDER_ALLOCATED);
assert(order < VM_PHYS_NR_FREE_LISTS);
nr_pages = (1 << order);
pa = page->phys_addr;
while (order < (VM_PHYS_NR_FREE_LISTS - 1)) {
buddy_pa = pa ^ vm_page_ptoa(1 << order);
if ((buddy_pa < seg->start) || (buddy_pa >= seg->end))
break;
buddy = &seg->pages[vm_page_atop(buddy_pa - seg->start)];
if (buddy->order != order)
break;
vm_phys_free_list_remove(&seg->free_lists[order], buddy);
buddy->order = VM_PHYS_ORDER_ALLOCATED;
order++;
pa &= -vm_page_ptoa(1 << order);
page = &seg->pages[vm_page_atop(pa - seg->start)];
}
vm_phys_free_list_insert(&seg->free_lists[order], page);
page->order = order;
seg->nr_free_pages += nr_pages;
}
static void __init
vm_phys_cpu_pool_init(struct vm_phys_cpu_pool *cpu_pool, int size)
{
mutex_init(&cpu_pool->lock);
cpu_pool->size = size;
cpu_pool->transfer_size = (size + VM_PHYS_CPU_POOL_TRANSFER_RATIO - 1)
/ VM_PHYS_CPU_POOL_TRANSFER_RATIO;
cpu_pool->nr_pages = 0;
list_init(&cpu_pool->pages);
}
static inline struct vm_phys_cpu_pool *
vm_phys_cpu_pool_get(struct vm_phys_seg *seg)
{
return &seg->cpu_pools[cpu_id()];
}
static inline struct vm_page *
vm_phys_cpu_pool_pop(struct vm_phys_cpu_pool *cpu_pool)
{
struct vm_page *page;
assert(cpu_pool->nr_pages != 0);
cpu_pool->nr_pages--;
page = list_first_entry(&cpu_pool->pages, struct vm_page, node);
list_remove(&page->node);
return page;
}
static inline void
vm_phys_cpu_pool_push(struct vm_phys_cpu_pool *cpu_pool, struct vm_page *page)
{
assert(cpu_pool->nr_pages < cpu_pool->size);
cpu_pool->nr_pages++;
list_insert_head(&cpu_pool->pages, &page->node);
}
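/*
 * Refill an empty CPU pool by transferring a batch of order 0 pages from
 * the buddy system of the given segment.
 *
 * Return the number of pages actually transferred, which may be lower than
 * the transfer size, including 0, if the segment is running out of memory.
 * The pool lock must be held.
 */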
static int
vm_phys_cpu_pool_fill(struct vm_phys_cpu_pool *cpu_pool,
struct vm_phys_seg *seg)
{
struct vm_page *page;
int i;
assert(cpu_pool->nr_pages == 0);
mutex_lock(&seg->lock);
for (i = 0; i < cpu_pool->transfer_size; i++) {
page = vm_phys_seg_alloc_from_buddy(seg, 0);
if (page == NULL)
break;
vm_phys_cpu_pool_push(cpu_pool, page);
}
mutex_unlock(&seg->lock);
return i;
}
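/*
 * Drain a full CPU pool by returning a batch of order 0 pages to the buddy
 * system of the given segment. The pool lock must be held.
 */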
static void
vm_phys_cpu_pool_drain(struct vm_phys_cpu_pool *cpu_pool,
struct vm_phys_seg *seg)
{
struct vm_page *page;
int i;
assert(cpu_pool->nr_pages == cpu_pool->size);
mutex_lock(&seg->lock);
for (i = cpu_pool->transfer_size; i > 0; i--) {
page = vm_phys_cpu_pool_pop(cpu_pool);
vm_phys_seg_free_to_buddy(seg, page, 0);
}
mutex_unlock(&seg->lock);
}
static inline phys_addr_t __init
vm_phys_seg_size(struct vm_phys_seg *seg)
{
return seg->end - seg->start;
}
static int __init
vm_phys_seg_compute_pool_size(struct vm_phys_seg *seg)
{
phys_addr_t size;
size = vm_page_atop(vm_phys_seg_size(seg)) / VM_PHYS_CPU_POOL_RATIO;
if (size == 0)
size = 1;
else if (size > VM_PHYS_CPU_POOL_MAX_SIZE)
size = VM_PHYS_CPU_POOL_MAX_SIZE;
return size;
}
static void __init
vm_phys_seg_init(struct vm_phys_seg *seg, struct vm_page *pages)
{
phys_addr_t pa;
int pool_size;
unsigned int i;
pool_size = vm_phys_seg_compute_pool_size(seg);
for (i = 0; i < ARRAY_SIZE(seg->cpu_pools); i++)
vm_phys_cpu_pool_init(&seg->cpu_pools[i], pool_size);
seg->pages = pages;
seg->pages_end = pages + vm_page_atop(vm_phys_seg_size(seg));
mutex_init(&seg->lock);
for (i = 0; i < ARRAY_SIZE(seg->free_lists); i++)
vm_phys_free_list_init(&seg->free_lists[i]);
seg->nr_free_pages = 0;
i = seg - vm_phys_segs;
for (pa = seg->start; pa < seg->end; pa += PAGE_SIZE)
vm_phys_init_page(&pages[vm_page_atop(pa - seg->start)], i,
VM_PHYS_ORDER_ALLOCATED, pa);
}
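/*
 * Allocate a block of 2^order pages from a segment.
 *
 * Order 0 (single page) requests are served from the per-CPU pool of the
 * calling processor, falling back on the buddy system only when the pool
 * is empty. Larger requests go directly to the buddy system.
 */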
static struct vm_page *
vm_phys_seg_alloc(struct vm_phys_seg *seg, unsigned int order)
{
struct vm_phys_cpu_pool *cpu_pool;
struct vm_page *page;
int filled;
assert(order < VM_PHYS_NR_FREE_LISTS);
if (order == 0) {
cpu_pool = vm_phys_cpu_pool_get(seg);
mutex_lock(&cpu_pool->lock);
if (cpu_pool->nr_pages == 0) {
filled = vm_phys_cpu_pool_fill(cpu_pool, seg);
if (!filled) {
mutex_unlock(&cpu_pool->lock);
return NULL;
}
}
page = vm_phys_cpu_pool_pop(cpu_pool);
mutex_unlock(&cpu_pool->lock);
} else {
mutex_lock(&seg->lock);
page = vm_phys_seg_alloc_from_buddy(seg, order);
mutex_unlock(&seg->lock);
}
return page;
}
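/*
 * Release a block of 2^order pages to a segment.
 *
 * Single pages are returned to the per-CPU pool of the calling processor,
 * which is drained to the buddy system first if it is full. Larger blocks
 * are returned directly to the buddy system.
 */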
static void
vm_phys_seg_free(struct vm_phys_seg *seg, struct vm_page *page,
unsigned int order)
{
struct vm_phys_cpu_pool *cpu_pool;
assert(order < VM_PHYS_NR_FREE_LISTS);
if (order == 0) {
cpu_pool = vm_phys_cpu_pool_get(seg);
mutex_lock(&cpu_pool->lock);
if (cpu_pool->nr_pages == cpu_pool->size)
vm_phys_cpu_pool_drain(cpu_pool, seg);
vm_phys_cpu_pool_push(cpu_pool, page);
mutex_unlock(&cpu_pool->lock);
} else {
mutex_lock(&seg->lock);
vm_phys_seg_free_to_buddy(seg, page, order);
mutex_unlock(&seg->lock);
}
}
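/*
 * Register a segment of physical memory during bootstrap.
 *
 * The [start, end) range covers the whole segment, while
 * [avail_start, avail_end) covers the part of it that is available for
 * allocation once the allocator is set up.
 */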
void __init
vm_phys_load(const char *name, phys_addr_t start, phys_addr_t end,
phys_addr_t avail_start, phys_addr_t avail_end,
unsigned int seg_index, unsigned int seglist_prio)
{
struct vm_phys_boot_seg *boot_seg;
struct vm_phys_seg *seg;
struct list *seg_list;
unsigned int i;
assert(name != NULL);
assert(start < end);
assert(seg_index < ARRAY_SIZE(vm_phys_segs));
assert(seglist_prio < ARRAY_SIZE(vm_phys_seg_lists));
if (!vm_phys_load_initialized) {
for (i = 0; i < ARRAY_SIZE(vm_phys_seg_lists); i++)
list_init(&vm_phys_seg_lists[i]);
vm_phys_segs_size = 0;
vm_phys_load_initialized = 1;
}
assert(vm_phys_segs_size < ARRAY_SIZE(vm_phys_segs));
boot_seg = &vm_phys_boot_segs[seg_index];
seg = &vm_phys_segs[seg_index];
seg_list = &vm_phys_seg_lists[seglist_prio];
list_insert_tail(seg_list, &seg->node);
seg->start = start;
seg->end = end;
strlcpy(seg->name, name, sizeof(seg->name));
boot_seg->avail_start = avail_start;
boot_seg->avail_end = avail_end;
vm_phys_segs_size++;
}
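/*
 * Allocate one page of physical memory before the allocator is ready,
 * taking it from the available range of the highest priority segment that
 * still has room. Pages obtained this way are not tracked by the buddy
 * system unless they are later handed back through vm_phys_manage().
 */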
phys_addr_t __init
vm_phys_bootalloc(void)
{
struct vm_phys_boot_seg *boot_seg;
struct vm_phys_seg *seg;
struct list *seg_list;
phys_addr_t pa;
for (seg_list = &vm_phys_seg_lists[ARRAY_SIZE(vm_phys_seg_lists) - 1];
seg_list >= vm_phys_seg_lists;
seg_list--)
list_for_each_entry(seg_list, seg, node) {
boot_seg = &vm_phys_boot_segs[seg - vm_phys_segs];
if ((boot_seg->avail_end - boot_seg->avail_start) > 1) {
pa = boot_seg->avail_start;
boot_seg->avail_start += PAGE_SIZE;
return pa;
}
}
panic("vm_phys: no physical memory available");
}
void __init
vm_phys_setup(void)
{
struct vm_phys_boot_seg *boot_seg;
struct vm_phys_seg *seg;
struct vm_page *map, *start, *end;
size_t pages, map_size;
unsigned int i;
/*
* Compute the memory map size.
*/
pages = 0;
for (i = 0; i < vm_phys_segs_size; i++)
pages += vm_page_atop(vm_phys_seg_size(&vm_phys_segs[i]));
map_size = P2ROUND(pages * sizeof(struct vm_page), PAGE_SIZE);
printk("vm_phys: page table size: %zu entries (%zuk)\n", pages,
map_size >> 10);
map = (struct vm_page *)vm_kmem_bootalloc(map_size);
/*
* Initialize the segments, associating them to the memory map. When
* the segments are initialized, all their pages are set allocated,
* with a block size of one (order 0). They are then released, which
* populates the free lists.
*/
for (i = 0; i < vm_phys_segs_size; i++) {
seg = &vm_phys_segs[i];
boot_seg = &vm_phys_boot_segs[i];
vm_phys_seg_init(seg, map);
start = seg->pages + vm_page_atop(boot_seg->avail_start - seg->start);
end = seg->pages + vm_page_atop(boot_seg->avail_end - seg->start);
while (start < end) {
vm_phys_seg_free_to_buddy(seg, start, 0);
start++;
}
map += vm_page_atop(vm_phys_seg_size(seg));
}
vm_phys_ready = 1;
}
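/*
 * Make the given page available to the allocator by releasing it to the
 * buddy system of its segment.
 */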
void __init
vm_phys_manage(struct vm_page *page)
{
assert(page->seg_index < ARRAY_SIZE(vm_phys_segs));
vm_phys_seg_free_to_buddy(&vm_phys_segs[page->seg_index], page, 0);
}
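/*
 * Return the page descriptor for the given physical address, or NULL if
 * the address doesn't belong to any loaded segment.
 */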
struct vm_page *
vm_phys_lookup_page(phys_addr_t pa)
{
struct vm_phys_seg *seg;
unsigned int i;
for (i = 0; i < vm_phys_segs_size; i++) {
seg = &vm_phys_segs[i];
if ((pa >= seg->start) && (pa < seg->end))
return &seg->pages[vm_page_atop(pa - seg->start)];
}
return NULL;
}
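/*
 * Allocate a block of 2^order contiguous pages, trying segment lists from
 * the highest priority to the lowest, and segments within a list in load
 * order. Return NULL if no segment can satisfy the request.
 */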
struct vm_page *
vm_phys_alloc(unsigned int order)
{
struct vm_phys_seg *seg;
struct list *seg_list;
struct vm_page *page;
for (seg_list = &vm_phys_seg_lists[ARRAY_SIZE(vm_phys_seg_lists) - 1];
seg_list >= vm_phys_seg_lists;
seg_list--)
list_for_each_entry(seg_list, seg, node) {
page = vm_phys_seg_alloc(seg, order);
if (page != NULL)
return page;
}
return NULL;
}
struct vm_page *
vm_phys_alloc_seg(unsigned int order, unsigned int seg_index)
{
assert(seg_index < vm_phys_segs_size);
return vm_phys_seg_alloc(&vm_phys_segs[seg_index], order);
}
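/*
 * Release a block of 2^order pages previously obtained from vm_phys_alloc()
 * or vm_phys_alloc_seg().
 */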
void
vm_phys_free(struct vm_page *page, unsigned int order)
{
assert(page->seg_index < ARRAY_SIZE(vm_phys_segs));
vm_phys_seg_free(&vm_phys_segs[page->seg_index], page, order);
}
void
vm_phys_info(void)
{
struct vm_phys_seg *seg;
unsigned long pages;
unsigned int i;
for (i = 0; i < vm_phys_segs_size; i++) {
seg = &vm_phys_segs[i];
pages = (unsigned long)(seg->pages_end - seg->pages);
printk("vm_phys: %s: pages: %lu (%luM), free: %lu (%luM)\n", seg->name,
pages, pages >> (20 - PAGE_SHIFT), seg->nr_free_pages,
seg->nr_free_pages >> (20 - PAGE_SHIFT));
}
}