Diffstat (limited to 'mm')
 -rw-r--r--   mm/bootmem.c        |   2
 -rw-r--r--   mm/compaction.c     |   5
 -rw-r--r--   mm/madvise.c        |  18
 -rw-r--r--   mm/memblock.c       |  51
 -rw-r--r--   mm/memory_hotplug.c |   2
 -rw-r--r--   mm/nobootmem.c      |  40
 -rw-r--r--   mm/shmem.c          | 196
 -rw-r--r--   mm/sparse.c         |  20
 -rw-r--r--   mm/vmscan.c         |   7
9 files changed, 146 insertions, 195 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index ec4fcb7a56c8..73096630cb35 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -698,7 +698,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 	return ___alloc_bootmem(size, align, goal, limit);
 }
 
-static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
 				unsigned long size, unsigned long align,
 				unsigned long goal, unsigned long limit)
 {
diff --git a/mm/compaction.c b/mm/compaction.c
index 7ea259d82a99..2f42d9528539 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -701,8 +701,11 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 		if (err) {
 			putback_lru_pages(&cc->migratepages);
 			cc->nr_migratepages = 0;
+			if (err == -ENOMEM) {
+				ret = COMPACT_PARTIAL;
+				goto out;
+			}
 		}
-
 	}
 
 out:
diff --git a/mm/madvise.c b/mm/madvise.c
index deff1b64a08c..14d260fa0d17 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -15,6 +15,7 @@
 #include <linux/sched.h>
 #include <linux/ksm.h>
 #include <linux/fs.h>
+#include <linux/file.h>
 
 /*
  * Any behaviour which results in changes to the vma->vm_flags needs to
@@ -204,14 +205,16 @@ static long madvise_remove(struct vm_area_struct *vma,
 {
 	loff_t offset;
 	int error;
+	struct file *f;
 
 	*prev = NULL;	/* tell sys_madvise we drop mmap_sem */
 
 	if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
 		return -EINVAL;
 
-	if (!vma->vm_file || !vma->vm_file->f_mapping
-		|| !vma->vm_file->f_mapping->host) {
+	f = vma->vm_file;
+
+	if (!f || !f->f_mapping || !f->f_mapping->host) {
 			return -EINVAL;
 	}
 
@@ -221,11 +224,18 @@ static long madvise_remove(struct vm_area_struct *vma,
 	offset = (loff_t)(start - vma->vm_start)
 			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
 
-	/* filesystem's fallocate may need to take i_mutex */
+	/*
+	 * Filesystem's fallocate may need to take i_mutex.  We need to
+	 * explicitly grab a reference because the vma (and hence the
+	 * vma's reference to the file) can go away as soon as we drop
+	 * mmap_sem.
+	 */
+	get_file(f);
 	up_read(&current->mm->mmap_sem);
-	error = do_fallocate(vma->vm_file,
+	error = do_fallocate(f,
 				FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 				offset, end - start);
+	fput(f);
 	down_read(&current->mm->mmap_sem);
 	return error;
 }
diff --git a/mm/memblock.c b/mm/memblock.c
index d4382095f8bd..5cc6731b00cc 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -143,30 +143,6 @@ phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
 					   MAX_NUMNODES);
 }
 
-/*
- * Free memblock.reserved.regions
- */
-int __init_memblock memblock_free_reserved_regions(void)
-{
-	if (memblock.reserved.regions == memblock_reserved_init_regions)
-		return 0;
-
-	return memblock_free(__pa(memblock.reserved.regions),
-		 sizeof(struct memblock_region) * memblock.reserved.max);
-}
-
-/*
- * Reserve memblock.reserved.regions
- */
-int __init_memblock memblock_reserve_reserved_regions(void)
-{
-	if (memblock.reserved.regions == memblock_reserved_init_regions)
-		return 0;
-
-	return memblock_reserve(__pa(memblock.reserved.regions),
-		 sizeof(struct memblock_region) * memblock.reserved.max);
-}
-
 static void __init_memblock memblock_remove_region(struct memblock_type *type, unsigned long r)
 {
 	type->total_size -= type->regions[r].size;
@@ -184,6 +160,18 @@ static void __init_memblock memblock_remove_region(struct memblock_type *type, u
 	}
 }
 
+phys_addr_t __init_memblock get_allocated_memblock_reserved_regions_info(
+					phys_addr_t *addr)
+{
+	if (memblock.reserved.regions == memblock_reserved_init_regions)
+		return 0;
+
+	*addr = __pa(memblock.reserved.regions);
+
+	return PAGE_ALIGN(sizeof(struct memblock_region) *
+			  memblock.reserved.max);
+}
+
 /**
  * memblock_double_array - double the size of the memblock regions array
  * @type: memblock type of the regions array being doubled
@@ -204,6 +192,7 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
 						phys_addr_t new_area_size)
 {
 	struct memblock_region *new_array, *old_array;
+	phys_addr_t old_alloc_size, new_alloc_size;
 	phys_addr_t old_size, new_size, addr;
 	int use_slab = slab_is_available();
 	int *in_slab;
@@ -217,6 +206,12 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
 	/* Calculate new doubled size */
 	old_size = type->max * sizeof(struct memblock_region);
 	new_size = old_size << 1;
+	/*
+	 * We need to allocated new one align to PAGE_SIZE,
+	 *   so we can free them completely later.
+	 */
+	old_alloc_size = PAGE_ALIGN(old_size);
+	new_alloc_size = PAGE_ALIGN(new_size);
 
 	/* Retrieve the slab flag */
 	if (type == &memblock.memory)
@@ -245,11 +240,11 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
 
 		addr = memblock_find_in_range(new_area_start + new_area_size,
 						memblock.current_limit,
-						new_size, sizeof(phys_addr_t));
+						new_alloc_size, PAGE_SIZE);
 		if (!addr && new_area_size)
 			addr = memblock_find_in_range(0,
 					min(new_area_start, memblock.current_limit),
-					new_size, sizeof(phys_addr_t));
+					new_alloc_size, PAGE_SIZE);
 
 		new_array = addr ? __va(addr) : 0;
 	}
@@ -279,13 +274,13 @@ static int __init_memblock memblock_double_array(struct memblock_type *type,
 		kfree(old_array);
 	else if (old_array != memblock_memory_init_regions &&
 		 old_array != memblock_reserved_init_regions)
-		memblock_free(__pa(old_array), old_size);
+		memblock_free(__pa(old_array), old_alloc_size);
 
 	/* Reserve the new array if that comes from the memblock.
 	 * Otherwise, we needn't do it
 	 */
 	if (!use_slab)
-		BUG_ON(memblock_reserve(addr, new_size));
+		BUG_ON(memblock_reserve(addr, new_alloc_size));
 
 	/* Update slab flag */
 	*in_slab = use_slab;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 0d7e3ec8e0f3..427bb291dd0f 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -618,7 +618,7 @@ int __ref add_memory(int nid, u64 start, u64 size)
 		pgdat = hotadd_new_pgdat(nid, start);
 		ret = -ENOMEM;
 		if (!pgdat)
-			goto out;
+			goto error;
 		new_pgdat = 1;
 	}
 
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index d23415c001bc..405573010f99 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -105,27 +105,35 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
 		__free_pages_bootmem(pfn_to_page(i), 0);
 }
 
+static unsigned long __init __free_memory_core(phys_addr_t start,
+				 phys_addr_t end)
+{
+	unsigned long start_pfn = PFN_UP(start);
+	unsigned long end_pfn = min_t(unsigned long,
+				      PFN_DOWN(end), max_low_pfn);
+
+	if (start_pfn > end_pfn)
+		return 0;
+
+	__free_pages_memory(start_pfn, end_pfn);
+
+	return end_pfn - start_pfn;
+}
+
 unsigned long __init free_low_memory_core_early(int nodeid)
 {
 	unsigned long count = 0;
-	phys_addr_t start, end;
+	phys_addr_t start, end, size;
 	u64 i;
 
-	/* free reserved array temporarily so that it's treated as free area */
-	memblock_free_reserved_regions();
-
-	for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL) {
-		unsigned long start_pfn = PFN_UP(start);
-		unsigned long end_pfn = min_t(unsigned long,
-					      PFN_DOWN(end), max_low_pfn);
-		if (start_pfn < end_pfn) {
-			__free_pages_memory(start_pfn, end_pfn);
-			count += end_pfn - start_pfn;
-		}
-	}
+	for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL)
+		count += __free_memory_core(start, end);
+
+	/* free range that is used for reserved array if we allocate it */
+	size = get_allocated_memblock_reserved_regions_info(&start);
+	if (size)
+		count += __free_memory_core(start, start + size);
 
-	/* put region array back? */
-	memblock_reserve_reserved_regions();
 	return count;
 }
 
@@ -274,7 +282,7 @@ void * __init __alloc_bootmem(unsigned long size, unsigned long align,
 	return ___alloc_bootmem(size, align, goal, limit);
 }
 
-static void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
+void * __init ___alloc_bootmem_node_nopanic(pg_data_t *pgdat,
 						   unsigned long size,
 						   unsigned long align,
 						   unsigned long goal,
diff --git a/mm/shmem.c b/mm/shmem.c
index a15a466d0d1d..bd106361be4b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -264,46 +264,55 @@ static int shmem_radix_tree_replace(struct address_space *mapping,
 }
 
 /*
+ * Sometimes, before we decide whether to proceed or to fail, we must check
+ * that an entry was not already brought back from swap by a racing thread.
+ *
+ * Checking page is not enough: by the time a SwapCache page is locked, it
+ * might be reused, and again be SwapCache, using the same swap as before.
+ */
+static bool shmem_confirm_swap(struct address_space *mapping,
+			       pgoff_t index, swp_entry_t swap)
+{
+	void *item;
+
+	rcu_read_lock();
+	item = radix_tree_lookup(&mapping->page_tree, index);
+	rcu_read_unlock();
+	return item == swp_to_radix_entry(swap);
+}
+
+/*
  * Like add_to_page_cache_locked, but error if expected item has gone.
  */
 static int shmem_add_to_page_cache(struct page *page,
 				   struct address_space *mapping,
 				   pgoff_t index, gfp_t gfp, void *expected)
 {
-	int error = 0;
+	int error;
 
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(!PageSwapBacked(page));
 
+	page_cache_get(page);
+	page->mapping = mapping;
+	page->index = index;
+
+	spin_lock_irq(&mapping->tree_lock);
 	if (!expected)
-		error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+		error = radix_tree_insert(&mapping->page_tree, index, page);
+	else
+		error = shmem_radix_tree_replace(mapping, index, expected,
+								 page);
 	if (!error) {
-		page_cache_get(page);
-		page->mapping = mapping;
-		page->index = index;
-
-		spin_lock_irq(&mapping->tree_lock);
-		if (!expected)
-			error = radix_tree_insert(&mapping->page_tree,
-							index, page);
-		else
-			error = shmem_radix_tree_replace(mapping, index,
-							expected, page);
-		if (!error) {
-			mapping->nrpages++;
-			__inc_zone_page_state(page, NR_FILE_PAGES);
-			__inc_zone_page_state(page, NR_SHMEM);
-			spin_unlock_irq(&mapping->tree_lock);
-		} else {
-			page->mapping = NULL;
-			spin_unlock_irq(&mapping->tree_lock);
-			page_cache_release(page);
-		}
-		if (!expected)
-			radix_tree_preload_end();
+		mapping->nrpages++;
+		__inc_zone_page_state(page, NR_FILE_PAGES);
+		__inc_zone_page_state(page, NR_SHMEM);
+		spin_unlock_irq(&mapping->tree_lock);
+	} else {
+		page->mapping = NULL;
+		spin_unlock_irq(&mapping->tree_lock);
+		page_cache_release(page);
 	}
-	if (error)
-		mem_cgroup_uncharge_cache_page(page);
 	return error;
 }
 
@@ -1124,9 +1133,9 @@ repeat:
 		/* We have to do this with page locked to prevent races */
 		lock_page(page);
 		if (!PageSwapCache(page) || page_private(page) != swap.val ||
-		    page->mapping) {
+		    !shmem_confirm_swap(mapping, index, swap)) {
 			error = -EEXIST;	/* try again */
-			goto failed;
+			goto unlock;
 		}
 		if (!PageUptodate(page)) {
 			error = -EIO;
@@ -1142,9 +1151,12 @@ repeat:
 
 		error = mem_cgroup_cache_charge(page, current->mm,
 						gfp & GFP_RECLAIM_MASK);
-		if (!error)
+		if (!error) {
 			error = shmem_add_to_page_cache(page, mapping, index,
 						gfp, swp_to_radix_entry(swap));
+			/* We already confirmed swap, and make no allocation */
+			VM_BUG_ON(error);
+		}
 		if (error)
 			goto failed;
 
@@ -1181,11 +1193,18 @@ repeat:
 		__set_page_locked(page);
 		error = mem_cgroup_cache_charge(page, current->mm,
 						gfp & GFP_RECLAIM_MASK);
-		if (!error)
-			error = shmem_add_to_page_cache(page, mapping, index,
-						gfp, NULL);
 		if (error)
 			goto decused;
+		error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+		if (!error) {
+			error = shmem_add_to_page_cache(page, mapping, index,
+							gfp, NULL);
+			radix_tree_preload_end();
+		}
+		if (error) {
+			mem_cgroup_uncharge_cache_page(page);
+			goto decused;
+		}
 		lru_cache_add_anon(page);
 
 		spin_lock(&info->lock);
@@ -1245,14 +1264,10 @@ decused:
 unacct:
 	shmem_unacct_blocks(info->flags, 1);
 failed:
-	if (swap.val && error != -EINVAL) {
-		struct page *test = find_get_page(mapping, index);
-		if (test && !radix_tree_exceptional_entry(test))
-			page_cache_release(test);
-		/* Have another try if the entry has changed */
-		if (test != swp_to_radix_entry(swap))
-			error = -EEXIST;
-	}
+	if (swap.val && error != -EINVAL &&
+	    !shmem_confirm_swap(mapping, index, swap))
+		error = -EEXIST;
+unlock:
 	if (page) {
 		unlock_page(page);
 		page_cache_release(page);
@@ -1264,7 +1279,7 @@ failed:
 		spin_unlock(&info->lock);
 		goto repeat;
 	}
-	if (error == -EEXIST)
+	if (error == -EEXIST)	/* from above or from radix_tree_insert */
 		goto repeat;
 	return error;
 }
@@ -1594,6 +1609,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
 	struct splice_pipe_desc spd = {
 		.pages = pages,
 		.partial = partial,
+		.nr_pages_max = PIPE_DEF_BUFFERS,
 		.flags = flags,
 		.ops = &page_cache_pipe_buf_ops,
 		.spd_release = spd_release_page,
@@ -1682,7 +1698,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
 	if (spd.nr_pages)
 		error = splice_to_pipe(pipe, &spd);
 
-	splice_shrink_spd(pipe, &spd);
+	splice_shrink_spd(&spd);
 
 	if (error > 0) {
 		*ppos += error;
@@ -1691,98 +1707,6 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos,
 	return error;
 }
 
-/*
- * llseek SEEK_DATA or SEEK_HOLE through the radix_tree.
- */
-static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
-				    pgoff_t index, pgoff_t end, int origin)
-{
-	struct page *page;
-	struct pagevec pvec;
-	pgoff_t indices[PAGEVEC_SIZE];
-	bool done = false;
-	int i;
-
-	pagevec_init(&pvec, 0);
-	pvec.nr = 1;		/* start small: we may be there already */
-	while (!done) {
-		pvec.nr = shmem_find_get_pages_and_swap(mapping, index,
-					pvec.nr, pvec.pages, indices);
-		if (!pvec.nr) {
-			if (origin == SEEK_DATA)
-				index = end;
-			break;
-		}
-		for (i = 0; i < pvec.nr; i++, index++) {
-			if (index < indices[i]) {
-				if (origin == SEEK_HOLE) {
-					done = true;
-					break;
-				}
-				index = indices[i];
-			}
-			page = pvec.pages[i];
-			if (page && !radix_tree_exceptional_entry(page)) {
-				if (!PageUptodate(page))
-					page = NULL;
-			}
-			if (index >= end ||
-			    (page && origin == SEEK_DATA) ||
-			    (!page && origin == SEEK_HOLE)) {
-				done = true;
-				break;
-			}
-		}
-		shmem_deswap_pagevec(&pvec);
-		pagevec_release(&pvec);
-		pvec.nr = PAGEVEC_SIZE;
-		cond_resched();
-	}
-	return index;
-}
-
-static loff_t shmem_file_llseek(struct file *file, loff_t offset, int origin)
-{
-	struct address_space *mapping;
-	struct inode *inode;
-	pgoff_t start, end;
-	loff_t new_offset;
-
-	if (origin != SEEK_DATA && origin != SEEK_HOLE)
-		return generic_file_llseek_size(file, offset, origin,
-							MAX_LFS_FILESIZE);
-	mapping = file->f_mapping;
-	inode = mapping->host;
-	mutex_lock(&inode->i_mutex);
-	/* We're holding i_mutex so we can access i_size directly */
-
-	if (offset < 0)
-		offset = -EINVAL;
-	else if (offset >= inode->i_size)
-		offset = -ENXIO;
-	else {
-		start = offset >> PAGE_CACHE_SHIFT;
-		end = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-		new_offset = shmem_seek_hole_data(mapping, start, end, origin);
-		new_offset <<= PAGE_CACHE_SHIFT;
-		if (new_offset > offset) {
-			if (new_offset < inode->i_size)
-				offset = new_offset;
-			else if (origin == SEEK_DATA)
-				offset = -ENXIO;
-			else
-				offset = inode->i_size;
-		}
-	}
-
-	if (offset >= 0 && offset != file->f_pos) {
-		file->f_pos = offset;
-		file->f_version = 0;
-	}
-	mutex_unlock(&inode->i_mutex);
-	return offset;
-}
-
 static long shmem_fallocate(struct file *file, int mode, loff_t offset,
 							 loff_t len)
 {
@@ -2786,7 +2710,7 @@ static const struct address_space_operations shmem_aops = {
 static const struct file_operations shmem_file_operations = {
 	.mmap		= shmem_mmap,
 #ifdef CONFIG_TMPFS
-	.llseek		= shmem_file_llseek,
+	.llseek		= generic_file_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read	= shmem_file_aio_read,
diff --git a/mm/sparse.c b/mm/sparse.c
index 6a4bf9160e85..c7bb952400c8 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -275,8 +275,9 @@ static unsigned long * __init
 sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 					 unsigned long size)
 {
-	pg_data_t *host_pgdat;
-	unsigned long goal;
+	unsigned long goal, limit;
+	unsigned long *p;
+	int nid;
 	/*
 	 * A page may contain usemaps for other sections preventing the
 	 * page being freed and making a section unremovable while
@@ -287,10 +288,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
 	 * from the same section as the pgdat where possible to avoid
 	 * this problem.
 	 */
-	goal = __pa(pgdat) & PAGE_SECTION_MASK;
-	host_pgdat = NODE_DATA(early_pfn_to_nid(goal >> PAGE_SHIFT));
-	return __alloc_bootmem_node_nopanic(host_pgdat, size,
-					    SMP_CACHE_BYTES, goal);
+	goal = __pa(pgdat) & (PAGE_SECTION_MASK << PAGE_SHIFT);
+	limit = goal + (1UL << PA_SECTION_SHIFT);
+	nid = early_pfn_to_nid(goal >> PAGE_SHIFT);
+again:
+	p = ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size,
+					  SMP_CACHE_BYTES, goal, limit);
+	if (!p && limit) {
+		limit = 0;
+		goto again;
+	}
+	return p;
 }
 
 static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index eeb3bc9d1d36..661576324c7f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2955,14 +2955,17 @@ int kswapd_run(int nid)
 }
 
 /*
- * Called by memory hotplug when all memory in a node is offlined.
+ * Called by memory hotplug when all memory in a node is offlined.  Caller must
+ * hold lock_memory_hotplug().
 */
 void kswapd_stop(int nid)
 {
 	struct task_struct *kswapd = NODE_DATA(nid)->kswapd;
 
-	if (kswapd)
+	if (kswapd) {
 		kthread_stop(kswapd);
+		NODE_DATA(nid)->kswapd = NULL;
+	}
 }
 
 static int __init kswapd_init(void)
