Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--	mm/memory-failure.c	336
1 file changed, 152 insertions, 184 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index f1aa6433f404..c0bb186bba62 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -65,6 +65,33 @@ int sysctl_memory_failure_recovery __read_mostly = 1;
 
 atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
 
+static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, bool release)
+{
+	if (hugepage_or_freepage) {
+		/*
+		 * Doing this check for free pages is also fine since dissolve_free_huge_page
+		 * returns 0 for non-hugetlb pages as well.
+		 */
+		if (dissolve_free_huge_page(page) || !take_page_off_buddy(page))
+			/*
+			 * We could fail to take the target page off the buddy
+			 * list, for example due to a racy page allocation, but
+			 * that's acceptable because the soft-offlined page is
+			 * not broken, and if someone really wants to use it,
+			 * they should take it.
+			 */
+			return false;
+	}
+
+	SetPageHWPoison(page);
+	if (release)
+		put_page(page);
+	page_ref_inc(page);
+	num_poisoned_pages_inc();
+
+	return true;
+}
+
 #if defined(CONFIG_HWPOISON_INJECT) || defined(CONFIG_HWPOISON_INJECT_MODULE)
 
 u32 hwpoison_filter_enable = 0;
@@ -484,11 +511,12 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
 	struct vm_area_struct *vma;
 	struct task_struct *tsk;
 	struct address_space *mapping = page->mapping;
+	pgoff_t pgoff;
 
 	i_mmap_lock_read(mapping);
 	read_lock(&tasklist_lock);
+	pgoff = page_to_pgoff(page);
 	for_each_process(tsk) {
-		pgoff_t pgoff = page_to_pgoff(page);
 		struct task_struct *t = task_early_kill(tsk, force_early);
 
 		if (!t)
@@ -554,6 +582,7 @@ static const char * const action_page_types[] = {
 	[MF_MSG_BUDDY]			= "free buddy page",
 	[MF_MSG_BUDDY_2ND]		= "free buddy page (2nd try)",
 	[MF_MSG_DAX]			= "dax page",
+	[MF_MSG_UNSPLIT_THP]		= "unsplit thp",
 	[MF_MSG_UNKNOWN]		= "unknown page",
 };
 
@@ -824,7 +853,6 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 #define sc		((1UL << PG_swapcache) | (1UL << PG_swapbacked))
 #define unevict		(1UL << PG_unevictable)
 #define mlock		(1UL << PG_mlocked)
-#define writeback	(1UL << PG_writeback)
 #define lru		(1UL << PG_lru)
 #define head		(1UL << PG_head)
 #define slab		(1UL << PG_slab)
@@ -873,7 +901,6 @@ static struct page_state {
 #undef sc
 #undef unevict
 #undef mlock
-#undef writeback
 #undef lru
 #undef head
 #undef slab
@@ -925,7 +952,7 @@ static int page_action(struct page_state *ps, struct page *p,
  * Return: return 0 if failed to grab the refcount, otherwise true (some
  * non-zero value.)
  */
-int get_hwpoison_page(struct page *page)
+static int get_hwpoison_page(struct page *page)
 {
 	struct page *head = compound_head(page);
 
@@ -954,7 +981,6 @@ int get_hwpoison_page(struct page *page)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(get_hwpoison_page);
 
 /*
  * Do all that is necessary to remove user space mappings. Unmap
@@ -1006,7 +1032,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
 	 */
 	mapping = page_mapping(hpage);
 	if (!(flags & MF_MUST_KILL) && !PageDirty(hpage) && mapping &&
-	    mapping_cap_writeback_dirty(mapping)) {
+	    mapping_can_writeback(mapping)) {
 		if (page_mkclean(hpage)) {
 			SetPageDirty(hpage);
 		} else {
@@ -1104,6 +1130,25 @@ static int identify_page_state(unsigned long pfn, struct page *p,
 	return page_action(ps, p, pfn);
 }
 
+static int try_to_split_thp_page(struct page *page, const char *msg)
+{
+	lock_page(page);
+	if (!PageAnon(page) || unlikely(split_huge_page(page))) {
+		unsigned long pfn = page_to_pfn(page);
+
+		unlock_page(page);
+		if (!PageAnon(page))
+			pr_info("%s: %#lx: non anonymous thp\n", msg, pfn);
+		else
+			pr_info("%s: %#lx: thp split failed\n", msg, pfn);
+		put_page(page);
+		return -EBUSY;
+	}
+	unlock_page(page);
+
+	return 0;
+}
+
 static int memory_failure_hugetlb(unsigned long pfn, int flags)
 {
 	struct page *p = pfn_to_page(pfn);
@@ -1145,7 +1190,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
 		pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
 		num_poisoned_pages_dec();
 		unlock_page(head);
-		put_hwpoison_page(head);
+		put_page(head);
 		return 0;
 	}
 
@@ -1326,23 +1371,11 @@ int memory_failure(unsigned long pfn, int flags)
 	}
 
 	if (PageTransHuge(hpage)) {
-		lock_page(p);
-		if (!PageAnon(p) || unlikely(split_huge_page(p))) {
-			unlock_page(p);
-			if (!PageAnon(p))
-				pr_err("Memory failure: %#lx: non anonymous thp\n",
-					pfn);
-			else
-				pr_err("Memory failure: %#lx: thp split failed\n",
-					pfn);
-			if (TestClearPageHWPoison(p))
-				num_poisoned_pages_dec();
-			put_hwpoison_page(p);
+		if (try_to_split_thp_page(p, "Memory Failure") < 0) {
+			action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
 			return -EBUSY;
 		}
-		unlock_page(p);
 		VM_BUG_ON_PAGE(!page_count(p), p);
-		hpage = compound_head(p);
 	}
 
 	/*
@@ -1382,10 +1415,7 @@ int memory_failure(unsigned long pfn, int flags)
 	 * page_remove_rmap() in try_to_unmap_one(). So to determine page status
 	 * correctly, we save a copy of the page flags at this time.
	 */
-	if (PageHuge(p))
-		page_flags = hpage->flags;
-	else
-		page_flags = p->flags;
+	page_flags = p->flags;
 
 	/*
 	 * unpoison always clear PG_hwpoison inside page lock
@@ -1394,14 +1424,14 @@
 		pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
 		num_poisoned_pages_dec();
 		unlock_page(p);
-		put_hwpoison_page(p);
+		put_page(p);
 		return 0;
 	}
 
 	if (hwpoison_filter(p)) {
 		if (TestClearPageHWPoison(p))
 			num_poisoned_pages_dec();
 		unlock_page(p);
-		put_hwpoison_page(p);
+		put_page(p);
 		return 0;
 	}
@@ -1417,11 +1447,8 @@
 	/*
 	 * Now take care of user space mappings.
 	 * Abort on fail: __delete_from_page_cache() assumes unmapped page.
-	 *
-	 * When the raw error page is thp tail page, hpage points to the raw
-	 * page after thp split.
 	 */
-	if (!hwpoison_user_mappings(p, pfn, flags, &hpage)) {
+	if (!hwpoison_user_mappings(p, pfn, flags, &p)) {
 		action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
 		res = -EBUSY;
 		goto out;
@@ -1638,24 +1665,14 @@ int unpoison_memory(unsigned long pfn)
 	}
 	unlock_page(page);
 
-	put_hwpoison_page(page);
+	put_page(page);
 	if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1))
-		put_hwpoison_page(page);
+		put_page(page);
 
 	return 0;
 }
 EXPORT_SYMBOL(unpoison_memory);
 
-static struct page *new_page(struct page *p, unsigned long private)
-{
-	struct migration_target_control mtc = {
-		.nid = page_to_nid(p),
-		.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
-	};
-
-	return alloc_migration_target(p, (unsigned long)&mtc);
-}
-
 /*
  * Safely get reference count of an arbitrary page.
 * Returns 0 for a free page, -EIO for a zero refcount page
@@ -1680,6 +1697,9 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags)
 		} else if (is_free_buddy_page(p)) {
 			pr_info("%s: %#lx free buddy page\n", __func__, pfn);
 			ret = 0;
+		} else if (page_count(p)) {
+			/* raced with allocation */
+			ret = -EBUSY;
 		} else {
 			pr_info("%s: %#lx: unknown zero refcount page type %lx\n",
 				__func__, pfn, p->flags);
@@ -1696,12 +1716,15 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags)
 {
 	int ret = __get_any_page(page, pfn, flags);
 
+	if (ret == -EBUSY)
+		ret = __get_any_page(page, pfn, flags);
+
 	if (ret == 1 && !PageHuge(page) &&
 	    !PageLRU(page) && !__PageMovable(page)) {
 		/*
 		 * Try to free it.
 		 */
-		put_hwpoison_page(page);
+		put_page(page);
 		shake_page(page, 1);
 
 		/*
@@ -1710,7 +1733,7 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags)
 		ret = __get_any_page(page, pfn, 0);
 		if (ret == 1 && !PageLRU(page)) {
 			/* Drop page reference which is from __get_any_page() */
-			put_hwpoison_page(page);
+			put_page(page);
 			pr_info("soft_offline: %#lx: unknown non LRU page type %lx (%pGp)\n",
 				pfn, page->flags, &page->flags);
 			return -EIO;
@@ -1719,69 +1742,55 @@ static int get_any_page(struct page *page, unsigned long pfn, int flags)
 	return ret;
 }
 
-static int soft_offline_huge_page(struct page *page, int flags)
+static bool isolate_page(struct page *page, struct list_head *pagelist)
 {
-	int ret;
-	unsigned long pfn = page_to_pfn(page);
-	struct page *hpage = compound_head(page);
-	LIST_HEAD(pagelist);
+	bool isolated = false;
+	bool lru = PageLRU(page);
 
-	/*
-	 * This double-check of PageHWPoison is to avoid the race with
-	 * memory_failure(). See also comment in __soft_offline_page().
-	 */
-	lock_page(hpage);
-	if (PageHWPoison(hpage)) {
-		unlock_page(hpage);
-		put_hwpoison_page(hpage);
-		pr_info("soft offline: %#lx hugepage already poisoned\n", pfn);
-		return -EBUSY;
+	if (PageHuge(page)) {
+		isolated = isolate_huge_page(page, pagelist);
+	} else {
+		if (lru)
+			isolated = !isolate_lru_page(page);
+		else
+			isolated = !isolate_movable_page(page, ISOLATE_UNEVICTABLE);
+
+		if (isolated)
+			list_add(&page->lru, pagelist);
 	}
-	unlock_page(hpage);
 
-	ret = isolate_huge_page(hpage, &pagelist);
+	if (isolated && lru)
+		inc_node_page_state(page, NR_ISOLATED_ANON +
				    page_is_file_lru(page));
+
 	/*
-	 * get_any_page() and isolate_huge_page() takes a refcount each,
-	 * so need to drop one here.
+	 * If we succeed in isolating the page, we grabbed another refcount on
+	 * the page, so we can safely drop the one we got from get_any_page().
+	 * If we failed to isolate the page, it means that we cannot go further
+	 * and we will return an error, so drop the reference we got from
+	 * get_any_page() as well.
 	 */
-	put_hwpoison_page(hpage);
-	if (!ret) {
-		pr_info("soft offline: %#lx hugepage failed to isolate\n", pfn);
-		return -EBUSY;
-	}
-
-	ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
-				MIGRATE_SYNC, MR_MEMORY_FAILURE);
-	if (ret) {
-		pr_info("soft offline: %#lx: hugepage migration failed %d, type %lx (%pGp)\n",
-			pfn, ret, page->flags, &page->flags);
-		if (!list_empty(&pagelist))
-			putback_movable_pages(&pagelist);
-		if (ret > 0)
-			ret = -EIO;
-	} else {
-		/*
-		 * We set PG_hwpoison only when the migration source hugepage
-		 * was successfully dissolved, because otherwise hwpoisoned
-		 * hugepage remains on free hugepage list, then userspace will
-		 * find it as SIGBUS by allocation failure. That's not expected
-		 * in soft-offlining.
-		 */
-		ret = dissolve_free_huge_page(page);
-		if (!ret) {
-			if (set_hwpoison_free_buddy_page(page))
-				num_poisoned_pages_inc();
-			else
-				ret = -EBUSY;
-		}
-	}
-	return ret;
+	put_page(page);
+	return isolated;
 }
 
-static int __soft_offline_page(struct page *page, int flags)
+/*
+ * __soft_offline_page handles hugetlb-pages and non-hugetlb pages.
+ * If the page is a non-dirty unmapped page-cache page, it simply invalidates it.
+ * If the page is mapped, it migrates the contents over.
+ */
+static int __soft_offline_page(struct page *page)
 {
-	int ret;
+	int ret = 0;
 	unsigned long pfn = page_to_pfn(page);
+	struct page *hpage = compound_head(page);
+	char const *msg_page[] = {"page", "hugepage"};
+	bool huge = PageHuge(page);
+	LIST_HEAD(pagelist);
+	struct migration_target_control mtc = {
+		.nid = NUMA_NO_NODE,
+		.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
+	};
 
 	/*
 	 * Check PageHWPoison again inside page lock because PageHWPoison
@@ -1790,121 +1799,75 @@ static int __soft_offline_page(struct page *page, int flags)
 	 * so there's no race between soft_offline_page() and memory_failure().
 	 */
 	lock_page(page);
-	wait_on_page_writeback(page);
+	if (!PageHuge(page))
+		wait_on_page_writeback(page);
 	if (PageHWPoison(page)) {
 		unlock_page(page);
-		put_hwpoison_page(page);
+		put_page(page);
 		pr_info("soft offline: %#lx page already poisoned\n", pfn);
-		return -EBUSY;
+		return 0;
 	}
-	/*
-	 * Try to invalidate first. This should work for
-	 * non dirty unmapped page cache pages.
-	 */
-	ret = invalidate_inode_page(page);
+
+	if (!PageHuge(page))
+		/*
+		 * Try to invalidate first. This should work for
		 * non dirty unmapped page cache pages.
+		 */
+		ret = invalidate_inode_page(page);
 	unlock_page(page);
+
 	/*
 	 * RED-PEN would be better to keep it isolated here, but we
 	 * would need to fix isolation locking first.
 	 */
-	if (ret == 1) {
-		put_hwpoison_page(page);
+	if (ret) {
 		pr_info("soft_offline: %#lx: invalidated\n", pfn);
-		SetPageHWPoison(page);
-		num_poisoned_pages_inc();
+		page_handle_poison(page, false, true);
 		return 0;
 	}
 
-	/*
-	 * Simple invalidation didn't work.
-	 * Try to migrate to a new page instead. migrate.c
-	 * handles a large number of cases for us.
-	 */
-	if (PageLRU(page))
-		ret = isolate_lru_page(page);
-	else
-		ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
-	/*
-	 * Drop page reference which is came from get_any_page()
-	 * successful isolate_lru_page() already took another one.
-	 */
-	put_hwpoison_page(page);
-	if (!ret) {
-		LIST_HEAD(pagelist);
-		/*
-		 * After isolated lru page, the PageLRU will be cleared,
-		 * so use !__PageMovable instead for LRU page's mapping
-		 * cannot have PAGE_MAPPING_MOVABLE.
-		 */
-		if (!__PageMovable(page))
-			inc_node_page_state(page, NR_ISOLATED_ANON +
-						page_is_file_lru(page));
-		list_add(&page->lru, &pagelist);
-		ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
-					MIGRATE_SYNC, MR_MEMORY_FAILURE);
-		if (ret) {
+	if (isolate_page(hpage, &pagelist)) {
+		ret = migrate_pages(&pagelist, alloc_migration_target, NULL,
+			(unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_FAILURE);
+		if (!ret) {
+			bool release = !huge;
+
+			if (!page_handle_poison(page, huge, release))
+				ret = -EBUSY;
+		} else {
 			if (!list_empty(&pagelist))
 				putback_movable_pages(&pagelist);
-			pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n",
-				pfn, ret, page->flags, &page->flags);
+			pr_info("soft offline: %#lx: %s migration failed %d, type %lx (%pGp)\n",
+				pfn, msg_page[huge], ret, page->flags, &page->flags);
 			if (ret > 0)
 				ret = -EIO;
 		}
 	} else {
-		pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx (%pGp)\n",
-			pfn, ret, page_count(page), page->flags, &page->flags);
+		pr_info("soft offline: %#lx: %s isolation failed: %d, page count %d, type %lx (%pGp)\n",
+			pfn, msg_page[huge], ret, page_count(page), page->flags, &page->flags);
+		ret = -EBUSY;
 	}
 	return ret;
 }
 
-static int soft_offline_in_use_page(struct page *page, int flags)
+static int soft_offline_in_use_page(struct page *page)
 {
-	int ret;
-	int mt;
 	struct page *hpage = compound_head(page);
 
-	if (!PageHuge(page) && PageTransHuge(hpage)) {
-		lock_page(page);
-		if (!PageAnon(page) || unlikely(split_huge_page(page))) {
-			unlock_page(page);
-			if (!PageAnon(page))
-				pr_info("soft offline: %#lx: non anonymous thp\n", page_to_pfn(page));
-			else
-				pr_info("soft offline: %#lx: thp split failed\n", page_to_pfn(page));
-			put_hwpoison_page(page);
+	if (!PageHuge(page) && PageTransHuge(hpage))
+		if (try_to_split_thp_page(page, "soft offline") < 0)
 			return -EBUSY;
-		}
-		unlock_page(page);
-	}
-
-	/*
-	 * Setting MIGRATE_ISOLATE here ensures that the page will be linked
-	 * to free list immediately (not via pcplist) when released after
-	 * successful page migration. Otherwise we can't guarantee that the
-	 * page is really free after put_page() returns, so
-	 * set_hwpoison_free_buddy_page() highly likely fails.
-	 */
-	mt = get_pageblock_migratetype(page);
-	set_pageblock_migratetype(page, MIGRATE_ISOLATE);
-	if (PageHuge(page))
-		ret = soft_offline_huge_page(page, flags);
-	else
-		ret = __soft_offline_page(page, flags);
-	set_pageblock_migratetype(page, mt);
-	return ret;
+	return __soft_offline_page(page);
 }
 
 static int soft_offline_free_page(struct page *page)
 {
-	int rc = dissolve_free_huge_page(page);
+	int rc = 0;
+
+	if (!page_handle_poison(page, true, false))
+		rc = -EBUSY;
 
-	if (!rc) {
-		if (set_hwpoison_free_buddy_page(page))
-			num_poisoned_pages_inc();
-		else
-			rc = -EBUSY;
-	}
 	return rc;
 }
 
@@ -1934,6 +1897,7 @@ int soft_offline_page(unsigned long pfn, int flags)
 {
 	int ret;
 	struct page *page;
+	bool try_again = true;
 
 	if (!pfn_valid(pfn))
 		return -ENXIO;
@@ -1945,18 +1909,22 @@ int soft_offline_page(unsigned long pfn, int flags)
 	if (PageHWPoison(page)) {
 		pr_info("soft offline: %#lx page already poisoned\n", pfn);
 		if (flags & MF_COUNT_INCREASED)
-			put_hwpoison_page(page);
-		return -EBUSY;
+			put_page(page);
+		return 0;
 	}
 
+retry:
 	get_online_mems();
 	ret = get_any_page(page, pfn, flags);
 	put_online_mems();
 
 	if (ret > 0)
-		ret = soft_offline_in_use_page(page, flags);
+		ret = soft_offline_in_use_page(page);
 	else if (ret == 0)
-		ret = soft_offline_free_page(page);
+		if (soft_offline_free_page(page) && try_again) {
+			try_again = false;
+			goto retry;
+		}
 
 	return ret;
 }
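The rework above funnels free pages, in-use pages and hugepages through the new page_handle_poison() helper, and soft offline now reports an already-poisoned page as success (0) rather than -EBUSY. As a usage illustration (not part of the patch), the soft_offline_page() entry point can be exercised from user space with madvise(MADV_SOFT_OFFLINE). This is a minimal sketch, assuming a kernel built with CONFIG_MEMORY_FAILURE and a caller with CAP_SYS_ADMIN; the MADV_SOFT_OFFLINE fallback value comes from asm-generic/mman-common.h:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

/* Not exposed by every libc; value from asm-generic/mman-common.h. */
#ifndef MADV_SOFT_OFFLINE
#define MADV_SOFT_OFFLINE 101
#endif

int main(void)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	void *buf;

	if (posix_memalign(&buf, pagesize, pagesize)) {
		perror("posix_memalign");
		return 1;
	}
	memset(buf, 'x', pagesize);	/* fault the page in so it is "in use" */

	/*
	 * Soft-offline the backing page: its contents are migrated (or the
	 * page is invalidated if it were a clean, unmapped page-cache page),
	 * and the old physical page is marked HWPoison and withdrawn from
	 * the allocator, per page_handle_poison() above.
	 */
	if (madvise(buf, pagesize, MADV_SOFT_OFFLINE)) {
		perror("madvise(MADV_SOFT_OFFLINE)");
		return 1;
	}

	/* The virtual range stays usable; it is now backed by a new page. */
	printf("after soft offline: %c\n", ((char *)buf)[0]);
	free(buf);
	return 0;
}

Writing a physical address to /sys/devices/system/memory/soft_offline_page reaches the same soft_offline_page() entry point; with this series applied, repeating either operation on the same page succeeds instead of returning -EBUSY.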
