From 2f772e6cadf8ad8fca38927b17e6be028be669f5 Mon Sep 17 00:00:00 2001 From: Seth Jennings Date: Mon, 29 Apr 2013 15:08:34 -0700 Subject: mm: break up swap_writepage() for frontswap backends swap_writepage() is currently where frontswap hooks into the swap write path to capture pages with the frontswap_store() function. However, if a frontswap backend wants to "resume" the writeback of a page to the swap device, it can't call swap_writepage() as the page will simply reenter the backend. This patch separates swap_writepage() into a top and bottom half, the bottom half named __swap_writepage() to allow a frontswap backend, like zswap, to resume writeback beyond the frontswap_store() hook. __add_to_swap_cache() is also made non-static so that the page for which writeback is to be resumed can be added to the swap cache. Signed-off-by: Seth Jennings Signed-off-by: Bob Liu Acked-by: Minchan Kim Reviewed-by: Dan Magenheimer Cc: Konrad Rzeszutek Wilk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_io.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'mm/page_io.c') diff --git a/mm/page_io.c b/mm/page_io.c index 78eee32ee486..8e6bcf176cfb 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -185,9 +185,7 @@ bad_bmap: */ int swap_writepage(struct page *page, struct writeback_control *wbc) { - struct bio *bio; - int ret = 0, rw = WRITE; - struct swap_info_struct *sis = page_swap_info(page); + int ret = 0; if (try_to_free_swap(page)) { unlock_page(page); @@ -199,6 +197,16 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) end_page_writeback(page); goto out; } + ret = __swap_writepage(page, wbc); +out: + return ret; +} + +int __swap_writepage(struct page *page, struct writeback_control *wbc) +{ + struct bio *bio; + int ret = 0, rw = WRITE; + struct swap_info_struct *sis = page_swap_info(page); if (sis->flags & SWP_FILE) { struct kiocb kiocb; -- cgit v1.2.3 From 1eec6702a80e04416d528846a5ff2122484d95ec Mon 
Sep 17 00:00:00 2001 From: Seth Jennings Date: Mon, 29 Apr 2013 15:08:35 -0700 Subject: mm: allow for outstanding swap writeback accounting To prevent flooding the swap device with writebacks, frontswap backends need to count and limit the number of outstanding writebacks. The incrementing of the counter can be done before the call to __swap_writepage(). However, the caller must receive a notification when the writeback completes in order to decrement the counter. To achieve this functionality, this patch modifies __swap_writepage() to take the bio completion callback function as an argument. end_swap_bio_write(), the normal bio completion function, is also made non-static so that code doing the accounting can call it after the accounting is done. There should be no behavioural change to existing code. Signed-off-by: Seth Jennings Signed-off-by: Bob Liu Acked-by: Minchan Kim Reviewed-by: Dan Magenheimer Cc: Konrad Rzeszutek Wilk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 4 +++- mm/page_io.c | 9 +++++---- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'mm/page_io.c') diff --git a/include/linux/swap.h b/include/linux/swap.h index 76f6c3b31235..b5b12c71a2af 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -330,7 +330,9 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t ent) /* linux/mm/page_io.c */ extern int swap_readpage(struct page *); extern int swap_writepage(struct page *page, struct writeback_control *wbc); -extern int __swap_writepage(struct page *page, struct writeback_control *wbc); +extern void end_swap_bio_write(struct bio *bio, int err); +extern int __swap_writepage(struct page *page, struct writeback_control *wbc, + void (*end_write_func)(struct bio *, int)); extern int swap_set_page_dirty(struct page *page); extern void end_swap_bio_read(struct bio *bio, int err); diff --git a/mm/page_io.c b/mm/page_io.c index 8e6bcf176cfb..8e0e5c0e7cdb 100644 --- a/mm/page_io.c +++ 
b/mm/page_io.c @@ -42,7 +42,7 @@ static struct bio *get_swap_bio(gfp_t gfp_flags, return bio; } -static void end_swap_bio_write(struct bio *bio, int err) +void end_swap_bio_write(struct bio *bio, int err) { const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); struct page *page = bio->bi_io_vec[0].bv_page; @@ -197,12 +197,13 @@ int swap_writepage(struct page *page, struct writeback_control *wbc) end_page_writeback(page); goto out; } - ret = __swap_writepage(page, wbc); + ret = __swap_writepage(page, wbc, end_swap_bio_write); out: return ret; } -int __swap_writepage(struct page *page, struct writeback_control *wbc) +int __swap_writepage(struct page *page, struct writeback_control *wbc, + void (*end_write_func)(struct bio *, int)) { struct bio *bio; int ret = 0, rw = WRITE; @@ -234,7 +235,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc) return ret; } - bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write); + bio = get_swap_bio(GFP_NOIO, page, end_write_func); if (bio == NULL) { set_page_dirty(page); unlock_page(page); -- cgit v1.2.3 From 2d30d31ea3c5be426ce25607b9bd1835acb85e0a Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Mon, 29 Apr 2013 15:08:47 -0700 Subject: swap: redirty page if page write fails on swap file Since commit 62c230bc1790 ("mm: add support for a filesystem to activate swap files and use direct_IO for writing swap pages"), swap_writepage() calls direct_IO on swap files. However, in that case the page isn't redirtied if I/O fails, and is therefore handled afterwards as if it has been successfully written to the swap file, leading to memory corruption when the page is eventually swapped back in. This patch sets the page dirty when direct_IO() fails. It fixes a memory corruption that happened while using swap-over-NFS. 
Signed-off-by: Jerome Marchand Acked-by: Johannes Weiner Acked-by: Mel Gorman Cc: Hugh Dickins Cc: [3.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'mm/page_io.c') diff --git a/mm/page_io.c b/mm/page_io.c index 8e0e5c0e7cdb..eb3300fa89dc 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -231,6 +231,8 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, if (ret == PAGE_SIZE) { count_vm_event(PSWPOUT); ret = 0; + } else { + set_page_dirty(page); } return ret; } -- cgit v1.2.3 From 0cdc444a67ccdbd58bfbcba865cb17a9f17a7691 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Mon, 29 Apr 2013 15:08:48 -0700 Subject: mm: swap: mark swap pages writeback before queueing for direct IO As pointed out by Andrew Morton, the swap-over-NFS writeback is not setting PageWriteback before it is queued for direct IO. While swap pages do not participate in BDI or process dirty accounting and the IO is synchronous, the writeback bit is still required and not setting it in this case was an oversight. swapoff depends on the page writeback to synchronise all pending writes on a swap page before it is reused. Swapcache freeing and reuse depend on checking the PageWriteback under lock to ensure the page is safe to reuse. Direct IO handlers and the direct IO handler for NFS do not deal with PageWriteback as they are synchronous writes. In the case of NFS, it schedules pages (or a page in the case of swap) for IO and then waits synchronously for IO to complete in nfs_direct_write(). It is recognised that this is a slowdown from normal swap handling which is asynchronous and uses a completion handler. Shoving PageWriteback handling down into direct IO handlers looks like a bad fit to handle the swap case although it may have to be dealt with some day if swap is converted to use direct IO in general and bmap is finally done away with.
At that point it will be necessary to refit asynchronous direct IO with completion handlers onto the swap subsystem. As swapcache currently depends on PageWriteback to protect against races, this patch sets PageWriteback under the page lock before queueing it for direct IO. It is cleared when the direct IO handler returns. IO errors are treated similarly to the direct-to-bio case, except that PageError is not set because, in the case of swap-over-NFS, the error is likely to be transient. It was asked what prevents such a page being reclaimed in parallel. With this patch applied, such a page will now be skipped (most of the time) or blocked until the writeback completes. Reclaim checks PageWriteback under the page lock before calling try_to_free_swap and the page lock should prevent the page being requeued for IO before it is freed. This and Jerome's related patch should be considered for -stable as far back as 3.6 when swap-over-NFS was introduced. [akpm@linux-foundation.org: use pr_err_ratelimited()] [akpm@linux-foundation.org: remove hopefully-unneeded cast in printk] Signed-off-by: Mel Gorman Cc: Jerome Marchand Cc: Hugh Dickins Cc: [3.6+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_io.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'mm/page_io.c') diff --git a/mm/page_io.c b/mm/page_io.c index eb3300fa89dc..bb5d75274686 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -223,6 +223,7 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, kiocb.ki_left = PAGE_SIZE; kiocb.ki_nbytes = PAGE_SIZE; + set_page_writeback(page); unlock_page(page); ret = mapping->a_ops->direct_IO(KERNEL_WRITE, &kiocb, &iov, @@ -232,8 +233,22 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, count_vm_event(PSWPOUT); ret = 0; } else { + /* + * In the case of swap-over-nfs, this can be a + * temporary failure if the system has limited + * memory for allocating transmit buffers.
+ * Mark the page dirty and avoid + * rotate_reclaimable_page but rate-limit the + * messages but do not flag PageError like + * the normal direct-to-bio case as it could + * be temporary. + */ set_page_dirty(page); + ClearPageReclaim(page); + pr_err_ratelimited("Write error on dio swapfile (%Lu)\n", + page_file_offset(page)); } + end_page_writeback(page); return ret; } -- cgit v1.2.3