diff options
author | Zhang Yi <yi.zhang@huawei.com> | 2025-07-07 22:08:04 +0800 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2025-08-23 16:49:41 +0200 |
commit | dc3588c04debe4cfe026797c43b830bd65a8a3ba (patch) | |
tree | 5a548f4911492b5f540fb028f60aa710d19232e3 | |
parent | 25bf10be219d37d2fb221c93816a913f5f735530 (diff) |
ext4: process folios writeback in bytes
commit 1bfe6354e0975fe89c3d25e81b6546d205556a4b upstream.
Since ext4 supports large folios, processing writebacks in pages is no
longer appropriate, it can be modified to process writebacks in bytes.
Suggested-by: Jan Kara <jack@suse.cz>
Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Link: https://patch.msgid.link/20250707140814.542883-2-yi.zhang@huaweicloud.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- | fs/ext4/inode.c | 70 | ||||
-rw-r--r-- | include/trace/events/ext4.h | 13 |
2 files changed, 42 insertions, 41 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0f316632b8dd..d712986c44da 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1668,11 +1668,12 @@ struct mpage_da_data { unsigned int can_map:1; /* Can writepages call map blocks? */ /* These are internal state of ext4_do_writepages() */ - pgoff_t first_page; /* The first page to write */ - pgoff_t next_page; /* Current page to examine */ - pgoff_t last_page; /* Last page to examine */ + loff_t start_pos; /* The start pos to write */ + loff_t next_pos; /* Current pos to examine */ + loff_t end_pos; /* Last pos to examine */ + /* - * Extent to map - this can be after first_page because that can be + * Extent to map - this can be after start_pos because that can be * fully mapped. We somewhat abuse m_flags to store whether the extent * is delalloc or unwritten. */ @@ -1692,38 +1693,38 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd, struct inode *inode = mpd->inode; struct address_space *mapping = inode->i_mapping; - /* This is necessary when next_page == 0. */ - if (mpd->first_page >= mpd->next_page) + /* This is necessary when next_pos == 0. */ + if (mpd->start_pos >= mpd->next_pos) return; mpd->scanned_until_end = 0; - index = mpd->first_page; - end = mpd->next_page - 1; if (invalidate) { ext4_lblk_t start, last; - start = index << (PAGE_SHIFT - inode->i_blkbits); - last = end << (PAGE_SHIFT - inode->i_blkbits); + start = EXT4_B_TO_LBLK(inode, mpd->start_pos); + last = mpd->next_pos >> inode->i_blkbits; /* * avoid racing with extent status tree scans made by * ext4_insert_delayed_block() */ down_write(&EXT4_I(inode)->i_data_sem); - ext4_es_remove_extent(inode, start, last - start + 1); + ext4_es_remove_extent(inode, start, last - start); up_write(&EXT4_I(inode)->i_data_sem); } folio_batch_init(&fbatch); - while (index <= end) { - nr = filemap_get_folios(mapping, &index, end, &fbatch); + index = mpd->start_pos >> PAGE_SHIFT; + end = mpd->next_pos >> PAGE_SHIFT; + while (index < end) { + nr = filemap_get_folios(mapping, &index, end - 1, &fbatch); if (nr == 0) break; for (i = 0; i < nr; i++) { struct folio *folio = fbatch.folios[i]; - if (folio->index < mpd->first_page) + if (folio_pos(folio) < mpd->start_pos) continue; - if (folio_next_index(folio) - 1 > end) + if (folio_next_index(folio) > end) continue; BUG_ON(!folio_test_locked(folio)); BUG_ON(folio_test_writeback(folio)); @@ -2025,7 +2026,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, static void mpage_folio_done(struct mpage_da_data *mpd, struct folio *folio) { - mpd->first_page += folio_nr_pages(folio); + mpd->start_pos += folio_size(folio); folio_unlock(folio); } @@ -2035,7 +2036,7 @@ static int mpage_submit_folio(struct mpage_da_data *mpd, struct folio *folio) loff_t size; int err; - BUG_ON(folio->index != mpd->first_page); + WARN_ON_ONCE(folio_pos(folio) != mpd->start_pos); folio_clear_dirty_for_io(folio); /* * We have to be very careful here! Nothing protects writeback path @@ -2447,7 +2448,7 @@ update_disksize: * Update on-disk size after IO is submitted. Races with * truncate are avoided by checking i_size under i_data_sem. */ - disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT; + disksize = mpd->start_pos; if (disksize > READ_ONCE(EXT4_I(inode)->i_disksize)) { int err2; loff_t i_size; @@ -2550,8 +2551,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) struct address_space *mapping = mpd->inode->i_mapping; struct folio_batch fbatch; unsigned int nr_folios; - pgoff_t index = mpd->first_page; - pgoff_t end = mpd->last_page; + pgoff_t index = mpd->start_pos >> PAGE_SHIFT; + pgoff_t end = mpd->end_pos >> PAGE_SHIFT; xa_mark_t tag; int i, err = 0; int blkbits = mpd->inode->i_blkbits; @@ -2566,7 +2567,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) tag = PAGECACHE_TAG_DIRTY; mpd->map.m_len = 0; - mpd->next_page = index; + mpd->next_pos = mpd->start_pos; if (ext4_should_journal_data(mpd->inode)) { handle = ext4_journal_start(mpd->inode, EXT4_HT_WRITE_PAGE, bpp); @@ -2597,7 +2598,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) goto out; /* If we can't merge this page, we are done. */ - if (mpd->map.m_len > 0 && mpd->next_page != folio->index) + if (mpd->map.m_len > 0 && + mpd->next_pos != folio_pos(folio)) goto out; if (handle) { @@ -2643,8 +2645,8 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd) } if (mpd->map.m_len == 0) - mpd->first_page = folio->index; - mpd->next_page = folio_next_index(folio); + mpd->start_pos = folio_pos(folio); + mpd->next_pos = folio_pos(folio) + folio_size(folio); /* * Writeout when we cannot modify metadata is simple. * Just submit the page. For data=journal mode we @@ -2787,18 +2789,18 @@ static int ext4_do_writepages(struct mpage_da_data *mpd) writeback_index = mapping->writeback_index; if (writeback_index) cycled = 0; - mpd->first_page = writeback_index; - mpd->last_page = -1; + mpd->start_pos = writeback_index << PAGE_SHIFT; + mpd->end_pos = LLONG_MAX; } else { - mpd->first_page = wbc->range_start >> PAGE_SHIFT; - mpd->last_page = wbc->range_end >> PAGE_SHIFT; + mpd->start_pos = wbc->range_start; + mpd->end_pos = wbc->range_end; } ext4_io_submit_init(&mpd->io_submit, wbc); retry: if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) - tag_pages_for_writeback(mapping, mpd->first_page, - mpd->last_page); + tag_pages_for_writeback(mapping, mpd->start_pos >> PAGE_SHIFT, + mpd->end_pos >> PAGE_SHIFT); blk_start_plug(&plug); /* @@ -2858,7 +2860,7 @@ retry: } mpd->do_map = 1; - trace_ext4_da_write_pages(inode, mpd->first_page, wbc); + trace_ext4_da_write_pages(inode, mpd->start_pos, wbc); ret = mpage_prepare_extent_to_map(mpd); if (!ret && mpd->map.m_len) ret = mpage_map_and_submit_extent(handle, mpd, @@ -2915,8 +2917,8 @@ unplug: blk_finish_plug(&plug); if (!ret && !cycled && wbc->nr_to_write > 0) { cycled = 1; - mpd->last_page = writeback_index - 1; - mpd->first_page = 0; + mpd->end_pos = (writeback_index << PAGE_SHIFT) - 1; + mpd->start_pos = 0; goto retry; } @@ -2926,7 +2928,7 @@ unplug: * Set the writeback_index so that range_cyclic * mode will write it back later */ - mapping->writeback_index = mpd->first_page; + mapping->writeback_index = mpd->start_pos >> PAGE_SHIFT; out_writepages: trace_ext4_writepages_result(inode, wbc, ret, diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 156908641e68..62d52997b5c6 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -483,15 +483,15 @@ TRACE_EVENT(ext4_writepages, ); TRACE_EVENT(ext4_da_write_pages, - TP_PROTO(struct inode *inode, pgoff_t first_page, + TP_PROTO(struct inode *inode, loff_t start_pos, struct writeback_control *wbc), - TP_ARGS(inode, first_page, wbc), + TP_ARGS(inode, start_pos, wbc), TP_STRUCT__entry( __field( dev_t, dev ) __field( ino_t, ino ) - __field( pgoff_t, first_page ) + __field( loff_t, start_pos ) __field( long, nr_to_write ) __field( int, sync_mode ) ), @@ -499,15 +499,14 @@ TRACE_EVENT(ext4_da_write_pages, TP_fast_assign( __entry->dev = inode->i_sb->s_dev; __entry->ino = inode->i_ino; - __entry->first_page = first_page; + __entry->start_pos = start_pos; __entry->nr_to_write = wbc->nr_to_write; __entry->sync_mode = wbc->sync_mode; ), - TP_printk("dev %d,%d ino %lu first_page %lu nr_to_write %ld " - "sync_mode %d", + TP_printk("dev %d,%d ino %lu start_pos 0x%llx nr_to_write %ld sync_mode %d", MAJOR(__entry->dev), MINOR(__entry->dev), - (unsigned long) __entry->ino, __entry->first_page, + (unsigned long) __entry->ino, __entry->start_pos, __entry->nr_to_write, __entry->sync_mode) ); |