From 15fc0bec883c95007a4901fe75f247bd0ca21651 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 20 Jul 2025 07:56:48 +0930 Subject: btrfs: make btrfs_cleanup_ordered_extents() support large folios When hitting a large folio, btrfs_cleanup_ordered_extents() will get the same large folio multiple times, and clearing the same range again and again. Thankfully this is not causing anything wrong, just inefficiency. This is caused by the fact that we're iterating folios using the old page index, thus can hit the same large folio again and again. Enhance it by increasing @index to the index of the folio end, and only increase @index by 1 if we failed to grab a folio. Reviewed-by: Boris Burkov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b77dd22b8cdb..a2de289e662b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -401,10 +401,12 @@ static inline void btrfs_cleanup_ordered_extents(struct btrfs_inode *inode, while (index <= end_index) { folio = filemap_get_folio(inode->vfs_inode.i_mapping, index); - index++; - if (IS_ERR(folio)) + if (IS_ERR(folio)) { + index++; continue; + } + index = folio_end(folio) >> PAGE_SHIFT; /* * Here we just clear all Ordered bits for every page in the * range, then btrfs_mark_ordered_io_finished() will handle -- cgit v1.2.3 From deaf895212da74635a7f0a420e1ecf8f5eca1fe5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Sun, 20 Jul 2025 15:01:39 +0930 Subject: btrfs: fix wrong length parameter for btrfs_cleanup_ordered_extents() Inside nocow_one_range(), if the checksum cloning for data reloc inode failed, we call btrfs_cleanup_ordered_extents() to cleanup the just allocated ordered extents. But unlike extent_clear_unlock_delalloc(), btrfs_cleanup_ordered_extents() requires a length, not an inclusive end bytenr. This can be problematic, as the @end is normally way larger than @len. This means btrfs_cleanup_ordered_extents() can be called on folios out of the correct range, and if the out-of-range folio is under writeback, we can incorrectly clear the ordered flag of the folio, and trigger the DEBUG_WARN() inside btrfs_writepage_cow_fixup(). Fix the wrong parameter with correct length instead. Fixes: 94f6c5c17e52 ("btrfs: move ordered extent cleanup to where they are allocated") CC: stable@vger.kernel.org # 6.15+ Reviewed-by: Boris Burkov Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a2de289e662b..d740910e071a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2015,7 +2015,7 @@ static int nocow_one_range(struct btrfs_inode *inode, struct folio *locked_folio * cleaered by the caller. */ if (ret < 0) - btrfs_cleanup_ordered_extents(inode, file_pos, end); + btrfs_cleanup_ordered_extents(inode, file_pos, len); return ret; } -- cgit v1.2.3 From f022499f24e520706b9a8238746e1cacc37eb4e0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 1 Aug 2025 16:39:49 +0100 Subject: btrfs: do not set mtime/ctime to current time when unlinking for log replay If we are doing an unlink for log replay, we are updating the directory's mtime and ctime to the current time, and this is incorrect since it should stay with the mtime and ctime that were set when the directory was logged. This is the same as when adding a link to an inode during log replay (with btrfs_add_link()), where we want the mtime and ctime to be the values that were in place when the inode was logged. This was found with generic/547 using LOAD_FACTOR=20 and TIME_FACTOR=20, where due to large log trees we have longer log replay times and fssum could detect a mismatch of the mtime and ctime of a directory. Fix this by skipping the mtime and ctime update at __btrfs_unlink_inode() if we are in log replay context (just like btrfs_add_link()). Reviewed-by: Boris Burkov Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/inode.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d740910e071a..9e4aec7330cb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4189,6 +4189,23 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, return ret; } +static void update_time_after_link_or_unlink(struct btrfs_inode *dir) +{ + struct timespec64 now; + + /* + * If we are replaying a log tree, we do not want to update the mtime + * and ctime of the parent directory with the current time, since the + * log replay procedure is responsible for setting them to their correct + * values (the ones it had when the fsync was done). + */ + if (test_bit(BTRFS_FS_LOG_RECOVERING, &dir->root->fs_info->flags)) + return; + + now = inode_set_ctime_current(&dir->vfs_inode); + inode_set_mtime_to_ts(&dir->vfs_inode, now); +} + /* * unlink helper that gets used here in inode.c and in the tree logging * recovery code. It remove a link in a directory with a given name, and @@ -4289,7 +4306,7 @@ skip_backref: inode_inc_iversion(&inode->vfs_inode); inode_set_ctime_current(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); - inode_set_mtime_to_ts(&dir->vfs_inode, inode_set_ctime_current(&dir->vfs_inode)); + update_time_after_link_or_unlink(dir); return btrfs_update_inode(trans, dir); } @@ -6683,15 +6700,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size + name->len * 2); inode_inc_iversion(&parent_inode->vfs_inode); - /* - * If we are replaying a log tree, we do not want to update the mtime - * and ctime of the parent directory with the current time, since the - * log replay procedure is responsible for setting them to their correct - * values (the ones it had when the fsync was done). - */ - if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) - inode_set_mtime_to_ts(&parent_inode->vfs_inode, - inode_set_ctime_current(&parent_inode->vfs_inode)); + update_time_after_link_or_unlink(parent_inode); ret = btrfs_update_inode(trans, parent_inode); if (ret) -- cgit v1.2.3