summaryrefslogtreecommitdiff
path: root/fs/btrfs/transaction.c
diff options
context:
space:
mode:
authorJosef Bacik <josef@toxicpanda.com>2025-04-28 10:52:57 -0400
committerDavid Sterba <dsterba@suse.com>2025-05-15 14:30:50 +0200
commit5e121ae687b8c16294230d69ea16a49a08d5e332 (patch)
treeb9a65717c4b59bf34b8b479c125536102070f071 /fs/btrfs/transaction.c
parent4bc0a3cb75c264e945e08dcc590e3c722a3ee58f (diff)
btrfs: use buffer xarray for extent buffer writeback operations
Currently we have this ugly back and forth with the btree writeback where we find the folio, find the eb associated with that folio, and then attempt to writeback. This results in two different paths for subpage ebs and >= page size ebs. Clean this up by adding our own infrastructure around looking up tagged ebs and writing the ebs out directly. This allows us to unify the subpage and >= pagesize IO paths, resulting in a much cleaner writeback path for extent buffers. I ran this through fsperf on a VM with 8 CPUs and 16GiB of RAM. I used smallfiles100k, but reduced the files to 1k to make it run faster, the results are as follows, with the statistically significant improvements marked with *, there were no regressions. fsperf was run with -n 10 for both runs, so the baseline is the average 10 runs and the test is the average of 10 runs. smallfiles100k results metric baseline current stdev diff ================================================================================ avg_commit_ms 68.58 58.44 3.35 -14.79% * commits 270.60 254.70 16.24 -5.88% dev_read_iops 48 48 0 0.00% dev_read_kbytes 1044 1044 0 0.00% dev_write_iops 866117.90 850028.10 14292.20 -1.86% dev_write_kbytes 10939976.40 10605701.20 351330.32 -3.06% elapsed 49.30 33 1.64 -33.06% * end_state_mount_ns 41251498.80 35773220.70 2531205.32 -13.28% * end_state_umount_ns 1.90e+09 1.50e+09 14186226.85 -21.38% * max_commit_ms 139 111.60 9.72 -19.71% * sys_cpu 4.90 3.86 0.88 -21.29% write_bw_bytes 42935768.20 64318451.10 1609415.05 49.80% * write_clat_ns_mean 366431.69 243202.60 14161.98 -33.63% * write_clat_ns_p50 49203.20 20992 264.40 -57.34% * write_clat_ns_p99 827392 653721.60 65904.74 -20.99% * write_io_kbytes 2035940 2035940 0 0.00% write_iops 10482.37 15702.75 392.92 49.80% * write_lat_ns_max 1.01e+08 90516129 3910102.06 -10.29% * write_lat_ns_mean 366556.19 243308.48 14154.51 -33.62% * As you can see we get about a 33% decrease runtime, with a 50% throughput increase, which is pretty significant. Reviewed-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Josef Bacik <josef@toxicpanda.com> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/transaction.c')
-rw-r--r--fs/btrfs/transaction.c5
1 files changed, 2 insertions, 3 deletions
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 665c36f6e8172..b06254aba8de2 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -1155,7 +1155,7 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
if (!ret)
ret = filemap_fdatawrite_range(mapping, start, end);
if (!ret && wait_writeback)
- ret = filemap_fdatawait_range(mapping, start, end);
+ btrfs_btree_wait_writeback_range(fs_info, start, end);
btrfs_free_extent_state(cached_state);
if (ret)
break;
@@ -1175,7 +1175,6 @@ int btrfs_write_marked_extents(struct btrfs_fs_info *fs_info,
static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
struct extent_io_tree *dirty_pages)
{
- struct address_space *mapping = fs_info->btree_inode->i_mapping;
struct extent_state *cached_state = NULL;
u64 start = 0;
u64 end;
@@ -1196,7 +1195,7 @@ static int __btrfs_wait_marked_extents(struct btrfs_fs_info *fs_info,
if (ret == -ENOMEM)
ret = 0;
if (!ret)
- ret = filemap_fdatawait_range(mapping, start, end);
+ btrfs_btree_wait_writeback_range(fs_info, start, end);
btrfs_free_extent_state(cached_state);
if (ret)
break;