From c000dfec7e88cee660cbc594c9716ecc979dc1f1 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 29 Mar 2023 17:49:38 +0200 Subject: ext4: Drop special handling of journalled data from extent shifting operations Now that filemap_write_and_wait() makes sure pages with journalled data are safely on disk, ext4_collapse_range() and ext4_insert_range() do not need special handling of journalled data. Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20230329154950.19720-7-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 3559ea6b0781..0b622ae29a73 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5290,13 +5290,6 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len) punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); - /* Call ext4_force_commit to flush all data in case of data=journal. 
*/ - if (ext4_should_journal_data(inode)) { - ret = ext4_force_commit(inode->i_sb); - if (ret) - return ret; - } - inode_lock(inode); /* * There is no need to overlap collapse range with EOF, in which case @@ -5443,13 +5436,6 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len) offset_lblk = offset >> EXT4_BLOCK_SIZE_BITS(sb); len_lblk = len >> EXT4_BLOCK_SIZE_BITS(sb); - /* Call ext4_force_commit to flush all data in case of data=journal */ - if (ext4_should_journal_data(inode)) { - ret = ext4_force_commit(inode->i_sb); - if (ret) - return ret; - } - inode_lock(inode); /* Currently just for extent based files */ if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { -- cgit v1.2.3 From 783ae448b7a21ca59ffe5bc261c17d9c3ebfe2ad Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 29 Mar 2023 17:49:39 +0200 Subject: ext4: Fix special handling of journalled data from extent zeroing The handling of journalled data in ext4_zero_range() is incomplete. We do not need to commit the running transaction; rather, we need to checkpoint pages with journalled data. If we don't, the journal tail can be advanced beyond the transaction containing the journalled data, and if we then crash before committing the transaction doing the zeroing, we will have inconsistent (too old) data in the file. Make sure file pages with journalled data are properly checkpointed before removing them from the page cache. 
Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20230329154950.19720-8-jack@suse.cz Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 0b622ae29a73..e79c767cc5e0 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -4526,13 +4526,6 @@ static long ext4_zero_range(struct file *file, loff_t offset, trace_ext4_zero_range(inode, offset, len, mode); - /* Call ext4_force_commit to flush all data in case of data=journal. */ - if (ext4_should_journal_data(inode)) { - ret = ext4_force_commit(inode->i_sb); - if (ret) - return ret; - } - /* * Round up offset. This is not fallocate, we need to zero out * blocks, so convert interior block aligned part of the range to @@ -4616,6 +4609,20 @@ static long ext4_zero_range(struct file *file, loff_t offset, filemap_invalidate_unlock(mapping); goto out_mutex; } + + /* + * For journalled data we need to write (and checkpoint) pages + * before discarding page cache to avoid inconsitent data on + * disk in case of crash before zeroing trans is committed. + */ + if (ext4_should_journal_data(inode)) { + ret = filemap_write_and_wait_range(mapping, start, end); + if (ret) { + filemap_invalidate_unlock(mapping); + goto out_mutex; + } + } + /* Now release the pages and zero block aligned part of pages */ truncate_pagecache_range(inode, start, end - 1); inode->i_mtime = inode->i_ctime = current_time(inode); -- cgit v1.2.3 From 835659598c67907b98cd2aa57bb951dfaf675c69 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 6 Apr 2023 11:16:27 +0000 Subject: ext4: fix use-after-free read in ext4_find_extent for bigalloc + inline Syzbot found the following issue: loop0: detected capacity change from 0 to 2048 EXT4-fs (loop0): mounted filesystem 00000000-0000-0000-0000-000000000000 without journal. Quota mode: none. 
================================================================== BUG: KASAN: use-after-free in ext4_ext_binsearch_idx fs/ext4/extents.c:768 [inline] BUG: KASAN: use-after-free in ext4_find_extent+0x76e/0xd90 fs/ext4/extents.c:931 Read of size 4 at addr ffff888073644750 by task syz-executor420/5067 CPU: 0 PID: 5067 Comm: syz-executor420 Not tainted 6.2.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 10/26/2022 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0x1b1/0x290 lib/dump_stack.c:106 print_address_description+0x74/0x340 mm/kasan/report.c:306 print_report+0x107/0x1f0 mm/kasan/report.c:417 kasan_report+0xcd/0x100 mm/kasan/report.c:517 ext4_ext_binsearch_idx fs/ext4/extents.c:768 [inline] ext4_find_extent+0x76e/0xd90 fs/ext4/extents.c:931 ext4_clu_mapped+0x117/0x970 fs/ext4/extents.c:5809 ext4_insert_delayed_block fs/ext4/inode.c:1696 [inline] ext4_da_map_blocks fs/ext4/inode.c:1806 [inline] ext4_da_get_block_prep+0x9e8/0x13c0 fs/ext4/inode.c:1870 ext4_block_write_begin+0x6a8/0x2290 fs/ext4/inode.c:1098 ext4_da_write_begin+0x539/0x760 fs/ext4/inode.c:3082 generic_perform_write+0x2e4/0x5e0 mm/filemap.c:3772 ext4_buffered_write_iter+0x122/0x3a0 fs/ext4/file.c:285 ext4_file_write_iter+0x1d0/0x18f0 call_write_iter include/linux/fs.h:2186 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x7dc/0xc50 fs/read_write.c:584 ksys_write+0x177/0x2a0 fs/read_write.c:637 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x3d/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd RIP: 0033:0x7f4b7a9737b9 RSP: 002b:00007ffc5cac3668 EFLAGS: 00000246 ORIG_RAX: 0000000000000001 RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f4b7a9737b9 RDX: 00000000175d9003 RSI: 0000000020000200 RDI: 0000000000000004 RBP: 00007f4b7a933050 R08: 0000000000000000 R09: 0000000000000000 R10: 000000000000079f R11: 0000000000000246 R12: 00007f4b7a9330e0 R13: 0000000000000000 R14: 
0000000000000000 R15: 0000000000000000 The above issue happens when the bigalloc and inline data features are both enabled. Commit 131294c35ed6 fixed a delayed allocation bug in ext4_clu_mapped for bigalloc + inline, but it only resolved the issue when the inode has inline data; if the inline data has been converted to an extent (ext4_da_convert_inline_data_to_extent) before writepages, the EXT4_STATE_MAY_INLINE_DATA flag is no longer set. However, i_data still stores inline data in this scenario, which then triggers the UAF when looking up the extent. To resolve the above issue, add an "ext4_has_inline_data(inode)" check in ext4_clu_mapped(). Fixes: 131294c35ed6 ("ext4: fix delayed allocation bug in ext4_clu_mapped for bigalloc + inline") Reported-by: syzbot+bf4bb7731ef73b83a3b4@syzkaller.appspotmail.com Reviewed-by: Jan Kara Reviewed-by: Ye Bin Reviewed-by: Tudor Ambarus Tested-by: Tudor Ambarus Link: https://lore.kernel.org/r/20230406111627.1916759-1-tudor.ambarus@linaro.org Signed-off-by: Theodore Ts'o --- fs/ext4/extents.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/ext4/extents.c') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e79c767cc5e0..35703dce23a3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -5795,7 +5795,8 @@ int ext4_clu_mapped(struct inode *inode, ext4_lblk_t lclu) * mapped - no physical clusters have been allocated, and the * file has no extents */ - if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) + if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) || + ext4_has_inline_data(inode)) return 0; /* search for the extent closest to the first block in the cluster */ -- cgit v1.2.3