diff options
author | Christoph Hellwig <hch@lst.de> | 2025-04-28 11:40:06 -0700 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2025-05-02 07:59:29 +0200 |
commit | 94d9c3366e4d7a075072854b372ade5498e69995 (patch) | |
tree | b089f46bc9ababe5f5aa75292a5f40ba92d3e85b | |
parent | d6989af0cb1b241a4c21987ba12277d3e718a2d7 (diff) |
xfs: flush inodegc before swapon
Commit 2d873efd174bae9005776937d5ac6a96050266db upstream
Fix the brand new xfstest that tries to swapon on a recently unshared
file and use the chance to document the other bit of magic in this
function.
The big comment is taken from a mailing list post by Dave Chinner.
Fixes: 5e672cd69f0a53 ("xfs: introduce xfs_inodegc_push()")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Carlos Maiolino <cem@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- | fs/xfs/xfs_aops.c | 35 |
1 files changed, 34 insertions, 1 deletions
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index d2c7be12f566..ba6b4a180e80 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -19,6 +19,7 @@
 #include "xfs_reflink.h"
 #include "xfs_errortag.h"
 #include "xfs_error.h"
+#include "xfs_icache.h"
 
 struct xfs_writepage_ctx {
 	struct iomap_writepage_ctx ctx;
@@ -533,7 +534,39 @@ xfs_vm_swap_activate(
 	struct file *swap_file,
 	sector_t *span)
 {
-	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
+	struct xfs_inode *ip = XFS_I(file_inode(swap_file));
+
+	/*
+	 * Swap file activation can race against concurrent shared extent
+	 * removal in files that have been cloned. If this happens,
+	 * iomap_swapfile_iter() can fail because it encountered a shared
+	 * extent even though an operation is in progress to remove those
+	 * shared extents.
+	 *
+	 * This race becomes problematic when we defer extent removal
+	 * operations beyond the end of a syscall (i.e. use async background
+	 * processing algorithms). Users think the extents are no longer
+	 * shared, but iomap_swapfile_iter() still sees them as shared
+	 * because the refcountbt entries for the extents being removed have
+	 * not yet been updated. Hence the swapon call fails unexpectedly.
+	 *
+	 * The race condition is currently most obvious from the unlink()
+	 * operation as extent removal is deferred until after the last
+	 * reference to the inode goes away. We then process the extent
+	 * removal asynchronously, hence triggers the "syscall completed but
+	 * work not done" condition mentioned above. To close this race
+	 * window, we need to flush any pending inodegc operations to ensure
+	 * they have updated the refcountbt records before we try to map the
+	 * swapfile.
+	 */
+	xfs_inodegc_flush(ip->i_mount);
+
+	/*
+	 * Direct the swap code to the correct block device when this file
+	 * sits on the RT device.
+	 */
+	sis->bdev = xfs_inode_buftarg(ip)->bt_bdev;
+
 	return iomap_swapfile_activate(sis, swap_file, span,
 			&xfs_read_iomap_ops);
 }