diff options
Diffstat (limited to 'fs')
40 files changed, 359 insertions, 180 deletions
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 1d847a939f29..180a458fc4f7 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -129,7 +129,7 @@ struct inode *anon_inode_make_secure_inode(struct super_block *sb, const char *n } return inode; } -EXPORT_SYMBOL_GPL_FOR_MODULES(anon_inode_make_secure_inode, "kvm"); +EXPORT_SYMBOL_FOR_MODULES(anon_inode_make_secure_inode, "kvm"); static struct file *__anon_inode_getfile(const char *name, const struct file_operations *fops, diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f23d75986947..c953297aa89a 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -1512,7 +1512,7 @@ out: /* * Return 0 if we have submitted or queued the sector for submission. - * Return <0 for critical errors. + * Return <0 for critical errors, and the sector will have its dirty flag cleared. * * Caller should make sure filepos < i_size and handle filepos >= i_size case. */ @@ -1535,8 +1535,17 @@ static int submit_one_sector(struct btrfs_inode *inode, ASSERT(filepos < i_size); em = btrfs_get_extent(inode, NULL, filepos, sectorsize); - if (IS_ERR(em)) + if (IS_ERR(em)) { + /* + * When submission failed, we should still clear the folio dirty. + * Or the folio will be written back again but without any + * ordered extent. + */ + btrfs_folio_clear_dirty(fs_info, folio, filepos, sectorsize); + btrfs_folio_set_writeback(fs_info, folio, filepos, sectorsize); + btrfs_folio_clear_writeback(fs_info, folio, filepos, sectorsize); return PTR_ERR(em); + } extent_offset = filepos - em->start; em_end = btrfs_extent_map_end(em); @@ -1609,8 +1618,12 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode, folio_unlock(folio); return 1; } - if (ret < 0) + if (ret < 0) { + btrfs_folio_clear_dirty(fs_info, folio, start, len); + btrfs_folio_set_writeback(fs_info, folio, start, len); + btrfs_folio_clear_writeback(fs_info, folio, start, len); return ret; + } for (cur = start; cur < start + len; cur += fs_info->sectorsize) set_bit((cur - folio_start) >> fs_info->sectorsize_bits, &range_bitmap); @@ -1666,8 +1679,8 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode, * Here we set writeback and clear for the range. If the full folio * is no longer dirty then we clear the PAGECACHE_TAG_DIRTY tag. * - * If we hit any error, the corresponding sector will still be dirty - * thus no need to clear PAGECACHE_TAG_DIRTY. + * If we hit any error, the corresponding sector will have its dirty + * flag cleared and writeback finished, thus no need to handle the error case. */ if (!submitted_io && !error) { btrfs_folio_set_writeback(fs_info, folio, start, len); @@ -1813,6 +1826,7 @@ static noinline_for_stack bool lock_extent_buffer_for_io(struct extent_buffer *e xas_load(&xas); xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK); xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY); + xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); xas_unlock_irqrestore(&xas, flags); btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d740910e071a..9e4aec7330cb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4189,6 +4189,23 @@ int btrfs_update_inode_fallback(struct btrfs_trans_handle *trans, return ret; } +static void update_time_after_link_or_unlink(struct btrfs_inode *dir) +{ + struct timespec64 now; + + /* + * If we are replaying a log tree, we do not want to update the mtime + * and ctime of the parent directory with the current time, since the + * log replay procedure is responsible for setting them to their correct + * values (the ones it had when the fsync was done). + */ + if (test_bit(BTRFS_FS_LOG_RECOVERING, &dir->root->fs_info->flags)) + return; + + now = inode_set_ctime_current(&dir->vfs_inode); + inode_set_mtime_to_ts(&dir->vfs_inode, now); +} + /* * unlink helper that gets used here in inode.c and in the tree logging * recovery code. It remove a link in a directory with a given name, and @@ -4289,7 +4306,7 @@ skip_backref: inode_inc_iversion(&inode->vfs_inode); inode_set_ctime_current(&inode->vfs_inode); inode_inc_iversion(&dir->vfs_inode); - inode_set_mtime_to_ts(&dir->vfs_inode, inode_set_ctime_current(&dir->vfs_inode)); + update_time_after_link_or_unlink(dir); return btrfs_update_inode(trans, dir); } @@ -6683,15 +6700,7 @@ int btrfs_add_link(struct btrfs_trans_handle *trans, btrfs_i_size_write(parent_inode, parent_inode->vfs_inode.i_size + name->len * 2); inode_inc_iversion(&parent_inode->vfs_inode); - /* - * If we are replaying a log tree, we do not want to update the mtime - * and ctime of the parent directory with the current time, since the - * log replay procedure is responsible for setting them to their correct - * values (the ones it had when the fsync was done). - */ - if (!test_bit(BTRFS_FS_LOG_RECOVERING, &root->fs_info->flags)) - inode_set_mtime_to_ts(&parent_inode->vfs_inode, - inode_set_ctime_current(&parent_inode->vfs_inode)); + update_time_after_link_or_unlink(parent_inode); ret = btrfs_update_inode(trans, parent_inode); if (ret) diff --git a/fs/btrfs/subpage.c b/fs/btrfs/subpage.c index c9b3821957f7..cb4f97833dc3 100644 --- a/fs/btrfs/subpage.c +++ b/fs/btrfs/subpage.c @@ -448,8 +448,25 @@ void btrfs_subpage_set_writeback(const struct btrfs_fs_info *fs_info, spin_lock_irqsave(&bfs->lock, flags); bitmap_set(bfs->bitmaps, start_bit, len >> fs_info->sectorsize_bits); + + /* + * Don't clear the TOWRITE tag when starting writeback on a still-dirty + * folio. Doing so can cause WB_SYNC_ALL writepages() to overlook it, + * assume writeback is complete, and exit too early — violating sync + * ordering guarantees. + */ if (!folio_test_writeback(folio)) - folio_start_writeback(folio); + __folio_start_writeback(folio, true); + if (!folio_test_dirty(folio)) { + struct address_space *mapping = folio_mapping(folio); + XA_STATE(xas, &mapping->i_pages, folio->index); + unsigned long flags; + + xas_lock_irqsave(&xas, flags); + xas_load(&xas); + xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE); + xas_unlock_irqrestore(&xas, flags); + } spin_unlock_irqrestore(&bfs->lock, flags); } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 68e35a3700ff..a262b494a89f 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -88,6 +88,9 @@ struct btrfs_fs_context { refcount_t refs; }; +static void btrfs_emit_options(struct btrfs_fs_info *info, + struct btrfs_fs_context *old); + enum { Opt_acl, Opt_clear_cache, @@ -698,12 +701,9 @@ bool btrfs_check_options(const struct btrfs_fs_info *info, if (!test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state)) { if (btrfs_raw_test_opt(*mount_opt, SPACE_CACHE)) { - btrfs_info(info, "disk space caching is enabled"); btrfs_warn(info, "space cache v1 is being deprecated and will be removed in a future release, please use -o space_cache=v2"); } - if (btrfs_raw_test_opt(*mount_opt, FREE_SPACE_TREE)) - btrfs_info(info, "using free-space-tree"); } return ret; @@ -980,6 +980,8 @@ static int btrfs_fill_super(struct super_block *sb, return ret; } + btrfs_emit_options(fs_info, NULL); + inode = btrfs_iget(BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root); if (IS_ERR(inode)) { ret = PTR_ERR(inode); @@ -1437,7 +1439,7 @@ static void btrfs_emit_options(struct btrfs_fs_info *info, { btrfs_info_if_set(info, old, NODATASUM, "setting nodatasum"); btrfs_info_if_set(info, old, DEGRADED, "allowing degraded mounts"); - btrfs_info_if_set(info, old, NODATASUM, "setting nodatasum"); + btrfs_info_if_set(info, old, NODATACOW, "setting nodatacow"); btrfs_info_if_set(info, old, SSD, "enabling ssd optimizations"); btrfs_info_if_set(info, old, SSD_SPREAD, "using spread ssd allocation scheme"); btrfs_info_if_set(info, old, NOBARRIER, "turning off barriers"); @@ -1459,10 +1461,11 @@ static void btrfs_emit_options(struct btrfs_fs_info *info, btrfs_info_if_set(info, old, IGNOREMETACSUMS, "ignoring meta csums"); btrfs_info_if_set(info, old, IGNORESUPERFLAGS, "ignoring unknown super block flags"); + btrfs_info_if_unset(info, old, NODATASUM, "setting datasum"); btrfs_info_if_unset(info, old, NODATACOW, "setting datacow"); btrfs_info_if_unset(info, old, SSD, "not using ssd optimizations"); btrfs_info_if_unset(info, old, SSD_SPREAD, "not using spread ssd allocation scheme"); - btrfs_info_if_unset(info, old, NOBARRIER, "turning off barriers"); + btrfs_info_if_unset(info, old, NOBARRIER, "turning on barriers"); btrfs_info_if_unset(info, old, NOTREELOG, "enabling tree log"); btrfs_info_if_unset(info, old, SPACE_CACHE, "disabling disk space caching"); btrfs_info_if_unset(info, old, FREE_SPACE_TREE, "disabling free space tree"); diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index db11b5b5f0e6..ea662036f441 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -17,6 +17,7 @@ #include "accessors.h" #include "bio.h" #include "transaction.h" +#include "sysfs.h" /* Maximum number of zones to report per blkdev_report_zones() call */ #define BTRFS_REPORT_NR_ZONES 4096 @@ -42,6 +43,9 @@ /* Number of superblock log zones */ #define BTRFS_NR_SB_LOG_ZONES 2 +/* Default number of max active zones when the device has no limits. */ +#define BTRFS_DEFAULT_MAX_ACTIVE_ZONES 128 + /* * Minimum of active zones we need: * @@ -416,7 +420,10 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache) if (!IS_ALIGNED(nr_sectors, zone_sectors)) zone_info->nr_zones++; - max_active_zones = bdev_max_active_zones(bdev); + max_active_zones = min_not_zero(bdev_max_active_zones(bdev), + bdev_max_open_zones(bdev)); + if (!max_active_zones && zone_info->nr_zones > BTRFS_DEFAULT_MAX_ACTIVE_ZONES) + max_active_zones = BTRFS_DEFAULT_MAX_ACTIVE_ZONES; if (max_active_zones && max_active_zones < BTRFS_MIN_ACTIVE_ZONES) { btrfs_err(fs_info, "zoned: %s: max active zones %u is too small, need at least %u active zones", @@ -2168,10 +2175,15 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group) goto out_unlock; } - /* No space left */ - if (btrfs_zoned_bg_is_full(block_group)) { - ret = false; - goto out_unlock; + if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) { + /* The caller should check if the block group is full. */ + if (WARN_ON_ONCE(btrfs_zoned_bg_is_full(block_group))) { + ret = false; + goto out_unlock; + } + } else { + /* Since it is already written, it should have been active. */ + WARN_ON_ONCE(block_group->meta_write_pointer != block_group->start); } for (i = 0; i < map->num_stripes; i++) { @@ -2230,7 +2242,7 @@ static void wait_eb_writebacks(struct btrfs_block_group *block_group) struct btrfs_fs_info *fs_info = block_group->fs_info; const u64 end = block_group->start + block_group->length; struct extent_buffer *eb; - unsigned long index, start = (block_group->start >> fs_info->sectorsize_bits); + unsigned long index, start = (block_group->start >> fs_info->nodesize_bits); rcu_read_lock(); xa_for_each_start(&fs_info->buffer_tree, index, eb, start) { @@ -2245,6 +2257,40 @@ static void wait_eb_writebacks(struct btrfs_block_group *block_group) rcu_read_unlock(); } +static int call_zone_finish(struct btrfs_block_group *block_group, + struct btrfs_io_stripe *stripe) +{ + struct btrfs_device *device = stripe->dev; + const u64 physical = stripe->physical; + struct btrfs_zoned_device_info *zinfo = device->zone_info; + int ret; + + if (!device->bdev) + return 0; + + if (zinfo->max_active_zones == 0) + return 0; + + if (btrfs_dev_is_sequential(device, physical)) { + unsigned int nofs_flags; + + nofs_flags = memalloc_nofs_save(); + ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, + physical >> SECTOR_SHIFT, + zinfo->zone_size >> SECTOR_SHIFT); + memalloc_nofs_restore(nofs_flags); + + if (ret) + return ret; + } + + if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA)) + zinfo->reserved_active_zones++; + btrfs_dev_clear_active_zone(device, physical); + + return 0; +} + static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_written) { struct btrfs_fs_info *fs_info = block_group->fs_info; @@ -2329,31 +2375,12 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ down_read(&dev_replace->rwsem); map = block_group->physical_map; for (i = 0; i < map->num_stripes; i++) { - struct btrfs_device *device = map->stripes[i].dev; - const u64 physical = map->stripes[i].physical; - struct btrfs_zoned_device_info *zinfo = device->zone_info; - unsigned int nofs_flags; - - if (!device->bdev) - continue; - - if (zinfo->max_active_zones == 0) - continue; - - nofs_flags = memalloc_nofs_save(); - ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, - physical >> SECTOR_SHIFT, - zinfo->zone_size >> SECTOR_SHIFT); - memalloc_nofs_restore(nofs_flags); + ret = call_zone_finish(block_group, &map->stripes[i]); if (ret) { up_read(&dev_replace->rwsem); return ret; } - - if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA)) - zinfo->reserved_active_zones++; - btrfs_dev_clear_active_zone(device, physical); } up_read(&dev_replace->rwsem); @@ -2504,12 +2531,12 @@ void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg) void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info) { struct btrfs_space_info *data_sinfo = fs_info->data_sinfo; - struct btrfs_space_info *space_info = data_sinfo->sub_group[0]; + struct btrfs_space_info *space_info = data_sinfo; struct btrfs_trans_handle *trans; struct btrfs_block_group *bg; struct list_head *bg_list; u64 alloc_flags; - bool initial = false; + bool first = true; bool did_chunk_alloc = false; int index; int ret; @@ -2523,21 +2550,52 @@ void btrfs_zoned_reserve_data_reloc_bg(struct btrfs_fs_info *fs_info) if (sb_rdonly(fs_info->sb)) return; - ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC); alloc_flags = btrfs_get_alloc_profile(fs_info, space_info->flags); index = btrfs_bg_flags_to_raid_index(alloc_flags); - bg_list = &data_sinfo->block_groups[index]; + /* Scan the data space_info to find empty block groups. Take the second one. */ again: + bg_list = &space_info->block_groups[index]; list_for_each_entry(bg, bg_list, list) { - if (bg->used > 0) + if (bg->alloc_offset != 0) continue; - if (!initial) { - initial = true; + if (first) { + first = false; continue; } + if (space_info == data_sinfo) { + /* Migrate the block group to the data relocation space_info. */ + struct btrfs_space_info *reloc_sinfo = data_sinfo->sub_group[0]; + int factor; + + ASSERT(reloc_sinfo->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC); + factor = btrfs_bg_type_to_factor(bg->flags); + + down_write(&space_info->groups_sem); + list_del_init(&bg->list); + /* We can assume this as we choose the second empty one. */ + ASSERT(!list_empty(&space_info->block_groups[index])); + up_write(&space_info->groups_sem); + + spin_lock(&space_info->lock); + space_info->total_bytes -= bg->length; + space_info->disk_total -= bg->length * factor; + /* There is no allocation ever happened. */ + ASSERT(bg->used == 0); + ASSERT(bg->zone_unusable == 0); + /* No super block in a block group on the zoned setup. */ + ASSERT(bg->bytes_super == 0); + spin_unlock(&space_info->lock); + + bg->space_info = reloc_sinfo; + if (reloc_sinfo->block_group_kobjs[index] == NULL) + btrfs_sysfs_add_block_group_type(bg); + + btrfs_add_bg_to_space_info(fs_info, bg); + } + fs_info->data_reloc_bg = bg->start; set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &bg->runtime_flags); btrfs_zone_activate(bg); @@ -2552,11 +2610,18 @@ again: if (IS_ERR(trans)) return; + /* Allocate new BG in the data relocation space_info. */ + space_info = data_sinfo->sub_group[0]; + ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC); ret = btrfs_chunk_alloc(trans, space_info, alloc_flags, CHUNK_ALLOC_FORCE); btrfs_end_transaction(trans); if (ret == 1) { + /* + * We allocated a new block group in the data relocation space_info. We + * can take that one. + */ + first = false; did_chunk_alloc = true; - bg_list = &space_info->block_groups[index]; goto again; } } diff --git a/fs/buffer.c b/fs/buffer.c index ead4dc85debd..6a8752f7bbed 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -157,8 +157,8 @@ static void __end_buffer_read_notouch(struct buffer_head *bh, int uptodate) */ void end_buffer_read_sync(struct buffer_head *bh, int uptodate) { - __end_buffer_read_notouch(bh, uptodate); put_bh(bh); + __end_buffer_read_notouch(bh, uptodate); } EXPORT_SYMBOL(end_buffer_read_sync); diff --git a/fs/coredump.c b/fs/coredump.c index fedbead956ed..5dce257c67fc 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -345,7 +345,7 @@ static bool coredump_parse(struct core_name *cn, struct coredump_params *cprm, was_space = false; err = cn_printf(cn, "%c", '\0'); if (err) - return err; + return false; (*argv)[(*argc)++] = cn->used; } } diff --git a/fs/ext4/fsmap.c b/fs/ext4/fsmap.c index 383c6edea6dd..91185c40f755 100644 --- a/fs/ext4/fsmap.c +++ b/fs/ext4/fsmap.c @@ -393,6 +393,14 @@ static unsigned int ext4_getfsmap_find_sb(struct super_block *sb, /* Reserved GDT blocks */ if (!ext4_has_feature_meta_bg(sb) || metagroup < first_meta_bg) { len = le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); + + /* + * mkfs.ext4 can set s_reserved_gdt_blocks as 0 in some cases, + * check for that. + */ + if (!len) + return 0; + error = ext4_getfsmap_fill(meta_list, fsb, len, EXT4_FMR_OWN_RESV_GDT); if (error) @@ -526,6 +534,7 @@ static int ext4_getfsmap_datadev(struct super_block *sb, ext4_group_t end_ag; ext4_grpblk_t first_cluster; ext4_grpblk_t last_cluster; + struct ext4_fsmap irec; int error = 0; bofs = le32_to_cpu(sbi->s_es->s_first_data_block); @@ -609,10 +618,18 @@ static int ext4_getfsmap_datadev(struct super_block *sb, goto err; } - /* Report any gaps at the end of the bg */ + /* + * The dummy record below will cause ext4_getfsmap_helper() to report + * any allocated blocks at the end of the range. + */ + irec.fmr_device = 0; + irec.fmr_physical = end_fsb + 1; + irec.fmr_length = 0; + irec.fmr_owner = EXT4_FMR_OWN_FREE; + irec.fmr_flags = 0; + info->gfi_last = true; - error = ext4_getfsmap_datadev_helper(sb, end_ag, last_cluster + 1, - 0, info); + error = ext4_getfsmap_helper(sb, info, &irec); if (error) goto err; diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 7de327fa7b1c..d45124318200 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -539,7 +539,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, int indirect_blks; int blocks_to_boundary = 0; int depth; - int count = 0; + u64 count = 0; ext4_fsblk_t first_block = 0; trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); @@ -588,7 +588,7 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, count++; /* Fill in size of a hole we found */ map->m_pblk = 0; - map->m_len = min_t(unsigned int, map->m_len, count); + map->m_len = umin(map->m_len, count); goto cleanup; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index ed54c4d0f2f9..5b7a15db4953 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -146,7 +146,7 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode, */ int ext4_inode_is_fast_symlink(struct inode *inode) { - if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) { + if (!ext4_has_feature_ea_inode(inode->i_sb)) { int ea_blocks = EXT4_I(inode)->i_file_acl ? EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; @@ -3155,7 +3155,7 @@ retry: folio_unlock(folio); folio_put(folio); /* - * block_write_begin may have instantiated a few blocks + * ext4_block_write_begin may have instantiated a few blocks * outside i_size. Trim these off again. Don't need * i_size_read because we hold inode lock. */ diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index d83f91b62317..2cd36f59c9e3 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2965,7 +2965,6 @@ int ext4_init_new_dir(handle_t *handle, struct inode *dir, struct inode *inode) { struct buffer_head *dir_block = NULL; - struct ext4_dir_entry_2 *de; ext4_lblk_t block = 0; int err; @@ -2982,10 +2981,7 @@ int ext4_init_new_dir(handle_t *handle, struct inode *dir, dir_block = ext4_append(handle, inode, &block); if (IS_ERR(dir_block)) return PTR_ERR(dir_block); - de = (struct ext4_dir_entry_2 *)dir_block->b_data; err = ext4_init_dirblock(handle, inode, dir_block, dir->i_ino, NULL, 0); - if (err) - goto out; out: brelse(dir_block); return err; diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c index 7c7f792ad6ab..524d4658fa40 100644 --- a/fs/ext4/orphan.c +++ b/fs/ext4/orphan.c @@ -589,8 +589,9 @@ int ext4_init_orphan_info(struct super_block *sb) } oi->of_blocks = inode->i_size >> sb->s_blocksize_bits; oi->of_csum_seed = EXT4_I(inode)->i_csum_seed; - oi->of_binfo = kmalloc(oi->of_blocks*sizeof(struct ext4_orphan_block), - GFP_KERNEL); + oi->of_binfo = kmalloc_array(oi->of_blocks, + sizeof(struct ext4_orphan_block), + GFP_KERNEL); if (!oi->of_binfo) { ret = -ENOMEM; goto out_put; diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 3d8b0f6d2dea..39abfeec5f36 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -547,7 +547,7 @@ int ext4_bio_write_folio(struct ext4_io_submit *io, struct folio *folio, * first page of the bio. Otherwise it can deadlock. */ if (io->io_bio) - gfp_flags = GFP_NOWAIT | __GFP_NOWARN; + gfp_flags = GFP_NOWAIT; retry_encrypt: bounce_page = fscrypt_encrypt_pagecache_blocks(folio, enc_bytes, 0, gfp_flags); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index c7d39da7e733..699c15db28a8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -268,7 +268,7 @@ struct buffer_head *ext4_sb_bread_unmovable(struct super_block *sb, void ext4_sb_breadahead_unmovable(struct super_block *sb, sector_t block) { struct buffer_head *bh = bdev_getblk(sb->s_bdev, block, - sb->s_blocksize, GFP_NOWAIT | __GFP_NOWARN); + sb->s_blocksize, GFP_NOWAIT); if (likely(bh)) { if (trylock_buffer(bh)) @@ -1998,6 +1998,9 @@ int ext4_init_fs_context(struct fs_context *fc) fc->fs_private = ctx; fc->ops = &ext4_context_ops; + /* i_version is always enabled now */ + fc->sb_flags |= SB_I_VERSION; + return 0; } @@ -2975,6 +2978,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb, SEQ_OPTS_PRINT("min_batch_time=%u", sbi->s_min_batch_time); if (nodefs || sbi->s_max_batch_time != EXT4_DEF_MAX_BATCH_TIME) SEQ_OPTS_PRINT("max_batch_time=%u", sbi->s_max_batch_time); + if (nodefs && sb->s_flags & SB_I_VERSION) + SEQ_OPTS_PUTS("i_version"); if (nodefs || sbi->s_stripe) SEQ_OPTS_PRINT("stripe=%lu", sbi->s_stripe); if (nodefs || EXT4_MOUNT_DATA_FLAGS & @@ -5314,9 +5319,6 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); - /* i_version is always enabled now */ - sb->s_flags |= SB_I_VERSION; - /* HSM events are allowed by default. */ sb->s_iflags |= SB_I_ALLOW_HSM; @@ -5414,6 +5416,8 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) err = ext4_load_and_init_journal(sb, es, ctx); if (err) goto failed_mount3a; + if (bdev_read_only(sb->s_bdev)) + needs_recovery = 0; } else if (test_opt(sb, NOLOAD) && !sb_rdonly(sb) && ext4_has_feature_journal_needs_recovery(sb)) { ext4_msg(sb, KERN_ERR, "required journal recovery " diff --git a/fs/fhandle.c b/fs/fhandle.c index 7c236f64cdea..68a7d2861c58 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -402,7 +402,7 @@ static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, if (retval) return retval; - CLASS(get_unused_fd, fd)(O_CLOEXEC); + CLASS(get_unused_fd, fd)(open_flag); if (fd < 0) return fd; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index cc57367fb641..a07b8cf73ae2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2608,10 +2608,6 @@ void __mark_inode_dirty(struct inode *inode, int flags) wakeup_bdi = inode_io_list_move_locked(inode, wb, dirty_list); - spin_unlock(&wb->list_lock); - spin_unlock(&inode->i_lock); - trace_writeback_dirty_inode_enqueue(inode); - /* * If this is the first dirty inode for this bdi, * we have to wake-up the corresponding bdi thread @@ -2621,6 +2617,11 @@ void __mark_inode_dirty(struct inode *inode, int flags) if (wakeup_bdi && (wb->bdi->capabilities & BDI_CAP_WRITEBACK)) wb_wakeup_delayed(wb); + + spin_unlock(&wb->list_lock); + spin_unlock(&inode->i_lock); + trace_writeback_dirty_inode_enqueue(inode); + return; } } diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index ecb869e895ab..67c2318bfc42 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -289,11 +289,6 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, } } - if (attr->blksize != 0) - inode->i_blkbits = ilog2(attr->blksize); - else - inode->i_blkbits = inode->i_sb->s_blocksize_bits; - /* * Don't set the sticky bit in i_mode, unless we want the VFS * to check permissions. This prevents failures due to the diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c index 6f25d4cfea9f..b84f6af2eb4c 100644 --- a/fs/iomap/direct-io.c +++ b/fs/iomap/direct-io.c @@ -363,14 +363,14 @@ static int iomap_dio_bio_iter(struct iomap_iter *iter, struct iomap_dio *dio) if (iomap->flags & IOMAP_F_SHARED) dio->flags |= IOMAP_DIO_COW; - if (iomap->flags & IOMAP_F_NEW) { + if (iomap->flags & IOMAP_F_NEW) need_zeroout = true; - } else if (iomap->type == IOMAP_MAPPED) { - if (iomap_dio_can_use_fua(iomap, dio)) - bio_opf |= REQ_FUA; - else - dio->flags &= ~IOMAP_DIO_WRITE_THROUGH; - } + else if (iomap->type == IOMAP_MAPPED && + iomap_dio_can_use_fua(iomap, dio)) + bio_opf |= REQ_FUA; + + if (!(bio_opf & REQ_FUA)) + dio->flags &= ~IOMAP_DIO_WRITE_THROUGH; /* * We can only do deferred completion for pure overwrites that diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index b3971e91e8eb..38861ca04899 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -285,6 +285,7 @@ restart: retry: if (batch_count) __flush_batch(journal, &batch_count); + cond_resched(); spin_lock(&journal->j_list_lock); goto restart; } diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index 3c293a5a21b1..457f91c412d4 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -142,9 +142,9 @@ ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size) struct kernfs_node *kn = kernfs_dentry_node(dentry); struct kernfs_iattrs *attrs; - attrs = kernfs_iattrs_noalloc(kn); + attrs = kernfs_iattrs(kn); if (!attrs) - return -ENODATA; + return -ENOMEM; return simple_xattr_list(d_inode(dentry), &attrs->xattrs, buf, size); } diff --git a/fs/namespace.c b/fs/namespace.c index ddfd4457d338..ae6d1312b184 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1197,10 +1197,7 @@ static void commit_tree(struct mount *mnt) if (!mnt_ns_attached(mnt)) { for (struct mount *m = mnt; m; m = next_mnt(m, mnt)) - if (unlikely(mnt_ns_attached(m))) - m = skip_mnt_tree(m); - else - mnt_add_to_ns(n, m); + mnt_add_to_ns(n, m); n->nr_mounts += n->pending_mounts; n->pending_mounts = 0; } @@ -2704,6 +2701,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, lock_mnt_tree(child); q = __lookup_mnt(&child->mnt_parent->mnt, child->mnt_mountpoint); + commit_tree(child); if (q) { struct mountpoint *mp = root.mp; struct mount *r = child; @@ -2713,7 +2711,6 @@ static int attach_recursive_mnt(struct mount *source_mnt, mp = shorter; mnt_change_mountpoint(r, mp, q); } - commit_tree(child); } unpin_mountpoint(&root); unlock_mount_hash(); @@ -2862,6 +2859,19 @@ static int graft_tree(struct mount *mnt, struct mount *p, struct mountpoint *mp) return attach_recursive_mnt(mnt, p, mp); } +static int may_change_propagation(const struct mount *m) +{ + struct mnt_namespace *ns = m->mnt_ns; + + // it must be mounted in some namespace + if (IS_ERR_OR_NULL(ns)) // is_mounted() + return -EINVAL; + // and the caller must be admin in userns of that namespace + if (!ns_capable(ns->user_ns, CAP_SYS_ADMIN)) + return -EPERM; + return 0; +} + /* * Sanity check the flags to change_mnt_propagation. */ @@ -2898,10 +2908,10 @@ static int do_change_type(struct path *path, int ms_flags) return -EINVAL; namespace_lock(); - if (!check_mnt(mnt)) { - err = -EINVAL; + err = may_change_propagation(mnt); + if (err) goto out_unlock; - } + if (type == MS_SHARED) { err = invent_group_ids(mnt, recurse); if (err) @@ -3347,18 +3357,11 @@ static int do_set_group(struct path *from_path, struct path *to_path) namespace_lock(); - err = -EINVAL; - /* To and From must be mounted */ - if (!is_mounted(&from->mnt)) - goto out; - if (!is_mounted(&to->mnt)) - goto out; - - err = -EPERM; - /* We should be allowed to modify mount namespaces of both mounts */ - if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN)) + err = may_change_propagation(from); + if (err) goto out; - if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN)) + err = may_change_propagation(to); + if (err) goto out; err = -EINVAL; @@ -4551,20 +4554,10 @@ SYSCALL_DEFINE5(move_mount, if (flags & MOVE_MOUNT_SET_GROUP) mflags |= MNT_TREE_PROPAGATION; if (flags & MOVE_MOUNT_BENEATH) mflags |= MNT_TREE_BENEATH; - lflags = 0; - if (flags & MOVE_MOUNT_F_SYMLINKS) lflags |= LOOKUP_FOLLOW; - if (flags & MOVE_MOUNT_F_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; uflags = 0; - if (flags & MOVE_MOUNT_F_EMPTY_PATH) uflags = AT_EMPTY_PATH; - from_name = getname_maybe_null(from_pathname, uflags); - if (IS_ERR(from_name)) - return PTR_ERR(from_name); + if (flags & MOVE_MOUNT_T_EMPTY_PATH) + uflags = AT_EMPTY_PATH; - lflags = 0; - if (flags & MOVE_MOUNT_T_SYMLINKS) lflags |= LOOKUP_FOLLOW; - if (flags & MOVE_MOUNT_T_AUTOMOUNTS) lflags |= LOOKUP_AUTOMOUNT; - uflags = 0; - if (flags & MOVE_MOUNT_T_EMPTY_PATH) uflags = AT_EMPTY_PATH; to_name = getname_maybe_null(to_pathname, uflags); if (IS_ERR(to_name)) return PTR_ERR(to_name); @@ -4577,11 +4570,24 @@ SYSCALL_DEFINE5(move_mount, to_path = fd_file(f_to)->f_path; path_get(&to_path); } else { + lflags = 0; + if (flags & MOVE_MOUNT_T_SYMLINKS) + lflags |= LOOKUP_FOLLOW; + if (flags & MOVE_MOUNT_T_AUTOMOUNTS) + lflags |= LOOKUP_AUTOMOUNT; ret = filename_lookup(to_dfd, to_name, lflags, &to_path, NULL); if (ret) return ret; } + uflags = 0; + if (flags & MOVE_MOUNT_F_EMPTY_PATH) + uflags = AT_EMPTY_PATH; + + from_name = getname_maybe_null(from_pathname, uflags); + if (IS_ERR(from_name)) + return PTR_ERR(from_name); + if (!from_name && from_dfd >= 0) { CLASS(fd_raw, f_from)(from_dfd); if (fd_empty(f_from)) @@ -4590,6 +4596,11 @@ SYSCALL_DEFINE5(move_mount, return vfs_move_mount(&fd_file(f_from)->f_path, &to_path, mflags); } + lflags = 0; + if (flags & MOVE_MOUNT_F_SYMLINKS) + lflags |= LOOKUP_FOLLOW; + if (flags & MOVE_MOUNT_F_AUTOMOUNTS) + lflags |= LOOKUP_AUTOMOUNT; ret = filename_lookup(from_dfd, from_name, lflags, &from_path, NULL); if (ret) return ret; @@ -5176,7 +5187,8 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename, int ret; struct mount_kattr kattr = {}; - kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE; + if (flags & OPEN_TREE_CLONE) + kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE; if (flags & AT_RECURSIVE) kattr.kflags |= MOUNT_KATTR_RECURSE; diff --git a/fs/netfs/read_collect.c b/fs/netfs/read_collect.c index 3e804da1e1eb..a95e7aadafd0 100644 --- a/fs/netfs/read_collect.c +++ b/fs/netfs/read_collect.c @@ -281,8 +281,10 @@ reassess: } else if (test_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags)) { notes |= MADE_PROGRESS; } else { - if (!stream->failed) + if (!stream->failed) { stream->transferred += transferred; + stream->transferred_valid = true; + } if (front->transferred < front->len) set_bit(NETFS_RREQ_SHORT_TRANSFER, &rreq->flags); notes |= MADE_PROGRESS; diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c index 0f3a36852a4d..cbf3d9194c7b 100644 --- a/fs/netfs/write_collect.c +++ b/fs/netfs/write_collect.c @@ -254,6 +254,7 @@ reassess_streams: if (front->start + front->transferred > stream->collected_to) { stream->collected_to = front->start + front->transferred; stream->transferred = stream->collected_to - wreq->start; + stream->transferred_valid = true; notes |= MADE_PROGRESS; } if (test_bit(NETFS_SREQ_FAILED, &front->flags)) { @@ -356,6 +357,7 @@ bool netfs_write_collection(struct netfs_io_request *wreq) { struct netfs_inode *ictx = netfs_inode(wreq->inode); size_t transferred; + bool transferred_valid = false; int s; _enter("R=%x", wreq->debug_id); @@ -376,12 +378,16 @@ bool netfs_write_collection(struct netfs_io_request *wreq) continue; if (!list_empty(&stream->subrequests)) return false; - if (stream->transferred < transferred) + if (stream->transferred_valid && + stream->transferred < transferred) { transferred = stream->transferred; + transferred_valid = true; + } } /* Okay, declare that all I/O is complete. */ - wreq->transferred = transferred; + if (transferred_valid) + wreq->transferred = transferred; trace_netfs_rreq(wreq, netfs_rreq_trace_write_done); if (wreq->io_streams[1].active && diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c index 50bee2c4130d..0584cba1a043 100644 --- a/fs/netfs/write_issue.c +++ b/fs/netfs/write_issue.c @@ -118,12 +118,12 @@ struct netfs_io_request *netfs_create_write_req(struct address_space *mapping, wreq->io_streams[0].prepare_write = ictx->ops->prepare_write; wreq->io_streams[0].issue_write = ictx->ops->issue_write; wreq->io_streams[0].collected_to = start; - wreq->io_streams[0].transferred = LONG_MAX; + wreq->io_streams[0].transferred = 0; wreq->io_streams[1].stream_nr = 1; wreq->io_streams[1].source = NETFS_WRITE_TO_CACHE; wreq->io_streams[1].collected_to = start; - wreq->io_streams[1].transferred = LONG_MAX; + wreq->io_streams[1].transferred = 0; if (fscache_resources_valid(&wreq->cache_resources)) { wreq->io_streams[1].avail = true; wreq->io_streams[1].active = true; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 11968dcb7243..6e69ce43a13f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -253,13 +253,14 @@ nfs_page_group_unlock(struct nfs_page *req) nfs_page_clear_headlock(req); } -/* - * nfs_page_group_sync_on_bit_locked +/** + * nfs_page_group_sync_on_bit_locked - Test if all requests have @bit set + * @req: request in page group + * @bit: PG_* bit that is used to sync page group * * must be called with page group lock held */ -static bool -nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) +bool nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit) { struct nfs_page *head = req->wb_head; struct nfs_page *tmp; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index fa5c41d0989a..8b7c04737967 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -153,20 +153,10 @@ nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode) } } -static int -nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) +static void nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) { - int ret; - - if (!test_bit(PG_REMOVE, &req->wb_flags)) - return 0; - ret = nfs_page_group_lock(req); - if (ret) - return ret; if (test_and_clear_bit(PG_REMOVE, &req->wb_flags)) nfs_page_set_inode_ref(req, inode); - nfs_page_group_unlock(req); - return 0; } /** @@ -585,19 +575,18 @@ retry: } } + ret = nfs_page_group_lock(head); + if (ret < 0) + goto out_unlock; + /* Ensure that nobody removed the request before we locked it */ if (head != folio->private) { + nfs_page_group_unlock(head); nfs_unlock_and_release_request(head); goto retry; } - ret = nfs_cancel_remove_inode(head, inode); - if (ret < 0) - goto out_unlock; - - ret = nfs_page_group_lock(head); - if (ret < 0) - goto out_unlock; + nfs_cancel_remove_inode(head, inode); /* lock each request in the page group */ for (subreq = head->wb_this_page; @@ -786,7 +775,8 @@ static void nfs_inode_remove_request(struct nfs_page *req) { struct nfs_inode *nfsi = NFS_I(nfs_page_to_inode(req)); - if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) { + nfs_page_group_lock(req); + if (nfs_page_group_sync_on_bit_locked(req, PG_REMOVE)) { struct folio *folio = nfs_page_to_folio(req->wb_head); struct address_space *mapping = folio->mapping; @@ -798,6 +788,7 @@ static void nfs_inode_remove_request(struct nfs_page *req) } spin_unlock(&mapping->i_private_lock); } + nfs_page_group_unlock(req); if (test_and_clear_bit(PG_INODE_REF, &req->wb_flags)) { atomic_long_dec(&nfsi->nrequests); diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 70b8687dc45e..dbd63a74df4b 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -225,7 +225,7 @@ struct dentry *ovl_create_temp(struct ovl_fs *ofs, struct dentry *workdir, struct ovl_cattr *attr) { struct dentry *ret; - inode_lock(workdir->d_inode); + inode_lock_nested(workdir->d_inode, I_MUTEX_PARENT); ret = ovl_create_real(ofs, workdir, ovl_lookup_temp(ofs, workdir), attr); inode_unlock(workdir->d_inode); diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index a33115e7384c..41033bac96cb 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -1552,7 +1552,8 @@ void ovl_copyattr(struct inode *inode) int ovl_parent_lock(struct dentry *parent, struct dentry *child) { inode_lock_nested(parent->d_inode, I_MUTEX_PARENT); - if (!child || child->d_parent == parent) + if (!child || + (!d_unhashed(child) && child->d_parent == parent)) return 0; inode_unlock(parent->d_inode); diff --git a/fs/pidfs.c b/fs/pidfs.c index edc35522d75c..108e7527f837 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -296,12 +296,12 @@ static __u32 pidfs_coredump_mask(unsigned long mm_flags) static long pidfd_info(struct file *file, unsigned int cmd, unsigned long arg) { struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg; + struct task_struct *task __free(put_task) = NULL; struct pid *pid = pidfd_pid(file); size_t usize = _IOC_SIZE(cmd); struct pidfd_info kinfo = {}; struct pidfs_exit_info *exit_info; struct user_namespace *user_ns; - struct task_struct *task; struct pidfs_attr *attr; const struct cred *c; __u64 mask; diff --git a/fs/pnode.c b/fs/pnode.c index 81f7599bdac4..6f7d02f3fa98 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -111,7 +111,8 @@ void change_mnt_propagation(struct mount *mnt, int type) return; } if (IS_MNT_SHARED(mnt)) { - m = propagation_source(mnt); + if (type == MS_SLAVE || !hlist_empty(&mnt->mnt_slave_list)) + m = propagation_source(mnt); if (list_empty(&mnt->mnt_share)) { mnt_release_group_id(mnt); } else { @@ -637,10 +638,11 @@ void propagate_umount(struct list_head *set) } // now to_umount consists of all acceptable candidates - // deal with reparenting of remaining overmounts on those + // deal with reparenting of surviving overmounts on those list_for_each_entry(m, &to_umount, mnt_list) { - if (m->overmount) - reparent(m->overmount); + struct mount *over = m->overmount; + if (over && !will_be_unmounted(over)) + reparent(over); } // and fold them into the set diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 3b251de874ec..94b1d7a395d5 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4496,7 +4496,7 @@ smb3_init_transform_rq(struct TCP_Server_Info *server, int num_rqst, for (int i = 1; i < num_rqst; i++) { struct smb_rqst *old = &old_rq[i - 1]; struct smb_rqst *new = &new_rq[i]; - struct folio_queue *buffer; + struct folio_queue *buffer = NULL; size_t size = iov_iter_count(&old->rq_iter); orig_len += smb_rqst_len(server, old); diff --git a/fs/smb/server/connection.c b/fs/smb/server/connection.c index 3f04a2977ba8..67c4f73398df 100644 --- a/fs/smb/server/connection.c +++ b/fs/smb/server/connection.c @@ -504,7 +504,8 @@ void ksmbd_conn_transport_destroy(void) { mutex_lock(&init_lock); ksmbd_tcp_destroy(); - ksmbd_rdma_destroy(); + ksmbd_rdma_stop_listening(); stop_sessions(); + ksmbd_rdma_destroy(); mutex_unlock(&init_lock); } diff --git a/fs/smb/server/connection.h b/fs/smb/server/connection.h index 31dd1caac1e8..2aa8084bb593 100644 --- a/fs/smb/server/connection.h +++ b/fs/smb/server/connection.h @@ -46,7 +46,12 @@ struct ksmbd_conn { struct mutex srv_mutex; int status; unsigned int cli_cap; - __be32 inet_addr; + union { + __be32 inet_addr; +#if IS_ENABLED(CONFIG_IPV6) + u8 inet6_addr[16]; +#endif + }; char *request_buf; struct ksmbd_transport *transport; struct nls_table *local_nls; diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index d7a8a580d013..a04d5702820d 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -1102,8 +1102,10 @@ void smb_send_parent_lease_break_noti(struct ksmbd_file *fp, if (!atomic_inc_not_zero(&opinfo->refcount)) continue; - if (ksmbd_conn_releasing(opinfo->conn)) + if (ksmbd_conn_releasing(opinfo->conn)) { + opinfo_put(opinfo); continue; + } oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE, NULL); opinfo_put(opinfo); @@ -1139,8 +1141,11 @@ void smb_lazy_parent_lease_break_close(struct ksmbd_file *fp) if (!atomic_inc_not_zero(&opinfo->refcount)) continue; - if (ksmbd_conn_releasing(opinfo->conn)) + if (ksmbd_conn_releasing(opinfo->conn)) { + opinfo_put(opinfo); continue; + } + oplock_break(opinfo, SMB2_OPLOCK_LEVEL_NONE, NULL); opinfo_put(opinfo); } @@ -1343,8 +1348,10 @@ void smb_break_all_levII_oplock(struct ksmbd_work *work, struct ksmbd_file *fp, if (!atomic_inc_not_zero(&brk_op->refcount)) continue; - if (ksmbd_conn_releasing(brk_op->conn)) + if (ksmbd_conn_releasing(brk_op->conn)) { + opinfo_put(brk_op); continue; + } if (brk_op->is_lease && (brk_op->o_lease->state & (~(SMB2_LEASE_READ_CACHING_LE | diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c index 8d366db5f605..5466aa8c39b1 100644 --- a/fs/smb/server/transport_rdma.c +++ b/fs/smb/server/transport_rdma.c @@ -2194,7 +2194,7 @@ int ksmbd_rdma_init(void) return 0; } -void ksmbd_rdma_destroy(void) +void ksmbd_rdma_stop_listening(void) { if (!smb_direct_listener.cm_id) return; @@ -2203,7 +2203,10 @@ void ksmbd_rdma_destroy(void) rdma_destroy_id(smb_direct_listener.cm_id); smb_direct_listener.cm_id = NULL; +} +void ksmbd_rdma_destroy(void) +{ if (smb_direct_wq) { destroy_workqueue(smb_direct_wq); smb_direct_wq = NULL; diff --git a/fs/smb/server/transport_rdma.h b/fs/smb/server/transport_rdma.h index 77aee4e5c9dc..a2291b77488a 100644 --- a/fs/smb/server/transport_rdma.h +++ b/fs/smb/server/transport_rdma.h @@ -54,13 +54,15 @@ struct smb_direct_data_transfer { #ifdef CONFIG_SMB_SERVER_SMBDIRECT int ksmbd_rdma_init(void); +void ksmbd_rdma_stop_listening(void); void ksmbd_rdma_destroy(void); bool ksmbd_rdma_capable_netdev(struct net_device *netdev); void init_smbd_max_io_size(unsigned int sz); unsigned int get_smbd_max_read_write_size(void); #else static inline int ksmbd_rdma_init(void) { return 0; } -static inline int ksmbd_rdma_destroy(void) { return 0; } +static inline void ksmbd_rdma_stop_listening(void) { } +static inline void ksmbd_rdma_destroy(void) { } static inline bool ksmbd_rdma_capable_netdev(struct net_device *netdev) { return false; } static inline void init_smbd_max_io_size(unsigned int sz) { } static inline unsigned int get_smbd_max_read_write_size(void) { return 0; } diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index b1df02e321b0..4337df97987d 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -85,7 +85,14 @@ static struct tcp_transport *alloc_transport(struct socket *client_sk) return NULL; } +#if IS_ENABLED(CONFIG_IPV6) + if (client_sk->sk->sk_family == AF_INET6) + memcpy(&conn->inet6_addr, &client_sk->sk->sk_v6_daddr, 16); + else + conn->inet_addr = inet_sk(client_sk->sk)->inet_daddr; +#else conn->inet_addr = inet_sk(client_sk->sk)->inet_daddr; +#endif conn->transport = KSMBD_TRANS(t); KSMBD_TRANS(t)->conn = conn; KSMBD_TRANS(t)->ops = &ksmbd_tcp_transport_ops; @@ -229,7 +236,6 @@ static int ksmbd_kthread_fn(void *p) { struct socket *client_sk = NULL; struct interface *iface = (struct interface *)p; - struct inet_sock *csk_inet; struct ksmbd_conn *conn; int ret; @@ -252,13 +258,27 @@ static int ksmbd_kthread_fn(void *p) /* * Limits repeated connections from clients with the same IP. */ - csk_inet = inet_sk(client_sk->sk); down_read(&conn_list_lock); list_for_each_entry(conn, &conn_list, conns_list) - if (csk_inet->inet_daddr == conn->inet_addr) { +#if IS_ENABLED(CONFIG_IPV6) + if (client_sk->sk->sk_family == AF_INET6) { + if (memcmp(&client_sk->sk->sk_v6_daddr, + &conn->inet6_addr, 16) == 0) { + ret = -EAGAIN; + break; + } + } else if (inet_sk(client_sk->sk)->inet_daddr == + conn->inet_addr) { + ret = -EAGAIN; + break; + } +#else + if (inet_sk(client_sk->sk)->inet_daddr == + conn->inet_addr) { ret = -EAGAIN; break; } +#endif up_read(&conn_list_lock); if (ret == -EAGAIN) continue; diff --git a/fs/splice.c b/fs/splice.c index 4d6df083e0c0..f5094b6d00a0 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -739,6 +739,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, sd.pos = kiocb.ki_pos; if (ret <= 0) break; + WARN_ONCE(ret > sd.total_len - left, + "Splice Exceeded! ret=%zd tot=%zu left=%zu\n", + ret, sd.total_len, left); sd.num_spliced += ret; sd.total_len -= ret; diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c index 992ea0e37257..4465cf05603a 100644 --- a/fs/squashfs/super.c +++ b/fs/squashfs/super.c @@ -187,10 +187,15 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) unsigned short flags; unsigned int fragments; u64 lookup_table_start, xattr_id_table_start, next_table; - int err; + int err, devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); TRACE("Entered squashfs_fill_superblock\n"); + if (!devblksize) { + errorf(fc, "squashfs: unable to set blocksize\n"); + return -EINVAL; + } + sb->s_fs_info = kzalloc(sizeof(*msblk), GFP_KERNEL); if (sb->s_fs_info == NULL) { ERROR("Failed to allocate squashfs_sb_info\n"); @@ -201,12 +206,7 @@ static int squashfs_fill_super(struct super_block *sb, struct fs_context *fc) msblk->panic_on_errors = (opts->errors == Opt_errors_panic); - msblk->devblksize = sb_min_blocksize(sb, SQUASHFS_DEVBLK_SIZE); - if (!msblk->devblksize) { - errorf(fc, "squashfs: unable to set blocksize\n"); - return -EINVAL; - } - + msblk->devblksize = devblksize; msblk->devblksize_log2 = ffz(~msblk->devblksize); mutex_init(&msblk->meta_index_mutex); |