diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-06 17:36:48 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-10-06 17:36:48 -0700 |
commit | 76e45035348c247a70ed50eb29a9906657e4444f (patch) | |
tree | e4101b34b1a3ddfea00be656586c22f704b33a2d /fs/btrfs/sysfs.c | |
parent | 4c0ed7d8d6e3dc013c4599a837de84794baa5b62 (diff) | |
parent | cbddcc4fa3443fe8cfb2ff8e210deb1f6a0eea38 (diff) |
Merge tag 'for-6.1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"There's a bunch of performance improvements, most notably the FIEMAP
speedup, the new block group tree to speed up mount on large
filesystems, more io_uring integration, some sysfs exports and the
usual fixes and core updates.
Summary:
Performance:
- outstanding FIEMAP speed improvement
- algorithmic change how extents are enumerated leads to orders of
magnitude speed boost (uncached and cached)
- extent sharing check speedup (2.2x uncached, 3x cached)
- add more cancellation points, allowing to interrupt seeking in
files with large number of extents
- more efficient hole and data seeking (4x uncached, 1.3x cached)
- sample results:
256M, 32K extents: 4s -> 29ms (~150x)
512M, 64K extents: 30s -> 59ms (~550x)
1G, 128K extents: 225s -> 120ms (~1800x)
- improved inode logging, especially for directories (on dbench
workload throughput +25%, max latency -21%)
- improved buffered IO, remove redundant extent state tracking,
lowering memory consumption and avoiding rb tree traversal
- add sysfs tunable to let qgroup temporarily skip exact accounting
when deleting snapshot, leading to a speedup but requiring a rescan
after that, will be used by snapper
- support io_uring and buffered writes, until now it was just for
direct IO, with the no-wait semantics implemented in the buffered
write path it now works and leads to speed improvement in IOPS
(2x), throughput (2.2x), latency (depends, 2x to 150x)
- small performance improvements when dropping and searching for
extent maps as well as when flushing delalloc in COW mode
(throughput +5MB/s)
User visible changes:
- new incompatible feature block-group-tree adding a dedicated tree
for tracking block groups, this allows a much faster load during
mount and avoids seeking unlike when it's scattered in the extent
tree items
- this reduces mount time for many-terabyte sized filesystems
- conversion tool will be provided so existing filesystem can also
be updated in place
- to reduce test matrix and feature combinations requires no-holes
and free-space-tree (mkfs defaults since 5.15)
- improved reporting of super block corruption detected by scrub
- scrub also tries to repair super block and does not wait until next
commit
- discard stats and tunables are exported in sysfs
(/sys/fs/btrfs/FSID/discard)
- qgroup status is exported in sysfs
(/sys/sys/fs/btrfs/FSID/qgroups/)
- verify that super block was not modified when thawing filesystem
Fixes:
- FIEMAP fixes
- fix extent sharing status, does not depend on the cached status
where merged
- flush delalloc so compressed extents are reported correctly
- fix alignment of VMA for memory mapped files on THP
- send: fix failures when processing inodes with no links (orphan
files and directories)
- fix race between quota enable and quota rescan ioctl
- handle more corner cases for read-only compat feature verification
- fix missed extent on fsync after dropping extent maps
Core:
- lockdep annotations to validate various transactions states and
state transitions
- preliminary support for fs-verity in send
- more effective memory use in scrub for subpage where sector is
smaller than page
- block group caching progress logic has been removed, load is now
synchronous
- simplify end IO callbacks and bio handling, use chained bios
instead of own tracking
- add no-wait semantics to several functions (tree search, nocow,
flushing, buffered write
- cleanups and refactoring
MM changes:
- export balance_dirty_pages_ratelimited_flags"
* tag 'for-6.1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (177 commits)
btrfs: set generation before calling btrfs_clean_tree_block in btrfs_init_new_buffer
btrfs: drop extent map range more efficiently
btrfs: avoid pointless extent map tree search when flushing delalloc
btrfs: remove unnecessary next extent map search
btrfs: remove unnecessary NULL pointer checks when searching extent maps
btrfs: assert tree is locked when clearing extent map from logging
btrfs: remove unnecessary extent map initializations
btrfs: remove the refcount warning/check at free_extent_map()
btrfs: add helper to replace extent map range with a new extent map
btrfs: move open coded extent map tree deletion out of inode eviction
btrfs: use cond_resched_rwlock_write() during inode eviction
btrfs: use extent_map_end() at btrfs_drop_extent_map_range()
btrfs: move btrfs_drop_extent_cache() to extent_map.c
btrfs: fix missed extent on fsync after dropping extent maps
btrfs: remove stale prototype of btrfs_write_inode
btrfs: enable nowait async buffered writes
btrfs: assert nowait mode is not used for some btree search functions
btrfs: make btrfs_buffered_write nowait compatible
btrfs: plumb NOWAIT through the write path
btrfs: make lock_and_cleanup_extent_if_need nowait compatible
...
Diffstat (limited to 'fs/btrfs/sysfs.c')
-rw-r--r-- | fs/btrfs/sysfs.c | 172 |
1 files changed, 121 insertions, 51 deletions
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index d5d0717fd09a..699b54b3acaa 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -35,12 +35,12 @@ * qgroup_attrs /sys/fs/btrfs/<uuid>/qgroups/<level>_<qgroupid> * space_info_attrs /sys/fs/btrfs/<uuid>/allocation/<bg-type> * raid_attrs /sys/fs/btrfs/<uuid>/allocation/<bg-type>/<bg-profile> + * discard_attrs /sys/fs/btrfs/<uuid>/discard * * When built with BTRFS_CONFIG_DEBUG: * * btrfs_debug_feature_attrs /sys/fs/btrfs/debug * btrfs_debug_mount_attrs /sys/fs/btrfs/<uuid>/debug - * discard_debug_attrs /sys/fs/btrfs/<uuid>/debug/discard */ struct btrfs_feature_attr { @@ -286,6 +286,7 @@ BTRFS_FEAT_ATTR_INCOMPAT(skinny_metadata, SKINNY_METADATA); BTRFS_FEAT_ATTR_INCOMPAT(no_holes, NO_HOLES); BTRFS_FEAT_ATTR_INCOMPAT(metadata_uuid, METADATA_UUID); BTRFS_FEAT_ATTR_COMPAT_RO(free_space_tree, FREE_SPACE_TREE); +BTRFS_FEAT_ATTR_COMPAT_RO(block_group_tree, BLOCK_GROUP_TREE); BTRFS_FEAT_ATTR_INCOMPAT(raid1c34, RAID1C34); #ifdef CONFIG_BLK_DEV_ZONED BTRFS_FEAT_ATTR_INCOMPAT(zoned, ZONED); @@ -317,6 +318,7 @@ static struct attribute *btrfs_supported_feature_attrs[] = { BTRFS_FEAT_ATTR_PTR(metadata_uuid), BTRFS_FEAT_ATTR_PTR(free_space_tree), BTRFS_FEAT_ATTR_PTR(raid1c34), + BTRFS_FEAT_ATTR_PTR(block_group_tree), #ifdef CONFIG_BLK_DEV_ZONED BTRFS_FEAT_ATTR_PTR(zoned), #endif @@ -429,12 +431,10 @@ static const struct attribute_group btrfs_static_feature_attr_group = { .attrs = btrfs_supported_static_feature_attrs, }; -#ifdef CONFIG_BTRFS_DEBUG - /* * Discard statistics and tunables */ -#define discard_to_fs_info(_kobj) to_fs_info((_kobj)->parent->parent) +#define discard_to_fs_info(_kobj) to_fs_info(get_btrfs_kobj(_kobj)) static ssize_t btrfs_discardable_bytes_show(struct kobject *kobj, struct kobj_attribute *a, @@ -583,11 +583,11 @@ BTRFS_ATTR_RW(discard, max_discard_size, btrfs_discard_max_discard_size_show, btrfs_discard_max_discard_size_store); /* - * Per-filesystem debugging of discard (when mounted with discard=async). + * Per-filesystem stats for discard (when mounted with discard=async). * - * Path: /sys/fs/btrfs/<uuid>/debug/discard/ + * Path: /sys/fs/btrfs/<uuid>/discard/ */ -static const struct attribute *discard_debug_attrs[] = { +static const struct attribute *discard_attrs[] = { BTRFS_ATTR_PTR(discard, discardable_bytes), BTRFS_ATTR_PTR(discard, discardable_extents), BTRFS_ATTR_PTR(discard, discard_bitmap_bytes), @@ -599,6 +599,8 @@ static const struct attribute *discard_debug_attrs[] = { NULL, }; +#ifdef CONFIG_BTRFS_DEBUG + /* * Per-filesystem runtime debugging exported via sysfs. * @@ -837,11 +839,8 @@ static ssize_t btrfs_sinfo_bg_reclaim_threshold_show(struct kobject *kobj, char *buf) { struct btrfs_space_info *space_info = to_space_info(kobj); - ssize_t ret; - ret = sysfs_emit(buf, "%d\n", READ_ONCE(space_info->bg_reclaim_threshold)); - - return ret; + return sysfs_emit(buf, "%d\n", READ_ONCE(space_info->bg_reclaim_threshold)); } static ssize_t btrfs_sinfo_bg_reclaim_threshold_store(struct kobject *kobj, @@ -1150,25 +1149,6 @@ static ssize_t btrfs_generation_show(struct kobject *kobj, } BTRFS_ATTR(, generation, btrfs_generation_show); -/* - * Look for an exact string @string in @buffer with possible leading or - * trailing whitespace - */ -static bool strmatch(const char *buffer, const char *string) -{ - const size_t len = strlen(string); - - /* Skip leading whitespace */ - buffer = skip_spaces(buffer); - - /* Match entire string, check if the rest is whitespace or empty */ - if (strncmp(string, buffer, len) == 0 && - strlen(skip_spaces(buffer + len)) == 0) - return true; - - return false; -} - static const char * const btrfs_read_policy_name[] = { "pid" }; static ssize_t btrfs_read_policy_show(struct kobject *kobj, @@ -1202,7 +1182,7 @@ static ssize_t btrfs_read_policy_store(struct kobject *kobj, int i; for (i = 0; i < BTRFS_NR_READ_POLICY; i++) { - if (strmatch(buf, btrfs_read_policy_name[i])) { + if (sysfs_streq(buf, btrfs_read_policy_name[i])) { if (i != fs_devices->read_policy) { fs_devices->read_policy = i; btrfs_info(fs_devices->fs_info, @@ -1222,11 +1202,8 @@ static ssize_t btrfs_bg_reclaim_threshold_show(struct kobject *kobj, char *buf) { struct btrfs_fs_info *fs_info = to_fs_info(kobj); - ssize_t ret; - ret = sysfs_emit(buf, "%d\n", READ_ONCE(fs_info->bg_reclaim_threshold)); - - return ret; + return sysfs_emit(buf, "%d\n", READ_ONCE(fs_info->bg_reclaim_threshold)); } static ssize_t btrfs_bg_reclaim_threshold_store(struct kobject *kobj, @@ -1427,13 +1404,12 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info) kobject_del(fs_info->space_info_kobj); kobject_put(fs_info->space_info_kobj); } -#ifdef CONFIG_BTRFS_DEBUG - if (fs_info->discard_debug_kobj) { - sysfs_remove_files(fs_info->discard_debug_kobj, - discard_debug_attrs); - kobject_del(fs_info->discard_debug_kobj); - kobject_put(fs_info->discard_debug_kobj); + if (fs_info->discard_kobj) { + sysfs_remove_files(fs_info->discard_kobj, discard_attrs); + kobject_del(fs_info->discard_kobj); + kobject_put(fs_info->discard_kobj); } +#ifdef CONFIG_BTRFS_DEBUG if (fs_info->debug_kobj) { sysfs_remove_files(fs_info->debug_kobj, btrfs_debug_mount_attrs); kobject_del(fs_info->debug_kobj); @@ -2001,20 +1977,18 @@ int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info) error = sysfs_create_files(fs_info->debug_kobj, btrfs_debug_mount_attrs); if (error) goto failure; +#endif /* Discard directory */ - fs_info->discard_debug_kobj = kobject_create_and_add("discard", - fs_info->debug_kobj); - if (!fs_info->discard_debug_kobj) { + fs_info->discard_kobj = kobject_create_and_add("discard", fsid_kobj); + if (!fs_info->discard_kobj) { error = -ENOMEM; goto failure; } - error = sysfs_create_files(fs_info->discard_debug_kobj, - discard_debug_attrs); + error = sysfs_create_files(fs_info->discard_kobj, discard_attrs); if (error) goto failure; -#endif error = addrm_unknown_feature_attrs(fs_info, true); if (error) @@ -2041,6 +2015,98 @@ failure: return error; } +static ssize_t qgroup_enabled_show(struct kobject *qgroups_kobj, + struct kobj_attribute *a, + char *buf) +{ + struct btrfs_fs_info *fs_info = to_fs_info(qgroups_kobj->parent); + bool enabled; + + spin_lock(&fs_info->qgroup_lock); + enabled = fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON; + spin_unlock(&fs_info->qgroup_lock); + + return sysfs_emit(buf, "%d\n", enabled); +} +BTRFS_ATTR(qgroups, enabled, qgroup_enabled_show); + +static ssize_t qgroup_inconsistent_show(struct kobject *qgroups_kobj, + struct kobj_attribute *a, + char *buf) +{ + struct btrfs_fs_info *fs_info = to_fs_info(qgroups_kobj->parent); + bool inconsistent; + + spin_lock(&fs_info->qgroup_lock); + inconsistent = (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT); + spin_unlock(&fs_info->qgroup_lock); + + return sysfs_emit(buf, "%d\n", inconsistent); +} +BTRFS_ATTR(qgroups, inconsistent, qgroup_inconsistent_show); + +static ssize_t qgroup_drop_subtree_thres_show(struct kobject *qgroups_kobj, + struct kobj_attribute *a, + char *buf) +{ + struct btrfs_fs_info *fs_info = to_fs_info(qgroups_kobj->parent); + u8 result; + + spin_lock(&fs_info->qgroup_lock); + result = fs_info->qgroup_drop_subtree_thres; + spin_unlock(&fs_info->qgroup_lock); + + return sysfs_emit(buf, "%d\n", result); +} + +static ssize_t qgroup_drop_subtree_thres_store(struct kobject *qgroups_kobj, + struct kobj_attribute *a, + const char *buf, size_t len) +{ + struct btrfs_fs_info *fs_info = to_fs_info(qgroups_kobj->parent); + u8 new_thres; + int ret; + + ret = kstrtou8(buf, 10, &new_thres); + if (ret) + return -EINVAL; + + if (new_thres > BTRFS_MAX_LEVEL) + return -EINVAL; + + spin_lock(&fs_info->qgroup_lock); + fs_info->qgroup_drop_subtree_thres = new_thres; + spin_unlock(&fs_info->qgroup_lock); + + return len; +} +BTRFS_ATTR_RW(qgroups, drop_subtree_threshold, qgroup_drop_subtree_thres_show, + qgroup_drop_subtree_thres_store); + +/* + * Qgroups global info + * + * Path: /sys/fs/btrfs/<uuid>/qgroups/ + */ +static struct attribute *qgroups_attrs[] = { + BTRFS_ATTR_PTR(qgroups, enabled), + BTRFS_ATTR_PTR(qgroups, inconsistent), + BTRFS_ATTR_PTR(qgroups, drop_subtree_threshold), + NULL +}; +ATTRIBUTE_GROUPS(qgroups); + +static void qgroups_release(struct kobject *kobj) +{ + kfree(kobj); +} + +static struct kobj_type qgroups_ktype = { + .sysfs_ops = &kobj_sysfs_ops, + .default_groups = qgroups_groups, + .release = qgroups_release, +}; + static inline struct btrfs_fs_info *qgroup_kobj_to_fs_info(struct kobject *kobj) { return to_fs_info(kobj->parent->parent); @@ -2166,11 +2232,15 @@ int btrfs_sysfs_add_qgroups(struct btrfs_fs_info *fs_info) if (fs_info->qgroups_kobj) return 0; - fs_info->qgroups_kobj = kobject_create_and_add("qgroups", fsid_kobj); - if (!fs_info->qgroups_kobj) { - ret = -ENOMEM; + fs_info->qgroups_kobj = kzalloc(sizeof(struct kobject), GFP_KERNEL); + if (!fs_info->qgroups_kobj) + return -ENOMEM; + + ret = kobject_init_and_add(fs_info->qgroups_kobj, &qgroups_ktype, + fsid_kobj, "qgroups"); + if (ret < 0) goto out; - } + rbtree_postorder_for_each_entry_safe(qgroup, next, &fs_info->qgroup_tree, node) { ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); |