From 96dbcc0072acf4f9565a16e8da96e57e5cee1068 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 3 Oct 2022 15:57:30 +0100 Subject: btrfs: add missing path cache update during fiemap When looking the stored result for a cached path node, if the stored result is valid and has a value of true, we must update all the nodes for all levels below it with a result of true as well. This is necessary when moving from one leaf in the fs tree to the next one, as well as when moving from a node at any level to the next node at the same level. Currently this logic is missing as it was somehow forgotten by a recent patch with the subject: "btrfs: speedup checking for extent sharedness during fiemap". This adds the missing logic, which is the counter part to what we do when adding a shared node to the cache at store_backref_shared_cache(). Fixes: 12a824dc67a6 ("btrfs: speedup checking for extent sharedness during fiemap") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index dce3a16996b9..3c0c1f626c75 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1557,6 +1557,19 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache return false; *is_shared = entry->is_shared; + /* + * If the node at this level is shared, than all nodes below are also + * shared. Currently some of the nodes below may be marked as not shared + * because we have just switched from one leaf to another, and switched + * also other nodes above the leaf and below the current level, so mark + * them as shared. + */ + if (*is_shared) { + for (int i = 0; i < level; i++) { + cache->entries[i].is_shared = true; + cache->entries[i].gen = entry->gen; + } + } return true; } -- cgit v1.2.3 From 4fc7b57228243d09c0d878873bf24fa64a90fa01 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:16:51 +0100 Subject: btrfs: fix processing of delayed data refs during backref walking When processing delayed data references during backref walking and we are using a share context (we are being called through fiemap), whenever we find a delayed data reference for an inode different from the one we are interested in, then we immediately exit and consider the data extent as shared. This is wrong, because: 1) This might be a DROP reference that will cancel out a reference in the extent tree; 2) Even if it's an ADD reference, it may be followed by a DROP reference that cancels it out. In either case we should not exit immediately. Fix this by never exiting when we find a delayed data reference for another inode - instead add the reference and if it does not cancel out other delayed reference, we will exit early when we call extent_is_shared() after processing all delayed references. If we find a drop reference, then signal the code that processes references from the extent tree (add_inline_refs() and add_keyed_refs()) to not exit immediately if it finds there a reference for another inode, since we have delayed drop references that may cancel it out. In this later case we exit once we don't have references in the rb trees that cancel out each other and have two references for different inodes. Example reproducer for case 1): $ cat test-1.sh #!/bin/bash DEV=/dev/sdj MNT=/mnt/sdj mkfs.btrfs -f $DEV mount $DEV $MNT xfs_io -f -c "pwrite 0 64K" $MNT/foo cp --reflink=always $MNT/foo $MNT/bar echo echo "fiemap after cloning:" xfs_io -c "fiemap -v" $MNT/foo rm -f $MNT/bar echo echo "fiemap after removing file bar:" xfs_io -c "fiemap -v" $MNT/foo umount $MNT Running it before this patch, the extent is still listed as shared, it has the flag 0x2000 (FIEMAP_EXTENT_SHARED) set: $ ./test-1.sh fiemap after cloning: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x2001 fiemap after removing file bar: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x2001 Example reproducer for case 2): $ cat test-2.sh #!/bin/bash DEV=/dev/sdj MNT=/mnt/sdj mkfs.btrfs -f $DEV mount $DEV $MNT xfs_io -f -c "pwrite 0 64K" $MNT/foo cp --reflink=always $MNT/foo $MNT/bar # Flush delayed references to the extent tree and commit current # transaction. sync echo echo "fiemap after cloning:" xfs_io -c "fiemap -v" $MNT/foo rm -f $MNT/bar echo echo "fiemap after removing file bar:" xfs_io -c "fiemap -v" $MNT/foo umount $MNT Running it before this patch, the extent is still listed as shared, it has the flag 0x2000 (FIEMAP_EXTENT_SHARED) set: $ ./test-2.sh fiemap after cloning: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x2001 fiemap after removing file bar: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x2001 After this patch, after deleting bar in both tests, the extent is not reported with the 0x2000 flag anymore, it gets only the flag 0x1 (which is FIEMAP_EXTENT_LAST): $ ./test-1.sh fiemap after cloning: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x2001 fiemap after removing file bar: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x1 $ ./test-2.sh fiemap after cloning: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x2001 fiemap after removing file bar: /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..127]: 26624..26751 128 0x1 These tests will later be converted to a test case for fstests. Fixes: dc046b10c8b7d4 ("Btrfs: make fiemap not blow when you have lots of snapshots") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 3c0c1f626c75..cf47dabb786f 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -138,6 +138,7 @@ struct share_check { u64 root_objectid; u64 inum; int share_count; + bool have_delayed_delete_refs; }; static inline int extent_is_shared(struct share_check *sc) @@ -884,13 +885,22 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, key.offset = ref->offset; /* - * Found a inum that doesn't match our known inum, we - * know it's shared. + * If we have a share check context and a reference for + * another inode, we can't exit immediately. This is + * because even if this is a BTRFS_ADD_DELAYED_REF + * reference we may find next a BTRFS_DROP_DELAYED_REF + * which cancels out this ADD reference. + * + * If this is a DROP reference and there was no previous + * ADD reference, then we need to signal that when we + * process references from the extent tree (through + * add_inline_refs() and add_keyed_refs()), we should + * not exit early if we find a reference for another + * inode, because one of the delayed DROP references + * may cancel that reference in the extent tree. */ - if (sc && sc->inum && ref->objectid != sc->inum) { - ret = BACKREF_FOUND_SHARED; - goto out; - } + if (sc && count < 0) + sc->have_delayed_delete_refs = true; ret = add_indirect_ref(fs_info, preftrees, ref->root, &key, 0, node->bytenr, count, sc, @@ -920,7 +930,7 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, } if (!ret) ret = extent_is_shared(sc); -out: + spin_unlock(&head->lock); return ret; } @@ -1023,7 +1033,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); - if (sc && sc->inum && key.objectid != sc->inum) { + if (sc && sc->inum && key.objectid != sc->inum && + !sc->have_delayed_delete_refs) { ret = BACKREF_FOUND_SHARED; break; } @@ -1033,6 +1044,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, ret = add_indirect_ref(fs_info, preftrees, root, &key, 0, bytenr, count, sc, GFP_NOFS); + break; } default: @@ -1122,7 +1134,8 @@ static int add_keyed_refs(struct btrfs_root *extent_root, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); - if (sc && sc->inum && key.objectid != sc->inum) { + if (sc && sc->inum && key.objectid != sc->inum && + !sc->have_delayed_delete_refs) { ret = BACKREF_FOUND_SHARED; break; } @@ -1661,6 +1674,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, .root_objectid = root->root_key.objectid, .inum = inum, .share_count = 0, + .have_delayed_delete_refs = false, }; int level; @@ -1726,6 +1740,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, break; } shared.share_count = 0; + shared.have_delayed_delete_refs = false; cond_resched(); } -- cgit v1.2.3 From 943553ef9b51db303ab2b955c1025261abfdf6fb Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:16:52 +0100 Subject: btrfs: fix processing of delayed tree block refs during backref walking During backref walking, when processing a delayed reference with a type of BTRFS_TREE_BLOCK_REF_KEY, we have two bugs there: 1) We are accessing the delayed references extent_op, and its key, without the protection of the delayed ref head's lock; 2) If there's no extent op for the delayed ref head, we end up with an uninitialized key in the stack, variable 'tmp_op_key', and then pass it to add_indirect_ref(), which adds the reference to the indirect refs rb tree. This is wrong, because indirect references should have a NULL key when we don't have access to the key, and in that case they should be added to the indirect_missing_keys rb tree and not to the indirect rb tree. This means that if have BTRFS_TREE_BLOCK_REF_KEY delayed ref resulting from freeing an extent buffer, therefore with a count of -1, it will not cancel out the corresponding reference we have in the extent tree (with a count of 1), since both references end up in different rb trees. When using fiemap, where we often need to check if extents are shared through shared subtrees resulting from snapshots, it means we can incorrectly report an extent as shared when it's no longer shared. However this is temporary because after the transaction is committed the extent is no longer reported as shared, as running the delayed reference results in deleting the tree block reference from the extent tree. Outside the fiemap context, the result is unpredictable, as the key was not initialized but it's used when navigating the rb trees to insert and search for references (prelim_ref_compare()), and we expect all references in the indirect rb tree to have valid keys. The following reproducer triggers the second bug: $ cat test.sh #!/bin/bash DEV=/dev/sdj MNT=/mnt/sdj mkfs.btrfs -f $DEV mount -o compress $DEV $MNT # With a compressed 128M file we get a tree height of 2 (level 1 root). xfs_io -f -c "pwrite -b 1M 0 128M" $MNT/foo btrfs subvolume snapshot $MNT $MNT/snap # Fiemap should output 0x2008 in the flags column. # 0x2000 means shared extent # 0x8 means encoded extent (because it's compressed) echo echo "fiemap after snapshot, range [120M, 120M + 128K):" xfs_io -c "fiemap -v 120M 128K" $MNT/foo echo # Overwrite one extent and fsync to flush delalloc and COW a new path # in the snapshot's tree. # # After this we have a BTRFS_DROP_DELAYED_REF delayed ref of type # BTRFS_TREE_BLOCK_REF_KEY with a count of -1 for every COWed extent # buffer in the path. # # In the extent tree we have inline references of type # BTRFS_TREE_BLOCK_REF_KEY, with a count of 1, for the same extent # buffers, so they should cancel each other, and the extent buffers in # the fs tree should no longer be considered as shared. # echo "Overwriting file range [120M, 120M + 128K)..." xfs_io -c "pwrite -b 128K 120M 128K" $MNT/snap/foo xfs_io -c "fsync" $MNT/snap/foo # Fiemap should output 0x8 in the flags column. The extent in the range # [120M, 120M + 128K) is no longer shared, it's now exclusive to the fs # tree. echo echo "fiemap after overwrite range [120M, 120M + 128K):" xfs_io -c "fiemap -v 120M 128K" $MNT/foo echo umount $MNT Running it before this patch: $ ./test.sh (...) wrote 134217728/134217728 bytes at offset 0 128 MiB, 128 ops; 0.1152 sec (1.085 GiB/sec and 1110.5809 ops/sec) Create a snapshot of '/mnt/sdj' in '/mnt/sdj/snap' fiemap after snapshot, range [120M, 120M + 128K): /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [245760..246015]: 34304..34559 256 0x2008 Overwriting file range [120M, 120M + 128K)... wrote 131072/131072 bytes at offset 125829120 128 KiB, 1 ops; 0.0001 sec (683.060 MiB/sec and 5464.4809 ops/sec) fiemap after overwrite range [120M, 120M + 128K): /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [245760..246015]: 34304..34559 256 0x2008 The extent in the range [120M, 120M + 128K) is still reported as shared (0x2000 bit set) after overwriting that range and flushing delalloc, which is not correct - an entire path was COWed in the snapshot's tree and the extent is now only referenced by the original fs tree. Running it after this patch: $ ./test.sh (...) wrote 134217728/134217728 bytes at offset 0 128 MiB, 128 ops; 0.1198 sec (1.043 GiB/sec and 1068.2067 ops/sec) Create a snapshot of '/mnt/sdj' in '/mnt/sdj/snap' fiemap after snapshot, range [120M, 120M + 128K): /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [245760..246015]: 34304..34559 256 0x2008 Overwriting file range [120M, 120M + 128K)... wrote 131072/131072 bytes at offset 125829120 128 KiB, 1 ops; 0.0001 sec (694.444 MiB/sec and 5555.5556 ops/sec) fiemap after overwrite range [120M, 120M + 128K): /mnt/sdj/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [245760..246015]: 34304..34559 256 0x8 Now the extent is not reported as shared anymore. So fix this by passing a NULL key pointer to add_indirect_ref() when processing a delayed reference for a tree block if there's no extent op for our delayed ref head with a defined key. Also access the extent op only after locking the delayed ref head's lock. The reproducer will be converted later to a test case for fstests. Fixes: 86d5f994425252 ("btrfs: convert prelimary reference tracking to use rbtrees") Fixes: a6dbceafb915e8 ("btrfs: Remove unused op_key var from add_delayed_refs") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index cf47dabb786f..4e29ccb234c0 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -821,16 +821,11 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, struct preftrees *preftrees, struct share_check *sc) { struct btrfs_delayed_ref_node *node; - struct btrfs_delayed_extent_op *extent_op = head->extent_op; struct btrfs_key key; - struct btrfs_key tmp_op_key; struct rb_node *n; int count; int ret = 0; - if (extent_op && extent_op->update_key) - btrfs_disk_key_to_cpu(&tmp_op_key, &extent_op->key); - spin_lock(&head->lock); for (n = rb_first_cached(&head->ref_tree); n; n = rb_next(n)) { node = rb_entry(n, struct btrfs_delayed_ref_node, @@ -856,10 +851,16 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, case BTRFS_TREE_BLOCK_REF_KEY: { /* NORMAL INDIRECT METADATA backref */ struct btrfs_delayed_tree_ref *ref; + struct btrfs_key *key_ptr = NULL; + + if (head->extent_op && head->extent_op->update_key) { + btrfs_disk_key_to_cpu(&key, &head->extent_op->key); + key_ptr = &key; + } ref = btrfs_delayed_node_to_tree_ref(node); ret = add_indirect_ref(fs_info, preftrees, ref->root, - &tmp_op_key, ref->level + 1, + key_ptr, ref->level + 1, node->bytenr, count, sc, GFP_ATOMIC); break; -- cgit v1.2.3 From 63c84b46b3b75798f1ad63527b6250de00331907 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:16:53 +0100 Subject: btrfs: ignore fiemap path cache if we have multiple leaves for a data extent The path cache used during fiemap used to determine the sharedness of extent buffers in a path from a leaf containing a file extent item pointing to our data extent up to the root node of the tree, is meant to be used for a single path. Having a single path is by far the most common case, and therefore worth to optimize for, but it's possible to actually have multiple paths because we have 2 or more leaves. If we have multiple leaves, the 'level' variable keeps getting incremented in each iteration of the while loop at btrfs_is_data_extent_shared(), which means we will treat the second leaf in the 'tmp' ulist as a level 1 node, and so forth. In the worst case this can lead to getting a level greater than or equals to BTRFS_MAX_LEVEL (8), which will trigger a WARN_ON_ONCE() in the functions to lookup from or store in the path cache (lookup_backref_shared_cache() and store_backref_shared_cache()). If the current level never goes beyond 8, due to shared nodes in the paths and a fs tree height smaller than 8, it can still result in incorrectly marking one leaf as shared because some other leaf is shared and is stored one level below that other leaf, as when storing a true sharedness value in the cache results in updating the sharedness to true of all entries in the cache below the current level. Having multiple leaves happens in a case like the following: - We have a file extent item point to data extent at bytenr X, for a file range [0, 1M[ for example; - At this moment we have an extent data ref for the extent, with an offset of 0 and a count of 1; - A write into the middle of the extent happens, file range [64K, 128K) so the file extent item is split into two (at btrfs_drop_extents()): 1) One for file range [0, 64K), with a length (num_bytes field) of 64K and an extent offset of 0; 2) Another one for file range [128K, 1M), with a length of 896K (1M - 128K) and an extent offset of 128K. - At this moment the two file extent items are located in the same leaf; - A new file extent item for the range [64K, 128K), pointing to a new data extent, is inserted in the leaf. This results in a leaf split and now those two file extent items pointing to data extent X end up located in different leaves; - Once delayed refs are run, we still have a single extent data ref item for our data extent at bytenr X, for offset 0, but now with a count of 2 instead of 1; - So during fiemap, at btrfs_is_data_extent_shared(), after we call find_parent_nodes() for the data extent, we get two leaves, since we have two file extent items point to data extent at bytenr X that are located in two different leaves. So skip the use of the path cache when we get more than one leaf. Fixes: 12a824dc67a61e ("btrfs: speedup checking for extent sharedness during fiemap") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 25 +++++++++++++++++++++++++ fs/btrfs/backref.h | 1 + 2 files changed, 26 insertions(+) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 4e29ccb234c0..4ec18ceb2f21 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1536,6 +1536,9 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache { struct btrfs_backref_shared_cache_entry *entry; + if (!cache->use_cache) + return false; + if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) return false; @@ -1600,6 +1603,9 @@ static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache, struct btrfs_backref_shared_cache_entry *entry; u64 gen; + if (!cache->use_cache) + return; + if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) return; @@ -1697,6 +1703,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, /* -1 means we are in the bytenr of the data extent. */ level = -1; ULIST_ITER_INIT(&uiter); + cache->use_cache = true; while (1) { bool is_shared; bool cached; @@ -1726,6 +1733,24 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, extent_gen > btrfs_root_last_snapshot(&root->root_item)) break; + /* + * If our data extent was not directly shared (without multiple + * reference items), than it might have a single reference item + * with a count > 1 for the same offset, which means there are 2 + * (or more) file extent items that point to the data extent - + * this happens when a file extent item needs to be split and + * then one item gets moved to another leaf due to a b+tree leaf + * split when inserting some item. In this case the file extent + * items may be located in different leaves and therefore some + * of the leaves may be referenced through shared subtrees while + * others are not. Since our extent buffer cache only works for + * a single path (by far the most common case and simpler to + * deal with), we can not use it if we have multiple leaves + * (which implies multiple paths). + */ + if (level == -1 && tmp->nnodes > 1) + cache->use_cache = false; + if (level >= 0) store_backref_shared_cache(cache, root, bytenr, level, false); diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 52ae6957b414..8e69584d538d 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -29,6 +29,7 @@ struct btrfs_backref_shared_cache { * a given data extent should never exceed the maximum b+tree height. */ struct btrfs_backref_shared_cache_entry entries[BTRFS_MAX_LEVEL]; + bool use_cache; }; typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, -- cgit v1.2.3 From 5614dc3a47e3310fbc77ea3b67eaadd1c6417bf1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:37 +0000 Subject: btrfs: fix inode list leak during backref walking at resolve_indirect_refs() During backref walking, at resolve_indirect_refs(), if we get an error we jump to the 'out' label and call ulist_free() on the 'parents' ulist, which frees all the elements in the ulist - however that does not free any inode lists that may be attached to elements, through the 'aux' field of a ulist node, so we end up leaking lists if we have any attached to the unodes. Fix this by calling free_leaf_list() instead of ulist_free() when we exit from resolve_indirect_refs(). The static function free_leaf_list() is moved up for this to be possible and it's slightly simplified by removing unnecessary code. Fixes: 3301958b7c1d ("Btrfs: add inodes before dropping the extent lock in find_all_leafs") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 4ec18ceb2f21..40afae0af4e6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -648,6 +648,18 @@ unode_aux_to_inode_list(struct ulist_node *node) return (struct extent_inode_elem *)(uintptr_t)node->aux; } +static void free_leaf_list(struct ulist *ulist) +{ + struct ulist_node *node; + struct ulist_iterator uiter; + + ULIST_ITER_INIT(&uiter); + while ((node = ulist_next(ulist, &uiter))) + free_inode_elem_list(unode_aux_to_inode_list(node)); + + ulist_free(ulist); +} + /* * We maintain three separate rbtrees: one for direct refs, one for * indirect refs which have a key, and one for indirect refs which do not @@ -762,7 +774,11 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, cond_resched(); } out: - ulist_free(parents); + /* + * We may have inode lists attached to refs in the parents ulist, so we + * must free them before freeing the ulist and its refs. + */ + free_leaf_list(parents); return ret; } @@ -1409,24 +1425,6 @@ out: return ret; } -static void free_leaf_list(struct ulist *blocks) -{ - struct ulist_node *node = NULL; - struct extent_inode_elem *eie; - struct ulist_iterator uiter; - - ULIST_ITER_INIT(&uiter); - while ((node = ulist_next(blocks, &uiter))) { - if (!node->aux) - continue; - eie = unode_aux_to_inode_list(node); - free_inode_elem_list(eie); - node->aux = 0; - } - - ulist_free(blocks); -} - /* * Finds all leafs with a reference to the specified combination of bytenr and * offset. key_list_head will point to a list of corresponding keys (caller must -- cgit v1.2.3 From 92876eec382a0f19f33d09d2c939e9ca49038ae5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:38 +0000 Subject: btrfs: fix inode list leak during backref walking at find_parent_nodes() During backref walking, at find_parent_nodes(), if we are dealing with a data extent and we get an error while resolving the indirect backrefs, at resolve_indirect_refs(), or in the while loop that iterates over the refs in the direct refs rbtree, we end up leaking the inode lists attached to the direct refs we have in the direct refs rbtree that were not yet added to the refs ulist passed as argument to find_parent_nodes(). Since they were not yet added to the refs ulist and prelim_release() does not free the lists, on error the caller can only free the lists attached to the refs that were added to the refs ulist, all the remaining refs get their inode lists never freed, therefore leaking their memory. Fix this by having prelim_release() always free any attached inode list to each ref found in the rbtree, and have find_parent_nodes() set the ref's inode list to NULL once it transfers ownership of the inode list to a ref added to the refs ulist passed to find_parent_nodes(). Fixes: 86d5f9944252 ("btrfs: convert prelimary reference tracking to use rbtrees") Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 40afae0af4e6..18374a6d05bd 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -289,8 +289,10 @@ static void prelim_release(struct preftree *preftree) struct prelim_ref *ref, *next_ref; rbtree_postorder_for_each_entry_safe(ref, next_ref, - &preftree->root.rb_root, rbnode) + &preftree->root.rb_root, rbnode) { + free_inode_elem_list(ref->inode_list); free_pref(ref); + } preftree->root = RB_ROOT_CACHED; preftree->count = 0; @@ -1384,6 +1386,12 @@ again: if (ret < 0) goto out; ref->inode_list = eie; + /* + * We transferred the list ownership to the ref, + * so set to NULL to avoid a double free in case + * an error happens after this. + */ + eie = NULL; } ret = ulist_add_merge_ptr(refs, ref->parent, ref->inode_list, @@ -1409,6 +1417,14 @@ again: eie->next = ref->inode_list; } eie = NULL; + /* + * We have transferred the inode list ownership from + * this ref to the ref we added to the 'refs' ulist. + * So set this ref's inode list to NULL to avoid + * use-after-free when our caller uses it or double + * frees in case an error happens before we return. + */ + ref->inode_list = NULL; } cond_resched(); } -- cgit v1.2.3 From c902421927ff602954d2ecc7bd6e71b255d29387 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:16:59 +0100 Subject: btrfs: remove checks for a root with id 0 during backref walking When doing backref walking to determine if an extent is shared, we are testing the root_objectid of the given share_check struct is 0, but that is an impossible case, since btrfs_is_data_extent_shared() always initializes the root_objectid field with the id of the given root, and no root can have an objectid of 0. So remove those checks. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 18374a6d05bd..abac2e942e8b 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -719,8 +719,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, continue; } - if (sc && sc->root_objectid && - ref->root_id != sc->root_objectid) { + if (sc && ref->root_id != sc->root_objectid) { free_pref(ref); ret = BACKREF_FOUND_SHARED; goto out; @@ -1348,8 +1347,7 @@ again: * and would retain their original ref->count < 0. */ if (roots && ref->count && ref->root_id && ref->parent == 0) { - if (sc && sc->root_objectid && - ref->root_id != sc->root_objectid) { + if (sc && ref->root_id != sc->root_objectid) { ret = BACKREF_FOUND_SHARED; goto out; } -- cgit v1.2.3 From a0a5472ad802d99d3fb4b361cc3fb5ea24914ee0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:17:00 +0100 Subject: btrfs: remove checks for a 0 inode number during backref walking When doing backref walking to determine if an extent is shared, we are testing if the inode number, stored in the 'inum' field of struct share_check, is 0. However that can never be case, since the all instances of the structure are created at btrfs_is_data_extent_shared(), which always initializes it with the inode number from a fs tree (and the number for any inode from any tree can never be 0). So remove the checks. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index abac2e942e8b..9c7b3e1e4762 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1051,7 +1051,7 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); - if (sc && sc->inum && key.objectid != sc->inum && + if (sc && key.objectid != sc->inum && !sc->have_delayed_delete_refs) { ret = BACKREF_FOUND_SHARED; break; @@ -1152,7 +1152,7 @@ static int add_keyed_refs(struct btrfs_root *extent_root, key.type = BTRFS_EXTENT_DATA_KEY; key.offset = btrfs_extent_data_ref_offset(leaf, dref); - if (sc && sc->inum && key.objectid != sc->inum && + if (sc && key.objectid != sc->inum && !sc->have_delayed_delete_refs) { ret = BACKREF_FOUND_SHARED; break; -- cgit v1.2.3 From ceb707da9ad92ad3a5251dc13844034ded06cb3d Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:17:01 +0100 Subject: btrfs: directly pass the inode to btrfs_is_data_extent_shared() Currently we pass a root and an inode number as arguments for btrfs_is_data_extent_shared() and the inode number is always from an inode that belongs to that root (it wouldn't make sense otherwise). In every context that we call btrfs_is_data_extent_shared() (fiemap only), we have an inode available, so directly pass the inode to the function instead of a root and inode number. This reduces the number of parameters and it makes the function's signature conform to most other functions we have. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 8 ++++---- fs/btrfs/backref.h | 2 +- fs/btrfs/extent_io.c | 16 +++++++--------- 3 files changed, 12 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 9c7b3e1e4762..efdeb69e8ed6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1658,8 +1658,7 @@ static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache, /* * Check if a data extent is shared or not. * - * @root: The root the inode belongs to. - * @inum: Number of the inode whose extent we are checking. + * @inode: The inode whose extent we are checking. * @bytenr: Logical bytenr of the extent we are checking. * @extent_gen: Generation of the extent (file extent item) or 0 if it is * not known. @@ -1678,11 +1677,12 @@ static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache, * * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error. */ -int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, +int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, u64 extent_gen, struct ulist *roots, struct ulist *tmp, struct btrfs_backref_shared_cache *cache) { + struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_trans_handle *trans; struct ulist_iterator uiter; @@ -1691,7 +1691,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, int ret = 0; struct share_check shared = { .root_objectid = root->root_key.objectid, - .inum = inum, + .inum = btrfs_ino(inode), .share_count = 0, .have_delayed_delete_refs = false, }; diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 8e69584d538d..c846fa2bdf93 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -77,7 +77,7 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, u64 start_off, struct btrfs_path *path, struct btrfs_inode_extref **ret_extref, u64 *found_off); -int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr, +int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, u64 extent_gen, struct ulist *roots, struct ulist *tmp, struct btrfs_backref_shared_cache *cache); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 3192f094fe6c..83be7e85ba91 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3712,7 +3712,6 @@ static int fiemap_process_hole(struct btrfs_inode *inode, u64 start, u64 end) { const u64 i_size = i_size_read(&inode->vfs_inode); - const u64 ino = btrfs_ino(inode); u64 cur_offset = start; u64 last_delalloc_end = 0; u32 prealloc_flags = FIEMAP_EXTENT_UNWRITTEN; @@ -3752,8 +3751,8 @@ static int fiemap_process_hole(struct btrfs_inode *inode, if (prealloc_len > 0) { if (!checked_extent_shared && fieinfo->fi_extents_max) { - ret = btrfs_is_data_extent_shared(inode->root, - ino, disk_bytenr, + ret = btrfs_is_data_extent_shared(inode, + disk_bytenr, extent_gen, roots, tmp_ulist, backref_cache); @@ -3802,8 +3801,8 @@ static int fiemap_process_hole(struct btrfs_inode *inode, } if (!checked_extent_shared && fieinfo->fi_extents_max) { - ret = btrfs_is_data_extent_shared(inode->root, - ino, disk_bytenr, + ret = btrfs_is_data_extent_shared(inode, + disk_bytenr, extent_gen, roots, tmp_ulist, backref_cache); @@ -3904,7 +3903,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, const u64 ino = btrfs_ino(inode); struct extent_state *cached_state = NULL; struct btrfs_path *path; - struct btrfs_root *root = inode->root; struct fiemap_cache cache = { 0 }; struct btrfs_backref_shared_cache *backref_cache; struct ulist *roots; @@ -3925,8 +3923,8 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, goto out; } - lockstart = round_down(start, root->fs_info->sectorsize); - lockend = round_up(start + len, root->fs_info->sectorsize); + lockstart = round_down(start, inode->root->fs_info->sectorsize); + lockend = round_up(start + len, inode->root->fs_info->sectorsize); prev_extent_end = lockstart; lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); @@ -4034,7 +4032,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, } else { /* We have a regular extent. */ if (fieinfo->fi_extents_max) { - ret = btrfs_is_data_extent_shared(root, ino, + ret = btrfs_is_data_extent_shared(inode, disk_bytenr, extent_gen, roots, -- cgit v1.2.3 From 61dbb952f0a5f587c983d88853212e969d2d4ede Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:17:02 +0100 Subject: btrfs: turn the backref sharedness check cache into a context object Right now we are using a struct btrfs_backref_shared_cache to pass state across multiple btrfs_is_data_extent_shared() calls. The structure's name closely follows its current purpose, which is to cache previous checks for the sharedness of metadata extents. However we will start using the structure for more things other than caching sharedness checks, so rename it to struct btrfs_backref_share_check_ctx. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 32 ++++++++++++++++---------------- fs/btrfs/backref.h | 8 ++++---- fs/btrfs/extent_io.c | 24 ++++++++++++------------ 3 files changed, 32 insertions(+), 32 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index efdeb69e8ed6..693c99ad4afb 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1542,13 +1542,13 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, * fs_info->commit_root_sem semaphore, so no need to worry about the root's last * snapshot field changing while updating or checking the cache. */ -static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache, +static bool lookup_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx, struct btrfs_root *root, u64 bytenr, int level, bool *is_shared) { struct btrfs_backref_shared_cache_entry *entry; - if (!cache->use_cache) + if (!ctx->use_path_cache) return false; if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) @@ -1562,7 +1562,7 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache */ ASSERT(level >= 0); - entry = &cache->entries[level]; + entry = &ctx->path_cache_entries[level]; /* Unused cache entry or being used for some other extent buffer. */ if (entry->bytenr != bytenr) @@ -1595,8 +1595,8 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache */ if (*is_shared) { for (int i = 0; i < level; i++) { - cache->entries[i].is_shared = true; - cache->entries[i].gen = entry->gen; + ctx->path_cache_entries[i].is_shared = true; + ctx->path_cache_entries[i].gen = entry->gen; } } @@ -1608,14 +1608,14 @@ static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache * fs_info->commit_root_sem semaphore, so no need to worry about the root's last * snapshot field changing while updating or checking the cache. */ -static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache, +static void store_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx, struct btrfs_root *root, u64 bytenr, int level, bool is_shared) { struct btrfs_backref_shared_cache_entry *entry; u64 gen; - if (!cache->use_cache) + if (!ctx->use_path_cache) return; if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) @@ -1634,7 +1634,7 @@ static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache, else gen = btrfs_root_last_snapshot(&root->root_item); - entry = &cache->entries[level]; + entry = &ctx->path_cache_entries[level]; entry->bytenr = bytenr; entry->is_shared = is_shared; entry->gen = gen; @@ -1648,7 +1648,7 @@ static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache, */ if (is_shared) { for (int i = 0; i < level; i++) { - entry = &cache->entries[i]; + entry = &ctx->path_cache_entries[i]; entry->is_shared = is_shared; entry->gen = gen; } @@ -1664,7 +1664,7 @@ static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache, * not known. * @roots: List of roots this extent is shared among. * @tmp: Temporary list used for iteration. - * @cache: A backref lookup result cache. + * @ctx: A backref sharedness check context. * * btrfs_is_data_extent_shared uses the backref walking code but will short * circuit as soon as it finds a root or inode that doesn't match the @@ -1680,7 +1680,7 @@ static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache, int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, u64 extent_gen, struct ulist *roots, struct ulist *tmp, - struct btrfs_backref_shared_cache *cache) + struct btrfs_backref_share_check_ctx *ctx) { struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; @@ -1715,7 +1715,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, /* -1 means we are in the bytenr of the data extent. */ level = -1; ULIST_ITER_INIT(&uiter); - cache->use_cache = true; + ctx->use_path_cache = true; while (1) { bool is_shared; bool cached; @@ -1726,7 +1726,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, /* this is the only condition under which we return 1 */ ret = 1; if (level >= 0) - store_backref_shared_cache(cache, root, bytenr, + store_backref_shared_cache(ctx, root, bytenr, level, true); break; } @@ -1761,17 +1761,17 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, * (which implies multiple paths). */ if (level == -1 && tmp->nnodes > 1) - cache->use_cache = false; + ctx->use_path_cache = false; if (level >= 0) - store_backref_shared_cache(cache, root, bytenr, + store_backref_shared_cache(ctx, root, bytenr, level, false); node = ulist_next(tmp, &uiter); if (!node) break; bytenr = node->val; level++; - cached = lookup_backref_shared_cache(cache, root, bytenr, level, + cached = lookup_backref_shared_cache(ctx, root, bytenr, level, &is_shared); if (cached) { ret = (is_shared ? 1 : 0); diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index c846fa2bdf93..e3a2b45a76e3 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -23,13 +23,13 @@ struct btrfs_backref_shared_cache_entry { bool is_shared; }; -struct btrfs_backref_shared_cache { +struct btrfs_backref_share_check_ctx { /* * A path from a root to a leaf that has a file extent item pointing to * a given data extent should never exceed the maximum b+tree height. */ - struct btrfs_backref_shared_cache_entry entries[BTRFS_MAX_LEVEL]; - bool use_cache; + struct btrfs_backref_shared_cache_entry path_cache_entries[BTRFS_MAX_LEVEL]; + bool use_path_cache; }; typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, @@ -80,7 +80,7 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, u64 extent_gen, struct ulist *roots, struct ulist *tmp, - struct btrfs_backref_shared_cache *cache); + struct btrfs_backref_share_check_ctx *ctx); int __init btrfs_prelim_ref_init(void); void __cold btrfs_prelim_ref_exit(void); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 83be7e85ba91..365ad00a5942 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3705,7 +3705,7 @@ static int fiemap_search_slot(struct btrfs_inode *inode, struct btrfs_path *path static int fiemap_process_hole(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, struct fiemap_cache *cache, - struct btrfs_backref_shared_cache *backref_cache, + struct btrfs_backref_share_check_ctx *backref_ctx, u64 disk_bytenr, u64 extent_offset, u64 extent_gen, struct ulist *roots, struct ulist *tmp_ulist, @@ -3755,7 +3755,7 @@ static int fiemap_process_hole(struct btrfs_inode *inode, disk_bytenr, extent_gen, roots, tmp_ulist, - backref_cache); + backref_ctx); if (ret < 0) return ret; else if (ret > 0) @@ -3805,7 +3805,7 @@ static int fiemap_process_hole(struct btrfs_inode *inode, disk_bytenr, extent_gen, roots, tmp_ulist, - backref_cache); + backref_ctx); if (ret < 0) return ret; else if (ret > 0) @@ -3904,7 +3904,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, struct extent_state *cached_state = NULL; struct btrfs_path *path; struct fiemap_cache cache = { 0 }; - struct btrfs_backref_shared_cache *backref_cache; + struct btrfs_backref_share_check_ctx *backref_ctx; struct ulist *roots; struct ulist *tmp_ulist; u64 last_extent_end; @@ -3914,11 +3914,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, bool stopped = false; int ret; - backref_cache = kzalloc(sizeof(*backref_cache), GFP_KERNEL); + backref_ctx = kzalloc(sizeof(*backref_ctx), GFP_KERNEL); path = btrfs_alloc_path(); roots = ulist_alloc(GFP_KERNEL); tmp_ulist = ulist_alloc(GFP_KERNEL); - if (!backref_cache || !path || !roots || !tmp_ulist) { + if (!backref_ctx || !path || !roots || !tmp_ulist) { ret = -ENOMEM; goto out; } @@ -3978,7 +3978,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, const u64 range_end = min(key.offset, lockend) - 1; ret = fiemap_process_hole(inode, fieinfo, &cache, - backref_cache, 0, 0, 0, + backref_ctx, 0, 0, 0, roots, tmp_ulist, prev_extent_end, range_end); if (ret < 0) { @@ -4019,14 +4019,14 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, extent_len, flags); } else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) { ret = fiemap_process_hole(inode, fieinfo, &cache, - backref_cache, + backref_ctx, disk_bytenr, extent_offset, extent_gen, roots, tmp_ulist, key.offset, extent_end - 1); } else if (disk_bytenr == 0) { /* We have an explicit hole. */ ret = fiemap_process_hole(inode, fieinfo, &cache, - backref_cache, 0, 0, 0, + backref_ctx, 0, 0, 0, roots, tmp_ulist, key.offset, extent_end - 1); } else { @@ -4037,7 +4037,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, extent_gen, roots, tmp_ulist, - backref_cache); + backref_ctx); if (ret < 0) goto out_unlock; else if (ret > 0) @@ -4086,7 +4086,7 @@ check_eof_delalloc: path = NULL; if (!stopped && prev_extent_end < lockend) { - ret = fiemap_process_hole(inode, fieinfo, &cache, backref_cache, + ret = fiemap_process_hole(inode, fieinfo, &cache, backref_ctx, 0, 0, 0, roots, tmp_ulist, prev_extent_end, lockend - 1); if (ret < 0) @@ -4119,7 +4119,7 @@ check_eof_delalloc: out_unlock: unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); out: - kfree(backref_cache); + kfree(backref_ctx); btrfs_free_path(path); ulist_free(roots); ulist_free(tmp_ulist); -- cgit v1.2.3 From 84a7949d409753c90dc3477b8cfc18e983b09078 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:17:03 +0100 Subject: btrfs: move ulists to data extent sharedness check context When calling btrfs_is_data_extent_shared() we pass two ulists that were allocated by the caller. This is because the single caller, fiemap, calls btrfs_is_data_extent_shared() multiple times and the ulists can be reused, instead of allocating new ones before each call and freeing them after each call. Now that we have a context structure/object that we pass to btrfs_is_data_extent_shared(), we can move those ulists to it, and hide their allocation and the context's allocation in a helper function, as well as the freeing of the ulists and the context object. This allows to reduce the number of parameters passed to btrfs_is_data_extent_shared(), the need to pass the ulists from extent_fiemap() to fiemap_process_hole() and having the caller deal with allocating and releasing the ulists. Also rename one of the ulists from 'tmp' / 'tmp_ulist' to 'refs', since that's a much better name as it reflects what the list is used for (and matching the argument name for find_parent_nodes()). Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 43 ++++++++++++++++++++++++++++++++----------- fs/btrfs/backref.h | 7 ++++++- fs/btrfs/extent_io.c | 34 ++++++++++------------------------ 3 files changed, 48 insertions(+), 36 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 693c99ad4afb..4caff3052da7 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1655,6 +1655,30 @@ static void store_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx } } +struct btrfs_backref_share_check_ctx *btrfs_alloc_backref_share_check_ctx(void) +{ + struct btrfs_backref_share_check_ctx *ctx; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return NULL; + + ulist_init(&ctx->refs); + ulist_init(&ctx->roots); + + return ctx; +} + +void btrfs_free_backref_share_ctx(struct btrfs_backref_share_check_ctx *ctx) +{ + if (!ctx) + return; + + ulist_release(&ctx->refs); + ulist_release(&ctx->roots); + kfree(ctx); +} + /* * Check if a data extent is shared or not. * @@ -1662,8 +1686,6 @@ static void store_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx * @bytenr: Logical bytenr of the extent we are checking. * @extent_gen: Generation of the extent (file extent item) or 0 if it is * not known. - * @roots: List of roots this extent is shared among. - * @tmp: Temporary list used for iteration. * @ctx: A backref sharedness check context. * * btrfs_is_data_extent_shared uses the backref walking code but will short @@ -1679,7 +1701,6 @@ static void store_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx */ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, u64 extent_gen, - struct ulist *roots, struct ulist *tmp, struct btrfs_backref_share_check_ctx *ctx) { struct btrfs_root *root = inode->root; @@ -1697,8 +1718,8 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, }; int level; - ulist_init(roots); - ulist_init(tmp); + ulist_init(&ctx->roots); + ulist_init(&ctx->refs); trans = btrfs_join_transaction_nostart(root); if (IS_ERR(trans)) { @@ -1720,8 +1741,8 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, bool is_shared; bool cached; - ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp, - roots, NULL, &shared, false); + ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, &ctx->refs, + &ctx->roots, NULL, &shared, false); if (ret == BACKREF_FOUND_SHARED) { /* this is the only condition under which we return 1 */ ret = 1; @@ -1760,13 +1781,13 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, * deal with), we can not use it if we have multiple leaves * (which implies multiple paths). */ - if (level == -1 && tmp->nnodes > 1) + if (level == -1 && ctx->refs.nnodes > 1) ctx->use_path_cache = false; if (level >= 0) store_backref_shared_cache(ctx, root, bytenr, level, false); - node = ulist_next(tmp, &uiter); + node = ulist_next(&ctx->refs, &uiter); if (!node) break; bytenr = node->val; @@ -1789,8 +1810,8 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, up_read(&fs_info->commit_root_sem); } out: - ulist_release(roots); - ulist_release(tmp); + ulist_release(&ctx->roots); + ulist_release(&ctx->refs); return ret; } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index e3a2b45a76e3..8da0ba6b94a4 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -24,6 +24,9 @@ struct btrfs_backref_shared_cache_entry { }; struct btrfs_backref_share_check_ctx { + /* Ulists used during backref walking. */ + struct ulist refs; + struct ulist roots; /* * A path from a root to a leaf that has a file extent item pointing to * a given data extent should never exceed the maximum b+tree height. @@ -35,6 +38,9 @@ struct btrfs_backref_share_check_ctx { typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, void *ctx); +struct btrfs_backref_share_check_ctx *btrfs_alloc_backref_share_check_ctx(void); +void btrfs_free_backref_share_ctx(struct btrfs_backref_share_check_ctx *ctx); + int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical, struct btrfs_path *path, struct btrfs_key *found_key, u64 *flags); @@ -79,7 +85,6 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, u64 *found_off); int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, u64 extent_gen, - struct ulist *roots, struct ulist *tmp, struct btrfs_backref_share_check_ctx *ctx); int __init btrfs_prelim_ref_init(void); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 365ad00a5942..e25e54d2216f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3708,7 +3708,6 @@ static int fiemap_process_hole(struct btrfs_inode *inode, struct btrfs_backref_share_check_ctx *backref_ctx, u64 disk_bytenr, u64 extent_offset, u64 extent_gen, - struct ulist *roots, struct ulist *tmp_ulist, u64 start, u64 end) { const u64 i_size = i_size_read(&inode->vfs_inode); @@ -3752,10 +3751,9 @@ static int fiemap_process_hole(struct btrfs_inode *inode, if (prealloc_len > 0) { if (!checked_extent_shared && fieinfo->fi_extents_max) { ret = btrfs_is_data_extent_shared(inode, - disk_bytenr, - extent_gen, roots, - tmp_ulist, - backref_ctx); + disk_bytenr, + extent_gen, + backref_ctx); if (ret < 0) return ret; else if (ret > 0) @@ -3803,8 +3801,7 @@ static int fiemap_process_hole(struct btrfs_inode *inode, if (!checked_extent_shared && fieinfo->fi_extents_max) { ret = btrfs_is_data_extent_shared(inode, disk_bytenr, - extent_gen, roots, - tmp_ulist, + extent_gen, backref_ctx); if (ret < 0) return ret; @@ -3905,8 +3902,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, struct btrfs_path *path; struct fiemap_cache cache = { 0 }; struct btrfs_backref_share_check_ctx *backref_ctx; - struct ulist *roots; - struct ulist *tmp_ulist; u64 last_extent_end; u64 prev_extent_end; u64 lockstart; @@ -3914,11 +3909,9 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, bool stopped = false; int ret; - backref_ctx = kzalloc(sizeof(*backref_ctx), GFP_KERNEL); + backref_ctx = btrfs_alloc_backref_share_check_ctx(); path = btrfs_alloc_path(); - roots = ulist_alloc(GFP_KERNEL); - tmp_ulist = ulist_alloc(GFP_KERNEL); - if (!backref_ctx || !path || !roots || !tmp_ulist) { + if (!backref_ctx || !path) { ret = -ENOMEM; goto out; } @@ -3979,7 +3972,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, ret = fiemap_process_hole(inode, fieinfo, &cache, backref_ctx, 0, 0, 0, - roots, tmp_ulist, prev_extent_end, range_end); if (ret < 0) { goto out_unlock; @@ -4021,13 +4013,12 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, ret = fiemap_process_hole(inode, fieinfo, &cache, backref_ctx, disk_bytenr, extent_offset, - extent_gen, roots, tmp_ulist, - key.offset, extent_end - 1); + extent_gen, key.offset, + extent_end - 1); } else if (disk_bytenr == 0) { /* We have an explicit hole. */ ret = fiemap_process_hole(inode, fieinfo, &cache, backref_ctx, 0, 0, 0, - roots, tmp_ulist, key.offset, extent_end - 1); } else { /* We have a regular extent. */ @@ -4035,8 +4026,6 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, ret = btrfs_is_data_extent_shared(inode, disk_bytenr, extent_gen, - roots, - tmp_ulist, backref_ctx); if (ret < 0) goto out_unlock; @@ -4087,8 +4076,7 @@ check_eof_delalloc: if (!stopped && prev_extent_end < lockend) { ret = fiemap_process_hole(inode, fieinfo, &cache, backref_ctx, - 0, 0, 0, roots, tmp_ulist, - prev_extent_end, lockend - 1); + 0, 0, 0, prev_extent_end, lockend - 1); if (ret < 0) goto out_unlock; prev_extent_end = lockend; @@ -4119,10 +4107,8 @@ check_eof_delalloc: out_unlock: unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); out: - kfree(backref_ctx); + btrfs_free_backref_share_ctx(backref_ctx); btrfs_free_path(path); - ulist_free(roots); - ulist_free(tmp_ulist); return ret; } -- cgit v1.2.3 From b629685803bc0cefbbd29240ea28d57d8a17bcbc Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:17:04 +0100 Subject: btrfs: remove roots ulist when checking data extent sharedness Currently btrfs_is_data_extent_shared() is passing a ulist for the roots argument of find_parent_nodes(), however it does not use that ulist for anything and for this context that list always ends up with at most one element. Since find_parent_nodes() is able to deal with a NULL ulist for its roots argument, make btrfs_is_data_extent_shared() pass it NULL and avoid the burden of allocating memory for the unnused roots ulist, initializing it, releasing it and allocating one struct ulist_node for it during the call to find_parent_nodes(). Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 6 +----- fs/btrfs/backref.h | 1 - 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 4caff3052da7..4d8573fa75b4 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1664,7 +1664,6 @@ struct btrfs_backref_share_check_ctx *btrfs_alloc_backref_share_check_ctx(void) return NULL; ulist_init(&ctx->refs); - ulist_init(&ctx->roots); return ctx; } @@ -1675,7 +1674,6 @@ void btrfs_free_backref_share_ctx(struct btrfs_backref_share_check_ctx *ctx) return; ulist_release(&ctx->refs); - ulist_release(&ctx->roots); kfree(ctx); } @@ -1718,7 +1716,6 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, }; int level; - ulist_init(&ctx->roots); ulist_init(&ctx->refs); trans = btrfs_join_transaction_nostart(root); @@ -1742,7 +1739,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, bool cached; ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, &ctx->refs, - &ctx->roots, NULL, &shared, false); + NULL, NULL, &shared, false); if (ret == BACKREF_FOUND_SHARED) { /* this is the only condition under which we return 1 */ ret = 1; @@ -1810,7 +1807,6 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, up_read(&fs_info->commit_root_sem); } out: - ulist_release(&ctx->roots); ulist_release(&ctx->refs); return ret; } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 8da0ba6b94a4..5f468f0defda 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -26,7 +26,6 @@ struct btrfs_backref_shared_cache_entry { struct btrfs_backref_share_check_ctx { /* Ulists used during backref walking. */ struct ulist refs; - struct ulist roots; /* * A path from a root to a leaf that has a file extent item pointing to * a given data extent should never exceed the maximum b+tree height. -- cgit v1.2.3 From 56f5c19920d09bbf91efcf80e6ba301923400f4c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:17:05 +0100 Subject: btrfs: remove useless logic when finding parent nodes At find_parent_nodes(), at its last step, when iterating over all direct references, we are checking if we have a share context and if we have a reference with a different root from the one in the share context. However that logic is pointless because of two reasons: 1) After the previous patch in the series (subject "btrfs: remove roots ulist when checking data extent sharedness"), the roots argument is always NULL when using a share check context (struct share_check), so this code is never triggered; 2) Even before that previous patch, we could not hit this code because if we had a reference with a root different from the one in our share context, then we would have exited earlier when doing either of the following: - Adding a second direct ref to the direct refs red black tree resulted in extent_is_shared() returning true when called from add_direct_ref() -> add_prelim_ref(), after processing delayed references or while processing references in the extent tree; - When adding a second reference to the indirect refs red black tree (same as above, extent_is_shared() returns true); - If we only have one indirect reference and no direct references, then when resolving it at resolve_indirect_refs() we immediately return that the target extent is shared, therefore never reaching that loop that iterates over all direct references at find_parent_nodes(); - If we have 1 indirect reference and 1 direct reference, then we also exit early because extent_is_shared() ends up returning true when called through add_prelim_ref() (by add_direct_ref() or add_indirect_ref()) or add_delayed_refs(). Same applies as when having a combination of direct, indirect and indirect with missing key references. This logic had been obsoleted since commit 3ec4d3238ab165 ("btrfs: allow backref search checks for shared extents"), which introduced the early exits in case an extent is shared. So just remove that logic, and assert at find_parent_nodes() that when we have a share context we don't have a roots ulist and that we haven't found the extent to be directly shared after processing delayed references and all references from the extent tree. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 4d8573fa75b4..22ab13821ada 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1219,6 +1219,10 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, .indirect_missing_keys = PREFTREE_INIT }; + /* Roots ulist is not needed when using a sharedness check context. */ + if (sc) + ASSERT(roots == NULL); + key.objectid = bytenr; key.offset = (u64)-1; if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) @@ -1310,6 +1314,20 @@ again: } } + /* + * If we have a share context and we reached here, it means the extent + * is not directly shared (no multiple reference items for it), + * otherwise we would have exited earlier with a return value of + * BACKREF_FOUND_SHARED after processing delayed references or while + * processing inline or keyed references from the extent tree. + * The extent may however be indirectly shared through shared subtrees + * as a result from creating snapshots, so we determine below what is + * its parent node, in case we are dealing with a metadata extent, or + * what's the leaf (or leaves), from a fs tree, that has a file extent + * item pointing to it in case we are dealing with a data extent. + */ + ASSERT(extent_is_shared(sc) == 0); + btrfs_release_path(path); ret = add_missing_keys(fs_info, &preftrees, path->skip_locking == 0); @@ -1347,11 +1365,6 @@ again: * and would retain their original ref->count < 0. */ if (roots && ref->count && ref->root_id && ref->parent == 0) { - if (sc && ref->root_id != sc->root_objectid) { - ret = BACKREF_FOUND_SHARED; - goto out; - } - /* no parent == root of tree */ ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); if (ret < 0) -- cgit v1.2.3 From 73e339e6ab74cc3edcd1f1ed3d9b822baf8534e1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:17:06 +0100 Subject: btrfs: cache sharedness of the last few data extents during fiemap During fiemap we process all the file extent items of an inode, by their file offset order (left to right b+tree order), and then check if the data extent they point at is shared or not. Until now we didn't cache those results, we only did it for b+tree nodes/leaves since for each unique b+tree path we have access to hundreds of file extent items. However, it is also common to repeat checking the sharedness of a particular data extent in a very short time window, and the cases that lead to that are the following: 1) COW writes. If have a file extent item like this: [ bytenr X, offset = 0, num_bytes = 512K ] file offset 0 512K Then a 4K write into file offset 64K happens, we end up with the following file extent item layout: [ bytenr X, offset = 0, num_bytes = 64K ] file offset 0 64K [ bytenr Y, offset = 0, num_bytes = 4K ] file offset 64K 68K [ bytenr X, offset = 68K, num_bytes = 444K ] file offset 68K 512K So during fiemap we well check for the sharedness of the data extent with bytenr X twice. Typically for COW writes and for at least moderately updated files, we end up with many file extent items that point to different sections of the same data extent. 2) Writing into a NOCOW file after a snapshot is taken. This happens if the target extent was created in a generation older than the generation where the last snapshot for the root (the tree the inode belongs to) was made. This leads to a scenario like the previous one. 3) Writing into sections of a preallocated extent. For example if a file has the following layout: [ bytenr X, offset = 0, num_bytes = 1M, type = prealloc ] 0 1M After doing a 4K write into file offset 0 and another 4K write into offset 512K, we get the following layout: [ bytenr X, offset = 0, num_bytes = 4K, type = regular ] 0 4K [ bytenr X, offset = 4K, num_bytes = 508K, type = prealloc ] 4K 512K [ bytenr X, offset = 512K, num_bytes = 4K, type = regular ] 512K 516K [ bytenr X, offset = 516K, num_bytes = 508K, type = prealloc ] 516K 1M So we end up with 4 consecutive file extent items pointing to the data extent at bytenr X. 4) Hole punching in the middle of an extent. For example if a file has the following file extent item: [ bytenr X, offset = 0, num_bytes = 8M ] 0 8M And then hole is punched for the file range [4M, 6M[, we our file extent item split into two: [ bytenr X, offset = 0, num_bytes = 4M ] 0 4M [ 2M hole, implicit or explicit depending on NO_HOLES feature ] 4M 6M [ bytenr X, offset = 6M, num_bytes = 2M ] 6M 8M Again, we end up with two file extent items pointing to the same data extent. 5) When reflinking (clone and deduplication) within the same file. This is probably the least common case of all. In cases 1, 2, 4 and 4, when we have multiple file extent items that point to the same data extent, their distance is usually short, typically separated by a few slots in a b+tree leaf (or across sibling leaves). For case 5, the distance can vary a lot, but it's typically the less common case. This change caches the result of the sharedness checks for data extents, but only for the last 8 extents that we notice that our inode refers to with multiple file extent items. Whenever we want to check if a data extent is shared, we lookup the cache which consists of doing a linear scan of an 8 elements array, and if we find the data extent there, we return the result and don't check the extent tree and delayed refs. The array/cache is small so that doing the search has no noticeable negative impact on the performance in case we don't have file extent items within a distance of 8 slots that point to the same data extent. Slots in the cache/array are overwritten in a simple round robin fashion, as that approach fits very well. Using this simple approach with only the last 8 data extents seen is effective as usually when multiple file extents items point to the same data extent, their distance is within 8 slots. It also uses very little memory and the time to cache a result or lookup the cache is negligible. The following test was run on non-debug kernel (Debian's default kernel config) to measure the impact in the case of COW writes (first example given above), where we run fiemap after overwriting 33% of the blocks of a file: $ cat test.sh #!/bin/bash DEV=/dev/sdi MNT=/mnt/sdi umount $DEV &> /dev/null mkfs.btrfs -f $DEV mount $DEV $MNT FILE_SIZE=$((1 * 1024 * 1024 * 1024)) # Create the file full of 1M extents. xfs_io -f -s -c "pwrite -b 1M -S 0xab 0 $FILE_SIZE" $MNT/foobar block_count=$((FILE_SIZE / 4096)) # Overwrite about 33% of the file blocks. overwrite_count=$((block_count / 3)) echo -e "\nOverwriting $overwrite_count 4K blocks (out of $block_count)..." RANDOM=123 for ((i = 1; i <= $overwrite_count; i++)); do off=$(((RANDOM % block_count) * 4096)) xfs_io -c "pwrite -S 0xcd $off 4K" $MNT/foobar > /dev/null echo -ne "\r$i blocks overwritten..." done echo -e "\n" # Unmount and mount to clear all cached metadata. umount $MNT mount $DEV $MNT start=$(date +%s%N) filefrag $MNT/foobar end=$(date +%s%N) dur=$(( (end - start) / 1000000 )) echo "fiemap took $dur milliseconds" umount $MNT Result before applying this patch: fiemap took 128 milliseconds Result after applying this patch: fiemap took 92 milliseconds (-28.1%) The test is somewhat limited in the sense the gains may be higher in practice, because in the test the filesystem is small, so we have small fs and extent trees, plus there's no concurrent access to the trees as well, therefore no lock contention there. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++--- fs/btrfs/backref.h | 27 +++++++++++++++++++++++++++ 2 files changed, 74 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 22ab13821ada..64bea9b30f6b 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -137,7 +137,25 @@ struct preftrees { struct share_check { u64 root_objectid; u64 inum; + u64 data_bytenr; + /* + * Counts number of inodes that refer to an extent (different inodes in + * the same root or different roots) that we could find. The sharedness + * check typically stops once this counter gets greater than 1, so it + * may not reflect the total number of inodes. + */ int share_count; + /* + * The number of times we found our inode refers to the data extent we + * are determining the sharedness. In other words, how many file extent + * items we could find for our inode that point to our target data + * extent. The value we get here after finishing the extent sharedness + * check may be smaller than reality, but if it ends up being greater + * than 1, then we know for sure the inode has multiple file extent + * items that point to our inode, and we can safely assume it's useful + * to cache the sharedness check result. + */ + int self_ref_count; bool have_delayed_delete_refs; }; @@ -207,7 +225,7 @@ static int prelim_ref_compare(struct prelim_ref *ref1, } static void update_share_count(struct share_check *sc, int oldcount, - int newcount) + int newcount, struct prelim_ref *newref) { if ((!sc) || (oldcount == 0 && newcount < 1)) return; @@ -216,6 +234,11 @@ static void update_share_count(struct share_check *sc, int oldcount, sc->share_count--; else if (oldcount < 1 && newcount > 0) sc->share_count++; + + if (newref->root_id == sc->root_objectid && + newref->wanted_disk_byte == sc->data_bytenr && + newref->key_for_search.objectid == sc->inum) + sc->self_ref_count += newref->count; } /* @@ -266,14 +289,14 @@ static void prelim_ref_insert(const struct btrfs_fs_info *fs_info, * BTRFS_[ADD|DROP]_DELAYED_REF actions. */ update_share_count(sc, ref->count, - ref->count + newref->count); + ref->count + newref->count, newref); ref->count += newref->count; free_pref(newref); return; } } - update_share_count(sc, 0, newref->count); + update_share_count(sc, 0, newref->count, newref); preftree->count++; trace_btrfs_prelim_ref_insert(fs_info, newref, NULL, preftree->count); rb_link_node(&newref->rbnode, parent, p); @@ -1724,11 +1747,18 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, struct share_check shared = { .root_objectid = root->root_key.objectid, .inum = btrfs_ino(inode), + .data_bytenr = bytenr, .share_count = 0, + .self_ref_count = 0, .have_delayed_delete_refs = false, }; int level; + for (int i = 0; i < BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE; i++) { + if (ctx->prev_extents_cache[i].bytenr == bytenr) + return ctx->prev_extents_cache[i].is_shared; + } + ulist_init(&ctx->refs); trans = btrfs_join_transaction_nostart(root); @@ -1813,6 +1843,20 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, cond_resched(); } + /* + * Cache the sharedness result for the data extent if we know our inode + * has more than 1 file extent item that refers to the data extent. + */ + if (ret >= 0 && shared.self_ref_count > 1) { + int slot = ctx->prev_extents_cache_slot; + + ctx->prev_extents_cache[slot].bytenr = shared.data_bytenr; + ctx->prev_extents_cache[slot].is_shared = (ret == 1); + + slot = (slot + 1) % BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE; + ctx->prev_extents_cache_slot = slot; + } + if (trans) { btrfs_put_tree_mod_seq(fs_info, &elem); btrfs_end_transaction(trans); diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 5f468f0defda..fda78db50be6 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -23,6 +23,8 @@ struct btrfs_backref_shared_cache_entry { bool is_shared; }; +#define BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE 8 + struct btrfs_backref_share_check_ctx { /* Ulists used during backref walking. */ struct ulist refs; @@ -32,6 +34,31 @@ struct btrfs_backref_share_check_ctx { */ struct btrfs_backref_shared_cache_entry path_cache_entries[BTRFS_MAX_LEVEL]; bool use_path_cache; + /* + * Cache the sharedness result for the last few extents we have found, + * but only for extents for which we have multiple file extent items + * that point to them. + * It's very common to have several file extent items that point to the + * same extent (bytenr) but with different offsets and lengths. This + * typically happens for COW writes, partial writes into prealloc + * extents, NOCOW writes after snapshoting a root, hole punching or + * reflinking within the same file (less common perhaps). + * So keep a small cache with the lookup results for the extent pointed + * by the last few file extent items. This cache is checked, with a + * linear scan, whenever btrfs_is_data_extent_shared() is called, so + * it must be small so that it does not negatively affect performance in + * case we don't have multiple file extent items that point to the same + * data extent. + */ + struct { + u64 bytenr; + bool is_shared; + } prev_extents_cache[BTRFS_BACKREF_CTX_PREV_EXTENTS_SIZE]; + /* + * The slot in the prev_extents_cache array that will be used for + * storing the sharedness result of a new data extent. + */ + int prev_extents_cache_slot; }; typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, -- cgit v1.2.3 From 583f4ac56254c844cbbc9f23744e1f2efbf42fc7 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:17:07 +0100 Subject: btrfs: move up backref sharedness cache store and lookup functions Move the static functions to lookup and store sharedness check of an extent buffer to a location above find_all_parents(), because in the next patch the lookup function will be used by find_all_parents(). The store function is also moved just because it's the counter part to the lookup function and it's best to have their definitions close together. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 236 ++++++++++++++++++++++++++--------------------------- 1 file changed, 118 insertions(+), 118 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 64bea9b30f6b..1b1575b3a7b0 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1198,6 +1198,124 @@ static int add_keyed_refs(struct btrfs_root *extent_root, return ret; } +/* + * The caller has joined a transaction or is holding a read lock on the + * fs_info->commit_root_sem semaphore, so no need to worry about the root's last + * snapshot field changing while updating or checking the cache. + */ +static bool lookup_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx, + struct btrfs_root *root, + u64 bytenr, int level, bool *is_shared) +{ + struct btrfs_backref_shared_cache_entry *entry; + + if (!ctx->use_path_cache) + return false; + + if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) + return false; + + /* + * Level -1 is used for the data extent, which is not reliable to cache + * because its reference count can increase or decrease without us + * realizing. We cache results only for extent buffers that lead from + * the root node down to the leaf with the file extent item. + */ + ASSERT(level >= 0); + + entry = &ctx->path_cache_entries[level]; + + /* Unused cache entry or being used for some other extent buffer. */ + if (entry->bytenr != bytenr) + return false; + + /* + * We cached a false result, but the last snapshot generation of the + * root changed, so we now have a snapshot. Don't trust the result. + */ + if (!entry->is_shared && + entry->gen != btrfs_root_last_snapshot(&root->root_item)) + return false; + + /* + * If we cached a true result and the last generation used for dropping + * a root changed, we can not trust the result, because the dropped root + * could be a snapshot sharing this extent buffer. + */ + if (entry->is_shared && + entry->gen != btrfs_get_last_root_drop_gen(root->fs_info)) + return false; + + *is_shared = entry->is_shared; + /* + * If the node at this level is shared, than all nodes below are also + * shared. Currently some of the nodes below may be marked as not shared + * because we have just switched from one leaf to another, and switched + * also other nodes above the leaf and below the current level, so mark + * them as shared. + */ + if (*is_shared) { + for (int i = 0; i < level; i++) { + ctx->path_cache_entries[i].is_shared = true; + ctx->path_cache_entries[i].gen = entry->gen; + } + } + + return true; +} + +/* + * The caller has joined a transaction or is holding a read lock on the + * fs_info->commit_root_sem semaphore, so no need to worry about the root's last + * snapshot field changing while updating or checking the cache. + */ +static void store_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx, + struct btrfs_root *root, + u64 bytenr, int level, bool is_shared) +{ + struct btrfs_backref_shared_cache_entry *entry; + u64 gen; + + if (!ctx->use_path_cache) + return; + + if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) + return; + + /* + * Level -1 is used for the data extent, which is not reliable to cache + * because its reference count can increase or decrease without us + * realizing. We cache results only for extent buffers that lead from + * the root node down to the leaf with the file extent item. + */ + ASSERT(level >= 0); + + if (is_shared) + gen = btrfs_get_last_root_drop_gen(root->fs_info); + else + gen = btrfs_root_last_snapshot(&root->root_item); + + entry = &ctx->path_cache_entries[level]; + entry->bytenr = bytenr; + entry->is_shared = is_shared; + entry->gen = gen; + + /* + * If we found an extent buffer is shared, set the cache result for all + * extent buffers below it to true. As nodes in the path are COWed, + * their sharedness is moved to their children, and if a leaf is COWed, + * then the sharedness of a data extent becomes direct, the refcount of + * data extent is increased in the extent item at the extent tree. + */ + if (is_shared) { + for (int i = 0; i < level; i++) { + entry = &ctx->path_cache_entries[i]; + entry->is_shared = is_shared; + entry->gen = gen; + } + } +} + /* * this adds all existing backrefs (inline backrefs, backrefs and delayed * refs) for the given bytenr to the refs list, merges duplicates and resolves @@ -1573,124 +1691,6 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, return ret; } -/* - * The caller has joined a transaction or is holding a read lock on the - * fs_info->commit_root_sem semaphore, so no need to worry about the root's last - * snapshot field changing while updating or checking the cache. - */ -static bool lookup_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx, - struct btrfs_root *root, - u64 bytenr, int level, bool *is_shared) -{ - struct btrfs_backref_shared_cache_entry *entry; - - if (!ctx->use_path_cache) - return false; - - if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) - return false; - - /* - * Level -1 is used for the data extent, which is not reliable to cache - * because its reference count can increase or decrease without us - * realizing. We cache results only for extent buffers that lead from - * the root node down to the leaf with the file extent item. - */ - ASSERT(level >= 0); - - entry = &ctx->path_cache_entries[level]; - - /* Unused cache entry or being used for some other extent buffer. */ - if (entry->bytenr != bytenr) - return false; - - /* - * We cached a false result, but the last snapshot generation of the - * root changed, so we now have a snapshot. Don't trust the result. - */ - if (!entry->is_shared && - entry->gen != btrfs_root_last_snapshot(&root->root_item)) - return false; - - /* - * If we cached a true result and the last generation used for dropping - * a root changed, we can not trust the result, because the dropped root - * could be a snapshot sharing this extent buffer. - */ - if (entry->is_shared && - entry->gen != btrfs_get_last_root_drop_gen(root->fs_info)) - return false; - - *is_shared = entry->is_shared; - /* - * If the node at this level is shared, than all nodes below are also - * shared. Currently some of the nodes below may be marked as not shared - * because we have just switched from one leaf to another, and switched - * also other nodes above the leaf and below the current level, so mark - * them as shared. - */ - if (*is_shared) { - for (int i = 0; i < level; i++) { - ctx->path_cache_entries[i].is_shared = true; - ctx->path_cache_entries[i].gen = entry->gen; - } - } - - return true; -} - -/* - * The caller has joined a transaction or is holding a read lock on the - * fs_info->commit_root_sem semaphore, so no need to worry about the root's last - * snapshot field changing while updating or checking the cache. - */ -static void store_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx, - struct btrfs_root *root, - u64 bytenr, int level, bool is_shared) -{ - struct btrfs_backref_shared_cache_entry *entry; - u64 gen; - - if (!ctx->use_path_cache) - return; - - if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL)) - return; - - /* - * Level -1 is used for the data extent, which is not reliable to cache - * because its reference count can increase or decrease without us - * realizing. We cache results only for extent buffers that lead from - * the root node down to the leaf with the file extent item. - */ - ASSERT(level >= 0); - - if (is_shared) - gen = btrfs_get_last_root_drop_gen(root->fs_info); - else - gen = btrfs_root_last_snapshot(&root->root_item); - - entry = &ctx->path_cache_entries[level]; - entry->bytenr = bytenr; - entry->is_shared = is_shared; - entry->gen = gen; - - /* - * If we found an extent buffer is shared, set the cache result for all - * extent buffers below it to true. As nodes in the path are COWed, - * their sharedness is moved to their children, and if a leaf is COWed, - * then the sharedness of a data extent becomes direct, the refcount of - * data extent is increased in the extent item at the extent tree. - */ - if (is_shared) { - for (int i = 0; i < level; i++) { - entry = &ctx->path_cache_entries[i]; - entry->is_shared = is_shared; - entry->gen = gen; - } - } -} - struct btrfs_backref_share_check_ctx *btrfs_alloc_backref_share_check_ctx(void) { struct btrfs_backref_share_check_ctx *ctx; -- cgit v1.2.3 From 877c14767f106a5c8af271ebf1294c514eb76d0c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:17:08 +0100 Subject: btrfs: avoid duplicated resolution of indirect backrefs during fiemap During fiemap, when determining if a data extent is shared or not, if we don't find the extent is directly shared, then we need to determine if it's shared through subtrees. For that we need to resolve the indirect reference we found in order to figure out the path in the inode's fs tree, which is a path starting at the fs tree's root node and going down to the leaf that contains the file extent item that points to the data extent. We then proceed to determine if any extent buffer in that path is shared with other trees or not. Currently whenever we find the data extent that a file extent item points to is not directly shared, we always resolve the path in the fs tree, and then check if any extent buffer in the path is shared. This is a lot of work and when we have file extent items that belong to the same leaf, we have the same path, so we only need to calculate it once. This change does that, it keeps track of the current and previous leaf, and when we find that a data extent is not directly shared, we try to compute the fs tree path only once and then use it for every other file extent item in the same leaf, using the existing cached path result for the leaf as long as the cache results are valid. This saves us from doing expensive b+tree searches in the fs tree of our target inode, as well as other minor work. The following test was run on a non-debug kernel (Debian's default kernel config): $ cat test-with-snapshots.sh #!/bin/bash DEV=/dev/sdi MNT=/mnt/sdi umount $DEV &> /dev/null mkfs.btrfs -f $DEV # Use compression to quickly create files with a lot of extents # (each with a size of 128K). mount -o compress=lzo $DEV $MNT # 40G gives 327680 extents, each with a size of 128K. xfs_io -f -c "pwrite -S 0xab -b 1M 0 40G" $MNT/foobar # Add some more files to increase the size of the fs and extent # trees (in the real world there's a lot of files and extents # from other files). xfs_io -f -c "pwrite -S 0xcd -b 1M 0 20G" $MNT/file1 xfs_io -f -c "pwrite -S 0xef -b 1M 0 20G" $MNT/file2 xfs_io -f -c "pwrite -S 0x73 -b 1M 0 20G" $MNT/file3 # Create a snapshot so all the extents become indirectly shared # through subtrees, with a generation less than or equals to the # generation used to create the snapshot. btrfs subvolume snapshot -r $MNT $MNT/snap1 umount $MNT mount -o compress=lzo $DEV $MNT start=$(date +%s%N) filefrag $MNT/foobar end=$(date +%s%N) dur=$(( (end - start) / 1000000 )) echo "fiemap took $dur milliseconds (metadata not cached)" echo start=$(date +%s%N) filefrag $MNT/foobar end=$(date +%s%N) dur=$(( (end - start) / 1000000 )) echo "fiemap took $dur milliseconds (metadata cached)" umount $MNT Result before applying this patch: (...) /mnt/sdi/foobar: 327680 extents found fiemap took 1204 milliseconds (metadata not cached) /mnt/sdi/foobar: 327680 extents found fiemap took 729 milliseconds (metadata cached) Result after applying this patch: (...) /mnt/sdi/foobar: 327680 extents found fiemap took 732 milliseconds (metadata not cached) /mnt/sdi/foobar: 327680 extents found fiemap took 421 milliseconds (metadata cached) That's a -46.1% total reduction for the metadata not cached case, and a -42.2% reduction for the cached metadata case. The test is somewhat limited in the sense the gains may be higher in practice, because in the test the filesystem is small, so we have small fs and extent trees, plus there's no concurrent access to the trees as well, therefore no lock contention there. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 64 ++++++++++++++++++++++++++++++++++++++++++++-------- fs/btrfs/backref.h | 13 +++++++++++ fs/btrfs/extent_io.c | 2 ++ 3 files changed, 69 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 1b1575b3a7b0..94a3c6deafbb 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -16,8 +16,9 @@ #include "misc.h" #include "tree-mod-log.h" -/* Just an arbitrary number so we can be sure this happened */ -#define BACKREF_FOUND_SHARED 6 +/* Just arbitrary numbers so we can be sure one of these happened. */ +#define BACKREF_FOUND_SHARED 6 +#define BACKREF_FOUND_NOT_SHARED 7 struct extent_inode_elem { u64 inum; @@ -135,7 +136,8 @@ struct preftrees { * - decremented when a ref->count transitions to <1 */ struct share_check { - u64 root_objectid; + struct btrfs_backref_share_check_ctx *ctx; + struct btrfs_root *root; u64 inum; u64 data_bytenr; /* @@ -235,7 +237,7 @@ static void update_share_count(struct share_check *sc, int oldcount, else if (oldcount < 1 && newcount > 0) sc->share_count++; - if (newref->root_id == sc->root_objectid && + if (newref->root_id == sc->root->root_key.objectid && newref->wanted_disk_byte == sc->data_bytenr && newref->key_for_search.objectid == sc->inum) sc->self_ref_count += newref->count; @@ -742,7 +744,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, continue; } - if (sc && ref->root_id != sc->root_objectid) { + if (sc && ref->root_id != sc->root->root_key.objectid) { free_pref(ref); ret = BACKREF_FOUND_SHARED; goto out; @@ -1469,6 +1471,44 @@ again: */ ASSERT(extent_is_shared(sc) == 0); + /* + * If we are here for a data extent and we have a share_check structure + * it means the data extent is not directly shared (does not have + * multiple reference items), so we have to check if a path in the fs + * tree (going from the root node down to the leaf that has the file + * extent item pointing to the data extent) is shared, that is, if any + * of the extent buffers in the path is referenced by other trees. + */ + if (sc && bytenr == sc->data_bytenr) { + /* + * If we are only determining if a data extent is shared or not + * and the corresponding file extent item is located in the same + * leaf as the previous file extent item, we can skip resolving + * indirect references for a data extent, since the fs tree path + * is the same (same leaf, so same path). We skip as long as the + * cached result for the leaf is valid and only if there's only + * one file extent item pointing to the data extent, because in + * the case of multiple file extent items, they may be located + * in different leaves and therefore we have multiple paths. + */ + if (sc->ctx->curr_leaf_bytenr == sc->ctx->prev_leaf_bytenr && + sc->self_ref_count == 1) { + bool cached; + bool is_shared; + + cached = lookup_backref_shared_cache(sc->ctx, sc->root, + sc->ctx->curr_leaf_bytenr, + 0, &is_shared); + if (cached) { + if (is_shared) + ret = BACKREF_FOUND_SHARED; + else + ret = BACKREF_FOUND_NOT_SHARED; + goto out; + } + } + } + btrfs_release_path(path); ret = add_missing_keys(fs_info, &preftrees, path->skip_locking == 0); @@ -1745,7 +1785,8 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, struct btrfs_seq_list elem = BTRFS_SEQ_LIST_INIT(elem); int ret = 0; struct share_check shared = { - .root_objectid = root->root_key.objectid, + .ctx = ctx, + .root = root, .inum = btrfs_ino(inode), .data_bytenr = bytenr, .share_count = 0, @@ -1783,12 +1824,13 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, &ctx->refs, NULL, NULL, &shared, false); - if (ret == BACKREF_FOUND_SHARED) { - /* this is the only condition under which we return 1 */ - ret = 1; + if (ret == BACKREF_FOUND_SHARED || + ret == BACKREF_FOUND_NOT_SHARED) { + /* If shared must return 1, otherwise return 0. */ + ret = (ret == BACKREF_FOUND_SHARED) ? 1 : 0; if (level >= 0) store_backref_shared_cache(ctx, root, bytenr, - level, true); + level, ret == 1); break; } if (ret < 0 && ret != -ENOENT) @@ -1865,6 +1907,8 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, } out: ulist_release(&ctx->refs); + ctx->prev_leaf_bytenr = ctx->curr_leaf_bytenr; + return ret; } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index fda78db50be6..6dac462430b0 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -28,6 +28,19 @@ struct btrfs_backref_shared_cache_entry { struct btrfs_backref_share_check_ctx { /* Ulists used during backref walking. */ struct ulist refs; + /* + * The current leaf the caller of btrfs_is_data_extent_shared() is at. + * Typically the caller (at the moment only fiemap) tries to determine + * the sharedness of data extents point by file extent items from entire + * leaves. + */ + u64 curr_leaf_bytenr; + /* + * The previous leaf the caller was at in the previous call to + * btrfs_is_data_extent_shared(). This may be the same as the current + * leaf. On the first call it must be 0. + */ + u64 prev_leaf_bytenr; /* * A path from a root to a leaf that has a file extent item pointing to * a given data extent should never exceed the maximum b+tree height. diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index e25e54d2216f..4e4f28387ace 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3966,6 +3966,8 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, if (extent_end <= lockstart) goto next_item; + backref_ctx->curr_leaf_bytenr = leaf->start; + /* We have in implicit hole (NO_HOLES feature enabled). */ if (prev_extent_end < key.offset) { const u64 range_end = min(key.offset, lockend) - 1; -- cgit v1.2.3 From 6976201f188f0a899fe5f0dfde32b9855d1d22cc Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 11 Oct 2022 13:17:09 +0100 Subject: btrfs: avoid unnecessary resolution of indirect backrefs during fiemap During fiemap, when determining if a data extent is shared or not, if we don't find the extent is directly shared, then we need to determine if it's shared through subtrees. For that we need to resolve the indirect reference we found in order to figure out the path in the inode's fs tree, which is a path starting at the fs tree's root node and going down to the leaf that contains the file extent item that points to the data extent. We then proceed to determine if any extent buffer in that path is shared with other trees or not. However when the generation of the data extent is more recent than the last generation used to snapshot the root, we don't need to determine the path, since the data extent can not be shared through snapshots. For this case we currently still determine the leaf of that path (at find_parent_nodes(), but then stop determining the other nodes in the path (at btrfs_is_data_extent_shared()) as it's pointless. So do the check of the data extent's generation earlier, at find_parent_nodes(), before trying to resolve the indirect reference to determine the leaf in the path. This saves us from doing one expensive b+tree search in the fs tree of our target inode, as well as other minor work. The following test was run on a non-debug kernel (Debian's default kernel config): $ cat test-fiemap.sh #!/bin/bash DEV=/dev/sdi MNT=/mnt/sdi umount $DEV &> /dev/null mkfs.btrfs -f $DEV # Use compression to quickly create files with a lot of extents # (each with a size of 128K). mount -o compress=lzo $DEV $MNT # 40G gives 327680 extents, each with a size of 128K. xfs_io -f -c "pwrite -S 0xab -b 1M 0 40G" $MNT/foobar # Add some more files to increase the size of the fs and extent # trees (in the real world there's a lot of files and extents # from other files). xfs_io -f -c "pwrite -S 0xcd -b 1M 0 20G" $MNT/file1 xfs_io -f -c "pwrite -S 0xef -b 1M 0 20G" $MNT/file2 xfs_io -f -c "pwrite -S 0x73 -b 1M 0 20G" $MNT/file3 umount $MNT mount -o compress=lzo $DEV $MNT start=$(date +%s%N) filefrag $MNT/foobar end=$(date +%s%N) dur=$(( (end - start) / 1000000 )) echo "fiemap took $dur milliseconds (metadata not cached)" echo start=$(date +%s%N) filefrag $MNT/foobar end=$(date +%s%N) dur=$(( (end - start) / 1000000 )) echo "fiemap took $dur milliseconds (metadata cached)" umount $MNT Before applying this patch: (...) /mnt/sdi/foobar: 327680 extents found fiemap took 1285 milliseconds (metadata not cached) /mnt/sdi/foobar: 327680 extents found fiemap took 742 milliseconds (metadata cached) After applying this patch: (...) /mnt/sdi/foobar: 327680 extents found fiemap took 689 milliseconds (metadata not cached) /mnt/sdi/foobar: 327680 extents found fiemap took 393 milliseconds (metadata cached) That's a -46.4% total reduction for the metadata not cached case, and a -47.0% reduction for the cached metadata case. The test is somewhat limited in the sense the gains may be higher in practice, because in the test the filesystem is small, so we have small fs and extent trees, plus there's no concurrent access to the trees as well, therefore no lock contention there. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 94a3c6deafbb..232f49415ab9 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -140,6 +140,7 @@ struct share_check { struct btrfs_root *root; u64 inum; u64 data_bytenr; + u64 data_extent_gen; /* * Counts number of inodes that refer to an extent (different inodes in * the same root or different roots) that we could find. The sharedness @@ -1480,6 +1481,21 @@ again: * of the extent buffers in the path is referenced by other trees. */ if (sc && bytenr == sc->data_bytenr) { + /* + * If our data extent is from a generation more recent than the + * last generation used to snapshot the root, then we know that + * it can not be shared through subtrees, so we can skip + * resolving indirect references, there's no point in + * determining the extent buffers for the path from the fs tree + * root node down to the leaf that has the file extent item that + * points to the data extent. + */ + if (sc->data_extent_gen > + btrfs_root_last_snapshot(&sc->root->root_item)) { + ret = BACKREF_FOUND_NOT_SHARED; + goto out; + } + /* * If we are only determining if a data extent is shared or not * and the corresponding file extent item is located in the same @@ -1789,6 +1805,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, .root = root, .inum = btrfs_ino(inode), .data_bytenr = bytenr, + .data_extent_gen = extent_gen, .share_count = 0, .self_ref_count = 0, .have_delayed_delete_refs = false, @@ -1836,17 +1853,6 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, if (ret < 0 && ret != -ENOENT) break; ret = 0; - /* - * If our data extent is not shared through reflinks and it was - * created in a generation after the last one used to create a - * snapshot of the inode's root, then it can not be shared - * indirectly through subtrees, as that can only happen with - * snapshots. In this case bail out, no need to check for the - * sharedness of extent buffers. - */ - if (level == -1 && - extent_gen > btrfs_root_last_snapshot(&root->root_item)) - break; /* * If our data extent was not directly shared (without multiple -- cgit v1.2.3 From c7f13d428ea1bfe883f2741a9b5a5352d595eb09 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 19 Oct 2022 10:50:47 -0400 Subject: btrfs: move fs wide helpers out of ctree.h We have several fs wide related helpers in ctree.h. The bulk of these are the incompat flag test helpers, but there are things such as btrfs_fs_closing() and the read only helpers that also aren't directly related to the ctree code. Move these into a fs.h header, which will serve as the location for file system wide related helpers. Reviewed-by: Johannes Thumshirn Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/Makefile | 2 +- fs/btrfs/backref.c | 1 + fs/btrfs/block-group.c | 1 + fs/btrfs/ctree.h | 164 --------------------------------------------- fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 1 + fs/btrfs/file-item.c | 1 + fs/btrfs/file.c | 1 + fs/btrfs/free-space-tree.c | 1 + fs/btrfs/fs.c | 92 +++++++++++++++++++++++++ fs/btrfs/fs.h | 85 +++++++++++++++++++++++ fs/btrfs/inode.c | 1 + fs/btrfs/ioctl.c | 1 + fs/btrfs/props.c | 1 + fs/btrfs/qgroup.c | 1 + fs/btrfs/relocation.c | 1 + fs/btrfs/scrub.c | 1 + fs/btrfs/space-info.c | 1 + fs/btrfs/super.c | 1 + fs/btrfs/transaction.c | 1 + fs/btrfs/tree-checker.c | 1 + fs/btrfs/tree-log.c | 1 + fs/btrfs/uuid-tree.c | 1 + fs/btrfs/verity.c | 1 + fs/btrfs/volumes.c | 1 + fs/btrfs/zoned.c | 1 + 26 files changed, 200 insertions(+), 165 deletions(-) create mode 100644 fs/btrfs/fs.c create mode 100644 fs/btrfs/fs.h (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index fa9ddcc9eb0b..eebb45c06485 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -31,7 +31,7 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \ uuid-tree.o props.o free-space-tree.o tree-checker.o space-info.o \ block-rsv.o delalloc-space.o block-group.o discard.o reflink.o \ - subpage.o tree-mod-log.o extent-io-tree.o + subpage.o tree-mod-log.o extent-io-tree.o fs.o btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 232f49415ab9..f76db317c437 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -15,6 +15,7 @@ #include "locking.h" #include "misc.h" #include "tree-mod-log.h" +#include "fs.h" /* Just arbitrary numbers so we can be sure one of these happened. */ #define BACKREF_FOUND_SHARED 6 diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 47461fd21975..bf8d1e110136 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -17,6 +17,7 @@ #include "discard.h" #include "raid56.h" #include "zoned.h" +#include "fs.h" #ifdef CONFIG_BTRFS_DEBUG int btrfs_should_fragment_free_space(struct btrfs_block_group *block_group) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3485f60efa33..1b2a1dcb60dd 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2857,44 +2857,6 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *node, struct extent_buffer *parent); -static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) -{ - /* - * Do it this way so we only ever do one test_bit in the normal case. - */ - if (test_bit(BTRFS_FS_CLOSING_START, &fs_info->flags)) { - if (test_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags)) - return 2; - return 1; - } - return 0; -} - -/* - * If we remount the fs to be R/O or umount the fs, the cleaner needn't do - * anything except sleeping. This function is used to check the status of - * the fs. - * We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount, - * since setting and checking for SB_RDONLY in the superblock's flags is not - * atomic. - */ -static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info) -{ - return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) || - btrfs_fs_closing(fs_info); -} - -static inline void btrfs_set_sb_rdonly(struct super_block *sb) -{ - sb->s_flags |= SB_RDONLY; - set_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state); -} - -static inline void btrfs_clear_sb_rdonly(struct super_block *sb) -{ - sb->s_flags &= ~SB_RDONLY; - clear_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state); -} /* root-item.c */ int btrfs_add_root_ref(struct btrfs_trans_handle *trans, u64 root_id, @@ -3529,132 +3491,6 @@ do { \ } while (0) -/* compatibility and incompatibility defines */ - -#define btrfs_set_fs_incompat(__fs_info, opt) \ - __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, \ - #opt) - -static inline void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, - u64 flag, const char* name) -{ - struct btrfs_super_block *disk_super; - u64 features; - - disk_super = fs_info->super_copy; - features = btrfs_super_incompat_flags(disk_super); - if (!(features & flag)) { - spin_lock(&fs_info->super_lock); - features = btrfs_super_incompat_flags(disk_super); - if (!(features & flag)) { - features |= flag; - btrfs_set_super_incompat_flags(disk_super, features); - btrfs_info(fs_info, - "setting incompat feature flag for %s (0x%llx)", - name, flag); - } - spin_unlock(&fs_info->super_lock); - } -} - -#define btrfs_clear_fs_incompat(__fs_info, opt) \ - __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, \ - #opt) - -static inline void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, - u64 flag, const char* name) -{ - struct btrfs_super_block *disk_super; - u64 features; - - disk_super = fs_info->super_copy; - features = btrfs_super_incompat_flags(disk_super); - if (features & flag) { - spin_lock(&fs_info->super_lock); - features = btrfs_super_incompat_flags(disk_super); - if (features & flag) { - features &= ~flag; - btrfs_set_super_incompat_flags(disk_super, features); - btrfs_info(fs_info, - "clearing incompat feature flag for %s (0x%llx)", - name, flag); - } - spin_unlock(&fs_info->super_lock); - } -} - -#define btrfs_fs_incompat(fs_info, opt) \ - __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt) - -static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag) -{ - struct btrfs_super_block *disk_super; - disk_super = fs_info->super_copy; - return !!(btrfs_super_incompat_flags(disk_super) & flag); -} - -#define btrfs_set_fs_compat_ro(__fs_info, opt) \ - __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, \ - #opt) - -static inline void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, - u64 flag, const char *name) -{ - struct btrfs_super_block *disk_super; - u64 features; - - disk_super = fs_info->super_copy; - features = btrfs_super_compat_ro_flags(disk_super); - if (!(features & flag)) { - spin_lock(&fs_info->super_lock); - features = btrfs_super_compat_ro_flags(disk_super); - if (!(features & flag)) { - features |= flag; - btrfs_set_super_compat_ro_flags(disk_super, features); - btrfs_info(fs_info, - "setting compat-ro feature flag for %s (0x%llx)", - name, flag); - } - spin_unlock(&fs_info->super_lock); - } -} - -#define btrfs_clear_fs_compat_ro(__fs_info, opt) \ - __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, \ - #opt) - -static inline void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, - u64 flag, const char *name) -{ - struct btrfs_super_block *disk_super; - u64 features; - - disk_super = fs_info->super_copy; - features = btrfs_super_compat_ro_flags(disk_super); - if (features & flag) { - spin_lock(&fs_info->super_lock); - features = btrfs_super_compat_ro_flags(disk_super); - if (features & flag) { - features &= ~flag; - btrfs_set_super_compat_ro_flags(disk_super, features); - btrfs_info(fs_info, - "clearing compat-ro feature flag for %s (0x%llx)", - name, flag); - } - spin_unlock(&fs_info->super_lock); - } -} - -#define btrfs_fs_compat_ro(fs_info, opt) \ - __btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt) - -static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag) -{ - struct btrfs_super_block *disk_super; - disk_super = fs_info->super_copy; - return !!(btrfs_super_compat_ro_flags(disk_super) & flag); -} - /* acl.c */ #ifdef CONFIG_BTRFS_FS_POSIX_ACL struct posix_acl *btrfs_get_acl(struct inode *inode, int type, bool rcu); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index cf59d8f14a05..5e20b191b108 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -43,6 +43,7 @@ #include "space-info.h" #include "zoned.h" #include "subpage.h" +#include "fs.h" #define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\ BTRFS_HEADER_FLAG_RELOC |\ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 2801c991814f..bc010dbcb6b1 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -36,6 +36,7 @@ #include "rcu-string.h" #include "zoned.h" #include "dev-replace.h" +#include "fs.h" #undef SCRAMBLE_DELAYED_REFS diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 6bb9fa961a6a..824ff54d8155 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -16,6 +16,7 @@ #include "volumes.h" #include "print-tree.h" #include "compression.h" +#include "fs.h" #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) * 2) / \ diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 19b41b5fe6c0..a352c7cacc99 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -30,6 +30,7 @@ #include "delalloc-space.h" #include "reflink.h" #include "subpage.h" +#include "fs.h" static struct kmem_cache *btrfs_inode_defrag_cachep; /* diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 367bcfcf68f5..bfc21eb8ec63 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -11,6 +11,7 @@ #include "free-space-tree.h" #include "transaction.h" #include "block-group.h" +#include "fs.h" static int __add_block_group_free_space(struct btrfs_trans_handle *trans, struct btrfs_block_group *block_group, diff --git a/fs/btrfs/fs.c b/fs/btrfs/fs.c new file mode 100644 index 000000000000..d4ba948eba56 --- /dev/null +++ b/fs/btrfs/fs.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "ctree.h" +#include "fs.h" + +void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, + const char *name) +{ + struct btrfs_super_block *disk_super; + u64 features; + + disk_super = fs_info->super_copy; + features = btrfs_super_incompat_flags(disk_super); + if (!(features & flag)) { + spin_lock(&fs_info->super_lock); + features = btrfs_super_incompat_flags(disk_super); + if (!(features & flag)) { + features |= flag; + btrfs_set_super_incompat_flags(disk_super, features); + btrfs_info(fs_info, + "setting incompat feature flag for %s (0x%llx)", + name, flag); + } + spin_unlock(&fs_info->super_lock); + } +} + +void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, + const char *name) +{ + struct btrfs_super_block *disk_super; + u64 features; + + disk_super = fs_info->super_copy; + features = btrfs_super_incompat_flags(disk_super); + if (features & flag) { + spin_lock(&fs_info->super_lock); + features = btrfs_super_incompat_flags(disk_super); + if (features & flag) { + features &= ~flag; + btrfs_set_super_incompat_flags(disk_super, features); + btrfs_info(fs_info, + "clearing incompat feature flag for %s (0x%llx)", + name, flag); + } + spin_unlock(&fs_info->super_lock); + } +} + +void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, + const char *name) +{ + struct btrfs_super_block *disk_super; + u64 features; + + disk_super = fs_info->super_copy; + features = btrfs_super_compat_ro_flags(disk_super); + if (!(features & flag)) { + spin_lock(&fs_info->super_lock); + features = btrfs_super_compat_ro_flags(disk_super); + if (!(features & flag)) { + features |= flag; + btrfs_set_super_compat_ro_flags(disk_super, features); + btrfs_info(fs_info, + "setting compat-ro feature flag for %s (0x%llx)", + name, flag); + } + spin_unlock(&fs_info->super_lock); + } +} + +void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, + const char *name) +{ + struct btrfs_super_block *disk_super; + u64 features; + + disk_super = fs_info->super_copy; + features = btrfs_super_compat_ro_flags(disk_super); + if (features & flag) { + spin_lock(&fs_info->super_lock); + features = btrfs_super_compat_ro_flags(disk_super); + if (features & flag) { + features &= ~flag; + btrfs_set_super_compat_ro_flags(disk_super, features); + btrfs_info(fs_info, + "clearing compat-ro feature flag for %s (0x%llx)", + name, flag); + } + spin_unlock(&fs_info->super_lock); + } +} diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h new file mode 100644 index 000000000000..8eda9ce0a904 --- /dev/null +++ b/fs/btrfs/fs.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef BTRFS_FS_H +#define BTRFS_FS_H + +/* Compatibility and incompatibility defines */ +void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, + const char *name); +void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, + const char *name); +void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, + const char *name); +void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, + const char *name); + +#define btrfs_set_fs_incompat(__fs_info, opt) \ + __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, #opt) + +#define btrfs_clear_fs_incompat(__fs_info, opt) \ + __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, #opt) + +#define btrfs_fs_incompat(fs_info, opt) \ + __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt) + +#define btrfs_set_fs_compat_ro(__fs_info, opt) \ + __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, #opt) + +#define btrfs_clear_fs_compat_ro(__fs_info, opt) \ + __btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, #opt) + +#define btrfs_fs_compat_ro(fs_info, opt) \ + __btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt) + +static inline bool __btrfs_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag) +{ + struct btrfs_super_block *disk_super; + disk_super = fs_info->super_copy; + return !!(btrfs_super_incompat_flags(disk_super) & flag); +} + +static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag) +{ + struct btrfs_super_block *disk_super; + disk_super = fs_info->super_copy; + return !!(btrfs_super_compat_ro_flags(disk_super) & flag); +} + +static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info) +{ + /* Do it this way so we only ever do one test_bit in the normal case. */ + if (test_bit(BTRFS_FS_CLOSING_START, &fs_info->flags)) { + if (test_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags)) + return 2; + return 1; + } + return 0; +} + +/* + * If we remount the fs to be R/O or umount the fs, the cleaner needn't do + * anything except sleeping. This function is used to check the status of + * the fs. + * We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount, + * since setting and checking for SB_RDONLY in the superblock's flags is not + * atomic. + */ +static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info) +{ + return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) || + btrfs_fs_closing(fs_info); +} + +static inline void btrfs_set_sb_rdonly(struct super_block *sb) +{ + sb->s_flags |= SB_RDONLY; + set_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state); +} + +static inline void btrfs_clear_sb_rdonly(struct super_block *sb) +{ + sb->s_flags &= ~SB_RDONLY; + clear_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state); +} + +#endif diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 50584b93a66f..60b3162c035d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -55,6 +55,7 @@ #include "zoned.h" #include "subpage.h" #include "inode-item.h" +#include "fs.h" struct btrfs_iget_args { u64 ino; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index a482739c1d82..564a4ae9c285 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -50,6 +50,7 @@ #include "delalloc-space.h" #include "block-group.h" #include "subpage.h" +#include "fs.h" #ifdef CONFIG_64BIT /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index e04289347775..d2c699c9aa51 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -11,6 +11,7 @@ #include "xattr.h" #include "compression.h" #include "space-info.h" +#include "fs.h" #define BTRFS_PROP_HANDLERS_HT_BITS 8 static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b74105a10f16..e87a2f066f4d 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -24,6 +24,7 @@ #include "block-group.h" #include "sysfs.h" #include "tree-mod-log.h" +#include "fs.h" /* * Helpers to access qgroup reservation diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 216a4485d914..977afbb4cd45 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -28,6 +28,7 @@ #include "zoned.h" #include "inode-item.h" #include "space-info.h" +#include "fs.h" /* * Relocation overview diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index cdda4b2f20f2..6871c1c54f8c 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -21,6 +21,7 @@ #include "raid56.h" #include "block-group.h" #include "zoned.h" +#include "fs.h" /* * This is only the first step towards a full-features scrub. It reads all diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index af2e133aaa5c..af1538c2685d 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -10,6 +10,7 @@ #include "transaction.h" #include "block-group.h" #include "zoned.h" +#include "fs.h" /* * HOW DOES SPACE RESERVATION WORK diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 42e0d2fcc407..ef646a1d9bc2 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -49,6 +49,7 @@ #include "discard.h" #include "qgroup.h" #include "raid56.h" +#include "fs.h" #define CREATE_TRACE_POINTS #include diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index ae7d4aca771d..bae77fb05e2b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -23,6 +23,7 @@ #include "block-group.h" #include "space-info.h" #include "zoned.h" +#include "fs.h" static struct kmem_cache *btrfs_trans_handle_cachep; diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 43f905ab0a18..862d67798de5 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -25,6 +25,7 @@ #include "volumes.h" #include "misc.h" #include "btrfs_inode.h" +#include "fs.h" /* * Error message should follow the following format: diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index c3cf3dabe0b1..e294c38f9b19 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -21,6 +21,7 @@ #include "space-info.h" #include "zoned.h" #include "inode-item.h" +#include "fs.h" #define MAX_CONFLICT_INODES 10 diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index b458452a1aaf..2d7eb290fb9c 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -9,6 +9,7 @@ #include "transaction.h" #include "disk-io.h" #include "print-tree.h" +#include "fs.h" static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key) diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c index ee00e33c309e..ab0b39badbbe 100644 --- a/fs/btrfs/verity.c +++ b/fs/btrfs/verity.c @@ -15,6 +15,7 @@ #include "transaction.h" #include "disk-io.h" #include "locking.h" +#include "fs.h" /* * Implementation of the interface defined in struct fsverity_operations. diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 796e9f5ff8f8..d65d5d7835fe 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -33,6 +33,7 @@ #include "block-group.h" #include "discard.h" #include "zoned.h" +#include "fs.h" static struct bio_set btrfs_bioset; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index c9e2b0c85309..2a7d856c232c 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -15,6 +15,7 @@ #include "transaction.h" #include "dev-replace.h" #include "space-info.h" +#include "fs.h" /* Maximum number of zones to report per blkdev_report_zones() call */ #define BTRFS_REPORT_NR_ZONES 4096 -- cgit v1.2.3 From 07e81dc94474eb62705c6f96d9ab1a5a797b8703 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 19 Oct 2022 10:51:00 -0400 Subject: btrfs: move accessor helpers into accessors.h This is a large patch, but because they're all macros it's impossible to split up. Simply copy all of the item accessors in ctree.h and paste them in accessors.h, and then update any files to include the header so everything compiles. Reviewed-by: Anand Jain Signed-off-by: Josef Bacik Reviewed-by: David Sterba [ reformat comments, style fixups ] Signed-off-by: David Sterba --- fs/btrfs/accessors.h | 1068 +++++++++++++++++++++++++++++++ fs/btrfs/backref.c | 1 + fs/btrfs/block-group.c | 1 + fs/btrfs/block-rsv.c | 1 + fs/btrfs/check-integrity.c | 1 + fs/btrfs/ctree.h | 1090 -------------------------------- fs/btrfs/delayed-inode.c | 1 + fs/btrfs/dev-replace.c | 1 + fs/btrfs/dir-item.c | 1 + fs/btrfs/disk-io.c | 1 + fs/btrfs/export.c | 1 + fs/btrfs/extent-tree.c | 1 + fs/btrfs/extent_io.c | 1 + fs/btrfs/file-item.c | 1 + fs/btrfs/file.c | 1 + fs/btrfs/free-space-cache.c | 1 + fs/btrfs/free-space-tree.c | 1 + fs/btrfs/fs.c | 1 + fs/btrfs/inode-item.c | 1 + fs/btrfs/ioctl.c | 1 + fs/btrfs/locking.c | 1 + fs/btrfs/print-tree.c | 1 + fs/btrfs/props.c | 1 + fs/btrfs/qgroup.c | 1 + fs/btrfs/ref-verify.c | 1 + fs/btrfs/reflink.c | 1 + fs/btrfs/relocation.c | 1 + fs/btrfs/root-tree.c | 1 + fs/btrfs/scrub.c | 1 + fs/btrfs/send.c | 1 + fs/btrfs/space-info.c | 1 + fs/btrfs/super.c | 1 + fs/btrfs/sysfs.c | 1 + fs/btrfs/tests/extent-buffer-tests.c | 1 + fs/btrfs/tests/free-space-tree-tests.c | 1 + fs/btrfs/tests/inode-tests.c | 1 + fs/btrfs/tests/qgroup-tests.c | 1 + fs/btrfs/transaction.c | 1 + fs/btrfs/tree-checker.c | 1 + fs/btrfs/tree-defrag.c | 1 + fs/btrfs/tree-mod-log.c | 1 + fs/btrfs/uuid-tree.c | 2 +- fs/btrfs/verity.c | 1 + fs/btrfs/volumes.c | 1 + fs/btrfs/xattr.c | 1 + fs/btrfs/zoned.c | 1 + 46 files changed, 1112 insertions(+), 1091 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/accessors.h b/fs/btrfs/accessors.h index a47b0f9d30ca..e8bbd4d80161 100644 --- a/fs/btrfs/accessors.h +++ b/fs/btrfs/accessors.h @@ -9,6 +9,1074 @@ struct btrfs_map_token { unsigned long offset; }; +#define BTRFS_LEAF_DATA_OFFSET offsetof(struct btrfs_leaf, items) + void btrfs_init_map_token(struct btrfs_map_token *token, struct extent_buffer *eb); +/* + * Some macros to generate set/get functions for the struct fields. This + * assumes there is a lefoo_to_cpu for every type, so lets make a simple one + * for u8: + */ +#define le8_to_cpu(v) (v) +#define cpu_to_le8(v) (v) +#define __le8 u8 + +static inline u8 get_unaligned_le8(const void *p) +{ + return *(u8 *)p; +} + +static inline void put_unaligned_le8(u8 val, void *p) +{ + *(u8 *)p = val; +} + +#define read_eb_member(eb, ptr, type, member, result) (\ + read_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) + +#define write_eb_member(eb, ptr, type, member, result) (\ + write_extent_buffer(eb, (char *)(result), \ + ((unsigned long)(ptr)) + \ + offsetof(type, member), \ + sizeof(((type *)0)->member))) + +#define DECLARE_BTRFS_SETGET_BITS(bits) \ +u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \ + const void *ptr, unsigned long off); \ +void btrfs_set_token_##bits(struct btrfs_map_token *token, \ + const void *ptr, unsigned long off, \ + u##bits val); \ +u##bits btrfs_get_##bits(const struct extent_buffer *eb, \ + const void *ptr, unsigned long off); \ +void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \ + unsigned long off, u##bits val); + +DECLARE_BTRFS_SETGET_BITS(8) +DECLARE_BTRFS_SETGET_BITS(16) +DECLARE_BTRFS_SETGET_BITS(32) +DECLARE_BTRFS_SETGET_BITS(64) + +#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(const struct extent_buffer *eb, \ + const type *s) \ +{ \ + static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ + return btrfs_get_##bits(eb, s, offsetof(type, member)); \ +} \ +static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \ + u##bits val) \ +{ \ + static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ + btrfs_set_##bits(eb, s, offsetof(type, member), val); \ +} \ +static inline u##bits btrfs_token_##name(struct btrfs_map_token *token, \ + const type *s) \ +{ \ + static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ + return btrfs_get_token_##bits(token, s, offsetof(type, member));\ +} \ +static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\ + type *s, u##bits val) \ +{ \ + static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ + btrfs_set_token_##bits(token, s, offsetof(type, member), val); \ +} + +#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(const struct extent_buffer *eb) \ +{ \ + const type *p = page_address(eb->pages[0]) + \ + offset_in_page(eb->start); \ + return get_unaligned_le##bits(&p->member); \ +} \ +static inline void btrfs_set_##name(const struct extent_buffer *eb, \ + u##bits val) \ +{ \ + type *p = page_address(eb->pages[0]) + offset_in_page(eb->start); \ + put_unaligned_le##bits(val, &p->member); \ +} + +#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ +static inline u##bits btrfs_##name(const type *s) \ +{ \ + return get_unaligned_le##bits(&s->member); \ +} \ +static inline void btrfs_set_##name(type *s, u##bits val) \ +{ \ + put_unaligned_le##bits(val, &s->member); \ +} + +static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb, + struct btrfs_dev_item *s) +{ + static_assert(sizeof(u64) == + sizeof(((struct btrfs_dev_item *)0))->total_bytes); + return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item, + total_bytes)); +} +static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb, + struct btrfs_dev_item *s, + u64 val) +{ + static_assert(sizeof(u64) == + sizeof(((struct btrfs_dev_item *)0))->total_bytes); + WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize)); + btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val); +} + +BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64); +BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64); +BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32); +BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); +BTRFS_SETGET_FUNCS(device_start_offset, struct btrfs_dev_item, start_offset, 64); +BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); +BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32); +BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8); +BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8); +BTRFS_SETGET_FUNCS(device_generation, struct btrfs_dev_item, generation, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item, + bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item, + io_align, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item, + io_width, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item, + sector_size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item, dev_group, 32); +BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item, + seek_speed, 8); +BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item, + bandwidth, 8); +BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item, + generation, 64); + +static inline unsigned long btrfs_device_uuid(struct btrfs_dev_item *d) +{ + return (unsigned long)d + offsetof(struct btrfs_dev_item, uuid); +} + +static inline unsigned long btrfs_device_fsid(struct btrfs_dev_item *d) +{ + return (unsigned long)d + offsetof(struct btrfs_dev_item, fsid); +} + +BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64); +BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64); +BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); +BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32); +BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32); +BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32); +BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64); +BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); +BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16); +BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64); +BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64); + +static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s) +{ + return (char *)s + offsetof(struct btrfs_stripe, dev_uuid); +} + +BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk, + stripe_len, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk, io_align, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk, io_width, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk, + sector_size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk, + num_stripes, 16); +BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk, + sub_stripes, 16); +BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64); + +static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c, int nr) +{ + unsigned long offset = (unsigned long)c; + + offset += offsetof(struct btrfs_chunk, stripe); + offset += nr * sizeof(struct btrfs_stripe); + return (struct btrfs_stripe *)offset; +} + +static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr)); +} + +static inline u64 btrfs_stripe_offset_nr(const struct extent_buffer *eb, + struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr)); +} + +static inline u64 btrfs_stripe_devid_nr(const struct extent_buffer *eb, + struct btrfs_chunk *c, int nr) +{ + return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr)); +} + +/* struct btrfs_block_group_item */ +BTRFS_SETGET_STACK_FUNCS(stack_block_group_used, struct btrfs_block_group_item, + used, 64); +BTRFS_SETGET_FUNCS(block_group_used, struct btrfs_block_group_item, used, 64); +BTRFS_SETGET_STACK_FUNCS(stack_block_group_chunk_objectid, + struct btrfs_block_group_item, chunk_objectid, 64); + +BTRFS_SETGET_FUNCS(block_group_chunk_objectid, + struct btrfs_block_group_item, chunk_objectid, 64); +BTRFS_SETGET_FUNCS(block_group_flags, struct btrfs_block_group_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(stack_block_group_flags, + struct btrfs_block_group_item, flags, 64); + +/* struct btrfs_free_space_info */ +BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info, + extent_count, 32); +BTRFS_SETGET_FUNCS(free_space_flags, struct btrfs_free_space_info, flags, 32); + +/* struct btrfs_inode_ref */ +BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); +BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); + +/* struct btrfs_inode_extref */ +BTRFS_SETGET_FUNCS(inode_extref_parent, struct btrfs_inode_extref, + parent_objectid, 64); +BTRFS_SETGET_FUNCS(inode_extref_name_len, struct btrfs_inode_extref, + name_len, 16); +BTRFS_SETGET_FUNCS(inode_extref_index, struct btrfs_inode_extref, index, 64); + +/* struct btrfs_inode_item */ +BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); +BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64); +BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64); +BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); +BTRFS_SETGET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64); +BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); +BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); +BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); +BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); +BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); +BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); +BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item, + sequence, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item, + transid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item, nbytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item, + block_group, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32); +BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64); +BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64); +BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); +BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); +BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); +BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32); + +/* struct btrfs_dev_extent */ +BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent, chunk_tree, 64); +BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent, + chunk_objectid, 64); +BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent, + chunk_offset, 64); +BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); +BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64); +BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item, generation, 64); +BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64); + +BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8); + +static inline void btrfs_tree_block_key(const struct extent_buffer *eb, + struct btrfs_tree_block_info *item, + struct btrfs_disk_key *key) +{ + read_eb_member(eb, item, struct btrfs_tree_block_info, key, key); +} + +static inline void btrfs_set_tree_block_key(const struct extent_buffer *eb, + struct btrfs_tree_block_info *item, + struct btrfs_disk_key *key) +{ + write_eb_member(eb, item, struct btrfs_tree_block_info, key, key); +} + +BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref, root, 64); +BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref, + objectid, 64); +BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref, + offset, 64); +BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref, count, 32); + +BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref, count, 32); + +BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref, + type, 8); +BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref, + offset, 64); + +static inline u32 btrfs_extent_inline_ref_size(int type) +{ + if (type == BTRFS_TREE_BLOCK_REF_KEY || + type == BTRFS_SHARED_BLOCK_REF_KEY) + return sizeof(struct btrfs_extent_inline_ref); + if (type == BTRFS_SHARED_DATA_REF_KEY) + return sizeof(struct btrfs_shared_data_ref) + + sizeof(struct btrfs_extent_inline_ref); + if (type == BTRFS_EXTENT_DATA_REF_KEY) + return sizeof(struct btrfs_extent_data_ref) + + offsetof(struct btrfs_extent_inline_ref, offset); + return 0; +} + +/* struct btrfs_node */ +BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); +BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_key_blockptr, struct btrfs_key_ptr, blockptr, 64); +BTRFS_SETGET_STACK_FUNCS(stack_key_generation, struct btrfs_key_ptr, + generation, 64); + +static inline u64 btrfs_node_blockptr(const struct extent_buffer *eb, int nr) +{ + unsigned long ptr; + + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr); +} + +static inline void btrfs_set_node_blockptr(const struct extent_buffer *eb, + int nr, u64 val) +{ + unsigned long ptr; + + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); +} + +static inline u64 btrfs_node_ptr_generation(const struct extent_buffer *eb, int nr) +{ + unsigned long ptr; + + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr); +} + +static inline void btrfs_set_node_ptr_generation(const struct extent_buffer *eb, + int nr, u64 val) +{ + unsigned long ptr; + + ptr = offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; + btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)ptr, val); +} + +static inline unsigned long btrfs_node_key_ptr_offset(int nr) +{ + return offsetof(struct btrfs_node, ptrs) + + sizeof(struct btrfs_key_ptr) * nr; +} + +void btrfs_node_key(const struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr); + +static inline void btrfs_set_node_key(const struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) +{ + unsigned long ptr; + + ptr = btrfs_node_key_ptr_offset(nr); + write_eb_member(eb, (struct btrfs_key_ptr *)ptr, + struct btrfs_key_ptr, key, disk_key); +} + +/* struct btrfs_item */ +BTRFS_SETGET_FUNCS(raw_item_offset, struct btrfs_item, offset, 32); +BTRFS_SETGET_FUNCS(raw_item_size, struct btrfs_item, size, 32); +BTRFS_SETGET_STACK_FUNCS(stack_item_offset, struct btrfs_item, offset, 32); +BTRFS_SETGET_STACK_FUNCS(stack_item_size, struct btrfs_item, size, 32); + +static inline unsigned long btrfs_item_nr_offset(int nr) +{ + return offsetof(struct btrfs_leaf, items) + + sizeof(struct btrfs_item) * nr; +} + +static inline struct btrfs_item *btrfs_item_nr(int nr) +{ + return (struct btrfs_item *)btrfs_item_nr_offset(nr); +} + +#define BTRFS_ITEM_SETGET_FUNCS(member) \ +static inline u32 btrfs_item_##member(const struct extent_buffer *eb, int slot) \ +{ \ + return btrfs_raw_item_##member(eb, btrfs_item_nr(slot)); \ +} \ +static inline void btrfs_set_item_##member(const struct extent_buffer *eb, \ + int slot, u32 val) \ +{ \ + btrfs_set_raw_item_##member(eb, btrfs_item_nr(slot), val); \ +} \ +static inline u32 btrfs_token_item_##member(struct btrfs_map_token *token, \ + int slot) \ +{ \ + struct btrfs_item *item = btrfs_item_nr(slot); \ + return btrfs_token_raw_item_##member(token, item); \ +} \ +static inline void btrfs_set_token_item_##member(struct btrfs_map_token *token, \ + int slot, u32 val) \ +{ \ + struct btrfs_item *item = btrfs_item_nr(slot); \ + btrfs_set_token_raw_item_##member(token, item, val); \ +} + +BTRFS_ITEM_SETGET_FUNCS(offset) +BTRFS_ITEM_SETGET_FUNCS(size); + +static inline u32 btrfs_item_data_end(const struct extent_buffer *eb, int nr) +{ + return btrfs_item_offset(eb, nr) + btrfs_item_size(eb, nr); +} + +static inline void btrfs_item_key(const struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) +{ + struct btrfs_item *item = btrfs_item_nr(nr); + + read_eb_member(eb, item, struct btrfs_item, key, disk_key); +} + +static inline void btrfs_set_item_key(struct extent_buffer *eb, + struct btrfs_disk_key *disk_key, int nr) +{ + struct btrfs_item *item = btrfs_item_nr(nr); + + write_eb_member(eb, item, struct btrfs_item, key, disk_key); +} + +BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64); + +/* struct btrfs_root_ref */ +BTRFS_SETGET_FUNCS(root_ref_dirid, struct btrfs_root_ref, dirid, 64); +BTRFS_SETGET_FUNCS(root_ref_sequence, struct btrfs_root_ref, sequence, 64); +BTRFS_SETGET_FUNCS(root_ref_name_len, struct btrfs_root_ref, name_len, 16); + +/* struct btrfs_dir_item */ +BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16); +BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); +BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); +BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_dir_type, struct btrfs_dir_item, type, 8); +BTRFS_SETGET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item, data_len, 16); +BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, name_len, 16); +BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item, transid, 64); + +static inline void btrfs_dir_item_key(const struct extent_buffer *eb, + const struct btrfs_dir_item *item, + struct btrfs_disk_key *key) +{ + read_eb_member(eb, item, struct btrfs_dir_item, location, key); +} + +static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, + struct btrfs_dir_item *item, + const struct btrfs_disk_key *key) +{ + write_eb_member(eb, item, struct btrfs_dir_item, location, key); +} + +BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header, + num_entries, 64); +BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header, + num_bitmaps, 64); +BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header, + generation, 64); + +static inline void btrfs_free_space_key(const struct extent_buffer *eb, + const struct btrfs_free_space_header *h, + struct btrfs_disk_key *key) +{ + read_eb_member(eb, h, struct btrfs_free_space_header, location, key); +} + +static inline void btrfs_set_free_space_key(struct extent_buffer *eb, + struct btrfs_free_space_header *h, + const struct btrfs_disk_key *key) +{ + write_eb_member(eb, h, struct btrfs_free_space_header, location, key); +} + +/* struct btrfs_disk_key */ +BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, objectid, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64); +BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8); + +#ifdef __LITTLE_ENDIAN + +/* + * Optimized helpers for little-endian architectures where CPU and on-disk + * structures have the same endianness and we can skip conversions. + */ + +static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu_key, + const struct btrfs_disk_key *disk_key) +{ + memcpy(cpu_key, disk_key, sizeof(struct btrfs_key)); +} + +static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk_key, + const struct btrfs_key *cpu_key) +{ + memcpy(disk_key, cpu_key, sizeof(struct btrfs_key)); +} + +static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb, + struct btrfs_key *cpu_key, int nr) +{ + struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key; + + btrfs_node_key(eb, disk_key, nr); +} + +static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb, + struct btrfs_key *cpu_key, int nr) +{ + struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key; + + btrfs_item_key(eb, disk_key, nr); +} + +static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb, + const struct btrfs_dir_item *item, + struct btrfs_key *cpu_key) +{ + struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key; + + btrfs_dir_item_key(eb, item, disk_key); +} + +#else + +static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, + const struct btrfs_disk_key *disk) +{ + cpu->offset = le64_to_cpu(disk->offset); + cpu->type = disk->type; + cpu->objectid = le64_to_cpu(disk->objectid); +} + +static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, + const struct btrfs_key *cpu) +{ + disk->offset = cpu_to_le64(cpu->offset); + disk->type = cpu->type; + disk->objectid = cpu_to_le64(cpu->objectid); +} + +static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb, + struct btrfs_key *key, int nr) +{ + struct btrfs_disk_key disk_key; + + btrfs_node_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); +} + +static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb, + struct btrfs_key *key, int nr) +{ + struct btrfs_disk_key disk_key; + + btrfs_item_key(eb, &disk_key, nr); + btrfs_disk_key_to_cpu(key, &disk_key); +} + +static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb, + const struct btrfs_dir_item *item, + struct btrfs_key *key) +{ + struct btrfs_disk_key disk_key; + + btrfs_dir_item_key(eb, item, &disk_key); + btrfs_disk_key_to_cpu(key, &disk_key); +} + +#endif + +/* struct btrfs_header */ +BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64); +BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, generation, 64); +BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); +BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); +BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64); +BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); +BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64); +BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header, nritems, 32); +BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64); + +static inline int btrfs_header_flag(const struct extent_buffer *eb, u64 flag) +{ + return (btrfs_header_flags(eb) & flag) == flag; +} + +static inline void btrfs_set_header_flag(struct extent_buffer *eb, u64 flag) +{ + u64 flags = btrfs_header_flags(eb); + + btrfs_set_header_flags(eb, flags | flag); +} + +static inline void btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag) +{ + u64 flags = btrfs_header_flags(eb); + + btrfs_set_header_flags(eb, flags & ~flag); +} + +static inline int btrfs_header_backref_rev(const struct extent_buffer *eb) +{ + u64 flags = btrfs_header_flags(eb); + + return flags >> BTRFS_BACKREF_REV_SHIFT; +} + +static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb, int rev) +{ + u64 flags = btrfs_header_flags(eb); + + flags &= ~BTRFS_BACKREF_REV_MASK; + flags |= (u64)rev << BTRFS_BACKREF_REV_SHIFT; + btrfs_set_header_flags(eb, flags); +} + +static inline int btrfs_is_leaf(const struct extent_buffer *eb) +{ + return btrfs_header_level(eb) == 0; +} + +/* struct btrfs_root_item */ +BTRFS_SETGET_FUNCS(disk_root_generation, struct btrfs_root_item, generation, 64); +BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64); +BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8); + +BTRFS_SETGET_STACK_FUNCS(root_generation, struct btrfs_root_item, generation, 64); +BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(root_drop_level, struct btrfs_root_item, drop_level, 8); +BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8); +BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); +BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); +BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 64); +BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); +BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, + last_snapshot, 64); +BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item, + generation_v2, 64); +BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item, ctransid, 64); +BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item, otransid, 64); +BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item, stransid, 64); +BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item, rtransid, 64); + +static inline bool btrfs_root_readonly(const struct btrfs_root *root) +{ + /* Byte-swap the constant at compile time, root_item::flags is LE */ + return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0; +} + +static inline bool btrfs_root_dead(const struct btrfs_root *root) +{ + /* Byte-swap the constant at compile time, root_item::flags is LE */ + return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0; +} + +static inline u64 btrfs_root_id(const struct btrfs_root *root) +{ + return root->root_key.objectid; +} + +/* struct btrfs_root_backup */ +BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, + tree_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_tree_root_gen, struct btrfs_root_backup, + tree_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_tree_root_level, struct btrfs_root_backup, + tree_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_chunk_root, struct btrfs_root_backup, + chunk_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_gen, struct btrfs_root_backup, + chunk_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_level, struct btrfs_root_backup, + chunk_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_extent_root, struct btrfs_root_backup, + extent_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_extent_root_gen, struct btrfs_root_backup, + extent_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_extent_root_level, struct btrfs_root_backup, + extent_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_fs_root, struct btrfs_root_backup, + fs_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_fs_root_gen, struct btrfs_root_backup, + fs_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_fs_root_level, struct btrfs_root_backup, + fs_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_dev_root, struct btrfs_root_backup, + dev_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_dev_root_gen, struct btrfs_root_backup, + dev_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_dev_root_level, struct btrfs_root_backup, + dev_root_level, 8); + +BTRFS_SETGET_STACK_FUNCS(backup_csum_root, struct btrfs_root_backup, + csum_root, 64); +BTRFS_SETGET_STACK_FUNCS(backup_csum_root_gen, struct btrfs_root_backup, + csum_root_gen, 64); +BTRFS_SETGET_STACK_FUNCS(backup_csum_root_level, struct btrfs_root_backup, + csum_root_level, 8); +BTRFS_SETGET_STACK_FUNCS(backup_total_bytes, struct btrfs_root_backup, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup, + bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup, + num_devices, 64); + +/* struct btrfs_balance_item */ +BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64); + +static inline void btrfs_balance_data(const struct extent_buffer *eb, + const struct btrfs_balance_item *bi, + struct btrfs_disk_balance_args *ba) +{ + read_eb_member(eb, bi, struct btrfs_balance_item, data, ba); +} + +static inline void btrfs_set_balance_data(struct extent_buffer *eb, + struct btrfs_balance_item *bi, + const struct btrfs_disk_balance_args *ba) +{ + write_eb_member(eb, bi, struct btrfs_balance_item, data, ba); +} + +static inline void btrfs_balance_meta(const struct extent_buffer *eb, + const struct btrfs_balance_item *bi, + struct btrfs_disk_balance_args *ba) +{ + read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba); +} + +static inline void btrfs_set_balance_meta(struct extent_buffer *eb, + struct btrfs_balance_item *bi, + const struct btrfs_disk_balance_args *ba) +{ + write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba); +} + +static inline void btrfs_balance_sys(const struct extent_buffer *eb, + const struct btrfs_balance_item *bi, + struct btrfs_disk_balance_args *ba) +{ + read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba); +} + +static inline void btrfs_set_balance_sys(struct extent_buffer *eb, + struct btrfs_balance_item *bi, + const struct btrfs_disk_balance_args *ba) +{ + write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba); +} + +static inline void btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu, + const struct btrfs_disk_balance_args *disk) +{ + memset(cpu, 0, sizeof(*cpu)); + + cpu->profiles = le64_to_cpu(disk->profiles); + cpu->usage = le64_to_cpu(disk->usage); + cpu->devid = le64_to_cpu(disk->devid); + cpu->pstart = le64_to_cpu(disk->pstart); + cpu->pend = le64_to_cpu(disk->pend); + cpu->vstart = le64_to_cpu(disk->vstart); + cpu->vend = le64_to_cpu(disk->vend); + cpu->target = le64_to_cpu(disk->target); + cpu->flags = le64_to_cpu(disk->flags); + cpu->limit = le64_to_cpu(disk->limit); + cpu->stripes_min = le32_to_cpu(disk->stripes_min); + cpu->stripes_max = le32_to_cpu(disk->stripes_max); +} + +static inline void btrfs_cpu_balance_args_to_disk( + struct btrfs_disk_balance_args *disk, + const struct btrfs_balance_args *cpu) +{ + memset(disk, 0, sizeof(*disk)); + + disk->profiles = cpu_to_le64(cpu->profiles); + disk->usage = cpu_to_le64(cpu->usage); + disk->devid = cpu_to_le64(cpu->devid); + disk->pstart = cpu_to_le64(cpu->pstart); + disk->pend = cpu_to_le64(cpu->pend); + disk->vstart = cpu_to_le64(cpu->vstart); + disk->vend = cpu_to_le64(cpu->vend); + disk->target = cpu_to_le64(cpu->target); + disk->flags = cpu_to_le64(cpu->flags); + disk->limit = cpu_to_le64(cpu->limit); + disk->stripes_min = cpu_to_le32(cpu->stripes_min); + disk->stripes_max = cpu_to_le32(cpu->stripes_max); +} + +/* struct btrfs_super_block */ +BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64); +BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, + generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); +BTRFS_SETGET_STACK_FUNCS(super_sys_array_size, + struct btrfs_super_block, sys_chunk_array_size, 32); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root_generation, + struct btrfs_super_block, chunk_root_generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, + root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block, + chunk_root, 64); +BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block, + chunk_root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block, log_root, 64); +BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block, + log_root_level, 8); +BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, + total_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, + bytes_used, 64); +BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, + sectorsize, 32); +BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, + nodesize, 32); +BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, + stripesize, 32); +BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, + root_dir_objectid, 64); +BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block, + num_devices, 64); +BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block, + compat_flags, 64); +BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block, + compat_ro_flags, 64); +BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block, + incompat_flags, 64); +BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, + csum_type, 16); +BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, + cache_generation, 64); +BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64); +BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block, + uuid_tree_generation, 64); + +int btrfs_super_csum_size(const struct btrfs_super_block *s); +const char *btrfs_super_csum_name(u16 csum_type); +const char *btrfs_super_csum_driver(u16 csum_type); +size_t __attribute_const__ btrfs_get_num_csums(void); + +/* + * The leaf data grows from end-to-front in the node. this returns the address + * of the start of the last item, which is the stop of the leaf data stack. + */ +static inline unsigned int leaf_data_end(const struct extent_buffer *leaf) +{ + u32 nr = btrfs_header_nritems(leaf); + + if (nr == 0) + return BTRFS_LEAF_DATA_SIZE(leaf->fs_info); + return btrfs_item_offset(leaf, nr - 1); +} + +/* struct btrfs_file_extent_item */ +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item, + type, 8); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, + struct btrfs_file_extent_item, disk_bytenr, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset, + struct btrfs_file_extent_item, offset, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation, + struct btrfs_file_extent_item, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, + struct btrfs_file_extent_item, num_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_ram_bytes, + struct btrfs_file_extent_item, ram_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_num_bytes, + struct btrfs_file_extent_item, disk_num_bytes, 64); +BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression, + struct btrfs_file_extent_item, compression, 8); + +static inline unsigned long btrfs_file_extent_inline_start( + const struct btrfs_file_extent_item *e) +{ + return (unsigned long)e + BTRFS_FILE_EXTENT_INLINE_DATA_START; +} + +static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) +{ + return BTRFS_FILE_EXTENT_INLINE_DATA_START + datasize; +} + +BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); +BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, + disk_bytenr, 64); +BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, + generation, 64); +BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item, + disk_num_bytes, 64); +BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, + offset, 64); +BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, + num_bytes, 64); +BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item, + ram_bytes, 64); +BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item, + compression, 8); +BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item, + encryption, 8); +BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item, + other_encoding, 16); + +/* + * Returns the number of bytes used by the item on disk, minus the size of any + * extent headers. If a file is compressed on disk, this is the compressed + * size. + */ +static inline u32 btrfs_file_extent_inline_item_len( + const struct extent_buffer *eb, + int nr) +{ + return btrfs_item_size(eb, nr) - BTRFS_FILE_EXTENT_INLINE_DATA_START; +} + +/* btrfs_qgroup_status_item */ +BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item, + generation, 64); +BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item, + version, 64); +BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item, + flags, 64); +BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item, + rescan, 64); + +/* btrfs_qgroup_info_item */ +BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item, + generation, 64); +BTRFS_SETGET_FUNCS(qgroup_info_rfer, struct btrfs_qgroup_info_item, rfer, 64); +BTRFS_SETGET_FUNCS(qgroup_info_rfer_cmpr, struct btrfs_qgroup_info_item, + rfer_cmpr, 64); +BTRFS_SETGET_FUNCS(qgroup_info_excl, struct btrfs_qgroup_info_item, excl, 64); +BTRFS_SETGET_FUNCS(qgroup_info_excl_cmpr, struct btrfs_qgroup_info_item, + excl_cmpr, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation, + struct btrfs_qgroup_info_item, generation, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer, struct btrfs_qgroup_info_item, + rfer, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer_cmpr, + struct btrfs_qgroup_info_item, rfer_cmpr, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl, struct btrfs_qgroup_info_item, + excl, 64); +BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl_cmpr, + struct btrfs_qgroup_info_item, excl_cmpr, 64); + +/* btrfs_qgroup_limit_item */ +BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item, flags, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_max_rfer, struct btrfs_qgroup_limit_item, + max_rfer, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_max_excl, struct btrfs_qgroup_limit_item, + max_excl, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_rsv_rfer, struct btrfs_qgroup_limit_item, + rsv_rfer, 64); +BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item, + rsv_excl, 64); + +/* btrfs_dev_replace_item */ +BTRFS_SETGET_FUNCS(dev_replace_src_devid, + struct btrfs_dev_replace_item, src_devid, 64); +BTRFS_SETGET_FUNCS(dev_replace_cont_reading_from_srcdev_mode, + struct btrfs_dev_replace_item, cont_reading_from_srcdev_mode, + 64); +BTRFS_SETGET_FUNCS(dev_replace_replace_state, struct btrfs_dev_replace_item, + replace_state, 64); +BTRFS_SETGET_FUNCS(dev_replace_time_started, struct btrfs_dev_replace_item, + time_started, 64); +BTRFS_SETGET_FUNCS(dev_replace_time_stopped, struct btrfs_dev_replace_item, + time_stopped, 64); +BTRFS_SETGET_FUNCS(dev_replace_num_write_errors, struct btrfs_dev_replace_item, + num_write_errors, 64); +BTRFS_SETGET_FUNCS(dev_replace_num_uncorrectable_read_errors, + struct btrfs_dev_replace_item, num_uncorrectable_read_errors, + 64); +BTRFS_SETGET_FUNCS(dev_replace_cursor_left, struct btrfs_dev_replace_item, + cursor_left, 64); +BTRFS_SETGET_FUNCS(dev_replace_cursor_right, struct btrfs_dev_replace_item, + cursor_right, 64); + +BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_src_devid, + struct btrfs_dev_replace_item, src_devid, 64); +BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cont_reading_from_srcdev_mode, + struct btrfs_dev_replace_item, + cont_reading_from_srcdev_mode, 64); +BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_replace_state, + struct btrfs_dev_replace_item, replace_state, 64); +BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_time_started, + struct btrfs_dev_replace_item, time_started, 64); +BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_time_stopped, + struct btrfs_dev_replace_item, time_stopped, 64); +BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_num_write_errors, + struct btrfs_dev_replace_item, num_write_errors, 64); +BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_num_uncorrectable_read_errors, + struct btrfs_dev_replace_item, + num_uncorrectable_read_errors, 64); +BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_left, + struct btrfs_dev_replace_item, cursor_left, 64); +BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right, + struct btrfs_dev_replace_item, cursor_right, 64); + +/* btrfs_verity_descriptor_item */ +BTRFS_SETGET_FUNCS(verity_descriptor_encryption, struct btrfs_verity_descriptor_item, + encryption, 8); +BTRFS_SETGET_FUNCS(verity_descriptor_size, struct btrfs_verity_descriptor_item, + size, 64); +BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_encryption, + struct btrfs_verity_descriptor_item, encryption, 8); +BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_size, + struct btrfs_verity_descriptor_item, size, 64); + +/* Cast into the data area of the leaf. */ +#define btrfs_item_ptr(leaf, slot, type) \ + ((type *)(BTRFS_LEAF_DATA_OFFSET + btrfs_item_offset(leaf, slot))) + +#define btrfs_item_ptr_offset(leaf, slot) \ + ((unsigned long)(BTRFS_LEAF_DATA_OFFSET + btrfs_item_offset(leaf, slot))) + #endif diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index f76db317c437..41caa6fc6301 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -16,6 +16,7 @@ #include "misc.h" #include "tree-mod-log.h" #include "fs.h" +#include "accessors.h" /* Just arbitrary numbers so we can be sure one of these happened. */ #define BACKREF_FOUND_SHARED 6 diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index bf8d1e110136..d1f8d792b184 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -18,6 +18,7 @@ #include "raid56.h" #include "zoned.h" #include "fs.h" +#include "accessors.h" #ifdef CONFIG_BTRFS_DEBUG int btrfs_should_fragment_free_space(struct btrfs_block_group *block_group) diff --git a/fs/btrfs/block-rsv.c b/fs/btrfs/block-rsv.c index a48fa7ac74cc..069ea9d6a8d4 100644 --- a/fs/btrfs/block-rsv.c +++ b/fs/btrfs/block-rsv.c @@ -8,6 +8,7 @@ #include "block-group.h" #include "disk-io.h" #include "fs.h" +#include "accessors.h" /* * HOW DO BLOCK RESERVES WORK diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c index e8e1a92b30ac..7ff0703ef3e4 100644 --- a/fs/btrfs/check-integrity.c +++ b/fs/btrfs/check-integrity.c @@ -93,6 +93,7 @@ #include "check-integrity.h" #include "rcu-string.h" #include "compression.h" +#include "accessors.h" #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000 diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 296ca4de8d52..20034c23abb2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1176,8 +1176,6 @@ static inline u32 BTRFS_LEAF_DATA_SIZE(const struct btrfs_fs_info *info) return info->nodesize - sizeof(struct btrfs_header); } -#define BTRFS_LEAF_DATA_OFFSET offsetof(struct btrfs_leaf, items) - static inline u32 BTRFS_MAX_ITEM_SIZE(const struct btrfs_fs_info *info) { return BTRFS_LEAF_DATA_SIZE(info) - sizeof(struct btrfs_item); @@ -1204,1094 +1202,6 @@ static inline u32 BTRFS_MAX_XATTR_SIZE(const struct btrfs_fs_info *info) #define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \ ((bytes) >> (fs_info)->sectorsize_bits) -/* some macros to generate set/get functions for the struct fields. This - * assumes there is a lefoo_to_cpu for every type, so lets make a simple - * one for u8: - */ -#define le8_to_cpu(v) (v) -#define cpu_to_le8(v) (v) -#define __le8 u8 - -static inline u8 get_unaligned_le8(const void *p) -{ - return *(u8 *)p; -} - -static inline void put_unaligned_le8(u8 val, void *p) -{ - *(u8 *)p = val; -} - -#define read_eb_member(eb, ptr, type, member, result) (\ - read_extent_buffer(eb, (char *)(result), \ - ((unsigned long)(ptr)) + \ - offsetof(type, member), \ - sizeof(((type *)0)->member))) - -#define write_eb_member(eb, ptr, type, member, result) (\ - write_extent_buffer(eb, (char *)(result), \ - ((unsigned long)(ptr)) + \ - offsetof(type, member), \ - sizeof(((type *)0)->member))) - -#define DECLARE_BTRFS_SETGET_BITS(bits) \ -u##bits btrfs_get_token_##bits(struct btrfs_map_token *token, \ - const void *ptr, unsigned long off); \ -void btrfs_set_token_##bits(struct btrfs_map_token *token, \ - const void *ptr, unsigned long off, \ - u##bits val); \ -u##bits btrfs_get_##bits(const struct extent_buffer *eb, \ - const void *ptr, unsigned long off); \ -void btrfs_set_##bits(const struct extent_buffer *eb, void *ptr, \ - unsigned long off, u##bits val); - -DECLARE_BTRFS_SETGET_BITS(8) -DECLARE_BTRFS_SETGET_BITS(16) -DECLARE_BTRFS_SETGET_BITS(32) -DECLARE_BTRFS_SETGET_BITS(64) - -#define BTRFS_SETGET_FUNCS(name, type, member, bits) \ -static inline u##bits btrfs_##name(const struct extent_buffer *eb, \ - const type *s) \ -{ \ - static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ - return btrfs_get_##bits(eb, s, offsetof(type, member)); \ -} \ -static inline void btrfs_set_##name(const struct extent_buffer *eb, type *s, \ - u##bits val) \ -{ \ - static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ - btrfs_set_##bits(eb, s, offsetof(type, member), val); \ -} \ -static inline u##bits btrfs_token_##name(struct btrfs_map_token *token, \ - const type *s) \ -{ \ - static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ - return btrfs_get_token_##bits(token, s, offsetof(type, member));\ -} \ -static inline void btrfs_set_token_##name(struct btrfs_map_token *token,\ - type *s, u##bits val) \ -{ \ - static_assert(sizeof(u##bits) == sizeof(((type *)0))->member); \ - btrfs_set_token_##bits(token, s, offsetof(type, member), val); \ -} - -#define BTRFS_SETGET_HEADER_FUNCS(name, type, member, bits) \ -static inline u##bits btrfs_##name(const struct extent_buffer *eb) \ -{ \ - const type *p = page_address(eb->pages[0]) + \ - offset_in_page(eb->start); \ - return get_unaligned_le##bits(&p->member); \ -} \ -static inline void btrfs_set_##name(const struct extent_buffer *eb, \ - u##bits val) \ -{ \ - type *p = page_address(eb->pages[0]) + offset_in_page(eb->start); \ - put_unaligned_le##bits(val, &p->member); \ -} - -#define BTRFS_SETGET_STACK_FUNCS(name, type, member, bits) \ -static inline u##bits btrfs_##name(const type *s) \ -{ \ - return get_unaligned_le##bits(&s->member); \ -} \ -static inline void btrfs_set_##name(type *s, u##bits val) \ -{ \ - put_unaligned_le##bits(val, &s->member); \ -} - -static inline u64 btrfs_device_total_bytes(const struct extent_buffer *eb, - struct btrfs_dev_item *s) -{ - static_assert(sizeof(u64) == - sizeof(((struct btrfs_dev_item *)0))->total_bytes); - return btrfs_get_64(eb, s, offsetof(struct btrfs_dev_item, - total_bytes)); -} -static inline void btrfs_set_device_total_bytes(const struct extent_buffer *eb, - struct btrfs_dev_item *s, - u64 val) -{ - static_assert(sizeof(u64) == - sizeof(((struct btrfs_dev_item *)0))->total_bytes); - WARN_ON(!IS_ALIGNED(val, eb->fs_info->sectorsize)); - btrfs_set_64(eb, s, offsetof(struct btrfs_dev_item, total_bytes), val); -} - - -BTRFS_SETGET_FUNCS(device_type, struct btrfs_dev_item, type, 64); -BTRFS_SETGET_FUNCS(device_bytes_used, struct btrfs_dev_item, bytes_used, 64); -BTRFS_SETGET_FUNCS(device_io_align, struct btrfs_dev_item, io_align, 32); -BTRFS_SETGET_FUNCS(device_io_width, struct btrfs_dev_item, io_width, 32); -BTRFS_SETGET_FUNCS(device_start_offset, struct btrfs_dev_item, - start_offset, 64); -BTRFS_SETGET_FUNCS(device_sector_size, struct btrfs_dev_item, sector_size, 32); -BTRFS_SETGET_FUNCS(device_id, struct btrfs_dev_item, devid, 64); -BTRFS_SETGET_FUNCS(device_group, struct btrfs_dev_item, dev_group, 32); -BTRFS_SETGET_FUNCS(device_seek_speed, struct btrfs_dev_item, seek_speed, 8); -BTRFS_SETGET_FUNCS(device_bandwidth, struct btrfs_dev_item, bandwidth, 8); -BTRFS_SETGET_FUNCS(device_generation, struct btrfs_dev_item, generation, 64); - -BTRFS_SETGET_STACK_FUNCS(stack_device_type, struct btrfs_dev_item, type, 64); -BTRFS_SETGET_STACK_FUNCS(stack_device_total_bytes, struct btrfs_dev_item, - total_bytes, 64); -BTRFS_SETGET_STACK_FUNCS(stack_device_bytes_used, struct btrfs_dev_item, - bytes_used, 64); -BTRFS_SETGET_STACK_FUNCS(stack_device_io_align, struct btrfs_dev_item, - io_align, 32); -BTRFS_SETGET_STACK_FUNCS(stack_device_io_width, struct btrfs_dev_item, - io_width, 32); -BTRFS_SETGET_STACK_FUNCS(stack_device_sector_size, struct btrfs_dev_item, - sector_size, 32); -BTRFS_SETGET_STACK_FUNCS(stack_device_id, struct btrfs_dev_item, devid, 64); -BTRFS_SETGET_STACK_FUNCS(stack_device_group, struct btrfs_dev_item, - dev_group, 32); -BTRFS_SETGET_STACK_FUNCS(stack_device_seek_speed, struct btrfs_dev_item, - seek_speed, 8); -BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item, - bandwidth, 8); -BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item, - generation, 64); - -static inline unsigned long btrfs_device_uuid(struct btrfs_dev_item *d) -{ - return (unsigned long)d + offsetof(struct btrfs_dev_item, uuid); -} - -static inline unsigned long btrfs_device_fsid(struct btrfs_dev_item *d) -{ - return (unsigned long)d + offsetof(struct btrfs_dev_item, fsid); -} - -BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64); -BTRFS_SETGET_FUNCS(chunk_owner, struct btrfs_chunk, owner, 64); -BTRFS_SETGET_FUNCS(chunk_stripe_len, struct btrfs_chunk, stripe_len, 64); -BTRFS_SETGET_FUNCS(chunk_io_align, struct btrfs_chunk, io_align, 32); -BTRFS_SETGET_FUNCS(chunk_io_width, struct btrfs_chunk, io_width, 32); -BTRFS_SETGET_FUNCS(chunk_sector_size, struct btrfs_chunk, sector_size, 32); -BTRFS_SETGET_FUNCS(chunk_type, struct btrfs_chunk, type, 64); -BTRFS_SETGET_FUNCS(chunk_num_stripes, struct btrfs_chunk, num_stripes, 16); -BTRFS_SETGET_FUNCS(chunk_sub_stripes, struct btrfs_chunk, sub_stripes, 16); -BTRFS_SETGET_FUNCS(stripe_devid, struct btrfs_stripe, devid, 64); -BTRFS_SETGET_FUNCS(stripe_offset, struct btrfs_stripe, offset, 64); - -static inline char *btrfs_stripe_dev_uuid(struct btrfs_stripe *s) -{ - return (char *)s + offsetof(struct btrfs_stripe, dev_uuid); -} - -BTRFS_SETGET_STACK_FUNCS(stack_chunk_length, struct btrfs_chunk, length, 64); -BTRFS_SETGET_STACK_FUNCS(stack_chunk_owner, struct btrfs_chunk, owner, 64); -BTRFS_SETGET_STACK_FUNCS(stack_chunk_stripe_len, struct btrfs_chunk, - stripe_len, 64); -BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_align, struct btrfs_chunk, - io_align, 32); -BTRFS_SETGET_STACK_FUNCS(stack_chunk_io_width, struct btrfs_chunk, - io_width, 32); -BTRFS_SETGET_STACK_FUNCS(stack_chunk_sector_size, struct btrfs_chunk, - sector_size, 32); -BTRFS_SETGET_STACK_FUNCS(stack_chunk_type, struct btrfs_chunk, type, 64); -BTRFS_SETGET_STACK_FUNCS(stack_chunk_num_stripes, struct btrfs_chunk, - num_stripes, 16); -BTRFS_SETGET_STACK_FUNCS(stack_chunk_sub_stripes, struct btrfs_chunk, - sub_stripes, 16); -BTRFS_SETGET_STACK_FUNCS(stack_stripe_devid, struct btrfs_stripe, devid, 64); -BTRFS_SETGET_STACK_FUNCS(stack_stripe_offset, struct btrfs_stripe, offset, 64); - -static inline struct btrfs_stripe *btrfs_stripe_nr(struct btrfs_chunk *c, - int nr) -{ - unsigned long offset = (unsigned long)c; - offset += offsetof(struct btrfs_chunk, stripe); - offset += nr * sizeof(struct btrfs_stripe); - return (struct btrfs_stripe *)offset; -} - -static inline char *btrfs_stripe_dev_uuid_nr(struct btrfs_chunk *c, int nr) -{ - return btrfs_stripe_dev_uuid(btrfs_stripe_nr(c, nr)); -} - -static inline u64 btrfs_stripe_offset_nr(const struct extent_buffer *eb, - struct btrfs_chunk *c, int nr) -{ - return btrfs_stripe_offset(eb, btrfs_stripe_nr(c, nr)); -} - -static inline u64 btrfs_stripe_devid_nr(const struct extent_buffer *eb, - struct btrfs_chunk *c, int nr) -{ - return btrfs_stripe_devid(eb, btrfs_stripe_nr(c, nr)); -} - -/* struct btrfs_block_group_item */ -BTRFS_SETGET_STACK_FUNCS(stack_block_group_used, struct btrfs_block_group_item, - used, 64); -BTRFS_SETGET_FUNCS(block_group_used, struct btrfs_block_group_item, - used, 64); -BTRFS_SETGET_STACK_FUNCS(stack_block_group_chunk_objectid, - struct btrfs_block_group_item, chunk_objectid, 64); - -BTRFS_SETGET_FUNCS(block_group_chunk_objectid, - struct btrfs_block_group_item, chunk_objectid, 64); -BTRFS_SETGET_FUNCS(block_group_flags, - struct btrfs_block_group_item, flags, 64); -BTRFS_SETGET_STACK_FUNCS(stack_block_group_flags, - struct btrfs_block_group_item, flags, 64); - -/* struct btrfs_free_space_info */ -BTRFS_SETGET_FUNCS(free_space_extent_count, struct btrfs_free_space_info, - extent_count, 32); -BTRFS_SETGET_FUNCS(free_space_flags, struct btrfs_free_space_info, flags, 32); - -/* struct btrfs_inode_ref */ -BTRFS_SETGET_FUNCS(inode_ref_name_len, struct btrfs_inode_ref, name_len, 16); -BTRFS_SETGET_FUNCS(inode_ref_index, struct btrfs_inode_ref, index, 64); - -/* struct btrfs_inode_extref */ -BTRFS_SETGET_FUNCS(inode_extref_parent, struct btrfs_inode_extref, - parent_objectid, 64); -BTRFS_SETGET_FUNCS(inode_extref_name_len, struct btrfs_inode_extref, - name_len, 16); -BTRFS_SETGET_FUNCS(inode_extref_index, struct btrfs_inode_extref, index, 64); - -/* struct btrfs_inode_item */ -BTRFS_SETGET_FUNCS(inode_generation, struct btrfs_inode_item, generation, 64); -BTRFS_SETGET_FUNCS(inode_sequence, struct btrfs_inode_item, sequence, 64); -BTRFS_SETGET_FUNCS(inode_transid, struct btrfs_inode_item, transid, 64); -BTRFS_SETGET_FUNCS(inode_size, struct btrfs_inode_item, size, 64); -BTRFS_SETGET_FUNCS(inode_nbytes, struct btrfs_inode_item, nbytes, 64); -BTRFS_SETGET_FUNCS(inode_block_group, struct btrfs_inode_item, block_group, 64); -BTRFS_SETGET_FUNCS(inode_nlink, struct btrfs_inode_item, nlink, 32); -BTRFS_SETGET_FUNCS(inode_uid, struct btrfs_inode_item, uid, 32); -BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32); -BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32); -BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64); -BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item, - generation, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item, - sequence, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item, - transid, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item, - nbytes, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item, - block_group, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32); -BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64); -BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64); -BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64); -BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32); -BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64); -BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32); - -/* struct btrfs_dev_extent */ -BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent, - chunk_tree, 64); -BTRFS_SETGET_FUNCS(dev_extent_chunk_objectid, struct btrfs_dev_extent, - chunk_objectid, 64); -BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent, - chunk_offset, 64); -BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64); -BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64); -BTRFS_SETGET_FUNCS(extent_generation, struct btrfs_extent_item, - generation, 64); -BTRFS_SETGET_FUNCS(extent_flags, struct btrfs_extent_item, flags, 64); - -BTRFS_SETGET_FUNCS(tree_block_level, struct btrfs_tree_block_info, level, 8); - -static inline void btrfs_tree_block_key(const struct extent_buffer *eb, - struct btrfs_tree_block_info *item, - struct btrfs_disk_key *key) -{ - read_eb_member(eb, item, struct btrfs_tree_block_info, key, key); -} - -static inline void btrfs_set_tree_block_key(const struct extent_buffer *eb, - struct btrfs_tree_block_info *item, - struct btrfs_disk_key *key) -{ - write_eb_member(eb, item, struct btrfs_tree_block_info, key, key); -} - -BTRFS_SETGET_FUNCS(extent_data_ref_root, struct btrfs_extent_data_ref, - root, 64); -BTRFS_SETGET_FUNCS(extent_data_ref_objectid, struct btrfs_extent_data_ref, - objectid, 64); -BTRFS_SETGET_FUNCS(extent_data_ref_offset, struct btrfs_extent_data_ref, - offset, 64); -BTRFS_SETGET_FUNCS(extent_data_ref_count, struct btrfs_extent_data_ref, - count, 32); - -BTRFS_SETGET_FUNCS(shared_data_ref_count, struct btrfs_shared_data_ref, - count, 32); - -BTRFS_SETGET_FUNCS(extent_inline_ref_type, struct btrfs_extent_inline_ref, - type, 8); -BTRFS_SETGET_FUNCS(extent_inline_ref_offset, struct btrfs_extent_inline_ref, - offset, 64); - -static inline u32 btrfs_extent_inline_ref_size(int type) -{ - if (type == BTRFS_TREE_BLOCK_REF_KEY || - type == BTRFS_SHARED_BLOCK_REF_KEY) - return sizeof(struct btrfs_extent_inline_ref); - if (type == BTRFS_SHARED_DATA_REF_KEY) - return sizeof(struct btrfs_shared_data_ref) + - sizeof(struct btrfs_extent_inline_ref); - if (type == BTRFS_EXTENT_DATA_REF_KEY) - return sizeof(struct btrfs_extent_data_ref) + - offsetof(struct btrfs_extent_inline_ref, offset); - return 0; -} - -/* struct btrfs_node */ -BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64); -BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64); -BTRFS_SETGET_STACK_FUNCS(stack_key_blockptr, struct btrfs_key_ptr, - blockptr, 64); -BTRFS_SETGET_STACK_FUNCS(stack_key_generation, struct btrfs_key_ptr, - generation, 64); - -static inline u64 btrfs_node_blockptr(const struct extent_buffer *eb, int nr) -{ - unsigned long ptr; - ptr = offsetof(struct btrfs_node, ptrs) + - sizeof(struct btrfs_key_ptr) * nr; - return btrfs_key_blockptr(eb, (struct btrfs_key_ptr *)ptr); -} - -static inline void btrfs_set_node_blockptr(const struct extent_buffer *eb, - int nr, u64 val) -{ - unsigned long ptr; - ptr = offsetof(struct btrfs_node, ptrs) + - sizeof(struct btrfs_key_ptr) * nr; - btrfs_set_key_blockptr(eb, (struct btrfs_key_ptr *)ptr, val); -} - -static inline u64 btrfs_node_ptr_generation(const struct extent_buffer *eb, int nr) -{ - unsigned long ptr; - ptr = offsetof(struct btrfs_node, ptrs) + - sizeof(struct btrfs_key_ptr) * nr; - return btrfs_key_generation(eb, (struct btrfs_key_ptr *)ptr); -} - -static inline void btrfs_set_node_ptr_generation(const struct extent_buffer *eb, - int nr, u64 val) -{ - unsigned long ptr; - ptr = offsetof(struct btrfs_node, ptrs) + - sizeof(struct btrfs_key_ptr) * nr; - btrfs_set_key_generation(eb, (struct btrfs_key_ptr *)ptr, val); -} - -static inline unsigned long btrfs_node_key_ptr_offset(int nr) -{ - return offsetof(struct btrfs_node, ptrs) + - sizeof(struct btrfs_key_ptr) * nr; -} - -void btrfs_node_key(const struct extent_buffer *eb, - struct btrfs_disk_key *disk_key, int nr); - -static inline void btrfs_set_node_key(const struct extent_buffer *eb, - struct btrfs_disk_key *disk_key, int nr) -{ - unsigned long ptr; - ptr = btrfs_node_key_ptr_offset(nr); - write_eb_member(eb, (struct btrfs_key_ptr *)ptr, - struct btrfs_key_ptr, key, disk_key); -} - -/* struct btrfs_item */ -BTRFS_SETGET_FUNCS(raw_item_offset, struct btrfs_item, offset, 32); -BTRFS_SETGET_FUNCS(raw_item_size, struct btrfs_item, size, 32); -BTRFS_SETGET_STACK_FUNCS(stack_item_offset, struct btrfs_item, offset, 32); -BTRFS_SETGET_STACK_FUNCS(stack_item_size, struct btrfs_item, size, 32); - -static inline unsigned long btrfs_item_nr_offset(int nr) -{ - return offsetof(struct btrfs_leaf, items) + - sizeof(struct btrfs_item) * nr; -} - -static inline struct btrfs_item *btrfs_item_nr(int nr) -{ - return (struct btrfs_item *)btrfs_item_nr_offset(nr); -} - -#define BTRFS_ITEM_SETGET_FUNCS(member) \ -static inline u32 btrfs_item_##member(const struct extent_buffer *eb, \ - int slot) \ -{ \ - return btrfs_raw_item_##member(eb, btrfs_item_nr(slot)); \ -} \ -static inline void btrfs_set_item_##member(const struct extent_buffer *eb, \ - int slot, u32 val) \ -{ \ - btrfs_set_raw_item_##member(eb, btrfs_item_nr(slot), val); \ -} \ -static inline u32 btrfs_token_item_##member(struct btrfs_map_token *token, \ - int slot) \ -{ \ - struct btrfs_item *item = btrfs_item_nr(slot); \ - return btrfs_token_raw_item_##member(token, item); \ -} \ -static inline void btrfs_set_token_item_##member(struct btrfs_map_token *token, \ - int slot, u32 val) \ -{ \ - struct btrfs_item *item = btrfs_item_nr(slot); \ - btrfs_set_token_raw_item_##member(token, item, val); \ -} - -BTRFS_ITEM_SETGET_FUNCS(offset) -BTRFS_ITEM_SETGET_FUNCS(size); - -static inline u32 btrfs_item_data_end(const struct extent_buffer *eb, int nr) -{ - return btrfs_item_offset(eb, nr) + btrfs_item_size(eb, nr); -} - -static inline void btrfs_item_key(const struct extent_buffer *eb, - struct btrfs_disk_key *disk_key, int nr) -{ - struct btrfs_item *item = btrfs_item_nr(nr); - read_eb_member(eb, item, struct btrfs_item, key, disk_key); -} - -static inline void btrfs_set_item_key(struct extent_buffer *eb, - struct btrfs_disk_key *disk_key, int nr) -{ - struct btrfs_item *item = btrfs_item_nr(nr); - write_eb_member(eb, item, struct btrfs_item, key, disk_key); -} - -BTRFS_SETGET_FUNCS(dir_log_end, struct btrfs_dir_log_item, end, 64); - -/* - * struct btrfs_root_ref - */ -BTRFS_SETGET_FUNCS(root_ref_dirid, struct btrfs_root_ref, dirid, 64); -BTRFS_SETGET_FUNCS(root_ref_sequence, struct btrfs_root_ref, sequence, 64); -BTRFS_SETGET_FUNCS(root_ref_name_len, struct btrfs_root_ref, name_len, 16); - -/* struct btrfs_dir_item */ -BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16); -BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8); -BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16); -BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64); -BTRFS_SETGET_STACK_FUNCS(stack_dir_type, struct btrfs_dir_item, type, 8); -BTRFS_SETGET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item, - data_len, 16); -BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item, - name_len, 16); -BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item, - transid, 64); - -static inline void btrfs_dir_item_key(const struct extent_buffer *eb, - const struct btrfs_dir_item *item, - struct btrfs_disk_key *key) -{ - read_eb_member(eb, item, struct btrfs_dir_item, location, key); -} - -static inline void btrfs_set_dir_item_key(struct extent_buffer *eb, - struct btrfs_dir_item *item, - const struct btrfs_disk_key *key) -{ - write_eb_member(eb, item, struct btrfs_dir_item, location, key); -} - -BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header, - num_entries, 64); -BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header, - num_bitmaps, 64); -BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header, - generation, 64); - -static inline void btrfs_free_space_key(const struct extent_buffer *eb, - const struct btrfs_free_space_header *h, - struct btrfs_disk_key *key) -{ - read_eb_member(eb, h, struct btrfs_free_space_header, location, key); -} - -static inline void btrfs_set_free_space_key(struct extent_buffer *eb, - struct btrfs_free_space_header *h, - const struct btrfs_disk_key *key) -{ - write_eb_member(eb, h, struct btrfs_free_space_header, location, key); -} - -/* struct btrfs_disk_key */ -BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key, - objectid, 64); -BTRFS_SETGET_STACK_FUNCS(disk_key_offset, struct btrfs_disk_key, offset, 64); -BTRFS_SETGET_STACK_FUNCS(disk_key_type, struct btrfs_disk_key, type, 8); - -#ifdef __LITTLE_ENDIAN - -/* - * Optimized helpers for little-endian architectures where CPU and on-disk - * structures have the same endianness and we can skip conversions. - */ - -static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu_key, - const struct btrfs_disk_key *disk_key) -{ - memcpy(cpu_key, disk_key, sizeof(struct btrfs_key)); -} - -static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk_key, - const struct btrfs_key *cpu_key) -{ - memcpy(disk_key, cpu_key, sizeof(struct btrfs_key)); -} - -static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb, - struct btrfs_key *cpu_key, int nr) -{ - struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key; - - btrfs_node_key(eb, disk_key, nr); -} - -static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb, - struct btrfs_key *cpu_key, int nr) -{ - struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key; - - btrfs_item_key(eb, disk_key, nr); -} - -static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb, - const struct btrfs_dir_item *item, - struct btrfs_key *cpu_key) -{ - struct btrfs_disk_key *disk_key = (struct btrfs_disk_key *)cpu_key; - - btrfs_dir_item_key(eb, item, disk_key); -} - -#else - -static inline void btrfs_disk_key_to_cpu(struct btrfs_key *cpu, - const struct btrfs_disk_key *disk) -{ - cpu->offset = le64_to_cpu(disk->offset); - cpu->type = disk->type; - cpu->objectid = le64_to_cpu(disk->objectid); -} - -static inline void btrfs_cpu_key_to_disk(struct btrfs_disk_key *disk, - const struct btrfs_key *cpu) -{ - disk->offset = cpu_to_le64(cpu->offset); - disk->type = cpu->type; - disk->objectid = cpu_to_le64(cpu->objectid); -} - -static inline void btrfs_node_key_to_cpu(const struct extent_buffer *eb, - struct btrfs_key *key, int nr) -{ - struct btrfs_disk_key disk_key; - btrfs_node_key(eb, &disk_key, nr); - btrfs_disk_key_to_cpu(key, &disk_key); -} - -static inline void btrfs_item_key_to_cpu(const struct extent_buffer *eb, - struct btrfs_key *key, int nr) -{ - struct btrfs_disk_key disk_key; - btrfs_item_key(eb, &disk_key, nr); - btrfs_disk_key_to_cpu(key, &disk_key); -} - -static inline void btrfs_dir_item_key_to_cpu(const struct extent_buffer *eb, - const struct btrfs_dir_item *item, - struct btrfs_key *key) -{ - struct btrfs_disk_key disk_key; - btrfs_dir_item_key(eb, item, &disk_key); - btrfs_disk_key_to_cpu(key, &disk_key); -} - -#endif - -/* struct btrfs_header */ -BTRFS_SETGET_HEADER_FUNCS(header_bytenr, struct btrfs_header, bytenr, 64); -BTRFS_SETGET_HEADER_FUNCS(header_generation, struct btrfs_header, - generation, 64); -BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64); -BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32); -BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64); -BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8); -BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header, - generation, 64); -BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64); -BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header, - nritems, 32); -BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64); - -static inline int btrfs_header_flag(const struct extent_buffer *eb, u64 flag) -{ - return (btrfs_header_flags(eb) & flag) == flag; -} - -static inline void btrfs_set_header_flag(struct extent_buffer *eb, u64 flag) -{ - u64 flags = btrfs_header_flags(eb); - btrfs_set_header_flags(eb, flags | flag); -} - -static inline void btrfs_clear_header_flag(struct extent_buffer *eb, u64 flag) -{ - u64 flags = btrfs_header_flags(eb); - btrfs_set_header_flags(eb, flags & ~flag); -} - -static inline int btrfs_header_backref_rev(const struct extent_buffer *eb) -{ - u64 flags = btrfs_header_flags(eb); - return flags >> BTRFS_BACKREF_REV_SHIFT; -} - -static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb, - int rev) -{ - u64 flags = btrfs_header_flags(eb); - flags &= ~BTRFS_BACKREF_REV_MASK; - flags |= (u64)rev << BTRFS_BACKREF_REV_SHIFT; - btrfs_set_header_flags(eb, flags); -} - -static inline int btrfs_is_leaf(const struct extent_buffer *eb) -{ - return btrfs_header_level(eb) == 0; -} - -/* struct btrfs_root_item */ -BTRFS_SETGET_FUNCS(disk_root_generation, struct btrfs_root_item, - generation, 64); -BTRFS_SETGET_FUNCS(disk_root_refs, struct btrfs_root_item, refs, 32); -BTRFS_SETGET_FUNCS(disk_root_bytenr, struct btrfs_root_item, bytenr, 64); -BTRFS_SETGET_FUNCS(disk_root_level, struct btrfs_root_item, level, 8); - -BTRFS_SETGET_STACK_FUNCS(root_generation, struct btrfs_root_item, - generation, 64); -BTRFS_SETGET_STACK_FUNCS(root_bytenr, struct btrfs_root_item, bytenr, 64); -BTRFS_SETGET_STACK_FUNCS(root_drop_level, struct btrfs_root_item, drop_level, 8); -BTRFS_SETGET_STACK_FUNCS(root_level, struct btrfs_root_item, level, 8); -BTRFS_SETGET_STACK_FUNCS(root_dirid, struct btrfs_root_item, root_dirid, 64); -BTRFS_SETGET_STACK_FUNCS(root_refs, struct btrfs_root_item, refs, 32); -BTRFS_SETGET_STACK_FUNCS(root_flags, struct btrfs_root_item, flags, 64); -BTRFS_SETGET_STACK_FUNCS(root_used, struct btrfs_root_item, bytes_used, 64); -BTRFS_SETGET_STACK_FUNCS(root_limit, struct btrfs_root_item, byte_limit, 64); -BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, - last_snapshot, 64); -BTRFS_SETGET_STACK_FUNCS(root_generation_v2, struct btrfs_root_item, - generation_v2, 64); -BTRFS_SETGET_STACK_FUNCS(root_ctransid, struct btrfs_root_item, - ctransid, 64); -BTRFS_SETGET_STACK_FUNCS(root_otransid, struct btrfs_root_item, - otransid, 64); -BTRFS_SETGET_STACK_FUNCS(root_stransid, struct btrfs_root_item, - stransid, 64); -BTRFS_SETGET_STACK_FUNCS(root_rtransid, struct btrfs_root_item, - rtransid, 64); - -static inline bool btrfs_root_readonly(const struct btrfs_root *root) -{ - /* Byte-swap the constant at compile time, root_item::flags is LE */ - return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0; -} - -static inline bool btrfs_root_dead(const struct btrfs_root *root) -{ - /* Byte-swap the constant at compile time, root_item::flags is LE */ - return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_DEAD)) != 0; -} - -static inline u64 btrfs_root_id(const struct btrfs_root *root) -{ - return root->root_key.objectid; -} - -/* struct btrfs_root_backup */ -BTRFS_SETGET_STACK_FUNCS(backup_tree_root, struct btrfs_root_backup, - tree_root, 64); -BTRFS_SETGET_STACK_FUNCS(backup_tree_root_gen, struct btrfs_root_backup, - tree_root_gen, 64); -BTRFS_SETGET_STACK_FUNCS(backup_tree_root_level, struct btrfs_root_backup, - tree_root_level, 8); - -BTRFS_SETGET_STACK_FUNCS(backup_chunk_root, struct btrfs_root_backup, - chunk_root, 64); -BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_gen, struct btrfs_root_backup, - chunk_root_gen, 64); -BTRFS_SETGET_STACK_FUNCS(backup_chunk_root_level, struct btrfs_root_backup, - chunk_root_level, 8); - -BTRFS_SETGET_STACK_FUNCS(backup_extent_root, struct btrfs_root_backup, - extent_root, 64); -BTRFS_SETGET_STACK_FUNCS(backup_extent_root_gen, struct btrfs_root_backup, - extent_root_gen, 64); -BTRFS_SETGET_STACK_FUNCS(backup_extent_root_level, struct btrfs_root_backup, - extent_root_level, 8); - -BTRFS_SETGET_STACK_FUNCS(backup_fs_root, struct btrfs_root_backup, - fs_root, 64); -BTRFS_SETGET_STACK_FUNCS(backup_fs_root_gen, struct btrfs_root_backup, - fs_root_gen, 64); -BTRFS_SETGET_STACK_FUNCS(backup_fs_root_level, struct btrfs_root_backup, - fs_root_level, 8); - -BTRFS_SETGET_STACK_FUNCS(backup_dev_root, struct btrfs_root_backup, - dev_root, 64); -BTRFS_SETGET_STACK_FUNCS(backup_dev_root_gen, struct btrfs_root_backup, - dev_root_gen, 64); -BTRFS_SETGET_STACK_FUNCS(backup_dev_root_level, struct btrfs_root_backup, - dev_root_level, 8); - -BTRFS_SETGET_STACK_FUNCS(backup_csum_root, struct btrfs_root_backup, - csum_root, 64); -BTRFS_SETGET_STACK_FUNCS(backup_csum_root_gen, struct btrfs_root_backup, - csum_root_gen, 64); -BTRFS_SETGET_STACK_FUNCS(backup_csum_root_level, struct btrfs_root_backup, - csum_root_level, 8); -BTRFS_SETGET_STACK_FUNCS(backup_total_bytes, struct btrfs_root_backup, - total_bytes, 64); -BTRFS_SETGET_STACK_FUNCS(backup_bytes_used, struct btrfs_root_backup, - bytes_used, 64); -BTRFS_SETGET_STACK_FUNCS(backup_num_devices, struct btrfs_root_backup, - num_devices, 64); - -/* struct btrfs_balance_item */ -BTRFS_SETGET_FUNCS(balance_flags, struct btrfs_balance_item, flags, 64); - -static inline void btrfs_balance_data(const struct extent_buffer *eb, - const struct btrfs_balance_item *bi, - struct btrfs_disk_balance_args *ba) -{ - read_eb_member(eb, bi, struct btrfs_balance_item, data, ba); -} - -static inline void btrfs_set_balance_data(struct extent_buffer *eb, - struct btrfs_balance_item *bi, - const struct btrfs_disk_balance_args *ba) -{ - write_eb_member(eb, bi, struct btrfs_balance_item, data, ba); -} - -static inline void btrfs_balance_meta(const struct extent_buffer *eb, - const struct btrfs_balance_item *bi, - struct btrfs_disk_balance_args *ba) -{ - read_eb_member(eb, bi, struct btrfs_balance_item, meta, ba); -} - -static inline void btrfs_set_balance_meta(struct extent_buffer *eb, - struct btrfs_balance_item *bi, - const struct btrfs_disk_balance_args *ba) -{ - write_eb_member(eb, bi, struct btrfs_balance_item, meta, ba); -} - -static inline void btrfs_balance_sys(const struct extent_buffer *eb, - const struct btrfs_balance_item *bi, - struct btrfs_disk_balance_args *ba) -{ - read_eb_member(eb, bi, struct btrfs_balance_item, sys, ba); -} - -static inline void btrfs_set_balance_sys(struct extent_buffer *eb, - struct btrfs_balance_item *bi, - const struct btrfs_disk_balance_args *ba) -{ - write_eb_member(eb, bi, struct btrfs_balance_item, sys, ba); -} - -static inline void -btrfs_disk_balance_args_to_cpu(struct btrfs_balance_args *cpu, - const struct btrfs_disk_balance_args *disk) -{ - memset(cpu, 0, sizeof(*cpu)); - - cpu->profiles = le64_to_cpu(disk->profiles); - cpu->usage = le64_to_cpu(disk->usage); - cpu->devid = le64_to_cpu(disk->devid); - cpu->pstart = le64_to_cpu(disk->pstart); - cpu->pend = le64_to_cpu(disk->pend); - cpu->vstart = le64_to_cpu(disk->vstart); - cpu->vend = le64_to_cpu(disk->vend); - cpu->target = le64_to_cpu(disk->target); - cpu->flags = le64_to_cpu(disk->flags); - cpu->limit = le64_to_cpu(disk->limit); - cpu->stripes_min = le32_to_cpu(disk->stripes_min); - cpu->stripes_max = le32_to_cpu(disk->stripes_max); -} - -static inline void -btrfs_cpu_balance_args_to_disk(struct btrfs_disk_balance_args *disk, - const struct btrfs_balance_args *cpu) -{ - memset(disk, 0, sizeof(*disk)); - - disk->profiles = cpu_to_le64(cpu->profiles); - disk->usage = cpu_to_le64(cpu->usage); - disk->devid = cpu_to_le64(cpu->devid); - disk->pstart = cpu_to_le64(cpu->pstart); - disk->pend = cpu_to_le64(cpu->pend); - disk->vstart = cpu_to_le64(cpu->vstart); - disk->vend = cpu_to_le64(cpu->vend); - disk->target = cpu_to_le64(cpu->target); - disk->flags = cpu_to_le64(cpu->flags); - disk->limit = cpu_to_le64(cpu->limit); - disk->stripes_min = cpu_to_le32(cpu->stripes_min); - disk->stripes_max = cpu_to_le32(cpu->stripes_max); -} - -/* struct btrfs_super_block */ -BTRFS_SETGET_STACK_FUNCS(super_bytenr, struct btrfs_super_block, bytenr, 64); -BTRFS_SETGET_STACK_FUNCS(super_flags, struct btrfs_super_block, flags, 64); -BTRFS_SETGET_STACK_FUNCS(super_generation, struct btrfs_super_block, - generation, 64); -BTRFS_SETGET_STACK_FUNCS(super_root, struct btrfs_super_block, root, 64); -BTRFS_SETGET_STACK_FUNCS(super_sys_array_size, - struct btrfs_super_block, sys_chunk_array_size, 32); -BTRFS_SETGET_STACK_FUNCS(super_chunk_root_generation, - struct btrfs_super_block, chunk_root_generation, 64); -BTRFS_SETGET_STACK_FUNCS(super_root_level, struct btrfs_super_block, - root_level, 8); -BTRFS_SETGET_STACK_FUNCS(super_chunk_root, struct btrfs_super_block, - chunk_root, 64); -BTRFS_SETGET_STACK_FUNCS(super_chunk_root_level, struct btrfs_super_block, - chunk_root_level, 8); -BTRFS_SETGET_STACK_FUNCS(super_log_root, struct btrfs_super_block, - log_root, 64); -BTRFS_SETGET_STACK_FUNCS(super_log_root_level, struct btrfs_super_block, - log_root_level, 8); -BTRFS_SETGET_STACK_FUNCS(super_total_bytes, struct btrfs_super_block, - total_bytes, 64); -BTRFS_SETGET_STACK_FUNCS(super_bytes_used, struct btrfs_super_block, - bytes_used, 64); -BTRFS_SETGET_STACK_FUNCS(super_sectorsize, struct btrfs_super_block, - sectorsize, 32); -BTRFS_SETGET_STACK_FUNCS(super_nodesize, struct btrfs_super_block, - nodesize, 32); -BTRFS_SETGET_STACK_FUNCS(super_stripesize, struct btrfs_super_block, - stripesize, 32); -BTRFS_SETGET_STACK_FUNCS(super_root_dir, struct btrfs_super_block, - root_dir_objectid, 64); -BTRFS_SETGET_STACK_FUNCS(super_num_devices, struct btrfs_super_block, - num_devices, 64); -BTRFS_SETGET_STACK_FUNCS(super_compat_flags, struct btrfs_super_block, - compat_flags, 64); -BTRFS_SETGET_STACK_FUNCS(super_compat_ro_flags, struct btrfs_super_block, - compat_ro_flags, 64); -BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block, - incompat_flags, 64); -BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block, - csum_type, 16); -BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block, - cache_generation, 64); -BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64); -BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block, - uuid_tree_generation, 64); - -int btrfs_super_csum_size(const struct btrfs_super_block *s); -const char *btrfs_super_csum_name(u16 csum_type); -const char *btrfs_super_csum_driver(u16 csum_type); -size_t __attribute_const__ btrfs_get_num_csums(void); - - -/* - * The leaf data grows from end-to-front in the node. - * this returns the address of the start of the last item, - * which is the stop of the leaf data stack - */ -static inline unsigned int leaf_data_end(const struct extent_buffer *leaf) -{ - u32 nr = btrfs_header_nritems(leaf); - - if (nr == 0) - return BTRFS_LEAF_DATA_SIZE(leaf->fs_info); - return btrfs_item_offset(leaf, nr - 1); -} - -/* struct btrfs_file_extent_item */ -BTRFS_SETGET_STACK_FUNCS(stack_file_extent_type, struct btrfs_file_extent_item, - type, 8); -BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr, - struct btrfs_file_extent_item, disk_bytenr, 64); -BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset, - struct btrfs_file_extent_item, offset, 64); -BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation, - struct btrfs_file_extent_item, generation, 64); -BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes, - struct btrfs_file_extent_item, num_bytes, 64); -BTRFS_SETGET_STACK_FUNCS(stack_file_extent_ram_bytes, - struct btrfs_file_extent_item, ram_bytes, 64); -BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_num_bytes, - struct btrfs_file_extent_item, disk_num_bytes, 64); -BTRFS_SETGET_STACK_FUNCS(stack_file_extent_compression, - struct btrfs_file_extent_item, compression, 8); - -static inline unsigned long -btrfs_file_extent_inline_start(const struct btrfs_file_extent_item *e) -{ - return (unsigned long)e + BTRFS_FILE_EXTENT_INLINE_DATA_START; -} - -static inline u32 btrfs_file_extent_calc_inline_size(u32 datasize) -{ - return BTRFS_FILE_EXTENT_INLINE_DATA_START + datasize; -} - -BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8); -BTRFS_SETGET_FUNCS(file_extent_disk_bytenr, struct btrfs_file_extent_item, - disk_bytenr, 64); -BTRFS_SETGET_FUNCS(file_extent_generation, struct btrfs_file_extent_item, - generation, 64); -BTRFS_SETGET_FUNCS(file_extent_disk_num_bytes, struct btrfs_file_extent_item, - disk_num_bytes, 64); -BTRFS_SETGET_FUNCS(file_extent_offset, struct btrfs_file_extent_item, - offset, 64); -BTRFS_SETGET_FUNCS(file_extent_num_bytes, struct btrfs_file_extent_item, - num_bytes, 64); -BTRFS_SETGET_FUNCS(file_extent_ram_bytes, struct btrfs_file_extent_item, - ram_bytes, 64); -BTRFS_SETGET_FUNCS(file_extent_compression, struct btrfs_file_extent_item, - compression, 8); -BTRFS_SETGET_FUNCS(file_extent_encryption, struct btrfs_file_extent_item, - encryption, 8); -BTRFS_SETGET_FUNCS(file_extent_other_encoding, struct btrfs_file_extent_item, - other_encoding, 16); - -/* - * this returns the number of bytes used by the item on disk, minus the - * size of any extent headers. If a file is compressed on disk, this is - * the compressed size - */ -static inline u32 btrfs_file_extent_inline_item_len( - const struct extent_buffer *eb, - int nr) -{ - return btrfs_item_size(eb, nr) - BTRFS_FILE_EXTENT_INLINE_DATA_START; -} - -/* btrfs_qgroup_status_item */ -BTRFS_SETGET_FUNCS(qgroup_status_generation, struct btrfs_qgroup_status_item, - generation, 64); -BTRFS_SETGET_FUNCS(qgroup_status_version, struct btrfs_qgroup_status_item, - version, 64); -BTRFS_SETGET_FUNCS(qgroup_status_flags, struct btrfs_qgroup_status_item, - flags, 64); -BTRFS_SETGET_FUNCS(qgroup_status_rescan, struct btrfs_qgroup_status_item, - rescan, 64); - -/* btrfs_qgroup_info_item */ -BTRFS_SETGET_FUNCS(qgroup_info_generation, struct btrfs_qgroup_info_item, - generation, 64); -BTRFS_SETGET_FUNCS(qgroup_info_rfer, struct btrfs_qgroup_info_item, rfer, 64); -BTRFS_SETGET_FUNCS(qgroup_info_rfer_cmpr, struct btrfs_qgroup_info_item, - rfer_cmpr, 64); -BTRFS_SETGET_FUNCS(qgroup_info_excl, struct btrfs_qgroup_info_item, excl, 64); -BTRFS_SETGET_FUNCS(qgroup_info_excl_cmpr, struct btrfs_qgroup_info_item, - excl_cmpr, 64); - -BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_generation, - struct btrfs_qgroup_info_item, generation, 64); -BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer, struct btrfs_qgroup_info_item, - rfer, 64); -BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_rfer_cmpr, - struct btrfs_qgroup_info_item, rfer_cmpr, 64); -BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl, struct btrfs_qgroup_info_item, - excl, 64); -BTRFS_SETGET_STACK_FUNCS(stack_qgroup_info_excl_cmpr, - struct btrfs_qgroup_info_item, excl_cmpr, 64); - -/* btrfs_qgroup_limit_item */ -BTRFS_SETGET_FUNCS(qgroup_limit_flags, struct btrfs_qgroup_limit_item, - flags, 64); -BTRFS_SETGET_FUNCS(qgroup_limit_max_rfer, struct btrfs_qgroup_limit_item, - max_rfer, 64); -BTRFS_SETGET_FUNCS(qgroup_limit_max_excl, struct btrfs_qgroup_limit_item, - max_excl, 64); -BTRFS_SETGET_FUNCS(qgroup_limit_rsv_rfer, struct btrfs_qgroup_limit_item, - rsv_rfer, 64); -BTRFS_SETGET_FUNCS(qgroup_limit_rsv_excl, struct btrfs_qgroup_limit_item, - rsv_excl, 64); - -/* btrfs_dev_replace_item */ -BTRFS_SETGET_FUNCS(dev_replace_src_devid, - struct btrfs_dev_replace_item, src_devid, 64); -BTRFS_SETGET_FUNCS(dev_replace_cont_reading_from_srcdev_mode, - struct btrfs_dev_replace_item, cont_reading_from_srcdev_mode, - 64); -BTRFS_SETGET_FUNCS(dev_replace_replace_state, struct btrfs_dev_replace_item, - replace_state, 64); -BTRFS_SETGET_FUNCS(dev_replace_time_started, struct btrfs_dev_replace_item, - time_started, 64); -BTRFS_SETGET_FUNCS(dev_replace_time_stopped, struct btrfs_dev_replace_item, - time_stopped, 64); -BTRFS_SETGET_FUNCS(dev_replace_num_write_errors, struct btrfs_dev_replace_item, - num_write_errors, 64); -BTRFS_SETGET_FUNCS(dev_replace_num_uncorrectable_read_errors, - struct btrfs_dev_replace_item, num_uncorrectable_read_errors, - 64); -BTRFS_SETGET_FUNCS(dev_replace_cursor_left, struct btrfs_dev_replace_item, - cursor_left, 64); -BTRFS_SETGET_FUNCS(dev_replace_cursor_right, struct btrfs_dev_replace_item, - cursor_right, 64); - -BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_src_devid, - struct btrfs_dev_replace_item, src_devid, 64); -BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cont_reading_from_srcdev_mode, - struct btrfs_dev_replace_item, - cont_reading_from_srcdev_mode, 64); -BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_replace_state, - struct btrfs_dev_replace_item, replace_state, 64); -BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_time_started, - struct btrfs_dev_replace_item, time_started, 64); -BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_time_stopped, - struct btrfs_dev_replace_item, time_stopped, 64); -BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_num_write_errors, - struct btrfs_dev_replace_item, num_write_errors, 64); -BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_num_uncorrectable_read_errors, - struct btrfs_dev_replace_item, - num_uncorrectable_read_errors, 64); -BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_left, - struct btrfs_dev_replace_item, cursor_left, 64); -BTRFS_SETGET_STACK_FUNCS(stack_dev_replace_cursor_right, - struct btrfs_dev_replace_item, cursor_right, 64); - -/* btrfs_verity_descriptor_item */ -BTRFS_SETGET_FUNCS(verity_descriptor_encryption, struct btrfs_verity_descriptor_item, - encryption, 8); -BTRFS_SETGET_FUNCS(verity_descriptor_size, struct btrfs_verity_descriptor_item, - size, 64); -BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_encryption, - struct btrfs_verity_descriptor_item, encryption, 8); -BTRFS_SETGET_STACK_FUNCS(stack_verity_descriptor_size, - struct btrfs_verity_descriptor_item, size, 64); - -/* helper function to cast into the data area of the leaf. */ -#define btrfs_item_ptr(leaf, slot, type) \ - ((type *)(BTRFS_LEAF_DATA_OFFSET + \ - btrfs_item_offset(leaf, slot))) - -#define btrfs_item_ptr_offset(leaf, slot) \ - ((unsigned long)(BTRFS_LEAF_DATA_OFFSET + \ - btrfs_item_offset(leaf, slot))) - static inline u32 btrfs_crc32c(u32 crc, const void *address, unsigned length) { return crc32c(crc, address, length); diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 2f68570fbb53..012c96de4701 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -17,6 +17,7 @@ #include "locking.h" #include "inode-item.h" #include "space-info.h" +#include "accessors.h" #define BTRFS_DELAYED_WRITEBACK 512 #define BTRFS_DELAYED_BACKGROUND 128 diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 348aef453e69..94f8975034ce 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -24,6 +24,7 @@ #include "zoned.h" #include "block-group.h" #include "fs.h" +#include "accessors.h" /* * Device replace overview diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index be5c1c2a8da5..9fa37f245c43 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -7,6 +7,7 @@ #include "ctree.h" #include "disk-io.h" #include "transaction.h" +#include "accessors.h" /* * insert a name into a directory, doing overflow properly if there is a hash diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2c38b4eebd8f..75475a213972 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -44,6 +44,7 @@ #include "zoned.h" #include "subpage.h" #include "fs.h" +#include "accessors.h" #define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\ BTRFS_HEADER_FLAG_RELOC |\ diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c index a51a5dfa737c..b6bc9684648f 100644 --- a/fs/btrfs/export.c +++ b/fs/btrfs/export.c @@ -7,6 +7,7 @@ #include "btrfs_inode.h" #include "print-tree.h" #include "export.h" +#include "accessors.h" #define BTRFS_FID_SIZE_NON_CONNECTABLE (offsetof(struct btrfs_fid, \ parent_objectid) / 4) diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index bc010dbcb6b1..8d28ba360e9a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -37,6 +37,7 @@ #include "zoned.h" #include "dev-replace.h" #include "fs.h" +#include "accessors.h" #undef SCRAMBLE_DELAYED_REFS diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index aa2d60f683bb..4b47bb8c590f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -31,6 +31,7 @@ #include "block-group.h" #include "compression.h" #include "fs.h" +#include "accessors.h" static struct kmem_cache *extent_buffer_cache; diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 675987e2d652..bce6c8d31bc0 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -18,6 +18,7 @@ #include "print-tree.h" #include "compression.h" #include "fs.h" +#include "accessors.h" #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \ sizeof(struct btrfs_item) * 2) / \ diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index a352c7cacc99..62360a554420 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -31,6 +31,7 @@ #include "reflink.h" #include "subpage.h" #include "fs.h" +#include "accessors.h" static struct kmem_cache *btrfs_inode_defrag_cachep; /* diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 703902156f97..26ff1a5100b9 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -26,6 +26,7 @@ #include "discard.h" #include "subpage.h" #include "inode-item.h" +#include "accessors.h" #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) #define MAX_CACHE_BYTES_PER_GIG SZ_64K diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index 026214d74a02..fc79d21e7b4f 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -13,6 +13,7 @@ #include "transaction.h" #include "block-group.h" #include "fs.h" +#include "accessors.h" static int __add_block_group_free_space(struct btrfs_trans_handle *trans, struct btrfs_block_group *block_group, diff --git a/fs/btrfs/fs.c b/fs/btrfs/fs.c index a59504b59435..5553e1f8afe8 100644 --- a/fs/btrfs/fs.c +++ b/fs/btrfs/fs.c @@ -3,6 +3,7 @@ #include "messages.h" #include "ctree.h" #include "fs.h" +#include "accessors.h" void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, const char *name) diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 25e9f1d65067..b8dbabfa8b31 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -11,6 +11,7 @@ #include "transaction.h" #include "print-tree.h" #include "space-info.h" +#include "accessors.h" struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, int slot, const char *name, diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 564a4ae9c285..e33be4032fff 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -51,6 +51,7 @@ #include "block-group.h" #include "subpage.h" #include "fs.h" +#include "accessors.h" #ifdef CONFIG_64BIT /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 0eab3cb274a1..870528d87526 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -12,6 +12,7 @@ #include "ctree.h" #include "extent_io.h" #include "locking.h" +#include "accessors.h" /* * Lockdep class keys for extent_buffer->lock's in this root. For a given diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 708facaede2c..aab7d30eed55 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -7,6 +7,7 @@ #include "ctree.h" #include "disk-io.h" #include "print-tree.h" +#include "accessors.h" struct root_name_map { u64 id; diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 6d88bf0a9b17..9ad15d69718c 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -13,6 +13,7 @@ #include "compression.h" #include "space-info.h" #include "fs.h" +#include "accessors.h" #define BTRFS_PROP_HANDLERS_HT_BITS 8 static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index e87a2f066f4d..39f67ca4b4ca 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -25,6 +25,7 @@ #include "sysfs.h" #include "tree-mod-log.h" #include "fs.h" +#include "accessors.h" /* * Helpers to access qgroup reservation diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c index 9b09dc50ba14..95d28497de7c 100644 --- a/fs/btrfs/ref-verify.c +++ b/fs/btrfs/ref-verify.c @@ -12,6 +12,7 @@ #include "delayed-ref.h" #include "ref-verify.h" #include "fs.h" +#include "accessors.h" /* * Used to keep track the roots and number of refs each root has for a given diff --git a/fs/btrfs/reflink.c b/fs/btrfs/reflink.c index daf65bfad30e..f0243eb33df7 100644 --- a/fs/btrfs/reflink.c +++ b/fs/btrfs/reflink.c @@ -11,6 +11,7 @@ #include "reflink.h" #include "transaction.h" #include "subpage.h" +#include "accessors.h" #define BTRFS_MAX_DEDUPE_LEN SZ_16M diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 977afbb4cd45..3d0a63465a74 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -29,6 +29,7 @@ #include "inode-item.h" #include "space-info.h" #include "fs.h" +#include "accessors.h" /* * Relocation overview diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 112b4bf3c3b8..fc00dfb281d5 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c @@ -13,6 +13,7 @@ #include "print-tree.h" #include "qgroup.h" #include "space-info.h" +#include "accessors.h" /* * Read a root item from the tree. In case we detect a root item smaller then diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 6871c1c54f8c..5c6c4aa2fb09 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -22,6 +22,7 @@ #include "block-group.h" #include "zoned.h" #include "fs.h" +#include "accessors.h" /* * This is only the first step towards a full-features scrub. It reads all diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 80104b5af32f..31fb3292e816 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -27,6 +27,7 @@ #include "compression.h" #include "xattr.h" #include "print-tree.h" +#include "accessors.h" /* * Maximum number of references an extent can have in order for us to attempt to diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index af1538c2685d..94d73485454a 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -11,6 +11,7 @@ #include "block-group.h" #include "zoned.h" #include "fs.h" +#include "accessors.h" /* * HOW DOES SPACE RESERVATION WORK diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0839e4a1cfac..c02fcdb9c7c4 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -51,6 +51,7 @@ #include "qgroup.h" #include "raid56.h" #include "fs.h" +#include "accessors.h" #define CREATE_TRACE_POINTS #include diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c index 47b221372dcf..a14812f1254a 100644 --- a/fs/btrfs/sysfs.c +++ b/fs/btrfs/sysfs.c @@ -23,6 +23,7 @@ #include "qgroup.h" #include "misc.h" #include "fs.h" +#include "accessors.h" /* * Structure name Path diff --git a/fs/btrfs/tests/extent-buffer-tests.c b/fs/btrfs/tests/extent-buffer-tests.c index b7d181a08eab..5ef0b90e25c3 100644 --- a/fs/btrfs/tests/extent-buffer-tests.c +++ b/fs/btrfs/tests/extent-buffer-tests.c @@ -8,6 +8,7 @@ #include "../ctree.h" #include "../extent_io.h" #include "../disk-io.h" +#include "../accessors.h" static int test_btrfs_split_item(u32 sectorsize, u32 nodesize) { diff --git a/fs/btrfs/tests/free-space-tree-tests.c b/fs/btrfs/tests/free-space-tree-tests.c index 13734ed43bfc..53a17b1d1744 100644 --- a/fs/btrfs/tests/free-space-tree-tests.c +++ b/fs/btrfs/tests/free-space-tree-tests.c @@ -10,6 +10,7 @@ #include "../free-space-tree.h" #include "../transaction.h" #include "../block-group.h" +#include "../accessors.h" struct free_space_extent { u64 start; diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c index 625f7d398368..0a34a54ea9fd 100644 --- a/fs/btrfs/tests/inode-tests.c +++ b/fs/btrfs/tests/inode-tests.c @@ -11,6 +11,7 @@ #include "../extent_io.h" #include "../volumes.h" #include "../compression.h" +#include "../accessors.h" static void insert_extent(struct btrfs_root *root, u64 start, u64 len, u64 ram_bytes, u64 offset, u64 disk_bytenr, diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index fd0e0ade8154..65b65d55d1f6 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -11,6 +11,7 @@ #include "../qgroup.h" #include "../backref.h" #include "../fs.h" +#include "../accessors.h" static int insert_normal_tree_ref(struct btrfs_root *root, u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 37d0baaa41d8..25e6b504edb4 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -24,6 +24,7 @@ #include "space-info.h" #include "zoned.h" #include "fs.h" +#include "accessors.h" static struct kmem_cache *btrfs_trans_handle_cachep; diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index fa9536110d69..11cafc520b47 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -27,6 +27,7 @@ #include "misc.h" #include "btrfs_inode.h" #include "fs.h" +#include "accessors.h" /* * Error message should follow the following format: diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b6cf39f4e7e4..5df604846de6 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c @@ -9,6 +9,7 @@ #include "print-tree.h" #include "transaction.h" #include "locking.h" +#include "accessors.h" /* * Defrag all the leaves in a given btree. diff --git a/fs/btrfs/tree-mod-log.c b/fs/btrfs/tree-mod-log.c index 69083889a9b8..3bea19698a10 100644 --- a/fs/btrfs/tree-mod-log.c +++ b/fs/btrfs/tree-mod-log.c @@ -4,6 +4,7 @@ #include "tree-mod-log.h" #include "disk-io.h" #include "fs.h" +#include "accessors.h" struct tree_mod_root { u64 logical; diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c index 190f752a2e10..70304b89f31f 100644 --- a/fs/btrfs/uuid-tree.c +++ b/fs/btrfs/uuid-tree.c @@ -11,7 +11,7 @@ #include "disk-io.h" #include "print-tree.h" #include "fs.h" - +#include "accessors.h" static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key) { diff --git a/fs/btrfs/verity.c b/fs/btrfs/verity.c index 35445855df4d..d02fa354fc2b 100644 --- a/fs/btrfs/verity.c +++ b/fs/btrfs/verity.c @@ -17,6 +17,7 @@ #include "disk-io.h" #include "locking.h" #include "fs.h" +#include "accessors.h" /* * Implementation of the interface defined in struct fsverity_operations. diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index d65d5d7835fe..767c56be6b96 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -34,6 +34,7 @@ #include "discard.h" #include "zoned.h" #include "fs.h" +#include "accessors.h" static struct bio_set btrfs_bioset; diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index b26c869f0226..fcf2d5f7e198 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -21,6 +21,7 @@ #include "disk-io.h" #include "props.h" #include "locking.h" +#include "accessors.h" int btrfs_getxattr(struct inode *inode, const char *name, void *buffer, size_t size) diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 2a7d856c232c..fe66cb929af9 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -16,6 +16,7 @@ #include "dev-replace.h" #include "space-info.h" #include "fs.h" +#include "accessors.h" /* Maximum number of zones to report per blkdev_report_zones() call */ #define BTRFS_REPORT_NR_ZONES 4096 -- cgit v1.2.3 From d68194b238228ce470de1fc2453851fc06294739 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Fri, 14 Oct 2022 15:45:37 +0200 Subject: btrfs: sink gfp_t parameter to btrfs_backref_iter_alloc There's only one caller that passes GFP_NOFS, we can drop the parameter an use the flags directly. Reviewed-by: Anand Jain Reviewed-by: Johannes Thumshirn Signed-off-by: David Sterba --- fs/btrfs/backref.c | 5 ++--- fs/btrfs/backref.h | 3 +-- fs/btrfs/relocation.c | 2 +- 3 files changed, 4 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 41caa6fc6301..f556566b9c79 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -2650,12 +2650,11 @@ void free_ipath(struct inode_fs_paths *ipath) kfree(ipath); } -struct btrfs_backref_iter *btrfs_backref_iter_alloc( - struct btrfs_fs_info *fs_info, gfp_t gfp_flag) +struct btrfs_backref_iter *btrfs_backref_iter_alloc(struct btrfs_fs_info *fs_info) { struct btrfs_backref_iter *ret; - ret = kzalloc(sizeof(*ret), gfp_flag); + ret = kzalloc(sizeof(*ret), GFP_NOFS); if (!ret) return NULL; diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index fa3c9cbf9ae7..2dd68f87dca5 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -156,8 +156,7 @@ struct btrfs_backref_iter { u32 end_ptr; }; -struct btrfs_backref_iter *btrfs_backref_iter_alloc( - struct btrfs_fs_info *fs_info, gfp_t gfp_flag); +struct btrfs_backref_iter *btrfs_backref_iter_alloc(struct btrfs_fs_info *fs_info); static inline void btrfs_backref_iter_free(struct btrfs_backref_iter *iter) { diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 3d0a63465a74..77d03dce2521 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -473,7 +473,7 @@ static noinline_for_stack struct btrfs_backref_node *build_backref_tree( int ret; int err = 0; - iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info, GFP_NOFS); + iter = btrfs_backref_iter_alloc(rc->extent_root->fs_info); if (!iter) return ERR_PTR(-ENOMEM); path = btrfs_alloc_path(); -- cgit v1.2.3 From a0231804affe78d27264811559ee31bd341c2bff Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 24 Oct 2022 14:46:57 -0400 Subject: btrfs: move extent-tree helpers into their own header file Move all the extent tree related prototypes to extent-tree.h out of ctree.h, and then go include it everywhere needed so everything compiles. Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/backref.c | 1 + fs/btrfs/block-group.c | 1 + fs/btrfs/ctree.c | 1 + fs/btrfs/ctree.h | 72 ---------------------------------------------- fs/btrfs/disk-io.c | 1 + fs/btrfs/extent-tree.c | 1 + fs/btrfs/extent-tree.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/file.c | 1 + fs/btrfs/free-space-tree.c | 1 + fs/btrfs/inode-item.c | 1 + fs/btrfs/inode.c | 1 + fs/btrfs/ioctl.c | 1 + fs/btrfs/qgroup.c | 1 + fs/btrfs/relocation.c | 1 + fs/btrfs/space-info.c | 1 + fs/btrfs/transaction.c | 1 + fs/btrfs/tree-log.c | 1 + 17 files changed, 87 insertions(+), 72 deletions(-) create mode 100644 fs/btrfs/extent-tree.h (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index f556566b9c79..173df40da984 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -17,6 +17,7 @@ #include "tree-mod-log.h" #include "fs.h" #include "accessors.h" +#include "extent-tree.h" /* Just arbitrary numbers so we can be sure one of these happened. */ #define BACKREF_FOUND_SHARED 6 diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index d1f8d792b184..5a743c4d4e97 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -19,6 +19,7 @@ #include "zoned.h" #include "fs.h" #include "accessors.h" +#include "extent-tree.h" #ifdef CONFIG_BTRFS_DEBUG int btrfs_should_fragment_free_space(struct btrfs_block_group *block_group) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1283ca46c5bc..f0ddb44704ee 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -20,6 +20,7 @@ #include "tree-checker.h" #include "fs.h" #include "accessors.h" +#include "extent-tree.h" static struct kmem_cache *btrfs_path_cachep; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 07f876961da1..8ca67227dec0 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -495,78 +495,6 @@ static inline gfp_t btrfs_alloc_write_mask(struct address_space *mapping) return mapping_gfp_constraint(mapping, ~__GFP_FS); } -/* extent-tree.c */ - -enum btrfs_inline_ref_type { - BTRFS_REF_TYPE_INVALID, - BTRFS_REF_TYPE_BLOCK, - BTRFS_REF_TYPE_DATA, - BTRFS_REF_TYPE_ANY, -}; - -int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb, - struct btrfs_extent_inline_ref *iref, - enum btrfs_inline_ref_type is_data); -u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset); - - -int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info, - u64 start, u64 num_bytes); -void btrfs_free_excluded_extents(struct btrfs_block_group *cache); -int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, - unsigned long count); -void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, - struct btrfs_delayed_ref_root *delayed_refs, - struct btrfs_delayed_ref_head *head); -int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len); -int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, - u64 offset, int metadata, u64 *refs, u64 *flags); -int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num, - int reserved); -int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, - u64 bytenr, u64 num_bytes); -int btrfs_exclude_logged_extents(struct extent_buffer *eb); -int btrfs_cross_ref_exist(struct btrfs_root *root, - u64 objectid, u64 offset, u64 bytenr, bool strict, - struct btrfs_path *path); -struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - u64 parent, u64 root_objectid, - const struct btrfs_disk_key *key, - int level, u64 hint, - u64 empty_size, - enum btrfs_lock_nesting nest); -void btrfs_free_tree_block(struct btrfs_trans_handle *trans, - u64 root_id, - struct extent_buffer *buf, - u64 parent, int last_ref); -int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, - struct btrfs_root *root, u64 owner, - u64 offset, u64 ram_bytes, - struct btrfs_key *ins); -int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, - u64 root_objectid, u64 owner, u64 offset, - struct btrfs_key *ins); -int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, - u64 min_alloc_size, u64 empty_size, u64 hint_byte, - struct btrfs_key *ins, int is_data, int delalloc); -int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int full_backref); -int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, - struct extent_buffer *buf, int full_backref); -int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, - struct extent_buffer *eb, u64 flags, int level); -int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref); - -int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, - u64 start, u64 len, int delalloc); -int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start, - u64 len); -int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans); -int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, - struct btrfs_ref *generic_ref); - int btrfs_subvolume_reserve_metadata(struct btrfs_root *root, struct btrfs_block_rsv *rsv, int nitems, bool use_global_rsv); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 75475a213972..005da95c9c17 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -45,6 +45,7 @@ #include "subpage.h" #include "fs.h" #include "accessors.h" +#include "extent-tree.h" #define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\ BTRFS_HEADER_FLAG_RELOC |\ diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b97c99a8dd4c..d2d5de946242 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -38,6 +38,7 @@ #include "dev-replace.h" #include "fs.h" #include "accessors.h" +#include "extent-tree.h" #undef SCRAMBLE_DELAYED_REFS diff --git a/fs/btrfs/extent-tree.h b/fs/btrfs/extent-tree.h new file mode 100644 index 000000000000..b3674b008d58 --- /dev/null +++ b/fs/btrfs/extent-tree.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef BTRFS_EXTENT_TREE_H +#define BTRFS_EXTENT_TREE_H + +enum btrfs_inline_ref_type { + BTRFS_REF_TYPE_INVALID, + BTRFS_REF_TYPE_BLOCK, + BTRFS_REF_TYPE_DATA, + BTRFS_REF_TYPE_ANY, +}; + +int btrfs_get_extent_inline_ref_type(const struct extent_buffer *eb, + struct btrfs_extent_inline_ref *iref, + enum btrfs_inline_ref_type is_data); +u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset); + +int btrfs_add_excluded_extent(struct btrfs_fs_info *fs_info, + u64 start, u64 num_bytes); +void btrfs_free_excluded_extents(struct btrfs_block_group *cache); +int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, unsigned long count); +void btrfs_cleanup_ref_head_accounting(struct btrfs_fs_info *fs_info, + struct btrfs_delayed_ref_root *delayed_refs, + struct btrfs_delayed_ref_head *head); +int btrfs_lookup_data_extent(struct btrfs_fs_info *fs_info, u64 start, u64 len); +int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, + struct btrfs_fs_info *fs_info, u64 bytenr, + u64 offset, int metadata, u64 *refs, u64 *flags); +int btrfs_pin_extent(struct btrfs_trans_handle *trans, u64 bytenr, u64 num, + int reserved); +int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans, + u64 bytenr, u64 num_bytes); +int btrfs_exclude_logged_extents(struct extent_buffer *eb); +int btrfs_cross_ref_exist(struct btrfs_root *root, + u64 objectid, u64 offset, u64 bytenr, bool strict, + struct btrfs_path *path); +struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + u64 parent, u64 root_objectid, + const struct btrfs_disk_key *key, + int level, u64 hint, + u64 empty_size, + enum btrfs_lock_nesting nest); +void btrfs_free_tree_block(struct btrfs_trans_handle *trans, + u64 root_id, + struct extent_buffer *buf, + u64 parent, int last_ref); +int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 owner, + u64 offset, u64 ram_bytes, + struct btrfs_key *ins); +int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, + u64 root_objectid, u64 owner, u64 offset, + struct btrfs_key *ins); +int btrfs_reserve_extent(struct btrfs_root *root, u64 ram_bytes, u64 num_bytes, + u64 min_alloc_size, u64 empty_size, u64 hint_byte, + struct btrfs_key *ins, int is_data, int delalloc); +int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *buf, int full_backref); +int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, + struct extent_buffer *buf, int full_backref); +int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, + struct extent_buffer *eb, u64 flags, int level); +int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_ref *ref); + +int btrfs_free_reserved_extent(struct btrfs_fs_info *fs_info, + u64 start, u64 len, int delalloc); +int btrfs_pin_reserved_extent(struct btrfs_trans_handle *trans, u64 start, u64 len); +int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans); +int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, struct btrfs_ref *generic_ref); + +#endif diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 62360a554420..bad676d2e23f 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -32,6 +32,7 @@ #include "subpage.h" #include "fs.h" #include "accessors.h" +#include "extent-tree.h" static struct kmem_cache *btrfs_inode_defrag_cachep; /* diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c index fc79d21e7b4f..a652e5dc465b 100644 --- a/fs/btrfs/free-space-tree.c +++ b/fs/btrfs/free-space-tree.c @@ -14,6 +14,7 @@ #include "block-group.h" #include "fs.h" #include "accessors.h" +#include "extent-tree.h" static int __add_block_group_free_space(struct btrfs_trans_handle *trans, struct btrfs_block_group *block_group, diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index d66bdbae5585..724507ce7a7d 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c @@ -12,6 +12,7 @@ #include "print-tree.h" #include "space-info.h" #include "accessors.h" +#include "extent-tree.h" struct btrfs_inode_ref *btrfs_find_name_in_backref(struct extent_buffer *leaf, int slot, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 78867a084428..cb2a35b19c75 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -57,6 +57,7 @@ #include "inode-item.h" #include "fs.h" #include "accessors.h" +#include "extent-tree.h" struct btrfs_iget_args { u64 ino; diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 2132e3e15986..1d4bdb22881c 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -52,6 +52,7 @@ #include "subpage.h" #include "fs.h" #include "accessors.h" +#include "extent-tree.h" #ifdef CONFIG_64BIT /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 4786b59035a8..88b248b22810 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -26,6 +26,7 @@ #include "tree-mod-log.h" #include "fs.h" #include "accessors.h" +#include "extent-tree.h" /* * Helpers to access qgroup reservation diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 77d03dce2521..946190f13138 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -30,6 +30,7 @@ #include "space-info.h" #include "fs.h" #include "accessors.h" +#include "extent-tree.h" /* * Relocation overview diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c index 45404798ee8c..e5f9f43ffa4c 100644 --- a/fs/btrfs/space-info.c +++ b/fs/btrfs/space-info.c @@ -12,6 +12,7 @@ #include "zoned.h" #include "fs.h" #include "accessors.h" +#include "extent-tree.h" /* * HOW DOES SPACE RESERVATION WORK diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 0d44d50dc3be..a04c2a9708e2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -26,6 +26,7 @@ #include "zoned.h" #include "fs.h" #include "accessors.h" +#include "extent-tree.h" static struct kmem_cache *btrfs_trans_handle_cachep; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index a5e56a678af2..bd9a5eda9def 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -23,6 +23,7 @@ #include "inode-item.h" #include "fs.h" #include "accessors.h" +#include "extent-tree.h" #define MAX_CONFLICT_INODES 10 -- cgit v1.2.3 From 677074792a1d533232ec5517f23f78d64e6dffac Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 26 Oct 2022 15:08:34 -0400 Subject: btrfs: move relocation prototypes into relocation.h Move these out of ctree.h into relocation.h to cut down on code in ctree.h Reviewed-by: Johannes Thumshirn Signed-off-by: Josef Bacik Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/backref.c | 1 + fs/btrfs/ctree.c | 1 + fs/btrfs/ctree.h | 20 -------------------- fs/btrfs/disk-io.c | 1 + fs/btrfs/inode.c | 1 + fs/btrfs/relocation.c | 1 + fs/btrfs/relocation.h | 23 +++++++++++++++++++++++ fs/btrfs/transaction.c | 1 + fs/btrfs/volumes.c | 1 + 9 files changed, 30 insertions(+), 20 deletions(-) create mode 100644 fs/btrfs/relocation.h (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 173df40da984..04cb608e7cfb 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -18,6 +18,7 @@ #include "fs.h" #include "accessors.h" #include "extent-tree.h" +#include "relocation.h" /* Just arbitrary numbers so we can be sure one of these happened. */ #define BACKREF_FOUND_SHARED 6 diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 4b47d380da1e..0acd85111cdf 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -21,6 +21,7 @@ #include "fs.h" #include "accessors.h" #include "extent-tree.h" +#include "relocation.h" static struct kmem_cache *btrfs_path_cachep; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 040b640b0222..b1b6de508e20 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -732,26 +732,6 @@ static inline unsigned long get_eb_page_index(unsigned long offset) #define EXPORT_FOR_TESTS #endif -/* relocation.c */ -int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start); -int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, - struct btrfs_root *root); -int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, - struct btrfs_root *root); -int btrfs_recover_relocation(struct btrfs_fs_info *fs_info); -int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len); -int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct extent_buffer *buf, - struct extent_buffer *cow); -void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending, - u64 *bytes_to_reserve); -int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, - struct btrfs_pending_snapshot *pending); -int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info); -struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, - u64 bytenr); -int btrfs_should_ignore_reloc_root(struct btrfs_root *root); - /* scrub.c */ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, u64 end, struct btrfs_scrub_progress *progress, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e8082e3c5bdc..a613a9a7caae 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -49,6 +49,7 @@ #include "root-tree.h" #include "defrag.h" #include "uuid-tree.h" +#include "relocation.h" #define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\ BTRFS_HEADER_FLAG_RELOC |\ diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index cb038dbbca7d..d8856e621e01 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -66,6 +66,7 @@ #include "ioctl.h" #include "file.h" #include "acl.h" +#include "relocation.h" struct btrfs_iget_args { u64 ino; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e86364bdac8e..f31a97d4f9ad 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -33,6 +33,7 @@ #include "extent-tree.h" #include "root-tree.h" #include "file-item.h" +#include "relocation.h" /* * Relocation overview diff --git a/fs/btrfs/relocation.h b/fs/btrfs/relocation.h new file mode 100644 index 000000000000..2041a86186de --- /dev/null +++ b/fs/btrfs/relocation.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef BTRFS_RELOCATION_H +#define BTRFS_RELOCATION_H + +int btrfs_relocate_block_group(struct btrfs_fs_info *fs_info, u64 group_start); +int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, struct btrfs_root *root); +int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root); +int btrfs_recover_relocation(struct btrfs_fs_info *fs_info); +int btrfs_reloc_clone_csums(struct btrfs_inode *inode, u64 file_pos, u64 len); +int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *cow); +void btrfs_reloc_pre_snapshot(struct btrfs_pending_snapshot *pending, + u64 *bytes_to_reserve); +int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, + struct btrfs_pending_snapshot *pending); +int btrfs_should_cancel_balance(struct btrfs_fs_info *fs_info); +struct btrfs_root *find_reloc_root(struct btrfs_fs_info *fs_info, u64 bytenr); +int btrfs_should_ignore_reloc_root(struct btrfs_root *root); + +#endif diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index bbc3ad3e297c..a262fd188abe 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -32,6 +32,7 @@ #include "dir-item.h" #include "uuid-tree.h" #include "ioctl.h" +#include "relocation.h" static struct kmem_cache *btrfs_trans_handle_cachep; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 823de2f29289..abfa6a9e20e6 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -37,6 +37,7 @@ #include "accessors.h" #include "uuid-tree.h" #include "ioctl.h" +#include "relocation.h" static struct bio_set btrfs_bioset; -- cgit v1.2.3 From c7499a64dcf62bf27968b0e9cce353c490b9ada1 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:45 +0000 Subject: btrfs: send: optimize clone detection to increase extent sharing Currently send does not do the best decisions when it comes to decide between multiple clone sources, which results in clone operations for partial extent ranges, which has the following disadvantages: 1) We get less shared extents at the destination; 2) We have to read more data during the send operation and emit more write commands. Besides not being optimal behaviour, it also breaks user expectations and is often reported by users, with a recent example in the Link tag at the bottom of this change log. Part of the reason for this non-optimal behaviour is that the backref walking code does not provide information about the length of the file extent items that were found for each backref, so send is blind about which backref is the best to chose as a cloning source. The other existing reasons are just silliness, namely always prefering the inode with the lowest number when multiple are found for the same root and when we can clone from multiple roots, always prefer the send root over any of the other clone roots. This does not make any sense since any inode or root is fine and as good as any other inode/root. Fix this by making backref walking pass information about the number of bytes referenced by each file extent item and then have send's backref callback pick the inode with the highest number of bytes for each root. Finally select the root from which we can clone more bytes from. Example reproducer: $ cat test.sh #!/bin/bash DEV=/dev/sdi MNT=/mnt/sdi mkfs.btrfs -f $DEV mount $DEV $MNT xfs_io -f -c "pwrite -S 0xab -b 2M 0 2M" $MNT/foo cp --reflink=always $MNT/foo $MNT/bar cp --reflink=always $MNT/foo $MNT/baz sync # Overwrite the second half of file foo. xfs_io -c "pwrite -S 0xcd -b 1M 1M 1M" $MNT/foo sync echo echo "*** fiemap in the original filesystem ***" echo xfs_io -c "fiemap -v" $MNT/foo xfs_io -c "fiemap -v" $MNT/bar xfs_io -c "fiemap -v" $MNT/baz echo btrfs filesystem du $MNT btrfs subvolume snapshot -r $MNT $MNT/snap btrfs send -f /tmp/send_stream $MNT/snap umount $MNT mkfs.btrfs -f $DEV &> /dev/null mount $DEV $MNT btrfs receive -f /tmp/send_stream $MNT echo echo "*** fiemap in the new filesystem ***" echo xfs_io -r -c "fiemap -v" $MNT/snap/foo xfs_io -r -c "fiemap -v" $MNT/snap/bar xfs_io -r -c "fiemap -v" $MNT/snap/baz echo btrfs filesystem du $MNT rm -f /tmp/send_stream rm -f /tmp/snap.fssum umount $MNT Before this change: $ ./test.sh (...) *** fiemap in the original filesystem *** /mnt/sdi/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..2047]: 26624..28671 2048 0x2000 1: [2048..4095]: 30720..32767 2048 0x1 /mnt/sdi/bar: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..4095]: 26624..30719 4096 0x2001 /mnt/sdi/baz: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..4095]: 26624..30719 4096 0x2001 Total Exclusive Set shared Filename 2.00MiB 1.00MiB - /mnt/sdi/foo 2.00MiB 0.00B - /mnt/sdi/bar 2.00MiB 0.00B - /mnt/sdi/baz 6.00MiB 1.00MiB 2.00MiB /mnt/sdi Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap' At subvol /mnt/sdi/snap At subvol snap *** fiemap in the new filesystem *** /mnt/sdi/snap/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..4095]: 26624..30719 4096 0x2001 /mnt/sdi/snap/bar: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..2047]: 26624..28671 2048 0x2000 1: [2048..4095]: 30720..32767 2048 0x1 /mnt/sdi/snap/baz: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..2047]: 26624..28671 2048 0x2000 1: [2048..4095]: 32768..34815 2048 0x1 Total Exclusive Set shared Filename 2.00MiB 0.00B - /mnt/sdi/snap/foo 2.00MiB 1.00MiB - /mnt/sdi/snap/bar 2.00MiB 1.00MiB - /mnt/sdi/snap/baz 6.00MiB 2.00MiB - /mnt/sdi/snap 6.00MiB 2.00MiB 2.00MiB /mnt/sdi We end up with two 1M extents that are not shared for files bar and baz. After this change: $ ./test.sh (...) *** fiemap in the original filesystem *** /mnt/sdi/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..2047]: 26624..28671 2048 0x2000 1: [2048..4095]: 30720..32767 2048 0x1 /mnt/sdi/bar: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..4095]: 26624..30719 4096 0x2001 /mnt/sdi/baz: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..4095]: 26624..30719 4096 0x2001 Total Exclusive Set shared Filename 2.00MiB 1.00MiB - /mnt/sdi/foo 2.00MiB 0.00B - /mnt/sdi/bar 2.00MiB 0.00B - /mnt/sdi/baz 6.00MiB 1.00MiB 2.00MiB /mnt/sdi Create a readonly snapshot of '/mnt/sdi' in '/mnt/sdi/snap' At subvol /mnt/sdi/snap At subvol snap *** fiemap in the new filesystem *** /mnt/sdi/snap/foo: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..4095]: 26624..30719 4096 0x2001 /mnt/sdi/snap/bar: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..2047]: 26624..28671 2048 0x2000 1: [2048..4095]: 30720..32767 2048 0x2001 /mnt/sdi/snap/baz: EXT: FILE-OFFSET BLOCK-RANGE TOTAL FLAGS 0: [0..2047]: 26624..28671 2048 0x2000 1: [2048..4095]: 30720..32767 2048 0x2001 Total Exclusive Set shared Filename 2.00MiB 0.00B - /mnt/sdi/snap/foo 2.00MiB 0.00B - /mnt/sdi/snap/bar 2.00MiB 0.00B - /mnt/sdi/snap/baz 6.00MiB 0.00B - /mnt/sdi/snap 6.00MiB 0.00B 3.00MiB /mnt/sdi Now there's a much better sharing, files bar and baz share 1M of the extent of file foo and the second extent of files bar and baz is shared between themselves. This will later be turned into a test case for fstests. Link: https://lore.kernel.org/linux-btrfs/20221008005704.795b44b0@crass-HP-ZBook-15-G2/ Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 9 +++++---- fs/btrfs/backref.h | 4 ++-- fs/btrfs/scrub.c | 4 ++-- fs/btrfs/send.c | 49 +++++++++++++++++++++++++++++++++---------------- 4 files changed, 42 insertions(+), 24 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 04cb608e7cfb..9be11a342de5 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -27,6 +27,7 @@ struct extent_inode_elem { u64 inum; u64 offset; + u64 num_bytes; struct extent_inode_elem *next; }; @@ -37,6 +38,7 @@ static int check_extent_in_eb(const struct btrfs_key *key, struct extent_inode_elem **eie, bool ignore_offset) { + const u64 data_len = btrfs_file_extent_num_bytes(eb, fi); u64 offset = 0; struct extent_inode_elem *e; @@ -45,10 +47,8 @@ static int check_extent_in_eb(const struct btrfs_key *key, !btrfs_file_extent_encryption(eb, fi) && !btrfs_file_extent_other_encoding(eb, fi)) { u64 data_offset; - u64 data_len; data_offset = btrfs_file_extent_offset(eb, fi); - data_len = btrfs_file_extent_num_bytes(eb, fi); if (extent_item_pos < data_offset || extent_item_pos >= data_offset + data_len) @@ -63,6 +63,7 @@ static int check_extent_in_eb(const struct btrfs_key *key, e->next = *eie; e->inum = key->objectid; e->offset = key->offset + offset; + e->num_bytes = data_len; *eie = e; return 0; @@ -2265,7 +2266,7 @@ static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, "ref for %llu resolved, key (%llu EXTEND_DATA %llu), root %llu", extent_item_objectid, eie->inum, eie->offset, root); - ret = iterate(eie->inum, eie->offset, root, ctx); + ret = iterate(eie->inum, eie->offset, eie->num_bytes, root, ctx); if (ret) { btrfs_debug(fs_info, "stopping iteration for %llu due to ret=%d", @@ -2357,7 +2358,7 @@ out: return ret; } -static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) +static int build_ino_list(u64 inum, u64 offset, u64 num_bytes, u64 root, void *ctx) { struct btrfs_data_container *inodes = ctx; const size_t c = 3 * sizeof(u64); diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 2dd68f87dca5..8d3598155f3b 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -75,8 +75,8 @@ struct btrfs_backref_share_check_ctx { int prev_extents_cache_slot; }; -typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root, - void *ctx); +typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 num_bytes, + u64 root, void *ctx); struct btrfs_backref_share_check_ctx *btrfs_alloc_backref_share_check_ctx(void); void btrfs_free_backref_share_ctx(struct btrfs_backref_share_check_ctx *ctx); diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 8c89e63ef2e8..3c22573bfe0d 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -809,8 +809,8 @@ nomem: return ERR_PTR(-ENOMEM); } -static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, - void *warn_ctx) +static int scrub_print_warning_inode(u64 inum, u64 offset, u64 num_bytes, + u64 root, void *warn_ctx) { u32 nlink; int ret; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index d9e01b66b32b..49759cd9eecb 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -76,7 +76,7 @@ struct clone_root { struct btrfs_root *root; u64 ino; u64 offset; - + u64 num_bytes; u64 found_refs; }; @@ -1273,7 +1273,8 @@ static int __clone_root_cmp_sort(const void *e1, const void *e2) * Called for every backref that is found for the current extent. * Results are collected in sctx->clone_roots->ino/offset/found_refs */ -static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) +static int __iterate_backrefs(u64 ino, u64 offset, u64 num_bytes, u64 root, + void *ctx_) { struct backref_ctx *bctx = ctx_; struct clone_root *found; @@ -1318,15 +1319,17 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) bctx->found++; found->found_refs++; - if (ino < found->ino) { + + /* + * If the given backref refers to a file extent item with a larger + * number of bytes than what we found before, use the new one so that + * we clone more optimally and end up doing less writes and getting + * less exclusive, non-shared extents at the destination. + */ + if (num_bytes > found->num_bytes) { found->ino = ino; found->offset = offset; - } else if (found->ino == ino) { - /* - * same extent found more then once in the same file. - */ - if (found->offset > offset + bctx->extent_len) - found->offset = offset; + found->num_bytes = num_bytes; } return 0; @@ -1437,6 +1440,7 @@ static int find_extent_clone(struct send_ctx *sctx, cur_clone_root = sctx->clone_roots + i; cur_clone_root->ino = (u64)-1; cur_clone_root->offset = 0; + cur_clone_root->num_bytes = 0; cur_clone_root->found_refs = 0; } @@ -1506,14 +1510,27 @@ static int find_extent_clone(struct send_ctx *sctx, cur_clone_root = NULL; for (i = 0; i < sctx->clone_roots_cnt; i++) { - if (sctx->clone_roots[i].found_refs) { - if (!cur_clone_root) - cur_clone_root = sctx->clone_roots + i; - else if (sctx->clone_roots[i].root == sctx->send_root) - /* prefer clones from send_root over others */ - cur_clone_root = sctx->clone_roots + i; - } + struct clone_root *clone_root = &sctx->clone_roots[i]; + if (clone_root->found_refs == 0) + continue; + + /* + * Choose the root from which we can clone more bytes, to + * minimize write operations and therefore have more extent + * sharing at the destination (the same as in the source). + */ + if (!cur_clone_root || + clone_root->num_bytes > cur_clone_root->num_bytes) { + cur_clone_root = clone_root; + + /* + * We found an optimal clone candidate (any inode from + * any root is fine), so we're done. + */ + if (clone_root->num_bytes >= backref_ctx.extent_len) + break; + } } if (cur_clone_root) { -- cgit v1.2.3 From 6ce6ba534418132f4c727d5707fe2794c797299c Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:46 +0000 Subject: btrfs: use a single argument for extent offset in backref walking functions The interface for find_parent_nodes() has two extent offset related arguments: 1) One u64 pointer argument for the extent offset; 2) One boolean argument to tell if the extent offset should be ignored or not. These are confusing, becase the extent offset pointer can be NULL and in some cases callers pass a NULL value as a way to tell the backref walking code to ignore offsets in file extent items (and simply consider all file extent items that point to the target data extent). The boolean argument was added in commit c995ab3cda3f ("btrfs: add a flag to iterate_inodes_from_logical to find all extent refs for uncompressed extents"), but it was never really necessary, it was enough if it could find a way to get a NULL value passed to the "extent_item_pos" argument of find_parent_nodes(). The arguments are also passed to functions called by find_parent_nodes() and respective helper functions, which further makes everything more complicated than needed. Then we have several backref walking related functions that end up calling find_parent_nodes(), either directly or through some other function that they call, and for many we have to use an "extent_item_pos" (u64) argument and a boolean "ignore_offset" argument too. This is confusing and not really necessary. So use a single argument to specify the extent offset, as a simple u64 and not as a pointer, but using a special value of (u64)-1, defined as a documented constant, to indicate when the extent offset should be ignored. This is also preparation work for the upcoming patches in the series that add other arguments to find_parent_nodes() and other related functions that use it. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 87 +++++++++++++++++++++++++-------------------------- fs/btrfs/backref.h | 12 +++++-- fs/btrfs/relocation.c | 2 +- fs/btrfs/scrub.c | 2 +- fs/btrfs/send.c | 2 +- 5 files changed, 54 insertions(+), 51 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 9be11a342de5..432064ee788e 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -35,15 +35,13 @@ static int check_extent_in_eb(const struct btrfs_key *key, const struct extent_buffer *eb, const struct btrfs_file_extent_item *fi, u64 extent_item_pos, - struct extent_inode_elem **eie, - bool ignore_offset) + struct extent_inode_elem **eie) { const u64 data_len = btrfs_file_extent_num_bytes(eb, fi); u64 offset = 0; struct extent_inode_elem *e; - if (!ignore_offset && - !btrfs_file_extent_compression(eb, fi) && + if (!btrfs_file_extent_compression(eb, fi) && !btrfs_file_extent_encryption(eb, fi) && !btrfs_file_extent_other_encoding(eb, fi)) { u64 data_offset; @@ -81,8 +79,7 @@ static void free_inode_elem_list(struct extent_inode_elem *eie) static int find_extent_in_eb(const struct extent_buffer *eb, u64 wanted_disk_byte, u64 extent_item_pos, - struct extent_inode_elem **eie, - bool ignore_offset) + struct extent_inode_elem **eie) { u64 disk_byte; struct btrfs_key key; @@ -111,7 +108,7 @@ static int find_extent_in_eb(const struct extent_buffer *eb, if (disk_byte != wanted_disk_byte) continue; - ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie, ignore_offset); + ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie); if (ret < 0) return ret; } @@ -450,9 +447,9 @@ static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr) static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, struct ulist *parents, struct preftrees *preftrees, struct prelim_ref *ref, - int level, u64 time_seq, const u64 *extent_item_pos, - bool ignore_offset) + int level, u64 time_seq, u64 extent_item_pos) { + const bool ignore_offset = (extent_item_pos == BTRFS_IGNORE_EXTENT_OFFSET); int ret = 0; int slot; struct extent_buffer *eb; @@ -528,10 +525,9 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, count++; else goto next; - if (extent_item_pos) { + if (!ignore_offset) { ret = check_extent_in_eb(&key, eb, fi, - *extent_item_pos, - &eie, ignore_offset); + extent_item_pos, &eie); if (ret < 0) break; } @@ -541,7 +537,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, eie, (void **)&old, GFP_NOFS); if (ret < 0) break; - if (!ret && extent_item_pos) { + if (!ret && !ignore_offset) { while (old->next) old = old->next; old->next = eie; @@ -570,7 +566,7 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 time_seq, struct preftrees *preftrees, struct prelim_ref *ref, struct ulist *parents, - const u64 *extent_item_pos, bool ignore_offset) + u64 extent_item_pos) { struct btrfs_root *root; struct extent_buffer *eb; @@ -664,7 +660,7 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info, } ret = add_all_parents(root, path, parents, preftrees, ref, level, - time_seq, extent_item_pos, ignore_offset); + time_seq, extent_item_pos); out: btrfs_put_root(root); out_free: @@ -712,8 +708,8 @@ static void free_leaf_list(struct ulist *ulist) static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, struct btrfs_path *path, u64 time_seq, struct preftrees *preftrees, - const u64 *extent_item_pos, - struct share_check *sc, bool ignore_offset) + u64 extent_item_pos, + struct share_check *sc) { int err; int ret = 0; @@ -756,8 +752,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, goto out; } err = resolve_indirect_ref(fs_info, path, time_seq, preftrees, - ref, parents, extent_item_pos, - ignore_offset); + ref, parents, extent_item_pos); /* * we can only tolerate ENOENT,otherwise,we should catch error * and return directly. @@ -1340,18 +1335,21 @@ static void store_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx * * Otherwise this returns 0 for success and <0 for an error. * - * If ignore_offset is set to false, only extent refs whose offsets match - * extent_item_pos are returned. If true, every extent ref is returned - * and extent_item_pos is ignored. + * @extent_item_pos is meaningful only if we are dealing with a data extent. + * If its value is not BTRFS_IGNORE_EXTENT_OFFSET, then only collect references + * from file extent items that refer to a section of the data extent that + * contains @extent_item_pos. If its value is BTRFS_IGNORE_EXTENT_OFFSET then + * collect references for every file extent item that points to the data extent. * * FIXME some caching might speed things up */ static int find_parent_nodes(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 time_seq, struct ulist *refs, - struct ulist *roots, const u64 *extent_item_pos, - struct share_check *sc, bool ignore_offset) + struct ulist *roots, u64 extent_item_pos, + struct share_check *sc) { + const bool ignore_offset = (extent_item_pos == BTRFS_IGNORE_EXTENT_OFFSET); struct btrfs_root *root = btrfs_extent_root(fs_info, bytenr); struct btrfs_key key; struct btrfs_path *path; @@ -1539,7 +1537,7 @@ again: WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root.rb_root)); ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees, - extent_item_pos, sc, ignore_offset); + extent_item_pos, sc); if (ret) goto out; @@ -1573,8 +1571,7 @@ again: goto out; } if (ref->count && ref->parent) { - if (extent_item_pos && !ref->inode_list && - ref->level == 0) { + if (!ignore_offset && !ref->inode_list && ref->level == 0) { struct extent_buffer *eb; eb = read_tree_block(fs_info, ref->parent, 0, @@ -1592,7 +1589,7 @@ again: if (!path->skip_locking) btrfs_tree_read_lock(eb); ret = find_extent_in_eb(eb, bytenr, - *extent_item_pos, &eie, ignore_offset); + extent_item_pos, &eie); if (!path->skip_locking) btrfs_tree_read_unlock(eb); free_extent_buffer(eb); @@ -1611,7 +1608,7 @@ again: (void **)&eie, GFP_NOFS); if (ret < 0) goto out; - if (!ret && extent_item_pos) { + if (!ret && !ignore_offset) { /* * We've recorded that parent, so we must extend * its inode list here. @@ -1665,7 +1662,7 @@ out: int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 time_seq, struct ulist **leafs, - const u64 *extent_item_pos, bool ignore_offset) + u64 extent_item_pos) { int ret; @@ -1674,7 +1671,7 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, return -ENOMEM; ret = find_parent_nodes(trans, fs_info, bytenr, time_seq, - *leafs, NULL, extent_item_pos, NULL, ignore_offset); + *leafs, NULL, extent_item_pos, NULL); if (ret < 0 && ret != -ENOENT) { free_leaf_list(*leafs); return ret; @@ -1698,8 +1695,7 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, */ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, - u64 time_seq, struct ulist **roots, - bool ignore_offset) + u64 time_seq, struct ulist **roots) { struct ulist *tmp; struct ulist_node *node = NULL; @@ -1718,7 +1714,8 @@ static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans, ULIST_ITER_INIT(&uiter); while (1) { ret = find_parent_nodes(trans, fs_info, bytenr, time_seq, - tmp, *roots, NULL, NULL, ignore_offset); + tmp, *roots, BTRFS_IGNORE_EXTENT_OFFSET, + NULL); if (ret < 0 && ret != -ENOENT) { ulist_free(tmp); ulist_free(*roots); @@ -1745,8 +1742,7 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, if (!trans && !skip_commit_root_sem) down_read(&fs_info->commit_root_sem); - ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr, - time_seq, roots, false); + ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr, time_seq, roots); if (!trans && !skip_commit_root_sem) up_read(&fs_info->commit_root_sem); return ret; @@ -1845,7 +1841,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, bool cached; ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, &ctx->refs, - NULL, NULL, &shared, false); + NULL, BTRFS_IGNORE_EXTENT_OFFSET, &shared); if (ret == BACKREF_FOUND_SHARED || ret == BACKREF_FOUND_NOT_SHARED) { /* If shared must return 1, otherwise return 0. */ @@ -2286,8 +2282,7 @@ static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, int iterate_extent_inodes(struct btrfs_fs_info *fs_info, u64 extent_item_objectid, u64 extent_item_pos, int search_commit_root, - iterate_extent_inodes_t *iterate, void *ctx, - bool ignore_offset) + iterate_extent_inodes_t *iterate, void *ctx) { int ret; struct btrfs_trans_handle *trans = NULL; @@ -2318,16 +2313,14 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info, down_read(&fs_info->commit_root_sem); ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, - seq_elem.seq, &refs, - &extent_item_pos, ignore_offset); + seq_elem.seq, &refs, extent_item_pos); if (ret) goto out; ULIST_ITER_INIT(&ref_uiter); while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val, - seq_elem.seq, &roots, - ignore_offset); + seq_elem.seq, &roots); if (ret) break; ULIST_ITER_INIT(&root_uiter); @@ -2395,10 +2388,14 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) return -EINVAL; - extent_item_pos = logical - found_key.objectid; + if (ignore_offset) + extent_item_pos = BTRFS_IGNORE_EXTENT_OFFSET; + else + extent_item_pos = logical - found_key.objectid; + ret = iterate_extent_inodes(fs_info, found_key.objectid, extent_item_pos, search_commit_root, - build_ino_list, ctx, ignore_offset); + build_ino_list, ctx); return ret; } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 8d3598155f3b..2eb99f23cc8f 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -12,6 +12,13 @@ #include "disk-io.h" #include "extent_io.h" +/* + * Pass to backref walking functions to tell them to include references from + * all file extent items that point to the target data extent, regardless if + * they refer to the whole extent or just sections of it (bookend extents). + */ +#define BTRFS_IGNORE_EXTENT_OFFSET ((u64)-1) + struct inode_fs_paths { struct btrfs_path *btrfs_path; struct btrfs_root *fs_root; @@ -92,8 +99,7 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, int iterate_extent_inodes(struct btrfs_fs_info *fs_info, u64 extent_item_objectid, u64 extent_offset, int search_commit_root, - iterate_extent_inodes_t *iterate, void *ctx, - bool ignore_offset); + iterate_extent_inodes_t *iterate, void *ctx); int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, struct btrfs_path *path, void *ctx, @@ -104,7 +110,7 @@ int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 time_seq, struct ulist **leafs, - const u64 *extent_item_pos, bool ignore_offset); + u64 extent_item_pos); int btrfs_find_all_roots(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 bytenr, u64 time_seq, struct ulist **roots, diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index d119986d1599..45690f7b5900 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3417,7 +3417,7 @@ int add_data_references(struct reloc_control *rc, btrfs_release_path(path); ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid, - 0, &leaves, NULL, true); + 0, &leaves, BTRFS_IGNORE_EXTENT_OFFSET); if (ret < 0) return ret; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 3c22573bfe0d..6c7dc89709fc 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -969,7 +969,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) swarn.dev = dev; iterate_extent_inodes(fs_info, found_key.objectid, extent_item_pos, 1, - scrub_print_warning_inode, &swarn, false); + scrub_print_warning_inode, &swarn); } out: diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 49759cd9eecb..6bf06939f891 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1467,7 +1467,7 @@ static int find_extent_clone(struct send_ctx *sctx, extent_item_pos = 0; ret = iterate_extent_inodes(fs_info, found_key.objectid, extent_item_pos, 1, __iterate_backrefs, - &backref_ctx, false); + &backref_ctx); if (ret < 0) goto out; -- cgit v1.2.3 From a2c8d27e5ee810b7149b42b88ddf7298e5b8dfe0 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:47 +0000 Subject: btrfs: use a structure to pass arguments to backref walking functions The public backref walking functions have quite a lot of arguments that are passed down the call stack to find_parent_nodes(), the core function of the backref walking code. The next patches in series will need to add even arguments to these functions that should be passed not only to find_parent_nodes(), but also to other functions used by the later (directly or even lower in the call stack). So create a structure to hold all these arguments and state used by the main backref walking function, find_parent_nodes(), and use it as the argument for the public backref walking functions iterate_extent_inodes(), btrfs_find_all_leafs() and btrfs_find_all_roots(). Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 343 ++++++++++++++++++++++-------------------- fs/btrfs/backref.h | 76 ++++++++-- fs/btrfs/qgroup.c | 38 +++-- fs/btrfs/relocation.c | 19 ++- fs/btrfs/scrub.c | 14 +- fs/btrfs/send.c | 15 +- fs/btrfs/tests/qgroup-tests.c | 50 ++++-- 7 files changed, 328 insertions(+), 227 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 432064ee788e..a1a00a7bdba5 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -444,12 +444,12 @@ static int is_shared_data_backref(struct preftrees *preftrees, u64 bytenr) return 0; } -static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, +static int add_all_parents(struct btrfs_backref_walk_ctx *ctx, + struct btrfs_root *root, struct btrfs_path *path, struct ulist *parents, struct preftrees *preftrees, struct prelim_ref *ref, - int level, u64 time_seq, u64 extent_item_pos) + int level) { - const bool ignore_offset = (extent_item_pos == BTRFS_IGNORE_EXTENT_OFFSET); int ret = 0; int slot; struct extent_buffer *eb; @@ -484,10 +484,10 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, if (path->slots[0] >= btrfs_header_nritems(eb) || is_shared_data_backref(preftrees, eb->start) || ref->root_id != btrfs_header_owner(eb)) { - if (time_seq == BTRFS_SEQ_LAST) + if (ctx->time_seq == BTRFS_SEQ_LAST) ret = btrfs_next_leaf(root, path); else - ret = btrfs_next_old_leaf(root, path, time_seq); + ret = btrfs_next_old_leaf(root, path, ctx->time_seq); } while (!ret && count < ref->count) { @@ -508,10 +508,10 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, if (slot == 0 && (is_shared_data_backref(preftrees, eb->start) || ref->root_id != btrfs_header_owner(eb))) { - if (time_seq == BTRFS_SEQ_LAST) + if (ctx->time_seq == BTRFS_SEQ_LAST) ret = btrfs_next_leaf(root, path); else - ret = btrfs_next_old_leaf(root, path, time_seq); + ret = btrfs_next_old_leaf(root, path, ctx->time_seq); continue; } fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); @@ -525,9 +525,9 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, count++; else goto next; - if (!ignore_offset) { + if (!ctx->ignore_extent_item_pos) { ret = check_extent_in_eb(&key, eb, fi, - extent_item_pos, &eie); + ctx->extent_item_pos, &eie); if (ret < 0) break; } @@ -537,7 +537,7 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, eie, (void **)&old, GFP_NOFS); if (ret < 0) break; - if (!ret && !ignore_offset) { + if (!ret && !ctx->ignore_extent_item_pos) { while (old->next) old = old->next; old->next = eie; @@ -545,10 +545,10 @@ static int add_all_parents(struct btrfs_root *root, struct btrfs_path *path, eie = NULL; } next: - if (time_seq == BTRFS_SEQ_LAST) + if (ctx->time_seq == BTRFS_SEQ_LAST) ret = btrfs_next_item(root, path); else - ret = btrfs_next_old_item(root, path, time_seq); + ret = btrfs_next_old_item(root, path, ctx->time_seq); } if (ret > 0) @@ -562,11 +562,10 @@ next: * resolve an indirect backref in the form (root_id, key, level) * to a logical address */ -static int resolve_indirect_ref(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, u64 time_seq, +static int resolve_indirect_ref(struct btrfs_backref_walk_ctx *ctx, + struct btrfs_path *path, struct preftrees *preftrees, - struct prelim_ref *ref, struct ulist *parents, - u64 extent_item_pos) + struct prelim_ref *ref, struct ulist *parents) { struct btrfs_root *root; struct extent_buffer *eb; @@ -584,9 +583,9 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info, * here. */ if (path->search_commit_root) - root = btrfs_get_fs_root_commit_root(fs_info, path, ref->root_id); + root = btrfs_get_fs_root_commit_root(ctx->fs_info, path, ref->root_id); else - root = btrfs_get_fs_root(fs_info, ref->root_id, false); + root = btrfs_get_fs_root(ctx->fs_info, ref->root_id, false); if (IS_ERR(root)) { ret = PTR_ERR(root); goto out_free; @@ -598,17 +597,17 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info, goto out; } - if (btrfs_is_testing(fs_info)) { + if (btrfs_is_testing(ctx->fs_info)) { ret = -ENOENT; goto out; } if (path->search_commit_root) root_level = btrfs_header_level(root->commit_root); - else if (time_seq == BTRFS_SEQ_LAST) + else if (ctx->time_seq == BTRFS_SEQ_LAST) root_level = btrfs_header_level(root->node); else - root_level = btrfs_old_root_level(root, time_seq); + root_level = btrfs_old_root_level(root, ctx->time_seq); if (root_level + 1 == level) goto out; @@ -636,12 +635,12 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info, search_key.offset >= LLONG_MAX) search_key.offset = 0; path->lowest_level = level; - if (time_seq == BTRFS_SEQ_LAST) + if (ctx->time_seq == BTRFS_SEQ_LAST) ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0); else - ret = btrfs_search_old_slot(root, &search_key, path, time_seq); + ret = btrfs_search_old_slot(root, &search_key, path, ctx->time_seq); - btrfs_debug(fs_info, + btrfs_debug(ctx->fs_info, "search slot in root %llu (level %d, ref count %d) returned %d for key (%llu %u %llu)", ref->root_id, level, ref->count, ret, ref->key_for_search.objectid, ref->key_for_search.type, @@ -659,8 +658,7 @@ static int resolve_indirect_ref(struct btrfs_fs_info *fs_info, eb = path->nodes[level]; } - ret = add_all_parents(root, path, parents, preftrees, ref, level, - time_seq, extent_item_pos); + ret = add_all_parents(ctx, root, path, parents, preftrees, ref, level); out: btrfs_put_root(root); out_free: @@ -705,10 +703,9 @@ static void free_leaf_list(struct ulist *ulist) * rbtree as they are encountered. The new backrefs are subsequently * resolved as above. */ -static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, - struct btrfs_path *path, u64 time_seq, +static int resolve_indirect_refs(struct btrfs_backref_walk_ctx *ctx, + struct btrfs_path *path, struct preftrees *preftrees, - u64 extent_item_pos, struct share_check *sc) { int err; @@ -751,14 +748,13 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, ret = BACKREF_FOUND_SHARED; goto out; } - err = resolve_indirect_ref(fs_info, path, time_seq, preftrees, - ref, parents, extent_item_pos); + err = resolve_indirect_ref(ctx, path, preftrees, ref, parents); /* * we can only tolerate ENOENT,otherwise,we should catch error * and return directly. */ if (err == -ENOENT) { - prelim_ref_insert(fs_info, &preftrees->direct, ref, + prelim_ref_insert(ctx->fs_info, &preftrees->direct, ref, NULL); continue; } else if (err) { @@ -787,7 +783,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, memcpy(new_ref, ref, sizeof(*ref)); new_ref->parent = node->val; new_ref->inode_list = unode_aux_to_inode_list(node); - prelim_ref_insert(fs_info, &preftrees->direct, + prelim_ref_insert(ctx->fs_info, &preftrees->direct, new_ref, NULL); } @@ -795,7 +791,7 @@ static int resolve_indirect_refs(struct btrfs_fs_info *fs_info, * Now it's a direct ref, put it in the direct tree. We must * do this last because the ref could be merged/freed here. */ - prelim_ref_insert(fs_info, &preftrees->direct, ref, NULL); + prelim_ref_insert(ctx->fs_info, &preftrees->direct, ref, NULL); ulist_reinit(parents); cond_resched(); @@ -1325,32 +1321,18 @@ static void store_backref_shared_cache(struct btrfs_backref_share_check_ctx *ctx * indirect refs to their parent bytenr. * When roots are found, they're added to the roots list * - * If time_seq is set to BTRFS_SEQ_LAST, it will not search delayed_refs, and - * behave much like trans == NULL case, the difference only lies in it will not - * commit root. - * The special case is for qgroup to search roots in commit_transaction(). - * - * @sc - if !NULL, then immediately return BACKREF_FOUND_SHARED when a - * shared extent is detected. + * @ctx: Backref walking context object, must be not NULL. + * @sc: If !NULL, then immediately return BACKREF_FOUND_SHARED when a + * shared extent is detected. * * Otherwise this returns 0 for success and <0 for an error. * - * @extent_item_pos is meaningful only if we are dealing with a data extent. - * If its value is not BTRFS_IGNORE_EXTENT_OFFSET, then only collect references - * from file extent items that refer to a section of the data extent that - * contains @extent_item_pos. If its value is BTRFS_IGNORE_EXTENT_OFFSET then - * collect references for every file extent item that points to the data extent. - * * FIXME some caching might speed things up */ -static int find_parent_nodes(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, - u64 time_seq, struct ulist *refs, - struct ulist *roots, u64 extent_item_pos, +static int find_parent_nodes(struct btrfs_backref_walk_ctx *ctx, struct share_check *sc) { - const bool ignore_offset = (extent_item_pos == BTRFS_IGNORE_EXTENT_OFFSET); - struct btrfs_root *root = btrfs_extent_root(fs_info, bytenr); + struct btrfs_root *root = btrfs_extent_root(ctx->fs_info, ctx->bytenr); struct btrfs_key key; struct btrfs_path *path; struct btrfs_delayed_ref_root *delayed_refs = NULL; @@ -1368,11 +1350,11 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, /* Roots ulist is not needed when using a sharedness check context. */ if (sc) - ASSERT(roots == NULL); + ASSERT(ctx->roots == NULL); - key.objectid = bytenr; + key.objectid = ctx->bytenr; key.offset = (u64)-1; - if (btrfs_fs_incompat(fs_info, SKINNY_METADATA)) + if (btrfs_fs_incompat(ctx->fs_info, SKINNY_METADATA)) key.type = BTRFS_METADATA_ITEM_KEY; else key.type = BTRFS_EXTENT_ITEM_KEY; @@ -1380,12 +1362,12 @@ static int find_parent_nodes(struct btrfs_trans_handle *trans, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - if (!trans) { + if (!ctx->trans) { path->search_commit_root = 1; path->skip_locking = 1; } - if (time_seq == BTRFS_SEQ_LAST) + if (ctx->time_seq == BTRFS_SEQ_LAST) path->skip_locking = 1; again: @@ -1401,17 +1383,17 @@ again: goto out; } - if (trans && likely(trans->type != __TRANS_DUMMY) && - time_seq != BTRFS_SEQ_LAST) { + if (ctx->trans && likely(ctx->trans->type != __TRANS_DUMMY) && + ctx->time_seq != BTRFS_SEQ_LAST) { /* * We have a specific time_seq we care about and trans which * means we have the path lock, we need to grab the ref head and * lock it so we have a consistent view of the refs at the given * time. */ - delayed_refs = &trans->transaction->delayed_refs; + delayed_refs = &ctx->trans->transaction->delayed_refs; spin_lock(&delayed_refs->lock); - head = btrfs_find_delayed_ref_head(delayed_refs, bytenr); + head = btrfs_find_delayed_ref_head(delayed_refs, ctx->bytenr); if (head) { if (!mutex_trylock(&head->mutex)) { refcount_inc(&head->refs); @@ -1429,7 +1411,7 @@ again: goto again; } spin_unlock(&delayed_refs->lock); - ret = add_delayed_refs(fs_info, head, time_seq, + ret = add_delayed_refs(ctx->fs_info, head, ctx->time_seq, &preftrees, sc); mutex_unlock(&head->mutex); if (ret) @@ -1447,14 +1429,14 @@ again: leaf = path->nodes[0]; slot = path->slots[0]; btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid == bytenr && + if (key.objectid == ctx->bytenr && (key.type == BTRFS_EXTENT_ITEM_KEY || key.type == BTRFS_METADATA_ITEM_KEY)) { - ret = add_inline_refs(fs_info, path, bytenr, + ret = add_inline_refs(ctx->fs_info, path, ctx->bytenr, &info_level, &preftrees, sc); if (ret) goto out; - ret = add_keyed_refs(root, path, bytenr, info_level, + ret = add_keyed_refs(root, path, ctx->bytenr, info_level, &preftrees, sc); if (ret) goto out; @@ -1483,7 +1465,7 @@ again: * extent item pointing to the data extent) is shared, that is, if any * of the extent buffers in the path is referenced by other trees. */ - if (sc && bytenr == sc->data_bytenr) { + if (sc && ctx->bytenr == sc->data_bytenr) { /* * If our data extent is from a generation more recent than the * last generation used to snapshot the root, then we know that @@ -1530,14 +1512,13 @@ again: btrfs_release_path(path); - ret = add_missing_keys(fs_info, &preftrees, path->skip_locking == 0); + ret = add_missing_keys(ctx->fs_info, &preftrees, path->skip_locking == 0); if (ret) goto out; WARN_ON(!RB_EMPTY_ROOT(&preftrees.indirect_missing_keys.root.rb_root)); - ret = resolve_indirect_refs(fs_info, path, time_seq, &preftrees, - extent_item_pos, sc); + ret = resolve_indirect_refs(ctx, path, &preftrees, sc); if (ret) goto out; @@ -1564,17 +1545,18 @@ again: * e.g. different offsets would not be merged, * and would retain their original ref->count < 0. */ - if (roots && ref->count && ref->root_id && ref->parent == 0) { + if (ctx->roots && ref->count && ref->root_id && ref->parent == 0) { /* no parent == root of tree */ - ret = ulist_add(roots, ref->root_id, 0, GFP_NOFS); + ret = ulist_add(ctx->roots, ref->root_id, 0, GFP_NOFS); if (ret < 0) goto out; } if (ref->count && ref->parent) { - if (!ignore_offset && !ref->inode_list && ref->level == 0) { + if (!ctx->ignore_extent_item_pos && !ref->inode_list && + ref->level == 0) { struct extent_buffer *eb; - eb = read_tree_block(fs_info, ref->parent, 0, + eb = read_tree_block(ctx->fs_info, ref->parent, 0, 0, ref->level, NULL); if (IS_ERR(eb)) { ret = PTR_ERR(eb); @@ -1588,8 +1570,8 @@ again: if (!path->skip_locking) btrfs_tree_read_lock(eb); - ret = find_extent_in_eb(eb, bytenr, - extent_item_pos, &eie); + ret = find_extent_in_eb(eb, ctx->bytenr, + ctx->extent_item_pos, &eie); if (!path->skip_locking) btrfs_tree_read_unlock(eb); free_extent_buffer(eb); @@ -1603,12 +1585,12 @@ again: */ eie = NULL; } - ret = ulist_add_merge_ptr(refs, ref->parent, + ret = ulist_add_merge_ptr(ctx->refs, ref->parent, ref->inode_list, (void **)&eie, GFP_NOFS); if (ret < 0) goto out; - if (!ret && !ignore_offset) { + if (!ret && !ctx->ignore_extent_item_pos) { /* * We've recorded that parent, so we must extend * its inode list here. @@ -1652,28 +1634,29 @@ out: } /* - * Finds all leafs with a reference to the specified combination of bytenr and - * offset. key_list_head will point to a list of corresponding keys (caller must - * free each list element). The leafs will be stored in the leafs ulist, which - * must be freed with ulist_free. + * Finds all leaves with a reference to the specified combination of + * @ctx->bytenr and @ctx->extent_item_pos. The bytenr of the found leaves are + * added to the ulist at @ctx->refs, and that ulist is allocated by this + * function. The caller should free the ulist with free_leaf_list() if + * @ctx->ignore_extent_item_pos is false, otherwise a fimple ulist_free() is + * enough. * - * returns 0 on success, <0 on error + * Returns 0 on success and < 0 on error. On error @ctx->refs is not allocated. */ -int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, - u64 time_seq, struct ulist **leafs, - u64 extent_item_pos) +int btrfs_find_all_leafs(struct btrfs_backref_walk_ctx *ctx) { int ret; - *leafs = ulist_alloc(GFP_NOFS); - if (!*leafs) + ASSERT(ctx->refs == NULL); + + ctx->refs = ulist_alloc(GFP_NOFS); + if (!ctx->refs) return -ENOMEM; - ret = find_parent_nodes(trans, fs_info, bytenr, time_seq, - *leafs, NULL, extent_item_pos, NULL); + ret = find_parent_nodes(ctx, NULL); if (ret < 0 && ret != -ENOENT) { - free_leaf_list(*leafs); + free_leaf_list(ctx->refs); + ctx->refs = NULL; return ret; } @@ -1681,7 +1664,7 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, } /* - * walk all backrefs for a given extent to find all roots that reference this + * Walk all backrefs for a given extent to find all roots that reference this * extent. Walking a backref means finding all extents that reference this * extent and in turn walk the backrefs of those, too. Naturally this is a * recursive process, but here it is implemented in an iterative fashion: We @@ -1689,62 +1672,74 @@ int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, * list. In turn, we find all referencing extents for those, further appending * to the list. The way we iterate the list allows adding more elements after * the current while iterating. The process stops when we reach the end of the - * list. Found roots are added to the roots list. + * list. + * + * Found roots are added to @ctx->roots, which is allocated by this function and + * @ctx->roots should be NULL when calling this function. This function also + * requires @ctx->refs to be NULL, as it uses it for allocating a ulist to do + * temporary work, and frees it before returning. * - * returns 0 on success, < 0 on error. + * Returns 0 on success, < 0 on error. On error @ctx->roots is always NULL. */ -static int btrfs_find_all_roots_safe(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, - u64 time_seq, struct ulist **roots) +static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx) { - struct ulist *tmp; - struct ulist_node *node = NULL; + const u64 orig_bytenr = ctx->bytenr; + const bool orig_ignore_extent_item_pos = ctx->ignore_extent_item_pos; struct ulist_iterator uiter; - int ret; + int ret = 0; - tmp = ulist_alloc(GFP_NOFS); - if (!tmp) + ASSERT(ctx->refs == NULL); + ASSERT(ctx->roots == NULL); + + ctx->refs = ulist_alloc(GFP_NOFS); + if (!ctx->refs) return -ENOMEM; - *roots = ulist_alloc(GFP_NOFS); - if (!*roots) { - ulist_free(tmp); + + ctx->roots = ulist_alloc(GFP_NOFS); + if (!ctx->roots) { + ulist_free(ctx->refs); + ctx->refs = NULL; return -ENOMEM; } + ctx->ignore_extent_item_pos = true; + ULIST_ITER_INIT(&uiter); while (1) { - ret = find_parent_nodes(trans, fs_info, bytenr, time_seq, - tmp, *roots, BTRFS_IGNORE_EXTENT_OFFSET, - NULL); + struct ulist_node *node; + + ret = find_parent_nodes(ctx, NULL); if (ret < 0 && ret != -ENOENT) { - ulist_free(tmp); - ulist_free(*roots); - *roots = NULL; - return ret; + ulist_free(ctx->roots); + ctx->roots = NULL; + break; } - node = ulist_next(tmp, &uiter); + ret = 0; + node = ulist_next(ctx->refs, &uiter); if (!node) break; - bytenr = node->val; + ctx->bytenr = node->val; cond_resched(); } - ulist_free(tmp); - return 0; + ulist_free(ctx->refs); + ctx->refs = NULL; + ctx->bytenr = orig_bytenr; + ctx->ignore_extent_item_pos = orig_ignore_extent_item_pos; + + return ret; } -int btrfs_find_all_roots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, - u64 time_seq, struct ulist **roots, +int btrfs_find_all_roots(struct btrfs_backref_walk_ctx *ctx, bool skip_commit_root_sem) { int ret; - if (!trans && !skip_commit_root_sem) - down_read(&fs_info->commit_root_sem); - ret = btrfs_find_all_roots_safe(trans, fs_info, bytenr, time_seq, roots); - if (!trans && !skip_commit_root_sem) - up_read(&fs_info->commit_root_sem); + if (!ctx->trans && !skip_commit_root_sem) + down_read(&ctx->fs_info->commit_root_sem); + ret = btrfs_find_all_roots_safe(ctx); + if (!ctx->trans && !skip_commit_root_sem) + up_read(&ctx->fs_info->commit_root_sem); return ret; } @@ -1794,6 +1789,7 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, u64 extent_gen, struct btrfs_backref_share_check_ctx *ctx) { + struct btrfs_backref_walk_ctx walk_ctx = { 0 }; struct btrfs_root *root = inode->root; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_trans_handle *trans; @@ -1830,8 +1826,14 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, down_read(&fs_info->commit_root_sem); } else { btrfs_get_tree_mod_seq(fs_info, &elem); + walk_ctx.time_seq = elem.seq; } + walk_ctx.ignore_extent_item_pos = true; + walk_ctx.trans = trans; + walk_ctx.fs_info = fs_info; + walk_ctx.refs = &ctx->refs; + /* -1 means we are in the bytenr of the data extent. */ level = -1; ULIST_ITER_INIT(&uiter); @@ -1840,8 +1842,8 @@ int btrfs_is_data_extent_shared(struct btrfs_inode *inode, u64 bytenr, bool is_shared; bool cached; - ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, &ctx->refs, - NULL, BTRFS_IGNORE_EXTENT_OFFSET, &shared); + walk_ctx.bytenr = bytenr; + ret = find_parent_nodes(&walk_ctx, &shared); if (ret == BACKREF_FOUND_SHARED || ret == BACKREF_FOUND_NOT_SHARED) { /* If shared must return 1, otherwise return 0. */ @@ -2279,73 +2281,81 @@ static int iterate_leaf_refs(struct btrfs_fs_info *fs_info, * the given parameters. * when the iterator function returns a non-zero value, iteration stops. */ -int iterate_extent_inodes(struct btrfs_fs_info *fs_info, - u64 extent_item_objectid, u64 extent_item_pos, - int search_commit_root, - iterate_extent_inodes_t *iterate, void *ctx) +int iterate_extent_inodes(struct btrfs_backref_walk_ctx *ctx, + bool search_commit_root, + iterate_extent_inodes_t *iterate, void *user_ctx) { int ret; - struct btrfs_trans_handle *trans = NULL; - struct ulist *refs = NULL; - struct ulist *roots = NULL; - struct ulist_node *ref_node = NULL; - struct ulist_node *root_node = NULL; + struct ulist *refs; + struct ulist_node *ref_node; struct btrfs_seq_list seq_elem = BTRFS_SEQ_LIST_INIT(seq_elem); struct ulist_iterator ref_uiter; - struct ulist_iterator root_uiter; - btrfs_debug(fs_info, "resolving all inodes for extent %llu", - extent_item_objectid); + btrfs_debug(ctx->fs_info, "resolving all inodes for extent %llu", + ctx->bytenr); + + ASSERT(ctx->trans == NULL); if (!search_commit_root) { - trans = btrfs_attach_transaction(fs_info->tree_root); + struct btrfs_trans_handle *trans; + + trans = btrfs_attach_transaction(ctx->fs_info->tree_root); if (IS_ERR(trans)) { if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) return PTR_ERR(trans); trans = NULL; } + ctx->trans = trans; } - if (trans) - btrfs_get_tree_mod_seq(fs_info, &seq_elem); - else - down_read(&fs_info->commit_root_sem); + if (ctx->trans) { + btrfs_get_tree_mod_seq(ctx->fs_info, &seq_elem); + ctx->time_seq = seq_elem.seq; + } else { + down_read(&ctx->fs_info->commit_root_sem); + } - ret = btrfs_find_all_leafs(trans, fs_info, extent_item_objectid, - seq_elem.seq, &refs, extent_item_pos); + ret = btrfs_find_all_leafs(ctx); if (ret) goto out; + refs = ctx->refs; + ctx->refs = NULL; ULIST_ITER_INIT(&ref_uiter); while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { - ret = btrfs_find_all_roots_safe(trans, fs_info, ref_node->val, - seq_elem.seq, &roots); + struct ulist_node *root_node; + struct ulist_iterator root_uiter; + + ctx->bytenr = ref_node->val; + ret = btrfs_find_all_roots_safe(ctx); if (ret) break; + ULIST_ITER_INIT(&root_uiter); - while (!ret && (root_node = ulist_next(roots, &root_uiter))) { - btrfs_debug(fs_info, + while (!ret && (root_node = ulist_next(ctx->roots, &root_uiter))) { + btrfs_debug(ctx->fs_info, "root %llu references leaf %llu, data list %#llx", root_node->val, ref_node->val, ref_node->aux); - ret = iterate_leaf_refs(fs_info, + ret = iterate_leaf_refs(ctx->fs_info, (struct extent_inode_elem *) (uintptr_t)ref_node->aux, - root_node->val, - extent_item_objectid, - iterate, ctx); + root_node->val, ctx->bytenr, + iterate, user_ctx); } - ulist_free(roots); + ulist_free(ctx->roots); + ctx->roots = NULL; } free_leaf_list(refs); out: - if (trans) { - btrfs_put_tree_mod_seq(fs_info, &seq_elem); - btrfs_end_transaction(trans); + if (ctx->trans) { + btrfs_put_tree_mod_seq(ctx->fs_info, &seq_elem); + btrfs_end_transaction(ctx->trans); + ctx->trans = NULL; } else { - up_read(&fs_info->commit_root_sem); + up_read(&ctx->fs_info->commit_root_sem); } return ret; @@ -2375,8 +2385,8 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, struct btrfs_path *path, void *ctx, bool ignore_offset) { + struct btrfs_backref_walk_ctx walk_ctx = { 0 }; int ret; - u64 extent_item_pos; u64 flags = 0; struct btrfs_key found_key; int search_commit_root = path->search_commit_root; @@ -2388,16 +2398,15 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) return -EINVAL; + walk_ctx.bytenr = found_key.objectid; if (ignore_offset) - extent_item_pos = BTRFS_IGNORE_EXTENT_OFFSET; + walk_ctx.ignore_extent_item_pos = true; else - extent_item_pos = logical - found_key.objectid; + walk_ctx.extent_item_pos = logical - found_key.objectid; + walk_ctx.fs_info = fs_info; - ret = iterate_extent_inodes(fs_info, found_key.objectid, - extent_item_pos, search_commit_root, - build_ino_list, ctx); - - return ret; + return iterate_extent_inodes(&walk_ctx, search_commit_root, + build_ino_list, ctx); } static int inode_to_path(u64 inum, u32 name_len, unsigned long name_off, diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 2eb99f23cc8f..ce700dfcc016 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -13,11 +13,63 @@ #include "extent_io.h" /* - * Pass to backref walking functions to tell them to include references from - * all file extent items that point to the target data extent, regardless if - * they refer to the whole extent or just sections of it (bookend extents). + * Context and arguments for backref walking functions. Some of the fields are + * to be filled by the caller of such functions while other are filled by the + * functions themselves, as described below. */ -#define BTRFS_IGNORE_EXTENT_OFFSET ((u64)-1) +struct btrfs_backref_walk_ctx { + /* + * The address of the extent for which we are doing backref walking. + * Can be either a data extent or a metadata extent. + * + * Must always be set by the top level caller. + */ + u64 bytenr; + /* + * Offset relative to the target extent. This is only used for data + * extents, and it's meaningful because we can have file extent items + * that point only to a section of a data extent ("bookend" extents), + * and we want to filter out any that don't point to a section of the + * data extent containing the given offset. + * + * Must always be set by the top level caller. + */ + u64 extent_item_pos; + /* + * If true and bytenr corresponds to a data extent, then references from + * all file extent items that point to the data extent are considered, + * @extent_item_pos is ignored. + */ + bool ignore_extent_item_pos; + /* A valid transaction handle or NULL. */ + struct btrfs_trans_handle *trans; + /* + * The file system's info object, can not be NULL. + * + * Must always be set by the top level caller. + */ + struct btrfs_fs_info *fs_info; + /* + * Time sequence acquired from btrfs_get_tree_mod_seq(), in case the + * caller joined the tree mod log to get a consistent view of b+trees + * while we do backref walking, or BTRFS_SEQ_LAST. + * When using BTRFS_SEQ_LAST, delayed refs are not checked and it uses + * commit roots when searching b+trees - this is a special case for + * qgroups used during a transaction commit. + */ + u64 time_seq; + /* + * Used to collect the bytenr of metadata extents that point to the + * target extent. + */ + struct ulist *refs; + /* + * List used to collect the IDs of the roots from which the target + * extent is accessible. Can be NULL in case the caller does not care + * about collecting root IDs. + */ + struct ulist *roots; +}; struct inode_fs_paths { struct btrfs_path *btrfs_path; @@ -96,10 +148,9 @@ int tree_backref_for_extent(unsigned long *ptr, struct extent_buffer *eb, struct btrfs_key *key, struct btrfs_extent_item *ei, u32 item_size, u64 *out_root, u8 *out_level); -int iterate_extent_inodes(struct btrfs_fs_info *fs_info, - u64 extent_item_objectid, - u64 extent_offset, int search_commit_root, - iterate_extent_inodes_t *iterate, void *ctx); +int iterate_extent_inodes(struct btrfs_backref_walk_ctx *ctx, + bool search_commit_root, + iterate_extent_inodes_t *iterate, void *user_ctx); int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, struct btrfs_path *path, void *ctx, @@ -107,13 +158,8 @@ int iterate_inodes_from_logical(u64 logical, struct btrfs_fs_info *fs_info, int paths_from_inode(u64 inum, struct inode_fs_paths *ipath); -int btrfs_find_all_leafs(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, - u64 time_seq, struct ulist **leafs, - u64 extent_item_pos); -int btrfs_find_all_roots(struct btrfs_trans_handle *trans, - struct btrfs_fs_info *fs_info, u64 bytenr, - u64 time_seq, struct ulist **roots, +int btrfs_find_all_leafs(struct btrfs_backref_walk_ctx *ctx); +int btrfs_find_all_roots(struct btrfs_backref_walk_ctx *ctx, bool skip_commit_root_sem); char *btrfs_ref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, u32 name_len, unsigned long name_off, diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 75dd964ca46c..24c013c61a94 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1794,8 +1794,7 @@ int btrfs_qgroup_trace_extent_nolock(struct btrfs_fs_info *fs_info, int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, struct btrfs_qgroup_extent_record *qrecord) { - struct ulist *old_root; - u64 bytenr = qrecord->bytenr; + struct btrfs_backref_walk_ctx ctx = { 0 }; int ret; /* @@ -1822,8 +1821,10 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, if (trans->fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING) return 0; - ret = btrfs_find_all_roots(NULL, trans->fs_info, bytenr, 0, &old_root, - true); + ctx.bytenr = qrecord->bytenr; + ctx.fs_info = trans->fs_info; + + ret = btrfs_find_all_roots(&ctx, true); if (ret < 0) { qgroup_mark_inconsistent(trans->fs_info); btrfs_warn(trans->fs_info, @@ -1839,7 +1840,7 @@ int btrfs_qgroup_trace_extent_post(struct btrfs_trans_handle *trans, * * So modifying qrecord->old_roots is safe here */ - qrecord->old_roots = old_root; + qrecord->old_roots = ctx.roots; return 0; } @@ -2750,17 +2751,22 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) if (!ret && !(fs_info->qgroup_flags & BTRFS_QGROUP_RUNTIME_FLAG_NO_ACCOUNTING)) { + struct btrfs_backref_walk_ctx ctx = { 0 }; + + ctx.bytenr = record->bytenr; + ctx.fs_info = fs_info; + /* * Old roots should be searched when inserting qgroup * extent record */ if (WARN_ON(!record->old_roots)) { /* Search commit root to find old_roots */ - ret = btrfs_find_all_roots(NULL, fs_info, - record->bytenr, 0, - &record->old_roots, false); + ret = btrfs_find_all_roots(&ctx, false); if (ret < 0) goto cleanup; + record->old_roots = ctx.roots; + ctx.roots = NULL; } /* Free the reserved data space */ @@ -2773,10 +2779,11 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans) * which doesn't lock tree or delayed_refs and search * current root. It's safe inside commit_transaction(). */ - ret = btrfs_find_all_roots(trans, fs_info, - record->bytenr, BTRFS_SEQ_LAST, &new_roots, false); + ctx.trans = trans; + ret = btrfs_find_all_roots(&ctx, false); if (ret < 0) goto cleanup; + new_roots = ctx.roots; if (qgroup_to_skip) { ulist_del(new_roots, qgroup_to_skip, 0); ulist_del(record->old_roots, qgroup_to_skip, @@ -3242,7 +3249,6 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root; struct btrfs_key found; struct extent_buffer *scratch_leaf = NULL; - struct ulist *roots = NULL; u64 num_bytes; bool done; int slot; @@ -3292,6 +3298,8 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, mutex_unlock(&fs_info->qgroup_rescan_lock); for (; slot < btrfs_header_nritems(scratch_leaf); ++slot) { + struct btrfs_backref_walk_ctx ctx = { 0 }; + btrfs_item_key_to_cpu(scratch_leaf, &found, slot); if (found.type != BTRFS_EXTENT_ITEM_KEY && found.type != BTRFS_METADATA_ITEM_KEY) @@ -3301,13 +3309,15 @@ static int qgroup_rescan_leaf(struct btrfs_trans_handle *trans, else num_bytes = found.offset; - ret = btrfs_find_all_roots(NULL, fs_info, found.objectid, 0, - &roots, false); + ctx.bytenr = found.objectid; + ctx.fs_info = fs_info; + + ret = btrfs_find_all_roots(&ctx, false); if (ret < 0) goto out; /* For rescan, just pass old_roots as NULL */ ret = btrfs_qgroup_account_extent(trans, found.objectid, - num_bytes, NULL, roots); + num_bytes, NULL, ctx.roots); if (ret < 0) goto out; } diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 45690f7b5900..2ecca24e1001 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -3408,24 +3408,27 @@ int add_data_references(struct reloc_control *rc, struct btrfs_path *path, struct rb_root *blocks) { - struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - struct ulist *leaves = NULL; + struct btrfs_backref_walk_ctx ctx = { 0 }; struct ulist_iterator leaf_uiter; struct ulist_node *ref_node = NULL; - const u32 blocksize = fs_info->nodesize; + const u32 blocksize = rc->extent_root->fs_info->nodesize; int ret = 0; btrfs_release_path(path); - ret = btrfs_find_all_leafs(NULL, fs_info, extent_key->objectid, - 0, &leaves, BTRFS_IGNORE_EXTENT_OFFSET); + + ctx.bytenr = extent_key->objectid; + ctx.ignore_extent_item_pos = true; + ctx.fs_info = rc->extent_root->fs_info; + + ret = btrfs_find_all_leafs(&ctx); if (ret < 0) return ret; ULIST_ITER_INIT(&leaf_uiter); - while ((ref_node = ulist_next(leaves, &leaf_uiter))) { + while ((ref_node = ulist_next(ctx.refs, &leaf_uiter))) { struct extent_buffer *eb; - eb = read_tree_block(fs_info, ref_node->val, 0, 0, 0, NULL); + eb = read_tree_block(ctx.fs_info, ref_node->val, 0, 0, 0, NULL); if (IS_ERR(eb)) { ret = PTR_ERR(eb); break; @@ -3441,7 +3444,7 @@ int add_data_references(struct reloc_control *rc, } if (ret < 0) free_block_list(blocks); - ulist_free(leaves); + ulist_free(ctx.refs); return ret; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 6c7dc89709fc..288a97992aa4 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -909,7 +909,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) struct btrfs_extent_item *ei; struct scrub_warning swarn; unsigned long ptr = 0; - u64 extent_item_pos; u64 flags = 0; u64 ref_root; u32 item_size; @@ -941,7 +940,6 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) if (ret < 0) goto out; - extent_item_pos = swarn.logical - found_key.objectid; swarn.extent_item_size = found_key.offset; eb = path->nodes[0]; @@ -964,12 +962,18 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock) } while (ret != 1); btrfs_release_path(path); } else { + struct btrfs_backref_walk_ctx ctx = { 0 }; + btrfs_release_path(path); + + ctx.bytenr = found_key.objectid; + ctx.extent_item_pos = swarn.logical - found_key.objectid; + ctx.fs_info = fs_info; + swarn.path = path; swarn.dev = dev; - iterate_extent_inodes(fs_info, found_key.objectid, - extent_item_pos, 1, - scrub_print_warning_inode, &swarn); + + iterate_extent_inodes(&ctx, true, scrub_print_warning_inode, &swarn); } out: diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 6bf06939f891..0c9a9933341e 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1356,12 +1356,12 @@ static int find_extent_clone(struct send_ctx *sctx, u64 logical; u64 disk_byte; u64 num_bytes; - u64 extent_item_pos; u64 extent_refs; u64 flags = 0; struct btrfs_file_extent_item *fi; struct extent_buffer *eb = path->nodes[0]; - struct backref_ctx backref_ctx = {0}; + struct backref_ctx backref_ctx = { 0 }; + struct btrfs_backref_walk_ctx backref_walk_ctx = { 0 }; struct clone_root *cur_clone_root; struct btrfs_key found_key; struct btrfs_path *tmp_path; @@ -1461,14 +1461,13 @@ static int find_extent_clone(struct send_ctx *sctx, /* * Now collect all backrefs. */ + backref_walk_ctx.bytenr = found_key.objectid; if (compressed == BTRFS_COMPRESS_NONE) - extent_item_pos = logical - found_key.objectid; - else - extent_item_pos = 0; - ret = iterate_extent_inodes(fs_info, found_key.objectid, - extent_item_pos, 1, __iterate_backrefs, - &backref_ctx); + backref_walk_ctx.extent_item_pos = logical - found_key.objectid; + backref_walk_ctx.fs_info = fs_info; + ret = iterate_extent_inodes(&backref_walk_ctx, true, __iterate_backrefs, + &backref_ctx); if (ret < 0) goto out; diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index 65b65d55d1f6..3fc8dc3fd980 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -205,6 +205,7 @@ static int remove_extent_ref(struct btrfs_root *root, u64 bytenr, static int test_no_shared_qgroup(struct btrfs_root *root, u32 sectorsize, u32 nodesize) { + struct btrfs_backref_walk_ctx ctx = { 0 }; struct btrfs_trans_handle trans; struct btrfs_fs_info *fs_info = root->fs_info; struct ulist *old_roots = NULL; @@ -220,16 +221,22 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return ret; } + ctx.bytenr = nodesize; + ctx.trans = &trans; + ctx.fs_info = fs_info; + /* * Since the test trans doesn't have the complicated delayed refs, * we can only call btrfs_qgroup_account_extent() directly to test * quota. */ - ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); + ret = btrfs_find_all_roots(&ctx, false); if (ret) { test_err("couldn't find old roots: %d", ret); return ret; } + old_roots = ctx.roots; + ctx.roots = NULL; ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, BTRFS_FS_TREE_OBJECTID); @@ -238,12 +245,14 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return ret; } - ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); + ret = btrfs_find_all_roots(&ctx, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } + new_roots = ctx.roots; + ctx.roots = NULL; ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots, new_roots); @@ -262,11 +271,13 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return -EINVAL; } - ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); + ret = btrfs_find_all_roots(&ctx, false); if (ret) { test_err("couldn't find old roots: %d", ret); return ret; } + old_roots = ctx.roots; + ctx.roots = NULL; ret = remove_extent_item(root, nodesize, nodesize); if (ret) { @@ -274,12 +285,14 @@ static int test_no_shared_qgroup(struct btrfs_root *root, return -EINVAL; } - ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); + ret = btrfs_find_all_roots(&ctx, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } + new_roots = ctx.roots; + ctx.roots = NULL; ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots, new_roots); @@ -304,6 +317,7 @@ static int test_no_shared_qgroup(struct btrfs_root *root, static int test_multiple_refs(struct btrfs_root *root, u32 sectorsize, u32 nodesize) { + struct btrfs_backref_walk_ctx ctx = { 0 }; struct btrfs_trans_handle trans; struct btrfs_fs_info *fs_info = root->fs_info; struct ulist *old_roots = NULL; @@ -324,11 +338,17 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; } - ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); + ctx.bytenr = nodesize; + ctx.trans = &trans; + ctx.fs_info = fs_info; + + ret = btrfs_find_all_roots(&ctx, false); if (ret) { test_err("couldn't find old roots: %d", ret); return ret; } + old_roots = ctx.roots; + ctx.roots = NULL; ret = insert_normal_tree_ref(root, nodesize, nodesize, 0, BTRFS_FS_TREE_OBJECTID); @@ -337,12 +357,14 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; } - ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); + ret = btrfs_find_all_roots(&ctx, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } + new_roots = ctx.roots; + ctx.roots = NULL; ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots, new_roots); @@ -357,11 +379,13 @@ static int test_multiple_refs(struct btrfs_root *root, return -EINVAL; } - ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); + ret = btrfs_find_all_roots(&ctx, false); if (ret) { test_err("couldn't find old roots: %d", ret); return ret; } + old_roots = ctx.roots; + ctx.roots = NULL; ret = add_tree_ref(root, nodesize, nodesize, 0, BTRFS_FIRST_FREE_OBJECTID); @@ -370,12 +394,14 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; } - ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); + ret = btrfs_find_all_roots(&ctx, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } + new_roots = ctx.roots; + ctx.roots = NULL; ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots, new_roots); @@ -396,11 +422,13 @@ static int test_multiple_refs(struct btrfs_root *root, return -EINVAL; } - ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &old_roots, false); + ret = btrfs_find_all_roots(&ctx, false); if (ret) { test_err("couldn't find old roots: %d", ret); return ret; } + old_roots = ctx.roots; + ctx.roots = NULL; ret = remove_extent_ref(root, nodesize, nodesize, 0, BTRFS_FIRST_FREE_OBJECTID); @@ -409,12 +437,14 @@ static int test_multiple_refs(struct btrfs_root *root, return ret; } - ret = btrfs_find_all_roots(&trans, fs_info, nodesize, 0, &new_roots, false); + ret = btrfs_find_all_roots(&ctx, false); if (ret) { ulist_free(old_roots); test_err("couldn't find old roots: %d", ret); return ret; } + new_roots = ctx.roots; + ctx.roots = NULL; ret = btrfs_qgroup_account_extent(&trans, nodesize, nodesize, old_roots, new_roots); -- cgit v1.2.3 From 1baea6f18abf34037169d3b0c585356abb395376 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:48 +0000 Subject: btrfs: reuse roots ulist on each leaf iteration for iterate_extent_inodes() At iterate_extent_inodes() we collect a ulist of leaves for a given extent with a call to btrfs_find_all_leafs() and then we enter a loop where we iterate over all the collected leaves. Each iteration of that loop does a call to btrfs_find_all_roots_safe(), to determine all roots from which a leaf is accessible, and that results in allocating and releasing a ulist to store the root IDs. Instead of allocating and releasing the roots ulist on every iteration, allocate a ulist before entering the loop and keep using it on each iteration, reinitializing the ulist at the end of each iteration. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index a1a00a7bdba5..dc276ce3afe0 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1674,32 +1674,36 @@ int btrfs_find_all_leafs(struct btrfs_backref_walk_ctx *ctx) * the current while iterating. The process stops when we reach the end of the * list. * - * Found roots are added to @ctx->roots, which is allocated by this function and - * @ctx->roots should be NULL when calling this function. This function also - * requires @ctx->refs to be NULL, as it uses it for allocating a ulist to do - * temporary work, and frees it before returning. + * Found roots are added to @ctx->roots, which is allocated by this function if + * it points to NULL, in which case the caller is responsible for freeing it + * after it's not needed anymore. + * This function requires @ctx->refs to be NULL, as it uses it for allocating a + * ulist to do temporary work, and frees it before returning. * - * Returns 0 on success, < 0 on error. On error @ctx->roots is always NULL. + * Returns 0 on success, < 0 on error. */ static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx) { const u64 orig_bytenr = ctx->bytenr; const bool orig_ignore_extent_item_pos = ctx->ignore_extent_item_pos; + bool roots_ulist_allocated = false; struct ulist_iterator uiter; int ret = 0; ASSERT(ctx->refs == NULL); - ASSERT(ctx->roots == NULL); ctx->refs = ulist_alloc(GFP_NOFS); if (!ctx->refs) return -ENOMEM; - ctx->roots = ulist_alloc(GFP_NOFS); if (!ctx->roots) { - ulist_free(ctx->refs); - ctx->refs = NULL; - return -ENOMEM; + ctx->roots = ulist_alloc(GFP_NOFS); + if (!ctx->roots) { + ulist_free(ctx->refs); + ctx->refs = NULL; + return -ENOMEM; + } + roots_ulist_allocated = true; } ctx->ignore_extent_item_pos = true; @@ -1710,8 +1714,10 @@ static int btrfs_find_all_roots_safe(struct btrfs_backref_walk_ctx *ctx) ret = find_parent_nodes(ctx, NULL); if (ret < 0 && ret != -ENOENT) { - ulist_free(ctx->roots); - ctx->roots = NULL; + if (roots_ulist_allocated) { + ulist_free(ctx->roots); + ctx->roots = NULL; + } break; } ret = 0; @@ -2295,6 +2301,11 @@ int iterate_extent_inodes(struct btrfs_backref_walk_ctx *ctx, ctx->bytenr); ASSERT(ctx->trans == NULL); + ASSERT(ctx->roots == NULL); + + ctx->roots = ulist_alloc(GFP_NOFS); + if (!ctx->roots) + return -ENOMEM; if (!search_commit_root) { struct btrfs_trans_handle *trans; @@ -2302,8 +2313,11 @@ int iterate_extent_inodes(struct btrfs_backref_walk_ctx *ctx, trans = btrfs_attach_transaction(ctx->fs_info->tree_root); if (IS_ERR(trans)) { if (PTR_ERR(trans) != -ENOENT && - PTR_ERR(trans) != -EROFS) + PTR_ERR(trans) != -EROFS) { + ulist_free(ctx->roots); + ctx->roots = NULL; return PTR_ERR(trans); + } trans = NULL; } ctx->trans = trans; @@ -2344,8 +2358,7 @@ int iterate_extent_inodes(struct btrfs_backref_walk_ctx *ctx, root_node->val, ctx->bytenr, iterate, user_ctx); } - ulist_free(ctx->roots); - ctx->roots = NULL; + ulist_reinit(ctx->roots); } free_leaf_list(refs); @@ -2358,6 +2371,9 @@ out: up_read(&ctx->fs_info->commit_root_sem); } + ulist_free(ctx->roots); + ctx->roots = NULL; + return ret; } -- cgit v1.2.3 From 66d04209e5a8d3fc47900de6bd0c319790a52b3e Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:50 +0000 Subject: btrfs: send: cache leaf to roots mapping during backref walking During a send operation, when doing backref walking to determine which inodes/offsets/roots we can clone from, the most repetitive and expensive step is to map each leaf that has file extent items pointing to the target data extent to the IDs of the roots from which the leaves are accessible, which happens at iterate_extent_inodes(). That step requires finding every parent node of a leaf, then the parent of each parent, and so on until we reach a root node. So it's a naturally expensive operation, and repetitive because each leaf can have hundreds of file extent items (for a nodesize of 16K, that can be slightly over 200 file extent items). There's also temporal locality, as we process all file extent items from a leave before moving the next leaf. This change caches the mapping of leaves to root IDs, to avoid repeating those computations over and over again. The cache is limited to a maximum of 128 entries, with each entry being a struct with a size of 128 bytes, so the maximum cache size is 16K plus any nodes internally allocated by the maple tree that is used to index pointers to those structs. The cache is invalidated whenever we detect relocation happened since we started filling the cache, because if relocation happened then extent buffers for leaves and nodes of the trees used by a send operation may have been reallocated. This cache also allows for another important optimization that is introduced in the next patch in the series. This change is part of a patchset comprised of the following patches: 01/17 btrfs: fix inode list leak during backref walking at resolve_indirect_refs() 02/17 btrfs: fix inode list leak during backref walking at find_parent_nodes() 03/17 btrfs: fix ulist leaks in error paths of qgroup self tests 04/17 btrfs: remove pointless and double ulist frees in error paths of qgroup tests 05/17 btrfs: send: avoid unnecessary path allocations when finding extent clone 06/17 btrfs: send: update comment at find_extent_clone() 07/17 btrfs: send: drop unnecessary backref context field initializations 08/17 btrfs: send: avoid unnecessary backref lookups when finding clone source 09/17 btrfs: send: optimize clone detection to increase extent sharing 10/17 btrfs: use a single argument for extent offset in backref walking functions 11/17 btrfs: use a structure to pass arguments to backref walking functions 12/17 btrfs: reuse roots ulist on each leaf iteration for iterate_extent_inodes() 13/17 btrfs: constify ulist parameter of ulist_next() 14/17 btrfs: send: cache leaf to roots mapping during backref walking 15/17 btrfs: send: skip unnecessary backref iterations 16/17 btrfs: send: avoid double extent tree search when finding clone source 17/17 btrfs: send: skip resolution of our own backref when finding clone source Performance test results are in the changelog of patch 17/17. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 52 +++++++++++---- fs/btrfs/backref.h | 11 ++++ fs/btrfs/send.c | 185 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 236 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index dc276ce3afe0..9dacf487017d 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -2303,21 +2303,14 @@ int iterate_extent_inodes(struct btrfs_backref_walk_ctx *ctx, ASSERT(ctx->trans == NULL); ASSERT(ctx->roots == NULL); - ctx->roots = ulist_alloc(GFP_NOFS); - if (!ctx->roots) - return -ENOMEM; - if (!search_commit_root) { struct btrfs_trans_handle *trans; trans = btrfs_attach_transaction(ctx->fs_info->tree_root); if (IS_ERR(trans)) { if (PTR_ERR(trans) != -ENOENT && - PTR_ERR(trans) != -EROFS) { - ulist_free(ctx->roots); - ctx->roots = NULL; + PTR_ERR(trans) != -EROFS) return PTR_ERR(trans); - } trans = NULL; } ctx->trans = trans; @@ -2338,23 +2331,58 @@ int iterate_extent_inodes(struct btrfs_backref_walk_ctx *ctx, ULIST_ITER_INIT(&ref_uiter); while (!ret && (ref_node = ulist_next(refs, &ref_uiter))) { + const u64 leaf_bytenr = ref_node->val; struct ulist_node *root_node; struct ulist_iterator root_uiter; + struct extent_inode_elem *inode_list; + + inode_list = (struct extent_inode_elem *)(uintptr_t)ref_node->aux; + + if (ctx->cache_lookup) { + const u64 *root_ids; + int root_count; + bool cached; + + cached = ctx->cache_lookup(leaf_bytenr, ctx->user_ctx, + &root_ids, &root_count); + if (cached) { + for (int i = 0; i < root_count; i++) { + ret = iterate_leaf_refs(ctx->fs_info, + inode_list, + root_ids[i], + leaf_bytenr, + iterate, + user_ctx); + if (ret) + break; + } + continue; + } + } + + if (!ctx->roots) { + ctx->roots = ulist_alloc(GFP_NOFS); + if (!ctx->roots) { + ret = -ENOMEM; + break; + } + } - ctx->bytenr = ref_node->val; + ctx->bytenr = leaf_bytenr; ret = btrfs_find_all_roots_safe(ctx); if (ret) break; + if (ctx->cache_store) + ctx->cache_store(leaf_bytenr, ctx->roots, ctx->user_ctx); + ULIST_ITER_INIT(&root_uiter); while (!ret && (root_node = ulist_next(ctx->roots, &root_uiter))) { btrfs_debug(ctx->fs_info, "root %llu references leaf %llu, data list %#llx", root_node->val, ref_node->val, ref_node->aux); - ret = iterate_leaf_refs(ctx->fs_info, - (struct extent_inode_elem *) - (uintptr_t)ref_node->aux, + ret = iterate_leaf_refs(ctx->fs_info, inode_list, root_node->val, ctx->bytenr, iterate, user_ctx); } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index ce700dfcc016..5005f579f235 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -69,6 +69,17 @@ struct btrfs_backref_walk_ctx { * about collecting root IDs. */ struct ulist *roots; + /* + * Used by iterate_extent_inodes(). Lookup and store functions for an + * optional cache which maps the logical address (bytenr) of leaves + * to an array of root IDs. + */ + bool (*cache_lookup)(u64 leaf_bytenr, void *user_ctx, + const u64 **root_ids_ret, int *root_count_ret); + void (*cache_store)(u64 leaf_bytenr, const struct ulist *root_ids, + void *user_ctx); + /* Context object to pass to @cache_lookup and @cache_store. */ + void *user_ctx; }; struct inode_fs_paths { diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 0c9a9933341e..fa94bd550564 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -83,6 +83,39 @@ struct clone_root { #define SEND_CTX_MAX_NAME_CACHE_SIZE 128 #define SEND_CTX_NAME_CACHE_CLEAN_SIZE (SEND_CTX_MAX_NAME_CACHE_SIZE * 2) +/* + * Limit the root_ids array of struct backref_cache_entry to 12 elements. + * This makes the size of a cache entry to be exactly 128 bytes on x86_64. + * The most common case is to have a single root for cloning, which corresponds + * to the send root. Having the user specify more than 11 clone roots is not + * common, and in such rare cases we simply don't use caching if the number of + * cloning roots that lead down to a leaf is more than 12. + */ +#define SEND_MAX_BACKREF_CACHE_ROOTS 12 + +/* + * Max number of entries in the cache. + * With SEND_MAX_BACKREF_CACHE_ROOTS as 12, the size in bytes, excluding + * maple tree's internal nodes, is 16K. + */ +#define SEND_MAX_BACKREF_CACHE_SIZE 128 + +/* + * A backref cache entry maps a leaf to a list of IDs of roots from which the + * leaf is accessible and we can use for clone operations. + * With SEND_MAX_BACKREF_CACHE_ROOTS as 12, each cache entry is 128 bytes (on + * x86_64). + */ +struct backref_cache_entry { + /* List to link to the cache's lru list. */ + struct list_head list; + /* The key for this entry in the cache. */ + u64 key; + u64 root_ids[SEND_MAX_BACKREF_CACHE_ROOTS]; + /* Number of valid elements in the root_ids array. */ + int num_roots; +}; + struct send_ctx { struct file *send_filp; loff_t send_off; @@ -251,6 +284,14 @@ struct send_ctx { struct rb_root rbtree_new_refs; struct rb_root rbtree_deleted_refs; + + struct { + u64 last_reloc_trans; + struct list_head lru_list; + struct maple_tree entries; + /* Number of entries stored in the cache. */ + int size; + } backref_cache; }; struct pending_dir_move { @@ -1335,6 +1376,142 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 num_bytes, u64 root, return 0; } +static void empty_backref_cache(struct send_ctx *sctx) +{ + struct backref_cache_entry *entry; + struct backref_cache_entry *tmp; + + list_for_each_entry_safe(entry, tmp, &sctx->backref_cache.lru_list, list) + kfree(entry); + + INIT_LIST_HEAD(&sctx->backref_cache.lru_list); + mtree_destroy(&sctx->backref_cache.entries); + sctx->backref_cache.size = 0; +} + +static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx, + const u64 **root_ids_ret, int *root_count_ret) +{ + struct send_ctx *sctx = ctx; + struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; + const u64 key = leaf_bytenr >> fs_info->sectorsize_bits; + struct backref_cache_entry *entry; + + if (sctx->backref_cache.size == 0) + return false; + + /* + * If relocation happened since we first filled the cache, then we must + * empty the cache and can not use it, because even though we operate on + * read-only roots, their leaves and nodes may have been reallocated and + * now be used for different nodes/leaves of the same tree or some other + * tree. + * + * We are called from iterate_extent_inodes() while either holding a + * transaction handle or holding fs_info->commit_root_sem, so no need + * to take any lock here. + */ + if (fs_info->last_reloc_trans > sctx->backref_cache.last_reloc_trans) { + empty_backref_cache(sctx); + return false; + } + + entry = mtree_load(&sctx->backref_cache.entries, key); + if (!entry) + return false; + + *root_ids_ret = entry->root_ids; + *root_count_ret = entry->num_roots; + list_move_tail(&entry->list, &sctx->backref_cache.lru_list); + + return true; +} + +static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids, + void *ctx) +{ + struct send_ctx *sctx = ctx; + struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; + struct backref_cache_entry *new_entry; + struct ulist_iterator uiter; + struct ulist_node *node; + int ret; + + /* + * We're called while holding a transaction handle or while holding + * fs_info->commit_root_sem (at iterate_extent_inodes()), so must do a + * NOFS allocation. + */ + new_entry = kmalloc(sizeof(struct backref_cache_entry), GFP_NOFS); + /* No worries, cache is optional. */ + if (!new_entry) + return; + + new_entry->key = leaf_bytenr >> fs_info->sectorsize_bits; + new_entry->num_roots = 0; + ULIST_ITER_INIT(&uiter); + while ((node = ulist_next(root_ids, &uiter)) != NULL) { + const u64 root_id = node->val; + struct clone_root *root; + + root = bsearch((void *)(uintptr_t)root_id, sctx->clone_roots, + sctx->clone_roots_cnt, sizeof(struct clone_root), + __clone_root_cmp_bsearch); + if (!root) + continue; + + /* Too many roots, just exit, no worries as caching is optional. */ + if (new_entry->num_roots >= SEND_MAX_BACKREF_CACHE_ROOTS) { + kfree(new_entry); + return; + } + + new_entry->root_ids[new_entry->num_roots] = root_id; + new_entry->num_roots++; + } + + /* + * We may have not added any roots to the new cache entry, which means + * none of the roots is part of the list of roots from which we are + * allowed to clone. Cache the new entry as it's still useful to avoid + * backref walking to determine which roots have a path to the leaf. + */ + + if (sctx->backref_cache.size >= SEND_MAX_BACKREF_CACHE_SIZE) { + struct backref_cache_entry *lru_entry; + struct backref_cache_entry *mt_entry; + + lru_entry = list_first_entry(&sctx->backref_cache.lru_list, + struct backref_cache_entry, list); + mt_entry = mtree_erase(&sctx->backref_cache.entries, lru_entry->key); + ASSERT(mt_entry == lru_entry); + list_del(&mt_entry->list); + kfree(mt_entry); + sctx->backref_cache.size--; + } + + ret = mtree_insert(&sctx->backref_cache.entries, new_entry->key, + new_entry, GFP_NOFS); + ASSERT(ret == 0 || ret == -ENOMEM); + if (ret) { + /* Caching is optional, no worries. */ + kfree(new_entry); + return; + } + + list_add_tail(&new_entry->list, &sctx->backref_cache.lru_list); + + /* + * We are called from iterate_extent_inodes() while either holding a + * transaction handle or holding fs_info->commit_root_sem, so no need + * to take any lock here. + */ + if (sctx->backref_cache.size == 0) + sctx->backref_cache.last_reloc_trans = fs_info->last_reloc_trans; + + sctx->backref_cache.size++; +} + /* * Given an inode, offset and extent item, it finds a good clone for a clone * instruction. Returns -ENOENT when none could be found. The function makes @@ -1465,6 +1642,9 @@ static int find_extent_clone(struct send_ctx *sctx, if (compressed == BTRFS_COMPRESS_NONE) backref_walk_ctx.extent_item_pos = logical - found_key.objectid; backref_walk_ctx.fs_info = fs_info; + backref_walk_ctx.cache_lookup = lookup_backref_cache; + backref_walk_ctx.cache_store = store_backref_cache; + backref_walk_ctx.user_ctx = sctx; ret = iterate_extent_inodes(&backref_walk_ctx, true, __iterate_backrefs, &backref_ctx); @@ -7891,6 +8071,9 @@ long btrfs_ioctl_send(struct inode *inode, struct btrfs_ioctl_send_args *arg) INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL); INIT_LIST_HEAD(&sctx->name_cache_list); + INIT_LIST_HEAD(&sctx->backref_cache.lru_list); + mt_init(&sctx->backref_cache.entries); + sctx->flags = arg->flags; if (arg->flags & BTRFS_SEND_FLAG_VERSION) { @@ -8153,6 +8336,8 @@ out: close_current_inode(sctx); + empty_backref_cache(sctx); + kfree(sctx); } -- cgit v1.2.3 From 88ffb665c894b1929b30b09e05506ff359d9fb89 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:51 +0000 Subject: btrfs: send: skip unnecessary backref iterations When looking for a clone source for an extent, we are iterating over all the backreferences for an extent. This is often a waste of time, because once we find a good clone source we could stop immediately instead of continuing backref walking, which is expensive. Basically what happens currently is this: 1) Call iterate_extent_inodes() to iterate over all the backreferences; 2) It calls btrfs_find_all_leafs() which in turn calls the main function to walk over backrefs and collect them - find_parent_nodes(); 3) Then we collect all the references for our target data extent from the extent tree (and delayed refs if any), add them to the rb trees, resolve all the indirect backreferences and search for all the file extent items in fs trees, building a list of inodes for each one of them (struct extent_inode_elem); 4) Then back at iterate_extent_inodes() we find all the roots associated to each found leaf, and call the callback __iterate_backrefs defined at send.c for each inode in the inode list associated to each leaf. Some times one the first backreferences we find in a fs tree is optimal to satisfy the clone operation that send wants to perform, and in that case we could stop immediately and avoid resolving all the remaining indirect backreferences (search fs trees for the respective file extent items, etc). This possibly if when we find a fs tree leaf with a file extent item we are able to know what are all the roots that can lead to the leaf - this is now possible after the previous patch in the series that adds a cache that maps leaves to a list of roots. So we can now shortcircuit backref walking during send, by having the callback we pass to iterate_extent_inodes() to be called when we find a file extent item for an indirect backreference, and have it return a special value when it found a suitable backreference and it does not need to look for more backreferences. This change does that. This change is part of a patchset comprised of the following patches: 01/17 btrfs: fix inode list leak during backref walking at resolve_indirect_refs() 02/17 btrfs: fix inode list leak during backref walking at find_parent_nodes() 03/17 btrfs: fix ulist leaks in error paths of qgroup self tests 04/17 btrfs: remove pointless and double ulist frees in error paths of qgroup tests 05/17 btrfs: send: avoid unnecessary path allocations when finding extent clone 06/17 btrfs: send: update comment at find_extent_clone() 07/17 btrfs: send: drop unnecessary backref context field initializations 08/17 btrfs: send: avoid unnecessary backref lookups when finding clone source 09/17 btrfs: send: optimize clone detection to increase extent sharing 10/17 btrfs: use a single argument for extent offset in backref walking functions 11/17 btrfs: use a structure to pass arguments to backref walking functions 12/17 btrfs: reuse roots ulist on each leaf iteration for iterate_extent_inodes() 13/17 btrfs: constify ulist parameter of ulist_next() 14/17 btrfs: send: cache leaf to roots mapping during backref walking 15/17 btrfs: send: skip unnecessary backref iterations 16/17 btrfs: send: avoid double extent tree search when finding clone source 17/17 btrfs: send: skip resolution of our own backref when finding clone source Performance test results are in the changelog of patch 17/17. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 73 ++++++++++++++++++++++++++++++++---------------- fs/btrfs/backref.h | 44 ++++++++++++++++++++++++----- fs/btrfs/send.c | 81 +++++++++++++++++++++++++++--------------------------- 3 files changed, 128 insertions(+), 70 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 9dacf487017d..bb59ba68d7ee 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -31,15 +31,18 @@ struct extent_inode_elem { struct extent_inode_elem *next; }; -static int check_extent_in_eb(const struct btrfs_key *key, +static int check_extent_in_eb(struct btrfs_backref_walk_ctx *ctx, + const struct btrfs_key *key, const struct extent_buffer *eb, const struct btrfs_file_extent_item *fi, - u64 extent_item_pos, struct extent_inode_elem **eie) { const u64 data_len = btrfs_file_extent_num_bytes(eb, fi); - u64 offset = 0; + u64 offset = key->offset; struct extent_inode_elem *e; + const u64 *root_ids; + int root_count; + bool cached; if (!btrfs_file_extent_compression(eb, fi) && !btrfs_file_extent_encryption(eb, fi) && @@ -48,19 +51,38 @@ static int check_extent_in_eb(const struct btrfs_key *key, data_offset = btrfs_file_extent_offset(eb, fi); - if (extent_item_pos < data_offset || - extent_item_pos >= data_offset + data_len) + if (ctx->extent_item_pos < data_offset || + ctx->extent_item_pos >= data_offset + data_len) return 1; - offset = extent_item_pos - data_offset; + offset += ctx->extent_item_pos - data_offset; + } + + if (!ctx->indirect_ref_iterator || !ctx->cache_lookup) + goto add_inode_elem; + + cached = ctx->cache_lookup(eb->start, ctx->user_ctx, &root_ids, + &root_count); + if (!cached) + goto add_inode_elem; + + for (int i = 0; i < root_count; i++) { + int ret; + + ret = ctx->indirect_ref_iterator(key->objectid, offset, + data_len, root_ids[i], + ctx->user_ctx); + if (ret) + return ret; } +add_inode_elem: e = kmalloc(sizeof(*e), GFP_NOFS); if (!e) return -ENOMEM; e->next = *eie; e->inum = key->objectid; - e->offset = key->offset + offset; + e->offset = offset; e->num_bytes = data_len; *eie = e; @@ -77,8 +99,8 @@ static void free_inode_elem_list(struct extent_inode_elem *eie) } } -static int find_extent_in_eb(const struct extent_buffer *eb, - u64 wanted_disk_byte, u64 extent_item_pos, +static int find_extent_in_eb(struct btrfs_backref_walk_ctx *ctx, + const struct extent_buffer *eb, struct extent_inode_elem **eie) { u64 disk_byte; @@ -105,11 +127,11 @@ static int find_extent_in_eb(const struct extent_buffer *eb, continue; /* don't skip BTRFS_FILE_EXTENT_PREALLOC, we can handle that */ disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); - if (disk_byte != wanted_disk_byte) + if (disk_byte != ctx->bytenr) continue; - ret = check_extent_in_eb(&key, eb, fi, extent_item_pos, eie); - if (ret < 0) + ret = check_extent_in_eb(ctx, &key, eb, fi, eie); + if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP || ret < 0) return ret; } @@ -526,9 +548,9 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx, else goto next; if (!ctx->ignore_extent_item_pos) { - ret = check_extent_in_eb(&key, eb, fi, - ctx->extent_item_pos, &eie); - if (ret < 0) + ret = check_extent_in_eb(ctx, &key, eb, fi, &eie); + if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP || + ret < 0) break; } if (ret > 0) @@ -551,10 +573,11 @@ next: ret = btrfs_next_old_item(root, path, ctx->time_seq); } - if (ret > 0) - ret = 0; - else if (ret < 0) + if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP || ret < 0) free_inode_elem_list(eie); + else if (ret > 0) + ret = 0; + return ret; } @@ -1570,12 +1593,12 @@ again: if (!path->skip_locking) btrfs_tree_read_lock(eb); - ret = find_extent_in_eb(eb, ctx->bytenr, - ctx->extent_item_pos, &eie); + ret = find_extent_in_eb(ctx, eb, &eie); if (!path->skip_locking) btrfs_tree_read_unlock(eb); free_extent_buffer(eb); - if (ret < 0) + if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP || + ret < 0) goto out; ref->inode_list = eie; /* @@ -1628,7 +1651,7 @@ out: prelim_release(&preftrees.indirect); prelim_release(&preftrees.indirect_missing_keys); - if (ret < 0) + if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP || ret < 0) free_inode_elem_list(eie); return ret; } @@ -1654,7 +1677,8 @@ int btrfs_find_all_leafs(struct btrfs_backref_walk_ctx *ctx) return -ENOMEM; ret = find_parent_nodes(ctx, NULL); - if (ret < 0 && ret != -ENOENT) { + if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP || + (ret < 0 && ret != -ENOENT)) { free_leaf_list(ctx->refs); ctx->refs = NULL; return ret; @@ -2402,6 +2426,9 @@ out: ulist_free(ctx->roots); ctx->roots = NULL; + if (ret == BTRFS_ITERATE_EXTENT_INODES_STOP) + ret = 0; + return ret; } diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 5005f579f235..a80d1e8be718 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -12,6 +12,25 @@ #include "disk-io.h" #include "extent_io.h" +/* + * Used by implementations of iterate_extent_inodes_t (see definition below) to + * signal that backref iteration can stop immediately and no error happened. + * The value must be non-negative and must not be 0, 1 (which is a common return + * value from things like btrfs_search_slot() and used internally in the backref + * walking code) and different from BACKREF_FOUND_SHARED and + * BACKREF_FOUND_NOT_SHARED + */ +#define BTRFS_ITERATE_EXTENT_INODES_STOP 5 + +/* + * Should return 0 if no errors happened and iteration of backrefs should + * continue. Can return BTRFS_ITERATE_EXTENT_INODES_STOP or any other non-zero + * value to immediately stop iteration and possibly signal an error back to + * the caller. + */ +typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 num_bytes, + u64 root, void *ctx); + /* * Context and arguments for backref walking functions. Some of the fields are * to be filled by the caller of such functions while other are filled by the @@ -70,15 +89,29 @@ struct btrfs_backref_walk_ctx { */ struct ulist *roots; /* - * Used by iterate_extent_inodes(). Lookup and store functions for an - * optional cache which maps the logical address (bytenr) of leaves - * to an array of root IDs. + * Used by iterate_extent_inodes() and the main backref walk code + * (find_parent_nodes()). Lookup and store functions for an optional + * cache which maps the logical address (bytenr) of leaves to an array + * of root IDs. */ bool (*cache_lookup)(u64 leaf_bytenr, void *user_ctx, const u64 **root_ids_ret, int *root_count_ret); void (*cache_store)(u64 leaf_bytenr, const struct ulist *root_ids, void *user_ctx); - /* Context object to pass to @cache_lookup and @cache_store. */ + /* + * If this is not NULL, then the backref walking code will call this + * for each indirect data extent reference as soon as it finds one, + * before collecting all the remaining backrefs and before resolving + * indirect backrefs. This allows for the caller to terminate backref + * walking as soon as it finds one backref that matches some specific + * criteria. The @cache_lookup and @cache_store callbacks should not + * be NULL in order to use this callback. + */ + iterate_extent_inodes_t *indirect_ref_iterator; + /* + * Context object to pass to the @cache_lookup, @cache_store and + * @indirect_ref_iterator callbacks. + */ void *user_ctx; }; @@ -145,9 +178,6 @@ struct btrfs_backref_share_check_ctx { int prev_extents_cache_slot; }; -typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 num_bytes, - u64 root, void *ctx); - struct btrfs_backref_share_check_ctx *btrfs_alloc_backref_share_check_ctx(void); void btrfs_free_backref_share_ctx(struct btrfs_backref_share_check_ctx *ctx); diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index fa94bd550564..f453f6406564 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -77,7 +77,7 @@ struct clone_root { u64 ino; u64 offset; u64 num_bytes; - u64 found_refs; + bool found_ref; }; #define SEND_CTX_MAX_NAME_CACHE_SIZE 128 @@ -1281,9 +1281,6 @@ struct backref_ctx { /* may be truncated in case it's the last extent in a file */ u64 extent_len; - - /* Just to check for bugs in backref resolving */ - int found_itself; }; static int __clone_root_cmp_bsearch(const void *key, const void *elt) @@ -1312,33 +1309,33 @@ static int __clone_root_cmp_sort(const void *e1, const void *e2) /* * Called for every backref that is found for the current extent. - * Results are collected in sctx->clone_roots->ino/offset/found_refs + * Results are collected in sctx->clone_roots->ino/offset. */ -static int __iterate_backrefs(u64 ino, u64 offset, u64 num_bytes, u64 root, - void *ctx_) +static int iterate_backrefs(u64 ino, u64 offset, u64 num_bytes, u64 root_id, + void *ctx_) { struct backref_ctx *bctx = ctx_; - struct clone_root *found; + struct clone_root *clone_root; /* First check if the root is in the list of accepted clone sources */ - found = bsearch((void *)(uintptr_t)root, bctx->sctx->clone_roots, - bctx->sctx->clone_roots_cnt, - sizeof(struct clone_root), - __clone_root_cmp_bsearch); - if (!found) + clone_root = bsearch((void *)(uintptr_t)root_id, bctx->sctx->clone_roots, + bctx->sctx->clone_roots_cnt, + sizeof(struct clone_root), + __clone_root_cmp_bsearch); + if (!clone_root) return 0; - if (found->root == bctx->sctx->send_root && + /* This is our own reference, bail out as we can't clone from it. */ + if (clone_root->root == bctx->sctx->send_root && ino == bctx->cur_objectid && - offset == bctx->cur_offset) { - bctx->found_itself = 1; - } + offset == bctx->cur_offset) + return 0; /* * Make sure we don't consider clones from send_root that are * behind the current inode/offset. */ - if (found->root == bctx->sctx->send_root) { + if (clone_root->root == bctx->sctx->send_root) { /* * If the source inode was not yet processed we can't issue a * clone operation, as the source extent does not exist yet at @@ -1359,7 +1356,7 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 num_bytes, u64 root, } bctx->found++; - found->found_refs++; + clone_root->found_ref = true; /* * If the given backref refers to a file extent item with a larger @@ -1367,10 +1364,17 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 num_bytes, u64 root, * we clone more optimally and end up doing less writes and getting * less exclusive, non-shared extents at the destination. */ - if (num_bytes > found->num_bytes) { - found->ino = ino; - found->offset = offset; - found->num_bytes = num_bytes; + if (num_bytes > clone_root->num_bytes) { + clone_root->ino = ino; + clone_root->offset = offset; + clone_root->num_bytes = num_bytes; + + /* + * Found a perfect candidate, so there's no need to continue + * backref walking. + */ + if (num_bytes >= bctx->extent_len) + return BTRFS_ITERATE_EXTENT_INODES_STOP; } return 0; @@ -1392,7 +1396,8 @@ static void empty_backref_cache(struct send_ctx *sctx) static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx, const u64 **root_ids_ret, int *root_count_ret) { - struct send_ctx *sctx = ctx; + struct backref_ctx *bctx = ctx; + struct send_ctx *sctx = bctx->sctx; struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; const u64 key = leaf_bytenr >> fs_info->sectorsize_bits; struct backref_cache_entry *entry; @@ -1430,7 +1435,8 @@ static bool lookup_backref_cache(u64 leaf_bytenr, void *ctx, static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids, void *ctx) { - struct send_ctx *sctx = ctx; + struct backref_ctx *bctx = ctx; + struct send_ctx *sctx = bctx->sctx; struct btrfs_fs_info *fs_info = sctx->send_root->fs_info; struct backref_cache_entry *new_entry; struct ulist_iterator uiter; @@ -1618,7 +1624,7 @@ static int find_extent_clone(struct send_ctx *sctx, cur_clone_root->ino = (u64)-1; cur_clone_root->offset = 0; cur_clone_root->num_bytes = 0; - cur_clone_root->found_refs = 0; + cur_clone_root->found_ref = false; } backref_ctx.sctx = sctx; @@ -1628,7 +1634,7 @@ static int find_extent_clone(struct send_ctx *sctx, /* * The last extent of a file may be too large due to page alignment. * We need to adjust extent_len in this case so that the checks in - * __iterate_backrefs work. + * iterate_backrefs() work. */ if (data_offset + num_bytes >= ino_size) backref_ctx.extent_len = ino_size - data_offset; @@ -1644,9 +1650,10 @@ static int find_extent_clone(struct send_ctx *sctx, backref_walk_ctx.fs_info = fs_info; backref_walk_ctx.cache_lookup = lookup_backref_cache; backref_walk_ctx.cache_store = store_backref_cache; - backref_walk_ctx.user_ctx = sctx; + backref_walk_ctx.indirect_ref_iterator = iterate_backrefs; + backref_walk_ctx.user_ctx = &backref_ctx; - ret = iterate_extent_inodes(&backref_walk_ctx, true, __iterate_backrefs, + ret = iterate_extent_inodes(&backref_walk_ctx, true, iterate_backrefs, &backref_ctx); if (ret < 0) goto out; @@ -1671,27 +1678,21 @@ static int find_extent_clone(struct send_ctx *sctx, } up_read(&fs_info->commit_root_sem); - if (!backref_ctx.found_itself) { - /* found a bug in backref code? */ - ret = -EIO; - btrfs_err(fs_info, - "did not find backref in send_root. inode=%llu, offset=%llu, disk_byte=%llu found extent=%llu", - ino, data_offset, disk_byte, found_key.objectid); - goto out; - } - btrfs_debug(fs_info, "find_extent_clone: data_offset=%llu, ino=%llu, num_bytes=%llu, logical=%llu", data_offset, ino, num_bytes, logical); - if (!backref_ctx.found) + if (!backref_ctx.found) { btrfs_debug(fs_info, "no clones found"); + ret = -ENOENT; + goto out; + } cur_clone_root = NULL; for (i = 0; i < sctx->clone_roots_cnt; i++) { struct clone_root *clone_root = &sctx->clone_roots[i]; - if (clone_root->found_refs == 0) + if (!clone_root->found_ref) continue; /* -- cgit v1.2.3 From f73853c7168aef0e071c160979af05a148691e61 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:52 +0000 Subject: btrfs: send: avoid double extent tree search when finding clone source At find_extent_clone() we search twice for the extent item corresponding to the data extent that the current file extent items points to: 1) Once with a call to extent_from_logical(); 2) Once again during backref walking, through iterate_extent_inodes() which eventually leads to find_parent_nodes() where we will search again the extent tree for the same extent item. The extent tree can be huge, so doing this one extra search for every extent we want to send adds up and it's expensive. The first call is there since the send code was introduced and it accomplishes two things: 1) Check that the extent is flagged as a data extent in the extent tree. But it can not be anything else, otherwise we wouldn't have a file extent item in the send root pointing to it. This was probably added to catch bugs in the early days where send was yet too young and the interaction with everything else was far from perfect; 2) Check how many direct references there are on the extent, and if there's too many (more than SEND_MAX_EXTENT_REFS), avoid doing the backred walking as it may take too long and slowdown send. So improve on this by having a callback in the backref walking code that is called when it finds the extent item in the extent tree, and have those checks done in the callback. When the callback returns anything different from 0, it stops the backref walking code. This way we do a single search on the extent tree for the extent item of our data extent. Also, before this change we were only checking the number of references on the data extent against SEND_MAX_EXTENT_REFS, but after starting backref walking we will end up resolving backrefs for extent buffers in the path from a leaf having a file extent item pointing to our data extent, up to roots of trees from which the extent buffer is accessible from, due to shared subtrees resulting from snapshoting. We were therefore allowing for the possibility for send taking too long due to some node in the path from the leaf to a root node being shared too many times. After this change we check for reference counts being greater than SEND_MAX_EXTENT_REFS for both data extents and metadata extents. This change is part of a patchset comprised of the following patches: 01/17 btrfs: fix inode list leak during backref walking at resolve_indirect_refs() 02/17 btrfs: fix inode list leak during backref walking at find_parent_nodes() 03/17 btrfs: fix ulist leaks in error paths of qgroup self tests 04/17 btrfs: remove pointless and double ulist frees in error paths of qgroup tests 05/17 btrfs: send: avoid unnecessary path allocations when finding extent clone 06/17 btrfs: send: update comment at find_extent_clone() 07/17 btrfs: send: drop unnecessary backref context field initializations 08/17 btrfs: send: avoid unnecessary backref lookups when finding clone source 09/17 btrfs: send: optimize clone detection to increase extent sharing 10/17 btrfs: use a single argument for extent offset in backref walking functions 11/17 btrfs: use a structure to pass arguments to backref walking functions 12/17 btrfs: reuse roots ulist on each leaf iteration for iterate_extent_inodes() 13/17 btrfs: constify ulist parameter of ulist_next() 14/17 btrfs: send: cache leaf to roots mapping during backref walking 15/17 btrfs: send: skip unnecessary backref iterations 16/17 btrfs: send: avoid double extent tree search when finding clone source 17/17 btrfs: send: skip resolution of our own backref when finding clone source Performance test results are in the changelog of patch 17/17. Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 31 +++++++++------- fs/btrfs/backref.h | 9 +++-- fs/btrfs/send.c | 103 ++++++++++++++++++++++++----------------------------- 3 files changed, 73 insertions(+), 70 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index bb59ba68d7ee..33056c4c0528 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1003,8 +1003,8 @@ static int add_delayed_refs(const struct btrfs_fs_info *fs_info, * * Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED. */ -static int add_inline_refs(const struct btrfs_fs_info *fs_info, - struct btrfs_path *path, u64 bytenr, +static int add_inline_refs(struct btrfs_backref_walk_ctx *ctx, + struct btrfs_path *path, int *info_level, struct preftrees *preftrees, struct share_check *sc) { @@ -1029,6 +1029,13 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, BUG_ON(item_size < sizeof(*ei)); ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); + + if (ctx->check_extent_item) { + ret = ctx->check_extent_item(ctx->bytenr, ei, leaf, ctx->user_ctx); + if (ret) + return ret; + } + flags = btrfs_extent_flags(leaf, ei); btrfs_item_key_to_cpu(leaf, &found_key, slot); @@ -1064,9 +1071,9 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, switch (type) { case BTRFS_SHARED_BLOCK_REF_KEY: - ret = add_direct_ref(fs_info, preftrees, + ret = add_direct_ref(ctx->fs_info, preftrees, *info_level + 1, offset, - bytenr, 1, NULL, GFP_NOFS); + ctx->bytenr, 1, NULL, GFP_NOFS); break; case BTRFS_SHARED_DATA_REF_KEY: { struct btrfs_shared_data_ref *sdref; @@ -1075,14 +1082,14 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, sdref = (struct btrfs_shared_data_ref *)(iref + 1); count = btrfs_shared_data_ref_count(leaf, sdref); - ret = add_direct_ref(fs_info, preftrees, 0, offset, - bytenr, count, sc, GFP_NOFS); + ret = add_direct_ref(ctx->fs_info, preftrees, 0, offset, + ctx->bytenr, count, sc, GFP_NOFS); break; } case BTRFS_TREE_BLOCK_REF_KEY: - ret = add_indirect_ref(fs_info, preftrees, offset, + ret = add_indirect_ref(ctx->fs_info, preftrees, offset, NULL, *info_level + 1, - bytenr, 1, NULL, GFP_NOFS); + ctx->bytenr, 1, NULL, GFP_NOFS); break; case BTRFS_EXTENT_DATA_REF_KEY: { struct btrfs_extent_data_ref *dref; @@ -1104,8 +1111,8 @@ static int add_inline_refs(const struct btrfs_fs_info *fs_info, root = btrfs_extent_data_ref_root(leaf, dref); - ret = add_indirect_ref(fs_info, preftrees, root, - &key, 0, bytenr, count, + ret = add_indirect_ref(ctx->fs_info, preftrees, root, + &key, 0, ctx->bytenr, count, sc, GFP_NOFS); break; @@ -1455,8 +1462,8 @@ again: if (key.objectid == ctx->bytenr && (key.type == BTRFS_EXTENT_ITEM_KEY || key.type == BTRFS_METADATA_ITEM_KEY)) { - ret = add_inline_refs(ctx->fs_info, path, ctx->bytenr, - &info_level, &preftrees, sc); + ret = add_inline_refs(ctx, path, &info_level, + &preftrees, sc); if (ret) goto out; ret = add_keyed_refs(root, path, ctx->bytenr, info_level, diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index a80d1e8be718..1bd5a15c7f9e 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -109,9 +109,14 @@ struct btrfs_backref_walk_ctx { */ iterate_extent_inodes_t *indirect_ref_iterator; /* - * Context object to pass to the @cache_lookup, @cache_store and - * @indirect_ref_iterator callbacks. + * If this is not NULL, then the backref walking code will call this for + * each extent item it's meant to process before it actually starts + * processing it. If this returns anything other than 0, then it stops + * the backref walking code immediately. */ + int (*check_extent_item)(u64 bytenr, const struct btrfs_extent_item *ei, + const struct extent_buffer *leaf, void *user_ctx); + /* Context object to pass to the callbacks defined above. */ void *user_ctx; }; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index f453f6406564..516b80637bfb 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1281,6 +1281,9 @@ struct backref_ctx { /* may be truncated in case it's the last extent in a file */ u64 extent_len; + + /* The bytenr the file extent item we are processing refers to. */ + u64 bytenr; }; static int __clone_root_cmp_bsearch(const void *key, const void *elt) @@ -1518,6 +1521,43 @@ static void store_backref_cache(u64 leaf_bytenr, const struct ulist *root_ids, sctx->backref_cache.size++; } +static int check_extent_item(u64 bytenr, const struct btrfs_extent_item *ei, + const struct extent_buffer *leaf, void *ctx) +{ + const u64 refs = btrfs_extent_refs(leaf, ei); + const struct backref_ctx *bctx = ctx; + const struct send_ctx *sctx = bctx->sctx; + + if (bytenr == bctx->bytenr) { + const u64 flags = btrfs_extent_flags(leaf, ei); + + if (WARN_ON(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) + return -EUCLEAN; + + /* + * If we have only one reference and only the send root as a + * clone source - meaning no clone roots were given in the + * struct btrfs_ioctl_send_args passed to the send ioctl - then + * it's our reference and there's no point in doing backref + * walking which is expensive, so exit early. + */ + if (refs == 1 && sctx->clone_roots_cnt == 1) + return -ENOENT; + } + + /* + * Backreference walking (iterate_extent_inodes() below) is currently + * too expensive when an extent has a large number of references, both + * in time spent and used memory. So for now just fallback to write + * operations instead of clone operations when an extent has more than + * a certain amount of references. + */ + if (refs > SEND_MAX_EXTENT_REFS) + return -ENOENT; + + return 0; +} + /* * Given an inode, offset and extent item, it finds a good clone for a clone * instruction. Returns -ENOENT when none could be found. The function makes @@ -1539,16 +1579,11 @@ static int find_extent_clone(struct send_ctx *sctx, u64 logical; u64 disk_byte; u64 num_bytes; - u64 extent_refs; - u64 flags = 0; struct btrfs_file_extent_item *fi; struct extent_buffer *eb = path->nodes[0]; struct backref_ctx backref_ctx = { 0 }; struct btrfs_backref_walk_ctx backref_walk_ctx = { 0 }; struct clone_root *cur_clone_root; - struct btrfs_key found_key; - struct btrfs_path *tmp_path; - struct btrfs_extent_item *ei; int compressed; u32 i; @@ -1574,48 +1609,6 @@ static int find_extent_clone(struct send_ctx *sctx, num_bytes = btrfs_file_extent_num_bytes(eb, fi); logical = disk_byte + btrfs_file_extent_offset(eb, fi); - tmp_path = alloc_path_for_send(); - if (!tmp_path) - return -ENOMEM; - - /* We only use this path under the commit sem */ - tmp_path->need_commit_sem = 0; - - down_read(&fs_info->commit_root_sem); - ret = extent_from_logical(fs_info, disk_byte, tmp_path, - &found_key, &flags); - up_read(&fs_info->commit_root_sem); - - if (ret < 0) - goto out; - if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { - ret = -EIO; - goto out; - } - - ei = btrfs_item_ptr(tmp_path->nodes[0], tmp_path->slots[0], - struct btrfs_extent_item); - extent_refs = btrfs_extent_refs(tmp_path->nodes[0], ei); - /* - * Backreference walking (iterate_extent_inodes() below) is currently - * too expensive when an extent has a large number of references, both - * in time spent and used memory. So for now just fallback to write - * operations instead of clone operations when an extent has more than - * a certain amount of references. - * - * Also, if we have only one reference and only the send root as a clone - * source - meaning no clone roots were given in the struct - * btrfs_ioctl_send_args passed to the send ioctl - then it's our - * reference and there's no point in doing backref walking which is - * expensive, so exit early. - */ - if ((extent_refs == 1 && sctx->clone_roots_cnt == 1) || - extent_refs > SEND_MAX_EXTENT_REFS) { - ret = -ENOENT; - goto out; - } - btrfs_release_path(tmp_path); - /* * Setup the clone roots. */ @@ -1630,6 +1623,7 @@ static int find_extent_clone(struct send_ctx *sctx, backref_ctx.sctx = sctx; backref_ctx.cur_objectid = ino; backref_ctx.cur_offset = data_offset; + backref_ctx.bytenr = disk_byte; /* * The last extent of a file may be too large due to page alignment. @@ -1644,19 +1638,20 @@ static int find_extent_clone(struct send_ctx *sctx, /* * Now collect all backrefs. */ - backref_walk_ctx.bytenr = found_key.objectid; + backref_walk_ctx.bytenr = disk_byte; if (compressed == BTRFS_COMPRESS_NONE) - backref_walk_ctx.extent_item_pos = logical - found_key.objectid; + backref_walk_ctx.extent_item_pos = btrfs_file_extent_offset(eb, fi); backref_walk_ctx.fs_info = fs_info; backref_walk_ctx.cache_lookup = lookup_backref_cache; backref_walk_ctx.cache_store = store_backref_cache; backref_walk_ctx.indirect_ref_iterator = iterate_backrefs; + backref_walk_ctx.check_extent_item = check_extent_item; backref_walk_ctx.user_ctx = &backref_ctx; ret = iterate_extent_inodes(&backref_walk_ctx, true, iterate_backrefs, &backref_ctx); if (ret < 0) - goto out; + return ret; down_read(&fs_info->commit_root_sem); if (fs_info->last_reloc_trans > sctx->last_reloc_trans) { @@ -1673,8 +1668,7 @@ static int find_extent_clone(struct send_ctx *sctx, * was already reallocated after the relocation. */ up_read(&fs_info->commit_root_sem); - ret = -ENOENT; - goto out; + return -ENOENT; } up_read(&fs_info->commit_root_sem); @@ -1684,8 +1678,7 @@ static int find_extent_clone(struct send_ctx *sctx, if (!backref_ctx.found) { btrfs_debug(fs_info, "no clones found"); - ret = -ENOENT; - goto out; + return -ENOENT; } cur_clone_root = NULL; @@ -1720,8 +1713,6 @@ static int find_extent_clone(struct send_ctx *sctx, ret = -ENOENT; } -out: - btrfs_free_path(tmp_path); return ret; } -- cgit v1.2.3 From adf0241868bd46c7be8012ef99cb88c7f47c16ce Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Tue, 1 Nov 2022 16:15:53 +0000 Subject: btrfs: send: skip resolution of our own backref when finding clone source When doing backref walking to determine a source range to clone from, it is worthless to collect and resolve our own data backref, as we can't obviously use it as a clone source and it represents the range we want to clone into. Collecting the backref implies doing the extra work to resolve it, doing the search for a file extent item in a subvolume tree, etc. Skipping the data backref is valid as long as we only have the send root as the single clone root, otherwise the leaf with the file extent item may be accessible from another clone root due to shared subtrees created by snapshots, and therefore we have to collect the backref and resolve it. So add a callback to the backref walking code to guide it to skip data backrefs. This change is part of a patchset comprised of the following patches: 01/17 btrfs: fix inode list leak during backref walking at resolve_indirect_refs() 02/17 btrfs: fix inode list leak during backref walking at find_parent_nodes() 03/17 btrfs: fix ulist leaks in error paths of qgroup self tests 04/17 btrfs: remove pointless and double ulist frees in error paths of qgroup tests 05/17 btrfs: send: avoid unnecessary path allocations when finding extent clone 06/17 btrfs: send: update comment at find_extent_clone() 07/17 btrfs: send: drop unnecessary backref context field initializations 08/17 btrfs: send: avoid unnecessary backref lookups when finding clone source 09/17 btrfs: send: optimize clone detection to increase extent sharing 10/17 btrfs: use a single argument for extent offset in backref walking functions 11/17 btrfs: use a structure to pass arguments to backref walking functions 12/17 btrfs: reuse roots ulist on each leaf iteration for iterate_extent_inodes() 13/17 btrfs: constify ulist parameter of ulist_next() 14/17 btrfs: send: cache leaf to roots mapping during backref walking 15/17 btrfs: send: skip unnecessary backref iterations 16/17 btrfs: send: avoid double extent tree search when finding clone source 17/17 btrfs: send: skip resolution of our own backref when finding clone source The following test was run on non-debug kernel (Debian's default kernel config) before and after applying the patchset: $ cat test-send-many-shared-extents.sh #!/bin/bash DEV=/dev/sdh MNT=/mnt/sdh umount $DEV &> /dev/null mkfs.btrfs -f $DEV mount $DEV $MNT num_files=50000 num_clones_per_file=50 for ((i = 1; i <= $num_files; i++)); do xfs_io -f -c "pwrite 0 64K" $MNT/file_$i > /dev/null echo -ne "\r$i files created..." done echo btrfs subvolume snapshot -r $MNT $MNT/snap1 cloned=0 for ((i = 1; i <= $num_clones_per_file; i++)); do for ((j = 1; j <= $num_files; j++)); do cp --reflink=always $MNT/file_$j $MNT/file_${j}_clone_${i} cloned=$((cloned + 1)) echo -ne "\r$cloned / $((num_files * num_clones_per_file)) clone operations" done done echo btrfs subvolume snapshot -r $MNT $MNT/snap2 # Unmount and mount again to clear all cached metadata (and data). umount $DEV mount $DEV $MNT start=$(date +%s%N) btrfs send $MNT/snap2 > /dev/null end=$(date +%s%N) dur=$(( (end - start) / 1000000000 )) echo -e "\nFull send took $dur seconds" # Unmount and mount again to clear all cached metadata (and data). umount $DEV mount $DEV $MNT start=$(date +%s%N) btrfs send -p $MNT/snap1 $MNT/snap2 > /dev/null end=$(date +%s%N) dur=$(( (end - start) / 1000000000 )) echo -e "\nIncremental send took $dur seconds" umount $MNT Before applying the patchset: (...) Full send took 1108 seconds (...) Incremental send took 1135 seconds After applying the whole patchset: (...) Full send took 268 seconds (-75.8%) (...) Incremental send took 316 seconds (-72.2%) Signed-off-by: Filipe Manana Signed-off-by: David Sterba --- fs/btrfs/backref.c | 35 +++++++++++++++++++++-------------- fs/btrfs/backref.h | 9 +++++++++ fs/btrfs/send.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 14 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 33056c4c0528..430974cf3b96 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -1111,10 +1111,12 @@ static int add_inline_refs(struct btrfs_backref_walk_ctx *ctx, root = btrfs_extent_data_ref_root(leaf, dref); - ret = add_indirect_ref(ctx->fs_info, preftrees, root, - &key, 0, ctx->bytenr, count, - sc, GFP_NOFS); - + if (!ctx->skip_data_ref || + !ctx->skip_data_ref(root, key.objectid, key.offset, + ctx->user_ctx)) + ret = add_indirect_ref(ctx->fs_info, preftrees, + root, &key, 0, ctx->bytenr, + count, sc, GFP_NOFS); break; } default: @@ -1133,8 +1135,9 @@ static int add_inline_refs(struct btrfs_backref_walk_ctx *ctx, * * Returns 0 on success, <0 on error, or BACKREF_FOUND_SHARED. */ -static int add_keyed_refs(struct btrfs_root *extent_root, - struct btrfs_path *path, u64 bytenr, +static int add_keyed_refs(struct btrfs_backref_walk_ctx *ctx, + struct btrfs_root *extent_root, + struct btrfs_path *path, int info_level, struct preftrees *preftrees, struct share_check *sc) { @@ -1157,7 +1160,7 @@ static int add_keyed_refs(struct btrfs_root *extent_root, leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, slot); - if (key.objectid != bytenr) + if (key.objectid != ctx->bytenr) break; if (key.type < BTRFS_TREE_BLOCK_REF_KEY) continue; @@ -1169,7 +1172,7 @@ static int add_keyed_refs(struct btrfs_root *extent_root, /* SHARED DIRECT METADATA backref */ ret = add_direct_ref(fs_info, preftrees, info_level + 1, key.offset, - bytenr, 1, NULL, GFP_NOFS); + ctx->bytenr, 1, NULL, GFP_NOFS); break; case BTRFS_SHARED_DATA_REF_KEY: { /* SHARED DIRECT FULL backref */ @@ -1180,14 +1183,14 @@ static int add_keyed_refs(struct btrfs_root *extent_root, struct btrfs_shared_data_ref); count = btrfs_shared_data_ref_count(leaf, sdref); ret = add_direct_ref(fs_info, preftrees, 0, - key.offset, bytenr, count, + key.offset, ctx->bytenr, count, sc, GFP_NOFS); break; } case BTRFS_TREE_BLOCK_REF_KEY: /* NORMAL INDIRECT METADATA backref */ ret = add_indirect_ref(fs_info, preftrees, key.offset, - NULL, info_level + 1, bytenr, + NULL, info_level + 1, ctx->bytenr, 1, NULL, GFP_NOFS); break; case BTRFS_EXTENT_DATA_REF_KEY: { @@ -1211,9 +1214,13 @@ static int add_keyed_refs(struct btrfs_root *extent_root, } root = btrfs_extent_data_ref_root(leaf, dref); - ret = add_indirect_ref(fs_info, preftrees, root, - &key, 0, bytenr, count, - sc, GFP_NOFS); + + if (!ctx->skip_data_ref || + !ctx->skip_data_ref(root, key.objectid, key.offset, + ctx->user_ctx)) + ret = add_indirect_ref(fs_info, preftrees, root, + &key, 0, ctx->bytenr, + count, sc, GFP_NOFS); break; } default: @@ -1466,7 +1473,7 @@ again: &preftrees, sc); if (ret) goto out; - ret = add_keyed_refs(root, path, ctx->bytenr, info_level, + ret = add_keyed_refs(ctx, root, path, info_level, &preftrees, sc); if (ret) goto out; diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 1bd5a15c7f9e..ef6bbea3f456 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -116,6 +116,15 @@ struct btrfs_backref_walk_ctx { */ int (*check_extent_item)(u64 bytenr, const struct btrfs_extent_item *ei, const struct extent_buffer *leaf, void *user_ctx); + /* + * If this is not NULL, then the backref walking code will call this for + * each extent data ref it finds (BTRFS_EXTENT_DATA_REF_KEY keys) before + * processing that data ref. If this callback return false, then it will + * ignore this data ref and it will never resolve the indirect data ref, + * saving time searching for leaves in a fs tree with file extent items + * matching the data ref. + */ + bool (*skip_data_ref)(u64 root, u64 ino, u64 offset, void *user_ctx); /* Context object to pass to the callbacks defined above. */ void *user_ctx; }; diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 516b80637bfb..383bc8a5cb6c 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -1284,6 +1284,10 @@ struct backref_ctx { /* The bytenr the file extent item we are processing refers to. */ u64 bytenr; + /* The owner (root id) of the data backref for the current extent. */ + u64 backref_owner; + /* The offset of the data backref for the current extent. */ + u64 backref_offset; }; static int __clone_root_cmp_bsearch(const void *key, const void *elt) @@ -1558,6 +1562,18 @@ static int check_extent_item(u64 bytenr, const struct btrfs_extent_item *ei, return 0; } +static bool skip_self_data_ref(u64 root, u64 ino, u64 offset, void *ctx) +{ + const struct backref_ctx *bctx = ctx; + + if (ino == bctx->cur_objectid && + root == bctx->backref_owner && + offset == bctx->backref_offset) + return true; + + return false; +} + /* * Given an inode, offset and extent item, it finds a good clone for a clone * instruction. Returns -ENOENT when none could be found. The function makes @@ -1624,6 +1640,12 @@ static int find_extent_clone(struct send_ctx *sctx, backref_ctx.cur_objectid = ino; backref_ctx.cur_offset = data_offset; backref_ctx.bytenr = disk_byte; + /* + * Use the header owner and not the send root's id, because in case of a + * snapshot we can have shared subtrees. + */ + backref_ctx.backref_owner = btrfs_header_owner(eb); + backref_ctx.backref_offset = data_offset - btrfs_file_extent_offset(eb, fi); /* * The last extent of a file may be too large due to page alignment. @@ -1648,6 +1670,17 @@ static int find_extent_clone(struct send_ctx *sctx, backref_walk_ctx.check_extent_item = check_extent_item; backref_walk_ctx.user_ctx = &backref_ctx; + /* + * If have a single clone root, then it's the send root and we can tell + * the backref walking code to skip our own backref and not resolve it, + * since we can not use it for cloning - the source and destination + * ranges can't overlap and in case the leaf is shared through a subtree + * due to snapshots, we can't use those other roots since they are not + * in the list of clone roots. + */ + if (sctx->clone_roots_cnt == 1) + backref_walk_ctx.skip_data_ref = skip_self_data_ref; + ret = iterate_extent_inodes(&backref_walk_ctx, true, iterate_backrefs, &backref_ctx); if (ret < 0) -- cgit v1.2.3 From 789d6a3a876e32c23fc9633d5b372d02a5188f0e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 14 Sep 2022 13:32:50 +0800 Subject: btrfs: concentrate all tree block parentness check parameters into one structure There are several different tree block parentness check parameters used across several helpers: - level Mandatory - transid Under most cases it's mandatory, but there are several backref cases which skips this check. - owner_root - first_key Utilized by most top-down tree search routine. Otherwise can be skipped. Those four members are not always mandatory checks, and some of them are the same u64, which means if some arguments got swapped compiler will not catch it. Furthermore if we're going to further expand the parentness check, we need to modify quite some helpers just to add one more parameter. This patch will concentrate all these members into a structure called btrfs_tree_parent_check, and pass that structure for the following helpers: - btrfs_read_extent_buffer() - read_tree_block() Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/backref.c | 15 ++++++++---- fs/btrfs/ctree.c | 28 +++++++++++++--------- fs/btrfs/disk-io.c | 63 ++++++++++++++++++++++++++++--------------------- fs/btrfs/disk-io.h | 36 ++++++++++++++++++++++++---- fs/btrfs/extent-tree.c | 12 ++++++---- fs/btrfs/print-tree.c | 14 ++++++----- fs/btrfs/qgroup.c | 18 ++++++++++---- fs/btrfs/relocation.c | 11 ++++++--- fs/btrfs/tree-log.c | 25 ++++++++++++++------ fs/btrfs/tree-mod-log.c | 9 +++++-- 10 files changed, 159 insertions(+), 72 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 430974cf3b96..55c072ba6747 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -840,6 +840,8 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info, struct rb_node *node; while ((node = rb_first_cached(&tree->root))) { + struct btrfs_tree_parent_check check = { 0 }; + ref = rb_entry(node, struct prelim_ref, rbnode); rb_erase_cached(node, &tree->root); @@ -847,8 +849,10 @@ static int add_missing_keys(struct btrfs_fs_info *fs_info, BUG_ON(ref->key_for_search.type); BUG_ON(!ref->wanted_disk_byte); - eb = read_tree_block(fs_info, ref->wanted_disk_byte, - ref->root_id, 0, ref->level - 1, NULL); + check.level = ref->level - 1; + check.owner_root = ref->root_id; + + eb = read_tree_block(fs_info, ref->wanted_disk_byte, &check); if (IS_ERR(eb)) { free_pref(ref); return PTR_ERR(eb); @@ -1591,10 +1595,13 @@ again: if (ref->count && ref->parent) { if (!ctx->ignore_extent_item_pos && !ref->inode_list && ref->level == 0) { + struct btrfs_tree_parent_check check = { 0 }; struct extent_buffer *eb; - eb = read_tree_block(ctx->fs_info, ref->parent, 0, - 0, ref->level, NULL); + check.level = ref->level; + + eb = read_tree_block(ctx->fs_info, ref->parent, + &check); if (IS_ERR(eb)) { ret = PTR_ERR(eb); goto out; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0acd85111cdf..f75e398d7b71 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -857,19 +857,22 @@ struct extent_buffer *btrfs_read_node_slot(struct extent_buffer *parent, int slot) { int level = btrfs_header_level(parent); + struct btrfs_tree_parent_check check = { 0 }; struct extent_buffer *eb; - struct btrfs_key first_key; if (slot < 0 || slot >= btrfs_header_nritems(parent)) return ERR_PTR(-ENOENT); BUG_ON(level == 0); - btrfs_node_key_to_cpu(parent, &first_key, slot); + check.level = level - 1; + check.transid = btrfs_node_ptr_generation(parent, slot); + check.owner_root = btrfs_header_owner(parent); + check.has_first_key = true; + btrfs_node_key_to_cpu(parent, &check.first_key, slot); + eb = read_tree_block(parent->fs_info, btrfs_node_blockptr(parent, slot), - btrfs_header_owner(parent), - btrfs_node_ptr_generation(parent, slot), - level - 1, &first_key); + &check); if (IS_ERR(eb)) return eb; if (!extent_buffer_uptodate(eb)) { @@ -1428,10 +1431,10 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, const struct btrfs_key *key) { struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_tree_parent_check check = { 0 }; u64 blocknr; u64 gen; struct extent_buffer *tmp; - struct btrfs_key first_key; int ret; int parent_level; bool unlock_up; @@ -1440,7 +1443,11 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, blocknr = btrfs_node_blockptr(*eb_ret, slot); gen = btrfs_node_ptr_generation(*eb_ret, slot); parent_level = btrfs_header_level(*eb_ret); - btrfs_node_key_to_cpu(*eb_ret, &first_key, slot); + btrfs_node_key_to_cpu(*eb_ret, &check.first_key, slot); + check.has_first_key = true; + check.level = parent_level - 1; + check.transid = gen; + check.owner_root = root->root_key.objectid; /* * If we need to read an extent buffer from disk and we are holding locks @@ -1462,7 +1469,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, * parents (shared tree blocks). */ if (btrfs_verify_level_key(tmp, - parent_level - 1, &first_key, gen)) { + parent_level - 1, &check.first_key, gen)) { free_extent_buffer(tmp); return -EUCLEAN; } @@ -1479,7 +1486,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, btrfs_unlock_up_safe(p, level + 1); /* now we're allowed to do a blocking uptodate check */ - ret = btrfs_read_extent_buffer(tmp, gen, parent_level - 1, &first_key); + ret = btrfs_read_extent_buffer(tmp, &check); if (ret) { free_extent_buffer(tmp); btrfs_release_path(p); @@ -1509,8 +1516,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (p->reada != READA_NONE) reada_for_search(fs_info, p, level, slot, key->objectid); - tmp = read_tree_block(fs_info, blocknr, root->root_key.objectid, - gen, parent_level - 1, &first_key); + tmp = read_tree_block(fs_info, blocknr, &check); if (IS_ERR(tmp)) { btrfs_release_path(p); return PTR_ERR(tmp); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e602e0b4c25d..2f944a7c70d5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -259,13 +259,11 @@ int btrfs_verify_level_key(struct extent_buffer *eb, int level, * helper to read a given tree block, doing retries as required when * the checksums don't match and we have alternate mirrors to try. * - * @parent_transid: expected transid, skip check if 0 - * @level: expected level, mandatory check - * @first_key: expected key of first slot, skip check if NULL + * @check: expected tree parentness check, see the comments of the + * structure for details. */ int btrfs_read_extent_buffer(struct extent_buffer *eb, - u64 parent_transid, int level, - struct btrfs_key *first_key) + struct btrfs_tree_parent_check *check) { struct btrfs_fs_info *fs_info = eb->fs_info; struct extent_io_tree *io_tree; @@ -275,16 +273,19 @@ int btrfs_read_extent_buffer(struct extent_buffer *eb, int mirror_num = 0; int failed_mirror = 0; + ASSERT(check); + io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree; while (1) { clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num); if (!ret) { - if (verify_parent_transid(io_tree, eb, - parent_transid, 0)) + if (verify_parent_transid(io_tree, eb, check->transid, 0)) ret = -EIO; - else if (btrfs_verify_level_key(eb, level, - first_key, parent_transid)) + else if (btrfs_verify_level_key(eb, check->level, + check->has_first_key ? + &check->first_key : NULL, + check->transid)) ret = -EUCLEAN; else break; @@ -936,28 +937,28 @@ struct extent_buffer *btrfs_find_create_tree_block( * Read tree block at logical address @bytenr and do variant basic but critical * verification. * - * @owner_root: the objectid of the root owner for this block. - * @parent_transid: expected transid of this tree block, skip check if 0 - * @level: expected level, mandatory check - * @first_key: expected key in slot 0, skip check if NULL + * @check: expected tree parentness check, see comments of the + * structure for details. */ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, - u64 owner_root, u64 parent_transid, - int level, struct btrfs_key *first_key) + struct btrfs_tree_parent_check *check) { struct extent_buffer *buf = NULL; int ret; - buf = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level); + ASSERT(check); + + buf = btrfs_find_create_tree_block(fs_info, bytenr, check->owner_root, + check->level); if (IS_ERR(buf)) return buf; - ret = btrfs_read_extent_buffer(buf, parent_transid, level, first_key); + ret = btrfs_read_extent_buffer(buf, check); if (ret) { free_extent_buffer_stale(buf); return ERR_PTR(ret); } - if (btrfs_check_eb_owner(buf, owner_root)) { + if (btrfs_check_eb_owner(buf, check->owner_root)) { free_extent_buffer_stale(buf); return ERR_PTR(-EUCLEAN); } @@ -1373,6 +1374,7 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root, struct btrfs_key *key) { struct btrfs_root *root; + struct btrfs_tree_parent_check check = { 0 }; struct btrfs_fs_info *fs_info = tree_root->fs_info; u64 generation; int ret; @@ -1392,9 +1394,11 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root, generation = btrfs_root_generation(&root->root_item); level = btrfs_root_level(&root->root_item); - root->node = read_tree_block(fs_info, - btrfs_root_bytenr(&root->root_item), - key->objectid, generation, level, NULL); + check.level = level; + check.transid = generation; + check.owner_root = key->objectid; + root->node = read_tree_block(fs_info, btrfs_root_bytenr(&root->root_item), + &check); if (IS_ERR(root->node)) { ret = PTR_ERR(root->node); root->node = NULL; @@ -2367,6 +2371,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, struct btrfs_fs_devices *fs_devices) { int ret; + struct btrfs_tree_parent_check check = { 0 }; struct btrfs_root *log_tree_root; struct btrfs_super_block *disk_super = fs_info->super_copy; u64 bytenr = btrfs_super_log_root(disk_super); @@ -2382,10 +2387,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, if (!log_tree_root) return -ENOMEM; - log_tree_root->node = read_tree_block(fs_info, bytenr, - BTRFS_TREE_LOG_OBJECTID, - fs_info->generation + 1, level, - NULL); + check.level = level; + check.transid = fs_info->generation + 1; + check.owner_root = BTRFS_TREE_LOG_OBJECTID; + log_tree_root->node = read_tree_block(fs_info, bytenr, &check); if (IS_ERR(log_tree_root->node)) { btrfs_warn(fs_info, "failed to read log tree"); ret = PTR_ERR(log_tree_root->node); @@ -2863,10 +2868,14 @@ out: static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level) { + struct btrfs_tree_parent_check check = { + .level = level, + .transid = gen, + .owner_root = root->root_key.objectid + }; int ret = 0; - root->node = read_tree_block(root->fs_info, bytenr, - root->root_key.objectid, gen, level, NULL); + root->node = read_tree_block(root->fs_info, bytenr, &check); if (IS_ERR(root->node)) { ret = PTR_ERR(root->node); root->node = NULL; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index f2c507fd0e04..03fe4154ffb8 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -25,6 +25,35 @@ static inline u64 btrfs_sb_offset(int mirror) return BTRFS_SUPER_INFO_OFFSET; } +/* All the extra info needed to verify the parentness of a tree block. */ +struct btrfs_tree_parent_check { + /* + * The owner check against the tree block. + * + * Can be 0 to skip the owner check. + */ + u64 owner_root; + + /* + * Expected transid, can be 0 to skip the check, but such skip + * should only be utlized for backref walk related code. + */ + u64 transid; + + /* + * The expected first key. + * + * This check can be skipped if @has_first_key is false, such skip + * can happen for case where we don't have the parent node key, + * e.g. reading the tree root, doing backref walk. + */ + struct btrfs_key first_key; + bool has_first_key; + + /* The expected level. Should always be set. */ + u8 level; +}; + struct btrfs_device; struct btrfs_fs_devices; @@ -33,8 +62,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info); int btrfs_verify_level_key(struct extent_buffer *eb, int level, struct btrfs_key *first_key, u64 parent_transid); struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, - u64 owner_root, u64 parent_transid, - int level, struct btrfs_key *first_key); + struct btrfs_tree_parent_check *check); struct extent_buffer *btrfs_find_create_tree_block( struct btrfs_fs_info *fs_info, u64 bytenr, u64 owner_root, @@ -111,8 +139,8 @@ void btrfs_put_root(struct btrfs_root *root); void btrfs_mark_buffer_dirty(struct extent_buffer *buf); int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, int atomic); -int btrfs_read_extent_buffer(struct extent_buffer *buf, u64 parent_transid, - int level, struct btrfs_key *first_key); +int btrfs_read_extent_buffer(struct extent_buffer *buf, + struct btrfs_tree_parent_check *check); enum btrfs_wq_submit_cmd { WQ_SUBMIT_METADATA, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index b037107678c8..10cc757a602b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5261,8 +5261,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, u64 bytenr; u64 generation; u64 parent; + struct btrfs_tree_parent_check check = { 0 }; struct btrfs_key key; - struct btrfs_key first_key; struct btrfs_ref ref = { 0 }; struct extent_buffer *next; int level = wc->level; @@ -5284,7 +5284,12 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, } bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); - btrfs_node_key_to_cpu(path->nodes[level], &first_key, + + check.level = level - 1; + check.transid = generation; + check.owner_root = root->root_key.objectid; + check.has_first_key = true; + btrfs_node_key_to_cpu(path->nodes[level], &check.first_key, path->slots[level]); next = find_extent_buffer(fs_info, bytenr); @@ -5346,8 +5351,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, if (!next) { if (reada && level == 1) reada_walk_down(trans, root, wc, path); - next = read_tree_block(fs_info, bytenr, root->root_key.objectid, - generation, level - 1, &first_key); + next = read_tree_block(fs_info, bytenr, &check); if (IS_ERR(next)) { return PTR_ERR(next); } else if (!extent_buffer_uptodate(next)) { diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 1a2350fd68be..1469aa55ad48 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -386,14 +386,16 @@ void btrfs_print_tree(struct extent_buffer *c, bool follow) if (!follow) return; for (i = 0; i < nr; i++) { - struct btrfs_key first_key; + struct btrfs_tree_parent_check check = { + .level = level - 1, + .transid = btrfs_node_ptr_generation(c, i), + .owner_root = btrfs_header_owner(c), + .has_first_key = true + }; struct extent_buffer *next; - btrfs_node_key_to_cpu(c, &first_key, i); - next = read_tree_block(fs_info, btrfs_node_blockptr(c, i), - btrfs_header_owner(c), - btrfs_node_ptr_generation(c, i), - level - 1, &first_key); + btrfs_node_key_to_cpu(c, &check.first_key, i); + next = read_tree_block(fs_info, btrfs_node_blockptr(c, i), &check); if (IS_ERR(next)) continue; if (!extent_buffer_uptodate(next)) { diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 24c013c61a94..e0522c6c0d67 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -2339,7 +2339,13 @@ int btrfs_qgroup_trace_subtree(struct btrfs_trans_handle *trans, } if (!extent_buffer_uptodate(root_eb)) { - ret = btrfs_read_extent_buffer(root_eb, root_gen, root_level, NULL); + struct btrfs_tree_parent_check check = { + .has_first_key = false, + .transid = root_gen, + .level = root_level + }; + + ret = btrfs_read_extent_buffer(root_eb, &check); if (ret) goto out; } @@ -4303,6 +4309,7 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, struct extent_buffer *subvol_eb) { struct btrfs_fs_info *fs_info = root->fs_info; + struct btrfs_tree_parent_check check = { 0 }; struct btrfs_qgroup_swapped_blocks *blocks = &root->swapped_blocks; struct btrfs_qgroup_swapped_block *block; struct extent_buffer *reloc_eb = NULL; @@ -4351,10 +4358,13 @@ int btrfs_qgroup_trace_subtree_after_cow(struct btrfs_trans_handle *trans, blocks->swapped = swapped; spin_unlock(&blocks->lock); + check.level = block->level; + check.transid = block->reloc_generation; + check.has_first_key = true; + memcpy(&check.first_key, &block->first_key, sizeof(check.first_key)); + /* Read out reloc subtree root */ - reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, 0, - block->reloc_generation, block->level, - &block->first_key); + reloc_eb = read_tree_block(fs_info, block->reloc_bytenr, &check); if (IS_ERR(reloc_eb)) { ret = PTR_ERR(reloc_eb); reloc_eb = NULL; diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 8914aa920bb7..56c8afa6f6d2 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2610,10 +2610,14 @@ static int tree_block_processed(u64 bytenr, struct reloc_control *rc) static int get_tree_block_key(struct btrfs_fs_info *fs_info, struct tree_block *block) { + struct btrfs_tree_parent_check check = { + .level = block->level, + .owner_root = block->owner, + .transid = block->key.offset + }; struct extent_buffer *eb; - eb = read_tree_block(fs_info, block->bytenr, block->owner, - block->key.offset, block->level, NULL); + eb = read_tree_block(fs_info, block->bytenr, &check); if (IS_ERR(eb)) return PTR_ERR(eb); if (!extent_buffer_uptodate(eb)) { @@ -3426,9 +3430,10 @@ int add_data_references(struct reloc_control *rc, ULIST_ITER_INIT(&leaf_uiter); while ((ref_node = ulist_next(ctx.refs, &leaf_uiter))) { + struct btrfs_tree_parent_check check = { 0 }; struct extent_buffer *eb; - eb = read_tree_block(ctx.fs_info, ref_node->val, 0, 0, 0, NULL); + eb = read_tree_block(ctx.fs_info, ref_node->val, &check); if (IS_ERR(eb)) { ret = PTR_ERR(eb); break; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index f7b1bb9c63e4..4d9f6803bfbe 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -341,7 +341,12 @@ static int process_one_buffer(struct btrfs_root *log, * pin down any logged extents, so we have to read the block. */ if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) { - ret = btrfs_read_extent_buffer(eb, gen, level, NULL); + struct btrfs_tree_parent_check check = { + .level = level, + .transid = gen + }; + + ret = btrfs_read_extent_buffer(eb, &check); if (ret) return ret; } @@ -2411,13 +2416,17 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, struct walk_control *wc, u64 gen, int level) { int nritems; + struct btrfs_tree_parent_check check = { + .transid = gen, + .level = level + }; struct btrfs_path *path; struct btrfs_root *root = wc->replay_dest; struct btrfs_key key; int i; int ret; - ret = btrfs_read_extent_buffer(eb, gen, level, NULL); + ret = btrfs_read_extent_buffer(eb, &check); if (ret) return ret; @@ -2597,7 +2606,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, int ret = 0; while (*level > 0) { - struct btrfs_key first_key; + struct btrfs_tree_parent_check check = { 0 }; cur = path->nodes[*level]; @@ -2609,7 +2618,10 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, bytenr = btrfs_node_blockptr(cur, path->slots[*level]); ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); - btrfs_node_key_to_cpu(cur, &first_key, path->slots[*level]); + check.transid = ptr_gen; + check.level = *level - 1; + check.has_first_key = true; + btrfs_node_key_to_cpu(cur, &check.first_key, path->slots[*level]); blocksize = fs_info->nodesize; next = btrfs_find_create_tree_block(fs_info, bytenr, @@ -2628,8 +2640,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, path->slots[*level]++; if (wc->free) { - ret = btrfs_read_extent_buffer(next, ptr_gen, - *level - 1, &first_key); + ret = btrfs_read_extent_buffer(next, &check); if (ret) { free_extent_buffer(next); return ret; @@ -2657,7 +2668,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, free_extent_buffer(next); continue; } - ret = btrfs_read_extent_buffer(next, ptr_gen, *level - 1, &first_key); + ret = btrfs_read_extent_buffer(next, &check); if (ret) { free_extent_buffer(next); return ret; diff --git a/fs/btrfs/tree-mod-log.c b/fs/btrfs/tree-mod-log.c index 3bea19698a10..779ad44d285f 100644 --- a/fs/btrfs/tree-mod-log.c +++ b/fs/btrfs/tree-mod-log.c @@ -821,10 +821,15 @@ struct extent_buffer *btrfs_get_old_root(struct btrfs_root *root, u64 time_seq) tm = tree_mod_log_search(fs_info, logical, time_seq); if (old_root && tm && tm->op != BTRFS_MOD_LOG_KEY_REMOVE_WHILE_FREEING) { + struct btrfs_tree_parent_check check = { 0 }; + btrfs_tree_read_unlock(eb_root); free_extent_buffer(eb_root); - old = read_tree_block(fs_info, logical, root->root_key.objectid, - 0, level, NULL); + + check.level = level; + check.owner_root = root->root_key.objectid; + + old = read_tree_block(fs_info, logical, &check); if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) { if (!IS_ERR(old)) free_extent_buffer(old); -- cgit v1.2.3 From 27137fac4c0628fc8320bb7f1ce3bb9f84b76a9b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Nov 2022 10:44:04 +0100 Subject: btrfs: move struct btrfs_tree_parent_check out of disk-io.h Move struct btrfs_tree_parent_check out of disk-io.h so that volumes.h an various .c files don't have to include disk-io.h just for it. Reviewed-by: Josef Bacik Reviewed-by: Johannes Thumshirn Reviewed-by: Qu Wenruo Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba [ use tree-checker.h for the structure ] Signed-off-by: David Sterba --- fs/btrfs/backref.c | 1 + fs/btrfs/disk-io.h | 30 +----------------------------- fs/btrfs/print-tree.c | 1 + fs/btrfs/qgroup.c | 1 + fs/btrfs/tree-checker.h | 35 +++++++++++++++++++++++++++++++++-- fs/btrfs/tree-mod-log.c | 1 + fs/btrfs/volumes.h | 2 +- 7 files changed, 39 insertions(+), 32 deletions(-) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 55c072ba6747..21c92c74bf71 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -19,6 +19,7 @@ #include "accessors.h" #include "extent-tree.h" #include "relocation.h" +#include "tree-checker.h" /* Just arbitrary numbers so we can be sure one of these happened. */ #define BACKREF_FOUND_SHARED 6 diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 03fe4154ffb8..363935cfc084 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -25,37 +25,9 @@ static inline u64 btrfs_sb_offset(int mirror) return BTRFS_SUPER_INFO_OFFSET; } -/* All the extra info needed to verify the parentness of a tree block. */ -struct btrfs_tree_parent_check { - /* - * The owner check against the tree block. - * - * Can be 0 to skip the owner check. - */ - u64 owner_root; - - /* - * Expected transid, can be 0 to skip the check, but such skip - * should only be utlized for backref walk related code. - */ - u64 transid; - - /* - * The expected first key. - * - * This check can be skipped if @has_first_key is false, such skip - * can happen for case where we don't have the parent node key, - * e.g. reading the tree root, doing backref walk. - */ - struct btrfs_key first_key; - bool has_first_key; - - /* The expected level. Should always be set. */ - u8 level; -}; - struct btrfs_device; struct btrfs_fs_devices; +struct btrfs_tree_parent_check; void btrfs_check_leaked_roots(struct btrfs_fs_info *fs_info); void btrfs_init_fs_info(struct btrfs_fs_info *fs_info); diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c index 1469aa55ad48..b93c96213304 100644 --- a/fs/btrfs/print-tree.c +++ b/fs/btrfs/print-tree.c @@ -8,6 +8,7 @@ #include "disk-io.h" #include "print-tree.h" #include "accessors.h" +#include "tree-checker.h" struct root_name_map { u64 id; diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index e0522c6c0d67..5c636e00d77d 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -28,6 +28,7 @@ #include "accessors.h" #include "extent-tree.h" #include "root-tree.h" +#include "tree-checker.h" /* * Helpers to access qgroup reservation diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index ece497e26558..bfb5efa4e01f 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -6,8 +6,39 @@ #ifndef BTRFS_TREE_CHECKER_H #define BTRFS_TREE_CHECKER_H -#include "ctree.h" -#include "extent_io.h" +#include + +struct extent_buffer; +struct btrfs_chunk; + +/* All the extra info needed to verify the parentness of a tree block. */ +struct btrfs_tree_parent_check { + /* + * The owner check against the tree block. + * + * Can be 0 to skip the owner check. + */ + u64 owner_root; + + /* + * Expected transid, can be 0 to skip the check, but such skip + * should only be utlized for backref walk related code. + */ + u64 transid; + + /* + * The expected first key. + * + * This check can be skipped if @has_first_key is false, such skip + * can happen for case where we don't have the parent node key, + * e.g. reading the tree root, doing backref walk. + */ + struct btrfs_key first_key; + bool has_first_key; + + /* The expected level. Should always be set. */ + u8 level; +}; /* * Comprehensive leaf checker. diff --git a/fs/btrfs/tree-mod-log.c b/fs/btrfs/tree-mod-log.c index 779ad44d285f..146a6b198933 100644 --- a/fs/btrfs/tree-mod-log.c +++ b/fs/btrfs/tree-mod-log.c @@ -5,6 +5,7 @@ #include "disk-io.h" #include "fs.h" #include "accessors.h" +#include "tree-checker.h" struct tree_mod_root { u64 logical; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 2c90e50c460a..ab551471c1f8 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -11,7 +11,7 @@ #include #include "async-thread.h" #include "messages.h" -#include "disk-io.h" +#include "tree-checker.h" #include "rcu-string.h" #define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G) -- cgit v1.2.3 From 560840afc3e63bbe5d9c5ef6b2ecf8f3589adff6 Mon Sep 17 00:00:00 2001 From: Boris Burkov Date: Wed, 14 Dec 2022 15:05:08 -0800 Subject: btrfs: fix resolving backrefs for inline extent followed by prealloc If a file consists of an inline extent followed by a regular or prealloc extent, then a legitimate attempt to resolve a logical address in the non-inline region will result in add_all_parents reading the invalid offset field of the inline extent. If the inline extent item is placed in the leaf eb s.t. it is the first item, attempting to access the offset field will not only be meaningless, it will go past the end of the eb and cause this panic: [17.626048] BTRFS warning (device dm-2): bad eb member end: ptr 0x3fd4 start 30834688 member offset 16377 size 8 [17.631693] general protection fault, probably for non-canonical address 0x5088000000000: 0000 [#1] SMP PTI [17.635041] CPU: 2 PID: 1267 Comm: btrfs Not tainted 5.12.0-07246-g75175d5adc74-dirty #199 [17.637969] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 [17.641995] RIP: 0010:btrfs_get_64+0xe7/0x110 [17.649890] RSP: 0018:ffffc90001f73a08 EFLAGS: 00010202 [17.651652] RAX: 0000000000000001 RBX: ffff88810c42d000 RCX: 0000000000000000 [17.653921] RDX: 0005088000000000 RSI: ffffc90001f73a0f RDI: 0000000000000001 [17.656174] RBP: 0000000000000ff9 R08: 0000000000000007 R09: c0000000fffeffff [17.658441] R10: ffffc90001f73790 R11: ffffc90001f73788 R12: ffff888106afe918 [17.661070] R13: 0000000000003fd4 R14: 0000000000003f6f R15: cdcdcdcdcdcdcdcd [17.663617] FS: 00007f64e7627d80(0000) GS:ffff888237c80000(0000) knlGS:0000000000000000 [17.666525] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [17.668664] CR2: 000055d4a39152e8 CR3: 000000010c596002 CR4: 0000000000770ee0 [17.671253] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [17.673634] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [17.676034] PKRU: 55555554 [17.677004] Call Trace: [17.677877] add_all_parents+0x276/0x480 [17.679325] find_parent_nodes+0xfae/0x1590 [17.680771] btrfs_find_all_leafs+0x5e/0xa0 [17.682217] iterate_extent_inodes+0xce/0x260 [17.683809] ? btrfs_inode_flags_to_xflags+0x50/0x50 [17.685597] ? iterate_inodes_from_logical+0xa1/0xd0 [17.687404] iterate_inodes_from_logical+0xa1/0xd0 [17.689121] ? btrfs_inode_flags_to_xflags+0x50/0x50 [17.691010] btrfs_ioctl_logical_to_ino+0x131/0x190 [17.692946] btrfs_ioctl+0x104a/0x2f60 [17.694384] ? selinux_file_ioctl+0x182/0x220 [17.695995] ? __x64_sys_ioctl+0x84/0xc0 [17.697394] __x64_sys_ioctl+0x84/0xc0 [17.698697] do_syscall_64+0x33/0x40 [17.700017] entry_SYSCALL_64_after_hwframe+0x44/0xae [17.701753] RIP: 0033:0x7f64e72761b7 [17.709355] RSP: 002b:00007ffefb067f58 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 [17.712088] RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007f64e72761b7 [17.714667] RDX: 00007ffefb067fb0 RSI: 00000000c0389424 RDI: 0000000000000003 [17.717386] RBP: 00007ffefb06d188 R08: 000055d4a390d2b0 R09: 00007f64e7340a60 [17.719938] R10: 0000000000000231 R11: 0000000000000246 R12: 0000000000000001 [17.722383] R13: 0000000000000000 R14: 00000000c0389424 R15: 000055d4a38fd2a0 [17.724839] Modules linked in: Fix the bug by detecting the inline extent item in add_all_parents and skipping to the next extent item. CC: stable@vger.kernel.org # 4.9+ Reviewed-by: Qu Wenruo Signed-off-by: Boris Burkov Signed-off-by: David Sterba --- fs/btrfs/backref.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/backref.c') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 21c92c74bf71..46851511b661 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -484,6 +484,7 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx, u64 wanted_disk_byte = ref->wanted_disk_byte; u64 count = 0; u64 data_offset; + u8 type; if (level != 0) { eb = path->nodes[level]; @@ -538,6 +539,9 @@ static int add_all_parents(struct btrfs_backref_walk_ctx *ctx, continue; } fi = btrfs_item_ptr(eb, slot, struct btrfs_file_extent_item); + type = btrfs_file_extent_type(eb, fi); + if (type == BTRFS_FILE_EXTENT_INLINE) + goto next; disk_byte = btrfs_file_extent_disk_bytenr(eb, fi); data_offset = btrfs_file_extent_offset(eb, fi); -- cgit v1.2.3