diff options
Diffstat (limited to 'fs/btrfs/send.c')
-rw-r--r-- | fs/btrfs/send.c | 138 |
1 files changed, 57 insertions, 81 deletions
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c index 123ac54af071..6a92ecf9eaa2 100644 --- a/fs/btrfs/send.c +++ b/fs/btrfs/send.c @@ -25,6 +25,14 @@ #include "compression.h" /* + * Maximum number of references an extent can have in order for us to attempt to + * issue clone operations instead of write operations. This currently exists to + * avoid hitting limitations of the backreference walking code (taking a lot of + * time and using too much memory for extents with large number of references). + */ +#define SEND_MAX_EXTENT_REFS 64 + +/* * A fs_path is a helper to dynamically build path names with unknown size. * It reallocates the internal buffer on demand. * It allows fast adding of path elements on the right side (normal path) and @@ -1248,12 +1256,21 @@ static int __iterate_backrefs(u64 ino, u64 offset, u64 root, void *ctx_) */ if (found->root == bctx->sctx->send_root) { /* - * TODO for the moment we don't accept clones from the inode - * that is currently send. We may change this when - * BTRFS_IOC_CLONE_RANGE supports cloning from and to the same - * file. + * If the source inode was not yet processed we can't issue a + * clone operation, as the source extent does not exist yet at + * the destination of the stream. + */ + if (ino > bctx->cur_objectid) + return 0; + /* + * We clone from the inode currently being sent as long as the + * source extent is already processed, otherwise we could try + * to clone from an extent that does not exist yet at the + * destination of the stream. */ - if (ino >= bctx->cur_objectid) + if (ino == bctx->cur_objectid && + offset + bctx->extent_len > + bctx->sctx->cur_inode_next_write_offset) return 0; } @@ -1302,6 +1319,7 @@ static int find_extent_clone(struct send_ctx *sctx, struct clone_root *cur_clone_root; struct btrfs_key found_key; struct btrfs_path *tmp_path; + struct btrfs_extent_item *ei; int compressed; u32 i; @@ -1349,7 +1367,6 @@ static int find_extent_clone(struct send_ctx *sctx, ret = extent_from_logical(fs_info, disk_byte, tmp_path, &found_key, &flags); up_read(&fs_info->commit_root_sem); - btrfs_release_path(tmp_path); if (ret < 0) goto out; @@ -1358,6 +1375,21 @@ static int find_extent_clone(struct send_ctx *sctx, goto out; } + ei = btrfs_item_ptr(tmp_path->nodes[0], tmp_path->slots[0], + struct btrfs_extent_item); + /* + * Backreference walking (iterate_extent_inodes() below) is currently + * too expensive when an extent has a large number of references, both + * in time spent and used memory. So for now just fallback to write + * operations instead of clone operations when an extent has more than + * a certain amount of references. + */ + if (btrfs_extent_refs(tmp_path->nodes[0], ei) > SEND_MAX_EXTENT_REFS) { + ret = -ENOENT; + goto out; + } + btrfs_release_path(tmp_path); + /* * Setup the clone roots. */ @@ -4779,7 +4811,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len) key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(fs_info->sb, &key, root, NULL); + inode = btrfs_iget(fs_info->sb, &key, root); if (IS_ERR(inode)) return PTR_ERR(inode); @@ -5554,10 +5586,7 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset) { struct btrfs_path *path; struct btrfs_root *root = sctx->send_root; - struct btrfs_file_extent_item *fi; struct btrfs_key key; - u64 extent_end; - u8 type; int ret; path = alloc_path_for_send(); @@ -5577,18 +5606,7 @@ static int get_last_extent(struct send_ctx *sctx, u64 offset) if (key.objectid != sctx->cur_ino || key.type != BTRFS_EXTENT_DATA_KEY) goto out; - fi = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); - type = btrfs_file_extent_type(path->nodes[0], fi); - if (type == BTRFS_FILE_EXTENT_INLINE) { - u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi); - extent_end = ALIGN(key.offset + size, - sctx->send_root->fs_info->sectorsize); - } else { - extent_end = key.offset + - btrfs_file_extent_num_bytes(path->nodes[0], fi); - } - sctx->cur_inode_last_extent = extent_end; + sctx->cur_inode_last_extent = btrfs_file_extent_end(path); out: btrfs_free_path(path); return ret; @@ -5642,16 +5660,7 @@ static int range_is_hole_in_parent(struct send_ctx *sctx, break; fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, fi) == - BTRFS_FILE_EXTENT_INLINE) { - u64 size = btrfs_file_extent_ram_bytes(leaf, fi); - - extent_end = ALIGN(key.offset + size, - root->fs_info->sectorsize); - } else { - extent_end = key.offset + - btrfs_file_extent_num_bytes(leaf, fi); - } + extent_end = btrfs_file_extent_end(path); if (extent_end <= start) goto next; if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) { @@ -5672,9 +5681,6 @@ out: static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, struct btrfs_key *key) { - struct btrfs_file_extent_item *fi; - u64 extent_end; - u8 type; int ret = 0; if (sctx->cur_ino != key->objectid || !need_send_hole(sctx)) @@ -5686,18 +5692,6 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, return ret; } - fi = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_file_extent_item); - type = btrfs_file_extent_type(path->nodes[0], fi); - if (type == BTRFS_FILE_EXTENT_INLINE) { - u64 size = btrfs_file_extent_ram_bytes(path->nodes[0], fi); - extent_end = ALIGN(key->offset + size, - sctx->send_root->fs_info->sectorsize); - } else { - extent_end = key->offset + - btrfs_file_extent_num_bytes(path->nodes[0], fi); - } - if (path->slots[0] == 0 && sctx->cur_inode_last_extent < key->offset) { /* @@ -5723,7 +5717,7 @@ static int maybe_send_hole(struct send_ctx *sctx, struct btrfs_path *path, else ret = 0; } - sctx->cur_inode_last_extent = extent_end; + sctx->cur_inode_last_extent = btrfs_file_extent_end(path); return ret; } @@ -7034,7 +7028,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) int clone_sources_to_rollback = 0; unsigned alloc_size; int sort_clone_roots = 0; - int index; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -7053,12 +7046,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) spin_unlock(&send_root->root_item_lock); /* - * This is done when we lookup the root, it should already be complete - * by the time we get here. - */ - WARN_ON(send_root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE); - - /* * Userspace tools do the checks and warn the user if it's * not RO. */ @@ -7078,13 +7065,6 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) goto out; } - if (!access_ok(arg->clone_sources, - sizeof(*arg->clone_sources) * - arg->clone_sources_count)) { - ret = -EFAULT; - goto out; - } - if (arg->flags & ~BTRFS_SEND_FLAG_MASK) { ret = -EINVAL; goto out; @@ -7167,11 +7147,8 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; - index = srcu_read_lock(&fs_info->subvol_srcu); - - clone_root = btrfs_read_fs_root_no_name(fs_info, &key); + clone_root = btrfs_get_fs_root(fs_info, &key, true); if (IS_ERR(clone_root)) { - srcu_read_unlock(&fs_info->subvol_srcu, index); ret = PTR_ERR(clone_root); goto out; } @@ -7179,20 +7156,19 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) if (!btrfs_root_readonly(clone_root) || btrfs_root_dead(clone_root)) { spin_unlock(&clone_root->root_item_lock); - srcu_read_unlock(&fs_info->subvol_srcu, index); + btrfs_put_root(clone_root); ret = -EPERM; goto out; } if (clone_root->dedupe_in_progress) { dedupe_in_progress_warn(clone_root); spin_unlock(&clone_root->root_item_lock); - srcu_read_unlock(&fs_info->subvol_srcu, index); + btrfs_put_root(clone_root); ret = -EAGAIN; goto out; } clone_root->send_in_progress++; spin_unlock(&clone_root->root_item_lock); - srcu_read_unlock(&fs_info->subvol_srcu, index); sctx->clone_roots[i].root = clone_root; clone_sources_to_rollback = i + 1; @@ -7206,11 +7182,8 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) key.type = BTRFS_ROOT_ITEM_KEY; key.offset = (u64)-1; - index = srcu_read_lock(&fs_info->subvol_srcu); - - sctx->parent_root = btrfs_read_fs_root_no_name(fs_info, &key); + sctx->parent_root = btrfs_get_fs_root(fs_info, &key, true); if (IS_ERR(sctx->parent_root)) { - srcu_read_unlock(&fs_info->subvol_srcu, index); ret = PTR_ERR(sctx->parent_root); goto out; } @@ -7220,20 +7193,16 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) if (!btrfs_root_readonly(sctx->parent_root) || btrfs_root_dead(sctx->parent_root)) { spin_unlock(&sctx->parent_root->root_item_lock); - srcu_read_unlock(&fs_info->subvol_srcu, index); ret = -EPERM; goto out; } if (sctx->parent_root->dedupe_in_progress) { dedupe_in_progress_warn(sctx->parent_root); spin_unlock(&sctx->parent_root->root_item_lock); - srcu_read_unlock(&fs_info->subvol_srcu, index); ret = -EAGAIN; goto out; } spin_unlock(&sctx->parent_root->root_item_lock); - - srcu_read_unlock(&fs_info->subvol_srcu, index); } /* @@ -7241,7 +7210,8 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg) * is behind the current send position. This is checked while searching * for possible clone sources. */ - sctx->clone_roots[sctx->clone_roots_cnt++].root = sctx->send_root; + sctx->clone_roots[sctx->clone_roots_cnt++].root = + btrfs_grab_root(sctx->send_root); /* We do a bsearch later */ sort(sctx->clone_roots, sctx->clone_roots_cnt, @@ -7326,18 +7296,24 @@ out: } if (sort_clone_roots) { - for (i = 0; i < sctx->clone_roots_cnt; i++) + for (i = 0; i < sctx->clone_roots_cnt; i++) { btrfs_root_dec_send_in_progress( sctx->clone_roots[i].root); + btrfs_put_root(sctx->clone_roots[i].root); + } } else { - for (i = 0; sctx && i < clone_sources_to_rollback; i++) + for (i = 0; sctx && i < clone_sources_to_rollback; i++) { btrfs_root_dec_send_in_progress( sctx->clone_roots[i].root); + btrfs_put_root(sctx->clone_roots[i].root); + } btrfs_root_dec_send_in_progress(send_root); } - if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) + if (sctx && !IS_ERR_OR_NULL(sctx->parent_root)) { btrfs_root_dec_send_in_progress(sctx->parent_root); + btrfs_put_root(sctx->parent_root); + } kvfree(clone_sources_tmp); |