diff options
Diffstat (limited to 'fs/btrfs/ioctl.c')
| -rw-r--r-- | fs/btrfs/ioctl.c | 723 | 
1 files changed, 620 insertions, 103 deletions
| diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 226c91fe31a7..ae98269a5e3a 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -29,6 +29,7 @@  #include <linux/fileattr.h>  #include <linux/fsverity.h>  #include <linux/sched/xacct.h> +#include <linux/io_uring/cmd.h>  #include "ctree.h"  #include "disk-io.h"  #include "export.h" @@ -402,86 +403,6 @@ update_flags:  	return ret;  } -/* - * Start exclusive operation @type, return true on success - */ -bool btrfs_exclop_start(struct btrfs_fs_info *fs_info, -			enum btrfs_exclusive_operation type) -{ -	bool ret = false; - -	spin_lock(&fs_info->super_lock); -	if (fs_info->exclusive_operation == BTRFS_EXCLOP_NONE) { -		fs_info->exclusive_operation = type; -		ret = true; -	} -	spin_unlock(&fs_info->super_lock); - -	return ret; -} - -/* - * Conditionally allow to enter the exclusive operation in case it's compatible - * with the running one.  This must be paired with btrfs_exclop_start_unlock and - * btrfs_exclop_finish. - * - * Compatibility: - * - the same type is already running - * - when trying to add a device and balance has been paused - * - not BTRFS_EXCLOP_NONE - this is intentionally incompatible and the caller - *   must check the condition first that would allow none -> @type - */ -bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info, -				 enum btrfs_exclusive_operation type) -{ -	spin_lock(&fs_info->super_lock); -	if (fs_info->exclusive_operation == type || -	    (fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED && -	     type == BTRFS_EXCLOP_DEV_ADD)) -		return true; - -	spin_unlock(&fs_info->super_lock); -	return false; -} - -void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info) -{ -	spin_unlock(&fs_info->super_lock); -} - -void btrfs_exclop_finish(struct btrfs_fs_info *fs_info) -{ -	spin_lock(&fs_info->super_lock); -	WRITE_ONCE(fs_info->exclusive_operation, BTRFS_EXCLOP_NONE); -	spin_unlock(&fs_info->super_lock); -	sysfs_notify(&fs_info->fs_devices->fsid_kobj, NULL, "exclusive_operation"); -} - -void btrfs_exclop_balance(struct btrfs_fs_info *fs_info, -			  enum btrfs_exclusive_operation op) -{ -	switch (op) { -	case BTRFS_EXCLOP_BALANCE_PAUSED: -		spin_lock(&fs_info->super_lock); -		ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE || -		       fs_info->exclusive_operation == BTRFS_EXCLOP_DEV_ADD || -		       fs_info->exclusive_operation == BTRFS_EXCLOP_NONE || -		       fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED); -		fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE_PAUSED; -		spin_unlock(&fs_info->super_lock); -		break; -	case BTRFS_EXCLOP_BALANCE: -		spin_lock(&fs_info->super_lock); -		ASSERT(fs_info->exclusive_operation == BTRFS_EXCLOP_BALANCE_PAUSED); -		fs_info->exclusive_operation = BTRFS_EXCLOP_BALANCE; -		spin_unlock(&fs_info->super_lock); -		break; -	default: -		btrfs_warn(fs_info, -			"invalid exclop balance operation %d requested", op); -	} -} -  static int btrfs_ioctl_getversion(struct inode *inode, int __user *arg)  {  	return put_user(inode->i_generation, arg); @@ -550,17 +471,6 @@ static noinline int btrfs_ioctl_fitrim(struct btrfs_fs_info *fs_info,  	return ret;  } -int __pure btrfs_is_empty_uuid(const u8 *uuid) -{ -	int i; - -	for (i = 0; i < BTRFS_UUID_SIZE; i++) { -		if (uuid[i]) -			return 0; -	} -	return 1; -} -  /*   * Calculate the number of transaction items to reserve for creating a subvolume   * or snapshot, not including the inode, directory entries, or parent directory. @@ -1048,7 +958,6 @@ static noinline int btrfs_mksnapshot(const struct path *parent,  				   struct btrfs_qgroup_inherit *inherit)  {  	int ret; -	bool snapshot_force_cow = false;  	/*  	 * Force new buffered writes to reserve space even when NOCOW is @@ -1067,15 +976,13 @@ static noinline int btrfs_mksnapshot(const struct path *parent,  	 * creation.  	 */  	atomic_inc(&root->snapshot_force_cow); -	snapshot_force_cow = true;  	btrfs_wait_ordered_extents(root, U64_MAX, NULL);  	ret = btrfs_mksubvol(parent, idmap, name, namelen,  			     root, readonly, inherit); +	atomic_dec(&root->snapshot_force_cow);  out: -	if (snapshot_force_cow) -		atomic_dec(&root->snapshot_force_cow);  	btrfs_drew_read_unlock(&root->snapshot_lock);  	return ret;  } @@ -1308,9 +1215,9 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,  		ret = btrfs_mksubvol(&file->f_path, idmap, name,  				     namelen, NULL, readonly, inherit);  	} else { -		struct fd src = fdget(fd); +		CLASS(fd, src)(fd);  		struct inode *src_inode; -		if (!fd_file(src)) { +		if (fd_empty(src)) {  			ret = -EINVAL;  			goto out_drop_write;  		} @@ -1341,7 +1248,6 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,  					       BTRFS_I(src_inode)->root,  					       readonly, inherit);  		} -		fdput(src);  	}  out_drop_write:  	mnt_drop_write_file(file); @@ -3010,7 +2916,6 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)  	btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);  	btrfs_set_dir_item_key(path->nodes[0], di, &disk_key); -	btrfs_mark_buffer_dirty(trans, path->nodes[0]);  	btrfs_release_path(path);  	btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL); @@ -4058,8 +3963,7 @@ static long btrfs_ioctl_quota_rescan_status(struct btrfs_fs_info *fs_info,  	return 0;  } -static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info, -						void __user *arg) +static long btrfs_ioctl_quota_rescan_wait(struct btrfs_fs_info *fs_info)  {  	if (!capable(CAP_SYS_ADMIN))  		return -EPERM; @@ -4514,12 +4418,17 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,  	size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args,  					     flags);  	size_t copy_end; +	struct btrfs_inode *inode = BTRFS_I(file_inode(file)); +	struct btrfs_fs_info *fs_info = inode->root->fs_info; +	struct extent_io_tree *io_tree = &inode->io_tree;  	struct iovec iovstack[UIO_FASTIOV];  	struct iovec *iov = iovstack;  	struct iov_iter iter;  	loff_t pos;  	struct kiocb kiocb;  	ssize_t ret; +	u64 disk_bytenr, disk_io_size; +	struct extent_state *cached_state = NULL;  	if (!capable(CAP_SYS_ADMIN)) {  		ret = -EPERM; @@ -4572,7 +4481,32 @@ static int btrfs_ioctl_encoded_read(struct file *file, void __user *argp,  	init_sync_kiocb(&kiocb, file);  	kiocb.ki_pos = pos; -	ret = btrfs_encoded_read(&kiocb, &iter, &args); +	ret = btrfs_encoded_read(&kiocb, &iter, &args, &cached_state, +				 &disk_bytenr, &disk_io_size); + +	if (ret == -EIOCBQUEUED) { +		bool unlocked = false; +		u64 start, lockend, count; + +		start = ALIGN_DOWN(kiocb.ki_pos, fs_info->sectorsize); +		lockend = start + BTRFS_MAX_UNCOMPRESSED - 1; + +		if (args.compression) +			count = disk_io_size; +		else +			count = args.len; + +		ret = btrfs_encoded_read_regular(&kiocb, &iter, start, lockend, +						 &cached_state, disk_bytenr, +						 disk_io_size, count, +						 args.compression, &unlocked); + +		if (!unlocked) { +			unlock_extent(io_tree, start, lockend, &cached_state); +			btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); +		} +	} +  	if (ret >= 0) {  		fsnotify_access(file);  		if (copy_to_user(argp + copy_end, @@ -4690,6 +4624,585 @@ out_acct:  	return ret;  } +/* + * Context that's attached to an encoded read io_uring command, in cmd->pdu. It + * contains the fields in btrfs_uring_read_extent that are necessary to finish + * off and cleanup the I/O in btrfs_uring_read_finished. + */ +struct btrfs_uring_priv { +	struct io_uring_cmd *cmd; +	struct page **pages; +	unsigned long nr_pages; +	struct kiocb iocb; +	struct iovec *iov; +	struct iov_iter iter; +	struct extent_state *cached_state; +	u64 count; +	u64 start; +	u64 lockend; +	int err; +	bool compressed; +}; + +struct io_btrfs_cmd { +	struct btrfs_uring_priv *priv; +}; + +static void btrfs_uring_read_finished(struct io_uring_cmd *cmd, unsigned int issue_flags) +{ +	struct io_btrfs_cmd *bc = io_uring_cmd_to_pdu(cmd, struct io_btrfs_cmd); +	struct btrfs_uring_priv *priv = bc->priv; +	struct btrfs_inode *inode = BTRFS_I(file_inode(priv->iocb.ki_filp)); +	struct extent_io_tree *io_tree = &inode->io_tree; +	unsigned long index; +	u64 cur; +	size_t page_offset; +	ssize_t ret; + +	/* The inode lock has already been acquired in btrfs_uring_read_extent.  */ +	btrfs_lockdep_inode_acquire(inode, i_rwsem); + +	if (priv->err) { +		ret = priv->err; +		goto out; +	} + +	if (priv->compressed) { +		index = 0; +		page_offset = 0; +	} else { +		index = (priv->iocb.ki_pos - priv->start) >> PAGE_SHIFT; +		page_offset = offset_in_page(priv->iocb.ki_pos - priv->start); +	} +	cur = 0; +	while (cur < priv->count) { +		size_t bytes = min_t(size_t, priv->count - cur, PAGE_SIZE - page_offset); + +		if (copy_page_to_iter(priv->pages[index], page_offset, bytes, +				      &priv->iter) != bytes) { +			ret = -EFAULT; +			goto out; +		} + +		index++; +		cur += bytes; +		page_offset = 0; +	} +	ret = priv->count; + +out: +	unlock_extent(io_tree, priv->start, priv->lockend, &priv->cached_state); +	btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); + +	io_uring_cmd_done(cmd, ret, 0, issue_flags); +	add_rchar(current, ret); + +	for (index = 0; index < priv->nr_pages; index++) +		__free_page(priv->pages[index]); + +	kfree(priv->pages); +	kfree(priv->iov); +	kfree(priv); +} + +void btrfs_uring_read_extent_endio(void *ctx, int err) +{ +	struct btrfs_uring_priv *priv = ctx; +	struct io_btrfs_cmd *bc = io_uring_cmd_to_pdu(priv->cmd, struct io_btrfs_cmd); + +	priv->err = err; +	bc->priv = priv; + +	io_uring_cmd_complete_in_task(priv->cmd, btrfs_uring_read_finished); +} + +static int btrfs_uring_read_extent(struct kiocb *iocb, struct iov_iter *iter, +				   u64 start, u64 lockend, +				   struct extent_state *cached_state, +				   u64 disk_bytenr, u64 disk_io_size, +				   size_t count, bool compressed, +				   struct iovec *iov, struct io_uring_cmd *cmd) +{ +	struct btrfs_inode *inode = BTRFS_I(file_inode(iocb->ki_filp)); +	struct extent_io_tree *io_tree = &inode->io_tree; +	struct page **pages; +	struct btrfs_uring_priv *priv = NULL; +	unsigned long nr_pages; +	int ret; + +	nr_pages = DIV_ROUND_UP(disk_io_size, PAGE_SIZE); +	pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS); +	if (!pages) +		return -ENOMEM; +	ret = btrfs_alloc_page_array(nr_pages, pages, 0); +	if (ret) { +		ret = -ENOMEM; +		goto out_fail; +	} + +	priv = kmalloc(sizeof(*priv), GFP_NOFS); +	if (!priv) { +		ret = -ENOMEM; +		goto out_fail; +	} + +	priv->iocb = *iocb; +	priv->iov = iov; +	priv->iter = *iter; +	priv->count = count; +	priv->cmd = cmd; +	priv->cached_state = cached_state; +	priv->compressed = compressed; +	priv->nr_pages = nr_pages; +	priv->pages = pages; +	priv->start = start; +	priv->lockend = lockend; +	priv->err = 0; + +	ret = btrfs_encoded_read_regular_fill_pages(inode, disk_bytenr, +						    disk_io_size, pages, priv); +	if (ret && ret != -EIOCBQUEUED) +		goto out_fail; + +	/* +	 * If we return -EIOCBQUEUED, we're deferring the cleanup to +	 * btrfs_uring_read_finished(), which will handle unlocking the extent +	 * and inode and freeing the allocations. +	 */ + +	/* +	 * We're returning to userspace with the inode lock held, and that's +	 * okay - it'll get unlocked in a worker thread.  Call +	 * btrfs_lockdep_inode_release() to avoid confusing lockdep. +	 */ +	btrfs_lockdep_inode_release(inode, i_rwsem); + +	return -EIOCBQUEUED; + +out_fail: +	unlock_extent(io_tree, start, lockend, &cached_state); +	btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); +	kfree(priv); +	return ret; +} + +struct btrfs_uring_encoded_data { +	struct btrfs_ioctl_encoded_io_args args; +	struct iovec iovstack[UIO_FASTIOV]; +	struct iovec *iov; +	struct iov_iter iter; +}; + +static int btrfs_uring_encoded_read(struct io_uring_cmd *cmd, unsigned int issue_flags) +{ +	size_t copy_end_kernel = offsetofend(struct btrfs_ioctl_encoded_io_args, flags); +	size_t copy_end; +	int ret; +	u64 disk_bytenr, disk_io_size; +	struct file *file; +	struct btrfs_inode *inode; +	struct btrfs_fs_info *fs_info; +	struct extent_io_tree *io_tree; +	loff_t pos; +	struct kiocb kiocb; +	struct extent_state *cached_state = NULL; +	u64 start, lockend; +	void __user *sqe_addr; +	struct btrfs_uring_encoded_data *data = io_uring_cmd_get_async_data(cmd)->op_data; + +	if (!capable(CAP_SYS_ADMIN)) { +		ret = -EPERM; +		goto out_acct; +	} +	file = cmd->file; +	inode = BTRFS_I(file->f_inode); +	fs_info = inode->root->fs_info; +	io_tree = &inode->io_tree; +	sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr)); + +	if (issue_flags & IO_URING_F_COMPAT) { +#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) +		copy_end = offsetofend(struct btrfs_ioctl_encoded_io_args_32, flags); +#else +		return -ENOTTY; +#endif +	} else { +		copy_end = copy_end_kernel; +	} + +	if (!data) { +		data = kzalloc(sizeof(*data), GFP_NOFS); +		if (!data) { +			ret = -ENOMEM; +			goto out_acct; +		} + +		io_uring_cmd_get_async_data(cmd)->op_data = data; + +		if (issue_flags & IO_URING_F_COMPAT) { +#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) +			struct btrfs_ioctl_encoded_io_args_32 args32; + +			if (copy_from_user(&args32, sqe_addr, copy_end)) { +				ret = -EFAULT; +				goto out_acct; +			} + +			data->args.iov = compat_ptr(args32.iov); +			data->args.iovcnt = args32.iovcnt; +			data->args.offset = args32.offset; +			data->args.flags = args32.flags; +#endif +		} else { +			if (copy_from_user(&data->args, sqe_addr, copy_end)) { +				ret = -EFAULT; +				goto out_acct; +			} +		} + +		if (data->args.flags != 0) { +			ret = -EINVAL; +			goto out_acct; +		} + +		data->iov = data->iovstack; +		ret = import_iovec(ITER_DEST, data->args.iov, data->args.iovcnt, +				   ARRAY_SIZE(data->iovstack), &data->iov, +				   &data->iter); +		if (ret < 0) +			goto out_acct; + +		if (iov_iter_count(&data->iter) == 0) { +			ret = 0; +			goto out_free; +		} +	} + +	pos = data->args.offset; +	ret = rw_verify_area(READ, file, &pos, data->args.len); +	if (ret < 0) +		goto out_free; + +	init_sync_kiocb(&kiocb, file); +	kiocb.ki_pos = pos; + +	if (issue_flags & IO_URING_F_NONBLOCK) +		kiocb.ki_flags |= IOCB_NOWAIT; + +	start = ALIGN_DOWN(pos, fs_info->sectorsize); +	lockend = start + BTRFS_MAX_UNCOMPRESSED - 1; + +	ret = btrfs_encoded_read(&kiocb, &data->iter, &data->args, &cached_state, +				 &disk_bytenr, &disk_io_size); +	if (ret < 0 && ret != -EIOCBQUEUED) +		goto out_free; + +	file_accessed(file); + +	if (copy_to_user(sqe_addr + copy_end, +			 (const char *)&data->args + copy_end_kernel, +			 sizeof(data->args) - copy_end_kernel)) { +		if (ret == -EIOCBQUEUED) { +			unlock_extent(io_tree, start, lockend, &cached_state); +			btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); +		} +		ret = -EFAULT; +		goto out_free; +	} + +	if (ret == -EIOCBQUEUED) { +		u64 count = min_t(u64, iov_iter_count(&data->iter), disk_io_size); + +		/* Match ioctl by not returning past EOF if uncompressed. */ +		if (!data->args.compression) +			count = min_t(u64, count, data->args.len); + +		ret = btrfs_uring_read_extent(&kiocb, &data->iter, start, lockend, +					      cached_state, disk_bytenr, disk_io_size, +					      count, data->args.compression, +					      data->iov, cmd); + +		goto out_acct; +	} + +out_free: +	kfree(data->iov); + +out_acct: +	if (ret > 0) +		add_rchar(current, ret); +	inc_syscr(current); + +	return ret; +} + +static int btrfs_uring_encoded_write(struct io_uring_cmd *cmd, unsigned int issue_flags) +{ +	loff_t pos; +	struct kiocb kiocb; +	struct file *file; +	ssize_t ret; +	void __user *sqe_addr; +	struct btrfs_uring_encoded_data *data = io_uring_cmd_get_async_data(cmd)->op_data; + +	if (!capable(CAP_SYS_ADMIN)) { +		ret = -EPERM; +		goto out_acct; +	} + +	file = cmd->file; +	sqe_addr = u64_to_user_ptr(READ_ONCE(cmd->sqe->addr)); + +	if (!(file->f_mode & FMODE_WRITE)) { +		ret = -EBADF; +		goto out_acct; +	} + +	if (!data) { +		data = kzalloc(sizeof(*data), GFP_NOFS); +		if (!data) { +			ret = -ENOMEM; +			goto out_acct; +		} + +		io_uring_cmd_get_async_data(cmd)->op_data = data; + +		if (issue_flags & IO_URING_F_COMPAT) { +#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) +			struct btrfs_ioctl_encoded_io_args_32 args32; + +			if (copy_from_user(&args32, sqe_addr, sizeof(args32))) { +				ret = -EFAULT; +				goto out_acct; +			} +			data->args.iov = compat_ptr(args32.iov); +			data->args.iovcnt = args32.iovcnt; +			data->args.offset = args32.offset; +			data->args.flags = args32.flags; +			data->args.len = args32.len; +			data->args.unencoded_len = args32.unencoded_len; +			data->args.unencoded_offset = args32.unencoded_offset; +			data->args.compression = args32.compression; +			data->args.encryption = args32.encryption; +			memcpy(data->args.reserved, args32.reserved, +			       sizeof(data->args.reserved)); +#else +			ret = -ENOTTY; +			goto out_acct; +#endif +		} else { +			if (copy_from_user(&data->args, sqe_addr, sizeof(data->args))) { +				ret = -EFAULT; +				goto out_acct; +			} +		} + +		ret = -EINVAL; +		if (data->args.flags != 0) +			goto out_acct; +		if (memchr_inv(data->args.reserved, 0, sizeof(data->args.reserved))) +			goto out_acct; +		if (data->args.compression == BTRFS_ENCODED_IO_COMPRESSION_NONE && +		    data->args.encryption == BTRFS_ENCODED_IO_ENCRYPTION_NONE) +			goto out_acct; +		if (data->args.compression >= BTRFS_ENCODED_IO_COMPRESSION_TYPES || +		    data->args.encryption >= BTRFS_ENCODED_IO_ENCRYPTION_TYPES) +			goto out_acct; +		if (data->args.unencoded_offset > data->args.unencoded_len) +			goto out_acct; +		if (data->args.len > data->args.unencoded_len - data->args.unencoded_offset) +			goto out_acct; + +		data->iov = data->iovstack; +		ret = import_iovec(ITER_SOURCE, data->args.iov, data->args.iovcnt, +				   ARRAY_SIZE(data->iovstack), &data->iov, +				   &data->iter); +		if (ret < 0) +			goto out_acct; + +		if (iov_iter_count(&data->iter) == 0) { +			ret = 0; +			goto out_iov; +		} +	} + +	if (issue_flags & IO_URING_F_NONBLOCK) { +		ret = -EAGAIN; +		goto out_acct; +	} + +	pos = data->args.offset; +	ret = rw_verify_area(WRITE, file, &pos, data->args.len); +	if (ret < 0) +		goto out_iov; + +	init_sync_kiocb(&kiocb, file); +	ret = kiocb_set_rw_flags(&kiocb, 0, WRITE); +	if (ret) +		goto out_iov; +	kiocb.ki_pos = pos; + +	file_start_write(file); + +	ret = btrfs_do_write_iter(&kiocb, &data->iter, &data->args); +	if (ret > 0) +		fsnotify_modify(file); + +	file_end_write(file); +out_iov: +	kfree(data->iov); +out_acct: +	if (ret > 0) +		add_wchar(current, ret); +	inc_syscw(current); +	return ret; +} + +int btrfs_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) +{ +	switch (cmd->cmd_op) { +	case BTRFS_IOC_ENCODED_READ: +#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) +	case BTRFS_IOC_ENCODED_READ_32: +#endif +		return btrfs_uring_encoded_read(cmd, issue_flags); + +	case BTRFS_IOC_ENCODED_WRITE: +#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) +	case BTRFS_IOC_ENCODED_WRITE_32: +#endif +		return btrfs_uring_encoded_write(cmd, issue_flags); +	} + +	return -EINVAL; +} + +static int btrfs_ioctl_subvol_sync(struct btrfs_fs_info *fs_info, void __user *argp) +{ +	struct btrfs_root *root; +	struct btrfs_ioctl_subvol_wait args = { 0 }; +	signed long sched_ret; +	int refs; +	u64 root_flags; +	bool wait_for_deletion = false; +	bool found = false; + +	if (copy_from_user(&args, argp, sizeof(args))) +		return -EFAULT; + +	switch (args.mode) { +	case BTRFS_SUBVOL_SYNC_WAIT_FOR_QUEUED: +		/* +		 * Wait for the first one deleted that waits until all previous +		 * are cleaned. +		 */ +		spin_lock(&fs_info->trans_lock); +		if (!list_empty(&fs_info->dead_roots)) { +			root = list_last_entry(&fs_info->dead_roots, +					       struct btrfs_root, root_list); +			args.subvolid = btrfs_root_id(root); +			found = true; +		} +		spin_unlock(&fs_info->trans_lock); +		if (!found) +			return -ENOENT; + +		fallthrough; +	case BTRFS_SUBVOL_SYNC_WAIT_FOR_ONE: +		if ((0 < args.subvolid && args.subvolid < BTRFS_FIRST_FREE_OBJECTID) || +		    BTRFS_LAST_FREE_OBJECTID < args.subvolid) +			return -EINVAL; +		break; +	case BTRFS_SUBVOL_SYNC_COUNT: +		spin_lock(&fs_info->trans_lock); +		args.count = list_count_nodes(&fs_info->dead_roots); +		spin_unlock(&fs_info->trans_lock); +		if (copy_to_user(argp, &args, sizeof(args))) +			return -EFAULT; +		return 0; +	case BTRFS_SUBVOL_SYNC_PEEK_FIRST: +		spin_lock(&fs_info->trans_lock); +		/* Last in the list was deleted first. */ +		if (!list_empty(&fs_info->dead_roots)) { +			root = list_last_entry(&fs_info->dead_roots, +					       struct btrfs_root, root_list); +			args.subvolid = btrfs_root_id(root); +		} else { +			args.subvolid = 0; +		} +		spin_unlock(&fs_info->trans_lock); +		if (copy_to_user(argp, &args, sizeof(args))) +			return -EFAULT; +		return 0; +	case BTRFS_SUBVOL_SYNC_PEEK_LAST: +		spin_lock(&fs_info->trans_lock); +		/* First in the list was deleted last. */ +		if (!list_empty(&fs_info->dead_roots)) { +			root = list_first_entry(&fs_info->dead_roots, +						struct btrfs_root, root_list); +			args.subvolid = btrfs_root_id(root); +		} else { +			args.subvolid = 0; +		} +		spin_unlock(&fs_info->trans_lock); +		if (copy_to_user(argp, &args, sizeof(args))) +			return -EFAULT; +		return 0; +	default: +		return -EINVAL; +	} + +	/* 32bit limitation: fs_roots_radix key is not wide enough. */ +	if (sizeof(unsigned long) != sizeof(u64) && args.subvolid > U32_MAX) +		return -EOVERFLOW; + +	while (1) { +		/* Wait for the specific one. */ +		if (down_read_interruptible(&fs_info->subvol_sem) == -EINTR) +			return -EINTR; +		refs = -1; +		spin_lock(&fs_info->fs_roots_radix_lock); +		root = radix_tree_lookup(&fs_info->fs_roots_radix, +					 (unsigned long)args.subvolid); +		if (root) { +			spin_lock(&root->root_item_lock); +			refs = btrfs_root_refs(&root->root_item); +			root_flags = btrfs_root_flags(&root->root_item); +			spin_unlock(&root->root_item_lock); +		} +		spin_unlock(&fs_info->fs_roots_radix_lock); +		up_read(&fs_info->subvol_sem); + +		/* Subvolume does not exist. */ +		if (!root) +			return -ENOENT; + +		/* Subvolume not deleted at all. */ +		if (refs > 0) +			return -EEXIST; +		/* We've waited and now the subvolume is gone. */ +		if (wait_for_deletion && refs == -1) { +			/* Return the one we waited for as the last one. */ +			if (copy_to_user(argp, &args, sizeof(args))) +				return -EFAULT; +			return 0; +		} + +		/* Subvolume not found on the first try (deleted or never existed). */ +		if (refs == -1) +			return -ENOENT; + +		wait_for_deletion = true; +		ASSERT(root_flags & BTRFS_ROOT_SUBVOL_DEAD); +		sched_ret = schedule_timeout_interruptible(HZ); +		/* Early wake up or error. */ +		if (sched_ret != 0) +			return -EINTR; +	} + +	return 0; +} +  long btrfs_ioctl(struct file *file, unsigned int  		cmd, unsigned long arg)  { @@ -4812,7 +5325,7 @@ long btrfs_ioctl(struct file *file, unsigned int  	case BTRFS_IOC_QUOTA_RESCAN_STATUS:  		return btrfs_ioctl_quota_rescan_status(fs_info, argp);  	case BTRFS_IOC_QUOTA_RESCAN_WAIT: -		return btrfs_ioctl_quota_rescan_wait(fs_info, argp); +		return btrfs_ioctl_quota_rescan_wait(fs_info);  	case BTRFS_IOC_DEV_REPLACE:  		return btrfs_ioctl_dev_replace(fs_info, argp);  	case BTRFS_IOC_GET_SUPPORTED_FEATURES: @@ -4831,6 +5344,8 @@ long btrfs_ioctl(struct file *file, unsigned int  		return fsverity_ioctl_enable(file, (const void __user *)argp);  	case FS_IOC_MEASURE_VERITY:  		return fsverity_ioctl_measure(file, argp); +	case FS_IOC_READ_VERITY_METADATA: +		return fsverity_ioctl_read_metadata(file, argp);  	case BTRFS_IOC_ENCODED_READ:  		return btrfs_ioctl_encoded_read(file, argp, false);  	case BTRFS_IOC_ENCODED_WRITE: @@ -4841,6 +5356,8 @@ long btrfs_ioctl(struct file *file, unsigned int  	case BTRFS_IOC_ENCODED_WRITE_32:  		return btrfs_ioctl_encoded_write(file, argp, true);  #endif +	case BTRFS_IOC_SUBVOL_SYNC_WAIT: +		return btrfs_ioctl_subvol_sync(fs_info, argp);  	}  	return -ENOTTY; | 
