30 files changed, 673 insertions, 238 deletions
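The flow that the Documentation/block/ublk.rst hunk below describes looks roughly like this from the ublk server side. A minimal sketch, assuming the kublk selftest harness added under tools/testing/selftests/ublk in this series: io_uring_prep_buf_register(), io_uring_prep_buf_unregister(), ublk_queue_alloc_sqes() and ublk_get_iod() are harness helpers, not liburing API; backend_fd is a hypothetical fixed-file index for the backend file; error handling and user_data setup are omitted.

	/*
	 * Sketch: serve one ublk READ request with zero copy. The register
	 * SQE publishes the client request pages in the io_uring buffer
	 * table at buf_index == tag; the backend IO then consumes them by
	 * index without any copy; the unregister SQE drops the buffer again.
	 */
	static int ublk_zc_queue_io(struct ublk_queue *q, int tag, int backend_fd)
	{
		const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
		struct io_uring_sqe *sqe[3];

		ublk_queue_alloc_sqes(q, sqe, 3);

		/* UBLK_IO_REGISTER_IO_BUF: expose the client request pages */
		io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag);
		sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;

		/* backend read lands directly in the registered kernel buffer
		 * (addr NULL == offset 0 within the buffer) */
		io_uring_prep_read_fixed(sqe[1], backend_fd, NULL,
					 iod->nr_sectors << 9,
					 iod->start_sector << 9, tag);
		sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;

		/* UBLK_IO_UNREGISTER_IO_BUF: drop the buffer reference */
		io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag);

		/* register SQE is CQE_SKIP_SUCCESS, so two CQEs are expected */
		return 2;
	}

The register/unregister SQEs are UBLK_U_IO_REGISTER_IO_BUF/UBLK_U_IO_UNREGISTER_IO_BUF uring_cmds, which map to io_buffer_register_bvec()/io_buffer_unregister_bvec() in the kernel; the IOSQE_IO_HARDLINK flags keep register -> backend IO -> unregister ordered within the ring, and each in-flight operation holds a reference that keeps the buffer live.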
diff --git a/Documentation/block/ublk.rst b/Documentation/block/ublk.rst index 1e0e7358e14a..854f823b46c2 100644 --- a/Documentation/block/ublk.rst +++ b/Documentation/block/ublk.rst @@ -309,18 +309,35 @@ with specified IO tag in the command data:    ``UBLK_IO_COMMIT_AND_FETCH_REQ`` to the server, ublkdrv needs to copy    the server buffer (pages) read to the IO request pages. -Future development -================== -  Zero copy  --------- -Zero copy is a generic requirement for nbd, fuse or similar drivers. A -problem [#xiaoguang]_ Xiaoguang mentioned is that pages mapped to userspace -can't be remapped any more in kernel with existing mm interfaces. This can -occurs when destining direct IO to ``/dev/ublkb*``. Also, he reported that -big requests (IO size >= 256 KB) may benefit a lot from zero copy. - +ublk zero copy relies on io_uring's fixed kernel buffer, which provides +two APIs: `io_buffer_register_bvec()` and `io_buffer_unregister_bvec()`. + +ublk adds the IO command `UBLK_IO_REGISTER_IO_BUF`, which calls +`io_buffer_register_bvec()` so that the ublk server can register a client +request buffer in the io_uring buffer table; the ublk server can then +submit io_uring IOs with the registered buffer index. The IO command +`UBLK_IO_UNREGISTER_IO_BUF` calls `io_buffer_unregister_bvec()` to +unregister the buffer, which is guaranteed to stay live between the calls +to `io_buffer_register_bvec()` and `io_buffer_unregister_bvec()`. Any +io_uring operation which supports this kind of kernel buffer holds one +reference on the buffer until the operation completes. + +A ublk server implementing zero copy or user copy has to run with +CAP_SYS_ADMIN and be trusted, because it is the ublk server's +responsibility to fill the IO buffer with data when handling a READ +command, and to return a result to the ublk driver that matches how many +bytes were actually filled into the IO buffer. Otherwise, an +uninitialized kernel IO buffer is exposed to the client application. + +The ublk server needs to align the parameter in `struct ublk_param_dma_align` +with its backend for zero copy to work correctly. + +For best IO performance, the ublk server should also align its segment +parameters in `struct ublk_param_segment` with the backend, to avoid +unnecessary IO splitting, which usually hurts io_uring performance.  References  ========== @@ -332,5 +349,3 @@ References  .. [#userspace_nbdublk] https://gitlab.com/rwmjones/libnbd/-/tree/nbdublk  .. [#userspace_readme] https://github.com/ming1/ubdsrv/blob/master/README - -.. [#xiaoguang] https://lore.kernel.org/linux-block/YoOr6jBfgVm8GvWg@stefanha-x1.localdomain/ diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c index c060da409ed8..2fd05c1bd30b 100644 --- a/drivers/block/ublk_drv.c +++ b/drivers/block/ublk_drv.c @@ -74,13 +74,30 @@  #define UBLK_PARAM_TYPE_ALL                                \  	(UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD | \  	 UBLK_PARAM_TYPE_DEVT | UBLK_PARAM_TYPE_ZONED |    \ -	 UBLK_PARAM_TYPE_DMA_ALIGN) +	 UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT)  struct ublk_rq_data {  	struct kref ref;  };  struct ublk_uring_cmd_pdu { +	/* +	 * Store requests in the same batch temporarily for queuing them to +	 * the daemon context. 
+	 * +	 * This would naturally be stored in the request payload, but we +	 * want to avoid the extra pre-allocation, and the uring_cmd payload +	 * is always free for us +	 */ +	union { +		struct request *req; +		struct request *req_list; +	}; + +	/* +	 * The following two fields are valid for this cmd's whole +	 * lifetime, and are set up in the ublk uring_cmd handler +	 */  	struct ublk_queue *ubq;  	u16 tag;  }; @@ -141,10 +158,8 @@ struct ublk_queue {  	unsigned long flags;  	struct task_struct	*ubq_daemon; -	char *io_cmd_buf; +	struct ublksrv_io_desc *io_cmd_buf; -	unsigned long io_addr;	/* mapped vm address */ -	unsigned int max_io_sz;  	bool force_abort;  	bool timeout;  	bool canceling; @@ -582,6 +597,18 @@ static int ublk_validate_params(const struct ublk_device *ub)  			return -EINVAL;  	} +	if (ub->params.types & UBLK_PARAM_TYPE_SEGMENT) { +		const struct ublk_param_segment *p = &ub->params.seg; + +		if (!is_power_of_2(p->seg_boundary_mask + 1)) +			return -EINVAL; + +		if (p->seg_boundary_mask + 1 < UBLK_MIN_SEGMENT_SIZE) +			return -EINVAL; +		if (p->max_segment_size < UBLK_MIN_SEGMENT_SIZE) +			return -EINVAL; +	} +  	return 0;  } @@ -598,6 +625,11 @@ static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)  	return ubq->flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY);  } +static inline bool ublk_need_map_io(const struct ublk_queue *ubq) +{ +	return !ublk_support_user_copy(ubq); +} +  static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)  {  	/* @@ -674,11 +706,11 @@ static inline bool ublk_rq_has_data(const struct request *rq)  static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,  		int tag)  { -	return (struct ublksrv_io_desc *) -		&(ubq->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]); +	return &ubq->io_cmd_buf[tag];  } -static inline char *ublk_queue_cmd_buf(struct ublk_device *ub, int q_id) +static inline struct ublksrv_io_desc * +ublk_queue_cmd_buf(struct ublk_device *ub, int q_id)  {  	return ublk_get_queue(ub, q_id)->io_cmd_buf;  } @@ -925,7 +957,7 @@ static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req,  {  	const unsigned int rq_bytes = blk_rq_bytes(req); -	if (ublk_support_user_copy(ubq)) +	if (!ublk_need_map_io(ubq))  		return rq_bytes;  	/* @@ -949,7 +981,7 @@ static int ublk_unmap_io(const struct ublk_queue *ubq,  {  	const unsigned int rq_bytes = blk_rq_bytes(req); -	if (ublk_support_user_copy(ubq)) +	if (!ublk_need_map_io(ubq))  		return rq_bytes;  	if (ublk_need_unmap_req(req)) { @@ -1037,7 +1069,7 @@ static blk_status_t ublk_setup_iod(struct ublk_queue *ubq, struct request *req)  static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu(  		struct io_uring_cmd *ioucmd)  { -	return (struct ublk_uring_cmd_pdu *)&ioucmd->pdu; +	return io_uring_cmd_to_pdu(ioucmd, struct ublk_uring_cmd_pdu);  }  static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq) @@ -1155,14 +1187,11 @@ static inline void __ublk_abort_rq(struct ublk_queue *ubq,  		blk_mq_end_request(rq, BLK_STS_IOERR);  } -static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd, -				 unsigned int issue_flags) +static void ublk_dispatch_req(struct ublk_queue *ubq, +			      struct request *req, +			      unsigned int issue_flags)  { -	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); -	struct ublk_queue *ubq = pdu->ubq; -	int tag = pdu->tag; -	struct request *req = blk_mq_tag_to_rq( -		ubq->dev->tag_set.tags[ubq->q_id], tag); +	int tag = req->tag;  	struct ublk_io *io = &ubq->ios[tag];  	unsigned int mapped_bytes; @@ -1237,11 
+1266,49 @@ static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd,  	ubq_complete_io_cmd(io, UBLK_IO_RES_OK, issue_flags);  } +static void ublk_cmd_tw_cb(struct io_uring_cmd *cmd, +			   unsigned int issue_flags) +{ +	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); +	struct ublk_queue *ubq = pdu->ubq; + +	ublk_dispatch_req(ubq, pdu->req, issue_flags); +} +  static void ublk_queue_cmd(struct ublk_queue *ubq, struct request *rq)  { -	struct ublk_io *io = &ubq->ios[rq->tag]; +	struct io_uring_cmd *cmd = ubq->ios[rq->tag].cmd; +	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); + +	pdu->req = rq; +	io_uring_cmd_complete_in_task(cmd, ublk_cmd_tw_cb); +} + +static void ublk_cmd_list_tw_cb(struct io_uring_cmd *cmd, +		unsigned int issue_flags) +{ +	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); +	struct request *rq = pdu->req_list; +	struct ublk_queue *ubq = pdu->ubq; +	struct request *next; + +	do { +		next = rq->rq_next; +		rq->rq_next = NULL; +		ublk_dispatch_req(ubq, rq, issue_flags); +		rq = next; +	} while (rq); +} + +static void ublk_queue_cmd_list(struct ublk_queue *ubq, struct rq_list *l) +{ +	struct request *rq = rq_list_peek(l); +	struct io_uring_cmd *cmd = ubq->ios[rq->tag].cmd; +	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd); -	io_uring_cmd_complete_in_task(io->cmd, ublk_rq_task_work_cb); +	pdu->req_list = rq; +	rq_list_init(l); +	io_uring_cmd_complete_in_task(cmd, ublk_cmd_list_tw_cb);  }  static enum blk_eh_timer_return ublk_timeout(struct request *rq) @@ -1282,21 +1349,12 @@ static enum blk_eh_timer_return ublk_timeout(struct request *rq)  	return BLK_EH_RESET_TIMER;  } -static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, -		const struct blk_mq_queue_data *bd) +static blk_status_t ublk_prep_req(struct ublk_queue *ubq, struct request *rq)  { -	struct ublk_queue *ubq = hctx->driver_data; -	struct request *rq = bd->rq;  	blk_status_t res; -	if (unlikely(ubq->fail_io)) { +	if (unlikely(ubq->fail_io))  		return BLK_STS_TARGET; -	} - -	/* fill iod to slot in io cmd buffer */ -	res = ublk_setup_iod(ubq, rq); -	if (unlikely(res != BLK_STS_OK)) -		return BLK_STS_IOERR;  	/* With recovery feature enabled, force_abort is set in  	 * ublk_stop_dev() before calling del_gendisk(). 
We have to @@ -1310,17 +1368,68 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,  	if (ublk_nosrv_should_queue_io(ubq) && unlikely(ubq->force_abort))  		return BLK_STS_IOERR; +	if (unlikely(ubq->canceling)) +		return BLK_STS_IOERR; + +	/* fill iod to slot in io cmd buffer */ +	res = ublk_setup_iod(ubq, rq); +	if (unlikely(res != BLK_STS_OK)) +		return BLK_STS_IOERR; + +	blk_mq_start_request(rq); +	return BLK_STS_OK; +} + +static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx, +		const struct blk_mq_queue_data *bd) +{ +	struct ublk_queue *ubq = hctx->driver_data; +	struct request *rq = bd->rq; +	blk_status_t res; + +	res = ublk_prep_req(ubq, rq); +	if (res != BLK_STS_OK) +		return res; + +	/* +	 * ->canceling has to be handled after ->force_abort and ->fail_io +	 * are dealt with, otherwise this request may not be failed in case +	 * of recovery, and may cause a hang when deleting the disk +	 */  	if (unlikely(ubq->canceling)) {  		__ublk_abort_rq(ubq, rq);  		return BLK_STS_OK;  	} -	blk_mq_start_request(bd->rq);  	ublk_queue_cmd(ubq, rq); -  	return BLK_STS_OK;  } +static void ublk_queue_rqs(struct rq_list *rqlist) +{ +	struct rq_list requeue_list = { }; +	struct rq_list submit_list = { }; +	struct ublk_queue *ubq = NULL; +	struct request *req; + +	while ((req = rq_list_pop(rqlist))) { +		struct ublk_queue *this_q = req->mq_hctx->driver_data; + +		if (ubq && ubq != this_q && !rq_list_empty(&submit_list)) +			ublk_queue_cmd_list(ubq, &submit_list); +		ubq = this_q; + +		if (ublk_prep_req(ubq, req) == BLK_STS_OK) +			rq_list_add_tail(&submit_list, req); +		else +			rq_list_add_tail(&requeue_list, req); +	} + +	if (ubq && !rq_list_empty(&submit_list)) +		ublk_queue_cmd_list(ubq, &submit_list); +	*rqlist = requeue_list; +} +  static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,  		unsigned int hctx_idx)  { @@ -1333,6 +1442,7 @@ static int ublk_init_hctx(struct blk_mq_hw_ctx *hctx, void *driver_data,  static const struct blk_mq_ops ublk_mq_ops = {  	.queue_rq       = ublk_queue_rq, +	.queue_rqs      = ublk_queue_rqs,  	.init_hctx	= ublk_init_hctx,  	.timeout	= ublk_timeout,  }; @@ -1446,17 +1556,27 @@ static void ublk_abort_queue(struct ublk_device *ub, struct ublk_queue *ubq)  	}  } +/* Must be called while the queue is frozen */ +static bool ublk_mark_queue_canceling(struct ublk_queue *ubq) +{ +	bool canceled; + +	spin_lock(&ubq->cancel_lock); +	canceled = ubq->canceling; +	if (!canceled) +		ubq->canceling = true; +	spin_unlock(&ubq->cancel_lock); + +	return canceled; +} +  static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq)  { +	bool was_canceled = ubq->canceling;  	struct gendisk *disk; -	spin_lock(&ubq->cancel_lock); -	if (ubq->canceling) { -		spin_unlock(&ubq->cancel_lock); +	if (was_canceled)  		return false; -	} -	ubq->canceling = true; -	spin_unlock(&ubq->cancel_lock);  	spin_lock(&ub->lock);  	disk = ub->ub_disk; @@ -1468,14 +1588,23 @@ static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq)  	if (!disk)  		return false; -	/* Now we are serialized with ublk_queue_rq() */ +	/* +	 * Now we are serialized with ublk_queue_rq() +	 * +	 * Make sure that ubq->canceling is set while the queue is frozen, +	 * because ublk_queue_rq() has to rely on this flag to avoid +	 * touching a completed uring_cmd +	 */  	blk_mq_quiesce_queue(disk->queue);  	/* abort queue is for making forward progress */ -	ublk_abort_queue(ub, ubq); +	was_canceled = ublk_mark_queue_canceling(ubq); +	if (!was_canceled) { +		/* abort queue is for 
making forward progress */ +		ublk_abort_queue(ub, ubq); +	}  	blk_mq_unquiesce_queue(disk->queue);  	put_device(disk_to_dev(disk)); -	return true; +	return !was_canceled;  }  static void ublk_cancel_cmd(struct ublk_queue *ubq, struct ublk_io *io, @@ -1845,7 +1974,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,  		if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)  			goto out; -		if (!ublk_support_user_copy(ubq)) { +		if (ublk_need_map_io(ubq)) {  			/*  			 * FETCH_RQ has to provide IO buffer if NEED GET  			 * DATA is not enabled @@ -1867,7 +1996,7 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,  		if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))  			goto out; -		if (!ublk_support_user_copy(ubq)) { +		if (ublk_need_map_io(ubq)) {  			/*  			 * COMMIT_AND_FETCH_REQ has to provide IO buffer if  			 * NEED GET DATA is not enabled or it is Read IO. @@ -2343,6 +2472,12 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)  	if (ub->params.types & UBLK_PARAM_TYPE_DMA_ALIGN)  		lim.dma_alignment = ub->params.dma.alignment; +	if (ub->params.types & UBLK_PARAM_TYPE_SEGMENT) { +		lim.seg_boundary_mask = ub->params.seg.seg_boundary_mask; +		lim.max_segment_size = ub->params.seg.max_segment_size; +		lim.max_segments = ub->params.seg.max_segments; +	} +  	if (wait_for_completion_interruptible(&ub->completion) != 0)  		return -EINTR; diff --git a/include/linux/bvec.h b/include/linux/bvec.h index ba8f52d48b94..204b22a99c4b 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -184,6 +184,12 @@ static inline void bvec_iter_advance_single(const struct bio_vec *bv,  		((bvl = bvec_iter_bvec((bio_vec), (iter))), 1);	\  	     bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len)) +#define for_each_mp_bvec(bvl, bio_vec, iter, start)			\ +	for (iter = (start);						\ +	     (iter).bi_size &&						\ +		((bvl = mp_bvec_iter_bvec((bio_vec), (iter))), 1);	\ +	     bvec_iter_advance_single((bio_vec), &(iter), (bvl).bv_len)) +  /* for iterating one bio from start to end */  #define BVEC_ITER_ALL_INIT (struct bvec_iter)				\ {									\ diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index e6723fa95160..0634a3de1782 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -21,7 +21,6 @@ struct io_uring_cmd {  struct io_uring_cmd_data {  	void			*op_data; -	struct io_uring_sqe	sqes[2];  };  static inline const void *io_uring_sqe_cmd(const struct io_uring_sqe *sqe) diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h index 7255b36b5cf6..583b86681c93 100644 --- a/include/uapi/linux/ublk_cmd.h +++ b/include/uapi/linux/ublk_cmd.h @@ -410,6 +410,29 @@ struct ublk_param_dma_align {  	__u8	pad[4];  }; +#define UBLK_MIN_SEGMENT_SIZE   4096 +/* + * If any one of the three segment parameters is set to 0, the behavior is + * undefined. + */ +struct ublk_param_segment { +	/* +	 * seg_boundary_mask + 1 needs to be power_of_2(), and the sum has +	 * to be >= UBLK_MIN_SEGMENT_SIZE(4096) +	 */ +	__u64 	seg_boundary_mask; + +	/* +	 * max_segment_size could be overridden by virt_boundary_mask, so be +	 * careful when setting both. 
+	 * +	 * max_segment_size has to be >= UBLK_MIN_SEGMENT_SIZE(4096) +	 */ +	__u32 	max_segment_size; +	__u16 	max_segments; +	__u8	pad[2]; +}; +  struct ublk_params {  	/*  	 * Total length of parameters, userspace has to set 'len' for both @@ -423,6 +446,7 @@ struct ublk_params {  #define UBLK_PARAM_TYPE_DEVT            (1 << 2)  #define UBLK_PARAM_TYPE_ZONED           (1 << 3)  #define UBLK_PARAM_TYPE_DMA_ALIGN       (1 << 4) +#define UBLK_PARAM_TYPE_SEGMENT         (1 << 5)  	__u32	types;			/* types of parameter included */  	struct ublk_param_basic		basic; @@ -430,6 +454,7 @@ struct ublk_params {  	struct ublk_param_devt		devt;  	struct ublk_param_zoned	zoned;  	struct ublk_param_dma_align	dma; +	struct ublk_param_segment	seg;  };  #endif diff --git a/io_uring/Kconfig b/io_uring/Kconfig index 9e2a4beba1ef..4b949c42c0bf 100644 --- a/io_uring/Kconfig +++ b/io_uring/Kconfig @@ -5,6 +5,7 @@  config IO_URING_ZCRX  	def_bool y +	depends on IO_URING  	depends on PAGE_POOL  	depends on INET  	depends on NET_RX_BUSY_POLL diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 3ba49c628337..c6209fe44cb1 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1141,10 +1141,9 @@ void tctx_task_work(struct callback_head *cb)  	WARN_ON_ONCE(ret);  } -static inline void io_req_local_work_add(struct io_kiocb *req, -					 struct io_ring_ctx *ctx, -					 unsigned flags) +static void io_req_local_work_add(struct io_kiocb *req, unsigned flags)  { +	struct io_ring_ctx *ctx = req->ctx;  	unsigned nr_wait, nr_tw, nr_tw_prev;  	struct llist_node *head; @@ -1239,17 +1238,16 @@ static void io_req_normal_work_add(struct io_kiocb *req)  void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)  {  	if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN) -		io_req_local_work_add(req, req->ctx, flags); +		io_req_local_work_add(req, flags);  	else  		io_req_normal_work_add(req);  } -void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx, -				 unsigned flags) +void io_req_task_work_add_remote(struct io_kiocb *req, unsigned flags)  { -	if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))) +	if (WARN_ON_ONCE(!(req->ctx->flags & IORING_SETUP_DEFER_TASKRUN)))  		return; -	io_req_local_work_add(req, ctx, flags); +	__io_req_task_work_add(req, flags);  }  static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx) @@ -1645,6 +1643,8 @@ io_req_flags_t io_file_get_flags(struct file *file)  {  	io_req_flags_t res = 0; +	BUILD_BUG_ON(REQ_F_ISREG_BIT != REQ_F_SUPPORT_NOWAIT_BIT + 1); +  	if (S_ISREG(file_inode(file)->i_mode))  		res |= REQ_F_ISREG;  	if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT)) @@ -1796,7 +1796,7 @@ struct io_wq_work *io_wq_free_work(struct io_wq_work *work)  	struct io_kiocb *req = container_of(work, struct io_kiocb, work);  	struct io_kiocb *nxt = NULL; -	if (req_ref_put_and_test(req)) { +	if (req_ref_put_and_test_atomic(req)) {  		if (req->flags & IO_REQ_LINK_FLAGS)  			nxt = io_req_find_next(req);  		io_free_req(req); diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h index 87f883130286..e4050b2d0821 100644 --- a/io_uring/io_uring.h +++ b/io_uring/io_uring.h @@ -89,8 +89,7 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,  			       unsigned issue_flags);  void __io_req_task_work_add(struct io_kiocb *req, unsigned flags); -void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx, -				 unsigned flags); +void io_req_task_work_add_remote(struct io_kiocb *req, unsigned flags);  void 
io_req_task_queue(struct io_kiocb *req);  void io_req_task_complete(struct io_kiocb *req, io_tw_token_t tw);  void io_req_task_queue_fail(struct io_kiocb *req, int ret); diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c index 0bbcbbcdebfd..50a958e9c921 100644 --- a/io_uring/msg_ring.c +++ b/io_uring/msg_ring.c @@ -38,8 +38,8 @@ static void io_double_unlock_ctx(struct io_ring_ctx *octx)  	mutex_unlock(&octx->uring_lock);  } -static int io_double_lock_ctx(struct io_ring_ctx *octx, -			      unsigned int issue_flags) +static int io_lock_external_ctx(struct io_ring_ctx *octx, +				unsigned int issue_flags)  {  	/*  	 * To ensure proper ordering between the two ctxs, we can only @@ -93,13 +93,14 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,  		kmem_cache_free(req_cachep, req);  		return -EOWNERDEAD;  	} +	req->opcode = IORING_OP_NOP;  	req->cqe.user_data = user_data;  	io_req_set_res(req, res, cflags);  	percpu_ref_get(&ctx->refs);  	req->ctx = ctx;  	req->tctx = NULL;  	req->io_task_work.func = io_msg_tw_complete; -	io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE); +	io_req_task_work_add_remote(req, IOU_F_TWQ_LAZY_WAKE);  	return 0;  } @@ -154,7 +155,7 @@ static int __io_msg_ring_data(struct io_ring_ctx *target_ctx,  	ret = -EOVERFLOW;  	if (target_ctx->flags & IORING_SETUP_IOPOLL) { -		if (unlikely(io_double_lock_ctx(target_ctx, issue_flags))) +		if (unlikely(io_lock_external_ctx(target_ctx, issue_flags)))  			return -EAGAIN;  	}  	if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags)) @@ -199,7 +200,7 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag  	struct file *src_file = msg->src_file;  	int ret; -	if (unlikely(io_double_lock_ctx(target_ctx, issue_flags))) +	if (unlikely(io_lock_external_ctx(target_ctx, issue_flags)))  		return -EAGAIN;  	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd); diff --git a/io_uring/net.c b/io_uring/net.c index 8944eb679024..24040bc3916a 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -97,6 +97,11 @@ struct io_recvzc {  	struct io_zcrx_ifq		*ifq;  }; +static int io_sg_from_iter_iovec(struct sk_buff *skb, +				 struct iov_iter *from, size_t length); +static int io_sg_from_iter(struct sk_buff *skb, +			   struct iov_iter *from, size_t length); +  int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)  {  	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown); @@ -176,16 +181,6 @@ static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)  	return hdr;  } -/* assign new iovec to kmsg, if we need to */ -static void io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg, -			     struct iovec *iov) -{ -	if (iov) { -		req->flags |= REQ_F_NEED_CLEANUP; -		io_vec_reset_iovec(&kmsg->vec, iov, kmsg->msg.msg_iter.nr_segs); -	} -} -  static inline void io_mshot_prep_retry(struct io_kiocb *req,  				       struct io_async_msghdr *kmsg)  { @@ -217,7 +212,11 @@ static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg  			     &iomsg->msg.msg_iter, io_is_compat(req->ctx));  	if (unlikely(ret < 0))  		return ret; -	io_net_vec_assign(req, iomsg, iov); + +	if (iov) { +		req->flags |= REQ_F_NEED_CLEANUP; +		io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs); +	}  	return 0;  } @@ -325,25 +324,6 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,  	return 0;  } -static int io_sendmsg_copy_hdr(struct io_kiocb *req, -			       struct 
io_async_msghdr *iomsg) -{ -	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); -	struct user_msghdr msg; -	int ret; - -	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE, NULL); -	if (unlikely(ret)) -		return ret; - -	if (!(req->flags & REQ_F_BUFFER_SELECT)) -		ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen, -					ITER_SOURCE); -	/* save msg_control as sys_sendmsg() overwrites it */ -	sr->msg_control = iomsg->msg.msg_control_user; -	return ret; -} -  void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)  {  	struct io_async_msghdr *io = req->async_data; @@ -379,6 +359,8 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)  		kmsg->msg.msg_name = &kmsg->addr;  		kmsg->msg.msg_namelen = addr_len;  	} +	if (sr->flags & IORING_RECVSEND_FIXED_BUF) +		return 0;  	if (!io_do_buffer_select(req)) {  		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,  				  &kmsg->msg.msg_iter); @@ -392,31 +374,24 @@ static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe  {  	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);  	struct io_async_msghdr *kmsg = req->async_data; - -	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); - -	return io_sendmsg_copy_hdr(req, kmsg); -} - -static int io_sendmsg_zc_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe) -{ -	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg); -	struct io_async_msghdr *kmsg = req->async_data;  	struct user_msghdr msg;  	int ret; -	if (!(sr->flags & IORING_RECVSEND_FIXED_BUF)) -		return io_sendmsg_setup(req, sqe); -  	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr)); -  	ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL);  	if (unlikely(ret))  		return ret; +	/* save msg_control as sys_sendmsg() overwrites it */  	sr->msg_control = kmsg->msg.msg_control_user; -	kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen; -	return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov, msg.msg_iovlen); +	if (sr->flags & IORING_RECVSEND_FIXED_BUF) { +		kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen; +		return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov, +					 msg.msg_iovlen); +	} +	if (req->flags & REQ_F_BUFFER_SELECT) +		return 0; +	return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE);  }  #define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE) @@ -427,12 +402,6 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)  	sr->done_io = 0;  	sr->retry = false; - -	if (req->opcode != IORING_OP_SEND) { -		if (sqe->addr2 || sqe->file_index) -			return -EINVAL; -	} -  	sr->len = READ_ONCE(sqe->len);  	sr->flags = READ_ONCE(sqe->ioprio);  	if (sr->flags & ~SENDMSG_FLAGS) @@ -458,6 +427,8 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)  		return -ENOMEM;  	if (req->opcode != IORING_OP_SENDMSG)  		return io_send_setup(req, sqe); +	if (unlikely(sqe->addr2 || sqe->file_index)) +		return -EINVAL;  	return io_sendmsg_setup(req, sqe);  } @@ -1302,11 +1273,12 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)  {  	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);  	struct io_ring_ctx *ctx = req->ctx; +	struct io_async_msghdr *iomsg;  	struct io_kiocb *notif; +	int ret;  	zc->done_io = 0;  	zc->retry = false; -	req->flags |= REQ_F_POLL_NO_LAZY;  	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))  		return -EINVAL; @@ -1320,7 +1292,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)  	
notif->cqe.user_data = req->cqe.user_data;  	notif->cqe.res = 0;  	notif->cqe.flags = IORING_CQE_F_NOTIF; -	req->flags |= REQ_F_NEED_CLEANUP; +	req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY;  	zc->flags = READ_ONCE(sqe->ioprio);  	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) { @@ -1335,11 +1307,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)  		}  	} -	if (req->opcode != IORING_OP_SEND_ZC) { -		if (unlikely(sqe->addr2 || sqe->file_index)) -			return -EINVAL; -	} -  	zc->len = READ_ONCE(sqe->len);  	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;  	req->buf_index = READ_ONCE(sqe->buf_index); @@ -1349,13 +1316,28 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)  	if (io_is_compat(req->ctx))  		zc->msg_flags |= MSG_CMSG_COMPAT; -	if (unlikely(!io_msg_alloc_async(req))) +	iomsg = io_msg_alloc_async(req); +	if (unlikely(!iomsg))  		return -ENOMEM; +  	if (req->opcode == IORING_OP_SEND_ZC) { -		req->flags |= REQ_F_IMPORT_BUFFER; -		return io_send_setup(req, sqe); +		if (zc->flags & IORING_RECVSEND_FIXED_BUF) +			req->flags |= REQ_F_IMPORT_BUFFER; +		ret = io_send_setup(req, sqe); +	} else { +		if (unlikely(sqe->addr2 || sqe->file_index)) +			return -EINVAL; +		ret = io_sendmsg_setup(req, sqe); +	} +	if (unlikely(ret)) +		return ret; + +	if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) { +		iomsg->msg.sg_from_iter = io_sg_from_iter_iovec; +		return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count);  	} -	return io_sendmsg_zc_setup(req, sqe); +	iomsg->msg.sg_from_iter = io_sg_from_iter; +	return 0;  }  static int io_sg_from_iter_iovec(struct sk_buff *skb, @@ -1412,27 +1394,13 @@ static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)  {  	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);  	struct io_async_msghdr *kmsg = req->async_data; -	int ret; -	if (sr->flags & IORING_RECVSEND_FIXED_BUF) { -		sr->notif->buf_index = req->buf_index; -		ret = io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter, -					(u64)(uintptr_t)sr->buf, sr->len, -					ITER_SOURCE, issue_flags); -		if (unlikely(ret)) -			return ret; -		kmsg->msg.sg_from_iter = io_sg_from_iter; -	} else { -		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter); -		if (unlikely(ret)) -			return ret; -		ret = io_notif_account_mem(sr->notif, sr->len); -		if (unlikely(ret)) -			return ret; -		kmsg->msg.sg_from_iter = io_sg_from_iter_iovec; -	} +	WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF)); -	return ret; +	sr->notif->buf_index = req->buf_index; +	return io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter, +				(u64)(uintptr_t)sr->buf, sr->len, +				ITER_SOURCE, issue_flags);  }  int io_send_zc(struct io_kiocb *req, unsigned int issue_flags) @@ -1513,8 +1481,6 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)  	unsigned flags;  	int ret, min_ret = 0; -	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec; -  	if (req->flags & REQ_F_IMPORT_BUFFER) {  		unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;  		int ret; @@ -1523,7 +1489,6 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)  					&kmsg->vec, uvec_segs, issue_flags);  		if (unlikely(ret))  			return ret; -		kmsg->msg.sg_from_iter = io_sg_from_iter;  		req->flags &= ~REQ_F_IMPORT_BUFFER;  	} diff --git a/io_uring/refs.h b/io_uring/refs.h index 63982ead9f7d..0d928d87c4ed 100644 --- a/io_uring/refs.h +++ b/io_uring/refs.h @@ -17,6 +17,13 @@ static inline bool req_ref_inc_not_zero(struct io_kiocb *req)  	return 
atomic_inc_not_zero(&req->refs);  } +static inline bool req_ref_put_and_test_atomic(struct io_kiocb *req) +{ +	WARN_ON_ONCE(!(data_race(req->flags) & REQ_F_REFCOUNT)); +	WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req)); +	return atomic_dec_and_test(&req->refs); +} +  static inline bool req_ref_put_and_test(struct io_kiocb *req)  {  	if (likely(!(req->flags & REQ_F_REFCOUNT))) diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c index 3f195e24777e..5e64a8bb30a4 100644 --- a/io_uring/rsrc.c +++ b/io_uring/rsrc.c @@ -1002,20 +1002,33 @@ unlock:  }  EXPORT_SYMBOL_GPL(io_buffer_unregister_bvec); -static int io_import_fixed(int ddir, struct iov_iter *iter, -			   struct io_mapped_ubuf *imu, -			   u64 buf_addr, size_t len) +static int validate_fixed_range(u64 buf_addr, size_t len, +				const struct io_mapped_ubuf *imu)  {  	u64 buf_end; -	size_t offset; -	if (WARN_ON_ONCE(!imu)) -		return -EFAULT;  	if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end)))  		return -EFAULT;  	/* not inside the mapped region */  	if (unlikely(buf_addr < imu->ubuf || buf_end > (imu->ubuf + imu->len)))  		return -EFAULT; +	if (unlikely(len > MAX_RW_COUNT)) +		return -EFAULT; +	return 0; +} + +static int io_import_fixed(int ddir, struct iov_iter *iter, +			   struct io_mapped_ubuf *imu, +			   u64 buf_addr, size_t len) +{ +	size_t offset; +	int ret; + +	if (WARN_ON_ONCE(!imu)) +		return -EFAULT; +	ret = validate_fixed_range(buf_addr, len, imu); +	if (unlikely(ret)) +		return ret;  	if (!(imu->dir & (1 << ddir)))  		return -EFAULT; @@ -1305,12 +1318,12 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,  		u64 buf_addr = (u64)(uintptr_t)iovec[iov_idx].iov_base;  		struct bio_vec *src_bvec;  		size_t offset; -		u64 buf_end; +		int ret; + +		ret = validate_fixed_range(buf_addr, iov_len, imu); +		if (unlikely(ret)) +			return ret; -		if (unlikely(check_add_overflow(buf_addr, (u64)iov_len, &buf_end))) -			return -EFAULT; -		if (unlikely(buf_addr < imu->ubuf || buf_end > (imu->ubuf + imu->len))) -			return -EFAULT;  		if (unlikely(!iov_len))  			return -EFAULT;  		if (unlikely(check_add_overflow(total_len, iov_len, &total_len))) @@ -1349,6 +1362,82 @@ static int io_estimate_bvec_size(struct iovec *iov, unsigned nr_iovs,  	return max_segs;  } +static int io_vec_fill_kern_bvec(int ddir, struct iov_iter *iter, +				 struct io_mapped_ubuf *imu, +				 struct iovec *iovec, unsigned nr_iovs, +				 struct iou_vec *vec) +{ +	const struct bio_vec *src_bvec = imu->bvec; +	struct bio_vec *res_bvec = vec->bvec; +	unsigned res_idx = 0; +	size_t total_len = 0; +	unsigned iov_idx; + +	for (iov_idx = 0; iov_idx < nr_iovs; iov_idx++) { +		size_t offset = (size_t)(uintptr_t)iovec[iov_idx].iov_base; +		size_t iov_len = iovec[iov_idx].iov_len; +		struct bvec_iter bi = { +			.bi_size        = offset + iov_len, +		}; +		struct bio_vec bv; + +		bvec_iter_advance(src_bvec, &bi, offset); +		for_each_mp_bvec(bv, src_bvec, bi, bi) +			res_bvec[res_idx++] = bv; +		total_len += iov_len; +	} +	iov_iter_bvec(iter, ddir, res_bvec, res_idx, total_len); +	return 0; +} + +static int iov_kern_bvec_size(const struct iovec *iov, +			      const struct io_mapped_ubuf *imu, +			      unsigned int *nr_seg) +{ +	size_t offset = (size_t)(uintptr_t)iov->iov_base; +	const struct bio_vec *bvec = imu->bvec; +	int start = 0, i = 0; +	size_t off = 0; +	int ret; + +	ret = validate_fixed_range(offset, iov->iov_len, imu); +	if (unlikely(ret)) +		return ret; + +	for (i = 0; off < offset + iov->iov_len && i < imu->nr_bvecs; +			off += bvec[i].bv_len, i++) { +	
	if (offset >= off && offset < off + bvec[i].bv_len) +			start = i; +	} +	*nr_seg = i - start; +	return 0; +} + +static int io_kern_bvec_size(struct iovec *iov, unsigned nr_iovs, +			     struct io_mapped_ubuf *imu, unsigned *nr_segs) +{ +	unsigned max_segs = 0; +	size_t total_len = 0; +	unsigned i; +	int ret; + +	*nr_segs = 0; +	for (i = 0; i < nr_iovs; i++) { +		if (unlikely(!iov[i].iov_len)) +			return -EFAULT; +		if (unlikely(check_add_overflow(total_len, iov[i].iov_len, +						&total_len))) +			return -EOVERFLOW; +		ret = iov_kern_bvec_size(&iov[i], imu, &max_segs); +		if (unlikely(ret)) +			return ret; +		*nr_segs += max_segs; +	} +	if (total_len > MAX_RW_COUNT) +		return -EINVAL; +	return 0; +} +  int io_import_reg_vec(int ddir, struct iov_iter *iter,  			struct io_kiocb *req, struct iou_vec *vec,  			unsigned nr_iovs, unsigned issue_flags) @@ -1363,14 +1452,20 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter,  	if (!node)  		return -EFAULT;  	imu = node->buf; -	if (imu->is_kbuf) -		return -EOPNOTSUPP;  	if (!(imu->dir & (1 << ddir)))  		return -EFAULT;  	iovec_off = vec->nr - nr_iovs;  	iov = vec->iovec + iovec_off; -	nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu); + +	if (imu->is_kbuf) { +		int ret = io_kern_bvec_size(iov, nr_iovs, imu, &nr_segs); + +		if (unlikely(ret)) +			return ret; +	} else { +		nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu); +	}  	if (sizeof(struct bio_vec) > sizeof(struct iovec)) {  		size_t bvec_bytes; @@ -1397,6 +1492,9 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter,  		req->flags |= REQ_F_NEED_CLEANUP;  	} +	if (imu->is_kbuf) +		return io_vec_fill_kern_bvec(ddir, iter, imu, iov, nr_iovs, vec); +  	return io_vec_fill_bvec(ddir, iter, imu, iov, nr_iovs, vec);  } diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c index f2cfc371f3d0..a9ea7d29cdd9 100644 --- a/io_uring/uring_cmd.c +++ b/io_uring/uring_cmd.c @@ -205,8 +205,8 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,  	 * that it doesn't read in per-op data, play it safe and ensure that  	 * any SQE data is stable beyond prep. This can later get relaxed.  	 
*/ -	memcpy(ac->data.sqes, sqe, uring_sqe_size(req->ctx)); -	ioucmd->sqe = ac->data.sqes; +	memcpy(ac->sqes, sqe, uring_sqe_size(req->ctx)); +	ioucmd->sqe = ac->sqes;  	return 0;  } @@ -307,17 +307,18 @@ static inline int io_uring_cmd_getsockopt(struct socket *sock,  					  struct io_uring_cmd *cmd,  					  unsigned int issue_flags)  { +	const struct io_uring_sqe *sqe = cmd->sqe;  	bool compat = !!(issue_flags & IO_URING_F_COMPAT);  	int optlen, optname, level, err;  	void __user *optval; -	level = READ_ONCE(cmd->sqe->level); +	level = READ_ONCE(sqe->level);  	if (level != SOL_SOCKET)  		return -EOPNOTSUPP; -	optval = u64_to_user_ptr(READ_ONCE(cmd->sqe->optval)); -	optname = READ_ONCE(cmd->sqe->optname); -	optlen = READ_ONCE(cmd->sqe->optlen); +	optval = u64_to_user_ptr(READ_ONCE(sqe->optval)); +	optname = READ_ONCE(sqe->optname); +	optlen = READ_ONCE(sqe->optlen);  	err = do_sock_getsockopt(sock, compat, level, optname,  				 USER_SOCKPTR(optval), @@ -333,15 +334,16 @@ static inline int io_uring_cmd_setsockopt(struct socket *sock,  					  struct io_uring_cmd *cmd,  					  unsigned int issue_flags)  { +	const struct io_uring_sqe *sqe = cmd->sqe;  	bool compat = !!(issue_flags & IO_URING_F_COMPAT);  	int optname, optlen, level;  	void __user *optval;  	sockptr_t optval_s; -	optval = u64_to_user_ptr(READ_ONCE(cmd->sqe->optval)); -	optname = READ_ONCE(cmd->sqe->optname); -	optlen = READ_ONCE(cmd->sqe->optlen); -	level = READ_ONCE(cmd->sqe->level); +	optval = u64_to_user_ptr(READ_ONCE(sqe->optval)); +	optname = READ_ONCE(sqe->optname); +	optlen = READ_ONCE(sqe->optlen); +	level = READ_ONCE(sqe->level);  	optval_s = USER_SOCKPTR(optval);  	return do_sock_setsockopt(sock, compat, level, optname, optval_s, diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h index 14e525255854..b04686b6b5d2 100644 --- a/io_uring/uring_cmd.h +++ b/io_uring/uring_cmd.h @@ -6,6 +6,7 @@  struct io_async_cmd {  	struct io_uring_cmd_data	data;  	struct iou_vec			vec; +	struct io_uring_sqe		sqes[2];  };  int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags); diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c index 9c95b5b6ec4e..80d4a6f71d29 100644 --- a/io_uring/zcrx.c +++ b/io_uring/zcrx.c @@ -818,6 +818,14 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,  	int ret = 0;  	len = min_t(size_t, len, desc->count); +	/* +	 * __tcp_read_sock() always calls io_zcrx_recv_skb one last time, even +	 * if desc->count is already 0. This is caused by the if (offset + 1 != +	 * skb->len) check. Return early in this case to break out of +	 * __tcp_read_sock(). 
+	 */ +	if (!len) +		return 0;  	if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT))  		return -EAGAIN; diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile index 7817afe29005..c7781efea0f3 100644 --- a/tools/testing/selftests/ublk/Makefile +++ b/tools/testing/selftests/ublk/Makefile @@ -4,6 +4,8 @@ CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir)  LDLIBS += -lpthread -lm -luring  TEST_PROGS := test_generic_01.sh +TEST_PROGS += test_generic_02.sh +TEST_PROGS += test_generic_03.sh  TEST_PROGS += test_null_01.sh  TEST_PROGS += test_null_02.sh @@ -11,8 +13,11 @@ TEST_PROGS += test_loop_01.sh  TEST_PROGS += test_loop_02.sh  TEST_PROGS += test_loop_03.sh  TEST_PROGS += test_loop_04.sh +TEST_PROGS += test_loop_05.sh  TEST_PROGS += test_stripe_01.sh  TEST_PROGS += test_stripe_02.sh +TEST_PROGS += test_stripe_03.sh +TEST_PROGS += test_stripe_04.sh  TEST_PROGS += test_stress_01.sh  TEST_PROGS += test_stress_02.sh diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 05147b53c361..91c282bc7674 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -99,7 +99,7 @@ static int __ublk_ctrl_cmd(struct ublk_dev *dev,  static int ublk_ctrl_stop_dev(struct ublk_dev *dev)  {  	struct ublk_ctrl_cmd_data data = { -		.cmd_op	= UBLK_CMD_STOP_DEV, +		.cmd_op	= UBLK_U_CMD_STOP_DEV,  	};  	return __ublk_ctrl_cmd(dev, &data); @@ -169,7 +169,7 @@ static int ublk_ctrl_get_params(struct ublk_dev *dev,  		struct ublk_params *params)  {  	struct ublk_ctrl_cmd_data data = { -		.cmd_op	= UBLK_CMD_GET_PARAMS, +		.cmd_op	= UBLK_U_CMD_GET_PARAMS,  		.flags	= CTRL_CMD_HAS_BUF,  		.addr = (__u64)params,  		.len = sizeof(*params), @@ -215,7 +215,7 @@ static void ublk_ctrl_dump(struct ublk_dev *dev)  	ret = ublk_ctrl_get_params(dev, &p);  	if (ret < 0) { -		ublk_err("failed to get params %m\n"); +		ublk_err("failed to get params %d %s\n", ret, strerror(-ret));  		return;  	} @@ -322,7 +322,7 @@ static int ublk_queue_init(struct ublk_queue *q)  	cmd_buf_size = ublk_queue_cmd_buf_sz(q);  	off = UBLKSRV_CMD_BUF_OFFSET + q->q_id * ublk_queue_max_cmd_buf_sz(); -	q->io_cmd_buf = (char *)mmap(0, cmd_buf_size, PROT_READ, +	q->io_cmd_buf = mmap(0, cmd_buf_size, PROT_READ,  			MAP_SHARED | MAP_POPULATE, dev->fds[0], off);  	if (q->io_cmd_buf == MAP_FAILED) {  		ublk_err("ublk dev %d queue %d map io_cmd_buf failed %m\n", diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h index f31a5c4d4143..760ff8ffb810 100644 --- a/tools/testing/selftests/ublk/kublk.h +++ b/tools/testing/selftests/ublk/kublk.h @@ -128,7 +128,7 @@ struct ublk_queue {  	unsigned int io_inflight;  	struct ublk_dev *dev;  	const struct ublk_tgt_ops *tgt_ops; -	char *io_cmd_buf; +	struct ublksrv_io_desc *io_cmd_buf;  	struct io_uring ring;  	struct ublk_io ios[UBLK_QUEUE_DEPTH];  #define UBLKSRV_QUEUE_STOPPING	(1U << 0) @@ -302,7 +302,7 @@ static inline void ublk_mark_io_done(struct ublk_io *io, int res)  static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)  { -	return (struct ublksrv_io_desc *)&(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]); +	return &q->io_cmd_buf[tag];  }  static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op) diff --git a/tools/testing/selftests/ublk/null.c b/tools/testing/selftests/ublk/null.c index 899875ff50fe..91fec3690d4b 100644 --- a/tools/testing/selftests/ublk/null.c +++ b/tools/testing/selftests/ublk/null.c @@ 
-17,7 +17,8 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)  	dev->tgt.dev_size = dev_size;  	dev->tgt.params = (struct ublk_params) { -		.types = UBLK_PARAM_TYPE_BASIC, +		.types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN | +			UBLK_PARAM_TYPE_SEGMENT,  		.basic = {  			.logical_bs_shift	= 9,  			.physical_bs_shift	= 12, @@ -26,6 +27,14 @@ static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)  			.max_sectors		= info->max_io_buf_bytes >> 9,  			.dev_sectors		= dev_size >> 9,  		}, +		.dma = { +			.alignment 		= 4095, +		}, +		.seg = { +			.seg_boundary_mask 	= 4095, +			.max_segment_size 	= 32 << 10, +			.max_segments 		= 32, +		},  	};  	if (info->flags & UBLK_F_SUPPORT_ZERO_COPY) diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c index 98c564b12f3c..179731c3dd6f 100644 --- a/tools/testing/selftests/ublk/stripe.c +++ b/tools/testing/selftests/ublk/stripe.c @@ -111,43 +111,67 @@ static void calculate_stripe_array(const struct stripe_conf *conf,  	}  } -static inline enum io_uring_op stripe_to_uring_op(const struct ublksrv_io_desc *iod) +static inline enum io_uring_op stripe_to_uring_op( +		const struct ublksrv_io_desc *iod, int zc)  {  	unsigned ublk_op = ublksrv_get_op(iod);  	if (ublk_op == UBLK_IO_OP_READ) -		return IORING_OP_READV; +		return zc ? IORING_OP_READV_FIXED : IORING_OP_READV;  	else if (ublk_op == UBLK_IO_OP_WRITE) -		return IORING_OP_WRITEV; +		return zc ? IORING_OP_WRITEV_FIXED : IORING_OP_WRITEV;  	assert(0);  }  static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)  {  	const struct stripe_conf *conf = get_chunk_shift(q); -	enum io_uring_op op = stripe_to_uring_op(iod); +	int zc = !!(ublk_queue_use_zc(q) != 0); +	enum io_uring_op op = stripe_to_uring_op(iod, zc);  	struct io_uring_sqe *sqe[NR_STRIPE];  	struct stripe_array *s = alloc_stripe_array(conf, iod);  	struct ublk_io *io = ublk_get_io(q, tag); -	int i; +	int i, extra = zc ? 
2 : 0;  	io->private_data = s;  	calculate_stripe_array(conf, iod, s); -	ublk_queue_alloc_sqes(q, sqe, s->nr); -	for (i = 0; i < s->nr; i++) { -		struct stripe *t = &s->s[i]; +	ublk_queue_alloc_sqes(q, sqe, s->nr + extra); + +	if (zc) { +		io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag); +		sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK; +		sqe[0]->user_data = build_user_data(tag, +			ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1); +	} + +	for (i = zc; i < s->nr + extra - zc; i++) { +		struct stripe *t = &s->s[i - zc];  		io_uring_prep_rw(op, sqe[i],  				t->seq + 1,  				(void *)t->vec,  				t->nr_vec,  				t->start << 9); -		io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); +		if (zc) { +			sqe[i]->buf_index = tag; +			io_uring_sqe_set_flags(sqe[i], +					IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK); +		} else { +			io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE); +		}  		/* bit63 marks us as tgt io */ -		sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i, 1); +		sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i - zc, 1); +	} +	if (zc) { +		struct io_uring_sqe *unreg = sqe[s->nr + 1]; + +		io_uring_prep_buf_unregister(unreg, 0, tag, q->q_id, tag); +		unreg->user_data = build_user_data(tag, ublk_cmd_op_nr(unreg->cmd_op), 0, 1);  	} -	return s->nr; + +	/* register buffer is skip_success */ +	return s->nr + zc;  }  static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag) @@ -208,19 +232,27 @@ static void ublk_stripe_io_done(struct ublk_queue *q, int tag,  	struct ublk_io *io = ublk_get_io(q, tag);  	int res = cqe->res; -	if (res < 0) { +	if (res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) {  		if (!io->result)  			io->result = res; -		ublk_err("%s: io failure %d tag %u\n", __func__, res, tag); +		if (res < 0) +			ublk_err("%s: io failure %d tag %u\n", __func__, res, tag);  	} +	/* buffer register op is IOSQE_CQE_SKIP_SUCCESS */ +	if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF)) +		io->tgt_ios += 1; +  	/* fail short READ/WRITE simply */  	if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) {  		unsigned seq = user_data_to_tgt_data(cqe->user_data);  		struct stripe_array *s = io->private_data; -		if (res < s->s[seq].vec->iov_len) +		if (res < s->s[seq].nr_sects << 9) {  			io->result = -EIO; +			ublk_err("%s: short rw op %u res %d exp %u tag %u\n", +					__func__, op, res, s->s[seq].vec->iov_len, tag); +		}  	}  	if (ublk_completed_tgt_io(q, tag)) { @@ -253,7 +285,7 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)  	struct stripe_conf *conf;  	unsigned chunk_shift;  	loff_t bytes = 0; -	int ret, i; +	int ret, i, mul = 1;  	if ((chunk_size & (chunk_size - 1)) || !chunk_size) {  		ublk_err("invalid chunk size %u\n", chunk_size); @@ -295,8 +327,11 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)  	dev->tgt.dev_size = bytes;  	p.basic.dev_sectors = bytes >> 9;  	dev->tgt.params = p; -	dev->tgt.sq_depth = dev->dev_info.queue_depth * conf->nr_files; -	dev->tgt.cq_depth = dev->dev_info.queue_depth * conf->nr_files; + +	if (dev->dev_info.flags & UBLK_F_SUPPORT_ZERO_COPY) +		mul = 2; +	dev->tgt.sq_depth = mul * dev->dev_info.queue_depth * conf->nr_files; +	dev->tgt.cq_depth = mul * dev->dev_info.queue_depth * conf->nr_files;  	printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files); diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh index 75f54ac6b1c4..a88b35943227 100755 --- 
a/tools/testing/selftests/ublk/test_common.sh +++ b/tools/testing/selftests/ublk/test_common.sh @@ -23,6 +23,12 @@ _get_disk_dev_t() {  	echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) ))  } +_run_fio_verify_io() { +	fio --name=verify --rw=randwrite --direct=1 --ioengine=libaio \ +		--bs=8k --iodepth=32 --verify=crc32c --do_verify=1 \ +		--verify_state_save=0 "$@" > /dev/null +} +  _create_backfile() {  	local my_size=$1  	local my_file diff --git a/tools/testing/selftests/ublk/test_generic_02.sh b/tools/testing/selftests/ublk/test_generic_02.sh new file mode 100755 index 000000000000..3e80121e3bf5 --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_02.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_02" +ERR_CODE=0 + +if ! _have_program bpftrace; then +	exit "$UBLK_SKIP_CODE" +fi + +_prep_test "null" "sequential io order for MQ" + +dev_id=$(_add_ublk_dev -t null -q 2) +_check_add_dev $TID $? + +dev_t=$(_get_disk_dev_t "$dev_id") +bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 & +btrace_pid=$! +sleep 2 + +if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then +	_cleanup_test "null" +	exit "$UBLK_SKIP_CODE" +fi + +# run fio over this ublk disk +fio --name=write_seq \ +    --filename=/dev/ublkb"${dev_id}" \ +    --ioengine=libaio --iodepth=16 \ +    --rw=write \ +    --size=512M \ +    --direct=1 \ +    --bs=4k > /dev/null 2>&1 +ERR_CODE=$? +kill "$btrace_pid" +wait +if grep -q "io_out_of_order" "$UBLK_TMP"; then +	cat "$UBLK_TMP" +	ERR_CODE=255 +fi +_cleanup_test "null" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_generic_03.sh b/tools/testing/selftests/ublk/test_generic_03.sh new file mode 100755 index 000000000000..b551aa76cb0d --- /dev/null +++ b/tools/testing/selftests/ublk/test_generic_03.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="generic_03" +ERR_CODE=0 + +_prep_test "null" "check dma & segment limits for zero copy" + +dev_id=$(_add_ublk_dev -t null -z) +_check_add_dev $TID $? + +sysfs_path=/sys/block/ublkb"${dev_id}" +dma_align=$(cat "$sysfs_path"/queue/dma_alignment) +max_segments=$(cat "$sysfs_path"/queue/max_segments) +max_segment_size=$(cat "$sysfs_path"/queue/max_segment_size) +if [ "$dma_align" != "4095" ]; then +	ERR_CODE=255 +fi +if [ "$max_segments" != "32" ]; then +	ERR_CODE=255 +fi +if [ "$max_segment_size" != "32768" ]; then +	ERR_CODE=255 +fi +_cleanup_test "null" +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_loop_01.sh b/tools/testing/selftests/ublk/test_loop_01.sh index c882d2a08e13..1ef8b6044777 100755 --- a/tools/testing/selftests/ublk/test_loop_01.sh +++ b/tools/testing/selftests/ublk/test_loop_01.sh @@ -6,6 +6,10 @@  TID="loop_01"  ERR_CODE=0 +if ! _have_program fio; then +	exit "$UBLK_SKIP_CODE" +fi +  _prep_test "loop" "write and verify test"  backfile_0=$(_create_backfile 256M) @@ -14,15 +18,7 @@ dev_id=$(_add_ublk_dev -t loop "$backfile_0")  _check_add_dev $TID $? "${backfile_0}"  # run fio over the ublk disk -fio --name=write_and_verify \ -    --filename=/dev/ublkb"${dev_id}" \ -    --ioengine=libaio --iodepth=16 \ -    --rw=write \ -    --size=256M \ -    --direct=1 \ -    --verify=crc32c \ -    --do_verify=1 \ -    --bs=4k > /dev/null 2>&1 +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M  ERR_CODE=$?  
_cleanup_test "loop" diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh index 269c96787d7d..e9ca744de8b1 100755 --- a/tools/testing/selftests/ublk/test_loop_03.sh +++ b/tools/testing/selftests/ublk/test_loop_03.sh @@ -6,6 +6,10 @@  TID="loop_03"  ERR_CODE=0 +if ! _have_program fio; then +	exit "$UBLK_SKIP_CODE" +fi +  _prep_test "loop" "write and verify over zero copy"  backfile_0=$(_create_backfile 256M) @@ -13,15 +17,7 @@ dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")  _check_add_dev $TID $? "$backfile_0"  # run fio over the ublk disk -fio --name=write_and_verify \ -    --filename=/dev/ublkb"${dev_id}" \ -    --ioengine=libaio --iodepth=64 \ -    --rw=write \ -    --size=256M \ -    --direct=1 \ -    --verify=crc32c \ -    --do_verify=1 \ -    --bs=4k > /dev/null 2>&1 +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M  ERR_CODE=$?  _cleanup_test "loop" diff --git a/tools/testing/selftests/ublk/test_loop_05.sh b/tools/testing/selftests/ublk/test_loop_05.sh new file mode 100755 index 000000000000..2e6e2e6978fc --- /dev/null +++ b/tools/testing/selftests/ublk/test_loop_05.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="loop_05" +ERR_CODE=0 + +if ! _have_program fio; then +	exit "$UBLK_SKIP_CODE" +fi + +_prep_test "loop" "write and verify test" + +backfile_0=$(_create_backfile 256M) + +dev_id=$(_add_ublk_dev -q 2 -t loop "$backfile_0") +_check_add_dev $TID $? "${backfile_0}" + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M +ERR_CODE=$? + +_cleanup_test "loop" + +_remove_backfile "$backfile_0" + +_show_result $TID $ERR_CODE diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh index 7177f6c57bc5..a8be24532b24 100755 --- a/tools/testing/selftests/ublk/test_stress_01.sh +++ b/tools/testing/selftests/ublk/test_stress_01.sh @@ -27,20 +27,20 @@ ublk_io_and_remove()  _prep_test "stress" "run IO and remove device" -ublk_io_and_remove 8G -t null +ublk_io_and_remove 8G -t null -q 4  ERR_CODE=$?  if [ ${ERR_CODE} -ne 0 ]; then  	_show_result $TID $ERR_CODE  fi  BACK_FILE=$(_create_backfile 256M) -ublk_io_and_remove 256M -t loop "${BACK_FILE}" +ublk_io_and_remove 256M -t loop -q 4 "${BACK_FILE}"  ERR_CODE=$?  if [ ${ERR_CODE} -ne 0 ]; then  	_show_result $TID $ERR_CODE  fi -ublk_io_and_remove 256M -t loop -z "${BACK_FILE}" +ublk_io_and_remove 256M -t loop -q 4 -z "${BACK_FILE}"  ERR_CODE=$?  _cleanup_test "stress"  _remove_backfile "${BACK_FILE}" diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh index 2a8e60579a06..2159e4cc8140 100755 --- a/tools/testing/selftests/ublk/test_stress_02.sh +++ b/tools/testing/selftests/ublk/test_stress_02.sh @@ -27,20 +27,20 @@ ublk_io_and_kill_daemon()  _prep_test "stress" "run IO and kill ublk server" -ublk_io_and_kill_daemon 8G -t null +ublk_io_and_kill_daemon 8G -t null -q 4  ERR_CODE=$?  if [ ${ERR_CODE} -ne 0 ]; then  	_show_result $TID $ERR_CODE  fi  BACK_FILE=$(_create_backfile 256M) -ublk_io_and_kill_daemon 256M -t loop "${BACK_FILE}" +ublk_io_and_kill_daemon 256M -t loop -q 4 "${BACK_FILE}"  ERR_CODE=$?  if [ ${ERR_CODE} -ne 0 ]; then  	_show_result $TID $ERR_CODE  fi -ublk_io_and_kill_daemon 256M -t loop -z "${BACK_FILE}" +ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${BACK_FILE}"  ERR_CODE=$?  
_cleanup_test "stress"  _remove_backfile "${BACK_FILE}" diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh index c01f3dc325ab..7e387ef656ea 100755 --- a/tools/testing/selftests/ublk/test_stripe_01.sh +++ b/tools/testing/selftests/ublk/test_stripe_01.sh @@ -6,6 +6,10 @@  TID="stripe_01"  ERR_CODE=0 +if ! _have_program fio; then +	exit "$UBLK_SKIP_CODE" +fi +  _prep_test "stripe" "write and verify test"  backfile_0=$(_create_backfile 256M) @@ -15,15 +19,7 @@ dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")  _check_add_dev $TID $? "${backfile_0}"  # run fio over the ublk disk -fio --name=write_and_verify \ -    --filename=/dev/ublkb"${dev_id}" \ -    --ioengine=libaio --iodepth=32 \ -    --rw=write \ -    --size=512M \ -    --direct=1 \ -    --verify=crc32c \ -    --do_verify=1 \ -    --bs=4k > /dev/null 2>&1 +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M  ERR_CODE=$?  _cleanup_test "stripe" diff --git a/tools/testing/selftests/ublk/test_stripe_03.sh b/tools/testing/selftests/ublk/test_stripe_03.sh new file mode 100755 index 000000000000..c1b34af36145 --- /dev/null +++ b/tools/testing/selftests/ublk/test_stripe_03.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh + +TID="stripe_03" +ERR_CODE=0 + +if ! _have_program fio; then +	exit "$UBLK_SKIP_CODE" +fi + +_prep_test "stripe" "write and verify test" + +backfile_0=$(_create_backfile 256M) +backfile_1=$(_create_backfile 256M) + +dev_id=$(_add_ublk_dev -q 2 -t stripe "$backfile_0" "$backfile_1") +_check_add_dev $TID $? "${backfile_0}" + +# run fio over the ublk disk +_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M +ERR_CODE=$? + +_cleanup_test "stripe" + +_remove_backfile "$backfile_0" +_remove_backfile "$backfile_1" + +_show_result $TID $ERR_CODE
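The segment and dma-alignment limits exercised above (set by the null target, asserted via sysfs by test_generic_03.sh) reduce to a few lines of parameter setup on the server side. A minimal sketch using the exact values from the null selftest; ublk_set_zc_limits() is a hypothetical helper name:

	#include <linux/ublk_cmd.h>

	/*
	 * Fill the new UBLK_PARAM_TYPE_DMA_ALIGN/UBLK_PARAM_TYPE_SEGMENT
	 * parameters. The driver's ublk_validate_params() rejects
	 * UBLK_PARAM_TYPE_SEGMENT unless seg_boundary_mask + 1 is a power of
	 * two and both it and max_segment_size are at least
	 * UBLK_MIN_SEGMENT_SIZE (4096).
	 */
	static void ublk_set_zc_limits(struct ublk_params *p)
	{
		p->types |= UBLK_PARAM_TYPE_DMA_ALIGN | UBLK_PARAM_TYPE_SEGMENT;
		p->dma.alignment = 4095;		/* 4k dma alignment */
		p->seg.seg_boundary_mask = 4095;	/* 4095 + 1 == 4096, power of two */
		p->seg.max_segment_size = 32 << 10;	/* 32k per segment */
		p->seg.max_segments = 32;		/* checked via the queue sysfs attrs */
	}

Once the device is started, these values surface as dma_alignment, max_segments and max_segment_size under the disk's queue sysfs directory, which is exactly what test_generic_03.sh asserts.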
