Diffstat (limited to 'io_uring/net.c')
-rw-r--r--   io_uring/net.c   91
1 file changed, 70 insertions, 21 deletions
diff --git a/io_uring/net.c b/io_uring/net.c
index 43a43522f406..35585bdc59f3 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -75,15 +75,29 @@ struct io_sr_msg {
 	u16				flags;
 	/* initialised and used only by !msg send variants */
 	u16				buf_group;
-	unsigned short			retry_flags;
+	/* per-invocation mshot limit */
+	unsigned			mshot_len;
+	/* overall mshot byte limit */
+	unsigned			mshot_total_len;
 	void __user			*msg_control;
 	/* used only for send zerocopy */
 	struct io_kiocb 		*notif;
 };
 
+/*
+ * The UAPI flags are the lower 8 bits, as that's all sqe->ioprio will hold
+ * anyway. Use the upper 8 bits for internal uses.
+ */
 enum sr_retry_flags {
-	IO_SR_MSG_RETRY		= 1,
-	IO_SR_MSG_PARTIAL_MAP	= 2,
+	IORING_RECV_RETRY	= (1U << 15),
+	IORING_RECV_PARTIAL_MAP	= (1U << 14),
+	IORING_RECV_MSHOT_CAP	= (1U << 13),
+	IORING_RECV_MSHOT_LIM	= (1U << 12),
+	IORING_RECV_MSHOT_DONE	= (1U << 11),
+
+	IORING_RECV_RETRY_CLEAR	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
+	IORING_RECV_NO_RETRY	= IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
+				  IORING_RECV_MSHOT_CAP | IORING_RECV_MSHOT_DONE,
 };
 
 /*
@@ -192,8 +206,8 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
 
 	req->flags &= ~REQ_F_BL_EMPTY;
 	sr->done_io = 0;
-	sr->retry_flags = 0;
-	sr->len = 0; /* get from the provided buffer */
+	sr->flags &= ~IORING_RECV_RETRY_CLEAR;
+	sr->len = sr->mshot_len;
 }
 
 static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
@@ -402,7 +416,6 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 
 	sr->done_io = 0;
-	sr->retry_flags = 0;
 	sr->len = READ_ONCE(sqe->len);
 	sr->flags = READ_ONCE(sqe->ioprio);
 	if (sr->flags & ~SENDMSG_FLAGS)
@@ -756,9 +769,8 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 
 	sr->done_io = 0;
-	sr->retry_flags = 0;
 
-	if (unlikely(sqe->file_index || sqe->addr2))
+	if (unlikely(sqe->addr2))
 		return -EINVAL;
 
 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
@@ -783,15 +795,25 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		sr->buf_group = req->buf_index;
 		req->buf_list = NULL;
 	}
+	sr->mshot_total_len = sr->mshot_len = 0;
 	if (sr->flags & IORING_RECV_MULTISHOT) {
 		if (!(req->flags & REQ_F_BUFFER_SELECT))
 			return -EINVAL;
 		if (sr->msg_flags & MSG_WAITALL)
 			return -EINVAL;
-		if (req->opcode == IORING_OP_RECV && sr->len)
+		if (req->opcode == IORING_OP_RECV) {
+			sr->mshot_len = sr->len;
+			sr->mshot_total_len = READ_ONCE(sqe->optlen);
+			if (sr->mshot_total_len)
+				sr->flags |= IORING_RECV_MSHOT_LIM;
+		} else if (sqe->optlen) {
 			return -EINVAL;
+		}
 		req->flags |= REQ_F_APOLL_MULTISHOT;
+	} else if (sqe->optlen) {
+		return -EINVAL;
 	}
+
 	if (sr->flags & IORING_RECVSEND_BUNDLE) {
 		if (req->opcode == IORING_OP_RECVMSG)
 			return -EINVAL;
@@ -823,13 +845,28 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 	if (kmsg->msg.msg_inq > 0)
 		cflags |= IORING_CQE_F_SOCK_NONEMPTY;
 
+	if (*ret > 0 && sr->flags & IORING_RECV_MSHOT_LIM) {
+		/*
+		 * If sr->len hits zero, the limit has been reached. Mark
+		 * mshot as finished, and flag MSHOT_DONE as well to prevent
+		 * a potential bundle from being retried.
+		 */
+		sr->mshot_total_len -= min_t(int, *ret, sr->mshot_total_len);
+		if (!sr->mshot_total_len) {
+			sr->flags |= IORING_RECV_MSHOT_DONE;
+			mshot_finished = true;
+		}
+	}
+
 	if (sr->flags & IORING_RECVSEND_BUNDLE) {
 		size_t this_ret = *ret - sr->done_io;
 
 		cflags |= io_put_kbufs(req, this_ret, io_bundle_nbufs(kmsg, this_ret),
 				      issue_flags);
-		if (sr->retry_flags & IO_SR_MSG_RETRY)
+		if (sr->flags & IORING_RECV_RETRY)
 			cflags = req->cqe.flags | (cflags & CQE_F_MASK);
+		if (sr->mshot_len && *ret >= sr->mshot_len)
+			sr->flags |= IORING_RECV_MSHOT_CAP;
 		/* bundle with no more immediate buffers, we're done */
 		if (req->flags & REQ_F_BL_EMPTY)
 			goto finish;
@@ -837,12 +874,13 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 		 * If more is available AND it was a full transfer, retry and
 		 * append to this one
 		 */
-		if (!sr->retry_flags && kmsg->msg.msg_inq > 1 && this_ret > 0 &&
+		if (!(sr->flags & IORING_RECV_NO_RETRY) &&
+		    kmsg->msg.msg_inq > 1 && this_ret > 0 &&
 		    !iov_iter_count(&kmsg->msg.msg_iter)) {
 			req->cqe.flags = cflags & ~CQE_F_MASK;
 			sr->len = kmsg->msg.msg_inq;
 			sr->done_io += this_ret;
-			sr->retry_flags |= IO_SR_MSG_RETRY;
+			sr->flags |= IORING_RECV_RETRY;
 			return false;
 		}
 	} else {
@@ -859,10 +897,13 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
 		io_mshot_prep_retry(req, kmsg);
 		/* Known not-empty or unknown state, retry */
 		if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
-			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
+			if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY &&
+			    !(sr->flags & IORING_RECV_MSHOT_CAP)) {
 				return false;
+			}
 			/* mshot retries exceeded, force a requeue */
 			sr->nr_multishot_loops = 0;
+			sr->flags &= ~IORING_RECV_MSHOT_CAP;
 			if (issue_flags & IO_URING_F_MULTISHOT)
 				*ret = IOU_REQUEUE;
 		}
@@ -1075,9 +1116,14 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
 			arg.mode |= KBUF_MODE_FREE;
 		}
 
-		if (kmsg->msg.msg_inq > 1)
-			arg.max_len = min_not_zero(sr->len, kmsg->msg.msg_inq);
+		if (*len)
+			arg.max_len = *len;
+		else if (kmsg->msg.msg_inq > 1)
+			arg.max_len = min_not_zero(*len, (size_t) kmsg->msg.msg_inq);
 
+		/* if mshot limited, ensure we don't go over */
+		if (sr->flags & IORING_RECV_MSHOT_LIM)
+			arg.max_len = min_not_zero(arg.max_len, sr->mshot_total_len);
 		ret = io_buffers_peek(req, &arg);
 		if (unlikely(ret < 0))
 			return ret;
@@ -1088,7 +1134,7 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
 			req->flags |= REQ_F_NEED_CLEANUP;
 		}
 		if (arg.partial_map)
-			sr->retry_flags |= IO_SR_MSG_PARTIAL_MAP;
+			sr->flags |= IORING_RECV_PARTIAL_MAP;
 
 		/* special case 1 vec, can be a fast path */
 		if (ret == 1) {
@@ -1283,7 +1329,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	int ret;
 
 	zc->done_io = 0;
-	zc->retry_flags = 0;
 
 	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
 		return -EINVAL;
@@ -1738,9 +1783,11 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags)
 	int ret;
 	bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 
-	if (unlikely(req->flags & REQ_F_FAIL)) {
-		ret = -ECONNRESET;
-		goto out;
+	if (connect->in_progress) {
+		struct poll_table_struct pt = { ._key = EPOLLERR };
+
+		if (vfs_poll(req->file, &pt) & EPOLLERR)
+			goto get_sock_err;
 	}
 
 	file_flags = force_nonblock ? O_NONBLOCK : 0;
@@ -1765,8 +1812,10 @@ int io_connect(struct io_kiocb *req, unsigned int issue_flags)
 		 * which means the previous result is good. For both of these,
 		 * grab the sock_error() and use that for the completion.
 		 */
-		if (ret == -EBADFD || ret == -EISCONN)
+		if (ret == -EBADFD || ret == -EISCONN) {
+get_sock_err:
 			ret = sock_error(sock_from_file(req->file)->sk);
+		}
 	}
 	if (ret == -ERESTARTSYS)
 		ret = -EINTR;
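For context, a minimal userspace sketch of how the new byte limits might be armed from an application. This is not part of the patch: queue_capped_multishot_recv is a hypothetical helper, it assumes liburing headers recent enough to expose the sqe->optlen union member, and it assumes a provided-buffer group has already been registered with the ring.

	#include <errno.h>
	#include <liburing.h>

	/*
	 * Hypothetical helper (not in liburing): arm a multishot receive that
	 * the kernel retires on its own once 'total_len' bytes have been
	 * delivered across all completions (sqe->optlen -> sr->mshot_total_len
	 * in the patch above). 'per_recv_len' is the per-completion cap that
	 * ends up in sr->mshot_len; pass 0 to size each receive purely from
	 * the provided buffers. Assumes buffer group 'buf_group' is registered.
	 */
	static int queue_capped_multishot_recv(struct io_uring *ring, int sockfd,
					       unsigned short buf_group,
					       unsigned per_recv_len,
					       unsigned total_len)
	{
		struct io_uring_sqe *sqe;

		sqe = io_uring_get_sqe(ring);
		if (!sqe)
			return -EBUSY;

		/* no buffer pointer: data lands in the registered buffer group */
		io_uring_prep_recv_multishot(sqe, sockfd, NULL, per_recv_len, 0);
		io_uring_sqe_set_flags(sqe, IOSQE_BUFFER_SELECT);
		sqe->buf_group = buf_group;

		/* overall byte budget; 0 keeps the old unbounded behaviour */
		sqe->optlen = total_len;

		return io_uring_submit(ring);
	}

As with any multishot receive, intermediate completions should carry IORING_CQE_F_MORE; once the total budget is exhausted the request completes and the final CQE arrives without that flag, so the application can re-arm with a fresh limit if desired.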
