summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jens Axboe <axboe@kernel.dk> 2025-07-07 19:38:45 -0600
committer Jens Axboe <axboe@kernel.dk> 2025-07-10 11:54:33 -0600
commit 6a8afb9fff6478e7944794f089181e93df1c728a (patch)
tree ce570a43150c66a37e5f24f47053aba7300f656d
parent 3919b695932dd1990b5c7fd44fc52361f8e2ac5f (diff)
io_uring/net: allow multishot receive per-invocation cap
If an application is handling multiple receive streams using recv multishot, then the number of retries and the amount of buffer peeking done for multishot and bundles can cause too much to be processed per socket before moving on. This isn't directly controllable by the application. By default, io_uring will retry a recv up to MULTISHOT_MAX_RETRY (32) times, as long as the socket keeps having data to receive. And if using bundles, then each bundle peek will potentially map up to PEEK_MAX_IMPORT (256) iovecs of data. Once these limits are hit, a requeue operation is done, where the request gets retried after other pending requests have had time to execute. Add support for capping the per-invocation receive length, before a requeue condition is considered for each receive. This is done by setting sqe->mshot_len to the byte value. For example, if this is set to 1024, then each receive will be requeued once at least 1024 bytes have been received. Link: https://lore.kernel.org/io-uring/20250709203420.1321689-4-axboe@kernel.dk Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- io_uring/net.c 23
1 files changed, 17 insertions, 6 deletions
diff --git a/io_uring/net.c b/io_uring/net.c
index 08309b5ed45e..40f4ac0ab151 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -75,6 +75,7 @@ struct io_sr_msg {
u16 flags;
/* initialised and used only by !msg send variants */
u16 buf_group;
+ unsigned mshot_len;
void __user *msg_control;
/* used only for send zerocopy */
struct io_kiocb *notif;
@@ -87,9 +88,11 @@ struct io_sr_msg {
enum sr_retry_flags {
IORING_RECV_RETRY = (1U << 15),
IORING_RECV_PARTIAL_MAP = (1U << 14),
+ IORING_RECV_MSHOT_CAP = (1U << 13),
IORING_RECV_RETRY_CLEAR = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
- IORING_RECV_NO_RETRY = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP,
+ IORING_RECV_NO_RETRY = IORING_RECV_RETRY | IORING_RECV_PARTIAL_MAP |
+ IORING_RECV_MSHOT_CAP,
};
/*
@@ -199,7 +202,7 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
req->flags &= ~REQ_F_BL_EMPTY;
sr->done_io = 0;
sr->flags &= ~IORING_RECV_RETRY_CLEAR;
- sr->len = 0; /* get from the provided buffer */
+ sr->len = sr->mshot_len;
}
static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
@@ -787,13 +790,14 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
sr->buf_group = req->buf_index;
req->buf_list = NULL;
}
+ sr->mshot_len = 0;
if (sr->flags & IORING_RECV_MULTISHOT) {
if (!(req->flags & REQ_F_BUFFER_SELECT))
return -EINVAL;
if (sr->msg_flags & MSG_WAITALL)
return -EINVAL;
- if (req->opcode == IORING_OP_RECV && sr->len)
- return -EINVAL;
+ if (req->opcode == IORING_OP_RECV)
+ sr->mshot_len = sr->len;
req->flags |= REQ_F_APOLL_MULTISHOT;
}
if (sr->flags & IORING_RECVSEND_BUNDLE) {
@@ -834,6 +838,8 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
issue_flags);
if (sr->flags & IORING_RECV_RETRY)
cflags = req->cqe.flags | (cflags & CQE_F_MASK);
+ if (sr->mshot_len && *ret >= sr->mshot_len)
+ sr->flags |= IORING_RECV_MSHOT_CAP;
/* bundle with no more immediate buffers, we're done */
if (req->flags & REQ_F_BL_EMPTY)
goto finish;
@@ -864,10 +870,13 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
io_mshot_prep_retry(req, kmsg);
/* Known not-empty or unknown state, retry */
if (cflags & IORING_CQE_F_SOCK_NONEMPTY || kmsg->msg.msg_inq < 0) {
- if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY)
+ if (sr->nr_multishot_loops++ < MULTISHOT_MAX_RETRY &&
+ !(sr->flags & IORING_RECV_MSHOT_CAP)) {
return false;
+ }
/* mshot retries exceeded, force a requeue */
sr->nr_multishot_loops = 0;
+ sr->flags &= ~IORING_RECV_MSHOT_CAP;
if (issue_flags & IO_URING_F_MULTISHOT)
*ret = IOU_REQUEUE;
}
@@ -1080,7 +1089,9 @@ static int io_recv_buf_select(struct io_kiocb *req, struct io_async_msghdr *kmsg
arg.mode |= KBUF_MODE_FREE;
}
- if (kmsg->msg.msg_inq > 1)
+ if (*len)
+ arg.max_len = *len;
+ else if (kmsg->msg.msg_inq > 1)
arg.max_len = min_not_zero(*len, kmsg->msg.msg_inq);
ret = io_buffers_peek(req, &arg);