Diffstat (limited to 'io_uring')
-rw-r--r--  io_uring/Kconfig        1
-rw-r--r--  io_uring/io_uring.c    18
-rw-r--r--  io_uring/io_uring.h     3
-rw-r--r--  io_uring/msg_ring.c    11
-rw-r--r--  io_uring/net.c        135
-rw-r--r--  io_uring/refs.h         7
-rw-r--r--  io_uring/rsrc.c       126
-rw-r--r--  io_uring/uring_cmd.c   22
-rw-r--r--  io_uring/uring_cmd.h    1
-rw-r--r--  io_uring/zcrx.c         8
10 files changed, 207 insertions, 125 deletions
diff --git a/io_uring/Kconfig b/io_uring/Kconfig
index 9e2a4beba1ef..4b949c42c0bf 100644
--- a/io_uring/Kconfig
+++ b/io_uring/Kconfig
@@ -5,6 +5,7 @@
config IO_URING_ZCRX
def_bool y
+ depends on IO_URING
depends on PAGE_POOL
depends on INET
depends on NET_RX_BUSY_POLL
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 3ba49c628337..c6209fe44cb1 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1141,10 +1141,9 @@ void tctx_task_work(struct callback_head *cb)
WARN_ON_ONCE(ret);
}
-static inline void io_req_local_work_add(struct io_kiocb *req,
- struct io_ring_ctx *ctx,
- unsigned flags)
+static void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
{
+ struct io_ring_ctx *ctx = req->ctx;
unsigned nr_wait, nr_tw, nr_tw_prev;
struct llist_node *head;
@@ -1239,17 +1238,16 @@ static void io_req_normal_work_add(struct io_kiocb *req)
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
{
if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN)
- io_req_local_work_add(req, req->ctx, flags);
+ io_req_local_work_add(req, flags);
else
io_req_normal_work_add(req);
}
-void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
- unsigned flags)
+void io_req_task_work_add_remote(struct io_kiocb *req, unsigned flags)
{
- if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)))
+ if (WARN_ON_ONCE(!(req->ctx->flags & IORING_SETUP_DEFER_TASKRUN)))
return;
- io_req_local_work_add(req, ctx, flags);
+ __io_req_task_work_add(req, flags);
}
static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
@@ -1645,6 +1643,8 @@ io_req_flags_t io_file_get_flags(struct file *file)
{
io_req_flags_t res = 0;
+ BUILD_BUG_ON(REQ_F_ISREG_BIT != REQ_F_SUPPORT_NOWAIT_BIT + 1);
+
if (S_ISREG(file_inode(file)->i_mode))
res |= REQ_F_ISREG;
if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT))
@@ -1796,7 +1796,7 @@ struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
struct io_kiocb *req = container_of(work, struct io_kiocb, work);
struct io_kiocb *nxt = NULL;
- if (req_ref_put_and_test(req)) {
+ if (req_ref_put_and_test_atomic(req)) {
if (req->flags & IO_REQ_LINK_FLAGS)
nxt = io_req_find_next(req);
io_free_req(req);
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 87f883130286..e4050b2d0821 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -89,8 +89,7 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
unsigned issue_flags);
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
-void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
- unsigned flags);
+void io_req_task_work_add_remote(struct io_kiocb *req, unsigned flags);
void io_req_task_queue(struct io_kiocb *req);
void io_req_task_complete(struct io_kiocb *req, io_tw_token_t tw);
void io_req_task_queue_fail(struct io_kiocb *req, int ret);
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 0bbcbbcdebfd..50a958e9c921 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -38,8 +38,8 @@ static void io_double_unlock_ctx(struct io_ring_ctx *octx)
mutex_unlock(&octx->uring_lock);
}
-static int io_double_lock_ctx(struct io_ring_ctx *octx,
- unsigned int issue_flags)
+static int io_lock_external_ctx(struct io_ring_ctx *octx,
+ unsigned int issue_flags)
{
/*
* To ensure proper ordering between the two ctxs, we can only
@@ -93,13 +93,14 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
kmem_cache_free(req_cachep, req);
return -EOWNERDEAD;
}
+ req->opcode = IORING_OP_NOP;
req->cqe.user_data = user_data;
io_req_set_res(req, res, cflags);
percpu_ref_get(&ctx->refs);
req->ctx = ctx;
req->tctx = NULL;
req->io_task_work.func = io_msg_tw_complete;
- io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE);
+ io_req_task_work_add_remote(req, IOU_F_TWQ_LAZY_WAKE);
return 0;
}
@@ -154,7 +155,7 @@ static int __io_msg_ring_data(struct io_ring_ctx *target_ctx,
ret = -EOVERFLOW;
if (target_ctx->flags & IORING_SETUP_IOPOLL) {
- if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
+ if (unlikely(io_lock_external_ctx(target_ctx, issue_flags)))
return -EAGAIN;
}
if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
@@ -199,7 +200,7 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flag
struct file *src_file = msg->src_file;
int ret;
- if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
+ if (unlikely(io_lock_external_ctx(target_ctx, issue_flags)))
return -EAGAIN;
ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
diff --git a/io_uring/net.c b/io_uring/net.c
index 8944eb679024..24040bc3916a 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -97,6 +97,11 @@ struct io_recvzc {
struct io_zcrx_ifq *ifq;
};
+static int io_sg_from_iter_iovec(struct sk_buff *skb,
+ struct iov_iter *from, size_t length);
+static int io_sg_from_iter(struct sk_buff *skb,
+ struct iov_iter *from, size_t length);
+
int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
@@ -176,16 +181,6 @@ static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
return hdr;
}
-/* assign new iovec to kmsg, if we need to */
-static void io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
- struct iovec *iov)
-{
- if (iov) {
- req->flags |= REQ_F_NEED_CLEANUP;
- io_vec_reset_iovec(&kmsg->vec, iov, kmsg->msg.msg_iter.nr_segs);
- }
-}
-
static inline void io_mshot_prep_retry(struct io_kiocb *req,
struct io_async_msghdr *kmsg)
{
@@ -217,7 +212,11 @@ static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg
&iomsg->msg.msg_iter, io_is_compat(req->ctx));
if (unlikely(ret < 0))
return ret;
- io_net_vec_assign(req, iomsg, iov);
+
+ if (iov) {
+ req->flags |= REQ_F_NEED_CLEANUP;
+ io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs);
+ }
return 0;
}
@@ -325,25 +324,6 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
return 0;
}
-static int io_sendmsg_copy_hdr(struct io_kiocb *req,
- struct io_async_msghdr *iomsg)
-{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
- struct user_msghdr msg;
- int ret;
-
- ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE, NULL);
- if (unlikely(ret))
- return ret;
-
- if (!(req->flags & REQ_F_BUFFER_SELECT))
- ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
- ITER_SOURCE);
- /* save msg_control as sys_sendmsg() overwrites it */
- sr->msg_control = iomsg->msg.msg_control_user;
- return ret;
-}
-
void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
{
struct io_async_msghdr *io = req->async_data;
@@ -379,6 +359,8 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
kmsg->msg.msg_name = &kmsg->addr;
kmsg->msg.msg_namelen = addr_len;
}
+ if (sr->flags & IORING_RECVSEND_FIXED_BUF)
+ return 0;
if (!io_do_buffer_select(req)) {
ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
&kmsg->msg.msg_iter);
@@ -392,31 +374,24 @@ static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct io_async_msghdr *kmsg = req->async_data;
-
- sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
-
- return io_sendmsg_copy_hdr(req, kmsg);
-}
-
-static int io_sendmsg_zc_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
- struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
- struct io_async_msghdr *kmsg = req->async_data;
struct user_msghdr msg;
int ret;
- if (!(sr->flags & IORING_RECVSEND_FIXED_BUF))
- return io_sendmsg_setup(req, sqe);
-
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
-
ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL);
if (unlikely(ret))
return ret;
+ /* save msg_control as sys_sendmsg() overwrites it */
sr->msg_control = kmsg->msg.msg_control_user;
- kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
- return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov, msg.msg_iovlen);
+ if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
+ kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
+ return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov,
+ msg.msg_iovlen);
+ }
+ if (req->flags & REQ_F_BUFFER_SELECT)
+ return 0;
+ return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE);
}
#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)
@@ -427,12 +402,6 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
sr->done_io = 0;
sr->retry = false;
-
- if (req->opcode != IORING_OP_SEND) {
- if (sqe->addr2 || sqe->file_index)
- return -EINVAL;
- }
-
sr->len = READ_ONCE(sqe->len);
sr->flags = READ_ONCE(sqe->ioprio);
if (sr->flags & ~SENDMSG_FLAGS)
@@ -458,6 +427,8 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
return -ENOMEM;
if (req->opcode != IORING_OP_SENDMSG)
return io_send_setup(req, sqe);
+ if (unlikely(sqe->addr2 || sqe->file_index))
+ return -EINVAL;
return io_sendmsg_setup(req, sqe);
}
@@ -1302,11 +1273,12 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
struct io_ring_ctx *ctx = req->ctx;
+ struct io_async_msghdr *iomsg;
struct io_kiocb *notif;
+ int ret;
zc->done_io = 0;
zc->retry = false;
- req->flags |= REQ_F_POLL_NO_LAZY;
if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
return -EINVAL;
@@ -1320,7 +1292,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
notif->cqe.user_data = req->cqe.user_data;
notif->cqe.res = 0;
notif->cqe.flags = IORING_CQE_F_NOTIF;
- req->flags |= REQ_F_NEED_CLEANUP;
+ req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY;
zc->flags = READ_ONCE(sqe->ioprio);
if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
@@ -1335,11 +1307,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
}
}
- if (req->opcode != IORING_OP_SEND_ZC) {
- if (unlikely(sqe->addr2 || sqe->file_index))
- return -EINVAL;
- }
-
zc->len = READ_ONCE(sqe->len);
zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
req->buf_index = READ_ONCE(sqe->buf_index);
@@ -1349,13 +1316,28 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
if (io_is_compat(req->ctx))
zc->msg_flags |= MSG_CMSG_COMPAT;
- if (unlikely(!io_msg_alloc_async(req)))
+ iomsg = io_msg_alloc_async(req);
+ if (unlikely(!iomsg))
return -ENOMEM;
+
if (req->opcode == IORING_OP_SEND_ZC) {
- req->flags |= REQ_F_IMPORT_BUFFER;
- return io_send_setup(req, sqe);
+ if (zc->flags & IORING_RECVSEND_FIXED_BUF)
+ req->flags |= REQ_F_IMPORT_BUFFER;
+ ret = io_send_setup(req, sqe);
+ } else {
+ if (unlikely(sqe->addr2 || sqe->file_index))
+ return -EINVAL;
+ ret = io_sendmsg_setup(req, sqe);
+ }
+ if (unlikely(ret))
+ return ret;
+
+ if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) {
+ iomsg->msg.sg_from_iter = io_sg_from_iter_iovec;
+ return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count);
}
- return io_sendmsg_zc_setup(req, sqe);
+ iomsg->msg.sg_from_iter = io_sg_from_iter;
+ return 0;
}
static int io_sg_from_iter_iovec(struct sk_buff *skb,
@@ -1412,27 +1394,13 @@ static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
{
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
struct io_async_msghdr *kmsg = req->async_data;
- int ret;
- if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
- sr->notif->buf_index = req->buf_index;
- ret = io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter,
- (u64)(uintptr_t)sr->buf, sr->len,
- ITER_SOURCE, issue_flags);
- if (unlikely(ret))
- return ret;
- kmsg->msg.sg_from_iter = io_sg_from_iter;
- } else {
- ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
- if (unlikely(ret))
- return ret;
- ret = io_notif_account_mem(sr->notif, sr->len);
- if (unlikely(ret))
- return ret;
- kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
- }
+ WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF));
- return ret;
+ sr->notif->buf_index = req->buf_index;
+ return io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter,
+ (u64)(uintptr_t)sr->buf, sr->len,
+ ITER_SOURCE, issue_flags);
}
int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
@@ -1513,8 +1481,6 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
unsigned flags;
int ret, min_ret = 0;
- kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
-
if (req->flags & REQ_F_IMPORT_BUFFER) {
unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;
int ret;
@@ -1523,7 +1489,6 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
&kmsg->vec, uvec_segs, issue_flags);
if (unlikely(ret))
return ret;
- kmsg->msg.sg_from_iter = io_sg_from_iter;
req->flags &= ~REQ_F_IMPORT_BUFFER;
}
diff --git a/io_uring/refs.h b/io_uring/refs.h
index 63982ead9f7d..0d928d87c4ed 100644
--- a/io_uring/refs.h
+++ b/io_uring/refs.h
@@ -17,6 +17,13 @@ static inline bool req_ref_inc_not_zero(struct io_kiocb *req)
return atomic_inc_not_zero(&req->refs);
}
+static inline bool req_ref_put_and_test_atomic(struct io_kiocb *req)
+{
+ WARN_ON_ONCE(!(data_race(req->flags) & REQ_F_REFCOUNT));
+ WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
+ return atomic_dec_and_test(&req->refs);
+}
+
static inline bool req_ref_put_and_test(struct io_kiocb *req)
{
if (likely(!(req->flags & REQ_F_REFCOUNT)))
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 3f195e24777e..5e64a8bb30a4 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -1002,20 +1002,33 @@ unlock:
}
EXPORT_SYMBOL_GPL(io_buffer_unregister_bvec);
-static int io_import_fixed(int ddir, struct iov_iter *iter,
- struct io_mapped_ubuf *imu,
- u64 buf_addr, size_t len)
+static int validate_fixed_range(u64 buf_addr, size_t len,
+ const struct io_mapped_ubuf *imu)
{
u64 buf_end;
- size_t offset;
- if (WARN_ON_ONCE(!imu))
- return -EFAULT;
if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end)))
return -EFAULT;
/* not inside the mapped region */
if (unlikely(buf_addr < imu->ubuf || buf_end > (imu->ubuf + imu->len)))
return -EFAULT;
+ if (unlikely(len > MAX_RW_COUNT))
+ return -EFAULT;
+ return 0;
+}
+
+static int io_import_fixed(int ddir, struct iov_iter *iter,
+ struct io_mapped_ubuf *imu,
+ u64 buf_addr, size_t len)
+{
+ size_t offset;
+ int ret;
+
+ if (WARN_ON_ONCE(!imu))
+ return -EFAULT;
+ ret = validate_fixed_range(buf_addr, len, imu);
+ if (unlikely(ret))
+ return ret;
if (!(imu->dir & (1 << ddir)))
return -EFAULT;
@@ -1305,12 +1318,12 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,
u64 buf_addr = (u64)(uintptr_t)iovec[iov_idx].iov_base;
struct bio_vec *src_bvec;
size_t offset;
- u64 buf_end;
+ int ret;
+
+ ret = validate_fixed_range(buf_addr, iov_len, imu);
+ if (unlikely(ret))
+ return ret;
- if (unlikely(check_add_overflow(buf_addr, (u64)iov_len, &buf_end)))
- return -EFAULT;
- if (unlikely(buf_addr < imu->ubuf || buf_end > (imu->ubuf + imu->len)))
- return -EFAULT;
if (unlikely(!iov_len))
return -EFAULT;
if (unlikely(check_add_overflow(total_len, iov_len, &total_len)))
@@ -1349,6 +1362,82 @@ static int io_estimate_bvec_size(struct iovec *iov, unsigned nr_iovs,
return max_segs;
}
+static int io_vec_fill_kern_bvec(int ddir, struct iov_iter *iter,
+ struct io_mapped_ubuf *imu,
+ struct iovec *iovec, unsigned nr_iovs,
+ struct iou_vec *vec)
+{
+ const struct bio_vec *src_bvec = imu->bvec;
+ struct bio_vec *res_bvec = vec->bvec;
+ unsigned res_idx = 0;
+ size_t total_len = 0;
+ unsigned iov_idx;
+
+ for (iov_idx = 0; iov_idx < nr_iovs; iov_idx++) {
+ size_t offset = (size_t)(uintptr_t)iovec[iov_idx].iov_base;
+ size_t iov_len = iovec[iov_idx].iov_len;
+ struct bvec_iter bi = {
+ .bi_size = offset + iov_len,
+ };
+ struct bio_vec bv;
+
+ bvec_iter_advance(src_bvec, &bi, offset);
+ for_each_mp_bvec(bv, src_bvec, bi, bi)
+ res_bvec[res_idx++] = bv;
+ total_len += iov_len;
+ }
+ iov_iter_bvec(iter, ddir, res_bvec, res_idx, total_len);
+ return 0;
+}
+
+static int iov_kern_bvec_size(const struct iovec *iov,
+ const struct io_mapped_ubuf *imu,
+ unsigned int *nr_seg)
+{
+ size_t offset = (size_t)(uintptr_t)iov->iov_base;
+ const struct bio_vec *bvec = imu->bvec;
+ int start = 0, i = 0;
+ size_t off = 0;
+ int ret;
+
+ ret = validate_fixed_range(offset, iov->iov_len, imu);
+ if (unlikely(ret))
+ return ret;
+
+ for (i = 0; off < offset + iov->iov_len && i < imu->nr_bvecs;
+ off += bvec[i].bv_len, i++) {
+ if (offset >= off && offset < off + bvec[i].bv_len)
+ start = i;
+ }
+ *nr_seg = i - start;
+ return 0;
+}
+
+static int io_kern_bvec_size(struct iovec *iov, unsigned nr_iovs,
+ struct io_mapped_ubuf *imu, unsigned *nr_segs)
+{
+ unsigned max_segs = 0;
+ size_t total_len = 0;
+ unsigned i;
+ int ret;
+
+ *nr_segs = 0;
+ for (i = 0; i < nr_iovs; i++) {
+ if (unlikely(!iov[i].iov_len))
+ return -EFAULT;
+ if (unlikely(check_add_overflow(total_len, iov[i].iov_len,
+ &total_len)))
+ return -EOVERFLOW;
+ ret = iov_kern_bvec_size(&iov[i], imu, &max_segs);
+ if (unlikely(ret))
+ return ret;
+ *nr_segs += max_segs;
+ }
+ if (total_len > MAX_RW_COUNT)
+ return -EINVAL;
+ return 0;
+}
+
int io_import_reg_vec(int ddir, struct iov_iter *iter,
struct io_kiocb *req, struct iou_vec *vec,
unsigned nr_iovs, unsigned issue_flags)
@@ -1363,14 +1452,20 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter,
if (!node)
return -EFAULT;
imu = node->buf;
- if (imu->is_kbuf)
- return -EOPNOTSUPP;
if (!(imu->dir & (1 << ddir)))
return -EFAULT;
iovec_off = vec->nr - nr_iovs;
iov = vec->iovec + iovec_off;
- nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu);
+
+ if (imu->is_kbuf) {
+ int ret = io_kern_bvec_size(iov, nr_iovs, imu, &nr_segs);
+
+ if (unlikely(ret))
+ return ret;
+ } else {
+ nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu);
+ }
if (sizeof(struct bio_vec) > sizeof(struct iovec)) {
size_t bvec_bytes;
@@ -1397,6 +1492,9 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter,
req->flags |= REQ_F_NEED_CLEANUP;
}
+ if (imu->is_kbuf)
+ return io_vec_fill_kern_bvec(ddir, iter, imu, iov, nr_iovs, vec);
+
return io_vec_fill_bvec(ddir, iter, imu, iov, nr_iovs, vec);
}
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index f2cfc371f3d0..a9ea7d29cdd9 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -205,8 +205,8 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,
* that it doesn't read in per-op data, play it safe and ensure that
* any SQE data is stable beyond prep. This can later get relaxed.
*/
- memcpy(ac->data.sqes, sqe, uring_sqe_size(req->ctx));
- ioucmd->sqe = ac->data.sqes;
+ memcpy(ac->sqes, sqe, uring_sqe_size(req->ctx));
+ ioucmd->sqe = ac->sqes;
return 0;
}
@@ -307,17 +307,18 @@ static inline int io_uring_cmd_getsockopt(struct socket *sock,
struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
+ const struct io_uring_sqe *sqe = cmd->sqe;
bool compat = !!(issue_flags & IO_URING_F_COMPAT);
int optlen, optname, level, err;
void __user *optval;
- level = READ_ONCE(cmd->sqe->level);
+ level = READ_ONCE(sqe->level);
if (level != SOL_SOCKET)
return -EOPNOTSUPP;
- optval = u64_to_user_ptr(READ_ONCE(cmd->sqe->optval));
- optname = READ_ONCE(cmd->sqe->optname);
- optlen = READ_ONCE(cmd->sqe->optlen);
+ optval = u64_to_user_ptr(READ_ONCE(sqe->optval));
+ optname = READ_ONCE(sqe->optname);
+ optlen = READ_ONCE(sqe->optlen);
err = do_sock_getsockopt(sock, compat, level, optname,
USER_SOCKPTR(optval),
@@ -333,15 +334,16 @@ static inline int io_uring_cmd_setsockopt(struct socket *sock,
struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
+ const struct io_uring_sqe *sqe = cmd->sqe;
bool compat = !!(issue_flags & IO_URING_F_COMPAT);
int optname, optlen, level;
void __user *optval;
sockptr_t optval_s;
- optval = u64_to_user_ptr(READ_ONCE(cmd->sqe->optval));
- optname = READ_ONCE(cmd->sqe->optname);
- optlen = READ_ONCE(cmd->sqe->optlen);
- level = READ_ONCE(cmd->sqe->level);
+ optval = u64_to_user_ptr(READ_ONCE(sqe->optval));
+ optname = READ_ONCE(sqe->optname);
+ optlen = READ_ONCE(sqe->optlen);
+ level = READ_ONCE(sqe->level);
optval_s = USER_SOCKPTR(optval);
return do_sock_setsockopt(sock, compat, level, optname, optval_s,
diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h
index 14e525255854..b04686b6b5d2 100644
--- a/io_uring/uring_cmd.h
+++ b/io_uring/uring_cmd.h
@@ -6,6 +6,7 @@
struct io_async_cmd {
struct io_uring_cmd_data data;
struct iou_vec vec;
+ struct io_uring_sqe sqes[2];
};
int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags);
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 9c95b5b6ec4e..80d4a6f71d29 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -818,6 +818,14 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
int ret = 0;
len = min_t(size_t, len, desc->count);
+ /*
+ * __tcp_read_sock() always calls io_zcrx_recv_skb one last time, even
+ * if desc->count is already 0. This is caused by the if (offset + 1 !=
+ * skb->len) check. Return early in this case to break out of
+ * __tcp_read_sock().
+ */
+ if (!len)
+ return 0;
if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT))
return -EAGAIN;