Diffstat (limited to 'io_uring')
-rw-r--r--  io_uring/Kconfig      |   1
-rw-r--r--  io_uring/io_uring.c   |  18
-rw-r--r--  io_uring/io_uring.h   |   3
-rw-r--r--  io_uring/msg_ring.c   |  11
-rw-r--r--  io_uring/net.c        | 135
-rw-r--r--  io_uring/refs.h       |   7
-rw-r--r--  io_uring/rsrc.c       | 126
-rw-r--r--  io_uring/uring_cmd.c  |  22
-rw-r--r--  io_uring/uring_cmd.h  |   1
-rw-r--r--  io_uring/zcrx.c       |   8
10 files changed, 207 insertions, 125 deletions
diff --git a/io_uring/Kconfig b/io_uring/Kconfig
index 9e2a4beba1ef..4b949c42c0bf 100644
--- a/io_uring/Kconfig
+++ b/io_uring/Kconfig
@@ -5,6 +5,7 @@
 
 config IO_URING_ZCRX
 	def_bool y
+	depends on IO_URING
 	depends on PAGE_POOL
 	depends on INET
 	depends on NET_RX_BUSY_POLL
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 3ba49c628337..c6209fe44cb1 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -1141,10 +1141,9 @@ void tctx_task_work(struct callback_head *cb)
 	WARN_ON_ONCE(ret);
 }
 
-static inline void io_req_local_work_add(struct io_kiocb *req,
-					 struct io_ring_ctx *ctx,
-					 unsigned flags)
+static void io_req_local_work_add(struct io_kiocb *req, unsigned flags)
 {
+	struct io_ring_ctx *ctx = req->ctx;
 	unsigned nr_wait, nr_tw, nr_tw_prev;
 	struct llist_node *head;
 
@@ -1239,17 +1238,16 @@ static void io_req_normal_work_add(struct io_kiocb *req)
 void __io_req_task_work_add(struct io_kiocb *req, unsigned flags)
 {
 	if (req->ctx->flags & IORING_SETUP_DEFER_TASKRUN)
-		io_req_local_work_add(req, req->ctx, flags);
+		io_req_local_work_add(req, flags);
 	else
 		io_req_normal_work_add(req);
 }
 
-void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
-				 unsigned flags)
+void io_req_task_work_add_remote(struct io_kiocb *req, unsigned flags)
 {
-	if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_DEFER_TASKRUN)))
+	if (WARN_ON_ONCE(!(req->ctx->flags & IORING_SETUP_DEFER_TASKRUN)))
 		return;
-	io_req_local_work_add(req, ctx, flags);
+	__io_req_task_work_add(req, flags);
 }
 
 static void __cold io_move_task_work_from_local(struct io_ring_ctx *ctx)
@@ -1645,6 +1643,8 @@ io_req_flags_t io_file_get_flags(struct file *file)
 {
 	io_req_flags_t res = 0;
 
+	BUILD_BUG_ON(REQ_F_ISREG_BIT != REQ_F_SUPPORT_NOWAIT_BIT + 1);
+
 	if (S_ISREG(file_inode(file)->i_mode))
 		res |= REQ_F_ISREG;
 	if ((file->f_flags & O_NONBLOCK) || (file->f_mode & FMODE_NOWAIT))
@@ -1796,7 +1796,7 @@ struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
 	struct io_kiocb *req = container_of(work, struct io_kiocb, work);
 	struct io_kiocb *nxt = NULL;
 
-	if (req_ref_put_and_test(req)) {
+	if (req_ref_put_and_test_atomic(req)) {
 		if (req->flags & IO_REQ_LINK_FLAGS)
 			nxt = io_req_find_next(req);
 		io_free_req(req);
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index 87f883130286..e4050b2d0821 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -89,8 +89,7 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
 			       unsigned issue_flags);
 
 void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
-void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
-				 unsigned flags);
+void io_req_task_work_add_remote(struct io_kiocb *req, unsigned flags);
 void io_req_task_queue(struct io_kiocb *req);
 void io_req_task_complete(struct io_kiocb *req, io_tw_token_t tw);
 void io_req_task_queue_fail(struct io_kiocb *req, int ret);
diff --git a/io_uring/msg_ring.c b/io_uring/msg_ring.c
index 0bbcbbcdebfd..50a958e9c921 100644
--- a/io_uring/msg_ring.c
+++ b/io_uring/msg_ring.c
@@ -38,8 +38,8 @@ static void io_double_unlock_ctx(struct io_ring_ctx *octx)
 	mutex_unlock(&octx->uring_lock);
 }
 
-static int io_double_lock_ctx(struct io_ring_ctx *octx,
-			      unsigned int issue_flags)
+static int io_lock_external_ctx(struct io_ring_ctx *octx,
+				unsigned int issue_flags)
 {
 	/*
 	 * To ensure proper ordering between the two ctxs, we can only
@@ -93,13 +93,14 @@ static int io_msg_remote_post(struct io_ring_ctx *ctx, struct io_kiocb *req,
 		kmem_cache_free(req_cachep, req);
 		return -EOWNERDEAD;
 	}
+	req->opcode = IORING_OP_NOP;
 	req->cqe.user_data = user_data;
 	io_req_set_res(req, res, cflags);
 	percpu_ref_get(&ctx->refs);
 	req->ctx = ctx;
 	req->tctx = NULL;
 	req->io_task_work.func = io_msg_tw_complete;
-	io_req_task_work_add_remote(req, ctx, IOU_F_TWQ_LAZY_WAKE);
+	io_req_task_work_add_remote(req, IOU_F_TWQ_LAZY_WAKE);
 	return 0;
 }
 
@@ -154,7 +155,7 @@ static int __io_msg_ring_data(struct io_ring_ctx *target_ctx,
 
 	ret = -EOVERFLOW;
 	if (target_ctx->flags & IORING_SETUP_IOPOLL) {
-		if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
+		if (unlikely(io_lock_external_ctx(target_ctx, issue_flags)))
 			return -EAGAIN;
 	}
 	if (io_post_aux_cqe(target_ctx, msg->user_data, msg->len, flags))
@@ -199,7 +200,7 @@ static int io_msg_install_complete(struct io_kiocb *req, unsigned int issue_flags)
 	struct file *src_file = msg->src_file;
 	int ret;
 
-	if (unlikely(io_double_lock_ctx(target_ctx, issue_flags)))
+	if (unlikely(io_lock_external_ctx(target_ctx, issue_flags)))
 		return -EAGAIN;
 
 	ret = __io_fixed_fd_install(target_ctx, src_file, msg->dst_fd);
diff --git a/io_uring/net.c b/io_uring/net.c
index 8944eb679024..24040bc3916a 100644
--- a/io_uring/net.c
+++ b/io_uring/net.c
@@ -97,6 +97,11 @@ struct io_recvzc {
 	struct io_zcrx_ifq *ifq;
 };
 
+static int io_sg_from_iter_iovec(struct sk_buff *skb,
+				 struct iov_iter *from, size_t length);
+static int io_sg_from_iter(struct sk_buff *skb,
+			   struct iov_iter *from, size_t length);
+
 int io_shutdown_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_shutdown *shutdown = io_kiocb_to_cmd(req, struct io_shutdown);
@@ -176,16 +181,6 @@ static struct io_async_msghdr *io_msg_alloc_async(struct io_kiocb *req)
 	return hdr;
 }
 
-/* assign new iovec to kmsg, if we need to */
-static void io_net_vec_assign(struct io_kiocb *req, struct io_async_msghdr *kmsg,
-			      struct iovec *iov)
-{
-	if (iov) {
-		req->flags |= REQ_F_NEED_CLEANUP;
-		io_vec_reset_iovec(&kmsg->vec, iov, kmsg->msg.msg_iter.nr_segs);
-	}
-}
-
 static inline void io_mshot_prep_retry(struct io_kiocb *req,
 				       struct io_async_msghdr *kmsg)
 {
@@ -217,7 +212,11 @@ static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
 			  &iomsg->msg.msg_iter, io_is_compat(req->ctx));
 	if (unlikely(ret < 0))
 		return ret;
-	io_net_vec_assign(req, iomsg, iov);
+
+	if (iov) {
+		req->flags |= REQ_F_NEED_CLEANUP;
+		io_vec_reset_iovec(&iomsg->vec, iov, iomsg->msg.msg_iter.nr_segs);
+	}
 	return 0;
 }
 
@@ -325,25 +324,6 @@ static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
 	return 0;
 }
 
-static int io_sendmsg_copy_hdr(struct io_kiocb *req,
-			       struct io_async_msghdr *iomsg)
-{
-	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
-	struct user_msghdr msg;
-	int ret;
-
-	ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE, NULL);
-	if (unlikely(ret))
-		return ret;
-
-	if (!(req->flags & REQ_F_BUFFER_SELECT))
-		ret = io_net_import_vec(req, iomsg, msg.msg_iov, msg.msg_iovlen,
-					ITER_SOURCE);
-	/* save msg_control as sys_sendmsg() overwrites it */
-	sr->msg_control = iomsg->msg.msg_control_user;
-	return ret;
-}
-
 void io_sendmsg_recvmsg_cleanup(struct io_kiocb *req)
 {
 	struct io_async_msghdr *io = req->async_data;
@@ -379,6 +359,8 @@ static int io_send_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		kmsg->msg.msg_name = &kmsg->addr;
 		kmsg->msg.msg_namelen = addr_len;
 	}
+	if (sr->flags & IORING_RECVSEND_FIXED_BUF)
+		return 0;
 	if (!io_do_buffer_select(req)) {
 		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len,
 				  &kmsg->msg.msg_iter);
@@ -392,31 +374,24 @@ static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 	struct io_async_msghdr *kmsg = req->async_data;
-
-	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
-
-	return io_sendmsg_copy_hdr(req, kmsg);
-}
-
-static int io_sendmsg_zc_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
-	struct io_async_msghdr *kmsg = req->async_data;
 	struct user_msghdr msg;
 	int ret;
 
-	if (!(sr->flags & IORING_RECVSEND_FIXED_BUF))
-		return io_sendmsg_setup(req, sqe);
-
 	sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
-
 	ret = io_msg_copy_hdr(req, kmsg, &msg, ITER_SOURCE, NULL);
 	if (unlikely(ret))
 		return ret;
+	/* save msg_control as sys_sendmsg() overwrites it */
 	sr->msg_control = kmsg->msg.msg_control_user;
-	kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
-	return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov, msg.msg_iovlen);
+
+	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
+		kmsg->msg.msg_iter.nr_segs = msg.msg_iovlen;
+		return io_prep_reg_iovec(req, &kmsg->vec, msg.msg_iov,
+					 msg.msg_iovlen);
+	}
+	if (req->flags & REQ_F_BUFFER_SELECT)
+		return 0;
+	return io_net_import_vec(req, kmsg, msg.msg_iov, msg.msg_iovlen, ITER_SOURCE);
 }
 
 #define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)
@@ -427,12 +402,6 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	sr->done_io = 0;
 	sr->retry = false;
-
-	if (req->opcode != IORING_OP_SEND) {
-		if (sqe->addr2 || sqe->file_index)
-			return -EINVAL;
-	}
-
 	sr->len = READ_ONCE(sqe->len);
 	sr->flags = READ_ONCE(sqe->ioprio);
 	if (sr->flags & ~SENDMSG_FLAGS)
@@ -458,6 +427,8 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		return -ENOMEM;
 	if (req->opcode != IORING_OP_SENDMSG)
 		return io_send_setup(req, sqe);
+	if (unlikely(sqe->addr2 || sqe->file_index))
+		return -EINVAL;
 	return io_sendmsg_setup(req, sqe);
 }
 
@@ -1302,11 +1273,12 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
 	struct io_sr_msg *zc = io_kiocb_to_cmd(req, struct io_sr_msg);
 	struct io_ring_ctx *ctx = req->ctx;
+	struct io_async_msghdr *iomsg;
 	struct io_kiocb *notif;
+	int ret;
 
 	zc->done_io = 0;
 	zc->retry = false;
-	req->flags |= REQ_F_POLL_NO_LAZY;
 
 	if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
 		return -EINVAL;
@@ -1320,7 +1292,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	notif->cqe.user_data = req->cqe.user_data;
 	notif->cqe.res = 0;
 	notif->cqe.flags = IORING_CQE_F_NOTIF;
-	req->flags |= REQ_F_NEED_CLEANUP;
+	req->flags |= REQ_F_NEED_CLEANUP | REQ_F_POLL_NO_LAZY;
 
 	zc->flags = READ_ONCE(sqe->ioprio);
 	if (unlikely(zc->flags & ~IO_ZC_FLAGS_COMMON)) {
@@ -1335,11 +1307,6 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 		}
 	}
 
-	if (req->opcode != IORING_OP_SEND_ZC) {
-		if (unlikely(sqe->addr2 || sqe->file_index))
-			return -EINVAL;
-	}
-
 	zc->len = READ_ONCE(sqe->len);
 	zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
 	req->buf_index = READ_ONCE(sqe->buf_index);
@@ -1349,13 +1316,28 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 	if (io_is_compat(req->ctx))
 		zc->msg_flags |= MSG_CMSG_COMPAT;
 
-	if (unlikely(!io_msg_alloc_async(req)))
+	iomsg = io_msg_alloc_async(req);
+	if (unlikely(!iomsg))
 		return -ENOMEM;
+
 	if (req->opcode == IORING_OP_SEND_ZC) {
-		req->flags |= REQ_F_IMPORT_BUFFER;
-		return io_send_setup(req, sqe);
+		if (zc->flags & IORING_RECVSEND_FIXED_BUF)
+			req->flags |= REQ_F_IMPORT_BUFFER;
+		ret = io_send_setup(req, sqe);
+	} else {
+		if (unlikely(sqe->addr2 || sqe->file_index))
+			return -EINVAL;
+		ret = io_sendmsg_setup(req, sqe);
+	}
+	if (unlikely(ret))
+		return ret;
+
+	if (!(zc->flags & IORING_RECVSEND_FIXED_BUF)) {
+		iomsg->msg.sg_from_iter = io_sg_from_iter_iovec;
+		return io_notif_account_mem(zc->notif, iomsg->msg.msg_iter.count);
 	}
-	return io_sendmsg_zc_setup(req, sqe);
+	iomsg->msg.sg_from_iter = io_sg_from_iter;
+	return 0;
 }
 
 static int io_sg_from_iter_iovec(struct sk_buff *skb,
@@ -1412,27 +1394,13 @@ static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
 	struct io_async_msghdr *kmsg = req->async_data;
-	int ret;
 
-	if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
-		sr->notif->buf_index = req->buf_index;
-		ret = io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter,
-					(u64)(uintptr_t)sr->buf, sr->len,
-					ITER_SOURCE, issue_flags);
-		if (unlikely(ret))
-			return ret;
-		kmsg->msg.sg_from_iter = io_sg_from_iter;
-	} else {
-		ret = import_ubuf(ITER_SOURCE, sr->buf, sr->len, &kmsg->msg.msg_iter);
-		if (unlikely(ret))
-			return ret;
-		ret = io_notif_account_mem(sr->notif, sr->len);
-		if (unlikely(ret))
-			return ret;
-		kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
-	}
+	WARN_ON_ONCE(!(sr->flags & IORING_RECVSEND_FIXED_BUF));
 
-	return ret;
+	sr->notif->buf_index = req->buf_index;
+	return io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter,
+				 (u64)(uintptr_t)sr->buf, sr->len,
+				 ITER_SOURCE, issue_flags);
 }
 
 int io_send_zc(struct io_kiocb *req, unsigned int issue_flags)
@@ -1513,8 +1481,6 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
 	unsigned flags;
 	int ret, min_ret = 0;
 
-	kmsg->msg.sg_from_iter = io_sg_from_iter_iovec;
-
 	if (req->flags & REQ_F_IMPORT_BUFFER) {
 		unsigned uvec_segs = kmsg->msg.msg_iter.nr_segs;
 		int ret;
@@ -1523,7 +1489,6 @@ int io_sendmsg_zc(struct io_kiocb *req, unsigned int issue_flags)
 					&kmsg->vec, uvec_segs, issue_flags);
 		if (unlikely(ret))
 			return ret;
-		kmsg->msg.sg_from_iter = io_sg_from_iter;
 		req->flags &= ~REQ_F_IMPORT_BUFFER;
 	}
 
diff --git a/io_uring/refs.h b/io_uring/refs.h
index 63982ead9f7d..0d928d87c4ed 100644
--- a/io_uring/refs.h
+++ b/io_uring/refs.h
@@ -17,6 +17,13 @@ static inline bool req_ref_inc_not_zero(struct io_kiocb *req)
 	return atomic_inc_not_zero(&req->refs);
 }
 
+static inline bool req_ref_put_and_test_atomic(struct io_kiocb *req)
+{
+	WARN_ON_ONCE(!(data_race(req->flags) & REQ_F_REFCOUNT));
+	WARN_ON_ONCE(req_ref_zero_or_close_to_overflow(req));
+	return atomic_dec_and_test(&req->refs);
+}
+
 static inline bool req_ref_put_and_test(struct io_kiocb *req)
 {
 	if (likely(!(req->flags & REQ_F_REFCOUNT)))
diff --git a/io_uring/rsrc.c b/io_uring/rsrc.c
index 3f195e24777e..5e64a8bb30a4 100644
--- a/io_uring/rsrc.c
+++ b/io_uring/rsrc.c
@@ -1002,20 +1002,33 @@ unlock:
 }
 EXPORT_SYMBOL_GPL(io_buffer_unregister_bvec);
 
-static int io_import_fixed(int ddir, struct iov_iter *iter,
-			   struct io_mapped_ubuf *imu,
-			   u64 buf_addr, size_t len)
+static int validate_fixed_range(u64 buf_addr, size_t len,
+				const struct io_mapped_ubuf *imu)
 {
 	u64 buf_end;
-	size_t offset;
 
-	if (WARN_ON_ONCE(!imu))
-		return -EFAULT;
 	if (unlikely(check_add_overflow(buf_addr, (u64)len, &buf_end)))
 		return -EFAULT;
 	/* not inside the mapped region */
 	if (unlikely(buf_addr < imu->ubuf || buf_end > (imu->ubuf + imu->len)))
 		return -EFAULT;
+	if (unlikely(len > MAX_RW_COUNT))
+		return -EFAULT;
+	return 0;
+}
+
+static int io_import_fixed(int ddir, struct iov_iter *iter,
+			   struct io_mapped_ubuf *imu,
+			   u64 buf_addr, size_t len)
+{
+	size_t offset;
+	int ret;
+
+	if (WARN_ON_ONCE(!imu))
+		return -EFAULT;
+	ret = validate_fixed_range(buf_addr, len, imu);
+	if (unlikely(ret))
+		return ret;
 	if (!(imu->dir & (1 << ddir)))
 		return -EFAULT;
 
@@ -1305,12 +1318,12 @@ static int io_vec_fill_bvec(int ddir, struct iov_iter *iter,
 		u64 buf_addr = (u64)(uintptr_t)iovec[iov_idx].iov_base;
 		struct bio_vec *src_bvec;
 		size_t offset;
-		u64 buf_end;
+		int ret;
+
+		ret = validate_fixed_range(buf_addr, iov_len, imu);
+		if (unlikely(ret))
+			return ret;
 
-		if (unlikely(check_add_overflow(buf_addr, (u64)iov_len, &buf_end)))
-			return -EFAULT;
-		if (unlikely(buf_addr < imu->ubuf || buf_end > (imu->ubuf + imu->len)))
-			return -EFAULT;
 		if (unlikely(!iov_len))
 			return -EFAULT;
 		if (unlikely(check_add_overflow(total_len, iov_len, &total_len)))
@@ -1349,6 +1362,82 @@ static int io_estimate_bvec_size(struct iovec *iov, unsigned nr_iovs,
 	return max_segs;
 }
 
+static int io_vec_fill_kern_bvec(int ddir, struct iov_iter *iter,
+				 struct io_mapped_ubuf *imu,
+				 struct iovec *iovec, unsigned nr_iovs,
+				 struct iou_vec *vec)
+{
+	const struct bio_vec *src_bvec = imu->bvec;
+	struct bio_vec *res_bvec = vec->bvec;
+	unsigned res_idx = 0;
+	size_t total_len = 0;
+	unsigned iov_idx;
+
+	for (iov_idx = 0; iov_idx < nr_iovs; iov_idx++) {
+		size_t offset = (size_t)(uintptr_t)iovec[iov_idx].iov_base;
+		size_t iov_len = iovec[iov_idx].iov_len;
+		struct bvec_iter bi = {
+			.bi_size	= offset + iov_len,
+		};
+		struct bio_vec bv;
+
+		bvec_iter_advance(src_bvec, &bi, offset);
+		for_each_mp_bvec(bv, src_bvec, bi, bi)
+			res_bvec[res_idx++] = bv;
+		total_len += iov_len;
+	}
+	iov_iter_bvec(iter, ddir, res_bvec, res_idx, total_len);
+	return 0;
+}
+
+static int iov_kern_bvec_size(const struct iovec *iov,
+			      const struct io_mapped_ubuf *imu,
+			      unsigned int *nr_seg)
+{
+	size_t offset = (size_t)(uintptr_t)iov->iov_base;
+	const struct bio_vec *bvec = imu->bvec;
+	int start = 0, i = 0;
+	size_t off = 0;
+	int ret;
+
+	ret = validate_fixed_range(offset, iov->iov_len, imu);
+	if (unlikely(ret))
+		return ret;
+
+	for (i = 0; off < offset + iov->iov_len && i < imu->nr_bvecs;
+			off += bvec[i].bv_len, i++) {
+		if (offset >= off && offset < off + bvec[i].bv_len)
+			start = i;
+	}
+	*nr_seg = i - start;
+	return 0;
+}
+
+static int io_kern_bvec_size(struct iovec *iov, unsigned nr_iovs,
+			     struct io_mapped_ubuf *imu, unsigned *nr_segs)
+{
+	unsigned max_segs = 0;
+	size_t total_len = 0;
+	unsigned i;
+	int ret;
+
+	*nr_segs = 0;
+	for (i = 0; i < nr_iovs; i++) {
+		if (unlikely(!iov[i].iov_len))
+			return -EFAULT;
+		if (unlikely(check_add_overflow(total_len, iov[i].iov_len,
+						&total_len)))
+			return -EOVERFLOW;
+		ret = iov_kern_bvec_size(&iov[i], imu, &max_segs);
+		if (unlikely(ret))
+			return ret;
+		*nr_segs += max_segs;
+	}
+	if (total_len > MAX_RW_COUNT)
+		return -EINVAL;
+	return 0;
+}
+
 int io_import_reg_vec(int ddir, struct iov_iter *iter,
 			struct io_kiocb *req, struct iou_vec *vec,
 			unsigned nr_iovs, unsigned issue_flags)
@@ -1363,14 +1452,20 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter,
 	if (!node)
 		return -EFAULT;
 	imu = node->buf;
-	if (imu->is_kbuf)
-		return -EOPNOTSUPP;
 	if (!(imu->dir & (1 << ddir)))
 		return -EFAULT;
 
 	iovec_off = vec->nr - nr_iovs;
 	iov = vec->iovec + iovec_off;
-	nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu);
+
+	if (imu->is_kbuf) {
+		int ret = io_kern_bvec_size(iov, nr_iovs, imu, &nr_segs);
+
+		if (unlikely(ret))
+			return ret;
+	} else {
+		nr_segs = io_estimate_bvec_size(iov, nr_iovs, imu);
+	}
 
 	if (sizeof(struct bio_vec) > sizeof(struct iovec)) {
 		size_t bvec_bytes;
@@ -1397,6 +1492,9 @@ int io_import_reg_vec(int ddir, struct iov_iter *iter,
 		req->flags |= REQ_F_NEED_CLEANUP;
 	}
 
+	if (imu->is_kbuf)
+		return io_vec_fill_kern_bvec(ddir, iter, imu, iov, nr_iovs, vec);
+
 	return io_vec_fill_bvec(ddir, iter, imu, iov, nr_iovs, vec);
 }
diff --git a/io_uring/uring_cmd.c b/io_uring/uring_cmd.c
index f2cfc371f3d0..a9ea7d29cdd9 100644
--- a/io_uring/uring_cmd.c
+++ b/io_uring/uring_cmd.c
@@ -205,8 +205,8 @@ static int io_uring_cmd_prep_setup(struct io_kiocb *req,
 	 * that it doesn't read in per-op data, play it safe and ensure that
 	 * any SQE data is stable beyond prep. This can later get relaxed.
 	 */
-	memcpy(ac->data.sqes, sqe, uring_sqe_size(req->ctx));
-	ioucmd->sqe = ac->data.sqes;
+	memcpy(ac->sqes, sqe, uring_sqe_size(req->ctx));
+	ioucmd->sqe = ac->sqes;
 	return 0;
 }
 
@@ -307,17 +307,18 @@ static inline int io_uring_cmd_getsockopt(struct socket *sock,
 					  struct io_uring_cmd *cmd,
 					  unsigned int issue_flags)
 {
+	const struct io_uring_sqe *sqe = cmd->sqe;
 	bool compat = !!(issue_flags & IO_URING_F_COMPAT);
 	int optlen, optname, level, err;
 	void __user *optval;
 
-	level = READ_ONCE(cmd->sqe->level);
+	level = READ_ONCE(sqe->level);
 	if (level != SOL_SOCKET)
 		return -EOPNOTSUPP;
 
-	optval = u64_to_user_ptr(READ_ONCE(cmd->sqe->optval));
-	optname = READ_ONCE(cmd->sqe->optname);
-	optlen = READ_ONCE(cmd->sqe->optlen);
+	optval = u64_to_user_ptr(READ_ONCE(sqe->optval));
+	optname = READ_ONCE(sqe->optname);
+	optlen = READ_ONCE(sqe->optlen);
 
 	err = do_sock_getsockopt(sock, compat, level, optname,
 				 USER_SOCKPTR(optval),
@@ -333,15 +334,16 @@ static inline int io_uring_cmd_setsockopt(struct socket *sock,
 					  struct io_uring_cmd *cmd,
 					  unsigned int issue_flags)
 {
+	const struct io_uring_sqe *sqe = cmd->sqe;
 	bool compat = !!(issue_flags & IO_URING_F_COMPAT);
 	int optname, optlen, level;
 	void __user *optval;
 	sockptr_t optval_s;
 
-	optval = u64_to_user_ptr(READ_ONCE(cmd->sqe->optval));
-	optname = READ_ONCE(cmd->sqe->optname);
-	optlen = READ_ONCE(cmd->sqe->optlen);
-	level = READ_ONCE(cmd->sqe->level);
+	optval = u64_to_user_ptr(READ_ONCE(sqe->optval));
+	optname = READ_ONCE(sqe->optname);
+	optlen = READ_ONCE(sqe->optlen);
+	level = READ_ONCE(sqe->level);
 	optval_s = USER_SOCKPTR(optval);
 
 	return do_sock_setsockopt(sock, compat, level, optname, optval_s,
diff --git a/io_uring/uring_cmd.h b/io_uring/uring_cmd.h
index 14e525255854..b04686b6b5d2 100644
--- a/io_uring/uring_cmd.h
+++ b/io_uring/uring_cmd.h
@@ -6,6 +6,7 @@
 struct io_async_cmd {
 	struct io_uring_cmd_data	data;
 	struct iou_vec			vec;
+	struct io_uring_sqe		sqes[2];
 };
 
 int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags);
diff --git a/io_uring/zcrx.c b/io_uring/zcrx.c
index 9c95b5b6ec4e..80d4a6f71d29 100644
--- a/io_uring/zcrx.c
+++ b/io_uring/zcrx.c
@@ -818,6 +818,14 @@ io_zcrx_recv_skb(read_descriptor_t *desc, struct sk_buff *skb,
 	int ret = 0;
 
 	len = min_t(size_t, len, desc->count);
+	/*
+	 * __tcp_read_sock() always calls io_zcrx_recv_skb one last time, even
+	 * if desc->count is already 0. This is caused by the if (offset + 1 !=
+	 * skb->len) check. Return early in this case to break out of
+	 * __tcp_read_sock().
+	 */
+	if (!len)
+		return 0;
 	if (unlikely(args->nr_skbs++ > IO_SKBS_PER_CALL_LIMIT))
 		return -EAGAIN;
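A note on the refs.h hunk above: req_ref_put_and_test() skips the atomic operation entirely for requests that never had REQ_F_REFCOUNT set, while the new req_ref_put_and_test_atomic() always performs an atomic decrement, which is what io_wq_free_work() now relies on. A minimal userspace sketch of that distinction, using C11 atomics and made-up stand-in names (struct req, put_and_test*) rather than the kernel types:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define REQ_F_REFCOUNT	(1u << 0)	/* stand-in for the kernel flag */

struct req {
	unsigned flags;
	atomic_int refs;
};

/* always an atomic dec-and-test, like req_ref_put_and_test_atomic() */
static bool put_and_test_atomic(struct req *r)
{
	return atomic_fetch_sub(&r->refs, 1) == 1;
}

/* skips the atomic op for unrefcounted requests; the caller owns the
 * only reference, so "dropped the last ref" is trivially true */
static bool put_and_test(struct req *r)
{
	if (!(r->flags & REQ_F_REFCOUNT))
		return true;
	return put_and_test_atomic(r);
}

int main(void)
{
	struct req a = { .flags = REQ_F_REFCOUNT, .refs = 2 };
	struct req b = { .flags = 0 };

	printf("%d ", put_and_test(&a));	/* 0: one ref remains */
	printf("%d ", put_and_test(&a));	/* 1: dropped the last ref */
	printf("%d\n", put_and_test(&b));	/* 1: never refcounted */
	return 0;
}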
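Similarly, the iov_kern_bvec_size() helper added to rsrc.c pre-counts how many bio_vec segments of a registered kernel buffer an (offset, length) range touches, so the result vector can be sized before io_vec_fill_kern_bvec() fills it. The counting arithmetic can be modelled in plain userspace C; everything below (kvec_seg, kern_bvec_segs) is a stand-in invented for illustration, not a kernel API:

#include <stdio.h>
#include <stddef.h>

struct kvec_seg { size_t bv_len; };	/* stand-in for struct bio_vec */

/* count how many source segments [offset, offset+len) overlaps,
 * mirroring the loop in iov_kern_bvec_size() */
static unsigned kern_bvec_segs(const struct kvec_seg *bvec, unsigned nr_bvecs,
			       size_t offset, size_t len)
{
	unsigned start = 0, i;
	size_t off = 0;

	for (i = 0; off < offset + len && i < nr_bvecs;
	     off += bvec[i].bv_len, i++) {
		if (offset >= off && offset < off + bvec[i].bv_len)
			start = i;	/* segment holding the first byte */
	}
	return i - start;
}

int main(void)
{
	/* three segments of 4K, 2K and 8K */
	const struct kvec_seg segs[] = { {4096}, {2048}, {8192} };

	/* starts 1K into segment 0, ends inside segment 2: prints 3 */
	printf("%u\n", kern_bvec_segs(segs, 3, 1024, 4096 + 2048));
	/* lies entirely inside segment 1: prints 1 */
	printf("%u\n", kern_bvec_segs(segs, 3, 4096 + 100, 1000));
	return 0;
}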
