-rw-r--r--   include/linux/io_uring_types.h |  2
-rw-r--r--   io_uring/io_uring.c            | 79
-rw-r--r--   io_uring/io_uring.h            |  3
3 files changed, 34 insertions(+), 50 deletions(-)
diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h
index 73b289b48280..00dbd7cd0e7d 100644
--- a/include/linux/io_uring_types.h
+++ b/include/linux/io_uring_types.h
@@ -341,7 +341,6 @@ struct io_ring_ctx {
 		unsigned		cached_cq_tail;
 		unsigned		cq_entries;
 		struct io_ev_fd	__rcu	*io_ev_fd;
-		unsigned		cq_extra;
 
 		void			*cq_wait_arg;
 		size_t			cq_wait_size;
@@ -417,6 +416,7 @@ struct io_ring_ctx {
 	struct callback_head		poll_wq_task_work;
 
 	struct list_head		defer_list;
+	unsigned			nr_drained;
 
 	struct io_alloc_cache		msg_cache;
 	spinlock_t			msg_lock;
diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c
index 714b66ab34b0..9a9b8d35349b 100644
--- a/io_uring/io_uring.c
+++ b/io_uring/io_uring.c
@@ -129,7 +129,6 @@
 struct io_defer_entry {
 	struct list_head	list;
 	struct io_kiocb		*req;
-	u32			seq;
 };
 
 /* requests with any of those set should undergo io_disarm_next() */
@@ -149,6 +148,7 @@ static bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 					 bool is_sqpoll_thread);
 
 static void io_queue_sqe(struct io_kiocb *req);
+static void __io_req_caches_free(struct io_ring_ctx *ctx);
 
 static __read_mostly DEFINE_STATIC_KEY_FALSE(io_key_has_sqarray);
 
@@ -540,46 +540,45 @@ void io_req_queue_iowq(struct io_kiocb *req)
 	io_req_task_work_add(req);
 }
 
-static bool io_drain_defer_seq(struct io_kiocb *req, u32 seq)
+static unsigned io_linked_nr(struct io_kiocb *req)
 {
-	struct io_ring_ctx *ctx = req->ctx;
+	struct io_kiocb *tmp;
+	unsigned nr = 0;
 
-	return seq + READ_ONCE(ctx->cq_extra) != ctx->cached_cq_tail;
+	io_for_each_link(tmp, req)
+		nr++;
+	return nr;
 }
 
-static __cold noinline void __io_queue_deferred(struct io_ring_ctx *ctx)
+static __cold noinline void io_queue_deferred(struct io_ring_ctx *ctx)
 {
 	bool drain_seen = false, first = true;
 
+	lockdep_assert_held(&ctx->uring_lock);
+	__io_req_caches_free(ctx);
+
 	while (!list_empty(&ctx->defer_list)) {
 		struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
 						struct io_defer_entry, list);
 
 		drain_seen |= de->req->flags & REQ_F_IO_DRAIN;
-		if ((drain_seen || first) && io_drain_defer_seq(de->req, de->seq))
-			break;
+		if ((drain_seen || first) && ctx->nr_req_allocated != ctx->nr_drained)
+			return;
 
 		list_del_init(&de->list);
+		ctx->nr_drained -= io_linked_nr(de->req);
 		io_req_task_queue(de->req);
 		kfree(de);
 		first = false;
 	}
 }
 
-static __cold noinline void io_queue_deferred(struct io_ring_ctx *ctx)
-{
-	guard(spinlock)(&ctx->completion_lock);
-	__io_queue_deferred(ctx);
-}
-
 void __io_commit_cqring_flush(struct io_ring_ctx *ctx)
 {
 	if (ctx->poll_activated)
 		io_poll_wq_wake(ctx);
 	if (ctx->off_timeout_used)
 		io_flush_timeouts(ctx);
-	if (ctx->drain_active)
-		io_queue_deferred(ctx);
 	if (ctx->has_evfd)
 		io_eventfd_signal(ctx, true);
 }
@@ -742,7 +741,6 @@ static bool io_cqring_event_overflow(struct io_ring_ctx *ctx, u64 user_data,
 		 * on the floor.
 		 */
 		WRITE_ONCE(r->cq_overflow, READ_ONCE(r->cq_overflow) + 1);
-		ctx->cq_extra--;
 		set_bit(IO_CHECK_CQ_DROPPED_BIT, &ctx->check_cq);
 		return false;
 	}
@@ -812,8 +810,6 @@ static bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data, s32 res,
 {
 	struct io_uring_cqe *cqe;
 
-	ctx->cq_extra++;
-
 	if (likely(io_get_cqe(ctx, &cqe))) {
 		WRITE_ONCE(cqe->user_data, user_data);
 		WRITE_ONCE(cqe->res, res);
@@ -1459,6 +1455,10 @@ void __io_submit_flush_completions(struct io_ring_ctx *ctx)
 		io_free_batch_list(ctx, state->compl_reqs.first);
 		INIT_WQ_LIST(&state->compl_reqs);
 	}
+
+	if (unlikely(ctx->drain_active))
+		io_queue_deferred(ctx);
+
 	ctx->submit_state.cq_flush = false;
 }
 
@@ -1646,17 +1646,6 @@ io_req_flags_t io_file_get_flags(struct file *file)
 	return res;
 }
 
-static u32 io_get_sequence(struct io_kiocb *req)
-{
-	u32 seq = req->ctx->cached_sq_head;
-	struct io_kiocb *cur;
-
-	/* need original cached_sq_head, but it was increased for each req */
-	io_for_each_link(cur, req)
-		seq--;
-	return seq;
-}
-
 static __cold void io_drain_req(struct io_kiocb *req)
 	__must_hold(&ctx->uring_lock)
 {
@@ -1673,14 +1662,12 @@ static __cold void io_drain_req(struct io_kiocb *req)
 	io_prep_async_link(req);
 	trace_io_uring_defer(req);
 	de->req = req;
-	de->seq = io_get_sequence(req);
 
-	scoped_guard(spinlock, &ctx->completion_lock) {
-		list_add_tail(&de->list, &ctx->defer_list);
-		__io_queue_deferred(ctx);
-		if (!drain && list_empty(&ctx->defer_list))
-			ctx->drain_active = false;
-	}
+	ctx->nr_drained += io_linked_nr(req);
+	list_add_tail(&de->list, &ctx->defer_list);
+	io_queue_deferred(ctx);
+	if (!drain && list_empty(&ctx->defer_list))
+		ctx->drain_active = false;
 }
 
 static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
@@ -2263,10 +2250,6 @@ static bool io_get_sqe(struct io_ring_ctx *ctx, const struct io_uring_sqe **sqe)
 	    (!(ctx->flags & IORING_SETUP_NO_SQARRAY))) {
 		head = READ_ONCE(ctx->sq_array[head]);
 		if (unlikely(head >= ctx->sq_entries)) {
-			/* drop invalid entries */
-			spin_lock(&ctx->completion_lock);
-			ctx->cq_extra--;
-			spin_unlock(&ctx->completion_lock);
 			WRITE_ONCE(ctx->rings->sq_dropped,
 				   READ_ONCE(ctx->rings->sq_dropped) + 1);
 			return false;
@@ -2684,13 +2667,11 @@ unsigned long rings_size(unsigned int flags, unsigned int sq_entries,
 	return off;
 }
 
-static void io_req_caches_free(struct io_ring_ctx *ctx)
+static __cold void __io_req_caches_free(struct io_ring_ctx *ctx)
 {
 	struct io_kiocb *req;
 	int nr = 0;
 
-	mutex_lock(&ctx->uring_lock);
-
 	while (!io_req_cache_empty(ctx)) {
 		req = io_extract_req(ctx);
 		kmem_cache_free(req_cachep, req);
@@ -2700,7 +2681,12 @@ static void io_req_caches_free(struct io_ring_ctx *ctx)
 		ctx->nr_req_allocated -= nr;
 		percpu_ref_put_many(&ctx->refs, nr);
 	}
-	mutex_unlock(&ctx->uring_lock);
+}
+
+static __cold void io_req_caches_free(struct io_ring_ctx *ctx)
+{
+	guard(mutex)(&ctx->uring_lock);
+	__io_req_caches_free(ctx);
 }
 
 static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
@@ -3005,20 +2991,19 @@ static __cold bool io_cancel_defer_files(struct io_ring_ctx *ctx,
 	struct io_defer_entry *de;
 	LIST_HEAD(list);
 
-	spin_lock(&ctx->completion_lock);
 	list_for_each_entry_reverse(de, &ctx->defer_list, list) {
 		if (io_match_task_safe(de->req, tctx, cancel_all)) {
 			list_cut_position(&list, &ctx->defer_list, &de->list);
 			break;
 		}
 	}
-	spin_unlock(&ctx->completion_lock);
 
 	if (list_empty(&list))
 		return false;
 
 	while (!list_empty(&list)) {
 		de = list_first_entry(&list, struct io_defer_entry, list);
 		list_del_init(&de->list);
+		ctx->nr_drained -= io_linked_nr(de->req);
 		io_req_task_queue_fail(de->req, -ECANCELED);
 		kfree(de);
 	}
@@ -3093,8 +3078,8 @@ static __cold bool io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
 	if ((ctx->flags & IORING_SETUP_DEFER_TASKRUN) &&
 	    io_allowed_defer_tw_run(ctx))
 		ret |= io_run_local_work(ctx, INT_MAX, INT_MAX) > 0;
-	ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
 	mutex_lock(&ctx->uring_lock);
+	ret |= io_cancel_defer_files(ctx, tctx, cancel_all);
 	ret |= io_poll_remove_all(ctx, tctx, cancel_all);
 	ret |= io_waitid_remove_all(ctx, tctx, cancel_all);
 	ret |= io_futex_remove_all(ctx, tctx, cancel_all);
diff --git a/io_uring/io_uring.h b/io_uring/io_uring.h
index e4050b2d0821..81f22196a57d 100644
--- a/io_uring/io_uring.h
+++ b/io_uring/io_uring.h
@@ -196,7 +196,6 @@ static inline bool io_defer_get_uncommited_cqe(struct io_ring_ctx *ctx,
 {
 	io_lockdep_assert_cq_locked(ctx);
 
-	ctx->cq_extra++;
 	ctx->submit_state.cq_flush = true;
 	return io_get_cqe(ctx, cqe_ret);
 }
@@ -414,7 +413,7 @@ static inline void io_req_complete_defer(struct io_kiocb *req)
 
 static inline void io_commit_cqring_flush(struct io_ring_ctx *ctx)
 {
-	if (unlikely(ctx->off_timeout_used || ctx->drain_active ||
+	if (unlikely(ctx->off_timeout_used ||
 		     ctx->has_evfd || ctx->poll_activated))
 		__io_commit_cqring_flush(ctx);
 }
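
The gist of the change: instead of deriving a per-request sequence number and comparing it against cached_cq_tail adjusted by cq_extra, the ring now keeps two plain counters under the uring_lock, nr_req_allocated and nr_drained, and a deferred (IOSQE_IO_DRAIN) request may only run once the requests parked on the defer list are the only ones still allocated; io_queue_deferred() first calls __io_req_caches_free() so idle cached requests do not hold that comparison hostage. The standalone userspace sketch below models that gate only; it is not the kernel code, and the names toy_ctx, toy_req, toy_linked_nr and toy_can_flush are invented for illustration.

/*
 * Toy model of the nr_req_allocated / nr_drained drain gate.
 * Counters that the kernel keeps per io_ring_ctx are modelled as a
 * small struct; "requests" are just nodes in an optional link chain.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_req {
        struct toy_req *link;           /* next request in an SQE link chain */
};

struct toy_ctx {
        unsigned nr_req_allocated;      /* live requests owned by the ring */
        unsigned nr_drained;            /* requests parked behind a drain */
};

/* count a request plus everything linked to it, like io_linked_nr() */
static unsigned toy_linked_nr(struct toy_req *req)
{
        unsigned nr = 0;

        for (; req; req = req->link)
                nr++;
        return nr;
}

/*
 * Mirror of the new check in io_queue_deferred(): deferred work is
 * released only when the drained requests are the only ones allocated.
 */
static bool toy_can_flush(struct toy_ctx *ctx)
{
        return ctx->nr_req_allocated == ctx->nr_drained;
}

int main(void)
{
        struct toy_req second = { .link = NULL };
        struct toy_req drained = { .link = &second };   /* 2-request link */
        struct toy_ctx ctx = { 0 };

        /* three requests submitted before the drain are still in flight */
        ctx.nr_req_allocated = 3;

        /* a drained link arrives: account it the way io_drain_req() does */
        ctx.nr_req_allocated += toy_linked_nr(&drained);
        ctx.nr_drained += toy_linked_nr(&drained);
        printf("can flush now? %s\n", toy_can_flush(&ctx) ? "yes" : "no");

        /* the three earlier requests complete and are freed */
        ctx.nr_req_allocated -= 3;
        printf("after earlier requests complete: %s\n",
               toy_can_flush(&ctx) ? "yes" : "no");
        return 0;
}

Built with a stock C compiler this prints "no" and then "yes": the drained link stays parked while older requests are in flight and is released once they are gone, which is the behaviour the removed seq/cq_extra arithmetic used to provide.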