summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMing Lei <ming.lei@redhat.com>2025-08-27 20:15:59 +0800
committerJens Axboe <axboe@kernel.dk>2025-08-28 07:56:57 -0600
commitc5c5eb24ed6177fc0ef4bb75fc18d07a99c1d3f0 (patch)
tree4d6cbf5e10f956dcbbfa8d59a9c4fcd7cc178b59
parente3ef9445cd9d90e43de0bd3cd55d437773dfd139 (diff)
ublk: avoid ublk_io_release() called after ublk char dev is closed
When running test_stress_04.sh, the following warning is triggered: WARNING: CPU: 1 PID: 135 at drivers/block/ublk_drv.c:1933 ublk_ch_release+0x423/0x4b0 [ublk_drv] This happens when the daemon is abruptly killed: - some references may still be held, because registering IO buffer doesn't grab ublk char device reference OR - io->task_registered_buffers won't be cleared because io buffer is released from non-daemon context For zero-copy and auto buffer register modes, I/O reference crosses syscalls, so IO reference may not be dropped naturally when ublk server is killed abruptly. However, when releasing io_uring context, it is guaranteed that the reference is dropped finally, see io_sqe_buffers_unregister() from io_ring_ctx_free(). Fix this by adding ublk_drain_io_references() that: - Waits for active I/O references dropped in async way by scheduling work function, for avoiding ublk dev and io_uring file's release dependency - Reinitializes io->ref and io->task_registered_buffers to clean state This ensures the reference count state is clean when ublk_queue_reinit() is called, preventing the warning and potential use-after-free. Fixes: 1f6540e2aabb ("ublk: zc register/unregister bvec") Fixes: 1ceeedb59749 ("ublk: optimize UBLK_IO_UNREGISTER_IO_BUF on daemon task") Fixes: 8a8fe42d765b ("ublk: optimize UBLK_IO_REGISTER_IO_BUF on daemon task") Signed-off-by: Ming Lei <ming.lei@redhat.com> Link: https://lore.kernel.org/r/20250827121602.2619736-2-ming.lei@redhat.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--drivers/block/ublk_drv.c72
1 files changed, 70 insertions, 2 deletions
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 99abd67b708b..67d4a867aec4 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -239,6 +239,7 @@ struct ublk_device {
struct mutex cancel_mutex;
bool canceling;
pid_t ublksrv_tgid;
+ struct delayed_work exit_work;
};
/* header of ublk_params */
@@ -1595,13 +1596,63 @@ static void ublk_set_canceling(struct ublk_device *ub, bool canceling)
ublk_get_queue(ub, i)->canceling = canceling;
}
-static int ublk_ch_release(struct inode *inode, struct file *filp)
+static bool ublk_check_and_reset_active_ref(struct ublk_device *ub)
{
- struct ublk_device *ub = filp->private_data;
+ int i, j;
+
+ if (!(ub->dev_info.flags & (UBLK_F_SUPPORT_ZERO_COPY |
+ UBLK_F_AUTO_BUF_REG)))
+ return false;
+
+ for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
+ struct ublk_queue *ubq = ublk_get_queue(ub, i);
+
+ for (j = 0; j < ubq->q_depth; j++) {
+ struct ublk_io *io = &ubq->ios[j];
+ unsigned int refs = refcount_read(&io->ref) +
+ io->task_registered_buffers;
+
+ /*
+ * UBLK_REFCOUNT_INIT or zero means no active
+ * reference
+ */
+ if (refs != UBLK_REFCOUNT_INIT && refs != 0)
+ return true;
+
+ /* reset to zero if the io hasn't active references */
+ refcount_set(&io->ref, 0);
+ io->task_registered_buffers = 0;
+ }
+ }
+ return false;
+}
+
+static void ublk_ch_release_work_fn(struct work_struct *work)
+{
+ struct ublk_device *ub =
+ container_of(work, struct ublk_device, exit_work.work);
struct gendisk *disk;
int i;
/*
+ * For zero-copy and auto buffer register modes, I/O references
+ * might not be dropped naturally when the daemon is killed, but
+ * io_uring guarantees that registered bvec kernel buffers are
+ * unregistered finally when freeing io_uring context, then the
+ * active references are dropped.
+ *
+ * Wait until active references are dropped for avoiding use-after-free
+ *
+ * registered buffer may be unregistered in io_ring's release hander,
+ * so have to wait by scheduling work function for avoiding the two
+ * file release dependency.
+ */
+ if (ublk_check_and_reset_active_ref(ub)) {
+ schedule_delayed_work(&ub->exit_work, 1);
+ return;
+ }
+
+ /*
* disk isn't attached yet, either device isn't live, or it has
* been removed already, so we needn't to do anything
*/
@@ -1673,6 +1724,23 @@ unlock:
ublk_reset_ch_dev(ub);
out:
clear_bit(UB_STATE_OPEN, &ub->state);
+
+ /* put the reference grabbed in ublk_ch_release() */
+ ublk_put_device(ub);
+}
+
+static int ublk_ch_release(struct inode *inode, struct file *filp)
+{
+ struct ublk_device *ub = filp->private_data;
+
+ /*
+ * Grab ublk device reference, so it won't be gone until we are
+ * really released from work function.
+ */
+ ublk_get_device(ub);
+
+ INIT_DELAYED_WORK(&ub->exit_work, ublk_ch_release_work_fn);
+ schedule_delayed_work(&ub->exit_work, 0);
return 0;
}