diff options
author | Rob Clark <robdclark@chromium.org> | 2025-06-29 13:13:06 -0700 |
---|---|---|
committer | Rob Clark <robin.clark@oss.qualcomm.com> | 2025-07-04 17:48:36 -0700 |
commit | 6a4d287a1ae6e49f8ef57fcb2a512c2b0bbef966 (patch) | |
tree | d2062a91e21f1668a3ada199e658bdb90c6ff91c | |
parent | feb8ef4636a457a1fd916a3ae575f552935e69b9 (diff) |
drm/msm: Mark VM as unusable on GPU hangs
If userspace has opted-in to VM_BIND, then GPU hangs and VM_BIND errors
will mark the VM as unusable.
Signed-off-by: Rob Clark <robdclark@chromium.org>
Signed-off-by: Rob Clark <robin.clark@oss.qualcomm.com>
Tested-by: Antonino Maniscalco <antomani103@gmail.com>
Reviewed-by: Antonino Maniscalco <antomani103@gmail.com>
Patchwork: https://patchwork.freedesktop.org/patch/661499/
-rw-r--r-- | drivers/gpu/drm/msm/msm_gem.h | 17 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gem_submit.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/msm/msm_gpu.c | 16 |
3 files changed, 34 insertions, 2 deletions
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index b5bf21f62f9d..f2631a8c62b9 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -76,6 +76,23 @@ struct msm_gem_vm { /** @managed: is this a kernel managed VM? */ bool managed; + + /** + * @unusable: True if the VM has turned unusable because something + * bad happened during an asynchronous request. + * + * We don't try to recover from such failures, because this implies + * informing userspace about the specific operation that failed, and + * hoping the userspace driver can replay things from there. This all + * sounds very complicated for little gain. + * + * Instead, we should just flag the VM as unusable, and fail any + * further request targeting this VM. + * + * As an analogy, this would be mapped to a VK_ERROR_DEVICE_LOST + * situation, where the logical device needs to be re-created. + */ + bool unusable; }; #define to_msm_vm(x) container_of(x, struct msm_gem_vm, base) diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index daf05c380e84..50fafcacf035 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -681,6 +681,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, if (args->pad) return -EINVAL; + if (to_msm_vm(ctx->vm)->unusable) + return UERR(EPIPE, dev, "context is unusable"); + /* for now, we just have 3d pipe.. eventually this would need to * be more clever to dispatch to appropriate gpu module: */ diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index c08c942d85a0..0846f6c5169f 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -389,8 +389,20 @@ static void recover_worker(struct kthread_work *work) /* Increment the fault counts */ submit->queue->faults++; - if (submit->vm) - to_msm_vm(submit->vm)->faults++; + if (submit->vm) { + struct msm_gem_vm *vm = to_msm_vm(submit->vm); + + vm->faults++; + + /* + * If userspace has opted-in to VM_BIND (and therefore userspace + * management of the VM), faults mark the VM as unusuable. This + * matches vulkan expectations (vulkan is the main target for + * VM_BIND) + */ + if (!vm->managed) + vm->unusable = true; + } get_comm_cmdline(submit, &comm, &cmd); |