diff options
author | Matthew Brost <matthew.brost@intel.com> | 2021-07-26 17:23:33 -0700 |
---|---|---|
committer | John Harrison <John.C.Harrison@Intel.com> | 2021-07-27 17:31:59 -0700 |
commit | 573ba126aef37c8315e5bb68d2dad515efa96994 (patch) | |
tree | 671ca22ec14a9b93c075ad9f47bc8566f68fb161 /drivers/gpu/drm/i915/i915_gpu_error.c | |
parent | c17b637928f030caac2d1c737959b9627011ac49 (diff) |
drm/i915/guc: Capture error state on context reset
We receive notification of an engine reset from GuC at its
completion. Meaning GuC has potentially cleared any HW state
we may have been interested in capturing. GuC resumes scheduling
on the engine post-reset, as the resets are meant to be transparent,
further muddling our error state.
There is ongoing work to define an API for a GuC debug state dump. The
suggestion for now is to manually disable FW initiated resets in cases
where debug state is needed.
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Reviewed-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-19-matthew.brost@intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r-- | drivers/gpu/drm/i915/i915_gpu_error.c | 25 |
1 files changed, 21 insertions, 4 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index a2c58b54a5928..0f08bcfbe9641 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -1429,20 +1429,37 @@ capture_engine(struct intel_engine_cs *engine, { struct intel_engine_capture_vma *capture = NULL; struct intel_engine_coredump *ee; - struct i915_request *rq; + struct intel_context *ce; + struct i915_request *rq = NULL; unsigned long flags; ee = intel_engine_coredump_alloc(engine, GFP_KERNEL); if (!ee) return NULL; - spin_lock_irqsave(&engine->sched_engine->lock, flags); - rq = intel_engine_find_active_request(engine); + ce = intel_engine_get_hung_context(engine); + if (ce) { + intel_engine_clear_hung_context(engine); + rq = intel_context_find_active_request(ce); + if (!rq || !i915_request_started(rq)) + goto no_request_capture; + } else { + /* + * Getting here with GuC enabled means it is a forced error capture + * with no actual hang. So, no need to attempt the execlist search. + */ + if (!intel_uc_uses_guc_submission(&engine->gt->uc)) { + spin_lock_irqsave(&engine->sched_engine->lock, flags); + rq = intel_engine_execlist_find_hung_request(engine); + spin_unlock_irqrestore(&engine->sched_engine->lock, + flags); + } + } if (rq) capture = intel_engine_coredump_add_request(ee, rq, ATOMIC_MAYFAIL); - spin_unlock_irqrestore(&engine->sched_engine->lock, flags); if (!capture) { +no_request_capture: kfree(ee); return NULL; } |