summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gpu_error.c
diff options
context:
space:
mode:
authorMatthew Brost <matthew.brost@intel.com>2021-07-26 17:23:33 -0700
committerJohn Harrison <John.C.Harrison@Intel.com>2021-07-27 17:31:59 -0700
commit573ba126aef37c8315e5bb68d2dad515efa96994 (patch)
tree671ca22ec14a9b93c075ad9f47bc8566f68fb161 /drivers/gpu/drm/i915/i915_gpu_error.c
parentc17b637928f030caac2d1c737959b9627011ac49 (diff)
drm/i915/guc: Capture error state on context reset
We receive notification of an engine reset from GuC at its completion. Meaning GuC has potentially cleared any HW state we may have been interested in capturing. GuC resumes scheduling on the engine post-reset, as the resets are meant to be transparent, further muddling our error state. There is ongoing work to define an API for a GuC debug state dump. The suggestion for now is to manually disable FW initiated resets in cases where debug state is needed. Signed-off-by: Matthew Brost <matthew.brost@intel.com> Reviewed-by: John Harrison <John.C.Harrison@Intel.com> Signed-off-by: John Harrison <John.C.Harrison@Intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210727002348.97202-19-matthew.brost@intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gpu_error.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gpu_error.c25
1 files changed, 21 insertions, 4 deletions
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index a2c58b54a5928..0f08bcfbe9641 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1429,20 +1429,37 @@ capture_engine(struct intel_engine_cs *engine,
{
struct intel_engine_capture_vma *capture = NULL;
struct intel_engine_coredump *ee;
- struct i915_request *rq;
+ struct intel_context *ce;
+ struct i915_request *rq = NULL;
unsigned long flags;
ee = intel_engine_coredump_alloc(engine, GFP_KERNEL);
if (!ee)
return NULL;
- spin_lock_irqsave(&engine->sched_engine->lock, flags);
- rq = intel_engine_find_active_request(engine);
+ ce = intel_engine_get_hung_context(engine);
+ if (ce) {
+ intel_engine_clear_hung_context(engine);
+ rq = intel_context_find_active_request(ce);
+ if (!rq || !i915_request_started(rq))
+ goto no_request_capture;
+ } else {
+ /*
+ * Getting here with GuC enabled means it is a forced error capture
+ * with no actual hang. So, no need to attempt the execlist search.
+ */
+ if (!intel_uc_uses_guc_submission(&engine->gt->uc)) {
+ spin_lock_irqsave(&engine->sched_engine->lock, flags);
+ rq = intel_engine_execlist_find_hung_request(engine);
+ spin_unlock_irqrestore(&engine->sched_engine->lock,
+ flags);
+ }
+ }
if (rq)
capture = intel_engine_coredump_add_request(ee, rq,
ATOMIC_MAYFAIL);
- spin_unlock_irqrestore(&engine->sched_engine->lock, flags);
if (!capture) {
+no_request_capture:
kfree(ee);
return NULL;
}