summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/gt/intel_workarounds.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/gt/intel_workarounds.c')
-rw-r--r--drivers/gpu/drm/i915/gt/intel_workarounds.c353
1 files changed, 333 insertions, 20 deletions
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index 90a2b9e399b0..5726cd0a37e0 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -179,6 +179,12 @@ wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
}
static void
+wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
+{
+ wa_write_masked_or(wal, reg, clr, 0);
+}
+
+static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val);
@@ -199,6 +205,18 @@ wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
#define WA_SET_FIELD_MASKED(addr, mask, value) \
wa_write_masked_or(wal, (addr), 0, _MASKED_FIELD((mask), (value)))
+static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
+}
+
+static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
+ struct i915_wa_list *wal)
+{
+ WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING);
+}
+
static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
@@ -349,7 +367,10 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
HDC_FORCE_NON_COHERENT);
/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
- if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915))
+ if (IS_SKYLAKE(i915) ||
+ IS_KABYLAKE(i915) ||
+ IS_COFFEELAKE(i915) ||
+ IS_COMETLAKE(i915))
WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
GEN8_SAMPLER_POWER_BYPASS_DIS);
@@ -383,7 +404,7 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
struct i915_wa_list *wal)
{
- struct drm_i915_private *i915 = engine->i915;
+ struct intel_gt *gt = engine->gt;
u8 vals[3] = { 0, 0, 0 };
unsigned int i;
@@ -394,7 +415,7 @@ static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
* Only consider slices where one, and only one, subslice has 7
* EUs
*/
- if (!is_power_of_2(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]))
+ if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
continue;
/*
@@ -403,7 +424,7 @@ static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
*
* -> 0 <= ss <= 3;
*/
- ss = ffs(RUNTIME_INFO(i915)->sseu.subslice_7eu[i]) - 1;
+ ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
vals[i] = 3 - ss;
}
@@ -594,11 +615,14 @@ static void tgl_ctx_workarounds_init(struct intel_engine_cs *engine,
* Wa_1604555607:gen12 and Wa_1608008084:gen12
* FF_MODE2 register will return the wrong value when read. The default
* value for this register is zero for all fields and there are no bit
- * masks. So instead of doing a RMW we should just write the TDS timer
- * value for Wa_1604555607.
+ * masks. So instead of doing a RMW we should just write the GS Timer
+ * and TDS timer values for Wa_1604555607 and Wa_16011163337.
*/
- wa_add(wal, FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
- FF_MODE2_TDS_TIMER_128, 0);
+ wa_add(wal,
+ FF_MODE2,
+ FF_MODE2_GS_TIMER_MASK | FF_MODE2_TDS_TIMER_MASK,
+ FF_MODE2_GS_TIMER_224 | FF_MODE2_TDS_TIMER_128,
+ 0);
/* WaDisableGPGPUMidThreadPreemption:tgl */
WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1,
@@ -624,7 +648,7 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
icl_ctx_workarounds_init(engine, wal);
else if (IS_CANNONLAKE(i915))
cnl_ctx_workarounds_init(engine, wal);
- else if (IS_COFFEELAKE(i915))
+ else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
cfl_ctx_workarounds_init(engine, wal);
else if (IS_GEMINILAKE(i915))
glk_ctx_workarounds_init(engine, wal);
@@ -638,6 +662,10 @@ __intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
chv_ctx_workarounds_init(engine, wal);
else if (IS_BROADWELL(i915))
bdw_ctx_workarounds_init(engine, wal);
+ else if (IS_GEN(i915, 7))
+ gen7_ctx_workarounds_init(engine, wal);
+ else if (IS_GEN(i915, 6))
+ gen6_ctx_workarounds_init(engine, wal);
else if (INTEL_GEN(i915) < 8)
return;
else
@@ -687,10 +715,231 @@ int intel_engine_emit_ctx_wa(struct i915_request *rq)
}
static void
+gen4_gt_workarounds_init(struct drm_i915_private *i915,
+ struct i915_wa_list *wal)
+{
+ /* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
+ wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
+}
+
+static void
+g4x_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ gen4_gt_workarounds_init(i915, wal);
+
+ /* WaDisableRenderCachePipelinedFlush:g4x,ilk */
+ wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
+}
+
+static void
+ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ g4x_gt_workarounds_init(i915, wal);
+
+ wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
+}
+
+static void
+snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ /* WaDisableHiZPlanesWhenMSAAEnabled:snb */
+ wa_masked_en(wal,
+ _3D_CHICKEN,
+ _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);
+
+ /* WaDisable_RenderCache_OperationalFlush:snb */
+ wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ wa_add(wal,
+ GEN6_GT_MODE, 0,
+ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
+ GEN6_WIZ_HASHING_16x4);
+
+ wa_masked_dis(wal, CACHE_MODE_0, CM0_STC_EVICT_DISABLE_LRA_SNB);
+
+ wa_masked_en(wal,
+ _3D_CHICKEN3,
+ /* WaStripsFansDisableFastClipPerformanceFix:snb */
+ _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
+ /*
+ * Bspec says:
+ * "This bit must be set if 3DSTATE_CLIP clip mode is set
+ * to normal and 3DSTATE_SF number of SF output attributes
+ * is more than 16."
+ */
+ _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);
+}
+
+static void
+ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ /* WaDisableEarlyCull:ivb */
+ wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
+
+ /* WaDisablePSDDualDispatchEnable:ivb */
+ if (IS_IVB_GT1(i915))
+ wa_masked_en(wal,
+ GEN7_HALF_SLICE_CHICKEN1,
+ GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
+
+ /* WaDisable_RenderCache_OperationalFlush:ivb */
+ wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
+
+ /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
+ wa_masked_dis(wal,
+ GEN7_COMMON_SLICE_CHICKEN1,
+ GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);
+
+ /* WaApplyL3ControlAndL3ChickenMode:ivb */
+ wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
+ wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);
+
+ /* WaForceL3Serialization:ivb */
+ wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
+
+ /*
+ * WaVSThreadDispatchOverride:ivb,vlv
+ *
+ * This actually overrides the dispatch
+ * mode for all thread types.
+ */
+ wa_write_masked_or(wal, GEN7_FF_THREAD_MODE,
+ GEN7_FF_SCHED_MASK,
+ GEN7_FF_TS_SCHED_HW |
+ GEN7_FF_VS_SCHED_HW |
+ GEN7_FF_DS_SCHED_HW);
+
+ if (0) { /* causes HiZ corruption on ivb:gt1 */
+ /* enable HiZ Raw Stall Optimization */
+ wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);
+ }
+
+ /* WaDisable4x2SubspanOptimization:ivb */
+ wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ wa_add(wal, GEN7_GT_MODE, 0,
+ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
+ GEN6_WIZ_HASHING_16x4);
+}
+
+static void
+vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ /* WaDisableEarlyCull:vlv */
+ wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
+
+ /* WaPsdDispatchEnable:vlv */
+ /* WaDisablePSDDualDispatchEnable:vlv */
+ wa_masked_en(wal,
+ GEN7_HALF_SLICE_CHICKEN1,
+ GEN7_MAX_PS_THREAD_DEP |
+ GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
+
+ /* WaDisable_RenderCache_OperationalFlush:vlv */
+ wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
+
+ /* WaForceL3Serialization:vlv */
+ wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
+
+ /*
+ * WaVSThreadDispatchOverride:ivb,vlv
+ *
+ * This actually overrides the dispatch
+ * mode for all thread types.
+ */
+ wa_write_masked_or(wal,
+ GEN7_FF_THREAD_MODE,
+ GEN7_FF_SCHED_MASK,
+ GEN7_FF_TS_SCHED_HW |
+ GEN7_FF_VS_SCHED_HW |
+ GEN7_FF_DS_SCHED_HW);
+
+ /*
+ * BSpec says this must be set, even though
+ * WaDisable4x2SubspanOptimization isn't listed for VLV.
+ */
+ wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ wa_add(wal, GEN7_GT_MODE, 0,
+ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
+ GEN6_WIZ_HASHING_16x4);
+
+ /*
+ * WaIncreaseL3CreditsForVLVB0:vlv
+ * This is the hardware default actually.
+ */
+ wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
+}
+
+static void
+hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
+{
+ /* L3 caching of data atomics doesn't work -- disable it. */
+ wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);
+
+ wa_add(wal,
+ HSW_ROW_CHICKEN3, 0,
+ _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
+ 0 /* XXX does this reg exist? */);
+
+ /* WaVSRefCountFullforceMissDisable:hsw */
+ wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
+
+ wa_masked_dis(wal,
+ CACHE_MODE_0_GEN7,
+ /* WaDisable_RenderCache_OperationalFlush:hsw */
+ RC_OP_FLUSH_ENABLE |
+ /* enable HiZ Raw Stall Optimization */
+ HIZ_RAW_STALL_OPT_DISABLE);
+
+ /* WaDisable4x2SubspanOptimization:hsw */
+ wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
+
+ /*
+ * BSpec recommends 8x4 when MSAA is used,
+ * however in practice 16x4 seems fastest.
+ *
+ * Note that PS/WM thread counts depend on the WIZ hashing
+ * disable bit, which we don't touch here, but it's good
+ * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
+ */
+ wa_add(wal, GEN7_GT_MODE, 0,
+ _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4),
+ GEN6_WIZ_HASHING_16x4);
+
+ /* WaSampleCChickenBitEnable:hsw */
+ wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
+}
+
+static void
gen9_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
/* WaDisableKillLogic:bxt,skl,kbl */
- if (!IS_COFFEELAKE(i915))
+ if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
wa_write_or(wal,
GAM_ECOCHK,
ECOCHK_DIS_TLB);
@@ -787,7 +1036,7 @@ cfl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal)
static void
wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
- const struct sseu_dev_info *sseu = &RUNTIME_INFO(i915)->sseu;
+ const struct sseu_dev_info *sseu = &i915->gt.info.sseu;
unsigned int slice, subslice;
u32 l3_en, mcr, mcr_mask;
@@ -953,7 +1202,7 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
icl_gt_workarounds_init(i915, wal);
else if (IS_CANNONLAKE(i915))
cnl_gt_workarounds_init(i915, wal);
- else if (IS_COFFEELAKE(i915))
+ else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
cfl_gt_workarounds_init(i915, wal);
else if (IS_GEMINILAKE(i915))
glk_gt_workarounds_init(i915, wal);
@@ -963,6 +1212,20 @@ gt_init_workarounds(struct drm_i915_private *i915, struct i915_wa_list *wal)
bxt_gt_workarounds_init(i915, wal);
else if (IS_SKYLAKE(i915))
skl_gt_workarounds_init(i915, wal);
+ else if (IS_HASWELL(i915))
+ hsw_gt_workarounds_init(i915, wal);
+ else if (IS_VALLEYVIEW(i915))
+ vlv_gt_workarounds_init(i915, wal);
+ else if (IS_IVYBRIDGE(i915))
+ ivb_gt_workarounds_init(i915, wal);
+ else if (IS_GEN(i915, 6))
+ snb_gt_workarounds_init(i915, wal);
+ else if (IS_GEN(i915, 5))
+ ilk_gt_workarounds_init(i915, wal);
+ else if (IS_G4X(i915))
+ g4x_gt_workarounds_init(i915, wal);
+ else if (IS_GEN(i915, 4))
+ gen4_gt_workarounds_init(i915, wal);
else if (INTEL_GEN(i915) <= 8)
return;
else
@@ -1187,6 +1450,18 @@ static void cfl_whitelist_build(struct intel_engine_cs *engine)
RING_FORCE_TO_NONPRIV_RANGE_4);
}
+static void cml_whitelist_build(struct intel_engine_cs *engine)
+{
+ struct i915_wa_list *w = &engine->whitelist;
+
+ if (engine->class != RENDER_CLASS)
+ whitelist_reg_ext(w,
+ RING_CTX_TIMESTAMP(engine->mmio_base),
+ RING_FORCE_TO_NONPRIV_ACCESS_RD);
+
+ cfl_whitelist_build(engine);
+}
+
static void cnl_whitelist_build(struct intel_engine_cs *engine)
{
struct i915_wa_list *w = &engine->whitelist;
@@ -1237,9 +1512,15 @@ static void icl_whitelist_build(struct intel_engine_cs *engine)
/* hucStatus2RegOffset */
whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
RING_FORCE_TO_NONPRIV_ACCESS_RD);
+ whitelist_reg_ext(w,
+ RING_CTX_TIMESTAMP(engine->mmio_base),
+ RING_FORCE_TO_NONPRIV_ACCESS_RD);
break;
default:
+ whitelist_reg_ext(w,
+ RING_CTX_TIMESTAMP(engine->mmio_base),
+ RING_FORCE_TO_NONPRIV_ACCESS_RD);
break;
}
}
@@ -1271,6 +1552,9 @@ static void tgl_whitelist_build(struct intel_engine_cs *engine)
whitelist_reg(w, HIZ_CHICKEN);
break;
default:
+ whitelist_reg_ext(w,
+ RING_CTX_TIMESTAMP(engine->mmio_base),
+ RING_FORCE_TO_NONPRIV_ACCESS_RD);
break;
}
}
@@ -1288,6 +1572,8 @@ void intel_engine_init_whitelist(struct intel_engine_cs *engine)
icl_whitelist_build(engine);
else if (IS_CANNONLAKE(i915))
cnl_whitelist_build(engine);
+ else if (IS_COMETLAKE(i915))
+ cml_whitelist_build(engine);
else if (IS_COFFEELAKE(i915))
cfl_whitelist_build(engine);
else if (IS_GEMINILAKE(i915))
@@ -1363,11 +1649,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN7_SARCHKMD,
GEN7_DISABLE_SAMPLER_PREFETCH);
- /* Wa_1407928979:tgl */
- wa_write_or(wal,
- GEN7_FF_THREAD_MODE,
- GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
-
/* Wa_1408615072:tgl */
wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
VSUNIT_CLKGATE_DIS_TGL);
@@ -1391,6 +1672,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
* Wa_14010229206:tgl
*/
wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
+
+ /*
+ * Wa_1407928979:tgl A*
+ * Wa_18011464164:tgl B0+
+ * Wa_22010931296:tgl B0+
+ */
+ wa_write_or(wal, GEN7_FF_THREAD_MODE,
+ GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
}
if (IS_GEN(i915, 11)) {
@@ -1484,6 +1773,12 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
wa_write_or(wal,
GEN7_FF_THREAD_MODE,
GEN12_FF_TESSELATION_DOP_GATE_DISABLE);
+
+ /* Wa_22010271021:ehl */
+ if (IS_ELKHARTLAKE(i915))
+ wa_masked_en(wal,
+ GEN9_CS_DEBUG_MODE1,
+ FF_DOP_CLOCK_GATE_DISABLE);
}
if (IS_GEN_RANGE(i915, 9, 12)) {
@@ -1493,7 +1788,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
GEN9_FFSC_PERCTX_PREEMPT_CTRL);
}
- if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915)) {
+ if (IS_SKYLAKE(i915) ||
+ IS_KABYLAKE(i915) ||
+ IS_COFFEELAKE(i915) ||
+ IS_COMETLAKE(i915)) {
/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
wa_write_or(wal,
GEN8_GARBCNTL,
@@ -1577,6 +1875,21 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
/* XXX bit doesn't stick on Broadwater */
IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH);
+
+ if (IS_GEN(i915, 4))
+ /*
+ * Disable CONSTANT_BUFFER before it is loaded from the context
+ * image. For as it is loaded, it is executed and the stored
+ * address may no longer be valid, leading to a GPU hang.
+ *
+ * This imposes the requirement that userspace reload their
+ * CONSTANT_BUFFER on every batch, fortunately a requirement
+ * they are already accustomed to from before contexts were
+ * enabled.
+ */
+ wa_add(wal, ECOSKPD,
+ 0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
+ 0 /* XXX bit doesn't stick on Broadwater */);
}
static void
@@ -1691,7 +2004,7 @@ wa_list_srm(struct i915_request *rq,
const struct i915_wa_list *wal,
struct i915_vma *vma)
{
- struct drm_i915_private *i915 = rq->i915;
+ struct drm_i915_private *i915 = rq->engine->i915;
unsigned int i, count = 0;
const struct i915_wa *wa;
u32 srm, *cs;
@@ -1780,7 +2093,7 @@ static int engine_wa_list_verify(struct intel_context *ce,
err = 0;
for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
- if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
+ if (mcr_range(rq->engine->i915, i915_mmio_reg_offset(wa->reg)))
continue;
if (!wa_verify(wa, results[i], wal->name, from))