summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe/xe_lrc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe/xe_lrc.c')
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c264
1 files changed, 247 insertions, 17 deletions
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 6d38411bdeba..47e9df775072 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -8,6 +8,7 @@
#include <generated/xe_wa_oob.h>
#include <linux/ascii85.h>
+#include <linux/panic.h>
#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
@@ -16,6 +17,7 @@
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
+#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
@@ -41,7 +43,6 @@
#define LRC_PPHWSP_SIZE SZ_4K
#define LRC_INDIRECT_CTX_BO_SIZE SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
-#define LRC_WA_BB_SIZE SZ_4K
/*
* Layout of the LRC and associated data allocated as
@@ -76,6 +77,17 @@ lrc_to_xe(struct xe_lrc *lrc)
static bool
gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class)
{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (XE_GT_WA(gt, 16010904313) &&
+ (class == XE_ENGINE_CLASS_RENDER ||
+ class == XE_ENGINE_CLASS_COMPUTE))
+ return true;
+
+ if (xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev),
+ class, NULL))
+ return true;
+
return false;
}
@@ -692,7 +704,13 @@ u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}
-static size_t lrc_reg_size(struct xe_device *xe)
+/**
+ * xe_lrc_reg_size() - Get size of the LRC registers area within queues
+ * @xe: the &xe_device struct instance
+ *
+ * Returns: Size of the LRC registers area for current platform
+ */
+size_t xe_lrc_reg_size(struct xe_device *xe)
{
if (GRAPHICS_VERx100(xe) >= 1250)
return 96 * sizeof(u32);
@@ -702,7 +720,7 @@ static size_t lrc_reg_size(struct xe_device *xe)
size_t xe_lrc_skip_size(struct xe_device *xe)
{
- return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
+ return LRC_PPHWSP_SIZE + xe_lrc_reg_size(xe);
}
static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
@@ -943,6 +961,47 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe)
return data;
}
+/**
+ * xe_default_lrc_update_memirq_regs_with_address - Re-compute GGTT references in default LRC
+ * of given engine.
+ * @hwe: the &xe_hw_engine struct instance
+ */
+void xe_default_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe)
+{
+ struct xe_gt *gt = hwe->gt;
+ u32 *regs;
+
+ if (!gt->default_lrc[hwe->class])
+ return;
+
+ regs = gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE;
+ set_memory_based_intr(regs, hwe);
+}
+
+/**
+ * xe_lrc_update_memirq_regs_with_address - Re-compute GGTT references in mem interrupt data
+ * for given LRC.
+ * @lrc: the &xe_lrc struct instance
+ * @hwe: the &xe_hw_engine struct instance
+ * @regs: scratch buffer to be used as temporary storage
+ */
+void xe_lrc_update_memirq_regs_with_address(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+ u32 *regs)
+{
+ struct xe_gt *gt = hwe->gt;
+ struct iosys_map map;
+ size_t regs_len;
+
+ if (!xe_device_uses_memirq(gt_to_xe(gt)))
+ return;
+
+ map = __xe_lrc_regs_map(lrc);
+ regs_len = xe_lrc_reg_size(gt_to_xe(gt));
+ xe_map_memcpy_from(gt_to_xe(gt), regs, &map, 0, regs_len);
+ set_memory_based_intr(regs, hwe);
+ xe_map_memcpy_to(gt_to_xe(gt), &map, 0, regs, regs_len);
+}
+
static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt));
@@ -1014,6 +1073,121 @@ static ssize_t setup_utilization_wa(struct xe_lrc *lrc,
return cmd - batch;
}
+static ssize_t setup_timestamp_wa(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+ u32 *batch, size_t max_len)
+{
+ const u32 ts_addr = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
+ u32 *cmd = batch;
+
+ if (!XE_GT_WA(lrc->gt, 16010904313) ||
+ !(hwe->class == XE_ENGINE_CLASS_RENDER ||
+ hwe->class == XE_ENGINE_CLASS_COMPUTE ||
+ hwe->class == XE_ENGINE_CLASS_COPY ||
+ hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE ||
+ hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE))
+ return 0;
+
+ if (xe_gt_WARN_ON(lrc->gt, max_len < 12))
+ return -ENOSPC;
+
+ *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO |
+ MI_LRM_ASYNC;
+ *cmd++ = RING_CTX_TIMESTAMP(0).addr;
+ *cmd++ = ts_addr;
+ *cmd++ = 0;
+
+ *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO |
+ MI_LRM_ASYNC;
+ *cmd++ = RING_CTX_TIMESTAMP(0).addr;
+ *cmd++ = ts_addr;
+ *cmd++ = 0;
+
+ *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO;
+ *cmd++ = RING_CTX_TIMESTAMP(0).addr;
+ *cmd++ = ts_addr;
+ *cmd++ = 0;
+
+ return cmd - batch;
+}
+
+static ssize_t setup_configfs_post_ctx_restore_bb(struct xe_lrc *lrc,
+ struct xe_hw_engine *hwe,
+ u32 *batch, size_t max_len)
+{
+ struct xe_device *xe = gt_to_xe(lrc->gt);
+ const u32 *user_batch;
+ u32 *cmd = batch;
+ u32 count;
+
+ count = xe_configfs_get_ctx_restore_post_bb(to_pci_dev(xe->drm.dev),
+ hwe->class, &user_batch);
+ if (!count)
+ return 0;
+
+ if (count > max_len)
+ return -ENOSPC;
+
+ /*
+ * This should be used only for tests and validation. Taint the kernel
+ * as anything could be submitted directly in context switches
+ */
+ add_taint(TAINT_TEST, LOCKDEP_STILL_OK);
+
+ memcpy(cmd, user_batch, count * sizeof(u32));
+ cmd += count;
+
+ return cmd - batch;
+}
+
+static ssize_t setup_configfs_mid_ctx_restore_bb(struct xe_lrc *lrc,
+ struct xe_hw_engine *hwe,
+ u32 *batch, size_t max_len)
+{
+ struct xe_device *xe = gt_to_xe(lrc->gt);
+ const u32 *user_batch;
+ u32 *cmd = batch;
+ u32 count;
+
+ count = xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev),
+ hwe->class, &user_batch);
+ if (!count)
+ return 0;
+
+ if (count > max_len)
+ return -ENOSPC;
+
+ /*
+ * This should be used only for tests and validation. Taint the kernel
+ * as anything could be submitted directly in context switches
+ */
+ add_taint(TAINT_TEST, LOCKDEP_STILL_OK);
+
+ memcpy(cmd, user_batch, count * sizeof(u32));
+ cmd += count;
+
+ return cmd - batch;
+}
+
+static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc,
+ struct xe_hw_engine *hwe,
+ u32 *batch, size_t max_len)
+{
+ u32 *cmd = batch;
+
+ if (!XE_GT_WA(lrc->gt, 18022495364) ||
+ hwe->class != XE_ENGINE_CLASS_RENDER)
+ return 0;
+
+ if (xe_gt_WARN_ON(lrc->gt, max_len < 3))
+ return -ENOSPC;
+
+ *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
+ *cmd++ = CS_DEBUG_MODE1(0).addr;
+ *cmd++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
+
+ return cmd - batch;
+}
+
struct bo_setup {
ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
u32 *batch, size_t max_size);
@@ -1040,13 +1214,11 @@ static int setup_bo(struct bo_setup_state *state)
ssize_t remain;
if (state->lrc->bo->vmap.is_iomem) {
- state->buffer = kmalloc(state->max_size, GFP_KERNEL);
if (!state->buffer)
return -ENOMEM;
state->ptr = state->buffer;
} else {
state->ptr = state->lrc->bo->vmap.vaddr + state->offset;
- state->buffer = NULL;
}
remain = state->max_size / sizeof(u32);
@@ -1071,7 +1243,6 @@ static int setup_bo(struct bo_setup_state *state)
return 0;
fail:
- kfree(state->buffer);
return -ENOSPC;
}
@@ -1083,18 +1254,28 @@ static void finish_bo(struct bo_setup_state *state)
xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap,
state->offset, state->buffer,
state->written * sizeof(u32));
- kfree(state->buffer);
}
-static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
+/**
+ * xe_lrc_setup_wa_bb_with_scratch - Execute all wa bb setup callbacks.
+ * @lrc: the &xe_lrc struct instance
+ * @hwe: the &xe_hw_engine struct instance
+ * @scratch: preallocated scratch buffer for temporary storage
+ * Return: 0 on success, negative error code on failure
+ */
+int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *scratch)
{
static const struct bo_setup funcs[] = {
+ { .setup = setup_timestamp_wa },
+ { .setup = setup_invalidate_state_cache_wa },
{ .setup = setup_utilization_wa },
+ { .setup = setup_configfs_post_ctx_restore_bb },
};
struct bo_setup_state state = {
.lrc = lrc,
.hwe = hwe,
.max_size = LRC_WA_BB_SIZE,
+ .buffer = scratch,
.reserve_dw = 1,
.offset = __xe_lrc_wa_bb_offset(lrc),
.funcs = funcs,
@@ -1117,15 +1298,36 @@ static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
return 0;
}
+static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
+{
+ u32 *buf = NULL;
+ int ret;
+
+ if (lrc->bo->vmap.is_iomem)
+ buf = kmalloc(LRC_WA_BB_SIZE, GFP_KERNEL);
+
+ ret = xe_lrc_setup_wa_bb_with_scratch(lrc, hwe, buf);
+
+ kfree(buf);
+
+ return ret;
+}
+
static int
setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
{
- static struct bo_setup rcs_funcs[] = {
+ static const struct bo_setup rcs_funcs[] = {
+ { .setup = setup_timestamp_wa },
+ { .setup = setup_configfs_mid_ctx_restore_bb },
+ };
+ static const struct bo_setup xcs_funcs[] = {
+ { .setup = setup_configfs_mid_ctx_restore_bb },
};
struct bo_setup_state state = {
.lrc = lrc,
.hwe = hwe,
.max_size = (63 * 64) /* max 63 cachelines */,
+ .buffer = NULL,
.offset = __xe_lrc_indirect_ctx_offset(lrc),
};
int ret;
@@ -1137,14 +1339,22 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
hwe->class == XE_ENGINE_CLASS_COMPUTE) {
state.funcs = rcs_funcs;
state.num_funcs = ARRAY_SIZE(rcs_funcs);
+ } else {
+ state.funcs = xcs_funcs;
+ state.num_funcs = ARRAY_SIZE(xcs_funcs);
}
if (xe_gt_WARN_ON(lrc->gt, !state.funcs))
return 0;
+ if (lrc->bo->vmap.is_iomem)
+ state.buffer = kmalloc(state.max_size, GFP_KERNEL);
+
ret = setup_bo(&state);
- if (ret)
+ if (ret) {
+ kfree(state.buffer);
return ret;
+ }
/*
* Align to 64B cacheline so there's no garbage at the end for CS to
@@ -1156,15 +1366,17 @@ setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
}
finish_bo(&state);
+ kfree(state.buffer);
+ /*
+ * Enable INDIRECT_CTX leaving INDIRECT_CTX_OFFSET at its default: it
+ * varies per engine class, but the default is good enough
+ */
xe_lrc_write_ctx_reg(lrc,
CTX_CS_INDIRECT_CTX,
(xe_bo_ggtt_addr(lrc->bo) + state.offset) |
/* Size in CLs. */
(state.written * sizeof(u32) / 64));
- xe_lrc_write_ctx_reg(lrc,
- CTX_CS_INDIRECT_CTX_OFFSET,
- CTX_INDIRECT_CTX_OFFSET_DEFAULT);
return 0;
}
@@ -1203,9 +1415,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
if (vm && vm->xef) /* userspace */
bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
- lrc->bo = xe_bo_create_pin_map(xe, tile, NULL, bo_size,
- ttm_bo_type_kernel,
- bo_flags);
+ lrc->bo = xe_bo_create_pin_map_novm(xe, tile,
+ bo_size,
+ ttm_bo_type_kernel,
+ bo_flags, false);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
@@ -1374,6 +1587,23 @@ void xe_lrc_destroy(struct kref *ref)
kfree(lrc);
}
+/**
+ * xe_lrc_update_hwctx_regs_with_address - Re-compute GGTT references within given LRC.
+ * @lrc: the &xe_lrc struct instance
+ */
+void xe_lrc_update_hwctx_regs_with_address(struct xe_lrc *lrc)
+{
+ if (xe_lrc_has_indirect_ring_state(lrc)) {
+ xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
+ __xe_lrc_indirect_ring_ggtt_addr(lrc));
+
+ xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
+ __xe_lrc_ring_ggtt_addr(lrc));
+ } else {
+ xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
+ }
+}
+
void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
if (xe_lrc_has_indirect_ring_state(lrc))
@@ -1939,7 +2169,7 @@ u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs)
* continue to emit all of the SVG state since it's best not to leak
* any of the state between contexts, even if that leakage is harmless.
*/
- if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
+ if (XE_GT_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
state_table = xe_hpg_svg_state;
state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
}