Diffstat (limited to 'drivers/gpu/drm/xe/xe_guc_submit.c')
 drivers/gpu/drm/xe/xe_guc_submit.c | 331 ++++++++++++++++++++++++++++++++-----
 1 file changed, 306 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index cafb47711e9b..53024eb5670b 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -32,6 +32,7 @@
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
+#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
@@ -316,6 +317,71 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}
+/*
+ * Given that we want to guarantee enough RCS throughput to avoid missing
+ * frames, we set the yield policy to 20% of each 80ms interval.
+ */
+#define RC_YIELD_DURATION 80 /* in ms */
+#define RC_YIELD_RATIO 20 /* in percent */
+static u32 *emit_render_compute_yield_klv(u32 *emit)
+{
+ *emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
+ *emit++ = RC_YIELD_DURATION;
+ *emit++ = RC_YIELD_RATIO;
+
+ return emit;
+}
+
+#define SCHEDULING_POLICY_MAX_DWORDS 16
+static int guc_init_global_schedule_policy(struct xe_guc *guc)
+{
+ u32 data[SCHEDULING_POLICY_MAX_DWORDS];
+ u32 *emit = data;
+ u32 count = 0;
+ int ret;
+
+ if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
+ return 0;
+
+ *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
+
+ if (CCS_MASK(guc_to_gt(guc)))
+ emit = emit_render_compute_yield_klv(emit);
+
+ count = emit - data;
+ if (count > 1) {
+ xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);
+
+ ret = xe_guc_ct_send_block(&guc->ct, data, count);
+ if (ret < 0) {
+ xe_gt_err(guc_to_gt(guc),
+ "failed to enable GuC scheduling policies: %pe\n",
+ ERR_PTR(ret));
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+int xe_guc_submit_enable(struct xe_guc *guc)
+{
+ int ret;
+
+ ret = guc_init_global_schedule_policy(guc);
+ if (ret)
+ return ret;
+
+ guc->submission_state.enabled = true;
+
+ return 0;
+}
+
+void xe_guc_submit_disable(struct xe_guc *guc)
+{
+ guc->submission_state.enabled = false;
+}
+
static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
int i;
@@ -542,7 +608,7 @@ static void __register_exec_queue(struct xe_guc *guc,
xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}
-static void register_exec_queue(struct xe_exec_queue *q)
+static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
@@ -550,6 +616,7 @@ static void register_exec_queue(struct xe_exec_queue *q)
struct guc_ctxt_registration_info info;
xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
+ xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);
memset(&info, 0, sizeof(info));
info.context_idx = q->guc->id;
@@ -557,7 +624,8 @@ static void register_exec_queue(struct xe_exec_queue *q)
info.engine_submit_mask = q->logical_mask;
info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
- info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
+ info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
+ FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);
if (xe_exec_queue_is_parallel(q)) {
u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
@@ -667,12 +735,18 @@ static void wq_item_append(struct xe_exec_queue *q)
if (wq_wait_for_space(q, wqi_size))
return;
+ xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN);
wqi[i++] = FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_MULTI_LRC) |
FIELD_PREP(WQ_LEN_MASK, len_dw);
+ xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW);
wqi[i++] = xe_lrc_descriptor(q->lrc[0]);
+ xe_gt_assert(guc_to_gt(guc), i ==
+ XE_GUC_CONTEXT_WQ_EL_INFO_DATA_2_GUCCTX_RINGTAIL_FREEZEPOCS);
wqi[i++] = FIELD_PREP(WQ_GUC_ID_MASK, q->guc->id) |
FIELD_PREP(WQ_RING_TAIL_MASK, q->lrc[0]->ring.tail / sizeof(u64));
+ xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_INFO_DATA_3_WI_FENCE_ID);
wqi[i++] = 0;
+ xe_gt_assert(guc_to_gt(guc), i == XE_GUC_CONTEXT_WQ_EL_CHILD_LIST_DATA_4_RINGTAIL);
for (j = 1; j < q->width; ++j) {
struct xe_lrc *lrc = q->lrc[j];
@@ -693,6 +767,50 @@ static void wq_item_append(struct xe_exec_queue *q)
parallel_write(xe, map, wq_desc.tail, q->guc->wqi_tail);
}
+static int wq_items_rebase(struct xe_exec_queue *q)
+{
+ struct xe_guc *guc = exec_queue_to_guc(q);
+ struct xe_device *xe = guc_to_xe(guc);
+ struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
+ int i = q->guc->wqi_head;
+
+ /* the ring starts after a header struct */
+ iosys_map_incr(&map, offsetof(struct guc_submit_parallel_scratch, wq[0]));
+
+ while ((i % WQ_SIZE) != (q->guc->wqi_tail % WQ_SIZE)) {
+ u32 len_dw, type, val;
+
+ if (drm_WARN_ON_ONCE(&xe->drm, i < 0 || i > 2 * WQ_SIZE))
+ break;
+
+ val = xe_map_rd_ring_u32(xe, &map, i / sizeof(u32) +
+ XE_GUC_CONTEXT_WQ_HEADER_DATA_0_TYPE_LEN,
+ WQ_SIZE / sizeof(u32));
+ len_dw = FIELD_GET(WQ_LEN_MASK, val);
+ type = FIELD_GET(WQ_TYPE_MASK, val);
+
+ if (drm_WARN_ON_ONCE(&xe->drm, len_dw >= WQ_SIZE / sizeof(u32)))
+ break;
+
+ if (type == WQ_TYPE_MULTI_LRC) {
+ val = xe_lrc_descriptor(q->lrc[0]);
+ xe_map_wr_ring_u32(xe, &map, i / sizeof(u32) +
+ XE_GUC_CONTEXT_WQ_EL_INFO_DATA_1_CTX_DESC_LOW,
+ WQ_SIZE / sizeof(u32), val);
+ } else if (drm_WARN_ON_ONCE(&xe->drm, type != WQ_TYPE_NOOP)) {
+ break;
+ }
+
+ i += (len_dw + 1) * sizeof(u32);
+ }
+
+ if ((i % WQ_SIZE) != (q->guc->wqi_tail % WQ_SIZE)) {
+ xe_gt_err(q->gt, "Exec queue fixups incomplete - wqi parse failed\n");
+ return -EBADMSG;
+ }
+ return 0;
+}
+
#define RESUME_PENDING ~0x0ull
static void submit_exec_queue(struct xe_exec_queue *q)
{
@@ -761,7 +879,7 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) {
if (!exec_queue_registered(q))
- register_exec_queue(q);
+ register_exec_queue(q, GUC_CONTEXT_NORMAL);
if (!lr) /* LR jobs are emitted in the exec IOCTL */
q->ring_ops->emit_job(job);
submit_exec_queue(q);
@@ -777,6 +895,30 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
return fence;
}
+/**
+ * xe_guc_jobs_ring_rebase - Re-emit ring commands of jobs pending
+ * on all exec queues registered to the given GuC.
+ * @guc: the &xe_guc struct instance
+ */
+void xe_guc_jobs_ring_rebase(struct xe_guc *guc)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+
+ /*
+ * This routine is used within VF migration recovery. This means
+ * using the lock here introduces a restriction: we cannot wait
+ * for any GFX HW response while the lock is taken.
+ */
+ mutex_lock(&guc->submission_state.lock);
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+ if (exec_queue_killed_or_banned_or_wedged(q))
+ continue;
+ xe_exec_queue_jobs_ring_restore(q);
+ }
+ mutex_unlock(&guc->submission_state.lock);
+}
+
static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
{
struct xe_sched_job *job = to_xe_sched_job(drm_job);
@@ -1277,48 +1419,57 @@ rearm:
return DRM_GPU_SCHED_STAT_NO_HANG;
}
-static void __guc_exec_queue_fini_async(struct work_struct *w)
+static void guc_exec_queue_fini(struct xe_exec_queue *q)
+{
+ struct xe_guc_exec_queue *ge = q->guc;
+ struct xe_guc *guc = exec_queue_to_guc(q);
+
+ release_guc_id(guc, q);
+ xe_sched_entity_fini(&ge->entity);
+ xe_sched_fini(&ge->sched);
+
+ /*
+ * RCU free due sched being exported via DRM scheduler fences
+ * (timeline name).
+ */
+ kfree_rcu(ge, rcu);
+}
+
+static void __guc_exec_queue_destroy_async(struct work_struct *w)
{
struct xe_guc_exec_queue *ge =
- container_of(w, struct xe_guc_exec_queue, fini_async);
+ container_of(w, struct xe_guc_exec_queue, destroy_async);
struct xe_exec_queue *q = ge->q;
struct xe_guc *guc = exec_queue_to_guc(q);
xe_pm_runtime_get(guc_to_xe(guc));
trace_xe_exec_queue_destroy(q);
- release_guc_id(guc, q);
if (xe_exec_queue_is_lr(q))
cancel_work_sync(&ge->lr_tdr);
/* Confirm no work left behind accessing device structures */
cancel_delayed_work_sync(&ge->sched.base.work_tdr);
- xe_sched_entity_fini(&ge->entity);
- xe_sched_fini(&ge->sched);
- /*
- * RCU free due sched being exported via DRM scheduler fences
- * (timeline name).
- */
- kfree_rcu(ge, rcu);
xe_exec_queue_fini(q);
+
xe_pm_runtime_put(guc_to_xe(guc));
}
-static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
+static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
- INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
+ INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);
/* We must block on kernel engines so slabs are empty on driver unload */
if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
- __guc_exec_queue_fini_async(&q->guc->fini_async);
+ __guc_exec_queue_destroy_async(&q->guc->destroy_async);
else
- queue_work(xe->destroy_wq, &q->guc->fini_async);
+ queue_work(xe->destroy_wq, &q->guc->destroy_async);
}
-static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
+static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
{
/*
* Might be done from within the GPU scheduler, need to do async as we
@@ -1327,7 +1478,7 @@ static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
* this we and don't really care when everything is fini'd, just that it
* is.
*/
- guc_exec_queue_fini_async(q);
+ guc_exec_queue_destroy_async(q);
}
static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
@@ -1341,7 +1492,7 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
if (exec_queue_registered(q))
disable_scheduling_deregister(guc, q);
else
- __guc_exec_queue_fini(guc, q);
+ __guc_exec_queue_destroy(guc, q);
}
static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
@@ -1574,14 +1725,14 @@ static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
#define STATIC_MSG_CLEANUP 0
#define STATIC_MSG_SUSPEND 1
#define STATIC_MSG_RESUME 2
-static void guc_exec_queue_fini(struct xe_exec_queue *q)
+static void guc_exec_queue_destroy(struct xe_exec_queue *q)
{
struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
guc_exec_queue_add_msg(q, msg, CLEANUP);
else
- __guc_exec_queue_fini(exec_queue_to_guc(q), q);
+ __guc_exec_queue_destroy(exec_queue_to_guc(q), q);
}
static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
@@ -1711,6 +1862,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
.init = guc_exec_queue_init,
.kill = guc_exec_queue_kill,
.fini = guc_exec_queue_fini,
+ .destroy = guc_exec_queue_destroy,
.set_priority = guc_exec_queue_set_priority,
.set_timeslice = guc_exec_queue_set_timeslice,
.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
@@ -1732,7 +1884,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
xe_exec_queue_put(q);
else if (exec_queue_destroyed(q))
- __guc_exec_queue_fini(guc, q);
+ __guc_exec_queue_destroy(guc, q);
}
if (q->guc->suspend_pending) {
set_exec_queue_suspended(q);
@@ -1773,6 +1925,43 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
}
}
+/**
+ * xe_guc_submit_reset_block - Disallow reset calls on given GuC.
+ * @guc: the &xe_guc struct instance
+ */
+int xe_guc_submit_reset_block(struct xe_guc *guc)
+{
+ return atomic_fetch_or(1, &guc->submission_state.reset_blocked);
+}
+
+/**
+ * xe_guc_submit_reset_unblock - Allow reset calls again on given GuC.
+ * @guc: the &xe_guc struct instance
+ */
+void xe_guc_submit_reset_unblock(struct xe_guc *guc)
+{
+ atomic_set_release(&guc->submission_state.reset_blocked, 0);
+ wake_up_all(&guc->ct.wq);
+}
+
+static int guc_submit_reset_is_blocked(struct xe_guc *guc)
+{
+ return atomic_read_acquire(&guc->submission_state.reset_blocked);
+}
+
+/* Maximum time of blocking reset */
+#define RESET_BLOCK_PERIOD_MAX (HZ * 5)
+
+/**
+ * xe_guc_wait_reset_unblock - Wait until the reset blocking flag is lifted, or timeout.
+ * @guc: the &xe_guc struct instance
+ */
+int xe_guc_wait_reset_unblock(struct xe_guc *guc)
+{
+ return wait_event_timeout(guc->ct.wq,
+ !guc_submit_reset_is_blocked(guc), RESET_BLOCK_PERIOD_MAX);
+}
+
int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
int ret;
@@ -1826,6 +2015,19 @@ void xe_guc_submit_stop(struct xe_guc *guc)
}
+/**
+ * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
+ * @guc: the &xe_guc struct instance whose scheduler is to be disabled
+ */
+void xe_guc_submit_pause(struct xe_guc *guc)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+ xe_sched_submission_stop_async(&q->guc->sched);
+}
+
static void guc_exec_queue_start(struct xe_exec_queue *q)
{
struct xe_gpu_scheduler *sched = &q->guc->sched;
@@ -1866,6 +2068,28 @@ int xe_guc_submit_start(struct xe_guc *guc)
return 0;
}
+static void guc_exec_queue_unpause(struct xe_exec_queue *q)
+{
+ struct xe_gpu_scheduler *sched = &q->guc->sched;
+
+ xe_sched_submission_start(sched);
+}
+
+/**
+ * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
+ * @guc: the &xe_guc struct instance whose scheduler is to be enabled
+ */
+void xe_guc_submit_unpause(struct xe_guc *guc)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q)
+ guc_exec_queue_unpause(q);
+
+ wake_up_all(&guc->ct.wq);
+}
+
static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
@@ -1879,7 +2103,7 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
if (unlikely(!q)) {
- xe_gt_err(gt, "Not engine present for guc_id %u\n", guc_id);
+ xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
return NULL;
}
@@ -1989,7 +2213,7 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
xe_exec_queue_put(q);
else
- __guc_exec_queue_fini(guc, q);
+ __guc_exec_queue_destroy(guc, q);
}
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
@@ -2378,6 +2602,34 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
}
/**
+ * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
+ * @q: Execution queue
+ * @ctx_type: Type of the context
+ *
+ * This function registers the execution queue with the GuC and then enables
+ * scheduling on it. Special context types like GUC_CONTEXT_COMPRESSION_SAVE
+ * and GUC_CONTEXT_COMPRESSION_RESTORE are only applicable on iGPU and only
+ * in the VF.
+ *
+ * Return: none.
+ */
+void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type)
+{
+ struct xe_guc *guc = exec_queue_to_guc(q);
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+
+ xe_gt_assert(gt, IS_SRIOV_VF(xe));
+ xe_gt_assert(gt, !IS_DGFX(xe));
+ xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE ||
+ ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE);
+ xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0));
+
+ register_exec_queue(q, ctx_type);
+ enable_scheduling(q);
+}
+
+/**
* xe_guc_submit_print - GuC Submit Print.
* @guc: GuC.
* @p: drm_printer where it will be printed out.
@@ -2397,3 +2649,32 @@ void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
guc_exec_queue_print(q, p);
mutex_unlock(&guc->submission_state.lock);
}
+
+/**
+ * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all
+ * exec queues registered to given GuC.
+ * @guc: the &xe_guc struct instance
+ * @scratch: scratch buffer to be used as temporary storage
+ *
+ * Returns: zero on success, negative error code on failure.
+ */
+int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+ int err = 0;
+
+ mutex_lock(&guc->submission_state.lock);
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+ err = xe_exec_queue_contexts_hwsp_rebase(q, scratch);
+ if (err)
+ break;
+ if (xe_exec_queue_is_parallel(q))
+ err = wq_items_rebase(q);
+ if (err)
+ break;
+ }
+ mutex_unlock(&guc->submission_state.lock);
+
+ return err;
+}
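
The entry points added above (xe_guc_submit_pause()/xe_guc_submit_unpause(), xe_guc_submit_reset_block()/xe_guc_submit_reset_unblock(), xe_guc_contexts_hwsp_rebase() and xe_guc_jobs_ring_rebase()) are intended to be driven from the VF post-migration recovery path. A minimal, hypothetical sketch of how such a path might sequence them follows; it is not part of this patch, and the helper name vf_post_migration_fixups(), the scratch buffer handling and the exact ordering are assumptions for illustration only.

/*
 * Hypothetical sketch (not from this patch): quiesce submission, re-base
 * GGTT references and pending ring commands, then resume submission.
 */
static int vf_post_migration_fixups(struct xe_guc *guc, void *scratch)
{
	int err;

	/* Keep GT reset paths from racing with the fixups applied below. */
	xe_guc_submit_reset_block(guc);

	/* Stop the DRM schedulers so no new jobs reach the GuC meanwhile. */
	xe_guc_submit_pause(guc);

	/* Re-compute GGTT references held by every registered exec queue. */
	err = xe_guc_contexts_hwsp_rebase(guc, scratch);
	if (!err)
		/* Re-emit ring commands of jobs that were already queued. */
		xe_guc_jobs_ring_rebase(guc);

	/* Allow resets again; wakes waiters in xe_guc_wait_reset_unblock(). */
	xe_guc_submit_reset_unblock(guc);

	/* Restart the schedulers so pending jobs get submitted again. */
	xe_guc_submit_unpause(guc);

	return err;
}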