diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 115 |
1 files changed, 100 insertions, 15 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ae09fc1cfe6b7..a674c8a58dc23 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -755,7 +755,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds); static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status); @@ -1995,6 +1995,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE); + ring->vm_hub = AMDGPU_GFXHUB_0; sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -2094,6 +2095,7 @@ static int gfx_v9_0_sw_init(void *handle) /* disable scheduler on the real ring */ ring->no_scheduler = true; + ring->vm_hub = AMDGPU_GFXHUB_0; r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -2111,6 +2113,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; ring->is_sw_ring = true; hw_prio = amdgpu_sw_ring_priority(i); + ring->vm_hub = AMDGPU_GFXHUB_0; r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, NULL); @@ -3604,8 +3607,10 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) return r; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (unlikely(r != 0)) + if (unlikely(r != 0)) { + amdgpu_bo_unreserve(ring->mqd_obj); return r; + } gfx_v9_0_kiq_init_queue(ring); amdgpu_bo_kunmap(ring->mqd_obj); @@ -3751,7 +3756,8 @@ static int gfx_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); @@ -5121,7 +5127,8 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, gfx_v9_0_ring_emit_de_meta(ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? - true : false); + true : false, + job->gds_size > 0 && job->gds_base != 0); } amdgpu_ring_write(ring, header); @@ -5132,9 +5139,83 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, #endif lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_ib_on_emit_cntl(ring); amdgpu_ring_write(ring, control); } +static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring, + unsigned offset) +{ + u32 control = ring->ring[offset]; + + control |= INDIRECT_BUFFER_PRE_RESUME(1); + ring->ring[offset] = control; +} + +static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, + unsigned offset) +{ + struct amdgpu_device *adev = ring->adev; + void *ce_payload_cpu_addr; + uint64_t payload_offset, payload_size; + + payload_size = sizeof(struct v9_ce_ib_state); + + if (ring->is_mes_queue) { + payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, + gfx[0].gfx_meta_data) + + offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_cpu_addr = + amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); + } else { + payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; + } + + if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { + memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); + } else { + memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, + (ring->buf_mask + 1 - offset) << 2); + payload_size -= (ring->buf_mask + 1 - offset) << 2; + memcpy((void *)&ring->ring[0], + ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), + payload_size); + } +} + +static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, + unsigned offset) +{ + struct amdgpu_device *adev = ring->adev; + void *de_payload_cpu_addr; + uint64_t payload_offset, payload_size; + + payload_size = sizeof(struct v9_de_ib_state); + + if (ring->is_mes_queue) { + payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, + gfx[0].gfx_meta_data) + + offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_cpu_addr = + amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); + } else { + payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; + } + + if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { + memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size); + } else { + memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, + (ring->buf_mask + 1 - offset) << 2); + payload_size -= (ring->buf_mask + 1 - offset) << 2; + memcpy((void *)&ring->ring[0], + de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), + payload_size); + } +} + static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, @@ -5330,6 +5411,8 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr)); + amdgpu_ring_ib_on_emit_ce(ring); + if (resume) amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr, sizeof(ce_payload) >> 2); @@ -5363,10 +5446,6 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) amdgpu_ring_alloc(ring, 13); gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT); - /*reset the CP_VMID_PREEMPT after trailing fence*/ - amdgpu_ring_emit_wreg(ring, - SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), - 0x0); /* assert IB preemption, emit the trailing fence */ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, @@ -5389,6 +5468,10 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) DRM_WARN("ring %d timeout to preempt ib\n", ring->idx); } + /*reset the CP_VMID_PREEMPT after trailing fence*/ + amdgpu_ring_emit_wreg(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), + 0x0); amdgpu_ring_commit(ring); /* deassert preemption condition */ @@ -5396,7 +5479,7 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) return r; } -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds) { struct amdgpu_device *adev = ring->adev; struct v9_de_ib_state de_payload = {0}; @@ -5427,8 +5510,10 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) PAGE_SIZE); } - de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); - de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); + if (usegds) { + de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); + de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); + } cnt = (sizeof(de_payload) >> 2) + 4 - 2; amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); @@ -5439,6 +5524,7 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); + amdgpu_ring_ib_on_emit_de(ring); if (resume) amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, sizeof(de_payload) >> 2); @@ -6750,7 +6836,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_gfx, .get_wptr = gfx_v9_0_ring_get_wptr_gfx, .set_wptr = gfx_v9_0_ring_set_wptr_gfx, @@ -6804,7 +6889,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, .secure_submission_supported = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = amdgpu_sw_ring_get_rptr_gfx, .get_wptr = amdgpu_sw_ring_get_wptr_gfx, .set_wptr = amdgpu_sw_ring_set_wptr_gfx, @@ -6851,6 +6935,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, + .patch_cntl = gfx_v9_0_ring_patch_cntl, + .patch_de = gfx_v9_0_ring_patch_de_meta, + .patch_ce = gfx_v9_0_ring_patch_ce_meta, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -6858,7 +6945,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, @@ -6897,7 +6983,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { .align_mask = 0xff, .nop = PACKET3(PACKET3_NOP, 0x3FFF), .support_64bit_ptrs = true, - .vmhub = AMDGPU_GFXHUB_0, .get_rptr = gfx_v9_0_ring_get_rptr_compute, .get_wptr = gfx_v9_0_ring_get_wptr_compute, .set_wptr = gfx_v9_0_ring_set_wptr_compute, |