diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 250 |
1 files changed, 100 insertions, 150 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index d7db4cb907ae5..20b30f4b3c7d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -225,17 +225,36 @@ static const struct amdgpu_hwip_reg_entry gc_reg_list_9[] = { SOC15_REG_ENTRY_STR(GC, 0, mmRLC_SMU_SAFE_MODE), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_INT_STAT), SOC15_REG_ENTRY_STR(GC, 0, mmRLC_GPM_GENERAL_6), - /* cp header registers */ - SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME2_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), - SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), /* SE status registers */ SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE0), SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE1), SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE2), - SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3) + SOC15_REG_ENTRY_STR(GC, 0, mmGRBM_STATUS_SE3), + /* packet headers */ + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_CE_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_ME_HEADER_DUMP) }; static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = { @@ -277,6 +296,14 @@ static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_9[] = { SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_LO), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_PQ_WPTR_HI), SOC15_REG_ENTRY_STR(GC, 0, mmCP_HQD_GFX_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, mmCP_MEC_ME1_HEADER_DUMP) }; enum ta_ras_gfx_subblock { @@ -1624,42 +1651,16 @@ static u32 gfx_v9_0_get_csb_size(struct amdgpu_device *adev) static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer) { - u32 count = 0, i; - const struct cs_section_def *sect = NULL; - const struct cs_extent_def *ext = NULL; + u32 count = 0; if (adev->gfx.rlc.cs_data == NULL) return; if (buffer == NULL) return; - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1)); - buffer[count++] = cpu_to_le32(0x80000000); - buffer[count++] = cpu_to_le32(0x80000000); - - for (sect = adev->gfx.rlc.cs_data; sect->section != NULL; ++sect) { - for (ext = sect->section; ext->extent != NULL; ++ext) { - if (sect->id == SECT_CONTEXT) { - buffer[count++] = - cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count)); - buffer[count++] = cpu_to_le32(ext->reg_index - - PACKET3_SET_CONTEXT_REG_START); - for (i = 0; i < ext->reg_count; i++) - buffer[count++] = cpu_to_le32(ext->extent[i]); - } else { - return; - } - } - } - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0)); - buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE); - - buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0)); - buffer[count++] = cpu_to_le32(0); + count = amdgpu_gfx_csb_preamble_start(buffer); + count = amdgpu_gfx_csb_data_parser(adev, buffer, count); + amdgpu_gfx_csb_preamble_end(buffer, count); } static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) @@ -2234,6 +2235,25 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 0, 1): + case IP_VERSION(9, 2, 1): + case IP_VERSION(9, 4, 0): + case IP_VERSION(9, 2, 2): + case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 3, 0): + adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 167 && + adev->gfx.pfp_fw_version >= 196 && + adev->gfx.mec_fw_version >= 474) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; case IP_VERSION(9, 4, 2): adev->gfx.cleaner_shader_ptr = gfx_9_4_2_cleaner_shader_hex; adev->gfx.cleaner_shader_size = sizeof(gfx_9_4_2_cleaner_shader_hex); @@ -2390,6 +2410,8 @@ static int gfx_v9_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); adev->gfx.compute_supported_reset = amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + if (!amdgpu_sriov_vf(adev)) + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); if (r) { @@ -5441,16 +5463,8 @@ static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, payload_size = sizeof(struct v9_ce_ib_state); - if (ring->is_mes_queue) { - payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); - } else { - payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; - } + payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); @@ -5473,16 +5487,8 @@ static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, payload_size = sizeof(struct v9_de_ib_state); - if (ring->is_mes_queue) { - payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); - } else { - payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; - } + payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; ((struct v9_de_ib_state *)de_payload_cpu_addr)->ib_completion_status = IB_COMPLETION_STATUS_PREEMPTED; @@ -5672,19 +5678,9 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) cnt = (sizeof(ce_payload) >> 2) + 4 - 2; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - ce_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - } else { - offset = offsetof(struct v9_gfx_meta_data, ce_payload); - ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - } + offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) | @@ -5770,28 +5766,13 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bo void *de_payload_cpu_addr; int cnt; - if (ring->is_mes_queue) { - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gfx_meta_data) + - offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_gpu_addr = - amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - de_payload_cpu_addr = - amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); - - offset = offsetof(struct amdgpu_mes_ctx_meta_data, - gfx[0].gds_backup) + - offsetof(struct v9_gfx_meta_data, de_payload); - gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); - } else { - offset = offsetof(struct v9_gfx_meta_data, de_payload); - de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; - de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; + de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; - gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + - AMDGPU_CSA_SIZE - adev->gds.gds_size, - PAGE_SIZE); - } + gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + + AMDGPU_CSA_SIZE - adev->gds.gds_size, + PAGE_SIZE); if (usegds) { de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); @@ -7192,53 +7173,9 @@ static void gfx_v9_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop) amdgpu_ring_insert_nop(ring, num_nop - 1); } -static int gfx_v9_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) -{ - struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; - struct amdgpu_ring *kiq_ring = &kiq->ring; - unsigned long flags; - u32 tmp; - int r; - - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) - return -EINVAL; - - spin_lock_irqsave(&kiq->ring_lock, flags); - - if (amdgpu_ring_alloc(kiq_ring, 5)) { - spin_unlock_irqrestore(&kiq->ring_lock, flags); - return -ENOMEM; - } - - tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); - gfx_v9_0_ring_emit_wreg(kiq_ring, - SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), tmp); - amdgpu_ring_commit(kiq_ring); - - spin_unlock_irqrestore(&kiq->ring_lock, flags); - - r = amdgpu_ring_test_ring(kiq_ring); - if (r) - return r; - - if (amdgpu_ring_alloc(ring, 7 + 7 + 5)) - return -ENOMEM; - gfx_v9_0_ring_emit_fence(ring, ring->fence_drv.gpu_addr, - ring->fence_drv.sync_seq, AMDGPU_FENCE_FLAG_EXEC); - gfx_v9_0_ring_emit_reg_wait(ring, - SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0, 0xffff); - gfx_v9_0_ring_emit_wreg(ring, - SOC15_REG_OFFSET(GC, 0, mmCP_VMID_RESET), 0); - - return amdgpu_ring_test_ring(ring); -} - static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, - unsigned int vmid) + unsigned int vmid, + struct amdgpu_fence *timedout_fence) { struct amdgpu_device *adev = ring->adev; struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; @@ -7246,12 +7183,11 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, unsigned long flags; int i, r; - if (amdgpu_sriov_vf(adev)) - return -EINVAL; - if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; + amdgpu_ring_reset_helper_begin(ring, timedout_fence); + spin_lock_irqsave(&kiq->ring_lock, flags); if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { @@ -7301,13 +7237,13 @@ static int gfx_v9_0_reset_kcq(struct amdgpu_ring *ring, } kiq->pmf->kiq_map_queues(kiq_ring, ring); amdgpu_ring_commit(kiq_ring); - spin_unlock_irqrestore(&kiq->ring_lock, flags); r = amdgpu_ring_test_ring(kiq_ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); if (r) { DRM_ERROR("fail to remap queue\n"); return r; } - return amdgpu_ring_test_ring(ring); + return amdgpu_ring_reset_helper_end(ring, timedout_fence); } static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer *p) @@ -7339,9 +7275,14 @@ static void gfx_v9_ip_print(struct amdgpu_ip_block *ip_block, struct drm_printer for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k); for (reg = 0; reg < reg_count; reg++) { - drm_printf(p, "%-50s \t 0x%08x\n", - gc_cp_reg_list_9[reg].reg_name, - adev->gfx.ip_dump_compute_queues[index + reg]); + if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + drm_printf(p, "%-50s \t 0x%08x\n", + "mmCP_MEC_ME2_HEADER_DUMP", + adev->gfx.ip_dump_compute_queues[index + reg]); + else + drm_printf(p, "%-50s \t 0x%08x\n", + gc_cp_reg_list_9[reg].reg_name, + adev->gfx.ip_dump_compute_queues[index + reg]); } index += reg_count; } @@ -7378,9 +7319,13 @@ static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block) soc15_grbm_select(adev, 1 + i, j, k, 0, 0); for (reg = 0; reg < reg_count; reg++) { - adev->gfx.ip_dump_compute_queues[index + reg] = - RREG32(SOC15_REG_ENTRY_OFFSET( - gc_cp_reg_list_9[reg])); + if (i && gc_cp_reg_list_9[reg].reg_offset == mmCP_MEC_ME1_HEADER_DUMP) + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME2_HEADER_DUMP)); + else + adev->gfx.ip_dump_compute_queues[index + reg] = + RREG32(SOC15_REG_ENTRY_OFFSET( + gc_cp_reg_list_9[reg])); } index += reg_count; } @@ -7394,8 +7339,14 @@ static void gfx_v9_ip_dump(struct amdgpu_ip_block *ip_block) static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + /* Emit the cleaner shader */ - amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0)); + else + amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER_9_0, 0)); + amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ } @@ -7502,7 +7453,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, - .reset = gfx_v9_0_reset_kgq, .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, |