diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 81 |
1 files changed, 74 insertions, 7 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 135598502c8d..4883adcfbb4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -191,6 +191,20 @@ int amdgpu_mes_init(struct amdgpu_device *adev) if (r) goto error_doorbell; + if (adev->mes.hung_queue_db_array_size) { + r = amdgpu_bo_create_kernel(adev, + adev->mes.hung_queue_db_array_size * sizeof(u32), + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &adev->mes.hung_queue_db_array_gpu_obj, + &adev->mes.hung_queue_db_array_gpu_addr, + &adev->mes.hung_queue_db_array_cpu_addr); + if (r) { + dev_warn(adev->dev, "failed to create MES hung db array buffer (%d)", r); + goto error_doorbell; + } + } + return 0; error_doorbell: @@ -216,6 +230,10 @@ void amdgpu_mes_fini(struct amdgpu_device *adev) { int i; + amdgpu_bo_free_kernel(&adev->mes.hung_queue_db_array_gpu_obj, + &adev->mes.hung_queue_db_array_gpu_addr, + &adev->mes.hung_queue_db_array_cpu_addr); + amdgpu_bo_free_kernel(&adev->mes.event_log_gpu_obj, &adev->mes.event_log_gpu_addr, &adev->mes.event_log_cpu_addr); @@ -366,6 +384,58 @@ int amdgpu_mes_reset_legacy_queue(struct amdgpu_device *adev, return r; } +int amdgpu_mes_get_hung_queue_db_array_size(struct amdgpu_device *adev) +{ + return adev->mes.hung_queue_db_array_size; +} + +int amdgpu_mes_detect_and_reset_hung_queues(struct amdgpu_device *adev, + int queue_type, + bool detect_only, + unsigned int *hung_db_num, + u32 *hung_db_array) + +{ + struct mes_detect_and_reset_queue_input input; + u32 *db_array = adev->mes.hung_queue_db_array_cpu_addr; + int r, i; + + if (!hung_db_num || !hung_db_array) + return -EINVAL; + + if ((queue_type != AMDGPU_RING_TYPE_GFX) && + (queue_type != AMDGPU_RING_TYPE_COMPUTE) && + (queue_type != AMDGPU_RING_TYPE_SDMA)) + return -EINVAL; + + /* Clear the doorbell array before detection */ + memset(adev->mes.hung_queue_db_array_cpu_addr, AMDGPU_MES_INVALID_DB_OFFSET, + adev->mes.hung_queue_db_array_size * sizeof(u32)); + input.queue_type = queue_type; + input.detect_only = detect_only; + + r = adev->mes.funcs->detect_and_reset_hung_queues(&adev->mes, + &input); + if (r) { + dev_err(adev->dev, "failed to detect and reset\n"); + } else { + *hung_db_num = 0; + for (i = 0; i < adev->mes.hung_queue_hqd_info_offset; i++) { + if (db_array[i] != AMDGPU_MES_INVALID_DB_OFFSET) { + hung_db_array[i] = db_array[i]; + *hung_db_num += 1; + } + } + + /* + * TODO: return HQD info for MES scheduled user compute queue reset cases + * stored in hung_db_array hqd info offset to full array size + */ + } + + return r; +} + uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg) { struct mes_misc_op_input op_input; @@ -621,14 +691,11 @@ out: bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) { uint32_t mes_rev = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; - bool is_supported = false; - - if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && - amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && - mes_rev >= 0x63) - is_supported = true; - return is_supported; + return ((amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0) && + amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(12, 0, 0) && + mes_rev >= 0x63) || + amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 0, 0)); } /* Fix me -- node_id is used to identify the correct MES instances in the future */ |