From 3ba7b418f12b634b8920c979f5259013d40e38e2 Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Mon, 22 Oct 2018 17:12:39 -0400 Subject: drm/amdgpu: Enable default GPU reset for dGPU on gfx8/9 v3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After testing looks like these subset of ASICs has GPU reset working for the most part. Enable reset due to job timeout. v2: Switch from GFX version to ASIC type. v3: Fix identation Signed-off-by: Andrey Grodzovsky Acked-by: Alex Deucher Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 30bc345d6fdf0..416a67672f3dc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3295,13 +3295,35 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) return false; } - if (amdgpu_gpu_recovery == 0 || (amdgpu_gpu_recovery == -1 && - !amdgpu_sriov_vf(adev))) { - DRM_INFO("GPU recovery disabled.\n"); - return false; + if (amdgpu_gpu_recovery == 0) + goto disabled; + + if (amdgpu_sriov_vf(adev)) + return true; + + if (amdgpu_gpu_recovery == -1) { + switch (adev->asic_type) { + case CHIP_TOPAZ: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_VEGAM: + case CHIP_VEGA20: + case CHIP_VEGA10: + case CHIP_VEGA12: + break; + default: + goto disabled; + } } return true; + +disabled: + DRM_INFO("GPU recovery disabled.\n"); + return false; } /** -- cgit v1.2.3 From 1e256e2762211c02078c31f839a9b243f62efd5e Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Mon, 15 Oct 2018 17:08:38 +0800 Subject: drm/amdgpu: Refine CSA related functions There is no functional changes, Use function arguments for SRIOV special variables which is hardcode in those functions. so we can share those functions in baremetal. Reviewed-by: Monk Liu Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 6 ++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 5 ++++- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 32 +++++++++++++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 8 +++++--- 4 files changed, 29 insertions(+), 22 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 416a67672f3dc..0bf13d69efbcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1656,7 +1656,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) /* right after GMC hw init, we create CSA */ if (amdgpu_sriov_vf(adev)) { - r = amdgpu_allocate_static_csa(adev); + r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_CSA_SIZE); if (r) { DRM_ERROR("allocate CSA failed %d\n", r); return r; @@ -1890,7 +1892,7 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { amdgpu_ucode_free_bo(adev); - amdgpu_free_static_csa(adev); + amdgpu_free_static_csa(&adev->virt.csa_obj); amdgpu_device_wb_fini(adev); amdgpu_device_vram_scratch_fini(adev); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 8f6ff9f895c8d..9b3164c0f8618 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -978,7 +978,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) } if (amdgpu_sriov_vf(adev)) { - r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va); + uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK; + + r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj, + &fpriv->csa_va, csa_addr, AMDGPU_CSA_SIZE); if (r) goto error_vm; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 9ff16b790c92a..f71bc6feea7a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -41,25 +41,25 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) return RREG32_NO_KIQ(0xc040) == 0xffffffff; } -int amdgpu_allocate_static_csa(struct amdgpu_device *adev) +int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo, + u32 domain, uint32_t size) { int r; void *ptr; - r = amdgpu_bo_create_kernel(adev, AMDGPU_CSA_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->virt.csa_obj, + r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, + domain, bo, NULL, &ptr); - if (r) - return r; + if (!bo) + return -ENOMEM; - memset(ptr, 0, AMDGPU_CSA_SIZE); + memset(ptr, 0, size); return 0; } -void amdgpu_free_static_csa(struct amdgpu_device *adev) { - amdgpu_bo_free_kernel(&adev->virt.csa_obj, - NULL, - NULL); +void amdgpu_free_static_csa(struct amdgpu_bo **bo) +{ + amdgpu_bo_free_kernel(bo, NULL, NULL); } /* @@ -69,9 +69,9 @@ void amdgpu_free_static_csa(struct amdgpu_device *adev) { * package to support SRIOV gfx preemption. */ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo_va **bo_va) + struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va, + uint64_t csa_addr, uint32_t size) { - uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK; struct ww_acquire_ctx ticket; struct list_head list; struct amdgpu_bo_list_entry pd; @@ -80,7 +80,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&csa_tv.head); - csa_tv.bo = &adev->virt.csa_obj->tbo; + csa_tv.bo = &bo->tbo; csa_tv.shared = true; list_add(&csa_tv.head, &list); @@ -92,7 +92,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, return r; } - *bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj); + *bo_va = amdgpu_vm_bo_add(adev, vm, bo); if (!*bo_va) { ttm_eu_backoff_reservation(&ticket, &list); DRM_ERROR("failed to create bo_va for static CSA\n"); @@ -100,7 +100,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, } r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, - AMDGPU_CSA_SIZE); + size); if (r) { DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); amdgpu_vm_bo_rmv(adev, *bo_va); @@ -108,7 +108,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, return r; } - r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE, + r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | AMDGPU_PTE_EXECUTABLE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index f1a6a50d94441..09a7ebe964d0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -280,10 +280,12 @@ struct amdgpu_vm; uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev); bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); -int amdgpu_allocate_static_csa(struct amdgpu_device *adev); +int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo, + u32 domain, uint32_t size); int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo_va **bo_va); -void amdgpu_free_static_csa(struct amdgpu_device *adev); + struct amdgpu_bo *bo, + struct amdgpu_bo_va **bo_va, uint64_t csa_addr, uint32_t size); +void amdgpu_free_static_csa(struct amdgpu_bo **bo); void amdgpu_virt_init_setting(struct amdgpu_device *adev); uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); -- cgit v1.2.3 From 3e2e2ab55499f77cbd57ee91e250c085d252a979 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Fri, 28 Sep 2018 21:28:10 +0800 Subject: drm/amdgpu/psp: initialize xgmi session (v2) Setup and tear down xgmi as part of psp. v2: - make psp_xgmi_terminate static - squash in: drm/amdgpu: only issue xgmi cmd when it is enabled drm/amdgpu/psp: terminate xgmi ta in suspend and hw_fini phase Signed-off-by: Hawking Zhang Acked-by: Alex Deucher Reviewed-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 69 ++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 0bf13d69efbcd..590588a82471f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -1683,7 +1683,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) if (r) return r; - amdgpu_xgmi_add_device(adev); + if (adev->gmc.xgmi.num_physical_nodes > 1) + amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev); if (amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 07f9fcb59c0a9..e05dc66b10909 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -405,6 +405,53 @@ int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id) return ret; } +static int psp_xgmi_terminate(struct psp_context *psp) +{ + int ret; + + if (!psp->xgmi_context.initialized) + return 0; + + ret = psp_xgmi_unload(psp); + if (ret) + return ret; + + psp->xgmi_context.initialized = 0; + + /* free xgmi shared memory */ + amdgpu_bo_free_kernel(&psp->xgmi_context.xgmi_shared_bo, + &psp->xgmi_context.xgmi_shared_mc_addr, + &psp->xgmi_context.xgmi_shared_buf); + + return 0; +} + +static int psp_xgmi_initialize(struct psp_context *psp) +{ + struct ta_xgmi_shared_memory *xgmi_cmd; + int ret; + + if (!psp->xgmi_context.initialized) { + ret = psp_xgmi_init_shared_buf(psp); + if (ret) + return ret; + } + + /* Load XGMI TA */ + ret = psp_xgmi_load(psp); + if (ret) + return ret; + + /* Initialize XGMI session */ + xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.xgmi_shared_buf); + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE; + + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); + + return ret; +} + static int psp_hw_start(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -432,6 +479,15 @@ static int psp_hw_start(struct psp_context *psp) if (ret) return ret; + if (adev->gmc.xgmi.num_physical_nodes > 1) { + ret = psp_xgmi_initialize(psp); + /* Warning the XGMI seesion initialize failure + * Instead of stop driver initialization + */ + if (ret) + dev_err(psp->adev->dev, + "XGMI: Failed to initialize XGMI session\n"); + } return 0; } @@ -592,6 +648,10 @@ static int psp_hw_fini(void *handle) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) return 0; + if (adev->gmc.xgmi.num_physical_nodes > 1 && + psp->xgmi_context.initialized == 1) + psp_xgmi_terminate(psp); + psp_ring_destroy(psp, PSP_RING_TYPE__KM); amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); @@ -619,6 +679,15 @@ static int psp_suspend(void *handle) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) return 0; + if (adev->gmc.xgmi.num_physical_nodes > 1 && + psp->xgmi_context.initialized == 1) { + ret = psp_xgmi_terminate(psp); + if (ret) { + DRM_ERROR("Failed to terminate xgmi ta\n"); + return ret; + } + } + ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); if (ret) { DRM_ERROR("PSP ring stop failed\n"); -- cgit v1.2.3