diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 46 | 
1 files changed, 37 insertions, 9 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 404483437bd3..cef94e2169fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -198,9 +198,6 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,  	return 0;  } -static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, -		struct ras_common_if *head); -  /**   * DOC: AMDGPU RAS debugfs control interface   * @@ -318,7 +315,7 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user *  	default:  		ret = -EINVAL;  		break; -	}; +	}  	if (ret)  		return -EINVAL; @@ -445,7 +442,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,  }  /* return an obj equal to head, or the first when head is NULL */ -static struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, +struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,  		struct ras_common_if *head)  {  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -689,6 +686,7 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,  {  	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head);  	struct ras_err_data err_data = {0, 0, 0, NULL}; +	int i;  	if (!obj)  		return -EINVAL; @@ -703,6 +701,13 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,  		if (adev->umc.funcs->query_ras_error_address)  			adev->umc.funcs->query_ras_error_address(adev, &err_data);  		break; +	case AMDGPU_RAS_BLOCK__SDMA: +		if (adev->sdma.funcs->query_ras_error_count) { +			for (i = 0; i < adev->sdma.num_instances; i++) +				adev->sdma.funcs->query_ras_error_count(adev, i, +									&err_data); +		} +		break;  	case AMDGPU_RAS_BLOCK__GFX:  		if (adev->gfx.funcs->query_ras_error_count)  			adev->gfx.funcs->query_ras_error_count(adev, &err_data); @@ -737,6 +742,20 @@ int amdgpu_ras_error_query(struct amdgpu_device *adev,  	return 0;  } +uint64_t get_xgmi_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr) +{ +	uint32_t df_inst_id; + +	if ((!adev->df.funcs)                 || +	    (!adev->df.funcs->get_df_inst_id) || +	    (!adev->df.funcs->get_dram_base_addr)) +		return addr; + +	df_inst_id = adev->df.funcs->get_df_inst_id(adev); + +	return addr + adev->df.funcs->get_dram_base_addr(adev, df_inst_id); +} +  /* wrapper of psp_ras_trigger_error */  int amdgpu_ras_error_inject(struct amdgpu_device *adev,  		struct ras_inject_if *info) @@ -754,6 +773,12 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,  	if (!obj)  		return -EINVAL; +	/* Calculate XGMI relative offset */ +	if (adev->gmc.xgmi.num_physical_nodes > 1) { +		block_info.address = get_xgmi_relative_phy_addr(adev, +								block_info.address); +	} +  	switch (info->head.block) {  	case AMDGPU_RAS_BLOCK__GFX:  		if (adev->gfx.funcs->ras_error_inject) @@ -1314,6 +1339,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev,  	data = con->eh_data;  	if (!data || data->count == 0) {  		*bps = NULL; +		ret = -EINVAL;  		goto out;  	} @@ -1347,7 +1373,8 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)  	struct amdgpu_ras *ras =  		container_of(work, struct amdgpu_ras, recovery_work); -	amdgpu_device_gpu_recover(ras->adev, 0); +	if (amdgpu_device_should_recover_gpu(ras->adev)) +		amdgpu_device_gpu_recover(ras->adev, 0);  	atomic_set(&ras->in_recovery, 0);  } @@ -1687,7 +1714,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,  	*supported = 0;  	if (amdgpu_sriov_vf(adev) || -			adev->asic_type != CHIP_VEGA20) +	    (adev->asic_type != CHIP_VEGA20 && +	     adev->asic_type != CHIP_ARCTURUS))  		return;  	if (adev->is_atom_fw && @@ -1872,7 +1900,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev)  		 * See feature_enable_on_boot  		 */  		amdgpu_ras_disable_all_features(adev, 1); -		amdgpu_ras_reset_gpu(adev, 0); +		amdgpu_ras_reset_gpu(adev);  	}  } @@ -1935,6 +1963,6 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)  	if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {  		DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n"); -		amdgpu_ras_reset_gpu(adev, false); +		amdgpu_ras_reset_gpu(adev);  	}  } | 
