diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 112 | 
1 files changed, 109 insertions, 3 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 896f3609b0ee..eafe20d8fe0b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -78,7 +78,7 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
 
 	if (amdgpu_bad_page_threshold != 0) {
 		amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
-						err_data.err_addr_cnt);
+						err_data.err_addr_cnt, false);
 		amdgpu_ras_save_bad_pages(adev, NULL);
 	}
 
@@ -166,10 +166,11 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev,
 		if ((amdgpu_bad_page_threshold != 0) &&
 			err_data->err_addr_cnt) {
 			amdgpu_ras_add_bad_pages(adev, err_data->err_addr,
-						err_data->err_addr_cnt);
+						err_data->err_addr_cnt, false);
 			amdgpu_ras_save_bad_pages(adev, &err_count);
 
-			amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
+			amdgpu_dpm_send_hbm_bad_pages_num(adev,
+					con->eeprom_control.ras_num_bad_pages);
 
 			if (con->update_channel_flag == true) {
 				amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
@@ -444,3 +445,108 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev,
 
 	return ret;
 }
+
+/**
+ * amdgpu_umc_pages_in_a_row - convert a physical address via the UMC RAS callback
+ * @adev: amdgpu device
+ * @err_data: error data passed to the callback; its count and length are reset
+ * @pa_addr: physical address to convert
+ *
+ * Return: the convert_ras_err_addr() result, or -EINVAL when no callback is set.
+ */
+int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev,
+			struct ras_err_data *err_data, uint64_t pa_addr)
+{
+	struct ta_ras_query_address_output addr_out;
+
+	/* reinit err_data */
+	err_data->err_addr_cnt = 0;
+	err_data->err_addr_len = adev->umc.retire_unit;
+
+	if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr)
+		return -EINVAL;
+
+	/* only pa.pa is set here; zero the rest instead of passing stack garbage */
+	memset(&addr_out, 0, sizeof(addr_out));
+	addr_out.pa.pa = pa_addr;
+
+	return adev->umc.ras->convert_ras_err_addr(adev, err_data, NULL,
+			&addr_out, false);
+}
+
+/**
+ * amdgpu_umc_lookup_bad_pages_in_a_row - copy retired page values for an address
+ * @adev: amdgpu device
+ * @pa_addr: physical address to look up
+ * @pfns: output array, receives retired_page of each record
+ * @len: capacity of @pfns
+ *
+ * Return: number of entries written to @pfns (bounded by @len and by
+ * adev->umc.retire_unit), 0 when the scratch array cannot be allocated, or
+ * the non-zero value returned by amdgpu_umc_pages_in_a_row().
+ */
+int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev,
+			uint64_t pa_addr, uint64_t *pfns, int len)
+{
+	int i, ret;
+	struct ras_err_data err_data;
+
+	/* the callback receives the whole struct, so leave no field uninitialized */
+	memset(&err_data, 0, sizeof(err_data));
+	err_data.err_addr = kcalloc(adev->umc.retire_unit,
+				sizeof(struct eeprom_table_record), GFP_KERNEL);
+	if (!err_data.err_addr) {
+		dev_warn(adev->dev, "Failed to alloc memory in bad page lookup!\n");
+		return 0;
+	}
+
+	ret = amdgpu_umc_pages_in_a_row(adev, &err_data, pa_addr);
+	if (ret)
+		goto out;
+
+	/* never write past @len entries */
+	for (i = 0; i < adev->umc.retire_unit && i < len; i++)
+		pfns[i] = err_data.err_addr[i].retired_page;
+
+	/* report what was copied, also when @len cut the copy short */
+	ret = i;
+
+out:
+	kfree(err_data.err_addr);
+	return ret;
+}
+
+/**
+ * amdgpu_umc_mca_to_addr - convert an error address via the UMC RAS callback
+ * @adev: amdgpu device
+ * @err_addr: value for the ma.err_addr input field
+ * @ch: value for the ma.ch_inst input field
+ * @umc: value for the ma.umc_inst input field
+ * @node: value for the ma.node_inst input field
+ * @socket: value for the ma.socket_id input field
+ * @addr_out: output structure handed to the callback
+ * @dump_addr: forwarded unchanged to the callback
+ *
+ * Return: the convert_ras_err_addr() result; 0 with @addr_out untouched when
+ * no callback is set.
+ */
+int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev,
+			uint64_t err_addr, uint32_t ch, uint32_t umc,
+			uint32_t node, uint32_t socket,
+			struct ta_ras_query_address_output *addr_out, bool dump_addr)
+{
+	struct ta_ras_query_address_input addr_in;
+
+	if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr)
+		return 0;
+
+	memset(&addr_in, 0, sizeof(addr_in));
+	addr_in.ma.err_addr = err_addr;
+	addr_in.ma.ch_inst = ch;
+	addr_in.ma.umc_inst = umc;
+	addr_in.ma.node_inst = node;
+	addr_in.ma.socket_id = socket;
+
+	return adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in,
+			addr_out, dump_addr);
+} |
