diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 43 | 
1 files changed, 13 insertions, 30 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a4b47e1bd111..ad490c1e2f57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1267,7 +1267,7 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,  	struct amdgpu_ras *con =  		container_of(attr, struct amdgpu_ras, features_attr); -	return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); +	return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);  }  static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) @@ -1561,7 +1561,6 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *  {  	bool poison_stat = false;  	struct amdgpu_device *adev = obj->adev; -	struct ras_err_data err_data = {0, 0, 0, NULL};  	struct amdgpu_ras_block_object *block_obj =  		amdgpu_ras_get_ras_block(adev, obj->head.block, 0); @@ -1584,7 +1583,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *  	}  	if (!adev->gmc.xgmi.connected_to_cpu) -		amdgpu_umc_poison_handler(adev, &err_data, false); +		amdgpu_umc_poison_handler(adev, false);  	if (block_obj->hw_ops->handle_poison_consumption)  		poison_stat = block_obj->hw_ops->handle_poison_consumption(adev); @@ -1949,7 +1948,12 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)  		reset_context.method = AMD_RESET_METHOD_NONE;  		reset_context.reset_req_dev = adev; -		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + +		/* Perform full reset in fatal error mode */ +		if (!amdgpu_ras_is_poison_mode_supported(ras->adev)) +			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); +		else +			clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);  		amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);  	} @@ -2344,7 +2348,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)  				adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |  							    1 << AMDGPU_RAS_BLOCK__DF); -				if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0)) +				if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0) || +				    adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 0))  					adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |  							1 << AMDGPU_RAS_BLOCK__JPEG);  				else @@ -2848,7 +2853,6 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,  	struct amdgpu_device *adev = NULL;  	uint32_t gpu_id = 0;  	uint32_t umc_inst = 0, ch_inst = 0; -	struct ras_err_data err_data = {0, 0, 0, NULL};  	/*  	 * If the error was generated in UMC_V2, which belongs to GPU UMCs, @@ -2887,31 +2891,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,  	dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",  			     umc_inst, ch_inst); -	err_data.err_addr = -		kcalloc(adev->umc.max_ras_err_cnt_per_query, -			sizeof(struct eeprom_table_record), GFP_KERNEL); -	if (!err_data.err_addr) { -		dev_warn(adev->dev, -			"Failed to alloc memory for umc error record in mca notifier!\n"); +	if (!amdgpu_umc_page_retirement_mca(adev, m->addr, ch_inst, umc_inst)) +		return NOTIFY_OK; +	else  		return NOTIFY_DONE; -	} - -	/* -	 * Translate UMC channel address to Physical address -	 */ -	if (adev->umc.ras && -	    adev->umc.ras->convert_ras_error_address) -		adev->umc.ras->convert_ras_error_address(adev, -			&err_data, m->addr, ch_inst, umc_inst); - -	if (amdgpu_bad_page_threshold != 0) { -		amdgpu_ras_add_bad_pages(adev, err_data.err_addr, -						err_data.err_addr_cnt); -		amdgpu_ras_save_bad_pages(adev); -	} - -	kfree(err_data.err_addr); -	return NOTIFY_OK;  }  static struct notifier_block amdgpu_bad_page_nb = { | 
