summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c31
1 files changed, 16 insertions, 15 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 83b54efcaa877..09a6f8bc1a5a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -727,11 +727,9 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control,
- control->ras_fri)
% control->ras_max_record_count;
- if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA)
- control->ras_num_bad_pages = control->ras_num_recs;
- else
- control->ras_num_bad_pages =
- control->ras_num_recs * adev->umc.retire_unit;
+ control->ras_num_mca_recs += num;
+ control->ras_num_bad_pages += num * adev->umc.retire_unit;
+
Out:
kfree(buf);
return res;
@@ -749,7 +747,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
/* Modify the header if it exceeds.
*/
if (amdgpu_bad_page_threshold != 0 &&
- control->ras_num_bad_pages >= ras->bad_page_cnt_threshold) {
+ control->ras_num_bad_pages > ras->bad_page_cnt_threshold) {
dev_warn(adev->dev,
"Saved bad pages %d reaches threshold value %d\n",
control->ras_num_bad_pages, ras->bad_page_cnt_threshold);
@@ -808,7 +806,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control)
*/
if (amdgpu_bad_page_threshold != 0 &&
control->tbl_hdr.version == RAS_TABLE_VER_V2_1 &&
- control->ras_num_bad_pages < ras->bad_page_cnt_threshold)
+ control->ras_num_bad_pages <= ras->bad_page_cnt_threshold)
control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold -
control->ras_num_bad_pages) * 100) /
ras->bad_page_cnt_threshold;
@@ -852,6 +850,7 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
{
struct amdgpu_device *adev = to_amdgpu_device(control);
int res, i;
+ uint64_t nps = AMDGPU_NPS1_PARTITION_MODE;
if (!__is_ras_eeprom_supported(adev))
return 0;
@@ -865,9 +864,12 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
return -EINVAL;
}
+ if (adev->gmc.gmc_funcs->query_mem_partition_mode)
+ nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev);
+
/* set the new channel index flag */
for (i = 0; i < num; i++)
- record[i].retired_page |= UMC_CHANNEL_IDX_V2;
+ record[i].retired_page |= (nps << UMC_NPS_SHIFT);
mutex_lock(&control->ras_tbl_mutex);
@@ -881,7 +883,7 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control,
/* clear channel index flag, the flag is only saved on eeprom */
for (i = 0; i < num; i++)
- record[i].retired_page &= ~UMC_CHANNEL_IDX_V2;
+ record[i].retired_page &= ~(nps << UMC_NPS_SHIFT);
return res;
}
@@ -1392,6 +1394,8 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control)
}
control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset);
+ control->ras_num_mca_recs = 0;
+ control->ras_num_pa_recs = 0;
return 0;
}
@@ -1412,11 +1416,8 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
if (!__get_eeprom_i2c_addr(adev, control))
return -EINVAL;
- if (control->rec_type == AMDGPU_RAS_EEPROM_REC_PA)
- control->ras_num_bad_pages = control->ras_num_recs;
- else
- control->ras_num_bad_pages =
- control->ras_num_recs * adev->umc.retire_unit;
+ control->ras_num_bad_pages = control->ras_num_pa_recs +
+ control->ras_num_mca_recs * adev->umc.retire_unit;
if (hdr->header == RAS_TABLE_HDR_VAL) {
DRM_DEBUG_DRIVER("Found existing EEPROM table with %d records",
@@ -1455,7 +1456,7 @@ int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control)
res);
return -EINVAL;
}
- if (ras->bad_page_cnt_threshold > control->ras_num_bad_pages) {
+ if (ras->bad_page_cnt_threshold >= control->ras_num_bad_pages) {
/* This means that, the threshold was increased since
* the last time the system was booted, and now,
* ras->bad_page_cnt_threshold - control->num_recs > 0,