diff options
author | Dave Airlie <airlied@redhat.com> | 2023-03-03 08:26:59 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2023-03-03 08:26:59 +1000 |
commit | 54ceb92724a8cf5294c284d5e9f770fc763cdab2 (patch) | |
tree | 24ad116766b4a21e27c9d86977112f5722a18b06 /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | |
parent | 7b7d2429a1d2f789f4ce34afadbd76510a0236cc (diff) | |
parent | 6bb811d0ee3e1fe9f22a028c89b3472c999b70bc (diff) |
Merge tag 'amd-drm-fixes-6.3-2023-03-02' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-fixes-6.3-2023-03-02:
amdgpu:
- SMU 13 fixes
- Enable TMZ for GC 10.3.6
- Misc display fixes
- Buddy allocator fixes
- GC 11 fixes
- S0ix fix
- INFO IOCTL queries for GC 11
- VCN harvest fixes for SR-IOV
- UMC 8.10 RAS fixes
- Don't restrict bpc to 8
- NBIO 7.5 fix
- Allow freesync on PCon for more devices
amdkfd:
- SDMA fix
- Illegal memory access fix
radeon:
- Display fix for iMac11,2
UAPI:
- Add some additional INFO IOCTL queries for GC 11 fixes
Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/21403
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20230302051843.7793-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 23 |
1 files changed, 17 insertions, 6 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 6e543558386d..63dfcc98152d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -176,7 +176,7 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, err_data.err_addr_cnt); - amdgpu_ras_save_bad_pages(adev); + amdgpu_ras_save_bad_pages(adev, NULL); } dev_warn(adev->dev, "WARNING: THIS IS ONLY FOR TEST PURPOSES AND WILL CORRUPT RAS EEPROM\n"); @@ -2084,22 +2084,32 @@ out: /* * write error record array to eeprom, the function should be * protected by recovery_lock + * new_cnt: new added UE count, excluding reserved bad pages, can be NULL */ -int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev) +int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, + unsigned long *new_cnt) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; struct amdgpu_ras_eeprom_control *control; int save_count; - if (!con || !con->eh_data) + if (!con || !con->eh_data) { + if (new_cnt) + *new_cnt = 0; + return 0; + } mutex_lock(&con->recovery_lock); control = &con->eeprom_control; data = con->eh_data; save_count = data->count - control->ras_num_recs; mutex_unlock(&con->recovery_lock); + + if (new_cnt) + *new_cnt = save_count / adev->umc.retire_unit; + /* only new entries are saved */ if (save_count > 0) { if (amdgpu_ras_eeprom_append(control, @@ -2186,11 +2196,12 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, /* * Justification of value bad_page_cnt_threshold in ras structure * - * Generally, -1 <= amdgpu_bad_page_threshold <= max record length - * in eeprom, and introduce two scenarios accordingly. + * Generally, 0 <= amdgpu_bad_page_threshold <= max record length + * in eeprom or amdgpu_bad_page_threshold == -2, introduce two + * scenarios accordingly. * * Bad page retirement enablement: - * - If amdgpu_bad_page_threshold = -1, + * - If amdgpu_bad_page_threshold = -2, * bad_page_cnt_threshold = typical value by formula. * * - When the value from user is 0 < amdgpu_bad_page_threshold < |