diff options
author | Thomas Zimmermann <tzimmermann@suse.de> | 2025-04-07 13:47:40 +0200 |
---|---|---|
committer | Thomas Zimmermann <tzimmermann@suse.de> | 2025-04-07 14:35:48 +0200 |
commit | 1afba39f9305fe4061a4e70baa6ebab9d41459da (patch) | |
tree | 67e7ed686bd75adc559025870151c4de6d649ced /drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | |
parent | fbe43810d563a293e3de301141d33caf1f5d5c5a (diff) | |
parent | 0af2f6be1b4281385b618cb86ad946eded089ac8 (diff) |
Merge drm/drm-next into drm-misc-next
Backmerging to get v6.15-rc1 into drm-misc-next. Also fixes a
build issue when enabling CONFIG_DRM_SCHED_KUNIT_TEST.
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 74 |
1 files changed, 62 insertions, 12 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 9d6345146495..b4ad163f42a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -30,16 +30,6 @@ typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data); -struct aca_banks { - int nr_banks; - struct list_head list; -}; - -struct aca_hwip { - int hwid; - int mcatype; -}; - static struct aca_hwip aca_hwid_mcatypes[ACA_HWIP_TYPE_COUNT] = { ACA_BANK_HWID(SMU, 0x01, 0x01), ACA_BANK_HWID(PCS_XGMI, 0x50, 0x00), @@ -111,7 +101,7 @@ static struct aca_regs_dump { {"STATUS", ACA_REG_IDX_STATUS}, {"ADDR", ACA_REG_IDX_ADDR}, {"MISC", ACA_REG_IDX_MISC0}, - {"CONFIG", ACA_REG_IDX_CONFG}, + {"CONFIG", ACA_REG_IDX_CONFIG}, {"IPID", ACA_REG_IDX_IPID}, {"SYND", ACA_REG_IDX_SYND}, {"DESTAT", ACA_REG_IDX_DESTAT}, @@ -168,7 +158,7 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_ if (ret) return ret; - bank.type = type; + bank.smu_err_type = type; aca_smu_bank_dump(adev, i, count, &bank, qctx); @@ -205,6 +195,10 @@ static bool aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, { const struct aca_bank_ops *bank_ops = handle->bank_ops; + /* Parse all deferred errors with UMC aca handle */ + if (ACA_BANK_ERR_IS_DEFFERED(bank)) + return handle->hwip == ACA_HWIP_TYPE_UMC; + if (!aca_bank_hwip_is_matched(bank, handle->hwip)) return false; @@ -394,6 +388,56 @@ static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type return ret; } +static void aca_banks_generate_cper(struct amdgpu_device *adev, + enum aca_smu_type type, + struct aca_banks *banks, + int count) +{ + struct aca_bank_node *node; + struct aca_bank *bank; + int r; + + if (!adev->cper.enabled) + return; + + if (!banks || !count) { + dev_warn(adev->dev, "fail to generate cper records\n"); + return; + } + + /* UEs must be encoded into separate CPER entries */ + if (type == ACA_SMU_TYPE_UE) { + struct aca_banks de_banks; + + aca_banks_init(&de_banks); + list_for_each_entry(node, &banks->list, node) { + bank = &node->bank; + if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) { + r = aca_banks_add_bank(&de_banks, bank); + if (r) + dev_warn(adev->dev, "fail to add de banks, ret = %d\n", r); + } else { + if (amdgpu_cper_generate_ue_record(adev, bank)) + dev_warn(adev->dev, "fail to generate ue cper records\n"); + } + } + + if (!list_empty(&de_banks.list)) { + if (amdgpu_cper_generate_ce_records(adev, &de_banks, de_banks.nr_banks)) + dev_warn(adev->dev, "fail to generate de cper records\n"); + } + + aca_banks_release(&de_banks); + } else { + /* + * SMU_TYPE_CE banks are combined into 1 CPER entries, + * they could be CEs or DEs or both + */ + if (amdgpu_cper_generate_ce_records(adev, banks, count)) + dev_warn(adev->dev, "fail to generate ce cper records\n"); + } +} + static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, bank_handler_t handler, struct ras_query_context *qctx, void *data) { @@ -431,6 +475,8 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, if (ret) goto err_release_banks; + aca_banks_generate_cper(adev, type, &banks, count); + err_release_banks: aca_banks_release(&banks); @@ -516,6 +562,10 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h if (ret) return ret; + /* DEs may contain in CEs or UEs */ + if (type != ACA_ERROR_TYPE_DEFERRED) + aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data); + return aca_log_aca_error(handle, type, err_data); } |