From bc143d8b8387ff0a22e4ef8e2375e63aa24bc311 Mon Sep 17 00:00:00 2001 From: Evan Quan Date: Mon, 22 Nov 2021 10:57:20 +0800 Subject: drm/amd/pm: do not expose implementation details to other blocks out of power Those implementation details(whether swsmu supported, some ppt_funcs supported, accessing internal statistics ...)should be kept internally. It's not a good practice and even error prone to expose implementation details. Signed-off-by: Evan Quan Reviewed-by: Lijo Lazar Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 46264a4002f7..25951b2d83b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -33,7 +33,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, int ret = 0; kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); - ret = smu_get_ecc_info(&adev->smu, (void *)&(con->umc_ecc)); + ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc)); if (ret == -EOPNOTSUPP) { if (adev->umc.ras_funcs && adev->umc.ras_funcs->query_ras_error_count) @@ -96,8 +96,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, err_data->err_addr_cnt); amdgpu_ras_save_bad_pages(adev); - if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->send_hbm_bad_pages_num) - adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.ras_num_recs); + amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs); } if (reset) -- cgit v1.2.3 From 7cab2124058d2f5f048f435a4631e176dcd1430d Mon Sep 17 00:00:00 2001 From: yipechai Date: Tue, 4 Jan 2022 13:39:15 +0800 Subject: drm/amdgpu: Modify the compilation failed problem when other ras blocks' .h include amdgpu_ras.h Modify the compilation failed problem when other ras blocks' .h include amdgpu_ras.h. v2: squash in forward declaration warning fix (Alex) Signed-off-by: yipechai Reviewed-by: Hawking Zhang Reviewed-by: John Clements Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 39 +++++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 34 +++++++++------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 2 +- 4 files changed, 51 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c349337939dd..445a0d077c1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -108,6 +108,7 @@ #include "amdgpu_smuio.h" #include "amdgpu_fdinfo.h" #include "amdgpu_mca.h" +#include "amdgpu_ras.h" #define MAX_GPU_INSTANCE 16 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index bf2983fe5d9d..5b3f4beb2149 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2810,6 +2810,45 @@ static void amdgpu_register_bad_pages_mca_notifier(struct amdgpu_device *adev) } } #endif + +struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev) +{ + if (!adev) + return NULL; + + return adev->psp.ras_context.ras; +} + +int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras* ras_con) +{ + if (!adev) + return -EINVAL;; + + adev->psp.ras_context.ras = ras_con; + return 0; +} + +/* check if ras is supported on block, say, sdma, gfx */ +int amdgpu_ras_is_supported(struct amdgpu_device *adev, + unsigned int block) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (block >= AMDGPU_RAS_BLOCK_COUNT) + return 0; + return ras && (adev->ras_enabled & (1 << block)); +} + +int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + + if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) + schedule_work(&ras->recovery_work); + return 0; +} + + /* Register each ip ras block into amdgpu ras */ int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object* ras_block_obj) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index f66122fdf477..7a4d82378205 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -26,11 +26,11 @@ #include #include -#include "amdgpu.h" -#include "amdgpu_psp.h" #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" +struct amdgpu_iv_entry; + #define AMDGPU_RAS_FLAG_INIT_BY_VBIOS (0x1 << 0) enum amdgpu_ras_block { @@ -525,19 +525,6 @@ struct amdgpu_ras_block_hw_ops { * 8: feature disable */ -#define amdgpu_ras_get_context(adev) ((adev)->psp.ras_context.ras) -#define amdgpu_ras_set_context(adev, ras_con) ((adev)->psp.ras_context.ras = (ras_con)) - -/* check if ras is supported on block, say, sdma, gfx */ -static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, - unsigned int block) -{ - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - - if (block >= AMDGPU_RAS_BLOCK_COUNT) - return 0; - return ras && (adev->ras_enabled & (1 << block)); -} int amdgpu_ras_recovery_init(struct amdgpu_device *adev); @@ -554,15 +541,6 @@ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev); -static inline int amdgpu_ras_reset_gpu(struct amdgpu_device *adev) -{ - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - - if (atomic_cmpxchg(&ras->in_recovery, 0, 1) == 0) - schedule_work(&ras->recovery_work); - return 0; -} - static inline enum ta_ras_block amdgpu_ras_block_to_ta(enum amdgpu_ras_block block) { switch (block) { @@ -694,5 +672,13 @@ const char *get_ras_block_str(struct ras_common_if *ras_block); bool amdgpu_ras_is_poison_mode_supported(struct amdgpu_device *adev); +int amdgpu_ras_is_supported(struct amdgpu_device *adev, unsigned int block); + +int amdgpu_ras_reset_gpu(struct amdgpu_device *adev); + +struct amdgpu_ras* amdgpu_ras_get_context(struct amdgpu_device *adev); + +int amdgpu_ras_set_context(struct amdgpu_device *adev, struct amdgpu_ras* ras_con); + int amdgpu_ras_register_ras_block(struct amdgpu_device *adev, struct amdgpu_ras_block_object* ras_block_obj); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 25951b2d83b6..e81ce465ff3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -21,7 +21,7 @@ * */ -#include "amdgpu_ras.h" +#include "amdgpu.h" static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, void *ras_error_status, -- cgit v1.2.3 From efe17d5a217e6b7dfd16c80dab522abcf2edf1bc Mon Sep 17 00:00:00 2001 From: yipechai Date: Thu, 6 Jan 2022 14:07:44 +0800 Subject: drm/amdgpu: Modify umc block to fit for the unified ras block data and ops 1.Modify umc block to fit for the unified ras block data and ops. 2.Change amdgpu_umc_ras_funcs to amdgpu_umc_ras, and the corresponding variable name remove _funcs suffix. 3.Remove the const flag of umc ras variable so that umc ras block can be able to be inserted into amdgpu device ras block link list. 4.Invoke amdgpu_ras_register_ras_block function to register umc ras block into amdgpu device ras block link list. 5.Remove the redundant code about umc in amdgpu_ras.c after using the unified ras block. 6.Fill unified ras block .name .block .ras_late_init and .ras_fini for all of umc versions. If .ras_late_init and .ras_fini had been defined by the selected umc version, the defined functions will take effect; if not defined, default fill them with amdgpu_umc_ras_late_init and amdgpu_umc_ras_fini. Signed-off-by: yipechai Reviewed-by: Hawking Zhang Reviewed-by: John Clements Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 10 ++++------ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 30 +++++++++++++++--------------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 32 ++++++++++++++++---------------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 14 +++++--------- drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 16 +++++++++++++++- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 21 ++++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 12 ++++++++---- drivers/gpu/drm/amd/amdgpu/umc_v6_1.h | 2 +- drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 10 +++++++--- drivers/gpu/drm/amd/amdgpu/umc_v6_7.h | 2 +- drivers/gpu/drm/amd/amdgpu/umc_v8_7.c | 12 ++++++++---- drivers/gpu/drm/amd/amdgpu/umc_v8_7.h | 2 +- 12 files changed, 99 insertions(+), 64 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index acf806c87673..de0ef21e1501 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -440,9 +440,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) { int r; - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->ras_late_init) { - r = adev->umc.ras_funcs->ras_late_init(adev); + if (adev->umc.ras && adev->umc.ras->ras_block.ras_late_init) { + r = adev->umc.ras->ras_block.ras_late_init(adev, NULL); if (r) return r; } @@ -496,9 +495,8 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) { - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->ras_fini) - adev->umc.ras_funcs->ras_fini(adev); + if (adev->umc.ras && adev->umc.ras->ras_block.ras_fini) + adev->umc.ras->ras_block.ras_fini(adev); if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini) adev->mmhub.ras->ras_block.ras_fini(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 6d1ca9e9795b..fba1c415a2a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -939,24 +939,24 @@ static void amdgpu_ras_get_ecc_info(struct amdgpu_device *adev, struct ras_err_d */ ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(ras->umc_ecc)); if (ret == -EOPNOTSUPP) { - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_count) - adev->umc.ras_funcs->query_ras_error_count(adev, err_data); + if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && + adev->umc.ras->ras_block.hw_ops->query_ras_error_count) + adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, err_data); /* umc query_ras_error_address is also responsible for clearing * error status */ - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_address) - adev->umc.ras_funcs->query_ras_error_address(adev, err_data); + if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && + adev->umc.ras->ras_block.hw_ops->query_ras_error_address) + adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, err_data); } else if (!ret) { - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->ecc_info_query_ras_error_count) - adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, err_data); + if (adev->umc.ras && + adev->umc.ras->ecc_info_query_ras_error_count) + adev->umc.ras->ecc_info_query_ras_error_count(adev, err_data); - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->ecc_info_query_ras_error_address) - adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, err_data); + if (adev->umc.ras && + adev->umc.ras->ecc_info_query_ras_error_address) + adev->umc.ras->ecc_info_query_ras_error_address(adev, err_data); } } @@ -2412,12 +2412,12 @@ int amdgpu_ras_init(struct amdgpu_device *adev) } else if (adev->df.funcs && adev->df.funcs->query_ras_poison_mode && - adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_poison_mode) { + adev->umc.ras && + adev->umc.ras->query_ras_poison_mode) { df_poison = adev->df.funcs->query_ras_poison_mode(adev); umc_poison = - adev->umc.ras_funcs->query_ras_poison_mode(adev); + adev->umc.ras->query_ras_poison_mode(adev); /* Only poison is set in both DF and UMC, we can support it */ if (df_poison && umc_poison) con->poison_supported = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index e81ce465ff3a..b4c68c09e071 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -35,12 +35,12 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc)); if (ret == -EOPNOTSUPP) { - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_count) - adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status); + if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && + adev->umc.ras->ras_block.hw_ops->query_ras_error_count) + adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status); - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->query_ras_error_address && + if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops && + adev->umc.ras->ras_block.hw_ops->query_ras_error_address && adev->umc.max_ras_err_cnt_per_query) { err_data->err_addr = kcalloc(adev->umc.max_ras_err_cnt_per_query, @@ -56,15 +56,15 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, /* umc query_ras_error_address is also responsible for clearing * error status */ - adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status); + adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status); } } else if (!ret) { - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->ecc_info_query_ras_error_count) - adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, ras_error_status); + if (adev->umc.ras && + adev->umc.ras->ecc_info_query_ras_error_count) + adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status); - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->ecc_info_query_ras_error_address && + if (adev->umc.ras && + adev->umc.ras->ecc_info_query_ras_error_address && adev->umc.max_ras_err_cnt_per_query) { err_data->err_addr = kcalloc(adev->umc.max_ras_err_cnt_per_query, @@ -80,7 +80,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, /* umc query_ras_error_address is also responsible for clearing * error status */ - adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, ras_error_status); + adev->umc.ras->ecc_info_query_ras_error_address(adev, ras_error_status); } } @@ -136,7 +136,7 @@ static int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true); } -int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info) { int r; struct ras_fs_if fs_info = { @@ -172,9 +172,9 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev) } /* ras init of specific umc version */ - if (adev->umc.ras_funcs && - adev->umc.ras_funcs->err_cnt_init) - adev->umc.ras_funcs->err_cnt_init(adev); + if (adev->umc.ras && + adev->umc.ras->err_cnt_init) + adev->umc.ras->err_cnt_init(adev); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index b72194e8bfe5..195740a6d97d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -20,6 +20,7 @@ */ #ifndef __AMDGPU_UMC_H__ #define __AMDGPU_UMC_H__ +#include "amdgpu_ras.h" /* * (addr / 256) * 4096, the higher 26 bits in ErrorAddr @@ -40,14 +41,9 @@ #define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++) #define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst)) -struct amdgpu_umc_ras_funcs { +struct amdgpu_umc_ras { + struct amdgpu_ras_block_object ras_block; void (*err_cnt_init)(struct amdgpu_device *adev); - int (*ras_late_init)(struct amdgpu_device *adev); - void (*ras_fini)(struct amdgpu_device *adev); - void (*query_ras_error_count)(struct amdgpu_device *adev, - void *ras_error_status); - void (*query_ras_error_address)(struct amdgpu_device *adev, - void *ras_error_status); bool (*query_ras_poison_mode)(struct amdgpu_device *adev); void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev, void *ras_error_status); @@ -73,10 +69,10 @@ struct amdgpu_umc { struct ras_common_if *ras_if; const struct amdgpu_umc_funcs *funcs; - const struct amdgpu_umc_ras_funcs *ras_funcs; + struct amdgpu_umc_ras *ras; }; -int amdgpu_umc_ras_late_init(struct amdgpu_device *adev); +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info); void amdgpu_umc_ras_fini(struct amdgpu_device *adev); int amdgpu_umc_poison_handler(struct amdgpu_device *adev, void *ras_error_status, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 38bb42727715..5ef4ad28ab26 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -664,11 +664,25 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.umc_inst_num = UMC_V8_7_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V8_7_PER_CHANNEL_OFFSET_SIENNA; adev->umc.channel_idx_tbl = &umc_v8_7_channel_idx_tbl[0][0]; - adev->umc.ras_funcs = &umc_v8_7_ras_funcs; + adev->umc.ras = &umc_v8_7_ras; break; default: break; } + if (adev->umc.ras) { + amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); + + strcpy(adev->umc.ras->ras_block.name,"umc"); + adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->umc.ras->ras_block.ras_late_init) + adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; + + /* If don't define special ras_fini function, use default ras_fini */ + if (!adev->umc.ras->ras_block.ras_fini) + adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini; + } } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 7506e198e6e1..3965aae435f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1202,7 +1202,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_VG20; adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; - adev->umc.ras_funcs = &umc_v6_1_ras_funcs; + adev->umc.ras = &umc_v6_1_ras; break; case IP_VERSION(6, 1, 2): adev->umc.max_ras_err_cnt_per_query = UMC_V6_1_TOTAL_CHANNEL_NUM; @@ -1210,7 +1210,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.umc_inst_num = UMC_V6_1_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V6_1_PER_CHANNEL_OFFSET_ARCT; adev->umc.channel_idx_tbl = &umc_v6_1_channel_idx_tbl[0][0]; - adev->umc.ras_funcs = &umc_v6_1_ras_funcs; + adev->umc.ras = &umc_v6_1_ras; break; case IP_VERSION(6, 7, 0): adev->umc.max_ras_err_cnt_per_query = UMC_V6_7_TOTAL_CHANNEL_NUM; @@ -1218,7 +1218,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.umc_inst_num = UMC_V6_7_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V6_7_PER_CHANNEL_OFFSET; if (!adev->gmc.xgmi.connected_to_cpu) - adev->umc.ras_funcs = &umc_v6_7_ras_funcs; + adev->umc.ras = &umc_v6_7_ras; if (1 & adev->smuio.funcs->get_die_id(adev)) adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_first[0][0]; else @@ -1227,6 +1227,21 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) default: break; } + + if (adev->umc.ras) { + amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block); + + strcpy(adev->umc.ras->ras_block.name,"umc"); + adev->umc.ras->ras_block.block = AMDGPU_RAS_BLOCK__UMC; + + /* If don't define special ras_late_init function, use default ras_late_init */ + if (!adev->umc.ras->ras_block.ras_late_init) + adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init; + + /* If don't define special ras_fini function, use default ras_fini */ + if (!adev->umc.ras->ras_block.ras_fini) + adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini; + } } static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 20b44983ac94..4776301972d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -465,10 +465,14 @@ static void umc_v6_1_err_cnt_init(struct amdgpu_device *adev) umc_v6_1_enable_umc_index_mode(adev); } -const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs = { - .err_cnt_init = umc_v6_1_err_cnt_init, - .ras_late_init = amdgpu_umc_ras_late_init, - .ras_fini = amdgpu_umc_ras_fini, +const struct amdgpu_ras_block_hw_ops umc_v6_1_ras_hw_ops = { .query_ras_error_count = umc_v6_1_query_ras_error_count, .query_ras_error_address = umc_v6_1_query_ras_error_address, }; + +struct amdgpu_umc_ras umc_v6_1_ras = { + .ras_block = { + .hw_ops = &umc_v6_1_ras_hw_ops, + }, + .err_cnt_init = umc_v6_1_err_cnt_init, +}; \ No newline at end of file diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h index 5dc36c730bb2..50c632eb4cc6 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.h @@ -45,7 +45,7 @@ /* umc ce count initial value */ #define UMC_V6_1_CE_CNT_INIT (UMC_V6_1_CE_CNT_MAX - UMC_V6_1_CE_INT_THRESHOLD) -extern const struct amdgpu_umc_ras_funcs umc_v6_1_ras_funcs; +extern struct amdgpu_umc_ras umc_v6_1_ras; extern const uint32_t umc_v6_1_channel_idx_tbl[UMC_V6_1_UMC_INSTANCE_NUM][UMC_V6_1_CHANNEL_INSTANCE_NUM]; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 6dd1e19e8d43..6953426f0bed 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -480,11 +480,15 @@ static bool umc_v6_7_query_ras_poison_mode(struct amdgpu_device *adev) return true; } -const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs = { - .ras_late_init = amdgpu_umc_ras_late_init, - .ras_fini = amdgpu_umc_ras_fini, +const struct amdgpu_ras_block_hw_ops umc_v6_7_ras_hw_ops = { .query_ras_error_count = umc_v6_7_query_ras_error_count, .query_ras_error_address = umc_v6_7_query_ras_error_address, +}; + +struct amdgpu_umc_ras umc_v6_7_ras = { + .ras_block = { + .hw_ops = &umc_v6_7_ras_hw_ops, + }, .query_ras_poison_mode = umc_v6_7_query_ras_poison_mode, .ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count, .ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address, diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h index 57f2557e7aca..1f2edf625370 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h @@ -43,7 +43,7 @@ #define UMC_V6_7_TOTAL_CHANNEL_NUM (UMC_V6_7_CHANNEL_INSTANCE_NUM * UMC_V6_7_UMC_INSTANCE_NUM) /* UMC regiser per channel offset */ #define UMC_V6_7_PER_CHANNEL_OFFSET 0x400 -extern const struct amdgpu_umc_ras_funcs umc_v6_7_ras_funcs; +extern struct amdgpu_umc_ras umc_v6_7_ras; extern const uint32_t umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; extern const uint32_t diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c index af59a35788e3..ff9e1fac616d 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c @@ -324,10 +324,14 @@ static void umc_v8_7_err_cnt_init(struct amdgpu_device *adev) } } -const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs = { - .err_cnt_init = umc_v8_7_err_cnt_init, - .ras_late_init = amdgpu_umc_ras_late_init, - .ras_fini = amdgpu_umc_ras_fini, +const struct amdgpu_ras_block_hw_ops umc_v8_7_ras_hw_ops = { .query_ras_error_count = umc_v8_7_query_ras_error_count, .query_ras_error_address = umc_v8_7_query_ras_error_address, }; + +struct amdgpu_umc_ras umc_v8_7_ras = { + .ras_block = { + .hw_ops = &umc_v8_7_ras_hw_ops, + }, + .err_cnt_init = umc_v8_7_err_cnt_init, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h index 37e6dc7c28e0..dd4993f5f78f 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.h @@ -44,7 +44,7 @@ /* umc ce count initial value */ #define UMC_V8_7_CE_CNT_INIT (UMC_V8_7_CE_CNT_MAX - UMC_V8_7_CE_INT_THRESHOLD) -extern const struct amdgpu_umc_ras_funcs umc_v8_7_ras_funcs; +extern struct amdgpu_umc_ras umc_v8_7_ras; extern const uint32_t umc_v8_7_channel_idx_tbl[UMC_V8_7_UMC_INSTANCE_NUM][UMC_V8_7_CHANNEL_INSTANCE_NUM]; -- cgit v1.2.3 From 400013b268cb666a44c0827b136bfd4bb741b13d Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Wed, 19 Jan 2022 15:42:55 +0800 Subject: drm/amdgpu: add umc_fill_error_record to make code more simple Create common amdgpu_umc_fill_error_record function for all versions of UMC and clean up related codes. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 23 +++++---------------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 21 +++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 5 +++++ drivers/gpu/drm/amd/amdgpu/umc_v6_1.c | 18 +++-------------- drivers/gpu/drm/amd/amdgpu/umc_v6_7.c | 36 ++++++--------------------------- drivers/gpu/drm/amd/amdgpu/umc_v8_7.c | 36 ++++++--------------------------- 6 files changed, 46 insertions(+), 93 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index d4e07d0acb66..e6324995fc54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -158,14 +158,9 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre } memset(&err_rec, 0x0, sizeof(struct eeprom_table_record)); - - err_rec.address = address; - err_rec.retired_page = address >> AMDGPU_GPU_PAGE_SHIFT; - err_rec.ts = (uint64_t)ktime_get_real_seconds(); - err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; - err_data.err_addr = &err_rec; - err_data.err_addr_cnt = 1; + amdgpu_umc_fill_error_record(&err_data, address, + (address >> AMDGPU_GPU_PAGE_SHIFT), 0, 0); if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, @@ -2660,8 +2655,6 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d", umc_inst, ch_inst); - memset(&err_rec, 0x0, sizeof(struct eeprom_table_record)); - /* * Translate UMC channel address to Physical address */ @@ -2673,16 +2666,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb, ADDR_OF_256B_BLOCK(channel_index) | OFFSET_IN_256B_BLOCK(m->addr); - err_rec.address = m->addr; - err_rec.retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - err_rec.ts = (uint64_t)ktime_get_real_seconds(); - err_rec.err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; - err_rec.cu = 0; - err_rec.mem_channel = channel_index; - err_rec.mcumc_id = umc_inst; - + memset(&err_rec, 0x0, sizeof(struct eeprom_table_record)); err_data.err_addr = &err_rec; - err_data.err_addr_cnt = 1; + amdgpu_umc_fill_error_record(&err_data, m->addr, + retired_page, channel_index, umc_inst); if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index b4c68c09e071..ff7805beda38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -218,3 +218,24 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, amdgpu_ras_interrupt_dispatch(adev, &ih_data); return 0; } + +void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, + uint64_t err_addr, + uint64_t retired_page, + uint32_t channel_index, + uint32_t umc_inst) +{ + struct eeprom_table_record *err_rec = + &err_data->err_addr[err_data->err_addr_cnt]; + + err_rec->address = err_addr; + /* page frame address is saved */ + err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + err_rec->ts = (uint64_t)ktime_get_real_seconds(); + err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; + err_rec->cu = 0; + err_rec->mem_channel = channel_index; + err_rec->mcumc_id = umc_inst; + + err_data->err_addr_cnt++; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 195740a6d97d..4db0526d0be4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -80,4 +80,9 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry); +void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, + uint64_t err_addr, + uint64_t retired_page, + uint32_t channel_index, + uint32_t umc_inst); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c index 4776301972d4..939cb203f7ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_1.c @@ -300,7 +300,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, { uint32_t lsb, mc_umc_status_addr; uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; - struct eeprom_table_record *err_rec; uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; if (adev->asic_type == CHIP_ARCTURUS) { @@ -328,8 +327,6 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, return; } - err_rec = &err_data->err_addr[err_data->err_addr_cnt]; - /* calculate error address if ue/ce error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || @@ -348,18 +345,9 @@ static void umc_v6_1_query_error_address(struct amdgpu_device *adev, /* we only save ue error information currently, ce is skipped */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) { - err_rec->address = err_addr; - /* page frame address is saved */ - err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - err_rec->ts = (uint64_t)ktime_get_real_seconds(); - err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; - err_rec->cu = 0; - err_rec->mem_channel = channel_index; - err_rec->mcumc_id = umc_inst; - - err_data->err_addr_cnt++; - } + == 1) + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); } /* clear umc status */ diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index f5a1ba7db75a..300dee9ec6b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -120,7 +120,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, uint32_t umc_inst) { uint64_t mc_umc_status, err_addr, retired_page; - struct eeprom_table_record *err_rec; uint32_t channel_index; uint32_t eccinfo_table_idx; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); @@ -137,8 +136,6 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, if (!err_data->err_addr) return; - err_rec = &err_data->err_addr[err_data->err_addr_cnt]; - /* calculate error address if ue/ce error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || @@ -154,18 +151,9 @@ static void umc_v6_7_ecc_info_query_error_address(struct amdgpu_device *adev, /* we only save ue error information currently, ce is skipped */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) { - err_rec->address = err_addr; - /* page frame address is saved */ - err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - err_rec->ts = (uint64_t)ktime_get_real_seconds(); - err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; - err_rec->cu = 0; - err_rec->mem_channel = channel_index; - err_rec->mcumc_id = umc_inst; - - err_data->err_addr_cnt++; - } + == 1) + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); } } @@ -345,7 +333,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, { uint32_t mc_umc_status_addr; uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; - struct eeprom_table_record *err_rec; uint32_t channel_index; mc_umc_status_addr = @@ -364,8 +351,6 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, return; } - err_rec = &err_data->err_addr[err_data->err_addr_cnt]; - channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; @@ -384,18 +369,9 @@ static void umc_v6_7_query_error_address(struct amdgpu_device *adev, /* we only save ue error information currently, ce is skipped */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) { - err_rec->address = err_addr; - /* page frame address is saved */ - err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - err_rec->ts = (uint64_t)ktime_get_real_seconds(); - err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; - err_rec->cu = 0; - err_rec->mem_channel = channel_index; - err_rec->mcumc_id = umc_inst; - - err_data->err_addr_cnt++; - } + == 1) + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); } /* clear umc status */ diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c index d70417196662..de85a998ef99 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_7.c @@ -121,7 +121,6 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev, uint32_t umc_inst) { uint64_t mc_umc_status, err_addr, retired_page; - struct eeprom_table_record *err_rec; uint32_t channel_index; uint32_t eccinfo_table_idx; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); @@ -138,8 +137,6 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev, if (!err_data->err_addr) return; - err_rec = &err_data->err_addr[err_data->err_addr_cnt]; - /* calculate error address if ue/ce error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || @@ -155,18 +152,9 @@ static void umc_v8_7_ecc_info_query_error_address(struct amdgpu_device *adev, /* we only save ue error information currently, ce is skipped */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) { - err_rec->address = err_addr; - /* page frame address is saved */ - err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - err_rec->ts = (uint64_t)ktime_get_real_seconds(); - err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; - err_rec->cu = 0; - err_rec->mem_channel = channel_index; - err_rec->mcumc_id = umc_inst; - - err_data->err_addr_cnt++; - } + == 1) + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); } } @@ -344,7 +332,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, { uint32_t lsb, mc_umc_status_addr; uint64_t mc_umc_status, err_addr, retired_page, mc_umc_addrt0; - struct eeprom_table_record *err_rec; uint32_t channel_index = adev->umc.channel_idx_tbl[umc_inst * adev->umc.channel_inst_num + ch_inst]; mc_umc_status_addr = @@ -363,8 +350,6 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, return; } - err_rec = &err_data->err_addr[err_data->err_addr_cnt]; - /* calculate error address if ue/ce error is detected */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 && (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 || @@ -383,18 +368,9 @@ static void umc_v8_7_query_error_address(struct amdgpu_device *adev, /* we only save ue error information currently, ce is skipped */ if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) - == 1) { - err_rec->address = err_addr; - /* page frame address is saved */ - err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - err_rec->ts = (uint64_t)ktime_get_real_seconds(); - err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE; - err_rec->cu = 0; - err_rec->mem_channel = channel_index; - err_rec->mcumc_id = umc_inst; - - err_data->err_addr_cnt++; - } + == 1) + amdgpu_umc_fill_error_record(err_data, err_addr, + retired_page, channel_index, umc_inst); } /* clear umc status */ -- cgit v1.2.3 From a3ace75cdb6979e18ec9ad00862445ff71bb8a71 Mon Sep 17 00:00:00 2001 From: yipechai Date: Tue, 8 Feb 2022 11:05:49 +0800 Subject: drm/amdgpu: Optimize amdgpu_umc_ras_late_init/amdgpu_umc_ras_fini function code Optimize amdgpu_umc_ras_late_init/amdgpu_umc_ras_fini function code. Signed-off-by: yipechai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 44 +++++---------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 4 +++ drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 6 +++++ drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 6 +++++ 4 files changed, 22 insertions(+), 38 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index ff7805beda38..9f1406e1a48a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -129,7 +129,7 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev, return ret; } -static int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, +int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, void *ras_error_status, struct amdgpu_iv_entry *entry) { @@ -139,36 +139,15 @@ static int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info) { int r; - struct ras_fs_if fs_info = { - .sysfs_name = "umc_err_count", - }; - struct ras_ih_if ih_info = { - .cb = amdgpu_umc_process_ras_data_cb, - }; - if (!adev->umc.ras_if) { - adev->umc.ras_if = - kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); - if (!adev->umc.ras_if) - return -ENOMEM; - adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC; - adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; - adev->umc.ras_if->sub_block_index = 0; - } - ih_info.head = fs_info.head = *adev->umc.ras_if; - - r = amdgpu_ras_late_init(adev, adev->umc.ras_if, - &fs_info, &ih_info); + r = amdgpu_ras_block_late_init(adev, adev->umc.ras_if); if (r) - goto free; + return r; if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) { r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); if (r) goto late_fini; - } else { - r = 0; - goto free; } /* ras init of specific umc version */ @@ -179,26 +158,15 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info) return 0; late_fini: - amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info); -free: - kfree(adev->umc.ras_if); - adev->umc.ras_if = NULL; + amdgpu_ras_block_late_fini(adev, adev->umc.ras_if); return r; } void amdgpu_umc_ras_fini(struct amdgpu_device *adev) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && - adev->umc.ras_if) { - struct ras_common_if *ras_if = adev->umc.ras_if; - struct ras_ih_if ih_info = { - .head = *ras_if, - .cb = amdgpu_umc_process_ras_data_cb, - }; - - amdgpu_ras_late_fini(adev, ras_if, &ih_info); - kfree(ras_if); - } + adev->umc.ras_if) + amdgpu_ras_block_late_fini(adev, adev->umc.ras_if); } int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index 4db0526d0be4..ec15b3640399 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -85,4 +85,8 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data, uint64_t retired_page, uint32_t channel_index, uint32_t umc_inst); + +int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, + void *ras_error_status, + struct amdgpu_iv_entry *entry); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 80f6a29ed30c..2fc8761ede1a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -674,6 +674,8 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; /* If don't define special ras_late_init function, use default ras_late_init */ if (!adev->umc.ras->ras_block.ras_late_init) @@ -682,6 +684,10 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) /* If don't define special ras_fini function, use default ras_fini */ if (!adev->umc.ras->ras_block.ras_fini) adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini; + + /* If not defined special ras_cb function, use default ras_cb */ + if (!adev->umc.ras->ras_block.ras_cb) + adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; } } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 15958fd45f64..94095b965e2c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1234,6 +1234,8 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc"); adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC; + adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm; /* If don't define special ras_late_init function, use default ras_late_init */ if (!adev->umc.ras->ras_block.ras_late_init) @@ -1242,6 +1244,10 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) /* If don't define special ras_fini function, use default ras_fini */ if (!adev->umc.ras->ras_block.ras_fini) adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini; + + /* If not defined special ras_cb function, use default ras_cb */ + if (!adev->umc.ras->ras_block.ras_cb) + adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb; } } -- cgit v1.2.3 From 4e9b1fa5a2757d11a5c40eed2b2b4837dcb2f12e Mon Sep 17 00:00:00 2001 From: yipechai Date: Mon, 14 Feb 2022 14:12:55 +0800 Subject: drm/amdgpu: Modify .ras_late_init function pointer parameter Modify .ras_late_init function pointer parameter so that it can remove redundant intermediate calls in some ras blocks. Signed-off-by: yipechai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +- drivers/gpu/drm/amd/amdgpu/mca_v3_0.c | 6 +++--- 15 files changed, 17 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index fe392108b5c2..b7470ed7bc25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -622,7 +622,7 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) return r; } -int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, void *ras_info) +int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; r = amdgpu_ras_block_late_init(adev, adev->gfx.ras_if); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index f99eac544f6d..ccca0a85b982 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -386,7 +386,7 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value); -int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, void *ras_info); +int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c index 21a5f884dd2a..70a096160998 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c @@ -24,7 +24,7 @@ #include "amdgpu.h" #include "amdgpu_ras.h" -int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, void *ras_info) +int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { return amdgpu_ras_block_late_init(adev, adev->hdp.ras_if); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h index 4af2c2a322e7..aabd59aa5213 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h @@ -43,6 +43,6 @@ struct amdgpu_hdp { struct amdgpu_hdp_ras *ras; }; -int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, void *ras_info); +int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); void amdgpu_hdp_ras_fini(struct amdgpu_device *adev); #endif /* __AMDGPU_HDP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c index 2bdb4d8b7955..ede98db8c126 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c @@ -24,7 +24,7 @@ #include "amdgpu.h" #include "amdgpu_ras.h" -int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, void *ras_info) +int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { return amdgpu_ras_block_late_init(adev, adev->mmhub.ras_if); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 7deda9a3b81e..75815106f2d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -47,7 +47,7 @@ struct amdgpu_mmhub { struct amdgpu_mmhub_ras *ras; }; -int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, void *ras_info); +int amdgpu_mmhub_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index 89e61fdd3580..92fd4ffa7779 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -22,7 +22,7 @@ #include "amdgpu.h" #include "amdgpu_ras.h" -int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, void *ras_info) +int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; r = amdgpu_ras_block_late_init(adev, adev->nbio.ras_if); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index 4afb76d3cd97..f9546c7341b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -104,6 +104,6 @@ struct amdgpu_nbio { struct amdgpu_nbio_ras *ras; }; -int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, void *ras_info); +int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); void amdgpu_nbio_ras_fini(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 5de567c6a8f7..837d1b79a9cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -490,7 +490,7 @@ struct amdgpu_ras_block_object { int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index); - int (*ras_late_init)(struct amdgpu_device *adev, void *ras_info); + int (*ras_late_init)(struct amdgpu_device *adev, struct ras_common_if *ras_block); void (*ras_fini)(struct amdgpu_device *adev); ras_ih_cb ras_cb; const struct amdgpu_ras_block_hw_ops *hw_ops; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 242a7b4dcad9..594454dba4c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -87,7 +87,7 @@ uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, } int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, - void *ras_ih_info) + struct ras_common_if *ras_block) { int r, i; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index eaee12ab6518..8b226ffee32c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -117,7 +117,7 @@ amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring); int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index); uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid); int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, - void *ras_ih_info); + struct ras_common_if *ras_block); void amdgpu_sdma_ras_fini(struct amdgpu_device *adev); int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 9f1406e1a48a..7abf9299e0d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -136,7 +136,7 @@ int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev, return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true); } -int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info) +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index ec15b3640399..e4b3678a6685 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -72,7 +72,7 @@ struct amdgpu_umc { struct amdgpu_umc_ras *ras; }; -int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, void *ras_info); +int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); void amdgpu_umc_ras_fini(struct amdgpu_device *adev); int amdgpu_umc_poison_handler(struct amdgpu_device *adev, void *ras_error_status, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index a785b1e088cd..91f788f6f6b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -732,7 +732,7 @@ int amdgpu_xgmi_remove_device(struct amdgpu_device *adev) return psp_xgmi_terminate(&adev->psp); } -static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, void *ras_info) +static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { if (!adev->gmc.xgmi.supported || adev->gmc.xgmi.num_physical_nodes == 0) diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c index c442b34b9472..72ce19acb8bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c @@ -37,7 +37,7 @@ static void mca_v3_0_mp0_query_ras_error_count(struct amdgpu_device *adev, ras_error_status); } -static int mca_v3_0_mp0_ras_late_init(struct amdgpu_device *adev, void *ras_info) +static int mca_v3_0_mp0_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { return amdgpu_mca_ras_late_init(adev, &adev->mca.mp0); } @@ -89,7 +89,7 @@ static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev, ras_error_status); } -static int mca_v3_0_mp1_ras_late_init(struct amdgpu_device *adev, void *ras_info) +static int mca_v3_0_mp1_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { return amdgpu_mca_ras_late_init(adev, &adev->mca.mp1); } @@ -127,7 +127,7 @@ static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev, ras_error_status); } -static int mca_v3_0_mpio_ras_late_init(struct amdgpu_device *adev, void *ras_info) +static int mca_v3_0_mpio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { return amdgpu_mca_ras_late_init(adev, &adev->mca.mpio); } -- cgit v1.2.3 From caae42f00924498e78da8a960561936aa7eba503 Mon Sep 17 00:00:00 2001 From: yipechai Date: Mon, 14 Feb 2022 14:38:02 +0800 Subject: drm/amdgpu: Optimize xxx_ras_late_init function of each ras block 1. Move calling ras block instance members from module internal function to the top calling xxx_ras_late_init. 2. Module internal function calls can only use parameter variables of xxx_ras_late_init instead of ras block instance members. Signed-off-by: yipechai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/soc15.c | 2 +- 9 files changed, 18 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index b7470ed7bc25..52912b6bcb20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -625,11 +625,11 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value) int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; - r = amdgpu_ras_block_late_init(adev, adev->gfx.ras_if); + r = amdgpu_ras_block_late_init(adev, ras_block); if (r) return r; - if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) { + if (amdgpu_ras_is_supported(adev, ras_block->block)) { if (!amdgpu_persistent_edc_harvesting_supported(adev)) amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX); @@ -640,7 +640,7 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r return 0; late_fini: - amdgpu_ras_block_late_fini(adev, adev->gfx.ras_if); + amdgpu_ras_block_late_fini(adev, ras_block); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index dca1dc5b993c..7bc68c94e0b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -452,7 +452,7 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) int r; if (adev->umc.ras && adev->umc.ras->ras_block.ras_late_init) { - r = adev->umc.ras->ras_block.ras_late_init(adev, NULL); + r = adev->umc.ras->ras_block.ras_late_init(adev, adev->umc.ras_if); if (r) return r; } @@ -464,7 +464,7 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) } if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_late_init) { - r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, NULL); + r = adev->gmc.xgmi.ras->ras_block.ras_late_init(adev, adev->gmc.xgmi.ras_if); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index 92fd4ffa7779..f09ad80f0772 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -25,11 +25,11 @@ int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) { int r; - r = amdgpu_ras_block_late_init(adev, adev->nbio.ras_if); + r = amdgpu_ras_block_late_init(adev, ras_block); if (r) return r; - if (amdgpu_ras_is_supported(adev, adev->nbio.ras_if->block)) { + if (amdgpu_ras_is_supported(adev, ras_block->block)) { r = amdgpu_irq_get(adev, &adev->nbio.ras_controller_irq, 0); if (r) goto late_fini; @@ -40,7 +40,7 @@ int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if * return 0; late_fini: - amdgpu_ras_block_late_fini(adev, adev->nbio.ras_if); + amdgpu_ras_block_late_fini(adev, ras_block); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 594454dba4c1..3b5c43575aa3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -91,11 +91,11 @@ int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, { int r, i; - r = amdgpu_ras_block_late_init(adev, adev->sdma.ras_if); + r = amdgpu_ras_block_late_init(adev, ras_block); if (r) return r; - if (amdgpu_ras_is_supported(adev, adev->sdma.ras_if->block)) { + if (amdgpu_ras_is_supported(adev, ras_block->block)) { for (i = 0; i < adev->sdma.num_instances; i++) { r = amdgpu_irq_get(adev, &adev->sdma.ecc_irq, AMDGPU_SDMA_IRQ_INSTANCE0 + i); @@ -107,7 +107,7 @@ int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, return 0; late_fini: - amdgpu_ras_block_late_fini(adev, adev->sdma.ras_if); + amdgpu_ras_block_late_fini(adev, ras_block); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 7abf9299e0d7..9400260e3263 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -140,11 +140,11 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r { int r; - r = amdgpu_ras_block_late_init(adev, adev->umc.ras_if); + r = amdgpu_ras_block_late_init(adev, ras_block); if (r) return r; - if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) { + if (amdgpu_ras_is_supported(adev, ras_block->block)) { r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0); if (r) goto late_fini; @@ -158,7 +158,7 @@ int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *r return 0; late_fini: - amdgpu_ras_block_late_fini(adev, adev->umc.ras_if); + amdgpu_ras_block_late_fini(adev, ras_block); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 91f788f6f6b5..77b65434ccc2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -740,7 +740,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_comm adev->gmc.xgmi.ras->ras_block.hw_ops->reset_ras_error_count(adev); - return amdgpu_ras_block_late_init(adev, adev->gmc.xgmi.ras_if); + return amdgpu_ras_block_late_init(adev, ras_block); } static void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 7e57b90d5b50..bb40ab83fc22 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4792,7 +4792,7 @@ static int gfx_v9_0_ecc_late_init(void *handle) return r; if (adev->gfx.ras && adev->gfx.ras->ras_block.ras_late_init) { - r = adev->gfx.ras->ras_block.ras_late_init(adev, NULL); + r = adev->gfx.ras->ras_block.ras_late_init(adev, adev->gfx.ras_if); if (r) return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 82f6b31a1904..af5a1c93861b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1895,7 +1895,7 @@ static int sdma_v4_0_late_init(void *handle) } if (adev->sdma.ras && adev->sdma.ras->ras_block.ras_late_init) - return adev->sdma.ras->ras_block.ras_late_init(adev, NULL); + return adev->sdma.ras->ras_block.ras_late_init(adev, adev->sdma.ras_if); else return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index a216e625c89c..99a88f4d8050 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1195,7 +1195,7 @@ static int soc15_common_late_init(void *handle) xgpu_ai_mailbox_get_irq(adev); if (adev->nbio.ras && adev->nbio.ras->ras_block.ras_late_init) - r = adev->nbio.ras->ras_block.ras_late_init(adev, NULL); + r = adev->nbio.ras->ras_block.ras_late_init(adev, adev->nbio.ras_if); return r; } -- cgit v1.2.3 From 01d468d9a420152e4a1270992e69a37ea0c98e04 Mon Sep 17 00:00:00 2001 From: yipechai Date: Thu, 17 Feb 2022 14:52:20 +0800 Subject: drm/amdgpu: Modify .ras_fini function pointer parameter Modify .ras_fini function pointer parameter so that we can remove redundant intermediate calls in some ras blocks. Signed-off-by: yipechai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 2 +- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/mca_v3_0.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/soc15.c | 2 +- 19 files changed, 24 insertions(+), 24 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 52912b6bcb20..d020c4599433 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -644,7 +644,7 @@ late_fini: return r; } -void amdgpu_gfx_ras_fini(struct amdgpu_device *adev) +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && adev->gfx.ras_if) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index ccca0a85b982..f7c50ab4589c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -387,7 +387,7 @@ bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value); int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); -void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); +void amdgpu_gfx_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, struct amdgpu_iv_entry *entry); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 78498d1d1769..350fbfec1e03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -455,16 +455,16 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) { if (adev->umc.ras && adev->umc.ras->ras_block.ras_fini) - adev->umc.ras->ras_block.ras_fini(adev); + adev->umc.ras->ras_block.ras_fini(adev, NULL); if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini) - adev->mmhub.ras->ras_block.ras_fini(adev); + adev->mmhub.ras->ras_block.ras_fini(adev, NULL); if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini) - adev->gmc.xgmi.ras->ras_block.ras_fini(adev); + adev->gmc.xgmi.ras->ras_block.ras_fini(adev, NULL); if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_fini) - adev->hdp.ras->ras_block.ras_fini(adev); + adev->hdp.ras->ras_block.ras_fini(adev, NULL); } /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c index b7fbc114a175..0f224e21cd55 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c @@ -24,7 +24,7 @@ #include "amdgpu.h" #include "amdgpu_ras.h" -void amdgpu_hdp_ras_fini(struct amdgpu_device *adev) +void amdgpu_hdp_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) && adev->hdp.ras_if) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h index aabd59aa5213..c05cd992ef8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h @@ -44,5 +44,5 @@ struct amdgpu_hdp { }; int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); -void amdgpu_hdp_ras_fini(struct amdgpu_device *adev); +void amdgpu_hdp_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block); #endif /* __AMDGPU_HDP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c index 42413813765a..6dfcedcc37fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c @@ -24,7 +24,7 @@ #include "amdgpu.h" #include "amdgpu_ras.h" -void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev) +void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) && adev->mmhub.ras_if) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 240b26d9a388..253f047379cf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -47,6 +47,6 @@ struct amdgpu_mmhub { struct amdgpu_mmhub_ras *ras; }; -void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev); +void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index f09ad80f0772..0de2fdf31eed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -44,7 +44,7 @@ late_fini: return r; } -void amdgpu_nbio_ras_fini(struct amdgpu_device *adev) +void amdgpu_nbio_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) && adev->nbio.ras_if) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index f9546c7341b8..3222e1cae134 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -105,5 +105,5 @@ struct amdgpu_nbio { }; int amdgpu_nbio_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); -void amdgpu_nbio_ras_fini(struct amdgpu_device *adev); +void amdgpu_nbio_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 143a83043d7c..7cddaad90d6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -491,7 +491,7 @@ struct amdgpu_ras_block_object { int (*ras_block_match)(struct amdgpu_ras_block_object *block_obj, enum amdgpu_ras_block block, uint32_t sub_block_index); int (*ras_late_init)(struct amdgpu_device *adev, struct ras_common_if *ras_block); - void (*ras_fini)(struct amdgpu_device *adev); + void (*ras_fini)(struct amdgpu_device *adev, struct ras_common_if *ras_block); ras_ih_cb ras_cb; const struct amdgpu_ras_block_hw_ops *hw_ops; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 3b5c43575aa3..863035a94bd8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -111,7 +111,7 @@ late_fini: return r; } -void amdgpu_sdma_ras_fini(struct amdgpu_device *adev) +void amdgpu_sdma_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) && adev->sdma.ras_if) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 8b226ffee32c..34ec60dfe5e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -118,7 +118,7 @@ int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index); uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid); int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); -void amdgpu_sdma_ras_fini(struct amdgpu_device *adev); +void amdgpu_sdma_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, void *err_data, struct amdgpu_iv_entry *entry); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 9400260e3263..41e976733c57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -162,7 +162,7 @@ late_fini: return r; } -void amdgpu_umc_ras_fini(struct amdgpu_device *adev) +void amdgpu_umc_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && adev->umc.ras_if) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index e4b3678a6685..c8deba8dacb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -73,7 +73,7 @@ struct amdgpu_umc { }; int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); -void amdgpu_umc_ras_fini(struct amdgpu_device *adev); +void amdgpu_umc_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_umc_poison_handler(struct amdgpu_device *adev, void *ras_error_status, bool reset); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 91817a31f3e1..3e56adea84a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -768,7 +768,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_comm return amdgpu_ras_block_late_init(adev, ras_block); } -static void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev) +static void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) && adev->gmc.xgmi.ras_if) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 1997f129db9c..3ecb238c6483 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2433,7 +2433,7 @@ static int gfx_v9_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (adev->gfx.ras && adev->gfx.ras->ras_block.ras_fini) - adev->gfx.ras->ras_block.ras_fini(adev); + adev->gfx.ras->ras_block.ras_fini(adev, NULL); for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); diff --git a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c index b4b36899f5c6..02c50be19d3b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mca_v3_0.c @@ -37,7 +37,7 @@ static void mca_v3_0_mp0_query_ras_error_count(struct amdgpu_device *adev, ras_error_status); } -static void mca_v3_0_mp0_ras_fini(struct amdgpu_device *adev) +static void mca_v3_0_mp0_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { amdgpu_mca_ras_fini(adev, &adev->mca.mp0); } @@ -83,7 +83,7 @@ static void mca_v3_0_mp1_query_ras_error_count(struct amdgpu_device *adev, ras_error_status); } -static void mca_v3_0_mp1_ras_fini(struct amdgpu_device *adev) +static void mca_v3_0_mp1_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { amdgpu_mca_ras_fini(adev, &adev->mca.mp1); } @@ -115,7 +115,7 @@ static void mca_v3_0_mpio_query_ras_error_count(struct amdgpu_device *adev, ras_error_status); } -static void mca_v3_0_mpio_ras_fini(struct amdgpu_device *adev) +static void mca_v3_0_mpio_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { amdgpu_mca_ras_fini(adev, &adev->mca.mpio); } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index e26c39fcd336..d0a9012e53d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1997,7 +1997,7 @@ static int sdma_v4_0_sw_fini(void *handle) if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && adev->sdma.ras->ras_block.ras_fini) - adev->sdma.ras->ras_block.ras_fini(adev); + adev->sdma.ras->ras_block.ras_fini(adev, NULL); for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index b7c24149a6bd..34cd5cad7da5 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1215,7 +1215,7 @@ static int soc15_common_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (adev->nbio.ras && adev->nbio.ras->ras_block.ras_fini) - adev->nbio.ras->ras_block.ras_fini(adev); + adev->nbio.ras->ras_block.ras_fini(adev, NULL); if (adev->df.funcs && adev->df.funcs->sw_fini) -- cgit v1.2.3 From 667c7091a39e8b360d34f37aed5f8dd85bdc45f7 Mon Sep 17 00:00:00 2001 From: yipechai Date: Thu, 17 Feb 2022 14:55:19 +0800 Subject: drm/amdgpu: Optimize xxx_ras_fini function of each ras block 1. Move the variables of ras block instance members from specific xxx_ras_fini to general ras_fini call. 2. Function calls inside the modules only use parameters passed from xxx_ras_fini instead of ras block instance members. Signed-off-by: yipechai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 8 ++++---- drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/soc15.c | 2 +- 11 files changed, 21 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index d020c4599433..40f7e29aa9ca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -647,8 +647,8 @@ late_fini: void amdgpu_gfx_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX) && - adev->gfx.ras_if) - amdgpu_ras_block_late_fini(adev, adev->gfx.ras_if); + ras_block) + amdgpu_ras_block_late_fini(adev, ras_block); } int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 350fbfec1e03..62083d3aa9b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -455,16 +455,16 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) { if (adev->umc.ras && adev->umc.ras->ras_block.ras_fini) - adev->umc.ras->ras_block.ras_fini(adev, NULL); + adev->umc.ras->ras_block.ras_fini(adev, adev->umc.ras_if); if (adev->mmhub.ras && adev->mmhub.ras->ras_block.ras_fini) - adev->mmhub.ras->ras_block.ras_fini(adev, NULL); + adev->mmhub.ras->ras_block.ras_fini(adev, adev->mmhub.ras_if); if (adev->gmc.xgmi.ras && adev->gmc.xgmi.ras->ras_block.ras_fini) - adev->gmc.xgmi.ras->ras_block.ras_fini(adev, NULL); + adev->gmc.xgmi.ras->ras_block.ras_fini(adev, adev->gmc.xgmi.ras_if); if (adev->hdp.ras && adev->hdp.ras->ras_block.ras_fini) - adev->hdp.ras->ras_block.ras_fini(adev, NULL); + adev->hdp.ras->ras_block.ras_fini(adev, adev->hdp.ras_if); } /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c index 0f224e21cd55..5595f903c17a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c @@ -27,6 +27,6 @@ void amdgpu_hdp_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) && - adev->hdp.ras_if) - amdgpu_ras_block_late_fini(adev, adev->hdp.ras_if); + ras_block) + amdgpu_ras_block_late_fini(adev, ras_block); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c index 6dfcedcc37fd..e7c3b8fff868 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.c @@ -27,6 +27,6 @@ void amdgpu_mmhub_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB) && - adev->mmhub.ras_if) - amdgpu_ras_block_late_fini(adev, adev->mmhub.ras_if); + ras_block) + amdgpu_ras_block_late_fini(adev, ras_block); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c index 0de2fdf31eed..54a5a15272c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.c @@ -47,6 +47,6 @@ late_fini: void amdgpu_nbio_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__PCIE_BIF) && - adev->nbio.ras_if) - amdgpu_ras_block_late_fini(adev, adev->nbio.ras_if); + ras_block) + amdgpu_ras_block_late_fini(adev, ras_block); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 863035a94bd8..1df8de84386d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -114,8 +114,8 @@ late_fini: void amdgpu_sdma_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA) && - adev->sdma.ras_if) - amdgpu_ras_block_late_fini(adev, adev->sdma.ras_if); + ras_block) + amdgpu_ras_block_late_fini(adev, ras_block); } int amdgpu_sdma_process_ras_data_cb(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 41e976733c57..2623a2d30703 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -165,8 +165,8 @@ late_fini: void amdgpu_umc_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && - adev->umc.ras_if) - amdgpu_ras_block_late_fini(adev, adev->umc.ras_if); + ras_block) + amdgpu_ras_block_late_fini(adev, ras_block); } int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 3e56adea84a3..a54257760f9e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -771,8 +771,8 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_comm static void amdgpu_xgmi_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) { if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL) && - adev->gmc.xgmi.ras_if) - amdgpu_ras_block_late_fini(adev, adev->gmc.xgmi.ras_if); + ras_block) + amdgpu_ras_block_late_fini(adev, ras_block); } uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3ecb238c6483..e8446967a4d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2433,7 +2433,7 @@ static int gfx_v9_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (adev->gfx.ras && adev->gfx.ras->ras_block.ras_fini) - adev->gfx.ras->ras_block.ras_fini(adev, NULL); + adev->gfx.ras->ras_block.ras_fini(adev, adev->gfx.ras_if); for (i = 0; i < adev->gfx.num_gfx_rings; i++) amdgpu_ring_fini(&adev->gfx.gfx_ring[i]); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index d0a9012e53d7..222d25a0413a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1997,7 +1997,7 @@ static int sdma_v4_0_sw_fini(void *handle) if (adev->sdma.ras && adev->sdma.ras->ras_block.hw_ops && adev->sdma.ras->ras_block.ras_fini) - adev->sdma.ras->ras_block.ras_fini(adev, NULL); + adev->sdma.ras->ras_block.ras_fini(adev, adev->sdma.ras_if); for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 34cd5cad7da5..0631ebd39db1 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -1215,7 +1215,7 @@ static int soc15_common_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; if (adev->nbio.ras && adev->nbio.ras->ras_block.ras_fini) - adev->nbio.ras->ras_block.ras_fini(adev, NULL); + adev->nbio.ras->ras_block.ras_fini(adev, adev->nbio.ras_if); if (adev->df.funcs && adev->df.funcs->sw_fini) -- cgit v1.2.3 From 0dca257d6dc5526c4c293f306b7b47765987de22 Mon Sep 17 00:00:00 2001 From: yipechai Date: Mon, 14 Feb 2022 16:07:55 +0800 Subject: drm/amdgpu: Remove redundant calls of amdgpu_ras_block_late_fini in umc ras block Remove redundant calls of amdgpu_ras_block_late_fini in umc ras block. Signed-off-by: yipechai Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 7 ------- drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h | 1 - drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c | 2 +- drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 2 +- 4 files changed, 2 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 2623a2d30703..85da6cbaf3b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -162,13 +162,6 @@ late_fini: return r; } -void amdgpu_umc_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block) -{ - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) && - ras_block) - amdgpu_ras_block_late_fini(adev, ras_block); -} - int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index c8deba8dacb5..2ec6698aa1fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -73,7 +73,6 @@ struct amdgpu_umc { }; int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); -void amdgpu_umc_ras_fini(struct amdgpu_device *adev, struct ras_common_if *ras_block); int amdgpu_umc_poison_handler(struct amdgpu_device *adev, void *ras_error_status, bool reset); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 3dcd82b49481..d4ffe08f9fb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -685,7 +685,7 @@ static void gmc_v10_0_set_umc_funcs(struct amdgpu_device *adev) /* If don't define special ras_fini function, use default ras_fini */ if (!adev->umc.ras->ras_block.ras_fini) - adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini; + adev->umc.ras->ras_block.ras_fini = amdgpu_ras_block_late_fini; /* If not defined special ras_cb function, use default ras_cb */ if (!adev->umc.ras->ras_block.ras_cb) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 6beea09e5fdd..1b7a59e427e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1245,7 +1245,7 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) /* If don't define special ras_fini function, use default ras_fini */ if (!adev->umc.ras->ras_block.ras_fini) - adev->umc.ras->ras_block.ras_fini = amdgpu_umc_ras_fini; + adev->umc.ras->ras_block.ras_fini = amdgpu_ras_block_late_fini; /* If not defined special ras_cb function, use default ras_cb */ if (!adev->umc.ras->ras_block.ras_cb) -- cgit v1.2.3 From 69691c823531c36c7283ecaa040e99e9c12ece07 Mon Sep 17 00:00:00 2001 From: "Stanley.Yang" Date: Thu, 3 Mar 2022 17:56:33 +0800 Subject: drm/amdgpu: message smu to update bad channel info It should notice SMU to update bad channel info when detected uncorrectable error in UMC block Signed-off-by: Stanley.Yang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 7 +++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c | 25 +++++++++++++++++++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c | 5 +++++ 5 files changed, 42 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index d78c2970e558..424c22a841f4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -2068,6 +2068,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) mutex_init(&con->recovery_lock); INIT_WORK(&con->recovery_work, amdgpu_ras_do_recovery); atomic_set(&con->in_recovery, 0); + con->eeprom_control.bad_channel_bitmap = 0; max_eeprom_records_count = amdgpu_ras_eeprom_max_record_count(); amdgpu_ras_validate_threshold(adev, max_eeprom_records_count); @@ -2092,6 +2093,11 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) goto free; amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs); + + if (con->update_channel_flag == true) { + amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap); + con->update_channel_flag = false; + } } #ifdef CONFIG_X86_MCE_AMD @@ -2285,6 +2291,7 @@ int amdgpu_ras_init(struct amdgpu_device *adev) goto release_con; } + con->update_channel_flag = false; con->features = 0; INIT_LIST_HEAD(&con->head); /* Might need get this flag from vbios. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 7cddaad90d6d..9314fde81e68 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -374,6 +374,9 @@ struct amdgpu_ras { /* record umc error info queried from smu */ struct umc_ecc_info umc_ecc; + + /* Indicates smu whether need update bad channel info */ + bool update_channel_flag; }; struct ras_fs_data { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index a44f2eeed6ef..c4283987bb1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -267,6 +267,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) { struct amdgpu_device *adev = to_amdgpu_device(control); struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); u8 csum; int res; @@ -287,6 +288,10 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_recs); + control->bad_channel_bitmap = 0; + amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap); + con->update_channel_flag = false; + amdgpu_ras_debugfs_set_ret_size(control); mutex_unlock(&control->ras_tbl_mutex); @@ -420,6 +425,7 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, struct eeprom_table_record *record, const u32 num) { + struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control)); u32 a, b, i; u8 *buf, *pp; int res; @@ -431,9 +437,16 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, /* Encode all of them in one go. */ pp = buf; - for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) + for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) { __encode_table_record_to_buf(control, &record[i], pp); + /* update bad channel bitmap */ + if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) { + control->bad_channel_bitmap |= 1 << record[i].mem_channel; + con->update_channel_flag = true; + } + } + /* a, first record index to write into. * b, last record index to write into. * a = first index to read (fri) + number of records in the table, @@ -686,6 +699,7 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, const u32 num) { struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); int i, res; u8 *buf, *pp; u32 g0, g1; @@ -753,8 +767,15 @@ int amdgpu_ras_eeprom_read(struct amdgpu_ras_eeprom_control *control, /* Read up everything? Then transform. */ pp = buf; - for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) + for (i = 0; i < num; i++, pp += RAS_TABLE_RECORD_SIZE) { __decode_table_record_from_buf(control, &record[i], pp); + + /* update bad channel bitmap */ + if (!(control->bad_channel_bitmap & (1 << record[i].mem_channel))) { + control->bad_channel_bitmap |= 1 << record[i].mem_channel; + con->update_channel_flag = true; + } + } Out: kfree(buf); mutex_unlock(&control->ras_tbl_mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index 6bb00578bfbb..54d9bfe0881d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -80,6 +80,10 @@ struct amdgpu_ras_eeprom_control { /* Protect table access via this mutex. */ struct mutex ras_tbl_mutex; + + /* Record channel info which occurred bad pages + */ + u32 bad_channel_bitmap; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 85da6cbaf3b7..aad3c8b4c810 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -97,6 +97,11 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev, amdgpu_ras_save_bad_pages(adev); amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs); + + if (con->update_channel_flag == true) { + amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap); + con->update_channel_flag = false; + } } if (reset) -- cgit v1.2.3