summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-03-24 16:19:43 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2022-03-24 16:19:43 -0700
commitb14ffae378aa1db993e62b01392e70d1e585fb23 (patch)
tree0ac179d24e8a62ec4c2732ed18d90d83da4b82d7 /drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
parent52deda9551a01879b3562e7b41748e85c591f14c (diff)
parentc6e90a1c660874736bd09c1fec6312b4b4c2ff7b (diff)
Merge tag 'drm-next-2022-03-24' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "Lots of work all over, Intel improving DG2 support, amdkfd CRIU support, msm new hw support, and faster fbdev support. dma-buf: - rename dma-buf-map to iosys-map core: - move buddy allocator to core - add pci/platform init macros - improve EDID parser deep color handling - EDID timing type 7 support - add GPD Win Max quirk - add yes/no helpers to string_helpers - flatten syncobj chains - add nomodeset support to lots of drivers - improve fb-helper clipping support - add default property value interface fbdev: - improve fbdev ops speed ttm: - add a backpointer from ttm bo->ttm resource dp: - move displayport headers - add a dp helper module bridge: - anx7625 atomic support, HDCP support panel: - split out panel-lvds and lvds bindings - find panels in OF subnodes privacy: - add chromeos privacy screen support fb: - hot unplug fw fb on forced removal simpledrm: - request region instead of marking ioresource busy - add panel oreintation property udmabuf: - fix oops with 0 pages amdgpu: - power management code cleanup - Enable freesync video mode by default - RAS code cleanup - Improve VRAM access for debug using SDMA - SR-IOV rework special register access and fixes - profiling power state request ioctl - expose IP discovery via sysfs - Cyan skillfish updates - GC 10.3.7, SDMA 5.2.7, DCN 3.1.6 updates - expose benchmark tests via debugfs - add module param to disable XGMI for testing - GPU reset debugfs register dumping support amdkfd: - CRIU support - SDMA queue fixes radeon: - UVD suspend fix - iMac backlight fix i915: - minimal parallel submission for execlists - DG2-G12 subplatform added - DG2 programming workarounds - DG2 accelerated migration support - flat CCS and CCS engine support for XeHP - initial small BAR support - drop fake LMEM support - ADL-N PCH support - bigjoiner updates - introduce VMA resources and async unbinding - register definitions cleanups - multi-FBC refactoring - DG1 OPROM over SPI support - ADL-N platform enabling - opregion mailbox #5 support - DP MST ESI improvements - drm device based logging - async flip optimisation for DG2 - CPU arch abstraction fixes - improve GuC ADS init to work on aarch64 - tweak TTM LRU priority hint - GuC 69.0.3 support - remove short term execbuf pins nouveau: - higher DP/eDP bitrates - backlight fixes msm: - dpu + dp support for sc8180x - dp support for sm8350 - dpu + dsi support for qcm2290 - 10nm dsi phy tuning support - bridge support for dp encoder - gpu support for additional 7c3 SKUs ingenic: - HDMI support for JZ4780 - aux channel EDID support ast: - AST2600 support - add wide screen support - create DP/DVI connectors omapdrm: - fix implicit dma_buf fencing vc4: - add CSC + full range support - better display firmware handoff panfrost: - add initial dual-core GPU support stm: - new revision support - fb handover support mediatek: - transfer display binding document to yaml format. - add mt8195 display device binding. - allow commands to be sent during video mode. - add wait_for_event for crtc disable by cmdq. tegra: - YUV format support rcar-du: - LVDS support for M3-W+ (R8A77961) exynos: - BGR pixel format for FIMD device" * tag 'drm-next-2022-03-24' of git://anongit.freedesktop.org/drm/drm: (1529 commits) drm/i915/display: Do not re-enable PSR after it was marked as not reliable drm/i915/display: Fix HPD short pulse handling for eDP drm/amdgpu: Use drm_mode_copy() drm/radeon: Use drm_mode_copy() drm/amdgpu: Use ternary operator in `vcn_v1_0_start()` drm/amdgpu: Remove pointless on stack mode copies drm/amd/pm: fix indenting in __smu_cmn_reg_print_error() drm/amdgpu/dc: fix typos in comments drm/amdgpu: fix typos in comments drm/amd/pm: fix typos in comments drm/amdgpu: Add stolen reserved memory for MI25 SRIOV. drm/amdgpu: Merge get_reserved_allocation to get_vbios_allocations. drm/amdkfd: evict svm bo worker handle error drm/amdgpu/vcn: fix vcn ring test failure in igt reload test drm/amdgpu: only allow secure submission on rings which support that drm/amdgpu: fixed the warnings reported by kernel test robot drm/amd/display: 3.2.177 drm/amd/display: [FW Promotion] Release 0.0.108.0 drm/amd/display: Add save/restore PANEL_PWRSEQ_REF_DIV2 drm/amd/display: Wait for hubp read line for Pollock ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c114
1 files changed, 50 insertions, 64 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index 46264a4002f78..aad3c8b4c8102 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -21,7 +21,7 @@
*
*/
-#include "amdgpu_ras.h"
+#include "amdgpu.h"
static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
void *ras_error_status,
@@ -33,14 +33,14 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
int ret = 0;
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- ret = smu_get_ecc_info(&adev->smu, (void *)&(con->umc_ecc));
+ ret = amdgpu_dpm_get_ecc_info(adev, (void *)&(con->umc_ecc));
if (ret == -EOPNOTSUPP) {
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->query_ras_error_count)
- adev->umc.ras_funcs->query_ras_error_count(adev, ras_error_status);
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count)
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_count(adev, ras_error_status);
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->query_ras_error_address &&
+ if (adev->umc.ras && adev->umc.ras->ras_block.hw_ops &&
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address &&
adev->umc.max_ras_err_cnt_per_query) {
err_data->err_addr =
kcalloc(adev->umc.max_ras_err_cnt_per_query,
@@ -56,15 +56,15 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
/* umc query_ras_error_address is also responsible for clearing
* error status
*/
- adev->umc.ras_funcs->query_ras_error_address(adev, ras_error_status);
+ adev->umc.ras->ras_block.hw_ops->query_ras_error_address(adev, ras_error_status);
}
} else if (!ret) {
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->ecc_info_query_ras_error_count)
- adev->umc.ras_funcs->ecc_info_query_ras_error_count(adev, ras_error_status);
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_count)
+ adev->umc.ras->ecc_info_query_ras_error_count(adev, ras_error_status);
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->ecc_info_query_ras_error_address &&
+ if (adev->umc.ras &&
+ adev->umc.ras->ecc_info_query_ras_error_address &&
adev->umc.max_ras_err_cnt_per_query) {
err_data->err_addr =
kcalloc(adev->umc.max_ras_err_cnt_per_query,
@@ -80,7 +80,7 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
/* umc query_ras_error_address is also responsible for clearing
* error status
*/
- adev->umc.ras_funcs->ecc_info_query_ras_error_address(adev, ras_error_status);
+ adev->umc.ras->ecc_info_query_ras_error_address(adev, ras_error_status);
}
}
@@ -96,8 +96,12 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
err_data->err_addr_cnt);
amdgpu_ras_save_bad_pages(adev);
- if (adev->smu.ppt_funcs && adev->smu.ppt_funcs->send_hbm_bad_pages_num)
- adev->smu.ppt_funcs->send_hbm_bad_pages_num(&adev->smu, con->eeprom_control.ras_num_recs);
+ amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs);
+
+ if (con->update_channel_flag == true) {
+ amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap);
+ con->update_channel_flag = false;
+ }
}
if (reset)
@@ -130,78 +134,39 @@ int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
return ret;
}
-static int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
+int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
void *ras_error_status,
struct amdgpu_iv_entry *entry)
{
return amdgpu_umc_do_page_retirement(adev, ras_error_status, entry, true);
}
-int amdgpu_umc_ras_late_init(struct amdgpu_device *adev)
+int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
int r;
- struct ras_fs_if fs_info = {
- .sysfs_name = "umc_err_count",
- };
- struct ras_ih_if ih_info = {
- .cb = amdgpu_umc_process_ras_data_cb,
- };
- if (!adev->umc.ras_if) {
- adev->umc.ras_if =
- kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
- if (!adev->umc.ras_if)
- return -ENOMEM;
- adev->umc.ras_if->block = AMDGPU_RAS_BLOCK__UMC;
- adev->umc.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
- adev->umc.ras_if->sub_block_index = 0;
- }
- ih_info.head = fs_info.head = *adev->umc.ras_if;
-
- r = amdgpu_ras_late_init(adev, adev->umc.ras_if,
- &fs_info, &ih_info);
+ r = amdgpu_ras_block_late_init(adev, ras_block);
if (r)
- goto free;
+ return r;
- if (amdgpu_ras_is_supported(adev, adev->umc.ras_if->block)) {
+ if (amdgpu_ras_is_supported(adev, ras_block->block)) {
r = amdgpu_irq_get(adev, &adev->gmc.ecc_irq, 0);
if (r)
goto late_fini;
- } else {
- r = 0;
- goto free;
}
/* ras init of specific umc version */
- if (adev->umc.ras_funcs &&
- adev->umc.ras_funcs->err_cnt_init)
- adev->umc.ras_funcs->err_cnt_init(adev);
+ if (adev->umc.ras &&
+ adev->umc.ras->err_cnt_init)
+ adev->umc.ras->err_cnt_init(adev);
return 0;
late_fini:
- amdgpu_ras_late_fini(adev, adev->umc.ras_if, &ih_info);
-free:
- kfree(adev->umc.ras_if);
- adev->umc.ras_if = NULL;
+ amdgpu_ras_block_late_fini(adev, ras_block);
return r;
}
-void amdgpu_umc_ras_fini(struct amdgpu_device *adev)
-{
- if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC) &&
- adev->umc.ras_if) {
- struct ras_common_if *ras_if = adev->umc.ras_if;
- struct ras_ih_if ih_info = {
- .head = *ras_if,
- .cb = amdgpu_umc_process_ras_data_cb,
- };
-
- amdgpu_ras_late_fini(adev, ras_if, &ih_info);
- kfree(ras_if);
- }
-}
-
int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
@@ -219,3 +184,24 @@ int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}
+
+void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
+ uint64_t err_addr,
+ uint64_t retired_page,
+ uint32_t channel_index,
+ uint32_t umc_inst)
+{
+ struct eeprom_table_record *err_rec =
+ &err_data->err_addr[err_data->err_addr_cnt];
+
+ err_rec->address = err_addr;
+ /* page frame address is saved */
+ err_rec->retired_page = retired_page >> AMDGPU_GPU_PAGE_SHIFT;
+ err_rec->ts = (uint64_t)ktime_get_real_seconds();
+ err_rec->err_type = AMDGPU_RAS_EEPROM_ERR_NON_RECOVERABLE;
+ err_rec->cu = 0;
+ err_rec->mem_channel = channel_index;
+ err_rec->mcumc_id = umc_inst;
+
+ err_data->err_addr_cnt++;
+}