Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
239 files changed, 13785 insertions, 6208 deletions
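The largest addition in this changeset is CPER (Common Platform Error Record) support: a new amdgpu_cper.c/.h wired into the ACA bank handling and the RAS paths. Before the raw diff, here is a minimal, hypothetical sketch of how the new helpers appear intended to compose when building a one-section fatal crash-dump record; the caller name and the final kfree() are editorial assumptions (the hand-off of a finished record is not visible in the hunks below), everything else follows the signatures and enums introduced in this patch.

/*
 * Illustrative sketch only, not part of the patch: composing the new
 * CPER helpers from amdgpu_cper.c for a single-section fatal record.
 */
#include "amdgpu.h"

static void example_build_fatal_cper(struct amdgpu_device *adev,
				     struct cper_sec_crashdump_reg_data reg_data)
{
	struct cper_hdr *hdr;

	if (!adev->cper.enabled)
		return;

	/* Allocate room for one section descriptor plus one crash-dump section. */
	hdr = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_FATAL, 1);
	if (!hdr)
		return;

	/*
	 * Fills the "CPER" signature, timestamp, record/platform/creator IDs,
	 * and selects the MCE notify type for fatal records.
	 */
	amdgpu_cper_entry_fill_hdr(adev, hdr, AMDGPU_CPER_TYPE_FATAL,
				   CPER_SEV_FATAL);

	/* Section 0: crash-dump register context. */
	amdgpu_cper_entry_fill_fatal_section(adev, hdr, 0, reg_data);

	/*
	 * The real driver presumably queues the finished record elsewhere;
	 * those hunks are not shown here, so this sketch simply frees it.
	 */
	kfree(hdr);
}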
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 41fa3377d9cf..1a11cab741ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -26,6 +26,7 @@ config DRM_AMDGPU select DRM_BUDDY select DRM_SUBALLOC_HELPER select DRM_EXEC + select DRM_PANEL_BACKLIGHT_QUIRKS # amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work # ACPI_VIDEO's dependencies must also be selected. select INPUT if ACPI diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index c7b18c52825d..aacc810cabb3 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -1,5 +1,5 @@ # -# Copyright 2017 Advanced Micro Devices, Inc. +# Copyright 2017-2024 Advanced Micro Devices, Inc. All rights reserved. # # Permission is hereby granted, free of charge, to any person obtaining a # copy of this software and associated documentation files (the "Software"), @@ -65,7 +65,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ amdgpu_fw_attestation.o amdgpu_securedisplay.o \ amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ - amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o + amdgpu_ring_mux.o amdgpu_xcp.o amdgpu_seq64.o amdgpu_aca.o amdgpu_dev_coredump.o \ + amdgpu_cper.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o @@ -105,7 +106,7 @@ amdgpu-y += \ # add UMC block amdgpu-y += \ - umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o + umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o umc_v12_0.o umc_v8_14.o # add IH block amdgpu-y += \ @@ -200,6 +201,7 @@ amdgpu-y += \ vcn_v4_0_3.o \ vcn_v4_0_5.o \ vcn_v5_0_0.o \ + vcn_v5_0_1.o \ amdgpu_jpeg.o \ jpeg_v1_0.o \ jpeg_v2_0.o \ @@ -208,7 +210,8 @@ amdgpu-y += \ jpeg_v4_0.o \ jpeg_v4_0_3.o \ jpeg_v4_0_5.o \ - jpeg_v5_0_0.o + jpeg_v5_0_0.o \ + jpeg_v5_0_1.o # add VPE block amdgpu-y += \ diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index f44de9d4b6a1..e13fbd974141 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -334,6 +334,8 @@ aldebaran_mode2_restore_hwcontext(struct amdgpu_reset_control *reset_ctl, AMDGPU_INIT_LEVEL_RESET_RECOVERY); dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); + /*TBD: Ideally should clear only GFX, SDMA blocks*/ + amdgpu_ras_clear_err_state(tmp_adev); r = aldebaran_mode2_restore_ip(tmp_adev); if (r) goto end; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 4653a8d2823a..6d83ccfa42ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -109,6 +109,7 @@ #include "amdgpu_mca.h" #include "amdgpu_aca.h" #include "amdgpu_ras.h" +#include "amdgpu_cper.h" #include "amdgpu_xcp.h" #include "amdgpu_seq64.h" #include "amdgpu_reg_state.h" @@ -415,6 +416,7 @@ bool amdgpu_get_bios(struct amdgpu_device *adev); bool amdgpu_read_bios(struct amdgpu_device *adev); bool amdgpu_soc15_read_bios_from_rom(struct amdgpu_device *adev, u8 *bios, u32 length_bytes); +void amdgpu_bios_release(struct amdgpu_device *adev); /* * Clocks */ @@ -880,6 +882,7 @@ struct amdgpu_device { bool need_swiotlb; bool accel_working; struct notifier_block acpi_nb; + struct notifier_block pm_nb; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; struct debugfs_blob_wrapper debugfs_vbios_blob; struct 
debugfs_blob_wrapper debugfs_discovery_blob; @@ -1089,6 +1092,9 @@ struct amdgpu_device { /* ACA */ struct amdgpu_aca aca; + /* CPER */ + struct amdgpu_cper cper; + struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM]; uint32_t harvest_ip_mask; int num_ip_blocks; @@ -1148,6 +1154,7 @@ struct amdgpu_device { struct ratelimit_state throttling_logging_rs; uint32_t ras_hw_enabled; uint32_t ras_enabled; + bool ras_default_ecc_enabled; bool no_hw_access; struct pci_saved_state *pci_state; @@ -1174,7 +1181,6 @@ struct amdgpu_device { struct work_struct reset_work; - bool job_hang; bool dc_enabled; /* Mask of active clusters */ uint32_t aid_mask; @@ -1186,12 +1192,24 @@ struct amdgpu_device { bool debug_use_vram_fw_buf; bool debug_enable_ras_aca; bool debug_exp_resets; + bool debug_disable_gpu_ring_reset; - bool enforce_isolation[MAX_XCP]; - /* Added this mutex for cleaner shader isolation between GFX and compute processes */ + /* Protection for the following isolation structure */ struct mutex enforce_isolation_mutex; + bool enforce_isolation[MAX_XCP]; + struct amdgpu_isolation { + void *owner; + struct dma_fence *spearhead; + struct amdgpu_sync active; + struct amdgpu_sync prev; + } isolation[MAX_XCP]; struct amdgpu_init_level *init_lvl; + + /* This flag is used to determine how VRAM allocations are handled for APUs + * in KFD: VRAM or GTT. + */ + bool apu_prefer_gtt; }; static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, @@ -1470,6 +1488,9 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev); struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, struct dma_fence *gang); +struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_job *job); bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev); ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring); ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 9d6345146495..dc47f5fd4ea1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -30,16 +30,6 @@ typedef int bank_handler_t(struct aca_handle *handle, struct aca_bank *bank, enum aca_smu_type type, void *data); -struct aca_banks { - int nr_banks; - struct list_head list; -}; - -struct aca_hwip { - int hwid; - int mcatype; -}; - static struct aca_hwip aca_hwid_mcatypes[ACA_HWIP_TYPE_COUNT] = { ACA_BANK_HWID(SMU, 0x01, 0x01), ACA_BANK_HWID(PCS_XGMI, 0x50, 0x00), @@ -111,7 +101,7 @@ static struct aca_regs_dump { {"STATUS", ACA_REG_IDX_STATUS}, {"ADDR", ACA_REG_IDX_ADDR}, {"MISC", ACA_REG_IDX_MISC0}, - {"CONFIG", ACA_REG_IDX_CONFG}, + {"CONFIG", ACA_REG_IDX_CONFIG}, {"IPID", ACA_REG_IDX_IPID}, {"SYND", ACA_REG_IDX_SYND}, {"DESTAT", ACA_REG_IDX_DESTAT}, @@ -168,7 +158,7 @@ static int aca_smu_get_valid_aca_banks(struct amdgpu_device *adev, enum aca_smu_ if (ret) return ret; - bank.type = type; + bank.smu_err_type = type; aca_smu_bank_dump(adev, i, count, &bank, qctx); @@ -394,6 +384,56 @@ static bool aca_bank_should_update(struct amdgpu_device *adev, enum aca_smu_type return ret; } +static void aca_banks_generate_cper(struct amdgpu_device *adev, + enum aca_smu_type type, + struct aca_banks *banks, + int count) +{ + struct aca_bank_node *node; + struct aca_bank *bank; + int r; + + if (!adev->cper.enabled) + return; + + if (!banks || 
!count) { + dev_warn(adev->dev, "fail to generate cper records\n"); + return; + } + + /* UEs must be encoded into separate CPER entries */ + if (type == ACA_SMU_TYPE_UE) { + struct aca_banks de_banks; + + aca_banks_init(&de_banks); + list_for_each_entry(node, &banks->list, node) { + bank = &node->bank; + if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) { + r = aca_banks_add_bank(&de_banks, bank); + if (r) + dev_warn(adev->dev, "fail to add de banks, ret = %d\n", r); + } else { + if (amdgpu_cper_generate_ue_record(adev, bank)) + dev_warn(adev->dev, "fail to generate ue cper records\n"); + } + } + + if (!list_empty(&de_banks.list)) { + if (amdgpu_cper_generate_ce_records(adev, &de_banks, de_banks.nr_banks)) + dev_warn(adev->dev, "fail to generate de cper records\n"); + } + + aca_banks_release(&de_banks); + } else { + /* + * SMU_TYPE_CE banks are combined into 1 CPER entries, + * they could be CEs or DEs or both + */ + if (amdgpu_cper_generate_ce_records(adev, banks, count)) + dev_warn(adev->dev, "fail to generate ce cper records\n"); + } +} + static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, bank_handler_t handler, struct ras_query_context *qctx, void *data) { @@ -431,6 +471,8 @@ static int aca_banks_update(struct amdgpu_device *adev, enum aca_smu_type type, if (ret) goto err_release_banks; + aca_banks_generate_cper(adev, type, &banks, count); + err_release_banks: aca_banks_release(&banks); @@ -516,6 +558,10 @@ static int __aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *h if (ret) return ret; + /* DEs may contain in CEs or UEs */ + if (type != ACA_ERROR_TYPE_DEFERRED) + aca_log_aca_error(handle, ACA_ERROR_TYPE_DEFERRED, err_data); + return aca_log_aca_error(handle, type, err_data); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h index 5ef6b745f222..6b180f1b33fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h @@ -71,12 +71,29 @@ struct ras_query_context; #define ACA_ERROR_CE_MASK BIT_MASK(ACA_ERROR_TYPE_CE) #define ACA_ERROR_DEFERRED_MASK BIT_MASK(ACA_ERROR_TYPE_DEFERRED) +#define mmSMNAID_AID0_MCA_SMU 0x03b30400 /* SMN AID AID0 */ +#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */ +#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */ +#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */ + +#define ACA_BANK_ERR_IS_DEFFERED(bank) \ + (ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \ + ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS])) + +#define ACA_BANK_ERR_CE_DE_DECODE(bank) \ + (ACA_BANK_ERR_IS_DEFFERED(bank) ? ACA_ERROR_TYPE_DEFERRED : \ + ACA_ERROR_TYPE_CE) + +#define ACA_BANK_ERR_UE_DE_DECODE(bank) \ + (ACA_BANK_ERR_IS_DEFFERED(bank) ? 
ACA_ERROR_TYPE_DEFERRED : \ + ACA_ERROR_TYPE_UE) + enum aca_reg_idx { ACA_REG_IDX_CTL = 0, ACA_REG_IDX_STATUS = 1, ACA_REG_IDX_ADDR = 2, ACA_REG_IDX_MISC0 = 3, - ACA_REG_IDX_CONFG = 4, + ACA_REG_IDX_CONFIG = 4, ACA_REG_IDX_IPID = 5, ACA_REG_IDX_SYND = 6, ACA_REG_IDX_DESTAT = 8, @@ -103,13 +120,20 @@ enum aca_error_type { }; enum aca_smu_type { + ACA_SMU_TYPE_INVALID = -1, ACA_SMU_TYPE_UE = 0, ACA_SMU_TYPE_CE, ACA_SMU_TYPE_COUNT, }; +struct aca_hwip { + int hwid; + int mcatype; +}; + struct aca_bank { - enum aca_smu_type type; + enum aca_error_type aca_err_type; + enum aca_smu_type smu_err_type; u64 regs[ACA_MAX_REGS_COUNT]; }; @@ -118,6 +142,11 @@ struct aca_bank_node { struct list_head node; }; +struct aca_banks { + int nr_banks; + struct list_head list; +}; + struct aca_bank_info { int die_id; int socket_id; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index ec5e0dcf8613..4926996f94da 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -140,7 +140,7 @@ static int acp_poweroff(struct generic_pm_domain *genpd) * 2. power off the acp tiles * 3. check and enter ulv state */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0); return 0; } @@ -157,7 +157,7 @@ static int acp_poweron(struct generic_pm_domain *genpd) * 2. turn on acp clock * 3. power on acp tiles */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0); return 0; } @@ -236,7 +236,7 @@ static int acp_hw_init(struct amdgpu_ip_block *ip_block) ip_block->version->major, ip_block->version->minor); /* -ENODEV means board uses AZ rather than ACP */ if (r == -ENODEV) { - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0); return 0; } else if (r) { return r; @@ -508,7 +508,7 @@ static int acp_hw_fini(struct amdgpu_ip_block *ip_block) /* return early if no ACP */ if (!adev->acp.acp_genpd) { - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0); return 0; } @@ -565,7 +565,7 @@ static int acp_suspend(struct amdgpu_ip_block *ip_block) /* power up on suspend */ if (!adev->acp.acp_cell) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false, 0); return 0; } @@ -575,28 +575,28 @@ static int acp_resume(struct amdgpu_ip_block *ip_block) /* power down again on resume */ if (!adev->acp.acp_cell) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true, 0); return 0; } -static bool acp_is_idle(void *handle) +static bool acp_is_idle(struct amdgpu_ip_block *ip_block) { return true; } -static int acp_set_clockgating_state(void *handle, +static int acp_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int acp_set_powergating_state(void *handle, +static int acp_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); - amdgpu_dpm_set_powergating_by_smu(adev, 
AMD_IP_BLOCK_TYPE_ACP, enable); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index b8d4e07d2043..b7f8f2ff143d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -394,6 +394,10 @@ static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif) characteristics.max_input_signal; atif->backlight_caps.ac_level = characteristics.ac_level; atif->backlight_caps.dc_level = characteristics.dc_level; + atif->backlight_caps.data_points = characteristics.number_of_points; + memcpy(atif->backlight_caps.luminance_data, + characteristics.data_points, + sizeof(atif->backlight_caps.luminance_data)); out: kfree(info); return err; @@ -1277,11 +1281,7 @@ void amdgpu_acpi_get_backlight_caps(struct amdgpu_dm_backlight_caps *caps) { struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif; - caps->caps_valid = atif->backlight_caps.caps_valid; - caps->min_input_signal = atif->backlight_caps.min_input_signal; - caps->max_input_signal = atif->backlight_caps.max_input_signal; - caps->ac_level = atif->backlight_caps.ac_level; - caps->dc_level = atif->backlight_caps.dc_level; + memcpy(caps, &atif->backlight_caps, sizeof(*caps)); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index c4e733c2e75e..4cec3a873995 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -368,7 +368,7 @@ void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj) { struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj; - amdgpu_bo_reserve(*bo, true); + (void)amdgpu_bo_reserve(*bo, true); amdgpu_bo_kunmap(*bo); amdgpu_bo_unpin(*bo); amdgpu_bo_unreserve(*bo); @@ -459,7 +459,7 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev, else mem_info->local_mem_size_private = KFD_XCP_MEMORY_SIZE(adev, xcp->id); - } else if (adev->flags & AMD_IS_APU) { + } else if (adev->apu_prefer_gtt) { mem_info->local_mem_size_public = (ttm_tt_pages_limit() << PAGE_SHIFT); mem_info->local_mem_size_private = 0; } else { @@ -555,48 +555,6 @@ out_put: return r; } -uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst, - struct amdgpu_device *src) -{ - struct amdgpu_device *peer_adev = src; - struct amdgpu_device *adev = dst; - int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev); - - if (ret < 0) { - DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n", - adev->gmc.xgmi.physical_node_id, - peer_adev->gmc.xgmi.physical_node_id, ret); - ret = 0; - } - return (uint8_t)ret; -} - -int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, - struct amdgpu_device *src, - bool is_min) -{ - struct amdgpu_device *adev = dst, *peer_adev; - int num_links; - - if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(9, 4, 2)) - return 0; - - if (src) - peer_adev = src; - - /* num links returns 0 for indirect peers since indirect route is unknown. */ - num_links = is_min ? 1 : amdgpu_xgmi_get_num_links(adev, peer_adev); - if (num_links < 0) { - DRM_ERROR("amdgpu: failed to get xgmi num links between node %d and %d. ret = %d\n", - adev->gmc.xgmi.physical_node_id, - peer_adev->gmc.xgmi.physical_node_id, num_links); - num_links = 0; - } - - /* Aldebaran xGMI DPM is defeatured so assume x16 x 25Gbps for bandwidth. 
*/ - return (num_links * 16 * 25000)/BITS_PER_BYTE; -} - int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min) { int num_lanes_shift = (is_min ? ffs(adev->pm.pcie_mlw_mask) : @@ -725,7 +683,9 @@ void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle) /* Disable GFXOFF and PG. Temporary workaround * to fix some compute applications issue on GFX9. */ - adev->ip_blocks[AMD_IP_BLOCK_TYPE_GFX].version->funcs->set_powergating_state((void *)adev, state); + struct amdgpu_ip_block *gfx_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + if (gfx_block != NULL) + gfx_block->version->funcs->set_powergating_state((void *)gfx_block, state); } amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_COMPUTE, @@ -816,7 +776,7 @@ u64 amdgpu_amdkfd_xcp_memory_size(struct amdgpu_device *adev, int xcp_id) } do_div(tmp, adev->xcp_mgr->num_xcp_per_mem_partition); return ALIGN_DOWN(tmp, PAGE_SIZE); - } else if (adev->flags & AMD_IS_APU) { + } else if (adev->apu_prefer_gtt) { return (ttm_tt_pages_limit() << PAGE_SHIFT); } else { return adev->gmc.real_vram_size; @@ -835,7 +795,7 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off, if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - if (!kiq_ring->sched.ready || adev->job_hang) + if (!kiq_ring->sched.ready || amdgpu_in_reset(adev)) return 0; ring_funcs = kzalloc(sizeof(*ring_funcs), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 4b80ad860639..b6ca41859b53 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -47,6 +47,7 @@ enum TLB_FLUSH_TYPE { }; struct amdgpu_device; +struct kfd_process_device; struct amdgpu_reset_context; enum kfd_mem_attachment_type { @@ -192,7 +193,7 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data); #if IS_ENABLED(CONFIG_HSA_AMD) bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); -int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); +void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo); int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, unsigned long cur_seq, struct kgd_mem *mem); int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo, @@ -212,9 +213,8 @@ struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f) } static inline -int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) +void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo) { - return 0; } static inline @@ -254,11 +254,6 @@ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd, uint64_t *bo_size, void *metadata_buffer, size_t buffer_size, uint32_t *metadata_size, uint32_t *flags, int8_t *xcp_id); -uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct amdgpu_device *dst, - struct amdgpu_device *src); -int amdgpu_amdkfd_get_xgmi_bandwidth_mbytes(struct amdgpu_device *dst, - struct amdgpu_device *src, - bool is_min); int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_min); int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev, uint32_t *payload); @@ -299,14 +294,10 @@ bool amdgpu_amdkfd_compute_active(struct amdgpu_device *adev, uint32_t node_id); (&((struct amdgpu_fpriv *) \ ((struct drm_file *)(drm_priv))->driver_priv)->vm) -int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev, - 
struct amdgpu_vm *avm, u32 pasid); int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, struct amdgpu_vm *avm, void **process_info, struct dma_fence **ef); -void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, - void *drm_priv); uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev, uint8_t xcp_id); @@ -433,6 +424,9 @@ void kgd2kfd_unlock_kfd(void); int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id); int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id); bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id); +bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry, + bool retry_fault); + #else static inline int kgd2kfd_init(void) { @@ -518,5 +512,12 @@ static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id) { return false; } + +static inline bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry, + bool retry_fault) +{ + return false; +} + #endif #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c index 8dfdb18197c4..7e9f7a280c1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c @@ -189,8 +189,9 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = { .set_address_watch = kgd_gfx_aldebaran_set_address_watch, .clear_address_watch = kgd_gfx_v9_clear_address_watch, .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, - .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, + .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, .hqd_reset = kgd_gfx_v9_hqd_reset, + .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 9abf29b58ac7..ffbaa8bc5eea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -415,9 +415,10 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = { .set_address_watch = kgd_gfx_v9_set_address_watch, .clear_address_watch = kgd_gfx_v9_clear_address_watch, .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, - .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, + .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, - .hqd_reset = kgd_gfx_v9_hqd_reset + .hqd_reset = kgd_gfx_v9_hqd_reset, + .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c index e2ae714a700f..89a45a9218f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c @@ -509,6 +509,17 @@ static uint32_t kgd_gfx_v9_4_3_clear_address_watch(struct amdgpu_device *adev, return 0; } +static uint32_t kgd_gfx_v9_4_3_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) +{ + uint32_t reg_offset = 
get_sdma_rlc_reg_offset(adev, engine, queue); + uint32_t status = RREG32(regSDMA_RLC0_CONTEXT_STATUS + reg_offset); + uint32_t doorbell_off = RREG32(regSDMA_RLC0_DOORBELL_OFFSET + reg_offset); + bool is_active = !!REG_GET_FIELD(status, SDMA_RLC0_CONTEXT_STATUS, SELECTED); + + return is_active ? doorbell_off >> 2 : 0; +} + const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_4_3_set_pasid_vmid_mapping, @@ -530,8 +541,8 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, - .build_grace_period_packet_info = - kgd_gfx_v9_build_grace_period_packet_info, + .build_dequeue_wait_counts_packet_info = + kgd_gfx_v9_build_dequeue_wait_counts_packet_info, .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, .enable_debug_trap = kgd_aldebaran_enable_debug_trap, .disable_debug_trap = kgd_gfx_v9_4_3_disable_debug_trap, @@ -543,5 +554,6 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = { .set_address_watch = kgd_gfx_v9_4_3_set_address_watch, .clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch, .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, - .hqd_reset = kgd_gfx_v9_hqd_reset + .hqd_reset = kgd_gfx_v9_hqd_reset, + .hqd_sdma_get_doorbell = kgd_gfx_v9_4_3_hqd_sdma_get_doorbell }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 62176d607bef..04ef0ca10541 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -1021,25 +1021,25 @@ void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, *wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2)); } -void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, +void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev, uint32_t wait_times, - uint32_t grace_period, + uint32_t sch_wave, + uint32_t que_sleep, uint32_t *reg_offset, uint32_t *reg_data) { *reg_data = wait_times; - /* - * The CP cannont handle a 0 grace period input and will result in - * an infinite grace period being set so set to 1 to prevent this. 
- */ - if (grace_period == 0) - grace_period = 1; - - *reg_data = REG_SET_FIELD(*reg_data, - CP_IQ_WAIT_TIME2, - SCH_WAVE, - grace_period); + if (sch_wave) + *reg_data = REG_SET_FIELD(*reg_data, + CP_IQ_WAIT_TIME2, + SCH_WAVE, + sch_wave); + if (que_sleep) + *reg_data = REG_SET_FIELD(*reg_data, + CP_IQ_WAIT_TIME2, + QUE_SLEEP, + que_sleep); *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); } @@ -1084,6 +1084,12 @@ uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev, return 0; } +uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) +{ + return 0; +} + const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, @@ -1109,8 +1115,9 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = { .set_address_watch = kgd_gfx_v10_set_address_watch, .clear_address_watch = kgd_gfx_v10_clear_address_watch, .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, - .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info, + .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info, .program_trap_handler_settings = program_trap_handler_settings, .hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr, - .hqd_reset = kgd_gfx_v10_hqd_reset + .hqd_reset = kgd_gfx_v10_hqd_reset, + .hqd_sdma_get_doorbell = kgd_gfx_v10_hqd_sdma_get_doorbell }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h index 9efd2dd4fdd7..a4c607c88178 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h @@ -51,9 +51,10 @@ uint32_t kgd_gfx_v10_clear_address_watch(struct amdgpu_device *adev, void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times, uint32_t inst); -void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev, +void kgd_gfx_v10_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev, uint32_t wait_times, - uint32_t grace_period, + uint32_t sch_wave, + uint32_t que_sleep, uint32_t *reg_offset, uint32_t *reg_data); uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev, @@ -65,3 +66,5 @@ uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev, uint32_t queue_id, uint32_t inst, unsigned int utimeout); +uint32_t kgd_gfx_v10_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index c718bedda0ca..6d08bc2781a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -673,7 +673,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3, .program_trap_handler_settings = program_trap_handler_settings_v10_3, .get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times, - .build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info, + .build_dequeue_wait_counts_packet_info = kgd_gfx_v10_build_dequeue_wait_counts_packet_info, .enable_debug_trap = kgd_gfx_v10_enable_debug_trap, .disable_debug_trap = kgd_gfx_v10_disable_debug_trap, .validate_trap_override_request = kgd_gfx_v10_validate_trap_override_request, @@ -682,5 +682,6 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = { .set_address_watch = kgd_gfx_v10_set_address_watch, .clear_address_watch = kgd_gfx_v10_clear_address_watch, 
.hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr, - .hqd_reset = kgd_gfx_v10_hqd_reset + .hqd_reset = kgd_gfx_v10_hqd_reset, + .hqd_sdma_get_doorbell = kgd_gfx_v10_hqd_sdma_get_doorbell }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c index a4ba49cb22db..e0e6a6a49d90 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c @@ -800,6 +800,12 @@ static uint64_t kgd_gfx_v11_hqd_reset(struct amdgpu_device *adev, return 0; } +static uint32_t kgd_gfx_v11_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) +{ + return 0; +} + const struct kfd2kgd_calls gfx_v11_kfd2kgd = { .program_sh_mem_settings = program_sh_mem_settings_v11, .set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11, @@ -824,5 +830,6 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = { .set_address_watch = kgd_gfx_v11_set_address_watch, .clear_address_watch = kgd_gfx_v11_clear_address_watch, .hqd_get_pq_addr = kgd_gfx_v11_hqd_get_pq_addr, - .hqd_reset = kgd_gfx_v11_hqd_reset + .hqd_reset = kgd_gfx_v11_hqd_reset, + .hqd_sdma_get_doorbell = kgd_gfx_v11_hqd_sdma_get_doorbell }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c index 0dfe7093bd8a..6f0dc23c901b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v12.c @@ -361,6 +361,12 @@ static uint32_t kgd_gfx_v12_clear_address_watch(struct amdgpu_device *adev, return 0; } +static uint32_t kgd_gfx_v12_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) +{ + return 0; +} + const struct kfd2kgd_calls gfx_v12_kfd2kgd = { .init_interrupts = init_interrupts_v12, .hqd_dump = hqd_dump_v12, @@ -374,4 +380,5 @@ const struct kfd2kgd_calls gfx_v12_kfd2kgd = { .set_wave_launch_mode = kgd_gfx_v12_set_wave_launch_mode, .set_address_watch = kgd_gfx_v12_set_address_watch, .clear_address_watch = kgd_gfx_v12_clear_address_watch, + .hqd_sdma_get_doorbell = kgd_gfx_v12_hqd_sdma_get_doorbell }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index cc66ebb7bae1..088d09cc7a72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -1077,25 +1077,25 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, adev->gfx.cu_info.max_waves_per_simd; } -void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, +void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev, uint32_t wait_times, - uint32_t grace_period, + uint32_t sch_wave, + uint32_t que_sleep, uint32_t *reg_offset, uint32_t *reg_data) { *reg_data = wait_times; - /* - * The CP cannot handle a 0 grace period input and will result in - * an infinite grace period being set so set to 1 to prevent this. 
- */ - if (grace_period == 0) - grace_period = 1; - - *reg_data = REG_SET_FIELD(*reg_data, - CP_IQ_WAIT_TIME2, - SCH_WAVE, - grace_period); + if (sch_wave) + *reg_data = REG_SET_FIELD(*reg_data, + CP_IQ_WAIT_TIME2, + SCH_WAVE, + sch_wave); + if (que_sleep) + *reg_data = REG_SET_FIELD(*reg_data, + CP_IQ_WAIT_TIME2, + QUE_SLEEP, + que_sleep); *reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2); } @@ -1223,6 +1223,13 @@ unlock_out: return queue_addr; } +uint32_t kgd_gfx_v9_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue) + +{ + return 0; +} + const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .program_sh_mem_settings = kgd_gfx_v9_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_gfx_v9_set_pasid_vmid_mapping, @@ -1248,9 +1255,10 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .set_address_watch = kgd_gfx_v9_set_address_watch, .clear_address_watch = kgd_gfx_v9_clear_address_watch, .get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times, - .build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info, + .build_dequeue_wait_counts_packet_info = kgd_gfx_v9_build_dequeue_wait_counts_packet_info, .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, .program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings, .hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr, - .hqd_reset = kgd_gfx_v9_hqd_reset + .hqd_reset = kgd_gfx_v9_hqd_reset, + .hqd_sdma_get_doorbell = kgd_gfx_v9_hqd_sdma_get_doorbell }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h index b6a91a552aa4..704452ca62f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h @@ -97,9 +97,10 @@ uint32_t kgd_gfx_v9_clear_address_watch(struct amdgpu_device *adev, void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times, uint32_t inst); -void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev, +void kgd_gfx_v9_build_dequeue_wait_counts_packet_info(struct amdgpu_device *adev, uint32_t wait_times, - uint32_t grace_period, + uint32_t sch_wave, + uint32_t que_sleep, uint32_t *reg_offset, uint32_t *reg_data); uint64_t kgd_gfx_v9_hqd_get_pq_addr(struct amdgpu_device *adev, @@ -111,3 +112,5 @@ uint64_t kgd_gfx_v9_hqd_reset(struct amdgpu_device *adev, uint32_t queue_id, uint32_t inst, unsigned int utimeout); +uint32_t kgd_gfx_v9_hqd_sdma_get_doorbell(struct amdgpu_device *adev, + int engine, int queue); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f30548f4c3b3..d2ec4130a316 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -197,7 +197,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, return -EINVAL; vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id); - if (adev->flags & AMD_IS_APU) { + if (adev->apu_prefer_gtt) { system_mem_needed = size; ttm_mem_needed = size; } @@ -234,7 +234,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, if (adev && xcp_id >= 0) { adev->kfd.vram_used[xcp_id] += vram_needed; adev->kfd.vram_used_aligned[xcp_id] += - (adev->flags & AMD_IS_APU) ? + adev->apu_prefer_gtt ? 
vram_needed : ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN); } @@ -262,7 +262,7 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, if (adev) { adev->kfd.vram_used[xcp_id] -= size; - if (adev->flags & AMD_IS_APU) { + if (adev->apu_prefer_gtt) { adev->kfd.vram_used_aligned[xcp_id] -= size; kfd_mem_limit.system_mem_used -= size; kfd_mem_limit.ttm_mem_used -= size; @@ -370,40 +370,32 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo, return 0; } -int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo) +/** + * amdgpu_amdkfd_remove_all_eviction_fences - Remove all eviction fences + * @bo: the BO where to remove the evictions fences from. + * + * This functions should only be used on release when all references to the BO + * are already dropped. We remove the eviction fence from the private copy of + * the dma_resv object here since that is what is used during release to + * determine of the BO is idle or not. + */ +void amdgpu_amdkfd_remove_all_eviction_fences(struct amdgpu_bo *bo) { - struct amdgpu_bo *root = bo; - struct amdgpu_vm_bo_base *vm_bo; - struct amdgpu_vm *vm; - struct amdkfd_process_info *info; - struct amdgpu_amdkfd_fence *ef; - int ret; - - /* we can always get vm_bo from root PD bo.*/ - while (root->parent) - root = root->parent; - - vm_bo = root->vm_bo; - if (!vm_bo) - return 0; - - vm = vm_bo->vm; - if (!vm) - return 0; - - info = vm->process_info; - if (!info || !info->eviction_fence) - return 0; + struct dma_resv *resv = &bo->tbo.base._resv; + struct dma_fence *fence, *stub; + struct dma_resv_iter cursor; - ef = container_of(dma_fence_get(&info->eviction_fence->base), - struct amdgpu_amdkfd_fence, base); + dma_resv_assert_held(resv); - BUG_ON(!dma_resv_trylock(bo->tbo.base.resv)); - ret = amdgpu_amdkfd_remove_eviction_fence(bo, ef); - dma_resv_unlock(bo->tbo.base.resv); + stub = dma_fence_get_stub(); + dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_BOOKKEEP, fence) { + if (!to_amdgpu_amdkfd_fence(fence)) + continue; - dma_fence_put(&ef->base); - return ret; + dma_resv_replace_fences(resv, fence->context, stub, + DMA_RESV_USAGE_BOOKKEEP); + } + dma_fence_put(stub); } static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain, @@ -499,7 +491,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) if (ret) return ret; - return amdgpu_sync_fence(sync, vm->last_update); + return amdgpu_sync_fence(sync, vm->last_update, GFP_KERNEL); } static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) @@ -603,12 +595,6 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment) { struct ttm_operation_ctx ctx = {.interruptible = true}; struct amdgpu_bo *bo = attachment->bo_va->base.bo; - int ret; - - amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); - ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); - if (ret) - return ret; amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); @@ -730,7 +716,7 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem, return; amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); - ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + (void)ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0); sg_free_table(ttm->sg); @@ -779,7 +765,7 @@ kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem, } amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); - ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + (void)ttm_bo_validate(&bo->tbo, 
&bo->placement, &ctx); dir = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE; @@ -890,7 +876,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, * if peer device has large BAR. In contrast, access over xGMI is * allowed for both small and large BAR configurations of peer device */ - if ((adev != bo_adev && !(adev->flags & AMD_IS_APU)) && + if ((adev != bo_adev && !adev->apu_prefer_gtt) && ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) || (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) || (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { @@ -989,7 +975,7 @@ unwind: if (!attachment[i]) continue; if (attachment[i]->bo_va) { - amdgpu_bo_reserve(bo[i], true); + (void)amdgpu_bo_reserve(bo[i], true); if (--attachment[i]->bo_va->ref_count == 0) amdgpu_vm_bo_del(adev, attachment[i]->bo_va); amdgpu_bo_unreserve(bo[i]); @@ -1259,11 +1245,11 @@ static int unmap_bo_from_gpuvm(struct kgd_mem *mem, return -EBUSY; } - amdgpu_vm_bo_unmap(adev, bo_va, entry->va); + (void)amdgpu_vm_bo_unmap(adev, bo_va, entry->va); - amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); + (void)amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); - amdgpu_sync_fence(sync, bo_va->last_pt_update); + (void)amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL); return 0; } @@ -1287,7 +1273,7 @@ static int update_gpuvm_pte(struct kgd_mem *mem, return ret; } - return amdgpu_sync_fence(sync, bo_va->last_pt_update); + return amdgpu_sync_fence(sync, bo_va->last_pt_update, GFP_KERNEL); } static int map_bo_to_gpuvm(struct kgd_mem *mem, @@ -1529,27 +1515,6 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo) amdgpu_bo_unreserve(bo); } -int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev, - struct amdgpu_vm *avm, u32 pasid) - -{ - int ret; - - /* Free the original amdgpu allocated pasid, - * will be replaced with kfd allocated pasid. - */ - if (avm->pasid) { - amdgpu_pasid_free(avm->pasid); - amdgpu_vm_set_pasid(adev, avm, 0); - } - - ret = amdgpu_vm_set_pasid(adev, avm, pasid); - if (ret) - return ret; - - return 0; -} - int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, struct amdgpu_vm *avm, void **process_info, @@ -1607,27 +1572,6 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, } } -void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, - void *drm_priv) -{ - struct amdgpu_vm *avm; - - if (WARN_ON(!adev || !drm_priv)) - return; - - avm = drm_priv_to_vm(drm_priv); - - pr_debug("Releasing process vm %p\n", avm); - - /* The original pasid of amdgpu vm has already been - * released during making a amdgpu vm to a compute vm - * The current pasid is managed by kfd and will be - * released on kfd process destroy. Set amdgpu pasid - * to 0 to avoid duplicate release. - */ - amdgpu_vm_release_compute(adev, avm); -} - uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); @@ -1688,7 +1632,7 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev, - reserved_for_pt - reserved_for_ras; - if (adev->flags & AMD_IS_APU) { + if (adev->apu_prefer_gtt) { system_mem_available = no_system_mem_limit ? 
kfd_mem_limit.max_system_mem_limit : kfd_mem_limit.max_system_mem_limit - @@ -1736,7 +1680,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; - if (adev->flags & AMD_IS_APU) { + if (adev->apu_prefer_gtt) { domain = AMDGPU_GEM_DOMAIN_GTT; alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; @@ -1987,7 +1931,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( if (size) { if (!is_imported && (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM || - ((adev->flags & AMD_IS_APU) && + (adev->apu_prefer_gtt && mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT))) *size = bo_size; else @@ -2352,7 +2296,7 @@ void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem) { struct amdgpu_bo *bo = mem->bo; - amdgpu_bo_reserve(bo, true); + (void)amdgpu_bo_reserve(bo, true); amdgpu_bo_kunmap(bo); amdgpu_bo_unpin(bo); amdgpu_bo_unreserve(bo); @@ -2414,7 +2358,7 @@ static int import_obj_create(struct amdgpu_device *adev, (*mem)->bo = bo; (*mem)->va = va; (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && - !(adev->flags & AMD_IS_APU) ? + !adev->apu_prefer_gtt ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; (*mem)->mapped_to_gpu_memory = 0; @@ -2969,7 +2913,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence __rcu * } dma_resv_for_each_fence(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL, fence) { - ret = amdgpu_sync_fence(&sync_obj, fence); + ret = amdgpu_sync_fence(&sync_obj, fence, GFP_KERNEL); if (ret) { pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); goto validate_map_fail; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index 093141ad6ed0..e476e45b996a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -36,13 +36,6 @@ #include "atombios_encoders.h" #include "bif/bif_4_1_d.h" -static void amdgpu_atombios_lookup_i2c_gpio_quirks(struct amdgpu_device *adev, - ATOM_GPIO_I2C_ASSIGMENT *gpio, - u8 index) -{ - -} - static struct amdgpu_i2c_bus_rec amdgpu_atombios_get_bus_rec_for_i2c_gpio(ATOM_GPIO_I2C_ASSIGMENT *gpio) { struct amdgpu_i2c_bus_rec i2c; @@ -108,9 +101,6 @@ struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device * gpio = &i2c_info->asGPIO_Info[0]; for (i = 0; i < num_indices; i++) { - - amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i); - if (gpio->sucI2cId.ucAccess == id) { i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); break; @@ -142,8 +132,6 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev) gpio = &i2c_info->asGPIO_Info[0]; for (i = 0; i < num_indices; i++) { - amdgpu_atombios_lookup_i2c_gpio_quirks(adev, gpio, i); - i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); if (i2c.valid) { @@ -156,6 +144,38 @@ void amdgpu_atombios_i2c_init(struct amdgpu_device *adev) } } +void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id) +{ + struct atom_context *ctx = adev->mode_info.atom_context; + ATOM_GPIO_I2C_ASSIGMENT *gpio; + struct amdgpu_i2c_bus_rec i2c; + int index = GetIndexIntoMasterTable(DATA, GPIO_I2C_Info); + struct _ATOM_GPIO_I2C_INFO *i2c_info; + uint16_t data_offset, size; + int i, num_indices; + char stmp[32]; + + if (amdgpu_atom_parse_data_header(ctx, index, &size, NULL, NULL, &data_offset)) { + i2c_info = (struct _ATOM_GPIO_I2C_INFO *)(ctx->bios + data_offset); + + num_indices = (size - sizeof(ATOM_COMMON_TABLE_HEADER)) / + 
sizeof(ATOM_GPIO_I2C_ASSIGMENT); + + gpio = &i2c_info->asGPIO_Info[0]; + for (i = 0; i < num_indices; i++) { + i2c = amdgpu_atombios_get_bus_rec_for_i2c_gpio(gpio); + + if (i2c.valid && i2c.i2c_id == i2c_id) { + sprintf(stmp, "OEM 0x%x", i2c.i2c_id); + adev->i2c_bus[i] = amdgpu_i2c_create(adev_to_drm(adev), &i2c, stmp); + break; + } + gpio = (ATOM_GPIO_I2C_ASSIGMENT *) + ((u8 *)gpio + sizeof(ATOM_GPIO_I2C_ASSIGMENT)); + } + } +} + struct amdgpu_gpio_rec amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev, u8 id) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h index 0e16432d9a72..867bc5c5ce67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h @@ -136,6 +136,7 @@ amdgpu_atombios_lookup_gpio(struct amdgpu_device *adev, struct amdgpu_i2c_bus_rec amdgpu_atombios_lookup_i2c_gpio(struct amdgpu_device *adev, uint8_t id); void amdgpu_atombios_i2c_init(struct amdgpu_device *adev); +void amdgpu_atombios_oem_i2c_init(struct amdgpu_device *adev, u8 i2c_id); bool amdgpu_atombios_has_dce_engine_info(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index f873dd3cae16..eb015bdda8a7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -549,9 +549,10 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev) u16 data_offset, size; union umc_info *umc_info; u8 frev, crev; - bool ecc_default_enabled = false; + bool mem_ecc_enabled = false; u8 umc_config; u32 umc_config1; + adev->ras_default_ecc_enabled = false; index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, umc_info); @@ -563,20 +564,22 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev) switch (crev) { case 1: umc_config = le32_to_cpu(umc_info->v31.umc_config); - ecc_default_enabled = + mem_ecc_enabled = (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false; break; case 2: umc_config = le32_to_cpu(umc_info->v32.umc_config); - ecc_default_enabled = + mem_ecc_enabled = (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false; break; case 3: umc_config = le32_to_cpu(umc_info->v33.umc_config); umc_config1 = le32_to_cpu(umc_info->v33.umc_config1); - ecc_default_enabled = + mem_ecc_enabled = ((umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) || (umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE)) ? true : false; + adev->ras_default_ecc_enabled = + (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? true : false; break; default: /* unsupported crev */ @@ -585,9 +588,12 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev) } else if (frev == 4) { switch (crev) { case 0: + umc_config = le32_to_cpu(umc_info->v40.umc_config); umc_config1 = le32_to_cpu(umc_info->v40.umc_config1); - ecc_default_enabled = + mem_ecc_enabled = (umc_config1 & UMC_CONFIG1__ENABLE_ECC_CAPABLE) ? true : false; + adev->ras_default_ecc_enabled = + (umc_config & UMC_CONFIG__DEFAULT_MEM_ECC_ENABLE) ? 
true : false; break; default: /* unsupported crev */ @@ -599,7 +605,7 @@ bool amdgpu_atomfirmware_mem_ecc_supported(struct amdgpu_device *adev) } } - return ecc_default_enabled; + return mem_ecc_enabled; } /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 45affc02548c..75fcc521c171 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -47,41 +47,50 @@ /* Check if current bios is an ATOM BIOS. * Return true if it is ATOM BIOS. Otherwise, return false. */ -static bool check_atom_bios(uint8_t *bios, size_t size) +static bool check_atom_bios(struct amdgpu_device *adev, size_t size) { uint16_t tmp, bios_header_start; + uint8_t *bios = adev->bios; if (!bios || size < 0x49) { - DRM_INFO("vbios mem is null or mem size is wrong\n"); + dev_dbg(adev->dev, "VBIOS mem is null or mem size is wrong\n"); return false; } if (!AMD_IS_VALID_VBIOS(bios)) { - DRM_INFO("BIOS signature incorrect %x %x\n", bios[0], bios[1]); + dev_dbg(adev->dev, "VBIOS signature incorrect %x %x\n", bios[0], + bios[1]); return false; } bios_header_start = bios[0x48] | (bios[0x49] << 8); if (!bios_header_start) { - DRM_INFO("Can't locate bios header\n"); + dev_dbg(adev->dev, "Can't locate VBIOS header\n"); return false; } tmp = bios_header_start + 4; if (size < tmp) { - DRM_INFO("BIOS header is broken\n"); + dev_dbg(adev->dev, "VBIOS header is broken\n"); return false; } if (!memcmp(bios + tmp, "ATOM", 4) || !memcmp(bios + tmp, "MOTA", 4)) { - DRM_DEBUG("ATOMBIOS detected\n"); + dev_dbg(adev->dev, "ATOMBIOS detected\n"); return true; } return false; } +void amdgpu_bios_release(struct amdgpu_device *adev) +{ + kfree(adev->bios); + adev->bios = NULL; + adev->bios_size = 0; +} + /* If you boot an IGP board with a discrete card as the primary, * the IGP rom is not accessible via the rom bar as the IGP rom is * part of the system bios. 
On boot, the system bios puts a @@ -118,8 +127,8 @@ static bool amdgpu_read_bios_from_vram(struct amdgpu_device *adev) memcpy_fromio(adev->bios, bios, size); iounmap(bios); - if (!check_atom_bios(adev->bios, size)) { - kfree(adev->bios); + if (!check_atom_bios(adev, size)) { + amdgpu_bios_release(adev); return false; } @@ -146,8 +155,8 @@ bool amdgpu_read_bios(struct amdgpu_device *adev) memcpy_fromio(adev->bios, bios, size); pci_unmap_rom(adev->pdev, bios); - if (!check_atom_bios(adev->bios, size)) { - kfree(adev->bios); + if (!check_atom_bios(adev, size)) { + amdgpu_bios_release(adev); return false; } @@ -186,8 +195,8 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev) /* read complete BIOS */ amdgpu_asic_read_bios_from_rom(adev, adev->bios, len); - if (!check_atom_bios(adev->bios, len)) { - kfree(adev->bios); + if (!check_atom_bios(adev, len)) { + amdgpu_bios_release(adev); return false; } @@ -216,14 +225,15 @@ static bool amdgpu_read_platform_bios(struct amdgpu_device *adev) memcpy_fromio(adev->bios, bios, romlen); iounmap(bios); - if (!check_atom_bios(adev->bios, romlen)) + if (!check_atom_bios(adev, romlen)) goto free_bios; adev->bios_size = romlen; return true; free_bios: - kfree(adev->bios); + amdgpu_bios_release(adev); + return false; } @@ -324,8 +334,8 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev) break; } - if (!check_atom_bios(adev->bios, size)) { - kfree(adev->bios); + if (!check_atom_bios(adev, size)) { + amdgpu_bios_release(adev); return false; } adev->bios_size = size; @@ -389,8 +399,8 @@ static bool amdgpu_acpi_vfct_bios(struct amdgpu_device *adev) vhdr->ImageLength, GFP_KERNEL); - if (!check_atom_bios(adev->bios, vhdr->ImageLength)) { - kfree(adev->bios); + if (!check_atom_bios(adev, vhdr->ImageLength)) { + amdgpu_bios_release(adev); return false; } adev->bios_size = vhdr->ImageLength; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 16153d275d7a..68bce6a6d09d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -414,7 +414,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->pm.fw, "%s", fw_name); + err = amdgpu_ucode_request(adev, &adev->pm.fw, + AMDGPU_UCODE_REQUIRED, + "%s", fw_name); if (err) { DRM_ERROR("Failed to load firmware \"%s\"", fw_name); amdgpu_ucode_release(&adev->pm.fw); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index 344e0a9ee08a..5e375e9c4f5d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -674,7 +674,7 @@ static int amdgpu_connector_lvds_get_modes(struct drm_connector *connector) } static enum drm_mode_status amdgpu_connector_lvds_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) + const struct drm_display_mode *mode) { struct drm_encoder *encoder = amdgpu_connector_best_single_encoder(connector); @@ -839,7 +839,7 @@ static int amdgpu_connector_vga_get_modes(struct drm_connector *connector) } static enum drm_mode_status amdgpu_connector_vga_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) + const struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; struct amdgpu_device *adev = drm_to_adev(dev); @@ -1196,7 +1196,7 @@ static void amdgpu_connector_dvi_force(struct drm_connector *connector) } static enum drm_mode_status 
amdgpu_connector_dvi_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) + const struct drm_display_mode *mode) { struct drm_device *dev = connector->dev; struct amdgpu_device *adev = drm_to_adev(dev); @@ -1464,7 +1464,7 @@ out: } static enum drm_mode_status amdgpu_connector_dp_mode_valid(struct drm_connector *connector, - struct drm_display_mode *mode) + const struct drm_display_mode *mode) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); struct amdgpu_connector_atom_dig *amdgpu_dig_connector = amdgpu_connector->con_priv; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c new file mode 100644 index 000000000000..360e07a5c7c1 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.c @@ -0,0 +1,581 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/list.h> +#include "amdgpu.h" + +static const guid_t MCE = CPER_NOTIFY_MCE; +static const guid_t CMC = CPER_NOTIFY_CMC; +static const guid_t BOOT = BOOT_TYPE; + +static const guid_t CRASHDUMP = AMD_CRASHDUMP; +static const guid_t RUNTIME = AMD_GPU_NONSTANDARD_ERROR; + +static void __inc_entry_length(struct cper_hdr *hdr, uint32_t size) +{ + hdr->record_length += size; +} + +static void amdgpu_cper_get_timestamp(struct cper_timestamp *timestamp) +{ + struct tm tm; + time64_t now = ktime_get_real_seconds(); + + time64_to_tm(now, 0, &tm); + timestamp->seconds = tm.tm_sec; + timestamp->minutes = tm.tm_min; + timestamp->hours = tm.tm_hour; + timestamp->flag = 0; + timestamp->day = tm.tm_mday; + timestamp->month = 1 + tm.tm_mon; + timestamp->year = (1900 + tm.tm_year) % 100; + timestamp->century = (1900 + tm.tm_year) / 100; +} + +void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev, + struct cper_hdr *hdr, + enum amdgpu_cper_type type, + enum cper_error_severity sev) +{ + char record_id[16]; + + hdr->signature[0] = 'C'; + hdr->signature[1] = 'P'; + hdr->signature[2] = 'E'; + hdr->signature[3] = 'R'; + hdr->revision = CPER_HDR_REV_1; + hdr->signature_end = 0xFFFFFFFF; + hdr->error_severity = sev; + + hdr->valid_bits.platform_id = 1; + hdr->valid_bits.partition_id = 1; + hdr->valid_bits.timestamp = 1; + + amdgpu_cper_get_timestamp(&hdr->timestamp); + + snprintf(record_id, 9, "%d:%X", + (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ? 
+ adev->smuio.funcs->get_socket_id(adev) : + 0, + atomic_inc_return(&adev->cper.unique_id)); + memcpy(hdr->record_id, record_id, 8); + + snprintf(hdr->platform_id, 16, "0x%04X:0x%04X", + adev->pdev->vendor, adev->pdev->device); + /* pmfw version should be part of creator_id according to CPER spec */ + snprintf(hdr->creator_id, 16, "%s", CPER_CREATOR_ID_AMDGPU); + + switch (type) { + case AMDGPU_CPER_TYPE_BOOT: + hdr->notify_type = BOOT; + break; + case AMDGPU_CPER_TYPE_FATAL: + case AMDGPU_CPER_TYPE_BP_THRESHOLD: + hdr->notify_type = MCE; + break; + case AMDGPU_CPER_TYPE_RUNTIME: + if (sev == CPER_SEV_NON_FATAL_CORRECTED) + hdr->notify_type = CMC; + else + hdr->notify_type = MCE; + break; + default: + dev_err(adev->dev, "Unknown CPER Type\n"); + break; + } + + __inc_entry_length(hdr, HDR_LEN); +} + +static int amdgpu_cper_entry_fill_section_desc(struct amdgpu_device *adev, + struct cper_sec_desc *section_desc, + bool bp_threshold, + bool poison, + enum cper_error_severity sev, + guid_t sec_type, + uint32_t section_length, + uint32_t section_offset) +{ + section_desc->revision_minor = CPER_SEC_MINOR_REV_1; + section_desc->revision_major = CPER_SEC_MAJOR_REV_22; + section_desc->sec_offset = section_offset; + section_desc->sec_length = section_length; + section_desc->valid_bits.fru_text = 1; + section_desc->flag_bits.primary = 1; + section_desc->severity = sev; + section_desc->sec_type = sec_type; + + snprintf(section_desc->fru_text, 20, "OAM%d", + (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) ? + adev->smuio.funcs->get_socket_id(adev) : + 0); + + if (bp_threshold) + section_desc->flag_bits.exceed_err_threshold = 1; + if (poison) + section_desc->flag_bits.latent_err = 1; + + return 0; +} + +int amdgpu_cper_entry_fill_fatal_section(struct amdgpu_device *adev, + struct cper_hdr *hdr, + uint32_t idx, + struct cper_sec_crashdump_reg_data reg_data) +{ + struct cper_sec_desc *section_desc; + struct cper_sec_crashdump_fatal *section; + + section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx)); + section = (struct cper_sec_crashdump_fatal *)((uint8_t *)hdr + + FATAL_SEC_OFFSET(hdr->sec_cnt, idx)); + + amdgpu_cper_entry_fill_section_desc(adev, section_desc, false, false, + CPER_SEV_FATAL, CRASHDUMP, FATAL_SEC_LEN, + FATAL_SEC_OFFSET(hdr->sec_cnt, idx)); + + section->body.reg_ctx_type = CPER_CTX_TYPE_CRASH; + section->body.reg_arr_size = sizeof(reg_data); + section->body.data = reg_data; + + __inc_entry_length(hdr, SEC_DESC_LEN + FATAL_SEC_LEN); + + return 0; +} + +int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev, + struct cper_hdr *hdr, + uint32_t idx, + enum cper_error_severity sev, + uint32_t *reg_dump, + uint32_t reg_count) +{ + struct cper_sec_desc *section_desc; + struct cper_sec_nonstd_err *section; + bool poison; + + poison = (sev == CPER_SEV_NON_FATAL_CORRECTED) ? 
false : true; + section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx)); + section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr + + NONSTD_SEC_OFFSET(hdr->sec_cnt, idx)); + + amdgpu_cper_entry_fill_section_desc(adev, section_desc, false, poison, + sev, RUNTIME, NONSTD_SEC_LEN, + NONSTD_SEC_OFFSET(hdr->sec_cnt, idx)); + + reg_count = umin(reg_count, CPER_ACA_REG_COUNT); + + section->hdr.valid_bits.err_info_cnt = 1; + section->hdr.valid_bits.err_context_cnt = 1; + + section->info.error_type = RUNTIME; + section->info.ms_chk_bits.err_type_valid = 1; + section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH; + section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump); + + memcpy(section->ctx.reg_dump, reg_dump, reg_count * sizeof(uint32_t)); + + __inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN); + + return 0; +} + +int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev, + struct cper_hdr *hdr, + uint32_t idx) +{ + struct cper_sec_desc *section_desc; + struct cper_sec_nonstd_err *section; + + section_desc = (struct cper_sec_desc *)((uint8_t *)hdr + SEC_DESC_OFFSET(idx)); + section = (struct cper_sec_nonstd_err *)((uint8_t *)hdr + + NONSTD_SEC_OFFSET(hdr->sec_cnt, idx)); + + amdgpu_cper_entry_fill_section_desc(adev, section_desc, true, false, + CPER_SEV_NUM, RUNTIME, NONSTD_SEC_LEN, + NONSTD_SEC_OFFSET(hdr->sec_cnt, idx)); + + section->hdr.valid_bits.err_info_cnt = 1; + section->hdr.valid_bits.err_context_cnt = 1; + + section->info.error_type = RUNTIME; + section->info.ms_chk_bits.err_type_valid = 1; + section->ctx.reg_ctx_type = CPER_CTX_TYPE_CRASH; + section->ctx.reg_arr_size = sizeof(section->ctx.reg_dump); + + /* Hardcoded Reg dump for bad page threshold CPER */ + section->ctx.reg_dump[CPER_ACA_REG_CTL_LO] = 0x1; + section->ctx.reg_dump[CPER_ACA_REG_CTL_HI] = 0x0; + section->ctx.reg_dump[CPER_ACA_REG_STATUS_LO] = 0x137; + section->ctx.reg_dump[CPER_ACA_REG_STATUS_HI] = 0xB0000000; + section->ctx.reg_dump[CPER_ACA_REG_ADDR_LO] = 0x0; + section->ctx.reg_dump[CPER_ACA_REG_ADDR_HI] = 0x0; + section->ctx.reg_dump[CPER_ACA_REG_MISC0_LO] = 0x0; + section->ctx.reg_dump[CPER_ACA_REG_MISC0_HI] = 0x0; + section->ctx.reg_dump[CPER_ACA_REG_CONFIG_LO] = 0x2; + section->ctx.reg_dump[CPER_ACA_REG_CONFIG_HI] = 0x1ff; + section->ctx.reg_dump[CPER_ACA_REG_IPID_LO] = 0x0; + section->ctx.reg_dump[CPER_ACA_REG_IPID_HI] = 0x96; + section->ctx.reg_dump[CPER_ACA_REG_SYND_LO] = 0x0; + section->ctx.reg_dump[CPER_ACA_REG_SYND_HI] = 0x0; + + __inc_entry_length(hdr, SEC_DESC_LEN + NONSTD_SEC_LEN); + + return 0; +} + +struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev, + enum amdgpu_cper_type type, + uint16_t section_count) +{ + struct cper_hdr *hdr; + uint32_t size = 0; + + size += HDR_LEN; + size += (SEC_DESC_LEN * section_count); + + switch (type) { + case AMDGPU_CPER_TYPE_RUNTIME: + case AMDGPU_CPER_TYPE_BP_THRESHOLD: + size += (NONSTD_SEC_LEN * section_count); + break; + case AMDGPU_CPER_TYPE_FATAL: + size += (FATAL_SEC_LEN * section_count); + break; + case AMDGPU_CPER_TYPE_BOOT: + size += (BOOT_SEC_LEN * section_count); + break; + default: + dev_err(adev->dev, "Unknown CPER Type!\n"); + return NULL; + } + + hdr = kzalloc(size, GFP_KERNEL); + if (!hdr) + return NULL; + + /* Save this early */ + hdr->sec_cnt = section_count; + + return hdr; +} + +int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev, + struct aca_bank *bank) +{ + struct cper_hdr *fatal = NULL; + struct cper_sec_crashdump_reg_data reg_data = { 0 }; + struct amdgpu_ring *ring = 
&adev->cper.ring_buf; + int ret; + + fatal = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_FATAL, 1); + if (!fatal) { + dev_err(adev->dev, "fail to alloc cper entry for ue record\n"); + return -ENOMEM; + } + + reg_data.status_lo = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]); + reg_data.status_hi = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]); + reg_data.addr_lo = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]); + reg_data.addr_hi = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]); + reg_data.ipid_lo = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]); + reg_data.ipid_hi = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]); + reg_data.synd_lo = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]); + reg_data.synd_hi = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]); + + amdgpu_cper_entry_fill_hdr(adev, fatal, AMDGPU_CPER_TYPE_FATAL, CPER_SEV_FATAL); + ret = amdgpu_cper_entry_fill_fatal_section(adev, fatal, 0, reg_data); + if (ret) + return ret; + + amdgpu_cper_ring_write(ring, fatal, fatal->record_length); + kfree(fatal); + + return 0; +} + +int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev) +{ + struct cper_hdr *bp_threshold = NULL; + struct amdgpu_ring *ring = &adev->cper.ring_buf; + int ret; + + bp_threshold = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_BP_THRESHOLD, 1); + if (!bp_threshold) { + dev_err(adev->dev, "fail to alloc cper entry for bad page threshold record\n"); + return -ENOMEM; + } + + amdgpu_cper_entry_fill_hdr(adev, bp_threshold, AMDGPU_CPER_TYPE_BP_THRESHOLD, CPER_SEV_NUM); + ret = amdgpu_cper_entry_fill_bad_page_threshold_section(adev, bp_threshold, 0); + if (ret) + return ret; + + amdgpu_cper_ring_write(ring, bp_threshold, bp_threshold->record_length); + kfree(bp_threshold); + + return 0; +} + +static enum cper_error_severity amdgpu_aca_err_type_to_cper_sev(struct amdgpu_device *adev, + enum aca_error_type aca_err_type) +{ + switch (aca_err_type) { + case ACA_ERROR_TYPE_UE: + return CPER_SEV_FATAL; + case ACA_ERROR_TYPE_CE: + return CPER_SEV_NON_FATAL_CORRECTED; + case ACA_ERROR_TYPE_DEFERRED: + return CPER_SEV_NON_FATAL_UNCORRECTED; + default: + dev_err(adev->dev, "Unknown ACA error type!\n"); + return CPER_SEV_FATAL; + } +} + +int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev, + struct aca_banks *banks, + uint16_t bank_count) +{ + struct cper_hdr *corrected = NULL; + enum cper_error_severity sev = CPER_SEV_NON_FATAL_CORRECTED; + struct amdgpu_ring *ring = &adev->cper.ring_buf; + uint32_t reg_data[CPER_ACA_REG_COUNT] = { 0 }; + struct aca_bank_node *node; + struct aca_bank *bank; + uint32_t i = 0; + int ret; + + corrected = amdgpu_cper_alloc_entry(adev, AMDGPU_CPER_TYPE_RUNTIME, bank_count); + if (!corrected) { + dev_err(adev->dev, "fail to allocate cper entry for ce records\n"); + return -ENOMEM; + } + + /* Raise severity if any DE is detected in the ACA bank list */ + list_for_each_entry(node, &banks->list, node) { + bank = &node->bank; + if (bank->aca_err_type == ACA_ERROR_TYPE_DEFERRED) { + sev = CPER_SEV_NON_FATAL_UNCORRECTED; + break; + } + } + + amdgpu_cper_entry_fill_hdr(adev, corrected, AMDGPU_CPER_TYPE_RUNTIME, sev); + + /* Combine CE and DE in cper record */ + list_for_each_entry(node, &banks->list, node) { + bank = &node->bank; + reg_data[CPER_ACA_REG_CTL_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CTL]); + reg_data[CPER_ACA_REG_CTL_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CTL]); + reg_data[CPER_ACA_REG_STATUS_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_STATUS]); + reg_data[CPER_ACA_REG_STATUS_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_STATUS]); + 
reg_data[CPER_ACA_REG_ADDR_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_ADDR]); + reg_data[CPER_ACA_REG_ADDR_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_ADDR]); + reg_data[CPER_ACA_REG_MISC0_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_MISC0]); + reg_data[CPER_ACA_REG_MISC0_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_MISC0]); + reg_data[CPER_ACA_REG_CONFIG_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_CONFIG]); + reg_data[CPER_ACA_REG_CONFIG_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_CONFIG]); + reg_data[CPER_ACA_REG_IPID_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_IPID]); + reg_data[CPER_ACA_REG_IPID_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_IPID]); + reg_data[CPER_ACA_REG_SYND_LO] = lower_32_bits(bank->regs[ACA_REG_IDX_SYND]); + reg_data[CPER_ACA_REG_SYND_HI] = upper_32_bits(bank->regs[ACA_REG_IDX_SYND]); + + ret = amdgpu_cper_entry_fill_runtime_section(adev, corrected, i++, + amdgpu_aca_err_type_to_cper_sev(adev, bank->aca_err_type), + reg_data, CPER_ACA_REG_COUNT); + if (ret) + return ret; + } + + amdgpu_cper_ring_write(ring, corrected, corrected->record_length); + kfree(corrected); + + return 0; +} + +static bool amdgpu_cper_is_hdr(struct amdgpu_ring *ring, u64 pos) +{ + struct cper_hdr *chdr; + + chdr = (struct cper_hdr *)&(ring->ring[pos]); + return strcmp(chdr->signature, "CPER") ? false : true; +} + +static u32 amdgpu_cper_ring_get_ent_sz(struct amdgpu_ring *ring, u64 pos) +{ + struct cper_hdr *chdr; + u64 p; + u32 chunk, rec_len = 0; + + chdr = (struct cper_hdr *)&(ring->ring[pos]); + chunk = ring->ring_size - (pos << 2); + + if (!strcmp(chdr->signature, "CPER")) { + rec_len = chdr->record_length; + goto calc; + } + + /* ring buffer is not full, no cper data after ring->wptr */ + if (ring->count_dw) + goto calc; + + for (p = pos + 1; p <= ring->buf_mask; p++) { + chdr = (struct cper_hdr *)&(ring->ring[p]); + if (!strcmp(chdr->signature, "CPER")) { + rec_len = (p - pos) << 2; + goto calc; + } + } + +calc: + if (!rec_len) + return chunk; + else + return umin(rec_len, chunk); +} + +void amdgpu_cper_ring_write(struct amdgpu_ring *ring, void *src, int count) +{ + u64 pos, wptr_old, rptr = *ring->rptr_cpu_addr & ring->ptr_mask; + int rec_cnt_dw = count >> 2; + u32 chunk, ent_sz; + u8 *s = (u8 *)src; + + if (count >= ring->ring_size - 4) { + dev_err(ring->adev->dev, + "CPER data size(%d) is larger than ring size(%d)\n", + count, ring->ring_size - 4); + + return; + } + + wptr_old = ring->wptr; + + mutex_lock(&ring->adev->cper.ring_lock); + while (count) { + ent_sz = amdgpu_cper_ring_get_ent_sz(ring, ring->wptr); + chunk = umin(ent_sz, count); + + memcpy(&ring->ring[ring->wptr], s, chunk); + + ring->wptr += (chunk >> 2); + ring->wptr &= ring->ptr_mask; + count -= chunk; + s += chunk; + } + + if (ring->count_dw < rec_cnt_dw) + ring->count_dw = 0; + + /* the buffer is overflow, adjust rptr */ + if (((wptr_old < rptr) && (rptr <= ring->wptr)) || + ((ring->wptr < wptr_old) && (wptr_old < rptr)) || + ((rptr <= ring->wptr) && (ring->wptr < wptr_old))) { + pos = (ring->wptr + 1) & ring->ptr_mask; + + do { + ent_sz = amdgpu_cper_ring_get_ent_sz(ring, pos); + + rptr += (ent_sz >> 2); + rptr &= ring->ptr_mask; + *ring->rptr_cpu_addr = rptr; + + pos = rptr; + } while (!amdgpu_cper_is_hdr(ring, rptr)); + } + + if (ring->count_dw >= rec_cnt_dw) + ring->count_dw -= rec_cnt_dw; + mutex_unlock(&ring->adev->cper.ring_lock); +} + +static u64 amdgpu_cper_ring_get_rptr(struct amdgpu_ring *ring) +{ + return *(ring->rptr_cpu_addr); +} + +static u64 amdgpu_cper_ring_get_wptr(struct amdgpu_ring *ring) +{ + return 
ring->wptr; +} + +static const struct amdgpu_ring_funcs cper_ring_funcs = { + .type = AMDGPU_RING_TYPE_CPER, + .align_mask = 0xff, + .support_64bit_ptrs = false, + .get_rptr = amdgpu_cper_ring_get_rptr, + .get_wptr = amdgpu_cper_ring_get_wptr, +}; + +static int amdgpu_cper_ring_init(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &(adev->cper.ring_buf); + + mutex_init(&adev->cper.ring_lock); + + ring->adev = NULL; + ring->ring_obj = NULL; + ring->use_doorbell = false; + ring->no_scheduler = true; + ring->funcs = &cper_ring_funcs; + + sprintf(ring->name, "cper"); + return amdgpu_ring_init(adev, ring, CPER_MAX_RING_SIZE, NULL, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); +} + +int amdgpu_cper_init(struct amdgpu_device *adev) +{ + int r; + + if (!amdgpu_aca_is_enabled(adev)) + return 0; + + r = amdgpu_cper_ring_init(adev); + if (r) { + dev_err(adev->dev, "failed to initialize cper ring, r = %d\n", r); + return r; + } + + mutex_init(&adev->cper.cper_lock); + + adev->cper.enabled = true; + adev->cper.max_count = CPER_MAX_ALLOWED_COUNT; + + return 0; +} + +int amdgpu_cper_fini(struct amdgpu_device *adev) +{ + if (!amdgpu_aca_is_enabled(adev)) + return 0; + + adev->cper.enabled = false; + + amdgpu_ring_fini(&(adev->cper.ring_buf)); + adev->cper.count = 0; + adev->cper.wptr = 0; + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h new file mode 100644 index 000000000000..bcb97d245673 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cper.h @@ -0,0 +1,105 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
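
The new amdgpu_cper.c above lays every record out as a fixed header, then one section descriptor per section, then the section bodies; amdgpu_cper_alloc_entry() sizes the allocation that way and the fill helpers locate each piece with the same arithmetic. A minimal standalone sketch of that offset math, using placeholder sizes instead of the real sizeof(struct cper_*) values:

#include <stdint.h>
#include <stdio.h>

/* Placeholder sizes; the driver derives these from sizeof(struct cper_hdr) etc. */
#define HDR_LEN        128
#define SEC_DESC_LEN    72
#define FATAL_SEC_LEN  320

/* Descriptor idx sits directly after the header. */
static uint32_t sec_desc_offset(uint32_t idx)
{
    return HDR_LEN + SEC_DESC_LEN * idx;
}

/* Section bodies start only after *all* descriptors, hence the count argument. */
static uint32_t fatal_sec_offset(uint32_t count, uint32_t idx)
{
    return HDR_LEN + SEC_DESC_LEN * count + FATAL_SEC_LEN * idx;
}

int main(void)
{
    uint32_t count = 2;
    uint32_t total = HDR_LEN + count * (SEC_DESC_LEN + FATAL_SEC_LEN);

    for (uint32_t i = 0; i < count; i++)
        printf("desc %u @ %u, body %u @ %u (alloc %u bytes)\n",
               i, sec_desc_offset(i), i, fatal_sec_offset(count, i), total);
    return 0;
}
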
+ * + */ + +#ifndef __AMDGPU_CPER_H__ +#define __AMDGPU_CPER_H__ + +#include "amd_cper.h" +#include "amdgpu_aca.h" + +#define CPER_MAX_ALLOWED_COUNT 0x1000 +#define CPER_MAX_RING_SIZE 0X100000 +#define HDR_LEN (sizeof(struct cper_hdr)) +#define SEC_DESC_LEN (sizeof(struct cper_sec_desc)) + +#define BOOT_SEC_LEN (sizeof(struct cper_sec_crashdump_boot)) +#define FATAL_SEC_LEN (sizeof(struct cper_sec_crashdump_fatal)) +#define NONSTD_SEC_LEN (sizeof(struct cper_sec_nonstd_err)) + +#define SEC_DESC_OFFSET(idx) (HDR_LEN + (SEC_DESC_LEN * idx)) + +#define BOOT_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * count) + (BOOT_SEC_LEN * idx)) +#define FATAL_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * count) + (FATAL_SEC_LEN * idx)) +#define NONSTD_SEC_OFFSET(count, idx) (HDR_LEN + (SEC_DESC_LEN * count) + (NONSTD_SEC_LEN * idx)) + +enum amdgpu_cper_type { + AMDGPU_CPER_TYPE_RUNTIME, + AMDGPU_CPER_TYPE_FATAL, + AMDGPU_CPER_TYPE_BOOT, + AMDGPU_CPER_TYPE_BP_THRESHOLD, +}; + +struct amdgpu_cper { + bool enabled; + + atomic_t unique_id; + struct mutex cper_lock; + + /* Lifetime CPERs generated */ + uint32_t count; + uint32_t max_count; + + uint32_t wptr; + + void *ring[CPER_MAX_ALLOWED_COUNT]; + struct amdgpu_ring ring_buf; + struct mutex ring_lock; +}; + +void amdgpu_cper_entry_fill_hdr(struct amdgpu_device *adev, + struct cper_hdr *hdr, + enum amdgpu_cper_type type, + enum cper_error_severity sev); +int amdgpu_cper_entry_fill_fatal_section(struct amdgpu_device *adev, + struct cper_hdr *hdr, + uint32_t idx, + struct cper_sec_crashdump_reg_data reg_data); +int amdgpu_cper_entry_fill_runtime_section(struct amdgpu_device *adev, + struct cper_hdr *hdr, + uint32_t idx, + enum cper_error_severity sev, + uint32_t *reg_dump, + uint32_t reg_count); +int amdgpu_cper_entry_fill_bad_page_threshold_section(struct amdgpu_device *adev, + struct cper_hdr *hdr, + uint32_t section_idx); + +struct cper_hdr *amdgpu_cper_alloc_entry(struct amdgpu_device *adev, + enum amdgpu_cper_type type, + uint16_t section_count); +/* UE must be encoded into separated cper entries, 1 UE 1 cper */ +int amdgpu_cper_generate_ue_record(struct amdgpu_device *adev, + struct aca_bank *bank); +/* CEs and DEs are combined into 1 cper entry */ +int amdgpu_cper_generate_ce_records(struct amdgpu_device *adev, + struct aca_banks *banks, + uint16_t bank_count); +/* Bad page threshold is encoded into separated cper entry */ +int amdgpu_cper_generate_bp_threshold_record(struct amdgpu_device *adev); +void amdgpu_cper_ring_write(struct amdgpu_ring *ring, + void *src, int count); +int amdgpu_cper_init(struct amdgpu_device *adev); +int amdgpu_cper_fini(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5df21529b3b1..82df06a72ee0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -428,7 +428,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p, dma_fence_put(old); } - r = amdgpu_sync_fence(&p->sync, fence); + r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL); dma_fence_put(fence); if (r) return r; @@ -450,7 +450,7 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p, return r; } - r = amdgpu_sync_fence(&p->sync, fence); + r = amdgpu_sync_fence(&p->sync, fence, GFP_KERNEL); dma_fence_put(fence); return r; } @@ -1105,13 +1105,13 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) * We can't use gang submit on with reserved VMIDs when the VM changes * can't be invalidated by 
more than one engine at the same time. */ - if (p->gang_size > 1 && !p->adev->vm_manager.concurrent_flush) { + if (p->gang_size > 1 && !adev->vm_manager.concurrent_flush) { for (i = 0; i < p->gang_size; ++i) { struct drm_sched_entity *entity = p->entities[i]; struct drm_gpu_scheduler *sched = entity->rq->sched; struct amdgpu_ring *ring = to_amdgpu_ring(sched); - if (amdgpu_vmid_uses_reserved(adev, vm, ring->vm_hub)) + if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) return -EINVAL; } } @@ -1124,7 +1124,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update); + r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update, + GFP_KERNEL); if (r) return r; @@ -1135,7 +1136,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, + GFP_KERNEL); if (r) return r; } @@ -1154,7 +1156,8 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update); + r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update, + GFP_KERNEL); if (r) return r; } @@ -1167,7 +1170,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (r) return r; - r = amdgpu_sync_fence(&p->sync, vm->last_update); + r = amdgpu_sync_fence(&p->sync, vm->last_update, GFP_KERNEL); if (r) return r; @@ -1189,7 +1192,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) if (!bo) continue; - amdgpu_vm_bo_invalidate(adev, bo, false); + amdgpu_vm_bo_invalidate(bo, false); } } @@ -1248,7 +1251,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) continue; } - r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); + r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence, + GFP_KERNEL); dma_fence_put(fence); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index a68338cb7b4a..a1450f13d963 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1990,7 +1990,7 @@ static int amdgpu_debugfs_sclk_set(void *data, u64 val) uint32_t max_freq, min_freq; struct amdgpu_device *adev = (struct amdgpu_device *)data; - if (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) + if (amdgpu_sriov_multi_vf_mode(adev)) return -EINVAL; ret = pm_runtime_get_sync(adev_to_drm(adev)->dev); @@ -2095,6 +2095,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) if (amdgpu_umsch_mm & amdgpu_umsch_mm_fwlog) amdgpu_debugfs_umsch_fwlog_init(adev, &adev->umsch_mm); + amdgpu_debugfs_vcn_sched_mask_init(adev); amdgpu_debugfs_jpeg_sched_mask_init(adev); amdgpu_debugfs_gfx_sched_mask_init(adev); amdgpu_debugfs_compute_sched_mask_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c index 824f9da5b6ce..7b50741dc097 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dev_coredump.c @@ -364,5 +364,9 @@ void amdgpu_coredump(struct amdgpu_device *adev, bool skip_vram_check, dev_coredumpm(dev->dev, THIS_MODULE, coredump, 0, GFP_NOWAIT, amdgpu_devcoredump_read, amdgpu_devcoredump_free); + + drm_info(dev, "AMDGPU device coredump file has been created\n"); + drm_info(dev, "Check your /sys/class/drm/card%d/device/devcoredump/data\n", + dev->primary->index); } #endif diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index cd4fac120834..a30111d2c3ea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -102,6 +102,9 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin"); #define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2) #define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2) +#define AMDGPU_VBIOS_SKIP (1U << 0) +#define AMDGPU_VBIOS_OPTIONAL (1U << 1) + static const struct drm_driver amdgpu_kms_driver; const char *amdgpu_asic_name[] = { @@ -199,14 +202,16 @@ void amdgpu_set_init_level(struct amdgpu_device *adev, } static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev); +static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, + void *data); /** * DOC: pcie_replay_count * * The amdgpu driver provides a sysfs API for reporting the total number - * of PCIe replays (NAKs) + * of PCIe replays (NAKs). * The file pcie_replay_count is used for this and returns the total - * number of replays as a sum of the NAKs generated and NAKs received + * number of replays as a sum of the NAKs generated and NAKs received. */ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, @@ -222,8 +227,26 @@ static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev, static DEVICE_ATTR(pcie_replay_count, 0444, amdgpu_device_get_pcie_replay_count, NULL); +static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev) +{ + int ret = 0; + + if (!amdgpu_sriov_vf(adev)) + ret = sysfs_create_file(&adev->dev->kobj, + &dev_attr_pcie_replay_count.attr); + + return ret; +} + +static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev) +{ + if (!amdgpu_sriov_vf(adev)) + sysfs_remove_file(&adev->dev->kobj, + &dev_attr_pcie_replay_count.attr); +} + static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj, - struct bin_attribute *attr, char *buf, + const struct bin_attribute *attr, char *buf, loff_t ppos, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -259,8 +282,8 @@ static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj, return bytes_read; } -BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL, - AMDGPU_SYS_REG_STATE_END); +static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL, + AMDGPU_SYS_REG_STATE_END); int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev) { @@ -432,8 +455,8 @@ bool amdgpu_device_supports_boco(struct drm_device *dev) * @dev: drm_device pointer * * Return: - * 1 if the device supporte BACO; - * 3 if the device support MACO (only works if BACO is supported) + * 1 if the device supports BACO; + * 3 if the device supports MACO (only works if BACO is supported) * otherwise return 0. */ int amdgpu_device_supports_baco(struct drm_device *dev) @@ -580,7 +603,7 @@ void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos, } /** - * amdgpu_device_aper_access - access vram by vram aperature + * amdgpu_device_aper_access - access vram by vram aperture * * @adev: amdgpu_device pointer * @pos: offset of the buffer in vram @@ -671,7 +694,7 @@ bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev) * here is that the GPU reset is not running on another thread in parallel. * * For this we trylock the read side of the reset semaphore, if that succeeds - * we know that the reset is not running in paralell. + * we know that the reset is not running in parallel. 
* * If the trylock fails we assert that we are either already holding the read * side of the lock or are the reset thread itself and hold the write side of @@ -1387,6 +1410,17 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, BUG(); } +static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev) +{ + if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) + return AMDGPU_VBIOS_SKIP; + + if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev)) + return AMDGPU_VBIOS_OPTIONAL; + + return 0; +} + /** * amdgpu_device_asic_init - Wrapper for atom asic_init * @@ -1396,17 +1430,28 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev, */ static int amdgpu_device_asic_init(struct amdgpu_device *adev) { + uint32_t flags; + bool optional; int ret; amdgpu_asic_pre_asic_init(adev); + flags = amdgpu_device_get_vbios_flags(adev); + optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP)); if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) { amdgpu_psp_wait_for_bootloader(adev); + if (optional && !adev->bios) + return 0; + ret = amdgpu_atomfirmware_asic_init(adev, true); return ret; } else { + if (optional && !adev->bios) + return 0; + return amdgpu_atom_asic_init(adev->mode_info.atom_context); } @@ -1635,6 +1680,13 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) return 0; + /* resizing on Dell G5 SE platforms causes problems with runtime pm */ + if ((amdgpu_runtime_pm != 0) && + adev->pdev->vendor == PCI_VENDOR_ID_ATI && + adev->pdev->device == 0x731f && + adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL) + return 0; + /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */ if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR)) DRM_WARN("System can't access extended configuration space, please check!!\n"); @@ -1695,14 +1747,6 @@ int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) return 0; } -static bool amdgpu_device_read_bios(struct amdgpu_device *adev) -{ - if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU)) - return false; - - return true; -} - /* * GPU helpers function. 
*/ @@ -1717,12 +1761,15 @@ static bool amdgpu_device_read_bios(struct amdgpu_device *adev) */ bool amdgpu_device_need_post(struct amdgpu_device *adev) { - uint32_t reg; + uint32_t reg, flags; if (amdgpu_sriov_vf(adev)) return false; - if (!amdgpu_device_read_bios(adev)) + flags = amdgpu_device_get_vbios_flags(adev); + if (flags & AMDGPU_VBIOS_SKIP) + return false; + if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios) return false; if (amdgpu_passthrough(adev)) { @@ -1736,7 +1783,7 @@ bool amdgpu_device_need_post(struct amdgpu_device *adev) uint32_t fw_ver; err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev); - /* force vPost if error occured */ + /* force vPost if error occurred */ if (err) return true; @@ -2168,7 +2215,7 @@ int amdgpu_device_ip_set_clockgating_state(void *dev, if (!adev->ip_blocks[i].version->funcs->set_clockgating_state) continue; r = adev->ip_blocks[i].version->funcs->set_clockgating_state( - (void *)adev, state); + &adev->ip_blocks[i], state); if (r) DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -2202,7 +2249,7 @@ int amdgpu_device_ip_set_powergating_state(void *dev, if (!adev->ip_blocks[i].version->funcs->set_powergating_state) continue; r = adev->ip_blocks[i].version->funcs->set_powergating_state( - (void *)adev, state); + &adev->ip_blocks[i], state); if (r) DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -2230,7 +2277,8 @@ void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, if (!adev->ip_blocks[i].status.valid) continue; if (adev->ip_blocks[i].version->funcs->get_clockgating_state) - adev->ip_blocks[i].version->funcs->get_clockgating_state((void *)adev, flags); + adev->ip_blocks[i].version->funcs->get_clockgating_state( + &adev->ip_blocks[i], flags); } } @@ -2362,8 +2410,8 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, break; } - DRM_INFO("add ip block number %d <%s>\n", adev->num_ip_blocks, - ip_block_version->funcs->name); + dev_info(adev->dev, "detected ip block number %d <%s>\n", + adev->num_ip_blocks, ip_block_version->funcs->name); adev->ip_blocks[adev->num_ip_blocks].adev = adev; @@ -2381,7 +2429,7 @@ int amdgpu_device_ip_block_add(struct amdgpu_device *adev, * the module parameter virtual_display. This feature provides a virtual * display hardware on headless boards or in virtualized environments. * This function parses and validates the configuration string specified by - * the user and configues the virtual display configuration (number of + * the user and configures the virtual display configuration (number of * virtual connectors, crtcs, etc.) specified. */ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) @@ -2444,7 +2492,7 @@ void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev) * @adev: amdgpu_device pointer * * Parses the asic configuration parameters specified in the gpu info - * firmware and makes them availale to the driver for use in configuring + * firmware and makes them available to the driver for use in configuring * the asic. * Returns 0 on success, -EINVAL on failure. 
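
The amdgpu_device_get_vbios_flags() helper used above replaces the old amdgpu_device_read_bios() check with two distinct behaviours: devices that expose an AID mask and are APUs skip the VBIOS outright (no posting, no image), while AID-mask devices running in passthrough treat the image as optional and continue if none can be read. A small sketch of that decision, with the device state reduced to three illustrative booleans:

#include <stdbool.h>
#include <stdint.h>

#define AMDGPU_VBIOS_SKIP     (1U << 0)
#define AMDGPU_VBIOS_OPTIONAL (1U << 1)

struct dev_state {
    bool has_aid_mask;  /* hweight32(adev->aid_mask) != 0 */
    bool is_apu;        /* adev->flags & AMD_IS_APU */
    bool passthrough;   /* amdgpu_passthrough(adev) */
};

static uint32_t vbios_flags(const struct dev_state *s)
{
    if (s->has_aid_mask && s->is_apu)
        return AMDGPU_VBIOS_SKIP;      /* never post, never read an image */
    if (s->has_aid_mask && s->passthrough)
        return AMDGPU_VBIOS_OPTIONAL;  /* try to read one, carry on without */
    return 0;                          /* VBIOS still required, as before */
}
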
*/ @@ -2485,6 +2533,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_gpu_info.bin", chip_name); if (err) { dev_err(adev->dev, @@ -2504,7 +2553,7 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) le32_to_cpu(hdr->header.ucode_array_offset_bytes)); /* - * Should be droped when DAL no longer needs it. + * Should be dropped when DAL no longer needs it. */ if (adev->asic_type == CHIP_NAVI12) goto parse_soc_bounding_box; @@ -2574,8 +2623,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) { struct amdgpu_ip_block *ip_block; struct pci_dev *parent; + bool total, skip_bios; + uint32_t bios_flags; int i, r; - bool total; amdgpu_device_enable_virtual_display(adev); @@ -2688,16 +2738,31 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (r) return r; + bios_flags = amdgpu_device_get_vbios_flags(adev); + skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP); /* Read BIOS */ - if (amdgpu_device_read_bios(adev)) { - if (!amdgpu_get_bios(adev)) + if (!skip_bios) { + bool optional = + !!(bios_flags & AMDGPU_VBIOS_OPTIONAL); + if (!amdgpu_get_bios(adev) && !optional) return -EINVAL; - r = amdgpu_atombios_init(adev); - if (r) { - dev_err(adev->dev, "amdgpu_atombios_init failed\n"); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0); - return r; + if (optional && !adev->bios) + dev_info( + adev->dev, + "VBIOS image optional, proceeding without VBIOS image"); + + if (adev->bios) { + r = amdgpu_atombios_init(adev); + if (r) { + dev_err(adev->dev, + "amdgpu_atombios_init failed\n"); + amdgpu_vf_error_put( + adev, + AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, + 0, 0); + return r; + } } } @@ -2710,6 +2775,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (!total) return -ENODEV; + if (adev->gmc.xgmi.supported) + amdgpu_xgmi_early_init(adev); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); if (ip_block->status.valid != false) amdgpu_amdkfd_device_probe(adev); @@ -2819,6 +2887,12 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) { + struct drm_sched_init_args args = { + .ops = &amdgpu_sched_ops, + .num_rqs = DRM_SCHED_PRIORITY_COUNT, + .timeout_wq = adev->reset_domain->wq, + .dev = adev->dev, + }; long timeout; int r, i; @@ -2844,12 +2918,12 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) break; } - r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, NULL, - DRM_SCHED_PRIORITY_COUNT, - ring->num_hw_submission, 0, - timeout, adev->reset_domain->wq, - ring->sched_score, ring->name, - adev->dev); + args.timeout = timeout; + args.credit_limit = ring->num_hw_submission; + args.score = ring->sched_score; + args.name = ring->name; + + r = drm_sched_init(&ring->sched, &args); if (r) { DRM_ERROR("Failed to create scheduler on ring %s.\n", ring->name); @@ -3052,6 +3126,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) amdgpu_fru_get_product_info(adev); + if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev)) + r = amdgpu_cper_init(adev); + init_failed: return r; @@ -3064,7 +3141,7 @@ init_failed: * * Writes a reset magic value to the gart pointer in VRAM. The driver calls * this function before a GPU reset. If the value is retained after a - * GPU reset, VRAM has not been lost. Some GPU resets may destry VRAM contents. 
+ * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents. */ static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) { @@ -3140,7 +3217,7 @@ int amdgpu_device_set_cg_state(struct amdgpu_device *adev, adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && adev->ip_blocks[i].version->funcs->set_clockgating_state) { /* enable clockgating to save power */ - r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, + r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i], state); if (r) { DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", @@ -3177,7 +3254,7 @@ int amdgpu_device_set_pg_state(struct amdgpu_device *adev, adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && adev->ip_blocks[i].version->funcs->set_powergating_state) { /* enable powergating to save power */ - r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, + r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i], state); if (r) { DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", @@ -3379,7 +3456,7 @@ static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) amdgpu_amdkfd_suspend(adev, false); - /* Workaroud for ASICs need to disable SMC first */ + /* Workaround for ASICs need to disable SMC first */ amdgpu_device_smu_fini_early(adev); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { @@ -3412,6 +3489,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) { int i, r; + amdgpu_cper_fini(adev); + if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) amdgpu_virt_release_ras_err_handler_data(adev); @@ -3481,7 +3560,7 @@ static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) WARN_ON_ONCE(adev->gfx.gfx_off_state); WARN_ON_ONCE(adev->gfx.gfx_off_req_count); - if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) + if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0)) adev->gfx.gfx_off_state = true; } @@ -4111,11 +4190,6 @@ static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev) } #endif -static const struct attribute *amdgpu_dev_attributes[] = { - &dev_attr_pcie_replay_count.attr, - NULL -}; - static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) { if (amdgpu_mcbp == 1) @@ -4212,7 +4286,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->grbm_idx_mutex); mutex_init(&adev->mn_lock); mutex_init(&adev->virt.vf_errors.lock); - mutex_init(&adev->virt.rlcg_reg_lock); hash_init(adev->mn_hash); mutex_init(&adev->psp.mutex); mutex_init(&adev->notifier_lock); @@ -4221,7 +4294,14 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->gfx.reset_sem_mutex); /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */ mutex_init(&adev->enforce_isolation_mutex); + for (i = 0; i < MAX_XCP; ++i) { + adev->isolation[i].spearhead = dma_fence_get_stub(); + amdgpu_sync_create(&adev->isolation[i].active); + amdgpu_sync_create(&adev->isolation[i].prev); + } mutex_init(&adev->gfx.kfd_sch_mutex); + mutex_init(&adev->gfx.workload_profile_mutex); + mutex_init(&adev->vcn.workload_profile_mutex); amdgpu_device_init_apu_flags(adev); @@ -4238,6 +4318,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->se_cac_idx_lock); spin_lock_init(&adev->audio_endpt_idx_lock); spin_lock_init(&adev->mm_stats.lock); + spin_lock_init(&adev->virt.rlcg_reg_lock); spin_lock_init(&adev->wb.lock); 
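
Several hunks above switch the set_clockgating_state/set_powergating_state/get_clockgating_state callbacks from a void-casted adev to the struct amdgpu_ip_block pointer itself. Judging only from the callsites in this patch (the amd_ip_funcs table that declares them is not part of these hunks), an implementation now has roughly this shape:

static int example_set_clockgating_state(struct amdgpu_ip_block *ip_block,
                                         enum amd_clockgating_state state)
{
    struct amdgpu_device *adev = ip_block->adev;

    /* program this block's clockgating on adev according to state ... */
    return 0;
}
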
INIT_LIST_HEAD(&adev->reset_list); @@ -4282,10 +4363,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, * for throttling interrupt) = 60 seconds. */ ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1); - ratelimit_state_init(&adev->virt.ras_telemetry_rs, 5 * HZ, 1); ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE); - ratelimit_set_flags(&adev->virt.ras_telemetry_rs, RATELIMIT_MSG_ON_RELEASE); /* Registers mapping */ /* TODO: block userspace mapping of io register */ @@ -4309,7 +4388,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, /* * Reset domain needs to be present early, before XGMI hive discovered - * (if any) and intitialized to use reset sem and in_gpu reset flag + * (if any) and initialized to use reset sem and in_gpu reset flag * early on during init and before calling to RREG32. */ adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); @@ -4317,7 +4396,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, return -ENOMEM; /* detect hw virtualization here */ - amdgpu_detect_virtualization(adev); + amdgpu_virt_init(adev); amdgpu_device_get_pcie_info(adev); @@ -4340,10 +4419,17 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; - /* Get rid of things like offb */ - r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name); - if (r) - return r; + /* + * No need to remove conflicting FBs for non-display class devices. + * This prevents the sysfb from being freed accidently. + */ + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA || + (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) { + /* Get rid of things like offb */ + r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name); + if (r) + return r; + } /* Enable TMZ based on IP_VERSION */ amdgpu_gmc_tmz_set(adev); @@ -4461,8 +4547,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, goto failed; } /* init i2c buses */ - if (!amdgpu_device_has_dc_support(adev)) - amdgpu_atombios_i2c_init(adev); + amdgpu_i2c_init(adev); } } @@ -4555,7 +4640,7 @@ fence_driver_init: } else adev->ucode_sysfs_en = true; - r = sysfs_create_files(&adev->dev->kobj, amdgpu_dev_attributes); + r = amdgpu_device_attr_sysfs_init(adev); if (r) dev_err(adev->dev, "Could not create amdgpu device attr\n"); @@ -4599,6 +4684,11 @@ fence_driver_init: amdgpu_device_check_iommu_direct_map(adev); + adev->pm_nb.notifier_call = amdgpu_device_pm_notifier; + r = register_pm_notifier(&adev->pm_nb); + if (r) + goto failed; + return 0; release_ras_con: @@ -4663,6 +4753,8 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) drain_workqueue(adev->mman.bdev.wq); adev->shutdown = true; + unregister_pm_notifier(&adev->pm_nb); + /* make sure IB test finished before entering exclusive mode * to avoid preemption on IB test */ @@ -4685,7 +4777,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) amdgpu_pm_sysfs_fini(adev); if (adev->ucode_sysfs_en) amdgpu_ucode_sysfs_fini(adev); - sysfs_remove_files(&adev->dev->kobj, amdgpu_dev_attributes); + amdgpu_device_attr_sysfs_fini(adev); amdgpu_fru_sysfs_fini(adev); amdgpu_reg_state_sysfs_fini(adev); @@ -4712,7 +4804,7 @@ void amdgpu_device_fini_hw(struct amdgpu_device *adev) void amdgpu_device_fini_sw(struct amdgpu_device *adev) { - int idx; + int i, idx; bool px; amdgpu_device_ip_fini(adev); @@ -4720,22 +4812,29 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev) amdgpu_ucode_release(&adev->firmware.gpu_info_fw); adev->accel_working = false; 
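
The aperture_remove_conflicting_pci_devices() call above is now limited to display-class functions so that probing a headless/compute-only device does not tear down the system framebuffer (sysfb) by accident. pdev->class packs base class, subclass and prog-if into 24 bits, so shifting right by 8 leaves the 16-bit base-class/subclass pair being compared. A sketch of just that check:

#include <linux/pci.h>

/* Only VGA- or other-display-class functions can own the firmware framebuffer. */
static bool owns_firmware_fb(struct pci_dev *pdev)
{
    u16 class = pdev->class >> 8;   /* drop the prog-if byte */

    return class == PCI_CLASS_DISPLAY_VGA ||
           class == PCI_CLASS_DISPLAY_OTHER;
}
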
dma_fence_put(rcu_dereference_protected(adev->gang_submit, true)); + for (i = 0; i < MAX_XCP; ++i) { + dma_fence_put(adev->isolation[i].spearhead); + amdgpu_sync_free(&adev->isolation[i].active); + amdgpu_sync_free(&adev->isolation[i].prev); + } amdgpu_reset_fini(adev); /* free i2c buses */ - if (!amdgpu_device_has_dc_support(adev)) - amdgpu_i2c_fini(adev); - - if (amdgpu_emu_mode != 1) - amdgpu_atombios_fini(adev); + amdgpu_i2c_fini(adev); - kfree(adev->bios); - adev->bios = NULL; + if (adev->bios) { + if (amdgpu_emu_mode != 1) + amdgpu_atombios_fini(adev); + amdgpu_bios_release(adev); + } kfree(adev->fru_info); adev->fru_info = NULL; + kfree(adev->xcp_mgr); + adev->xcp_mgr = NULL; + px = amdgpu_device_supports_px(adev_to_drm(adev)); if (px || (!dev_is_removable(&adev->pdev->dev) && @@ -4781,8 +4880,8 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) { int ret; - /* No need to evict vram on APUs for suspend to ram or s2idle */ - if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU)) + /* No need to evict vram on APUs unless going to S4 */ + if (!adev->in_s4 && (adev->flags & AMD_IS_APU)) return 0; ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); @@ -4795,6 +4894,41 @@ static int amdgpu_device_evict_resources(struct amdgpu_device *adev) * Suspend & resume. */ /** + * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events + * @nb: notifier block + * @mode: suspend mode + * @data: data + * + * This function is called when the system is about to suspend or hibernate. + * It is used to evict resources from the device before the system goes to + * sleep while there is still access to swap. + */ +static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, + void *data) +{ + struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb); + int r; + + switch (mode) { + case PM_HIBERNATION_PREPARE: + adev->in_s4 = true; + fallthrough; + case PM_SUSPEND_PREPARE: + r = amdgpu_device_evict_resources(adev); + /* + * This is considered non-fatal at this time because + * amdgpu_device_prepare() will also fatally evict resources. 
+ * See https://gitlab.freedesktop.org/drm/amd/-/issues/3781 + */ + if (r) + drm_warn(adev_to_drm(adev), "Failed to evict resources, freeze active processes if problems occur: %d\n", r); + break; + } + + return NOTIFY_DONE; +} + +/** * amdgpu_device_prepare - prepare for device suspend * * @dev: drm dev pointer @@ -4833,7 +4967,7 @@ int amdgpu_device_prepare(struct drm_device *dev) return 0; unprepare: - adev->in_s0ix = adev->in_s3 = false; + adev->in_s0ix = adev->in_s3 = adev->in_s4 = false; return r; } @@ -5184,7 +5318,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (r) return r; - amdgpu_ras_set_fed(adev, false); + amdgpu_ras_clear_err_state(adev); amdgpu_irq_gpu_reset_resume_helper(adev); /* some sw clean up VF needs to do before recover */ @@ -5232,6 +5366,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) amdgpu_ras_resume(adev); @@ -5241,16 +5376,18 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, } /** - * amdgpu_device_has_job_running - check if there is any job in mirror list + * amdgpu_device_has_job_running - check if there is any unfinished job * * @adev: amdgpu_device pointer * - * check if there is any job in mirror list + * check if there is any job running on the device when guest driver receives + * FLR notification from host driver. If there are still jobs running, then + * the guest driver will not respond the FLR reset. Instead, let the job hit + * the timeout and guest driver then issue the reset request. */ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) { int i; - struct drm_sched_job *job; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -5258,11 +5395,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev) if (!amdgpu_ring_sched_ready(ring)) continue; - spin_lock(&ring->sched.job_list_lock); - job = list_first_entry_or_null(&ring->sched.pending_list, - struct drm_sched_job, list); - spin_unlock(&ring->sched.job_list_lock); - if (job) + if (amdgpu_fence_count_emitted(ring)) return true; } return false; @@ -5324,7 +5457,8 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) u32 i; int ret = 0; - amdgpu_atombios_scratch_regs_engine_hung(adev, true); + if (adev->bios) + amdgpu_atombios_scratch_regs_engine_hung(adev, true); dev_info(adev->dev, "GPU mode1 reset\n"); @@ -5366,7 +5500,8 @@ int amdgpu_device_mode1_reset(struct amdgpu_device *adev) goto mode1_reset_failed; } - amdgpu_atombios_scratch_regs_engine_hung(adev, false); + if (adev->bios) + amdgpu_atombios_scratch_regs_engine_hung(adev, false); return 0; @@ -5487,7 +5622,7 @@ int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) amdgpu_set_init_level(tmp_adev, init_level); if (full_reset) { /* post card */ - amdgpu_ras_set_fed(tmp_adev, false); + amdgpu_ras_clear_err_state(tmp_adev); r = amdgpu_device_asic_init(tmp_adev); if (r) { dev_warn(tmp_adev->dev, "asic atom init failed!"); @@ -5821,6 +5956,18 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, int retry_limit = AMDGPU_MAX_RETRY_LIMIT; /* + * If it reaches here because of hang/timeout and a RAS error is + * detected at the same time, let RAS recovery take care of it. 
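
The suspend/hibernate notifier added in the hunks above hooks the standard PM notifier chain so VRAM can be evicted while swap is still writable; PM_HIBERNATION_PREPARE additionally marks the device as entering S4. A minimal sketch of that wiring, where driver_evict() is a hypothetical stand-in for amdgpu_device_evict_resources():

#include <linux/suspend.h>

void driver_evict(void);    /* hypothetical: evict VRAM before the system sleeps */

static int example_pm_notifier(struct notifier_block *nb, unsigned long mode,
                               void *data)
{
    switch (mode) {
    case PM_HIBERNATION_PREPARE:    /* about to enter S4 */
    case PM_SUSPEND_PREPARE:        /* about to enter S3 */
        driver_evict();
        break;
    }
    return NOTIFY_DONE;
}

/* probe:  nb->notifier_call = example_pm_notifier; register_pm_notifier(nb);
 * remove: unregister_pm_notifier(nb); */
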
+ */ + if (amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY) && + !amdgpu_sriov_vf(adev) && + reset_context->src != AMDGPU_RESET_SRC_RAS) { + dev_dbg(adev->dev, + "Gpu recovery from source: %d yielding to RAS error recovery handling", + reset_context->src); + return 0; + } + /* * Special case: RAS triggered and full reset isn't supported */ need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); @@ -5903,7 +6050,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_amdkfd_pre_reset(tmp_adev, reset_context); /* - * Mark these ASICs to be reseted as untracked first + * Mark these ASICs to be reset as untracked first * And add them back after reset completed */ amdgpu_unregister_gpu_instance(tmp_adev); @@ -6060,6 +6207,10 @@ end_reset: dev_info(adev->dev, "GPU reset end with ret = %d\n", r); atomic_set(&adev->reset_domain->reset_res, r); + + if (!r) + drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE); + return r; } @@ -6102,19 +6253,56 @@ static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev, } /** + * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU + * + * @adev: amdgpu_device pointer + * @speed: pointer to the speed of the link + * @width: pointer to the width of the link + * + * Evaluate the hierarchy to find the speed and bandwidth capabilities of the + * AMD dGPU which may be a virtual upstream bridge. + */ +static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev, + enum pci_bus_speed *speed, + enum pcie_link_width *width) +{ + struct pci_dev *parent = adev->pdev; + + if (!speed || !width) + return; + + parent = pci_upstream_bridge(parent); + if (parent && parent->vendor == PCI_VENDOR_ID_ATI) { + /* use the upstream/downstream switches internal to dGPU */ + *speed = pcie_get_speed_cap(parent); + *width = pcie_get_width_cap(parent); + while ((parent = pci_upstream_bridge(parent))) { + if (parent->vendor == PCI_VENDOR_ID_ATI) { + /* use the upstream/downstream switches internal to dGPU */ + *speed = pcie_get_speed_cap(parent); + *width = pcie_get_width_cap(parent); + } + } + } else { + /* use the device itself */ + *speed = pcie_get_speed_cap(adev->pdev); + *width = pcie_get_width_cap(adev->pdev); + } +} + +/** * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot * * @adev: amdgpu_device pointer * - * Fetchs and stores in the driver the PCIE capabilities (gen speed + * Fetches and stores in the driver the PCIE capabilities (gen speed * and lanes) of the slot the device is in. Handles APUs and * virtualized environments where PCIE config space may not be available. 
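
amdgpu_device_gpu_bandwidth() above lets the driver report the dGPU-side link capabilities (taken from any AMD-internal upstream switches, or from the device itself) alongside the platform-side capabilities. In the pcie_mlw_mask hunks that follow, each CAIL_* switch builds a cumulative mask, since a link that trains at xN also supports every narrower width, and the platform branch changes from '=' to '|=' so the ASIC and platform contributions are OR'ed together instead of the last one overwriting the other. A standalone sketch of that cumulative/OR computation, using made-up contiguous bit positions rather than the real CAIL_* mask values:

#include <stdint.h>
#include <stdio.h>

static const unsigned int widths[] = { 1, 2, 4, 8, 12, 16, 32 };

/* Bit i stands for "width widths[i] supported". */
static uint32_t width_support_mask(unsigned int link_width)
{
    uint32_t mask = 0;

    for (unsigned int i = 0; i < 7; i++)
        if (widths[i] <= link_width)
            mask |= 1U << i;
    return mask;
}

int main(void)
{
    /* e.g. an x8-capable ASIC sitting in an x16-capable slot */
    printf("combined mask 0x%x\n",
           width_support_mask(8) | width_support_mask(16));
    return 0;
}
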
*/ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) { - struct pci_dev *pdev; enum pci_bus_speed speed_cap, platform_speed_cap; - enum pcie_link_width platform_link_width; + enum pcie_link_width platform_link_width, link_width; if (amdgpu_pcie_gen_cap) adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; @@ -6136,11 +6324,10 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) amdgpu_device_partner_bandwidth(adev, &platform_speed_cap, &platform_link_width); + amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width); if (adev->pm.pcie_gen_mask == 0) { /* asic caps */ - pdev = adev->pdev; - speed_cap = pcie_get_speed_cap(pdev); if (speed_cap == PCI_SPEED_UNKNOWN) { adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | @@ -6196,51 +6383,103 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) } } if (adev->pm.pcie_mlw_mask == 0) { + /* asic caps */ + if (link_width == PCIE_LNK_WIDTH_UNKNOWN) { + adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK; + } else { + switch (link_width) { + case PCIE_LNK_X32: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X16: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X12: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X8: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X4: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X2: + adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); + break; + case PCIE_LNK_X1: + adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1; + break; + default: + break; + } + } + /* platform caps */ if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) { adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; } else { switch (platform_link_width) { case PCIE_LNK_X32: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X16: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | - 
CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X12: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X8: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X4: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X2: - adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | - CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); + adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | + CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); break; case PCIE_LNK_X1: - adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; + adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; break; default: break; @@ -6700,22 +6939,117 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, { struct dma_fence *old = NULL; + dma_fence_get(gang); do { dma_fence_put(old); old = amdgpu_device_get_gang(adev); if (old == gang) break; - if (!dma_fence_is_signaled(old)) + if (!dma_fence_is_signaled(old)) { + dma_fence_put(gang); return old; + } } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit, old, gang) != old); + /* + * Drop it once for the exchanged reference in adev and once for the + * thread local reference acquired in amdgpu_device_get_gang(). + */ + dma_fence_put(old); dma_fence_put(old); return NULL; } +/** + * amdgpu_device_enforce_isolation - enforce HW isolation + * @adev: the amdgpu device pointer + * @ring: the HW ring the job is supposed to run on + * @job: the job which is about to be pushed to the HW ring + * + * Makes sure that only one client at a time can use the GFX block. + * Returns: The dependency to wait on before the job can be pushed to the HW. + * The function is called multiple times until NULL is returned. + */ +struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_job *job) +{ + struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id]; + struct drm_sched_fence *f = job->base.s_fence; + struct dma_fence *dep; + void *owner; + int r; + + /* + * For now enforce isolation only for the GFX block since we only need + * the cleaner shader on those rings. 
+ */ + if (ring->funcs->type != AMDGPU_RING_TYPE_GFX && + ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) + return NULL; + + /* + * All submissions where enforce isolation is false are handled as if + * they come from a single client. Use ~0l as the owner to distinguish it + * from kernel submissions where the owner is NULL. + */ + owner = job->enforce_isolation ? f->owner : (void *)~0l; + + mutex_lock(&adev->enforce_isolation_mutex); + + /* + * The "spearhead" submission is the first one which changes the + * ownership to its client. We always need to wait for it to be + * pushed to the HW before proceeding with anything. + */ + if (&f->scheduled != isolation->spearhead && + !dma_fence_is_signaled(isolation->spearhead)) { + dep = isolation->spearhead; + goto out_grab_ref; + } + + if (isolation->owner != owner) { + + /* + * Wait for any gang to be assembled before switching to a + * different owner or otherwise we could deadlock the + * submissions. + */ + if (!job->gang_submit) { + dep = amdgpu_device_get_gang(adev); + if (!dma_fence_is_signaled(dep)) + goto out_return_dep; + dma_fence_put(dep); + } + + dma_fence_put(isolation->spearhead); + isolation->spearhead = dma_fence_get(&f->scheduled); + amdgpu_sync_move(&isolation->active, &isolation->prev); + trace_amdgpu_isolation(isolation->owner, owner); + isolation->owner = owner; + } + + /* + * Specifying the ring here helps to pipeline submissions even when + * isolation is enabled. If that is not desired for testing, NULL can be + * used instead of the ring to enforce a CPU round trip while switching + * between clients. + */ + dep = amdgpu_sync_peek_fence(&isolation->prev, ring); + r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT); + if (r) + DRM_WARN("OOM tracking isolation\n"); + +out_grab_ref: + dma_fence_get(dep); +out_return_dep: + mutex_unlock(&adev->enforce_isolation_mutex); + return dep; +} + bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev) { switch (adev->asic_type) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 1040204ac8b9..dc2713ec95a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -1,5 +1,5 @@ /* - * Copyright 2018 Advanced Micro Devices, Inc. + * Copyright 2018-2024 Advanced Micro Devices, Inc. All rights reserved.
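The amdgpu_device_enforce_isolation() helper added above is documented as returning one dependency per call until it returns NULL; in the driver that fence is meant to be handed back to the DRM scheduler from the job-preparation path rather than waited on inline. The loop below is only an illustration of that calling contract, with adev, ring and job assumed to be in scope.

	struct dma_fence *dep;

	/* keep asking for isolation dependencies until none are left */
	while ((dep = amdgpu_device_enforce_isolation(adev, ring, job))) {
		dma_fence_wait(dep, false);	/* the scheduler would wait asynchronously */
		dma_fence_put(dep);		/* the helper returned a reference */
	}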
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -104,15 +104,22 @@ #include "smuio_v13_0_6.h" #include "smuio_v14_0_2.h" #include "vcn_v5_0_0.h" +#include "vcn_v5_0_1.h" #include "jpeg_v5_0_0.h" +#include "jpeg_v5_0_1.h" #include "amdgpu_vpe.h" #if defined(CONFIG_DRM_AMD_ISP) #include "amdgpu_isp.h" #endif -#define FIRMWARE_IP_DISCOVERY "amdgpu/ip_discovery.bin" -MODULE_FIRMWARE(FIRMWARE_IP_DISCOVERY); +MODULE_FIRMWARE("amdgpu/ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/vega10_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/vega12_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/vega20_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/raven_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/raven2_ip_discovery.bin"); +MODULE_FIRMWARE("amdgpu/picasso_ip_discovery.bin"); #define mmIP_DISCOVERY_VERSION 0x16A00 #define mmRCC_CONFIG_MEMSIZE 0xde3 @@ -295,21 +302,13 @@ static int amdgpu_discovery_read_binary_from_mem(struct amdgpu_device *adev, return ret; } -static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, uint8_t *binary) +static int amdgpu_discovery_read_binary_from_file(struct amdgpu_device *adev, + uint8_t *binary, + const char *fw_name) { const struct firmware *fw; - const char *fw_name; int r; - switch (amdgpu_discovery) { - case 2: - fw_name = FIRMWARE_IP_DISCOVERY; - break; - default: - dev_warn(adev->dev, "amdgpu_discovery is not set properly\n"); - return -EINVAL; - } - r = request_firmware(&fw, fw_name, adev->dev); if (r) { dev_err(adev->dev, "can't load firmware \"%s\"\n", @@ -402,10 +401,39 @@ static int amdgpu_discovery_verify_npsinfo(struct amdgpu_device *adev, return 0; } +static const char *amdgpu_discovery_get_fw_name(struct amdgpu_device *adev) +{ + if (amdgpu_discovery == 2) + return "amdgpu/ip_discovery.bin"; + + switch (adev->asic_type) { + case CHIP_VEGA10: + return "amdgpu/vega10_ip_discovery.bin"; + case CHIP_VEGA12: + return "amdgpu/vega12_ip_discovery.bin"; + case CHIP_RAVEN: + if (adev->apu_flags & AMD_APU_IS_RAVEN2) + return "amdgpu/raven2_ip_discovery.bin"; + else if (adev->apu_flags & AMD_APU_IS_PICASSO) + return "amdgpu/picasso_ip_discovery.bin"; + else + return "amdgpu/raven_ip_discovery.bin"; + case CHIP_VEGA20: + return "amdgpu/vega20_ip_discovery.bin"; + case CHIP_ARCTURUS: + return "amdgpu/arcturus_ip_discovery.bin"; + case CHIP_ALDEBARAN: + return "amdgpu/aldebaran_ip_discovery.bin"; + default: + return NULL; + } +} + static int amdgpu_discovery_init(struct amdgpu_device *adev) { struct table_info *info; struct binary_header *bhdr; + const char *fw_name; uint16_t offset; uint16_t size; uint16_t checksum; @@ -417,9 +445,10 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev) return -ENOMEM; /* Read from file if it is the preferred option */ - if (amdgpu_discovery == 2) { + fw_name = amdgpu_discovery_get_fw_name(adev); + if (fw_name != NULL) { dev_info(adev->dev, "use ip discovery information from file"); - r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin); + r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin, fw_name); if (r) { dev_err(adev->dev, "failed to read ip discovery binary from file\n"); @@ -585,16 +614,19 @@ void amdgpu_discovery_fini(struct amdgpu_device *adev) adev->mman.discovery_bin = NULL; } -static int amdgpu_discovery_validate_ip(const struct ip_v4 *ip) +static int amdgpu_discovery_validate_ip(struct amdgpu_device *adev, + uint8_t instance, uint16_t hw_id) { - if 
(ip->instance_number >= HWIP_MAX_INSTANCE) { - DRM_ERROR("Unexpected instance_number (%d) from ip discovery blob\n", - ip->instance_number); + if (instance >= HWIP_MAX_INSTANCE) { + dev_err(adev->dev, + "Unexpected instance_number (%d) from ip discovery blob\n", + instance); return -EINVAL; } - if (le16_to_cpu(ip->hw_id) >= HW_ID_MAX) { - DRM_ERROR("Unexpected hw_id (%d) from ip discovery blob\n", - le16_to_cpu(ip->hw_id)); + if (hw_id >= HW_ID_MAX) { + dev_err(adev->dev, + "Unexpected hw_id (%d) from ip discovery blob\n", + hw_id); return -EINVAL; } @@ -607,8 +639,10 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev, struct binary_header *bhdr; struct ip_discovery_header *ihdr; struct die_header *dhdr; - struct ip_v4 *ip; + struct ip *ip; uint16_t die_offset, ip_offset, num_dies, num_ips; + uint16_t hw_id; + uint8_t inst; int i, j; bhdr = (struct binary_header *)adev->mman.discovery_bin; @@ -624,16 +658,18 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev, ip_offset = die_offset + sizeof(*dhdr); for (j = 0; j < num_ips; j++) { - ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset); - - if (amdgpu_discovery_validate_ip(ip)) + ip = (struct ip *)(adev->mman.discovery_bin + + ip_offset); + inst = ip->number_instance; + hw_id = le16_to_cpu(ip->hw_id); + if (amdgpu_discovery_validate_ip(adev, inst, hw_id)) goto next_ip; - if (le16_to_cpu(ip->variant) == 1) { - switch (le16_to_cpu(ip->hw_id)) { + if (ip->harvest == 1) { + switch (hw_id) { case VCN_HWID: (*vcn_harvest_count)++; - if (ip->instance_number == 0) { + if (inst == 0) { adev->vcn.harvest_config |= AMDGPU_VCN_HARVEST_VCN0; adev->vcn.inst_mask &= ~AMDGPU_VCN_HARVEST_VCN0; @@ -655,10 +691,8 @@ static void amdgpu_discovery_read_harvest_bit_per_ip(struct amdgpu_device *adev, } } next_ip: - if (ihdr->base_addr_64_bit) - ip_offset += struct_size(ip, base_address_64, ip->num_base_address); - else - ip_offset += struct_size(ip, base_address, ip->num_base_address); + ip_offset += struct_size(ip, base_address, + ip->num_base_address); } } } @@ -1017,6 +1051,8 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev, bool reg_base_64) { int ii, jj, kk, res; + uint16_t hw_id; + uint8_t inst; DRM_DEBUG("num_ips:%d", num_ips); @@ -1032,8 +1068,10 @@ static int amdgpu_discovery_sysfs_ips(struct amdgpu_device *adev, struct ip_hw_instance *ip_hw_instance; ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset); - if (amdgpu_discovery_validate_ip(ip) || - le16_to_cpu(ip->hw_id) != ii) + inst = ip->instance_number; + hw_id = le16_to_cpu(ip->hw_id); + if (amdgpu_discovery_validate_ip(adev, inst, hw_id) || + hw_id != ii) goto next_ip; DRM_DEBUG("match:%d @ ip_offset:%zu", ii, ip_offset); @@ -1279,7 +1317,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) uint16_t die_offset; uint16_t ip_offset; uint16_t num_dies; + uint32_t wafl_ver; uint16_t num_ips; + uint16_t hw_id; + uint8_t inst; int hw_ip; int i, j, k; int r; @@ -1290,6 +1331,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) return r; } + wafl_ver = 0; adev->gfx.xcc_mask = 0; adev->sdma.sdma_mask = 0; adev->vcn.inst_mask = 0; @@ -1319,7 +1361,9 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) for (j = 0; j < num_ips; j++) { ip = (struct ip_v4 *)(adev->mman.discovery_bin + ip_offset); - if (amdgpu_discovery_validate_ip(ip)) + inst = ip->instance_number; + hw_id = le16_to_cpu(ip->hw_id); + if (amdgpu_discovery_validate_ip(adev, inst, hw_id)) 
goto next_ip; num_base_address = ip->num_base_address; @@ -1340,7 +1384,7 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) */ if (adev->vcn.num_vcn_inst < AMDGPU_MAX_VCN_INSTANCES) { - adev->vcn.vcn_config[adev->vcn.num_vcn_inst] = + adev->vcn.inst[adev->vcn.num_vcn_inst].vcn_config = ip->revision & 0xc0; adev->vcn.num_vcn_inst++; adev->vcn.inst_mask |= @@ -1388,6 +1432,10 @@ static int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev) adev->gfx.xcc_mask |= (1U << ip->instance_number); + if (!wafl_ver && le16_to_cpu(ip->hw_id) == WAFLC_HWID) + wafl_ver = IP_VERSION_FULL(ip->major, ip->minor, + ip->revision, 0, 0); + for (k = 0; k < num_base_address; k++) { /* * convert the endianness of base addresses in place, @@ -1453,22 +1501,32 @@ next_ip: } } + if (wafl_ver && !adev->ip_versions[XGMI_HWIP][0]) + adev->ip_versions[XGMI_HWIP][0] = wafl_ver; + return 0; } static void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev) { + struct ip_discovery_header *ihdr; + struct binary_header *bhdr; int vcn_harvest_count = 0; int umc_harvest_count = 0; + uint16_t offset, ihdr_ver; + bhdr = (struct binary_header *)adev->mman.discovery_bin; + offset = le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset); + ihdr = (struct ip_discovery_header *)(adev->mman.discovery_bin + + offset); + ihdr_ver = le16_to_cpu(ihdr->version); /* * Harvest table does not fit Navi1x and legacy GPUs, * so read harvest bit per IP data structure to set * harvest configuration. */ if (amdgpu_ip_version(adev, GC_HWIP, 0) < IP_VERSION(10, 2, 0) && - amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3) && - amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4)) { + ihdr_ver <= 2) { if ((adev->pdev->device == 0x731E && (adev->pdev->revision == 0xC6 || adev->pdev->revision == 0xC7)) || @@ -1705,7 +1763,7 @@ static int amdgpu_discovery_get_vcn_info(struct amdgpu_device *adev) * so this won't overflow. 
*/ for (v = 0; v < adev->vcn.num_vcn_inst; v++) { - adev->vcn.vcn_codec_disable_mask[v] = + adev->vcn.inst[v].vcn_codec_disable_mask = le32_to_cpu(vcn_info->v1.instance_info[v].fuse_data.all_bits); } break; @@ -1836,6 +1894,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); break; case IP_VERSION(10, 1, 10): @@ -1861,6 +1920,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): amdgpu_device_ip_block_add(adev, &soc21_common_ip_block); break; case IP_VERSION(12, 0, 0): @@ -1890,6 +1950,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); break; case IP_VERSION(10, 1, 10): @@ -1915,6 +1976,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block); break; case IP_VERSION(12, 0, 0): @@ -1994,6 +2056,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 12): case IP_VERSION(11, 0, 13): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 2): amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); break; case IP_VERSION(11, 0, 8): @@ -2013,6 +2076,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 8): case IP_VERSION(13, 0, 10): case IP_VERSION(13, 0, 11): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): case IP_VERSION(14, 0, 0): case IP_VERSION(14, 0, 1): @@ -2024,6 +2088,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(14, 0, 2): case IP_VERSION(14, 0, 3): + case IP_VERSION(14, 0, 5): amdgpu_device_ip_block_add(adev, &psp_v14_0_ip_block); break; default: @@ -2056,6 +2121,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 12): case IP_VERSION(11, 0, 13): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 2): amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block); break; case IP_VERSION(12, 0, 0): @@ -2074,6 +2140,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(13, 0, 10): case IP_VERSION(13, 0, 11): case IP_VERSION(13, 0, 14): + case IP_VERSION(13, 0, 12): amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block); break; case IP_VERSION(14, 0, 0): @@ -2081,6 +2148,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(14, 0, 2): case IP_VERSION(14, 0, 3): case IP_VERSION(14, 0, 4): + case IP_VERSION(14, 0, 5): amdgpu_device_ip_block_add(adev, &smu_v14_0_ip_block); break; default: @@ -2132,6 +2200,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(3, 2, 1): case IP_VERSION(3, 5, 0): case IP_VERSION(3, 5, 1): + case IP_VERSION(3, 6, 0): case IP_VERSION(4, 1, 0): /* TODO: Fix IP version. 
DC code expects version 4.0.1 */ if (adev->ip_versions[DCE_HWIP][0] == IP_VERSION(4, 1, 0)) @@ -2184,6 +2253,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): amdgpu_device_ip_block_add(adev, &gfx_v9_4_3_ip_block); break; case IP_VERSION(10, 1, 10): @@ -2209,6 +2279,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block); break; case IP_VERSION(12, 0, 0): @@ -2238,6 +2309,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(4, 4, 2): case IP_VERSION(4, 4, 5): + case IP_VERSION(4, 4, 4): amdgpu_device_ip_block_add(adev, &sdma_v4_4_2_ip_block); break; case IP_VERSION(5, 0, 0): @@ -2263,6 +2335,7 @@ static int amdgpu_discovery_set_sdma_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(6, 1, 0): case IP_VERSION(6, 1, 1): case IP_VERSION(6, 1, 2): + case IP_VERSION(6, 1, 3): amdgpu_device_ip_block_add(adev, &sdma_v6_0_ip_block); break; case IP_VERSION(7, 0, 0): @@ -2361,6 +2434,10 @@ static int amdgpu_discovery_set_mm_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vcn_v5_0_0_ip_block); amdgpu_device_ip_block_add(adev, &jpeg_v5_0_0_ip_block); break; + case IP_VERSION(5, 0, 1): + amdgpu_device_ip_block_add(adev, &vcn_v5_0_1_ip_block); + amdgpu_device_ip_block_add(adev, &jpeg_v5_0_1_ip_block); + break; default: dev_err(adev->dev, "Failed to add vcn/jpeg ip block(UVD_HWIP:0x%x)\n", @@ -2382,6 +2459,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block); adev->enable_mes = true; adev->enable_mes_kiq = true; @@ -2405,6 +2483,7 @@ static void amdgpu_discovery_init_soc_config(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): aqua_vanjaram_init_soc_config(adev); break; default: @@ -2468,6 +2547,38 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_RAVEN: + case CHIP_VEGA20: + case CHIP_ARCTURUS: + case CHIP_ALDEBARAN: + /* this is not fatal. We have a fallback below + * if the new firmwares are not present. some of + * this will be overridden below to keep things + * consistent with the current behavior. 
+ */ + r = amdgpu_discovery_reg_base_init(adev); + if (!r) { + amdgpu_discovery_harvest_ip(adev); + amdgpu_discovery_get_gfx_info(adev); + amdgpu_discovery_get_mall_info(adev); + amdgpu_discovery_get_vcn_info(adev); + } + break; + default: + r = amdgpu_discovery_reg_base_init(adev); + if (r) + return -EINVAL; + + amdgpu_discovery_harvest_ip(adev); + amdgpu_discovery_get_gfx_info(adev); + amdgpu_discovery_get_mall_info(adev); + amdgpu_discovery_get_vcn_info(adev); + break; + } + + switch (adev->asic_type) { + case CHIP_VEGA10: vega10_reg_base_init(adev); adev->sdma.num_instances = 2; adev->gmc.num_umc = 4; @@ -2630,14 +2741,6 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 1, 0); break; default: - r = amdgpu_discovery_reg_base_init(adev); - if (r) - return -EINVAL; - - amdgpu_discovery_harvest_ip(adev); - amdgpu_discovery_get_gfx_info(adev); - amdgpu_discovery_get_mall_info(adev); - amdgpu_discovery_get_vcn_info(adev); break; } @@ -2652,6 +2755,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): adev->family = AMDGPU_FAMILY_AI; break; case IP_VERSION(9, 1, 0): @@ -2695,6 +2799,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): adev->family = AMDGPU_FAMILY_GC_11_5_0; break; case IP_VERSION(12, 0, 0): @@ -2720,19 +2825,13 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): adev->flags |= AMD_IS_APU; break; default: break; } - if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(4, 8, 0)) - adev->gmc.xgmi.supported = true; - - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) - adev->ip_versions[XGMI_HWIP][0] = IP_VERSION(6, 4, 0); - /* set NBIO version */ switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(6, 1, 0): @@ -2753,11 +2852,13 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg; break; case IP_VERSION(7, 9, 0): + case IP_VERSION(7, 9, 1): adev->nbio.funcs = &nbio_v7_9_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_9_hdp_flush_reg; break; case IP_VERSION(7, 11, 0): case IP_VERSION(7, 11, 1): + case IP_VERSION(7, 11, 2): case IP_VERSION(7, 11, 3): adev->nbio.funcs = &nbio_v7_11_funcs; adev->nbio.hdp_flush_reg = &nbio_v7_11_hdp_flush_reg; @@ -2885,6 +2986,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) case IP_VERSION(11, 0, 10): case IP_VERSION(11, 0, 11): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 2): case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 9): case IP_VERSION(13, 0, 10): @@ -2894,6 +2996,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev) adev->smuio.funcs = &smuio_v13_0_funcs; break; case IP_VERSION(13, 0, 3): + case IP_VERSION(13, 0, 11): adev->smuio.funcs = &smuio_v13_0_3_funcs; if (adev->smuio.funcs->get_pkg_type(adev) == AMDGPU_PKG_TYPE_APU) { adev->flags |= AMD_IS_APU; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index b119d27271c1..35c778426a7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -33,6 +33,7 @@ #include 
"soc15_common.h" #include "gc/gc_11_0_0_offset.h" #include "gc/gc_11_0_0_sh_mask.h" +#include "bif/bif_4_1_d.h" #include <asm/div64.h> #include <linux/pci.h> @@ -1788,3 +1789,82 @@ int amdgpu_display_resume_helper(struct amdgpu_device *adev) return 0; } +/* panic_bo is set in amdgpu_dm_plane_get_scanout_buffer() and only used in amdgpu_dm_set_pixel() + * they are called from the panic handler, and protected by the drm_panic spinlock. + */ +static struct amdgpu_bo *panic_abo; + +/* Use the indirect MMIO to write each pixel to the GPU VRAM, + * This is a simplified version of amdgpu_device_mm_access() + */ +static void amdgpu_display_set_pixel(struct drm_scanout_buffer *sb, + unsigned int x, + unsigned int y, + u32 color) +{ + struct amdgpu_res_cursor cursor; + unsigned long offset; + struct amdgpu_bo *abo = panic_abo; + struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); + uint32_t tmp; + + offset = x * 4 + y * sb->pitch[0]; + amdgpu_res_first(abo->tbo.resource, offset, 4, &cursor); + + tmp = cursor.start >> 31; + WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t) cursor.start) | 0x80000000); + if (tmp != 0xffffffff) + WREG32_NO_KIQ(mmMM_INDEX_HI, tmp); + WREG32_NO_KIQ(mmMM_DATA, color); +} + +int amdgpu_display_get_scanout_buffer(struct drm_plane *plane, + struct drm_scanout_buffer *sb) +{ + struct amdgpu_bo *abo; + struct drm_framebuffer *fb = plane->state->fb; + + if (!fb) + return -EINVAL; + + DRM_DEBUG_KMS("Framebuffer %dx%d %p4cc\n", fb->width, fb->height, &fb->format->format); + + abo = gem_to_amdgpu_bo(fb->obj[0]); + if (!abo) + return -EINVAL; + + sb->width = fb->width; + sb->height = fb->height; + /* Use the generic linear format, because tiling will be disabled in panic_flush() */ + sb->format = drm_format_info(fb->format->format); + if (!sb->format) + return -EINVAL; + + sb->pitch[0] = fb->pitches[0]; + + if (abo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS) { + if (abo->tbo.resource->mem_type != TTM_PL_VRAM) { + drm_warn(plane->dev, "amdgpu panic, framebuffer not in VRAM\n"); + return -EINVAL; + } + /* Only handle 32bits format, to simplify mmio access */ + if (fb->format->cpp[0] != 4) { + drm_warn(plane->dev, "amdgpu panic, pixel format is not 32bits\n"); + return -EINVAL; + } + sb->set_pixel = amdgpu_display_set_pixel; + panic_abo = abo; + return 0; + } + if (!abo->kmap.virtual && + ttm_bo_kmap(&abo->tbo, 0, PFN_UP(abo->tbo.base.size), &abo->kmap)) { + drm_warn(plane->dev, "amdgpu bo map failed, panic won't be displayed\n"); + return -ENOMEM; + } + if (abo->kmap.bo_kmap_type & TTM_BO_MAP_IOMEM_MASK) + iosys_map_set_vaddr_iomem(&sb->map[0], abo->kmap.virtual); + else + iosys_map_set_vaddr(&sb->map[0], abo->kmap.virtual); + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 9d19940f73c8..dfa0d642ac16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -23,6 +23,8 @@ #ifndef __AMDGPU_DISPLAY_H__ #define __AMDGPU_DISPLAY_H__ +#include <drm/drm_panic.h> + #define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc)) #define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l)) #define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e)) @@ -49,4 +51,7 @@ amdgpu_lookup_format_info(u32 format, uint64_t modifier); int amdgpu_display_suspend_helper(struct amdgpu_device *adev); int amdgpu_display_resume_helper(struct 
amdgpu_device *adev); +int amdgpu_display_get_scanout_buffer(struct drm_plane *plane, + struct drm_scanout_buffer *sb); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 8e81a83d37d8..9f627caedc3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -36,6 +36,7 @@ #include "amdgpu_gem.h" #include "amdgpu_dma_buf.h" #include "amdgpu_xgmi.h" +#include "amdgpu_vm.h" #include <drm/amdgpu_drm.h> #include <drm/ttm/ttm_tt.h> #include <linux/dma-buf.h> @@ -60,6 +61,8 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, if (pci_p2pdma_distance(adev->pdev, attach->dev, false) < 0) attach->peer2peer = false; + amdgpu_vm_bo_update_shared(bo); + return 0; } @@ -345,7 +348,7 @@ amdgpu_dma_buf_move_notify(struct dma_buf_attachment *attach) /* FIXME: This should be after the "if", but needs a fix to make sure * DMABuf imports are initialized in the right VM list. */ - amdgpu_vm_bo_invalidate(adev, bo, false); + amdgpu_vm_bo_invalidate(bo, false); if (!bo->tbo.resource || bo->tbo.resource->mem_type == TTM_PL_SYSTEM) return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 38686203bea6..23cfce5aa1fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -23,7 +23,7 @@ */ #include <drm/amdgpu_drm.h> -#include <drm/drm_client_setup.h> +#include <drm/clients/drm_client_setup.h> #include <drm/drm_drv.h> #include <drm/drm_fbdev_ttm.h> #include <drm/drm_gem.h> @@ -119,9 +119,13 @@ * - 3.57.0 - Compute tunneling on GFX10+ * - 3.58.0 - Add GFX12 DCC support * - 3.59.0 - Cleared VRAM + * - 3.60.0 - Add AMDGPU_TILING_GFX12_DCC_WRITE_COMPRESS_DISABLE (Vulkan requirement) + * - 3.61.0 - Contains fix for RV/PCO compute queues + * - 3.62.0 - Add AMDGPU_IDS_FLAGS_MODE_PF, AMDGPU_IDS_FLAGS_MODE_VF & AMDGPU_IDS_FLAGS_MODE_PT + * - 3.63.0 - GFX12 display DCC supports 256B max compressed block size */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 59 +#define KMS_DRIVER_MINOR 63 #define KMS_DRIVER_PATCHLEVEL 0 /* @@ -134,6 +138,8 @@ enum AMDGPU_DEBUG_MASK { AMDGPU_DEBUG_USE_VRAM_FW_BUF = BIT(3), AMDGPU_DEBUG_ENABLE_RAS_ACA = BIT(4), AMDGPU_DEBUG_ENABLE_EXP_RESETS = BIT(5), + AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6), + AMDGPU_DEBUG_SMU_POOL = BIT(7), }; unsigned int amdgpu_vram_limit = UINT_MAX; @@ -171,6 +177,7 @@ uint amdgpu_sdma_phase_quantum = 32; char *amdgpu_disable_cu; char *amdgpu_virtual_display; bool enforce_isolation; +int amdgpu_modeset = -1; /* Specifies the default granularity for SVM, used in buffer * migration and restoration of backing memory when handling @@ -280,7 +287,8 @@ module_param_named(gartsize, amdgpu_gart_size, uint, 0600); /** * DOC: gttsize (int) * Restrict the size of GTT domain (for userspace use) in MiB for testing. - * The default is -1 (Use 1/2 RAM, minimum value is 3GB). + * The default is -1 (Use value specified by TTM). + * This parameter is deprecated and will be removed in the future. */ MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)"); module_param_named(gttsize, amdgpu_gtt_size, int, 0600); @@ -399,7 +407,7 @@ module_param_named(runpm, amdgpu_runtime_pm, int, 0444); * the kernel log for the list of IPs on the asic. The default is 0xffffffff (enable all blocks on a device). 
*/ MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))"); -module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444); +module_param_named_unsafe(ip_block_mask, amdgpu_ip_block_mask, uint, 0444); /** * DOC: bapm (int) @@ -457,7 +465,7 @@ module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444); * Enable experimental hw support (1 = enable). The default is 0 (disabled). */ MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))"); -module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444); +module_param_named_unsafe(exp_hw_support, amdgpu_exp_hw_support, int, 0444); /** * DOC: dc (int) @@ -568,14 +576,14 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444); * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV). */ MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)"); -module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444); +module_param_named_unsafe(gpu_recovery, amdgpu_gpu_recovery, int, 0444); /** * DOC: emu_mode (int) * Set value 1 to enable emulation mode. This is only needed when running on an emulator. The default is 0 (disabled). */ MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)"); -module_param_named(emu_mode, amdgpu_emu_mode, int, 0444); +module_param_named_unsafe(emu_mode, amdgpu_emu_mode, int, 0444); /** * DOC: ras_enable (int) @@ -730,7 +738,7 @@ module_param_named(noretry, amdgpu_noretry, int, 0644); */ MODULE_PARM_DESC(force_asic_type, "A non negative value used to specify the asic type for all supported GPUs"); -module_param_named(force_asic_type, amdgpu_force_asic_type, int, 0444); +module_param_named_unsafe(force_asic_type, amdgpu_force_asic_type, int, 0444); /** * DOC: use_xgmi_p2p (int) @@ -749,7 +757,7 @@ module_param_named(use_xgmi_p2p, amdgpu_use_xgmi_p2p, int, 0444); * assigns queues to HQDs. */ int sched_policy = KFD_SCHED_POLICY_HWS; -module_param(sched_policy, int, 0444); +module_param_unsafe(sched_policy, int, 0444); MODULE_PARM_DESC(sched_policy, "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)"); @@ -799,7 +807,7 @@ MODULE_PARM_DESC(send_sigterm, * Setting 1 enables halt on hang. */ int halt_if_hws_hang; -module_param(halt_if_hws_hang, int, 0644); +module_param_unsafe(halt_if_hws_hang, int, 0644); MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); /** @@ -808,7 +816,7 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau * check says. Default value: false (rely on MEC2 firmware version check). 
*/ bool hws_gws_support; -module_param(hws_gws_support, bool, 0444); +module_param_unsafe(hws_gws_support, bool, 0444); MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = rely on FW version check (Default), true = force supported)"); /** @@ -841,7 +849,7 @@ MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = defa */ int amdgpu_no_queue_eviction_on_vm_fault; MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)"); -module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444); +module_param_named_unsafe(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444); #endif /** @@ -849,7 +857,7 @@ module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm */ int amdgpu_mtype_local; MODULE_PARM_DESC(mtype_local, "MTYPE for local memory (0 = MTYPE_RW (default), 1 = MTYPE_NC, 2 = MTYPE_CC)"); -module_param_named(mtype_local, amdgpu_mtype_local, int, 0444); +module_param_named_unsafe(mtype_local, amdgpu_mtype_local, int, 0444); /** * DOC: pcie_p2p (bool) @@ -953,7 +961,7 @@ module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) */ MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco/bamaco)"); -module_param_named(reset_method, amdgpu_reset_method, int, 0644); +module_param_named_unsafe(reset_method, amdgpu_reset_method, int, 0644); /** * DOC: bad_page_threshold (int) Bad page threshold is specifies the @@ -961,7 +969,7 @@ module_param_named(reset_method, amdgpu_reset_method, int, 0644); * result in the GPU entering bad status when the number of total * faulty pages by ECC exceeds the threshold value. */ -MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = driver sets threshold)"); +MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = ignore threshold (default value), 0 = disable bad page retirement, -2 = threshold determined by a formula, 0 < threshold < max records, user-defined threshold)"); module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444); MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)"); @@ -1032,6 +1040,13 @@ module_param(enforce_isolation, bool, 0444); MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on"); /** + * DOC: modeset (int) + * Override nomodeset (1 = override, -1 = auto). The default is -1 (auto). + */ +MODULE_PARM_DESC(modeset, "Override nomodeset (1 = enable, -1 = auto)"); +module_param_named(modeset, amdgpu_modeset, int, 0444); + +/** * DOC: seamless (int) * Seamless boot will keep the image on the screen during the boot process. */ @@ -1047,9 +1062,14 @@ module_param_named(seamless, amdgpu_seamless, int, 0444); * limits the VRAM size reported to ROCm applications to the visible * size, usually 256MB. 
* - 0x4: Disable GPU soft recovery, always do a full reset + * - 0x8: Use VRAM for firmware loading + * - 0x10: Enable ACA based RAS logging + * - 0x20: Enable experimental resets + * - 0x40: Disable ring resets + * - 0x80: Use VRAM for SMU pool */ MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default"); -module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444); +module_param_named_unsafe(debug_mask, amdgpu_debug_mask, uint, 0444); /** * DOC: agp (int) @@ -2219,6 +2239,15 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev) pr_info("debug: enable experimental reset features\n"); adev->debug_exp_resets = true; } + + if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_GPU_RING_RESET) { + pr_info("debug: ring reset disabled\n"); + adev->debug_disable_gpu_ring_reset = true; + } + if (amdgpu_debug_mask & AMDGPU_DEBUG_SMU_POOL) { + pr_info("debug: use vram for smu pool\n"); + adev->pm.smu_debug_mask |= SMU_DEBUG_POOL_USE_VRAM; + } } static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags) @@ -2246,6 +2275,12 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, int ret, retry = 0, i; bool supports_atomic = false; + if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA || + (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) { + if (drm_firmware_drivers_only() && amdgpu_modeset == -1) + return -EINVAL; + } + /* skip devices which are owned by radeon */ for (i = 0; i < ARRAY_SIZE(amdgpu_unsupported_pciidlist); i++) { if (amdgpu_unsupported_pciidlist[i] == pdev->device) @@ -2552,9 +2587,7 @@ static int amdgpu_pmops_freeze(struct device *dev) struct amdgpu_device *adev = drm_to_adev(drm_dev); int r; - adev->in_s4 = true; r = amdgpu_device_suspend(drm_dev, true); - adev->in_s4 = false; if (r) return r; @@ -2566,8 +2599,13 @@ static int amdgpu_pmops_freeze(struct device *dev) static int amdgpu_pmops_thaw(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + int r; - return amdgpu_device_resume(drm_dev, true); + r = amdgpu_device_resume(drm_dev, true); + adev->in_s4 = false; + + return r; } static int amdgpu_pmops_poweroff(struct device *dev) @@ -2580,6 +2618,9 @@ static int amdgpu_pmops_poweroff(struct device *dev) static int amdgpu_pmops_restore(struct device *dev) { struct drm_device *drm_dev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(drm_dev); + + adev->in_s4 = false; return amdgpu_device_resume(drm_dev, true); } @@ -2916,7 +2957,6 @@ static const struct drm_driver amdgpu_kms_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = KMS_DRIVER_MAJOR, .minor = KMS_DRIVER_MINOR, .patchlevel = KMS_DRIVER_PATCHLEVEL, @@ -2940,7 +2980,6 @@ const struct drm_driver amdgpu_partition_driver = { .name = DRIVER_NAME, .desc = DRIVER_DESC, - .date = DRIVER_DATE, .major = KMS_DRIVER_MAJOR, .minor = KMS_DRIVER_MINOR, .patchlevel = KMS_DRIVER_PATCHLEVEL, @@ -2975,9 +3014,6 @@ static int __init amdgpu_init(void) { int r; - if (drm_firmware_drivers_only()) - return -EINVAL; - r = amdgpu_sync_init(); if (r) goto error_sync; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h index 5bc2cb661af7..2d86cc6f7f4d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h @@ -40,7 +40,6 @@ #define DRIVER_NAME "amdgpu" #define DRIVER_DESC "AMD GPU" -#define DRIVER_DATE "20150101" extern const struct drm_driver amdgpu_partition_driver; diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c index df2cf5c33925..91d638098889 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c @@ -60,7 +60,7 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) struct amdgpu_fpriv *fpriv = file->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; - struct amdgpu_mem_stats stats[__AMDGPU_PL_LAST + 1] = { }; + struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]; ktime_t usage[AMDGPU_HW_IP_NUM]; const char *pl_name[] = { [TTM_PL_VRAM] = "vram", @@ -72,15 +72,8 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) [AMDGPU_PL_DOORBELL] = "doorbell", }; unsigned int hw_ip, i; - int ret; - - ret = amdgpu_bo_reserve(vm->root.bo, false); - if (ret) - return; - - amdgpu_vm_get_memory(vm, stats, ARRAY_SIZE(stats)); - amdgpu_bo_unreserve(vm->root.bo); + amdgpu_vm_get_memory(vm, stats); amdgpu_ctx_mgr_usage(&fpriv->ctx_mgr, usage); /* @@ -114,9 +107,11 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file) drm_printf(p, "amd-evicted-vram:\t%llu KiB\n", stats[TTM_PL_VRAM].evicted/1024UL); drm_printf(p, "amd-requested-vram:\t%llu KiB\n", - stats[TTM_PL_VRAM].requested/1024UL); + (stats[TTM_PL_VRAM].drm.shared + + stats[TTM_PL_VRAM].drm.private) / 1024UL); drm_printf(p, "amd-requested-gtt:\t%llu KiB\n", - stats[TTM_PL_TT].requested/1024UL); + (stats[TTM_PL_TT].drm.shared + + stats[TTM_PL_TT].drm.private) / 1024UL); for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) { if (!usage[hw_ip]) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index ceb5163480f4..1ae88c459da5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -31,6 +31,7 @@ #define FRU_EEPROM_MADDR_6 0x60000 #define FRU_EEPROM_MADDR_8 0x80000 +#define FRU_EEPROM_MADDR_INV 0xFFFFF static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr) { @@ -63,10 +64,10 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr) switch (adev->asic_type) { case CHIP_VEGA20: /* D161 and D163 are the VG20 server SKUs */ - if (strnstr(atom_ctx->vbios_pn, "D161", - sizeof(atom_ctx->vbios_pn)) || - strnstr(atom_ctx->vbios_pn, "D163", - sizeof(atom_ctx->vbios_pn))) { + if (atom_ctx && (strnstr(atom_ctx->vbios_pn, "D161", + sizeof(atom_ctx->vbios_pn)) || + strnstr(atom_ctx->vbios_pn, "D163", + sizeof(atom_ctx->vbios_pn)))) { if (fru_addr) *fru_addr = FRU_EEPROM_MADDR_6; return true; @@ -78,8 +79,8 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr) return false; } case IP_VERSION(11, 0, 7): - if (strnstr(atom_ctx->vbios_pn, "D603", - sizeof(atom_ctx->vbios_pn))) { + if (atom_ctx && strnstr(atom_ctx->vbios_pn, "D603", + sizeof(atom_ctx->vbios_pn))) { if (strnstr(atom_ctx->vbios_pn, "D603GLXE", sizeof(atom_ctx->vbios_pn))) { return false; @@ -94,8 +95,8 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr) } case IP_VERSION(13, 0, 2): /* All Aldebaran SKUs have an FRU */ - if (!strnstr(atom_ctx->vbios_pn, "D673", - sizeof(atom_ctx->vbios_pn))) + if (atom_ctx && !strnstr(atom_ctx->vbios_pn, "D673", + sizeof(atom_ctx->vbios_pn))) if (fru_addr) *fru_addr = FRU_EEPROM_MADDR_6; return true; @@ -104,6 +105,10 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr) if (fru_addr) *fru_addr = FRU_EEPROM_MADDR_8; return true; + case IP_VERSION(13, 0, 12): 
+ if (fru_addr) + *fru_addr = FRU_EEPROM_MADDR_INV; + return true; default: return false; } @@ -120,6 +125,10 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev) if (!is_fru_eeprom_supported(adev, &fru_addr)) return 0; + /* FRU data available, but no direct EEPROM access */ + if (fru_addr == FRU_EEPROM_MADDR_INV) + return 0; + if (!adev->fru_info) { adev->fru_info = kzalloc(sizeof(*adev->fru_info), GFP_KERNEL); if (!adev->fru_info) @@ -384,7 +393,7 @@ int amdgpu_fru_sysfs_init(struct amdgpu_device *adev) void amdgpu_fru_sysfs_fini(struct amdgpu_device *adev) { - if (!is_fru_eeprom_supported(adev, NULL) || !adev->fru_info) + if (!adev->fru_info) return; sysfs_remove_files(&adev->dev->kobj, amdgpu_fru_attributes); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h index bc58dca18035..98f3196599ef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.h @@ -32,7 +32,7 @@ struct amdgpu_fru_info { char product_name[AMDGPU_PRODUCT_NAME_LEN]; char serial[20]; char manufacturer_name[32]; - char fru_id[32]; + char fru_id[50]; }; int amdgpu_fru_get_product_info(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 1a5df8b94661..69429df09477 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -42,6 +42,7 @@ #include "amdgpu_dma_buf.h" #include "amdgpu_hmm.h" #include "amdgpu_xgmi.h" +#include "amdgpu_vm.h" static vm_fault_t amdgpu_gem_fault(struct vm_fault *vmf) { @@ -87,10 +88,8 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj) { struct amdgpu_bo *aobj = gem_to_amdgpu_bo(gobj); - if (aobj) { - amdgpu_hmm_unregister(aobj); - ttm_bo_put(&aobj->tbo); - } + amdgpu_hmm_unregister(aobj); + ttm_bo_put(&aobj->tbo); } int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, @@ -179,6 +178,7 @@ static int amdgpu_gem_object_open(struct drm_gem_object *obj, if (r) return r; + amdgpu_vm_bo_update_shared(abo); bo_va = amdgpu_vm_bo_find(vm, abo); if (!bo_va) bo_va = amdgpu_vm_bo_add(adev, vm, abo); @@ -252,6 +252,7 @@ static void amdgpu_gem_object_close(struct drm_gem_object *obj, goto out_unlock; amdgpu_vm_bo_del(adev, bo_va); + amdgpu_vm_bo_update_shared(bo); if (!amdgpu_vm_ready(vm)) goto out_unlock; @@ -839,7 +840,6 @@ error: int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { - struct amdgpu_device *adev = drm_to_adev(dev); struct drm_amdgpu_gem_op *args = data; struct drm_gem_object *gobj; struct amdgpu_vm_bo_base *base; @@ -899,7 +899,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT; if (robj->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) - amdgpu_vm_bo_invalidate(adev, robj, true); + amdgpu_vm_bo_invalidate(robj, true); amdgpu_bo_unreserve(robj); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 1d155463d044..72af5e5a894a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -515,7 +515,7 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev, int xcc_id) if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - if (!kiq_ring->sched.ready || adev->job_hang || amdgpu_in_reset(adev)) + if (!kiq_ring->sched.ready || amdgpu_in_reset(adev)) return 0; spin_lock(&kiq->ring_lock); @@ -567,7 +567,7 @@ int
amdgpu_gfx_disable_kgq(struct amdgpu_device *adev, int xcc_id) if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) return -EINVAL; - if (!adev->gfx.kiq[0].ring.sched.ready || adev->job_hang) + if (!adev->gfx.kiq[0].ring.sched.ready || amdgpu_in_reset(adev)) return 0; if (amdgpu_gfx_is_master_xcc(adev, xcc_id)) { @@ -771,18 +771,8 @@ int amdgpu_gfx_enable_kgq(struct amdgpu_device *adev, int xcc_id) return r; } -/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable - * - * @adev: amdgpu_device pointer - * @bool enable true: enable gfx off feature, false: disable gfx off feature - * - * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled. - * 2. other client can send request to disable gfx off feature, the request should be honored. - * 3. other client can cancel their request of disable gfx off feature - * 4. other client should not send request to enable gfx off feature before disable gfx off feature. - */ - -void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) +static void amdgpu_gfx_do_off_ctrl(struct amdgpu_device *adev, bool enable, + bool no_delay) { unsigned long delay = GFX_OFF_DELAY_ENABLE; @@ -804,9 +794,9 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) if (adev->gfx.gfx_off_req_count == 0 && !adev->gfx.gfx_off_state) { /* If going to s2idle, no need to wait */ - if (adev->in_s0ix) { + if (no_delay) { if (!amdgpu_dpm_set_powergating_by_smu(adev, - AMD_IP_BLOCK_TYPE_GFX, true)) + AMD_IP_BLOCK_TYPE_GFX, true, 0)) adev->gfx.gfx_off_state = true; } else { schedule_delayed_work(&adev->gfx.gfx_off_delay_work, @@ -818,7 +808,7 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); if (adev->gfx.gfx_off_state && - !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) { + !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false, 0)) { adev->gfx.gfx_off_state = false; if (adev->gfx.funcs->init_spm_golden) { @@ -836,6 +826,43 @@ unlock: mutex_unlock(&adev->gfx.gfx_off_mutex); } +/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable + * + * @adev: amdgpu_device pointer + * @bool enable true: enable gfx off feature, false: disable gfx off feature + * + * 1. gfx off feature will be enabled by gfx ip after gfx cg pg enabled. + * 2. other client can send request to disable gfx off feature, the request should be honored. + * 3. other client can cancel their request of disable gfx off feature + * 4. other client should not send request to enable gfx off feature before disable gfx off feature. + * + * gfx off allow will be delayed by GFX_OFF_DELAY_ENABLE ms. + */ +void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) +{ + /* If going to s2idle, no need to wait */ + bool no_delay = adev->in_s0ix ? true : false; + + amdgpu_gfx_do_off_ctrl(adev, enable, no_delay); +} + +/* amdgpu_gfx_off_ctrl_immediate - Handle gfx off feature enable/disable + * + * @adev: amdgpu_device pointer + * @bool enable true: enable gfx off feature, false: disable gfx off feature + * + * 1. gfx off feature will be enabled by gfx ip after gfx cg pg enabled. + * 2. other client can send request to disable gfx off feature, the request should be honored. + * 3. other client can cancel their request of disable gfx off feature + * 4. other client should not send request to enable gfx off feature before disable gfx off feature. + * + * gfx off allow will be issued immediately. 
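The request/cancel protocol documented for the two gfx-off helpers above is what callers elsewhere in the driver follow when the GFX block must stay powered, for example around direct register access. A hedged sketch of that pairing; the wrapper name and register are made up, amdgpu_gfx_off_ctrl() and RREG32 are the driver's own:

static u32 example_rreg_with_gfxoff_blocked(struct amdgpu_device *adev, u32 reg)
{
	u32 val;

	amdgpu_gfx_off_ctrl(adev, false);	/* request: keep GFX powered on */
	val = RREG32(reg);			/* safe while GFXOFF is held off */
	amdgpu_gfx_off_ctrl(adev, true);	/* cancel our disable request */

	return val;
}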
+ */ +void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable) +{ + amdgpu_gfx_do_off_ctrl(adev, enable, true); +} + int amdgpu_set_gfx_off_residency(struct amdgpu_device *adev, bool value) { int r = 0; @@ -1484,6 +1511,24 @@ static int amdgpu_gfx_run_cleaner_shader(struct amdgpu_device *adev, int xcp_id) return 0; } +/** + * amdgpu_gfx_set_run_cleaner_shader - Execute the AMDGPU GFX Cleaner Shader + * @dev: The device structure + * @attr: The device attribute structure + * @buf: The buffer containing the input data + * @count: The size of the input data + * + * Provides the sysfs interface to manually run a cleaner shader, which is + * used to clear the GPU state between different tasks. Writing a value to the + * 'run_cleaner_shader' sysfs file triggers the cleaner shader execution. + * The value written corresponds to the partition index on multi-partition + * devices. On single-partition devices, the value should be '0'. + * + * The cleaner shader clears the Local Data Store (LDS) and General Purpose + * Registers (GPRs) to ensure data isolation between GPU workloads. + * + * Return: The number of bytes written to the sysfs file. + */ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, struct device_attribute *attr, const char *buf, @@ -1532,6 +1577,19 @@ static ssize_t amdgpu_gfx_set_run_cleaner_shader(struct device *dev, return count; } +/** + * amdgpu_gfx_get_enforce_isolation - Query AMDGPU GFX Enforce Isolation Settings + * @dev: The device structure + * @attr: The device attribute structure + * @buf: The buffer to store the output data + * + * Provides the sysfs read interface to get the current settings of the 'enforce_isolation' + * feature for each GPU partition. Reading from the 'enforce_isolation' + * sysfs file returns the isolation settings for all partitions, where '0' + * indicates disabled and '1' indicates enabled. + * + * Return: The number of bytes read from the sysfs file. + */ static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, struct device_attribute *attr, char *buf) @@ -1555,6 +1613,20 @@ static ssize_t amdgpu_gfx_get_enforce_isolation(struct device *dev, return size; } +/** + * amdgpu_gfx_set_enforce_isolation - Control AMDGPU GFX Enforce Isolation + * @dev: The device structure + * @attr: The device attribute structure + * @buf: The buffer containing the input data + * @count: The size of the input data + * + * This function allows control over the 'enforce_isolation' feature, which + * serializes access to the graphics engine. Writing '1' or '0' to the + * 'enforce_isolation' sysfs file enables or disables process isolation for + * each partition. The input should specify the setting for all partitions. + * + * Return: The number of bytes written to the sysfs file. 
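The store interface documented above is reachable from user space with a plain sysfs write. A minimal sketch, assuming the attribute appears at the usual per-device location (the card index and exact path are assumptions, and multi-partition devices take one value per partition):

#include <stdio.h>

int main(void)
{
	/* '1' enables, '0' disables; single-partition device assumed */
	FILE *f = fopen("/sys/class/drm/card0/device/enforce_isolation", "w");

	if (!f)
		return 1;
	fputs("1\n", f);
	fclose(f);
	return 0;
}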
+ */ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) @@ -1593,22 +1665,12 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, } mutex_lock(&adev->enforce_isolation_mutex); - - for (i = 0; i < num_partitions; i++) { - if (adev->enforce_isolation[i] && !partition_values[i]) { - /* Going from enabled to disabled */ - amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); - amdgpu_mes_set_enforce_isolation(adev, i, false); - } else if (!adev->enforce_isolation[i] && partition_values[i]) { - /* Going from disabled to enabled */ - amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); - amdgpu_mes_set_enforce_isolation(adev, i, true); - } + for (i = 0; i < num_partitions; i++) adev->enforce_isolation[i] = partition_values[i]; - } - mutex_unlock(&adev->enforce_isolation_mutex); + amdgpu_mes_update_enforce_isolation(adev); + return count; } @@ -1933,13 +1995,24 @@ void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work) if (adev->kfd.init_complete) { WARN_ON_ONCE(!adev->gfx.kfd_sch_inactive[idx]); WARN_ON_ONCE(adev->gfx.kfd_sch_req_count[idx]); - amdgpu_amdkfd_start_sched(adev, idx); - adev->gfx.kfd_sch_inactive[idx] = false; + amdgpu_amdkfd_start_sched(adev, idx); + adev->gfx.kfd_sch_inactive[idx] = false; } } mutex_unlock(&adev->enforce_isolation_mutex); } +/** + * amdgpu_gfx_enforce_isolation_wait_for_kfd - Manage KFD wait period for process isolation + * @adev: amdgpu_device pointer + * @idx: Index of the GPU partition + * + * When kernel submissions come in, the jobs are given a time slice and once + * that time slice is up, if there are KFD user queues active, kernel + * submissions are blocked until KFD has had its time slice. Once the KFD time + * slice is up, KFD user queues are preempted and kernel submissions are + * unblocked and allowed to run again. + */ static void amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, u32 idx) @@ -1985,6 +2058,15 @@ amdgpu_gfx_enforce_isolation_wait_for_kfd(struct amdgpu_device *adev, msleep(GFX_SLICE_PERIOD_MS); } +/** + * amdgpu_gfx_enforce_isolation_ring_begin_use - Begin use of a ring with enforced isolation + * @ring: Pointer to the amdgpu_ring structure + * + * Ring begin_use helper implementation for gfx which serializes access to the + * gfx IP between kernel submission IOCTLs and KFD user queues when isolation + * enforcement is enabled. The kernel submission IOCTLs and KFD user queues + * each get a time slice when both are active. + */ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -2016,6 +2098,15 @@ void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring) amdgpu_gfx_kfd_sch_ctrl(adev, idx, false); } +/** + * amdgpu_gfx_enforce_isolation_ring_end_use - End use of a ring with enforced isolation + * @ring: Pointer to the amdgpu_ring structure + * + * Ring end_use helper implementation for gfx which serializes access to the + * gfx IP between kernel submission IOCTLs and KFD user queues when isolation + * enforcement is enabled. The kernel submission IOCTLs and KFD user queues + * each get a time slice when both are active. 
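These begin_use/end_use helpers are hooked up through the ring-funcs callbacks of the GFX and compute rings. A hedged fragment showing the hookup; the table name is invented and all unrelated fields are elided, while the two callback members do exist in struct amdgpu_ring_funcs:

static const struct amdgpu_ring_funcs example_gfx_ring_funcs = {
	.type		= AMDGPU_RING_TYPE_GFX,
	/* ...emit/submit callbacks elided... */
	.begin_use	= amdgpu_gfx_enforce_isolation_ring_begin_use,
	.end_use	= amdgpu_gfx_enforce_isolation_ring_end_use,
};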
+ */ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; @@ -2044,6 +2135,80 @@ void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring) amdgpu_gfx_kfd_sch_ctrl(adev, idx, true); } +void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, gfx.idle_work.work); + enum PP_SMC_POWER_PROFILE profile; + u32 i, fences = 0; + int r; + + if (adev->gfx.num_gfx_rings) + profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D; + else + profile = PP_SMC_POWER_PROFILE_COMPUTE; + + for (i = 0; i < AMDGPU_MAX_GFX_RINGS; ++i) + fences += amdgpu_fence_count_emitted(&adev->gfx.gfx_ring[i]); + for (i = 0; i < (AMDGPU_MAX_COMPUTE_RINGS * AMDGPU_MAX_GC_INSTANCES); ++i) + fences += amdgpu_fence_count_emitted(&adev->gfx.compute_ring[i]); + if (!fences && !atomic_read(&adev->gfx.total_submission_cnt)) { + mutex_lock(&adev->gfx.workload_profile_mutex); + if (adev->gfx.workload_profile_active) { + r = amdgpu_dpm_switch_power_profile(adev, profile, false); + if (r) + dev_warn(adev->dev, "(%d) failed to disable %s power profile mode\n", r, + profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ? + "fullscreen 3D" : "compute"); + adev->gfx.workload_profile_active = false; + } + mutex_unlock(&adev->gfx.workload_profile_mutex); + } else { + schedule_delayed_work(&adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT); + } +} + +void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + enum PP_SMC_POWER_PROFILE profile; + int r; + + if (adev->gfx.num_gfx_rings) + profile = PP_SMC_POWER_PROFILE_FULLSCREEN3D; + else + profile = PP_SMC_POWER_PROFILE_COMPUTE; + + atomic_inc(&adev->gfx.total_submission_cnt); + + cancel_delayed_work_sync(&adev->gfx.idle_work); + + /* We can safely return early here because we've cancelled the + * delayed work so there is no one else to set it to false + * and we don't care if someone else sets it to true. + */ + if (adev->gfx.workload_profile_active) + return; + + mutex_lock(&adev->gfx.workload_profile_mutex); + if (!adev->gfx.workload_profile_active) { + r = amdgpu_dpm_switch_power_profile(adev, profile, true); + if (r) + dev_warn(adev->dev, "(%d) failed to enable %s power profile mode\n", r, + profile == PP_SMC_POWER_PROFILE_FULLSCREEN3D ? + "fullscreen 3D" : "compute"); + adev->gfx.workload_profile_active = true; + } + mutex_unlock(&adev->gfx.workload_profile_mutex); +} + +void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring) +{ + atomic_dec(&ring->adev->gfx.total_submission_cnt); + + schedule_delayed_work(&ring->adev->gfx.idle_work, GFX_PROFILE_IDLE_TIMEOUT); +} + /* * debugfs for to enable/disable gfx job submission to specific core.
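The debugfs hunks that follow widen the ring-mask arithmetic from 1 << n to 1ULL << n. A small standalone illustration of why that matters once the ring count can reach 32 or more (the ring count used here is made up):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int rings = 40;	/* e.g. compute rings summed over several instances */

	/*
	 * "(1 << rings) - 1" performs the shift in 32-bit int, which is
	 * undefined once rings reaches 32; forcing the constant to 1ULL
	 * keeps the whole mask computation in 64 bits.
	 */
	uint64_t mask = (1ULL << rings) - 1;

	printf("mask for %u rings: %#llx\n", rings, (unsigned long long)mask);
	return 0;
}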
*/ @@ -2058,7 +2223,7 @@ static int amdgpu_debugfs_gfx_sched_mask_set(void *data, u64 val) if (!adev) return -ENODEV; - mask = (1 << adev->gfx.num_gfx_rings) - 1; + mask = (1ULL << adev->gfx.num_gfx_rings) - 1; if ((val & mask) == 0) return -EINVAL; @@ -2086,7 +2251,7 @@ static int amdgpu_debugfs_gfx_sched_mask_get(void *data, u64 *val) for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { ring = &adev->gfx.gfx_ring[i]; if (ring->sched.ready) - mask |= 1 << i; + mask |= 1ULL << i; } *val = mask; @@ -2128,7 +2293,7 @@ static int amdgpu_debugfs_compute_sched_mask_set(void *data, u64 val) if (!adev) return -ENODEV; - mask = (1 << adev->gfx.num_compute_rings) - 1; + mask = (1ULL << adev->gfx.num_compute_rings) - 1; if ((val & mask) == 0) return -EINVAL; @@ -2157,7 +2322,7 @@ static int amdgpu_debugfs_compute_sched_mask_get(void *data, u64 *val) for (i = 0; i < adev->gfx.num_compute_rings; ++i) { ring = &adev->gfx.compute_ring[i]; if (ring->sched.ready) - mask |= 1 << i; + mask |= 1ULL << i; } *val = mask; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 8b5bd63b5773..87e862188766 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -57,6 +57,9 @@ enum amdgpu_gfx_pipe_priority { #define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0 #define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15 +/* 1 second timeout */ +#define GFX_PROFILE_IDLE_TIMEOUT msecs_to_jiffies(1000) + enum amdgpu_gfx_partition { AMDGPU_SPX_PARTITION_MODE = 0, AMDGPU_DPX_PARTITION_MODE = 1, @@ -476,6 +479,11 @@ struct amdgpu_gfx { bool kfd_sch_inactive[MAX_XCP]; unsigned long enforce_isolation_jiffies[MAX_XCP]; unsigned long enforce_isolation_time[MAX_XCP]; + + atomic_t total_submission_cnt; + struct delayed_work idle_work; + bool workload_profile_active; + struct mutex workload_profile_mutex; }; struct amdgpu_gfx_ras_reg_entry { @@ -547,6 +555,7 @@ int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me, bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me, int pipe, int queue); void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); +void amdgpu_gfx_off_ctrl_immediate(struct amdgpu_device *adev, bool enable); int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value); int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); void amdgpu_gfx_ras_fini(struct amdgpu_device *adev); @@ -584,6 +593,11 @@ void amdgpu_gfx_cleaner_shader_init(struct amdgpu_device *adev, void amdgpu_gfx_enforce_isolation_handler(struct work_struct *work); void amdgpu_gfx_enforce_isolation_ring_begin_use(struct amdgpu_ring *ring); void amdgpu_gfx_enforce_isolation_ring_end_use(struct amdgpu_ring *ring); + +void amdgpu_gfx_profile_idle_work_handler(struct work_struct *work); +void amdgpu_gfx_profile_ring_begin_use(struct amdgpu_ring *ring); +void amdgpu_gfx_profile_ring_end_use(struct amdgpu_ring *ring); + void amdgpu_debugfs_gfx_sched_mask_init(struct amdgpu_device *adev); void amdgpu_debugfs_compute_sched_mask_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 1c19a65e6553..464625282872 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -269,7 +269,7 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc * @mc: memory controller structure holding memory information * @gart_placement: GART placement policy with respect to VRAM * - * 
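The debugfs sched-mask changes above replace 1 << i with 1ULL << i. Once a device can expose 32 or more rings, shifting a plain int either loses the high bits or is outright undefined behaviour, so a u64 mask has to be built from a 64-bit constant. A small standalone C demonstration, using a hypothetical ring count:

/* Shows why a u64 ring mask has to be built with a 64-bit shift constant.
 * num_rings is a made-up value; the point is what happens from bit 31 up.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	unsigned int num_rings = 40;               /* >= 32 rings */
	uint64_t mask = (1ULL << num_rings) - 1;   /* all 40 bits set */

	printf("mask built with 1ULL: 0x%" PRIx64 "\n", mask);

	/* With a plain int, (1 << num_rings) for num_rings >= 32 is undefined
	 * behaviour, so it cannot even be written safely. Bit 31 shows the
	 * other failure mode: the value lands in the sign bit and gets
	 * sign-extended when widened (result shown for common ABIs). */
	uint32_t u = 1u << 31;
	int32_t  s = (int32_t)u;                   /* what an int mask would hold */
	printf("bit 31 via int, widened: 0x%" PRIx64 "\n", (uint64_t)s);
	printf("bit 31 via 1ULL:         0x%" PRIx64 "\n", (uint64_t)(1ULL << 31));
	return 0;
}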
Function will place try to place GART before or after VRAM. + * Function will try to place GART before or after VRAM. * If GART size is bigger than space left then we ajust GART size. * Thus function will never fails. */ @@ -573,6 +573,7 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = {0}; unsigned i; unsigned vmhub, inv_eng; + struct amdgpu_ring *shared_ring; /* init the vm inv eng for all vmhubs */ for_each_set_bit(i, adev->vmhubs_mask, AMDGPU_MAX_VMHUBS) { @@ -591,7 +592,12 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) if (ring == &adev->mes.ring[0] || ring == &adev->mes.ring[1] || - ring == &adev->umsch_mm.ring) + ring == &adev->umsch_mm.ring || + ring == &adev->cper.ring_buf) + continue; + + /* Skip if the ring is a shared ring */ + if (amdgpu_sdma_is_shared_inv_eng(adev, ring)) continue; inv_eng = ffs(vm_inv_engs[vmhub]); @@ -606,6 +612,21 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev) dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n", ring->name, ring->vm_inv_eng, ring->vm_hub); + /* SDMA has a special packet which allows it to use the same + * invalidation engine for all the rings in one instance. + * Therefore, we do not allocate a separate VM invalidation engine + * for SDMA page rings. Instead, they share the VM invalidation + * engine with the SDMA gfx ring. This change ensures efficient + * resource management and avoids the issue of insufficient VM + * invalidation engines. + */ + shared_ring = amdgpu_sdma_get_shared_ring(adev, ring); + if (shared_ring) { + shared_ring->vm_inv_eng = ring->vm_inv_eng; + dev_info(adev->dev, "ring %s shares VM invalidation engine %u with ring %s on hub %u\n", + ring->name, ring->vm_inv_eng, shared_ring->name, ring->vm_hub); + continue; + } } return 0; @@ -851,6 +872,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev) case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): /* Don't enable it by default yet. 
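amdgpu_gmc_allocate_vm_inv_eng() above assigns each ring the lowest free invalidation engine on its VM hub by taking ffs() of a per-hub free mask and clearing that bit, and lets an SDMA page ring reuse the engine of its paired SDMA gfx ring instead of consuming another one. A simplified, self-contained model of that allocation policy follows; the hub count, masks and ring names are invented, and the sharing is resolved in a different order than the driver does it:

/* Simplified model of handing out VM invalidation engines from a per-hub
 * bitmap, with a "shared" ring that reuses its partner's engine.
 */
#include <stdio.h>
#include <strings.h>   /* ffs() */

#define MAX_HUBS 2

struct ring {
	const char *name;
	int hub;
	int shares_with;   /* index of partner ring, or -1 */
	int inv_eng;       /* assigned engine, -1 if none yet */
};

int main(void)
{
	/* bit i set => engine i still free on that hub */
	unsigned int free_engs[MAX_HUBS] = { 0x1f, 0x1f };
	struct ring rings[] = {
		{ "gfx",        0, -1, -1 },
		{ "sdma0_gfx",  0, -1, -1 },
		{ "sdma0_page", 0,  1, -1 },   /* shares with sdma0_gfx */
		{ "compute",    1, -1, -1 },
	};
	int i, eng;

	for (i = 0; i < (int)(sizeof(rings) / sizeof(rings[0])); i++) {
		struct ring *r = &rings[i];

		if (r->shares_with >= 0) {
			/* reuse the partner's engine, consume nothing */
			r->inv_eng = rings[r->shares_with].inv_eng;
			printf("%s shares engine %d\n", r->name, r->inv_eng);
			continue;
		}
		eng = ffs(free_engs[r->hub]);   /* lowest free engine, 1-based */
		if (!eng) {
			printf("no engine left for %s\n", r->name);
			continue;
		}
		r->inv_eng = eng - 1;
		free_engs[r->hub] &= ~(1u << r->inv_eng);
		printf("%s on hub %d uses engine %d\n", r->name, r->hub, r->inv_eng);
	}
	return 0;
}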
*/ if (amdgpu_tmz < 1) { @@ -888,6 +910,7 @@ void amdgpu_gmc_noretry_set(struct amdgpu_device *adev) gc_ver == IP_VERSION(9, 4, 2) || gc_ver == IP_VERSION(9, 4, 3) || gc_ver == IP_VERSION(9, 4, 4) || + gc_ver == IP_VERSION(9, 5, 0) || gc_ver >= IP_VERSION(10, 3, 0)); if (!amdgpu_sriov_xnack_support(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 459a30fe239f..bd7fc123b8f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -29,6 +29,7 @@ #include <linux/types.h> #include "amdgpu_irq.h" +#include "amdgpu_xgmi.h" #include "amdgpu_ras.h" /* VA hole for 48bit addresses on Vega10 */ @@ -174,28 +175,6 @@ struct amdgpu_gmc_funcs { bool (*need_reset_on_init)(struct amdgpu_device *adev); }; -struct amdgpu_xgmi_ras { - struct amdgpu_ras_block_object ras_block; -}; - -struct amdgpu_xgmi { - /* from psp */ - u64 node_id; - u64 hive_id; - /* fixed per family */ - u64 node_segment_size; - /* physical node (0-3) */ - unsigned physical_node_id; - /* number of nodes (0-4) */ - unsigned num_physical_nodes; - /* gpu list in the same hive */ - struct list_head head; - bool supported; - struct ras_common_if *ras_if; - bool connected_to_cpu; - struct amdgpu_xgmi_ras *ras; -}; - struct amdgpu_mem_partition_info { union { struct { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c index f0765ccde668..8179d0814db9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.c @@ -225,6 +225,25 @@ void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c) kfree(i2c); } +void amdgpu_i2c_init(struct amdgpu_device *adev) +{ + if (!adev->is_atom_fw) { + if (!amdgpu_device_has_dc_support(adev)) { + amdgpu_atombios_i2c_init(adev); + } else { + switch (adev->asic_type) { + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + amdgpu_atombios_oem_i2c_init(adev, 0x97); + break; + default: + break; + } + } + } +} + /* remove all the buses */ void amdgpu_i2c_fini(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h index 21e3d1dad0a1..1d3d3806e0dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_i2c.h @@ -28,6 +28,7 @@ struct amdgpu_i2c_chan *amdgpu_i2c_create(struct drm_device *dev, const struct amdgpu_i2c_bus_rec *rec, const char *name); void amdgpu_i2c_destroy(struct amdgpu_i2c_chan *i2c); +void amdgpu_i2c_init(struct amdgpu_device *adev); void amdgpu_i2c_fini(struct amdgpu_device *adev); struct amdgpu_i2c_chan * amdgpu_i2c_lookup(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 071f187f5e28..2ea98ec60220 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -89,16 +89,14 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, /** * amdgpu_ib_free - free an IB (Indirect Buffer) * - * @adev: amdgpu_device pointer * @ib: IB object to free * @f: the fence SA bo need wait on for the ib alloation * * Free an IB (all asics). 
*/ -void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, - struct dma_fence *f) +void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f) { - amdgpu_sa_bo_free(adev, &ib->sa_bo, f); + amdgpu_sa_bo_free(&ib->sa_bo, f); } /** @@ -299,7 +297,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs, amdgpu_ring_patch_cond_exec(ring, cond_exec); ring->current_ctx = fence_ctx; - if (vm && ring->funcs->emit_switch_buffer) + if (job && ring->funcs->emit_switch_buffer) amdgpu_ring_emit_switch_buffer(ring); if (ring->funcs->emit_wave_limit && diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 8e712a11aba5..4c4e087230ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -209,7 +209,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_ring *ring, return 0; } - fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL); + fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_NOWAIT); if (!fences) return -ENOMEM; @@ -287,46 +287,34 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, (*id)->flushed_updates < updates || !(*id)->last_flush || ((*id)->last_flush->context != fence_context && - !dma_fence_is_signaled((*id)->last_flush))) { + !dma_fence_is_signaled((*id)->last_flush))) + needs_flush = true; + + if ((*id)->owner != vm->immediate.fence_context || + (!adev->vm_manager.concurrent_flush && needs_flush)) { struct dma_fence *tmp; - /* Wait for the gang to be assembled before using a - * reserved VMID or otherwise the gang could deadlock. + /* Don't use per engine and per process VMID at the + * same time */ - tmp = amdgpu_device_get_gang(adev); - if (!dma_fence_is_signaled(tmp) && tmp != job->gang_submit) { + if (adev->vm_manager.concurrent_flush) + ring = NULL; + + /* to prevent one context starved by another context */ + (*id)->pd_gpu_addr = 0; + tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); + if (tmp) { *id = NULL; - *fence = tmp; + *fence = dma_fence_get(tmp); return 0; } - dma_fence_put(tmp); - - /* Make sure the id is owned by the gang before proceeding */ - if (!job->gang_submit || - (*id)->owner != vm->immediate.fence_context) { - - /* Don't use per engine and per process VMID at the - * same time - */ - if (adev->vm_manager.concurrent_flush) - ring = NULL; - - /* to prevent one context starved by another context */ - (*id)->pd_gpu_addr = 0; - tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); - if (tmp) { - *id = NULL; - *fence = dma_fence_get(tmp); - return 0; - } - } - needs_flush = true; } /* Good we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished); + r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished, + GFP_NOWAIT); if (r) return r; @@ -385,7 +373,8 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, * user of the VMID. 
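The reworked amdgpu_vmid_grab_reserved() above first computes whether the reserved VMID needs a flush (stale page-directory address, newer VM updates, or an unsignaled last flush) and then, when the VMID belongs to another context or the hardware cannot flush concurrently, makes the submission wait on the VMID's active fences before reusing it. A compact userspace model of that three-way decision, under simplified field names that are not the driver's:

/* Userspace model of the reserved-VMID reuse decision: reuse as-is, reuse
 * with a flush, or wait for the current users to drain.
 */
#include <stdbool.h>
#include <stdio.h>

struct vmid {
	unsigned long owner;           /* fence context that owns the id */
	unsigned long long pd_addr;    /* page directory last flushed */
	unsigned long long flushed_updates;
	bool last_flush_signaled;
	bool has_active_users;
};

enum action { REUSE, REUSE_WITH_FLUSH, WAIT_FOR_IDLE };

static enum action grab_reserved(struct vmid *id, unsigned long owner,
				 unsigned long long pd_addr,
				 unsigned long long updates,
				 bool concurrent_flush)
{
	bool needs_flush = id->pd_addr != pd_addr ||
			   id->flushed_updates < updates ||
			   !id->last_flush_signaled;

	if (id->owner != owner || (!concurrent_flush && needs_flush)) {
		if (id->has_active_users)
			return WAIT_FOR_IDLE;   /* come back once users drain */
		needs_flush = true;             /* ownership change forces flush */
	}
	return needs_flush ? REUSE_WITH_FLUSH : REUSE;
}

int main(void)
{
	struct vmid id = { .owner = 1, .pd_addr = 0x1000,
			   .flushed_updates = 5, .last_flush_signaled = true,
			   .has_active_users = false };

	printf("same owner, same PD:   %d\n", grab_reserved(&id, 1, 0x1000, 5, false));
	printf("same owner, new PD:    %d\n", grab_reserved(&id, 1, 0x2000, 5, false));
	printf("different owner, idle: %d\n", grab_reserved(&id, 2, 0x1000, 5, false));
	return 0;
}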
*/ r = amdgpu_sync_fence(&(*id)->active, - &job->base.s_fence->finished); + &job->base.s_fence->finished, + GFP_NOWAIT); if (r) return r; @@ -422,7 +411,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r || !idle) goto error; - if (amdgpu_vmid_uses_reserved(adev, vm, vmhub)) { + if (amdgpu_vmid_uses_reserved(vm, vmhub)) { r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); if (r || !id) goto error; @@ -437,7 +426,8 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, /* Remember this submission as user of the VMID */ r = amdgpu_sync_fence(&id->active, - &job->base.s_fence->finished); + &job->base.s_fence->finished, + GFP_NOWAIT); if (r) goto error; @@ -474,19 +464,14 @@ error: /* * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID - * @adev: amdgpu_device pointer * @vm: the VM to check * @vmhub: the VMHUB which will be used * * Returns: True if the VM will use a reserved VMID. */ -bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, unsigned int vmhub) +bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) { - return vm->reserved_vmid[vmhub] || - (adev->enforce_isolation[(vm->root.bo->xcp_id != AMDGPU_XCP_NO_PARTITION) ? - vm->root.bo->xcp_id : 0] && - AMDGPU_IS_GFXHUB(vmhub)); + return vm->reserved_vmid[vmhub]; } int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 4012fb2dd08a..240fa6751260 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -78,8 +78,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); -bool amdgpu_vmid_uses_reserved(struct amdgpu_device *adev, - struct amdgpu_vm *vm, unsigned int vmhub); +bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, unsigned vmhub); void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index f3b0aaf3ebc6..901f8b12c672 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -298,3 +298,9 @@ uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr, dw2 = le32_to_cpu(ih->ring[ring_index + 2]); return dw1 | ((u64)(dw2 & 0xffff) << 32); } + +const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) +{ + return ih == &adev->irq.ih ? "ih" : ih == &adev->irq.ih_soft ? "sw ih" : + ih == &adev->irq.ih1 ? "ih1" : ih == &adev->irq.ih2 ? 
"ih2" : "unknown"; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 508f02eb0cf8..b0a88f92cd82 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -78,6 +78,9 @@ struct amdgpu_ih_ring { #define amdgpu_ih_ts_after(t1, t2) \ (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) > 0LL) +#define amdgpu_ih_ts_after_or_equal(t1, t2) \ + (((int64_t)((t2) << 16) - (int64_t)((t1) << 16)) >= 0LL) + /* provided by the ih block */ struct amdgpu_ih_funcs { /* ring read/write ptr handling, called from interrupt context */ @@ -110,4 +113,5 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry); uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr, signed int offset); +const char *amdgpu_ih_ring_name(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c index 263ce1811cc8..43fc941dfa57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.c @@ -77,7 +77,8 @@ static int isp_load_fw_by_psp(struct amdgpu_device *adev) sizeof(ucode_prefix)); /* read isp fw */ - r = amdgpu_ucode_request(adev, &adev->isp.fw, "amdgpu/%s.bin", ucode_prefix); + r = amdgpu_ucode_request(adev, &adev->isp.fw, AMDGPU_UCODE_OPTIONAL, + "amdgpu/%s.bin", ucode_prefix); if (r) { amdgpu_ucode_release(&adev->isp.fw); return r; @@ -123,18 +124,18 @@ static int isp_early_init(struct amdgpu_ip_block *ip_block) return 0; } -static bool isp_is_idle(void *handle) +static bool isp_is_idle(struct amdgpu_ip_block *ip_block) { return true; } -static int isp_set_clockgating_state(void *handle, +static int isp_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int isp_set_powergating_state(void *handle, +static int isp_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h index b03664c66dd6..4f3b7b5d9c1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_isp.h @@ -50,6 +50,7 @@ struct amdgpu_isp { struct mfd_cell *isp_cell; struct resource *isp_res; struct resource *isp_i2c_res; + struct resource *isp_gpio_res; struct isp_platform_data *isp_pdata; unsigned int harvest_config; const struct firmware *fw; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index a21c510c408e..acb21fc8b3ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -102,8 +102,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) return DRM_GPU_SCHED_STAT_ENODEV; } - adev->job_hang = true; - /* * Do the coredump immediately after a job timeout to get a very * close dump/snapshot/representation of GPU's current error status @@ -132,29 +130,48 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) amdgpu_vm_put_task_info(ti); } - dma_fence_set_error(&s_job->s_fence->finished, -ETIME); - /* attempt a per ring reset */ - if (amdgpu_gpu_recovery && - ring->funcs->reset) { + if (unlikely(adev->debug_disable_gpu_ring_reset)) { + dev_err(adev->dev, "Ring reset disabled by debug mask\n"); + } else if (amdgpu_gpu_recovery && ring->funcs->reset) { + bool is_guilty; + dev_err(adev->dev, "Starting %s ring 
reset\n", s_job->sched->name); /* stop the scheduler, but don't mess with the * bad job yet because if ring reset fails * we'll fall back to full GPU reset. */ drm_sched_wqueue_stop(&ring->sched); + + /* for engine resets, we need to reset the engine, + * but individual queues may be unaffected. + * check here to make sure the accounting is correct. + */ + if (ring->funcs->is_guilty) + is_guilty = ring->funcs->is_guilty(ring); + else + is_guilty = true; + + if (is_guilty) + dma_fence_set_error(&s_job->s_fence->finished, -ETIME); + r = amdgpu_ring_reset(ring, job->vmid); if (!r) { if (amdgpu_ring_sched_ready(ring)) drm_sched_stop(&ring->sched, s_job); - atomic_inc(&ring->adev->gpu_reset_counter); - amdgpu_fence_driver_force_completion(ring); + if (is_guilty) { + atomic_inc(&ring->adev->gpu_reset_counter); + amdgpu_fence_driver_force_completion(ring); + } if (amdgpu_ring_sched_ready(ring)) drm_sched_start(&ring->sched, 0); + dev_err(adev->dev, "Ring %s reset succeeded\n", ring->sched.name); + drm_dev_wedged_event(adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE); goto exit; } dev_err(adev->dev, "Ring %s reset failure\n", ring->sched.name); } + dma_fence_set_error(&s_job->s_fence->finished, -ETIME); if (amdgpu_device_should_recover_gpu(ring->adev)) { struct amdgpu_reset_context reset_context; @@ -181,7 +198,6 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) } exit: - adev->job_hang = false; drm_dev_exit(idx); return DRM_GPU_SCHED_STAT_NOMINAL; } @@ -197,11 +213,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (!*job) return -ENOMEM; - /* - * Initialize the scheduler to at least some ring so that we always - * have a pointer to adev. - */ - (*job)->base.sched = &adev->rings[0]->sched; (*job)->vm = vm; amdgpu_sync_create(&(*job)->explicit_sync); @@ -267,7 +278,7 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) f = NULL; for (i = 0; i < job->num_ibs; ++i) - amdgpu_ib_free(NULL, &job->ibs[i], f); + amdgpu_ib_free(&job->ibs[i], f); } static void amdgpu_job_free_cb(struct drm_sched_job *s_job) @@ -350,25 +361,40 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, { struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched); struct amdgpu_job *job = to_amdgpu_job(sched_job); - struct dma_fence *fence = NULL; + struct dma_fence *fence; int r; r = drm_sched_entity_error(s_entity); if (r) goto error; - if (job->gang_submit) + if (job->gang_submit) { fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); + if (fence) + return fence; + } + + fence = amdgpu_device_enforce_isolation(ring->adev, ring, job); + if (fence) + return fence; - if (!fence && job->vm && !job->vmid) { + if (job->vm && !job->vmid) { r = amdgpu_vmid_grab(job->vm, ring, job, &fence); if (r) { dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r); goto error; } + /* + * The VM structure might be released after the VMID is + * assigned, we had multiple problems with people trying to use + * the VM pointer so better set it to NULL. + */ + if (!fence) + job->vm = NULL; + return fence; } - return fence; + return NULL; error: dma_fence_set_error(&job->base.s_fence->finished, r); @@ -412,8 +438,24 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job) return fence; } -#define to_drm_sched_job(sched_job) \ - container_of((sched_job), struct drm_sched_job, queue_node) +/* + * This is a duplicate function from DRM scheduler sched_internal.h. 
+ * Plan is to remove it when amdgpu_job_stop_all_jobs_on_sched is removed, due + * latter being incorrect and racy. + * + * See https://lore.kernel.org/amd-gfx/44edde63-7181-44fb-a4f7-94e50514f539@amd.com/ + */ +static struct drm_sched_job * +drm_sched_entity_queue_pop(struct drm_sched_entity *entity) +{ + struct spsc_node *node; + + node = spsc_queue_pop(&entity->job_queue); + if (!node) + return NULL; + + return container_of(node, struct drm_sched_job, queue_node); +} void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) { @@ -426,7 +468,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched) struct drm_sched_rq *rq = sched->sched_rq[i]; spin_lock(&rq->lock); list_for_each_entry(s_entity, &rq->entities, list) { - while ((s_job = to_drm_sched_job(spsc_queue_pop(&s_entity->job_queue)))) { + while ((s_job = drm_sched_entity_queue_pop(s_entity))) { struct drm_sched_fence *s_fence = s_job->s_fence; dma_fence_signal(&s_fence->scheduled); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index b6d2eb049f54..dda29132dfb2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -33,6 +33,7 @@ #define JPEG_IDLE_TIMEOUT msecs_to_jiffies(1000) static void amdgpu_jpeg_idle_work_handler(struct work_struct *work); +static void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev); int amdgpu_jpeg_sw_init(struct amdgpu_device *adev) { @@ -85,6 +86,9 @@ int amdgpu_jpeg_sw_fini(struct amdgpu_device *adev) amdgpu_ring_fini(&adev->jpeg.inst[i].ring_dec[j]); } + if (adev->jpeg.reg_list) + amdgpu_jpeg_reg_dump_fini(adev); + mutex_destroy(&adev->jpeg.jpeg_pg_lock); return 0; @@ -452,3 +456,83 @@ void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_jpeg_reset_mask); } } + +int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev, + const struct amdgpu_hwip_reg_entry *reg, u32 count) +{ + adev->jpeg.ip_dump = kcalloc(adev->jpeg.num_jpeg_inst * count, + sizeof(uint32_t), GFP_KERNEL); + if (!adev->jpeg.ip_dump) { + DRM_ERROR("Failed to allocate memory for JPEG IP Dump\n"); + return -ENOMEM; + } + adev->jpeg.reg_list = reg; + adev->jpeg.reg_count = count; + + return 0; +} + +static void amdgpu_jpeg_reg_dump_fini(struct amdgpu_device *adev) +{ + kfree(adev->jpeg.ip_dump); + adev->jpeg.reg_list = NULL; + adev->jpeg.reg_count = 0; +} + +void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + u32 inst_off, inst_id, is_powered; + int i, j; + + if (!adev->jpeg.ip_dump) + return; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; i++) { + if (adev->jpeg.harvest_config & (1 << i)) + continue; + + inst_id = GET_INST(JPEG, i); + inst_off = i * adev->jpeg.reg_count; + /* check power status from UVD_JPEG_POWER_STATUS */ + adev->jpeg.ip_dump[inst_off] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->jpeg.reg_list[0], + inst_id)); + is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1); + + if (is_powered) + for (j = 1; j < adev->jpeg.reg_count; j++) + adev->jpeg.ip_dump[inst_off + j] = + RREG32(SOC15_REG_ENTRY_OFFSET_INST(adev->jpeg.reg_list[j], + inst_id)); + } +} + +void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +{ + struct amdgpu_device *adev = ip_block->adev; + u32 inst_off, is_powered; + int i, j; + + if (!adev->jpeg.ip_dump) + return; + + drm_printf(p, "num_instances:%d\n", adev->jpeg.num_jpeg_inst); + for (i = 0; i < 
adev->jpeg.num_jpeg_inst; i++) { + if (adev->jpeg.harvest_config & (1 << i)) { + drm_printf(p, "\nHarvested Instance:JPEG%d Skipping dump\n", i); + continue; + } + + inst_off = i * adev->jpeg.reg_count; + is_powered = ((adev->jpeg.ip_dump[inst_off] & 0x1) != 1); + + if (is_powered) { + drm_printf(p, "Active Instance:JPEG%d\n", i); + for (j = 0; j < adev->jpeg.reg_count; j++) + drm_printf(p, "%-50s \t 0x%08x\n", adev->jpeg.reg_list[j].reg_name, + adev->jpeg.ip_dump[inst_off + j]); + } else + drm_printf(p, "\nInactive Instance:JPEG%d\n", i); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index 3eb4a4653fce..4f0775e39b54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -27,7 +27,8 @@ #include "amdgpu_ras.h" #define AMDGPU_MAX_JPEG_INSTANCES 4 -#define AMDGPU_MAX_JPEG_RINGS 8 +#define AMDGPU_MAX_JPEG_RINGS 10 +#define AMDGPU_MAX_JPEG_RINGS_4_0_3 8 #define AMDGPU_JPEG_HARVEST_JPEG0 (1 << 0) #define AMDGPU_JPEG_HARVEST_JPEG1 (1 << 1) @@ -91,6 +92,14 @@ *adev->jpeg.inst[inst_idx].dpg_sram_curr_addr++ = value; \ } while (0) +struct amdgpu_hwip_reg_entry; + +enum amdgpu_jpeg_caps { + AMDGPU_JPEG_RRMT_ENABLED, +}; + +#define AMDGPU_JPEG_CAPS(caps) BIT(AMDGPU_JPEG_##caps) + struct amdgpu_jpeg_reg{ unsigned jpeg_pitch[AMDGPU_MAX_JPEG_RINGS]; }; @@ -129,6 +138,10 @@ struct amdgpu_jpeg { uint8_t num_inst_per_aid; bool indirect_sram; uint32_t supported_reset; + uint32_t caps; + u32 *ip_dump; + u32 reg_count; + const struct amdgpu_hwip_reg_entry *reg_list; }; int amdgpu_jpeg_sw_init(struct amdgpu_device *adev); @@ -153,5 +166,9 @@ int amdgpu_jpeg_psp_update_sram(struct amdgpu_device *adev, int inst_idx, void amdgpu_debugfs_jpeg_sched_mask_init(struct amdgpu_device *adev); int amdgpu_jpeg_sysfs_reset_mask_init(struct amdgpu_device *adev); void amdgpu_jpeg_sysfs_reset_mask_fini(struct amdgpu_device *adev); +int amdgpu_jpeg_reg_dump_init(struct amdgpu_device *adev, + const struct amdgpu_hwip_reg_entry *reg, u32 count); +void amdgpu_jpeg_dump_ip_state(struct amdgpu_ip_block *ip_block); +void amdgpu_jpeg_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p); #endif /*__AMDGPU_JPEG_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 016a6f6c4267..27bfe9c8af06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -459,7 +459,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev, if (adev->vcn.harvest_config & (1 << i)) continue; - for (j = 0; j < adev->vcn.num_enc_rings; j++) + for (j = 0; j < adev->vcn.inst[i].num_enc_rings; j++) if (adev->vcn.inst[i].ring_enc[j].sched.ready) ++num_rings; } @@ -846,7 +846,7 @@ out: case AMDGPU_INFO_DEV_INFO: { struct drm_amdgpu_info_device *dev_info; uint64_t vm_size; - uint32_t pcie_gen_mask; + uint32_t pcie_gen_mask, pcie_width_mask; dev_info = kzalloc(sizeof(*dev_info), GFP_KERNEL); if (!dev_info) @@ -888,6 +888,15 @@ out: if (adev->gfx.config.ta_cntl2_truncate_coord_mode) dev_info->ids_flags |= AMDGPU_IDS_FLAGS_CONFORMANT_TRUNC_COORD; + if (amdgpu_passthrough(adev)) + dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_PT << + AMDGPU_IDS_FLAGS_MODE_SHIFT) & + AMDGPU_IDS_FLAGS_MODE_MASK; + else if (amdgpu_sriov_vf(adev)) + dev_info->ids_flags |= (AMDGPU_IDS_FLAGS_MODE_VF << + AMDGPU_IDS_FLAGS_MODE_SHIFT) & + AMDGPU_IDS_FLAGS_MODE_MASK; + vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE; vm_size -= AMDGPU_VA_RESERVED_TOP; @@ -934,15 
+943,18 @@ out: dev_info->tcc_disabled_mask = adev->gfx.config.tcc_disabled_mask; /* Combine the chip gen mask with the platform (CPU/mobo) mask. */ - pcie_gen_mask = adev->pm.pcie_gen_mask & (adev->pm.pcie_gen_mask >> 16); + pcie_gen_mask = adev->pm.pcie_gen_mask & + (adev->pm.pcie_gen_mask >> CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT); + pcie_width_mask = adev->pm.pcie_mlw_mask & + (adev->pm.pcie_mlw_mask >> CAIL_PCIE_LINK_WIDTH_SUPPORT_SHIFT); dev_info->pcie_gen = fls(pcie_gen_mask); dev_info->pcie_num_lanes = - adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 ? 32 : - adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 ? 16 : - adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 ? 12 : - adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 ? 8 : - adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 ? 4 : - adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 ? 2 : 1; + pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 ? 32 : + pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 ? 16 : + pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 ? 12 : + pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 ? 8 : + pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 ? 4 : + pcie_width_mask & CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 ? 2 : 1; dev_info->tcp_cache_size = adev->gfx.config.gc_tcp_l1_size; dev_info->num_sqc_per_wgp = adev->gfx.config.gc_num_sqc_per_wgp; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c index 59ec20b07a6a..85f774063f9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c @@ -145,9 +145,8 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.vmid_mask_gfxhub = 0xffffff00; for (i = 0; i < AMDGPU_MES_MAX_COMPUTE_PIPES; i++) { - /* use only 1st MEC pipes */ - if (i >= adev->gfx.mec.num_pipe_per_mec) - continue; + if (i >= (adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_mec)) + break; adev->mes.compute_hqd_mask[i] = 0xc; } @@ -155,14 +154,9 @@ int amdgpu_mes_init(struct amdgpu_device *adev) adev->mes.gfx_hqd_mask[i] = i ? 0 : 0xfffffffe; for (i = 0; i < AMDGPU_MES_MAX_SDMA_PIPES; i++) { - if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < - IP_VERSION(6, 0, 0)) - adev->mes.sdma_hqd_mask[i] = i ? 0 : 0x3fc; - /* zero sdma_hqd_mask for non-existent engine */ - else if (adev->sdma.num_instances == 1) - adev->mes.sdma_hqd_mask[i] = i ? 0 : 0xfc; - else - adev->mes.sdma_hqd_mask[i] = 0xfc; + if (i >= adev->sdma.num_instances) + break; + adev->mes.sdma_hqd_mask[i] = 0xfc; } for (i = 0; i < AMDGPU_MAX_MES_PIPES; i++) { @@ -1336,14 +1330,14 @@ int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev, DRM_ERROR("failed to do vm_bo_update on meta data\n"); goto error_del_bo_va; } - amdgpu_sync_fence(&sync, bo_va->last_pt_update); + amdgpu_sync_fence(&sync, bo_va->last_pt_update, GFP_KERNEL); r = amdgpu_vm_update_pdes(adev, vm, false); if (r) { DRM_ERROR("failed to update pdes on meta data\n"); goto error_del_bo_va; } - amdgpu_sync_fence(&sync, vm->last_update); + amdgpu_sync_fence(&sync, vm->last_update, GFP_KERNEL); amdgpu_sync_wait(&sync, false); drm_exec_fini(&exec); @@ -1610,10 +1604,12 @@ int amdgpu_mes_init_microcode(struct amdgpu_device *adev, int pipe) pipe == AMDGPU_MES_SCHED_PIPE ? 
"" : "1"); } - r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], "%s", fw_name); + r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], AMDGPU_UCODE_REQUIRED, + "%s", fw_name); if (r && need_retry && pipe == AMDGPU_MES_SCHED_PIPE) { dev_info(adev->dev, "try to fall back to %s_mes.bin\n", ucode_prefix); r = amdgpu_ucode_request(adev, &adev->mes.fw[pipe], + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mes.bin", ucode_prefix); } @@ -1679,7 +1675,8 @@ bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) } /* Fix me -- node_id is used to identify the correct MES instances in the future */ -int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable) +static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, + uint32_t node_id, bool enable) { struct mes_misc_op_input op_input = {0}; int r; @@ -1701,6 +1698,23 @@ error: return r; } +int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev) +{ + int i, r = 0; + + if (adev->enable_mes && adev->gfx.enable_cleaner_shader) { + mutex_lock(&adev->enforce_isolation_mutex); + for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { + if (adev->enforce_isolation[i]) + r |= amdgpu_mes_set_enforce_isolation(adev, i, true); + else + r |= amdgpu_mes_set_enforce_isolation(adev, i, false); + } + mutex_unlock(&adev->enforce_isolation_mutex); + } + return r; +} + #if defined(CONFIG_DEBUG_FS) static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index c6f93cbd6739..da2c9a8cb3e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -40,7 +40,7 @@ #define AMDGPU_MES_VERSION_MASK 0x00000fff #define AMDGPU_MES_API_VERSION_MASK 0x00fff000 #define AMDGPU_MES_FEAT_VERSION_MASK 0xff000000 -#define AMDGPU_MES_MSCRATCH_SIZE 0x8000 +#define AMDGPU_MES_MSCRATCH_SIZE 0x40000 enum amdgpu_mes_priority_level { AMDGPU_MES_PRIORITY_LEVEL_LOW = 0, @@ -56,7 +56,7 @@ enum amdgpu_mes_priority_level { struct amdgpu_mes_funcs; -enum admgpu_mes_pipe { +enum amdgpu_mes_pipe { AMDGPU_MES_SCHED_PIPE = 0, AMDGPU_MES_KIQ_PIPE, AMDGPU_MAX_MES_PIPES = 2, @@ -143,9 +143,9 @@ struct amdgpu_mes { const struct amdgpu_mes_funcs *funcs; /* mes resource_1 bo*/ - struct amdgpu_bo *resource_1; - uint64_t resource_1_gpu_addr; - void *resource_1_addr; + struct amdgpu_bo *resource_1[AMDGPU_MAX_MES_PIPES]; + uint64_t resource_1_gpu_addr[AMDGPU_MAX_MES_PIPES]; + void *resource_1_addr[AMDGPU_MAX_MES_PIPES]; }; @@ -534,6 +534,6 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); -int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable); +int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev); #endif /* __AMDGPU_MES_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 5e3faefc5510..6da4f946cac0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -609,6 +609,7 @@ struct amdgpu_i2c_adapter { struct i2c_adapter base; struct ddc_service *ddc_service; + bool oem; }; #define TO_DM_AUX(x) container_of((x), struct amdgpu_dm_dp_aux, aux) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6852d50caa89..80cd6f5273db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -41,6 +41,7 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_vram_mgr.h" #include "amdgpu_vm.h" +#include "amdgpu_dma_buf.h" /** * DOC: amdgpu_object @@ -324,6 +325,9 @@ error_free: * * Allocates and pins a BO for kernel internal use. * + * This function is exported to allow the V4L2 isp device + * external to drm device to create and access the kernel BO. + * * Note: For bo_ptr new BO is only created if bo_ptr points to NULL. * * Returns: @@ -347,6 +351,76 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, return 0; } +EXPORT_SYMBOL(amdgpu_bo_create_kernel); + +/** + * amdgpu_bo_create_isp_user - create user BO for isp + * + * @adev: amdgpu device object + * @dma_buf: DMABUF handle for isp buffer + * @domain: where to place it + * @bo: used to initialize BOs in structures + * @gpu_addr: GPU addr of the pinned BO + * + * Imports isp DMABUF to allocate and pin a user BO for isp internal use. It does + * GART alloc to generate gpu_addr for BO to make it accessible through the + * GART aperture for ISP HW. + * + * This function is exported to allow the V4L2 isp device external to drm device + * to create and access the isp user BO. + * + * Returns: + * 0 on success, negative error code otherwise. + */ +int amdgpu_bo_create_isp_user(struct amdgpu_device *adev, + struct dma_buf *dma_buf, u32 domain, struct amdgpu_bo **bo, + u64 *gpu_addr) + +{ + struct drm_gem_object *gem_obj; + int r; + + gem_obj = amdgpu_gem_prime_import(&adev->ddev, dma_buf); + *bo = gem_to_amdgpu_bo(gem_obj); + if (!(*bo)) { + dev_err(adev->dev, "failed to get valid isp user bo\n"); + return -EINVAL; + } + + r = amdgpu_bo_reserve(*bo, false); + if (r) { + dev_err(adev->dev, "(%d) failed to reserve isp user bo\n", r); + return r; + } + + r = amdgpu_bo_pin(*bo, domain); + if (r) { + dev_err(adev->dev, "(%d) isp user bo pin failed\n", r); + goto error_unreserve; + } + + r = amdgpu_ttm_alloc_gart(&(*bo)->tbo); + if (r) { + dev_err(adev->dev, "%p bind failed\n", *bo); + goto error_unpin; + } + + if (!WARN_ON(!gpu_addr)) + *gpu_addr = amdgpu_bo_gpu_offset(*bo); + + amdgpu_bo_unreserve(*bo); + + return 0; + +error_unpin: + amdgpu_bo_unpin(*bo); +error_unreserve: + amdgpu_bo_unreserve(*bo); + amdgpu_bo_unref(bo); + + return r; +} +EXPORT_SYMBOL(amdgpu_bo_create_isp_user); /** * amdgpu_bo_create_kernel_at - create BO for kernel use at specific location @@ -423,6 +497,9 @@ error: * @cpu_addr: pointer to where the BO's CPU memory space address was stored * * unmaps and unpin a BO for kernel internal use. + * + * This function is exported to allow the V4L2 isp device + * external to drm device to free the kernel BO. */ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr) @@ -447,6 +524,30 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, if (cpu_addr) *cpu_addr = NULL; } +EXPORT_SYMBOL(amdgpu_bo_free_kernel); + +/** + * amdgpu_bo_free_isp_user - free BO for isp use + * + * @bo: amdgpu isp user BO to free + * + * unpin and unref BO for isp internal use. + * + * This function is exported to allow the V4L2 isp device + * external to drm device to free the isp user BO. 
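amdgpu_bo_create_isp_user() above acquires its resources in stages (DMA-BUF import, reserve, pin, GART bind) and unwinds them in reverse with goto labels when a later stage fails. The unwind pattern in isolation, with stub stages standing in for the real BO and GART calls:

/* Staged acquire with reverse-order unwind via goto, as used in the isp
 * user-BO helper above. The three stages here are stand-ins.
 */
#include <stdio.h>

static int stage_reserve(void) { printf("reserve\n"); return 0; }
static int stage_pin(void)     { printf("pin\n");     return 0; }
static int stage_bind(void)    { printf("bind\n");    return -1; } /* fails */

static void undo_pin(void)     { printf("unpin\n");     }
static void undo_reserve(void) { printf("unreserve\n"); }

static int create(void)
{
	int r;

	r = stage_reserve();
	if (r)
		return r;

	r = stage_pin();
	if (r)
		goto error_unreserve;

	r = stage_bind();
	if (r)
		goto error_unpin;

	undo_reserve();          /* success path still drops the reservation */
	return 0;

error_unpin:
	undo_pin();
error_unreserve:
	undo_reserve();
	return r;
}

int main(void)
{
	printf("create() returned %d\n", create());
	return 0;
}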
+ */ +void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo) +{ + if (bo == NULL) + return; + + if (amdgpu_bo_reserve(bo, true) == 0) { + amdgpu_bo_unpin(bo); + amdgpu_bo_unreserve(bo); + } + amdgpu_bo_unref(&bo); +} +EXPORT_SYMBOL(amdgpu_bo_free_isp_user); /* Validate bo size is bit bigger than the request domain */ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev, @@ -1150,7 +1251,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict, struct ttm_resource *new_mem) { - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); struct ttm_resource *old_mem = bo->resource; struct amdgpu_bo *abo; @@ -1158,7 +1258,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, return; abo = ttm_to_amdgpu_bo(bo); - amdgpu_vm_bo_invalidate(adev, abo, evict); + amdgpu_vm_bo_move(abo, new_mem, evict); amdgpu_bo_kunmap(abo); @@ -1171,75 +1271,6 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, old_mem ? old_mem->mem_type : -1); } -void amdgpu_bo_get_memory(struct amdgpu_bo *bo, - struct amdgpu_mem_stats *stats, - unsigned int sz) -{ - const unsigned int domain_to_pl[] = { - [ilog2(AMDGPU_GEM_DOMAIN_CPU)] = TTM_PL_SYSTEM, - [ilog2(AMDGPU_GEM_DOMAIN_GTT)] = TTM_PL_TT, - [ilog2(AMDGPU_GEM_DOMAIN_VRAM)] = TTM_PL_VRAM, - [ilog2(AMDGPU_GEM_DOMAIN_GDS)] = AMDGPU_PL_GDS, - [ilog2(AMDGPU_GEM_DOMAIN_GWS)] = AMDGPU_PL_GWS, - [ilog2(AMDGPU_GEM_DOMAIN_OA)] = AMDGPU_PL_OA, - [ilog2(AMDGPU_GEM_DOMAIN_DOORBELL)] = AMDGPU_PL_DOORBELL, - }; - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); - struct ttm_resource *res = bo->tbo.resource; - struct drm_gem_object *obj = &bo->tbo.base; - uint64_t size = amdgpu_bo_size(bo); - unsigned int type; - - if (!res) { - /* - * If no backing store use one of the preferred domain for basic - * stats. We take the MSB since that should give a reasonable - * view. - */ - BUILD_BUG_ON(TTM_PL_VRAM < TTM_PL_TT || - TTM_PL_VRAM < TTM_PL_SYSTEM); - type = fls(bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK); - if (!type) - return; - type--; - if (drm_WARN_ON_ONCE(&adev->ddev, - type >= ARRAY_SIZE(domain_to_pl))) - return; - type = domain_to_pl[type]; - } else { - type = res->mem_type; - } - - if (drm_WARN_ON_ONCE(&adev->ddev, type >= sz)) - return; - - /* DRM stats common fields: */ - - if (drm_gem_object_is_shared_for_memory_stats(obj)) - stats[type].drm.shared += size; - else - stats[type].drm.private += size; - - if (res) { - stats[type].drm.resident += size; - - if (!dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_BOOKKEEP)) - stats[type].drm.active += size; - else if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) - stats[type].drm.purgeable += size; - } - - /* amdgpu specific stats: */ - - if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) { - stats[TTM_PL_VRAM].requested += size; - if (type != TTM_PL_VRAM) - stats[TTM_PL_VRAM].evicted += size; - } else if (bo->preferred_domains & AMDGPU_GEM_DOMAIN_GTT) { - stats[TTM_PL_TT].requested += size; - } -} - /** * amdgpu_bo_release_notify - notification about a BO being released * @bo: pointer to a buffer object @@ -1264,28 +1295,36 @@ void amdgpu_bo_release_notify(struct ttm_buffer_object *bo) if (abo->kfd_bo) amdgpu_amdkfd_release_notify(abo); - /* We only remove the fence if the resv has individualized. 
*/ - WARN_ON_ONCE(bo->type == ttm_bo_type_kernel - && bo->base.resv != &bo->base._resv); - if (bo->base.resv == &bo->base._resv) - amdgpu_amdkfd_remove_fence_on_pt_pd_bos(abo); + /* + * We lock the private dma_resv object here and since the BO is about to + * be released nobody else should have a pointer to it. + * So when this locking here fails something is wrong with the reference + * counting. + */ + if (WARN_ON_ONCE(!dma_resv_trylock(&bo->base._resv))) + return; + + amdgpu_amdkfd_remove_all_eviction_fences(abo); if (!bo->resource || bo->resource->mem_type != TTM_PL_VRAM || !(abo->flags & AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE) || adev->in_suspend || drm_dev_is_unplugged(adev_to_drm(adev))) - return; + goto out; - if (WARN_ON_ONCE(!dma_resv_trylock(bo->base.resv))) - return; + r = dma_resv_reserve_fences(&bo->base._resv, 1); + if (r) + goto out; - r = amdgpu_fill_buffer(abo, 0, bo->base.resv, &fence, true); - if (!WARN_ON(r)) { - amdgpu_vram_mgr_set_cleared(bo->resource); - amdgpu_bo_fence(abo, fence, false); - dma_fence_put(fence); - } + r = amdgpu_fill_buffer(abo, 0, &bo->base._resv, &fence, true); + if (WARN_ON(r)) + goto out; + + amdgpu_vram_mgr_set_cleared(bo->resource); + dma_resv_add_fence(&bo->base._resv, fence, DMA_RESV_USAGE_KERNEL); + dma_fence_put(fence); - dma_resv_unlock(bo->base.resv); +out: + dma_resv_unlock(&bo->base._resv); } /** @@ -1455,6 +1494,45 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo) } /** + * amdgpu_bo_mem_stats_placement - bo placement for memory accounting + * @bo: the buffer object we should look at + * + * BO can have multiple preferred placements, to avoid double counting we want + * to file it under a single placement for memory stats. + * Luckily, if we take the highest set bit in preferred_domains the result is + * quite sensible. + * + * Returns: + * Which of the placements should the BO be accounted under. 
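amdgpu_bo_mem_stats_placement(), documented above, collapses a multi-bit preferred-domain mask into a single accounting bucket by keeping only the highest set bit (rounddown_pow_of_two() of the mask). A userspace equivalent of that mapping; the domain bit values below are illustrative, not the AMDGPU_GEM_DOMAIN_* encoding:

/* Pick one accounting bucket from a multi-bit placement mask by keeping
 * only the highest set bit.
 */
#include <stdint.h>
#include <stdio.h>

#define DOMAIN_CPU   (1u << 0)
#define DOMAIN_GTT   (1u << 1)
#define DOMAIN_VRAM  (1u << 2)

static uint32_t highest_bit(uint32_t mask)
{
	while (mask & (mask - 1))   /* clear low bits until one remains */
		mask &= mask - 1;
	return mask;
}

static const char *stats_placement(uint32_t domains)
{
	switch (highest_bit(domains)) {
	case DOMAIN_VRAM: return "VRAM";
	case DOMAIN_GTT:  return "GTT";
	case DOMAIN_CPU:  return "SYSTEM";
	default:          return "SYSTEM";   /* no preference recorded */
	}
}

int main(void)
{
	printf("VRAM|GTT -> %s\n", stats_placement(DOMAIN_VRAM | DOMAIN_GTT));
	printf("GTT|CPU  -> %s\n", stats_placement(DOMAIN_GTT | DOMAIN_CPU));
	printf("0        -> %s\n", stats_placement(0));
	return 0;
}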
+ */ +uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo) +{ + uint32_t domain = bo->preferred_domains & AMDGPU_GEM_DOMAIN_MASK; + + if (!domain) + return TTM_PL_SYSTEM; + + switch (rounddown_pow_of_two(domain)) { + case AMDGPU_GEM_DOMAIN_CPU: + return TTM_PL_SYSTEM; + case AMDGPU_GEM_DOMAIN_GTT: + return TTM_PL_TT; + case AMDGPU_GEM_DOMAIN_VRAM: + return TTM_PL_VRAM; + case AMDGPU_GEM_DOMAIN_GDS: + return AMDGPU_PL_GDS; + case AMDGPU_GEM_DOMAIN_GWS: + return AMDGPU_PL_GWS; + case AMDGPU_GEM_DOMAIN_OA: + return AMDGPU_PL_OA; + case AMDGPU_GEM_DOMAIN_DOORBELL: + return AMDGPU_PL_DOORBELL; + default: + return TTM_PL_SYSTEM; + } +} + +/** * amdgpu_bo_get_preferred_domain - get preferred domain * @adev: amdgpu device object * @domain: allowed :ref:`memory domains <amdgpu_memory_domains>` diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index be6769852ece..375448627f7b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -260,6 +260,10 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, unsigned long size, int align, u32 domain, struct amdgpu_bo **bo_ptr, u64 *gpu_addr, void **cpu_addr); +int amdgpu_bo_create_isp_user(struct amdgpu_device *adev, + struct dma_buf *dbuf, u32 domain, + struct amdgpu_bo **bo, + u64 *gpu_addr); int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, uint64_t offset, uint64_t size, struct amdgpu_bo **bo_ptr, void **cpu_addr); @@ -271,6 +275,7 @@ int amdgpu_bo_create_vm(struct amdgpu_device *adev, struct amdgpu_bo_vm **ubo_ptr); void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr); +void amdgpu_bo_free_isp_user(struct amdgpu_bo *bo); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); void *amdgpu_bo_kptr(struct amdgpu_bo *bo); void amdgpu_bo_kunmap(struct amdgpu_bo *bo); @@ -300,9 +305,7 @@ int amdgpu_bo_sync_wait_resv(struct amdgpu_device *adev, struct dma_resv *resv, int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr); u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo); -void amdgpu_bo_get_memory(struct amdgpu_bo *bo, - struct amdgpu_mem_stats *stats, - unsigned int size); +uint32_t amdgpu_bo_mem_stats_placement(struct amdgpu_bo *bo); uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev, uint32_t domain); @@ -337,8 +340,7 @@ int amdgpu_sa_bo_manager_start(struct amdgpu_device *adev, int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, struct drm_suballoc **sa_bo, unsigned int size); -void amdgpu_sa_bo_free(struct amdgpu_device *adev, - struct drm_suballoc **sa_bo, +void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo, struct dma_fence *fence); #if defined(CONFIG_DEBUG_FS) void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 448f9e742983..df5d5dbd7f0f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -44,7 +44,7 @@ #include "amdgpu_securedisplay.h" #include "amdgpu_atomfirmware.h" -#define AMD_VBIOS_FILE_MAX_SIZE_B (1024*1024*3) +#define AMD_VBIOS_FILE_MAX_SIZE_B (1024*1024*16) static int psp_load_smu_fw(struct psp_context *psp); static int psp_rap_terminate(struct psp_context *psp); @@ -153,6 +153,9 @@ static int psp_init_sriov_microcode(struct psp_context *psp) adev->virt.autoload_ucode_id = AMDGPU_UCODE_ID_CP_MES1_DATA; ret = psp_init_cap_microcode(psp, 
ucode_prefix); break; + case IP_VERSION(13, 0, 12): + ret = psp_init_ta_microcode(psp, ucode_prefix); + break; default: return -EINVAL; } @@ -193,6 +196,7 @@ static int psp_early_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 0, 9): case IP_VERSION(11, 0, 11): case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 2): case IP_VERSION(11, 0, 12): case IP_VERSION(11, 0, 13): psp_v11_0_set_psp_funcs(psp); @@ -212,6 +216,11 @@ static int psp_early_init(struct amdgpu_ip_block *ip_block) psp_v13_0_set_psp_funcs(psp); psp->autoload_supported = false; break; + case IP_VERSION(13, 0, 12): + psp_v13_0_set_psp_funcs(psp); + psp->autoload_supported = false; + adev->psp.sup_ifwi_up = !amdgpu_sriov_vf(adev); + break; case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 3): case IP_VERSION(13, 0, 5): @@ -245,6 +254,10 @@ static int psp_early_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(14, 0, 3): psp_v14_0_set_psp_funcs(psp); break; + case IP_VERSION(14, 0, 5): + psp_v14_0_set_psp_funcs(psp); + psp->boot_time_tmr = false; + break; default: return -EINVAL; } @@ -359,6 +372,7 @@ static bool psp_get_runtime_db_entry(struct amdgpu_device *adev, int i; if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) return false; @@ -531,7 +545,6 @@ static int psp_sw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; struct psp_context *psp = &adev->psp; - struct psp_gfx_cmd_resp *cmd = psp->cmd; psp_memory_training_fini(psp); @@ -541,8 +554,8 @@ static int psp_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_ucode_release(&psp->cap_fw); amdgpu_ucode_release(&psp->toc_fw); - kfree(cmd); - cmd = NULL; + kfree(psp->cmd); + psp->cmd = NULL; psp_free_shared_bufs(psp); @@ -870,6 +883,7 @@ static bool psp_skip_tmr(struct psp_context *psp) case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): return true; default: @@ -1783,34 +1797,47 @@ int psp_ras_initialize(struct psp_context *psp) if (ret) dev_warn(adev->dev, "PSP get boot config failed\n"); - if (!amdgpu_ras_is_supported(psp->adev, AMDGPU_RAS_BLOCK__UMC)) { - if (!boot_cfg) { - dev_info(adev->dev, "GECC is disabled\n"); - } else { - /* disable GECC in next boot cycle if ras is - * disabled by module parameter amdgpu_ras_enable - * and/or amdgpu_ras_mask, or boot_config_get call - * is failed - */ - ret = psp_boot_config_set(adev, 0); - if (ret) - dev_warn(adev->dev, "PSP set boot config failed\n"); - else - dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n"); - } + if (boot_cfg == 1 && !adev->ras_default_ecc_enabled && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) { + dev_warn(adev->dev, "GECC is currently enabled, which may affect performance\n"); + dev_warn(adev->dev, + "To disable GECC, please reboot the system and load the amdgpu driver with the parameter amdgpu_ras_enable=0\n"); } else { - if (boot_cfg == 1) { - dev_info(adev->dev, "GECC is enabled\n"); + if ((adev->ras_default_ecc_enabled || amdgpu_ras_enable == 1) && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) { + if (boot_cfg == 1) { + dev_info(adev->dev, "GECC is enabled\n"); + } else { + /* enable GECC in next boot cycle if it is disabled + * in boot config, or force enable GECC if failed to + * get boot configuration + */ + ret = 
psp_boot_config_set(adev, BOOT_CONFIG_GECC); + if (ret) + dev_warn(adev->dev, "PSP set boot config failed\n"); + else + dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n"); + } } else { - /* enable GECC in next boot cycle if it is disabled - * in boot config, or force enable GECC if failed to - * get boot configuration - */ - ret = psp_boot_config_set(adev, BOOT_CONFIG_GECC); - if (ret) - dev_warn(adev->dev, "PSP set boot config failed\n"); - else - dev_warn(adev->dev, "GECC will be enabled in next boot cycle\n"); + if (!boot_cfg) { + if (!adev->ras_default_ecc_enabled && + amdgpu_ras_enable != 1 && + amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__UMC)) + dev_warn(adev->dev, "GECC is disabled, set amdgpu_ras_enable=1 to enable GECC in next boot cycle if needed\n"); + else + dev_info(adev->dev, "GECC is disabled\n"); + } else { + /* disable GECC in next boot cycle if ras is + * disabled by module parameter amdgpu_ras_enable + * and/or amdgpu_ras_mask, or boot_config_get call + * is failed + */ + ret = psp_boot_config_set(adev, 0); + if (ret) + dev_warn(adev->dev, "PSP set boot config failed\n"); + else + dev_warn(adev->dev, "GECC will be disabled in next boot cycle if set amdgpu_ras_enable and/or amdgpu_ras_mask to 0x0\n"); + } } } } @@ -1837,6 +1864,7 @@ int psp_ras_initialize(struct psp_context *psp) if (adev->gmc.gmc_funcs->query_mem_partition_mode) ras_cmd->ras_in_message.init_flags.nps_mode = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + ras_cmd->ras_in_message.init_flags.active_umc_mask = adev->umc.active_mask; ret = psp_ta_load(psp, &psp->ras_context.context); @@ -2264,7 +2292,8 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id) return -EINVAL; if (ta_cmd_id != TA_SECUREDISPLAY_COMMAND__QUERY_TA && - ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC) + ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC && + ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2) return -EINVAL; ret = psp_ta_invoke(psp, ta_cmd_id, &psp->securedisplay_context.context); @@ -2385,6 +2414,15 @@ static int psp_hw_start(struct psp_context *psp) } } + if ((is_psp_fw_valid(psp->spdm_drv)) && + (psp->funcs->bootloader_load_spdm_drv != NULL)) { + ret = psp_bootloader_load_spdm_drv(psp); + if (ret) { + dev_err(adev->dev, "PSP load spdm_drv failed!\n"); + return ret; + } + } + if ((is_psp_fw_valid(psp->sos)) && (psp->funcs->bootloader_load_sos != NULL)) { ret = psp_bootloader_load_sos(psp); @@ -3007,10 +3045,7 @@ static int psp_hw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; mutex_lock(&adev->firmware.mutex); - /* - * This sequence is just used on hw_init only once, no need on - * resume. 
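The psp_ras_initialize() rework above decides whether to warn about, enable or disable GECC for the next boot from the current boot config, the platform ECC default and the amdgpu_ras_enable parameter. The nesting is easier to follow as a single decision function; the sketch below is a condensed paraphrase of that logic (it drops the purely informational prints), not the driver code:

/* Condensed model of the GECC boot-config decision in psp_ras_initialize().
 * Returns what would be requested for the next boot cycle.
 */
#include <stdbool.h>
#include <stdio.h>

enum gecc_action {
	GECC_LEAVE_ENABLED_WARN,   /* enabled but not the platform default */
	GECC_KEEP_ENABLED,
	GECC_ENABLE_NEXT_BOOT,
	GECC_KEEP_DISABLED,
	GECC_DISABLE_NEXT_BOOT,
};

static enum gecc_action decide(bool boot_cfg_enabled, bool umc_ras_supported,
			       bool default_ecc_enabled, int ras_enable_param)
{
	bool want_ecc = (default_ecc_enabled || ras_enable_param == 1) &&
			umc_ras_supported;

	if (boot_cfg_enabled && !default_ecc_enabled && umc_ras_supported)
		return GECC_LEAVE_ENABLED_WARN;  /* may cost performance */

	if (want_ecc)
		return boot_cfg_enabled ? GECC_KEEP_ENABLED
					: GECC_ENABLE_NEXT_BOOT;

	return boot_cfg_enabled ? GECC_DISABLE_NEXT_BOOT : GECC_KEEP_DISABLED;
}

int main(void)
{
	printf("%d\n", decide(true,  true,  false, 0)); /* warn, leave enabled */
	printf("%d\n", decide(false, true,  true,  0)); /* enable next boot */
	printf("%d\n", decide(true,  false, false, 0)); /* disable next boot */
	return 0;
}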
- */ + ret = amdgpu_ucode_init_bo(adev); if (ret) goto failed; @@ -3135,6 +3170,10 @@ static int psp_resume(struct amdgpu_ip_block *ip_block) mutex_lock(&adev->firmware.mutex); + ret = amdgpu_ucode_init_bo(adev); + if (ret) + goto failed; + ret = psp_hw_start(psp); if (ret) goto failed; @@ -3289,7 +3328,8 @@ int psp_init_asd_microcode(struct psp_context *psp, const char *chip_name) const struct psp_firmware_header_v1_0 *asd_hdr; int err = 0; - err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, "amdgpu/%s_asd.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->psp.asd_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_asd.bin", chip_name); if (err) goto out; @@ -3311,7 +3351,8 @@ int psp_init_toc_microcode(struct psp_context *psp, const char *chip_name) const struct psp_firmware_header_v1_0 *toc_hdr; int err = 0; - err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, "amdgpu/%s_toc.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_toc.bin", chip_name); if (err) goto out; @@ -3407,6 +3448,12 @@ static int parse_sos_bin_descriptor(struct psp_context *psp, psp->ipkeymgr_drv.size_bytes = le32_to_cpu(desc->size_bytes); psp->ipkeymgr_drv.start_addr = ucode_start_addr; break; + case PSP_FW_TYPE_PSP_SPDM_DRV: + psp->spdm_drv.fw_version = le32_to_cpu(desc->fw_version); + psp->spdm_drv.feature_version = le32_to_cpu(desc->fw_version); + psp->spdm_drv.size_bytes = le32_to_cpu(desc->size_bytes); + psp->spdm_drv.start_addr = ucode_start_addr; + break; default: dev_warn(psp->adev->dev, "Unsupported PSP FW type: %d\n", desc->fw_type); break; @@ -3474,7 +3521,8 @@ int psp_init_sos_microcode(struct psp_context *psp, const char *chip_name) uint8_t *ucode_array_start_addr; int err = 0; - err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, "amdgpu/%s_sos.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->psp.sos_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sos.bin", chip_name); if (err) goto out; @@ -3750,7 +3798,8 @@ int psp_init_ta_microcode(struct psp_context *psp, const char *chip_name) struct amdgpu_device *adev = psp->adev; int err; - err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, "amdgpu/%s_ta.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->psp.ta_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_ta.bin", chip_name); if (err) return err; @@ -3785,14 +3834,16 @@ int psp_init_cap_microcode(struct psp_context *psp, const char *chip_name) return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, "amdgpu/%s_cap.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->psp.cap_fw, AMDGPU_UCODE_OPTIONAL, + "amdgpu/%s_cap.bin", chip_name); if (err) { if (err == -ENODEV) { dev_warn(adev->dev, "cap microcode does not exist, skip\n"); err = 0; - goto out; + } else { + dev_err(adev->dev, "fail to initialize cap microcode\n"); } - dev_err(adev->dev, "fail to initialize cap microcode\n"); + goto out; } info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CAP]; @@ -3849,13 +3900,13 @@ int psp_config_sq_perfmon(struct psp_context *psp, return ret; } -static int psp_set_clockgating_state(void *handle, +static int psp_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int psp_set_powergating_state(void *handle, +static int psp_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; @@ -3867,10 +3918,12 @@ static ssize_t psp_usbc_pd_fw_sysfs_read(struct device *dev, { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = 
drm_to_adev(ddev); + struct amdgpu_ip_block *ip_block; uint32_t fw_ver; int ret; - if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) { + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP); + if (!ip_block || !ip_block->status.late_initialized) { dev_info(adev->dev, "PSP block is not ready yet\n."); return -EBUSY; } @@ -3899,8 +3952,10 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev, struct amdgpu_bo *fw_buf_bo = NULL; uint64_t fw_pri_mc_addr; void *fw_pri_cpu_addr; + struct amdgpu_ip_block *ip_block; - if (!adev->ip_blocks[AMD_IP_BLOCK_TYPE_PSP].status.late_initialized) { + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_PSP); + if (!ip_block || !ip_block->status.late_initialized) { dev_err(adev->dev, "PSP block is not ready yet."); return -EBUSY; } @@ -3908,7 +3963,8 @@ static ssize_t psp_usbc_pd_fw_sysfs_write(struct device *dev, if (!drm_dev_enter(ddev, &idx)) return -ENODEV; - ret = amdgpu_ucode_request(adev, &usbc_pd_fw, "amdgpu/%s", buf); + ret = amdgpu_ucode_request(adev, &usbc_pd_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s", buf); if (ret) goto fail; @@ -3969,7 +4025,7 @@ int is_psp_fw_valid(struct psp_bin_desc bin) } static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -4005,7 +4061,7 @@ static ssize_t amdgpu_psp_vbflash_write(struct file *filp, struct kobject *kobj, } static ssize_t amdgpu_psp_vbflash_read(struct file *filp, struct kobject *kobj, - struct bin_attribute *bin_attr, char *buffer, + const struct bin_attribute *bin_attr, char *buffer, loff_t pos, size_t count) { struct device *dev = kobj_to_dev(kobj); @@ -4057,11 +4113,11 @@ rel_buf: * Writing to this file will stage an IFWI for update. Reading from this file * will trigger the update process. 
*/ -static struct bin_attribute psp_vbflash_bin_attr = { +static const struct bin_attribute psp_vbflash_bin_attr = { .attr = {.name = "psp_vbflash", .mode = 0660}, .size = 0, - .write = amdgpu_psp_vbflash_write, - .read = amdgpu_psp_vbflash_read, + .write_new = amdgpu_psp_vbflash_write, + .read_new = amdgpu_psp_vbflash_read, }; /** @@ -4088,7 +4144,7 @@ static ssize_t amdgpu_psp_vbflash_status(struct device *dev, } static DEVICE_ATTR(psp_vbflash_status, 0440, amdgpu_psp_vbflash_status, NULL); -static struct bin_attribute *bin_flash_attrs[] = { +static const struct bin_attribute *const bin_flash_attrs[] = { &psp_vbflash_bin_attr, NULL }; @@ -4124,7 +4180,7 @@ static umode_t amdgpu_bin_flash_attr_is_visible(struct kobject *kobj, const struct attribute_group amdgpu_flash_attr_group = { .attrs = flash_attrs, - .bin_attrs = bin_flash_attrs, + .bin_attrs_new = bin_flash_attrs, .is_bin_visible = amdgpu_bin_flash_attr_is_visible, .is_visible = amdgpu_flash_attr_is_visible, }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 567cb1f924ca..8d5acc415d38 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -80,6 +80,7 @@ enum psp_bootloader_cmd { PSP_BL__DRAM_LONG_TRAIN = 0x100000, PSP_BL__DRAM_SHORT_TRAIN = 0x200000, PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000, + PSP_BL__LOAD_SPDMDRV = 0x20000000, }; enum psp_ring_type { @@ -120,6 +121,7 @@ struct psp_funcs { int (*bootloader_load_dbg_drv)(struct psp_context *psp); int (*bootloader_load_ras_drv)(struct psp_context *psp); int (*bootloader_load_ipkeymgr_drv)(struct psp_context *psp); + int (*bootloader_load_spdm_drv)(struct psp_context *psp); int (*bootloader_load_sos)(struct psp_context *psp); int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); @@ -343,6 +345,7 @@ struct psp_context { struct psp_bin_desc dbg_drv; struct psp_bin_desc ras_drv; struct psp_bin_desc ipkeymgr_drv; + struct psp_bin_desc spdm_drv; /* tmr buffer */ struct amdgpu_bo *tmr_bo; @@ -434,6 +437,9 @@ struct amdgpu_psp_funcs { #define psp_bootloader_load_ipkeymgr_drv(psp) \ ((psp)->funcs->bootloader_load_ipkeymgr_drv ? \ (psp)->funcs->bootloader_load_ipkeymgr_drv((psp)) : 0) +#define psp_bootloader_load_spdm_drv(psp) \ + ((psp)->funcs->bootloader_load_spdm_drv ? \ + (psp)->funcs->bootloader_load_spdm_drv((psp)) : 0) #define psp_bootloader_load_sos(psp) \ ((psp)->funcs->bootloader_load_sos ? 
(psp)->funcs->bootloader_load_sos((psp)) : 0) #define psp_smu_reload_quirk(psp) \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 4c9fa24dd972..68685aca2835 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -36,6 +36,7 @@ #include "amdgpu_xgmi.h" #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include "nbio_v4_3.h" +#include "nbif_v6_3_1.h" #include "nbio_v7_9.h" #include "atom.h" #include "amdgpu_reset.h" @@ -192,7 +193,7 @@ static int amdgpu_reserve_page_direct(struct amdgpu_device *adev, uint64_t addre if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, - err_data.err_addr_cnt); + err_data.err_addr_cnt, false); amdgpu_ras_save_bad_pages(adev, NULL); } @@ -1732,7 +1733,7 @@ static char *amdgpu_ras_badpage_flags_str(unsigned int flags) */ static ssize_t amdgpu_ras_sysfs_badpages_read(struct file *f, - struct kobject *kobj, struct bin_attribute *attr, + struct kobject *kobj, const struct bin_attribute *attr, char *buf, loff_t ppos, size_t count) { struct amdgpu_ras *con = @@ -1863,6 +1864,9 @@ int amdgpu_ras_sysfs_create(struct amdgpu_device *adev, if (!obj || obj->attr_inuse) return -EINVAL; + if (amdgpu_sriov_vf(adev) && !amdgpu_virt_ras_telemetry_block_en(adev, head->block)) + return 0; + get_obj(obj); snprintf(obj->fs_data.sysfs_name, sizeof(obj->fs_data.sysfs_name), @@ -2015,6 +2019,7 @@ static bool amdgpu_ras_aca_is_supported(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): ret = true; break; @@ -2063,8 +2068,8 @@ void amdgpu_ras_debugfs_create_all(struct amdgpu_device *adev) /* debugfs end */ /* ras fs */ -static BIN_ATTR(gpu_vram_bad_pages, S_IRUGO, - amdgpu_ras_sysfs_badpages_read, NULL, 0); +static const BIN_ATTR(gpu_vram_bad_pages, S_IRUGO, + amdgpu_ras_sysfs_badpages_read, NULL, 0); static DEVICE_ATTR(features, S_IRUGO, amdgpu_ras_sysfs_features_read, NULL); static DEVICE_ATTR(version, 0444, @@ -2086,7 +2091,7 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) &con->event_state_attr.attr, NULL }; - struct bin_attribute *bin_attrs[] = { + const struct bin_attribute *bin_attrs[] = { NULL, NULL, }; @@ -2112,11 +2117,10 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) if (amdgpu_bad_page_threshold != 0) { /* add bad_page_features entry */ - bin_attr_gpu_vram_bad_pages.private = NULL; con->badpages_attr = bin_attr_gpu_vram_bad_pages; + sysfs_bin_attr_init(&con->badpages_attr); bin_attrs[0] = &con->badpages_attr; - group.bin_attrs = bin_attrs; - sysfs_bin_attr_init(bin_attrs[0]); + group.bin_attrs_new = bin_attrs; } r = sysfs_create_group(&adev->dev->kobj, &group); @@ -2156,6 +2160,16 @@ void amdgpu_ras_interrupt_fatal_error_handler(struct amdgpu_device *adev) /* Fatal error events are handled on host side */ if (amdgpu_sriov_vf(adev)) return; + /** + * If the current interrupt is caused by a non-fatal RAS error, skip + * check for fatal error. For fatal errors, FED status of all devices + * in XGMI hive gets set when the first device gets fatal error + * interrupt. The error gets propagated to other devices as well, so + * make sure to ack the interrupt regardless of FED status. 
+ */ + if (!amdgpu_ras_get_fed_status(adev) && + amdgpu_ras_is_err_state(adev, AMDGPU_RAS_BLOCK__ANY)) + return; if (adev->nbio.ras && adev->nbio.ras->handle_ras_controller_intr_no_bifring) @@ -2185,6 +2199,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * if (ret) return; + amdgpu_ras_set_err_poison(adev, block_obj->ras_comm.block); /* both query_poison_status and handle_poison_consumption are optional, * but at least one of them should be implemented if we need poison * consumption handler @@ -2717,41 +2732,224 @@ static int amdgpu_ras_realloc_eh_data_space(struct amdgpu_device *adev, return 0; } -/* it deal with vram only. */ -int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, - struct eeprom_table_record *bps, int pages) +static int amdgpu_ras_mca2pa_by_idx(struct amdgpu_device *adev, + struct eeprom_table_record *bps, + struct ras_err_data *err_data) { - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - struct ras_err_handler_data *data; + struct ta_ras_query_address_input addr_in; + uint32_t socket = 0; int ret = 0; - uint32_t i; - if (!con || !con->eh_data || !bps || pages <= 0) - return 0; + if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) + socket = adev->smuio.funcs->get_socket_id(adev); - mutex_lock(&con->recovery_lock); - data = con->eh_data; - if (!data) - goto out; + /* reinit err_data */ + err_data->err_addr_cnt = 0; + err_data->err_addr_len = adev->umc.retire_unit; + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = bps->address; + addr_in.ma.socket_id = socket; + addr_in.ma.ch_inst = bps->mem_channel; + /* tell RAS TA the node instance is not used */ + addr_in.ma.node_inst = TA_RAS_INV_NODE; + + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) + ret = adev->umc.ras->convert_ras_err_addr(adev, err_data, + &addr_in, NULL, false); + + return ret; +} - for (i = 0; i < pages; i++) { +static int amdgpu_ras_mca2pa(struct amdgpu_device *adev, + struct eeprom_table_record *bps, + struct ras_err_data *err_data) +{ + struct ta_ras_query_address_input addr_in; + uint32_t die_id, socket = 0; + + if (adev->smuio.funcs && adev->smuio.funcs->get_socket_id) + socket = adev->smuio.funcs->get_socket_id(adev); + + /* although die id is gotten from PA in nps1 mode, the id is + * fitable for any nps mode + */ + if (adev->umc.ras && adev->umc.ras->get_die_id_from_pa) + die_id = adev->umc.ras->get_die_id_from_pa(adev, bps->address, + bps->retired_page << AMDGPU_GPU_PAGE_SHIFT); + else + return -EINVAL; + + /* reinit err_data */ + err_data->err_addr_cnt = 0; + err_data->err_addr_len = adev->umc.retire_unit; + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = bps->address; + addr_in.ma.ch_inst = bps->mem_channel; + addr_in.ma.umc_inst = bps->mcumc_id; + addr_in.ma.node_inst = die_id; + addr_in.ma.socket_id = socket; + + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) + return adev->umc.ras->convert_ras_err_addr(adev, err_data, + &addr_in, NULL, false); + else + return -EINVAL; +} + +static int __amdgpu_ras_restore_bad_pages(struct amdgpu_device *adev, + struct eeprom_table_record *bps, int count) +{ + int j; + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_handler_data *data = con->eh_data; + + for (j = 0; j < count; j++) { if (amdgpu_ras_check_bad_page_unlock(con, - bps[i].retired_page << AMDGPU_GPU_PAGE_SHIFT)) + bps[j].retired_page << AMDGPU_GPU_PAGE_SHIFT)) continue; if (!data->space_left && - amdgpu_ras_realloc_eh_data_space(adev, data, 256)) { - ret = -ENOMEM; - 
goto out; + amdgpu_ras_realloc_eh_data_space(adev, data, 256)) { + return -ENOMEM; } - amdgpu_ras_reserve_page(adev, bps[i].retired_page); + amdgpu_ras_reserve_page(adev, bps[j].retired_page); - memcpy(&data->bps[data->count], &bps[i], sizeof(*data->bps)); + memcpy(&data->bps[data->count], &(bps[j]), + sizeof(struct eeprom_table_record)); data->count++; data->space_left--; } + + return 0; +} + +static int __amdgpu_ras_convert_rec_array_from_rom(struct amdgpu_device *adev, + struct eeprom_table_record *bps, struct ras_err_data *err_data, + enum amdgpu_memory_partition nps) +{ + int i = 0; + enum amdgpu_memory_partition save_nps; + + save_nps = (bps[0].retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK; + + /*old asics just have pa in eeprom*/ + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) { + memcpy(err_data->err_addr, bps, + sizeof(struct eeprom_table_record) * adev->umc.retire_unit); + goto out; + } + + for (i = 0; i < adev->umc.retire_unit; i++) + bps[i].retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT); + + if (save_nps) { + if (save_nps == nps) { + if (amdgpu_umc_pages_in_a_row(adev, err_data, + bps[0].retired_page << AMDGPU_GPU_PAGE_SHIFT)) + return -EINVAL; + } else { + if (amdgpu_ras_mca2pa_by_idx(adev, &bps[0], err_data)) + return -EINVAL; + } + } else { + if (amdgpu_ras_mca2pa(adev, &bps[0], err_data)) { + if (nps == AMDGPU_NPS1_PARTITION_MODE) + memcpy(err_data->err_addr, bps, + sizeof(struct eeprom_table_record) * adev->umc.retire_unit); + else + return -EOPNOTSUPP; + } + } + out: + return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, adev->umc.retire_unit); +} + +static int __amdgpu_ras_convert_rec_from_rom(struct amdgpu_device *adev, + struct eeprom_table_record *bps, struct ras_err_data *err_data, + enum amdgpu_memory_partition nps) +{ + enum amdgpu_memory_partition save_nps; + + save_nps = (bps->retired_page >> UMC_NPS_SHIFT) & UMC_NPS_MASK; + bps->retired_page &= ~(UMC_NPS_MASK << UMC_NPS_SHIFT); + + if (save_nps == nps) { + if (amdgpu_umc_pages_in_a_row(adev, err_data, + bps->retired_page << AMDGPU_GPU_PAGE_SHIFT)) + return -EINVAL; + } else { + if (amdgpu_ras_mca2pa_by_idx(adev, bps, err_data)) + return -EINVAL; + } + return __amdgpu_ras_restore_bad_pages(adev, err_data->err_addr, + adev->umc.retire_unit); +} + +/* it deal with vram only. 
*/ +int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, + struct eeprom_table_record *bps, int pages, bool from_rom) +{ + struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct ras_err_data err_data; + struct amdgpu_ras_eeprom_control *control = + &adev->psp.ras_context.ras->eeprom_control; + enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; + int ret = 0; + uint32_t i; + + if (!con || !con->eh_data || !bps || pages <= 0) + return 0; + + if (from_rom) { + err_data.err_addr = + kcalloc(adev->umc.retire_unit, + sizeof(struct eeprom_table_record), GFP_KERNEL); + if (!err_data.err_addr) { + dev_warn(adev->dev, "Failed to alloc UMC error address record in mca2pa conversion!\n"); + return -ENOMEM; + } + + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + } + + mutex_lock(&con->recovery_lock); + + if (from_rom) { + for (i = 0; i < pages; i++) { + if (control->ras_num_recs - i >= adev->umc.retire_unit) { + if ((bps[i].address == bps[i + 1].address) && + (bps[i].mem_channel == bps[i + 1].mem_channel)) { + //deal with retire_unit records a time + ret = __amdgpu_ras_convert_rec_array_from_rom(adev, + &bps[i], &err_data, nps); + if (ret) + goto free; + i += (adev->umc.retire_unit - 1); + } else { + break; + } + } else { + break; + } + } + for (; i < pages; i++) { + ret = __amdgpu_ras_convert_rec_from_rom(adev, + &bps[i], &err_data, nps); + if (ret) + goto free; + } + } else { + ret = __amdgpu_ras_restore_bad_pages(adev, bps, pages); + } + +free: + if (from_rom) + kfree(err_data.err_addr); mutex_unlock(&con->recovery_lock); return ret; @@ -2768,7 +2966,7 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; struct amdgpu_ras_eeprom_control *control; - int save_count; + int save_count, unit_num, bad_page_num, i; if (!con || !con->eh_data) { if (new_cnt) @@ -2780,19 +2978,32 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, mutex_lock(&con->recovery_lock); control = &con->eeprom_control; data = con->eh_data; - save_count = data->count - control->ras_num_recs; + bad_page_num = control->ras_num_bad_pages; + save_count = data->count - bad_page_num; mutex_unlock(&con->recovery_lock); + unit_num = save_count / adev->umc.retire_unit; if (new_cnt) - *new_cnt = save_count / adev->umc.retire_unit; + *new_cnt = unit_num; /* only new entries are saved */ if (save_count > 0) { - if (amdgpu_ras_eeprom_append(control, - &data->bps[control->ras_num_recs], - save_count)) { - dev_err(adev->dev, "Failed to save EEPROM table data!"); - return -EIO; + /*old asics only save pa to eeprom like before*/ + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) { + if (amdgpu_ras_eeprom_append(control, + &data->bps[bad_page_num], save_count)) { + dev_err(adev->dev, "Failed to save EEPROM table data!"); + return -EIO; + } + } else { + for (i = 0; i < unit_num; i++) { + if (amdgpu_ras_eeprom_append(control, + &data->bps[bad_page_num + + i * adev->umc.retire_unit], 1)) { + dev_err(adev->dev, "Failed to save EEPROM table data!"); + return -EIO; + } + } } dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count); @@ -2810,7 +3021,7 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) struct amdgpu_ras_eeprom_control *control = &adev->psp.ras_context.ras->eeprom_control; struct eeprom_table_record *bps; - int ret; + int ret, i = 0; /* no bad page record, skip eeprom access */ if (control->ras_num_recs == 0 
|| amdgpu_bad_page_threshold == 0) @@ -2821,11 +3032,42 @@ static int amdgpu_ras_load_bad_pages(struct amdgpu_device *adev) return -ENOMEM; ret = amdgpu_ras_eeprom_read(control, bps, control->ras_num_recs); - if (ret) + if (ret) { dev_err(adev->dev, "Failed to load EEPROM table records!"); - else - ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs); + } else { + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) { + for (i = 0; i < control->ras_num_recs; i++) { + if ((control->ras_num_recs - i) >= adev->umc.retire_unit) { + if ((bps[i].address == bps[i + 1].address) && + (bps[i].mem_channel == bps[i + 1].mem_channel)) { + control->ras_num_pa_recs += adev->umc.retire_unit; + i += (adev->umc.retire_unit - 1); + } else { + control->ras_num_mca_recs += + (control->ras_num_recs - i); + break; + } + } else { + control->ras_num_mca_recs += (control->ras_num_recs - i); + break; + } + } + } + + ret = amdgpu_ras_eeprom_check(control); + if (ret) + goto out; + + /* HW not usable */ + if (amdgpu_ras_is_rma(adev)) { + ret = -EHWPOISON; + goto out; + } + + ret = amdgpu_ras_add_bad_pages(adev, bps, control->ras_num_recs, true); + } +out: kfree(bps); return ret; } @@ -2870,31 +3112,29 @@ static void amdgpu_ras_validate_threshold(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); /* - * Justification of value bad_page_cnt_threshold in ras structure - * - * Generally, 0 <= amdgpu_bad_page_threshold <= max record length - * in eeprom or amdgpu_bad_page_threshold == -2, introduce two - * scenarios accordingly. - * - * Bad page retirement enablement: - * - If amdgpu_bad_page_threshold = -2, - * bad_page_cnt_threshold = typical value by formula. - * - * - When the value from user is 0 < amdgpu_bad_page_threshold < - * max record length in eeprom, use it directly. - * - * Bad page retirement disablement: - * - If amdgpu_bad_page_threshold = 0, bad page retirement - * functionality is disabled, and bad_page_cnt_threshold will - * take no effect. + * amdgpu_bad_page_threshold is used to config + * the threshold for the number of bad pages. + * -1: Threshold is set to default value + * Driver will issue a warning message when threshold is reached + * and continue runtime services. + * 0: Disable bad page retirement + * Driver will not retire bad pages + * which is intended for debugging purpose. + * -2: Threshold is determined by a formula + * that assumes 1 bad page per 100M of local memory. + * Driver will continue runtime services when threhold is reached. + * 0 < threshold < max number of bad page records in EEPROM, + * A user-defined threshold is set + * Driver will halt runtime services when this custom threshold is reached. 
*/ - - if (amdgpu_bad_page_threshold < 0) { + if (amdgpu_bad_page_threshold == -2) { u64 val = adev->gmc.mc_vram_size; do_div(val, RAS_BAD_PAGE_COVER); con->bad_page_cnt_threshold = min(lower_32_bits(val), max_count); + } else if (amdgpu_bad_page_threshold == -1) { + con->bad_page_cnt_threshold = ((con->reserved_pages_in_bytes) >> 21) << 4; } else { con->bad_page_cnt_threshold = min_t(int, max_count, amdgpu_bad_page_threshold); @@ -3205,33 +3445,40 @@ static int amdgpu_ras_page_retirement_thread(void *param) int amdgpu_ras_init_badpage_info(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + struct amdgpu_ras_eeprom_control *control; int ret; if (!con || amdgpu_sriov_vf(adev)) return 0; - ret = amdgpu_ras_eeprom_init(&con->eeprom_control); - + control = &con->eeprom_control; + ret = amdgpu_ras_eeprom_init(control); if (ret) return ret; - /* HW not usable */ - if (amdgpu_ras_is_rma(adev)) - return -EHWPOISON; + if (!adev->umc.ras || !adev->umc.ras->convert_ras_err_addr) + control->ras_num_pa_recs = control->ras_num_recs; - if (con->eeprom_control.ras_num_recs) { + if (control->ras_num_recs) { ret = amdgpu_ras_load_bad_pages(adev); if (ret) return ret; amdgpu_dpm_send_hbm_bad_pages_num( - adev, con->eeprom_control.ras_num_recs); + adev, control->ras_num_bad_pages); if (con->update_channel_flag == true) { amdgpu_dpm_send_hbm_bad_channel_flag( - adev, con->eeprom_control.bad_channel_bitmap); + adev, control->bad_channel_bitmap); con->update_channel_flag = false; } + + /* The format action is only applied to new ASICs */ + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) >= 12 && + control->tbl_hdr.version < RAS_TABLE_VER_V3) + if (!amdgpu_ras_eeprom_reset_table(control)) + if (amdgpu_ras_save_bad_pages(adev, NULL)) + dev_warn(adev->dev, "Failed to format RAS EEPROM data in V3 version!\n"); } return ret; @@ -3366,6 +3613,7 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): return true; default: @@ -3378,7 +3626,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) case IP_VERSION(13, 0, 0): case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): + case IP_VERSION(14, 0, 3): return true; default: return false; @@ -3541,8 +3791,11 @@ init_ras_enabled_flag: adev->ras_enabled = amdgpu_ras_enable == 0 ? 
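For illustration, the threshold selection the hunk above implements can be written as a standalone helper. This is a minimal sketch, assuming RAS_BAD_PAGE_COVER is roughly 100 MiB per bad page (per the "1 bad page per 100M of local memory" wording); the helper name and parameter list are hypothetical, not driver API.

#include <stdint.h>

#define BAD_PAGE_COVER	(100ULL << 20)	/* placeholder for RAS_BAD_PAGE_COVER */

static uint32_t pick_bad_page_threshold(long long param, uint64_t vram_size,
					uint64_t reserved_bytes, uint32_t max_count)
{
	if (param == 0)				/* bad page retirement disabled */
		return 0;
	if (param == -2) {			/* default: one page per ~100 MiB of VRAM */
		uint64_t val = vram_size / BAD_PAGE_COVER;
		return val < max_count ? (uint32_t)val : max_count;
	}
	if (param == -1)			/* derived from the reserved VRAM size */
		return (uint32_t)((reserved_bytes >> 21) << 4);
	return param < max_count ? (uint32_t)param : max_count;	/* user-defined */
}
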
0 : adev->ras_hw_enabled & amdgpu_ras_mask; - /* aca is disabled by default */ - adev->aca.is_enabled = false; + /* aca is disabled by default except for psp v13_0_6/v13_0_12/v13_0_14 */ + adev->aca.is_enabled = + (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)); /* bad page feature is not applicable to specific app platform */ if (adev->gmc.is_app_apu && @@ -3629,8 +3882,11 @@ static void amdgpu_ras_init_reserved_vram_size(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): + con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT; + break; case IP_VERSION(13, 0, 14): - con->reserved_pages_in_bytes = AMDGPU_RAS_RESERVED_VRAM_SIZE; + con->reserved_pages_in_bytes = (AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT << 1); break; default: break; @@ -3704,7 +3960,19 @@ int amdgpu_ras_init(struct amdgpu_device *adev) * check DF RAS */ adev->nbio.ras = &nbio_v4_3_ras; break; + case IP_VERSION(6, 3, 1): + if (adev->ras_hw_enabled & (1 << AMDGPU_RAS_BLOCK__DF)) + /* unlike other generation of nbio ras, + * nbif v6_3_1 only support fatal error interrupt + * to inform software that DF is freezed due to + * system fatal error event. driver should not + * enable nbio ras in such case. Instead, + * check DF RAS + */ + adev->nbio.ras = &nbif_v6_3_1_ras; + break; case IP_VERSION(7, 9, 0): + case IP_VERSION(7, 9, 1): if (!adev->gmc.is_app_apu) adev->nbio.ras = &nbio_v7_9_ras; break; @@ -4083,7 +4351,7 @@ bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev) if (!ras) return false; - return atomic_read(&ras->fed); + return test_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state); } void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status) @@ -4091,8 +4359,48 @@ void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status) struct amdgpu_ras *ras; ras = amdgpu_ras_get_context(adev); + if (ras) { + if (status) + set_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state); + else + clear_bit(AMDGPU_RAS_BLOCK__LAST, &ras->ras_err_state); + } +} + +void amdgpu_ras_clear_err_state(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (ras) + ras->ras_err_state = 0; +} + +void amdgpu_ras_set_err_poison(struct amdgpu_device *adev, + enum amdgpu_ras_block block) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); if (ras) - atomic_set(&ras->fed, !!status); + set_bit(block, &ras->ras_err_state); +} + +bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (ras) { + if (block == AMDGPU_RAS_BLOCK__ANY) + return (ras->ras_err_state != 0); + else + return test_bit(block, &ras->ras_err_state) || + test_bit(AMDGPU_RAS_BLOCK__LAST, + &ras->ras_err_state); + } + + return false; } static struct ras_event_manager *__get_ras_event_mgr(struct amdgpu_device *adev) @@ -4856,9 +5164,9 @@ static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev, "socket: %d, aid: %d, fw_status: 0x%x, data abort exception\n", socket_id, aid_id, fw_status); - if (AMDGPU_RAS_GPU_ERR_UNKNOWN(boot_error)) + if (AMDGPU_RAS_GPU_ERR_GENERIC(boot_error)) dev_info(adev->dev, - "socket: %d, aid: %d, fw_status: 0x%x, unknown boot time errors\n", + "socket: %d, aid: %d, fw_status: 0x%x, Boot Controller Generic Error\n", socket_id, 
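The fed and poison bookkeeping above folds the old atomic fatal-error flag into a single bit mask: one bit per RAS block records poison consumption, the AMDGPU_RAS_BLOCK__LAST bit records a device-wide fatal (FED) condition, and AMDGPU_RAS_BLOCK__ANY (-1) asks whether anything at all is set, which is what the fatal-error interrupt handler earlier in this file uses to skip the fatal path when the interrupt came from a non-fatal poison error. A compact standalone sketch of that state model follows; the bit positions are hypothetical stand-ins for the real enum amdgpu_ras_block values.

#include <stdbool.h>

enum { BLK_UMC = 0, BLK_SDMA = 1, BLK_GFX = 2, BLK_LAST = 14, BLK_ANY = -1 };

static unsigned long err_state;		/* plays the role of ras->ras_err_state */

static void set_poison(int block)	/* cf. amdgpu_ras_set_err_poison() */
{
	err_state |= 1UL << block;
}

static void set_fatal(bool on)		/* cf. amdgpu_ras_set_fed() */
{
	if (on)
		err_state |= 1UL << BLK_LAST;
	else
		err_state &= ~(1UL << BLK_LAST);
}

static bool is_err_state(int block)	/* cf. amdgpu_ras_is_err_state() */
{
	if (block == BLK_ANY)
		return err_state != 0;
	/* a per-block poison hit or a device-wide fatal error both count */
	return (err_state & (1UL << block)) || (err_state & (1UL << BLK_LAST));
}
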
aid_id, fw_status); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 6db772ecfee4..764e9fa0a914 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -47,7 +47,7 @@ struct amdgpu_iv_entry; #define AMDGPU_RAS_GPU_ERR_AID_ID(x) AMDGPU_GET_REG_FIELD(x, 12, 11) #define AMDGPU_RAS_GPU_ERR_HBM_ID(x) AMDGPU_GET_REG_FIELD(x, 14, 13) #define AMDGPU_RAS_GPU_ERR_DATA_ABORT(x) AMDGPU_GET_REG_FIELD(x, 29, 29) -#define AMDGPU_RAS_GPU_ERR_UNKNOWN(x) AMDGPU_GET_REG_FIELD(x, 30, 30) +#define AMDGPU_RAS_GPU_ERR_GENERIC(x) AMDGPU_GET_REG_FIELD(x, 30, 30) #define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT 100 #define AMDGPU_RAS_BOOT_STEADY_STATUS 0xBA @@ -65,7 +65,7 @@ struct amdgpu_iv_entry; /* Reserve 8 physical dram row for possible retirement. * In worst cases, it will lose 8 * 2MB memory in vram domain */ -#define AMDGPU_RAS_RESERVED_VRAM_SIZE (16ULL << 20) +#define AMDGPU_RAS_RESERVED_VRAM_SIZE_DEFAULT (16ULL << 20) /* The high three bits indicates socketid */ #define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK) @@ -99,7 +99,8 @@ enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__IH, AMDGPU_RAS_BLOCK__MPIO, - AMDGPU_RAS_BLOCK__LAST + AMDGPU_RAS_BLOCK__LAST, + AMDGPU_RAS_BLOCK__ANY = -1 }; enum amdgpu_ras_mca_block { @@ -482,6 +483,8 @@ struct ras_ecc_err { uint64_t ipid; uint64_t addr; uint64_t pa_pfn; + /* save global channel index across all UMC instances */ + uint32_t channel_idx; struct ras_err_pages err_pages; }; @@ -558,8 +561,8 @@ struct amdgpu_ras { struct ras_ecc_log_info umc_ecc_log; struct delayed_work page_retirement_dwork; - /* Fatal error detected flag */ - atomic_t fed; + /* ras errors detected */ + unsigned long ras_err_state; /* RAS event manager */ struct ras_event_manager __event_mgr; @@ -750,7 +753,7 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev, /* error handling functions */ int amdgpu_ras_add_bad_pages(struct amdgpu_device *adev, - struct eeprom_table_record *bps, int pages); + struct eeprom_table_record *bps, int pages, bool from_rom); int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev, unsigned long *new_cnt); @@ -952,6 +955,10 @@ ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *a void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status); bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); +void amdgpu_ras_set_err_poison(struct amdgpu_device *adev, + enum amdgpu_ras_block block); +void amdgpu_ras_clear_err_state(struct amdgpu_device *adev); +bool amdgpu_ras_is_err_state(struct amdgpu_device *adev, int block); u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type); int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index f28f6b4ba765..0ea7cfaf3587 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -161,6 +161,7 @@ static bool __is_ras_eeprom_supported(struct amdgpu_device *adev) case IP_VERSION(13, 0, 10): return true; case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): return (adev->gmc.is_app_apu) ? 
false : true; default: @@ -177,7 +178,7 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, if (!control) return false; - if (amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) { + if (adev->bios && amdgpu_atomfirmware_ras_rom_addr(adev, &i2c_addr)) { /* The address given by VBIOS is an 8-bit, wire-format * address, i.e. the most significant byte. * @@ -223,6 +224,7 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, return true; case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): control->i2c_address = EEPROM_I2C_MADDR_4; return true; @@ -413,9 +415,11 @@ static void amdgpu_ras_set_eeprom_table_version(struct amdgpu_ras_eeprom_control switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { case IP_VERSION(8, 10, 0): - case IP_VERSION(12, 0, 0): hdr->version = RAS_TABLE_VER_V2_1; return; + case IP_VERSION(12, 0, 0): + hdr->version = RAS_TABLE_VER_V3; + return; default: hdr->version = RAS_TABLE_VER_V1; return; @@ -443,7 +447,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) hdr->header = RAS_TABLE_HDR_VAL; amdgpu_ras_set_eeprom_table_version(control); - if (hdr->version == RAS_TABLE_VER_V2_1) { + if (hdr->version >= RAS_TABLE_VER_V2_1) { hdr->first_rec_offset = RAS_RECORD_START_V2_1; hdr->tbl_size = RAS_TABLE_HEADER_SIZE + RAS_TABLE_V2_1_INFO_SIZE; @@ -461,7 +465,7 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) } csum = __calc_hdr_byte_sum(control); - if (hdr->version == RAS_TABLE_VER_V2_1) + if (hdr->version >= RAS_TABLE_VER_V2_1) csum += __calc_ras_info_byte_sum(control); csum = -csum; hdr->checksum = csum; @@ -470,9 +474,10 @@ int amdgpu_ras_eeprom_reset_table(struct amdgpu_ras_eeprom_control *control) res = __write_table_ras_info(control); control->ras_num_recs = 0; + control->ras_num_bad_pages = 0; control->ras_fri = 0; - amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_recs); + amdgpu_dpm_send_hbm_bad_pages_num(adev, control->ras_num_bad_pages); control->bad_channel_bitmap = 0; amdgpu_dpm_send_hbm_bad_channel_flag(adev, control->bad_channel_bitmap); @@ -557,16 +562,17 @@ bool amdgpu_ras_eeprom_check_err_threshold(struct amdgpu_device *adev) return false; if (con->eeprom_control.tbl_hdr.header == RAS_TABLE_HDR_BAD) { - if (amdgpu_bad_page_threshold == -1) { + if (con->eeprom_control.ras_num_bad_pages > con->bad_page_cnt_threshold) dev_warn(adev->dev, "RAS records:%d exceed threshold:%d", - con->eeprom_control.ras_num_recs, con->bad_page_cnt_threshold); + con->eeprom_control.ras_num_bad_pages, con->bad_page_cnt_threshold); + if ((amdgpu_bad_page_threshold == -1) || + (amdgpu_bad_page_threshold == -2)) { dev_warn(adev->dev, - "But GPU can be operated due to bad_page_threshold = -1.\n"); + "Please consult AMD Service Action Guide (SAG) for appropriate service procedures.\n"); return false; } else { - dev_warn(adev->dev, "This GPU is in BAD status."); - dev_warn(adev->dev, "Please retire it or set a larger " - "threshold value when reloading driver.\n"); + dev_warn(adev->dev, + "Please consider adjusting the customized threshold.\n"); return true; } } @@ -621,6 +627,7 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, const u32 num) { struct amdgpu_ras *con = amdgpu_ras_get_context(to_amdgpu_device(control)); + struct amdgpu_device *adev = to_amdgpu_device(control); u32 a, b, i; u8 *buf, *pp; int res; @@ -723,6 +730,15 @@ amdgpu_ras_eeprom_append_table(struct amdgpu_ras_eeprom_control *control, 
control->ras_num_recs = 1 + (control->ras_max_record_count + b - control->ras_fri) % control->ras_max_record_count; + + /*old asics only save pa to eeprom like before*/ + if (IP_VERSION_MAJ(amdgpu_ip_version(adev, UMC_HWIP, 0)) < 12) + control->ras_num_pa_recs += num; + else + control->ras_num_mca_recs += num; + + control->ras_num_bad_pages = control->ras_num_pa_recs + + control->ras_num_mca_recs * adev->umc.retire_unit; Out: kfree(buf); return res; @@ -740,24 +756,25 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) /* Modify the header if it exceeds. */ if (amdgpu_bad_page_threshold != 0 && - control->ras_num_recs >= ras->bad_page_cnt_threshold) { + control->ras_num_bad_pages > ras->bad_page_cnt_threshold) { dev_warn(adev->dev, "Saved bad pages %d reaches threshold value %d\n", - control->ras_num_recs, ras->bad_page_cnt_threshold); + control->ras_num_bad_pages, ras->bad_page_cnt_threshold); control->tbl_hdr.header = RAS_TABLE_HDR_BAD; - if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) { + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) { control->tbl_rai.rma_status = GPU_RETIRED__ECC_REACH_THRESHOLD; control->tbl_rai.health_percent = 0; } - if (amdgpu_bad_page_threshold != -1) + if ((amdgpu_bad_page_threshold != -1) && + (amdgpu_bad_page_threshold != -2)) ras->is_rma = true; /* ignore the -ENOTSUPP return value */ amdgpu_dpm_send_rma_reason(adev); } - if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) control->tbl_hdr.tbl_size = RAS_TABLE_HEADER_SIZE + RAS_TABLE_V2_1_INFO_SIZE + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; @@ -797,10 +814,10 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) * now calculate gpu health percent */ if (amdgpu_bad_page_threshold != 0 && - control->tbl_hdr.version == RAS_TABLE_VER_V2_1 && - control->ras_num_recs < ras->bad_page_cnt_threshold) + control->tbl_hdr.version >= RAS_TABLE_VER_V2_1 && + control->ras_num_bad_pages <= ras->bad_page_cnt_threshold) control->tbl_rai.health_percent = ((ras->bad_page_cnt_threshold - - control->ras_num_recs) * 100) / + control->ras_num_bad_pages) * 100) / ras->bad_page_cnt_threshold; /* Recalc the checksum. 
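Concretely, a legacy record already names one retired page, while an MCA-format record stands for a whole retire unit of pages, so the ras_num_bad_pages accounting above is pa_recs + mca_recs * retire_unit. As a worked example with a hypothetical retire_unit of 8 (the real value is ASIC specific): 24 physical-address records plus 2 MCA records account for 24 + 2 * 8 = 40 bad pages. A one-line helper mirroring that arithmetic:

static unsigned int count_bad_pages(unsigned int pa_recs, unsigned int mca_recs,
				    unsigned int retire_unit)
{
	return pa_recs + mca_recs * retire_unit;	/* e.g. 24 + 2 * 8 = 40 */
}
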
@@ -810,7 +827,7 @@ amdgpu_ras_eeprom_update_header(struct amdgpu_ras_eeprom_control *control) csum += *pp; csum += __calc_hdr_byte_sum(control); - if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) csum += __calc_ras_info_byte_sum(control); /* avoid sign extension when assigning to "checksum" */ csum = -csum; @@ -841,7 +858,8 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, const u32 num) { struct amdgpu_device *adev = to_amdgpu_device(control); - int res; + int res, i; + uint64_t nps = AMDGPU_NPS1_PARTITION_MODE; if (!__is_ras_eeprom_supported(adev)) return 0; @@ -855,6 +873,13 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, return -EINVAL; } + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + + /* set the new channel index flag */ + for (i = 0; i < num; i++) + record[i].retired_page |= (nps << UMC_NPS_SHIFT); + mutex_lock(&control->ras_tbl_mutex); res = amdgpu_ras_eeprom_append_table(control, record, num); @@ -864,6 +889,11 @@ int amdgpu_ras_eeprom_append(struct amdgpu_ras_eeprom_control *control, amdgpu_ras_debugfs_set_ret_size(control); mutex_unlock(&control->ras_tbl_mutex); + + /* clear channel index flag, the flag is only saved on eeprom */ + for (i = 0; i < num; i++) + record[i].retired_page &= ~(nps << UMC_NPS_SHIFT); + return res; } @@ -1014,7 +1044,7 @@ uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *co /* get available eeprom table version first before eeprom table init */ amdgpu_ras_set_eeprom_table_version(control); - if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) return RAS_MAX_RECORD_COUNT_V2_1; else return RAS_MAX_RECORD_COUNT; @@ -1259,7 +1289,7 @@ static int __verify_ras_table_checksum(struct amdgpu_ras_eeprom_control *control int buf_size, res; u8 csum, *buf, *pp; - if (control->tbl_hdr.version == RAS_TABLE_VER_V2_1) + if (control->tbl_hdr.version >= RAS_TABLE_VER_V2_1) buf_size = RAS_TABLE_HEADER_SIZE + RAS_TABLE_V2_1_INFO_SIZE + control->ras_num_recs * RAS_TABLE_RECORD_SIZE; @@ -1362,7 +1392,7 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) __decode_table_header_from_buf(hdr, buf); - if (hdr->version == RAS_TABLE_VER_V2_1) { + if (hdr->version >= RAS_TABLE_VER_V2_1) { control->ras_num_recs = RAS_NUM_RECS_V2_1(hdr); control->ras_record_offset = RAS_RECORD_START_V2_1; control->ras_max_record_count = RAS_MAX_RECORD_COUNT_V2_1; @@ -1373,11 +1403,36 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) } control->ras_fri = RAS_OFFSET_TO_INDEX(control, hdr->first_rec_offset); + control->ras_num_mca_recs = 0; + control->ras_num_pa_recs = 0; + return 0; +} + +int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control) +{ + struct amdgpu_device *adev = to_amdgpu_device(control); + struct amdgpu_ras_eeprom_table_header *hdr = &control->tbl_hdr; + struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + int res; + + if (!__is_ras_eeprom_supported(adev)) + return 0; + + /* Verify i2c adapter is initialized */ + if (!adev->pm.ras_eeprom_i2c_bus || !adev->pm.ras_eeprom_i2c_bus->algo) + return -ENOENT; + + if (!__get_eeprom_i2c_addr(adev, control)) + return -EINVAL; + + control->ras_num_bad_pages = control->ras_num_pa_recs + + control->ras_num_mca_recs * adev->umc.retire_unit; + if (hdr->header == RAS_TABLE_HDR_VAL) { DRM_DEBUG_DRIVER("Found existing EEPROM table with %d 
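The append path above stamps the current NPS (memory partition) mode into spare high bits of retired_page only for the copy written to EEPROM and strips it again right after, while the restore paths earlier in amdgpu_ras.c pull the same field back out before converting records. A small sketch of that pack and unpack, with placeholder values for UMC_NPS_SHIFT and UMC_NPS_MASK since the real definitions live in the UMC headers:

#include <stdint.h>

#define NPS_SHIFT	24		/* placeholder for UMC_NPS_SHIFT */
#define NPS_MASK	0xffULL		/* placeholder for UMC_NPS_MASK */

static uint64_t pack_nps(uint64_t retired_page, uint64_t nps)
{
	return retired_page | (nps << NPS_SHIFT);	/* EEPROM copy only */
}

static uint64_t unpack_nps(uint64_t *retired_page)
{
	uint64_t nps = (*retired_page >> NPS_SHIFT) & NPS_MASK;

	*retired_page &= ~(NPS_MASK << NPS_SHIFT);	/* back to a plain page number */
	return nps;
}
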
records", - control->ras_num_recs); + control->ras_num_bad_pages); - if (hdr->version == RAS_TABLE_VER_V2_1) { + if (hdr->version >= RAS_TABLE_VER_V2_1) { res = __read_table_ras_info(control); if (res) return res; @@ -1385,28 +1440,32 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) res = __verify_ras_table_checksum(control); if (res) - DRM_ERROR("RAS table incorrect checksum or error:%d\n", - res); + dev_err(adev->dev, + "RAS table incorrect checksum or error:%d\n", + res); /* Warn if we are at 90% of the threshold or above */ - if (10 * control->ras_num_recs >= 9 * ras->bad_page_cnt_threshold) + if (10 * control->ras_num_bad_pages >= 9 * ras->bad_page_cnt_threshold) dev_warn(adev->dev, "RAS records:%u exceeds 90%% of threshold:%d", - control->ras_num_recs, + control->ras_num_bad_pages, ras->bad_page_cnt_threshold); } else if (hdr->header == RAS_TABLE_HDR_BAD && amdgpu_bad_page_threshold != 0) { - if (hdr->version == RAS_TABLE_VER_V2_1) { + if (hdr->version >= RAS_TABLE_VER_V2_1) { res = __read_table_ras_info(control); if (res) return res; } res = __verify_ras_table_checksum(control); - if (res) - DRM_ERROR("RAS Table incorrect checksum or error:%d\n", - res); - if (ras->bad_page_cnt_threshold > control->ras_num_recs) { + if (res) { + dev_err(adev->dev, + "RAS Table incorrect checksum or error:%d\n", + res); + return -EINVAL; + } + if (ras->bad_page_cnt_threshold >= control->ras_num_bad_pages) { /* This means that, the threshold was increased since * the last time the system was booted, and now, * ras->bad_page_cnt_threshold - control->num_recs > 0, @@ -1416,22 +1475,23 @@ int amdgpu_ras_eeprom_init(struct amdgpu_ras_eeprom_control *control) dev_info(adev->dev, "records:%d threshold:%d, resetting " "RAS table header signature", - control->ras_num_recs, + control->ras_num_bad_pages, ras->bad_page_cnt_threshold); res = amdgpu_ras_eeprom_correct_header_tag(control, RAS_TABLE_HDR_VAL); } else { - dev_err(adev->dev, "RAS records:%d exceed threshold:%d", - control->ras_num_recs, ras->bad_page_cnt_threshold); - if (amdgpu_bad_page_threshold == -1) { - dev_warn(adev->dev, "GPU will be initialized due to bad_page_threshold = -1."); + dev_warn(adev->dev, + "RAS records:%d exceed threshold:%d\n", + control->ras_num_bad_pages, ras->bad_page_cnt_threshold); + if ((amdgpu_bad_page_threshold == -1) || + (amdgpu_bad_page_threshold == -2)) { res = 0; + dev_warn(adev->dev, + "Please consult AMD Service Action Guide (SAG) for appropriate service procedures\n"); } else { ras->is_rma = true; - dev_err(adev->dev, - "RAS records:%d exceed threshold:%d, " - "GPU will not be initialized. 
Replace this GPU or increase the threshold", - control->ras_num_recs, ras->bad_page_cnt_threshold); + dev_warn(adev->dev, + "User defined threshold is set, runtime service will be halt when threshold is reached\n"); } } } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h index b9ebda577797..ec6d7ea37ad0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.h @@ -28,6 +28,7 @@ #define RAS_TABLE_VER_V1 0x00010000 #define RAS_TABLE_VER_V2_1 0x00021000 +#define RAS_TABLE_VER_V3 0x00030000 struct amdgpu_device; @@ -82,6 +83,17 @@ struct amdgpu_ras_eeprom_control { */ u32 ras_num_recs; + /* the bad page number is ras_num_recs or + * ras_num_recs * umc.retire_unit + */ + u32 ras_num_bad_pages; + + /* Number of records store mca address */ + u32 ras_num_mca_recs; + + /* Number of records store physical address */ + u32 ras_num_pa_recs; + /* First record index to read, 0-based. * Range is [0, num_recs-1]. This is * an absolute index, starting right after @@ -145,6 +157,8 @@ uint32_t amdgpu_ras_eeprom_max_record_count(struct amdgpu_ras_eeprom_control *co void amdgpu_ras_debugfs_set_ret_size(struct amdgpu_ras_eeprom_control *control); +int amdgpu_ras_eeprom_check(struct amdgpu_ras_eeprom_control *control); + extern const struct file_operations amdgpu_ras_debugfs_eeprom_size_ops; extern const struct file_operations amdgpu_ras_debugfs_eeprom_table_ops; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c index a0acb65f4b40..dabfbdf6f1ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c @@ -183,6 +183,7 @@ int amdgpu_reset_init(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): ret = aldebaran_reset_init(adev); break; @@ -206,6 +207,7 @@ int amdgpu_reset_fini(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(13, 0, 2): case IP_VERSION(13, 0, 6): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): ret = aldebaran_reset_fini(adev); break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index a6e28fe3f8d6..d55c8b7fdb59 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -324,24 +324,33 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, /* always set cond_exec_polling to CONTINUE */ *ring->cond_exe_cpu_addr = 1; - r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type); - if (r) { - dev_err(adev->dev, "failed initializing fences (%d).\n", r); - return r; - } + if (ring->funcs->type != AMDGPU_RING_TYPE_CPER) { + r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type); + if (r) { + dev_err(adev->dev, "failed initializing fences (%d).\n", r); + return r; + } - max_ibs_dw = ring->funcs->emit_frame_size + - amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size; - max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask; + max_ibs_dw = ring->funcs->emit_frame_size + + amdgpu_ring_max_ibs(ring->funcs->type) * ring->funcs->emit_ib_size; + max_ibs_dw = (max_ibs_dw + ring->funcs->align_mask) & ~ring->funcs->align_mask; - if (WARN_ON(max_ibs_dw > max_dw)) - max_dw = max_ibs_dw; + if (WARN_ON(max_ibs_dw > max_dw)) + max_dw = max_ibs_dw; - 
ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission); + ring->ring_size = roundup_pow_of_two(max_dw * 4 * sched_hw_submission); + } else { + ring->ring_size = roundup_pow_of_two(max_dw * 4); + ring->count_dw = (ring->ring_size - 4) >> 2; + /* ring buffer is empty now */ + ring->wptr = *ring->rptr_cpu_addr = 0; + } ring->buf_mask = (ring->ring_size / 4) - 1; ring->ptr_mask = ring->funcs->support_64bit_ptrs ? 0xffffffffffffffff : ring->buf_mask; + /* Initialize cached_rptr to 0 */ + ring->cached_rptr = 0; /* Allocate ring buffer */ if (ring->is_mes_queue) { @@ -493,6 +502,7 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, { struct amdgpu_ring *ring = file_inode(f)->i_private; uint32_t value, result, early[3]; + uint64_t p; loff_t i; int r; @@ -502,13 +512,18 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, result = 0; if (*pos < 12) { + if (ring->funcs->type == AMDGPU_RING_TYPE_CPER) + mutex_lock(&ring->adev->cper.ring_lock); + early[0] = amdgpu_ring_get_rptr(ring) & ring->buf_mask; early[1] = amdgpu_ring_get_wptr(ring) & ring->buf_mask; early[2] = ring->wptr & ring->buf_mask; for (i = *pos / 4; i < 3 && size; i++) { r = put_user(early[i], (uint32_t *)buf); - if (r) - return r; + if (r) { + result = r; + goto out; + } buf += 4; result += 4; size -= 4; @@ -516,29 +531,79 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf, } } - while (size) { - if (*pos >= (ring->ring_size + 12)) - return result; + if (ring->funcs->type != AMDGPU_RING_TYPE_CPER) { + while (size) { + if (*pos >= (ring->ring_size + 12)) + return result; - value = ring->ring[(*pos - 12)/4]; - r = put_user(value, (uint32_t *)buf); - if (r) - return r; - buf += 4; - result += 4; - size -= 4; - *pos += 4; + value = ring->ring[(*pos - 12)/4]; + r = put_user(value, (uint32_t *)buf); + if (r) + return r; + buf += 4; + result += 4; + size -= 4; + *pos += 4; + } + } else { + p = early[0]; + if (early[0] <= early[1]) + size = (early[1] - early[0]); + else + size = ring->ring_size - (early[0] - early[1]); + + while (size) { + if (p == early[1]) + goto out; + + value = ring->ring[p]; + r = put_user(value, (uint32_t *)buf); + if (r) { + result = r; + goto out; + } + + buf += 4; + result += 4; + size--; + p++; + p &= ring->ptr_mask; + } } +out: + if (ring->funcs->type == AMDGPU_RING_TYPE_CPER) + mutex_unlock(&ring->adev->cper.ring_lock); + return result; } +static ssize_t amdgpu_debugfs_virt_ring_read(struct file *f, char __user *buf, + size_t size, loff_t *pos) +{ + struct amdgpu_ring *ring = file_inode(f)->i_private; + + if (*pos & 3 || size & 3) + return -EINVAL; + + if (ring->funcs->type == AMDGPU_RING_TYPE_CPER) + amdgpu_virt_req_ras_cper_dump(ring->adev, false); + + return amdgpu_debugfs_ring_read(f, buf, size, pos); +} + static const struct file_operations amdgpu_debugfs_ring_fops = { .owner = THIS_MODULE, .read = amdgpu_debugfs_ring_read, .llseek = default_llseek }; +static const struct file_operations amdgpu_debugfs_virt_ring_fops = { + .owner = THIS_MODULE, + .read = amdgpu_debugfs_virt_ring_read, + .llseek = default_llseek +}; + static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf, size_t size, loff_t *pos) { @@ -626,9 +691,14 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev, char name[32]; sprintf(name, "amdgpu_ring_%s", ring->name); - debugfs_create_file_size(name, S_IFREG | 0444, root, ring, - &amdgpu_debugfs_ring_fops, - ring->ring_size + 12); + if (amdgpu_sriov_vf(adev)) + 
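For CPER rings the debugfs read above walks from the read pointer to the write pointer with wrap-around instead of dumping the whole buffer. Expressed consistently in dwords, the amount of data to copy reduces to the following; this is an illustrative helper under that assumption, not driver API:

#include <stdint.h>

/* dwords available between rptr and wptr in a power-of-two sized ring */
static uint32_t cper_dwords_avail(uint32_t rptr, uint32_t wptr, uint32_t ring_dw)
{
	return (rptr <= wptr) ? (wptr - rptr) : (ring_dw - (rptr - wptr));
}
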
debugfs_create_file_size(name, S_IFREG | 0444, root, ring, + &amdgpu_debugfs_virt_ring_fops, + ring->ring_size + 12); + else + debugfs_create_file_size(name, S_IFREG | 0444, root, ring, + &amdgpu_debugfs_ring_fops, + ring->ring_size + 12); if (ring->mqd_obj) { sprintf(name, "amdgpu_mqd_%s", ring->name); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 36fc9578c53c..bb2b66385223 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -37,7 +37,7 @@ struct amdgpu_job; struct amdgpu_vm; /* max number of rings */ -#define AMDGPU_MAX_RINGS 124 +#define AMDGPU_MAX_RINGS 149 #define AMDGPU_MAX_HWIP_RINGS 64 #define AMDGPU_MAX_GFX_RINGS 2 #define AMDGPU_MAX_SW_GFX_RINGS 2 @@ -82,6 +82,7 @@ enum amdgpu_ring_type { AMDGPU_RING_TYPE_KIQ, AMDGPU_RING_TYPE_MES, AMDGPU_RING_TYPE_UMSCH_MM, + AMDGPU_RING_TYPE_CPER, }; enum amdgpu_ib_pool_type { @@ -237,6 +238,7 @@ struct amdgpu_ring_funcs { void (*patch_de)(struct amdgpu_ring *ring, unsigned offset); int (*reset)(struct amdgpu_ring *ring, unsigned int vmid); void (*emit_cleaner_shader)(struct amdgpu_ring *ring); + bool (*is_guilty)(struct amdgpu_ring *ring); }; struct amdgpu_ring { @@ -306,6 +308,8 @@ struct amdgpu_ring { bool is_sw_ring; unsigned int entry_index; + /* store the cached rptr to restore after reset */ + uint64_t cached_rptr; }; @@ -462,8 +466,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, enum amdgpu_ib_pool_type pool, struct amdgpu_ib *ib); -void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, - struct dma_fence *f); +void amdgpu_ib_free(struct amdgpu_ib *ib, struct dma_fence *f); int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, struct amdgpu_ib *ibs, struct amdgpu_job *job, struct dma_fence **f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index 10df731998b2..39070b2a4c04 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -93,8 +93,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, return 0; } -void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct drm_suballoc **sa_bo, - struct dma_fence *fence) +void amdgpu_sa_bo_free(struct drm_suballoc **sa_bo, struct dma_fence *fence) { if (sa_bo == NULL || *sa_bo == NULL) { return; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c index 113f0d242618..529c9696c2f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "amdgpu_sdma.h" #include "amdgpu_ras.h" +#include "amdgpu_reset.h" #define AMDGPU_CSA_SDMA_SIZE 64 /* SDMA CSA reside in the 3rd page of CSA */ @@ -219,9 +220,11 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, amdgpu_ucode_ip_version_decode(adev, SDMA0_HWIP, ucode_prefix, sizeof(ucode_prefix)); if (instance == 0) err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s.bin", ucode_prefix); else err = amdgpu_ucode_request(adev, &adev->sdma.instance[instance].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s%d.bin", ucode_prefix, instance); if (err) goto out; @@ -261,6 +264,8 @@ int amdgpu_sdma_init_microcode(struct amdgpu_device *adev, if ((amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) || amdgpu_ip_version(adev, SDMA0_HWIP, 0) == + IP_VERSION(4, 4, 4) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == 
IP_VERSION(4, 4, 5)) && adev->firmware.load_type == AMDGPU_FW_LOAD_PSP && @@ -351,23 +356,44 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev) static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val) { struct amdgpu_device *adev = (struct amdgpu_device *)data; - u32 i; + u64 i, num_ring; u64 mask = 0; - struct amdgpu_ring *ring; + struct amdgpu_ring *ring, *page = NULL; if (!adev) return -ENODEV; - mask = (1 << adev->sdma.num_instances) - 1; + /* Determine the number of rings per SDMA instance + * (1 for sdma gfx ring, 2 if page queue exists) + */ + if (adev->sdma.has_page_queue) + num_ring = 2; + else + num_ring = 1; + + /* Calculate the maximum possible mask value + * based on the number of SDMA instances and rings + */ + mask = BIT_ULL(adev->sdma.num_instances * num_ring) - 1; + if ((val & mask) == 0) return -EINVAL; for (i = 0; i < adev->sdma.num_instances; ++i) { ring = &adev->sdma.instance[i].ring; - if (val & (1 << i)) + if (adev->sdma.has_page_queue) + page = &adev->sdma.instance[i].page; + if (val & BIT_ULL(i * num_ring)) ring->sched.ready = true; else ring->sched.ready = false; + + if (page) { + if (val & BIT_ULL(i * num_ring + 1)) + page->sched.ready = true; + else + page->sched.ready = false; + } } /* publish sched.ready flag update effective immediately across smp */ smp_rmb(); @@ -377,16 +403,37 @@ static int amdgpu_debugfs_sdma_sched_mask_set(void *data, u64 val) static int amdgpu_debugfs_sdma_sched_mask_get(void *data, u64 *val) { struct amdgpu_device *adev = (struct amdgpu_device *)data; - u32 i; + u64 i, num_ring; u64 mask = 0; - struct amdgpu_ring *ring; + struct amdgpu_ring *ring, *page = NULL; if (!adev) return -ENODEV; + + /* Determine the number of rings per SDMA instance + * (1 for sdma gfx ring, 2 if page queue exists) + */ + if (adev->sdma.has_page_queue) + num_ring = 2; + else + num_ring = 1; + for (i = 0; i < adev->sdma.num_instances; ++i) { ring = &adev->sdma.instance[i].ring; + if (adev->sdma.has_page_queue) + page = &adev->sdma.instance[i].page; + if (ring->sched.ready) - mask |= 1 << i; + mask |= BIT_ULL(i * num_ring); + else + mask &= ~BIT_ULL(i * num_ring); + + if (page) { + if (page->sched.ready) + mask |= BIT_ULL(i * num_ring + 1); + else + mask &= ~BIT_ULL(i * num_ring + 1); + } } *val = mask; @@ -456,3 +503,147 @@ void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_sdma_reset_mask); } } + +struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + if (adev->sdma.has_page_queue && + (ring->me < adev->sdma.num_instances) && + (ring == &adev->sdma.instance[ring->me].ring)) + return &adev->sdma.instance[ring->me].page; + else + return NULL; +} + +/** +* amdgpu_sdma_is_shared_inv_eng - Check if a ring is an SDMA ring that shares a VM invalidation engine +* @adev: Pointer to the AMDGPU device structure +* @ring: Pointer to the ring structure to check +* +* This function checks if the given ring is an SDMA ring that shares a VM invalidation engine. +* It returns true if the ring is such an SDMA ring, false otherwise. 
+*/ +bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring) +{ + int i = ring->me; + + if (!adev->sdma.has_page_queue || i >= adev->sdma.num_instances) + return false; + + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0)) + return (ring == &adev->sdma.instance[i].page); + else + return false; +} + +/** + * amdgpu_sdma_register_on_reset_callbacks - Register SDMA reset callbacks + * @funcs: Pointer to the callback structure containing pre_reset and post_reset functions + * + * This function allows KFD and AMDGPU to register their own callbacks for handling + * pre-reset and post-reset operations for engine reset. These are needed because engine + * reset will stop all queues on that engine. + */ +void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs) +{ + if (!funcs) + return; + + /* Ensure the reset_callback_list is initialized */ + if (!adev->sdma.reset_callback_list.next) { + INIT_LIST_HEAD(&adev->sdma.reset_callback_list); + } + /* Initialize the list node in the callback structure */ + INIT_LIST_HEAD(&funcs->list); + + /* Add the callback structure to the global list */ + list_add_tail(&funcs->list, &adev->sdma.reset_callback_list); +} + +/** + * amdgpu_sdma_reset_engine - Reset a specific SDMA engine + * @adev: Pointer to the AMDGPU device + * @instance_id: ID of the SDMA engine instance to reset + * + * This function performs the following steps: + * 1. Calls all registered pre_reset callbacks to allow KFD and AMDGPU to save their state. + * 2. Resets the specified SDMA engine instance. + * 3. Calls all registered post_reset callbacks to allow KFD and AMDGPU to restore their state. + * + * Returns: 0 on success, or a negative error code on failure. + */ +int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id) +{ + struct sdma_on_reset_funcs *funcs; + int ret = 0; + struct amdgpu_sdma_instance *sdma_instance = &adev->sdma.instance[instance_id]; + struct amdgpu_ring *gfx_ring = &sdma_instance->ring; + struct amdgpu_ring *page_ring = &sdma_instance->page; + bool gfx_sched_stopped = false, page_sched_stopped = false; + + mutex_lock(&sdma_instance->engine_reset_mutex); + /* Stop the scheduler's work queue for the GFX and page rings if they are running. + * This ensures that no new tasks are submitted to the queues while + * the reset is in progress. 
+ */ + if (!amdgpu_ring_sched_ready(gfx_ring)) { + drm_sched_wqueue_stop(&gfx_ring->sched); + gfx_sched_stopped = true; + } + + if (adev->sdma.has_page_queue && !amdgpu_ring_sched_ready(page_ring)) { + drm_sched_wqueue_stop(&page_ring->sched); + page_sched_stopped = true; + } + + /* Invoke all registered pre_reset callbacks */ + list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { + if (funcs->pre_reset) { + ret = funcs->pre_reset(adev, instance_id); + if (ret) { + dev_err(adev->dev, + "beforeReset callback failed for instance %u: %d\n", + instance_id, ret); + goto exit; + } + } + } + + /* Perform the SDMA reset for the specified instance */ + ret = amdgpu_dpm_reset_sdma(adev, 1 << instance_id); + if (ret) { + dev_err(adev->dev, "Failed to reset SDMA instance %u\n", instance_id); + goto exit; + } + + /* Invoke all registered post_reset callbacks */ + list_for_each_entry(funcs, &adev->sdma.reset_callback_list, list) { + if (funcs->post_reset) { + ret = funcs->post_reset(adev, instance_id); + if (ret) { + dev_err(adev->dev, + "afterReset callback failed for instance %u: %d\n", + instance_id, ret); + goto exit; + } + } + } + +exit: + /* Restart the scheduler's work queue for the GFX and page rings + * if they were stopped by this function. This allows new tasks + * to be submitted to the queues after the reset is complete. + */ + if (!ret) { + if (gfx_sched_stopped && amdgpu_ring_sched_ready(gfx_ring)) { + drm_sched_wqueue_start(&gfx_ring->sched); + } + if (page_sched_stopped && amdgpu_ring_sched_ready(page_ring)) { + drm_sched_wqueue_start(&page_ring->sched); + } + } + mutex_unlock(&sdma_instance->engine_reset_mutex); + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h index 2db58b5812a8..47d56fd0589f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -64,6 +64,11 @@ struct amdgpu_sdma_instance { struct amdgpu_bo *sdma_fw_obj; uint64_t sdma_fw_gpu_addr; uint32_t *sdma_fw_ptr; + struct mutex engine_reset_mutex; + /* track guilty state of GFX and PAGE queues */ + bool gfx_guilty; + bool page_guilty; + }; enum amdgpu_sdma_ras_memory_id { @@ -98,6 +103,13 @@ struct amdgpu_sdma_ras { struct amdgpu_ras_block_object ras_block; }; +struct sdma_on_reset_funcs { + int (*pre_reset)(struct amdgpu_device *adev, uint32_t instance_id); + int (*post_reset)(struct amdgpu_device *adev, uint32_t instance_id); + /* Linked list node to store this structure in a list; */ + struct list_head list; +}; + struct amdgpu_sdma { struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; struct amdgpu_irq_src trap_irq; @@ -107,6 +119,7 @@ struct amdgpu_sdma { struct amdgpu_irq_src doorbell_invalid_irq; struct amdgpu_irq_src pool_timeout_irq; struct amdgpu_irq_src srbm_write_irq; + struct amdgpu_irq_src ctxt_empty_irq; int num_instances; uint32_t sdma_mask; @@ -117,6 +130,7 @@ struct amdgpu_sdma { struct amdgpu_sdma_ras *ras; uint32_t *ip_dump; uint32_t supported_reset; + struct list_head reset_callback_list; }; /* @@ -156,6 +170,9 @@ struct amdgpu_buffer_funcs { uint32_t byte_count); }; +void amdgpu_sdma_register_on_reset_callbacks(struct amdgpu_device *adev, struct sdma_on_reset_funcs *funcs); +int amdgpu_sdma_reset_engine(struct amdgpu_device *adev, uint32_t instance_id); + #define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b), (t)) #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) 
(adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) @@ -179,4 +196,7 @@ int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev); void amdgpu_debugfs_sdma_sched_mask_init(struct amdgpu_device *adev); int amdgpu_sdma_sysfs_reset_mask_init(struct amdgpu_device *adev); void amdgpu_sdma_sysfs_reset_mask_fini(struct amdgpu_device *adev); +bool amdgpu_sdma_is_shared_inv_eng(struct amdgpu_device *adev, struct amdgpu_ring *ring); +struct amdgpu_ring *amdgpu_sdma_get_shared_ring(struct amdgpu_device *adev, + struct amdgpu_ring *ring); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index c586ab4c911b..5576ed0b508f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -135,11 +135,16 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) struct amdgpu_sync_entry *e; hash_for_each_possible(sync->fences, e, node, f->context) { - if (unlikely(e->fence->context != f->context)) - continue; + if (dma_fence_is_signaled(e->fence)) { + dma_fence_put(e->fence); + e->fence = dma_fence_get(f); + return true; + } - amdgpu_sync_keep_later(&e->fence, f); - return true; + if (likely(e->fence->context == f->context)) { + amdgpu_sync_keep_later(&e->fence, f); + return true; + } } return false; } @@ -149,10 +154,12 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) * * @sync: sync object to add fence to * @f: fence to sync to + * @flags: memory allocation flags to use when allocating sync entry * * Add the fence to the sync object. */ -int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, + gfp_t flags) { struct amdgpu_sync_entry *e; @@ -162,7 +169,7 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) if (amdgpu_sync_add_later(sync, f)) return 0; - e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); + e = kmem_cache_alloc(amdgpu_sync_slab, flags); if (!e) return -ENOMEM; @@ -249,7 +256,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_fence *tmp = dma_fence_chain_contained(f); if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) { - r = amdgpu_sync_fence(sync, f); + r = amdgpu_sync_fence(sync, f, GFP_KERNEL); dma_fence_put(f); if (r) return r; @@ -281,7 +288,7 @@ int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv) if (fence_owner != AMDGPU_FENCE_OWNER_KFD) continue; - r = amdgpu_sync_fence(sync, f); + r = amdgpu_sync_fence(sync, f, GFP_KERNEL); if (r) break; } @@ -388,7 +395,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) hash_for_each_safe(source->fences, i, tmp, e, node) { f = e->fence; if (!dma_fence_is_signaled(f)) { - r = amdgpu_sync_fence(clone, f); + r = amdgpu_sync_fence(clone, f, GFP_KERNEL); if (r) return r; } else { @@ -400,6 +407,25 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) } /** + * amdgpu_sync_move - move all fences from src to dst + * + * @src: source of the fences, empty after function + * @dst: destination for the fences + * + * Moves all fences from source to destination. All fences in destination are + * freed and source is empty after the function call. 
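+ * No locking is done here, so the caller has to make sure neither sync
+ * object is modified concurrently.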
+ */ +void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst) +{ + unsigned int i; + + amdgpu_sync_free(dst); + + for (i = 0; i < HASH_SIZE(src->fences); ++i) + hlist_move_list(&src->fences[i], &dst->fences[i]); +} + +/** * amdgpu_sync_push_to_job - push fences into job * @sync: sync object to get the fences from * @job: job to push the fences into diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index e3272dce798d..51eb4382c91e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -47,7 +47,8 @@ struct amdgpu_sync { }; void amdgpu_sync_create(struct amdgpu_sync *sync); -int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, + gfp_t flags); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner); @@ -56,6 +57,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); +void amdgpu_sync_move(struct amdgpu_sync *src, struct amdgpu_sync *dst); int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job); int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 383fce40d4dd..11dd2e0f7979 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -457,6 +457,38 @@ DEFINE_EVENT(amdgpu_pasid, amdgpu_pasid_freed, TP_ARGS(pasid) ); +TRACE_EVENT(amdgpu_isolation, + TP_PROTO(void *prev, void *next), + TP_ARGS(prev, next), + TP_STRUCT__entry( + __field(void *, prev) + __field(void *, next) + ), + + TP_fast_assign( + __entry->prev = prev; + __entry->next = next; + ), + TP_printk("prev=%p, next=%p", + __entry->prev, + __entry->next) +); + +TRACE_EVENT(amdgpu_cleaner_shader, + TP_PROTO(struct amdgpu_ring *ring, struct dma_fence *fence), + TP_ARGS(ring, fence), + TP_STRUCT__entry( + __string(ring, ring->name) + __field(u64, seqno) + ), + + TP_fast_assign( + __assign_str(ring); + __entry->seqno = fence->seqno; + ), + TP_printk("ring=%s, seqno=%Lu", __get_str(ring), __entry->seqno) +); + TRACE_EVENT(amdgpu_bo_list_set, TP_PROTO(struct amdgpu_bo_list *list, struct amdgpu_bo *bo), TP_ARGS(list, bo), diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index c8180cad0abd..53b71e9d8076 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -309,7 +309,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, mutex_lock(&adev->mman.gtt_window_lock); while (src_mm.remaining) { uint64_t from, to, cur_size, tiling_flags; - uint32_t num_type, data_format, max_com; + uint32_t num_type, data_format, max_com, write_compress_disable; struct dma_fence *next; /* Never copy more than 256MiB at once to avoid a timeout */ @@ -340,9 +340,13 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK); num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE); data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT); + write_compress_disable = + 
AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_WRITE_COMPRESS_DISABLE); copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) | AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) | - AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format)); + AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format) | + AMDGPU_COPY_FLAGS_SET(WRITE_COMPRESS_DISABLE, + write_compress_disable)); } r = amdgpu_copy_buffer(ring, from, to, cur_size, resv, @@ -1762,7 +1766,8 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) if (!adev->bios && (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0))) reserve_size = max(reserve_size, (uint32_t)280 << 20); else if (!reserve_size) reserve_size = DISCOVERY_TMR_OFFSET; @@ -1959,10 +1964,19 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Compute GTT size, either based on TTM limit * or whatever the user passed on module init. */ - if (amdgpu_gtt_size == -1) - gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT; - else - gtt_size = (uint64_t)amdgpu_gtt_size << 20; + gtt_size = ttm_tt_pages_limit() << PAGE_SHIFT; + if (amdgpu_gtt_size != -1) { + uint64_t configured_size = (uint64_t)amdgpu_gtt_size << 20; + + drm_warn(&adev->ddev, + "Configuring gttsize via module parameter is deprecated, please use ttm.pages_limit\n"); + if (gtt_size != configured_size) + drm_warn(&adev->ddev, + "GTT size has been set as %llu but TTM size has been set as %llu, this is unusual\n", + configured_size, gtt_size); + + gtt_size = configured_size; + } /* Initialize GTT memory pool */ r = amdgpu_gtt_mgr_init(adev, gtt_size); @@ -1973,6 +1987,11 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) DRM_INFO("amdgpu: %uM of GTT memory ready.\n", (unsigned int)(gtt_size / (1024 * 1024))); + if (adev->flags & AMD_IS_APU) { + if (adev->gmc.real_vram_size < gtt_size) + adev->apu_prefer_gtt = true; + } + /* Initialize doorbell pool on PCI BAR */ r = amdgpu_ttm_init_on_chip(adev, AMDGPU_PL_DOORBELL, adev->doorbell.size / PAGE_SIZE); if (r) { @@ -2065,6 +2084,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GDS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_GWS); ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_OA); + ttm_range_man_fini(&adev->mman.bdev, AMDGPU_PL_DOORBELL); ttm_device_fini(&adev->mman.bdev); adev->mman.initialized = false; DRM_INFO("amdgpu: ttm finalized\n"); @@ -2275,7 +2295,7 @@ int amdgpu_ttm_clear_buffer(struct amdgpu_bo *bo, struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; struct amdgpu_res_cursor cursor; u64 addr; - int r; + int r = 0; if (!adev->mman.buffer_funcs_enabled) return -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 2852a6064c9a..208b7d1d8a27 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -26,15 +26,15 @@ #include <linux/dma-direction.h> #include <drm/gpu_scheduler.h> +#include <drm/ttm/ttm_placement.h> #include "amdgpu_vram_mgr.h" -#include "amdgpu.h" #define AMDGPU_PL_GDS (TTM_PL_PRIV + 0) #define AMDGPU_PL_GWS (TTM_PL_PRIV + 1) #define AMDGPU_PL_OA (TTM_PL_PRIV + 2) #define AMDGPU_PL_PREEMPT (TTM_PL_PRIV + 3) #define AMDGPU_PL_DOORBELL (TTM_PL_PRIV + 4) -#define __AMDGPU_PL_LAST (TTM_PL_PRIV + 4) +#define __AMDGPU_PL_NUM (TTM_PL_PRIV + 5) #define AMDGPU_GTT_MAX_TRANSFER_SIZE 512 #define 
AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 @@ -119,6 +119,8 @@ struct amdgpu_copy_mem { #define AMDGPU_COPY_FLAGS_NUMBER_TYPE_MASK 0x07 #define AMDGPU_COPY_FLAGS_DATA_FORMAT_SHIFT 8 #define AMDGPU_COPY_FLAGS_DATA_FORMAT_MASK 0x3f +#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_SHIFT 14 +#define AMDGPU_COPY_FLAGS_WRITE_COMPRESS_DISABLE_MASK 0x1 #define AMDGPU_COPY_FLAGS_SET(field, value) \ (((__u32)(value) & AMDGPU_COPY_FLAGS_##field##_MASK) << AMDGPU_COPY_FLAGS_##field##_SHIFT) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index 4c7b53648a50..3d9e9fdc10b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -1216,6 +1216,7 @@ static const char *amdgpu_ucode_legacy_naming(struct amdgpu_device *adev, int bl case IP_VERSION(11, 0, 13): return "beige_goby"; case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 2): return "vangogh"; case IP_VERSION(12, 0, 1): return "green_sardine"; @@ -1434,6 +1435,7 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, * * @adev: amdgpu device * @fw: pointer to load firmware to + * @required: whether the firmware is required * @fmt: firmware name format string * @...: variable arguments * @@ -1442,7 +1444,7 @@ void amdgpu_ucode_ip_version_decode(struct amdgpu_device *adev, int block_type, * the error code to -ENODEV, so that early_init functions will fail to load. */ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, - const char *fmt, ...) + enum amdgpu_ucode_required required, const char *fmt, ...) { char fname[AMDGPU_UCODE_NAME_MAX]; va_list ap; @@ -1456,16 +1458,24 @@ int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, return -EOVERFLOW; } - r = request_firmware(fw, fname, adev->dev); + if (required == AMDGPU_UCODE_REQUIRED) + r = request_firmware(fw, fname, adev->dev); + else { + r = firmware_request_nowarn(fw, fname, adev->dev); + if (r) + drm_info(&adev->ddev, "Optional firmware \"%s\" was not found\n", fname); + } if (r) return -ENODEV; r = amdgpu_ucode_validate(*fw); - if (r) { + if (r) + /* + * The amdgpu_ucode_request() should be paired with amdgpu_ucode_release() + * regardless of success/failure, and the amdgpu_ucode_release() takes care of + * firmware release and need to avoid redundant release FW operation here. 
+ */ dev_dbg(adev->dev, "\"%s\" failed to validate\n", fname); - release_firmware(*fw); - *fw = NULL; - } return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 4150ec0aa10d..4eedd92f000b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -126,6 +126,7 @@ enum psp_fw_type { PSP_FW_TYPE_PSP_DBG_DRV, PSP_FW_TYPE_PSP_RAS_DRV, PSP_FW_TYPE_PSP_IPKEYMGR_DRV, + PSP_FW_TYPE_PSP_SPDM_DRV, PSP_FW_TYPE_MAX_INDEX, }; @@ -551,6 +552,11 @@ enum amdgpu_firmware_load_type { AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO, }; +enum amdgpu_ucode_required { + AMDGPU_UCODE_OPTIONAL, + AMDGPU_UCODE_REQUIRED, +}; + /* conform to smu_ucode_xfer_cz.h */ #define AMDGPU_SDMA0_UCODE_LOADED 0x00000001 #define AMDGPU_SDMA1_UCODE_LOADED 0x00000002 @@ -604,9 +610,9 @@ void amdgpu_ucode_print_rlc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_sdma_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_psp_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr); -__printf(3, 4) +__printf(4, 5) int amdgpu_ucode_request(struct amdgpu_device *adev, const struct firmware **fw, - const char *fmt, ...); + enum amdgpu_ucode_required required, const char *fmt, ...); void amdgpu_ucode_release(const struct firmware **fw); bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, uint16_t hdr_major, uint16_t hdr_minor); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index 896f3609b0ee..0a1ef95b2866 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -78,7 +78,7 @@ int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, if (amdgpu_bad_page_threshold != 0) { amdgpu_ras_add_bad_pages(adev, err_data.err_addr, - err_data.err_addr_cnt); + err_data.err_addr_cnt, false); amdgpu_ras_save_bad_pages(adev, NULL); } @@ -166,10 +166,11 @@ void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, if ((amdgpu_bad_page_threshold != 0) && err_data->err_addr_cnt) { amdgpu_ras_add_bad_pages(adev, err_data->err_addr, - err_data->err_addr_cnt); + err_data->err_addr_cnt, false); amdgpu_ras_save_bad_pages(adev, &err_count); - amdgpu_dpm_send_hbm_bad_pages_num(adev, con->eeprom_control.ras_num_recs); + amdgpu_dpm_send_hbm_bad_pages_num(adev, + con->eeprom_control.ras_num_bad_pages); if (con->update_channel_flag == true) { amdgpu_dpm_send_hbm_bad_channel_flag(adev, con->eeprom_control.bad_channel_bitmap); @@ -386,6 +387,45 @@ int amdgpu_umc_fill_error_record(struct ras_err_data *err_data, return 0; } +static int amdgpu_umc_loop_all_aid(struct amdgpu_device *adev, umc_func func, + void *data) +{ + uint32_t umc_node_inst; + uint32_t node_inst; + uint32_t umc_inst; + uint32_t ch_inst; + int ret; + + /* + * This loop is done based on the following - + * umc.active mask = mask of active umc instances across all nodes + * umc.umc_inst_num = maximum number of umc instancess per node + * umc.node_inst_num = maximum number of node instances + * Channel instances are not assumed to be harvested. 
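+ * Each set bit in the flat active mask is decomposed below as
+ * node_inst = bit / umc_inst_num and umc_inst = bit % umc_inst_num.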
+ */ + dev_dbg(adev->dev, "active umcs :%lx umc_inst per node: %d", + adev->umc.active_mask, adev->umc.umc_inst_num); + for_each_set_bit(umc_node_inst, &(adev->umc.active_mask), + adev->umc.node_inst_num * adev->umc.umc_inst_num) { + node_inst = umc_node_inst / adev->umc.umc_inst_num; + umc_inst = umc_node_inst % adev->umc.umc_inst_num; + LOOP_UMC_CH_INST(ch_inst) { + dev_dbg(adev->dev, + "node_inst :%d umc_inst: %d ch_inst: %d", + node_inst, umc_inst, ch_inst); + ret = func(adev, node_inst, umc_inst, ch_inst, data); + if (ret) { + dev_err(adev->dev, + "Node %d umc %d ch %d func returns %d\n", + node_inst, umc_inst, ch_inst, ret); + return ret; + } + } + } + + return 0; +} + int amdgpu_umc_loop_channels(struct amdgpu_device *adev, umc_func func, void *data) { @@ -394,6 +434,9 @@ int amdgpu_umc_loop_channels(struct amdgpu_device *adev, uint32_t ch_inst = 0; int ret = 0; + if (adev->aid_mask) + return amdgpu_umc_loop_all_aid(adev, func, data); + if (adev->umc.node_inst_num) { LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) { ret = func(adev, node_inst, umc_inst, ch_inst, data); @@ -444,3 +487,77 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, return ret; } + +int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t pa_addr) +{ + struct ta_ras_query_address_output addr_out; + + /* reinit err_data */ + err_data->err_addr_cnt = 0; + err_data->err_addr_len = adev->umc.retire_unit; + + addr_out.pa.pa = pa_addr; + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) + return adev->umc.ras->convert_ras_err_addr(adev, err_data, NULL, + &addr_out, false); + else + return -EINVAL; +} + +int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, + uint64_t pa_addr, uint64_t *pfns, int len) +{ + int i, ret; + struct ras_err_data err_data; + + err_data.err_addr = kcalloc(adev->umc.retire_unit, + sizeof(struct eeprom_table_record), GFP_KERNEL); + if (!err_data.err_addr) { + dev_warn(adev->dev, "Failed to alloc memory in bad page lookup!\n"); + return 0; + } + + ret = amdgpu_umc_pages_in_a_row(adev, &err_data, pa_addr); + if (ret) + goto out; + + for (i = 0; i < adev->umc.retire_unit; i++) { + if (i >= len) + goto out; + + pfns[i] = err_data.err_addr[i].retired_page; + } + ret = i; + +out: + kfree(err_data.err_addr); + return ret; +} + +int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev, + uint64_t err_addr, uint32_t ch, uint32_t umc, + uint32_t node, uint32_t socket, + struct ta_ras_query_address_output *addr_out, bool dump_addr) +{ + struct ta_ras_query_address_input addr_in; + int ret; + + memset(&addr_in, 0, sizeof(addr_in)); + addr_in.ma.err_addr = err_addr; + addr_in.ma.ch_inst = ch; + addr_in.ma.umc_inst = umc; + addr_in.ma.node_inst = node; + addr_in.ma.socket_id = socket; + + if (adev->umc.ras && adev->umc.ras->convert_ras_err_addr) { + ret = adev->umc.ras->convert_ras_err_addr(adev, NULL, &addr_in, + addr_out, dump_addr); + if (ret) + return ret; + } else { + return 0; + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index ce4179db2a6d..857693bcd8d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -54,6 +54,29 @@ /* Page retirement tag */ #define UMC_ECC_NEW_DETECTED_TAG 0x1 +/* + * a flag to indicate v2 of channel index stored in eeprom + * + * v1 (legacy way): store channel index within a umc instance in eeprom + * range in UMC v12: 0 ~ 7 + * v2: store global channel index in eeprom + * range 
in UMC v12: 0 ~ 127 + * + * NOTE: it's better to store it in eeprom_table_record.mem_channel, + * but there is only 8 bits in mem_channel, and the channel number may + * increase in the future, we decide to save it in + * eeprom_table_record.retired_page. retired_page is useless in v2, + * we depend on eeprom_table_record.address instead of retired_page in v2. + * Only 48 bits are saved on eeprom, use bit 47 here. + */ +#define UMC_CHANNEL_IDX_V2 BIT_ULL(47) + +/* + * save nps value to eeprom_table_record.retired_page[47:40], + * the channel index flag above will be retired. + */ +#define UMC_NPS_SHIFT 40 +#define UMC_NPS_MASK 0xffULL typedef int (*umc_func)(struct amdgpu_device *adev, uint32_t node_inst, uint32_t umc_inst, uint32_t ch_inst, void *data); @@ -70,6 +93,13 @@ struct amdgpu_umc_ras { enum amdgpu_mca_error_type type, void *ras_error_status); int (*update_ecc_status)(struct amdgpu_device *adev, uint64_t status, uint64_t ipid, uint64_t addr); + int (*convert_ras_err_addr)(struct amdgpu_device *adev, + struct ras_err_data *err_data, + struct ta_ras_query_address_input *addr_in, + struct ta_ras_query_address_output *addr_out, + bool dump_addr); + uint32_t (*get_die_id_from_pa)(struct amdgpu_device *adev, + uint64_t mca_addr, uint64_t retired_page); }; struct amdgpu_umc_funcs { @@ -134,4 +164,12 @@ int amdgpu_umc_logs_ecc_err(struct amdgpu_device *adev, void amdgpu_umc_handle_bad_pages(struct amdgpu_device *adev, void *ras_error_status); +int amdgpu_umc_pages_in_a_row(struct amdgpu_device *adev, + struct ras_err_data *err_data, uint64_t pa_addr); +int amdgpu_umc_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, + uint64_t pa_addr, uint64_t *pfns, int len); +int amdgpu_umc_mca_to_addr(struct amdgpu_device *adev, + uint64_t err_addr, uint32_t ch, uint32_t umc, + uint32_t node, uint32_t socket, + struct ta_ras_query_address_output *addr_out, bool dump_addr); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c index bd2d3863c3ed..cd707d70a0bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umsch_mm.c @@ -32,462 +32,7 @@ #include "amdgpu_umsch_mm.h" #include "umsch_mm_v4_0.h" -struct umsch_mm_test_ctx_data { - uint8_t process_csa[PAGE_SIZE]; - uint8_t vpe_ctx_csa[PAGE_SIZE]; - uint8_t vcn_ctx_csa[PAGE_SIZE]; -}; - -struct umsch_mm_test_mqd_data { - uint8_t vpe_mqd[PAGE_SIZE]; - uint8_t vcn_mqd[PAGE_SIZE]; -}; - -struct umsch_mm_test_ring_data { - uint8_t vpe_ring[PAGE_SIZE]; - uint8_t vpe_ib[PAGE_SIZE]; - uint8_t vcn_ring[PAGE_SIZE]; - uint8_t vcn_ib[PAGE_SIZE]; -}; - -struct umsch_mm_test_queue_info { - uint64_t mqd_addr; - uint64_t csa_addr; - uint32_t doorbell_offset_0; - uint32_t doorbell_offset_1; - enum UMSCH_SWIP_ENGINE_TYPE engine; -}; - -struct umsch_mm_test { - struct amdgpu_bo *ctx_data_obj; - uint64_t ctx_data_gpu_addr; - uint32_t *ctx_data_cpu_addr; - - struct amdgpu_bo *mqd_data_obj; - uint64_t mqd_data_gpu_addr; - uint32_t *mqd_data_cpu_addr; - - struct amdgpu_bo *ring_data_obj; - uint64_t ring_data_gpu_addr; - uint32_t *ring_data_cpu_addr; - - - struct amdgpu_vm *vm; - struct amdgpu_bo_va *bo_va; - uint32_t pasid; - uint32_t vm_cntx_cntl; - uint32_t num_queues; -}; - -static int map_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va, - uint64_t addr, uint32_t size) -{ - struct amdgpu_sync sync; - struct drm_exec exec; - int r; - - amdgpu_sync_create(&sync); - - drm_exec_init(&exec, 0, 0); - 
drm_exec_until_all_locked(&exec) { - r = drm_exec_lock_obj(&exec, &bo->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto error_fini_exec; - - r = amdgpu_vm_lock_pd(vm, &exec, 0); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto error_fini_exec; - } - - *bo_va = amdgpu_vm_bo_add(adev, vm, bo); - if (!*bo_va) { - r = -ENOMEM; - goto error_fini_exec; - } - - r = amdgpu_vm_bo_map(adev, *bo_va, addr, 0, size, - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); - - if (r) - goto error_del_bo_va; - - - r = amdgpu_vm_bo_update(adev, *bo_va, false); - if (r) - goto error_del_bo_va; - - amdgpu_sync_fence(&sync, (*bo_va)->last_pt_update); - - r = amdgpu_vm_update_pdes(adev, vm, false); - if (r) - goto error_del_bo_va; - - amdgpu_sync_fence(&sync, vm->last_update); - - amdgpu_sync_wait(&sync, false); - drm_exec_fini(&exec); - - amdgpu_sync_free(&sync); - - return 0; - -error_del_bo_va: - amdgpu_vm_bo_del(adev, *bo_va); - amdgpu_sync_free(&sync); - -error_fini_exec: - drm_exec_fini(&exec); - amdgpu_sync_free(&sync); - return r; -} - -static int unmap_ring_data(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo *bo, struct amdgpu_bo_va *bo_va, - uint64_t addr) -{ - struct drm_exec exec; - long r; - - drm_exec_init(&exec, 0, 0); - drm_exec_until_all_locked(&exec) { - r = drm_exec_lock_obj(&exec, &bo->tbo.base); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto out_unlock; - - r = amdgpu_vm_lock_pd(vm, &exec, 0); - drm_exec_retry_on_contention(&exec); - if (unlikely(r)) - goto out_unlock; - } - - - r = amdgpu_vm_bo_unmap(adev, bo_va, addr); - if (r) - goto out_unlock; - - amdgpu_vm_bo_del(adev, bo_va); - -out_unlock: - drm_exec_fini(&exec); - - return r; -} - -static void setup_vpe_queue(struct amdgpu_device *adev, - struct umsch_mm_test *test, - struct umsch_mm_test_queue_info *qinfo) -{ - struct MQD_INFO *mqd = (struct MQD_INFO *)test->mqd_data_cpu_addr; - uint64_t ring_gpu_addr = test->ring_data_gpu_addr; - - mqd->rb_base_lo = (ring_gpu_addr >> 8); - mqd->rb_base_hi = (ring_gpu_addr >> 40); - mqd->rb_size = PAGE_SIZE / 4; - mqd->wptr_val = 0; - mqd->rptr_val = 0; - mqd->unmapped = 1; - - if (adev->vpe.collaborate_mode) - memcpy(++mqd, test->mqd_data_cpu_addr, sizeof(struct MQD_INFO)); - - qinfo->mqd_addr = test->mqd_data_gpu_addr; - qinfo->csa_addr = test->ctx_data_gpu_addr + - offsetof(struct umsch_mm_test_ctx_data, vpe_ctx_csa); - qinfo->doorbell_offset_0 = 0; - qinfo->doorbell_offset_1 = 0; -} - -static void setup_vcn_queue(struct amdgpu_device *adev, - struct umsch_mm_test *test, - struct umsch_mm_test_queue_info *qinfo) -{ -} - -static int add_test_queue(struct amdgpu_device *adev, - struct umsch_mm_test *test, - struct umsch_mm_test_queue_info *qinfo) -{ - struct umsch_mm_add_queue_input queue_input = {}; - int r; - - queue_input.process_id = test->pasid; - queue_input.page_table_base_addr = amdgpu_gmc_pd_addr(test->vm->root.bo); - - queue_input.process_va_start = 0; - queue_input.process_va_end = (adev->vm_manager.max_pfn - 1) << AMDGPU_GPU_PAGE_SHIFT; - - queue_input.process_quantum = 100000; /* 10ms */ - queue_input.process_csa_addr = test->ctx_data_gpu_addr + - offsetof(struct umsch_mm_test_ctx_data, process_csa); - - queue_input.context_quantum = 10000; /* 1ms */ - queue_input.context_csa_addr = qinfo->csa_addr; - - queue_input.inprocess_context_priority = CONTEXT_PRIORITY_LEVEL_NORMAL; - queue_input.context_global_priority_level = CONTEXT_PRIORITY_LEVEL_NORMAL; - queue_input.doorbell_offset_0 = 
qinfo->doorbell_offset_0; - queue_input.doorbell_offset_1 = qinfo->doorbell_offset_1; - - queue_input.engine_type = qinfo->engine; - queue_input.mqd_addr = qinfo->mqd_addr; - queue_input.vm_context_cntl = test->vm_cntx_cntl; - - amdgpu_umsch_mm_lock(&adev->umsch_mm); - r = adev->umsch_mm.funcs->add_queue(&adev->umsch_mm, &queue_input); - amdgpu_umsch_mm_unlock(&adev->umsch_mm); - if (r) - return r; - - return 0; -} - -static int remove_test_queue(struct amdgpu_device *adev, - struct umsch_mm_test *test, - struct umsch_mm_test_queue_info *qinfo) -{ - struct umsch_mm_remove_queue_input queue_input = {}; - int r; - - queue_input.doorbell_offset_0 = qinfo->doorbell_offset_0; - queue_input.doorbell_offset_1 = qinfo->doorbell_offset_1; - queue_input.context_csa_addr = qinfo->csa_addr; - - amdgpu_umsch_mm_lock(&adev->umsch_mm); - r = adev->umsch_mm.funcs->remove_queue(&adev->umsch_mm, &queue_input); - amdgpu_umsch_mm_unlock(&adev->umsch_mm); - if (r) - return r; - - return 0; -} - -static int submit_vpe_queue(struct amdgpu_device *adev, struct umsch_mm_test *test) -{ - struct MQD_INFO *mqd = (struct MQD_INFO *)test->mqd_data_cpu_addr; - uint32_t *ring = test->ring_data_cpu_addr + - offsetof(struct umsch_mm_test_ring_data, vpe_ring) / 4; - uint32_t *ib = test->ring_data_cpu_addr + - offsetof(struct umsch_mm_test_ring_data, vpe_ib) / 4; - uint64_t ib_gpu_addr = test->ring_data_gpu_addr + - offsetof(struct umsch_mm_test_ring_data, vpe_ib); - uint32_t *fence = ib + 2048 / 4; - uint64_t fence_gpu_addr = ib_gpu_addr + 2048; - const uint32_t test_pattern = 0xdeadbeef; - int i; - - ib[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_FENCE, 0); - ib[1] = lower_32_bits(fence_gpu_addr); - ib[2] = upper_32_bits(fence_gpu_addr); - ib[3] = test_pattern; - - ring[0] = VPE_CMD_HEADER(VPE_CMD_OPCODE_INDIRECT, 0); - ring[1] = (ib_gpu_addr & 0xffffffe0); - ring[2] = upper_32_bits(ib_gpu_addr); - ring[3] = 4; - ring[4] = 0; - ring[5] = 0; - - mqd->wptr_val = (6 << 2); - if (adev->vpe.collaborate_mode) - (++mqd)->wptr_val = (6 << 2); - - WDOORBELL32(adev->umsch_mm.agdb_index[CONTEXT_PRIORITY_LEVEL_NORMAL], mqd->wptr_val); - - for (i = 0; i < adev->usec_timeout; i++) { - if (*fence == test_pattern) - return 0; - udelay(1); - } - - dev_err(adev->dev, "vpe queue submission timeout\n"); - - return -ETIMEDOUT; -} - -static int submit_vcn_queue(struct amdgpu_device *adev, struct umsch_mm_test *test) -{ - return 0; -} - -static int setup_umsch_mm_test(struct amdgpu_device *adev, - struct umsch_mm_test *test) -{ - struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_MMHUB0(0)]; - int r; - - test->vm_cntx_cntl = hub->vm_cntx_cntl; - - test->vm = kzalloc(sizeof(*test->vm), GFP_KERNEL); - if (!test->vm) { - r = -ENOMEM; - return r; - } - - r = amdgpu_vm_init(adev, test->vm, -1); - if (r) - goto error_free_vm; - - r = amdgpu_pasid_alloc(16); - if (r < 0) - goto error_fini_vm; - test->pasid = r; - - r = amdgpu_bo_create_kernel(adev, sizeof(struct umsch_mm_test_ctx_data), - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &test->ctx_data_obj, - &test->ctx_data_gpu_addr, - (void **)&test->ctx_data_cpu_addr); - if (r) - goto error_free_pasid; - - memset(test->ctx_data_cpu_addr, 0, sizeof(struct umsch_mm_test_ctx_data)); - - r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &test->mqd_data_obj, - &test->mqd_data_gpu_addr, - (void **)&test->mqd_data_cpu_addr); - if (r) - goto error_free_ctx_data_obj; - - memset(test->mqd_data_cpu_addr, 0, PAGE_SIZE); - - r = amdgpu_bo_create_kernel(adev, sizeof(struct umsch_mm_test_ring_data), - 
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &test->ring_data_obj, - NULL, - (void **)&test->ring_data_cpu_addr); - if (r) - goto error_free_mqd_data_obj; - - memset(test->ring_data_cpu_addr, 0, sizeof(struct umsch_mm_test_ring_data)); - - test->ring_data_gpu_addr = AMDGPU_VA_RESERVED_BOTTOM; - r = map_ring_data(adev, test->vm, test->ring_data_obj, &test->bo_va, - test->ring_data_gpu_addr, sizeof(struct umsch_mm_test_ring_data)); - if (r) - goto error_free_ring_data_obj; - - return 0; - -error_free_ring_data_obj: - amdgpu_bo_free_kernel(&test->ring_data_obj, NULL, - (void **)&test->ring_data_cpu_addr); -error_free_mqd_data_obj: - amdgpu_bo_free_kernel(&test->mqd_data_obj, &test->mqd_data_gpu_addr, - (void **)&test->mqd_data_cpu_addr); -error_free_ctx_data_obj: - amdgpu_bo_free_kernel(&test->ctx_data_obj, &test->ctx_data_gpu_addr, - (void **)&test->ctx_data_cpu_addr); -error_free_pasid: - amdgpu_pasid_free(test->pasid); -error_fini_vm: - amdgpu_vm_fini(adev, test->vm); -error_free_vm: - kfree(test->vm); - - return r; -} - -static void cleanup_umsch_mm_test(struct amdgpu_device *adev, - struct umsch_mm_test *test) -{ - unmap_ring_data(adev, test->vm, test->ring_data_obj, - test->bo_va, test->ring_data_gpu_addr); - amdgpu_bo_free_kernel(&test->mqd_data_obj, &test->mqd_data_gpu_addr, - (void **)&test->mqd_data_cpu_addr); - amdgpu_bo_free_kernel(&test->ring_data_obj, NULL, - (void **)&test->ring_data_cpu_addr); - amdgpu_bo_free_kernel(&test->ctx_data_obj, &test->ctx_data_gpu_addr, - (void **)&test->ctx_data_cpu_addr); - amdgpu_pasid_free(test->pasid); - amdgpu_vm_fini(adev, test->vm); - kfree(test->vm); -} - -static int setup_test_queues(struct amdgpu_device *adev, - struct umsch_mm_test *test, - struct umsch_mm_test_queue_info *qinfo) -{ - int i, r; - - for (i = 0; i < test->num_queues; i++) { - if (qinfo[i].engine == UMSCH_SWIP_ENGINE_TYPE_VPE) - setup_vpe_queue(adev, test, &qinfo[i]); - else - setup_vcn_queue(adev, test, &qinfo[i]); - - r = add_test_queue(adev, test, &qinfo[i]); - if (r) - return r; - } - - return 0; -} - -static int submit_test_queues(struct amdgpu_device *adev, - struct umsch_mm_test *test, - struct umsch_mm_test_queue_info *qinfo) -{ - int i, r; - - for (i = 0; i < test->num_queues; i++) { - if (qinfo[i].engine == UMSCH_SWIP_ENGINE_TYPE_VPE) - r = submit_vpe_queue(adev, test); - else - r = submit_vcn_queue(adev, test); - if (r) - return r; - } - - return 0; -} - -static void cleanup_test_queues(struct amdgpu_device *adev, - struct umsch_mm_test *test, - struct umsch_mm_test_queue_info *qinfo) -{ - int i; - - for (i = 0; i < test->num_queues; i++) - remove_test_queue(adev, test, &qinfo[i]); -} - -static int umsch_mm_test(struct amdgpu_device *adev) -{ - struct umsch_mm_test_queue_info qinfo[] = { - { .engine = UMSCH_SWIP_ENGINE_TYPE_VPE }, - }; - struct umsch_mm_test test = { .num_queues = ARRAY_SIZE(qinfo) }; - int r; - - r = setup_umsch_mm_test(adev, &test); - if (r) - return r; - - r = setup_test_queues(adev, &test, qinfo); - if (r) - goto cleanup; - - r = submit_test_queues(adev, &test, qinfo); - if (r) - goto cleanup; - - cleanup_test_queues(adev, &test, qinfo); - cleanup_umsch_mm_test(adev, &test); - - return 0; - -cleanup: - cleanup_test_queues(adev, &test, qinfo); - cleanup_umsch_mm_test(adev, &test); - return r; -} +MODULE_FIRMWARE("amdgpu/umsch_mm_4_0_0.bin"); int amdgpu_umsch_mm_submit_pkt(struct amdgpu_umsch_mm *umsch, void *pkt, int ndws) { @@ -581,13 +126,14 @@ int amdgpu_umsch_mm_init_microcode(struct amdgpu_umsch_mm *umsch) switch (amdgpu_ip_version(adev, VCN_HWIP, 
0)) { case IP_VERSION(4, 0, 5): case IP_VERSION(4, 0, 6): - fw_name = "amdgpu/umsch_mm_4_0_0.bin"; + fw_name = "4_0_0"; break; default: - break; + return -EINVAL; } - r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, "%s", fw_name); + r = amdgpu_ucode_request(adev, &adev->umsch_mm.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/umsch_mm_%s.bin", fw_name); if (r) { release_firmware(adev->umsch_mm.fw); adev->umsch_mm.fw = NULL; @@ -791,7 +337,7 @@ static int umsch_mm_late_init(struct amdgpu_ip_block *ip_block) if (amdgpu_in_reset(adev) || adev->in_s0ix || adev->in_suspend) return 0; - return umsch_mm_test(adev); + return 0; } static int umsch_mm_sw_init(struct amdgpu_ip_block *ip_block) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 65bb26215e86..74758b5ffc6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -260,7 +260,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev) return -EINVAL; } - r = amdgpu_ucode_request(adev, &adev->uvd.fw, "%s", fw_name); + r = amdgpu_ucode_request(adev, &adev->uvd.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name); if (r) { dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware \"%s\"\n", fw_name); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 599d3ca4e0ef..b9060bcd4806 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -158,7 +158,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size) return -EINVAL; } - r = amdgpu_ucode_request(adev, &adev->vce.fw, "%s", fw_name); + r = amdgpu_ucode_request(adev, &adev->vce.fw, AMDGPU_UCODE_REQUIRED, "%s", fw_name); if (r) { dev_err(adev->dev, "amdgpu_vce: Can't validate firmware \"%s\"\n", fw_name); @@ -503,7 +503,7 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, ib->ptr[i] = 0x0; r = amdgpu_job_submit_direct(job, ring, &f); - amdgpu_ib_free(ring->adev, &ib_msg, f); + amdgpu_ib_free(&ib_msg, f); if (r) goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 3e94c3ba1ba2..1991dd3d1056 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -1,5 +1,5 @@ /* - * Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2016-2024 Advanced Micro Devices, Inc. * All Rights Reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a @@ -62,6 +62,7 @@ #define FIRMWARE_VCN4_0_6 "amdgpu/vcn_4_0_6.bin" #define FIRMWARE_VCN4_0_6_1 "amdgpu/vcn_4_0_6_1.bin" #define FIRMWARE_VCN5_0_0 "amdgpu/vcn_5_0_0.bin" +#define FIRMWARE_VCN5_0_1 "amdgpu/vcn_5_0_1.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); MODULE_FIRMWARE(FIRMWARE_PICASSO); @@ -88,46 +89,57 @@ MODULE_FIRMWARE(FIRMWARE_VCN4_0_5); MODULE_FIRMWARE(FIRMWARE_VCN4_0_6); MODULE_FIRMWARE(FIRMWARE_VCN4_0_6_1); MODULE_FIRMWARE(FIRMWARE_VCN5_0_0); +MODULE_FIRMWARE(FIRMWARE_VCN5_0_1); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); -int amdgpu_vcn_early_init(struct amdgpu_device *adev) +int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i) { char ucode_prefix[25]; - int r, i; + int r; + adev->vcn.inst[i].adev = adev; + adev->vcn.inst[i].inst = i; amdgpu_ucode_ip_version_decode(adev, UVD_HWIP, ucode_prefix, sizeof(ucode_prefix)); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (i == 1 && amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6)) - r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], "amdgpu/%s_%d.bin", ucode_prefix, i); - else - r = amdgpu_ucode_request(adev, &adev->vcn.fw[i], "amdgpu/%s.bin", ucode_prefix); - if (r) { - amdgpu_ucode_release(&adev->vcn.fw[i]); - return r; + + if (i != 0 && adev->vcn.per_inst_fw) { + r = amdgpu_ucode_request(adev, &adev->vcn.inst[i].fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_%d.bin", ucode_prefix, i); + if (r) + amdgpu_ucode_release(&adev->vcn.inst[i].fw); + } else { + if (!adev->vcn.inst[0].fw) { + r = amdgpu_ucode_request(adev, &adev->vcn.inst[0].fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", ucode_prefix); + if (r) + amdgpu_ucode_release(&adev->vcn.inst[0].fw); + } else { + r = 0; } + adev->vcn.inst[i].fw = adev->vcn.inst[0].fw; } + return r; } -int amdgpu_vcn_sw_init(struct amdgpu_device *adev) +int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i) { unsigned long bo_size; const struct common_firmware_header *hdr; unsigned char fw_check; unsigned int fw_shared_size, log_offset; - int i, r; - - INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler); - mutex_init(&adev->vcn.vcn_pg_lock); - mutex_init(&adev->vcn.vcn1_jpeg1_workaround); - atomic_set(&adev->vcn.total_submission_cnt, 0); - for (i = 0; i < adev->vcn.num_vcn_inst; i++) - atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0); + int r; + mutex_init(&adev->vcn.inst[i].vcn1_jpeg1_workaround); + mutex_init(&adev->vcn.inst[i].vcn_pg_lock); + atomic_set(&adev->vcn.inst[i].total_submission_cnt, 0); + INIT_DELAYED_WORK(&adev->vcn.inst[i].idle_work, amdgpu_vcn_idle_work_handler); + atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0); if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) && (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)) - adev->vcn.indirect_sram = true; + adev->vcn.inst[i].indirect_sram = true; /* * Some Steam Deck's BIOS versions are incompatible with the @@ -140,18 +152,19 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) const char *bios_ver = dmi_get_system_info(DMI_BIOS_VERSION); if (bios_ver && (!strncmp("F7A0113", bios_ver, 7) || - !strncmp("F7A0114", bios_ver, 7))) { - adev->vcn.indirect_sram = false; + !strncmp("F7A0114", bios_ver, 7))) { + adev->vcn.inst[i].indirect_sram = false; dev_info(adev->dev, - "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver); + "Steam Deck quirk: indirect SRAM disabled on BIOS %s\n", bios_ver); } } /* from vcn4 and above, only unified queue is used */ - adev->vcn.using_unified_queue = + 
adev->vcn.inst[i].using_unified_queue = amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0); - hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data; + adev->vcn.inst[i].fw_version = le32_to_cpu(hdr->ucode_version); adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); /* Bit 20-23, it is encode major and non-zero for new naming convention. @@ -169,16 +182,17 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) enc_major = fw_check; dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf; vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf; - DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n", - enc_major, enc_minor, dec_ver, vep, fw_rev); + dev_info(adev->dev, + "Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n", + enc_major, enc_minor, dec_ver, vep, fw_rev); } else { unsigned int version_major, version_minor, family_id; family_id = le32_to_cpu(hdr->ucode_version) & 0xff; version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff; version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff; - DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n", - version_major, version_minor, family_id); + dev_info(adev->dev, "Found VCN firmware Version: %u.%u Family ID: %u\n", + version_major, version_minor, family_id); } bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; @@ -201,80 +215,77 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) if (amdgpu_vcnfw_log) bo_size += AMDGPU_VCNFW_LOG_SIZE; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT, + &adev->vcn.inst[i].vcpu_bo, + &adev->vcn.inst[i].gpu_addr, + &adev->vcn.inst[i].cpu_addr); + if (r) { + dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); + return r; + } - r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, + adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr + + bo_size - fw_shared_size; + adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr + + bo_size - fw_shared_size; + + adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size; + + if (amdgpu_vcnfw_log) { + adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE; + adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE; + adev->vcn.inst[i].fw_shared.log_offset = log_offset; + } + + if (adev->vcn.inst[i].indirect_sram) { + r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT, - &adev->vcn.inst[i].vcpu_bo, - &adev->vcn.inst[i].gpu_addr, - &adev->vcn.inst[i].cpu_addr); + &adev->vcn.inst[i].dpg_sram_bo, + &adev->vcn.inst[i].dpg_sram_gpu_addr, + &adev->vcn.inst[i].dpg_sram_cpu_addr); if (r) { - dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r); + dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r); return r; } - - adev->vcn.inst[i].fw_shared.cpu_addr = adev->vcn.inst[i].cpu_addr + - bo_size - fw_shared_size; - adev->vcn.inst[i].fw_shared.gpu_addr = adev->vcn.inst[i].gpu_addr + - bo_size - fw_shared_size; - - adev->vcn.inst[i].fw_shared.mem_size = fw_shared_size; - - if (amdgpu_vcnfw_log) { - adev->vcn.inst[i].fw_shared.cpu_addr -= AMDGPU_VCNFW_LOG_SIZE; - adev->vcn.inst[i].fw_shared.gpu_addr -= AMDGPU_VCNFW_LOG_SIZE; - adev->vcn.inst[i].fw_shared.log_offset = log_offset; - } - - if (adev->vcn.indirect_sram) { - r = amdgpu_bo_create_kernel(adev, 
64 * 2 * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM | - AMDGPU_GEM_DOMAIN_GTT, - &adev->vcn.inst[i].dpg_sram_bo, - &adev->vcn.inst[i].dpg_sram_gpu_addr, - &adev->vcn.inst[i].dpg_sram_cpu_addr); - if (r) { - dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r); - return r; - } - } } return 0; } -int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) +int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i) { - int i, j; + int j; - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { - if (adev->vcn.harvest_config & (1 << j)) - continue; + if (adev->vcn.harvest_config & (1 << i)) + return 0; - amdgpu_bo_free_kernel( - &adev->vcn.inst[j].dpg_sram_bo, - &adev->vcn.inst[j].dpg_sram_gpu_addr, - (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr); + amdgpu_bo_free_kernel( + &adev->vcn.inst[i].dpg_sram_bo, + &adev->vcn.inst[i].dpg_sram_gpu_addr, + (void **)&adev->vcn.inst[i].dpg_sram_cpu_addr); - kvfree(adev->vcn.inst[j].saved_bo); + kvfree(adev->vcn.inst[i].saved_bo); - amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo, - &adev->vcn.inst[j].gpu_addr, - (void **)&adev->vcn.inst[j].cpu_addr); + amdgpu_bo_free_kernel(&adev->vcn.inst[i].vcpu_bo, + &adev->vcn.inst[i].gpu_addr, + (void **)&adev->vcn.inst[i].cpu_addr); - amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec); + amdgpu_ring_fini(&adev->vcn.inst[i].ring_dec); - for (i = 0; i < adev->vcn.num_enc_rings; ++i) - amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]); + for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) + amdgpu_ring_fini(&adev->vcn.inst[i].ring_enc[j]); - amdgpu_ucode_release(&adev->vcn.fw[j]); + if (adev->vcn.per_inst_fw) { + amdgpu_ucode_release(&adev->vcn.inst[i].fw); + } else { + amdgpu_ucode_release(&adev->vcn.inst[0].fw); + adev->vcn.inst[i].fw = NULL; } - - mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround); - mutex_destroy(&adev->vcn.vcn_pg_lock); + mutex_destroy(&adev->vcn.inst[i].vcn_pg_lock); + mutex_destroy(&adev->vcn.inst[i].vcn1_jpeg1_workaround); return 0; } @@ -282,7 +293,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) { bool ret = false; - int vcn_config = adev->vcn.vcn_config[vcn_instance]; + int vcn_config = adev->vcn.inst[vcn_instance].vcn_config; if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK)) ret = true; @@ -294,179 +305,208 @@ bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type t return ret; } -int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev) +static int amdgpu_vcn_save_vcpu_bo_inst(struct amdgpu_device *adev, int i) { unsigned int size; void *ptr; - int i, idx; + int idx; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - if (adev->vcn.inst[i].vcpu_bo == NULL) - return 0; + if (adev->vcn.harvest_config & (1 << i)) + return 0; + if (adev->vcn.inst[i].vcpu_bo == NULL) + return 0; - size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); - ptr = adev->vcn.inst[i].cpu_addr; + size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); + ptr = adev->vcn.inst[i].cpu_addr; - adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL); - if (!adev->vcn.inst[i].saved_bo) - return -ENOMEM; + adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL); + if (!adev->vcn.inst[i].saved_bo) + return -ENOMEM; - if (drm_dev_enter(adev_to_drm(adev), &idx)) { - memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size); - drm_dev_exit(idx); - } + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + memcpy_fromio(adev->vcn.inst[i].saved_bo, 
ptr, size); + drm_dev_exit(idx); } return 0; } -int amdgpu_vcn_suspend(struct amdgpu_device *adev) +int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev) +{ + int ret, i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ret = amdgpu_vcn_save_vcpu_bo_inst(adev, i); + if (ret) + return ret; + } + + return 0; +} + +int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i) { bool in_ras_intr = amdgpu_ras_intr_triggered(); - cancel_delayed_work_sync(&adev->vcn.idle_work); + if (adev->vcn.harvest_config & (1 << i)) + return 0; + + cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work); /* err_event_athub will corrupt VCPU buffer, so we need to * restore fw data and clear buffer in amdgpu_vcn_resume() */ if (in_ras_intr) return 0; - return amdgpu_vcn_save_vcpu_bo(adev); + return amdgpu_vcn_save_vcpu_bo_inst(adev, i); } -int amdgpu_vcn_resume(struct amdgpu_device *adev) +int amdgpu_vcn_resume(struct amdgpu_device *adev, int i) { unsigned int size; void *ptr; - int i, idx; + int idx; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - if (adev->vcn.inst[i].vcpu_bo == NULL) - return -EINVAL; + if (adev->vcn.harvest_config & (1 << i)) + return 0; + if (adev->vcn.inst[i].vcpu_bo == NULL) + return -EINVAL; + + size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); + ptr = adev->vcn.inst[i].cpu_addr; - size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo); - ptr = adev->vcn.inst[i].cpu_addr; + if (adev->vcn.inst[i].saved_bo != NULL) { + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); + drm_dev_exit(idx); + } + kvfree(adev->vcn.inst[i].saved_bo); + adev->vcn.inst[i].saved_bo = NULL; + } else { + const struct common_firmware_header *hdr; + unsigned int offset; - if (adev->vcn.inst[i].saved_bo != NULL) { + hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + offset = le32_to_cpu(hdr->ucode_array_offset_bytes); if (drm_dev_enter(adev_to_drm(adev), &idx)) { - memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size); + memcpy_toio(adev->vcn.inst[i].cpu_addr, + adev->vcn.inst[i].fw->data + offset, + le32_to_cpu(hdr->ucode_size_bytes)); drm_dev_exit(idx); } - kvfree(adev->vcn.inst[i].saved_bo); - adev->vcn.inst[i].saved_bo = NULL; - } else { - const struct common_firmware_header *hdr; - unsigned int offset; - - hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data; - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - offset = le32_to_cpu(hdr->ucode_array_offset_bytes); - if (drm_dev_enter(adev_to_drm(adev), &idx)) { - memcpy_toio(adev->vcn.inst[i].cpu_addr, - adev->vcn.fw[i]->data + offset, - le32_to_cpu(hdr->ucode_size_bytes)); - drm_dev_exit(idx); - } - size -= le32_to_cpu(hdr->ucode_size_bytes); - ptr += le32_to_cpu(hdr->ucode_size_bytes); - } - memset_io(ptr, 0, size); + size -= le32_to_cpu(hdr->ucode_size_bytes); + ptr += le32_to_cpu(hdr->ucode_size_bytes); } + memset_io(ptr, 0, size); } + return 0; } static void amdgpu_vcn_idle_work_handler(struct work_struct *work) { - struct amdgpu_device *adev = - container_of(work, struct amdgpu_device, vcn.idle_work.work); + struct amdgpu_vcn_inst *vcn_inst = + container_of(work, struct amdgpu_vcn_inst, idle_work.work); + struct amdgpu_device *adev = vcn_inst->adev; unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; - unsigned int i, j; + unsigned int i = vcn_inst->inst, j; int r = 0; - for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { - if (adev->vcn.harvest_config & 
(1 << j)) - continue; + if (adev->vcn.harvest_config & (1 << i)) + return; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) - fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); + for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) + fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[j]); - /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && - !adev->vcn.using_unified_queue) { - struct dpg_pause_state new_state; - - if (fence[j] || - unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt))) - new_state.fw_based = VCN_DPG_STATE__PAUSE; - else - new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + !adev->vcn.inst[i].using_unified_queue) { + struct dpg_pause_state new_state; - adev->vcn.pause_dpg_mode(adev, j, &new_state); - } + if (fence[i] || + unlikely(atomic_read(&vcn_inst->dpg_enc_submission_cnt))) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; - fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec); - fences += fence[j]; + adev->vcn.inst[i].pause_dpg_mode(vcn_inst, &new_state); } - if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) { - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_GATE); - r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - false); - if (r) - dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); + fence[i] += amdgpu_fence_count_emitted(&vcn_inst->ring_dec); + fences += fence[i]; + + if (!fences && !atomic_read(&vcn_inst->total_submission_cnt)) { + vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_GATE); + mutex_lock(&adev->vcn.workload_profile_mutex); + if (adev->vcn.workload_profile_active) { + r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, + false); + if (r) + dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); + adev->vcn.workload_profile_active = false; + } + mutex_unlock(&adev->vcn.workload_profile_mutex); } else { - schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + schedule_delayed_work(&vcn_inst->idle_work, VCN_IDLE_TIMEOUT); } } void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *vcn_inst = &adev->vcn.inst[ring->me]; int r = 0; - atomic_inc(&adev->vcn.total_submission_cnt); + atomic_inc(&vcn_inst->total_submission_cnt); + + cancel_delayed_work_sync(&vcn_inst->idle_work); + + /* We can safely return early here because we've cancelled the + * the delayed work so there is no one else to set it to false + * and we don't care if someone else sets it to true. 
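+ * Enabling the profile below is still serialized against the idle
+ * worker by workload_profile_mutex.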
+ */ + if (adev->vcn.workload_profile_active) + goto pg_lock; - if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) { + mutex_lock(&adev->vcn.workload_profile_mutex); + if (!adev->vcn.workload_profile_active) { r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, - true); + true); if (r) dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); + adev->vcn.workload_profile_active = true; } + mutex_unlock(&adev->vcn.workload_profile_mutex); - mutex_lock(&adev->vcn.vcn_pg_lock); - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, - AMD_PG_STATE_UNGATE); +pg_lock: + mutex_lock(&vcn_inst->vcn_pg_lock); + vcn_inst->set_pg_state(vcn_inst, AMD_PG_STATE_UNGATE); /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && - !adev->vcn.using_unified_queue) { + !vcn_inst->using_unified_queue) { struct dpg_pause_state new_state; if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { - atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt); + atomic_inc(&vcn_inst->dpg_enc_submission_cnt); new_state.fw_based = VCN_DPG_STATE__PAUSE; } else { unsigned int fences = 0; unsigned int i; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) - fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]); + for (i = 0; i < vcn_inst->num_enc_rings; ++i) + fences += amdgpu_fence_count_emitted(&vcn_inst->ring_enc[i]); - if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt)) + if (fences || atomic_read(&vcn_inst->dpg_enc_submission_cnt)) new_state.fw_based = VCN_DPG_STATE__PAUSE; else new_state.fw_based = VCN_DPG_STATE__UNPAUSE; } - adev->vcn.pause_dpg_mode(adev, ring->me, &new_state); + vcn_inst->pause_dpg_mode(vcn_inst, &new_state); } - mutex_unlock(&adev->vcn.vcn_pg_lock); + mutex_unlock(&vcn_inst->vcn_pg_lock); } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) @@ -476,12 +516,13 @@ void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC && - !adev->vcn.using_unified_queue) + !adev->vcn.inst[ring->me].using_unified_queue) atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt); - atomic_dec(&ring->adev->vcn.total_submission_cnt); + atomic_dec(&ring->adev->vcn.inst[ring->me].total_submission_cnt); - schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + schedule_delayed_work(&ring->adev->vcn.inst[ring->me].idle_work, + VCN_IDLE_TIMEOUT); } int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) @@ -499,7 +540,7 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 3); if (r) return r; - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.scratch9, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { @@ -564,14 +605,14 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, goto err; ib = &job->ibs[0]; - ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0); + ib->ptr[0] = PACKET0(adev->vcn.inst[ring->me].internal.data0, 0); ib->ptr[1] = addr; - ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0); + ib->ptr[2] = PACKET0(adev->vcn.inst[ring->me].internal.data1, 0); ib->ptr[3] = addr >> 32; - ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0); + ib->ptr[4] = 
PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0); ib->ptr[5] = 0; for (i = 6; i < 16; i += 2) { - ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0); + ib->ptr[i] = PACKET0(adev->vcn.inst[ring->me].internal.nop, 0); ib->ptr[i+1] = 0; } ib->length_dw = 16; @@ -580,7 +621,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, if (r) goto err_free; - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); if (fence) *fence = dma_fence_get(f); @@ -591,7 +632,7 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring, err_free: amdgpu_job_free(job); err: - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); return r; } @@ -734,7 +775,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, uint32_t ib_pack_in_dw; int i, r; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -747,7 +788,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, ib->length_dw = 0; /* single queue headers */ - if (adev->vcn.using_unified_queue) { + if (adev->vcn.inst[ring->me].using_unified_queue) { ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t) + 4 + 2; /* engine info + decoding ib in dw */ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false); @@ -766,14 +807,14 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw); r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err_free; - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); if (fence) *fence = dma_fence_get(f); @@ -784,7 +825,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, err_free: amdgpu_job_free(job); err: - amdgpu_ib_free(adev, ib_msg, f); + amdgpu_ib_free(ib_msg, f); return r; } @@ -864,7 +905,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand uint64_t addr; int i, r; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -878,7 +919,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand ib->length_dw = 0; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); ib->ptr[ib->length_dw++] = 0x00000018; @@ -900,7 +941,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); r = amdgpu_job_submit_direct(job, ring, &f); @@ -931,7 +972,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han uint64_t addr; int i, r; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -945,7 +986,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han ib->length_dw = 0; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); 
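The workload-profile handling reworked above in amdgpu_vcn_idle_work_handler() and amdgpu_vcn_ring_begin_use() follows a double-checked pattern: the begin_use path may test workload_profile_active without the mutex only because it has already cancelled the delayed idle work, which is the only place the flag is cleared. Below is a minimal userspace sketch of that pattern, with made-up names and pthreads standing in for the kernel mutex and the power-profile calls; it is only an illustration, not code from the patch.

/* Illustrative sketch: check the flag without the lock, then lock and
 * re-check, mirroring the workload_profile_active handling above. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t profile_lock = PTHREAD_MUTEX_INITIALIZER;
static bool profile_active;

/* Submission path: the idle worker (the only clearer of the flag) has
 * already been cancelled, so a stale "true" is impossible and a stale
 * "false" only costs us taking the lock once. */
static void profile_get(void)
{
	if (profile_active)		/* unlocked fast path */
		return;

	pthread_mutex_lock(&profile_lock);
	if (!profile_active) {		/* re-check under the lock */
		/* switch_power_profile(VIDEO, true) would go here */
		profile_active = true;
	}
	pthread_mutex_unlock(&profile_lock);
}

/* Idle worker: runs only when no fences or submissions remain. */
static void profile_put(void)
{
	pthread_mutex_lock(&profile_lock);
	if (profile_active) {
		/* switch_power_profile(VIDEO, false) would go here */
		profile_active = false;
	}
	pthread_mutex_unlock(&profile_lock);
}

int main(void)
{
	profile_get();
	profile_put();
	printf("profile_active=%d\n", profile_active);
	return 0;
}

In the patch itself the flag is device-wide while power gating has moved to the per-instance set_pg_state() callback, which is why the flag gets its own workload_profile_mutex instead of reusing the per-instance vcn_pg_lock.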
ib->ptr[ib->length_dw++] = 0x00000018; @@ -967,7 +1008,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (adev->vcn.using_unified_queue) + if (adev->vcn.inst[ring->me].using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); r = amdgpu_job_submit_direct(job, ring, &f); @@ -1014,7 +1055,7 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = 0; error: - amdgpu_ib_free(adev, &ib, fence); + amdgpu_ib_free(&ib, fence); dma_fence_put(fence); return r; @@ -1025,7 +1066,8 @@ int amdgpu_vcn_unified_ring_test_ib(struct amdgpu_ring *ring, long timeout) struct amdgpu_device *adev = ring->adev; long r; - if (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) { + if ((amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(4, 0, 3)) && + (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(5, 0, 1))) { r = amdgpu_vcn_enc_ring_test_ib(ring, timeout); if (r) goto error; @@ -1051,34 +1093,32 @@ enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring) } } -void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev) +void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i) { - int i; unsigned int idx; if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { const struct common_firmware_header *hdr; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - hdr = (const struct common_firmware_header *)adev->vcn.fw[i]->data; - /* currently only support 2 FW instances */ - if (i >= 2) { - dev_info(adev->dev, "More then 2 VCN FW instances!\n"); - break; - } - idx = AMDGPU_UCODE_ID_VCN + i; - adev->firmware.ucode[idx].ucode_id = idx; - adev->firmware.ucode[idx].fw = adev->vcn.fw[i]; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); - - if (amdgpu_ip_version(adev, UVD_HWIP, 0) == - IP_VERSION(4, 0, 3)) - break; + if (adev->vcn.harvest_config & (1 << i)) + return; + + if ((amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 3) || + amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(5, 0, 1)) + && (i > 0)) + return; + + hdr = (const struct common_firmware_header *)adev->vcn.inst[i].fw->data; + /* currently only support 2 FW instances */ + if (i >= 2) { + dev_info(adev->dev, "More then 2 VCN FW instances!\n"); + return; } + idx = AMDGPU_UCODE_ID_VCN + i; + adev->firmware.ucode[idx].ucode_id = idx; + adev->firmware.ucode[idx].fw = adev->vcn.inst[i].fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); } } @@ -1320,3 +1360,94 @@ void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev) device_remove_file(adev->dev, &dev_attr_vcn_reset_mask); } } + +/* + * debugfs to enable/disable vcn job submission to specific core or + * instance. It is created only if the queue type is unified. 
+ */ +#if defined(CONFIG_DEBUG_FS) +static int amdgpu_debugfs_vcn_sched_mask_set(void *data, u64 val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + + mask = (1ULL << adev->vcn.num_vcn_inst) - 1; + if ((val & mask) == 0) + return -EINVAL; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = &adev->vcn.inst[i].ring_enc[0]; + if (val & (1ULL << i)) + ring->sched.ready = true; + else + ring->sched.ready = false; + } + /* publish sched.ready flag update effective immediately across smp */ + smp_rmb(); + return 0; +} + +static int amdgpu_debugfs_vcn_sched_mask_get(void *data, u64 *val) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)data; + u32 i; + u64 mask = 0; + struct amdgpu_ring *ring; + + if (!adev) + return -ENODEV; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ring = &adev->vcn.inst[i].ring_enc[0]; + if (ring->sched.ready) + mask |= 1ULL << i; + } + *val = mask; + return 0; +} + +DEFINE_DEBUGFS_ATTRIBUTE(amdgpu_debugfs_vcn_sched_mask_fops, + amdgpu_debugfs_vcn_sched_mask_get, + amdgpu_debugfs_vcn_sched_mask_set, "%llx\n"); +#endif + +void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev) +{ +#if defined(CONFIG_DEBUG_FS) + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *root = minor->debugfs_root; + char name[32]; + + if (adev->vcn.num_vcn_inst <= 1 || !adev->vcn.inst[0].using_unified_queue) + return; + sprintf(name, "amdgpu_vcn_sched_mask"); + debugfs_create_file(name, 0600, root, adev, + &amdgpu_debugfs_vcn_sched_mask_fops); +#endif +} + +/** + * vcn_set_powergating_state - set VCN block powergating state + * + * @ip_block: amdgpu_ip_block pointer + * @state: power gating state + * + * Set VCN block powergating state + */ +int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + int ret = 0, i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + + ret |= vinst->set_pg_state(vinst, state); + } + + return ret; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 1e32311c1dff..cdcdae7f71ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -1,5 +1,5 @@ /* - * Copyright 2016 Advanced Micro Devices, Inc. + * Copyright 2016-2024 Advanced Micro Devices, Inc. All rights reserved. 
* * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -163,20 +163,30 @@ #define SOC24_DPG_MODE_OFFSET(ip, inst_idx, reg) \ ({ \ uint32_t internal_reg_offset, addr; \ - bool video_range, aon_range; \ + bool video_range, video1_range, aon_range, aon1_range; \ \ addr = (adev->reg_offset[ip##_HWIP][inst_idx][reg##_BASE_IDX] + reg); \ addr <<= 2; \ video_range = ((((0xFFFFF & addr) >= (VCN_VID_SOC_ADDRESS)) && \ ((0xFFFFF & addr) < ((VCN_VID_SOC_ADDRESS + 0x2600))))); \ + video1_range = ((((0xFFFFF & addr) >= (VCN1_VID_SOC_ADDRESS)) && \ + ((0xFFFFF & addr) < ((VCN1_VID_SOC_ADDRESS + 0x2600))))); \ aon_range = ((((0xFFFFF & addr) >= (VCN_AON_SOC_ADDRESS)) && \ ((0xFFFFF & addr) < ((VCN_AON_SOC_ADDRESS + 0x600))))); \ + aon1_range = ((((0xFFFFF & addr) >= (VCN1_AON_SOC_ADDRESS)) && \ + ((0xFFFFF & addr) < ((VCN1_AON_SOC_ADDRESS + 0x600))))); \ if (video_range) \ internal_reg_offset = ((0xFFFFF & addr) - (VCN_VID_SOC_ADDRESS) + \ (VCN_VID_IP_ADDRESS)); \ else if (aon_range) \ internal_reg_offset = ((0xFFFFF & addr) - (VCN_AON_SOC_ADDRESS) + \ (VCN_AON_IP_ADDRESS)); \ + else if (video1_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN1_VID_SOC_ADDRESS) + \ + (VCN_VID_IP_ADDRESS)); \ + else if (aon1_range) \ + internal_reg_offset = ((0xFFFFF & addr) - (VCN1_AON_SOC_ADDRESS) + \ + (VCN_AON_IP_ADDRESS)); \ else \ internal_reg_offset = (0xFFFFF & addr); \ \ @@ -228,6 +238,12 @@ #define AMDGPU_DRM_KEY_INJECT_WORKAROUND_VCNFW_ASD_HANDSHAKING 2 +enum amdgpu_vcn_caps { + AMDGPU_VCN_RRMT_ENABLED, +}; + +#define AMDGPU_VCN_CAPS(caps) BIT(AMDGPU_VCN_##caps) + enum fw_queue_mode { FW_QUEUE_RING_RESET = 1, FW_QUEUE_DPG_HOLD_OFF = 2, @@ -279,6 +295,8 @@ struct amdgpu_vcn_fw_shared { }; struct amdgpu_vcn_inst { + struct amdgpu_device *adev; + int inst; struct amdgpu_bo *vcpu_bo; void *cpu_addr; uint64_t gpu_addr; @@ -297,6 +315,23 @@ struct amdgpu_vcn_inst { atomic_t dpg_enc_submission_cnt; struct amdgpu_vcn_fw_shared fw_shared; uint8_t aid_id; + const struct firmware *fw; /* VCN firmware */ + uint8_t vcn_config; + uint32_t vcn_codec_disable_mask; + atomic_t total_submission_cnt; + struct mutex vcn_pg_lock; + enum amd_powergating_state cur_state; + struct delayed_work idle_work; + unsigned fw_version; + unsigned num_enc_rings; + bool indirect_sram; + struct amdgpu_vcn_reg internal; + struct mutex vcn1_jpeg1_workaround; + int (*pause_dpg_mode)(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state); + int (*set_pg_state)(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state); + bool using_unified_queue; }; struct amdgpu_vcn_ras { @@ -304,37 +339,28 @@ struct amdgpu_vcn_ras { }; struct amdgpu_vcn { - unsigned fw_version; - struct delayed_work idle_work; - const struct firmware *fw[AMDGPU_MAX_VCN_INSTANCES]; /* VCN firmware */ - unsigned num_enc_rings; - enum amd_powergating_state cur_state; - bool indirect_sram; - uint8_t num_vcn_inst; struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES]; - uint8_t vcn_config[AMDGPU_MAX_VCN_INSTANCES]; - uint32_t vcn_codec_disable_mask[AMDGPU_MAX_VCN_INSTANCES]; - struct amdgpu_vcn_reg internal; - struct mutex vcn_pg_lock; - struct mutex vcn1_jpeg1_workaround; - atomic_t total_submission_cnt; unsigned harvest_config; - int (*pause_dpg_mode)(struct amdgpu_device *adev, - int inst_idx, struct dpg_pause_state *new_state); struct ras_common_if *ras_if; struct amdgpu_vcn_ras *ras; uint16_t inst_mask; uint8_t num_inst_per_aid; - bool 
using_unified_queue; /* IP reg dump */ uint32_t *ip_dump; uint32_t supported_reset; + uint32_t caps; + + bool per_inst_fw; + unsigned fw_version; + + bool workload_profile_active; + struct mutex workload_profile_mutex; }; struct amdgpu_fw_shared_rb_ptrs_struct { @@ -483,11 +509,11 @@ enum vcn_ring_type { VCN_UNIFIED_RING, }; -int amdgpu_vcn_early_init(struct amdgpu_device *adev); -int amdgpu_vcn_sw_init(struct amdgpu_device *adev); -int amdgpu_vcn_sw_fini(struct amdgpu_device *adev); -int amdgpu_vcn_suspend(struct amdgpu_device *adev); -int amdgpu_vcn_resume(struct amdgpu_device *adev); +int amdgpu_vcn_early_init(struct amdgpu_device *adev, int i); +int amdgpu_vcn_sw_init(struct amdgpu_device *adev, int i); +int amdgpu_vcn_sw_fini(struct amdgpu_device *adev, int i); +int amdgpu_vcn_suspend(struct amdgpu_device *adev, int i); +int amdgpu_vcn_resume(struct amdgpu_device *adev, int i); void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring); void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring); @@ -505,7 +531,7 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout); enum amdgpu_ring_priority_level amdgpu_vcn_get_enc_ring_prio(int ring); -void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev); +void amdgpu_vcn_setup_ucode(struct amdgpu_device *adev, int i); void amdgpu_vcn_fwlog_init(struct amdgpu_vcn_inst *vcn); void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev, @@ -523,5 +549,9 @@ int amdgpu_vcn_psp_update_sram(struct amdgpu_device *adev, int inst_idx, int amdgpu_vcn_save_vcpu_bo(struct amdgpu_device *adev); int amdgpu_vcn_sysfs_reset_mask_init(struct amdgpu_device *adev); void amdgpu_vcn_sysfs_reset_mask_fini(struct amdgpu_device *adev); +void amdgpu_debugfs_vcn_sched_mask_init(struct amdgpu_device *adev); + +int vcn_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index c704e9803e11..0bb8cbe0dcc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -614,10 +614,11 @@ static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev) vf2pf_info->decode_usage = 0; vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr; - vf2pf_info->mes_info_addr = (uint64_t)adev->mes.resource_1_gpu_addr; - - if (adev->mes.resource_1) { - vf2pf_info->mes_info_size = adev->mes.resource_1->tbo.base.size; + if (amdgpu_sriov_is_mes_info_enable(adev)) { + vf2pf_info->mes_info_addr = + (uint64_t)(adev->mes.resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE); + vf2pf_info->mes_info_size = + adev->mes.resource_1[0]->tbo.base.size - AMDGPU_GPU_PAGE_SIZE; } vf2pf_info->checksum = amd_sriov_msg_checksum( @@ -739,7 +740,7 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev) } } -void amdgpu_detect_virtualization(struct amdgpu_device *adev) +static u32 amdgpu_virt_init_detect_asic(struct amdgpu_device *adev) { uint32_t reg; @@ -775,8 +776,17 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; } + return reg; +} + +static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg) +{ + bool is_sriov = false; + /* we have the ability to check now */ if (amdgpu_sriov_vf(adev)) { + is_sriov = true; + switch (adev->asic_type) { case CHIP_TONGA: case CHIP_FIJI: @@ -805,10 +815,39 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) amdgpu_virt_request_init_data(adev); break; default: /* other chip doesn't 
support SRIOV */ + is_sriov = false; DRM_ERROR("Unknown asic type: %d!\n", adev->asic_type); break; } } + + return is_sriov; +} + +static void amdgpu_virt_init_ras(struct amdgpu_device *adev) +{ + ratelimit_state_init(&adev->virt.ras.ras_error_cnt_rs, 5 * HZ, 1); + ratelimit_state_init(&adev->virt.ras.ras_cper_dump_rs, 5 * HZ, 1); + + ratelimit_set_flags(&adev->virt.ras.ras_error_cnt_rs, + RATELIMIT_MSG_ON_RELEASE); + ratelimit_set_flags(&adev->virt.ras.ras_cper_dump_rs, + RATELIMIT_MSG_ON_RELEASE); + + mutex_init(&adev->virt.ras.ras_telemetry_mutex); + + adev->virt.ras.cper_rptr = 0; +} + +void amdgpu_virt_init(struct amdgpu_device *adev) +{ + bool is_sriov = false; + uint32_t reg = amdgpu_virt_init_detect_asic(adev); + + is_sriov = amdgpu_virt_init_req_data(adev, reg); + + if (is_sriov) + amdgpu_virt_init_ras(adev); } static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev) @@ -1017,6 +1056,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f void *scratch_reg2; void *scratch_reg3; void *spare_int; + unsigned long flags; if (!adev->gfx.rlc.rlcg_reg_access_supported) { dev_err(adev->dev, @@ -1038,7 +1078,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2; scratch_reg3 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg3; - mutex_lock(&adev->virt.rlcg_reg_lock); + spin_lock_irqsave(&adev->virt.rlcg_reg_lock, flags); if (reg_access_ctrl->spare_int) spare_int = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->spare_int; @@ -1097,7 +1137,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f ret = readl(scratch_reg0); - mutex_unlock(&adev->virt.rlcg_reg_lock); + spin_unlock_irqrestore(&adev->virt.rlcg_reg_lock, flags); return ret; } @@ -1246,7 +1286,8 @@ amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block bloc case AMDGPU_RAS_BLOCK__MPIO: return RAS_TELEMETRY_GPU_BLOCK_MPIO; default: - dev_err(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n", block); + DRM_WARN_ONCE("Unsupported SRIOV RAS telemetry block 0x%x\n", + block); return RAS_TELEMETRY_GPU_BLOCK_COUNT; } } @@ -1263,12 +1304,10 @@ static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev, if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10)) return 0; - tmp = kmalloc(used_size, GFP_KERNEL); + tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL); if (!tmp) return -ENOMEM; - memcpy(tmp, &host_telemetry->body.error_count, used_size); - if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0)) goto out; @@ -1288,10 +1327,12 @@ static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bo * will ignore incoming guest messages. Ratelimit the guest messages to * prevent guest self DOS. 
*/ - if (__ratelimit(&adev->virt.ras_telemetry_rs) || force_update) { + if (__ratelimit(&virt->ras.ras_error_cnt_rs) || force_update) { + mutex_lock(&virt->ras.ras_telemetry_mutex); if (!virt->ops->req_ras_err_count(adev)) amdgpu_virt_cache_host_error_counts(adev, - adev->virt.fw_reserve.ras_telemetry); + virt->fw_reserve.ras_telemetry); + mutex_unlock(&virt->ras.ras_telemetry_mutex); } return 0; @@ -1322,6 +1363,98 @@ int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_bl return 0; } +static int +amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev, + struct amdsriov_ras_telemetry *host_telemetry, + u32 *more) +{ + struct amd_sriov_ras_cper_dump *cper_dump = NULL; + struct cper_hdr *entry = NULL; + struct amdgpu_ring *ring = &adev->cper.ring_buf; + uint32_t checksum, used_size, i; + int ret = 0; + + checksum = host_telemetry->header.checksum; + used_size = host_telemetry->header.used_size; + + if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10)) + return 0; + + cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL); + if (!cper_dump) + return -ENOMEM; + + if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) + goto out; + + *more = cper_dump->more; + + if (cper_dump->wptr < adev->virt.ras.cper_rptr) { + dev_warn( + adev->dev, + "guest specified rptr that was too high! guest rptr: 0x%llx, host rptr: 0x%llx\n", + adev->virt.ras.cper_rptr, cper_dump->wptr); + + adev->virt.ras.cper_rptr = cper_dump->wptr; + goto out; + } + + entry = (struct cper_hdr *)&cper_dump->buf[0]; + + for (i = 0; i < cper_dump->count; i++) { + amdgpu_cper_ring_write(ring, entry, entry->record_length); + entry = (struct cper_hdr *)((char *)entry + + entry->record_length); + } + + if (cper_dump->overflow_count) + dev_warn(adev->dev, + "host reported CPER overflow of 0x%llx entries!\n", + cper_dump->overflow_count); + + adev->virt.ras.cper_rptr = cper_dump->wptr; +out: + kfree(cper_dump); + + return ret; +} + +static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev) +{ + struct amdgpu_virt *virt = &adev->virt; + int ret = 0; + uint32_t more = 0; + + if (!amdgpu_sriov_ras_cper_en(adev)) + return -EOPNOTSUPP; + + do { + if (!virt->ops->req_ras_cper_dump(adev, virt->ras.cper_rptr)) + ret = amdgpu_virt_write_cpers_to_ring( + adev, virt->fw_reserve.ras_telemetry, &more); + else + ret = 0; + } while (more); + + return ret; +} + +int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update) +{ + struct amdgpu_virt *virt = &adev->virt; + int ret = 0; + + if ((__ratelimit(&virt->ras.ras_cper_dump_rs) || force_update) && + down_read_trylock(&adev->reset_domain->sem)) { + mutex_lock(&virt->ras.ras_telemetry_mutex); + ret = amdgpu_virt_req_ras_cper_dump_internal(adev); + mutex_unlock(&virt->ras.ras_telemetry_mutex); + up_read(&adev->reset_domain->sem); + } + + return ret; +} + int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev) { unsigned long ue_count, ce_count; @@ -1333,3 +1466,17 @@ int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev) return 0; } + +bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev, + enum amdgpu_ras_block block) +{ + enum amd_sriov_ras_telemetry_gpu_block sriov_block; + + sriov_block = amdgpu_ras_block_to_sriov(adev, block); + + if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT || + !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block)) + return false; + + return true; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 5381b8d596e6..df03dba67ab8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -96,6 +96,7 @@ struct amdgpu_virt_ops { enum amdgpu_ras_block block); bool (*rcvd_ras_intr)(struct amdgpu_device *adev); int (*req_ras_err_count)(struct amdgpu_device *adev); + int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr); }; /* @@ -140,6 +141,7 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8), AMDGIM_FEATURE_RAS_CAPS = (1 << 9), AMDGIM_FEATURE_RAS_TELEMETRY = (1 << 10), + AMDGIM_FEATURE_RAS_CPER = (1 << 11), }; enum AMDGIM_REG_ACCESS_FLAG { @@ -242,6 +244,13 @@ struct amdgpu_virt_ras_err_handler_data { int last_reserved; }; +struct amdgpu_virt_ras { + struct ratelimit_state ras_error_cnt_rs; + struct ratelimit_state ras_cper_dump_rs; + struct mutex ras_telemetry_mutex; + uint64_t cper_rptr; +}; + /* GPU virtualization */ struct amdgpu_virt { uint32_t caps; @@ -279,12 +288,12 @@ struct amdgpu_virt { /* the ucode id to signal the autoload */ uint32_t autoload_ucode_id; - struct mutex rlcg_reg_lock; + /* Spinlock to protect access to the RLCG register interface */ + spinlock_t rlcg_reg_lock; union amd_sriov_ras_caps ras_en_caps; union amd_sriov_ras_caps ras_telemetry_en_caps; - - struct ratelimit_state ras_telemetry_rs; + struct amdgpu_virt_ras ras; struct amd_sriov_ras_telemetry_error_count count_cache; }; @@ -339,6 +348,9 @@ struct amdgpu_video_codec_info; #define amdgpu_sriov_ras_telemetry_block_en(adev, sriov_blk) \ (amdgpu_sriov_ras_telemetry_en((adev)) && (adev)->virt.ras_telemetry_en_caps.all & BIT(sriov_blk)) +#define amdgpu_sriov_ras_cper_en(adev) \ +((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CPER) + static inline bool is_virtual_machine(void) { #if defined(CONFIG_X86) @@ -352,6 +364,8 @@ static inline bool is_virtual_machine(void) #define amdgpu_sriov_is_pp_one_vf(adev) \ ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF) +#define amdgpu_sriov_multi_vf_mode(adev) \ + (amdgpu_sriov_vf(adev) && !amdgpu_sriov_is_pp_one_vf(adev)) #define amdgpu_sriov_is_debug(adev) \ ((!amdgpu_in_reset(adev)) && adev->virt.tdr_debug) #define amdgpu_sriov_is_normal(adev) \ @@ -377,7 +391,7 @@ void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev); void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); void amdgpu_virt_exchange_data(struct amdgpu_device *adev); void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev); -void amdgpu_detect_virtualization(struct amdgpu_device *adev); +void amdgpu_virt_init(struct amdgpu_device *adev); bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev); int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev); @@ -405,5 +419,8 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev); int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block, struct ras_err_data *err_data); +int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update); int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev); +bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev, + enum amdgpu_ras_block block); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 8bf28d336807..155bb9891a17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -188,8 
+188,8 @@ static int amdgpu_vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc, amdgpu_crtc->connector = NULL; amdgpu_crtc->vsync_timer_enabled = AMDGPU_IRQ_STATE_DISABLE; - hrtimer_init(&amdgpu_crtc->vblank_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - amdgpu_crtc->vblank_timer.function = &amdgpu_vkms_vblank_simulate; + hrtimer_setup(&amdgpu_crtc->vblank_timer, &amdgpu_vkms_vblank_simulate, CLOCK_MONOTONIC, + HRTIMER_MODE_REL); return ret; } @@ -627,18 +627,18 @@ static int amdgpu_vkms_resume(struct amdgpu_ip_block *ip_block) return drm_mode_config_helper_resume(adev_to_drm(ip_block->adev)); } -static bool amdgpu_vkms_is_idle(void *handle) +static bool amdgpu_vkms_is_idle(struct amdgpu_ip_block *ip_block) { return true; } -static int amdgpu_vkms_set_clockgating_state(void *handle, +static int amdgpu_vkms_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int amdgpu_vkms_set_powergating_state(void *handle, +static int amdgpu_vkms_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index c9c48b782ec1..ce52b4d75e94 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -36,6 +36,7 @@ #include <drm/ttm/ttm_tt.h> #include <drm/drm_exec.h> #include "amdgpu.h" +#include "amdgpu_vm.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" #include "amdgpu_gmc.h" @@ -311,6 +312,111 @@ static void amdgpu_vm_bo_reset_state_machine(struct amdgpu_vm *vm) } /** + * amdgpu_vm_update_shared - helper to update shared memory stat + * @base: base structure for tracking BO usage in a VM + * + * Takes the vm status_lock and updates the shared memory stat. If the basic + * stat changed (e.g. buffer was moved) amdgpu_vm_update_stats need to be called + * as well. + */ +static void amdgpu_vm_update_shared(struct amdgpu_vm_bo_base *base) +{ + struct amdgpu_vm *vm = base->vm; + struct amdgpu_bo *bo = base->bo; + uint64_t size = amdgpu_bo_size(bo); + uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo); + bool shared; + + spin_lock(&vm->status_lock); + shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); + if (base->shared != shared) { + base->shared = shared; + if (shared) { + vm->stats[bo_memtype].drm.shared += size; + vm->stats[bo_memtype].drm.private -= size; + } else { + vm->stats[bo_memtype].drm.shared -= size; + vm->stats[bo_memtype].drm.private += size; + } + } + spin_unlock(&vm->status_lock); +} + +/** + * amdgpu_vm_bo_update_shared - callback when bo gets shared/unshared + * @bo: amdgpu buffer object + * + * Update the per VM stats for all the vm if needed from private to shared or + * vice versa. + */ +void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo) +{ + struct amdgpu_vm_bo_base *base; + + for (base = bo->vm_bo; base; base = base->next) + amdgpu_vm_update_shared(base); +} + +/** + * amdgpu_vm_update_stats_locked - helper to update normal memory stat + * @base: base structure for tracking BO usage in a VM + * @res: the ttm_resource to use for the purpose of accounting, may or may not + * be bo->tbo.resource + * @sign: if we should add (+1) or subtract (-1) from the stat + * + * Caller need to have the vm status_lock held. Useful for when multiple update + * need to happen at the same time. 
+ */ +static void amdgpu_vm_update_stats_locked(struct amdgpu_vm_bo_base *base, + struct ttm_resource *res, int sign) +{ + struct amdgpu_vm *vm = base->vm; + struct amdgpu_bo *bo = base->bo; + int64_t size = sign * amdgpu_bo_size(bo); + uint32_t bo_memtype = amdgpu_bo_mem_stats_placement(bo); + + /* For drm-total- and drm-shared-, BO are accounted by their preferred + * placement, see also amdgpu_bo_mem_stats_placement. + */ + if (base->shared) + vm->stats[bo_memtype].drm.shared += size; + else + vm->stats[bo_memtype].drm.private += size; + + if (res && res->mem_type < __AMDGPU_PL_NUM) { + uint32_t res_memtype = res->mem_type; + + vm->stats[res_memtype].drm.resident += size; + /* BO only count as purgeable if it is resident, + * since otherwise there's nothing to purge. + */ + if (bo->flags & AMDGPU_GEM_CREATE_DISCARDABLE) + vm->stats[res_memtype].drm.purgeable += size; + if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(res_memtype))) + vm->stats[bo_memtype].evicted += size; + } +} + +/** + * amdgpu_vm_update_stats - helper to update normal memory stat + * @base: base structure for tracking BO usage in a VM + * @res: the ttm_resource to use for the purpose of accounting, may or may not + * be bo->tbo.resource + * @sign: if we should add (+1) or subtract (-1) from the stat + * + * Updates the basic memory stat when bo is added/deleted/moved. + */ +void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base, + struct ttm_resource *res, int sign) +{ + struct amdgpu_vm *vm = base->vm; + + spin_lock(&vm->status_lock); + amdgpu_vm_update_stats_locked(base, res, sign); + spin_unlock(&vm->status_lock); +} + +/** * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm * * @base: base structure for tracking BO usage in a VM @@ -333,6 +439,11 @@ void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, base->next = bo->vm_bo; bo->vm_bo = base; + spin_lock(&vm->status_lock); + base->shared = drm_gem_object_is_shared_for_memory_stats(&bo->tbo.base); + amdgpu_vm_update_stats_locked(base, bo->tbo.resource, +1); + spin_unlock(&vm->status_lock); + if (!amdgpu_vm_is_bo_always_valid(vm, bo)) return; @@ -643,6 +754,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync) { struct amdgpu_device *adev = ring->adev; + struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id]; unsigned vmhub = ring->vm_hub; struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; @@ -650,8 +762,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool gds_switch_needed = ring->funcs->emit_gds_switch && job->gds_switch_needed; bool vm_flush_needed = job->vm_needs_flush; - struct dma_fence *fence = NULL; + bool cleaner_shader_needed = false; bool pasid_mapping_needed = false; + struct dma_fence *fence = NULL; unsigned int patch; int r; @@ -674,8 +787,12 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; + cleaner_shader_needed = adev->gfx.enable_cleaner_shader && + ring->funcs->emit_cleaner_shader && job->base.s_fence && + &job->base.s_fence->scheduled == isolation->spearhead; + if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync && - !(job->enforce_isolation && !job->vmid)) + !cleaner_shader_needed) return 0; amdgpu_ring_ib_begin(ring); @@ -686,9 +803,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, if (need_pipe_sync) 
amdgpu_ring_emit_pipeline_sync(ring); - if (adev->gfx.enable_cleaner_shader && - ring->funcs->emit_cleaner_shader && - job->enforce_isolation) + if (cleaner_shader_needed) ring->funcs->emit_cleaner_shader(ring); if (vm_flush_needed) { @@ -710,7 +825,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, job->oa_size); } - if (vm_flush_needed || pasid_mapping_needed) { + if (vm_flush_needed || pasid_mapping_needed || cleaner_shader_needed) { r = amdgpu_fence_emit(ring, &fence, NULL, 0); if (r) return r; @@ -732,6 +847,18 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, id->pasid_mapping = dma_fence_get(fence); mutex_unlock(&id_mgr->lock); } + + /* + * Make sure that all other submissions wait for the cleaner shader to + * finish before we push them to the HW. + */ + if (cleaner_shader_needed) { + trace_amdgpu_cleaner_shader(ring, fence); + mutex_lock(&adev->enforce_isolation_mutex); + dma_fence_put(isolation->spearhead); + isolation->spearhead = dma_fence_get(fence); + mutex_unlock(&adev->enforce_isolation_mutex); + } dma_fence_put(fence); amdgpu_ring_patch_cond_exec(ring, patch); @@ -1083,53 +1210,11 @@ error_free: return r; } -static void amdgpu_vm_bo_get_memory(struct amdgpu_bo_va *bo_va, - struct amdgpu_mem_stats *stats, - unsigned int size) -{ - struct amdgpu_vm *vm = bo_va->base.vm; - struct amdgpu_bo *bo = bo_va->base.bo; - - if (!bo) - return; - - /* - * For now ignore BOs which are currently locked and potentially - * changing their location. - */ - if (!amdgpu_vm_is_bo_always_valid(vm, bo) && - !dma_resv_trylock(bo->tbo.base.resv)) - return; - - amdgpu_bo_get_memory(bo, stats, size); - if (!amdgpu_vm_is_bo_always_valid(vm, bo)) - dma_resv_unlock(bo->tbo.base.resv); -} - void amdgpu_vm_get_memory(struct amdgpu_vm *vm, - struct amdgpu_mem_stats *stats, - unsigned int size) + struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]) { - struct amdgpu_bo_va *bo_va, *tmp; - spin_lock(&vm->status_lock); - list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) - amdgpu_vm_bo_get_memory(bo_va, stats, size); - - list_for_each_entry_safe(bo_va, tmp, &vm->evicted, base.vm_status) - amdgpu_vm_bo_get_memory(bo_va, stats, size); - - list_for_each_entry_safe(bo_va, tmp, &vm->relocated, base.vm_status) - amdgpu_vm_bo_get_memory(bo_va, stats, size); - - list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) - amdgpu_vm_bo_get_memory(bo_va, stats, size); - - list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) - amdgpu_vm_bo_get_memory(bo_va, stats, size); - - list_for_each_entry_safe(bo_va, tmp, &vm->done, base.vm_status) - amdgpu_vm_bo_get_memory(bo_va, stats, size); + memcpy(stats, vm->stats, sizeof(*stats) * __AMDGPU_PL_NUM); spin_unlock(&vm->status_lock); } @@ -2075,6 +2160,7 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev, if (*base != &bo_va->base) continue; + amdgpu_vm_update_stats(*base, bo->tbo.resource, -1); *base = bo_va->base.next; break; } @@ -2143,14 +2229,12 @@ bool amdgpu_vm_evictable(struct amdgpu_bo *bo) /** * amdgpu_vm_bo_invalidate - mark the bo as invalid * - * @adev: amdgpu_device pointer * @bo: amdgpu buffer object * @evicted: is the BO evicted * * Mark @bo as invalid. 
*/ -void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, - struct amdgpu_bo *bo, bool evicted) +void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted) { struct amdgpu_vm_bo_base *bo_base; @@ -2176,6 +2260,32 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, } /** + * amdgpu_vm_bo_move - handle BO move + * + * @bo: amdgpu buffer object + * @new_mem: the new placement of the BO move + * @evicted: is the BO evicted + * + * Update the memory stats for the new placement and mark @bo as invalid. + */ +void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem, + bool evicted) +{ + struct amdgpu_vm_bo_base *bo_base; + + for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { + struct amdgpu_vm *vm = bo_base->vm; + + spin_lock(&vm->status_lock); + amdgpu_vm_update_stats_locked(bo_base, bo->tbo.resource, -1); + amdgpu_vm_update_stats_locked(bo_base, new_mem, +1); + spin_unlock(&vm->status_lock); + } + + amdgpu_vm_bo_invalidate(bo, evicted); +} + +/** * amdgpu_vm_get_block_size - calculate VM page table size as power of two * * @vm_size: VM size @@ -2440,8 +2550,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, spin_lock_init(&vm->status_lock); INIT_LIST_HEAD(&vm->freed); INIT_LIST_HEAD(&vm->done); - INIT_LIST_HEAD(&vm->pt_freed); - INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work); INIT_KFIFO(vm->faults); r = amdgpu_vm_init_entities(adev, vm); @@ -2580,18 +2688,14 @@ unreserve_bo: return r; } -/** - * amdgpu_vm_release_compute - release a compute vm - * @adev: amdgpu_device pointer - * @vm: a vm turned into compute vm by calling amdgpu_vm_make_compute - * - * This is a correspondant of amdgpu_vm_make_compute. It decouples compute - * pasid from vm. Compute should stop use of vm after this call. 
- */ -void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) +static int amdgpu_vm_stats_is_zero(struct amdgpu_vm *vm) { - amdgpu_vm_set_pasid(adev, vm, 0); - vm->is_compute_context = false; + for (int i = 0; i < __AMDGPU_PL_NUM; ++i) { + if (!(drm_memory_stats_is_zero(&vm->stats[i].drm) && + vm->stats[i].evicted == 0)) + return false; + } + return true; } /** @@ -2613,11 +2717,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); - flush_work(&vm->pt_free_work); - root = amdgpu_bo_ref(vm->root.bo); amdgpu_bo_reserve(root, true); - amdgpu_vm_put_task_info(vm->task_info); amdgpu_vm_set_pasid(adev, vm, 0); dma_fence_wait(vm->last_unlocked, false); dma_fence_put(vm->last_unlocked); @@ -2666,6 +2767,16 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) } ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move); + + if (!amdgpu_vm_stats_is_zero(vm)) { + struct amdgpu_task_info *ti = vm->task_info; + + dev_warn(adev->dev, + "VM memory stats for proc %s(%d) task %s(%d) is non-zero when fini\n", + ti->process_name, ti->pid, ti->task_name, ti->tgid); + } + + amdgpu_vm_put_task_info(vm->task_info); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 5d119ac26c4f..f3ad687125ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -35,6 +35,7 @@ #include "amdgpu_sync.h" #include "amdgpu_ring.h" #include "amdgpu_ids.h" +#include "amdgpu_ttm.h" struct drm_exec; @@ -202,9 +203,13 @@ struct amdgpu_vm_bo_base { /* protected by bo being reserved */ struct amdgpu_vm_bo_base *next; - /* protected by spinlock */ + /* protected by vm status_lock */ struct list_head vm_status; + /* if the bo is counted as shared in mem stats + * protected by vm status_lock */ + bool shared; + /* protected by the BO being reserved */ bool moved; }; @@ -324,10 +329,7 @@ struct amdgpu_vm_fault_info { struct amdgpu_mem_stats { struct drm_memory_stats drm; - /* buffers that requested this placement */ - uint64_t requested; - /* buffers that requested this placement - * but are currently evicted */ + /* buffers that requested this placement but are currently evicted */ uint64_t evicted; }; @@ -345,6 +347,9 @@ struct amdgpu_vm { /* Lock to protect vm_bo add/del/move on all lists of vm */ spinlock_t status_lock; + /* Memory statistics for this vm, protected by status_lock */ + struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]; + /* Per-VM and PT BOs who needs a validation */ struct list_head evicted; @@ -369,10 +374,6 @@ struct amdgpu_vm { /* BOs which are invalidated, has been updated in the PTs */ struct list_head done; - /* PT BOs scheduled to free and fill with zero if vm_resv is not hold */ - struct list_head pt_freed; - struct work_struct pt_free_work; - /* contains the page directory */ struct amdgpu_vm_bo_base root; struct dma_fence *last_update; @@ -488,7 +489,6 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm, long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout); int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id); int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); -void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec, unsigned int num_fences); @@ -524,8 +524,12 
@@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear); bool amdgpu_vm_evictable(struct amdgpu_bo *bo); -void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, - struct amdgpu_bo *bo, bool evicted); +void amdgpu_vm_bo_invalidate(struct amdgpu_bo *bo, bool evicted); +void amdgpu_vm_update_stats(struct amdgpu_vm_bo_base *base, + struct ttm_resource *new_res, int sign); +void amdgpu_vm_bo_update_shared(struct amdgpu_bo *bo); +void amdgpu_vm_bo_move(struct amdgpu_bo *bo, struct ttm_resource *new_mem, + bool evicted); uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, struct amdgpu_bo *bo); @@ -576,8 +580,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_get_memory(struct amdgpu_vm *vm, - struct amdgpu_mem_stats *stats, - unsigned int size); + struct amdgpu_mem_stats stats[__AMDGPU_PL_NUM]); int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm, struct amdgpu_bo_vm *vmbo, bool immediate); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c index f78a0434a48f..30022123b0bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c @@ -537,6 +537,7 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) if (!entry->bo) return; + amdgpu_vm_update_stats(entry, entry->bo->tbo.resource, -1); entry->bo->vm_bo = NULL; ttm_bo_set_bulk_move(&entry->bo->tbo, NULL); @@ -546,27 +547,6 @@ static void amdgpu_vm_pt_free(struct amdgpu_vm_bo_base *entry) amdgpu_bo_unref(&entry->bo); } -void amdgpu_vm_pt_free_work(struct work_struct *work) -{ - struct amdgpu_vm_bo_base *entry, *next; - struct amdgpu_vm *vm; - LIST_HEAD(pt_freed); - - vm = container_of(work, struct amdgpu_vm, pt_free_work); - - spin_lock(&vm->status_lock); - list_splice_init(&vm->pt_freed, &pt_freed); - spin_unlock(&vm->status_lock); - - /* flush_work in amdgpu_vm_fini ensure vm->root.bo is valid. */ - amdgpu_bo_reserve(vm->root.bo, true); - - list_for_each_entry_safe(entry, next, &pt_freed, vm_status) - amdgpu_vm_pt_free(entry); - - amdgpu_bo_unreserve(vm->root.bo); -} - /** * amdgpu_vm_pt_free_list - free PD/PT levels * @@ -579,19 +559,15 @@ void amdgpu_vm_pt_free_list(struct amdgpu_device *adev, struct amdgpu_vm_update_params *params) { struct amdgpu_vm_bo_base *entry, *next; - struct amdgpu_vm *vm = params->vm; bool unlocked = params->unlocked; if (list_empty(¶ms->tlb_flush_waitlist)) return; - if (unlocked) { - spin_lock(&vm->status_lock); - list_splice_init(¶ms->tlb_flush_waitlist, &vm->pt_freed); - spin_unlock(&vm->status_lock); - schedule_work(&vm->pt_free_work); - return; - } + /* + * unlocked unmap clear page table leaves, warning to free the page entry. + */ + WARN_ON(unlocked); list_for_each_entry_safe(entry, next, ¶ms->tlb_flush_waitlist, vm_status) amdgpu_vm_pt_free(entry); @@ -899,7 +875,15 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params, incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift; mask = amdgpu_vm_pt_entries_mask(adev, cursor.level); pe_start = ((cursor.pfn >> shift) & mask) * 8; - entry_end = ((uint64_t)mask + 1) << shift; + + if (cursor.level < AMDGPU_VM_PTB && params->unlocked) + /* + * MMU notifier callback unlocked unmap huge page, leave is PDE entry, + * only clear one entry. Next entry search again for PDE or PTE leave. 
+ */ + entry_end = 1ULL << shift; + else + entry_end = ((uint64_t)mask + 1) << shift; entry_end += cursor.pfn & ~(entry_end - 1); entry_end = min(entry_end, end); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c index 110b120d7375..121ee17b522b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c @@ -236,7 +236,8 @@ int amdgpu_vpe_init_microcode(struct amdgpu_vpe *vpe) int ret; amdgpu_ucode_ip_version_decode(adev, VPE_HWIP, fw_prefix, sizeof(fw_prefix)); - ret = amdgpu_ucode_request(adev, &adev->vpe.fw, "amdgpu/%s.bin", fw_prefix); + ret = amdgpu_ucode_request(adev, &adev->vpe.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s.bin", fw_prefix); if (ret) goto out; @@ -646,16 +647,16 @@ static int vpe_ring_preempt_ib(struct amdgpu_ring *ring) return r; } -static int vpe_set_clockgating_state(void *handle, +static int vpe_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int vpe_set_powergating_state(void *handle, +static int vpe_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; struct amdgpu_vpe *vpe = &adev->vpe; if (!adev->pm.dpm_enabled) @@ -833,7 +834,7 @@ static int vpe_ring_test_ib(struct amdgpu_ring *ring, long timeout) ret = (le32_to_cpu(adev->wb.wb[index]) == test_pattern) ? 0 : -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index ff5e52025266..6da8994e0469 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -28,7 +28,6 @@ #include "amdgpu.h" #include "amdgpu_vm.h" #include "amdgpu_res_cursor.h" -#include "amdgpu_atomfirmware.h" #include "atom.h" #define AMDGPU_MAX_SG_SEGMENT_SIZE (2UL << 30) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index e209b5e101df..23b6f7a4aa4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -427,7 +427,7 @@ void amdgpu_xcp_release_sched(struct amdgpu_device *adev, return; sched = entity->entity.rq->sched; - if (sched->ready) { + if (drm_sched_wqueue_ready(sched)) { ring = to_amdgpu_ring(entity->entity.rq->sched); atomic_dec(&adev->xcp_mgr->xcp[ring->xcp_id].ref_cnt); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 74b4349e345a..6029a799074d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -315,6 +315,7 @@ int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num) switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { case IP_VERSION(6, 4, 0): + case IP_VERSION(6, 4, 1): xgmi_state_reg_val = xgmi_v6_4_get_link_status(adev, global_link_num); break; default: @@ -818,28 +819,71 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev * num_hops[2:0] = number of hops */ int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, - struct amdgpu_device *peer_adev) + struct amdgpu_device *peer_adev) { struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; uint8_t num_hops_mask = 0x7; int i; + if (!adev->gmc.xgmi.supported) + return 0; + for (i = 0 ; i < 
top->num_nodes; ++i) if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) return top->nodes[i].num_hops & num_hops_mask; - return -EINVAL; + + dev_err(adev->dev, "Failed to get xgmi hops count for peer %d.\n", + peer_adev->gmc.xgmi.physical_node_id); + + return 0; } -int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev, - struct amdgpu_device *peer_adev) +int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev, + enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit, + uint32_t *min_bw, uint32_t *max_bw) { - struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; - int i; + bool peer_mode = bw_mode == AMDGPU_XGMI_BW_MODE_PER_PEER; + int unit_scale = bw_unit == AMDGPU_XGMI_BW_UNIT_MBYTES ? 1000 : 1; + int num_lanes = adev->gmc.xgmi.max_width; + int speed = adev->gmc.xgmi.max_speed; + int num_links = !peer_mode ? 1 : -1; - for (i = 0 ; i < top->num_nodes; ++i) - if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id) - return top->nodes[i].num_links; - return -EINVAL; + if (!(min_bw && max_bw)) + return -EINVAL; + + *min_bw = 0; + *max_bw = 0; + + if (!adev->gmc.xgmi.supported) + return -ENODATA; + + if (peer_mode && !peer_adev) + return -EINVAL; + + if (peer_mode) { + struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info; + int i; + + for (i = 0 ; i < top->num_nodes; ++i) { + if (top->nodes[i].node_id != peer_adev->gmc.xgmi.node_id) + continue; + + num_links = top->nodes[i].num_links; + break; + } + } + + if (num_links == -1) { + dev_err(adev->dev, "Failed to get number of xgmi links for peer %d.\n", + peer_adev->gmc.xgmi.physical_node_id); + } else if (num_links) { + int per_link_bw = (speed * num_lanes * unit_scale)/BITS_PER_BYTE; + + *min_bw = per_link_bw; + *max_bw = num_links * per_link_bw; + } + + return 0; } bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev, @@ -1123,11 +1167,13 @@ static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_ban if (ext_error_code != 0 && ext_error_code != 9) count = 0ULL; + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, count); break; case ACA_SMU_TYPE_CE: count = ext_error_code == 6 ? 
count : 0ULL; - ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, count); + bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, count); break; default: return -EINVAL; @@ -1162,6 +1208,7 @@ static int amdgpu_xgmi_ras_late_init(struct amdgpu_device *adev, struct ras_comm switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { case IP_VERSION(6, 4, 0): + case IP_VERSION(6, 4, 1): r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__XGMI_WAFL, &xgmi_v6_4_0_aca_info, NULL); if (r) @@ -1221,6 +1268,7 @@ static void amdgpu_xgmi_legacy_reset_ras_error_count(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { case IP_VERSION(6, 4, 0): + case IP_VERSION(6, 4, 1): for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) pcs_clear_status(adev, xgmi3x16_pcs_err_status_reg_v6_4[i]); @@ -1255,6 +1303,7 @@ static void amdgpu_xgmi_reset_ras_error_count(struct amdgpu_device *adev) { switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { case IP_VERSION(6, 4, 0): + case IP_VERSION(6, 4, 1): xgmi_v6_4_0_reset_ras_error_count(adev); break; default: @@ -1280,7 +1329,9 @@ static int amdgpu_xgmi_query_pcs_error_status(struct amdgpu_device *adev, if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0) || amdgpu_ip_version(adev, XGMI_HWIP, 0) == - IP_VERSION(6, 4, 0)) { + IP_VERSION(6, 4, 0) || + amdgpu_ip_version(adev, XGMI_HWIP, 0) == + IP_VERSION(6, 4, 1)) { pcs_ras_fields = &xgmi3x16_pcs_ras_fields[0]; field_array_size = ARRAY_SIZE(xgmi3x16_pcs_ras_fields); } else { @@ -1388,6 +1439,7 @@ static void amdgpu_xgmi_legacy_query_ras_error_count(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { case IP_VERSION(6, 4, 0): + case IP_VERSION(6, 4, 1): /* check xgmi3x16 pcs error */ for (i = 0; i < ARRAY_SIZE(xgmi3x16_pcs_err_status_reg_v6_4); i++) { data = RREG32_PCIE(xgmi3x16_pcs_err_status_reg_v6_4[i]); @@ -1484,6 +1536,7 @@ static void amdgpu_xgmi_query_ras_error_count(struct amdgpu_device *adev, { switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { case IP_VERSION(6, 4, 0): + case IP_VERSION(6, 4, 1): xgmi_v6_4_0_query_ras_error_count(adev, ras_error_status); break; default: @@ -1671,3 +1724,34 @@ int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev, return r; } + +bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, + struct amdgpu_device *bo_adev) +{ + return (amdgpu_use_xgmi_p2p && adev != bo_adev && + adev->gmc.xgmi.hive_id && + adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id); +} + +void amdgpu_xgmi_early_init(struct amdgpu_device *adev) +{ + if (!adev->gmc.xgmi.supported) + return; + + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 4, 0): + case IP_VERSION(9, 4, 1): + case IP_VERSION(9, 4, 2): + adev->gmc.xgmi.max_speed = XGMI_SPEED_25GT; + adev->gmc.xgmi.max_width = 16; + break; + case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): + adev->gmc.xgmi.max_speed = XGMI_SPEED_32GT; + adev->gmc.xgmi.max_width = 16; + break; + default: + break; + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index d1282b4c6348..32dabba4062f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -23,9 +23,14 @@ #define __AMDGPU_XGMI_H__ #include <drm/task_barrier.h> -#include "amdgpu_psp.h" #include "amdgpu_ras.h" +enum amdgpu_xgmi_link_speed { + XGMI_SPEED_16GT = 16, + XGMI_SPEED_25GT = 25, + XGMI_SPEED_32GT = 32 +}; + 
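The new amdgpu_xgmi_get_bandwidth() above derives both bounds from the per-link rate: link speed in GT/s times the lane width, divided by BITS_PER_BYTE, gives GB/s per link (scaled by 1000 when the caller asks for MB/s), and the per-peer maximum is simply that value multiplied by the number of links to the peer. A small standalone sketch of the arithmetic with example values (32 GT/s, 16 lanes, 4 links) rather than anything queried from hardware:

/* Illustrative sketch of the bandwidth formula used above. */
#include <stdio.h>

#define BITS_PER_BYTE 8

int main(void)
{
	int speed = 32;		/* XGMI_SPEED_32GT, in GT/s */
	int num_lanes = 16;	/* gmc.xgmi.max_width */
	int num_links = 4;	/* example link count to one peer */
	int unit_scale = 1000;	/* AMDGPU_XGMI_BW_UNIT_MBYTES; 1 for GBYTES */

	int per_link_bw = (speed * num_lanes * unit_scale) / BITS_PER_BYTE;

	/* prints "min 64000 MB/s, max 256000 MB/s" */
	printf("min %d MB/s, max %d MB/s\n",
	       per_link_bw, num_links * per_link_bw);
	return 0;
}

Note that min_bw always reports a single link while max_bw reports all links to the peer, so the two values only differ in AMDGPU_XGMI_BW_MODE_PER_PEER mode; in per-link mode num_links is fixed at 1.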
struct amdgpu_hive_info { struct kobject kobj; uint64_t hive_id; @@ -55,29 +60,63 @@ struct amdgpu_pcs_ras_field { uint32_t pcs_err_shift; }; -extern struct amdgpu_xgmi_ras xgmi_ras; +/** + * Bandwidth range reporting comes in two modes. + * + * PER_LINK - range for any xgmi link + * PER_PEER - range of max of single xgmi link to max of multiple links based on source peer + */ +enum amdgpu_xgmi_bw_mode { + AMDGPU_XGMI_BW_MODE_PER_LINK = 0, + AMDGPU_XGMI_BW_MODE_PER_PEER +}; + +enum amdgpu_xgmi_bw_unit { + AMDGPU_XGMI_BW_UNIT_GBYTES = 0, + AMDGPU_XGMI_BW_UNIT_MBYTES +}; + +struct amdgpu_xgmi_ras { + struct amdgpu_ras_block_object ras_block; +}; +extern struct amdgpu_xgmi_ras xgmi_ras; + +struct amdgpu_xgmi { + /* from psp */ + u64 node_id; + u64 hive_id; + /* fixed per family */ + u64 node_segment_size; + /* physical node (0-3) */ + unsigned physical_node_id; + /* number of nodes (0-4) */ + unsigned num_physical_nodes; + /* gpu list in the same hive */ + struct list_head head; + bool supported; + struct ras_common_if *ras_if; + bool connected_to_cpu; + struct amdgpu_xgmi_ras *ras; + enum amdgpu_xgmi_link_speed max_speed; + uint8_t max_width; +}; + struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); void amdgpu_put_xgmi_hive(struct amdgpu_hive_info *hive); int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); int amdgpu_xgmi_add_device(struct amdgpu_device *adev); int amdgpu_xgmi_remove_device(struct amdgpu_device *adev); int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); -int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, - struct amdgpu_device *peer_adev); -int amdgpu_xgmi_get_num_links(struct amdgpu_device *adev, - struct amdgpu_device *peer_adev); +int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); +int amdgpu_xgmi_get_bandwidth(struct amdgpu_device *adev, struct amdgpu_device *peer_adev, + enum amdgpu_xgmi_bw_mode bw_mode, enum amdgpu_xgmi_bw_unit bw_unit, + uint32_t *min_bw, uint32_t *max_bw); bool amdgpu_xgmi_get_is_sharing_enabled(struct amdgpu_device *adev, struct amdgpu_device *peer_adev); uint64_t amdgpu_xgmi_get_relative_phy_addr(struct amdgpu_device *adev, uint64_t addr); -static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, - struct amdgpu_device *bo_adev) -{ - return (amdgpu_use_xgmi_p2p && - adev != bo_adev && - adev->gmc.xgmi.hive_id && - adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id); -} +bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, + struct amdgpu_device *bo_adev); int amdgpu_xgmi_ras_sw_init(struct amdgpu_device *adev); int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev); @@ -87,4 +126,7 @@ int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev, int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num); +void amdgpu_xgmi_early_init(struct amdgpu_device *adev); +uint32_t amdgpu_xgmi_get_max_bandwidth(struct amdgpu_device *adev); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index b4f9c2f4e92c..d6ac2652f0ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -97,11 +97,12 @@ union amd_sriov_msg_feature_flags { uint32_t pp_one_vf_mode : 1; uint32_t reg_indirect_acc : 1; uint32_t av1_support : 1; - uint32_t vcn_rb_decouple : 1; + uint32_t vcn_rb_decouple : 1; uint32_t mes_info_dump_enable : 1; uint32_t ras_caps : 1; uint32_t ras_telemetry : 1; - uint32_t 
reserved : 21; + uint32_t ras_cper : 1; + uint32_t reserved : 20; } flags; uint32_t all; }; @@ -328,21 +329,25 @@ enum amd_sriov_mailbox_request_message { MB_REQ_MSG_READY_TO_RESET = 201, MB_REQ_MSG_RAS_POISON = 202, MB_REQ_RAS_ERROR_COUNT = 203, + MB_REQ_RAS_CPER_DUMP = 204, }; /* mailbox message send from host to guest */ enum amd_sriov_mailbox_response_message { - MB_RES_MSG_CLR_MSG_BUF = 0, - MB_RES_MSG_READY_TO_ACCESS_GPU = 1, - MB_RES_MSG_FLR_NOTIFICATION, - MB_RES_MSG_FLR_NOTIFICATION_COMPLETION, - MB_RES_MSG_SUCCESS, - MB_RES_MSG_FAIL, - MB_RES_MSG_QUERY_ALIVE, - MB_RES_MSG_GPU_INIT_DATA_READY, - MB_RES_MSG_RAS_ERROR_COUNT_READY = 11, - - MB_RES_MSG_TEXT_MESSAGE = 255 + MB_RES_MSG_CLR_MSG_BUF = 0, + MB_RES_MSG_READY_TO_ACCESS_GPU = 1, + MB_RES_MSG_FLR_NOTIFICATION = 2, + MB_RES_MSG_FLR_NOTIFICATION_COMPLETION = 3, + MB_RES_MSG_SUCCESS = 4, + MB_RES_MSG_FAIL = 5, + MB_RES_MSG_QUERY_ALIVE = 6, + MB_RES_MSG_GPU_INIT_DATA_READY = 7, + MB_RES_MSG_RAS_POISON_READY = 8, + MB_RES_MSG_PF_SOFT_FLR_NOTIFICATION = 9, + MB_RES_MSG_GPU_RMA = 10, + MB_RES_MSG_RAS_ERROR_COUNT_READY = 11, + MB_REQ_RAS_CPER_DUMP_READY = 14, + MB_RES_MSG_TEXT_MESSAGE = 255 }; enum amd_sriov_ras_telemetry_gpu_block { @@ -386,11 +391,20 @@ struct amd_sriov_ras_telemetry_error_count { } block[RAS_TELEMETRY_GPU_BLOCK_COUNT]; }; +struct amd_sriov_ras_cper_dump { + uint32_t more; + uint64_t overflow_count; + uint64_t count; + uint64_t wptr; + uint32_t buf[]; +}; + struct amdsriov_ras_telemetry { struct amd_sriov_ras_telemetry_header header; union { struct amd_sriov_ras_telemetry_error_count error_count; + struct amd_sriov_ras_cper_dump cper_dump; } body; }; diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index e157d6d857b6..3c07517be09a 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -77,7 +77,8 @@ static void aqua_vanjaram_set_xcp_id(struct amdgpu_device *adev, ring->xcp_id = AMDGPU_XCP_NO_PARTITION; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) adev->gfx.enforce_isolation[0].xcp_id = ring->xcp_id; - if (adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) + if ((adev->xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) || + (ring->funcs->type == AMDGPU_RING_TYPE_CPER)) return; inst_mask = 1 << inst_idx; @@ -559,8 +560,11 @@ static bool __aqua_vanjaram_is_valid_mode(struct amdgpu_xcp_mgr *xcp_mgr, adev->gmc.num_mem_partitions == 4) && (num_xccs_per_xcp >= 2); case AMDGPU_CPX_PARTITION_MODE: + /* (num_xcc > 1) because 1 XCC is considered SPX, not CPX. 
+ * (num_xcc % adev->gmc.num_mem_partitions) == 0 because + * num_compute_partitions can't be less than num_mem_partitions + */ return ((num_xcc > 1) && - (adev->gmc.num_mem_partitions == 1 || adev->gmc.num_mem_partitions == 4) && (num_xcc % adev->gmc.num_mem_partitions) == 0); default: return false; diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c index 622634c08c7b..521b9faab180 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c @@ -430,7 +430,7 @@ void amdgpu_atombios_dp_set_link_config(struct drm_connector *connector, } int amdgpu_atombios_dp_mode_valid_helper(struct drm_connector *connector, - struct drm_display_mode *mode) + const struct drm_display_mode *mode) { struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector); struct amdgpu_connector_atom_dig *dig_connector; diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.h b/drivers/gpu/drm/amd/amdgpu/atombios_dp.h index f59d85eaddf0..3e24acf8133f 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.h +++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.h @@ -32,7 +32,7 @@ int amdgpu_atombios_dp_get_panel_mode(struct drm_encoder *encoder, void amdgpu_atombios_dp_set_link_config(struct drm_connector *connector, const struct drm_display_mode *mode); int amdgpu_atombios_dp_mode_valid_helper(struct drm_connector *connector, - struct drm_display_mode *mode); + const struct drm_display_mode *mode); bool amdgpu_atombios_dp_needs_link_train(struct amdgpu_connector *amdgpu_connector); void amdgpu_atombios_dp_set_rx_power_state(struct drm_connector *connector, u8 power_state); diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index e2cb1f080e88..9cd63b4177bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -2148,7 +2148,7 @@ static int cik_common_resume(struct amdgpu_ip_block *ip_block) return cik_common_hw_init(ip_block); } -static bool cik_common_is_idle(void *handle) +static bool cik_common_is_idle(struct amdgpu_ip_block *ip_block) { return true; } @@ -2161,13 +2161,13 @@ static int cik_common_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int cik_common_set_clockgating_state(void *handle, +static int cik_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int cik_common_set_powergating_state(void *handle, +static int cik_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 1da17755ad53..41f4705bdbbd 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -345,9 +345,9 @@ static int cik_ih_resume(struct amdgpu_ip_block *ip_block) return cik_ih_hw_init(ip_block); } -static bool cik_ih_is_idle(void *handle) +static bool cik_ih_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(mmSRBM_STATUS); if (tmp & SRBM_STATUS__IH_BUSY_MASK) @@ -402,13 +402,13 @@ static int cik_ih_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int cik_ih_set_clockgating_state(void *handle, +static int cik_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int cik_ih_set_powergating_state(void *handle, +static int 
cik_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index ede1a028d48d..508cea965983 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -133,9 +133,11 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { if (i == 0) err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma1.bin", chip_name); if (err) goto out; @@ -696,7 +698,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -1023,9 +1025,9 @@ static int cik_sdma_resume(struct amdgpu_ip_block *ip_block) return cik_sdma_hw_init(ip_block); } -static bool cik_sdma_is_idle(void *handle) +static bool cik_sdma_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(mmSRBM_STATUS2); if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK | @@ -1189,11 +1191,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } -static int cik_sdma_set_clockgating_state(void *handle, +static int cik_sdma_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_CG_STATE_GATE) gate = true; @@ -1204,7 +1206,7 @@ static int cik_sdma_set_clockgating_state(void *handle, return 0; } -static int cik_sdma_set_powergating_state(void *handle, +static int cik_sdma_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h index 06088d52d81c..279288365940 100644 --- a/drivers/gpu/drm/amd/amdgpu/cikd.h +++ b/drivers/gpu/drm/amd/amdgpu/cikd.h @@ -51,6 +51,15 @@ #define HPD4_REGISTER_OFFSET (0x1813 - 0x1807) #define HPD5_REGISTER_OFFSET (0x1816 - 0x1807) +/* audio endpt instance offsets */ +#define AUD0_REGISTER_OFFSET (0x1780 - 0x1780) +#define AUD1_REGISTER_OFFSET (0x1786 - 0x1780) +#define AUD2_REGISTER_OFFSET (0x178c - 0x1780) +#define AUD3_REGISTER_OFFSET (0x1792 - 0x1780) +#define AUD4_REGISTER_OFFSET (0x1798 - 0x1780) +#define AUD5_REGISTER_OFFSET (0x179d - 0x1780) +#define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780) + #define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001 #define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003 diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index d72973bd570d..2f891fb846d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -341,9 +341,9 @@ static int cz_ih_resume(struct amdgpu_ip_block *ip_block) return cz_ih_hw_init(ip_block); } -static bool cz_ih_is_idle(void *handle) +static bool cz_ih_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(mmSRBM_STATUS); if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY)) @@ -398,14 +398,14 @@ static int 
cz_ih_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int cz_ih_set_clockgating_state(void *handle, +static int cz_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { // TODO return 0; } -static int cz_ih_set_powergating_state(void *handle, +static int cz_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { // TODO diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 5098c50d54c8..df401aded662 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -2687,6 +2687,32 @@ static const struct drm_crtc_helper_funcs dce_v10_0_crtc_helper_funcs = { .get_scanout_position = amdgpu_crtc_get_scanout_position, }; +static void dce_v10_0_panic_flush(struct drm_plane *plane) +{ + struct drm_framebuffer *fb; + struct amdgpu_crtc *amdgpu_crtc; + struct amdgpu_device *adev; + uint32_t fb_format; + + if (!plane->fb) + return; + + fb = plane->fb; + amdgpu_crtc = to_amdgpu_crtc(plane->crtc); + adev = drm_to_adev(fb->dev); + + /* Disable DC tiling */ + fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); + fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK; + WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); + +} + +static const struct drm_plane_helper_funcs dce_v10_0_drm_primary_plane_helper_funcs = { + .get_scanout_buffer = amdgpu_display_get_scanout_buffer, + .panic_flush = dce_v10_0_panic_flush, +}; + static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; @@ -2734,6 +2760,7 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index) amdgpu_crtc->encoder = NULL; amdgpu_crtc->connector = NULL; drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v10_0_crtc_helper_funcs); + drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v10_0_drm_primary_plane_helper_funcs); return 0; } @@ -2943,7 +2970,7 @@ static int dce_v10_0_resume(struct amdgpu_ip_block *ip_block) return amdgpu_display_resume_helper(adev); } -static bool dce_v10_0_is_idle(void *handle) +static bool dce_v10_0_is_idle(struct amdgpu_ip_block *ip_block) { return true; } @@ -3302,13 +3329,13 @@ static int dce_v10_0_hpd_irq(struct amdgpu_device *adev, return 0; } -static int dce_v10_0_set_clockgating_state(void *handle, +static int dce_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int dce_v10_0_set_powergating_state(void *handle, +static int dce_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index c5680ff4ab9f..80f01c3989cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -2800,6 +2800,32 @@ static const struct drm_crtc_helper_funcs dce_v11_0_crtc_helper_funcs = { .get_scanout_position = amdgpu_crtc_get_scanout_position, }; +static void dce_v11_0_panic_flush(struct drm_plane *plane) +{ + struct drm_framebuffer *fb; + struct amdgpu_crtc *amdgpu_crtc; + struct amdgpu_device *adev; + uint32_t fb_format; + + if (!plane->fb) + return; + + fb = plane->fb; + amdgpu_crtc = to_amdgpu_crtc(plane->crtc); + adev = drm_to_adev(fb->dev); + + /* Disable DC tiling */ + fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); + fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK; + WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, 
fb_format); + +} + +static const struct drm_plane_helper_funcs dce_v11_0_drm_primary_plane_helper_funcs = { + .get_scanout_buffer = amdgpu_display_get_scanout_buffer, + .panic_flush = dce_v11_0_panic_flush, +}; + static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; @@ -2847,6 +2873,7 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index) amdgpu_crtc->encoder = NULL; amdgpu_crtc->connector = NULL; drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v11_0_crtc_helper_funcs); + drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v11_0_drm_primary_plane_helper_funcs); return 0; } @@ -3081,7 +3108,7 @@ static int dce_v11_0_resume(struct amdgpu_ip_block *ip_block) return amdgpu_display_resume_helper(adev); } -static bool dce_v11_0_is_idle(void *handle) +static bool dce_v11_0_is_idle(struct amdgpu_ip_block *ip_block) { return true; } @@ -3434,13 +3461,13 @@ static int dce_v11_0_hpd_irq(struct amdgpu_device *adev, return 0; } -static int dce_v11_0_set_clockgating_state(void *handle, +static int dce_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int dce_v11_0_set_powergating_state(void *handle, +static int dce_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index eb7de9122d99..255c70959343 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -40,18 +40,25 @@ #include "amdgpu_connectors.h" #include "amdgpu_display.h" +#include "dce_v6_0.h" +#include "sid.h" + #include "bif/bif_3_0_d.h" #include "bif/bif_3_0_sh_mask.h" + #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" + #include "gca/gfx_6_0_d.h" #include "gca/gfx_6_0_sh_mask.h" +#include "gca/gfx_7_2_enum.h" + #include "gmc/gmc_6_0_d.h" #include "gmc/gmc_6_0_sh_mask.h" + #include "dce/dce_6_0_d.h" #include "dce/dce_6_0_sh_mask.h" -#include "gca/gfx_7_2_enum.h" -#include "dce_v6_0.h" + #include "si_enums.h" static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev); @@ -59,31 +66,31 @@ static void dce_v6_0_set_irq_funcs(struct amdgpu_device *adev); static const u32 crtc_offsets[6] = { - SI_CRTC0_REGISTER_OFFSET, - SI_CRTC1_REGISTER_OFFSET, - SI_CRTC2_REGISTER_OFFSET, - SI_CRTC3_REGISTER_OFFSET, - SI_CRTC4_REGISTER_OFFSET, - SI_CRTC5_REGISTER_OFFSET + CRTC0_REGISTER_OFFSET, + CRTC1_REGISTER_OFFSET, + CRTC2_REGISTER_OFFSET, + CRTC3_REGISTER_OFFSET, + CRTC4_REGISTER_OFFSET, + CRTC5_REGISTER_OFFSET }; static const u32 hpd_offsets[] = { - mmDC_HPD1_INT_STATUS - mmDC_HPD1_INT_STATUS, - mmDC_HPD2_INT_STATUS - mmDC_HPD1_INT_STATUS, - mmDC_HPD3_INT_STATUS - mmDC_HPD1_INT_STATUS, - mmDC_HPD4_INT_STATUS - mmDC_HPD1_INT_STATUS, - mmDC_HPD5_INT_STATUS - mmDC_HPD1_INT_STATUS, - mmDC_HPD6_INT_STATUS - mmDC_HPD1_INT_STATUS, + HPD0_REGISTER_OFFSET, + HPD1_REGISTER_OFFSET, + HPD2_REGISTER_OFFSET, + HPD3_REGISTER_OFFSET, + HPD4_REGISTER_OFFSET, + HPD5_REGISTER_OFFSET }; static const uint32_t dig_offsets[] = { - SI_CRTC0_REGISTER_OFFSET, - SI_CRTC1_REGISTER_OFFSET, - SI_CRTC2_REGISTER_OFFSET, - SI_CRTC3_REGISTER_OFFSET, - SI_CRTC4_REGISTER_OFFSET, - SI_CRTC5_REGISTER_OFFSET, + CRTC0_REGISTER_OFFSET, + CRTC1_REGISTER_OFFSET, + CRTC2_REGISTER_OFFSET, + CRTC3_REGISTER_OFFSET, + CRTC4_REGISTER_OFFSET, + CRTC5_REGISTER_OFFSET, (0x13830 - 0x7030) >> 2, }; @@ -206,9 +213,9 @@ static void dce_v6_0_page_flip(struct amdgpu_device 
*adev, /* update the scanout addresses */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset, upper_32_bits(crtc_base)); + /* writing to the low address triggers the update */ WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset, (u32)crtc_base); - /* post the write */ RREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + amdgpu_crtc->crtc_offset); } @@ -218,11 +225,11 @@ static int dce_v6_0_crtc_get_scanoutpos(struct amdgpu_device *adev, int crtc, { if ((crtc < 0) || (crtc >= adev->mode_info.num_crtc)) return -EINVAL; + *vbl = RREG32(mmCRTC_V_BLANK_START_END + crtc_offsets[crtc]); *position = RREG32(mmCRTC_STATUS_POSITION + crtc_offsets[crtc]); return 0; - } /** @@ -242,7 +249,8 @@ static bool dce_v6_0_hpd_sense(struct amdgpu_device *adev, if (hpd >= adev->mode_info.num_hpd) return connected; - if (RREG32(mmDC_HPD1_INT_STATUS + hpd_offsets[hpd]) & DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK) + if (RREG32(mmDC_HPD1_INT_STATUS + hpd_offsets[hpd]) & + DC_HPD1_INT_STATUS__DC_HPD1_SENSE_MASK) connected = true; return connected; @@ -370,13 +378,41 @@ static u32 dce_v6_0_hpd_get_gpio_reg(struct amdgpu_device *adev) return mmDC_GPIO_HPD_A; } +static bool dce_v6_0_is_display_hung(struct amdgpu_device *adev) +{ + u32 crtc_hung = 0; + u32 crtc_status[6]; + u32 i, j, tmp; + + for (i = 0; i < adev->mode_info.num_crtc; i++) { + if (RREG32(mmCRTC_CONTROL + crtc_offsets[i]) & CRTC_CONTROL__CRTC_MASTER_EN_MASK) { + crtc_status[i] = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]); + crtc_hung |= (1 << i); + } + } + + for (j = 0; j < 10; j++) { + for (i = 0; i < adev->mode_info.num_crtc; i++) { + if (crtc_hung & (1 << i)) { + tmp = RREG32(mmCRTC_STATUS_HV_COUNT + crtc_offsets[i]); + if (tmp != crtc_status[i]) + crtc_hung &= ~(1 << i); + } + } + if (crtc_hung == 0) + return false; + udelay(100); + } + + return true; +} + static void dce_v6_0_set_vga_render_state(struct amdgpu_device *adev, bool render) { if (!render) WREG32(mmVGA_RENDER_CONTROL, RREG32(mmVGA_RENDER_CONTROL) & VGA_VSTATUS_CNTL); - } static int dce_v6_0_get_num_crtc(struct amdgpu_device *adev) @@ -419,7 +455,6 @@ void dce_v6_0_disable_dce(struct amdgpu_device *adev) static void dce_v6_0_program_fmt(struct drm_encoder *encoder) { - struct drm_device *dev = encoder->dev; struct amdgpu_device *adev = drm_to_adev(dev); struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder); @@ -895,8 +930,8 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, wm_high.dram_channels = dram_channels; wm_high.num_heads = num_heads; - if (adev->pm.dpm_enabled) { /* watermark for low clocks */ + if (adev->pm.dpm_enabled) { wm_low.yclk = amdgpu_dpm_get_mclk(adev, true) * 10; wm_low.sclk = @@ -1006,6 +1041,20 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev, } /* watermark setup */ +/** + * dce_v6_0_line_buffer_adjust - Set up the line buffer + * + * @adev: amdgpu_device pointer + * @amdgpu_crtc: the selected display controller + * @mode: the current display mode on the selected display + * controller + * @other_mode: the display mode of another display controller + * that may be sharing the line buffer + * + * Setup up the line buffer allocation for + * the selected display controller (CIK). + * Returns the line buffer size in pixels. 
+ */ static u32 dce_v6_0_line_buffer_adjust(struct amdgpu_device *adev, struct amdgpu_crtc *amdgpu_crtc, struct drm_display_mode *mode, @@ -1347,13 +1396,13 @@ static void dce_v6_0_audio_enable(struct amdgpu_device *adev, static const u32 pin_offsets[7] = { - (0x1780 - 0x1780), - (0x1786 - 0x1780), - (0x178c - 0x1780), - (0x1792 - 0x1780), - (0x1798 - 0x1780), - (0x179d - 0x1780), - (0x17a4 - 0x1780), + AUD0_REGISTER_OFFSET, + AUD1_REGISTER_OFFSET, + AUD2_REGISTER_OFFSET, + AUD3_REGISTER_OFFSET, + AUD4_REGISTER_OFFSET, + AUD5_REGISTER_OFFSET, + AUD6_REGISTER_OFFSET, }; static int dce_v6_0_audio_init(struct amdgpu_device *adev) @@ -1386,6 +1435,8 @@ static int dce_v6_0_audio_init(struct amdgpu_device *adev) adev->mode_info.audio.pin[i].connected = false; adev->mode_info.audio.pin[i].offset = pin_offsets[i]; adev->mode_info.audio.pin[i].id = i; + /* disable audio. it will be set up later */ + /* XXX remove once we switch to ip funcs */ dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false); } @@ -2602,6 +2653,32 @@ static const struct drm_crtc_helper_funcs dce_v6_0_crtc_helper_funcs = { .get_scanout_position = amdgpu_crtc_get_scanout_position, }; +static void dce_v6_0_panic_flush(struct drm_plane *plane) +{ + struct drm_framebuffer *fb; + struct amdgpu_crtc *amdgpu_crtc; + struct amdgpu_device *adev; + uint32_t fb_format; + + if (!plane->fb) + return; + + fb = plane->fb; + amdgpu_crtc = to_amdgpu_crtc(plane->crtc); + adev = drm_to_adev(fb->dev); + + /* Disable DC tiling */ + fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); + fb_format &= ~GRPH_ARRAY_MODE(0x7); + WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); + +} + +static const struct drm_plane_helper_funcs dce_v6_0_drm_primary_plane_helper_funcs = { + .get_scanout_buffer = amdgpu_display_get_scanout_buffer, + .panic_flush = dce_v6_0_panic_flush, +}; + static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; @@ -2629,6 +2706,7 @@ static int dce_v6_0_crtc_init(struct amdgpu_device *adev, int index) amdgpu_crtc->encoder = NULL; amdgpu_crtc->connector = NULL; drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v6_0_crtc_helper_funcs); + drm_plane_helper_add(amdgpu_crtc->base.primary, &dce_v6_0_drm_primary_plane_helper_funcs); return 0; } @@ -2838,14 +2916,35 @@ static int dce_v6_0_resume(struct amdgpu_ip_block *ip_block) return amdgpu_display_resume_helper(adev); } -static bool dce_v6_0_is_idle(void *handle) +static bool dce_v6_0_is_idle(struct amdgpu_ip_block *ip_block) { return true; } static int dce_v6_0_soft_reset(struct amdgpu_ip_block *ip_block) { - DRM_INFO("xxxx: dce_v6_0_soft_reset --- no impl!!\n"); + u32 srbm_soft_reset = 0, tmp; + struct amdgpu_device *adev = ip_block->adev; + + if (dce_v6_0_is_display_hung(adev)) + srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK; + + if (srbm_soft_reset) { + tmp = RREG32(mmSRBM_SOFT_RESET); + tmp |= srbm_soft_reset; + dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp); + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); + + udelay(50); + + tmp &= ~srbm_soft_reset; + WREG32(mmSRBM_SOFT_RESET, tmp); + tmp = RREG32(mmSRBM_SOFT_RESET); + + /* Wait a little for things to settle down */ + udelay(50); + } return 0; } @@ -2862,22 +2961,22 @@ static void dce_v6_0_set_crtc_vblank_interrupt_state(struct amdgpu_device *adev, switch (crtc) { case 0: - reg_block = SI_CRTC0_REGISTER_OFFSET; + reg_block = CRTC0_REGISTER_OFFSET; break; case 1: - reg_block = SI_CRTC1_REGISTER_OFFSET; + reg_block = 
CRTC1_REGISTER_OFFSET; break; case 2: - reg_block = SI_CRTC2_REGISTER_OFFSET; + reg_block = CRTC2_REGISTER_OFFSET; break; case 3: - reg_block = SI_CRTC3_REGISTER_OFFSET; + reg_block = CRTC3_REGISTER_OFFSET; break; case 4: - reg_block = SI_CRTC4_REGISTER_OFFSET; + reg_block = CRTC4_REGISTER_OFFSET; break; case 5: - reg_block = SI_CRTC5_REGISTER_OFFSET; + reg_block = CRTC5_REGISTER_OFFSET; break; default: DRM_DEBUG("invalid crtc %d\n", crtc); @@ -3121,16 +3220,15 @@ static int dce_v6_0_hpd_irq(struct amdgpu_device *adev, } return 0; - } -static int dce_v6_0_set_clockgating_state(void *handle, +static int dce_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int dce_v6_0_set_powergating_state(void *handle, +static int dce_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; @@ -3254,8 +3352,7 @@ static void dce_v6_0_ext_commit(struct drm_encoder *encoder) } -static void -dce_v6_0_ext_mode_set(struct drm_encoder *encoder, +static void dce_v6_0_ext_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode, struct drm_display_mode *adjusted_mode) { @@ -3267,8 +3364,7 @@ static void dce_v6_0_ext_disable(struct drm_encoder *encoder) } -static void -dce_v6_0_ext_dpms(struct drm_encoder *encoder, int mode) +static void dce_v6_0_ext_dpms(struct drm_encoder *encoder, int mode) { } @@ -3339,7 +3435,6 @@ static void dce_v6_0_encoder_add(struct amdgpu_device *adev, amdgpu_encoder->devices |= supported_device; return; } - } /* add a new one */ diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 04b79ff87f75..07358546581f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -1395,13 +1395,13 @@ static void dce_v8_0_audio_enable(struct amdgpu_device *adev, } static const u32 pin_offsets[7] = { - (0x1780 - 0x1780), - (0x1786 - 0x1780), - (0x178c - 0x1780), - (0x1792 - 0x1780), - (0x1798 - 0x1780), - (0x179d - 0x1780), - (0x17a4 - 0x1780), + AUD0_REGISTER_OFFSET, + AUD1_REGISTER_OFFSET, + AUD2_REGISTER_OFFSET, + AUD3_REGISTER_OFFSET, + AUD4_REGISTER_OFFSET, + AUD5_REGISTER_OFFSET, + AUD6_REGISTER_OFFSET, }; static int dce_v8_0_audio_init(struct amdgpu_device *adev) @@ -2613,6 +2613,31 @@ static const struct drm_crtc_helper_funcs dce_v8_0_crtc_helper_funcs = { .get_scanout_position = amdgpu_crtc_get_scanout_position, }; +static void dce_v8_0_panic_flush(struct drm_plane *plane) +{ + struct drm_framebuffer *fb; + struct amdgpu_crtc *amdgpu_crtc; + struct amdgpu_device *adev; + uint32_t fb_format; + + if (!plane->fb) + return; + + fb = plane->fb; + amdgpu_crtc = to_amdgpu_crtc(plane->crtc); + adev = drm_to_adev(fb->dev); + + /* Disable DC tiling */ + fb_format = RREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset); + fb_format &= ~GRPH_CONTROL__GRPH_ARRAY_MODE_MASK; + WREG32(mmGRPH_CONTROL + amdgpu_crtc->crtc_offset, fb_format); +} + +static const struct drm_plane_helper_funcs dce_v8_0_drm_primary_plane_helper_funcs = { + .get_scanout_buffer = amdgpu_display_get_scanout_buffer, + .panic_flush = dce_v8_0_panic_flush, +}; + static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index) { struct amdgpu_crtc *amdgpu_crtc; @@ -2640,6 +2665,7 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index) amdgpu_crtc->encoder = NULL; amdgpu_crtc->connector = NULL; drm_crtc_helper_add(&amdgpu_crtc->base, &dce_v8_0_crtc_helper_funcs); + drm_plane_helper_add(amdgpu_crtc->base.primary, 
&dce_v8_0_drm_primary_plane_helper_funcs); return 0; } @@ -2861,7 +2887,7 @@ static int dce_v8_0_resume(struct amdgpu_ip_block *ip_block) return amdgpu_display_resume_helper(adev); } -static bool dce_v8_0_is_idle(void *handle) +static bool dce_v8_0_is_idle(struct amdgpu_ip_block *ip_block) { return true; } @@ -3212,13 +3238,13 @@ static int dce_v8_0_hpd_irq(struct amdgpu_device *adev, } -static int dce_v8_0_set_clockgating_state(void *handle, +static int dce_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int dce_v8_0_set_powergating_state(void *handle, +static int dce_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index 24dce803a829..6d514efb0a6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -40,11 +40,11 @@ #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" #include "soc15.h" -#include "soc15d.h" #include "soc15_common.h" #include "clearstate_gfx10.h" #include "v10_structs.h" #include "gfx_v10_0.h" +#include "gfx_v10_0_cleaner_shader.h" #include "nbio_v2_3.h" /* @@ -3673,7 +3673,7 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring, static void gfx_v10_0_update_spm_vmid_internal(struct amdgpu_device *adev, unsigned int vmid); -static int gfx_v10_0_set_powergating_state(void *handle, +static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask) { @@ -3789,12 +3789,65 @@ static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring, gfx_v10_0_ring_invalidate_tlbs(kiq_ring, pasid, flush_type, all_hub, 1); } +static void gfx_v10_0_kiq_reset_hw_queue(struct amdgpu_ring *kiq_ring, uint32_t queue_type, + uint32_t me_id, uint32_t pipe_id, uint32_t queue_id, + uint32_t xcc_id, uint32_t vmid) +{ + struct amdgpu_device *adev = kiq_ring->adev; + unsigned i; + uint32_t tmp; + + /* enter save mode */ + amdgpu_gfx_rlc_enter_safe_mode(adev, xcc_id); + mutex_lock(&adev->srbm_mutex); + nv_grbm_select(adev, me_id, pipe_id, queue_id, 0); + + if (queue_type == AMDGPU_RING_TYPE_COMPUTE) { + WREG32_SOC15(GC, 0, mmCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, mmSPI_COMPUTE_QUEUE_RESET, 0x1); + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, mmCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + dev_err(adev->dev, "fail to wait on hqd deactive\n"); + } else if (queue_type == AMDGPU_RING_TYPE_GFX) { + WREG32_SOC15(GC, 0, mmGRBM_GFX_INDEX, + (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT)); + tmp = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + if (pipe_id == 0) + tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id); + else + tmp = REG_SET_FIELD(tmp, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id); + WREG32_SOC15(GC, 0, mmCP_VMID_RESET, tmp); + + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, mmCP_GFX_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) + dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n"); + } else { + dev_err(adev->dev, "reset queue_type(%d) not supported\n", queue_type); + } + + nv_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + 
/* exit safe mode */ + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); +} + static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = { .kiq_set_resources = gfx10_kiq_set_resources, .kiq_map_queues = gfx10_kiq_map_queues, .kiq_unmap_queues = gfx10_kiq_unmap_queues, .kiq_query_status = gfx10_kiq_query_status, .kiq_invalidate_tlbs = gfx10_kiq_invalidate_tlbs, + .kiq_reset_hw_queue = gfx_v10_0_kiq_reset_hw_queue, .set_resources_size = 8, .map_queues_size = 7, .unmap_queues_size = 6, @@ -4036,7 +4089,7 @@ static int gfx_v10_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err2: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: amdgpu_device_wb_free(adev, index); @@ -4138,18 +4191,21 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp%s.bin", ucode_prefix, wks); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me%s.bin", ucode_prefix, wks); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce%s.bin", ucode_prefix, wks); if (err) goto out; @@ -4173,6 +4229,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec%s.bin", ucode_prefix, wks); if (err) goto out; @@ -4180,6 +4237,7 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC1_JT); err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec2%s.bin", ucode_prefix, wks); if (!err) { amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); @@ -4695,6 +4753,8 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(10, 1, 10): case IP_VERSION(10, 1, 1): @@ -4733,6 +4793,39 @@ static int gfx_v10_0_sw_init(struct amdgpu_ip_block *ip_block) break; } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(10, 1, 10): + case IP_VERSION(10, 1, 1): + case IP_VERSION(10, 1, 2): + adev->gfx.cleaner_shader_ptr = gfx_10_1_10_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_10_1_10_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 101 && + adev->gfx.pfp_fw_version >= 158 && + adev->gfx.mec_fw_version >= 152) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; + case IP_VERSION(10, 3, 0): + case IP_VERSION(10, 3, 2): + case IP_VERSION(10, 3, 4): + case IP_VERSION(10, 3, 5): + adev->gfx.cleaner_shader_ptr = gfx_10_3_0_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_10_3_0_cleaner_shader_hex); + if (adev->gfx.me_fw_version >= 64 && + adev->gfx.pfp_fw_version >= 100 && + adev->gfx.mec_fw_version >= 122) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + 
if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; break; @@ -5952,7 +6045,7 @@ static int gfx_v10_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) else WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); - if (adev->job_hang && !enable) + if (amdgpu_in_reset(adev) && !enable) return 0; for (i = 0; i < adev->usec_timeout; i++) { @@ -6599,17 +6692,13 @@ static void gfx_v10_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp); + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS_Sienna_Cichlid, tmp | 0x80); break; default: tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80); break; } } @@ -7448,6 +7537,8 @@ static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + cancel_delayed_work_sync(&adev->gfx.idle_work); + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); @@ -7457,7 +7548,7 @@ static int gfx_v10_0_hw_fini(struct amdgpu_ip_block *ip_block) * otherwise the gfxoff disallowing will be failed to set. */ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 3, 1)) - gfx_v10_0_set_powergating_state(ip_block->adev, AMD_PG_STATE_UNGATE); + gfx_v10_0_set_powergating_state(ip_block, AMD_PG_STATE_UNGATE); if (!adev->no_hw_access) { if (amdgpu_async_gfx_ring) { @@ -7492,9 +7583,9 @@ static int gfx_v10_0_resume(struct amdgpu_ip_block *ip_block) return gfx_v10_0_hw_init(ip_block); } -static bool gfx_v10_0_is_idle(void *handle) +static bool gfx_v10_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) @@ -8345,10 +8436,10 @@ static const struct amdgpu_rlc_funcs gfx_v10_0_rlc_funcs_sriov = { .is_rlcg_access_range = gfx_v10_0_is_rlcg_access_range, }; -static int gfx_v10_0_set_powergating_state(void *handle, +static int gfx_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (amdgpu_sriov_vf(adev)) @@ -8383,10 +8474,10 @@ static int gfx_v10_0_set_powergating_state(void *handle, return 0; } -static int gfx_v10_0_set_clockgating_state(void *handle, +static int gfx_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -8412,9 +8503,9 @@ static int gfx_v10_0_set_clockgating_state(void *handle, return 0; } -static void gfx_v10_0_get_clockgating_state(void *handle, u64 *flags) +static void gfx_v10_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - 
struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; /* AMD_CG_SUPPORT_GFX_FGCG */ @@ -9729,6 +9820,20 @@ static void gfx_v10_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ } +static void gfx_v10_0_ring_begin_use(struct amdgpu_ring *ring) +{ + amdgpu_gfx_profile_ring_begin_use(ring); + + amdgpu_gfx_enforce_isolation_ring_begin_use(ring); +} + +static void gfx_v10_0_ring_end_use(struct amdgpu_ring *ring) +{ + amdgpu_gfx_profile_ring_end_use(ring); + + amdgpu_gfx_enforce_isolation_ring_end_use(ring); +} + static const struct amd_ip_funcs gfx_v10_0_ip_funcs = { .name = "gfx_v10_0", .early_init = gfx_v10_0_early_init, @@ -9804,8 +9909,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = { .emit_mem_sync = gfx_v10_0_emit_mem_sync, .reset = gfx_v10_0_reset_kgq, .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader, - .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, - .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, + .begin_use = gfx_v10_0_ring_begin_use, + .end_use = gfx_v10_0_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { @@ -9845,8 +9950,8 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = { .emit_mem_sync = gfx_v10_0_emit_mem_sync, .reset = gfx_v10_0_reset_kcq, .emit_cleaner_shader = gfx_v10_0_ring_emit_cleaner_shader, - .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, - .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, + .begin_use = gfx_v10_0_ring_begin_use, + .end_use = gfx_v10_0_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h new file mode 100644 index 000000000000..5255378af53c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* Define the cleaner shader gfx_10_1_10 */ +static const u32 gfx_10_1_10_cleaner_shader_hex[] = { + 0xb0804004, 0xbf8a0000, + 0xbf068100, 0xbf840023, + 0xbe8203b8, 0xbefc0380, + 0x7e008480, 0x7e028480, + 0x7e048480, 0x7e068480, + 0x7e088480, 0x7e0a8480, + 0x7e0c8480, 0x7e0e8480, + 0xbefc0302, 0x80828802, + 0xbf84fff5, 0xbe8203ff, + 0x80000000, 0x87020102, + 0xbf840012, 0xbefe03c1, + 0xbeff03c1, 0xd7650001, + 0x0001007f, 0xd7660001, + 0x0002027e, 0x16020288, + 0xbe8203bf, 0xbefc03c1, + 0xd9382000, 0x00020201, + 0xd9386040, 0x00040401, + 0xd70f6a01, 0x000202ff, + 0x00000400, 0x80828102, + 0xbf84fff7, 0xbefc03ff, + 0x00000068, 0xbe803080, + 0xbe813080, 0xbe823080, + 0xbe833080, 0x80fc847c, + 0xbf84fffa, 0xbeea0480, + 0xbeec0480, 0xbeee0480, + 0xbef00480, 0xbef20480, + 0xbef40480, 0xbef60480, + 0xbef80480, 0xbefa0480, + 0xbf810000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, +}; + +/* Define the cleaner shader gfx_10_3_0 */ +static const u32 gfx_10_3_0_cleaner_shader_hex[] = { + 0xb0804004, 0xbf8a0000, + 0xbe8203b8, 0xbefc0380, + 0x7e008480, 0x7e028480, + 0x7e048480, 0x7e068480, + 0x7e088480, 0x7e0a8480, + 0x7e0c8480, 0x7e0e8480, + 0xbefc0302, 0x80828802, + 0xbf84fff5, 0xbe8203ff, + 0x80000000, 0x87020002, + 0xbf840012, 0xbefe03c1, + 0xbeff03c1, 0xd7650001, + 0x0001007f, 0xd7660001, + 0x0002027e, 0x16020288, + 0xbe8203bf, 0xbefc03c1, + 0xd9382000, 0x00020201, + 0xd9386040, 0x00040401, + 0xd70f6a01, 0x000202ff, + 0x00000400, 0x80828102, + 0xbf84fff7, 0xbefc03ff, + 0x00000068, 0xbe803080, + 0xbe813080, 0xbe823080, + 0xbe833080, 0x80fc847c, + 0xbf84fffa, 0xbeea0480, + 0xbeec0480, 0xbeee0480, + 0xbef00480, 0xbef20480, + 0xbef40480, 0xbef60480, + 0xbef80480, 0xbefa0480, + 0xbf810000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, + 0xbf9f0000, 0xbf9f0000, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm new file mode 100644 index 000000000000..9ba3359253c9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm @@ -0,0 +1,126 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 256 Dwords cleaner shader. 
+ +// GFX10.1 : Clear SGPRs, VGPRs and LDS +// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot +// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD +// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS) +// It takes 2 workgroups to use all of LDS: one on each CU of the WGP +// Each wave clears SGPRs 0 - 107 +// Each wave clears VGPRs 0 - 63 +// The first wave of the workgroup clears its 64KB of LDS +// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup +// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared. + + +shader main + asic(GFX10.1) + type(CS) + wave_size(32) +// Note: original source code from SQ team + +// +// Create 32 waves in a threadgroup (CS waves) +// Each allocates 64 VGPRs +// The workgroup allocates all of LDS (64kbytes) +// +// Takes about 2500 clocks to run. +// (theorhetical fastest = 1024clks vgpr + 640lds = 1660 clks) +// + S_BARRIER + s_cmp_eq_u32 s0, 1 // Bit0 is set, sgpr0 is set then clear VGPRS and LDS as FW set COMPUTE_USER_DATA_0 + s_cbranch_scc0 label_0023 // Clean VGPRs and LDS if sgpr0 of wave is set, scc = (s0 == 1) + + s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance) + s_mov_b32 m0, 0 + // + // CLEAR VGPRs + // +label_0005: + v_movreld_b32 v0, 0 + v_movreld_b32 v1, 0 + v_movreld_b32 v2, 0 + v_movreld_b32 v3, 0 + v_movreld_b32 v4, 0 + v_movreld_b32 v5, 0 + v_movreld_b32 v6, 0 + v_movreld_b32 v7, 0 + s_mov_b32 m0, s2 + s_sub_u32 s2, s2, 8 + s_cbranch_scc0 label_0005 + // + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup + // CLEAR LDS + // + s_mov_b32 exec_lo, 0xffffffff + s_mov_b32 exec_hi, 0xffffffff + v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63) + v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63) + v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte) + s_mov_b32 s2, 0x00000003f // 64 loop iterations + s_mov_b32 m0, 0xffffffff + // Clear all of LDS space + // Each FirstWave of WorkGroup clears 64kbyte block + +label_001F: + ds_write2_b64 v1, v[2:3], v[2:3] offset1:32 + ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96 + v_add_co_u32 v1, vcc, 0x00000400, v1 + s_sub_u32 s2, s2, 1 + s_cbranch_scc0 label_001F + + // + // CLEAR SGPRs + // +label_0023: + s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance) +label_sgpr_loop: + s_movreld_b32 s0, 0 + s_movreld_b32 s1, 0 + s_movreld_b32 s2, 0 + s_movreld_b32 s3, 0 + s_sub_u32 m0, m0, 4 + s_cbranch_scc0 label_sgpr_loop + + //clear vcc + s_mov_b64 vcc, 0 //clear vcc + //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_lo, 0 //clear flat scratch lo SGPR + //s_setreg_imm32_b32 hw_reg_shader_flat_scratch_hi, 0 //clear flat scratch hi SGPR + s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1 + s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3 + s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5 + s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7 + s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9 + s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11 + s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13 + s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15 + + s_endpgm + +end + + diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm 
b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm new file mode 100644 index 000000000000..0e1c246166c0 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_3_0_cleaner_shader.asm @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright 2025 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +// This shader is to clean LDS, SGPRs and VGPRs. It is first 64 Dwords or 256 bytes of 192 Dwords cleaner shader. +//To turn this shader program on for complitaion change this to main and lower shader main to main_1 + +// GFX10.3 : Clear SGPRs, VGPRs and LDS +// Launch 32 waves per CU (16 per SIMD) as a workgroup (threadgroup) to fill every wave slot +// Waves are "wave32" and have 64 VGPRs each, which uses all 1024 VGPRs per SIMD +// Waves are launched in "CU" mode, and the workgroup shares 64KB of LDS (half of the WGP's LDS) +// It takes 2 workgroups to use all of LDS: one on each CU of the WGP +// Each wave clears SGPRs 0 - 107 +// Each wave clears VGPRs 0 - 63 +// The first wave of the workgroup clears its 64KB of LDS +// The shader starts with "S_BARRIER" to ensure SPI has launched all waves of the workgroup +// before any wave in the workgroup could end. Without this, it is possible not all SGPRs get cleared. + + +shader main + asic(GFX10) + type(CS) + wave_size(32) +// Note: original source code from SQ team + +// +// Create 32 waves in a threadgroup (CS waves) +// Each allocates 64 VGPRs +// The workgroup allocates all of LDS (64kbytes) +// +// Takes about 2500 clocks to run. 
+// (theorhetical fastest = 1024clks vgpr + 640lds = 1660 clks) +// + S_BARRIER + s_mov_b32 s2, 0x00000038 // Loop 64/8=8 times (loop unrolled for performance) + s_mov_b32 m0, 0 + // + // CLEAR VGPRs + // +label_0005: + v_movreld_b32 v0, 0 + v_movreld_b32 v1, 0 + v_movreld_b32 v2, 0 + v_movreld_b32 v3, 0 + v_movreld_b32 v4, 0 + v_movreld_b32 v5, 0 + v_movreld_b32 v6, 0 + v_movreld_b32 v7, 0 + s_mov_b32 m0, s2 + s_sub_u32 s2, s2, 8 + s_cbranch_scc0 label_0005 + // + s_mov_b32 s2, 0x80000000 // Bit31 is first_wave + s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set + s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup + // CLEAR LDS + // + s_mov_b32 exec_lo, 0xffffffff + s_mov_b32 exec_hi, 0xffffffff + v_mbcnt_lo_u32_b32 v1, exec_hi, 0 // Set V1 to thread-ID (0..63) + v_mbcnt_hi_u32_b32 v1, exec_lo, v1 // Set V1 to thread-ID (0..63) + v_mul_u32_u24 v1, 0x00000008, v1 // * 8, so each thread is a double-dword address (8byte) + s_mov_b32 s2, 0x00000003f // 64 loop iterations + s_mov_b32 m0, 0xffffffff + // Clear all of LDS space + // Each FirstWave of WorkGroup clears 64kbyte block + +label_001F: + ds_write2_b64 v1, v[2:3], v[2:3] offset1:32 + ds_write2_b64 v1, v[4:5], v[4:5] offset0:64 offset1:96 + v_add_co_u32 v1, vcc, 0x00000400, v1 + s_sub_u32 s2, s2, 1 + s_cbranch_scc0 label_001F + + // + // CLEAR SGPRs + // +label_0023: + s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance) +label_sgpr_loop: + s_movreld_b32 s0, 0 + s_movreld_b32 s1, 0 + s_movreld_b32 s2, 0 + s_movreld_b32 s3, 0 + s_sub_u32 m0, m0, 4 + s_cbranch_scc0 label_sgpr_loop + + //clear vcc + s_mov_b32 flat_scratch_lo, 0 //clear flat scratch lo SGPR + s_mov_b32 flat_scratch_hi, 0 //clear flat scratch hi SGPR + s_mov_b64 vcc, 0 //clear vcc + s_mov_b64 ttmp0, 0 //Clear ttmp0 and ttmp1 + s_mov_b64 ttmp2, 0 //Clear ttmp2 and ttmp3 + s_mov_b64 ttmp4, 0 //Clear ttmp4 and ttmp5 + s_mov_b64 ttmp6, 0 //Clear ttmp6 and ttmp7 + s_mov_b64 ttmp8, 0 //Clear ttmp8 and ttmp9 + s_mov_b64 ttmp10, 0 //Clear ttmp10 and ttmp11 + s_mov_b64 ttmp12, 0 //Clear ttmp12 and ttmp13 + s_mov_b64 ttmp14, 0 //Clear ttmp14 and ttmp15 + + s_endpgm + +end + + diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 2ae058a224f4..d8772cd6db63 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -29,7 +29,6 @@ #include "amdgpu_gfx.h" #include "amdgpu_psp.h" #include "amdgpu_smu.h" -#include "amdgpu_atomfirmware.h" #include "imu_v11_0.h" #include "soc21.h" #include "nvd.h" @@ -42,7 +41,6 @@ #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" #include "soc15.h" -#include "soc15d.h" #include "clearstate_gfx11.h" #include "v11_structs.h" #include "gfx_v11_0.h" @@ -64,6 +62,23 @@ #define regPC_CONFIG_CNTL_1 0x194d #define regPC_CONFIG_CNTL_1_BASE_IDX 1 +#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100 +#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000 +#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000 +#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01 +#define regCP_GFX_HQD_CNTL_DEFAULT 0x00a00000 +#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000 + +#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006 +#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_MQD_CONTROL_DEFAULT 0x00000100 +#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509 +#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define 
regCP_HQD_PQ_RPTR_DEFAULT 0x00000000 +#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501 +#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000 + MODULE_FIRMWARE("amdgpu/gc_11_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_0_mec.bin"); @@ -98,6 +113,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_2_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_2_me.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_2_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_2_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_3_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_3_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_3_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_3_rlc.bin"); static const struct amdgpu_hwip_reg_entry gc_reg_list_11_0[] = { SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), @@ -615,7 +634,7 @@ static int gfx_v11_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err2: if (!ring->is_mes_queue) - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: if (!ring->is_mes_queue) @@ -639,6 +658,7 @@ static int gfx_v11_0_init_toc_microcode(struct amdgpu_device *adev, const char * int err = 0; err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_toc.bin", ucode_prefix); if (err) goto out; @@ -688,6 +708,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", ucode_prefix); if (err) goto out; @@ -705,6 +726,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", ucode_prefix); if (err) goto out; @@ -720,9 +742,11 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 0) && adev->pdev->revision == 0xCE) err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/gc_11_0_0_rlc_1.bin"); else err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", ucode_prefix); if (err) goto out; @@ -735,6 +759,7 @@ static int gfx_v11_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", ucode_prefix); if (err) goto out; @@ -1081,6 +1106,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev) case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): adev->gfx.config.max_hw_contexts = 8; adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1546,6 +1572,8 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(11, 0, 0): case IP_VERSION(11, 0, 2): @@ -1562,6 +1590,7 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): adev->gfx.me.num_me = 1; adev->gfx.me.num_pipe_per_me = 1; adev->gfx.me.num_queue_per_pipe = 1; @@ -1597,6 +1626,20 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block) } } break; + case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 1): + 
adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex; + adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex); + if (adev->gfx.mec_fw_version >= 26 && + adev->mes.fw_version[0] >= 114) { + adev->gfx.enable_cleaner_shader = true; + r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size); + if (r) { + adev->gfx.enable_cleaner_shader = false; + dev_err(adev->dev, "Failed to initialize cleaner shader\n"); + } + } + break; default: adev->gfx.enable_cleaner_shader = false; break; @@ -1885,6 +1928,7 @@ static u32 gfx_v11_0_get_rb_active_bitmap(struct amdgpu_device *adev) static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) { + u32 rb_bitmap_per_sa; u32 rb_bitmap_width_per_sa; u32 max_sa; u32 active_sa_bitmap; @@ -1902,9 +1946,11 @@ static void gfx_v11_0_setup_rb(struct amdgpu_device *adev) adev->gfx.config.max_sh_per_se; rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / adev->gfx.config.max_sh_per_se; + rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); + for (i = 0; i < max_sa; i++) { if (active_sa_bitmap & (1 << i)) - active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); + active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); } active_rb_bitmap &= global_active_rb_bitmap; @@ -2917,7 +2963,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev) IP_VERSION(11, 0, 4) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 1) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2)) + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 2) || + amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 5, 3)) bootload_status = RREG32_SOC15(GC, 0, regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1); else @@ -3918,9 +3965,7 @@ static void gfx_v11_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); } static void gfx_v11_0_cp_set_doorbell_range(struct amdgpu_device *adev) @@ -3951,7 +3996,7 @@ static void gfx_v11_0_gfx_mqd_set_priority(struct amdgpu_device *adev, if (prop->hqd_pipe_priority == AMDGPU_GFX_PIPE_PRIO_HIGH) priority = 1; - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY); + tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, priority); mqd->cp_gfx_hqd_queue_priority = tmp; } @@ -3973,14 +4018,14 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); /* set up mqd control */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL); + tmp = regCP_GFX_MQD_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); mqd->cp_gfx_mqd_control = tmp; /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID); + tmp = regCP_GFX_HQD_VMID_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); mqd->cp_gfx_hqd_vmid = 0; @@ -3988,7 +4033,7 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, gfx_v11_0_gfx_mqd_set_priority(adev, mqd, prop); /* set up time quantum */ - tmp = 
RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM); + tmp = regCP_GFX_HQD_QUANTUM_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); mqd->cp_gfx_hqd_quantum = tmp; @@ -4010,7 +4055,7 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ rb_bufsz = order_base_2(prop->queue_size / 4) - 1; - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL); + tmp = regCP_GFX_HQD_CNTL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); #ifdef __BIG_ENDIAN @@ -4019,7 +4064,7 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ - tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); + tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT; if (prop->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_OFFSET, prop->doorbell_index); @@ -4031,7 +4076,7 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_rb_doorbell_control = tmp; /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR); + mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT; /* active the queue */ mqd->cp_gfx_hqd_active = 1; @@ -4117,14 +4162,14 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL); + tmp = regCP_HQD_EOP_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, (order_base_2(GFX11_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; /* enable doorbell? 
*/ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; if (prop->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, @@ -4153,7 +4198,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); /* set MQD vmid to 0 */ - tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL); + tmp = regCP_MQD_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); mqd->cp_mqd_control = tmp; @@ -4163,7 +4208,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL); + tmp = regCP_HQD_PQ_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, @@ -4189,7 +4234,7 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = 0; /* enable the doorbell if requested */ if (prop->use_doorbell) { - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_OFFSET, prop->doorbell_index); @@ -4204,17 +4249,17 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_doorbell_control = tmp; /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR); + mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; /* set the vmid for the queue */ mqd->cp_hqd_vmid = 0; - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE); + tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); mqd->cp_hqd_persistent_state = tmp; /* set MIN_IB_AVAIL_SIZE */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL); + tmp = regCP_HQD_IB_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; @@ -4727,6 +4772,8 @@ static int gfx_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + cancel_delayed_work_sync(&adev->gfx.idle_work); + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); @@ -4771,9 +4818,9 @@ static int gfx_v11_0_resume(struct amdgpu_ip_block *ip_block) return gfx_v11_0_hw_init(ip_block); } -static bool gfx_v11_0_is_idle(void *handle) +static bool gfx_v11_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) @@ -5441,6 +5488,7 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable) case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1); break; default: @@ -5458,10 +5506,10 @@ static void gfx_v11_cntl_pg(struct amdgpu_device *adev, bool enable) amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } -static int gfx_v11_0_set_powergating_state(void *handle, +static int gfx_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = 
(struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (amdgpu_sriov_vf(adev)) @@ -5478,6 +5526,7 @@ static int gfx_v11_0_set_powergating_state(void *handle, case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): if (!enable) amdgpu_gfx_off_ctrl(adev, false); @@ -5494,10 +5543,10 @@ static int gfx_v11_0_set_powergating_state(void *handle, return 0; } -static int gfx_v11_0_set_clockgating_state(void *handle, +static int gfx_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -5511,6 +5560,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle, case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): gfx_v11_0_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE); break; @@ -5521,9 +5571,9 @@ static int gfx_v11_0_set_clockgating_state(void *handle, return 0; } -static void gfx_v11_0_get_clockgating_state(void *handle, u64 *flags) +static void gfx_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; /* AMD_CG_SUPPORT_GFX_MGCG */ @@ -6646,30 +6696,14 @@ static int gfx_v11_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) static int gfx_v11_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; - int i, r = 0; + int r = 0; if (amdgpu_sriov_vf(adev)) return -EINVAL; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - mutex_lock(&adev->srbm_mutex); - soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); - WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); - - /* make sure dequeue is complete*/ - for (i = 0; i < adev->usec_timeout; i++) { - if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) - break; - udelay(1); - } - if (i >= adev->usec_timeout) - r = -ETIMEDOUT; - soc21_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); if (r) { - dev_err(adev->dev, "fail to wait on hqd deactivate\n"); + dev_err(adev->dev, "reset via MMIO failed %d\n", r); return r; } @@ -6835,6 +6869,20 @@ static void gfx_v11_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ } +static void gfx_v11_0_ring_begin_use(struct amdgpu_ring *ring) +{ + amdgpu_gfx_profile_ring_begin_use(ring); + + amdgpu_gfx_enforce_isolation_ring_begin_use(ring); +} + +static void gfx_v11_0_ring_end_use(struct amdgpu_ring *ring) +{ + amdgpu_gfx_profile_ring_end_use(ring); + + amdgpu_gfx_enforce_isolation_ring_end_use(ring); +} + static const struct amd_ip_funcs gfx_v11_0_ip_funcs = { .name = "gfx_v11_0", .early_init = gfx_v11_0_early_init, @@ -6909,8 +6957,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = { .emit_mem_sync = gfx_v11_0_emit_mem_sync, .reset = gfx_v11_0_reset_kgq, .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, - .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, - .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, + .begin_use = gfx_v11_0_ring_begin_use, + .end_use = 
gfx_v11_0_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { @@ -6951,8 +6999,8 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_compute = { .emit_mem_sync = gfx_v11_0_emit_mem_sync, .reset = gfx_v11_0_reset_kcq, .emit_cleaner_shader = gfx_v11_0_ring_emit_cleaner_shader, - .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, - .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, + .begin_use = gfx_v11_0_ring_begin_use, + .end_use = gfx_v11_0_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index da327ab48a57..dceb5ad38862 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -29,7 +29,6 @@ #include "amdgpu_gfx.h" #include "amdgpu_psp.h" #include "amdgpu_smu.h" -#include "amdgpu_atomfirmware.h" #include "imu_v12_0.h" #include "soc24.h" #include "nvd.h" @@ -40,7 +39,6 @@ #include "ivsrcid/gfx/irqsrcs_gfx_11_0_0.h" #include "soc15.h" -#include "soc15d.h" #include "clearstate_gfx12.h" #include "v12_structs.h" #include "gfx_v12_0.h" @@ -52,6 +50,24 @@ #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L +#define regCP_GFX_MQD_CONTROL_DEFAULT 0x00000100 +#define regCP_GFX_HQD_VMID_DEFAULT 0x00000000 +#define regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT 0x00000000 +#define regCP_GFX_HQD_QUANTUM_DEFAULT 0x00000a01 +#define regCP_GFX_HQD_CNTL_DEFAULT 0x00f00000 +#define regCP_RB_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_GFX_HQD_RPTR_DEFAULT 0x00000000 + +#define regCP_HQD_EOP_CONTROL_DEFAULT 0x00000006 +#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_MQD_CONTROL_DEFAULT 0x00000100 +#define regCP_HQD_PQ_CONTROL_DEFAULT 0x00308509 +#define regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT 0x00000000 +#define regCP_HQD_PQ_RPTR_DEFAULT 0x00000000 +#define regCP_HQD_PERSISTENT_STATE_DEFAULT 0x0be05501 +#define regCP_HQD_IB_CONTROL_DEFAULT 0x00300000 + + MODULE_FIRMWARE("amdgpu/gc_12_0_0_pfp.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_0_me.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_0_mec.bin"); @@ -513,7 +529,7 @@ static int gfx_v12_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err2: if (!ring->is_mes_queue) - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: if (!ring->is_mes_queue) @@ -537,6 +553,7 @@ static int gfx_v12_0_init_toc_microcode(struct amdgpu_device *adev, const char * int err = 0; err = amdgpu_ucode_request(adev, &adev->psp.toc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_toc.bin", ucode_prefix); if (err) goto out; @@ -566,6 +583,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", ucode_prefix); if (err) goto out; @@ -573,6 +591,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK); err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", ucode_prefix); if (err) goto out; @@ -581,6 +600,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) { err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", ucode_prefix); if (err) goto out; @@ -593,6 +613,7 @@ static int gfx_v12_0_init_microcode(struct amdgpu_device 
*adev) } err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", ucode_prefix); if (err) goto out; @@ -1326,6 +1347,8 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) int xcc_id = 0; struct amdgpu_device *adev = ip_block->adev; + INIT_DELAYED_WORK(&adev->gfx.idle_work, amdgpu_gfx_profile_idle_work_handler); + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): @@ -1347,6 +1370,14 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) } switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + if (adev->gfx.me_fw_version >= 2480 && + adev->gfx.pfp_fw_version >= 2530 && + adev->gfx.mec_fw_version >= 2680 && + adev->mes.fw_version[0] >= 100) + adev->gfx.enable_cleaner_shader = true; + break; default: adev->gfx.enable_cleaner_shader = false; break; @@ -1437,11 +1468,19 @@ static int gfx_v12_0_sw_init(struct amdgpu_ip_block *ip_block) } } - /* TODO: Add queue reset mask when FW fully supports it */ adev->gfx.gfx_supported_reset = amdgpu_get_soft_full_reset_mask(&adev->gfx.gfx_ring[0]); adev->gfx.compute_supported_reset = amdgpu_get_soft_full_reset_mask(&adev->gfx.compute_ring[0]); + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 0, 1): + if ((adev->gfx.me_fw_version >= 2660) && + (adev->gfx.mec_fw_version >= 2920)) { + adev->gfx.compute_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + adev->gfx.gfx_supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + } + } if (!adev->enable_mes_kiq) { r = amdgpu_gfx_kiq_init(adev, GFX12_MEC_HPD_SIZE, 0); @@ -1610,6 +1649,7 @@ static u32 gfx_v12_0_get_rb_active_bitmap(struct amdgpu_device *adev) static void gfx_v12_0_setup_rb(struct amdgpu_device *adev) { + u32 rb_bitmap_per_sa; u32 rb_bitmap_width_per_sa; u32 max_sa; u32 active_sa_bitmap; @@ -1627,12 +1667,14 @@ static void gfx_v12_0_setup_rb(struct amdgpu_device *adev) adev->gfx.config.max_sh_per_se; rb_bitmap_width_per_sa = adev->gfx.config.max_backends_per_se / adev->gfx.config.max_sh_per_se; + rb_bitmap_per_sa = amdgpu_gfx_create_bitmask(rb_bitmap_width_per_sa); + for (i = 0; i < max_sa; i++) { if (active_sa_bitmap & (1 << i)) - active_rb_bitmap |= (0x3 << (i * rb_bitmap_width_per_sa)); + active_rb_bitmap |= (rb_bitmap_per_sa << (i * rb_bitmap_width_per_sa)); } - active_rb_bitmap |= global_active_rb_bitmap; + active_rb_bitmap &= global_active_rb_bitmap; adev->gfx.config.backend_enable_mask = active_rb_bitmap; adev->gfx.config.num_rbs = hweight32(active_rb_bitmap); } @@ -2413,7 +2455,7 @@ static int gfx_v12_0_cp_gfx_load_me_microcode_rs64(struct amdgpu_device *adev) (void **)&adev->gfx.me.me_fw_data_ptr); if (r) { dev_err(adev->dev, "(%d) failed to create me data bo\n", r); - gfx_v12_0_pfp_fini(adev); + gfx_v12_0_me_fini(adev); return r; } @@ -2595,7 +2637,6 @@ static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev) u32 tmp; u32 rb_bufsz; u64 rb_addr, rptr_addr, wptr_gpu_addr; - u32 i; /* Set the write pointer delay */ WREG32_SOC15(GC, 0, regCP_RB_WPTR_DELAY, 0); @@ -2650,12 +2691,6 @@ static int gfx_v12_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ gfx_v12_0_cp_gfx_start(adev); - - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } - return 0; } @@ -2832,9 +2867,7 @@ static void gfx_v12_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= 
(ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); } static void gfx_v12_0_cp_set_doorbell_range(struct amdgpu_device *adev) @@ -2869,25 +2902,25 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); /* set up mqd control */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_MQD_CONTROL); + tmp = regCP_GFX_MQD_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, VMID, 0); tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, PRIV_STATE, 1); tmp = REG_SET_FIELD(tmp, CP_GFX_MQD_CONTROL, CACHE_POLICY, 0); mqd->cp_gfx_mqd_control = tmp; /* set up gfx_hqd_vimd with 0x0 to indicate the ring buffer's vmid */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_VMID); + tmp = regCP_GFX_HQD_VMID_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_VMID, VMID, 0); mqd->cp_gfx_hqd_vmid = 0; /* set up default queue priority level * 0x0 = low priority, 0x1 = high priority */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY); + tmp = regCP_GFX_HQD_QUEUE_PRIORITY_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUEUE_PRIORITY, PRIORITY_LEVEL, 0); mqd->cp_gfx_hqd_queue_priority = tmp; /* set up time quantum */ - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_QUANTUM); + tmp = regCP_GFX_HQD_QUANTUM_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_QUANTUM, QUANTUM_EN, 1); mqd->cp_gfx_hqd_quantum = tmp; @@ -2909,7 +2942,7 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, /* set up the gfx_hqd_control, similar as CP_RB0_CNTL */ rb_bufsz = order_base_2(prop->queue_size / 4) - 1; - tmp = RREG32_SOC15(GC, 0, regCP_GFX_HQD_CNTL); + tmp = regCP_GFX_HQD_CNTL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BUFSZ, rb_bufsz); tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_BLKSZ, rb_bufsz - 2); #ifdef __BIG_ENDIAN @@ -2918,7 +2951,7 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_gfx_hqd_cntl = tmp; /* set up cp_doorbell_control */ - tmp = RREG32_SOC15(GC, 0, regCP_RB_DOORBELL_CONTROL); + tmp = regCP_RB_DOORBELL_CONTROL_DEFAULT; if (prop->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_RB_DOORBELL_CONTROL, DOORBELL_OFFSET, prop->doorbell_index); @@ -2930,7 +2963,7 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_rb_doorbell_control = tmp; /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - mqd->cp_gfx_hqd_rptr = RREG32_SOC15(GC, 0, regCP_GFX_HQD_RPTR); + mqd->cp_gfx_hqd_rptr = regCP_GFX_HQD_RPTR_DEFAULT; /* active the queue */ mqd->cp_gfx_hqd_active = 1; @@ -2997,10 +3030,6 @@ static int gfx_v12_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev) if (r) goto done; - for (i = 0; i < adev->gfx.num_gfx_rings; i++) { - ring = &adev->gfx.gfx_ring[i]; - ring->sched.ready = true; - } done: return r; } @@ -3025,14 +3054,14 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_eop_base_addr_hi = upper_32_bits(eop_base_addr); /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_EOP_CONTROL); + tmp = regCP_HQD_EOP_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_EOP_CONTROL, EOP_SIZE, (order_base_2(GFX12_MEC_HPD_SIZE / 4) - 1)); mqd->cp_hqd_eop_control = tmp; /* enable doorbell? 
*/ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; if (prop->use_doorbell) { tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, @@ -3061,7 +3090,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_mqd_base_addr_hi = upper_32_bits(prop->mqd_gpu_addr); /* set MQD vmid to 0 */ - tmp = RREG32_SOC15(GC, 0, regCP_MQD_CONTROL); + tmp = regCP_MQD_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_MQD_CONTROL, VMID, 0); mqd->cp_mqd_control = tmp; @@ -3071,7 +3100,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr); /* set up the HQD, this is similar to CP_RB0_CNTL */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_CONTROL); + tmp = regCP_HQD_PQ_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, QUEUE_SIZE, (order_base_2(prop->queue_size / 4) - 1)); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, RPTR_BLOCK_SIZE, @@ -3096,7 +3125,7 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, tmp = 0; /* enable the doorbell if requested */ if (prop->use_doorbell) { - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL); + tmp = regCP_HQD_PQ_DOORBELL_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_OFFSET, prop->doorbell_index); @@ -3111,17 +3140,17 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m, mqd->cp_hqd_pq_doorbell_control = tmp; /* reset read and write pointers, similar to CP_RB0_WPTR/_RPTR */ - mqd->cp_hqd_pq_rptr = RREG32_SOC15(GC, 0, regCP_HQD_PQ_RPTR); + mqd->cp_hqd_pq_rptr = regCP_HQD_PQ_RPTR_DEFAULT; /* set the vmid for the queue */ mqd->cp_hqd_vmid = 0; - tmp = RREG32_SOC15(GC, 0, regCP_HQD_PERSISTENT_STATE); + tmp = regCP_HQD_PERSISTENT_STATE_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_PERSISTENT_STATE, PRELOAD_SIZE, 0x55); mqd->cp_hqd_persistent_state = tmp; /* set MIN_IB_AVAIL_SIZE */ - tmp = RREG32_SOC15(GC, 0, regCP_HQD_IB_CONTROL); + tmp = regCP_HQD_IB_CONTROL_DEFAULT; tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3); mqd->cp_hqd_ib_control = tmp; @@ -3626,6 +3655,8 @@ static int gfx_v12_0_hw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; uint32_t tmp; + cancel_delayed_work_sync(&adev->gfx.idle_work); + amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); amdgpu_irq_put(adev, &adev->gfx.bad_op_irq, 0); @@ -3671,9 +3702,9 @@ static int gfx_v12_0_resume(struct amdgpu_ip_block *ip_block) return gfx_v12_0_hw_init(ip_block); } -static bool gfx_v12_0_is_idle(void *handle) +static bool gfx_v12_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (REG_GET_FIELD(RREG32_SOC15(GC, 0, regGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) @@ -3864,10 +3895,10 @@ static void gfx_v12_cntl_pg(struct amdgpu_device *adev, bool enable) } #endif -static int gfx_v12_0_set_powergating_state(void *handle, +static int gfx_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (amdgpu_sriov_vf(adev)) @@ -3999,17 +4030,6 @@ static void gfx_v12_0_update_coarse_grain_clock_gating(struct amdgpu_device *ade if (def != data) WREG32_SOC15(GC, 0, regRLC_CGCG_CGLS_CTRL_3D, data); - 
- data = RREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL); - data &= ~SDMA0_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA0_RLC_CGCG_CTRL, data); - - /* Some ASICs only have one SDMA instance, not need to configure SDMA1 */ - if (adev->sdma.num_instances > 1) { - data = RREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL); - data &= ~SDMA1_RLC_CGCG_CTRL__CGCG_INT_ENABLE_MASK; - WREG32_SOC15(GC, 0, regSDMA1_RLC_CGCG_CTRL, data); - } } } @@ -4115,10 +4135,10 @@ static int gfx_v12_0_update_gfx_clock_gating(struct amdgpu_device *adev, return 0; } -static int gfx_v12_0_set_clockgating_state(void *handle, +static int gfx_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -4136,9 +4156,9 @@ static int gfx_v12_0_set_clockgating_state(void *handle, return 0; } -static void gfx_v12_0_get_clockgating_state(void *handle, u64 *flags) +static void gfx_v12_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; /* AMD_CG_SUPPORT_GFX_MGCG */ @@ -5233,24 +5253,16 @@ static int gfx_v12_0_reset_kgq(struct amdgpu_ring *ring, unsigned int vmid) static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) { struct amdgpu_device *adev = ring->adev; - int r, i; + int r; if (amdgpu_sriov_vf(adev)) return -EINVAL; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - mutex_lock(&adev->srbm_mutex); - soc24_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); - WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); - WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); - for (i = 0; i < adev->usec_timeout; i++) { - if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) - break; - udelay(1); + r = amdgpu_mes_reset_legacy_queue(ring->adev, ring, vmid, true); + if (r) { + dev_err(adev->dev, "reset via MMIO failed %d\n", r); + return r; } - soc24_grbm_select(adev, 0, 0, 0, 0); - mutex_unlock(&adev->srbm_mutex); - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) { @@ -5277,6 +5289,20 @@ static int gfx_v12_0_reset_kcq(struct amdgpu_ring *ring, unsigned int vmid) return amdgpu_ring_test_ring(ring); } +static void gfx_v12_0_ring_begin_use(struct amdgpu_ring *ring) +{ + amdgpu_gfx_profile_ring_begin_use(ring); + + amdgpu_gfx_enforce_isolation_ring_begin_use(ring); +} + +static void gfx_v12_0_ring_end_use(struct amdgpu_ring *ring) +{ + amdgpu_gfx_profile_ring_end_use(ring); + + amdgpu_gfx_enforce_isolation_ring_end_use(ring); +} + static const struct amd_ip_funcs gfx_v12_0_ip_funcs = { .name = "gfx_v12_0", .early_init = gfx_v12_0_early_init, @@ -5342,8 +5368,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = { .emit_mem_sync = gfx_v12_0_emit_mem_sync, .reset = gfx_v12_0_reset_kgq, .emit_cleaner_shader = gfx_v12_0_ring_emit_cleaner_shader, - .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, - .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, + .begin_use = gfx_v12_0_ring_begin_use, + .end_use = gfx_v12_0_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { @@ -5381,8 +5407,8 @@ static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_compute = { .emit_mem_sync = gfx_v12_0_emit_mem_sync, .reset = gfx_v12_0_reset_kcq, .emit_cleaner_shader = 
gfx_v12_0_ring_emit_cleaner_shader, - .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, - .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, + .begin_use = gfx_v12_0_ring_begin_use, + .end_use = gfx_v12_0_ring_end_use, }; static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h index bcc9c72ccbde..f7184b2dc4e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.h @@ -26,4 +26,6 @@ extern const struct amdgpu_ip_block_version gfx_v12_0_ip_block; +int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index 41f50bf380c4..13fbee46417a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -28,19 +28,30 @@ #include "amdgpu_gfx.h" #include "amdgpu_ucode.h" #include "clearstate_si.h" +#include "si.h" +#include "sid.h" + #include "bif/bif_3_0_d.h" #include "bif/bif_3_0_sh_mask.h" + #include "oss/oss_1_0_d.h" #include "oss/oss_1_0_sh_mask.h" + #include "gca/gfx_6_0_d.h" #include "gca/gfx_6_0_sh_mask.h" +#include "gca/gfx_7_2_enum.h" + #include "gmc/gmc_6_0_d.h" #include "gmc/gmc_6_0_sh_mask.h" + #include "dce/dce_6_0_d.h" #include "dce/dce_6_0_sh_mask.h" -#include "gca/gfx_7_2_enum.h" + #include "si_enums.h" -#include "si.h" + +#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003 +#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002 +#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001 static void gfx_v6_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v6_0_set_irq_funcs(struct amdgpu_device *adev); @@ -337,6 +348,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", chip_name); if (err) goto out; @@ -345,6 +357,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", chip_name); if (err) goto out; @@ -353,6 +366,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce.bin", chip_name); if (err) goto out; @@ -361,6 +375,7 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; @@ -1906,7 +1921,7 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; error: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); return r; } @@ -3163,9 +3178,9 @@ static int gfx_v6_0_resume(struct amdgpu_ip_block *ip_block) return gfx_v6_0_hw_init(ip_block); } -static bool gfx_v6_0_is_idle(void *handle) +static bool gfx_v6_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK) return false; @@ -3179,7 +3194,7 @@ static int gfx_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = 
ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (gfx_v6_0_is_idle(adev)) + if (gfx_v6_0_is_idle(ip_block)) return 0; udelay(1); } @@ -3373,11 +3388,11 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } -static int gfx_v6_0_set_clockgating_state(void *handle, +static int gfx_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_CG_STATE_GATE) gate = true; @@ -3395,11 +3410,11 @@ static int gfx_v6_0_set_clockgating_state(void *handle, return 0; } -static int gfx_v6_0_set_powergating_state(void *handle, +static int gfx_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_PG_STATE_GATE) gate = true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 824d5913103b..8181bd0e4f18 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -934,33 +934,39 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev) } err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", chip_name); if (err) goto out; err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", chip_name); if (err) goto out; err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce.bin", chip_name); if (err) goto out; err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", chip_name); if (err) goto out; if (adev->asic_type == CHIP_KAVERI) { err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec2.bin", chip_name); if (err) goto out; } err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", chip_name); out: if (err) { @@ -2324,7 +2330,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; error: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); return r; } @@ -4509,9 +4515,9 @@ static int gfx_v7_0_resume(struct amdgpu_ip_block *ip_block) return gfx_v7_0_hw_init(ip_block); } -static bool gfx_v7_0_is_idle(void *handle) +static bool gfx_v7_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (RREG32(mmGRBM_STATUS) & GRBM_STATUS__GUI_ACTIVE_MASK) return false; @@ -4846,11 +4852,11 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev, return 0; } -static int gfx_v7_0_set_clockgating_state(void *handle, +static int gfx_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_CG_STATE_GATE) gate = true; @@ -4869,11 +4875,11 @@ static int gfx_v7_0_set_clockgating_state(void *handle, return 0; } -static int gfx_v7_0_set_powergating_state(void *handle, +static int gfx_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct 
amdgpu_device *adev = ip_block->adev; if (state == AMD_PG_STATE_GATE) gate = true; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index b7006c41e270..d116a2e2f469 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -914,7 +914,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err2: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: amdgpu_device_wb_free(adev, index); @@ -982,13 +982,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_pfp_2.bin", chip_name); if (err == -ENODEV) { err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", chip_name); } } else { err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", chip_name); } if (err) @@ -999,13 +1002,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_me_2.bin", chip_name); if (err == -ENODEV) { err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", chip_name); } } else { err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", chip_name); } if (err) @@ -1017,13 +1023,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_ce_2.bin", chip_name); if (err == -ENODEV) { err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce.bin", chip_name); } } else { err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce.bin", chip_name); } if (err) @@ -1044,6 +1053,7 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) adev->virt.chained_ib_support = false; err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; @@ -1093,13 +1103,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_mec_2.bin", chip_name); if (err == -ENODEV) { err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", chip_name); } } else { err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", chip_name); } if (err) @@ -1112,13 +1125,16 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) (adev->asic_type != CHIP_TOPAZ)) { if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_OPTIONAL, "amdgpu/%s_mec2_2.bin", chip_name); if (err == -ENODEV) { err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec2.bin", chip_name); } } else { err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec2.bin", chip_name); } if (!err) { @@ -1640,7 +1656,7 @@ static 
int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) RREG32(sec_ded_counter_registers[i]); fail: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); return r; @@ -4304,9 +4320,7 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32(mmRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32(mmRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32(mmRLC_CP_SCHEDULERS, tmp); + WREG32(mmRLC_CP_SCHEDULERS, tmp | 0x80); } static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) @@ -4837,9 +4851,9 @@ static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev) return r; } -static bool gfx_v8_0_is_idle(void *handle) +static bool gfx_v8_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE) || RREG32(mmGRBM_STATUS2) != 0x8) @@ -4878,7 +4892,7 @@ static int gfx_v8_0_wait_for_idle(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (gfx_v8_0_is_idle(adev)) + if (gfx_v8_0_is_idle(ip_block)) return 0; udelay(1); @@ -5321,7 +5335,7 @@ static void gfx_v8_0_enable_gfx_static_mg_power_gating(struct amdgpu_device *ade (adev->asic_type == CHIP_POLARIS12) || (adev->asic_type == CHIP_VEGAM)) /* Send msg to SMU via Powerplay */ - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, enable, 0); WREG32_FIELD(RLC_PG_CNTL, STATIC_PER_CU_PG_ENABLE, enable ? 1 : 0); } @@ -5367,10 +5381,10 @@ static void cz_update_gfx_cg_power_gating(struct amdgpu_device *adev, } } -static int gfx_v8_0_set_powergating_state(void *handle, +static int gfx_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (amdgpu_sriov_vf(adev)) @@ -5438,9 +5452,9 @@ static int gfx_v8_0_set_powergating_state(void *handle, return 0; } -static void gfx_v8_0_get_clockgating_state(void *handle, u64 *flags) +static void gfx_v8_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; if (amdgpu_sriov_vf(adev)) @@ -5625,8 +5639,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev { uint32_t temp, data; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) { @@ -5720,8 +5732,6 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev /* 7- wait for RLC_SERDES_CU_MASTER & RLC_SERDES_NONCU_MASTER idle */ gfx_v8_0_wait_for_rlc_serdes(adev); } - - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -5731,8 +5741,6 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); data1 &= ~RLC_CGTT_MGCG_OVERRIDE__CGCG_MASK; @@ 
-5813,12 +5821,12 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev } gfx_v8_0_wait_for_rlc_serdes(adev); - - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + if (enable) { /* CGCG/CGLS should be enabled after MGCG/MGLS/TS(CG/LS) * === MGCG + MGLS + TS(CG/LS) === @@ -5832,6 +5840,8 @@ static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, gfx_v8_0_update_coarse_grain_clock_gating(adev, enable); gfx_v8_0_update_medium_grain_clock_gating(adev, enable); } + + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -5982,10 +5992,10 @@ static int gfx_v8_0_polaris_update_gfx_clock_gating(struct amdgpu_device *adev, return 0; } -static int gfx_v8_0_set_clockgating_state(void *handle, +static int gfx_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 0b6f09f2cc9b..d345285ea885 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1243,7 +1243,7 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err2: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: amdgpu_device_wb_free(adev, index); @@ -1429,18 +1429,21 @@ static int gfx_v9_0_init_cp_gfx_microcode(struct amdgpu_device *adev, int err; err = amdgpu_ucode_request(adev, &adev->gfx.pfp_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_pfp.bin", chip_name); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_PFP); err = amdgpu_ucode_request(adev, &adev->gfx.me_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_me.bin", chip_name); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_ME); err = amdgpu_ucode_request(adev, &adev->gfx.ce_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_ce.bin", chip_name); if (err) goto out; @@ -1476,6 +1479,7 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc_am4.bin", chip_name); else if (!strcmp(chip_name, "raven") && (amdgpu_pm_load_smu_firmware(adev, &smu_version) == 0) && (smu_version >= 0x41e2b)) @@ -1483,9 +1487,11 @@ static int gfx_v9_0_init_rlc_microcode(struct amdgpu_device *adev, *SMC is loaded by SBIOS on APU and it's able to get the SMU version directly. 
*/ err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_kicker_rlc.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; @@ -1518,9 +1524,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, - "amdgpu/%s_sjt_mec.bin", chip_name); + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sjt_mec.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec.bin", chip_name); if (err) goto out; @@ -1531,9 +1539,11 @@ static int gfx_v9_0_init_cp_compute_microcode(struct amdgpu_device *adev, if (gfx_v9_0_load_mec2_fw_bin_support(adev)) { if (amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_ALDEBARAN)) err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sjt_mec2.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->gfx.mec2_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_mec2.bin", chip_name); if (!err) { amdgpu_gfx_cp_init_microcode(adev, AMDGPU_UCODE_ID_CP_MEC2); @@ -2627,7 +2637,10 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) u32 tmp; int i; - WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + if (!amdgpu_sriov_vf(adev) || + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 2)) { + WREG32_FIELD15_RLC(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); + } gfx_v9_0_tiling_mode_table_init(adev); @@ -3488,9 +3501,7 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, mmRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp | 0x80); } static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) @@ -4034,7 +4045,8 @@ static int gfx_v9_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2)) + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) && + !amdgpu_sriov_vf(adev)) gfx_v9_4_2_set_power_brake_sequence(adev); return r; @@ -4102,9 +4114,9 @@ static int gfx_v9_0_resume(struct amdgpu_ip_block *ip_block) return gfx_v9_0_hw_init(ip_block); } -static bool gfx_v9_0_is_idle(void *handle) +static bool gfx_v9_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (REG_GET_FIELD(RREG32_SOC15(GC, 0, mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) @@ -4119,7 +4131,7 @@ static int gfx_v9_0_wait_for_idle(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (gfx_v9_0_is_idle(adev)) + if (gfx_v9_0_is_idle(ip_block)) return 0; udelay(1); } @@ -4780,7 +4792,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) } fail: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); return r; @@ -4956,8 +4968,6 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev { uint32_t data, def; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { /* 1 - RLC_CGTT_MGCG_OVERRIDE */ @@ 
-5022,8 +5032,6 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data); } } - - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, @@ -5034,8 +5042,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, if (!adev->gfx.num_gfx_rings) return; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - /* Enable 3D CGCG/CGLS */ if (enable) { /* write cmd to clear cgcg/cgls ov */ @@ -5077,8 +5083,6 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); } - - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -5086,8 +5090,6 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev { uint32_t def, data; - amdgpu_gfx_rlc_enter_safe_mode(adev, 0); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); /* unset CGCG override */ @@ -5129,13 +5131,12 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev if (def != data) WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); } - - amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) { + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if (enable) { /* CGCG/CGLS should be enabled after MGCG/MGLS * === MGCG + MGLS === @@ -5155,6 +5156,7 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, /* === MGCG + MGLS === */ gfx_v9_0_update_medium_grain_clock_gating(adev, enable); } + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); return 0; } @@ -5232,10 +5234,10 @@ static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { .is_rlcg_access_range = gfx_v9_0_is_rlcg_access_range, }; -static int gfx_v9_0_set_powergating_state(void *handle, +static int gfx_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { @@ -5243,7 +5245,7 @@ static int gfx_v9_0_set_powergating_state(void *handle, case IP_VERSION(9, 1, 0): case IP_VERSION(9, 3, 0): if (!enable) - amdgpu_gfx_off_ctrl(adev, false); + amdgpu_gfx_off_ctrl_immediate(adev, false); if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); @@ -5265,10 +5267,10 @@ static int gfx_v9_0_set_powergating_state(void *handle, gfx_v9_0_update_gfx_mg_power_gating(adev, enable); if (enable) - amdgpu_gfx_off_ctrl(adev, true); + amdgpu_gfx_off_ctrl_immediate(adev, true); break; case IP_VERSION(9, 2, 1): - amdgpu_gfx_off_ctrl(adev, enable); + amdgpu_gfx_off_ctrl_immediate(adev, enable); break; default: break; @@ -5277,10 +5279,10 @@ static int gfx_v9_0_set_powergating_state(void *handle, return 0; } -static int gfx_v9_0_set_clockgating_state(void *handle, +static int gfx_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -5303,9 +5305,9 @@ static int gfx_v9_0_set_clockgating_state(void *handle, return 0; } -static void gfx_v9_0_get_clockgating_state(void 
*handle, u64 *flags) +static void gfx_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; if (amdgpu_sriov_vf(adev)) @@ -7439,6 +7441,38 @@ static void gfx_v9_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */ } +static void gfx_v9_0_ring_begin_use_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ip_block *gfx_block = + amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + + amdgpu_gfx_enforce_isolation_ring_begin_use(ring); + + /* Raven and PCO APUs seem to have stability issues + * with compute and gfxoff and gfx pg. Disable gfx pg during + * submission and allow again afterwards. + */ + if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) + gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_UNGATE); +} + +static void gfx_v9_0_ring_end_use_compute(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_ip_block *gfx_block = + amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX); + + /* Raven and PCO APUs seem to have stability issues + * with compute and gfxoff and gfx pg. Disable gfx pg during + * submission and allow again afterwards. + */ + if (gfx_block && amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 1, 0)) + gfx_v9_0_set_powergating_state(gfx_block, AMD_PG_STATE_GATE); + + amdgpu_gfx_enforce_isolation_ring_end_use(ring); +} + static const struct amd_ip_funcs gfx_v9_0_ip_funcs = { .name = "gfx_v9_0", .early_init = gfx_v9_0_early_init, @@ -7615,8 +7649,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { .emit_wave_limit = gfx_v9_0_emit_wave_limit, .reset = gfx_v9_0_reset_kcq, .emit_cleaner_shader = gfx_v9_0_ring_emit_cleaner_shader, - .begin_use = amdgpu_gfx_enforce_isolation_ring_begin_use, - .end_use = amdgpu_gfx_enforce_isolation_ring_end_use, + .begin_use = gfx_v9_0_ring_begin_use_compute, + .end_use = gfx_v9_0_ring_end_use_compute, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c index f53b379d8971..6028afd81690 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4.c @@ -27,7 +27,6 @@ #include "amdgpu_gfx.h" #include "soc15.h" #include "soc15d.h" -#include "amdgpu_atomfirmware.h" #include "amdgpu_pm.h" #include "gc/gc_9_4_1_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index 3f4fd2f08163..d81449f9d822 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -412,7 +412,7 @@ static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev, r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr); if (r) { dev_err(adev->dev, "ib submit failed (%d).\n", r); - amdgpu_ib_free(adev, ib, NULL); + amdgpu_ib_free(ib, NULL); } return r; } @@ -611,16 +611,16 @@ static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev) } disp2_failed: - amdgpu_ib_free(adev, &disp_ibs[2], NULL); + amdgpu_ib_free(&disp_ibs[2], NULL); dma_fence_put(fences[2]); disp1_failed: - amdgpu_ib_free(adev, &disp_ibs[1], NULL); + amdgpu_ib_free(&disp_ibs[1], NULL); dma_fence_put(fences[1]); disp0_failed: - amdgpu_ib_free(adev, &disp_ibs[0], NULL); + amdgpu_ib_free(&disp_ibs[0], NULL); dma_fence_put(fences[0]); 
pro_end: - amdgpu_ib_free(adev, &wb_ib, NULL); + amdgpu_ib_free(&wb_ib, NULL); if (r) dev_info(adev->dev, "Init SGPRS Failed\n"); @@ -687,10 +687,10 @@ static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev) } disp_failed: - amdgpu_ib_free(adev, &disp_ib, NULL); + amdgpu_ib_free(&disp_ib, NULL); dma_fence_put(fence); pro_end: - amdgpu_ib_free(adev, &wb_ib, NULL); + amdgpu_ib_free(&wb_ib, NULL); if (r) dev_info(adev->dev, "Init VGPRS Failed\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 54459254bd37..736398b0d16d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -43,8 +43,10 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_3_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_4_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_5_0_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_3_rlc.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_4_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_9_5_0_rlc.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_3_sjt_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin"); @@ -54,10 +56,6 @@ MODULE_FIRMWARE("amdgpu/gc_9_4_4_sjt_mec.bin"); #define GOLDEN_GB_ADDR_CONFIG 0x2a114042 #define CP_HQD_PERSISTENT_STATE_DEFAULT 0xbe05301 -#define mmSMNAID_XCD0_MCA_SMU 0x36430400 /* SMN AID XCD0 */ -#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */ -#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */ - #define XCC_REG_RANGE_0_LOW 0x2000 /* XCC gfxdec0 lower Bound */ #define XCC_REG_RANGE_0_HIGH 0x3400 /* XCC gfxdec0 upper Bound */ #define XCC_REG_RANGE_1_LOW 0xA000 /* XCC gfxdec1 lower Bound */ @@ -351,14 +349,7 @@ static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) WREG32_SOC15(GC, dev_inst, regGB_ADDR_CONFIG, GOLDEN_GB_ADDR_CONFIG); - /* Golden settings applied by driver for ASIC with rev_id 0 */ - if (adev->rev_id == 0) { - WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL1, - REDUCE_FIFO_DEPTH_BY_2, 2); - } else { - WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2, - SPARE, 0x1); - } + WREG32_FIELD15_PREREG(GC, dev_inst, TCP_UTCL1_CNTL2, SPARE, 0x1); } } @@ -501,7 +492,7 @@ static int gfx_v9_4_3_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err2: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err1: amdgpu_device_wb_free(adev, index); @@ -545,6 +536,7 @@ static int gfx_v9_4_3_init_rlc_microcode(struct amdgpu_device *adev, err = amdgpu_ucode_request(adev, &adev->gfx.rlc_fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_rlc.bin", chip_name); if (err) goto out; @@ -560,28 +552,24 @@ out: return err; } -static bool gfx_v9_4_3_should_disable_gfxoff(struct pci_dev *pdev) -{ - return true; -} - -static void gfx_v9_4_3_check_if_need_gfxoff(struct amdgpu_device *adev) -{ - if (gfx_v9_4_3_should_disable_gfxoff(adev->pdev)) - adev->pm.pp_feature &= ~PP_GFXOFF_MASK; -} - static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev, const char *chip_name) { int err; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, - "amdgpu/%s_sjt_mec.bin", chip_name); - else + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_sjt_mec.bin", chip_name); + + if (err) + err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mec.bin", chip_name); + } else err = amdgpu_ucode_request(adev, &adev->gfx.mec_fw, - "amdgpu/%s_mec.bin", chip_name); + AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mec.bin", chip_name); if (err) goto out; amdgpu_gfx_cp_init_microcode(adev, 
AMDGPU_UCODE_ID_CP_MEC1); @@ -590,8 +578,6 @@ static int gfx_v9_4_3_init_cp_compute_microcode(struct amdgpu_device *adev, adev->gfx.mec2_fw_version = adev->gfx.mec_fw_version; adev->gfx.mec2_feature_version = adev->gfx.mec_feature_version; - gfx_v9_4_3_check_if_need_gfxoff(adev); - out: if (err) amdgpu_ucode_release(&adev->gfx.mec_fw); @@ -881,12 +867,14 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle, switch (type) { case ACA_SMU_TYPE_UE: - ret = aca_error_cache_log_bank_error(handle, &info, - ACA_ERROR_TYPE_UE, 1ULL); + bank->aca_err_type = ACA_BANK_ERR_UE_DE_DECODE(bank); + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, 1ULL); break; case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); ret = aca_error_cache_log_bank_error(handle, &info, - ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0)); + bank->aca_err_type, + ACA_REG__MISC0__ERRCNT(misc0)); break; default: return -EINVAL; @@ -927,27 +915,15 @@ static const struct aca_info gfx_v9_4_3_aca_info = { static int gfx_v9_4_3_gpu_early_init(struct amdgpu_device *adev) { - u32 gb_addr_config; - adev->gfx.funcs = &gfx_v9_4_3_gfx_funcs; adev->gfx.ras = &gfx_v9_4_3_ras; - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { - case IP_VERSION(9, 4, 3): - case IP_VERSION(9, 4, 4): - adev->gfx.config.max_hw_contexts = 8; - adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; - adev->gfx.config.sc_prim_fifo_size_backend = 0x100; - adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; - adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; - gb_addr_config = RREG32_SOC15(GC, GET_INST(GC, 0), regGB_ADDR_CONFIG); - break; - default: - BUG(); - break; - } - - adev->gfx.config.gb_addr_config = gb_addr_config; + adev->gfx.config.max_hw_contexts = 8; + adev->gfx.config.sc_prim_fifo_size_frontend = 0x20; + adev->gfx.config.sc_prim_fifo_size_backend = 0x100; + adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; + adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; + adev->gfx.config.gb_addr_config = GOLDEN_GB_ADDR_CONFIG; adev->gfx.config.gb_addr_config_fields.num_pipes = 1 << REG_GET_FIELD( @@ -1358,10 +1334,8 @@ static void gfx_v9_4_3_xcc_init_pg(struct amdgpu_device *adev, int xcc_id) { /* * Rlc save restore list is workable since v2_1. - * And it's needed by gfxoff feature. 
*/ - if (adev->gfx.rlc.is_rlc_v2_1) - gfx_v9_4_3_xcc_enable_save_restore_machine(adev, xcc_id); + gfx_v9_4_3_xcc_enable_save_restore_machine(adev, xcc_id); } static void gfx_v9_4_3_xcc_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id) @@ -1785,9 +1759,7 @@ static void gfx_v9_4_3_xcc_kiq_setting(struct amdgpu_ring *ring, int xcc_id) tmp = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15_RLC(GC, GET_INST(GC, xcc_id), regRLC_CP_SCHEDULERS, tmp | 0x80); } static void gfx_v9_4_3_mqd_set_priority(struct amdgpu_ring *ring, struct v9_mqd *mqd) @@ -1848,7 +1820,7 @@ static int gfx_v9_4_3_xcc_mqd_init(struct amdgpu_ring *ring, int xcc_id) DOORBELL_SOURCE, 0); tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_HIT, 0); - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_multi_vf_mode(adev)) tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_MODE, 1); } else { @@ -2406,9 +2378,9 @@ static int gfx_v9_4_3_resume(struct amdgpu_ip_block *ip_block) return gfx_v9_4_3_hw_init(ip_block); } -static bool gfx_v9_4_3_is_idle(void *handle) +static bool gfx_v9_4_3_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, num_xcc; num_xcc = NUM_XCC(adev->gfx.xcc_mask); @@ -2426,7 +2398,7 @@ static int gfx_v9_4_3_wait_for_idle(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (gfx_v9_4_3_is_idle(adev)) + if (gfx_v9_4_3_is_idle(ip_block)) return 0; udelay(1); } @@ -2770,38 +2742,32 @@ static const struct amdgpu_rlc_funcs gfx_v9_4_3_rlc_funcs = { .is_rlcg_access_range = gfx_v9_4_3_is_rlcg_access_range, }; -static int gfx_v9_4_3_set_powergating_state(void *handle, +static int gfx_v9_4_3_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; } -static int gfx_v9_4_3_set_clockgating_state(void *handle, +static int gfx_v9_4_3_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, num_xcc; if (amdgpu_sriov_vf(adev)) return 0; num_xcc = NUM_XCC(adev->gfx.xcc_mask); - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { - case IP_VERSION(9, 4, 3): - case IP_VERSION(9, 4, 4): - for (i = 0; i < num_xcc; i++) - gfx_v9_4_3_xcc_update_gfx_clock_gating( - adev, state == AMD_CG_STATE_GATE, i); - break; - default: - break; - } + for (i = 0; i < num_xcc; i++) + gfx_v9_4_3_xcc_update_gfx_clock_gating( + adev, state == AMD_CG_STATE_GATE, i); + return 0; } -static void gfx_v9_4_3_get_clockgating_state(void *handle, u64 *flags) +static void gfx_v9_4_3_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; if (amdgpu_sriov_vf(adev)) @@ -4659,7 +4625,6 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) num_xcc = NUM_XCC(adev->gfx.xcc_mask); - amdgpu_gfx_off_ctrl(adev, false); for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { xcc_offset = xcc_id * reg_count; for (i = 0; i < reg_count; i++) @@ -4667,7 +4632,6 @@ static void 
gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) RREG32(SOC15_REG_ENTRY_OFFSET_INST(gc_reg_list_9_4_3[i], GET_INST(GC, xcc_id))); } - amdgpu_gfx_off_ctrl(adev, true); /* dump compute queue registers for all instances */ if (!adev->gfx.ip_dump_compute_queues) @@ -4676,7 +4640,6 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) num_inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe; reg_count = ARRAY_SIZE(gc_cp_reg_list_9_4_3); - amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->srbm_mutex); for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { xcc_offset = xcc_id * reg_count * num_inst; @@ -4703,7 +4666,6 @@ static void gfx_v9_4_3_ip_dump(struct amdgpu_ip_block *ip_block) } soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - amdgpu_gfx_off_ctrl(adev, true); } static void gfx_v9_4_3_ring_emit_cleaner_shader(struct amdgpu_ring *ring) @@ -4862,32 +4824,13 @@ static void gfx_v9_4_3_set_rlc_funcs(struct amdgpu_device *adev) static void gfx_v9_4_3_set_gds_init(struct amdgpu_device *adev) { - /* init asci gds info */ - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { - case IP_VERSION(9, 4, 3): - case IP_VERSION(9, 4, 4): - /* 9.4.3 removed all the GDS internal memory, - * only support GWS opcode in kernel, like barrier - * semaphore.etc */ - adev->gds.gds_size = 0; - break; - default: - adev->gds.gds_size = 0x10000; - break; - } - - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { - case IP_VERSION(9, 4, 3): - case IP_VERSION(9, 4, 4): - /* deprecated for 9.4.3, no usage at all */ - adev->gds.gds_compute_max_wave_id = 0; - break; - default: - /* this really depends on the chip */ - adev->gds.gds_compute_max_wave_id = 0x7ff; - break; - } + /* 9.4.3 variants removed all the GDS internal memory, + * only support GWS opcode in kernel, like barrier + * semaphore.etc */ + /* init asic gds info */ + adev->gds.gds_size = 0; + adev->gds.gds_compute_max_wave_id = 0; adev->gds.gws_size = 64; adev->gds.oa_size = 16; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 0e3ddea7b8e0..a7bfc9f41d0e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -92,12 +92,12 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) { uint64_t value; - /* Program the AGP BAR */ - WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0); - WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); - WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); - if (!amdgpu_sriov_vf(adev) || adev->asic_type <= CHIP_VEGA10) { + /* Program the AGP BAR */ + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BASE, 0); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15_RLC(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); + /* Program the system aperture low logical page number. 
*/ WREG32_SOC15_RLC(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index ed8e130c7d19..cb25f7f0dfc1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -313,6 +313,16 @@ gfxhub_v1_2_xcc_disable_identity_aperture(struct amdgpu_device *adev, } } +static inline bool +gfxhub_v1_2_per_process_xnack_support(struct amdgpu_device *adev) +{ + /* + * TODO: Check if this function is really needed, so far only 9.4.3 + * variants use GFXHUB 1.2 + */ + return !!adev->aid_mask; +} + static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev, uint32_t xcc_mask) { @@ -355,7 +365,7 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev, PAGE_TABLE_BLOCK_SIZE, block_size); /* Send no-retry XNACK on fault to suppress VM fault storm. - * On 9.4.2 and 9.4.3, XNACK can be enabled in + * On 9.4.3 variants, XNACK can be enabled in * the SQ per-process. * Retry faults need to be enabled for that to work. */ @@ -363,12 +373,8 @@ static void gfxhub_v1_2_xcc_setup_vmid_config(struct amdgpu_device *adev, tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, !adev->gmc.noretry || - amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 2) || - amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 4)); + gfxhub_v1_2_per_process_xnack_support( + adev)); WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), regVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(GC, GET_INST(GC, j), diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index 17509f32f61a..deb95fab02df 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -505,42 +505,6 @@ static void gfxhub_v2_1_init(struct amdgpu_device *adev) hub->vmhub_funcs = &gfxhub_v2_1_vmhub_funcs; } -static int gfxhub_v2_1_get_xgmi_info(struct amdgpu_device *adev) -{ - u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_CNTL); - u32 max_region = - REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_MAX_REGION); - u32 max_num_physical_nodes = 0; - u32 max_physical_node_id = 0; - - switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) { - case IP_VERSION(4, 8, 0): - max_num_physical_nodes = 4; - max_physical_node_id = 3; - break; - default: - return -EINVAL; - } - - /* PF_MAX_REGION=0 means xgmi is disabled */ - if (max_region) { - adev->gmc.xgmi.num_physical_nodes = max_region + 1; - if (adev->gmc.xgmi.num_physical_nodes > max_num_physical_nodes) - return -EINVAL; - - adev->gmc.xgmi.physical_node_id = - REG_GET_FIELD(xgmi_lfb_cntl, GCMC_VM_XGMI_LFB_CNTL, PF_LFB_REGION); - if (adev->gmc.xgmi.physical_node_id > max_physical_node_id) - return -EINVAL; - - adev->gmc.xgmi.node_segment_size = REG_GET_FIELD( - RREG32_SOC15(GC, 0, mmGCMC_VM_XGMI_LFB_SIZE), - GCMC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24; - } - - return 0; -} - static void gfxhub_v2_1_utcl2_harvest(struct amdgpu_device *adev) { int i; @@ -696,7 +660,6 @@ const struct amdgpu_gfxhub_funcs gfxhub_v2_1_funcs = { .gart_disable = gfxhub_v2_1_gart_disable, .set_fault_enable_default = gfxhub_v2_1_set_fault_enable_default, .init = gfxhub_v2_1_init, - .get_xgmi_info = gfxhub_v2_1_get_xgmi_info, .utcl2_harvest = gfxhub_v2_1_utcl2_harvest, .mode2_save_regs = gfxhub_v2_1_save_regs, .mode2_restore_regs = gfxhub_v2_1_restore_regs, diff --git 
a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 697599c46240..95d894a231fc 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -1076,7 +1076,7 @@ static int gmc_v10_0_resume(struct amdgpu_ip_block *ip_block) return 0; } -static bool gmc_v10_0_is_idle(void *handle) +static bool gmc_v10_0_is_idle(struct amdgpu_ip_block *ip_block) { /* MC is always ready in GMC v10.*/ return true; @@ -1088,11 +1088,11 @@ static int gmc_v10_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int gmc_v10_0_set_clockgating_state(void *handle, +static int gmc_v10_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; /* * The issue mmhub can't disconnect from DF with MMHUB clock gating being disabled @@ -1115,9 +1115,9 @@ static int gmc_v10_0_set_clockgating_state(void *handle, return athub_v2_0_set_clockgating(adev, state); } -static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags) +static void gmc_v10_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 3) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(10, 1, 4)) @@ -1131,7 +1131,7 @@ static void gmc_v10_0_get_clockgating_state(void *handle, u64 *flags) athub_v2_0_get_clockgating(adev, flags); } -static int gmc_v10_0_set_powergating_state(void *handle, +static int gmc_v10_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index f893ab4c14df..ad099f136f84 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -579,6 +579,7 @@ static void gmc_v11_0_set_mmhub_funcs(struct amdgpu_device *adev) break; case IP_VERSION(3, 3, 0): case IP_VERSION(3, 3, 1): + case IP_VERSION(3, 3, 2): adev->mmhub.funcs = &mmhub_v3_3_funcs; break; default: @@ -596,6 +597,7 @@ static void gmc_v11_0_set_gfxhub_funcs(struct amdgpu_device *adev) case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): adev->gfxhub.funcs = &gfxhub_v11_5_0_funcs; break; default: @@ -759,6 +761,7 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block) case IP_VERSION(11, 5, 0): case IP_VERSION(11, 5, 1): case IP_VERSION(11, 5, 2): + case IP_VERSION(11, 5, 3): set_bit(AMDGPU_GFXHUB(0), adev->vmhubs_mask); set_bit(AMDGPU_MMHUB0(0), adev->vmhubs_mask); /* @@ -984,7 +987,7 @@ static int gmc_v11_0_resume(struct amdgpu_ip_block *ip_block) return 0; } -static bool gmc_v11_0_is_idle(void *handle) +static bool gmc_v11_0_is_idle(struct amdgpu_ip_block *ip_block) { /* MC is always ready in GMC v11.*/ return true; @@ -996,11 +999,11 @@ static int gmc_v11_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int gmc_v11_0_set_clockgating_state(void *handle, +static int gmc_v11_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = adev->mmhub.funcs->set_clockgating(adev, state); if (r) @@ -1009,16 +1012,16 @@ static int 
gmc_v11_0_set_clockgating_state(void *handle, return athub_v3_0_set_clockgating(adev, state); } -static void gmc_v11_0_get_clockgating_state(void *handle, u64 *flags) +static void gmc_v11_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->mmhub.funcs->get_clockgating(adev, flags); athub_v3_0_get_clockgating(adev, flags); } -static int gmc_v11_0_set_powergating_state(void *handle, +static int gmc_v11_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c index d22b027fd0bb..05c026d0b0d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v12_0.c @@ -40,7 +40,7 @@ #include "gfxhub_v12_0.h" #include "mmhub_v4_1_0.h" #include "athub_v4_1_0.h" - +#include "umc_v8_14.h" static int gmc_v12_0_ecc_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, @@ -501,9 +501,6 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, uint64_t *flags) { struct amdgpu_bo *bo = mapping->bo_va->base.bo; - struct amdgpu_device *bo_adev; - bool coherent, is_system; - *flags &= ~AMDGPU_PTE_EXECUTABLE; *flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; @@ -519,25 +516,11 @@ static void gmc_v12_0_get_vm_pte(struct amdgpu_device *adev, *flags &= ~AMDGPU_PTE_VALID; } - if (!bo) - return; - - if (bo->flags & (AMDGPU_GEM_CREATE_COHERENT | - AMDGPU_GEM_CREATE_UNCACHED)) - *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); - - bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); - coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; - is_system = (bo->tbo.resource->mem_type == TTM_PL_TT) || - (bo->tbo.resource->mem_type == AMDGPU_PL_PREEMPT); - if (bo && bo->flags & AMDGPU_GEM_CREATE_GFX12_DCC) *flags |= AMDGPU_PTE_DCC; - /* WA for HW bug */ - if (is_system || ((bo_adev != adev) && coherent)) - *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_NC); - + if (bo && bo->flags & AMDGPU_GEM_CREATE_UNCACHED) + *flags = AMDGPU_PTE_MTYPE_GFX12(*flags, MTYPE_UC); } static unsigned gmc_v12_0_get_vbios_fb_size(struct amdgpu_device *adev) @@ -581,6 +564,18 @@ static void gmc_v12_0_set_gmc_funcs(struct amdgpu_device *adev) static void gmc_v12_0_set_umc_funcs(struct amdgpu_device *adev) { + switch (amdgpu_ip_version(adev, UMC_HWIP, 0)) { + case IP_VERSION(8, 14, 0): + adev->umc.channel_inst_num = UMC_V8_14_CHANNEL_INSTANCE_NUM; + adev->umc.umc_inst_num = UMC_V8_14_UMC_INSTANCE_NUM(adev); + adev->umc.node_inst_num = 0; + adev->umc.max_ras_err_cnt_per_query = UMC_V8_14_TOTAL_CHANNEL_NUM(adev); + adev->umc.channel_offs = UMC_V8_14_PER_CHANNEL_OFFSET; + adev->umc.ras = &umc_v8_14_ras; + break; + default: + break; + } } @@ -829,6 +824,10 @@ static int gmc_v12_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_vm_manager_init(adev); + r = amdgpu_gmc_ras_sw_init(adev); + if (r) + return r; + return 0; } @@ -968,7 +967,7 @@ static int gmc_v12_0_resume(struct amdgpu_ip_block *ip_block) return 0; } -static bool gmc_v12_0_is_idle(void *handle) +static bool gmc_v12_0_is_idle(struct amdgpu_ip_block *ip_block) { /* MC is always ready in GMC v11.*/ return true; @@ -980,11 +979,11 @@ static int gmc_v12_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int gmc_v12_0_set_clockgating_state(void *handle, +static int gmc_v12_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { int r; 
- struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; r = adev->mmhub.funcs->set_clockgating(adev, state); if (r) @@ -993,16 +992,16 @@ static int gmc_v12_0_set_clockgating_state(void *handle, return athub_v4_1_0_set_clockgating(adev, state); } -static void gmc_v12_0_get_clockgating_state(void *handle, u64 *flags) +static void gmc_v12_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->mmhub.funcs->get_clockgating(adev, flags); athub_v4_1_0_get_clockgating(adev, flags); } -static int gmc_v12_0_set_powergating_state(void *handle, +static int gmc_v12_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index ca000b3d1afc..a992e79d9581 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -131,7 +131,8 @@ static int gmc_v6_0_init_microcode(struct amdgpu_device *adev) if (((RREG32(mmMC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58) chip_name = "si58"; - err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mc.bin", chip_name); if (err) { dev_err(adev->dev, "si_mc: Failed to load firmware \"%s_mc.bin\"\n", @@ -956,9 +957,9 @@ static int gmc_v6_0_resume(struct amdgpu_ip_block *ip_block) return 0; } -static bool gmc_v6_0_is_idle(void *handle) +static bool gmc_v6_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(mmSRBM_STATUS); @@ -975,7 +976,7 @@ static int gmc_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (gmc_v6_0_is_idle(adev)) + if (gmc_v6_0_is_idle(ip_block)) return 0; udelay(1); } @@ -1094,13 +1095,13 @@ static int gmc_v6_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int gmc_v6_0_set_clockgating_state(void *handle, +static int gmc_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int gmc_v6_0_set_powergating_state(void *handle, +static int gmc_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index b6016f11956e..83e39f16044a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -157,7 +157,8 @@ static int gmc_v7_0_init_microcode(struct amdgpu_device *adev) return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mc.bin", chip_name); if (err) { pr_err("cik_mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name); amdgpu_ucode_release(&adev->gmc.fw); @@ -1141,9 +1142,9 @@ static int gmc_v7_0_resume(struct amdgpu_ip_block *ip_block) return 0; } -static bool gmc_v7_0_is_idle(void *handle) +static bool gmc_v7_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(mmSRBM_STATUS); if 
(tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | @@ -1317,11 +1318,11 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int gmc_v7_0_set_clockgating_state(void *handle, +static int gmc_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { bool gate = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_CG_STATE_GATE) gate = true; @@ -1337,7 +1338,7 @@ static int gmc_v7_0_set_clockgating_state(void *handle, return 0; } -static int gmc_v7_0_set_powergating_state(void *handle, +static int gmc_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 12d5967ecd45..99ca08e9bdb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -259,7 +259,8 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) return -EINVAL; } - err = amdgpu_ucode_request(adev, &adev->gmc.fw, "amdgpu/%s_mc.bin", chip_name); + err = amdgpu_ucode_request(adev, &adev->gmc.fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_mc.bin", chip_name); if (err) { pr_err("mc: Failed to load firmware \"%s_mc.bin\"\n", chip_name); amdgpu_ucode_release(&adev->gmc.fw); @@ -1262,9 +1263,9 @@ static int gmc_v8_0_resume(struct amdgpu_ip_block *ip_block) return 0; } -static bool gmc_v8_0_is_idle(void *handle) +static bool gmc_v8_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(mmSRBM_STATUS); if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK | @@ -1658,10 +1659,10 @@ static void fiji_update_mc_light_sleep(struct amdgpu_device *adev, } } -static int gmc_v8_0_set_clockgating_state(void *handle, +static int gmc_v8_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1679,15 +1680,15 @@ static int gmc_v8_0_set_clockgating_state(void *handle, return 0; } -static int gmc_v8_0_set_powergating_state(void *handle, +static int gmc_v8_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; } -static void gmc_v8_0_get_clockgating_state(void *handle, u64 *flags) +static void gmc_v8_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; if (amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 50c5da3020cb..783e0c3b86b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -411,6 +411,11 @@ static const uint32_t ecc_umc_mcumc_ctrl_mask_addrs[] = { (0x001d43e0 + 0x00001800), }; +static inline bool gmc_v9_0_is_multi_chiplet(struct amdgpu_device *adev) +{ + return !!adev->aid_mask; +} + static int gmc_v9_0_ecc_interrupt_state(struct amdgpu_device *adev, struct amdgpu_irq_src *src, unsigned int type, @@ -623,6 +628,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, } } + if (kgd2kfd_vmfault_fast_path(adev, entry, retry_fault)) + return 1; 
+ if (!printk_ratelimit()) return 0; @@ -644,8 +652,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr, entry->client_id, soc15_ih_clientid_name[entry->client_id]); - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + if (gmc_v9_0_is_multi_chiplet(adev)) dev_err(adev->dev, " cookie node_id %d fault from die %s%d%s\n", node_id, node_id % 4 == 3 ? "RSV" : "AID", node_id / 4, node_id % 4 == 1 ? ".XCD0" : node_id % 4 == 2 ? ".XCD1" : ""); @@ -794,8 +801,7 @@ static bool gmc_v9_0_use_invalidate_semaphore(struct amdgpu_device *adev, uint32_t vmhub) { if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + gmc_v9_0_is_multi_chiplet(adev)) return false; return ((vmhub == AMDGPU_MMHUB0(0) || @@ -1138,12 +1144,13 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; struct amdgpu_vm *vm = mapping->bo_va->base.vm; unsigned int mtype_local, mtype; + uint32_t gc_ip_version = amdgpu_ip_version(adev, GC_HWIP, 0); bool snoop = false; bool is_local; dma_resv_assert_held(bo->tbo.base.resv); - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + switch (gc_ip_version) { case IP_VERSION(9, 4, 1): case IP_VERSION(9, 4, 2): if (is_vram) { @@ -1157,10 +1164,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, /* FIXME: is this still needed? Or does * amdgpu_ttm_tt_pde_flags already handle this? */ - if ((amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 2) || - amdgpu_ip_version(adev, GC_HWIP, 0) == - IP_VERSION(9, 4, 3)) && + if (gc_ip_version == IP_VERSION(9, 4, 2) && adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { @@ -1184,6 +1188,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): /* Only local VRAM BOs or system memory on non-NUMA APUs * can be assumed to be local in their entirety. Choose * MTYPE_NC as safe fallback for all system memory BOs on @@ -1208,7 +1213,7 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, if (uncached) { mtype = MTYPE_UC; } else if (ext_coherent) { - if (adev->rev_id) + if (gc_ip_version == IP_VERSION(9, 5, 0) || adev->rev_id) mtype = is_local ? MTYPE_CC : MTYPE_UC; else mtype = MTYPE_UC; @@ -1218,10 +1223,10 @@ static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, /* dGPU */ if (is_local) mtype = mtype_local; - else if (is_vram) - mtype = MTYPE_NC; - else + else if (gc_ip_version < IP_VERSION(9, 5, 0) && !is_vram) mtype = MTYPE_UC; + else + mtype = MTYPE_NC; } break; @@ -1274,8 +1279,8 @@ static void gmc_v9_0_override_vm_pte_flags(struct amdgpu_device *adev, /* Only GFX 9.4.3 APUs associate GPUs with NUMA nodes. Local system * memory can use more efficient MTYPEs. 
*/ - if (amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3) && - amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 4)) + if (!(adev->flags & AMD_IS_APU) || + amdgpu_ip_version(adev, GC_HWIP, 0) != IP_VERSION(9, 4, 3)) return; /* Only direct-mapped memory allows us to determine the NUMA node from @@ -1493,13 +1498,13 @@ static void gmc_v9_0_set_umc_funcs(struct amdgpu_device *adev) adev->umc.channel_idx_tbl = &umc_v6_7_channel_idx_tbl_second[0][0]; break; case IP_VERSION(12, 0, 0): + case IP_VERSION(12, 5, 0): adev->umc.max_ras_err_cnt_per_query = UMC_V12_0_TOTAL_CHANNEL_NUM(adev) * UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; adev->umc.channel_inst_num = UMC_V12_0_CHANNEL_INSTANCE_NUM; adev->umc.umc_inst_num = UMC_V12_0_UMC_INSTANCE_NUM; adev->umc.node_inst_num /= UMC_V12_0_UMC_INSTANCE_NUM; adev->umc.channel_offs = UMC_V12_0_PER_CHANNEL_OFFSET; - adev->umc.active_mask = adev->aid_mask; adev->umc.retire_unit = UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) adev->umc.ras = &umc_v12_0_ras; @@ -1519,6 +1524,7 @@ static void gmc_v9_0_set_mmhub_funcs(struct amdgpu_device *adev) adev->mmhub.funcs = &mmhub_v1_7_funcs; break; case IP_VERSION(1, 8, 0): + case IP_VERSION(1, 8, 1): adev->mmhub.funcs = &mmhub_v1_8_funcs; break; default: @@ -1540,6 +1546,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) adev->mmhub.ras = &mmhub_v1_7_ras; break; case IP_VERSION(1, 8, 0): + case IP_VERSION(1, 8, 1): adev->mmhub.ras = &mmhub_v1_8_ras; break; default: @@ -1550,8 +1557,7 @@ static void gmc_v9_0_set_mmhub_ras_funcs(struct amdgpu_device *adev) static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) { - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + if (gmc_v9_0_is_multi_chiplet(adev)) adev->gfxhub.funcs = &gfxhub_v1_2_funcs; else adev->gfxhub.funcs = &gfxhub_v1_0_funcs; @@ -1588,23 +1594,38 @@ static void gmc_v9_0_set_xgmi_ras_funcs(struct amdgpu_device *adev) static void gmc_v9_0_init_nps_details(struct amdgpu_device *adev) { + enum amdgpu_memory_partition mode; + uint32_t supp_modes; + int i; + adev->gmc.supported_nps_modes = 0; if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU)) return; - /*TODO: Check PSP version also which supports NPS switch. Otherwise keep + mode = gmc_v9_0_get_memory_partition(adev, &supp_modes); + + /* Mode detected by hardware and supported modes available */ + if ((mode != UNKNOWN_MEMORY_PARTITION_MODE) && supp_modes) { + while ((i = ffs(supp_modes))) { + if (AMDGPU_ALL_NPS_MASK & BIT(i)) + adev->gmc.supported_nps_modes |= BIT(i); + supp_modes &= supp_modes - 1; + } + } else { + /*TODO: Check PSP version also which supports NPS switch. Otherwise keep * supported modes as 0. 
*/ - switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { - case IP_VERSION(9, 4, 3): - case IP_VERSION(9, 4, 4): - adev->gmc.supported_nps_modes = - BIT(AMDGPU_NPS1_PARTITION_MODE) | - BIT(AMDGPU_NPS4_PARTITION_MODE); - break; - default: - break; + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): + adev->gmc.supported_nps_modes = + BIT(AMDGPU_NPS1_PARTITION_MODE) | + BIT(AMDGPU_NPS4_PARTITION_MODE); + break; + default: + break; + } } } @@ -1618,8 +1639,7 @@ static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block) */ if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 0) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + gmc_v9_0_is_multi_chiplet(adev)) adev->gmc.xgmi.supported = true; if (amdgpu_ip_version(adev, XGMI_HWIP, 0) == IP_VERSION(6, 1, 0)) { @@ -1628,8 +1648,7 @@ static int gmc_v9_0_early_init(struct amdgpu_ip_block *ip_block) adev->smuio.funcs->is_host_gpu_xgmi_supported(adev); } - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) { + if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3)) { enum amdgpu_pkg_type pkg_type = adev->smuio.funcs->get_pkg_type(adev); /* On GFXIP 9.4.3. APU, there is no physical VRAM domain present @@ -1792,6 +1811,7 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev) case IP_VERSION(9, 4, 2): case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): default: adev->gmc.gart_size = 512ULL << 20; break; @@ -2069,8 +2089,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) spin_lock_init(&adev->gmc.invalidate_lock); - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) { + if (gmc_v9_0_is_multi_chiplet(adev)) { gmc_v9_4_3_init_vram_info(adev); } else if (!adev->bios) { if (adev->flags & AMD_IS_APU) { @@ -2154,6 +2173,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), NUM_XCC(adev->gfx.xcc_mask)); @@ -2219,8 +2239,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) amdgpu_gmc_get_vbios_allocations(adev); - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) { + if (gmc_v9_0_is_multi_chiplet(adev)) { r = gmc_v9_0_init_mem_ranges(adev); if (r) return r; @@ -2249,8 +2268,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) adev->vm_manager.first_kfd_vmid = (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 1) || amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) ? + gmc_v9_0_is_multi_chiplet(adev)) ? 
3 : 8; @@ -2262,8 +2280,7 @@ static int gmc_v9_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + if (gmc_v9_0_is_multi_chiplet(adev)) amdgpu_gmc_sysfs_init(adev); return 0; @@ -2273,8 +2290,7 @@ static int gmc_v9_0_sw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || - amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) + if (gmc_v9_0_is_multi_chiplet(adev)) amdgpu_gmc_sysfs_fini(adev); amdgpu_gmc_ras_fini(adev); @@ -2526,7 +2542,7 @@ static int gmc_v9_0_resume(struct amdgpu_ip_block *ip_block) return 0; } -static bool gmc_v9_0_is_idle(void *handle) +static bool gmc_v9_0_is_idle(struct amdgpu_ip_block *ip_block) { /* MC is always ready in GMC v9.*/ return true; @@ -2544,10 +2560,10 @@ static int gmc_v9_0_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int gmc_v9_0_set_clockgating_state(void *handle, +static int gmc_v9_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->mmhub.funcs->set_clockgating(adev, state); @@ -2556,16 +2572,16 @@ static int gmc_v9_0_set_clockgating_state(void *handle, return 0; } -static void gmc_v9_0_get_clockgating_state(void *handle, u64 *flags) +static void gmc_v9_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->mmhub.funcs->get_clockgating(adev, flags); athub_v1_0_get_clockgating(adev, flags); } -static int gmc_v9_0_set_powergating_state(void *handle, +static int gmc_v9_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index 194026e9be33..f1dc13b3ab38 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -21,7 +21,6 @@ * */ #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "hdp_v4_0.h" #include "amdgpu_ras.h" diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c index d3962d469088..43195c079748 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -21,7 +21,6 @@ * */ #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "hdp_v5_0.h" #include "hdp/hdp_5_0_0_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c index f52552c5fa27..fcb8dd2876bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c @@ -21,7 +21,6 @@ * */ #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "hdp_v5_2.h" #include "hdp/hdp_5_2_1_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c index 6948fe9956ce..a88d25a06c29 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c @@ -21,7 +21,6 @@ * */ #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "hdp_v6_0.h" #include "hdp/hdp_6_0_0_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c index 63820329f67e..49f7eb4fbd11 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c +++ 
b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c @@ -21,7 +21,6 @@ * */ #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "hdp_v7_0.h" #include "hdp/hdp_7_0_0_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 7f45e93c0397..1317ede131b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -335,9 +335,9 @@ static int iceland_ih_resume(struct amdgpu_ip_block *ip_block) return iceland_ih_hw_init(ip_block); } -static bool iceland_ih_is_idle(void *handle) +static bool iceland_ih_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(mmSRBM_STATUS); if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY)) @@ -392,13 +392,13 @@ static int iceland_ih_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int iceland_ih_set_clockgating_state(void *handle, +static int iceland_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int iceland_ih_set_powergating_state(void *handle, +static int iceland_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c index 38f953fd65d9..eb4185dcbd1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_0.c @@ -652,7 +652,7 @@ static int ih_v6_0_resume(struct amdgpu_ip_block *ip_block) return ih_v6_0_hw_init(ip_block); } -static bool ih_v6_0_is_idle(void *handle) +static bool ih_v6_0_is_idle(struct amdgpu_ip_block *ip_block) { /* todo */ return true; @@ -693,10 +693,10 @@ static void ih_v6_0_update_clockgating_state(struct amdgpu_device *adev, } } -static int ih_v6_0_set_clockgating_state(void *handle, +static int ih_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; ih_v6_0_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); @@ -756,10 +756,10 @@ static void ih_v6_0_update_ih_mem_power_gating(struct amdgpu_device *adev, WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); } -static int ih_v6_0_set_powergating_state(void *handle, +static int ih_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG) @@ -768,9 +768,9 @@ static int ih_v6_0_set_powergating_state(void *handle, return 0; } -static void ih_v6_0_get_clockgating_state(void *handle, u64 *flags) +static void ih_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL)) *flags |= AMD_CG_SUPPORT_IH_CG; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c index 61381e0c3795..068ed849dbad 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v6_1.c @@ -631,7 +631,7 @@ static int ih_v6_1_resume(struct amdgpu_ip_block *ip_block) return ih_v6_1_hw_init(ip_block); } -static bool ih_v6_1_is_idle(void *handle) +static bool 
ih_v6_1_is_idle(struct amdgpu_ip_block *ip_block) { /* todo */ return true; @@ -674,10 +674,10 @@ static void ih_v6_1_update_clockgating_state(struct amdgpu_device *adev, return; } -static int ih_v6_1_set_clockgating_state(void *handle, +static int ih_v6_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; ih_v6_1_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); @@ -737,10 +737,10 @@ static void ih_v6_1_update_ih_mem_power_gating(struct amdgpu_device *adev, WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); } -static int ih_v6_1_set_powergating_state(void *handle, +static int ih_v6_1_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG) @@ -749,9 +749,9 @@ static int ih_v6_1_set_powergating_state(void *handle, return 0; } -static void ih_v6_1_get_clockgating_state(void *handle, u64 *flags) +static void ih_v6_1_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL)) *flags |= AMD_CG_SUPPORT_IH_CG; diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c index d2428cf5d385..40a3530e0453 100644 --- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c @@ -621,7 +621,7 @@ static int ih_v7_0_resume(struct amdgpu_ip_block *ip_block) return ih_v7_0_hw_init(ip_block); } -static bool ih_v7_0_is_idle(void *handle) +static bool ih_v7_0_is_idle(struct amdgpu_ip_block *ip_block) { /* todo */ return true; @@ -664,10 +664,10 @@ static void ih_v7_0_update_clockgating_state(struct amdgpu_device *adev, return; } -static int ih_v7_0_set_clockgating_state(void *handle, +static int ih_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; ih_v7_0_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); @@ -727,10 +727,10 @@ static void ih_v7_0_update_ih_mem_power_gating(struct amdgpu_device *adev, WREG32_SOC15(OSSSYS, 0, regIH_MEM_POWER_CTRL, ih_mem_pwr_cntl); } -static int ih_v7_0_set_powergating_state(void *handle, +static int ih_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_PG_STATE_GATE); if (adev->pg_flags & AMD_PG_SUPPORT_IH_SRAM_PG) @@ -739,9 +739,9 @@ static int ih_v7_0_set_powergating_state(void *handle, return 0; } -static void ih_v7_0_get_clockgating_state(void *handle, u64 *flags) +static void ih_v7_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!RREG32_SOC15(OSSSYS, 0, regIH_CLK_CTRL)) *flags |= AMD_CG_SUPPORT_IH_CG; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index d4f72e47ae9e..cfa91d709d49 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c 
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -39,6 +39,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_4_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_0_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_1_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_2_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_3_imu.bin"); static int imu_v11_0_init_microcode(struct amdgpu_device *adev) { @@ -50,7 +51,8 @@ static int imu_v11_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix); + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c index 1341f0292031..df898dbb746e 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v12_0.c @@ -47,7 +47,8 @@ static int imu_v12_0_init_microcode(struct amdgpu_device *adev) DRM_DEBUG("\n"); amdgpu_ucode_ip_version_decode(adev, GC_HWIP, ucode_prefix, sizeof(ucode_prefix)); - err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, "amdgpu/%s_imu.bin", ucode_prefix); + err = amdgpu_ucode_request(adev, &adev->gfx.imu_fw, AMDGPU_UCODE_REQUIRED, + "amdgpu/%s_imu.bin", ucode_prefix); if (err) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c index 964c29ef25dc..0027a639c7e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.c @@ -50,26 +50,29 @@ static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp) isp_base = adev->rmmio_base; - isp->isp_cell = kcalloc(2, sizeof(struct mfd_cell), GFP_KERNEL); + isp->isp_cell = kcalloc(3, sizeof(struct mfd_cell), GFP_KERNEL); if (!isp->isp_cell) { r = -ENOMEM; - DRM_ERROR("%s: isp mfd cell alloc failed\n", __func__); + drm_err(&adev->ddev, + "%s: isp mfd cell alloc failed\n", __func__); goto failure; } - num_res = MAX_ISP410_MEM_RES + MAX_ISP410_SENSOR_RES + MAX_ISP410_INT_SRC; + num_res = MAX_ISP410_MEM_RES + MAX_ISP410_INT_SRC; isp->isp_res = kcalloc(num_res, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_res) { r = -ENOMEM; - DRM_ERROR("%s: isp mfd res alloc failed\n", __func__); + drm_err(&adev->ddev, + "%s: isp mfd res alloc failed\n", __func__); goto failure; } isp->isp_pdata = kzalloc(sizeof(*isp->isp_pdata), GFP_KERNEL); if (!isp->isp_pdata) { r = -ENOMEM; - DRM_ERROR("%s: isp platform data alloc failed\n", __func__); + drm_err(&adev->ddev, + "%s: isp platform data alloc failed\n", __func__); goto failure; } @@ -88,14 +91,7 @@ static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp) isp->isp_res[1].start = isp_base + ISP410_PHY0_OFFSET; isp->isp_res[1].end = isp_base + ISP410_PHY0_OFFSET + ISP410_PHY0_SIZE; - isp->isp_res[2].name = "isp_gpio_sensor0_reg"; - isp->isp_res[2].flags = IORESOURCE_MEM; - isp->isp_res[2].start = isp_base + ISP410_GPIO_SENSOR0_OFFSET; - isp->isp_res[2].end = isp_base + ISP410_GPIO_SENSOR0_OFFSET + - ISP410_GPIO_SENSOR0_SIZE; - - for (idx = MAX_ISP410_MEM_RES + MAX_ISP410_SENSOR_RES, int_idx = 0; - idx < num_res; idx++, int_idx++) { + for (idx = MAX_ISP410_MEM_RES, int_idx = 0; idx < num_res; idx++, int_idx++) { isp->isp_res[idx].name = "isp_4_1_0_irq"; isp->isp_res[idx].flags = IORESOURCE_IRQ; isp->isp_res[idx].start = @@ -110,11 +106,12 @@ static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp) isp->isp_cell[0].platform_data = isp->isp_pdata; 
isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data); - isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), - GFP_KERNEL); + /* initialize isp i2c platform data */ + isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_i2c_res) { r = -ENOMEM; - DRM_ERROR("%s: isp mfd res alloc failed\n", __func__); + drm_err(&adev->ddev, + "%s: isp mfd res alloc failed\n", __func__); goto failure; } @@ -129,9 +126,31 @@ static int isp_v4_1_0_hw_init(struct amdgpu_isp *isp) isp->isp_cell[1].platform_data = isp->isp_pdata; isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data); - r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2); + /* initialize isp gpiochip platform data */ + isp->isp_gpio_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL); + if (!isp->isp_gpio_res) { + r = -ENOMEM; + drm_err(&adev->ddev, + "%s: isp gpio res alloc failed\n", __func__); + goto failure; + } + + isp->isp_gpio_res[0].name = "isp_gpio_reg"; + isp->isp_gpio_res[0].flags = IORESOURCE_MEM; + isp->isp_gpio_res[0].start = isp_base + ISP410_GPIO_SENSOR_OFFSET; + isp->isp_gpio_res[0].end = isp_base + ISP410_GPIO_SENSOR_OFFSET + + ISP410_GPIO_SENSOR_SIZE; + + isp->isp_cell[2].name = "amdisp-pinctrl"; + isp->isp_cell[2].num_resources = 1; + isp->isp_cell[2].resources = &isp->isp_gpio_res[0]; + isp->isp_cell[2].platform_data = isp->isp_pdata; + isp->isp_cell[2].pdata_size = sizeof(struct isp_platform_data); + + r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 3); if (r) { - DRM_ERROR("%s: add mfd hotplug device failed\n", __func__); + drm_err(&adev->ddev, + "%s: add mfd hotplug device failed\n", __func__); goto failure; } @@ -143,6 +162,7 @@ failure: kfree(isp->isp_res); kfree(isp->isp_cell); kfree(isp->isp_i2c_res); + kfree(isp->isp_gpio_res); return r; } @@ -155,6 +175,7 @@ static int isp_v4_1_0_hw_fini(struct amdgpu_isp *isp) kfree(isp->isp_cell); kfree(isp->isp_pdata); kfree(isp->isp_i2c_res); + kfree(isp->isp_gpio_res); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h index 7db24c0f1080..4d239198edd0 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_0.h @@ -42,8 +42,8 @@ #define ISP410_I2C0_OFFSET 0x66400 #define ISP410_I2C0_SIZE 0x100 -#define ISP410_GPIO_SENSOR0_OFFSET 0x6613C -#define ISP410_GPIO_SENSOR0_SIZE 0x4 +#define ISP410_GPIO_SENSOR_OFFSET 0x6613C +#define ISP410_GPIO_SENSOR_SIZE 0x54 void isp_v4_1_0_set_isp_funcs(struct amdgpu_isp *isp); diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c index b56f27295468..69dd92f6e86d 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c @@ -50,27 +50,30 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) isp_base = adev->rmmio_base; - isp->isp_cell = kcalloc(2, sizeof(struct mfd_cell), GFP_KERNEL); + isp->isp_cell = kcalloc(3, sizeof(struct mfd_cell), GFP_KERNEL); if (!isp->isp_cell) { r = -ENOMEM; - DRM_ERROR("%s: isp mfd cell alloc failed\n", __func__); + drm_err(&adev->ddev, + "%s: isp mfd cell alloc failed\n", __func__); goto failure; } - num_res = MAX_ISP411_MEM_RES + MAX_ISP411_SENSOR_RES + MAX_ISP411_INT_SRC; + num_res = MAX_ISP411_MEM_RES + MAX_ISP411_INT_SRC; isp->isp_res = kcalloc(num_res, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_res) { r = -ENOMEM; - DRM_ERROR("%s: isp mfd res alloc failed\n", __func__); + drm_err(&adev->ddev, + "%s: isp mfd res alloc failed\n", __func__); goto failure; } isp->isp_pdata 
= kzalloc(sizeof(*isp->isp_pdata), GFP_KERNEL); if (!isp->isp_pdata) { r = -ENOMEM; - DRM_ERROR("%s: isp platform data alloc failed\n", __func__); + drm_err(&adev->ddev, + "%s: isp platform data alloc failed\n", __func__); goto failure; } @@ -89,14 +92,7 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) isp->isp_res[1].start = isp_base + ISP411_PHY0_OFFSET; isp->isp_res[1].end = isp_base + ISP411_PHY0_OFFSET + ISP411_PHY0_SIZE; - isp->isp_res[2].name = "isp_4_1_1_sensor0_reg"; - isp->isp_res[2].flags = IORESOURCE_MEM; - isp->isp_res[2].start = isp_base + ISP411_GPIO_SENSOR0_OFFSET; - isp->isp_res[2].end = isp_base + ISP411_GPIO_SENSOR0_OFFSET + - ISP411_GPIO_SENSOR0_SIZE; - - for (idx = MAX_ISP411_MEM_RES + MAX_ISP411_SENSOR_RES, int_idx = 0; - idx < num_res; idx++, int_idx++) { + for (idx = MAX_ISP411_MEM_RES, int_idx = 0; idx < num_res; idx++, int_idx++) { isp->isp_res[idx].name = "isp_4_1_1_irq"; isp->isp_res[idx].flags = IORESOURCE_IRQ; isp->isp_res[idx].start = @@ -111,10 +107,12 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) isp->isp_cell[0].platform_data = isp->isp_pdata; isp->isp_cell[0].pdata_size = sizeof(struct isp_platform_data); + /* initialize isp i2c platform data */ isp->isp_i2c_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL); if (!isp->isp_i2c_res) { r = -ENOMEM; - DRM_ERROR("%s: isp mfd res alloc failed\n", __func__); + drm_err(&adev->ddev, + "%s: isp mfd res alloc failed\n", __func__); goto failure; } @@ -129,9 +127,31 @@ static int isp_v4_1_1_hw_init(struct amdgpu_isp *isp) isp->isp_cell[1].platform_data = isp->isp_pdata; isp->isp_cell[1].pdata_size = sizeof(struct isp_platform_data); - r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 2); + /* initialize isp gpiochip platform data */ + isp->isp_gpio_res = kcalloc(1, sizeof(struct resource), GFP_KERNEL); + if (!isp->isp_gpio_res) { + r = -ENOMEM; + drm_err(&adev->ddev, + "%s: isp gpio res alloc failed\n", __func__); + goto failure; + } + + isp->isp_gpio_res[0].name = "isp_gpio_reg"; + isp->isp_gpio_res[0].flags = IORESOURCE_MEM; + isp->isp_gpio_res[0].start = isp_base + ISP411_GPIO_SENSOR_OFFSET; + isp->isp_gpio_res[0].end = isp_base + ISP411_GPIO_SENSOR_OFFSET + + ISP411_GPIO_SENSOR_SIZE; + + isp->isp_cell[2].name = "amdisp-pinctrl"; + isp->isp_cell[2].num_resources = 1; + isp->isp_cell[2].resources = &isp->isp_gpio_res[0]; + isp->isp_cell[2].platform_data = isp->isp_pdata; + isp->isp_cell[2].pdata_size = sizeof(struct isp_platform_data); + + r = mfd_add_hotplug_devices(isp->parent, isp->isp_cell, 3); if (r) { - DRM_ERROR("%s: add mfd hotplug device failed\n", __func__); + drm_err(&adev->ddev, + "%s: add mfd hotplug device failed\n", __func__); goto failure; } @@ -143,6 +163,7 @@ failure: kfree(isp->isp_res); kfree(isp->isp_cell); kfree(isp->isp_i2c_res); + kfree(isp->isp_gpio_res); return r; } @@ -155,6 +176,7 @@ static int isp_v4_1_1_hw_fini(struct amdgpu_isp *isp) kfree(isp->isp_cell); kfree(isp->isp_pdata); kfree(isp->isp_i2c_res); + kfree(isp->isp_gpio_res); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h index 40887ddeb08c..fe45d70d87f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h +++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.h @@ -33,7 +33,6 @@ #include "ivsrcid/isp/irqsrcs_isp_4_1.h" #define MAX_ISP411_MEM_RES 2 -#define MAX_ISP411_SENSOR_RES 1 #define MAX_ISP411_INT_SRC 8 #define ISP411_PHY0_OFFSET 0x66700 @@ -42,8 +41,8 @@ #define ISP411_I2C0_OFFSET 0x66400 #define ISP411_I2C0_SIZE 0x100 -#define 
ISP411_GPIO_SENSOR0_OFFSET 0x6613C -#define ISP411_GPIO_SENSOR0_SIZE 0x4 +#define ISP411_GPIO_SENSOR_OFFSET 0x6613C +#define ISP411_GPIO_SENSOR_SIZE 0x54 void isp_v4_1_1_set_isp_funcs(struct amdgpu_isp *isp); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c index 03b8b7cd5229..9e428e669ada 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v1_0.c @@ -604,15 +604,15 @@ static void jpeg_v1_0_set_irq_funcs(struct amdgpu_device *adev) static void jpeg_v1_0_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); + bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work); int cnt = 0; - mutex_lock(&adev->vcn.vcn1_jpeg1_workaround); + mutex_lock(&adev->vcn.inst[0].vcn1_jpeg1_workaround); if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_dec)) DRM_ERROR("JPEG dec: vcn dec ring may not be empty\n"); - for (cnt = 0; cnt < adev->vcn.num_enc_rings; cnt++) { + for (cnt = 0; cnt < adev->vcn.inst[0].num_enc_rings; cnt++) { if (amdgpu_fence_wait_empty(&adev->vcn.inst->ring_enc[cnt])) DRM_ERROR("JPEG dec: vcn enc ring[%d] may not be empty\n", cnt); } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c index 6e29b69894a5..4cde8a8bcc83 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c @@ -33,9 +33,25 @@ #include "vcn/vcn_2_0_0_sh_mask.h" #include "ivsrcid/vcn/irqsrcs_vcn_2_0.h" +static const struct amdgpu_hwip_reg_entry jpeg_reg_list_2_0[] = { + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_POWER_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_INT_STAT), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_CNTL), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_SIZE), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_ADDR_MODE), + SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG), + SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_Y_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_UV_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_PITCH), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_UV_PITCH), +}; + static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v2_0_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v2_0_set_powergating_state(void *handle, +static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); /** @@ -98,7 +114,14 @@ static int jpeg_v2_0_sw_init(struct amdgpu_ip_block *ip_block) adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); - return 0; + r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_2_0, ARRAY_SIZE(jpeg_reg_list_2_0)); + if (r) + return r; + + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); + + return r; } /** @@ -117,6 +140,8 @@ static int jpeg_v2_0_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; + amdgpu_jpeg_sysfs_reset_mask_fini(adev); + r = amdgpu_jpeg_sw_fini(adev); return r; @@ -154,7 +179,7 @@ static int jpeg_v2_0_hw_fini(struct amdgpu_ip_block *ip_block) if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS)) - 
jpeg_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v2_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); return 0; } @@ -655,9 +680,9 @@ void jpeg_v2_0_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) } } -static bool jpeg_v2_0_is_idle(void *handle) +static bool jpeg_v2_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return ((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) & UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == @@ -675,14 +700,14 @@ static int jpeg_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int jpeg_v2_0_set_clockgating_state(void *handle, +static int jpeg_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); if (enable) { - if (!jpeg_v2_0_is_idle(handle)) + if (!jpeg_v2_0_is_idle(ip_block)) return -EBUSY; jpeg_v2_0_enable_clock_gating(adev); } else { @@ -692,10 +717,10 @@ static int jpeg_v2_0_set_clockgating_state(void *handle, return 0; } -static int jpeg_v2_0_set_powergating_state(void *handle, +static int jpeg_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (state == adev->jpeg.cur_state) @@ -739,6 +764,13 @@ static int jpeg_v2_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static int jpeg_v2_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + jpeg_v2_0_stop(ring->adev); + jpeg_v2_0_start(ring->adev); + return amdgpu_ring_test_helper(ring); +} + static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { .name = "jpeg_v2_0", .early_init = jpeg_v2_0_early_init, @@ -752,6 +784,8 @@ static const struct amd_ip_funcs jpeg_v2_0_ip_funcs = { .wait_for_idle = jpeg_v2_0_wait_for_idle, .set_clockgating_state = jpeg_v2_0_set_clockgating_state, .set_powergating_state = jpeg_v2_0_set_powergating_state, + .dump_ip_state = amdgpu_jpeg_dump_ip_state, + .print_ip_state = amdgpu_jpeg_print_ip_state, }; static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { @@ -782,6 +816,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = { .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v2_0_ring_reset, }; static void jpeg_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index 9ac421486f05..8b39e114f3be 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -36,9 +36,25 @@ #define JPEG25_MAX_HW_INSTANCES_ARCTURUS 2 +static const struct amdgpu_hwip_reg_entry jpeg_reg_list_2_5[] = { + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_POWER_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_INT_STAT), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_CNTL), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_SIZE), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_ADDR_MODE), + SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG), + 
SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_Y_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_UV_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_PITCH), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_UV_PITCH), +}; + static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v2_5_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v2_5_set_powergating_state(void *handle, +static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev); @@ -147,7 +163,14 @@ static int jpeg_v2_5_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - return 0; + r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_2_5, ARRAY_SIZE(jpeg_reg_list_2_5)); + if (r) + return r; + + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); + + return r; } /** @@ -166,6 +189,8 @@ static int jpeg_v2_5_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; + amdgpu_jpeg_sysfs_reset_mask_fini(adev); + r = amdgpu_jpeg_sw_fini(adev); return r; @@ -219,7 +244,7 @@ static int jpeg_v2_5_hw_fini(struct amdgpu_ip_block *ip_block) if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, i, mmUVD_JRBC_STATUS)) - jpeg_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v2_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE); if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) amdgpu_irq_put(adev, &adev->jpeg.inst[i].ras_poison_irq, 0); @@ -310,6 +335,44 @@ static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst) WREG32_SOC15(JPEG, inst, mmJPEG_CGC_GATE, data); } +static void jpeg_v2_5_start_inst(struct amdgpu_device *adev, int i) +{ + struct amdgpu_ring *ring = adev->jpeg.inst[i].ring_dec; + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* JPEG disable CGC */ + jpeg_v2_5_disable_clock_gating(adev, i); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC_MASK, + ~JPEG_SYS_INT_EN__DJRBC_MASK); + + WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); + ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR); +} + /** * jpeg_v2_5_start - start JPEG block * @@ -319,52 +382,33 @@ static void jpeg_v2_5_enable_clock_gating(struct amdgpu_device *adev, int inst) */ static int jpeg_v2_5_start(struct amdgpu_device *adev) { - struct amdgpu_ring *ring; int i; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (adev->jpeg.harvest_config & (1 << i)) continue; + jpeg_v2_5_start_inst(adev, i); - ring = 
adev->jpeg.inst[i].ring_dec; - /* disable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), 0, - ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); - - /* JPEG disable CGC */ - jpeg_v2_5_disable_clock_gating(adev, i); - - /* MJPEG global tiling registers */ - WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX8_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(JPEG, i, mmJPEG_DEC_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - - /* enable JMI channel */ - WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), 0, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); - - /* enable System Interrupt for JRBC */ - WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmJPEG_SYS_INT_EN), - JPEG_SYS_INT_EN__DJRBC_MASK, - ~JPEG_SYS_INT_EN__DJRBC_MASK); - - WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); - WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(JPEG, i, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, - upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_RPTR, 0); - WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR, 0); - WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_CNTL, 0x00000002L); - WREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_SIZE, ring->ring_size / 4); - ring->wptr = RREG32_SOC15(JPEG, i, mmUVD_JRBC_RB_WPTR); } return 0; } +static void jpeg_v2_5_stop_inst(struct amdgpu_device *adev, int i) +{ + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v2_5_enable_clock_gating(adev, i); + + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); +} + /** * jpeg_v2_5_stop - stop JPEG block * @@ -379,18 +423,7 @@ static int jpeg_v2_5_stop(struct amdgpu_device *adev) for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (adev->jpeg.harvest_config & (1 << i)) continue; - - /* reset JMI */ - WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JMI_CNTL), - UVD_JMI_CNTL__SOFT_RESET_MASK, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); - - jpeg_v2_5_enable_clock_gating(adev, i); - - /* enable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, i, mmUVD_JPEG_POWER_STATUS), - UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, - ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + jpeg_v2_5_stop_inst(adev, i); } return 0; @@ -482,9 +515,9 @@ static void jpeg_v2_6_dec_ring_insert_end(struct amdgpu_ring *ring) amdgpu_ring_write(ring, (1 << (ring->me * 2 + 14))); } -static bool jpeg_v2_5_is_idle(void *handle) +static bool jpeg_v2_5_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, ret = 1; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { @@ -518,10 +551,10 @@ static int jpeg_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int jpeg_v2_5_set_clockgating_state(void *handle, +static int jpeg_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); int i; @@ -530,7 +563,7 @@ static int jpeg_v2_5_set_clockgating_state(void *handle, continue; if (enable) { - if (!jpeg_v2_5_is_idle(handle)) + if (!jpeg_v2_5_is_idle(ip_block)) return -EBUSY; jpeg_v2_5_enable_clock_gating(adev, 
i); } else { @@ -541,10 +574,10 @@ static int jpeg_v2_5_set_clockgating_state(void *handle, return 0; } -static int jpeg_v2_5_set_powergating_state(void *handle, +static int jpeg_v2_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (state == adev->jpeg.cur_state) @@ -610,6 +643,13 @@ static int jpeg_v2_5_process_interrupt(struct amdgpu_device *adev, return 0; } +static int jpeg_v2_5_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + jpeg_v2_5_stop_inst(ring->adev, ring->me); + jpeg_v2_5_start_inst(ring->adev, ring->me); + return amdgpu_ring_test_helper(ring); +} + static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = { .name = "jpeg_v2_5", .early_init = jpeg_v2_5_early_init, @@ -623,6 +663,8 @@ static const struct amd_ip_funcs jpeg_v2_5_ip_funcs = { .wait_for_idle = jpeg_v2_5_wait_for_idle, .set_clockgating_state = jpeg_v2_5_set_clockgating_state, .set_powergating_state = jpeg_v2_5_set_powergating_state, + .dump_ip_state = amdgpu_jpeg_dump_ip_state, + .print_ip_state = amdgpu_jpeg_print_ip_state, }; static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = { @@ -638,6 +680,8 @@ static const struct amd_ip_funcs jpeg_v2_6_ip_funcs = { .wait_for_idle = jpeg_v2_5_wait_for_idle, .set_clockgating_state = jpeg_v2_5_set_clockgating_state, .set_powergating_state = jpeg_v2_5_set_powergating_state, + .dump_ip_state = amdgpu_jpeg_dump_ip_state, + .print_ip_state = amdgpu_jpeg_print_ip_state, }; static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { @@ -668,6 +712,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_5_dec_ring_vm_funcs = { .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v2_5_ring_reset, }; static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = { @@ -698,6 +743,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_6_dec_ring_vm_funcs = { .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v2_5_ring_reset, }; static void jpeg_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c index e0df6800502c..2f8510c2986b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v3_0.c @@ -34,9 +34,25 @@ #define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f +static const struct amdgpu_hwip_reg_entry jpeg_reg_list_3_0[] = { + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_POWER_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_INT_STAT), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_CNTL), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_RB_SIZE), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_ADDR_MODE), + SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_GFX10_ADDR_CONFIG), + SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_Y_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, mmJPEG_DEC_UV_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_PITCH), + SOC15_REG_ENTRY_STR(JPEG, 0, mmUVD_JPEG_UV_PITCH), +}; + static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void 
jpeg_v3_0_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v3_0_set_powergating_state(void *handle, +static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); /** @@ -112,7 +128,14 @@ static int jpeg_v3_0_sw_init(struct amdgpu_ip_block *ip_block) adev->jpeg.internal.jpeg_pitch[0] = mmUVD_JPEG_PITCH_INTERNAL_OFFSET; adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, mmUVD_JPEG_PITCH); - return 0; + r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_3_0, ARRAY_SIZE(jpeg_reg_list_3_0)); + if (r) + return r; + + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); + + return r; } /** @@ -131,6 +154,8 @@ static int jpeg_v3_0_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; + amdgpu_jpeg_sysfs_reset_mask_fini(adev); + r = amdgpu_jpeg_sw_fini(adev); return r; @@ -168,7 +193,7 @@ static int jpeg_v3_0_hw_fini(struct amdgpu_ip_block *ip_block) if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS)) - jpeg_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v3_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); return 0; } @@ -445,9 +470,9 @@ static void jpeg_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring) } } -static bool jpeg_v3_0_is_idle(void *handle) +static bool jpeg_v3_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret = 1; ret &= (((RREG32_SOC15(JPEG, 0, mmUVD_JRBC_STATUS) & @@ -466,14 +491,14 @@ static int jpeg_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block) UVD_JRBC_STATUS__RB_JOB_DONE_MASK); } -static int jpeg_v3_0_set_clockgating_state(void *handle, +static int jpeg_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; if (enable) { - if (!jpeg_v3_0_is_idle(handle)) + if (!jpeg_v3_0_is_idle(ip_block)) return -EBUSY; jpeg_v3_0_enable_clock_gating(adev); } else { @@ -483,10 +508,10 @@ static int jpeg_v3_0_set_clockgating_state(void *handle, return 0; } -static int jpeg_v3_0_set_powergating_state(void *handle, +static int jpeg_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if(state == adev->jpeg.cur_state) @@ -530,6 +555,13 @@ static int jpeg_v3_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static int jpeg_v3_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + jpeg_v3_0_stop(ring->adev); + jpeg_v3_0_start(ring->adev); + return amdgpu_ring_test_helper(ring); +} + static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { .name = "jpeg_v3_0", .early_init = jpeg_v3_0_early_init, @@ -543,6 +575,8 @@ static const struct amd_ip_funcs jpeg_v3_0_ip_funcs = { .wait_for_idle = jpeg_v3_0_wait_for_idle, .set_clockgating_state = jpeg_v3_0_set_clockgating_state, .set_powergating_state = jpeg_v3_0_set_powergating_state, + .dump_ip_state = amdgpu_jpeg_dump_ip_state, + .print_ip_state = amdgpu_jpeg_print_ip_state, }; static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { @@ -573,6 +607,7 @@ static const struct amdgpu_ring_funcs jpeg_v3_0_dec_ring_vm_funcs = { .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, 
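/*
 * Editor's annotation (not part of the patch): the .reset hook added just
 * below is the new per-queue reset path for this ring — jpeg_v3_0_ring_reset()
 * stops the JPEG block, restarts it, and then re-validates the ring with
 * amdgpu_ring_test_helper(), matching the helper defined earlier in this file.
 */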
.emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v3_0_ring_reset, }; static void jpeg_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index eca1963c33b6..f17ec5414fd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -36,13 +36,28 @@ #define regUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f +static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0[] = { + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_CNTL), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_SIZE), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH), +}; + static int jpeg_v4_0_start_sriov(struct amdgpu_device *adev); static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v4_0_set_powergating_state(void *handle, +static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev); - static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring); /** @@ -123,14 +138,15 @@ static int jpeg_v4_0_sw_init(struct amdgpu_ip_block *ip_block) r = amdgpu_jpeg_ras_sw_init(adev); if (r) return r; - /* TODO: Add queue reset mask when FW fully supports it */ - adev->jpeg.supported_reset = - amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); - r = amdgpu_jpeg_sysfs_reset_mask_init(adev); + + r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0, ARRAY_SIZE(jpeg_reg_list_4_0)); if (r) return r; - return 0; + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); + + return r; } /** @@ -206,7 +222,7 @@ static int jpeg_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) if (!amdgpu_sriov_vf(adev)) { if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) - jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v4_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__JPEG)) amdgpu_irq_put(adev, &adev->jpeg.inst->ras_poison_irq, 0); @@ -614,9 +630,9 @@ static void jpeg_v4_0_dec_ring_set_wptr(struct amdgpu_ring *ring) } } -static bool jpeg_v4_0_is_idle(void *handle) +static bool jpeg_v4_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret = 1; ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) & @@ -635,14 +651,14 @@ static int jpeg_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block) UVD_JRBC_STATUS__RB_JOB_DONE_MASK); } -static int jpeg_v4_0_set_clockgating_state(void *handle, +static int jpeg_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { 
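/*
 * Editor's annotation (not part of the patch): same conversion as in the other
 * JPEG versions in this series — the opaque void *handle argument becomes
 * struct amdgpu_ip_block *ip_block, and the device is reached through
 * ip_block->adev instead of casting the handle.
 */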
- struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; if (enable) { - if (!jpeg_v4_0_is_idle(handle)) + if (!jpeg_v4_0_is_idle(ip_block)) return -EBUSY; jpeg_v4_0_enable_clock_gating(adev); } else { @@ -652,10 +668,10 @@ static int jpeg_v4_0_set_clockgating_state(void *handle, return 0; } -static int jpeg_v4_0_set_powergating_state(void *handle, +static int jpeg_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (amdgpu_sriov_vf(adev)) { @@ -704,6 +720,16 @@ static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev, return 0; } +static int jpeg_v4_0_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + if (amdgpu_sriov_vf(ring->adev)) + return -EINVAL; + + jpeg_v4_0_stop(ring->adev); + jpeg_v4_0_start(ring->adev); + return amdgpu_ring_test_helper(ring); +} + static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { .name = "jpeg_v4_0", .early_init = jpeg_v4_0_early_init, @@ -717,6 +743,8 @@ static const struct amd_ip_funcs jpeg_v4_0_ip_funcs = { .wait_for_idle = jpeg_v4_0_wait_for_idle, .set_clockgating_state = jpeg_v4_0_set_clockgating_state, .set_powergating_state = jpeg_v4_0_set_powergating_state, + .dump_ip_state = amdgpu_jpeg_dump_ip_state, + .print_ip_state = amdgpu_jpeg_print_ip_state, }; static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { @@ -747,6 +775,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_dec_ring_vm_funcs = { .emit_wreg = jpeg_v2_0_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v4_0_ring_reset, }; static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c index 67b51bcbacd1..5598a35f72af 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c @@ -43,7 +43,7 @@ enum jpeg_engin_status { static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v4_0_3_set_powergating_state(void *handle, +static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); static void jpeg_v4_0_3_set_ras_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring); @@ -59,10 +59,53 @@ static int amdgpu_ih_srcid_jpeg[] = { VCN_4_0__SRCID__JPEG7_DECODE }; +static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0_3[] = { + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_SYS_INT_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH), + SOC15_REG_ENTRY_STR(JPEG, 0, 
regUVD_JRBC1_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_STATUS), +}; + static inline bool jpeg_v4_0_3_normalizn_reqd(struct amdgpu_device *adev) { - return amdgpu_sriov_vf(adev) || - (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)); + return (adev->jpeg.caps & AMDGPU_JPEG_CAPS(RRMT_ENABLED)) == 0; +} + +static inline int jpeg_v4_0_3_core_reg_offset(u32 pipe) +{ + if (pipe) + return ((0x40 * pipe) - 0xc80); + else + return 0; } /** @@ -76,7 +119,7 @@ static int jpeg_v4_0_3_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS; + adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS_4_0_3; jpeg_v4_0_3_set_dec_ring_funcs(adev); jpeg_v4_0_3_set_irq_funcs(adev); @@ -144,10 +187,8 @@ static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) adev->jpeg.internal.jpeg_pitch[j] = regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET; adev->jpeg.inst[i].external.jpeg_pitch[j] = - SOC15_REG_OFFSET1( - JPEG, jpeg_inst, - regUVD_JRBC0_UVD_JRBC_SCRATCH0, - (j ? 
(0x40 * j - 0xc80) : 0)); + SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_SCRATCH0, + jpeg_v4_0_3_core_reg_offset(j)); } } @@ -159,13 +200,17 @@ static int jpeg_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) } } - /* TODO: Add queue reset mask when FW fully supports it */ - adev->jpeg.supported_reset = - amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); - r = amdgpu_jpeg_sysfs_reset_mask_init(adev); + r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0_3, ARRAY_SIZE(jpeg_reg_list_4_0_3)); if (r) return r; + if (!amdgpu_sriov_vf(adev)) { + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); + if (r) + return r; + } + return 0; } @@ -185,7 +230,9 @@ static int jpeg_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) if (r) return r; - amdgpu_jpeg_sysfs_reset_mask_fini(adev); + if (!amdgpu_sriov_vf(adev)) + amdgpu_jpeg_sysfs_reset_mask_fini(adev); + r = amdgpu_jpeg_sw_fini(adev); return r; @@ -321,7 +368,7 @@ static int jpeg_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { ring = &adev->jpeg.inst[i].ring_dec[j]; ring->wptr = 0; @@ -331,6 +378,11 @@ static int jpeg_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) } } } else { + /* This flag is not set for VF, assumed to be disabled always */ + if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & + 0x100) + adev->jpeg.caps |= AMDGPU_JPEG_CAPS(RRMT_ENABLED); + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { jpeg_inst = GET_INST(JPEG, i); @@ -379,7 +431,7 @@ static int jpeg_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block) if (!amdgpu_sriov_vf(adev)) { if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) - ret = jpeg_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); + ret = jpeg_v4_0_3_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } return ret; @@ -475,6 +527,75 @@ static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CGC_GATE, data); } +static void jpeg_v4_0_3_start_inst(struct amdgpu_device *adev, int inst) +{ + int jpeg_inst = GET_INST(JPEG, inst); + + WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG, + 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); + SOC15_WAIT_ON_RREG(JPEG, jpeg_inst, regUVD_PGFSM_STATUS, + UVD_PGFSM_STATUS__UVDJ_PWR_ON << + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, + UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); + + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), + 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* JPEG disable CGC */ + jpeg_v4_0_3_disable_clock_gating(adev, inst); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); +} + +static void jpeg_v4_0_3_start_jrbc(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + int jpeg_inst = GET_INST(JPEG, ring->me); + int reg_offset = jpeg_v4_0_3_core_reg_offset(ring->pipe); + + /* enable System Interrupt for JRBC */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN), + JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe, + ~(JPEG_SYS_INT_EN__DJRBC0_MASK << 
ring->pipe)); + + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_CNTL, + reg_offset, + (0x00000001L | 0x00000002L)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW, + reg_offset, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + reg_offset, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_RPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_WPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_CNTL, + reg_offset, 0x00000002L); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC0_UVD_JRBC_RB_SIZE, + reg_offset, ring->ring_size / 4); + ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR, + reg_offset); +} + /** * jpeg_v4_0_3_start - start JPEG block * @@ -485,84 +606,36 @@ static void jpeg_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst static int jpeg_v4_0_3_start(struct amdgpu_device *adev) { struct amdgpu_ring *ring; - int i, j, jpeg_inst; + int i, j; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - jpeg_inst = GET_INST(JPEG, i); - - WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG, - 1 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); - SOC15_WAIT_ON_RREG( - JPEG, jpeg_inst, regUVD_PGFSM_STATUS, - UVD_PGFSM_STATUS__UVDJ_PWR_ON - << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); - - /* disable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, - regUVD_JPEG_POWER_STATUS), - 0, ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); - - /* JPEG disable CGC */ - jpeg_v4_0_3_disable_clock_gating(adev, i); - - /* MJPEG global tiling registers */ - WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX8_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(JPEG, jpeg_inst, regJPEG_DEC_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - - /* enable JMI channel */ - WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); - + jpeg_v4_0_3_start_inst(adev, i); for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { - unsigned int reg_offset = (j?(0x40 * j - 0xc80):0); - ring = &adev->jpeg.inst[i].ring_dec[j]; - - /* enable System Interrupt for JRBC */ - WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, - regJPEG_SYS_INT_EN), - JPEG_SYS_INT_EN__DJRBC0_MASK << j, - ~(JPEG_SYS_INT_EN__DJRBC0_MASK << j)); - - WREG32_SOC15_OFFSET(JPEG, jpeg_inst, - regUVD_JMI0_UVD_LMI_JRBC_RB_VMID, - reg_offset, 0); - WREG32_SOC15_OFFSET(JPEG, jpeg_inst, - regUVD_JRBC0_UVD_JRBC_RB_CNTL, - reg_offset, - (0x00000001L | 0x00000002L)); - WREG32_SOC15_OFFSET( - JPEG, jpeg_inst, - regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_LOW, - reg_offset, lower_32_bits(ring->gpu_addr)); - WREG32_SOC15_OFFSET( - JPEG, jpeg_inst, - regUVD_JMI0_UVD_LMI_JRBC_RB_64BIT_BAR_HIGH, - reg_offset, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15_OFFSET(JPEG, jpeg_inst, - regUVD_JRBC0_UVD_JRBC_RB_RPTR, - reg_offset, 0); - WREG32_SOC15_OFFSET(JPEG, jpeg_inst, - regUVD_JRBC0_UVD_JRBC_RB_WPTR, - reg_offset, 0); - WREG32_SOC15_OFFSET(JPEG, jpeg_inst, - regUVD_JRBC0_UVD_JRBC_RB_CNTL, - reg_offset, 0x00000002L); - WREG32_SOC15_OFFSET(JPEG, jpeg_inst, - regUVD_JRBC0_UVD_JRBC_RB_SIZE, - reg_offset, ring->ring_size / 4); - ring->wptr = RREG32_SOC15_OFFSET( - JPEG, jpeg_inst, regUVD_JRBC0_UVD_JRBC_RB_WPTR, - 
reg_offset); + jpeg_v4_0_3_start_jrbc(ring); } } return 0; } +static void jpeg_v4_0_3_stop_inst(struct amdgpu_device *adev, int inst) +{ + int jpeg_inst = GET_INST(JPEG, inst); + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + jpeg_v4_0_3_enable_clock_gating(adev, inst); + + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + +} + /** * jpeg_v4_0_3_stop - stop JPEG block * @@ -572,31 +645,10 @@ static int jpeg_v4_0_3_start(struct amdgpu_device *adev) */ static int jpeg_v4_0_3_stop(struct amdgpu_device *adev) { - int i, jpeg_inst; + int i; - for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { - jpeg_inst = GET_INST(JPEG, i); - /* reset JMI */ - WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), - UVD_JMI_CNTL__SOFT_RESET_MASK, - ~UVD_JMI_CNTL__SOFT_RESET_MASK); - - jpeg_v4_0_3_enable_clock_gating(adev, i); - - /* enable anti hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, - regUVD_JPEG_POWER_STATUS), - UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, - ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); - - WREG32_SOC15(JPEG, jpeg_inst, regUVD_PGFSM_CONFIG, - 2 << UVD_PGFSM_CONFIG__UVDJ_PWR_CONFIG__SHIFT); - SOC15_WAIT_ON_RREG( - JPEG, jpeg_inst, regUVD_PGFSM_STATUS, - UVD_PGFSM_STATUS__UVDJ_PWR_OFF - << UVD_PGFSM_STATUS__UVDJ_PWR_STATUS__SHIFT, - UVD_PGFSM_STATUS__UVDJ_PWR_STATUS_MASK); - } + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) + jpeg_v4_0_3_stop_inst(adev, i); return 0; } @@ -612,9 +664,8 @@ static uint64_t jpeg_v4_0_3_dec_ring_get_rptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - return RREG32_SOC15_OFFSET( - JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_RPTR, - ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0); + return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_RPTR, + jpeg_v4_0_3_core_reg_offset(ring->pipe)); } /** @@ -630,14 +681,12 @@ static uint64_t jpeg_v4_0_3_dec_ring_get_wptr(struct amdgpu_ring *ring) if (ring->use_doorbell) return adev->wb.wb[ring->wptr_offs]; - else - return RREG32_SOC15_OFFSET( - JPEG, GET_INST(JPEG, ring->me), - regUVD_JRBC0_UVD_JRBC_RB_WPTR, - ring->pipe ? (0x40 * ring->pipe - 0xc80) : 0); + + return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_WPTR, + jpeg_v4_0_3_core_reg_offset(ring->pipe)); } -static void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) +void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) { /* JPEG engine access for HDP flush doesn't work when RRMT is enabled. * This is a workaround to avoid any HDP flush through JPEG ring. @@ -659,10 +708,8 @@ static void jpeg_v4_0_3_dec_ring_set_wptr(struct amdgpu_ring *ring) adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } else { - WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), - regUVD_JRBC0_UVD_JRBC_RB_WPTR, - (ring->pipe ? 
(0x40 * ring->pipe - 0xc80) : - 0), + WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC0_UVD_JRBC_RB_WPTR, + jpeg_v4_0_3_core_reg_offset(ring->pipe), lower_32_bits(ring->wptr)); } } @@ -907,21 +954,17 @@ void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count) } } -static bool jpeg_v4_0_3_is_idle(void *handle) +static bool jpeg_v4_0_3_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool ret = false; int i, j; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { - unsigned int reg_offset = (j?(0x40 * j - 0xc80):0); - - ret &= ((RREG32_SOC15_OFFSET( - JPEG, GET_INST(JPEG, i), - regUVD_JRBC0_UVD_JRBC_STATUS, - reg_offset) & - UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i), + regUVD_JRBC0_UVD_JRBC_STATUS, jpeg_v4_0_3_core_reg_offset(j)) & + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); } } @@ -937,28 +980,25 @@ static int jpeg_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block) for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { - unsigned int reg_offset = (j?(0x40 * j - 0xc80):0); - - ret &= SOC15_WAIT_ON_RREG_OFFSET( - JPEG, GET_INST(JPEG, i), - regUVD_JRBC0_UVD_JRBC_STATUS, reg_offset, + ret &= (SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i), + regUVD_JRBC0_UVD_JRBC_STATUS, jpeg_v4_0_3_core_reg_offset(j), UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK, - UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + UVD_JRBC0_UVD_JRBC_STATUS__RB_JOB_DONE_MASK)); } } return ret; } -static int jpeg_v4_0_3_set_clockgating_state(void *handle, +static int jpeg_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { if (enable) { - if (!jpeg_v4_0_3_is_idle(handle)) + if (!jpeg_v4_0_3_is_idle(ip_block)) return -EBUSY; jpeg_v4_0_3_enable_clock_gating(adev, i); } else { @@ -968,10 +1008,10 @@ static int jpeg_v4_0_3_set_clockgating_state(void *handle, return 0; } -static int jpeg_v4_0_3_set_powergating_state(void *handle, +static int jpeg_v4_0_3_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (amdgpu_sriov_vf(adev)) { @@ -1055,6 +1095,41 @@ static int jpeg_v4_0_3_process_interrupt(struct amdgpu_device *adev, return 0; } +static void jpeg_v4_0_3_core_stall_reset(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + int jpeg_inst = GET_INST(JPEG, ring->me); + int reg_offset = jpeg_v4_0_3_core_reg_offset(ring->pipe); + + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_JMI_CLIENT_STALL, + reg_offset, 0x1F); + SOC15_WAIT_ON_RREG_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS, + reg_offset, 0x1F, 0x1F); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_JPEG_LMI_DROP, + reg_offset, 0x1F); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 1 << ring->pipe); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_JMI_CLIENT_STALL, + reg_offset, 0x00); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_JPEG_LMI_DROP, + reg_offset, 
0x00); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00); +} + +static int jpeg_v4_0_3_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + if (amdgpu_sriov_vf(ring->adev)) + return -EOPNOTSUPP; + + jpeg_v4_0_3_core_stall_reset(ring); + jpeg_v4_0_3_start_jrbc(ring); + return amdgpu_ring_test_helper(ring); +} + static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { .name = "jpeg_v4_0_3", .early_init = jpeg_v4_0_3_early_init, @@ -1068,6 +1143,8 @@ static const struct amd_ip_funcs jpeg_v4_0_3_ip_funcs = { .wait_for_idle = jpeg_v4_0_3_wait_for_idle, .set_clockgating_state = jpeg_v4_0_3_set_clockgating_state, .set_powergating_state = jpeg_v4_0_3_set_powergating_state, + .dump_ip_state = amdgpu_jpeg_dump_ip_state, + .print_ip_state = amdgpu_jpeg_print_ip_state, }; static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { @@ -1099,6 +1176,7 @@ static const struct amdgpu_ring_funcs jpeg_v4_0_3_dec_ring_vm_funcs = { .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v4_0_3_ring_reset, }; static void jpeg_v4_0_3_set_dec_ring_funcs(struct amdgpu_device *adev) @@ -1231,9 +1309,97 @@ static const struct amdgpu_ras_block_hw_ops jpeg_v4_0_3_ras_hw_ops = { .reset_ras_error_count = jpeg_v4_0_3_reset_ras_error_count, }; +static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct aca_bank_info info; + u64 misc0; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; + } + + return ret; +} + +/* reference to smu driver if header file */ +static int jpeg_v4_0_3_err_codes[] = { + 16, 17, 18, 19, 20, 21, 22, 23, /* JPEG[0-7][S|D] */ + 24, 25, 26, 27, 28, 29, 30, 31 +}; + +static bool jpeg_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + jpeg_v4_0_3_err_codes, + ARRAY_SIZE(jpeg_v4_0_3_err_codes))) + return false; + + return true; +} + +static const struct aca_bank_ops jpeg_v4_0_3_aca_bank_ops = { + .aca_bank_parser = jpeg_v4_0_3_aca_bank_parser, + .aca_bank_is_valid = jpeg_v4_0_3_aca_bank_is_valid, +}; + +static const struct aca_info jpeg_v4_0_3_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &jpeg_v4_0_3_aca_bank_ops, +}; + +static int jpeg_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__JPEG, + &jpeg_v4_0_3_aca_info, NULL); + if (r) + goto late_fini; + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; +} + static struct amdgpu_jpeg_ras jpeg_v4_0_3_ras = { .ras_block = { 
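/*
 * Editor's annotation (not part of the patch): the .ras_late_init callback
 * wired up below chains the common RAS late init with ACA binding —
 * amdgpu_ras_block_late_init() first, then amdgpu_ras_bind_aca() with
 * jpeg_v4_0_3_aca_info; on failure the block is unwound via
 * amdgpu_ras_block_late_fini().
 */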
.hw_ops = &jpeg_v4_0_3_ras_hw_ops, + .ras_late_init = jpeg_v4_0_3_ras_late_init, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h index 747a3e5f6856..a90bf370a002 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.h @@ -56,6 +56,7 @@ void jpeg_v4_0_3_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq unsigned int flags); void jpeg_v4_0_3_dec_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid, uint64_t pd_addr); +void jpeg_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring); void jpeg_v4_0_3_dec_ring_nop(struct amdgpu_ring *ring, uint32_t count); void jpeg_v4_0_3_dec_ring_insert_start(struct amdgpu_ring *ring); void jpeg_v4_0_3_dec_ring_insert_end(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c index 1d9e3b101c3a..974030a5c03c 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_5.c @@ -46,11 +46,26 @@ #define regJPEG_CGC_GATE_INTERNAL_OFFSET 0x4160 #define regUVD_NO_OP_INTERNAL_OFFSET 0x0029 +static const struct amdgpu_hwip_reg_entry jpeg_reg_list_4_0_5[] = { + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_CNTL), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_SIZE), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH), +}; + static void jpeg_v4_0_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v4_0_5_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v4_0_5_set_powergating_state(void *handle, +static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); - static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring); static int amdgpu_ih_clientid_jpeg[] = { @@ -58,6 +73,8 @@ static int amdgpu_ih_clientid_jpeg[] = { SOC15_IH_CLIENTID_VCN1 }; + + /** * jpeg_v4_0_5_early_init - set function pointers * @@ -153,6 +170,10 @@ static int jpeg_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) adev->jpeg.inst[i].external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, i, regUVD_JPEG_PITCH); } + r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_4_0_5, ARRAY_SIZE(jpeg_reg_list_4_0_5)); + if (r) + return r; + /* TODO: Add queue reset mask when FW fully supports it */ adev->jpeg.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); @@ -236,7 +257,7 @@ static int jpeg_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block) if (!amdgpu_sriov_vf(adev)) { if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, i, regUVD_JRBC_STATUS)) - jpeg_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v4_0_5_set_powergating_state(ip_block, AMD_PG_STATE_GATE); } } return 0; @@ -627,9 +648,9 @@ static void jpeg_v4_0_5_dec_ring_set_wptr(struct amdgpu_ring *ring) } } -static bool jpeg_v4_0_5_is_idle(void *handle) +static bool jpeg_v4_0_5_is_idle(struct amdgpu_ip_block *ip_block) { - struct 
amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, ret = 1; for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { @@ -660,10 +681,10 @@ static int jpeg_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block) return 0; } -static int jpeg_v4_0_5_set_clockgating_state(void *handle, +static int jpeg_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE) ? true : false; int i; @@ -672,7 +693,7 @@ static int jpeg_v4_0_5_set_clockgating_state(void *handle, continue; if (enable) { - if (!jpeg_v4_0_5_is_idle(handle)) + if (!jpeg_v4_0_5_is_idle(ip_block)) return -EBUSY; jpeg_v4_0_5_enable_clock_gating(adev, i); @@ -684,10 +705,10 @@ static int jpeg_v4_0_5_set_clockgating_state(void *handle, return 0; } -static int jpeg_v4_0_5_set_powergating_state(void *handle, +static int jpeg_v4_0_5_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (amdgpu_sriov_vf(adev)) { @@ -759,6 +780,8 @@ static const struct amd_ip_funcs jpeg_v4_0_5_ip_funcs = { .wait_for_idle = jpeg_v4_0_5_wait_for_idle, .set_clockgating_state = jpeg_v4_0_5_set_clockgating_state, .set_powergating_state = jpeg_v4_0_5_set_powergating_state, + .dump_ip_state = amdgpu_jpeg_dump_ip_state, + .print_ip_state = amdgpu_jpeg_print_ip_state, }; static const struct amdgpu_ring_funcs jpeg_v4_0_5_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c index 58fb1e5fa89c..31d213ccbe0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_0.c @@ -31,12 +31,28 @@ #include "vcn/vcn_5_0_0_offset.h" #include "vcn/vcn_5_0_0_sh_mask.h" -#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" #include "jpeg_v5_0_0.h" +static const struct amdgpu_hwip_reg_entry jpeg_reg_list_5_0[] = { + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_CNTL), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_RB_SIZE), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH), +}; + static void jpeg_v5_0_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void jpeg_v5_0_0_set_irq_funcs(struct amdgpu_device *adev); -static int jpeg_v5_0_0_set_powergating_state(void *handle, +static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state); /** @@ -74,7 +90,7 @@ static int jpeg_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) /* JPEG TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, - VCN_4_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); + VCN_5_0__SRCID__JPEG_DECODE, &adev->jpeg.inst->irq); if (r) return r; @@ -100,6 +116,10 @@ static int 
jpeg_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) adev->jpeg.internal.jpeg_pitch[0] = regUVD_JPEG_PITCH_INTERNAL_OFFSET; adev->jpeg.inst->external.jpeg_pitch[0] = SOC15_REG_OFFSET(JPEG, 0, regUVD_JPEG_PITCH); + r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_5_0, ARRAY_SIZE(jpeg_reg_list_5_0)); + if (r) + return r; + /* TODO: Add queue reset mask when FW fully supports it */ adev->jpeg.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->jpeg.inst[0].ring_dec[0]); @@ -172,7 +192,7 @@ static int jpeg_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block) if (adev->jpeg.cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS)) - jpeg_v5_0_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + jpeg_v5_0_0_set_powergating_state(ip_block, AMD_PG_STATE_GATE); return 0; } @@ -539,9 +559,9 @@ static void jpeg_v5_0_0_dec_ring_set_wptr(struct amdgpu_ring *ring) } } -static bool jpeg_v5_0_0_is_idle(void *handle) +static bool jpeg_v5_0_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret = 1; ret &= (((RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS) & @@ -560,14 +580,14 @@ static int jpeg_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block) UVD_JRBC_STATUS__RB_JOB_DONE_MASK); } -static int jpeg_v5_0_0_set_clockgating_state(void *handle, +static int jpeg_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE) ? true : false; if (enable) { - if (!jpeg_v5_0_0_is_idle(handle)) + if (!jpeg_v5_0_0_is_idle(ip_block)) return -EBUSY; jpeg_v5_0_0_enable_clock_gating(adev); } else { @@ -577,10 +597,10 @@ static int jpeg_v5_0_0_set_clockgating_state(void *handle, return 0; } -static int jpeg_v5_0_0_set_powergating_state(void *handle, +static int jpeg_v5_0_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret; if (state == adev->jpeg.cur_state) @@ -612,7 +632,7 @@ static int jpeg_v5_0_0_process_interrupt(struct amdgpu_device *adev, DRM_DEBUG("IH: JPEG TRAP\n"); switch (entry->src_id) { - case VCN_4_0__SRCID__JPEG_DECODE: + case VCN_5_0__SRCID__JPEG_DECODE: amdgpu_fence_process(adev->jpeg.inst->ring_dec); break; default: @@ -637,6 +657,8 @@ static const struct amd_ip_funcs jpeg_v5_0_0_ip_funcs = { .wait_for_idle = jpeg_v5_0_0_wait_for_idle, .set_clockgating_state = jpeg_v5_0_0_set_clockgating_state, .set_powergating_state = jpeg_v5_0_0_set_powergating_state, + .dump_ip_state = amdgpu_jpeg_dump_ip_state, + .print_ip_state = amdgpu_jpeg_print_ip_state, }; static const struct amdgpu_ring_funcs jpeg_v5_0_0_dec_ring_vm_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c new file mode 100644 index 000000000000..218e16b68f1d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.c @@ -0,0 +1,791 @@ +// SPDX-License-Identifier: GPL-2.0 OR MIT +/* + * Copyright 2014-2024 Advanced Micro Devices, Inc. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "amdgpu.h" +#include "amdgpu_jpeg.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "jpeg_v4_0_3.h" +#include "jpeg_v5_0_1.h" + +#include "vcn/vcn_5_0_0_offset.h" +#include "vcn/vcn_5_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" + +static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev); +static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev); +static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state); +static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring); + +static int amdgpu_ih_srcid_jpeg[] = { + VCN_5_0__SRCID__JPEG_DECODE, + VCN_5_0__SRCID__JPEG1_DECODE, + VCN_5_0__SRCID__JPEG2_DECODE, + VCN_5_0__SRCID__JPEG3_DECODE, + VCN_5_0__SRCID__JPEG4_DECODE, + VCN_5_0__SRCID__JPEG5_DECODE, + VCN_5_0__SRCID__JPEG6_DECODE, + VCN_5_0__SRCID__JPEG7_DECODE, + VCN_5_0__SRCID__JPEG8_DECODE, + VCN_5_0__SRCID__JPEG9_DECODE, +}; + +static const struct amdgpu_hwip_reg_entry jpeg_reg_list_5_0_1[] = { + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_POWER_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_INT_STAT), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC0_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_ADDR_MODE), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_Y_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, regJPEG_DEC_UV_GFX10_TILING_SURFACE), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_PITCH), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JPEG_UV_PITCH), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC1_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC2_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC3_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC4_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, 
regUVD_JRBC4_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC5_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC6_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC7_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC8_UVD_JRBC_STATUS), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_RPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_RB_WPTR), + SOC15_REG_ENTRY_STR(JPEG, 0, regUVD_JRBC9_UVD_JRBC_STATUS), +}; + +static int jpeg_v5_0_1_core_reg_offset(u32 pipe) +{ + if (pipe <= AMDGPU_MAX_JPEG_RINGS_4_0_3) + return ((0x40 * pipe) - 0xc80); + else + return ((0x40 * pipe) - 0x440); +} + +/** + * jpeg_v5_0_1_early_init - set function pointers + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Set ring and irq function pointers + */ +static int jpeg_v5_0_1_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + + if (!adev->jpeg.num_jpeg_inst || adev->jpeg.num_jpeg_inst > AMDGPU_MAX_JPEG_INSTANCES) + return -ENOENT; + + adev->jpeg.num_jpeg_rings = AMDGPU_MAX_JPEG_RINGS; + jpeg_v5_0_1_set_dec_ring_funcs(adev); + jpeg_v5_0_1_set_irq_funcs(adev); + + return 0; +} + +/** + * jpeg_v5_0_1_sw_init - sw init for JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Load firmware and sw initialization + */ +static int jpeg_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, j, r, jpeg_inst; + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + /* JPEG TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + amdgpu_ih_srcid_jpeg[j], &adev->jpeg.inst->irq); + if (r) + return r; + } + + r = amdgpu_jpeg_sw_init(adev); + if (r) + return r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_inst = GET_INST(JPEG, i); + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + ring->use_doorbell = false; + ring->vm_hub = AMDGPU_MMHUB0(adev->jpeg.inst[i].aid_id); + if (!amdgpu_sriov_vf(adev)) { + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 1 + j + 11 * jpeg_inst; + } else { + if (j < 4) + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 4 + j + 32 * jpeg_inst; + else + ring->doorbell_index = + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 8 + j + 32 * jpeg_inst; + } + sprintf(ring->name, "jpeg_dec_%d.%d", adev->jpeg.inst[i].aid_id, j); + r = amdgpu_ring_init(adev, ring, 512, &adev->jpeg.inst->irq, 0, + AMDGPU_RING_PRIO_DEFAULT, NULL); + if (r) + return r; + + adev->jpeg.internal.jpeg_pitch[j] = + regUVD_JRBC0_UVD_JRBC_SCRATCH0_INTERNAL_OFFSET; + adev->jpeg.inst[i].external.jpeg_pitch[j] = + SOC15_REG_OFFSET1(JPEG, jpeg_inst, regUVD_JRBC_SCRATCH0, + (j ? 
jpeg_v5_0_1_core_reg_offset(j) : 0)); + } + } + + r = amdgpu_jpeg_reg_dump_init(adev, jpeg_reg_list_5_0_1, ARRAY_SIZE(jpeg_reg_list_5_0_1)); + if (r) + return r; + + if (!amdgpu_sriov_vf(adev)) { + adev->jpeg.supported_reset = AMDGPU_RESET_TYPE_PER_QUEUE; + r = amdgpu_jpeg_sysfs_reset_mask_init(adev); + if (r) + return r; + } + + return 0; +} + +/** + * jpeg_v5_0_1_sw_fini - sw fini for JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * JPEG suspend and free up sw allocation + */ +static int jpeg_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_jpeg_suspend(adev); + if (r) + return r; + + if (!amdgpu_sriov_vf(adev)) + amdgpu_jpeg_sysfs_reset_mask_fini(adev); + + r = amdgpu_jpeg_sw_fini(adev); + + return r; +} + +/** + * jpeg_v5_0_1_hw_init - start and test JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + */ +static int jpeg_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, j, r, jpeg_inst; + + if (amdgpu_sriov_vf(adev)) { + /* jpeg_v5_0_1_start_sriov(adev); */ + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + ring->wptr = 0; + ring->wptr_old = 0; + jpeg_v5_0_1_dec_ring_set_wptr(ring); + ring->sched.ready = true; + } + } + return 0; + } + if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) + adev->jpeg.caps |= AMDGPU_JPEG_CAPS(RRMT_ENABLED); + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_inst = GET_INST(JPEG, i); + ring = adev->jpeg.inst[i].ring_dec; + if (ring->use_doorbell) + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 11 * jpeg_inst, + adev->jpeg.inst[i].aid_id); + + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + if (ring->use_doorbell) + WREG32_SOC15_OFFSET(VCN, GET_INST(VCN, i), regVCN_JPEG_DB_CTRL, + (ring->pipe ? (ring->pipe - 0x15) : 0), + ring->doorbell_index << + VCN_JPEG_DB_CTRL__OFFSET__SHIFT | + VCN_JPEG_DB_CTRL__EN_MASK); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + } + + return 0; +} + +/** + * jpeg_v5_0_1_hw_fini - stop the hardware block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Stop the JPEG block, mark ring as not ready any more + */ +static int jpeg_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int ret = 0; + + cancel_delayed_work_sync(&adev->jpeg.idle_work); + + if (adev->jpeg.cur_state != AMD_PG_STATE_GATE) + ret = jpeg_v5_0_1_set_powergating_state(ip_block, AMD_PG_STATE_GATE); + + return ret; +} + +/** + * jpeg_v5_0_1_suspend - suspend JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * HW fini and suspend JPEG block + */ +static int jpeg_v5_0_1_suspend(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = jpeg_v5_0_1_hw_fini(ip_block); + if (r) + return r; + + r = amdgpu_jpeg_suspend(adev); + + return r; +} + +/** + * jpeg_v5_0_1_resume - resume JPEG block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * + * Resume firmware and hw init JPEG block + */ +static int jpeg_v5_0_1_resume(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r; + + r = amdgpu_jpeg_resume(adev); + if (r) + return r; + + r = jpeg_v5_0_1_hw_init(ip_block); + + return r; +} + +static void jpeg_v5_0_1_init_inst(struct amdgpu_device *adev, int i) +{ + int jpeg_inst = GET_INST(JPEG, i); + + /* disable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); + + /* keep the JPEG in static PG mode */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), 0, + ~UVD_JPEG_POWER_STATUS__JPEG_PG_MODE_MASK); + + /* MJPEG global tiling registers */ + WREG32_SOC15(JPEG, 0, regJPEG_DEC_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* enable JMI channel */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), 0, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); +} + +static void jpeg_v5_0_1_deinit_inst(struct amdgpu_device *adev, int i) +{ + int jpeg_inst = GET_INST(JPEG, i); + /* reset JMI */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JMI_CNTL), + UVD_JMI_CNTL__SOFT_RESET_MASK, + ~UVD_JMI_CNTL__SOFT_RESET_MASK); + + /* enable anti hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(JPEG, jpeg_inst, regUVD_JPEG_POWER_STATUS), + UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK, + ~UVD_JPEG_POWER_STATUS__JPEG_POWER_STATUS_MASK); +} + +static void jpeg_v5_0_1_init_jrbc(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u32 reg, data, mask; + int jpeg_inst = GET_INST(JPEG, ring->me); + int reg_offset = ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0; + + /* enable System Interrupt for JRBC */ + reg = SOC15_REG_OFFSET(JPEG, jpeg_inst, regJPEG_SYS_INT_EN); + if (ring->pipe < AMDGPU_MAX_JPEG_RINGS_4_0_3) { + data = JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe; + mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << ring->pipe); + WREG32_P(reg, data, mask); + } else { + data = JPEG_SYS_INT_EN__DJRBC0_MASK << (ring->pipe+12); + mask = ~(JPEG_SYS_INT_EN__DJRBC0_MASK << (ring->pipe+12)); + WREG32_P(reg, data, mask); + } + + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_LMI_JRBC_RB_VMID, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_CNTL, + reg_offset, + (0x00000001L | 0x00000002L)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + reg_offset, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + reg_offset, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_RPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_WPTR, + reg_offset, 0); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_CNTL, + reg_offset, 0x00000002L); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JRBC_RB_SIZE, + reg_offset, ring->ring_size / 4); + ring->wptr = RREG32_SOC15_OFFSET(JPEG, jpeg_inst, regUVD_JRBC_RB_WPTR, + reg_offset); +} + +/** + * jpeg_v5_0_1_start - start JPEG block + * + * @adev: amdgpu_device pointer + * + * Setup and start the JPEG block + */ +static int jpeg_v5_0_1_start(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + int i, j; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + jpeg_v5_0_1_init_inst(adev, i); + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + ring = &adev->jpeg.inst[i].ring_dec[j]; + jpeg_v5_0_1_init_jrbc(ring); + } + } + + return 0; +} + +/** + * jpeg_v5_0_1_stop - stop 
JPEG block + * + * @adev: amdgpu_device pointer + * + * stop the JPEG block + */ +static int jpeg_v5_0_1_stop(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) + jpeg_v5_0_1_deinit_inst(adev, i); + + return 0; +} + +/** + * jpeg_v5_0_1_dec_ring_get_rptr - get read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware read pointer + */ +static uint64_t jpeg_v5_0_1_dec_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_RPTR, + ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0); +} + +/** + * jpeg_v5_0_1_dec_ring_get_wptr - get write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware write pointer + */ +static uint64_t jpeg_v5_0_1_dec_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) + return adev->wb.wb[ring->wptr_offs]; + + return RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), regUVD_JRBC_RB_WPTR, + ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0); +} + +/** + * jpeg_v5_0_1_dec_ring_set_wptr - set write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the write pointer to the hardware + */ +static void jpeg_v5_0_1_dec_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, ring->me), + regUVD_JRBC_RB_WPTR, + (ring->pipe ? jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0), + lower_32_bits(ring->wptr)); + } +} + +static bool jpeg_v5_0_1_is_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + bool ret = false; + int i, j; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0); + + ret &= ((RREG32_SOC15_OFFSET(JPEG, GET_INST(JPEG, i), + regUVD_JRBC_STATUS, reg_offset) & + UVD_JRBC_STATUS__RB_JOB_DONE_MASK) == + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + } + } + + return ret; +} + +static int jpeg_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int ret = 0; + int i, j; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + int reg_offset = (j ? jpeg_v5_0_1_core_reg_offset(j) : 0); + + ret &= SOC15_WAIT_ON_RREG_OFFSET(JPEG, GET_INST(JPEG, i), + regUVD_JRBC_STATUS, reg_offset, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK, + UVD_JRBC_STATUS__RB_JOB_DONE_MASK); + } + } + return ret; +} + +static int jpeg_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; + + int i; + + if (!enable) + return 0; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + if (!jpeg_v5_0_1_is_idle(ip_block)) + return -EBUSY; + } + + return 0; +} + +static int jpeg_v5_0_1_set_powergating_state(struct amdgpu_ip_block *ip_block, + enum amd_powergating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + int ret; + + if (state == adev->jpeg.cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = jpeg_v5_0_1_stop(adev); + else + ret = jpeg_v5_0_1_start(adev); + + if (!ret) + adev->jpeg.cur_state = state; + + return ret; +} + +static int jpeg_v5_0_1_set_interrupt_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + unsigned int type, + enum amdgpu_interrupt_state state) +{ + return 0; +} + +static int jpeg_v5_0_1_process_interrupt(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + u32 i, inst; + + i = node_id_to_phys_map[entry->node_id]; + DRM_DEV_DEBUG(adev->dev, "IH: JPEG TRAP\n"); + + for (inst = 0; inst < adev->jpeg.num_jpeg_inst; ++inst) + if (adev->jpeg.inst[inst].aid_id == i) + break; + + if (inst >= adev->jpeg.num_jpeg_inst) { + dev_WARN_ONCE(adev->dev, 1, + "Interrupt received for unknown JPEG instance %d", + entry->node_id); + return 0; + } + + switch (entry->src_id) { + case VCN_5_0__SRCID__JPEG_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[0]); + break; + case VCN_5_0__SRCID__JPEG1_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[1]); + break; + case VCN_5_0__SRCID__JPEG2_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[2]); + break; + case VCN_5_0__SRCID__JPEG3_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[3]); + break; + case VCN_5_0__SRCID__JPEG4_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[4]); + break; + case VCN_5_0__SRCID__JPEG5_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[5]); + break; + case VCN_5_0__SRCID__JPEG6_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[6]); + break; + case VCN_5_0__SRCID__JPEG7_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[7]); + break; + case VCN_5_0__SRCID__JPEG8_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[8]); + break; + case VCN_5_0__SRCID__JPEG9_DECODE: + amdgpu_fence_process(&adev->jpeg.inst[inst].ring_dec[9]); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static void jpeg_v5_0_1_core_stall_reset(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + int jpeg_inst = GET_INST(JPEG, ring->me); + int reg_offset = ring->pipe ? 
jpeg_v5_0_1_core_reg_offset(ring->pipe) : 0; + + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_JMI_CLIENT_STALL, + reg_offset, 0x1F); + SOC15_WAIT_ON_RREG_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS, + reg_offset, 0x1F, 0x1F); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_JPEG_LMI_DROP, + reg_offset, 0x1F); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 1 << ring->pipe); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_UVD_JMI_CLIENT_STALL, + reg_offset, 0x00); + WREG32_SOC15_OFFSET(JPEG, jpeg_inst, + regUVD_JMI0_JPEG_LMI_DROP, + reg_offset, 0x00); + WREG32_SOC15(JPEG, jpeg_inst, regJPEG_CORE_RST_CTRL, 0x00); +} + +static int jpeg_v5_0_1_ring_reset(struct amdgpu_ring *ring, unsigned int vmid) +{ + if (amdgpu_sriov_vf(ring->adev)) + return -EOPNOTSUPP; + + jpeg_v5_0_1_core_stall_reset(ring); + jpeg_v5_0_1_init_jrbc(ring); + return amdgpu_ring_test_helper(ring); +} + +static const struct amd_ip_funcs jpeg_v5_0_1_ip_funcs = { + .name = "jpeg_v5_0_1", + .early_init = jpeg_v5_0_1_early_init, + .late_init = NULL, + .sw_init = jpeg_v5_0_1_sw_init, + .sw_fini = jpeg_v5_0_1_sw_fini, + .hw_init = jpeg_v5_0_1_hw_init, + .hw_fini = jpeg_v5_0_1_hw_fini, + .suspend = jpeg_v5_0_1_suspend, + .resume = jpeg_v5_0_1_resume, + .is_idle = jpeg_v5_0_1_is_idle, + .wait_for_idle = jpeg_v5_0_1_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = jpeg_v5_0_1_set_clockgating_state, + .set_powergating_state = jpeg_v5_0_1_set_powergating_state, + .dump_ip_state = amdgpu_jpeg_dump_ip_state, + .print_ip_state = amdgpu_jpeg_print_ip_state, +}; + +static const struct amdgpu_ring_funcs jpeg_v5_0_1_dec_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_JPEG, + .align_mask = 0xf, + .get_rptr = jpeg_v5_0_1_dec_ring_get_rptr, + .get_wptr = jpeg_v5_0_1_dec_ring_get_wptr, + .set_wptr = jpeg_v5_0_1_dec_ring_set_wptr, + .emit_frame_size = + SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + + 8 + /* jpeg_v5_0_1_dec_ring_emit_vm_flush */ + 22 + 22 + /* jpeg_v5_0_1_dec_ring_emit_fence x2 vm fence */ + 8 + 16, + .emit_ib_size = 22, /* jpeg_v5_0_1_dec_ring_emit_ib */ + .emit_ib = jpeg_v4_0_3_dec_ring_emit_ib, + .emit_fence = jpeg_v4_0_3_dec_ring_emit_fence, + .emit_vm_flush = jpeg_v4_0_3_dec_ring_emit_vm_flush, + .emit_hdp_flush = jpeg_v4_0_3_ring_emit_hdp_flush, + .test_ring = amdgpu_jpeg_dec_ring_test_ring, + .test_ib = amdgpu_jpeg_dec_ring_test_ib, + .insert_nop = jpeg_v4_0_3_dec_ring_nop, + .insert_start = jpeg_v4_0_3_dec_ring_insert_start, + .insert_end = jpeg_v4_0_3_dec_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_jpeg_ring_begin_use, + .end_use = amdgpu_jpeg_ring_end_use, + .emit_wreg = jpeg_v4_0_3_dec_ring_emit_wreg, + .emit_reg_wait = jpeg_v4_0_3_dec_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = jpeg_v5_0_1_ring_reset, +}; + +static void jpeg_v5_0_1_set_dec_ring_funcs(struct amdgpu_device *adev) +{ + int i, j, jpeg_inst; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) { + for (j = 0; j < adev->jpeg.num_jpeg_rings; ++j) { + adev->jpeg.inst[i].ring_dec[j].funcs = &jpeg_v5_0_1_dec_ring_vm_funcs; + adev->jpeg.inst[i].ring_dec[j].me = i; + adev->jpeg.inst[i].ring_dec[j].pipe = j; + } + jpeg_inst = GET_INST(JPEG, i); + adev->jpeg.inst[i].aid_id = + jpeg_inst / adev->jpeg.num_inst_per_aid; + } +} + +static const struct amdgpu_irq_src_funcs jpeg_v5_0_1_irq_funcs = { + .set = 
jpeg_v5_0_1_set_interrupt_state, + .process = jpeg_v5_0_1_process_interrupt, +}; + +static void jpeg_v5_0_1_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->jpeg.num_jpeg_inst; ++i) + adev->jpeg.inst->irq.num_types += adev->jpeg.num_jpeg_rings; + + adev->jpeg.inst->irq.funcs = &jpeg_v5_0_1_irq_funcs; +} + +const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block = { + .type = AMD_IP_BLOCK_TYPE_JPEG, + .major = 5, + .minor = 0, + .rev = 1, + .funcs = &jpeg_v5_0_1_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h new file mode 100644 index 000000000000..efdab57324e4 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v5_0_1.h @@ -0,0 +1,101 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#ifndef __JPEG_V5_0_1_H__ +#define __JPEG_V5_0_1_H__ + +extern const struct amdgpu_ip_block_version jpeg_v5_0_1_ip_block; + +#define regUVD_JRBC0_UVD_JRBC_RB_WPTR 0x0640 +#define regUVD_JRBC0_UVD_JRBC_RB_WPTR_BASE_IDX 1 +#define regUVD_JRBC0_UVD_JRBC_STATUS 0x0649 +#define regUVD_JRBC0_UVD_JRBC_STATUS_BASE_IDX 1 +#define regUVD_JRBC0_UVD_JRBC_RB_RPTR 0x064a +#define regUVD_JRBC0_UVD_JRBC_RB_RPTR_BASE_IDX 1 +#define regUVD_JRBC1_UVD_JRBC_RB_WPTR 0x0000 +#define regUVD_JRBC1_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC1_UVD_JRBC_STATUS 0x0009 +#define regUVD_JRBC1_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC1_UVD_JRBC_RB_RPTR 0x000a +#define regUVD_JRBC1_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC2_UVD_JRBC_RB_WPTR 0x0040 +#define regUVD_JRBC2_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC2_UVD_JRBC_STATUS 0x0049 +#define regUVD_JRBC2_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC2_UVD_JRBC_RB_RPTR 0x004a +#define regUVD_JRBC2_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC3_UVD_JRBC_RB_WPTR 0x0080 +#define regUVD_JRBC3_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC3_UVD_JRBC_STATUS 0x0089 +#define regUVD_JRBC3_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC3_UVD_JRBC_RB_RPTR 0x008a +#define regUVD_JRBC3_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC4_UVD_JRBC_RB_WPTR 0x00c0 +#define regUVD_JRBC4_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC4_UVD_JRBC_STATUS 0x00c9 +#define regUVD_JRBC4_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC4_UVD_JRBC_RB_RPTR 0x00ca +#define regUVD_JRBC4_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC5_UVD_JRBC_RB_WPTR 0x0100 +#define regUVD_JRBC5_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC5_UVD_JRBC_STATUS 0x0109 +#define regUVD_JRBC5_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC5_UVD_JRBC_RB_RPTR 0x010a +#define regUVD_JRBC5_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC6_UVD_JRBC_RB_WPTR 0x0140 +#define regUVD_JRBC6_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC6_UVD_JRBC_STATUS 0x0149 +#define regUVD_JRBC6_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC6_UVD_JRBC_RB_RPTR 0x014a +#define regUVD_JRBC6_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC7_UVD_JRBC_RB_WPTR 0x0180 +#define regUVD_JRBC7_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC7_UVD_JRBC_STATUS 0x0189 +#define regUVD_JRBC7_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC7_UVD_JRBC_RB_RPTR 0x018a +#define regUVD_JRBC7_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC8_UVD_JRBC_RB_WPTR 0x01c0 +#define regUVD_JRBC8_UVD_JRBC_RB_WPTR_BASE_IDX 0 +#define regUVD_JRBC8_UVD_JRBC_STATUS 0x01c9 +#define regUVD_JRBC8_UVD_JRBC_STATUS_BASE_IDX 0 +#define regUVD_JRBC8_UVD_JRBC_RB_RPTR 0x01ca +#define regUVD_JRBC8_UVD_JRBC_RB_RPTR_BASE_IDX 0 +#define regUVD_JRBC9_UVD_JRBC_RB_WPTR 0x0440 +#define regUVD_JRBC9_UVD_JRBC_RB_WPTR_BASE_IDX 1 +#define regUVD_JRBC9_UVD_JRBC_STATUS 0x0449 +#define regUVD_JRBC9_UVD_JRBC_STATUS_BASE_IDX 1 +#define regUVD_JRBC9_UVD_JRBC_RB_RPTR 0x044a +#define regUVD_JRBC9_UVD_JRBC_RB_RPTR_BASE_IDX 1 +#define regUVD_JMI0_JPEG_LMI_DROP 0x0663 +#define regUVD_JMI0_JPEG_LMI_DROP_BASE_IDX 1 +#define regUVD_JMI0_UVD_JMI_CLIENT_STALL 0x067a +#define regUVD_JMI0_UVD_JMI_CLIENT_STALL_BASE_IDX 1 +#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS 0x067b +#define regUVD_JMI0_UVD_JMI_CLIENT_CLEAN_STATUS_BASE_IDX 1 +#define regJPEG_CORE_RST_CTRL 0x072e +#define regJPEG_CORE_RST_CTRL_BASE_IDX 1 + +#define regVCN_RRMT_CNTL 0x0940 +#define regVCN_RRMT_CNTL_BASE_IDX 1 + +#endif /* __JPEG_V5_0_1_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index 9c905b9e9376..e65916ada23b 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -54,6 +54,8 @@ MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_1_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes_2.bin"); MODULE_FIRMWARE("amdgpu/gc_11_5_2_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes_2.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_5_3_mes1.bin"); static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block); static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block); @@ -62,6 +64,7 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev); #define MES_EOP_SIZE 2048 #define GFX_MES_DRAM_SIZE 0x80000 +#define MES11_HW_RESOURCE_1_SIZE (128 * AMDGPU_GPU_PAGE_SIZE) static void mes_v11_0_ring_set_wptr(struct amdgpu_ring *ring) { @@ -730,9 +733,6 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) { - int size = 128 * PAGE_SIZE; - int ret = 0; - struct amdgpu_device *adev = mes->adev; union MESAPI_SET_HW_RESOURCES_1 mes_set_hw_res_pkt; memset(&mes_set_hw_res_pkt, 0, sizeof(mes_set_hw_res_pkt)); @@ -741,18 +741,13 @@ static int mes_v11_0_set_hw_resources_1(struct amdgpu_mes *mes) mes_set_hw_res_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_set_hw_res_pkt.enable_mes_info_ctx = 1; - ret = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &mes->resource_1, - &mes->resource_1_gpu_addr, - &mes->resource_1_addr); - if (ret) { - dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", ret); - return ret; + mes_set_hw_res_pkt.cleaner_shader_fence_mc_addr = mes->resource_1_gpu_addr[0]; + if (amdgpu_sriov_is_mes_info_enable(mes->adev)) { + mes_set_hw_res_pkt.mes_info_ctx_mc_addr = + mes->resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE; + mes_set_hw_res_pkt.mes_info_ctx_size = MES11_HW_RESOURCE_1_SIZE; } - mes_set_hw_res_pkt.mes_info_ctx_mc_addr = mes->resource_1_gpu_addr; - mes_set_hw_res_pkt.mes_info_ctx_size = mes->resource_1->tbo.base.size; return mes_v11_0_submit_pkt_and_poll_completion(mes, &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), offsetof(union MESAPI_SET_HW_RESOURCES_1, api_status)); @@ -808,7 +803,7 @@ static const struct amdgpu_mes_funcs mes_v11_0_funcs = { }; static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { int r; const struct mes_firmware_header_v1_0 *mes_hdr; @@ -843,7 +838,7 @@ static int mes_v11_0_allocate_ucode_buffer(struct amdgpu_device *adev, } static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { int r; const struct mes_firmware_header_v1_0 *mes_hdr; @@ -884,7 +879,7 @@ static int mes_v11_0_allocate_ucode_data_buffer(struct amdgpu_device *adev, } static void mes_v11_0_free_ucode_buffers(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe], &adev->mes.data_fw_gpu_addr[pipe], @@ -982,7 +977,7 @@ static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable) /* This function is for backdoor MES firmware */ static int mes_v11_0_load_microcode(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe, bool prime_icache) + enum amdgpu_mes_pipe pipe, bool prime_icache) { int r; uint32_t data; @@ -1054,7 +1049,7 @@ static int mes_v11_0_load_microcode(struct amdgpu_device *adev, } static int 
mes_v11_0_allocate_eop_buf(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { int r; u32 *eop; @@ -1265,7 +1260,7 @@ static int mes_v11_0_kiq_enable_queue(struct amdgpu_device *adev) } static int mes_v11_0_queue_init(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { struct amdgpu_ring *ring; int r; @@ -1348,7 +1343,7 @@ static int mes_v11_0_kiq_ring_init(struct amdgpu_device *adev) } static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { int r, mqd_size = sizeof(struct v11_compute_mqd); struct amdgpu_ring *ring; @@ -1389,7 +1384,7 @@ static int mes_v11_0_mqd_sw_init(struct amdgpu_device *adev, static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - int pipe, r; + int pipe, r, bo_size; adev->mes.funcs = &mes_v11_0_funcs; adev->mes.kiq_hw_init = &mes_v11_0_kiq_hw_init; @@ -1424,6 +1419,23 @@ static int mes_v11_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; + bo_size = AMDGPU_GPU_PAGE_SIZE; + if (amdgpu_sriov_is_mes_info_enable(adev)) + bo_size += MES11_HW_RESOURCE_1_SIZE; + + /* Only needed for AMDGPU_MES_SCHED_PIPE on MES 11*/ + r = amdgpu_bo_create_kernel(adev, + bo_size, + PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->mes.resource_1[0], + &adev->mes.resource_1_gpu_addr[0], + &adev->mes.resource_1_addr[0]); + if (r) { + dev_err(adev->dev, "(%d) failed to create mes resource_1 bo\n", r); + return r; + } + return 0; } @@ -1432,6 +1444,9 @@ static int mes_v11_0_sw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int pipe; + amdgpu_bo_free_kernel(&adev->mes.resource_1[0], &adev->mes.resource_1_gpu_addr[0], + &adev->mes.resource_1_addr[0]); + for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { kfree(adev->mes.mqd_backup[pipe]); @@ -1505,9 +1520,7 @@ static void mes_v11_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); } static void mes_v11_0_kiq_clear(struct amdgpu_device *adev) @@ -1621,12 +1634,10 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) goto failure; - if (amdgpu_sriov_is_mes_info_enable(adev)) { - r = mes_v11_0_set_hw_resources_1(&adev->mes); - if (r) { - DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); - goto failure; - } + r = mes_v11_0_set_hw_resources_1(&adev->mes); + if (r) { + DRM_ERROR("failed mes_v11_0_set_hw_resources_1, r=%d\n", r); + goto failure; } r = mes_v11_0_query_sched_status(&adev->mes); @@ -1635,6 +1646,10 @@ static int mes_v11_0_hw_init(struct amdgpu_ip_block *ip_block) goto failure; } + r = amdgpu_mes_update_enforce_isolation(adev); + if (r) + goto failure; + out: /* * Disable KIQ ring usage from the driver once MES is enabled. 
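(Aside, not part of the patch: the mes_v11_0 hunks above move the resource_1 allocation into mes_v11_0_sw_init() and let mes_v11_0_set_hw_resources_1() hand two regions of that single BO to firmware: the cleaner-shader fence in the first GPU page and, only when amdgpu_sriov_is_mes_info_enable() is true, the MES info context in the 128 pages that follow. A minimal standalone C sketch of that layout is below; the constants mirror the hunks above, while the base VRAM address and the printout are purely illustrative.)

#include <stdint.h>
#include <stdio.h>

#define AMDGPU_GPU_PAGE_SIZE		4096ULL				/* as in amdgpu.h */
#define MES11_HW_RESOURCE_1_SIZE	(128 * AMDGPU_GPU_PAGE_SIZE)	/* from this patch */

int main(void)
{
	uint64_t resource_1_gpu_addr = 0x800000000ULL;	/* example VRAM MC address */
	int mes_info_enable = 1;			/* stands in for amdgpu_sriov_is_mes_info_enable() */

	/* page 0: handed to firmware as cleaner_shader_fence_mc_addr */
	uint64_t fence_mc_addr = resource_1_gpu_addr;

	printf("cleaner shader fence at 0x%llx\n", (unsigned long long)fence_mc_addr);

	/* pages 1..128 (SR-IOV MES info only): mes_info_ctx_mc_addr / mes_info_ctx_size */
	if (mes_info_enable) {
		uint64_t info_ctx_mc_addr = resource_1_gpu_addr + AMDGPU_GPU_PAGE_SIZE;

		printf("mes info ctx at 0x%llx, size 0x%llx\n",
		       (unsigned long long)info_ctx_mc_addr,
		       (unsigned long long)MES11_HW_RESOURCE_1_SIZE);
	}
	return 0;
}

(End of aside; the mes_v11_0.c diff continues below.)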
@@ -1653,34 +1668,17 @@ failure: static int mes_v11_0_hw_fini(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = ip_block->adev; - if (amdgpu_sriov_is_mes_info_enable(adev)) { - amdgpu_bo_free_kernel(&adev->mes.resource_1, &adev->mes.resource_1_gpu_addr, - &adev->mes.resource_1_addr); - } return 0; } static int mes_v11_0_suspend(struct amdgpu_ip_block *ip_block) { - int r; - - r = amdgpu_mes_suspend(ip_block->adev); - if (r) - return r; - return mes_v11_0_hw_fini(ip_block); } static int mes_v11_0_resume(struct amdgpu_ip_block *ip_block) { - int r; - - r = mes_v11_0_hw_init(ip_block); - if (r) - return r; - - return amdgpu_mes_resume(ip_block->adev); + return mes_v11_0_hw_init(ip_block); } static int mes_v11_0_early_init(struct amdgpu_ip_block *ip_block) diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c index 9ecc5d61e49b..183dd3346da5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c @@ -24,6 +24,7 @@ #include <linux/firmware.h> #include <linux/module.h> #include "amdgpu.h" +#include "gfx_v12_0.h" #include "soc15_common.h" #include "soc21.h" #include "gc/gc_12_0_0_offset.h" @@ -350,6 +351,132 @@ static int mes_v12_0_remove_hw_queue(struct amdgpu_mes *mes, offsetof(union MESAPI__REMOVE_QUEUE, api_status)); } +int gfx_v12_0_request_gfx_index_mutex(struct amdgpu_device *adev, + bool req) +{ + u32 i, tmp, val; + + for (i = 0; i < adev->usec_timeout; i++) { + /* Request with MeId=2, PipeId=0 */ + tmp = REG_SET_FIELD(0, CP_GFX_INDEX_MUTEX, REQUEST, req); + tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, CLIENTID, 4); + WREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX, tmp); + + val = RREG32_SOC15(GC, 0, regCP_GFX_INDEX_MUTEX); + if (req) { + if (val == tmp) + break; + } else { + tmp = REG_SET_FIELD(tmp, CP_GFX_INDEX_MUTEX, + REQUEST, 1); + + /* unlocked or locked by firmware */ + if (val != tmp) + break; + } + udelay(1); + } + + if (i >= adev->usec_timeout) + return -EINVAL; + + return 0; +} + +static int mes_v12_0_reset_queue_mmio(struct amdgpu_mes *mes, uint32_t queue_type, + uint32_t me_id, uint32_t pipe_id, + uint32_t queue_id, uint32_t vmid) +{ + struct amdgpu_device *adev = mes->adev; + uint32_t value, reg; + int i, r = 0; + + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); + + if (queue_type == AMDGPU_RING_TYPE_GFX) { + dev_info(adev->dev, "reset gfx queue (%d:%d:%d: vmid:%d)\n", + me_id, pipe_id, queue_id, vmid); + + mutex_lock(&adev->gfx.reset_sem_mutex); + gfx_v12_0_request_gfx_index_mutex(adev, true); + /* all se allow writes */ + WREG32_SOC15(GC, 0, regGRBM_GFX_INDEX, + (uint32_t)(0x1 << GRBM_GFX_INDEX__SE_BROADCAST_WRITES__SHIFT)); + value = REG_SET_FIELD(0, CP_VMID_RESET, RESET_REQUEST, 1 << vmid); + if (pipe_id == 0) + value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE0_QUEUES, 1 << queue_id); + else + value = REG_SET_FIELD(value, CP_VMID_RESET, PIPE1_QUEUES, 1 << queue_id); + WREG32_SOC15(GC, 0, regCP_VMID_RESET, value); + gfx_v12_0_request_gfx_index_mutex(adev, false); + mutex_unlock(&adev->gfx.reset_sem_mutex); + + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0); + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on gfx hqd deactivate\n"); + r = -ETIMEDOUT; + } + + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else if (queue_type == 
AMDGPU_RING_TYPE_COMPUTE) { + dev_info(adev->dev, "reset compute queue (%d:%d:%d)\n", + me_id, pipe_id, queue_id); + mutex_lock(&adev->srbm_mutex); + soc21_grbm_select(adev, me_id, pipe_id, queue_id, 0); + WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2); + WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1); + + /* wait till dequeue take effects */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) & 1)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on hqd deactivate\n"); + r = -ETIMEDOUT; + } + soc21_grbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); + } else if (queue_type == AMDGPU_RING_TYPE_SDMA) { + dev_info(adev->dev, "reset sdma queue (%d:%d:%d)\n", + me_id, pipe_id, queue_id); + switch (me_id) { + case 1: + reg = SOC15_REG_OFFSET(GC, 0, regSDMA1_QUEUE_RESET_REQ); + break; + case 0: + default: + reg = SOC15_REG_OFFSET(GC, 0, regSDMA0_QUEUE_RESET_REQ); + break; + } + + value = 1 << queue_id; + WREG32(reg, value); + /* wait for queue reset done */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!(RREG32(reg) & value)) + break; + udelay(1); + } + if (i >= adev->usec_timeout) { + dev_err(adev->dev, "failed to wait on sdma queue reset done\n"); + r = -ETIMEDOUT; + } + } + + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); + return r; +} + static int mes_v12_0_reset_hw_queue(struct amdgpu_mes *mes, struct mes_reset_queue_input *input) { @@ -559,6 +686,8 @@ static int mes_v12_0_set_hw_resources_1(struct amdgpu_mes *mes, int pipe) mes_set_hw_res_1_pkt.header.opcode = MES_SCH_API_SET_HW_RSRC_1; mes_set_hw_res_1_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS; mes_set_hw_res_1_pkt.mes_kiq_unmap_timeout = 0xa; + mes_set_hw_res_1_pkt.cleaner_shader_fence_mc_addr = + mes->resource_1_gpu_addr[pipe]; return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, &mes_set_hw_res_1_pkt, sizeof(mes_set_hw_res_1_pkt), @@ -629,7 +758,8 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) if (amdgpu_mes_log_enable) { mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; - mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr + pipe * AMDGPU_MES_LOG_BUFFER_SIZE; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr + + pipe * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE); } if (enforce_isolation) @@ -721,6 +851,11 @@ static int mes_v12_0_reset_legacy_queue(struct amdgpu_mes *mes, union MESAPI__RESET mes_reset_queue_pkt; int pipe; + if (input->use_mmio) + return mes_v12_0_reset_queue_mmio(mes, input->queue_type, + input->me_id, input->pipe_id, + input->queue_id, input->vmid); + memset(&mes_reset_queue_pkt, 0, sizeof(mes_reset_queue_pkt)); mes_reset_queue_pkt.header.type = MES_API_TYPE_SCHEDULER; @@ -766,7 +901,7 @@ static const struct amdgpu_mes_funcs mes_v12_0_funcs = { }; static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { int r; const struct mes_firmware_header_v1_0 *mes_hdr; @@ -800,7 +935,7 @@ static int mes_v12_0_allocate_ucode_buffer(struct amdgpu_device *adev, } static int mes_v12_0_allocate_ucode_data_buffer(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { int r; const struct mes_firmware_header_v1_0 *mes_hdr; @@ -834,7 +969,7 @@ static int mes_v12_0_allocate_ucode_data_buffer(struct amdgpu_device *adev, } static void mes_v12_0_free_ucode_buffers(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum 
amdgpu_mes_pipe pipe) { amdgpu_bo_free_kernel(&adev->mes.data_fw_obj[pipe], &adev->mes.data_fw_gpu_addr[pipe], @@ -851,29 +986,50 @@ static void mes_v12_0_enable(struct amdgpu_device *adev, bool enable) uint32_t pipe, data = 0; if (enable) { - data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); - WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); - mutex_lock(&adev->srbm_mutex); for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { soc21_grbm_select(adev, 3, pipe, 0, 0); + if (amdgpu_mes_log_enable) { + u32 log_size = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE; + /* In case uni mes is not enabled, only program for pipe 0 */ + if (adev->mes.event_log_size >= (pipe + 1) * log_size) { + WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO, + lower_32_bits(adev->mes.event_log_gpu_addr + + pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE)); + WREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI, + upper_32_bits(adev->mes.event_log_gpu_addr + + pipe * log_size + AMDGPU_MES_LOG_BUFFER_SIZE)); + dev_info(adev->dev, "Setup CP MES MSCRATCH address : 0x%x. 0x%x\n", + RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_HI), + RREG32_SOC15(GC, 0, regCP_MES_MSCRATCH_LO)); + } + } + + data = RREG32_SOC15(GC, 0, regCP_MES_CNTL); + if (pipe == 0) + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE0_RESET, 1); + else + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_RESET, 1); + WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); ucode_addr = adev->mes.uc_start_addr[pipe] >> 2; WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START, lower_32_bits(ucode_addr)); WREG32_SOC15(GC, 0, regCP_MES_PRGRM_CNTR_START_HI, upper_32_bits(ucode_addr)); + + /* unhalt MES and activate one pipe each loop */ + data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); + if (pipe) + data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); + dev_info(adev->dev, "program CP_MES_CNTL : 0x%x\n", data); + + WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); + } soc21_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - /* unhalt MES and activate pipe0 */ - data = REG_SET_FIELD(0, CP_MES_CNTL, MES_PIPE0_ACTIVE, 1); - data = REG_SET_FIELD(data, CP_MES_CNTL, MES_PIPE1_ACTIVE, 1); - WREG32_SOC15(GC, 0, regCP_MES_CNTL, data); - if (amdgpu_emu_mode) msleep(100); else if (adev->enable_uni_mes) @@ -919,7 +1075,7 @@ static void mes_v12_0_set_ucode_start_addr(struct amdgpu_device *adev) /* This function is for backdoor MES firmware */ static int mes_v12_0_load_microcode(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe, bool prime_icache) + enum amdgpu_mes_pipe pipe, bool prime_icache) { int r; uint32_t data; @@ -983,7 +1139,7 @@ static int mes_v12_0_load_microcode(struct amdgpu_device *adev, } static int mes_v12_0_allocate_eop_buf(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { int r; u32 *eop; @@ -1204,7 +1360,7 @@ static int mes_v12_0_kiq_enable_queue(struct amdgpu_device *adev) } static int mes_v12_0_queue_init(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { struct amdgpu_ring *ring; int r; @@ -1304,7 +1460,7 @@ static int mes_v12_0_kiq_ring_init(struct amdgpu_device *adev) } static int mes_v12_0_mqd_sw_init(struct amdgpu_device *adev, - enum admgpu_mes_pipe pipe) + enum amdgpu_mes_pipe pipe) { int r, mqd_size = sizeof(struct v12_compute_mqd); struct amdgpu_ring *ring; @@ -1347,8 +1503,9 @@ static int mes_v12_0_sw_init(struct amdgpu_ip_block *ip_block) adev->mes.kiq_hw_fini = 
&mes_v12_0_kiq_hw_fini; adev->mes.enable_legacy_queue_map = true; - adev->mes.event_log_size = adev->enable_uni_mes ? (AMDGPU_MAX_MES_PIPES * AMDGPU_MES_LOG_BUFFER_SIZE) : AMDGPU_MES_LOG_BUFFER_SIZE; - + adev->mes.event_log_size = adev->enable_uni_mes ? + (AMDGPU_MAX_MES_PIPES * (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE)) : + (AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE); r = amdgpu_mes_init(adev); if (r) return r; @@ -1362,12 +1519,23 @@ static int mes_v12_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) + if (!adev->enable_uni_mes && pipe == AMDGPU_MES_KIQ_PIPE) { r = mes_v12_0_kiq_ring_init(adev); - else + } + else { r = mes_v12_0_ring_init(adev, pipe); - if (r) - return r; + if (r) + return r; + r = amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->mes.resource_1[pipe], + &adev->mes.resource_1_gpu_addr[pipe], + &adev->mes.resource_1_addr[pipe]); + if (r) { + dev_err(adev->dev, "(%d) failed to create mes resource_1 bo pipe[%d]\n", r, pipe); + return r; + } + } } return 0; @@ -1379,6 +1547,10 @@ static int mes_v12_0_sw_fini(struct amdgpu_ip_block *ip_block) int pipe; for (pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) { + amdgpu_bo_free_kernel(&adev->mes.resource_1[pipe], + &adev->mes.resource_1_gpu_addr[pipe], + &adev->mes.resource_1_addr[pipe]); + kfree(adev->mes.mqd_backup[pipe]); amdgpu_bo_free_kernel(&adev->mes.eop_gpu_obj[pipe], @@ -1455,9 +1627,7 @@ static void mes_v12_0_kiq_setting(struct amdgpu_ring *ring) tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); tmp &= 0xffffff00; tmp |= (ring->me << 5) | (ring->pipe << 3) | (ring->queue); - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - tmp |= 0x80; - WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); + WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp | 0x80); } static int mes_v12_0_kiq_hw_init(struct amdgpu_device *adev) @@ -1579,8 +1749,7 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) goto failure; - if (adev->enable_uni_mes) - mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); + mes_v12_0_set_hw_resources_1(&adev->mes, AMDGPU_MES_SCHED_PIPE); mes_v12_0_init_aggregated_doorbell(&adev->mes); @@ -1590,6 +1759,10 @@ static int mes_v12_0_hw_init(struct amdgpu_ip_block *ip_block) goto failure; } + r = amdgpu_mes_update_enforce_isolation(adev); + if (r) + goto failure; + out: /* * Disable KIQ ring usage from the driver once MES is enabled. 
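(Aside, not part of the patch: with uni-MES the single event-log BO above now carries, per pipe, both the log buffer and an MSCRATCH region; mes_v12_0_set_hw_resources() points event_intr_history_gpu_mc_ptr at the per-pipe log, and mes_v12_0_enable() programs regCP_MES_MSCRATCH_LO/HI at the per-pipe mscratch. The standalone C sketch below shows how those addresses are derived; the two AMDGPU_MES_*_SIZE values are assumed for illustration (the real ones live in amdgpu_mes.h) and the base address is an example.)

#include <stdint.h>
#include <stdio.h>

/* Illustrative sizes only; see amdgpu_mes.h for the real definitions */
#define AMDGPU_MES_LOG_BUFFER_SIZE	0x4000ULL
#define AMDGPU_MES_MSCRATCH_SIZE	0x8000ULL
#define AMDGPU_MAX_MES_PIPES		2

int main(void)
{
	uint64_t event_log_gpu_addr = 0x900000000ULL;	/* example VRAM MC address */
	uint64_t per_pipe = AMDGPU_MES_LOG_BUFFER_SIZE + AMDGPU_MES_MSCRATCH_SIZE;

	for (int pipe = 0; pipe < AMDGPU_MAX_MES_PIPES; pipe++) {
		/* per-pipe log: event_intr_history_gpu_mc_ptr in mes_v12_0_set_hw_resources() */
		uint64_t log = event_log_gpu_addr + pipe * per_pipe;
		/* per-pipe scratch: regCP_MES_MSCRATCH_LO/HI programming in mes_v12_0_enable() */
		uint64_t mscratch = log + AMDGPU_MES_LOG_BUFFER_SIZE;

		printf("pipe %d: event log 0x%llx, mscratch 0x%llx\n", pipe,
		       (unsigned long long)log, (unsigned long long)mscratch);
	}
	return 0;
}

(End of aside; the mes_v12_0.c diff continues below.)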
@@ -1613,24 +1786,12 @@ static int mes_v12_0_hw_fini(struct amdgpu_ip_block *ip_block) static int mes_v12_0_suspend(struct amdgpu_ip_block *ip_block) { - int r; - - r = amdgpu_mes_suspend(ip_block->adev); - if (r) - return r; - return mes_v12_0_hw_fini(ip_block); } static int mes_v12_0_resume(struct amdgpu_ip_block *ip_block) { - int r; - - r = mes_v12_0_hw_init(ip_block); - if (r) - return r; - - return amdgpu_mes_resume(ip_block->adev); + return mes_v12_0_hw_init(ip_block); } static int mes_v12_0_early_init(struct amdgpu_ip_block *ip_block) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index e9a6f33ca710..243eabda0607 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -356,7 +356,7 @@ static void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, if (adev->pg_flags & AMD_PG_SUPPORT_MMHUB) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, - enable); + enable, 0); } static int mmhub_v1_0_gart_enable(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index 9689e2b5d4e5..2adee2b94c37 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -172,6 +172,30 @@ static void mmhub_v1_7_init_tlb_regs(struct amdgpu_device *adev) WREG32_SOC15(MMHUB, 0, regMC_VM_MX_L1_TLB_CNTL, tmp); } +/* Set snoop bit for SDMA so that SDMA writes probe-invalidates RW lines */ +static void mmhub_v1_7_init_snoop_override_regs(struct amdgpu_device *adev) +{ + uint32_t tmp; + int i; + uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE - + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE; + + for (i = 0; i < 5; i++) { /* DAGB instances */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, i * distance); + tmp |= (1 << 15); /* SDMA client is BIT15 */ + WREG32_SOC15_OFFSET(MMHUB, 0, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, i * distance, tmp); + + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, i * distance); + tmp |= (1 << 15); + WREG32_SOC15_OFFSET(MMHUB, 0, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, i * distance, tmp); + } + +} + static void mmhub_v1_7_init_cache_regs(struct amdgpu_device *adev) { uint32_t tmp; @@ -337,6 +361,7 @@ static int mmhub_v1_7_gart_enable(struct amdgpu_device *adev) mmhub_v1_7_init_system_aperture_regs(adev); mmhub_v1_7_init_tlb_regs(adev); mmhub_v1_7_init_cache_regs(adev); + mmhub_v1_7_init_snoop_override_regs(adev); mmhub_v1_7_enable_system_domain(adev); mmhub_v1_7_disable_identity_aperture(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c index b01bb759d0f4..a54e7b929295 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c @@ -33,7 +33,6 @@ #define regVM_L2_CNTL3_DEFAULT 0x80100007 #define regVM_L2_CNTL4_DEFAULT 0x000000c1 -#define mmSMNAID_AID0_MCA_SMU 0x03b30400 static u64 mmhub_v1_8_get_fb_location(struct amdgpu_device *adev) { @@ -214,6 +213,32 @@ static void mmhub_v1_8_init_tlb_regs(struct amdgpu_device *adev) } } +/* Set snoop bit for SDMA so that SDMA writes probe-invalidates RW lines */ +static void mmhub_v1_8_init_snoop_override_regs(struct amdgpu_device *adev) +{ + uint32_t tmp, inst_mask; + int i, j; + uint32_t distance = regDAGB1_WRCLI_GPU_SNOOP_OVERRIDE - + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE; + + inst_mask = adev->aid_mask; + for_each_inst(i, inst_mask) { + for (j = 0; j < 5; j++) { /* DAGB instances */ + tmp = 
RREG32_SOC15_OFFSET(MMHUB, i, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, j * distance); + tmp |= (1 << 15); /* SDMA client is BIT15 */ + WREG32_SOC15_OFFSET(MMHUB, i, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, j * distance, tmp); + + tmp = RREG32_SOC15_OFFSET(MMHUB, i, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, j * distance); + tmp |= (1 << 15); + WREG32_SOC15_OFFSET(MMHUB, i, + regDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, j * distance, tmp); + } + } +} + static void mmhub_v1_8_init_cache_regs(struct amdgpu_device *adev) { uint32_t tmp, inst_mask; @@ -419,6 +444,7 @@ static int mmhub_v1_8_gart_enable(struct amdgpu_device *adev) mmhub_v1_8_init_system_aperture_regs(adev); mmhub_v1_8_init_tlb_regs(adev); mmhub_v1_8_init_cache_regs(adev); + mmhub_v1_8_init_snoop_override_regs(adev); mmhub_v1_8_enable_system_domain(adev); mmhub_v1_8_disable_identity_aperture(adev); @@ -720,11 +746,13 @@ static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank misc0 = bank->regs[ACA_REG_IDX_MISC0]; switch (type) { case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, 1ULL); break; case ACA_SMU_TYPE_CE: - ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, + bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, ACA_REG__MISC0__ERRCNT(misc0)); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c index b4ce3375d3fd..bc3d6c2fc87a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_3.c @@ -103,6 +103,7 @@ mmhub_v3_3_print_l2_protection_fault_status(struct amdgpu_device *adev, switch (amdgpu_ip_version(adev, MMHUB_HWIP, 0)) { case IP_VERSION(3, 3, 0): case IP_VERSION(3, 3, 1): + case IP_VERSION(3, 3, 2): mmhub_cid = cid < ARRAY_SIZE(mmhub_client_ids_v3_3) ? mmhub_client_ids_v3_3[cid][rw] : cid == 0x140 ? 
"UMSCH" : NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c index ff1b58e44689..fe0710b55c3a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c @@ -198,6 +198,36 @@ static void mmhub_v9_4_init_tlb_regs(struct amdgpu_device *adev, int hubid) hubid * MMHUB_INSTANCE_REGISTER_OFFSET, tmp); } +/* Set snoop bit for SDMA so that SDMA writes probe-invalidates RW lines */ +static void mmhub_v9_4_init_snoop_override_regs(struct amdgpu_device *adev, int hubid) +{ + uint32_t tmp; + int i; + uint32_t distance = mmDAGB1_WRCLI_GPU_SNOOP_OVERRIDE - + mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE; + uint32_t huboffset = hubid * MMHUB_INSTANCE_REGISTER_OFFSET; + + for (i = 0; i < 5 - (2 * hubid); i++) { + /* DAGB instances 0 to 4 are in hub0 and 5 to 7 are in hub1 */ + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, + huboffset + i * distance); + tmp |= (1 << 15); /* SDMA client is BIT15 */ + WREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE, + huboffset + i * distance, tmp); + + tmp = RREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, + huboffset + i * distance); + tmp |= (1 << 15); + WREG32_SOC15_OFFSET(MMHUB, 0, + mmDAGB0_WRCLI_GPU_SNOOP_OVERRIDE_VALUE, + huboffset + i * distance, tmp); + } + +} + static void mmhub_v9_4_init_cache_regs(struct amdgpu_device *adev, int hubid) { uint32_t tmp; @@ -392,6 +422,7 @@ static int mmhub_v9_4_gart_enable(struct amdgpu_device *adev) if (!amdgpu_sriov_vf(adev)) mmhub_v9_4_init_cache_regs(adev, i); + mmhub_v9_4_init_snoop_override_regs(adev, i); mmhub_v9_4_enable_system_domain(adev, i); if (!amdgpu_sriov_vf(adev)) mmhub_v9_4_disable_identity_aperture(adev, i); diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c index 4dcb72d1bdda..5aadf24cb202 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c @@ -184,6 +184,9 @@ send_request: case IDH_REQ_RAS_ERROR_COUNT: event = IDH_RAS_ERROR_COUNT_READY; break; + case IDH_REQ_RAS_CPER_DUMP: + event = IDH_RAS_CPER_DUMP_READY; + break; default: break; } @@ -467,6 +470,16 @@ static int xgpu_nv_req_ras_err_count(struct amdgpu_device *adev) return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_ERROR_COUNT); } +static int xgpu_nv_req_ras_cper_dump(struct amdgpu_device *adev, u64 vf_rptr) +{ + uint32_t vf_rptr_hi, vf_rptr_lo; + + vf_rptr_hi = (uint32_t)(vf_rptr >> 32); + vf_rptr_lo = (uint32_t)(vf_rptr & 0xFFFFFFFF); + return xgpu_nv_send_access_requests_with_param( + adev, IDH_REQ_RAS_CPER_DUMP, vf_rptr_hi, vf_rptr_lo, 0); +} + const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .req_full_gpu = xgpu_nv_request_full_gpu_access, .rel_full_gpu = xgpu_nv_release_full_gpu_access, @@ -478,4 +491,5 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = { .ras_poison_handler = xgpu_nv_ras_poison_handler, .rcvd_ras_intr = xgpu_nv_rcvd_ras_intr, .req_ras_err_count = xgpu_nv_req_ras_err_count, + .req_ras_cper_dump = xgpu_nv_req_ras_cper_dump, }; diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h index 9d61d76e1bf9..72c9fceb9d79 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.h @@ -41,6 +41,7 @@ enum idh_request { IDH_READY_TO_RESET = 201, IDH_RAS_POISON = 202, IDH_REQ_RAS_ERROR_COUNT = 203, + IDH_REQ_RAS_CPER_DUMP = 204, }; enum idh_event { @@ -56,6 +57,7 @@ enum idh_event { IDH_PF_SOFT_FLR_NOTIFICATION, IDH_RAS_ERROR_DETECTED, 
IDH_RAS_ERROR_COUNT_READY = 11, + IDH_RAS_CPER_DUMP_READY = 14, IDH_TEXT_MESSAGE = 255, }; diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 0820ed62e2e8..4cd325149b63 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -434,9 +434,8 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, * this should allow us to catch up. */ tmp = (wptr + 32) & ih->ptr_mask; - dev_warn(adev->dev, "IH ring buffer overflow " - "(0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, tmp); + dev_warn(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp); ih->rptr = tmp; tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); @@ -626,7 +625,7 @@ static int navi10_ih_resume(struct amdgpu_ip_block *ip_block) return navi10_ih_hw_init(ip_block); } -static bool navi10_ih_is_idle(void *handle) +static bool navi10_ih_is_idle(struct amdgpu_ip_block *ip_block) { /* todo */ return true; @@ -667,25 +666,25 @@ static void navi10_ih_update_clockgating_state(struct amdgpu_device *adev, } } -static int navi10_ih_set_clockgating_state(void *handle, +static int navi10_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; navi10_ih_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); return 0; } -static int navi10_ih_set_powergating_state(void *handle, +static int navi10_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; } -static void navi10_ih_get_clockgating_state(void *handle, u64 *flags) +static void navi10_ih_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (!RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL)) *flags |= AMD_CG_SUPPORT_IH_CG; diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c index 39919e0892c1..9b4025c39e44 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.c @@ -21,13 +21,13 @@ * */ #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "nbif_v6_3_1.h" #include "nbif/nbif_6_3_1_offset.h" #include "nbif/nbif_6_3_1_sh_mask.h" #include "pcie/pcie_6_1_0_offset.h" #include "pcie/pcie_6_1_0_sh_mask.h" +#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" #include <uapi/linux/kfd_ioctl.h> static void nbif_v6_3_1_remap_hdp_registers(struct amdgpu_device *adev) @@ -473,48 +473,82 @@ const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs = { }; -static void nbif_v6_3_1_sriov_ih_doorbell_range(struct amdgpu_device *adev, - bool use_doorbell, int doorbell_index) +static int nbif_v6_3_1_set_ras_err_event_athub_irq_state(struct amdgpu_device *adev, + struct amdgpu_irq_src *src, + unsigned type, + enum amdgpu_interrupt_state state) { + /* The ras_controller_irq enablement should be done by the PSP bootloader when it + * tries to enable the RAS feature. The driver only needs to set the correct interrupt + * vector for the bare-metal and SRIOV use cases, respectively. + */ + uint32_t bif_doorbell_int_cntl; + + bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL); + bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_DISABLE, + (state == AMDGPU_IRQ_STATE_ENABLE) ?
0 : 1); + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl); + + return 0; } -static void nbif_v6_3_1_sriov_sdma_doorbell_range(struct amdgpu_device *adev, - int instance, bool use_doorbell, - int doorbell_index, - int doorbell_size) +static int nbif_v6_3_1_process_err_event_athub_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) { + /* By design, the ih cookie for err_event_athub_irq should be written + * to bif ring. since bif ring is not enabled, just leave process callback + * as a dummy one. + */ + return 0; } -static void nbif_v6_3_1_sriov_vcn_doorbell_range(struct amdgpu_device *adev, - bool use_doorbell, - int doorbell_index, int instance) +static const struct amdgpu_irq_src_funcs nbif_v6_3_1_ras_err_event_athub_irq_funcs = { + .set = nbif_v6_3_1_set_ras_err_event_athub_irq_state, + .process = nbif_v6_3_1_process_err_event_athub_irq, +}; + +static void nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring(struct amdgpu_device *adev) { + uint32_t bif_doorbell_int_cntl; + + bif_doorbell_int_cntl = RREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL); + if (REG_GET_FIELD(bif_doorbell_int_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_STATUS)) { + /* driver has to clear the interrupt status when bif ring is disabled */ + bif_doorbell_int_cntl = REG_SET_FIELD(bif_doorbell_int_cntl, + BIF_BX0_BIF_DOORBELL_INT_CNTL, + RAS_ATHUB_ERR_EVENT_INTERRUPT_CLEAR, 1); + WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_DOORBELL_INT_CNTL, bif_doorbell_int_cntl); + amdgpu_ras_global_ras_isr(adev); + } } -static void nbif_v6_3_1_sriov_gc_doorbell_init(struct amdgpu_device *adev) +static int nbif_v6_3_1_init_ras_err_event_athub_interrupt(struct amdgpu_device *adev) { + int r; + + /* init the irq funcs */ + adev->nbio.ras_err_event_athub_irq.funcs = + &nbif_v6_3_1_ras_err_event_athub_irq_funcs; + adev->nbio.ras_err_event_athub_irq.num_types = 1; + + /* register ras err event athub interrupt + * nbif v6_3_1 uses the same irq source as nbio v7_4 + */ + r = amdgpu_irq_add_id(adev, SOC21_IH_CLIENTID_BIF, + NBIF_7_4__SRCID__ERREVENT_ATHUB_INTERRUPT, + &adev->nbio.ras_err_event_athub_irq); + + return r; } -const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs = { - .get_hdp_flush_req_offset = nbif_v6_3_1_get_hdp_flush_req_offset, - .get_hdp_flush_done_offset = nbif_v6_3_1_get_hdp_flush_done_offset, - .get_pcie_index_offset = nbif_v6_3_1_get_pcie_index_offset, - .get_pcie_data_offset = nbif_v6_3_1_get_pcie_data_offset, - .get_rev_id = nbif_v6_3_1_get_rev_id, - .mc_access_enable = nbif_v6_3_1_mc_access_enable, - .get_memsize = nbif_v6_3_1_get_memsize, - .sdma_doorbell_range = nbif_v6_3_1_sriov_sdma_doorbell_range, - .vcn_doorbell_range = nbif_v6_3_1_sriov_vcn_doorbell_range, - .gc_doorbell_init = nbif_v6_3_1_sriov_gc_doorbell_init, - .enable_doorbell_aperture = nbif_v6_3_1_enable_doorbell_aperture, - .enable_doorbell_selfring_aperture = nbif_v6_3_1_enable_doorbell_selfring_aperture, - .ih_doorbell_range = nbif_v6_3_1_sriov_ih_doorbell_range, - .update_medium_grain_clock_gating = nbif_v6_3_1_update_medium_grain_clock_gating, - .update_medium_grain_light_sleep = nbif_v6_3_1_update_medium_grain_light_sleep, - .get_clockgating_state = nbif_v6_3_1_get_clockgating_state, - .ih_control = nbif_v6_3_1_ih_control, - .init_registers = nbif_v6_3_1_init_registers, - .remap_hdp_registers = nbif_v6_3_1_remap_hdp_registers, - .get_rom_offset = nbif_v6_3_1_get_rom_offset, - .set_reg_remap = nbif_v6_3_1_set_reg_remap, +struct 
amdgpu_nbio_ras nbif_v6_3_1_ras = { + .handle_ras_err_event_athub_intr_no_bifring = + nbif_v6_3_1_handle_ras_err_event_athub_intr_no_bifring, + .init_ras_err_event_athub_interrupt = + nbif_v6_3_1_init_ras_err_event_athub_interrupt, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h index b7f2e0d88905..3afec715a9fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h +++ b/drivers/gpu/drm/amd/amdgpu/nbif_v6_3_1.h @@ -28,6 +28,6 @@ extern const struct nbio_hdp_flush_reg nbif_v6_3_1_hdp_flush_reg; extern const struct amdgpu_nbio_funcs nbif_v6_3_1_funcs; -extern const struct amdgpu_nbio_funcs nbif_v6_3_1_sriov_funcs; +extern struct amdgpu_nbio_ras nbif_v6_3_1_ras; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index 739fce4fa8fd..04041b398781 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -21,7 +21,6 @@ * */ #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "nbio_v2_3.h" #include "nbio/nbio_2_3_default.h" diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c index a54052dea8bf..f89e5f40e1a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v4_3.c @@ -21,7 +21,6 @@ * */ #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "nbio_v4_3.h" #include "nbio/nbio_4_3_0_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 34180c6070dd..e911368c1aeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -21,7 +21,6 @@ * */ #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "nbio_v6_1.h" #include "nbio/nbio_6_1_default.h" diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index d1032e9992b4..1569a1e934ec 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -21,7 +21,6 @@ * */ #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "nbio_v7_0.h" #include "nbio/nbio_7_0_default.h" diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c index 41421da63a08..2ece3ae75ec1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_11.c @@ -21,7 +21,6 @@ * */ #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "nbio_v7_11.h" #include "nbio/nbio_7_11_0_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c index a766e2d90cd0..acc5f363684a 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c @@ -21,7 +21,6 @@ * */ #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "nbio_v7_2.h" #include "nbio/nbio_7_2_0_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c index a26a9be58eac..d5002ff931d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -21,7 +21,6 @@ * */ #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "nbio_v7_4.h" #include "amdgpu_ras.h" diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c index 3fb6d2aa7e3b..2ee60b8746a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_7.c @@ -21,7 +21,6 @@ * */ #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "nbio_v7_7.h" #include 
"nbio/nbio_7_7_0_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c index 8a0a63ac88d2..f23cb79110d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_9.c @@ -21,7 +21,6 @@ * */ #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "nbio_v7_9.h" #include "amdgpu_ras.h" diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 3bad565ded73..50e77d9b30af 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -78,12 +78,12 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode = { /* Navi1x */ static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 8192, 8192, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; @@ -104,10 +104,10 @@ static const struct amdgpu_video_codecs sc_video_codecs_encode = { }; static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -115,10 +115,10 @@ static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn0[] }; static const struct amdgpu_video_codec_info sc_video_codecs_decode_array_vcn1[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -141,23 +141,23 @@ static struct amdgpu_video_codec_info 
sriov_sc_video_codecs_encode_array[] = { }; static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn0[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array_vcn1[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; @@ -454,6 +454,7 @@ nv_asic_reset_method(struct amdgpu_device *adev) switch (amdgpu_ip_version(adev, MP1_HWIP, 0)) { case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 2): case IP_VERSION(13, 0, 1): case IP_VERSION(13, 0, 3): case IP_VERSION(13, 0, 5): @@ -1034,15 +1035,15 @@ static int nv_common_resume(struct amdgpu_ip_block *ip_block) return nv_common_hw_init(ip_block); } -static bool nv_common_is_idle(void *handle) +static bool nv_common_is_idle(struct amdgpu_ip_block *ip_block) { return true; } -static int nv_common_set_clockgating_state(void *handle, +static int nv_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1070,16 +1071,16 @@ static int nv_common_set_clockgating_state(void *handle, return 0; } -static int nv_common_set_powergating_state(void *handle, +static int nv_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* TODO */ return 0; } -static void nv_common_get_clockgating_state(void *handle, u64 *flags) +static void nv_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) *flags = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/nvd.h b/drivers/gpu/drm/amd/amdgpu/nvd.h index 631dafb92299..56f1bfac0b20 100644 --- 
a/drivers/gpu/drm/amd/amdgpu/nvd.h +++ b/drivers/gpu/drm/amd/amdgpu/nvd.h @@ -64,6 +64,24 @@ #define PACKET3_INDIRECT_BUFFER_CNST_END 0x19 #define PACKET3_ATOMIC_GDS 0x1D #define PACKET3_ATOMIC_MEM 0x1E +#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x7F) << 0) +#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x))) +#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x))) +#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x))) +#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x))) +#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x))) +#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x))) +#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0) +#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0 +#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1 +#define PACKET3_ATOMIC_MEM__COMMAND__WAIT_FOR_WRITE_CONFIRMATION 2 +#define PACKET3_ATOMIC_MEM__COMMAND__SEND_AND_CONTINUE 3 +#define PACKET3_ATOMIC_MEM__CACHE_POLICY__LRU 0 +#define PACKET3_ATOMIC_MEM__CACHE_POLICY__STREAM 1 +#define PACKET3_ATOMIC_MEM__CACHE_POLICY__NOA 2 +#define PACKET3_ATOMIC_MEM__CACHE_POLICY__BYPASS 3 #define PACKET3_OCCLUSION_QUERY 0x1F #define PACKET3_SET_PREDICATION 0x20 #define PACKET3_REG_RMW 0x21 @@ -105,6 +123,38 @@ * 1 - pfp * 2 - ce */ +#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16) +#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20) +#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x)) +#define PACKET3_WRITE_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21) +#define PACKET3_WRITE_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 22) +#define PACKET3_WRITE_DATA__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 24) +#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_LO(x) ((unsigned)(x)) +#define PACKET3_WRITE_DATA__DST_MMREG_ADDR_HI(x) ((((unsigned)(x)) & 0xFF) << 0) +#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0 +#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2 +#define PACKET3_WRITE_DATA__DST_SEL__GDS 3 +#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5 +#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6 +#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0 +#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1 +#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0 +#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1 +#define PACKET3_WRITE_DATA__MODE__PF_VF_DISABLED 0 +#define PACKET3_WRITE_DATA__MODE__PF_VF_ENABLED 1 +#define PACKET3_WRITE_DATA__TEMPORAL__RT 0 +#define PACKET3_WRITE_DATA__TEMPORAL__NT 1 +#define PACKET3_WRITE_DATA__TEMPORAL__HT 2 +#define PACKET3_WRITE_DATA__TEMPORAL__LU 3 +#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0 +#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1 +#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2 +#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3 #define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 #define PACKET3_MEM_SEMAPHORE 0x39 # 
define PACKET3_SEM_USE_MAILBOX (0x1 << 16) @@ -135,6 +185,42 @@ /* 0 - me * 1 - pfp */ +#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0) +#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4) +#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6) +#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22) +#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24) +#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_WAIT_REG_MEM__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0X3FFFF) << 0) +#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0X3FFFF) << 0) +#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x)) +#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x)) +#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x)) +#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31) +#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0 +#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1 +#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2 +#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3 +#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4 +#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5 +#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6 +#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0 +#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1 +#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0 +#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1 +#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3 +#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__LRU 0 +#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__STREAM 1 +#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__NOA 2 +#define PACKET3_WAIT_REG_MEM__CACHE_POLICY__BYPASS 3 +#define PACKET3_WAIT_REG_MEM__TEMPORAL__RT 0 +#define PACKET3_WAIT_REG_MEM__TEMPORAL__NT 1 +#define PACKET3_WAIT_REG_MEM__TEMPORAL__HT 2 +#define PACKET3_WAIT_REG_MEM__TEMPORAL__LU 3 #define PACKET3_INDIRECT_BUFFER 0x3F #define INDIRECT_BUFFER_VALID (1 << 23) #define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28) @@ -144,8 +230,94 @@ */ #define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21) #define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30) +#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x)) +#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0) +#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20) +#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21) +#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23) +#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24) +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28) +#define PACKET3_INDIRECT_BUFFER__TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 28) +#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31) +#define 
PACKET3_INDIRECT_BUFFER__TEMPORAL__RT 0 +#define PACKET3_INDIRECT_BUFFER__TEMPORAL__NT 1 +#define PACKET3_INDIRECT_BUFFER__TEMPORAL__HT 2 +#define PACKET3_INDIRECT_BUFFER__TEMPORAL__LU 3 +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0 +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1 +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__NOA 2 +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__BYPASS 3 #define PACKET3_COND_INDIRECT_BUFFER 0x3F #define PACKET3_COPY_DATA 0x40 +#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0) +#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13) +#define PACKET3_COPY_DATA__SRC_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 13) +#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16) +#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20) +#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29) +#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3) +#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3) +#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__MODE(x) ((((unsigned)(x)) & 0x1) << 21) +#define PACKET3_COPY_DATA__AID_ID(x) ((((unsigned)(x)) & 0x3) << 23) +#define PACKET3_COPY_DATA__DST_TEMPORAL(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_COPY_DATA__SRC_REG_OFFSET_LO(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__SRC_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0) +#define PACKET3_COPY_DATA__DST_REG_OFFSET_LO(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__DST_REG_OFFSET_HI(x) ((((unsigned)(x)) & 0xFF) << 0) +#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0 +#define PACKET3_COPY_DATA__SRC_SEL__TC_L2_OBSOLETE 1 +#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2 +#define PACKET3_COPY_DATA__SRC_SEL__GDS 3 +#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4 +#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5 +#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6 +#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7 +#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8 +#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9 +#define PACKET3_COPY_DATA__SRC_SEL__SYSTEM_CLOCK_COUNT 10 +#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0 +#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2 +#define PACKET3_COPY_DATA__DST_SEL__GDS 3 +#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4 +#define PACKET3_COPY_DATA__DST_SEL__TC_L2_OBSOLETE 5 +#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6 +#define PACKET3_COPY_DATA__SRC_TEMPORAL__RT 0 +#define PACKET3_COPY_DATA__SRC_TEMPORAL__NT 1 +#define PACKET3_COPY_DATA__SRC_TEMPORAL__HT 2 
+#define PACKET3_COPY_DATA__SRC_TEMPORAL__LU 3 +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0 +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1 +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__NOA 2 +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__BYPASS 3 +#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0 +#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1 +#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0 +#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1 +#define PACKET3_COPY_DATA__MODE__PF_VF_DISABLED 0 +#define PACKET3_COPY_DATA__MODE__PF_VF_ENABLED 1 +#define PACKET3_COPY_DATA__DST_TEMPORAL__RT 0 +#define PACKET3_COPY_DATA__DST_TEMPORAL__NT 1 +#define PACKET3_COPY_DATA__DST_TEMPORAL__HT 2 +#define PACKET3_COPY_DATA__DST_TEMPORAL__LU 3 +#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0 +#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1 +#define PACKET3_COPY_DATA__DST_CACHE_POLICY__NOA 2 +#define PACKET3_COPY_DATA__DST_CACHE_POLICY__BYPASS 3 +#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0 +#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1 #define PACKET3_CP_DMA 0x41 #define PACKET3_PFP_SYNC_ME 0x42 #define PACKET3_SURFACE_SYNC 0x43 @@ -160,6 +332,23 @@ * 3 - SAMPLE_STREAMOUTSTAT* * 4 - *S_PARTIAL_FLUSH */ +#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0) +#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29) +#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 0) +#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3) +#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) ((unsigned)(x)) +#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTAT 2 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS 8 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS1 9 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS2 10 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_STREAMOUTSTATS3 11 +#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__LEGACY_MODE 0 +#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE1 1 +#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__NEW_MODE 2 +#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE__MIXED_MODE3 3 #define PACKET3_EVENT_WRITE_EOP 0x47 #define PACKET3_EVENT_WRITE_EOS 0x48 #define PACKET3_RELEASE_MEM 0x49 @@ -304,6 +493,12 @@ * 2: REVERSE */ #define PACKET3_ACQUIRE_MEM_GCR_RANGE_IS_PA (1 << 18) +#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x)) +#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0) +#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x)) +#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0) +#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FFFF) << 0) #define PACKET3_REWIND 0x59 #define PACKET3_INTERRUPT 0x5A #define PACKET3_GEN_PDEPTE 0x5B @@ -330,11 +525,17 @@ #define PACKET3_SET_SH_REG 0x76 #define PACKET3_SET_SH_REG_START 0x00002c00 #define PACKET3_SET_SH_REG_END 0x00003000 +#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23) +#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28) 
+#define PACKET3_SET_SH_REG__INDEX__DEFAULT 0 +#define PACKET3_SET_SH_REG__INDEX__INSERT_VMID 1 #define PACKET3_SET_SH_REG_OFFSET 0x77 #define PACKET3_SET_QUEUE_REG 0x78 #define PACKET3_SET_UCONFIG_REG 0x79 #define PACKET3_SET_UCONFIG_REG_START 0x0000c000 #define PACKET3_SET_UCONFIG_REG_END 0x0000c400 +#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0) #define PACKET3_SET_UCONFIG_REG_INDEX 0x7A #define PACKET3_FORWARD_HEADER 0x7C #define PACKET3_SCRATCH_RAM_WRITE 0x7D @@ -369,6 +570,7 @@ # define PACKET3_INVALIDATE_TLBS_DST_SEL(x) ((x) << 0) # define PACKET3_INVALIDATE_TLBS_ALL_HUB(x) ((x) << 4) # define PACKET3_INVALIDATE_TLBS_PASID(x) ((x) << 5) +# define PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(x) ((x) << 29) #define PACKET3_AQL_PACKET 0x99 #define PACKET3_DMA_DATA_FILL_MULTI 0x9A #define PACKET3_SET_SH_REG_INDEX 0x9B @@ -462,6 +664,12 @@ # define PACKET3_QUERY_STATUS_ENG_SEL(x) ((x) << 25) #define PACKET3_RUN_LIST 0xA5 #define PACKET3_MAP_PROCESS_VM 0xA6 + +#define PACKET3_RUN_CLEANER_SHADER 0xD2 +/* 1. header + * 2. RESERVED [31:0] + */ + /* GFX11 */ #define PACKET3_SET_Q_PREEMPTION_MODE 0xF0 # define PACKET3_SET_Q_PREEMPTION_MODE_IB_VMID(x) ((x) << 0) diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 2395f1856962..bb5dfc410a66 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -129,6 +129,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp) err = psp_init_ta_microcode(psp, ucode_prefix); break; case IP_VERSION(11, 5, 0): + case IP_VERSION(11, 5, 2): err = psp_init_asd_microcode(psp, ucode_prefix); if (err) return err; diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index c4b775aaee9f..cc621064610f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -51,6 +51,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_6_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_6_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_12_sos.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_12_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_14_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_13_0_14_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_0_toc.bin"); @@ -122,6 +124,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp) case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 7): case IP_VERSION(13, 0, 10): + case IP_VERSION(13, 0, 12): case IP_VERSION(13, 0, 14): err = psp_init_sos_microcode(psp, ucode_prefix); if (err) @@ -177,6 +180,7 @@ static int psp_v13_0_wait_for_bootloader(struct psp_context *psp) retry_cnt = ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14))) ? 
PSP_VMBX_POLLING_LIMIT : 10; @@ -203,6 +207,7 @@ static int psp_v13_0_wait_for_bootloader_steady_state(struct psp_context *psp) int ret; if (amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) { ret = psp_v13_0_wait_for_vmbx_ready(psp); if (ret) @@ -288,6 +293,11 @@ static int psp_v13_0_bootloader_load_ras_drv(struct psp_context *psp) return psp_v13_0_bootloader_load_component(psp, &psp->ras_drv, PSP_BL__LOAD_RASDRV); } +static int psp_v13_0_bootloader_load_spdm_drv(struct psp_context *psp) +{ + return psp_v13_0_bootloader_load_component(psp, &psp->spdm_drv, PSP_BL__LOAD_SPDMDRV); +} + static inline void psp_v13_0_init_sos_version(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -798,6 +808,7 @@ static bool psp_v13_0_get_ras_capability(struct psp_context *psp) return false; if ((amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 6) || + amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 12) || amdgpu_ip_version(adev, MP0_HWIP, 0) == IP_VERSION(13, 0, 14)) && (!(adev->flags & AMD_IS_APU))) { reg_data = RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_127); @@ -857,6 +868,7 @@ static const struct psp_funcs psp_v13_0_funcs = { .bootloader_load_intf_drv = psp_v13_0_bootloader_load_intf_drv, .bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv, .bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv, + .bootloader_load_spdm_drv = psp_v13_0_bootloader_load_spdm_drv, .bootloader_load_sos = psp_v13_0_bootloader_load_sos, .ring_create = psp_v13_0_ring_create, .ring_stop = psp_v13_0_ring_stop, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c index 4d33c95a5116..7c49c3f3c388 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v14_0.c @@ -35,6 +35,8 @@ MODULE_FIRMWARE("amdgpu/psp_14_0_2_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_2_ta.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_3_sos.bin"); MODULE_FIRMWARE("amdgpu/psp_14_0_3_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_5_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_14_0_5_ta.bin"); /* For large FW files the time to complete can be very long */ #define USBC_PD_POLLING_LIMIT_S 240 @@ -72,6 +74,14 @@ static int psp_v14_0_init_microcode(struct psp_context *psp) if (err) return err; break; + case IP_VERSION(14, 0, 5): + err = psp_init_toc_microcode(psp, ucode_prefix); + if (err) + return err; + err = psp_init_ta_microcode(psp, ucode_prefix); + if (err) + return err; + break; default: BUG(); } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 7948d74f8722..92ce580647cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -145,9 +145,11 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { if (i == 0) err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma1.bin", chip_name); if (err) goto out; @@ -631,7 +633,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -909,9 +911,9 @@ static int sdma_v2_4_resume(struct 
amdgpu_ip_block *ip_block) return sdma_v2_4_hw_init(ip_block); } -static bool sdma_v2_4_is_idle(void *handle) +static bool sdma_v2_4_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(mmSRBM_STATUS2); if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK | @@ -1080,14 +1082,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } -static int sdma_v2_4_set_clockgating_state(void *handle, +static int sdma_v2_4_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { /* XXX handled via the smc on VI */ return 0; } -static int sdma_v2_4_set_powergating_state(void *handle, +static int sdma_v2_4_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 9a3d729545a7..1c076bd1cf73 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -305,9 +305,11 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { if (i == 0) err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma.bin", chip_name); else err = amdgpu_ucode_request(adev, &adev->sdma.instance[i].fw, + AMDGPU_UCODE_REQUIRED, "amdgpu/%s_sdma1.bin", chip_name); if (err) goto out; @@ -904,7 +906,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) else r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -1198,9 +1200,9 @@ static int sdma_v3_0_resume(struct amdgpu_ip_block *ip_block) return sdma_v3_0_hw_init(ip_block); } -static bool sdma_v3_0_is_idle(void *handle) +static bool sdma_v3_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(mmSRBM_STATUS2); if (tmp & (SRBM_STATUS2__SDMA_BUSY_MASK | @@ -1483,10 +1485,10 @@ static void sdma_v3_0_update_sdma_medium_grain_light_sleep( } } -static int sdma_v3_0_set_clockgating_state(void *handle, +static int sdma_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1506,15 +1508,15 @@ static int sdma_v3_0_set_clockgating_state(void *handle, return 0; } -static int sdma_v3_0_set_powergating_state(void *handle, +static int sdma_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; } -static void sdma_v3_0_get_clockgating_state(void *handle, u64 *flags) +static void sdma_v3_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; if (amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index c1f98f6cf20d..33ed2b158fcd 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1565,7 +1565,7 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + 
amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -1956,7 +1956,7 @@ static int sdma_v4_0_hw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; if (adev->flags & AMD_IS_APU) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false, 0); if (!amdgpu_sriov_vf(adev)) sdma_v4_0_init_golden_registers(adev); @@ -1983,7 +1983,7 @@ static int sdma_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) sdma_v4_0_enable(adev, false); if (adev->flags & AMD_IS_APU) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true); + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true, 0); return 0; } @@ -2015,9 +2015,9 @@ static int sdma_v4_0_resume(struct amdgpu_ip_block *ip_block) return sdma_v4_0_hw_init(ip_block); } -static bool sdma_v4_0_is_idle(void *handle) +static bool sdma_v4_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 i; for (i = 0; i < adev->sdma.num_instances; i++) { @@ -2297,10 +2297,10 @@ static void sdma_v4_0_update_medium_grain_light_sleep( } } -static int sdma_v4_0_set_clockgating_state(void *handle, +static int sdma_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -2312,10 +2312,10 @@ static int sdma_v4_0_set_clockgating_state(void *handle, return 0; } -static int sdma_v4_0_set_powergating_state(void *handle, +static int sdma_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) { case IP_VERSION(4, 1, 0): @@ -2331,9 +2331,9 @@ static int sdma_v4_0_set_powergating_state(void *handle, return 0; } -static void sdma_v4_0_get_clockgating_state(void *handle, u64 *flags) +static void sdma_v4_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; if (amdgpu_sriov_vf(adev)) @@ -2381,7 +2381,6 @@ static void sdma_v4_0_dump_ip_state(struct amdgpu_ip_block *ip_block) if (!adev->sdma.ip_dump) return; - amdgpu_gfx_off_ctrl(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { instance_offset = i * reg_count; for (j = 0; j < reg_count; j++) @@ -2389,7 +2388,6 @@ static void sdma_v4_0_dump_ip_state(struct amdgpu_ip_block *ip_block) RREG32(sdma_v4_0_get_reg_offset(adev, i, sdma_reg_list_4_0[j].reg_offset)); } - amdgpu_gfx_off_ctrl(adev, true); } const struct amd_ip_funcs sdma_v4_0_ip_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c index a38553f38fdc..dc94d58d33a6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c @@ -30,6 +30,8 @@ #include "amdgpu_xcp.h" #include "amdgpu_ucode.h" #include "amdgpu_trace.h" +#include "amdgpu_reset.h" +#include "gc/gc_9_0_sh_mask.h" #include "sdma/sdma_4_4_2_offset.h" #include "sdma/sdma_4_4_2_sh_mask.h" @@ -105,6 +107,8 @@ static void sdma_v4_4_2_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_vm_pte_funcs(struct 
amdgpu_device *adev); static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev); static void sdma_v4_4_2_set_ras_funcs(struct amdgpu_device *adev); +static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev); +static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev); static u32 sdma_v4_4_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 offset) @@ -189,6 +193,7 @@ static int sdma_v4_4_2_init_microcode(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 4) || amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) { ret = amdgpu_sdma_init_microcode(adev, 0, true); break; @@ -667,11 +672,12 @@ static uint32_t sdma_v4_4_2_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) * * @adev: amdgpu_device pointer * @i: instance to resume + * @restore: whether to restore the cached wptr when restarting the queue * * Set up the gfx DMA ring buffers and enable them. * Returns 0 for success, error for failure. */ -static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) +static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i, bool restore) { struct amdgpu_ring *ring = &adev->sdma.instance[i].ring; u32 rb_cntl, ib_cntl, wptr_poll_cntl; @@ -679,6 +685,7 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) u32 doorbell; u32 doorbell_offset; u64 wptr_gpu_addr; + u64 rwptr; wb_offset = (ring->rptr_offs * 4); @@ -698,16 +705,32 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) WREG32_SDMA(i, regSDMA_GFX_RB_BASE, ring->gpu_addr >> 8); WREG32_SDMA(i, regSDMA_GFX_RB_BASE_HI, ring->gpu_addr >> 40); - ring->wptr = 0; + if (!restore) + ring->wptr = 0; /* before programing wptr to a less value, need set minor_ptr_update first */ WREG32_SDMA(i, regSDMA_GFX_MINOR_PTR_UPDATE, 1); + /* If this is the guilty queue, set RPTR to the current wptr to skip the bad commands; + * otherwise, restore the cached rptr and continue execution. + */ + if (adev->sdma.instance[i].gfx_guilty) + rwptr = ring->wptr; + else + rwptr = ring->cached_rptr; + /* Initialize the ring buffer's read and write pointers */ - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); - WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); + if (restore) { + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, lower_32_bits(rwptr << 2)); + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, upper_32_bits(rwptr << 2)); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, lower_32_bits(rwptr << 2)); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, upper_32_bits(rwptr << 2)); + } else { + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_RPTR_HI, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR, 0); + WREG32_SDMA(i, regSDMA_GFX_RB_WPTR_HI, 0); + } doorbell = RREG32_SDMA(i, regSDMA_GFX_DOORBELL); doorbell_offset = RREG32_SDMA(i, regSDMA_GFX_DOORBELL_OFFSET); @@ -755,11 +778,12 @@ static void sdma_v4_4_2_gfx_resume(struct amdgpu_device *adev, unsigned int i) * * @adev: amdgpu_device pointer * @i: instance to resume + * @restore: whether to restore the cached ring pointers when restarting the queue * * Set up the page DMA ring buffers and enable them. * Returns 0 for success, error for failure.
*/ -static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i) +static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i, bool restore) { struct amdgpu_ring *ring = &adev->sdma.instance[i].page; u32 rb_cntl, ib_cntl, wptr_poll_cntl; @@ -767,6 +791,7 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i) u32 doorbell; u32 doorbell_offset; u64 wptr_gpu_addr; + u64 rwptr; wb_offset = (ring->rptr_offs * 4); @@ -774,11 +799,26 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i) rb_cntl = sdma_v4_4_2_rb_cntl(ring, rb_cntl); WREG32_SDMA(i, regSDMA_PAGE_RB_CNTL, rb_cntl); + /* If this is the guilty queue, set RPTR to the current wptr to skip the bad commands; + * otherwise, restore the cached rptr and continue execution. + */ + if (adev->sdma.instance[i].page_guilty) + rwptr = ring->wptr; + else + rwptr = ring->cached_rptr; + /* Initialize the ring buffer's read and write pointers */ - WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0); - WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0); - WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0); - WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0); + if (restore) { + WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, lower_32_bits(rwptr << 2)); + WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, upper_32_bits(rwptr << 2)); + WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, lower_32_bits(rwptr << 2)); + WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, upper_32_bits(rwptr << 2)); + } else { + WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR, 0); + WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_HI, 0); + WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR, 0); + WREG32_SDMA(i, regSDMA_PAGE_RB_WPTR_HI, 0); + } /* set the wb address whether it's enabled or not */ WREG32_SDMA(i, regSDMA_PAGE_RB_RPTR_ADDR_HI, @@ -792,7 +832,8 @@ static void sdma_v4_4_2_page_resume(struct amdgpu_device *adev, unsigned int i) WREG32_SDMA(i, regSDMA_PAGE_RB_BASE, ring->gpu_addr >> 8); WREG32_SDMA(i, regSDMA_PAGE_RB_BASE_HI, ring->gpu_addr >> 40); - ring->wptr = 0; + if (!restore) + ring->wptr = 0; /* before programing wptr to a less value, need set minor_ptr_update first */ WREG32_SDMA(i, regSDMA_PAGE_MINOR_PTR_UPDATE, 1); @@ -911,12 +952,13 @@ static int sdma_v4_4_2_inst_load_microcode(struct amdgpu_device *adev, * * @adev: amdgpu_device pointer * @inst_mask: mask of dma engine instances to be enabled + * @restore: whether to restore the cached ring pointers when restarting the engines * * Set up the DMA engines and enable them. * Returns 0 for success, error for failure.
*/ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, - uint32_t inst_mask) + uint32_t inst_mask, bool restore) { struct amdgpu_ring *ring; uint32_t tmp_mask; @@ -927,7 +969,7 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, sdma_v4_4_2_inst_enable(adev, false, inst_mask); } else { /* bypass sdma microcode loading on Gopher */ - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP && + if (!restore && adev->firmware.load_type != AMDGPU_FW_LOAD_PSP && adev->sdma.instance[0].fw) { r = sdma_v4_4_2_inst_load_microcode(adev, inst_mask); if (r) @@ -946,17 +988,19 @@ static int sdma_v4_4_2_inst_start(struct amdgpu_device *adev, uint32_t temp; WREG32_SDMA(i, regSDMA_SEM_WAIT_FAIL_TIMER_CNTL, 0); - sdma_v4_4_2_gfx_resume(adev, i); + sdma_v4_4_2_gfx_resume(adev, i, restore); if (adev->sdma.has_page_queue) - sdma_v4_4_2_page_resume(adev, i); + sdma_v4_4_2_page_resume(adev, i, restore); /* set utc l1 enable flag always to 1 */ temp = RREG32_SDMA(i, regSDMA_CNTL); temp = REG_SET_FIELD(temp, SDMA_CNTL, UTC_L1_ENABLE, 1); - /* enable context empty interrupt during initialization */ - temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1); - WREG32_SDMA(i, regSDMA_CNTL, temp); + if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) < IP_VERSION(4, 4, 5)) { + /* enable context empty interrupt during initialization */ + temp = REG_SET_FIELD(temp, SDMA_CNTL, CTXEMPTY_INT_ENABLE, 1); + WREG32_SDMA(i, regSDMA_CNTL, temp); + } if (!amdgpu_sriov_vf(adev)) { if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { /* unhalt engine */ @@ -1110,7 +1154,7 @@ static int sdma_v4_4_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -1247,21 +1291,71 @@ static void sdma_v4_4_2_ring_emit_pipeline_sync(struct amdgpu_ring *ring) seq, 0xffffffff, 4); } - -/** - * sdma_v4_4_2_ring_emit_vm_flush - vm flush using sDMA +/* + * sdma_v4_4_2_get_invalidate_req - Construct the VM_INVALIDATE_ENG0_REQ register value + * @vmid: The VMID to invalidate + * @flush_type: The type of flush (0 = legacy, 1 = lightweight, 2 = heavyweight) * - * @ring: amdgpu_ring pointer - * @vmid: vmid number to use - * @pd_addr: address + * This function constructs the VM_INVALIDATE_ENG0_REQ register value for the specified VMID + * and flush type. It ensures that all relevant page table cache levels (L1 PTEs, L2 PTEs, and + * L2 PDEs) are invalidated. + */ +static uint32_t sdma_v4_4_2_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) +{ + u32 req = 0; + + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, + PER_VMID_INVALIDATE_REQ, 1 << vmid); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, + CLEAR_PROTECTION_FAULT_STATUS_ADDR, 0); + + return req; +} + +/* + * sdma_v4_4_2_ring_emit_vm_flush - Emit VM flush commands for SDMA + * @ring: The SDMA ring + * @vmid: The VMID to flush + * @pd_addr: The page directory address * - * Update the page table base and flush the VM TLB - * using sDMA. 
+ * This function emits the necessary register writes and waits to perform a VM flush for the + * specified VMID. It updates the PTB address registers and issues a VM invalidation request + * using the specified VM invalidation engine. */ static void sdma_v4_4_2_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned vmid, uint64_t pd_addr) + unsigned int vmid, uint64_t pd_addr) { - amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr); + struct amdgpu_device *adev = ring->adev; + uint32_t req = sdma_v4_4_2_get_invalidate_req(vmid, 0); + unsigned int eng = ring->vm_inv_eng; + struct amdgpu_vmhub *hub = &adev->vmhub[ring->vm_hub]; + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + + (hub->ctx_addr_distance * vmid), + lower_32_bits(pd_addr)); + + amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_hi32 + + (hub->ctx_addr_distance * vmid), + upper_32_bits(pd_addr)); + /* + * Construct and emit the VM invalidation packet + */ + amdgpu_ring_write(ring, + SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_VM_INVALIDATE) | + SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATE) | + SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(0x1f) | + SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(0x1f) | + SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(eng)); + amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(req)); + amdgpu_ring_write(ring, 0); + amdgpu_ring_write(ring, SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(BIT(vmid))); } static void sdma_v4_4_2_ring_emit_wreg(struct amdgpu_ring *ring, @@ -1308,6 +1402,7 @@ static int sdma_v4_4_2_early_init(struct amdgpu_ip_block *ip_block) sdma_v4_4_2_set_vm_pte_funcs(adev); sdma_v4_4_2_set_irq_funcs(adev); sdma_v4_4_2_set_ras_funcs(adev); + sdma_v4_4_2_set_engine_reset_funcs(adev); return 0; } @@ -1329,6 +1424,12 @@ static int sdma_v4_4_2_late_init(struct amdgpu_ip_block *ip_block) if (!amdgpu_persistent_edc_harvesting_supported(adev)) amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__SDMA); + /* The initialization is done in the late_init stage to ensure that the SMU + * initialization and capability setup are completed before we check the SDMA + * reset capability + */ + sdma_v4_4_2_update_reset_mask(adev); + return 0; } @@ -1384,9 +1485,20 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) &adev->sdma.srbm_write_irq); if (r) return r; + + r = amdgpu_irq_add_id(adev, sdma_v4_4_2_seq_to_irq_id(i), + SDMA0_4_0__SRCID__SDMA_CTXEMPTY, + &adev->sdma.ctxt_empty_irq); + if (r) + return r; } for (i = 0; i < adev->sdma.num_instances; i++) { + mutex_init(&adev->sdma.instance[i].engine_reset_mutex); + /* Initialize guilty flags for GFX and PAGE queues */ + adev->sdma.instance[i].gfx_guilty = false; + adev->sdma.instance[i].page_guilty = false; + ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; ring->use_doorbell = true; @@ -1430,7 +1542,6 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block) } } - /* TODO: Add queue reset mask when FW fully supports it */ adev->sdma.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring); @@ -1466,6 +1577,7 @@ static int sdma_v4_4_2_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_sdma_sysfs_reset_mask_fini(adev); if (amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 2) || + amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 4) || amdgpu_ip_version(adev, SDMA0_HWIP, 0) == IP_VERSION(4, 4, 5)) amdgpu_sdma_destroy_inst_ctx(adev, true); else @@ -1486,7 +1598,7 @@ static int sdma_v4_4_2_hw_init(struct amdgpu_ip_block *ip_block) if 
(!amdgpu_sriov_vf(adev)) sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask); - r = sdma_v4_4_2_inst_start(adev, inst_mask); + r = sdma_v4_4_2_inst_start(adev, inst_mask, false); return r; } @@ -1514,7 +1626,7 @@ static int sdma_v4_4_2_hw_fini(struct amdgpu_ip_block *ip_block) return 0; } -static int sdma_v4_4_2_set_clockgating_state(void *handle, +static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); static int sdma_v4_4_2_suspend(struct amdgpu_ip_block *ip_block) @@ -1522,7 +1634,7 @@ static int sdma_v4_4_2_suspend(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; if (amdgpu_in_reset(adev)) - sdma_v4_4_2_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); + sdma_v4_4_2_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE); return sdma_v4_4_2_hw_fini(ip_block); } @@ -1532,9 +1644,9 @@ static int sdma_v4_4_2_resume(struct amdgpu_ip_block *ip_block) return sdma_v4_4_2_hw_init(ip_block); } -static bool sdma_v4_4_2_is_idle(void *handle) +static bool sdma_v4_4_2_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 i; for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1573,6 +1685,121 @@ static int sdma_v4_4_2_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } +static bool sdma_v4_4_2_is_queue_selected(struct amdgpu_device *adev, uint32_t instance_id, bool is_page_queue) +{ + uint32_t reg_offset = is_page_queue ? regSDMA_PAGE_CONTEXT_STATUS : regSDMA_GFX_CONTEXT_STATUS; + uint32_t context_status = RREG32(sdma_v4_4_2_get_reg_offset(adev, instance_id, reg_offset)); + + /* Check if the SELECTED bit is set */ + return (context_status & SDMA_GFX_CONTEXT_STATUS__SELECTED_MASK) != 0; +} + +static bool sdma_v4_4_2_ring_is_guilty(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t instance_id = ring->me; + + return sdma_v4_4_2_is_queue_selected(adev, instance_id, false); +} + +static bool sdma_v4_4_2_page_ring_is_guilty(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t instance_id = ring->me; + + if (!adev->sdma.has_page_queue) + return false; + + return sdma_v4_4_2_is_queue_selected(adev, instance_id, true); +} + +static int sdma_v4_4_2_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + u32 id = GET_INST(SDMA0, ring->me); + int r; + + if (!(adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)) + return -EOPNOTSUPP; + + amdgpu_amdkfd_suspend(adev, false); + r = amdgpu_sdma_reset_engine(adev, id); + amdgpu_amdkfd_resume(adev, false); + + return r; +} + +static int sdma_v4_4_2_stop_queue(struct amdgpu_device *adev, uint32_t instance_id) +{ + u32 inst_mask; + uint64_t rptr; + struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + /* Check if this queue is the guilty one */ + adev->sdma.instance[instance_id].gfx_guilty = + sdma_v4_4_2_is_queue_selected(adev, instance_id, false); + if (adev->sdma.has_page_queue) + adev->sdma.instance[instance_id].page_guilty = + sdma_v4_4_2_is_queue_selected(adev, instance_id, true); + + /* Cache the rptr before reset, after the reset, + * all of the registers will be reset to 0 + */ + rptr = amdgpu_ring_get_rptr(ring); + ring->cached_rptr = rptr; + /* Cache the rptr for the page queue if it exists */ + if (adev->sdma.has_page_queue) { + struct amdgpu_ring *page_ring = 
&adev->sdma.instance[instance_id].page; + rptr = amdgpu_ring_get_rptr(page_ring); + page_ring->cached_rptr = rptr; + } + + /* stop queue */ + inst_mask = 1 << ring->me; + sdma_v4_4_2_inst_gfx_stop(adev, inst_mask); + if (adev->sdma.has_page_queue) + sdma_v4_4_2_inst_page_stop(adev, inst_mask); + + return 0; +} + +static int sdma_v4_4_2_restore_queue(struct amdgpu_device *adev, uint32_t instance_id) +{ + int i; + u32 inst_mask; + struct amdgpu_ring *ring = &adev->sdma.instance[instance_id].ring; + + inst_mask = 1 << ring->me; + udelay(50); + + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(RREG32_SDMA(ring->me, regSDMA_F32_CNTL), SDMA_F32_CNTL, HALT)) + break; + udelay(1); + } + + if (i == adev->usec_timeout) { + dev_err(adev->dev, "timed out waiting for SDMA%d unhalt after reset\n", + ring->me); + return -ETIMEDOUT; + } + + return sdma_v4_4_2_inst_start(adev, inst_mask, true); +} + +static struct sdma_on_reset_funcs sdma_v4_4_2_engine_reset_funcs = { + .pre_reset = sdma_v4_4_2_stop_queue, + .post_reset = sdma_v4_4_2_restore_queue, +}; + +static void sdma_v4_4_2_set_engine_reset_funcs(struct amdgpu_device *adev) +{ + amdgpu_sdma_register_on_reset_callbacks(adev, &sdma_v4_4_2_engine_reset_funcs); +} + static int sdma_v4_4_2_set_trap_irq_state(struct amdgpu_device *adev, struct amdgpu_irq_src *source, unsigned type, @@ -1618,6 +1845,9 @@ static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev, case 0: amdgpu_fence_process(&adev->sdma.instance[i].ring); break; + case 1: + amdgpu_fence_process(&adev->sdma.instance[i].page); + break; default: break; } @@ -1755,6 +1985,16 @@ static int sdma_v4_4_2_process_srbm_write_irq(struct amdgpu_device *adev, return 0; } +static int sdma_v4_4_2_process_ctxt_empty_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + /* There is nothing useful to be done here, only kept for debug */ + dev_dbg_ratelimited(adev->dev, "SDMA context empty interrupt"); + sdma_v4_4_2_print_iv_entry(adev, entry); + return 0; +} + static void sdma_v4_4_2_inst_update_medium_grain_light_sleep( struct amdgpu_device *adev, bool enable, uint32_t inst_mask) { @@ -1821,10 +2061,10 @@ static void sdma_v4_4_2_inst_update_medium_grain_clock_gating( } } -static int sdma_v4_4_2_set_clockgating_state(void *handle, +static int sdma_v4_4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; uint32_t inst_mask; if (amdgpu_sriov_vf(adev)) @@ -1839,15 +2079,15 @@ static int sdma_v4_4_2_set_clockgating_state(void *handle, return 0; } -static int sdma_v4_4_2_set_powergating_state(void *handle, +static int sdma_v4_4_2_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; } -static void sdma_v4_4_2_get_clockgating_state(void *handle, u64 *flags) +static void sdma_v4_4_2_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; if (amdgpu_sriov_vf(adev)) @@ -1895,7 +2135,6 @@ static void sdma_v4_4_2_dump_ip_state(struct amdgpu_ip_block *ip_block) if (!adev->sdma.ip_dump) return; - amdgpu_gfx_off_ctrl(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { instance_offset = i * reg_count; for (j = 0; j < reg_count; j++) @@ -1903,7 +2142,6 @@ static void sdma_v4_4_2_dump_ip_state(struct amdgpu_ip_block 
*ip_block) RREG32(sdma_v4_4_2_get_reg_offset(adev, i, sdma_reg_list_4_4_2[j].reg_offset)); } - amdgpu_gfx_off_ctrl(adev, true); } const struct amd_ip_funcs sdma_v4_4_2_ip_funcs = { @@ -1939,8 +2177,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { 3 + /* hdp invalidate */ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */ /* sdma_v4_4_2_ring_emit_vm_flush */ - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 4 + 2 * 3 + 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */ .emit_ib = sdma_v4_4_2_ring_emit_ib, @@ -1955,6 +2192,8 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = { .emit_wreg = sdma_v4_4_2_ring_emit_wreg, .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = sdma_v4_4_2_reset_queue, + .is_guilty = sdma_v4_4_2_ring_is_guilty, }; static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { @@ -1970,8 +2209,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { 3 + /* hdp invalidate */ 6 + /* sdma_v4_4_2_ring_emit_pipeline_sync */ /* sdma_v4_4_2_ring_emit_vm_flush */ - SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + - SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 4 + 2 * 3 + 10 + 10 + 10, /* sdma_v4_4_2_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 7 + 6, /* sdma_v4_4_2_ring_emit_ib */ .emit_ib = sdma_v4_4_2_ring_emit_ib, @@ -1986,6 +2224,8 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = { .emit_wreg = sdma_v4_4_2_ring_emit_wreg, .emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, + .reset = sdma_v4_4_2_reset_queue, + .is_guilty = sdma_v4_4_2_page_ring_is_guilty, }; static void sdma_v4_4_2_set_ring_funcs(struct amdgpu_device *adev) @@ -2038,6 +2278,10 @@ static const struct amdgpu_irq_src_funcs sdma_v4_4_2_srbm_write_irq_funcs = { .process = sdma_v4_4_2_process_srbm_write_irq, }; +static const struct amdgpu_irq_src_funcs sdma_v4_4_2_ctxt_empty_irq_funcs = { + .process = sdma_v4_4_2_process_ctxt_empty_irq, +}; + static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev) { adev->sdma.trap_irq.num_types = adev->sdma.num_instances; @@ -2046,6 +2290,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev) adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances; adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances; adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances; + adev->sdma.ctxt_empty_irq.num_types = adev->sdma.num_instances; adev->sdma.trap_irq.funcs = &sdma_v4_4_2_trap_irq_funcs; adev->sdma.illegal_inst_irq.funcs = &sdma_v4_4_2_illegal_inst_irq_funcs; @@ -2054,6 +2299,7 @@ static void sdma_v4_4_2_set_irq_funcs(struct amdgpu_device *adev) adev->sdma.doorbell_invalid_irq.funcs = &sdma_v4_4_2_doorbell_invalid_irq_funcs; adev->sdma.pool_timeout_irq.funcs = &sdma_v4_4_2_pool_timeout_irq_funcs; adev->sdma.srbm_write_irq.funcs = &sdma_v4_4_2_srbm_write_irq_funcs; + adev->sdma.ctxt_empty_irq.funcs = &sdma_v4_4_2_ctxt_empty_irq_funcs; } /** @@ -2151,6 +2397,38 @@ static void sdma_v4_4_2_set_vm_pte_funcs(struct amdgpu_device *adev) adev->vm_manager.vm_pte_num_scheds = adev->sdma.num_instances; } +/** + * sdma_v4_4_2_update_reset_mask - update reset mask for SDMA + * @adev: Pointer to the AMDGPU device structure + * + * This function updates the reset mask for SDMA and sets the supported + * reset types based 
on the IP version and firmware versions. + * + */ +static void sdma_v4_4_2_update_reset_mask(struct amdgpu_device *adev) +{ + /* per queue reset not supported for SRIOV */ + if (amdgpu_sriov_vf(adev)) + return; + + /* + * the user queue relies on MEC fw and pmfw when the sdma queue does a reset. + * Both of them need to be checked here to skip old mec and pmfw. + */ + switch (amdgpu_ip_version(adev, GC_HWIP, 0)) { + case IP_VERSION(9, 4, 3): + case IP_VERSION(9, 4, 4): + if ((adev->gfx.mec_fw_version >= 0xb0) && amdgpu_dpm_reset_sdma_is_supported(adev)) + adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + break; + case IP_VERSION(9, 5, 0): + /* TODO: enable the queue reset flag once fw supports it */ + default: + break; + } + +} + const struct amdgpu_ip_block_version sdma_v4_4_2_ip_block = { .type = AMD_IP_BLOCK_TYPE_SDMA, .major = 4, @@ -2167,7 +2445,7 @@ static int sdma_v4_4_2_xcp_resume(void *handle, uint32_t inst_mask) if (!amdgpu_sriov_vf(adev)) sdma_v4_4_2_inst_init_golden_registers(adev, inst_mask); - r = sdma_v4_4_2_inst_start(adev, inst_mask); + r = sdma_v4_4_2_inst_start(adev, inst_mask, false); return r; } @@ -2312,11 +2590,13 @@ static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_ban misc0 = bank->regs[ACA_REG_IDX_MISC0]; switch (type) { case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, 1ULL); break; case ACA_SMU_TYPE_CE: - ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, + bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, ACA_REG__MISC0__ERRCNT(misc0)); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index fa9b40934957..0dce59f4f6e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -1194,7 +1194,7 @@ static int sdma_v5_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: if (!ring->is_mes_queue) @@ -1530,9 +1530,9 @@ static int sdma_v5_0_resume(struct amdgpu_ip_block *ip_block) return sdma_v5_0_hw_init(ip_block); } -static bool sdma_v5_0_is_idle(void *handle) +static bool sdma_v5_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 i; for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1853,10 +1853,10 @@ static void sdma_v5_0_update_medium_grain_light_sleep(struct amdgpu_device *adev } } -static int sdma_v5_0_set_clockgating_state(void *handle, +static int sdma_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1877,15 +1877,15 @@ static int sdma_v5_0_set_clockgating_state(void *handle, return 0; } -static int sdma_v5_0_set_powergating_state(void *handle, +static int sdma_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; } -static void sdma_v5_0_get_clockgating_state(void *handle, u64 *flags) +static void sdma_v5_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; 
int data; if (amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index ba5160399ab2..2b39a03ff0c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1050,7 +1050,7 @@ static int sdma_v5_2_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: if (!ring->is_mes_queue) @@ -1435,9 +1435,9 @@ static int sdma_v5_2_resume(struct amdgpu_ip_block *ip_block) return sdma_v5_2_hw_init(ip_block); } -static bool sdma_v5_2_is_idle(void *handle) +static bool sdma_v5_2_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 i; for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1812,10 +1812,10 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev } } -static int sdma_v5_2_set_clockgating_state(void *handle, +static int sdma_v5_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1841,15 +1841,15 @@ static int sdma_v5_2_set_clockgating_state(void *handle, return 0; } -static int sdma_v5_2_set_powergating_state(void *handle, +static int sdma_v5_2_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; } -static void sdma_v5_2_get_clockgating_state(void *handle, u64 *flags) +static void sdma_v5_2_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; if (amdgpu_sriov_vf(adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index d46128b0ec92..c214c3d2149b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -51,6 +51,7 @@ MODULE_FIRMWARE("amdgpu/sdma_6_0_3.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_1_0.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_1_1.bin"); MODULE_FIRMWARE("amdgpu/sdma_6_1_2.bin"); +MODULE_FIRMWARE("amdgpu/sdma_6_1_3.bin"); #define SDMA1_REG_OFFSET 0x600 #define SDMA0_HYP_DEC_REG_START 0x5880 @@ -1063,7 +1064,7 @@ static int sdma_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: if (!ring->is_mes_queue) @@ -1428,9 +1429,9 @@ static int sdma_v6_0_resume(struct amdgpu_ip_block *ip_block) return sdma_v6_0_hw_init(ip_block); } -static bool sdma_v6_0_is_idle(void *handle) +static bool sdma_v6_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 i; for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1601,19 +1602,19 @@ static int sdma_v6_0_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } -static int sdma_v6_0_set_clockgating_state(void *handle, +static int sdma_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int sdma_v6_0_set_powergating_state(void *handle, +static int sdma_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; } -static void 
sdma_v6_0_get_clockgating_state(void *handle, u64 *flags) +static void sdma_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c index d2ce6b6a7ff6..b2706221df99 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c @@ -490,162 +490,185 @@ static void sdma_v7_0_enable(struct amdgpu_device *adev, bool enable) } /** - * sdma_v7_0_gfx_resume - setup and start the async dma engines + * sdma_v7_0_gfx_resume_instance - start/restart a certain sdma engine * * @adev: amdgpu_device pointer + * @i: instance + * @restore: used to restore wptr when restart * - * Set up the gfx DMA ring buffers and enable them. - * Returns 0 for success, error for failure. + * Set up the gfx DMA ring buffers and enable them. On restart, we will restore wptr and rptr. + * Return 0 for success. */ -static int sdma_v7_0_gfx_resume(struct amdgpu_device *adev) +static int sdma_v7_0_gfx_resume_instance(struct amdgpu_device *adev, int i, bool restore) { struct amdgpu_ring *ring; u32 rb_cntl, ib_cntl; u32 rb_bufsz; u32 doorbell; u32 doorbell_offset; - u32 tmp; + u32 temp; u64 wptr_gpu_addr; - int i, r; - - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; + int r; - //if (!amdgpu_sriov_vf(adev)) - // WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + ring = &adev->sdma.instance[i].ring; - /* Set ring buffer size in dwords */ - rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); + /* Set ring buffer size in dwords */ + rb_bufsz = order_base_2(ring->ring_size / 4); + rb_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL)); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SIZE, rb_bufsz); #ifdef __BIG_ENDIAN - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, - RPTR_WRITEBACK_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, + RPTR_WRITEBACK_SWAP_ENABLE, 1); #endif - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); - - /* Initialize the ring buffer's read and write pointers */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_PRIV, 1); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); + + /* Initialize the ring buffer's read and write pointers */ + if (restore) { + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), upper_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr << 2)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + } else { WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR), 0); WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_HI), 0); WREG32_SOC15_IP(GC, 
sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), 0); WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), 0); + } + /* setup the wptr shadow polling */ + wptr_gpu_addr = ring->wptr_gpu_addr; + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO), + lower_32_bits(wptr_gpu_addr)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI), + upper_32_bits(wptr_gpu_addr)); + + /* set the wb address whether it's enabled or not */ + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI), + upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO), + lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); + + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + if (amdgpu_sriov_vf(adev)) + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1); + else + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0); - /* setup the wptr shadow polling */ - wptr_gpu_addr = ring->wptr_gpu_addr; - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_LO), - lower_32_bits(wptr_gpu_addr)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_POLL_ADDR_HI), - upper_32_bits(wptr_gpu_addr)); - - /* set the wb address whether it's enabled or not */ - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_HI), - upper_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFF); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_RPTR_ADDR_LO), - lower_32_bits(ring->rptr_gpu_addr) & 0xFFFFFFFC); - - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); - if (amdgpu_sriov_vf(adev)) - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 1); - else - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, WPTR_POLL_ENABLE, 0); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, MCU_WPTR_POLL_ENABLE, 1); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE), ring->gpu_addr >> 8); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_BASE_HI), ring->gpu_addr >> 40); + if (!restore) ring->wptr = 0; - /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1); + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 1); - if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); - } + if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR), lower_32_bits(ring->wptr) << 2); + 
WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); + } - doorbell = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL)); - doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET)); + doorbell = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL)); + doorbell_offset = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET)); - if (ring->use_doorbell) { - doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1); - doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET, - OFFSET, ring->doorbell_index); - } else { - doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0); - } - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset); - - if (i == 0) - adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, - ring->doorbell_index, - adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances); - - if (amdgpu_sriov_vf(adev)) - sdma_v7_0_ring_set_wptr(ring); - - /* set minor_ptr_update to 0 after wptr programed */ - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0); - - /* Set up sdma hang watchdog */ - tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL)); - /* 100ms per unit */ - tmp = REG_SET_FIELD(tmp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT, - max(adev->usec_timeout/100000, 1)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), tmp); - - /* Set up RESP_MODE to non-copy addresses */ - tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL)); - tmp = REG_SET_FIELD(tmp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); - tmp = REG_SET_FIELD(tmp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), tmp); - - /* program default cache read and write policy */ - tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE)); - /* clean read policy and write policy bits */ - tmp &= 0xFF0FFF; - tmp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | - (CACHE_WRITE_POLICY_L2__DEFAULT << 14)); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), tmp); - - if (!amdgpu_sriov_vf(adev)) { - /* unhalt engine */ - tmp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL)); - tmp = REG_SET_FIELD(tmp, SDMA0_MCU_CNTL, HALT, 0); - tmp = REG_SET_FIELD(tmp, SDMA0_MCU_CNTL, RESET, 0); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), tmp); - } + if (ring->use_doorbell) { + doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 1); + doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_QUEUE0_DOORBELL_OFFSET, + OFFSET, ring->doorbell_index); + } else { + doorbell = REG_SET_FIELD(doorbell, SDMA0_QUEUE0_DOORBELL, ENABLE, 0); + } + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL), doorbell); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_DOORBELL_OFFSET), doorbell_offset); - /* enable DMA RB */ - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1); - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); + if (i == 0) + 
adev->nbio.funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index, + adev->doorbell_index.sdma_doorbell_range * adev->sdma.num_instances); - ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1); + if (amdgpu_sriov_vf(adev)) + sdma_v7_0_ring_set_wptr(ring); + + /* set minor_ptr_update to 0 after wptr programed */ + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_MINOR_PTR_UPDATE), 0); + + /* Set up sdma hang watchdog */ + temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL)); + /* 100ms per unit */ + temp = REG_SET_FIELD(temp, SDMA0_WATCHDOG_CNTL, QUEUE_HANG_COUNT, + max(adev->usec_timeout/100000, 1)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_WATCHDOG_CNTL), temp); + + /* Set up RESP_MODE to non-copy addresses */ + temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, RESP_MODE, 3); + temp = REG_SET_FIELD(temp, SDMA0_UTCL1_CNTL, REDO_DELAY, 9); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_CNTL), temp); + + /* program default cache read and write policy */ + temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE)); + /* clean read policy and write policy bits */ + temp &= 0xFF0FFF; + temp |= ((CACHE_READ_POLICY_L2__DEFAULT << 12) | + (CACHE_WRITE_POLICY_L2__DEFAULT << 14)); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_UTCL1_PAGE), temp); + + if (!amdgpu_sriov_vf(adev)) { + /* unhalt engine */ + temp = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL)); + temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, HALT, 0); + temp = REG_SET_FIELD(temp, SDMA0_MCU_CNTL, RESET, 0); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_MCU_CNTL), temp); + } + + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_QUEUE0_RB_CNTL, RB_ENABLE, 1); + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_RB_CNTL), rb_cntl); + + ib_cntl = RREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL)); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_ENABLE, 1); #ifdef __BIG_ENDIAN - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_QUEUE0_IB_CNTL, IB_SWAP_ENABLE, 1); #endif - /* enable DMA IBs */ - WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl); + /* enable DMA IBs */ + WREG32_SOC15_IP(GC, sdma_v7_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_IB_CNTL), ib_cntl); + ring->sched.ready = true; - ring->sched.ready = true; + if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ + sdma_v7_0_ctx_switch_enable(adev, true); + sdma_v7_0_enable(adev, true); + } - if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ - sdma_v7_0_ctx_switch_enable(adev, true); - sdma_v7_0_enable(adev, true); - } + r = amdgpu_ring_test_helper(ring); + if (r) + ring->sched.ready = false; - r = amdgpu_ring_test_helper(ring); - if (r) { - ring->sched.ready = false; - return r; - } + return r; +} + +/** + * sdma_v7_0_gfx_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * + * Set up the gfx DMA ring buffers and enable them. + * Returns 0 for success, error for failure. 
+ */ +static int sdma_v7_0_gfx_resume(struct amdgpu_device *adev) +{ + int i, r; + for (i = 0; i < adev->sdma.num_instances; i++) { + r = sdma_v7_0_gfx_resume_instance(adev, i, false); + if (r) + return r; } return 0; + } /** @@ -806,6 +829,31 @@ static bool sdma_v7_0_check_soft_reset(struct amdgpu_ip_block *ip_block) return false; } +static int sdma_v7_0_reset_queue(struct amdgpu_ring *ring, unsigned int vmid) +{ + struct amdgpu_device *adev = ring->adev; + int i, r; + + if (amdgpu_sriov_vf(adev)) + return -EINVAL; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (ring == &adev->sdma.instance[i].ring) + break; + } + + if (i == adev->sdma.num_instances) { + DRM_ERROR("sdma instance not found\n"); + return -EINVAL; + } + + r = amdgpu_mes_reset_legacy_queue(adev, ring, vmid, true); + if (r) + return r; + + return sdma_v7_0_gfx_resume_instance(adev, i, true); +} + /** * sdma_v7_0_start - setup and start the async dma engines * @@ -1060,7 +1108,7 @@ static int sdma_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: if (!ring->is_mes_queue) @@ -1316,6 +1364,13 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } + adev->sdma.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->sdma.instance[0].ring); + adev->sdma.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE; + + r = amdgpu_sdma_sysfs_reset_mask_init(adev); + if (r) + return r; /* Allocate memory for SDMA IP Dump buffer */ ptr = kcalloc(adev->sdma.num_instances * reg_count, sizeof(uint32_t), GFP_KERNEL); if (ptr) @@ -1334,6 +1389,7 @@ static int sdma_v7_0_sw_fini(struct amdgpu_ip_block *ip_block) for (i = 0; i < adev->sdma.num_instances; i++) amdgpu_ring_fini(&adev->sdma.instance[i].ring); + amdgpu_sdma_sysfs_reset_mask_fini(adev); amdgpu_sdma_destroy_inst_ctx(adev, true); if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) @@ -1374,9 +1430,9 @@ static int sdma_v7_0_resume(struct amdgpu_ip_block *ip_block) return sdma_v7_0_hw_init(ip_block); } -static bool sdma_v7_0_is_idle(void *handle) +static bool sdma_v7_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 i; for (i = 0; i < adev->sdma.num_instances; i++) { @@ -1524,19 +1580,19 @@ static int sdma_v7_0_process_illegal_inst_irq(struct amdgpu_device *adev, return 0; } -static int sdma_v7_0_set_clockgating_state(void *handle, +static int sdma_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int sdma_v7_0_set_powergating_state(void *handle, +static int sdma_v7_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; } -static void sdma_v7_0_get_clockgating_state(void *handle, u64 *flags) +static void sdma_v7_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { } @@ -1636,6 +1692,7 @@ static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = { .emit_reg_write_reg_wait = sdma_v7_0_ring_emit_reg_write_reg_wait, .init_cond_exec = sdma_v7_0_ring_init_cond_exec, .preempt_ib = sdma_v7_0_ring_preempt_ib, + .reset = sdma_v7_0_reset_queue, }; static void sdma_v7_0_set_ring_funcs(struct amdgpu_device *adev) @@ -1684,11 +1741,12 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, uint32_t byte_count, uint32_t copy_flags) { - uint32_t num_type, data_format, max_com; + uint32_t num_type, data_format, 
max_com, write_cm; max_com = AMDGPU_COPY_FLAGS_GET(copy_flags, MAX_COMPRESSED); data_format = AMDGPU_COPY_FLAGS_GET(copy_flags, DATA_FORMAT); num_type = AMDGPU_COPY_FLAGS_GET(copy_flags, NUMBER_TYPE); + write_cm = AMDGPU_COPY_FLAGS_GET(copy_flags, WRITE_COMPRESS_DISABLE) ? 2 : 1; ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) | SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) | @@ -1705,7 +1763,7 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib, if ((copy_flags & (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED))) ib->ptr[ib->length_dw++] = SDMA_DCC_DATA_FORMAT(data_format) | SDMA_DCC_NUM_TYPE(num_type) | ((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) | - ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) | + ((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(write_cm) : 0) | SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1); else ib->ptr[ib->length_dw++] = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 00f63d3fbea7..2247f6a94858 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -909,7 +909,7 @@ static const u32 hainan_mgcg_cgcg_init[] = /* XXX: update when we support VCE */ #if 0 -/* tahiti, pitcarin, verde */ +/* tahiti, pitcairn, verde */ static const struct amdgpu_video_codec_info tahiti_video_codecs_encode_array[] = { { @@ -940,7 +940,7 @@ static const struct amdgpu_video_codecs hainan_video_codecs_encode = .codec_array = NULL, }; -/* tahiti, pitcarin, verde, oland */ +/* tahiti, pitcairn, verde, oland */ static const struct amdgpu_video_codec_info tahiti_video_codecs_decode_array[] = { { @@ -1124,41 +1124,41 @@ static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = { {mmCP_STALLED_STAT3}, {GB_ADDR_CONFIG}, {MC_ARB_RAMCFG}, - {GB_TILE_MODE0}, - {GB_TILE_MODE1}, - {GB_TILE_MODE2}, - {GB_TILE_MODE3}, - {GB_TILE_MODE4}, - {GB_TILE_MODE5}, - {GB_TILE_MODE6}, - {GB_TILE_MODE7}, - {GB_TILE_MODE8}, - {GB_TILE_MODE9}, - {GB_TILE_MODE10}, - {GB_TILE_MODE11}, - {GB_TILE_MODE12}, - {GB_TILE_MODE13}, - {GB_TILE_MODE14}, - {GB_TILE_MODE15}, - {GB_TILE_MODE16}, - {GB_TILE_MODE17}, - {GB_TILE_MODE18}, - {GB_TILE_MODE19}, - {GB_TILE_MODE20}, - {GB_TILE_MODE21}, - {GB_TILE_MODE22}, - {GB_TILE_MODE23}, - {GB_TILE_MODE24}, - {GB_TILE_MODE25}, - {GB_TILE_MODE26}, - {GB_TILE_MODE27}, - {GB_TILE_MODE28}, - {GB_TILE_MODE29}, - {GB_TILE_MODE30}, - {GB_TILE_MODE31}, + {mmGB_TILE_MODE0}, + {mmGB_TILE_MODE1}, + {mmGB_TILE_MODE2}, + {mmGB_TILE_MODE3}, + {mmGB_TILE_MODE4}, + {mmGB_TILE_MODE5}, + {mmGB_TILE_MODE6}, + {mmGB_TILE_MODE7}, + {mmGB_TILE_MODE8}, + {mmGB_TILE_MODE9}, + {mmGB_TILE_MODE10}, + {mmGB_TILE_MODE11}, + {mmGB_TILE_MODE12}, + {mmGB_TILE_MODE13}, + {mmGB_TILE_MODE14}, + {mmGB_TILE_MODE15}, + {mmGB_TILE_MODE16}, + {mmGB_TILE_MODE17}, + {mmGB_TILE_MODE18}, + {mmGB_TILE_MODE19}, + {mmGB_TILE_MODE20}, + {mmGB_TILE_MODE21}, + {mmGB_TILE_MODE22}, + {mmGB_TILE_MODE23}, + {mmGB_TILE_MODE24}, + {mmGB_TILE_MODE25}, + {mmGB_TILE_MODE26}, + {mmGB_TILE_MODE27}, + {mmGB_TILE_MODE28}, + {mmGB_TILE_MODE29}, + {mmGB_TILE_MODE30}, + {mmGB_TILE_MODE31}, {CC_RB_BACKEND_DISABLE, true}, - {GC_USER_RB_BACKEND_DISABLE, true}, - {PA_SC_RASTER_CONFIG, true}, + {mmGC_USER_RB_BACKEND_DISABLE, true}, + {mmPA_SC_RASTER_CONFIG, true}, }; static uint32_t si_get_register_value(struct amdgpu_device *adev, @@ -1888,7 +1888,7 @@ static int si_vce_send_vcepll_ctlreq(struct amdgpu_device 
*adev) WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK); if (i == SI_MAX_CTLACKS_ASSERTION_WAIT) { - DRM_ERROR("Timeout setting UVD clocks!\n"); + DRM_ERROR("Timeout setting VCE clocks!\n"); return -ETIMEDOUT; } @@ -2644,18 +2644,18 @@ static int si_common_resume(struct amdgpu_ip_block *ip_block) return si_common_hw_init(ip_block); } -static bool si_common_is_idle(void *handle) +static bool si_common_is_idle(struct amdgpu_ip_block *ip_block) { return true; } -static int si_common_set_clockgating_state(void *handle, +static int si_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int si_common_set_powergating_state(void *handle, +static int si_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 47647a6083e8..e2089c8da71b 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -286,7 +286,7 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = -EINVAL; err1: - amdgpu_ib_free(adev, &ib, NULL); + amdgpu_ib_free(&ib, NULL); dma_fence_put(f); err0: amdgpu_device_wb_free(adev, index); @@ -541,9 +541,9 @@ static int si_dma_resume(struct amdgpu_ip_block *ip_block) return si_dma_hw_init(ip_block); } -static bool si_dma_is_idle(void *handle) +static bool si_dma_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(SRBM_STATUS2); @@ -559,7 +559,7 @@ static int si_dma_wait_for_idle(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (si_dma_is_idle(adev)) + if (si_dma_is_idle(ip_block)) return 0; udelay(1); } @@ -629,13 +629,13 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev, return 0; } -static int si_dma_set_clockgating_state(void *handle, +static int si_dma_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { u32 orig, data, offset; int i; bool enable; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; enable = (state == AMD_CG_STATE_GATE); @@ -672,12 +672,12 @@ static int si_dma_set_clockgating_state(void *handle, return 0; } -static int si_dma_set_powergating_state(void *handle, +static int si_dma_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { u32 tmp; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; WREG32(DMA_PGFSM_WRITE, 0x00002000); WREG32(DMA_PGFSM_CONFIG, 0x100010ff); diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h index 4e935baa7b91..d656ef1fa6e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h @@ -121,15 +121,7 @@ #define CURSOR_UPDATE_LOCK (1 << 16) #define CURSOR_DISABLE_MULTIPLE_UPDATE (1 << 24) -#define SI_CRTC0_REGISTER_OFFSET 0 -#define SI_CRTC1_REGISTER_OFFSET 0x300 -#define SI_CRTC2_REGISTER_OFFSET 0x2600 -#define SI_CRTC3_REGISTER_OFFSET 0x2900 -#define SI_CRTC4_REGISTER_OFFSET 0x2c00 -#define SI_CRTC5_REGISTER_OFFSET 0x2f00 -#define DMA0_REGISTER_OFFSET 0x000 -#define DMA1_REGISTER_OFFSET 0x200 #define ES_AND_GS_AUTO 3 #define RADEON_PACKET_TYPE3 3 #define CE_PARTITION_BASE 3 @@ -161,10 +153,6 @@ #define 
RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET 0x3D -#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003 -#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x02010002 -#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02011003 - #define PACKET3(op, n) ((RADEON_PACKET_TYPE3 << 30) | \ (((op) & 0xFF) << 8) | \ ((n) & 0x3FFF) << 16) diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 2ec1ebe4db11..5c38e1fb1dca 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -210,9 +210,9 @@ static int si_ih_resume(struct amdgpu_ip_block *ip_block) return si_ih_hw_init(ip_block); } -static bool si_ih_is_idle(void *handle) +static bool si_ih_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(SRBM_STATUS); if (tmp & SRBM_STATUS__IH_BUSY_MASK) @@ -227,7 +227,7 @@ static int si_ih_wait_for_idle(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (si_ih_is_idle(adev)) + if (si_ih_is_idle(ip_block)) return 0; udelay(1); } @@ -263,13 +263,13 @@ static int si_ih_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int si_ih_set_clockgating_state(void *handle, +static int si_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int si_ih_set_powergating_state(void *handle, +static int si_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index 9a39cbfe6db9..cbf232f5235b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -26,10 +26,6 @@ #define TAHITI_RB_BITMAP_WIDTH_PER_SH 2 -#define TAHITI_GB_ADDR_CONFIG_GOLDEN 0x12011003 -#define VERDE_GB_ADDR_CONFIG_GOLDEN 0x12010002 -#define HAINAN_GB_ADDR_CONFIG_GOLDEN 0x02010001 - #define SI_MAX_SH_GPRS 256 #define SI_MAX_TEMP_GPRS 16 #define SI_MAX_SH_THREADS 256 @@ -696,18 +692,6 @@ #define HDP_REG_COHERENCY_FLUSH_CNTL 0x1528 /* DCE6 ELD audio interface */ -#define AZ_F0_CODEC_ENDPOINT_INDEX 0x1780 -# define AZ_ENDPOINT_REG_INDEX(x) (((x) & 0xff) << 0) -# define AZ_ENDPOINT_REG_WRITE_EN (1 << 8) -#define AZ_F0_CODEC_ENDPOINT_DATA 0x1781 - -#define AZ_F0_CODEC_PIN_CONTROL_CHANNEL_SPEAKER 0x25 -#define SPEAKER_ALLOCATION(x) (((x) & 0x7f) << 0) -#define SPEAKER_ALLOCATION_MASK (0x7f << 0) -#define SPEAKER_ALLOCATION_SHIFT 0 -#define HDMI_CONNECTION (1 << 16) -#define DP_CONNECTION (1 << 17) - #define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR0 0x28 /* LPCM */ #define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR1 0x29 /* AC3 */ #define AZ_F0_CODEC_PIN_CONTROL_AUDIO_DESCRIPTOR2 0x2A /* MPEG1 */ @@ -909,26 +893,11 @@ #define CRTC_STATUS_FRAME_COUNT 0x1BA6 /* Audio clocks */ -#define DCCG_AUDIO_DTO_SOURCE 0x05ac -# define DCCG_AUDIO_DTO0_SOURCE_SEL(x) ((x) << 0) /* crtc0 - crtc5 */ -# define DCCG_AUDIO_DTO_SEL (1 << 4) /* 0=dto0 1=dto1 */ - #define DCCG_AUDIO_DTO0_PHASE 0x05b0 #define DCCG_AUDIO_DTO0_MODULE 0x05b4 #define DCCG_AUDIO_DTO1_PHASE 0x05c0 #define DCCG_AUDIO_DTO1_MODULE 0x05c4 -#define AFMT_AUDIO_SRC_CONTROL 0x1c4f -#define AFMT_AUDIO_SRC_SELECT(x) (((x) & 7) << 0) -/* AFMT_AUDIO_SRC_SELECT - * 0 = stream0 - * 1 = stream1 - * 2 = stream2 - * 3 = stream3 - * 4 = stream4 - * 5 = stream5 - */ - #define GRBM_CNTL 0x2000 #define GRBM_READ_TIMEOUT(x) ((x) << 0) @@ 
-977,30 +946,6 @@ #define SE_DB_BUSY (1 << 30) #define SE_CB_BUSY (1 << 31) -#define GRBM_SOFT_RESET 0x2008 -#define SOFT_RESET_CP (1 << 0) -#define SOFT_RESET_CB (1 << 1) -#define SOFT_RESET_RLC (1 << 2) -#define SOFT_RESET_DB (1 << 3) -#define SOFT_RESET_GDS (1 << 4) -#define SOFT_RESET_PA (1 << 5) -#define SOFT_RESET_SC (1 << 6) -#define SOFT_RESET_BCI (1 << 7) -#define SOFT_RESET_SPI (1 << 8) -#define SOFT_RESET_SX (1 << 10) -#define SOFT_RESET_TC (1 << 11) -#define SOFT_RESET_TA (1 << 12) -#define SOFT_RESET_VGT (1 << 14) -#define SOFT_RESET_IA (1 << 15) - -#define GRBM_GFX_INDEX 0x200B -#define INSTANCE_INDEX(x) ((x) << 0) -#define SH_INDEX(x) ((x) << 8) -#define SE_INDEX(x) ((x) << 16) -#define SH_BROADCAST_WRITES (1 << 29) -#define INSTANCE_BROADCAST_WRITES (1 << 30) -#define SE_BROADCAST_WRITES (1 << 31) - #define GRBM_INT_CNTL 0x2018 # define RDERR_INT_ENABLE (1 << 0) # define GUI_IDLE_INT_ENABLE (1 << 19) @@ -1045,16 +990,6 @@ #define VGT_VTX_VECT_EJECT_REG 0x222C -#define VGT_CACHE_INVALIDATION 0x2231 -#define CACHE_INVALIDATION(x) ((x) << 0) -#define VC_ONLY 0 -#define TC_ONLY 1 -#define VC_AND_TC 2 -#define AUTO_INVLD_EN(x) ((x) << 6) -#define NO_AUTO 0 -#define ES_AUTO 1 -#define GS_AUTO 2 -#define ES_AND_GS_AUTO 3 #define VGT_ESGS_RING_SIZE 0x2232 #define VGT_GSVS_RING_SIZE 0x2233 @@ -1072,11 +1007,6 @@ #define VGT_TF_MEMORY_BASE 0x226E -#define CC_GC_SHADER_ARRAY_CONFIG 0x226F -#define INACTIVE_CUS_MASK 0xFFFF0000 -#define INACTIVE_CUS_SHIFT 16 -#define GC_USER_SHADER_ARRAY_CONFIG 0x2270 - #define PA_CL_ENHANCE 0x2285 #define CLIP_VTX_REORDER_ENA (1 << 0) #define NUM_CLIP_SEQ(x) ((x) << 1) @@ -1169,89 +1099,6 @@ #define ROW_SIZE_MASK 0x30000000 #define ROW_SIZE_SHIFT 28 -#define GB_TILE_MODE0 0x2644 -# define MICRO_TILE_MODE(x) ((x) << 0) -# define ADDR_SURF_DISPLAY_MICRO_TILING 0 -# define ADDR_SURF_THIN_MICRO_TILING 1 -# define ADDR_SURF_DEPTH_MICRO_TILING 2 -# define ARRAY_MODE(x) ((x) << 2) -# define ARRAY_LINEAR_GENERAL 0 -# define ARRAY_LINEAR_ALIGNED 1 -# define ARRAY_1D_TILED_THIN1 2 -# define ARRAY_2D_TILED_THIN1 4 -# define PIPE_CONFIG(x) ((x) << 6) -# define ADDR_SURF_P2 0 -# define ADDR_SURF_P4_8x16 4 -# define ADDR_SURF_P4_16x16 5 -# define ADDR_SURF_P4_16x32 6 -# define ADDR_SURF_P4_32x32 7 -# define ADDR_SURF_P8_16x16_8x16 8 -# define ADDR_SURF_P8_16x32_8x16 9 -# define ADDR_SURF_P8_32x32_8x16 10 -# define ADDR_SURF_P8_16x32_16x16 11 -# define ADDR_SURF_P8_32x32_16x16 12 -# define ADDR_SURF_P8_32x32_16x32 13 -# define ADDR_SURF_P8_32x64_32x32 14 -# define TILE_SPLIT(x) ((x) << 11) -# define ADDR_SURF_TILE_SPLIT_64B 0 -# define ADDR_SURF_TILE_SPLIT_128B 1 -# define ADDR_SURF_TILE_SPLIT_256B 2 -# define ADDR_SURF_TILE_SPLIT_512B 3 -# define ADDR_SURF_TILE_SPLIT_1KB 4 -# define ADDR_SURF_TILE_SPLIT_2KB 5 -# define ADDR_SURF_TILE_SPLIT_4KB 6 -# define BANK_WIDTH(x) ((x) << 14) -# define ADDR_SURF_BANK_WIDTH_1 0 -# define ADDR_SURF_BANK_WIDTH_2 1 -# define ADDR_SURF_BANK_WIDTH_4 2 -# define ADDR_SURF_BANK_WIDTH_8 3 -# define BANK_HEIGHT(x) ((x) << 16) -# define ADDR_SURF_BANK_HEIGHT_1 0 -# define ADDR_SURF_BANK_HEIGHT_2 1 -# define ADDR_SURF_BANK_HEIGHT_4 2 -# define ADDR_SURF_BANK_HEIGHT_8 3 -# define MACRO_TILE_ASPECT(x) ((x) << 18) -# define ADDR_SURF_MACRO_ASPECT_1 0 -# define ADDR_SURF_MACRO_ASPECT_2 1 -# define ADDR_SURF_MACRO_ASPECT_4 2 -# define ADDR_SURF_MACRO_ASPECT_8 3 -# define NUM_BANKS(x) ((x) << 20) -# define ADDR_SURF_2_BANK 0 -# define ADDR_SURF_4_BANK 1 -# define ADDR_SURF_8_BANK 2 -# define ADDR_SURF_16_BANK 3 -#define GB_TILE_MODE1 0x2645 
-#define GB_TILE_MODE2 0x2646 -#define GB_TILE_MODE3 0x2647 -#define GB_TILE_MODE4 0x2648 -#define GB_TILE_MODE5 0x2649 -#define GB_TILE_MODE6 0x264a -#define GB_TILE_MODE7 0x264b -#define GB_TILE_MODE8 0x264c -#define GB_TILE_MODE9 0x264d -#define GB_TILE_MODE10 0x264e -#define GB_TILE_MODE11 0x264f -#define GB_TILE_MODE12 0x2650 -#define GB_TILE_MODE13 0x2651 -#define GB_TILE_MODE14 0x2652 -#define GB_TILE_MODE15 0x2653 -#define GB_TILE_MODE16 0x2654 -#define GB_TILE_MODE17 0x2655 -#define GB_TILE_MODE18 0x2656 -#define GB_TILE_MODE19 0x2657 -#define GB_TILE_MODE20 0x2658 -#define GB_TILE_MODE21 0x2659 -#define GB_TILE_MODE22 0x265a -#define GB_TILE_MODE23 0x265b -#define GB_TILE_MODE24 0x265c -#define GB_TILE_MODE25 0x265d -#define GB_TILE_MODE26 0x265e -#define GB_TILE_MODE27 0x265f -#define GB_TILE_MODE28 0x2660 -#define GB_TILE_MODE29 0x2661 -#define GB_TILE_MODE30 0x2662 -#define GB_TILE_MODE31 0x2663 - #define CB_PERFCOUNTER0_SELECT0 0x2688 #define CB_PERFCOUNTER0_SELECT1 0x2689 #define CB_PERFCOUNTER1_SELECT0 0x268A @@ -1263,10 +1110,6 @@ #define CB_CGTT_SCLK_CTRL 0x2698 -#define GC_USER_RB_BACKEND_DISABLE 0x26DF -#define BACKEND_DISABLE_MASK 0x00FF0000 -#define BACKEND_DISABLE_SHIFT 16 - #define TCP_CHAN_STEER_LO 0x2B03 #define TCP_CHAN_STEER_HI 0x2B94 @@ -1320,101 +1163,12 @@ # define CP_RINGID1_INT_STAT (1 << 30) # define CP_RINGID0_INT_STAT (1 << 31) -#define CP_MEM_SLP_CNTL 0x3079 -# define CP_MEM_LS_EN (1 << 0) - -#define CP_DEBUG 0x307F - -#define RLC_CNTL 0x30C0 -# define RLC_ENABLE (1 << 0) -#define RLC_RL_BASE 0x30C1 -#define RLC_RL_SIZE 0x30C2 -#define RLC_LB_CNTL 0x30C3 -# define LOAD_BALANCE_ENABLE (1 << 0) -#define RLC_SAVE_AND_RESTORE_BASE 0x30C4 -#define RLC_LB_CNTR_MAX 0x30C5 -#define RLC_LB_CNTR_INIT 0x30C6 - -#define RLC_CLEAR_STATE_RESTORE_BASE 0x30C8 - -#define RLC_UCODE_ADDR 0x30CB -#define RLC_UCODE_DATA 0x30CC - -#define RLC_GPU_CLOCK_COUNT_LSB 0x30CE -#define RLC_GPU_CLOCK_COUNT_MSB 0x30CF -#define RLC_CAPTURE_GPU_CLOCK_COUNT 0x30D0 -#define RLC_MC_CNTL 0x30D1 -#define RLC_UCODE_CNTL 0x30D2 -#define RLC_STAT 0x30D3 -# define RLC_BUSY_STATUS (1 << 0) -# define GFX_POWER_STATUS (1 << 1) -# define GFX_CLOCK_STATUS (1 << 2) -# define GFX_LS_STATUS (1 << 3) - -#define RLC_PG_CNTL 0x30D7 -# define GFX_PG_ENABLE (1 << 0) -# define GFX_PG_SRC (1 << 1) - -#define RLC_CGTT_MGCG_OVERRIDE 0x3100 -#define RLC_CGCG_CGLS_CTRL 0x3101 -# define CGCG_EN (1 << 0) -# define CGLS_EN (1 << 1) - -#define RLC_TTOP_D 0x3105 -# define RLC_PUD(x) ((x) << 0) -# define RLC_PUD_MASK (0xff << 0) -# define RLC_PDD(x) ((x) << 8) -# define RLC_PDD_MASK (0xff << 8) -# define RLC_TTPD(x) ((x) << 16) -# define RLC_TTPD_MASK (0xff << 16) -# define RLC_MSD(x) ((x) << 24) -# define RLC_MSD_MASK (0xff << 24) - -#define RLC_LB_INIT_CU_MASK 0x3107 - -#define RLC_PG_AO_CU_MASK 0x310B -#define RLC_MAX_PG_CU 0x310C -# define MAX_PU_CU(x) ((x) << 0) -# define MAX_PU_CU_MASK (0xff << 0) -#define RLC_AUTO_PG_CTRL 0x310C -# define AUTO_PG_EN (1 << 0) -# define GRBM_REG_SGIT(x) ((x) << 3) -# define GRBM_REG_SGIT_MASK (0xffff << 3) -# define PG_AFTER_GRBM_REG_ST(x) ((x) << 19) -# define PG_AFTER_GRBM_REG_ST_MASK (0x1fff << 19) - -#define RLC_SERDES_WR_MASTER_MASK_0 0x3115 -#define RLC_SERDES_WR_MASTER_MASK_1 0x3116 -#define RLC_SERDES_WR_CTRL 0x3117 - -#define RLC_SERDES_MASTER_BUSY_0 0x3119 -#define RLC_SERDES_MASTER_BUSY_1 0x311A - -#define RLC_GCPM_GENERAL_3 0x311E - -#define DB_RENDER_CONTROL 0xA000 - -#define DB_DEPTH_INFO 0xA00F - -#define PA_SC_RASTER_CONFIG 0xA0D4 -# define RB_MAP_PKR0(x) ((x) << 0) 
-# define RB_MAP_PKR0_MASK (0x3 << 0) -# define RB_MAP_PKR1(x) ((x) << 2) -# define RB_MAP_PKR1_MASK (0x3 << 2) -# define RASTER_CONFIG_RB_MAP_0 0 -# define RASTER_CONFIG_RB_MAP_1 1 -# define RASTER_CONFIG_RB_MAP_2 2 -# define RASTER_CONFIG_RB_MAP_3 3 +// #define PA_SC_RASTER_CONFIG 0xA0D4 # define RB_XSEL2(x) ((x) << 4) # define RB_XSEL2_MASK (0x3 << 4) # define RB_XSEL (1 << 6) # define RB_YSEL (1 << 7) # define PKR_MAP(x) ((x) << 8) -# define PKR_MAP_MASK (0x3 << 8) -# define RASTER_CONFIG_PKR_MAP_0 0 -# define RASTER_CONFIG_PKR_MAP_1 1 -# define RASTER_CONFIG_PKR_MAP_2 2 -# define RASTER_CONFIG_PKR_MAP_3 3 # define PKR_XSEL(x) ((x) << 10) # define PKR_XSEL_MASK (0x3 << 10) # define PKR_YSEL(x) ((x) << 12) @@ -1426,56 +1180,11 @@ # define SC_YSEL(x) ((x) << 20) # define SC_YSEL_MASK (0x3 << 20) # define SE_MAP(x) ((x) << 24) -# define SE_MAP_MASK (0x3 << 24) -# define RASTER_CONFIG_SE_MAP_0 0 -# define RASTER_CONFIG_SE_MAP_1 1 -# define RASTER_CONFIG_SE_MAP_2 2 -# define RASTER_CONFIG_SE_MAP_3 3 # define SE_XSEL(x) ((x) << 26) # define SE_XSEL_MASK (0x3 << 26) # define SE_YSEL(x) ((x) << 28) # define SE_YSEL_MASK (0x3 << 28) - -#define VGT_EVENT_INITIATOR 0xA2A4 -# define SAMPLE_STREAMOUTSTATS1 (1 << 0) -# define SAMPLE_STREAMOUTSTATS2 (2 << 0) -# define SAMPLE_STREAMOUTSTATS3 (3 << 0) -# define CACHE_FLUSH_TS (4 << 0) -# define CACHE_FLUSH (6 << 0) -# define CS_PARTIAL_FLUSH (7 << 0) -# define VGT_STREAMOUT_RESET (10 << 0) -# define END_OF_PIPE_INCR_DE (11 << 0) -# define END_OF_PIPE_IB_END (12 << 0) -# define RST_PIX_CNT (13 << 0) -# define VS_PARTIAL_FLUSH (15 << 0) -# define PS_PARTIAL_FLUSH (16 << 0) -# define CACHE_FLUSH_AND_INV_TS_EVENT (20 << 0) -# define ZPASS_DONE (21 << 0) -# define CACHE_FLUSH_AND_INV_EVENT (22 << 0) -# define PERFCOUNTER_START (23 << 0) -# define PERFCOUNTER_STOP (24 << 0) -# define PIPELINESTAT_START (25 << 0) -# define PIPELINESTAT_STOP (26 << 0) -# define PERFCOUNTER_SAMPLE (27 << 0) -# define SAMPLE_PIPELINESTAT (30 << 0) -# define SAMPLE_STREAMOUTSTATS (32 << 0) -# define RESET_VTX_CNT (33 << 0) -# define VGT_FLUSH (36 << 0) -# define BOTTOM_OF_PIPE_TS (40 << 0) -# define DB_CACHE_FLUSH_AND_INV (42 << 0) -# define FLUSH_AND_INV_DB_DATA_TS (43 << 0) -# define FLUSH_AND_INV_DB_META (44 << 0) -# define FLUSH_AND_INV_CB_DATA_TS (45 << 0) -# define FLUSH_AND_INV_CB_META (46 << 0) -# define CS_DONE (47 << 0) -# define PS_DONE (48 << 0) -# define FLUSH_AND_INV_CB_PIXEL_DATA (49 << 0) -# define THREAD_TRACE_START (51 << 0) -# define THREAD_TRACE_STOP (52 << 0) -# define THREAD_TRACE_FLUSH (54 << 0) -# define THREAD_TRACE_FINISH (55 << 0) - /* PIF PHY0 registers idx/data 0x8/0xc */ #define PB0_PIF_CNTL 0x10 # define LS2_EXIT_TIME(x) ((x) << 17) @@ -1991,12 +1700,29 @@ //#dce stupp /* display controller offsets used for crtc/cur/lut/grph/viewport/etc. 
*/ -#define SI_CRTC0_REGISTER_OFFSET 0 //(0x6df0 - 0x6df0)/4 -#define SI_CRTC1_REGISTER_OFFSET 0x300 //(0x79f0 - 0x6df0)/4 -#define SI_CRTC2_REGISTER_OFFSET 0x2600 //(0x105f0 - 0x6df0)/4 -#define SI_CRTC3_REGISTER_OFFSET 0x2900 //(0x111f0 - 0x6df0)/4 -#define SI_CRTC4_REGISTER_OFFSET 0x2c00 //(0x11df0 - 0x6df0)/4 -#define SI_CRTC5_REGISTER_OFFSET 0x2f00 //(0x129f0 - 0x6df0)/4 +#define CRTC0_REGISTER_OFFSET (0x1b7c - 0x1b7c) //(0x6df0 - 0x6df0)/4 +#define CRTC1_REGISTER_OFFSET (0x1e7c - 0x1b7c) //(0x79f0 - 0x6df0)/4 +#define CRTC2_REGISTER_OFFSET (0x417c - 0x1b7c) //(0x105f0 - 0x6df0)/4 +#define CRTC3_REGISTER_OFFSET (0x447c - 0x1b7c) //(0x111f0 - 0x6df0)/4 +#define CRTC4_REGISTER_OFFSET (0x477c - 0x1b7c) //(0x11df0 - 0x6df0)/4 +#define CRTC5_REGISTER_OFFSET (0x4a7c - 0x1b7c) //(0x129f0 - 0x6df0)/4 + +/* hpd instance offsets */ +#define HPD0_REGISTER_OFFSET (0x1807 - 0x1807) +#define HPD1_REGISTER_OFFSET (0x180a - 0x1807) +#define HPD2_REGISTER_OFFSET (0x180d - 0x1807) +#define HPD3_REGISTER_OFFSET (0x1810 - 0x1807) +#define HPD4_REGISTER_OFFSET (0x1813 - 0x1807) +#define HPD5_REGISTER_OFFSET (0x1816 - 0x1807) + +/* audio endpt instance offsets */ +#define AUD0_REGISTER_OFFSET (0x1780 - 0x1780) +#define AUD1_REGISTER_OFFSET (0x1786 - 0x1780) +#define AUD2_REGISTER_OFFSET (0x178c - 0x1780) +#define AUD3_REGISTER_OFFSET (0x1792 - 0x1780) +#define AUD4_REGISTER_OFFSET (0x1798 - 0x1780) +#define AUD5_REGISTER_OFFSET (0x179d - 0x1780) +#define AUD6_REGISTER_OFFSET (0x17a4 - 0x1780) #define CURSOR_WIDTH 64 #define CURSOR_HEIGHT 64 @@ -2036,9 +1762,6 @@ #define EVERGREEN_DATA_FORMAT 0x1ac0 # define EVERGREEN_INTERLEAVE_EN (1 << 0) -#define MC_SHARED_CHMAP__NOOFCHAN_MASK 0xf000 -#define MC_SHARED_CHMAP__NOOFCHAN__SHIFT 0xc - #define R600_D1GRPH_ARRAY_MODE_LINEAR_GENERAL (0 << 20) #define R600_D1GRPH_ARRAY_MODE_LINEAR_ALIGNED (1 << 20) #define R600_D1GRPH_ARRAY_MODE_1D_TILED_THIN1 (2 << 20) @@ -2050,32 +1773,6 @@ #define R700_D2GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1847 #define R700_D1GRPH_SECONDARY_SURFACE_ADDRESS_HIGH 0x1a47 -#define DISP_INTERRUPT_STATUS__LB_D1_VBLANK_INTERRUPT_MASK 0x8 -#define DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VBLANK_INTERRUPT_MASK 0x8 -#define DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VBLANK_INTERRUPT_MASK 0x8 -#define DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VBLANK_INTERRUPT_MASK 0x8 -#define DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VBLANK_INTERRUPT_MASK 0x8 -#define DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VBLANK_INTERRUPT_MASK 0x8 - -#define DISP_INTERRUPT_STATUS__LB_D1_VLINE_INTERRUPT_MASK 0x4 -#define DISP_INTERRUPT_STATUS_CONTINUE__LB_D2_VLINE_INTERRUPT_MASK 0x4 -#define DISP_INTERRUPT_STATUS_CONTINUE2__LB_D3_VLINE_INTERRUPT_MASK 0x4 -#define DISP_INTERRUPT_STATUS_CONTINUE3__LB_D4_VLINE_INTERRUPT_MASK 0x4 -#define DISP_INTERRUPT_STATUS_CONTINUE4__LB_D5_VLINE_INTERRUPT_MASK 0x4 -#define DISP_INTERRUPT_STATUS_CONTINUE5__LB_D6_VLINE_INTERRUPT_MASK 0x4 - -#define DISP_INTERRUPT_STATUS__DC_HPD1_INTERRUPT_MASK 0x20000 -#define DISP_INTERRUPT_STATUS_CONTINUE__DC_HPD2_INTERRUPT_MASK 0x20000 -#define DISP_INTERRUPT_STATUS_CONTINUE2__DC_HPD3_INTERRUPT_MASK 0x20000 -#define DISP_INTERRUPT_STATUS_CONTINUE3__DC_HPD4_INTERRUPT_MASK 0x20000 -#define DISP_INTERRUPT_STATUS_CONTINUE4__DC_HPD5_INTERRUPT_MASK 0x20000 -#define DISP_INTERRUPT_STATUS_CONTINUE5__DC_HPD6_INTERRUPT_MASK 0x20000 - -#define GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_OCCURRED_MASK 0x1 -#define GRPH_INTERRUPT_STATUS__GRPH_PFLIP_INT_CLEAR_MASK 0x100 - -#define DC_HPD1_INT_CONTROL__DC_HPD1_INT_ACK_MASK 0x1 - #define 
R600_D1GRPH_SWAP_CONTROL 0x1843 #define R600_D1GRPH_SWAP_ENDIAN_NONE (0 << 0) #define R600_D1GRPH_SWAP_ENDIAN_16BIT (1 << 0) @@ -2099,8 +1796,6 @@ # define R600_SCK_PRESCALE_CRYSTAL_CLK_SHIFT 28 # define R600_SCK_PRESCALE_CRYSTAL_CLK_MASK (0xf << 28) -#define GRPH_INTERRUPT_CONTROL__GRPH_PFLIP_INT_MASK_MASK 0x1 - #define FMT_BIT_DEPTH_CONTROL 0x1bf2 #define FMT_TRUNCATE_EN (1 << 0) #define FMT_TRUNCATE_DEPTH (1 << 4) @@ -2404,19 +2099,6 @@ #define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC_MASK 0x800 #define mmSRBM_SOFT_RESET__xxSOFT_RESET_MC__SHIFT 0xb -#define VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x8 -#define VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x3 -#define VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x40 -#define VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x6 -#define VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x200 -#define VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x9 -#define VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x1000 -#define VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0xc -#define VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x8000 -#define VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0xf -#define VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK 0x40000 -#define VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT__SHIFT 0x12 - #define MC_SEQ_MISC0__MT__MASK 0xf0000000 #define MC_SEQ_MISC0__MT__GDDR1 0x10000000 #define MC_SEQ_MISC0__MT__DDR2 0x20000000 @@ -2426,10 +2108,7 @@ #define MC_SEQ_MISC0__MT__HBM 0x60000000 #define MC_SEQ_MISC0__MT__DDR3 0xB0000000 -#define GRBM_STATUS__GUI_ACTIVE_MASK 0x80000000 #define CP_INT_CNTL_RING__TIME_STAMP_INT_ENABLE_MASK 0x4000000 -#define CP_INT_CNTL_RING0__PRIV_REG_INT_ENABLE_MASK 0x800000 -#define CP_INT_CNTL_RING0__PRIV_INSTR_INT_ENABLE_MASK 0x400000 #define PACKET3_SEM_WAIT_ON_SIGNAL (0x1 << 12) #define PACKET3_SEM_SEL_SIGNAL (0x6 << 29) #define PACKET3_SEM_SEL_WAIT (0x7 << 29) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index ede072758dab..659eab9b90be 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -28,7 +28,6 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "amdgpu_ih.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" @@ -103,12 +102,11 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode = /* Vega */ static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, }; static const struct amdgpu_video_codecs vega_video_codecs_decode = @@ -120,12 +118,12 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode = /* Raven */ static const struct 
amdgpu_video_codec_info rv_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 8192, 8192, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)}, }; @@ -138,10 +136,10 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode = /* Renoir, Arcturus */ static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 1920, 1088, 3)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 1920, 1088, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 1920, 1088, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -171,6 +169,24 @@ static const struct amdgpu_video_codecs vcn_4_0_3_video_codecs_encode = { .codec_array = NULL, }; +static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_encode_vcn0 = { + .codec_count = 0, + .codec_array = NULL, +}; + +static const struct amdgpu_video_codec_info vcn_5_0_1_video_codecs_decode_array_vcn0[] = { + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, +}; + +static const struct amdgpu_video_codecs vcn_5_0_1_video_codecs_decode_vcn0 = { + .codec_count = ARRAY_SIZE(vcn_5_0_1_video_codecs_decode_array_vcn0), + .codec_array = vcn_5_0_1_video_codecs_decode_array_vcn0, +}; + static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode, const struct amdgpu_video_codecs **codecs) { @@ -209,6 +225,12 @@ static int soc15_query_video_codecs(struct amdgpu_device *adev, bool encode, else *codecs = &vcn_4_0_3_video_codecs_decode; return 0; + case IP_VERSION(5, 0, 1): + if (encode) + *codecs = &vcn_5_0_1_video_codecs_encode_vcn0; + else + *codecs = &vcn_5_0_1_video_codecs_decode_vcn0; + return 0; default: return -EINVAL; } @@ -327,6 +349,7 @@ static u32 soc15_get_xclk(struct amdgpu_device *adev) if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 0) || amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(12, 0, 1) || amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 6) || + 
amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 12) || amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 14)) return 10000; if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(10, 0, 0) || @@ -556,6 +579,7 @@ soc15_asic_reset_method(struct amdgpu_device *adev) break; case IP_VERSION(13, 0, 6): case IP_VERSION(13, 0, 14): + case IP_VERSION(13, 0, 12): /* Use gpu_recovery param to target a reset method. * Enable triggering of GPU reset only if specified * by module parameter. @@ -579,12 +603,10 @@ soc15_asic_reset_method(struct amdgpu_device *adev) static bool soc15_need_reset_on_resume(struct amdgpu_device *adev) { /* Will reset for the following suspend abort cases. - * 1) Only reset on APU side, dGPU hasn't checked yet. - * 2) S3 suspend aborted in the normal S3 suspend or - * performing pm core test. + * 1) S3 suspend aborted in the normal S3 suspend + * 2) S3 suspend aborted in performing pm core test. */ - if (adev->flags & AMD_IS_APU && adev->in_s3 && - !pm_resume_via_firmware()) + if (adev->in_s3 && !pm_resume_via_firmware()) return true; else return false; @@ -1177,6 +1199,7 @@ static int soc15_common_early_init(struct amdgpu_ip_block *ip_block) break; case IP_VERSION(9, 4, 3): case IP_VERSION(9, 4, 4): + case IP_VERSION(9, 5, 0): adev->asic_funcs = &aqua_vanjaram_asic_funcs; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | @@ -1336,7 +1359,7 @@ static int soc15_common_resume(struct amdgpu_ip_block *ip_block) return soc15_common_hw_init(ip_block); } -static bool soc15_common_is_idle(void *handle) +static bool soc15_common_is_idle(struct amdgpu_ip_block *ip_block) { return true; } @@ -1385,10 +1408,10 @@ static void soc15_update_drm_light_sleep(struct amdgpu_device *adev, bool enable WREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_LIGHT_SLEEP_CTRL), data); } -static int soc15_common_set_clockgating_state(void *handle, +static int soc15_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1437,9 +1460,9 @@ static int soc15_common_set_clockgating_state(void *handle, return 0; } -static void soc15_common_get_clockgating_state(void *handle, u64 *flags) +static void soc15_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; if (amdgpu_sriov_vf(adev)) @@ -1453,6 +1476,7 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) if ((amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 2)) && (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6)) && + (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 12)) && (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 14))) { /* AMD_CG_SUPPORT_DRM_MGCG */ data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0)); @@ -1473,7 +1497,7 @@ static void soc15_common_get_clockgating_state(void *handle, u64 *flags) adev->df.funcs->get_clockgating_state(adev, flags); } -static int soc15_common_set_powergating_state(void *handle, +static int soc15_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* todo */ diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index b9cbeb389edc..a5000c171c02 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ 
b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -93,11 +93,25 @@ #define PACKET3_DISPATCH_INDIRECT 0x16 #define PACKET3_ATOMIC_GDS 0x1D #define PACKET3_ATOMIC_MEM 0x1E +#define PACKET3_ATOMIC_MEM__ATOMIC(x) ((((unsigned)(x)) & 0x3F) << 0) +#define PACKET3_ATOMIC_MEM__COMMAND(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_ATOMIC_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_ATOMIC_MEM__ADDR_LO(x) (((unsigned)(x)) << 0) +#define PACKET3_ATOMIC_MEM__ADDR_HI(x) (((unsigned)(x)) << 0) +#define PACKET3_ATOMIC_MEM__SRC_DATA_LO(x) (((unsigned)(x)) << 0) +#define PACKET3_ATOMIC_MEM__SRC_DATA_HI(x) (((unsigned)(x)) << 0) +#define PACKET3_ATOMIC_MEM__CMP_DATA_LO(x) (((unsigned)(x)) << 0) +#define PACKET3_ATOMIC_MEM__CMP_DATA_HI(x) (((unsigned)(x)) << 0) +#define PACKET3_ATOMIC_MEM__LOOP_INTERVAL(x) ((((unsigned)(x)) & 0x1FFF) << 0) +#define PACKET3_ATOMIC_MEM__COMMAND__SINGLE_PASS_ATOMIC 0 +#define PACKET3_ATOMIC_MEM__COMMAND__LOOP_UNTIL_COMPARE_SATISFIED 1 #define PACKET3_OCCLUSION_QUERY 0x1F #define PACKET3_SET_PREDICATION 0x20 #define PACKET3_REG_RMW 0x21 #define PACKET3_COND_EXEC 0x22 #define PACKET3_PRED_EXEC 0x23 +#define PACKET3_PRED_EXEC__EXEC_COUNT(x) ((((unsigned)(x)) & 0x3FFF) << 0) +#define PACKET3_PRED_EXEC__VIRTUAL_XCC_ID_SELECT(x) ((((unsigned)(x)) & 0xFF) << 24) #define PACKET3_DRAW_INDIRECT 0x24 #define PACKET3_DRAW_INDEX_INDIRECT 0x25 #define PACKET3_INDEX_BASE 0x26 @@ -132,6 +146,28 @@ * 1 - pfp * 2 - ce */ +#define PACKET3_WRITE_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_WRITE_DATA__ADDR_INCR(x) ((((unsigned)(x)) & 0x1) << 16) +#define PACKET3_WRITE_DATA__RESUME_VF_MI300(x) ((((unsigned)(x)) & 0x1) << 19) +#define PACKET3_WRITE_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20) +#define PACKET3_WRITE_DATA__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_WRITE_DATA__DST_MMREG_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_WRITE_DATA__DST_GDS_ADDR(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(x) ((unsigned)(x)) +#define PACKET3_WRITE_DATA__DST_SEL__MEM_MAPPED_REGISTER 0 +#define PACKET3_WRITE_DATA__DST_SEL__TC_L2 2 +#define PACKET3_WRITE_DATA__DST_SEL__GDS 3 +#define PACKET3_WRITE_DATA__DST_SEL__MEMORY 5 +#define PACKET3_WRITE_DATA__DST_SEL__MEMORY_MAPPED_ADC_PERSISTENT_STATE 6 +#define PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS 0 +#define PACKET3_WRITE_DATA__ADDR_INCR__DO_NOT_INCREMENT_ADDRESS 1 +#define PACKET3_WRITE_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_WRITE_CONFIRMATION 0 +#define PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION 1 +#define PACKET3_WRITE_DATA__CACHE_POLICY__LRU 0 +#define PACKET3_WRITE_DATA__CACHE_POLICY__STREAM 1 +#define PACKET3_WRITE_DATA__CACHE_POLICY__NOA 2 +#define PACKET3_WRITE_DATA__CACHE_POLICY__BYPASS 3 #define PACKET3_DRAW_INDEX_INDIRECT_MULTI 0x38 #define PACKET3_MEM_SEMAPHORE 0x39 # define PACKET3_SEM_USE_MAILBOX (0x1 << 16) @@ -160,6 +196,33 @@ /* 0 - me * 1 - pfp */ +#define PACKET3_WAIT_REG_MEM__FUNCTION(x) ((((unsigned)(x)) & 0x7) << 0) +#define PACKET3_WAIT_REG_MEM__MEM_SPACE(x) ((((unsigned)(x)) & 0x3) << 4) +#define PACKET3_WAIT_REG_MEM__OPERATION(x) ((((unsigned)(x)) & 0x3) << 6) +#define PACKET3_WAIT_REG_MEM__MES_INTR_PIPE(x) ((((unsigned)(x)) & 0x3) << 22) +#define PACKET3_WAIT_REG_MEM__MES_ACTION(x) ((((unsigned)(x)) & 0x1) << 24) +#define PACKET3_WAIT_REG_MEM__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define 
PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_WAIT_REG_MEM__REG_POLL_ADDR(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR1(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_WAIT_REG_MEM__MEM_POLL_ADDR_HI(x) ((unsigned)(x)) +#define PACKET3_WAIT_REG_MEM__REG_WRITE_ADDR2(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_WAIT_REG_MEM__REFERENCE(x) ((unsigned)(x)) +#define PACKET3_WAIT_REG_MEM__MASK(x) ((unsigned)(x)) +#define PACKET3_WAIT_REG_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_WAIT_REG_MEM__OPTIMIZE_ACE_OFFLOAD_MODE(x) ((((unsigned)(x)) & 0x1) << 31) +#define PACKET3_WAIT_REG_MEM__FUNCTION__ALWAYS_PASS 0 +#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_REF_VALUE 1 +#define PACKET3_WAIT_REG_MEM__FUNCTION__LESS_THAN_EQUAL_TO_THE_REF_VALUE 2 +#define PACKET3_WAIT_REG_MEM__FUNCTION__EQUAL_TO_THE_REFERENCE_VALUE 3 +#define PACKET3_WAIT_REG_MEM__FUNCTION__NOT_EQUAL_REFERENCE_VALUE 4 +#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_OR_EQUAL_REFERENCE_VALUE 5 +#define PACKET3_WAIT_REG_MEM__FUNCTION__GREATER_THAN_REFERENCE_VALUE 6 +#define PACKET3_WAIT_REG_MEM__MEM_SPACE__REGISTER_SPACE 0 +#define PACKET3_WAIT_REG_MEM__MEM_SPACE__MEMORY_SPACE 1 +#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_REG_MEM 0 +#define PACKET3_WAIT_REG_MEM__OPERATION__WR_WAIT_WR_REG 1 +#define PACKET3_WAIT_REG_MEM__OPERATION__WAIT_MEM_PREEMPTABLE 3 #define PACKET3_INDIRECT_BUFFER 0x3F #define INDIRECT_BUFFER_VALID (1 << 23) #define INDIRECT_BUFFER_CACHE_POLICY(x) ((x) << 28) @@ -169,7 +232,63 @@ */ #define INDIRECT_BUFFER_PRE_ENB(x) ((x) << 21) #define INDIRECT_BUFFER_PRE_RESUME(x) ((x) << 30) +#define PACKET3_INDIRECT_BUFFER__IB_BASE_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_INDIRECT_BUFFER__IB_BASE_HI(x) ((unsigned)(x)) +#define PACKET3_INDIRECT_BUFFER__IB_SIZE(x) ((((unsigned)(x)) & 0xFFFFF) << 0) +#define PACKET3_INDIRECT_BUFFER__CHAIN(x) ((((unsigned)(x)) & 0x1) << 20) +#define PACKET3_INDIRECT_BUFFER__OFFLOAD_POLLING(x) ((((unsigned)(x)) & 0x1) << 21) +#define PACKET3_INDIRECT_BUFFER__VALID(x) ((((unsigned)(x)) & 0x1) << 23) +#define PACKET3_INDIRECT_BUFFER__VMID(x) ((((unsigned)(x)) & 0xF) << 24) +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 28) +#define PACKET3_INDIRECT_BUFFER__PRIV(x) ((((unsigned)(x)) & 0x1) << 31) +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__LRU 0 +#define PACKET3_INDIRECT_BUFFER__CACHE_POLICY__STREAM 1 #define PACKET3_COPY_DATA 0x40 +#define PACKET3_COPY_DATA__SRC_SEL(x) ((((unsigned)(x)) & 0xF) << 0) +#define PACKET3_COPY_DATA__DST_SEL(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 13) +#define PACKET3_COPY_DATA__COUNT_SEL(x) ((((unsigned)(x)) & 0x1) << 16) +#define PACKET3_COPY_DATA__WR_CONFIRM(x) ((((unsigned)(x)) & 0x1) << 20) +#define PACKET3_COPY_DATA__DST_CACHE_POLICY(x) ((((unsigned)(x)) & 0x3) << 25) +#define PACKET3_COPY_DATA__PQ_EXE_STATUS(x) ((((unsigned)(x)) & 0x1) << 29) +#define PACKET3_COPY_DATA__SRC_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_COPY_DATA__SRC_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_COPY_DATA__SRC_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3) +#define PACKET3_COPY_DATA__SRC_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_COPY_DATA__IMM_DATA(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__SRC_MEMTC_ADDR_HI(x) ((unsigned)(x)) +#define 
PACKET3_COPY_DATA__SRC_IMM_DATA(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__DST_REG_OFFSET(x) ((((unsigned)(x)) & 0x3FFFF) << 0) +#define PACKET3_COPY_DATA__DST_32B_ADDR_LO(x) ((((unsigned)(x)) & 0x3FFFFFFF) << 2) +#define PACKET3_COPY_DATA__DST_64B_ADDR_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3) +#define PACKET3_COPY_DATA__DST_GDS_ADDR_LO(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_COPY_DATA__DST_ADDR_HI(x) ((unsigned)(x)) +#define PACKET3_COPY_DATA__SRC_SEL__MEM_MAPPED_REGISTER 0 +#define PACKET3_COPY_DATA__SRC_SEL__MEMORY 1 +#define PACKET3_COPY_DATA__SRC_SEL__TC_L2 2 +#define PACKET3_COPY_DATA__SRC_SEL__GDS 3 +#define PACKET3_COPY_DATA__SRC_SEL__PERFCOUNTERS 4 +#define PACKET3_COPY_DATA__SRC_SEL__IMMEDIATE_DATA 5 +#define PACKET3_COPY_DATA__SRC_SEL__ATOMIC_RETURN_DATA 6 +#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA0 7 +#define PACKET3_COPY_DATA__SRC_SEL__GDS_ATOMIC_RETURN_DATA1 8 +#define PACKET3_COPY_DATA__SRC_SEL__GPU_CLOCK_COUNT 9 +#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REGISTER 0 +#define PACKET3_COPY_DATA__DST_SEL__TC_L2 2 +#define PACKET3_COPY_DATA__DST_SEL__GDS 3 +#define PACKET3_COPY_DATA__DST_SEL__PERFCOUNTERS 4 +#define PACKET3_COPY_DATA__DST_SEL__MEMORY 5 +#define PACKET3_COPY_DATA__DST_SEL__MEM_MAPPED_REG_DC 6 +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__LRU 0 +#define PACKET3_COPY_DATA__SRC_CACHE_POLICY__STREAM 1 +#define PACKET3_COPY_DATA__COUNT_SEL__32_BITS_OF_DATA 0 +#define PACKET3_COPY_DATA__COUNT_SEL__64_BITS_OF_DATA 1 +#define PACKET3_COPY_DATA__WR_CONFIRM__DO_NOT_WAIT_FOR_CONFIRMATION 0 +#define PACKET3_COPY_DATA__WR_CONFIRM__WAIT_FOR_CONFIRMATION 1 +#define PACKET3_COPY_DATA__DST_CACHE_POLICY__LRU 0 +#define PACKET3_COPY_DATA__DST_CACHE_POLICY__STREAM 1 +#define PACKET3_COPY_DATA__PQ_EXE_STATUS__DEFAULT 0 +#define PACKET3_COPY_DATA__PQ_EXE_STATUS__PHASE_UPDATE 1 #define PACKET3_PFP_SYNC_ME 0x42 #define PACKET3_COND_WRITE 0x45 #define PACKET3_EVENT_WRITE 0x46 @@ -181,6 +300,15 @@ * 3 - SAMPLE_STREAMOUTSTAT* * 4 - *S_PARTIAL_FLUSH */ +#define PACKET3_EVENT_WRITE__EVENT_TYPE(x) ((((unsigned)(x)) & 0x3F) << 0) +#define PACKET3_EVENT_WRITE__EVENT_INDEX(x) ((((unsigned)(x)) & 0xF) << 8) +#define PACKET3_EVENT_WRITE__OFFLOAD_ENABLE(x) ((((unsigned)(x)) & 0x1) << 31) +#define PACKET3_EVENT_WRITE__SAMP_PLST_CNTR_MODE(x) ((((unsigned)(x)) & 0x3) << 29) +#define PACKET3_EVENT_WRITE__ADDRESS_LO(x) ((((unsigned)(x)) & 0x1FFFFFFF) << 3) +#define PACKET3_EVENT_WRITE__ADDRESS_HI(x) (((unsigned)(x)) << 0) +#define PACKET3_EVENT_WRITE__EVENT_INDEX__OTHER 0 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__SAMPLE_PIPELINESTATS 2 +#define PACKET3_EVENT_WRITE__EVENT_INDEX__CS_PARTIAL_FLUSH 4 #define PACKET3_RELEASE_MEM 0x49 #define EVENT_TYPE(x) ((x) << 0) #define EVENT_INDEX(x) ((x) << 8) @@ -286,6 +414,13 @@ #define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_ICACHE_ACTION_ENA(x) ((x) << 29) #define PACKET3_ACQUIRE_MEM_CP_COHER_CNTL_SH_KCACHE_WB_ACTION_ENA(x) ((x) << 30) #define PACKET3_REWIND 0x59 +#define PACKET3_ACQUIRE_MEM__COHER_SIZE(x) ((unsigned)(x)) +#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI(x) ((((unsigned)(x)) & 0xFF) << 0) +#define PACKET3_ACQUIRE_MEM__COHER_SIZE_HI_VG10(x) ((((unsigned)(x)) & 0xFFFFFF) << 0) +#define PACKET3_ACQUIRE_MEM__COHER_BASE_LO(x) ((unsigned)(x)) +#define PACKET3_ACQUIRE_MEM__COHER_BASE_HI(x) ((((unsigned)(x)) & 0xFFFFFF) << 0) +#define PACKET3_ACQUIRE_MEM__POLL_INTERVAL(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_ACQUIRE_MEM__GCR_CNTL(x) ((((unsigned)(x)) & 0x7FF) << 0) #define PACKET3_LOAD_UCONFIG_REG 0x5E 
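/* Illustrative sketch (not part of the patch above): one way the new
 * PACKET3_WRITE_DATA__* field encoders and selector values could be
 * combined when emitting a WRITE_DATA packet, instead of open-coding the
 * shifts. PACKET3(), amdgpu_ring_write(), lower_32_bits() and
 * upper_32_bits() are assumed from the existing amdgpu/kernel headers;
 * the function name is hypothetical.
 */
static void example_emit_write_data(struct amdgpu_ring *ring,
				    uint64_t mem_addr, uint32_t value)
{
	/* header: opcode WRITE_DATA, body of 4 dwords (count field = 4 - 1) */
	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	/* control dword: destination is memory, wait for write confirmation */
	amdgpu_ring_write(ring,
		PACKET3_WRITE_DATA__DST_SEL(PACKET3_WRITE_DATA__DST_SEL__MEMORY) |
		PACKET3_WRITE_DATA__ADDR_INCR(PACKET3_WRITE_DATA__ADDR_INCR__INCREMENT_ADDRESS) |
		PACKET3_WRITE_DATA__WR_CONFIRM(PACKET3_WRITE_DATA__WR_CONFIRM__WAIT_FOR_WRITE_CONFIRMATION));
	/* DST_MEM_ADDR_LO() shifts its argument left by 2, so pass the dword-aligned low bits */
	amdgpu_ring_write(ring, PACKET3_WRITE_DATA__DST_MEM_ADDR_LO(lower_32_bits(mem_addr) >> 2));
	amdgpu_ring_write(ring, PACKET3_WRITE_DATA__DST_MEM_ADDR_HI(upper_32_bits(mem_addr)));
	amdgpu_ring_write(ring, value);
}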
#define PACKET3_LOAD_SH_REG 0x5F #define PACKET3_LOAD_CONFIG_REG 0x60 @@ -300,12 +435,16 @@ #define PACKET3_SET_SH_REG 0x76 #define PACKET3_SET_SH_REG_START 0x00002c00 #define PACKET3_SET_SH_REG_END 0x00003000 +#define PACKET3_SET_SH_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0) +#define PACKET3_SET_SH_REG__VMID_SHIFT(x) ((((unsigned)(x)) & 0x1F) << 23) +#define PACKET3_SET_SH_REG__INDEX(x) ((((unsigned)(x)) & 0xF) << 28) #define PACKET3_SET_SH_REG_OFFSET 0x77 #define PACKET3_SET_QUEUE_REG 0x78 #define PACKET3_SET_UCONFIG_REG 0x79 #define PACKET3_SET_UCONFIG_REG_START 0x0000c000 #define PACKET3_SET_UCONFIG_REG_END 0x0000c400 #define PACKET3_SET_UCONFIG_REG_INDEX_TYPE (2 << 28) +#define PACKET3_SET_UCONFIG_REG__REG_OFFSET(x) ((((unsigned)(x)) & 0xFFFF) << 0) #define PACKET3_SCRATCH_RAM_WRITE 0x7D #define PACKET3_SCRATCH_RAM_READ 0x7E #define PACKET3_LOAD_CONST_RAM 0x80 diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index d6999835918f..ad36c96478a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -117,23 +117,17 @@ static struct amdgpu_video_codecs sriov_vcn_4_0_0_video_codecs_encode_vcn1 = { }; static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn0[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static struct amdgpu_video_codec_info sriov_vcn_4_0_0_video_codecs_decode_array_vcn1[] = { - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)}, - {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 16384, 16384, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, }; @@ -390,6 +384,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev) case IP_VERSION(14, 0, 0): case IP_VERSION(14, 0, 1): case IP_VERSION(14, 0, 4): + case IP_VERSION(14, 0, 5): return AMD_RESET_METHOD_MODE2; default: if (amdgpu_dpm_is_baco_supported(adev)) @@ -781,6 +776,34 @@ static int soc21_common_early_init(struct amdgpu_ip_block *ip_block) AMD_PG_SUPPORT_GFX_PG; adev->external_rev_id = adev->rev_id + 0x40; break; + case IP_VERSION(11, 5, 3): + adev->cg_flags = AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_JPEG_MGCG | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_FGCG | + AMD_CG_SUPPORT_REPEATER_FGCG | + AMD_CG_SUPPORT_GFX_PERF_CLK | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS 
| + AMD_CG_SUPPORT_HDP_SD | + AMD_CG_SUPPORT_ATHUB_MGCG | + AMD_CG_SUPPORT_ATHUB_LS | + AMD_CG_SUPPORT_IH_CG | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS; + adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_JPEG_DPG | + AMD_PG_SUPPORT_JPEG | + AMD_PG_SUPPORT_GFX_PG; + adev->external_rev_id = adev->rev_id + 0x50; + break; default: /* FIXME: not supported yet */ return -EINVAL; @@ -923,15 +946,15 @@ static int soc21_common_resume(struct amdgpu_ip_block *ip_block) return soc21_common_hw_init(ip_block); } -static bool soc21_common_is_idle(void *handle) +static bool soc21_common_is_idle(struct amdgpu_ip_block *ip_block) { return true; } -static int soc21_common_set_clockgating_state(void *handle, +static int soc21_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(4, 3, 0): @@ -940,6 +963,7 @@ static int soc21_common_set_clockgating_state(void *handle, case IP_VERSION(7, 7, 1): case IP_VERSION(7, 11, 0): case IP_VERSION(7, 11, 1): + case IP_VERSION(7, 11, 2): case IP_VERSION(7, 11, 3): adev->nbio.funcs->update_medium_grain_clock_gating(adev, state == AMD_CG_STATE_GATE); @@ -954,10 +978,10 @@ return 0; } -static int soc21_common_set_powergating_state(void *handle, +static int soc21_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) { case IP_VERSION(6, 0, 0): @@ -972,9 +996,9 @@ return 0; } -static void soc21_common_get_clockgating_state(void *handle, u64 *flags) +static void soc21_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->nbio.funcs->get_clockgating_state(adev, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/soc24.c b/drivers/gpu/drm/amd/amdgpu/soc24.c index be96de92b2f5..972b449ab89f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc24.c +++ b/drivers/gpu/drm/amd/amdgpu/soc24.c @@ -26,7 +26,6 @@ #include <linux/pci.h> #include "amdgpu.h" -#include "amdgpu_atombios.h" #include "amdgpu_ih.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" @@ -444,8 +443,18 @@ static int soc24_common_late_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { xgpu_nv_mailbox_get_irq(adev); + } else { + if (adev->nbio.ras && + adev->nbio.ras_err_event_athub_irq.funcs) + /* don't need to fail gpu late init + * if enabling athub_err_event interrupt failed + * nbif v6_3_1 only supports fatal error handling + * just enable the interrupt directly + */ + amdgpu_irq_get(adev, &adev->nbio.ras_err_event_athub_irq, 0); + } /* Enable selfring doorbell aperture late because doorbell BAR * aperture will change if resize BAR successfully in gmc sw_init.
@@ -501,8 +510,13 @@ static int soc24_common_hw_fini(struct amdgpu_ip_block *ip_block) adev->nbio.funcs->enable_doorbell_aperture(adev, false); adev->nbio.funcs->enable_doorbell_selfring_aperture(adev, false); - if (amdgpu_sriov_vf(adev)) + if (amdgpu_sriov_vf(adev)) { xgpu_nv_mailbox_put_irq(adev); + } else { + if (adev->nbio.ras && + adev->nbio.ras_err_event_athub_irq.funcs) + amdgpu_irq_put(adev, &adev->nbio.ras_err_event_athub_irq, 0); + } return 0; } @@ -517,15 +531,15 @@ static int soc24_common_resume(struct amdgpu_ip_block *ip_block) return soc24_common_hw_init(ip_block); } -static bool soc24_common_is_idle(void *handle) +static bool soc24_common_is_idle(struct amdgpu_ip_block *ip_block) { return true; } -static int soc24_common_set_clockgating_state(void *handle, +static int soc24_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, NBIO_HWIP, 0)) { case IP_VERSION(6, 3, 1): @@ -542,10 +556,10 @@ static int soc24_common_set_clockgating_state(void *handle, return 0; } -static int soc24_common_set_powergating_state(void *handle, +static int soc24_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; switch (amdgpu_ip_version(adev, LSDMA_HWIP, 0)) { case IP_VERSION(7, 0, 0): @@ -560,9 +574,9 @@ static int soc24_common_set_powergating_state(void *handle, return 0; } -static void soc24_common_get_clockgating_state(void *handle, u64 *flags) +static void soc24_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; adev->nbio.funcs->get_clockgating_state(adev, flags); diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 21b71a427b1f..a3b5fda22432 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -30,6 +30,9 @@ #define RSP_ID_MASK (1U << 31) #define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK) +/* invalid node instance value */ +#define TA_RAS_INV_NODE 0xffff + /* RAS related enumerations */ /**********************************************************/ enum ras_command { @@ -148,6 +151,7 @@ struct ta_ras_init_flags { uint16_t xcc_mask; uint8_t channel_dis_num; uint8_t nps_mode; + uint32_t active_umc_mask; }; struct ta_ras_mca_addr { diff --git a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h index 00d8bdb8254f..9ec2e03d41c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_secureDisplay_if.h @@ -31,10 +31,12 @@ * Secure Display Command ID */ enum ta_securedisplay_command { - /* Query whether TA is responding used only for validation purpose */ + /* Query whether TA is responding. 
It is used only for validation purpose */ TA_SECUREDISPLAY_COMMAND__QUERY_TA = 1, /* Send region of Interest and CRC value to I2C */ TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC = 2, + /* V2 to send multiple regions of Interest and CRC value to I2C */ + TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC_V2 = 3, /* Maximum Command ID */ TA_SECUREDISPLAY_COMMAND__MAX_ID = 0x7FFFFFFF, }; @@ -83,6 +85,8 @@ enum ta_securedisplay_ta_query_cmd_ret { enum ta_securedisplay_buffer_size { /* 15 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) */ TA_SECUREDISPLAY_I2C_BUFFER_SIZE = 15, + /* 16 bytes = 8 byte (ROI) + 6 byte(CRC) + 1 byte(phy_id) + 1 byte(roi_idx) */ + TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE = 16, }; /** Input/output structures for Secure Display commands */ @@ -95,7 +99,15 @@ enum ta_securedisplay_buffer_size { * Physical ID to determine which DIO scratch register should be used to get ROI */ struct ta_securedisplay_send_roi_crc_input { - uint32_t phy_id; /* Physical ID */ + /* Physical ID */ + uint32_t phy_id; +}; + +struct ta_securedisplay_send_roi_crc_v2_input { + /* Physical ID */ + uint32_t phy_id; + /* Region of interest index */ + uint8_t roi_idx; }; /** @union ta_securedisplay_cmd_input @@ -104,6 +116,8 @@ struct ta_securedisplay_send_roi_crc_input { union ta_securedisplay_cmd_input { /* send ROI and CRC input buffer format */ struct ta_securedisplay_send_roi_crc_input send_roi_crc; + /* send ROI and CRC input buffer format, v2 adds a ROI index */ + struct ta_securedisplay_send_roi_crc_v2_input send_roi_crc_v2; uint32_t reserved[4]; }; @@ -128,6 +142,10 @@ struct ta_securedisplay_send_roi_crc_output { uint8_t reserved; }; +struct ta_securedisplay_send_roi_crc_v2_output { + uint8_t i2c_buf[TA_SECUREDISPLAY_V2_I2C_BUFFER_SIZE]; /* I2C buffer */ +}; + /** @union ta_securedisplay_cmd_output * Output buffer */ @@ -136,6 +154,8 @@ union ta_securedisplay_cmd_output { struct ta_securedisplay_query_ta_output query_ta; /* Send ROI CRC output buffer format used only for validation purpose */ struct ta_securedisplay_send_roi_crc_output send_roi_crc; + /* Send ROI CRC output buffer format used only for validation purpose */ + struct ta_securedisplay_send_roi_crc_v2_output send_roi_crc_v2; uint32_t reserved[4]; }; diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 5a04a6770138..7d17ae56f901 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -353,9 +353,9 @@ static int tonga_ih_resume(struct amdgpu_ip_block *ip_block) return tonga_ih_hw_init(ip_block); } -static bool tonga_ih_is_idle(void *handle) +static bool tonga_ih_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 tmp = RREG32(mmSRBM_STATUS); if (REG_GET_FIELD(tmp, SRBM_STATUS, IH_BUSY)) @@ -448,13 +448,13 @@ static int tonga_ih_soft_reset(struct amdgpu_ip_block *ip_block) return 0; } -static int tonga_ih_set_clockgating_state(void *handle, +static int tonga_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int tonga_ih_set_powergating_state(void *handle, +static int tonga_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c index 1a8ea834efa6..74f57b2d30a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c @@ 
-173,156 +173,96 @@ static void umc_v12_0_query_ras_error_count(struct amdgpu_device *adev, umc_v12_0_reset_error_count(adev); } -static void umc_v12_0_convert_error_address(struct amdgpu_device *adev, +static int umc_v12_0_convert_error_address(struct amdgpu_device *adev, struct ras_err_data *err_data, - struct ta_ras_query_address_input *addr_in) + struct ta_ras_query_address_input *addr_in, + struct ta_ras_query_address_output *addr_out, + bool dump_addr) { - uint32_t col, row, row_xor, bank, channel_index; - uint64_t soc_pa, retired_page, column, err_addr; - struct ta_ras_query_address_output addr_out; + uint32_t col, col_lower, row, row_lower, bank; + uint32_t channel_index = 0, umc_inst = 0; + uint32_t i, loop_bits[UMC_V12_0_RETIRE_LOOP_BITS]; + uint64_t soc_pa, column, err_addr; + struct ta_ras_query_address_output addr_out_tmp; + struct ta_ras_query_address_output *paddr_out; + enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; + int ret = 0; + + if (!addr_out) + paddr_out = &addr_out_tmp; + else + paddr_out = addr_out; - err_addr = addr_in->ma.err_addr; - addr_in->addr_type = TA_RAS_MCA_TO_PA; - if (psp_ras_query_address(&adev->psp, addr_in, &addr_out)) { - dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", - err_addr); + err_addr = bank = 0; + if (addr_in) { + err_addr = addr_in->ma.err_addr; + addr_in->addr_type = TA_RAS_MCA_TO_PA; + ret = psp_ras_query_address(&adev->psp, addr_in, paddr_out); + if (ret) { + dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", + err_addr); - return; - } + goto out; + } - soc_pa = addr_out.pa.pa; - bank = addr_out.pa.bank; - channel_index = addr_out.pa.channel_idx; - - col = (err_addr >> 1) & 0x1fULL; - row = (err_addr >> 10) & 0x3fffULL; - row_xor = row ^ (0x1ULL << 13); - /* clear [C3 C2] in soc physical address */ - soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); - /* clear [C4] in soc physical address */ - soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); - - /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { - retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); - retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); - /* include column bit 0 and 1 */ - col &= 0x3; - col |= (column << 2); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row, col, bank, channel_index); - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, addr_in->ma.umc_inst); - - /* shift R13 bit */ - retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row_xor, col, bank, channel_index); - amdgpu_umc_fill_error_record(err_data, err_addr, - retired_page, channel_index, addr_in->ma.umc_inst); + bank = paddr_out->pa.bank; + /* no need to care about umc inst if addr_in is NULL */ + umc_inst = addr_in->ma.umc_inst; } -} -static void umc_v12_0_dump_addr_info(struct amdgpu_device *adev, - struct ta_ras_query_address_output *addr_out, - uint64_t err_addr) -{ - uint32_t col, row, row_xor, bank, channel_index; - uint64_t soc_pa, retired_page, column; - - soc_pa = addr_out->pa.pa; - bank = addr_out->pa.bank; - channel_index = addr_out->pa.channel_idx; - - col = (err_addr >> 1) & 0x1fULL; - row = (err_addr >> 10) & 0x3fffULL; - row_xor = row ^ (0x1ULL << 13); - /* clear [C3 C2] in soc physical address */ - soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); - /* 
clear [C4] in soc physical address */ - soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); - - /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { - retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); - retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); - /* include column bit 0 and 1 */ - col &= 0x3; - col |= (column << 2); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row, col, bank, channel_index); - - /* shift R13 bit */ - retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); - dev_info(adev->dev, - "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", - retired_page, row_xor, col, bank, channel_index); - } -} + loop_bits[0] = UMC_V12_0_PA_C2_BIT; + loop_bits[1] = UMC_V12_0_PA_C3_BIT; + loop_bits[2] = UMC_V12_0_PA_C4_BIT; + loop_bits[3] = UMC_V12_0_PA_R13_BIT; -static int umc_v12_0_lookup_bad_pages_in_a_row(struct amdgpu_device *adev, - uint64_t pa_addr, uint64_t *pfns, int len) -{ - uint64_t soc_pa, retired_page, column; - uint32_t pos = 0; - - soc_pa = pa_addr; - /* clear [C3 C2] in soc physical address */ - soc_pa &= ~(0x3ULL << UMC_V12_0_PA_C2_BIT); - /* clear [C4] in soc physical address */ - soc_pa &= ~(0x1ULL << UMC_V12_0_PA_C4_BIT); - - /* loop for all possibilities of [C4 C3 C2] */ - for (column = 0; column < UMC_V12_0_NA_MAP_PA_NUM; column++) { - retired_page = soc_pa | ((column & 0x3) << UMC_V12_0_PA_C2_BIT); - retired_page |= (((column & 0x4) >> 2) << UMC_V12_0_PA_C4_BIT); - - if (pos >= len) - return 0; - pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; - - /* shift R13 bit */ - retired_page ^= (0x1ULL << UMC_V12_0_PA_R13_BIT); - - if (pos >= len) - return 0; - pfns[pos++] = retired_page >> AMDGPU_GPU_PAGE_SHIFT; + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + /* other nps modes are taken as nps1 */ + if (nps == AMDGPU_NPS4_PARTITION_MODE) { + loop_bits[0] = UMC_V12_0_PA_CH4_BIT; + loop_bits[1] = UMC_V12_0_PA_CH5_BIT; + loop_bits[2] = UMC_V12_0_PA_B0_BIT; + loop_bits[3] = UMC_V12_0_PA_R11_BIT; } - return pos; -} - -static int umc_v12_0_convert_mca_to_addr(struct amdgpu_device *adev, - uint64_t err_addr, uint32_t ch, uint32_t umc, - uint32_t node, uint32_t socket, - uint64_t *addr, bool dump_addr) -{ - struct ta_ras_query_address_input addr_in; - struct ta_ras_query_address_output addr_out; - - memset(&addr_in, 0, sizeof(addr_in)); - addr_in.ma.err_addr = err_addr; - addr_in.ma.ch_inst = ch; - addr_in.ma.umc_inst = umc; - addr_in.ma.node_inst = node; - addr_in.ma.socket_id = socket; - addr_in.addr_type = TA_RAS_MCA_TO_PA; - if (psp_ras_query_address(&adev->psp, &addr_in, &addr_out)) { - dev_warn(adev->dev, "Failed to query RAS physical address for 0x%llx", - err_addr); - return -EINVAL; + soc_pa = paddr_out->pa.pa; + channel_index = paddr_out->pa.channel_idx; + /* clear loop bits in soc physical address */ + for (i = 0; i < UMC_V12_0_RETIRE_LOOP_BITS; i++) + soc_pa &= ~BIT_ULL(loop_bits[i]); + + paddr_out->pa.pa = soc_pa; + /* get column bit 0 and 1 in mca address */ + col_lower = (err_addr >> 1) & 0x3ULL; + /* MA_R13_BIT will be handled later */ + row_lower = (err_addr >> UMC_V12_0_MA_R0_BIT) & 0x1fffULL; + + if (!err_data && !dump_addr) + goto out; + + /* loop for all possibilities of retired bits */ + for (column = 0; column < UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL; column++) { + soc_pa = paddr_out->pa.pa; + for (i = 0; i < 
UMC_V12_0_RETIRE_LOOP_BITS; i++) + soc_pa |= (((column >> i) & 0x1ULL) << loop_bits[i]); + + col = ((column & 0x7) << 2) | col_lower; + /* add row bit 13 */ + row = ((column >> 3) << 13) | row_lower; + + if (dump_addr) + dev_info(adev->dev, + "Error Address(PA):0x%-10llx Row:0x%-4x Col:0x%-2x Bank:0x%x Channel:0x%x\n", + soc_pa, row, col, bank, channel_index); + + if (err_data) + amdgpu_umc_fill_error_record(err_data, err_addr, + soc_pa, channel_index, umc_inst); } - if (dump_addr) - umc_v12_0_dump_addr_info(adev, &addr_out, err_addr); - - *addr = addr_out.pa.pa; - - return 0; +out: + return ret; } static int umc_v12_0_query_error_address(struct amdgpu_device *adev, @@ -374,7 +314,7 @@ static int umc_v12_0_query_error_address(struct amdgpu_device *adev, addr_in.ma.umc_inst = umc_inst; addr_in.ma.node_inst = node_inst; - umc_v12_0_convert_error_address(adev, err_data, &addr_in); + umc_v12_0_convert_error_address(adev, err_data, &addr_in, NULL, true); } /* clear umc status */ @@ -475,6 +415,7 @@ static int umc_v12_0_aca_bank_parser(struct aca_handle *handle, struct aca_bank err_type = ACA_ERROR_TYPE_CE; else return 0; + bank->aca_err_type = err_type; ret = aca_bank_info_decode(bank, &info); if (ret) @@ -526,6 +467,9 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, uint64_t page_pfn[UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL]; uint64_t err_addr, pa_addr = 0; struct ras_ecc_err *ecc_err; + struct ta_ras_query_address_output addr_out; + enum amdgpu_memory_partition nps = AMDGPU_NPS1_PARTITION_MODE; + uint32_t shift_bit = UMC_V12_0_PA_C4_BIT; int count, ret, i; hwid = REG_GET_FIELD(ipid, MCMP1_IPIDT0, HardwareID); @@ -552,10 +496,10 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, MCA_IPID_2_UMC_CH(ipid), err_addr); - ret = umc_v12_0_convert_mca_to_addr(adev, + ret = amdgpu_umc_mca_to_addr(adev, err_addr, MCA_IPID_2_UMC_CH(ipid), MCA_IPID_2_UMC_INST(ipid), MCA_IPID_2_DIE_ID(ipid), - MCA_IPID_2_SOCKET_ID(ipid), &pa_addr, true); + MCA_IPID_2_SOCKET_ID(ipid), &addr_out, true); if (ret) return ret; @@ -563,14 +507,21 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, if (!ecc_err) return -ENOMEM; + pa_addr = addr_out.pa.pa; ecc_err->status = status; ecc_err->ipid = ipid; ecc_err->addr = addr; - ecc_err->pa_pfn = UMC_V12_ADDR_MASK_BAD_COLS(pa_addr) >> AMDGPU_GPU_PAGE_SHIFT; + ecc_err->pa_pfn = pa_addr >> AMDGPU_GPU_PAGE_SHIFT; + ecc_err->channel_idx = addr_out.pa.channel_idx; + + if (adev->gmc.gmc_funcs->query_mem_partition_mode) + nps = adev->gmc.gmc_funcs->query_mem_partition_mode(adev); + if (nps == AMDGPU_NPS4_PARTITION_MODE) + shift_bit = UMC_V12_0_PA_B0_BIT; /* If converted pa_pfn is 0, use pa C4 pfn. 
*/ if (!ecc_err->pa_pfn) - ecc_err->pa_pfn = BIT_ULL(UMC_V12_0_PA_C4_BIT) >> AMDGPU_GPU_PAGE_SHIFT; + ecc_err->pa_pfn = BIT_ULL(shift_bit) >> AMDGPU_GPU_PAGE_SHIFT; ret = amdgpu_umc_logs_ecc_err(adev, &con->umc_ecc_log.de_page_tree, ecc_err); if (ret) { @@ -586,7 +537,7 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev, con->umc_ecc_log.de_queried_count++; memset(page_pfn, 0, sizeof(page_pfn)); - count = umc_v12_0_lookup_bad_pages_in_a_row(adev, + count = amdgpu_umc_lookup_bad_pages_in_a_row(adev, pa_addr, page_pfn, ARRAY_SIZE(page_pfn)); if (count <= 0) { @@ -629,7 +580,7 @@ static int umc_v12_0_fill_error_record(struct amdgpu_device *adev, return -EINVAL; memset(page_pfn, 0, sizeof(page_pfn)); - count = umc_v12_0_lookup_bad_pages_in_a_row(adev, + count = amdgpu_umc_lookup_bad_pages_in_a_row(adev, ecc_err->pa_pfn << AMDGPU_GPU_PAGE_SHIFT, page_pfn, ARRAY_SIZE(page_pfn)); @@ -637,7 +588,7 @@ static int umc_v12_0_fill_error_record(struct amdgpu_device *adev, ret = amdgpu_umc_fill_error_record(err_data, ecc_err->addr, page_pfn[i] << AMDGPU_GPU_PAGE_SHIFT, - MCA_IPID_2_UMC_CH(ecc_err->ipid), + ecc_err->channel_idx, MCA_IPID_2_UMC_INST(ecc_err->ipid)); if (ret) break; @@ -676,6 +627,31 @@ static void umc_v12_0_query_ras_ecc_err_addr(struct amdgpu_device *adev, mutex_unlock(&con->umc_ecc_log.lock); } +static uint32_t umc_v12_0_get_die_id(struct amdgpu_device *adev, + uint64_t mca_addr, uint64_t retired_page) +{ + uint32_t die = 0; + + /* we only calculate die id for nps1 mode right now */ + die += ((((retired_page >> 12) & 0x1ULL)^ + ((retired_page >> 20) & 0x1ULL) ^ + ((retired_page >> 27) & 0x1ULL) ^ + ((retired_page >> 34) & 0x1ULL) ^ + ((retired_page >> 41) & 0x1ULL)) << 0); + + /* the original PA_C4 and PA_R13 may be cleared in retired_page, so + * get them from mca_addr. 
+ */ + die += ((((retired_page >> 13) & 0x1ULL) ^ + ((mca_addr >> 5) & 0x1ULL) ^ + ((retired_page >> 28) & 0x1ULL) ^ + ((mca_addr >> 23) & 0x1ULL) ^ + ((retired_page >> 42) & 0x1ULL)) << 1); + die &= 3; + + return die; +} + struct amdgpu_umc_ras umc_v12_0_ras = { .ras_block = { .hw_ops = &umc_v12_0_ras_hw_ops, @@ -686,5 +662,7 @@ struct amdgpu_umc_ras umc_v12_0_ras = { .ecc_info_query_ras_error_address = umc_v12_0_query_ras_ecc_err_addr, .check_ecc_err_status = umc_v12_0_check_ecc_err_status, .update_ecc_status = umc_v12_0_update_ecc_status, + .convert_ras_err_addr = umc_v12_0_convert_error_address, + .get_die_id_from_pa = umc_v12_0_get_die_id, }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h index be5598d76c1d..9298018d938f 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.h @@ -55,12 +55,24 @@ #define UMC_V12_0_NA_MAP_PA_NUM 8 /* R13 bit shift should be considered, double the number */ #define UMC_V12_0_BAD_PAGE_NUM_PER_CHANNEL (UMC_V12_0_NA_MAP_PA_NUM * 2) +/* C2, C3, C4, R13, four bits in MCA address are looped in retirement */ +#define UMC_V12_0_RETIRE_LOOP_BITS 4 /* column bits in SOC physical address */ #define UMC_V12_0_PA_C2_BIT 15 +#define UMC_V12_0_PA_C3_BIT 16 #define UMC_V12_0_PA_C4_BIT 21 /* row bits in SOC physical address */ +#define UMC_V12_0_PA_R0_BIT 22 +#define UMC_V12_0_PA_R11_BIT 33 #define UMC_V12_0_PA_R13_BIT 35 +/* channel bit in SOC physical address */ +#define UMC_V12_0_PA_CH4_BIT 12 +#define UMC_V12_0_PA_CH5_BIT 13 +/* bank bit in SOC physical address */ +#define UMC_V12_0_PA_B0_BIT 19 +/* row bits in MCA address */ +#define UMC_V12_0_MA_R0_BIT 10 #define MCA_UMC_HWID_V12_0 0x96 #define MCA_UMC_MCATYPE_V12_0 0x0 @@ -81,11 +93,6 @@ (((REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdLo) & 0x1) << 2) | \ (REG_GET_FIELD(ipid, MCMP1_IPIDT0, InstanceIdHi) & 0x03)) -#define UMC_V12_ADDR_MASK_BAD_COLS(addr) \ - ((addr) & ~((0x3ULL << UMC_V12_0_PA_C2_BIT) | \ - (0x1ULL << UMC_V12_0_PA_C4_BIT) | \ - (0x1ULL << UMC_V12_0_PA_R13_BIT))) - bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_uncorrectable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); bool umc_v12_0_is_correctable_error(struct amdgpu_device *adev, uint64_t mc_umc_status); diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c new file mode 100644 index 000000000000..eaca10a3c4a9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.c @@ -0,0 +1,160 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "umc_v8_14.h" +#include "amdgpu_ras.h" +#include "amdgpu_umc.h" +#include "amdgpu.h" +#include "umc/umc_8_14_0_offset.h" +#include "umc/umc_8_14_0_sh_mask.h" + +static inline uint32_t get_umc_v8_14_reg_offset(struct amdgpu_device *adev, + uint32_t umc_inst, + uint32_t ch_inst) +{ + return adev->umc.channel_offs * ch_inst + UMC_V8_14_INST_DIST * umc_inst; +} + +static int umc_v8_14_clear_error_count_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + uint32_t ecc_err_cnt_addr; + uint32_t umc_reg_offset = + get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst); + + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); + + /* clear error count */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, + UMC_V8_14_CE_CNT_INIT); + + return 0; +} + +static void umc_v8_14_clear_error_count(struct amdgpu_device *adev) +{ + amdgpu_umc_loop_channels(adev, + umc_v8_14_clear_error_count_per_channel, NULL); +} + +static void umc_v8_14_query_correctable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + + /* UMC 8_14 registers */ + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); + + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccErrCnt) - + UMC_V8_14_CE_CNT_INIT); +} + +static void umc_v8_14_query_uncorrectable_error_count(struct amdgpu_device *adev, + uint32_t umc_reg_offset, + unsigned long *error_count) +{ + uint32_t ecc_err_cnt, ecc_err_cnt_addr; + /* UMC 8_14 registers */ + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); + + ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4); + *error_count += + (REG_GET_FIELD(ecc_err_cnt, UMCCH0_GeccErrCnt, GeccUnCorrErrCnt) - + UMC_V8_14_CE_CNT_INIT); +} + +static int umc_v8_14_query_error_count_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + struct ras_err_data *err_data = (struct ras_err_data *)data; + uint32_t umc_reg_offset = + get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst); + + umc_v8_14_query_correctable_error_count(adev, + umc_reg_offset, + &(err_data->ce_count)); + umc_v8_14_query_uncorrectable_error_count(adev, + umc_reg_offset, + &(err_data->ue_count)); + + return 0; +} + +static void umc_v8_14_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + amdgpu_umc_loop_channels(adev, + umc_v8_14_query_error_count_per_channel, ras_error_status); + + umc_v8_14_clear_error_count(adev); +} + +static int umc_v8_14_err_cnt_init_per_channel(struct amdgpu_device *adev, + uint32_t node_inst, uint32_t umc_inst, + uint32_t ch_inst, void *data) +{ + uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr; + uint32_t ecc_err_cnt_addr; + uint32_t umc_reg_offset = + get_umc_v8_14_reg_offset(adev, umc_inst, ch_inst); + + ecc_err_cnt_sel_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCntSel); + ecc_err_cnt_addr = + SOC15_REG_OFFSET(UMC, 0, regUMCCH0_GeccErrCnt); + + ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4); + + /* set ce error interrupt type to 
APIC based interrupt */ + ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_GeccErrCntSel, + GeccErrInt, 0x1); + WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel); + /* set error count to initial value */ + WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_14_CE_CNT_INIT); + + return 0; +} + +static void umc_v8_14_err_cnt_init(struct amdgpu_device *adev) +{ + amdgpu_umc_loop_channels(adev, + umc_v8_14_err_cnt_init_per_channel, NULL); +} + +const struct amdgpu_ras_block_hw_ops umc_v8_14_ras_hw_ops = { + .query_ras_error_count = umc_v8_14_query_ras_error_count, +}; + +struct amdgpu_umc_ras umc_v8_14_ras = { + .ras_block = { + .hw_ops = &umc_v8_14_ras_hw_ops, + }, + .err_cnt_init = umc_v8_14_err_cnt_init, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h new file mode 100644 index 000000000000..20a258f0017a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_14.h @@ -0,0 +1,51 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef __UMC_V8_14_H__ +#define __UMC_V8_14_H__ + +#include "soc15_common.h" +#include "amdgpu.h" + +/* number of umc channel instance with memory map register access */ +#define UMC_V8_14_CHANNEL_INSTANCE_NUM 2 +/* number of umc instance with memory map register access */ +#define UMC_V8_14_UMC_INSTANCE_NUM(adev) ((adev)->umc.node_inst_num) + +/* Total channel instances for all available umc nodes */ +#define UMC_V8_14_TOTAL_CHANNEL_NUM(adev) \ + (UMC_V8_14_CHANNEL_INSTANCE_NUM * (adev)->gmc.num_umc) + +/* UMC register per channel offset */ +#define UMC_V8_14_PER_CHANNEL_OFFSET 0x400 + +#define UMC_V8_14_INST_DIST 0x40000 + +/* EccErrCnt max value */ +#define UMC_V8_14_CE_CNT_MAX 0xffff +/* umc ce interrupt threshold */ +#define UMC_V8_14_CE_INT_THRESHOLD 0xffff +/* umc ce count initial value */ +#define UMC_V8_14_CE_CNT_INIT (UMC_V8_14_CE_CNT_MAX - UMC_V8_14_CE_INT_THRESHOLD) + +extern struct amdgpu_umc_ras umc_v8_14_ras; +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c index bdbca25d80c4..5dbaebb592b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v3_1.c @@ -98,7 +98,7 @@ static void uvd_v3_1_ring_emit_ib(struct amdgpu_ring *ring, } /** - * uvd_v3_1_ring_emit_fence - emit an fence & trap command + * uvd_v3_1_ring_emit_fence - emit a fence & trap command * * @ring: amdgpu_ring pointer * @addr: address @@ -242,7 +242,7 @@ static void uvd_v3_1_mc_resume(struct amdgpu_device *adev) uint64_t addr; uint32_t size; - /* programm the VCPU memory controller bits 0-27 */ + /* program the VCPU memory controller bits 0-27 */ addr = (adev->uvd.inst->gpu_addr + AMDGPU_UVD_FIRMWARE_OFFSET) >> 3; size = AMDGPU_UVD_FIRMWARE_SIZE(adev) >> 3; WREG32(mmUVD_VCPU_CACHE_OFFSET0, addr); @@ -416,7 +416,7 @@ static int uvd_v3_1_start(struct amdgpu_device *adev) /* Set the write pointer delay */ WREG32(mmUVD_RBC_RB_WPTR_CNTL, 0); - /* programm the 4GB memory segment for rptr and ring buffer */ + /* Program the 4GB memory segment for rptr and ring buffer */ WREG32(mmUVD_LMI_EXT40_ADDR, upper_32_bits(ring->gpu_addr) | (0x7 << 16) | (0x1 << 31)); @@ -758,9 +758,9 @@ static int uvd_v3_1_resume(struct amdgpu_ip_block *ip_block) return uvd_v3_1_hw_init(ip_block); } -static bool uvd_v3_1_is_idle(void *handle) +static bool uvd_v3_1_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK); } @@ -790,13 +790,13 @@ static int uvd_v3_1_soft_reset(struct amdgpu_ip_block *ip_block) return uvd_v3_1_start(adev); } -static int uvd_v3_1_set_clockgating_state(void *handle, +static int uvd_v3_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int uvd_v3_1_set_powergating_state(void *handle, +static int uvd_v3_1_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index a836dc9cfcad..4b96fd583772 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -44,7 +44,7 @@ static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v4_2_set_irq_funcs(struct amdgpu_device *adev); static int uvd_v4_2_start(struct amdgpu_device *adev); static void uvd_v4_2_stop(struct amdgpu_device *adev); -static int 
uvd_v4_2_set_clockgating_state(void *handle, +static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); static void uvd_v4_2_set_dcm(struct amdgpu_device *adev, bool sw_mode); @@ -302,7 +302,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev) /* enable VCPU clock */ WREG32(mmUVD_VCPU_CNTL, 1 << 9); - /* disable interupt */ + /* disable interrupt */ WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1)); #ifdef __BIG_ENDIAN @@ -312,6 +312,7 @@ static int uvd_v4_2_start(struct amdgpu_device *adev) #endif WREG32(mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); WREG32(mmUVD_MP_SWAP_CNTL, mp_swap_cntl); + /* initialize UVD memory controller */ WREG32(mmUVD_LMI_CTRL, 0x203108); @@ -658,9 +659,9 @@ static void uvd_v4_2_set_dcm(struct amdgpu_device *adev, WREG32_UVD_CTX(ixUVD_CGC_CTRL2, tmp2); } -static bool uvd_v4_2_is_idle(void *handle) +static bool uvd_v4_2_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK); } @@ -708,13 +709,13 @@ static int uvd_v4_2_process_interrupt(struct amdgpu_device *adev, return 0; } -static int uvd_v4_2_set_clockgating_state(void *handle, +static int uvd_v4_2_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { return 0; } -static int uvd_v4_2_set_powergating_state(void *handle, +static int uvd_v4_2_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the UVD block. @@ -724,7 +725,7 @@ static int uvd_v4_2_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_PG_STATE_GATE) { uvd_v4_2_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index ab55fae3569e..71409ad8b7ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -42,7 +42,7 @@ static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev); static void uvd_v5_0_set_irq_funcs(struct amdgpu_device *adev); static int uvd_v5_0_start(struct amdgpu_device *adev); static void uvd_v5_0_stop(struct amdgpu_device *adev); -static int uvd_v5_0_set_clockgating_state(void *handle, +static int uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev, bool enable); @@ -155,7 +155,7 @@ static int uvd_v5_0_hw_init(struct amdgpu_ip_block *ip_block) int r; amdgpu_asic_set_uvd_clocks(adev, 10000, 10000); - uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); + uvd_v5_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE); uvd_v5_0_enable_mgcg(adev, true); r = amdgpu_ring_test_helper(ring); @@ -580,9 +580,9 @@ static void uvd_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) } } -static bool uvd_v5_0_is_idle(void *handle) +static bool uvd_v5_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK); } @@ -790,16 +790,11 @@ static void uvd_v5_0_enable_mgcg(struct amdgpu_device *adev, } } -static int uvd_v5_0_set_clockgating_state(void *handle, +static int 
uvd_v5_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); - struct amdgpu_ip_block *ip_block; - - ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD); - if (!ip_block) - return -EINVAL; if (enable) { /* wait for STATUS to clear */ @@ -817,7 +812,7 @@ static int uvd_v5_0_set_clockgating_state(void *handle, return 0; } -static int uvd_v5_0_set_powergating_state(void *handle, +static int uvd_v5_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the UVD block. @@ -827,7 +822,7 @@ static int uvd_v5_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret = 0; if (state == AMD_PG_STATE_GATE) { @@ -842,9 +837,9 @@ out: return ret; } -static void uvd_v5_0_get_clockgating_state(void *handle, u64 *flags) +static void uvd_v5_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; mutex_lock(&adev->pm.mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 39f8c3d3a135..1c07b701d0e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -48,7 +48,7 @@ static void uvd_v6_0_set_irq_funcs(struct amdgpu_device *adev); static int uvd_v6_0_start(struct amdgpu_device *adev); static void uvd_v6_0_stop(struct amdgpu_device *adev); static void uvd_v6_0_set_sw_clock_gating(struct amdgpu_device *adev); -static int uvd_v6_0_set_clockgating_state(void *handle, +static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, bool enable); @@ -467,7 +467,7 @@ static int uvd_v6_0_hw_init(struct amdgpu_ip_block *ip_block) int i, r; amdgpu_asic_set_uvd_clocks(adev, 10000, 10000); - uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); + uvd_v6_0_set_clockgating_state(ip_block, AMD_CG_STATE_UNGATE); uvd_v6_0_enable_mgcg(adev, true); r = amdgpu_ring_test_helper(ring); @@ -1143,9 +1143,9 @@ static void uvd_v6_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, amdgpu_ring_write(ring, vmid); } -static bool uvd_v6_0_is_idle(void *handle) +static bool uvd_v6_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return !(RREG32(mmSRBM_STATUS) & SRBM_STATUS__UVD_BUSY_MASK); } @@ -1156,7 +1156,7 @@ static int uvd_v6_0_wait_for_idle(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) { - if (uvd_v6_0_is_idle(adev)) + if (uvd_v6_0_is_idle(ip_block)) return 0; } return -ETIMEDOUT; @@ -1450,17 +1450,12 @@ static void uvd_v6_0_enable_mgcg(struct amdgpu_device *adev, } } -static int uvd_v6_0_set_clockgating_state(void *handle, +static int uvd_v6_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - struct amdgpu_ip_block *ip_block; + struct amdgpu_device *adev = 
ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); - ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD); - if (!ip_block) - return -EINVAL; - if (enable) { /* wait for STATUS to clear */ if (uvd_v6_0_wait_for_idle(ip_block)) @@ -1476,7 +1471,7 @@ static int uvd_v6_0_set_clockgating_state(void *handle, return 0; } -static int uvd_v6_0_set_powergating_state(void *handle, +static int uvd_v6_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the UVD block. @@ -1486,7 +1481,7 @@ static int uvd_v6_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret = 0; WREG32(mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK); @@ -1503,9 +1498,9 @@ out: return ret; } -static void uvd_v6_0_get_clockgating_state(void *handle, u64 *flags) +static void uvd_v6_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; mutex_lock(&adev->pm.mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 3c8ab8698af8..9d237b5937fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1511,7 +1511,7 @@ static int uvd_v7_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int uvd_v7_0_set_clockgating_state(void *handle, +static int uvd_v7_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { /* needed for driver unload*/ diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index c1ed91b39415..bee3e904a6bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -201,9 +201,9 @@ static void vce_v2_0_mc_resume(struct amdgpu_device *adev) WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1); } -static bool vce_v2_0_is_idle(void *handle) +static bool vce_v2_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return !(RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK); } @@ -214,7 +214,7 @@ static int vce_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block) unsigned i; for (i = 0; i < adev->usec_timeout; i++) { - if (vce_v2_0_is_idle(adev)) + if (vce_v2_0_is_idle(ip_block)) return 0; } return -ETIMEDOUT; @@ -280,11 +280,11 @@ static int vce_v2_0_stop(struct amdgpu_device *adev) if (vce_v2_0_lmi_clean(adev)) { - DRM_INFO("vce is not idle \n"); + DRM_INFO("VCE is not idle \n"); return 0; } - ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCN); + ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE); if (!ip_block) return -EINVAL; @@ -578,13 +578,13 @@ static int vce_v2_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int vce_v2_0_set_clockgating_state(void *handle, +static int vce_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { bool gate = false; bool sw_cg = false; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_CG_STATE_GATE) { gate = true; @@ -596,7 +596,7 @@ static int 
vce_v2_0_set_clockgating_state(void *handle, return 0; } -static int vce_v2_0_set_powergating_state(void *handle, +static int vce_v2_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the VCE block. @@ -606,7 +606,7 @@ static int vce_v2_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_PG_STATE_GATE) return vce_v2_0_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 6bb318a06f19..708123899c41 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -65,7 +65,7 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx); static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev); static void vce_v3_0_set_irq_funcs(struct amdgpu_device *adev); static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block); -static int vce_v3_0_set_clockgating_state(void *handle, +static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state); /** * vce_v3_0_ring_get_rptr - get read pointer @@ -497,7 +497,7 @@ static int vce_v3_0_hw_fini(struct amdgpu_ip_block *ip_block) return r; vce_v3_0_stop(adev); - return vce_v3_0_set_clockgating_state(adev, AMD_CG_STATE_GATE); + return vce_v3_0_set_clockgating_state(ip_block, AMD_CG_STATE_GATE); } static int vce_v3_0_suspend(struct amdgpu_ip_block *ip_block) @@ -597,9 +597,9 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx) WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1); } -static bool vce_v3_0_is_idle(void *handle) +static bool vce_v3_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; u32 mask = 0; mask |= (adev->vce.harvest_config & AMDGPU_VCE_HARVEST_VCE0) ? 0 : SRBM_STATUS2__VCE0_BUSY_MASK; @@ -614,7 +614,7 @@ static int vce_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; for (i = 0; i < adev->usec_timeout; i++) - if (vce_v3_0_is_idle(adev)) + if (vce_v3_0_is_idle(ip_block)) return 0; return -ETIMEDOUT; @@ -760,10 +760,10 @@ static int vce_v3_0_process_interrupt(struct amdgpu_device *adev, return 0; } -static int vce_v3_0_set_clockgating_state(void *handle, +static int vce_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); int i; @@ -801,7 +801,7 @@ static int vce_v3_0_set_clockgating_state(void *handle, return 0; } -static int vce_v3_0_set_powergating_state(void *handle, +static int vce_v3_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the VCE block. 
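/*
 * Editor's sketch (not part of the patch): the conversion repeated across
 * the UVD/VCE/VCN files in this series. amd_ip_funcs callbacks such as
 * is_idle, set_clockgating_state and set_powergating_state used to receive
 * an opaque handle that each implementation cast back to amdgpu_device;
 * they now take the amdgpu_ip_block directly and reach the device through
 * ip_block->adev. "foo" below is a placeholder IP block, not a real one.
 */

/* old style (being removed): */
static int foo_set_clockgating_state(void *handle,
				     enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	/* gate or ungate the block's clocks using adev here */
	return 0;
}

/* new style (being added): */
static int foo_set_clockgating_state(struct amdgpu_ip_block *ip_block,
				     enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;

	/* same body; helpers that need the block, e.g. a wait_for_idle
	 * routine, can now be passed ip_block instead of adev */
	return 0;
}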
@@ -811,7 +811,7 @@ static int vce_v3_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int ret = 0; if (state == AMD_PG_STATE_GATE) { @@ -828,9 +828,9 @@ out: return ret; } -static void vce_v3_0_get_clockgating_state(void *handle, u64 *flags) +static void vce_v3_0_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; mutex_lock(&adev->pm.mutex); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 79ee555768a5..335bda64ff5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -684,14 +684,14 @@ static void vce_v4_0_mc_resume(struct amdgpu_device *adev) ~VCE_SYS_INT_EN__VCE_SYS_INT_TRAP_INTERRUPT_EN_MASK); } -static int vce_v4_0_set_clockgating_state(void *handle, +static int vce_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { /* needed for driver unload*/ return 0; } -static int vce_v4_0_set_powergating_state(void *handle, +static int vce_v4_0_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { /* This doesn't actually powergate the VCE block. @@ -701,7 +701,7 @@ static int vce_v4_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (state == AMD_PG_STATE_GATE) return vce_v4_0_stop(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 10e99c926fb8..21b57c29bf7d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -81,13 +81,14 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_1_0[] = { SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE) }; -static int vcn_v1_0_stop(struct amdgpu_device *adev); +static int vcn_v1_0_stop(struct amdgpu_vcn_inst *vinst); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state); -static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, - int inst_idx, struct dpg_pause_state *new_state); +static int vcn_v1_0_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state); +static int vcn_v1_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state); static void vcn_v1_0_idle_work_handler(struct work_struct *work); static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring); @@ -104,7 +105,8 @@ static int vcn_v1_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - adev->vcn.num_enc_rings = 2; + adev->vcn.inst[0].num_enc_rings = 2; + adev->vcn.inst[0].set_pg_state = vcn_v1_0_set_pg_state; vcn_v1_0_set_dec_ring_funcs(adev); vcn_v1_0_set_enc_ring_funcs(adev); @@ -112,7 +114,7 @@ static int vcn_v1_0_early_init(struct amdgpu_ip_block *ip_block) jpeg_v1_0_early_init(ip_block); - return amdgpu_vcn_early_init(adev); + return amdgpu_vcn_early_init(adev, 0); } /** @@ -137,23 +139,23 @@ static 
int vcn_v1_0_sw_init(struct amdgpu_ip_block *ip_block) return r; /* VCN ENC TRAP */ - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) { r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_1_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq); if (r) return r; } - r = amdgpu_vcn_sw_init(adev); + r = amdgpu_vcn_sw_init(adev, 0); if (r) return r; /* Override the work func */ - adev->vcn.idle_work.work.func = vcn_v1_0_idle_work_handler; + adev->vcn.inst[0].idle_work.work.func = vcn_v1_0_idle_work_handler; - amdgpu_vcn_setup_ucode(adev); + amdgpu_vcn_setup_ucode(adev, 0); - r = amdgpu_vcn_resume(adev); + r = amdgpu_vcn_resume(adev, 0); if (r) return r; @@ -165,18 +167,18 @@ static int vcn_v1_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - adev->vcn.internal.scratch9 = adev->vcn.inst->external.scratch9 = + adev->vcn.inst[0].internal.scratch9 = adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); - adev->vcn.internal.data0 = adev->vcn.inst->external.data0 = + adev->vcn.inst[0].internal.data0 = adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); - adev->vcn.internal.data1 = adev->vcn.inst->external.data1 = + adev->vcn.inst[0].internal.data1 = adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); - adev->vcn.internal.cmd = adev->vcn.inst->external.cmd = + adev->vcn.inst[0].internal.cmd = adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); - adev->vcn.internal.nop = adev->vcn.inst->external.nop = + adev->vcn.inst[0].internal.nop = adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) { enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i); ring = &adev->vcn.inst->ring_enc[i]; @@ -188,7 +190,7 @@ static int vcn_v1_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - adev->vcn.pause_dpg_mode = vcn_v1_0_pause_dpg_mode; + adev->vcn.inst[0].pause_dpg_mode = vcn_v1_0_pause_dpg_mode; if (amdgpu_vcnfw_log) { volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; @@ -222,13 +224,13 @@ static int vcn_v1_0_sw_fini(struct amdgpu_ip_block *ip_block) int r; struct amdgpu_device *adev = ip_block->adev; - r = amdgpu_vcn_suspend(adev); + r = amdgpu_vcn_suspend(adev, 0); if (r) return r; jpeg_v1_0_sw_fini(ip_block); - r = amdgpu_vcn_sw_fini(adev); + r = amdgpu_vcn_sw_fini(adev, 0); kfree(adev->vcn.ip_dump); @@ -252,7 +254,7 @@ static int vcn_v1_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) { ring = &adev->vcn.inst->ring_enc[i]; r = amdgpu_ring_test_helper(ring); if (r) @@ -275,13 +277,14 @@ static int vcn_v1_0_hw_init(struct amdgpu_ip_block *ip_block) static int vcn_v1_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_vcn_inst *vinst = adev->vcn.inst; - cancel_delayed_work_sync(&adev->vcn.idle_work); + cancel_delayed_work_sync(&vinst->idle_work); if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || - (adev->vcn.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(VCN, 0, mmUVD_STATUS))) { - vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + (vinst->cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, 0, mmUVD_STATUS))) { + vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); } return 
0; @@ -300,17 +303,17 @@ static int vcn_v1_0_suspend(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; bool idle_work_unexecuted; - idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.idle_work); + idle_work_unexecuted = cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work); if (idle_work_unexecuted) { if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + amdgpu_dpm_enable_vcn(adev, false, 0); } r = vcn_v1_0_hw_fini(ip_block); if (r) return r; - r = amdgpu_vcn_suspend(adev); + r = amdgpu_vcn_suspend(adev, 0); return r; } @@ -326,7 +329,7 @@ static int vcn_v1_0_resume(struct amdgpu_ip_block *ip_block) { int r; - r = amdgpu_vcn_resume(ip_block->adev); + r = amdgpu_vcn_resume(ip_block->adev, 0); if (r) return r; @@ -338,13 +341,14 @@ static int vcn_v1_0_resume(struct amdgpu_ip_block *ip_block) /** * vcn_v1_0_mc_resume_spg_mode - memory controller programming * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Let the VCN memory controller know it's offsets */ -static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) +static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_vcn_inst *vinst) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); + struct amdgpu_device *adev = vinst->adev; + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -409,9 +413,10 @@ static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) adev->gfx.config.gb_addr_config); } -static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) +static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); + struct amdgpu_device *adev = vinst->adev; + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -484,12 +489,13 @@ static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) /** * vcn_v1_0_disable_clock_gating - disable VCN clock gating * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Disable clock gating for VCN block */ -static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev) +static void vcn_v1_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; uint32_t data; /* JPEG disable CGC */ @@ -610,12 +616,13 @@ static void vcn_v1_0_disable_clock_gating(struct amdgpu_device *adev) /** * vcn_v1_0_enable_clock_gating - enable VCN clock gating * - * @adev: amdgpu_device pointer + * @vinst: Pointer to the VCN instance structure * * Enable clock gating for VCN block */ -static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev) +static void vcn_v1_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; uint32_t data = 0; /* enable JPEG CGC */ @@ -679,8 +686,10 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev) WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); } -static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel) +static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst, + uint8_t sram_sel) { + struct amdgpu_device *adev = vinst->adev; uint32_t reg_data = 0; /* disable JPEG CGC */ @@ -733,8 +742,9 @@ static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t s WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_SUVD_CGC_CTRL, 0, 0xFFFFFFFF, sram_sel); } -static void vcn_1_0_disable_static_power_gating(struct 
amdgpu_device *adev) +static void vcn_1_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; uint32_t data = 0; if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { @@ -778,8 +788,9 @@ static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev) WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data); } -static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev) +static void vcn_1_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; uint32_t data = 0; if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { @@ -822,12 +833,13 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev) /** * vcn_v1_0_start_spg_mode - start VCN block * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Setup and start the VCN block */ -static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) +static int vcn_v1_0_start_spg_mode(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; @@ -836,13 +848,13 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) /* disable byte swapping */ lmi_swap_cntl = 0; - vcn_1_0_disable_static_power_gating(adev); + vcn_1_0_disable_static_power_gating(vinst); tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp); /* disable clock gating */ - vcn_v1_0_disable_clock_gating(adev); + vcn_v1_0_disable_clock_gating(vinst); /* disable interupt */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, @@ -884,7 +896,7 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); - vcn_v1_0_mc_resume_spg_mode(adev); + vcn_v1_0_mc_resume_spg_mode(vinst); WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK_1_0, 0x10); WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK_1_0, @@ -1000,8 +1012,9 @@ static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) return 0; } -static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) +static int vcn_v1_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; uint32_t lmi_swap_cntl; @@ -1009,7 +1022,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) /* disable byte swapping */ lmi_swap_cntl = 0; - vcn_1_0_enable_static_power_gating(adev); + vcn_1_0_enable_static_power_gating(vinst); /* enable dynamic power gating mode */ tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); @@ -1018,7 +1031,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp); /* enable clock gating */ - vcn_v1_0_clock_gating_dpg_mode(adev, 0); + vcn_v1_0_clock_gating_dpg_mode(vinst, 0); /* enable VCPU clock */ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); @@ -1067,7 +1080,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0xFFFFFFFF, 0); - vcn_v1_0_mc_resume_dpg_mode(adev); + vcn_v1_0_mc_resume_dpg_mode(vinst); WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_REG_XX_MASK, 0x10, 0xFFFFFFFF, 0); WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, 0x3, 0xFFFFFFFF, 0); @@ -1084,7 +1097,7 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) WREG32_SOC15_DPG_MODE_1_0(UVD, 0, 
mmUVD_MASTINT_EN, UVD_MASTINT_EN__VCPU_EN_MASK, UVD_MASTINT_EN__VCPU_EN_MASK, 0); - vcn_v1_0_clock_gating_dpg_mode(adev, 1); + vcn_v1_0_clock_gating_dpg_mode(vinst, 1); /* setup mmUVD_LMI_CTRL */ WREG32_SOC15_DPG_MODE_1_0(UVD, 0, mmUVD_LMI_CTRL, (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | @@ -1144,21 +1157,24 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) return 0; } -static int vcn_v1_0_start(struct amdgpu_device *adev) +static int vcn_v1_0_start(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + return (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ? - vcn_v1_0_start_dpg_mode(adev) : vcn_v1_0_start_spg_mode(adev); + vcn_v1_0_start_dpg_mode(vinst) : vcn_v1_0_start_spg_mode(vinst); } /** * vcn_v1_0_stop_spg_mode - stop VCN block * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * stop the VCN block */ -static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev) +static int vcn_v1_0_stop_spg_mode(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; int tmp; SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7); @@ -1198,13 +1214,14 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev) WREG32_SOC15(UVD, 0, mmUVD_STATUS, 0); - vcn_v1_0_enable_clock_gating(adev); - vcn_1_0_enable_static_power_gating(adev); + vcn_v1_0_enable_clock_gating(vinst); + vcn_1_0_enable_static_power_gating(vinst); return 0; } -static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev) +static int vcn_v1_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; uint32_t tmp; /* Wait for power status to be UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF */ @@ -1236,21 +1253,24 @@ static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev) return 0; } -static int vcn_v1_0_stop(struct amdgpu_device *adev) +static int vcn_v1_0_stop(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; int r; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - r = vcn_v1_0_stop_dpg_mode(adev); + r = vcn_v1_0_stop_dpg_mode(vinst); else - r = vcn_v1_0_stop_spg_mode(adev); + r = vcn_v1_0_stop_spg_mode(vinst); return r; } -static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, - int inst_idx, struct dpg_pause_state *new_state) +static int vcn_v1_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; int ret_code; uint32_t reg_data = 0; uint32_t reg_data2 = 0; @@ -1376,9 +1396,9 @@ static int vcn_v1_0_pause_dpg_mode(struct amdgpu_device *adev, return 0; } -static bool vcn_v1_0_is_idle(void *handle) +static bool vcn_v1_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE); } @@ -1394,20 +1414,21 @@ static int vcn_v1_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int vcn_v1_0_set_clockgating_state(void *handle, +static int vcn_v1_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_vcn_inst *vinst = adev->vcn.inst; bool enable = (state == AMD_CG_STATE_GATE); if (enable) { /* wait for STATUS to clear */ - if (!vcn_v1_0_is_idle(handle)) + if (!vcn_v1_0_is_idle(ip_block)) return -EBUSY; - vcn_v1_0_enable_clock_gating(adev); + 
vcn_v1_0_enable_clock_gating(vinst); } else { /* disable HW gating and enable Sw gating */ - vcn_v1_0_disable_clock_gating(adev); + vcn_v1_0_disable_clock_gating(vinst); } return 0; } @@ -1799,8 +1820,8 @@ static void vcn_v1_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t coun } } -static int vcn_v1_0_set_powergating_state(void *handle, - enum amd_powergating_state state) +static int vcn_v1_0_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state) { /* This doesn't actually powergate the VCN block. * That's done in the dpm code via the SMC. This @@ -1810,28 +1831,29 @@ static int vcn_v1_0_set_powergating_state(void *handle, * the smc and the hw blocks */ int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - if (state == adev->vcn.cur_state) + if (state == vinst->cur_state) return 0; if (state == AMD_PG_STATE_GATE) - ret = vcn_v1_0_stop(adev); + ret = vcn_v1_0_stop(vinst); else - ret = vcn_v1_0_start(adev); + ret = vcn_v1_0_start(vinst); if (!ret) - adev->vcn.cur_state = state; + vinst->cur_state = state; + return ret; } static void vcn_v1_0_idle_work_handler(struct work_struct *work) { - struct amdgpu_device *adev = - container_of(work, struct amdgpu_device, vcn.idle_work.work); + struct amdgpu_vcn_inst *vcn_inst = + container_of(work, struct amdgpu_vcn_inst, idle_work.work); + struct amdgpu_device *adev = vcn_inst->adev; unsigned int fences = 0, i; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) + for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) fences += amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]); if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { @@ -1847,7 +1869,7 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work) else new_state.jpeg = VCN_DPG_STATE__UNPAUSE; - adev->vcn.pause_dpg_mode(adev, 0, &new_state); + adev->vcn.inst->pause_dpg_mode(vcn_inst, &new_state); } fences += amdgpu_fence_count_emitted(adev->jpeg.inst->ring_dec); @@ -1856,21 +1878,21 @@ static void vcn_v1_0_idle_work_handler(struct work_struct *work) if (fences == 0) { amdgpu_gfx_off_ctrl(adev, true); if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + amdgpu_dpm_enable_vcn(adev, false, 0); else amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_GATE); } else { - schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT); + schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT); } } static void vcn_v1_0_ring_begin_use(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); + bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work); - mutex_lock(&adev->vcn.vcn1_jpeg1_workaround); + mutex_lock(&adev->vcn.inst[0].vcn1_jpeg1_workaround); if (amdgpu_fence_wait_empty(ring->adev->jpeg.inst->ring_dec)) DRM_ERROR("VCN dec: jpeg dec ring may not be empty\n"); @@ -1886,7 +1908,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks) if (set_clocks) { amdgpu_gfx_off_ctrl(adev, false); if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + amdgpu_dpm_enable_vcn(adev, true, 0); else amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_UNGATE); @@ -1896,7 +1918,7 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks) struct dpg_pause_state new_state; unsigned int fences = 0, i; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) + for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) fences += 
amdgpu_fence_count_emitted(&adev->vcn.inst->ring_enc[i]); if (fences) @@ -1914,14 +1936,14 @@ void vcn_v1_0_set_pg_for_begin_use(struct amdgpu_ring *ring, bool set_clocks) else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) new_state.jpeg = VCN_DPG_STATE__PAUSE; - adev->vcn.pause_dpg_mode(adev, 0, &new_state); + adev->vcn.inst->pause_dpg_mode(adev->vcn.inst, &new_state); } } void vcn_v1_0_ring_end_use(struct amdgpu_ring *ring) { - schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT); - mutex_unlock(&ring->adev->vcn.vcn1_jpeg1_workaround); + schedule_delayed_work(&ring->adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT); + mutex_unlock(&ring->adev->vcn.inst[0].vcn1_jpeg1_workaround); } static void vcn_v1_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) @@ -1996,7 +2018,7 @@ static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { .is_idle = vcn_v1_0_is_idle, .wait_for_idle = vcn_v1_0_wait_for_idle, .set_clockgating_state = vcn_v1_0_set_clockgating_state, - .set_powergating_state = vcn_v1_0_set_powergating_state, + .set_powergating_state = vcn_set_powergating_state, .dump_ip_state = vcn_v1_0_dump_ip_state, .print_ip_state = vcn_v1_0_print_ip_state, }; @@ -2055,11 +2077,11 @@ static int vcn_v1_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, uint32_t reg = amdgpu_ib_get_value(ib, i); uint32_t val = amdgpu_ib_get_value(ib, i + 1); - if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) { + if (reg == PACKET0(p->adev->vcn.inst[0].internal.data0, 0)) { msg_lo = val; - } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) { + } else if (reg == PACKET0(p->adev->vcn.inst[0].internal.data1, 0)) { msg_hi = val; - } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0)) { + } else if (reg == PACKET0(p->adev->vcn.inst[0].internal.cmd, 0)) { r = vcn_v1_0_validate_bo(p, job, ((u64)msg_hi) << 32 | msg_lo); if (r) @@ -2144,7 +2166,7 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev) { int i; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) + for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) adev->vcn.inst->ring_enc[i].funcs = &vcn_v1_0_enc_ring_vm_funcs; } @@ -2155,7 +2177,7 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = { static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 2; + adev->vcn.inst->irq.num_types = adev->vcn.inst[0].num_enc_rings + 2; adev->vcn.inst->irq.funcs = &vcn_v1_0_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c index e0322cbca3ec..8e7a36f26e9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_0.c @@ -92,10 +92,10 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_0[] = { static void vcn_v2_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v2_0_set_powergating_state(void *handle, - enum amd_powergating_state state); -static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, - int inst_idx, struct dpg_pause_state *new_state); +static int vcn_v2_0_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state); +static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state); static int vcn_v2_0_start_sriov(struct amdgpu_device *adev); /** * vcn_v2_0_early_init - set function pointers and load microcode @@ -110,15 +110,16 
@@ static int vcn_v2_0_early_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) - adev->vcn.num_enc_rings = 1; + adev->vcn.inst[0].num_enc_rings = 1; else - adev->vcn.num_enc_rings = 2; + adev->vcn.inst[0].num_enc_rings = 2; + adev->vcn.inst->set_pg_state = vcn_v2_0_set_pg_state; vcn_v2_0_set_dec_ring_funcs(adev); vcn_v2_0_set_enc_ring_funcs(adev); vcn_v2_0_set_irq_funcs(adev); - return amdgpu_vcn_early_init(adev); + return amdgpu_vcn_early_init(adev, 0); } /** @@ -145,7 +146,7 @@ static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block) return r; /* VCN ENC TRAP */ - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) { r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq); @@ -153,13 +154,13 @@ static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - r = amdgpu_vcn_sw_init(adev); + r = amdgpu_vcn_sw_init(adev, 0); if (r) return r; - amdgpu_vcn_setup_ucode(adev); + amdgpu_vcn_setup_ucode(adev, 0); - r = amdgpu_vcn_resume(adev); + r = amdgpu_vcn_resume(adev, 0); if (r) return r; @@ -175,25 +176,25 @@ static int vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; - adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; - adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; - adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; - adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; - adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; + adev->vcn.inst[0].internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; + adev->vcn.inst[0].internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; + adev->vcn.inst[0].internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; + adev->vcn.inst[0].internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; + adev->vcn.inst[0].internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; + adev->vcn.inst[0].internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; - adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; + adev->vcn.inst[0].internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; adev->vcn.inst->external.scratch9 = SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9); - adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; + adev->vcn.inst[0].internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; adev->vcn.inst->external.data0 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA0); - adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; + adev->vcn.inst[0].internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; adev->vcn.inst->external.data1 = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_DATA1); - adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; + adev->vcn.inst[0].internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; adev->vcn.inst->external.cmd = SOC15_REG_OFFSET(UVD, 0, mmUVD_GPCOM_VCPU_CMD); - adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; + adev->vcn.inst[0].internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; adev->vcn.inst->external.nop = SOC15_REG_OFFSET(UVD, 0, mmUVD_NO_OP); - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) { enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i); ring = &adev->vcn.inst->ring_enc[i]; @@ -210,7 +211,7 @@ static int 
vcn_v2_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - adev->vcn.pause_dpg_mode = vcn_v2_0_pause_dpg_mode; + adev->vcn.inst[0].pause_dpg_mode = vcn_v2_0_pause_dpg_mode; r = amdgpu_virt_alloc_mm_table(adev); if (r) @@ -254,11 +255,11 @@ static int vcn_v2_0_sw_fini(struct amdgpu_ip_block *ip_block) amdgpu_virt_free_mm_table(adev); - r = amdgpu_vcn_suspend(adev); + r = amdgpu_vcn_suspend(adev, 0); if (r) return r; - r = amdgpu_vcn_sw_fini(adev); + r = amdgpu_vcn_sw_fini(adev, 0); kfree(adev->vcn.ip_dump); @@ -292,7 +293,7 @@ static int vcn_v2_0_hw_init(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) ring->sched.ready = false; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) { ring = &adev->vcn.inst->ring_enc[i]; r = amdgpu_ring_test_helper(ring); if (r) @@ -312,13 +313,14 @@ static int vcn_v2_0_hw_init(struct amdgpu_ip_block *ip_block) static int vcn_v2_0_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_vcn_inst *vinst = adev->vcn.inst; - cancel_delayed_work_sync(&adev->vcn.idle_work); + cancel_delayed_work_sync(&vinst->idle_work); if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || - (adev->vcn.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(VCN, 0, mmUVD_STATUS))) - vcn_v2_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + (vinst->cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, 0, mmUVD_STATUS))) + vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); return 0; } @@ -338,7 +340,7 @@ static int vcn_v2_0_suspend(struct amdgpu_ip_block *ip_block) if (r) return r; - r = amdgpu_vcn_suspend(ip_block->adev); + r = amdgpu_vcn_suspend(ip_block->adev, 0); return r; } @@ -354,7 +356,7 @@ static int vcn_v2_0_resume(struct amdgpu_ip_block *ip_block) { int r; - r = amdgpu_vcn_resume(ip_block->adev); + r = amdgpu_vcn_resume(ip_block->adev, 0); if (r) return r; @@ -366,13 +368,14 @@ static int vcn_v2_0_resume(struct amdgpu_ip_block *ip_block) /** * vcn_v2_0_mc_resume - memory controller programming * - * @adev: amdgpu_device pointer + * @vinst: Pointer to the VCN instance structure * * Let the VCN memory controller know it's offsets */ -static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) +static void vcn_v2_0_mc_resume(struct amdgpu_vcn_inst *vinst) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); + struct amdgpu_device *adev = vinst->adev; + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); uint32_t offset; if (amdgpu_sriov_vf(adev)) @@ -426,9 +429,11 @@ static void vcn_v2_0_mc_resume(struct amdgpu_device *adev) WREG32_SOC15(UVD, 0, mmUVD_GFX10_ADDR_CONFIG, adev->gfx.config.gb_addr_config); } -static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirect) +static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, + bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); + struct amdgpu_device *adev = vinst->adev; + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -525,12 +530,13 @@ static void vcn_v2_0_mc_resume_dpg_mode(struct amdgpu_device *adev, bool indirec /** * vcn_v2_0_disable_clock_gating - disable VCN clock gating * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Disable clock gating for VCN block */ -static void vcn_v2_0_disable_clock_gating(struct amdgpu_device *adev) +static void vcn_v2_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = 
vinst->adev; uint32_t data; if (amdgpu_sriov_vf(adev)) @@ -634,9 +640,10 @@ static void vcn_v2_0_disable_clock_gating(struct amdgpu_device *adev) WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); } -static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev, +static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst, uint8_t sram_sel, uint8_t indirect) { + struct amdgpu_device *adev = vinst->adev; uint32_t reg_data = 0; /* enable sw clock gating control */ @@ -685,12 +692,13 @@ static void vcn_v2_0_clock_gating_dpg_mode(struct amdgpu_device *adev, /** * vcn_v2_0_enable_clock_gating - enable VCN clock gating * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Enable clock gating for VCN block */ -static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev) +static void vcn_v2_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; uint32_t data = 0; if (amdgpu_sriov_vf(adev)) @@ -743,8 +751,9 @@ static void vcn_v2_0_enable_clock_gating(struct amdgpu_device *adev) WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); } -static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev) +static void vcn_v2_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; uint32_t data = 0; if (amdgpu_sriov_vf(adev)) @@ -792,8 +801,9 @@ static void vcn_v2_0_disable_static_power_gating(struct amdgpu_device *adev) WREG32_SOC15(VCN, 0, mmUVD_POWER_STATUS, data); } -static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev) +static void vcn_v2_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; uint32_t data = 0; if (amdgpu_sriov_vf(adev)) @@ -834,13 +844,14 @@ static void vcn_v2_0_enable_static_power_gating(struct amdgpu_device *adev) } } -static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) +static int vcn_v2_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) { + struct amdgpu_device *adev = vinst->adev; volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; struct amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; - vcn_v2_0_enable_static_power_gating(adev); + vcn_v2_0_enable_static_power_gating(vinst); /* enable dynamic power gating mode */ tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); @@ -852,7 +863,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) adev->vcn.inst->dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst->dpg_sram_cpu_addr; /* enable clock gating */ - vcn_v2_0_clock_gating_dpg_mode(adev, 0, indirect); + vcn_v2_0_clock_gating_dpg_mode(vinst, 0, indirect); /* enable VCPU clock */ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); @@ -901,7 +912,7 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); - vcn_v2_0_mc_resume_dpg_mode(adev, indirect); + vcn_v2_0_mc_resume_dpg_mode(vinst, indirect); WREG32_SOC15_DPG_MODE(0, SOC15_DPG_MODE_OFFSET( UVD, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); @@ -969,8 +980,9 @@ static int vcn_v2_0_start_dpg_mode(struct amdgpu_device *adev, bool indirect) return 0; } -static int vcn_v2_0_start(struct amdgpu_device *adev) +static int vcn_v2_0_start(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst->fw_shared.cpu_addr; struct 
amdgpu_ring *ring = &adev->vcn.inst->ring_dec; uint32_t rb_bufsz, tmp; @@ -978,19 +990,19 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) int i, j, r; if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + amdgpu_dpm_enable_vcn(adev, true, 0); if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - return vcn_v2_0_start_dpg_mode(adev, adev->vcn.indirect_sram); + return vcn_v2_0_start_dpg_mode(vinst, adev->vcn.inst->indirect_sram); - vcn_v2_0_disable_static_power_gating(adev); + vcn_v2_0_disable_static_power_gating(vinst); /* set uvd status busy */ tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp); /*SW clock gating */ - vcn_v2_0_disable_clock_gating(adev); + vcn_v2_0_disable_clock_gating(vinst); /* enable VCPU clock */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), @@ -1034,7 +1046,7 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); - vcn_v2_0_mc_resume(adev); + vcn_v2_0_mc_resume(vinst); /* release VCPU reset to boot */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0, @@ -1142,12 +1154,13 @@ static int vcn_v2_0_start(struct amdgpu_device *adev) return 0; } -static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev) +static int vcn_v2_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; - vcn_v2_0_pause_dpg_mode(adev, 0, &state); + vcn_v2_0_pause_dpg_mode(vinst, &state); /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); @@ -1172,13 +1185,14 @@ static int vcn_v2_0_stop_dpg_mode(struct amdgpu_device *adev) return 0; } -static int vcn_v2_0_stop(struct amdgpu_device *adev) +static int vcn_v2_0_stop(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; uint32_t tmp; int r; if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v2_0_stop_dpg_mode(adev); + r = vcn_v2_0_stop_dpg_mode(vinst); if (r) return r; goto power_off; @@ -1230,19 +1244,21 @@ static int vcn_v2_0_stop(struct amdgpu_device *adev) /* clear status */ WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0); - vcn_v2_0_enable_clock_gating(adev); - vcn_v2_0_enable_static_power_gating(adev); + vcn_v2_0_enable_clock_gating(vinst); + vcn_v2_0_enable_static_power_gating(vinst); power_off: if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + amdgpu_dpm_enable_vcn(adev, false, 0); return 0; } -static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, - int inst_idx, struct dpg_pause_state *new_state) +static int vcn_v2_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; struct amdgpu_ring *ring; uint32_t reg_data = 0; int ret_code; @@ -1317,9 +1333,9 @@ static int vcn_v2_0_pause_dpg_mode(struct amdgpu_device *adev, return 0; } -static bool vcn_v2_0_is_idle(void *handle) +static bool vcn_v2_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE); } @@ -1335,10 +1351,10 @@ static int vcn_v2_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int vcn_v2_0_set_clockgating_state(void *handle, +static int vcn_v2_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, 
enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); if (amdgpu_sriov_vf(adev)) @@ -1346,12 +1362,12 @@ static int vcn_v2_0_set_clockgating_state(void *handle, if (enable) { /* wait for STATUS to clear */ - if (!vcn_v2_0_is_idle(handle)) + if (!vcn_v2_0_is_idle(ip_block)) return -EBUSY; - vcn_v2_0_enable_clock_gating(adev); + vcn_v2_0_enable_clock_gating(&adev->vcn.inst[0]); } else { /* disable HW gating and enable Sw gating */ - vcn_v2_0_disable_clock_gating(adev); + vcn_v2_0_disable_clock_gating(&adev->vcn.inst[0]); } return 0; } @@ -1421,9 +1437,9 @@ void vcn_v2_0_dec_ring_insert_start(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0)); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1)); } @@ -1438,7 +1454,7 @@ void vcn_v2_0_dec_ring_insert_end(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[0].internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_END << 1)); } @@ -1458,7 +1474,7 @@ void vcn_v2_0_dec_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) WARN_ON(ring->wptr % 2 || count % 2); for (i = 0; i < count / 2; i++) { - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.nop, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.nop, 0)); amdgpu_ring_write(ring, 0); } } @@ -1479,25 +1495,25 @@ void vcn_v2_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, struct amdgpu_device *adev = ring->adev; WARN_ON(flags & AMDGPU_FENCE_FLAG_64BIT); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.context_id, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.context_id, 0)); amdgpu_ring_write(ring, seq); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0)); amdgpu_ring_write(ring, addr & 0xffffffff); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0)); amdgpu_ring_write(ring, upper_32_bits(addr) & 0xff); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_FENCE << 1)); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0)); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0)); amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_TRAP << 1)); } @@ -1520,14 +1536,14 @@ void vcn_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_device *adev = ring->adev; unsigned vmid = 
AMDGPU_JOB_GET_VMID(job); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_vmid, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_vmid, 0)); amdgpu_ring_write(ring, vmid); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_low, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_bar_low, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_bar_high, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_bar_high, 0)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.ib_size, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.ib_size, 0)); amdgpu_ring_write(ring, ib->length_dw); } @@ -1536,16 +1552,16 @@ void vcn_v2_0_dec_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, { struct amdgpu_device *adev = ring->adev; - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0)); amdgpu_ring_write(ring, reg << 2); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0)); amdgpu_ring_write(ring, val); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.gp_scratch8, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.gp_scratch8, 0)); amdgpu_ring_write(ring, mask); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_REG_READ_COND_WAIT << 1)); } @@ -1570,13 +1586,13 @@ void vcn_v2_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, { struct amdgpu_device *adev = ring->adev; - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data0, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data0, 0)); amdgpu_ring_write(ring, reg << 2); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.data1, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.data1, 0)); amdgpu_ring_write(ring, val); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_WRITE_REG << 1)); } @@ -1777,9 +1793,9 @@ int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring) r = amdgpu_ring_alloc(ring, 4); if (r) return r; - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.cmd, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.cmd, 0)); amdgpu_ring_write(ring, VCN_DEC_KMD_CMD | (VCN_DEC_CMD_PACKET_START << 1)); - amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0)); + amdgpu_ring_write(ring, PACKET0(adev->vcn.inst[ring->me].internal.scratch9, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { @@ -1796,8 +1812,8 @@ int vcn_v2_0_dec_ring_test_ring(struct amdgpu_ring *ring) } -static int vcn_v2_0_set_powergating_state(void *handle, - enum amd_powergating_state state) +static int vcn_v2_0_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state) { /* This doesn't actually powergate the VCN block. * That's done in the dpm code via the SMC. 
This @@ -1807,23 +1823,24 @@ static int vcn_v2_0_set_powergating_state(void *handle, * the smc and the hw blocks */ int ret; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = vinst->adev; if (amdgpu_sriov_vf(adev)) { - adev->vcn.cur_state = AMD_PG_STATE_UNGATE; + vinst->cur_state = AMD_PG_STATE_UNGATE; return 0; } - if (state == adev->vcn.cur_state) + if (state == vinst->cur_state) return 0; if (state == AMD_PG_STATE_GATE) - ret = vcn_v2_0_stop(adev); + ret = vcn_v2_0_stop(vinst); else - ret = vcn_v2_0_start(adev); + ret = vcn_v2_0_start(vinst); if (!ret) - adev->vcn.cur_state = state; + vinst->cur_state = state; + return ret; } @@ -1862,7 +1879,7 @@ static int vcn_v2_0_start_mmsch(struct amdgpu_device *adev, adev->vcn.inst->ring_dec.wptr_old = 0; vcn_v2_0_dec_ring_set_wptr(&adev->vcn.inst->ring_dec); - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) { adev->vcn.inst->ring_enc[i].wptr = 0; adev->vcn.inst->ring_enc[i].wptr_old = 0; vcn_v2_0_enc_ring_set_wptr(&adev->vcn.inst->ring_enc[i]); @@ -1920,7 +1937,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) init_table += header->vcn_table_offset; - size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[0]->size + 4); + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[0].fw->size + 4); MMSCH_V2_0_INSERT_DIRECT_RD_MOD_WT( SOC15_REG_OFFSET(UVD, i, mmUVD_STATUS), @@ -1988,7 +2005,7 @@ static int vcn_v2_0_start_sriov(struct amdgpu_device *adev) SOC15_REG_OFFSET(UVD, i, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE); - for (r = 0; r < adev->vcn.num_enc_rings; ++r) { + for (r = 0; r < adev->vcn.inst[0].num_enc_rings; ++r) { ring = &adev->vcn.inst->ring_enc[r]; ring->wptr = 0; MMSCH_V2_0_INSERT_DIRECT_WT( @@ -2104,7 +2121,7 @@ static const struct amd_ip_funcs vcn_v2_0_ip_funcs = { .is_idle = vcn_v2_0_is_idle, .wait_for_idle = vcn_v2_0_wait_for_idle, .set_clockgating_state = vcn_v2_0_set_clockgating_state, - .set_powergating_state = vcn_v2_0_set_powergating_state, + .set_powergating_state = vcn_set_powergating_state, .dump_ip_state = vcn_v2_0_dump_ip_state, .print_ip_state = vcn_v2_0_print_ip_state, }; @@ -2177,7 +2194,7 @@ static void vcn_v2_0_set_enc_ring_funcs(struct amdgpu_device *adev) { int i; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) + for (i = 0; i < adev->vcn.inst[0].num_enc_rings; ++i) adev->vcn.inst->ring_enc[i].funcs = &vcn_v2_0_enc_ring_vm_funcs; } @@ -2188,7 +2205,7 @@ static const struct amdgpu_irq_src_funcs vcn_v2_0_irq_funcs = { static void vcn_v2_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->vcn.inst->irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst->irq.num_types = adev->vcn.inst[0].num_enc_rings + 1; adev->vcn.inst->irq.funcs = &vcn_v2_0_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 6aa08281d094..d716510b8dd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -95,10 +95,10 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_2_5[] = { static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v2_5_set_powergating_state(void *handle, - enum amd_powergating_state state); -static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev, - int inst_idx, struct dpg_pause_state *new_state); +static int vcn_v2_5_set_pg_state(struct amdgpu_vcn_inst 
*vinst, + enum amd_powergating_state state); +static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state); static int vcn_v2_5_sriov_start(struct amdgpu_device *adev); static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev); @@ -107,6 +107,133 @@ static int amdgpu_ih_clientid_vcns[] = { SOC15_IH_CLIENTID_VCN1 }; +static void vcn_v2_5_idle_work_handler(struct work_struct *work) +{ + struct amdgpu_vcn_inst *vcn_inst = + container_of(work, struct amdgpu_vcn_inst, idle_work.work); + struct amdgpu_device *adev = vcn_inst->adev; + unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0}; + unsigned int i, j; + int r = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *v = &adev->vcn.inst[i]; + + if (adev->vcn.harvest_config & (1 << i)) + continue; + + for (j = 0; j < v->num_enc_rings; ++j) + fence[i] += amdgpu_fence_count_emitted(&v->ring_enc[j]); + + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + !v->using_unified_queue) { + struct dpg_pause_state new_state; + + if (fence[i] || + unlikely(atomic_read(&v->dpg_enc_submission_cnt))) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + + v->pause_dpg_mode(v, &new_state); + } + + fence[i] += amdgpu_fence_count_emitted(&v->ring_dec); + fences += fence[i]; + + } + + if (!fences && !atomic_read(&adev->vcn.inst[0].total_submission_cnt)) { + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_GATE); + mutex_lock(&adev->vcn.workload_profile_mutex); + if (adev->vcn.workload_profile_active) { + r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, + false); + if (r) + dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r); + adev->vcn.workload_profile_active = false; + } + mutex_unlock(&adev->vcn.workload_profile_mutex); + } else { + schedule_delayed_work(&adev->vcn.inst[0].idle_work, VCN_IDLE_TIMEOUT); + } +} + +static void vcn_v2_5_ring_begin_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_vcn_inst *v = &adev->vcn.inst[ring->me]; + int r = 0; + + atomic_inc(&adev->vcn.inst[0].total_submission_cnt); + + cancel_delayed_work_sync(&adev->vcn.inst[0].idle_work); + + /* We can safely return early here because we've cancelled the + * the delayed work so there is no one else to set it to false + * and we don't care if someone else sets it to true. 
+ */ + if (adev->vcn.workload_profile_active) + goto pg_lock; + + mutex_lock(&adev->vcn.workload_profile_mutex); + if (!adev->vcn.workload_profile_active) { + r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO, + true); + if (r) + dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r); + adev->vcn.workload_profile_active = true; + } + mutex_unlock(&adev->vcn.workload_profile_mutex); + +pg_lock: + mutex_lock(&adev->vcn.inst[0].vcn_pg_lock); + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, + AMD_PG_STATE_UNGATE); + + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + !v->using_unified_queue) { + struct dpg_pause_state new_state; + + if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { + atomic_inc(&v->dpg_enc_submission_cnt); + new_state.fw_based = VCN_DPG_STATE__PAUSE; + } else { + unsigned int fences = 0; + unsigned int i; + + for (i = 0; i < v->num_enc_rings; ++i) + fences += amdgpu_fence_count_emitted(&v->ring_enc[i]); + + if (fences || atomic_read(&v->dpg_enc_submission_cnt)) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + } + v->pause_dpg_mode(v, &new_state); + } + mutex_unlock(&adev->vcn.inst[0].vcn_pg_lock); +} + +static void vcn_v2_5_ring_end_use(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC && + !adev->vcn.inst[ring->me].using_unified_queue) + atomic_dec(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt); + + atomic_dec(&adev->vcn.inst[0].total_submission_cnt); + + schedule_delayed_work(&adev->vcn.inst[0].idle_work, + VCN_IDLE_TIMEOUT); +} + /** * vcn_v2_5_early_init - set function pointers and load microcode * @@ -118,11 +245,13 @@ static int amdgpu_ih_clientid_vcns[] = { static int vcn_v2_5_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int i, r; if (amdgpu_sriov_vf(adev)) { adev->vcn.num_vcn_inst = 2; adev->vcn.harvest_config = 0; - adev->vcn.num_enc_rings = 1; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + adev->vcn.inst[i].num_enc_rings = 1; } else { u32 harvest; int i; @@ -131,13 +260,12 @@ static int vcn_v2_5_early_init(struct amdgpu_ip_block *ip_block) harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING); if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK) adev->vcn.harvest_config |= 1 << i; + adev->vcn.inst[i].num_enc_rings = 2; } if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 | - AMDGPU_VCN_HARVEST_VCN1)) + AMDGPU_VCN_HARVEST_VCN1)) /* both instances are harvested, disable the block */ return -ENOENT; - - adev->vcn.num_enc_rings = 2; } vcn_v2_5_set_dec_ring_funcs(adev); @@ -145,7 +273,15 @@ static int vcn_v2_5_early_init(struct amdgpu_ip_block *ip_block) vcn_v2_5_set_irq_funcs(adev); vcn_v2_5_set_ras_funcs(adev); - return amdgpu_vcn_early_init(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + adev->vcn.inst[i].set_pg_state = vcn_v2_5_set_pg_state; + + r = amdgpu_vcn_early_init(adev, i); + if (r) + return r; + } + + return 0; } /** @@ -164,6 +300,8 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; for (j = 0; j < adev->vcn.num_vcn_inst; j++) { + volatile struct amdgpu_fw_shared *fw_shared; + if (adev->vcn.harvest_config & (1 << j)) continue; /* VCN 
DEC TRAP */ @@ -173,7 +311,7 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) return r; /* VCN ENC TRAP */ - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) { r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j], i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq); if (r) @@ -185,39 +323,36 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) VCN_2_6__SRCID_UVD_POISON, &adev->vcn.inst[j].ras_poison_irq); if (r) return r; - } - r = amdgpu_vcn_sw_init(adev); - if (r) - return r; + r = amdgpu_vcn_sw_init(adev, j); + if (r) + return r; - amdgpu_vcn_setup_ucode(adev); + /* Override the work func */ + adev->vcn.inst[j].idle_work.work.func = vcn_v2_5_idle_work_handler; - r = amdgpu_vcn_resume(adev); - if (r) - return r; + amdgpu_vcn_setup_ucode(adev, j); - for (j = 0; j < adev->vcn.num_vcn_inst; j++) { - volatile struct amdgpu_fw_shared *fw_shared; + r = amdgpu_vcn_resume(adev, j); + if (r) + return r; - if (adev->vcn.harvest_config & (1 << j)) - continue; - adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; - adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; - adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; - adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; - adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; - adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; - - adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; + adev->vcn.inst[j].internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; + adev->vcn.inst[j].internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; + adev->vcn.inst[j].internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; + adev->vcn.inst[j].internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; + adev->vcn.inst[j].internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; + adev->vcn.inst[j].internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; + + adev->vcn.inst[j].internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(VCN, j, mmUVD_SCRATCH9); - adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; + adev->vcn.inst[j].internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA0); - adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; + adev->vcn.inst[j].internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA1); - adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; + adev->vcn.inst[j].internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_CMD); - adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; + adev->vcn.inst[j].internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(VCN, j, mmUVD_NO_OP); ring = &adev->vcn.inst[j].ring_dec; @@ -237,7 +372,7 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) { enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i); ring = &adev->vcn.inst[j].ring_enc[i]; @@ -265,6 +400,9 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) if (amdgpu_vcnfw_log) 
amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.inst[j].pause_dpg_mode = vcn_v2_5_pause_dpg_mode; } if (amdgpu_sriov_vf(adev)) { @@ -273,9 +411,6 @@ static int vcn_v2_5_sw_init(struct amdgpu_ip_block *ip_block) return r; } - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode; - r = amdgpu_vcn_ras_sw_init(adev); if (r) return r; @@ -319,15 +454,18 @@ static int vcn_v2_5_sw_fini(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) amdgpu_virt_free_mm_table(adev); - r = amdgpu_vcn_suspend(adev); - if (r) - return r; - - r = amdgpu_vcn_sw_fini(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(adev, i); + if (r) + return r; + r = amdgpu_vcn_sw_fini(adev, i); + if (r) + return r; + } kfree(adev->vcn.ip_dump); - return r; + return 0; } /** @@ -366,7 +504,7 @@ static int vcn_v2_5_hw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) { ring = &adev->vcn.inst[j].ring_enc[i]; r = amdgpu_ring_test_helper(ring); if (r) @@ -390,19 +528,21 @@ static int vcn_v2_5_hw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int i; - cancel_delayed_work_sync(&adev->vcn.idle_work); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + if (adev->vcn.harvest_config & (1 << i)) continue; + cancel_delayed_work_sync(&vinst->idle_work); + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || - (adev->vcn.cur_state != AMD_PG_STATE_GATE && + (vinst->cur_state != AMD_PG_STATE_GATE && RREG32_SOC15(VCN, i, mmUVD_STATUS))) - vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) - amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0); + amdgpu_irq_put(adev, &vinst->ras_poison_irq, 0); } return 0; @@ -417,15 +557,20 @@ static int vcn_v2_5_hw_fini(struct amdgpu_ip_block *ip_block) */ static int vcn_v2_5_suspend(struct amdgpu_ip_block *ip_block) { - int r; + struct amdgpu_device *adev = ip_block->adev; + int r, i; r = vcn_v2_5_hw_fini(ip_block); if (r) return r; - r = amdgpu_vcn_suspend(ip_block->adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(ip_block->adev, i); + if (r) + return r; + } - return r; + return 0; } /** @@ -437,11 +582,14 @@ static int vcn_v2_5_suspend(struct amdgpu_ip_block *ip_block) */ static int vcn_v2_5_resume(struct amdgpu_ip_block *ip_block) { - int r; + struct amdgpu_device *adev = ip_block->adev; + int r, i; - r = amdgpu_vcn_resume(ip_block->adev); - if (r) - return r; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_resume(ip_block->adev, i); + if (r) + return r; + } r = vcn_v2_5_hw_init(ip_block); @@ -451,70 +599,72 @@ static int vcn_v2_5_resume(struct amdgpu_ip_block *ip_block) /** * vcn_v2_5_mc_resume - memory controller programming * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Let the VCN memory controller know it's offsets */ -static void vcn_v2_5_mc_resume(struct amdgpu_device *adev) +static void vcn_v2_5_mc_resume(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; uint32_t size; uint32_t offset; - int i; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - size = 
AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); - /* cache window 0: fw */ - if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { - WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo)); - WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi)); - WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0); - offset = 0; - } else { - WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.inst[i].gpu_addr)); - WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst[i].gpu_addr)); - offset = size; - WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, - AMDGPU_UVD_FIRMWARE_OFFSET >> 3); - } - WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size); + if (adev->vcn.harvest_config & (1 << i)) + return; - /* cache window 1: stack */ - WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.inst[i].gpu_addr + offset)); - WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst[i].gpu_addr + offset)); - WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0); - WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); - - /* cache window 2: context */ - WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); - WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); - WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0); - WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); - - /* non-cache window */ - WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); - WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); - WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0); - WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0, - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared))); + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo)); + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi)); + WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr)); + offset = size; + WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); } + WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr + offset)); + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr + offset)); + WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + 
WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + + /* non-cache window */ + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[i].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0, + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared))); } -static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, + bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4); + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -611,123 +761,124 @@ static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx /** * vcn_v2_5_disable_clock_gating - disable VCN clock gating * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Disable clock gating for VCN block */ -static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev) +static void vcn_v2_5_disable_clock_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; uint32_t data; - int i; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - /* UVD disable CGC */ - data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); - if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) - data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; - else - data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; - data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; - data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); - - data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE); - data &= ~(UVD_CGC_GATE__SYS_MASK - | UVD_CGC_GATE__UDEC_MASK - | UVD_CGC_GATE__MPEG2_MASK - | UVD_CGC_GATE__REGS_MASK - | UVD_CGC_GATE__RBC_MASK - | UVD_CGC_GATE__LMI_MC_MASK - | UVD_CGC_GATE__LMI_UMC_MASK - | UVD_CGC_GATE__IDCT_MASK - | UVD_CGC_GATE__MPRD_MASK - | UVD_CGC_GATE__MPC_MASK - | UVD_CGC_GATE__LBSI_MASK - | UVD_CGC_GATE__LRBBM_MASK - | UVD_CGC_GATE__UDEC_RE_MASK - | UVD_CGC_GATE__UDEC_CM_MASK - | UVD_CGC_GATE__UDEC_IT_MASK - | UVD_CGC_GATE__UDEC_DB_MASK - | UVD_CGC_GATE__UDEC_MP_MASK - | UVD_CGC_GATE__WCB_MASK - | UVD_CGC_GATE__VCPU_MASK - | UVD_CGC_GATE__MMSCH_MASK); - - WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data); - - SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF); - - data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); - data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK - | UVD_CGC_CTRL__UDEC_CM_MODE_MASK - | UVD_CGC_CTRL__UDEC_IT_MODE_MASK - | UVD_CGC_CTRL__UDEC_DB_MODE_MASK - | UVD_CGC_CTRL__UDEC_MP_MODE_MASK - | UVD_CGC_CTRL__SYS_MODE_MASK - | UVD_CGC_CTRL__UDEC_MODE_MASK - | UVD_CGC_CTRL__MPEG2_MODE_MASK - | UVD_CGC_CTRL__REGS_MODE_MASK - | UVD_CGC_CTRL__RBC_MODE_MASK - | UVD_CGC_CTRL__LMI_MC_MODE_MASK - | UVD_CGC_CTRL__LMI_UMC_MODE_MASK - | UVD_CGC_CTRL__IDCT_MODE_MASK - | UVD_CGC_CTRL__MPRD_MODE_MASK - | UVD_CGC_CTRL__MPC_MODE_MASK - | UVD_CGC_CTRL__LBSI_MODE_MASK - | UVD_CGC_CTRL__LRBBM_MODE_MASK - | UVD_CGC_CTRL__WCB_MODE_MASK - 
| UVD_CGC_CTRL__VCPU_MODE_MASK - | UVD_CGC_CTRL__MMSCH_MODE_MASK); - WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); - - /* turn on */ - data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE); - data |= (UVD_SUVD_CGC_GATE__SRE_MASK - | UVD_SUVD_CGC_GATE__SIT_MASK - | UVD_SUVD_CGC_GATE__SMP_MASK - | UVD_SUVD_CGC_GATE__SCM_MASK - | UVD_SUVD_CGC_GATE__SDB_MASK - | UVD_SUVD_CGC_GATE__SRE_H264_MASK - | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK - | UVD_SUVD_CGC_GATE__SIT_H264_MASK - | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK - | UVD_SUVD_CGC_GATE__SCM_H264_MASK - | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK - | UVD_SUVD_CGC_GATE__SDB_H264_MASK - | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK - | UVD_SUVD_CGC_GATE__SCLR_MASK - | UVD_SUVD_CGC_GATE__UVD_SC_MASK - | UVD_SUVD_CGC_GATE__ENT_MASK - | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK - | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK - | UVD_SUVD_CGC_GATE__SITE_MASK - | UVD_SUVD_CGC_GATE__SRE_VP9_MASK - | UVD_SUVD_CGC_GATE__SCM_VP9_MASK - | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK - | UVD_SUVD_CGC_GATE__SDB_VP9_MASK - | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); - WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data); - - data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); - data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK - | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK - | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK - | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK - | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK - | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK - | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK - | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + if (adev->vcn.harvest_config & (1 << i)) + return; + /* UVD disable CGC */ + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE); + data &= ~(UVD_CGC_GATE__SYS_MASK + | UVD_CGC_GATE__UDEC_MASK + | UVD_CGC_GATE__MPEG2_MASK + | UVD_CGC_GATE__REGS_MASK + | UVD_CGC_GATE__RBC_MASK + | UVD_CGC_GATE__LMI_MC_MASK + | UVD_CGC_GATE__LMI_UMC_MASK + | UVD_CGC_GATE__IDCT_MASK + | UVD_CGC_GATE__MPRD_MASK + | UVD_CGC_GATE__MPC_MASK + | UVD_CGC_GATE__LBSI_MASK + | UVD_CGC_GATE__LRBBM_MASK + | UVD_CGC_GATE__UDEC_RE_MASK + | UVD_CGC_GATE__UDEC_CM_MASK + | UVD_CGC_GATE__UDEC_IT_MASK + | UVD_CGC_GATE__UDEC_DB_MASK + | UVD_CGC_GATE__UDEC_MP_MASK + | UVD_CGC_GATE__WCB_MASK + | UVD_CGC_GATE__VCPU_MASK + | UVD_CGC_GATE__MMSCH_MASK); + + WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data); + + SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0, 0xFFFFFFFF); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK + | UVD_CGC_CTRL__MMSCH_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + /* turn on */ + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE); + data |= (UVD_SUVD_CGC_GATE__SRE_MASK + | 
UVD_SUVD_CGC_GATE__SIT_MASK + | UVD_SUVD_CGC_GATE__SMP_MASK + | UVD_SUVD_CGC_GATE__SCM_MASK + | UVD_SUVD_CGC_GATE__SDB_MASK + | UVD_SUVD_CGC_GATE__SRE_H264_MASK + | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK + | UVD_SUVD_CGC_GATE__SIT_H264_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCM_H264_MASK + | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK + | UVD_SUVD_CGC_GATE__SDB_H264_MASK + | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK + | UVD_SUVD_CGC_GATE__SCLR_MASK + | UVD_SUVD_CGC_GATE__UVD_SC_MASK + | UVD_SUVD_CGC_GATE__ENT_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK + | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK + | UVD_SUVD_CGC_GATE__SITE_MASK + | UVD_SUVD_CGC_GATE__SRE_VP9_MASK + | UVD_SUVD_CGC_GATE__SCM_VP9_MASK + | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK + | UVD_SUVD_CGC_GATE__SDB_VP9_MASK + | UVD_SUVD_CGC_GATE__IME_HEVC_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data); + + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); + data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK | UVD_SUVD_CGC_CTRL__IME_MODE_MASK - | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); - WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); - } + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); } -static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev, - uint8_t sram_sel, int inst_idx, uint8_t indirect) +static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst, + uint8_t sram_sel, uint8_t indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t reg_data = 0; /* enable sw clock gating control */ @@ -776,68 +927,69 @@ static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev, /** * vcn_v2_5_enable_clock_gating - enable VCN clock gating * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Enable clock gating for VCN block */ -static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev) +static void vcn_v2_5_enable_clock_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; uint32_t data = 0; - int i; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - /* enable UVD CGC */ - data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); - if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) - data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; - else - data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; - data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; - data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; - WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); - - data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); - data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK - | UVD_CGC_CTRL__UDEC_CM_MODE_MASK - | UVD_CGC_CTRL__UDEC_IT_MODE_MASK - | UVD_CGC_CTRL__UDEC_DB_MODE_MASK - | UVD_CGC_CTRL__UDEC_MP_MODE_MASK - | UVD_CGC_CTRL__SYS_MODE_MASK - | UVD_CGC_CTRL__UDEC_MODE_MASK - | UVD_CGC_CTRL__MPEG2_MODE_MASK - | UVD_CGC_CTRL__REGS_MODE_MASK - | UVD_CGC_CTRL__RBC_MODE_MASK - | UVD_CGC_CTRL__LMI_MC_MODE_MASK - | UVD_CGC_CTRL__LMI_UMC_MODE_MASK - | UVD_CGC_CTRL__IDCT_MODE_MASK - | UVD_CGC_CTRL__MPRD_MODE_MASK - | UVD_CGC_CTRL__MPC_MODE_MASK - | UVD_CGC_CTRL__LBSI_MODE_MASK - | UVD_CGC_CTRL__LRBBM_MODE_MASK - | UVD_CGC_CTRL__WCB_MODE_MASK - | UVD_CGC_CTRL__VCPU_MODE_MASK); - WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, 
data); - - data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); - data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK - | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK - | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK - | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK - | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK - | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK - | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK - | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK - | UVD_SUVD_CGC_CTRL__IME_MODE_MASK - | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); - WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); - } + if (adev->vcn.harvest_config & (1 << i)) + return; + /* enable UVD CGC */ + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL); + data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK + | UVD_CGC_CTRL__UDEC_CM_MODE_MASK + | UVD_CGC_CTRL__UDEC_IT_MODE_MASK + | UVD_CGC_CTRL__UDEC_DB_MODE_MASK + | UVD_CGC_CTRL__UDEC_MP_MODE_MASK + | UVD_CGC_CTRL__SYS_MODE_MASK + | UVD_CGC_CTRL__UDEC_MODE_MASK + | UVD_CGC_CTRL__MPEG2_MODE_MASK + | UVD_CGC_CTRL__REGS_MODE_MASK + | UVD_CGC_CTRL__RBC_MODE_MASK + | UVD_CGC_CTRL__LMI_MC_MODE_MASK + | UVD_CGC_CTRL__LMI_UMC_MODE_MASK + | UVD_CGC_CTRL__IDCT_MODE_MASK + | UVD_CGC_CTRL__MPRD_MODE_MASK + | UVD_CGC_CTRL__MPC_MODE_MASK + | UVD_CGC_CTRL__LBSI_MODE_MASK + | UVD_CGC_CTRL__LRBBM_MODE_MASK + | UVD_CGC_CTRL__WCB_MODE_MASK + | UVD_CGC_CTRL__VCPU_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data); + + data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL); + data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK + | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK + | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK + | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK + | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK + | UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK + | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK + | UVD_SUVD_CGC_CTRL__IME_MODE_MASK + | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK); + WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data); } -static void vcn_v2_6_enable_ras(struct amdgpu_device *adev, int inst_idx, +static void vcn_v2_6_enable_ras(struct amdgpu_vcn_inst *vinst, bool indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t tmp; if (amdgpu_ip_version(adev, UVD_HWIP, 0) != IP_VERSION(2, 6, 0)) @@ -862,8 +1014,10 @@ static void vcn_v2_6_enable_ras(struct amdgpu_device *adev, int inst_idx, tmp, 0, indirect); } -static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +static int vcn_v2_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; @@ -881,7 +1035,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; /* enable clock gating */ - vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); + vcn_v2_5_clock_gating_dpg_mode(vinst, 0, indirect); /* enable VCPU clock */ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); @@ -930,7 +1084,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo (0x1 << 
UVD_MPC_SET_MUX__SET_1__SHIFT) | (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); - vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect); + vcn_v2_5_mc_resume_dpg_mode(vinst, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect); @@ -941,7 +1095,7 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect); - vcn_v2_6_enable_ras(adev, inst_idx, indirect); + vcn_v2_6_enable_ras(vinst, indirect); /* unblock VCPU register access */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( @@ -1006,195 +1160,187 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo return 0; } -static int vcn_v2_5_start(struct amdgpu_device *adev) +static int vcn_v2_5_start(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; + volatile struct amdgpu_fw_shared *fw_shared = + adev->vcn.inst[i].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; - int i, j, k, r; + int j, k, r; + + if (adev->vcn.harvest_config & (1 << i)) + return 0; if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + amdgpu_dpm_enable_vcn(adev, true, i); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram); - continue; - } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return vcn_v2_5_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram); - /* disable register anti-hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0, - ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); - /* set uvd status busy */ - tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; - WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp); - } + /* set uvd status busy */ + tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp); if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) return 0; - /*SW clock gating */ - vcn_v2_5_disable_clock_gating(adev); + /* SW clock gating */ + vcn_v2_5_disable_clock_gating(vinst); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - /* enable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), - UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); - - /* disable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0, - ~UVD_MASTINT_EN__VCPU_EN_MASK); - - /* setup mmUVD_LMI_CTRL */ - tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL); - tmp &= ~0xff; - WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8| - UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__MASK_MC_URGENT_MASK | - UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); - - /* setup mmUVD_MPC_CNTL */ - tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL); - tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; - tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; - WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp); - - /* setup UVD_MPC_SET_MUXA0 */ - WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0, - ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | - (0x4 
<< UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); - - /* setup UVD_MPC_SET_MUXB0 */ - WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0, - ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); - - /* setup mmUVD_MPC_SET_MUX */ - WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX, - ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | - (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); - } + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); - vcn_v2_5_mc_resume(adev); + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - if (adev->vcn.harvest_config & (1 << i)) - continue; - /* VCN global tiling registers */ - WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); + /* setup mmUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL); + tmp &= ~0xff; + WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8| + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + /* setup mmUVD_MPC_CNTL */ + tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp); + + /* setup UVD_MPC_SET_MUXA0 */ + WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + /* setup UVD_MPC_SET_MUXB0 */ + WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + /* setup mmUVD_MPC_SET_MUX */ + WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + + vcn_v2_5_mc_resume(vinst); - /* enable LMI MC and UMC channels */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + /* VCN global tiling registers */ + WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); - /* unblock VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - - for (k = 0; k < 10; ++k) { - uint32_t status; - - for (j = 0; j < 100; ++j) { - status = RREG32_SOC15(VCN, i, mmUVD_STATUS); - if (status & 2) - break; - if (amdgpu_emu_mode == 1) - msleep(500); - else - mdelay(10); - } - r = 0; + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0, + 
~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (k = 0; k < 10; ++k) { + uint32_t status; + + for (j = 0; j < 100; ++j) { + status = RREG32_SOC15(VCN, i, mmUVD_STATUS); if (status & 2) break; + if (amdgpu_emu_mode == 1) + msleep(500); + else + mdelay(10); + } + r = 0; + if (status & 2) + break; - DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - mdelay(10); - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); - mdelay(10); - r = -1; - } + mdelay(10); + r = -1; + } - if (r) { - DRM_ERROR("VCN decode not responding, giving up!!!\n"); - return r; - } + if (r) { + DRM_ERROR("VCN decode not responding, giving up!!!\n"); + return r; + } - /* enable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), - UVD_MASTINT_EN__VCPU_EN_MASK, - ~UVD_MASTINT_EN__VCPU_EN_MASK); + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); - /* clear the busy bit of VCN_STATUS */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0, - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); - WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0); + WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0); - ring = &adev->vcn.inst[i].ring_dec; - /* force RBC into idle state */ - rb_bufsz = order_base_2(ring->ring_size); - tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); - WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp); + ring = &adev->vcn.inst[i].ring_dec; + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp); - fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET; - /* program the RB_BASE for ring buffer */ - WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, - upper_32_bits(ring->gpu_addr)); + fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET; + /* program the RB_BASE for ring buffer */ + WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); - /* Initialize the ring buffer's read and write pointers */ - WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0); + /* Initialize the ring buffer's read and 
write pointers */ + WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0); - ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR); - WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR, - lower_32_bits(ring->wptr)); - fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET; + ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET; - fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET; - ring = &adev->vcn.inst[i].ring_enc[0]; - WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4); - fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET; - - fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET; - ring = &adev->vcn.inst[i].ring_enc[1]; - WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); - fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET; - } + fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET; + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4); + fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET; + + fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET; + ring = &adev->vcn.inst[i].ring_enc[1]; + WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); + fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET; return 0; } @@ -1285,7 +1431,7 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); + size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); /* mc resume*/ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V1_0_INSERT_DIRECT_WT( @@ -1395,8 +1541,10 @@ static int vcn_v2_5_sriov_start(struct amdgpu_device *adev) return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table); } -static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +static int vcn_v2_5_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t tmp; /* Wait for power status to be 1 */ @@ -1423,77 +1571,81 @@ static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) return 0; } -static int vcn_v2_5_stop(struct amdgpu_device *adev) 
+static int vcn_v2_5_stop(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; uint32_t tmp; - int i, r = 0; + int r; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v2_5_stop_dpg_mode(adev, i); - continue; - } + if (adev->vcn.harvest_config & (1 << i)) + return 0; - /* wait for vcn idle */ - r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7); - if (r) - return r; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v2_5_stop_dpg_mode(vinst); + goto done; + } - tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | - UVD_LMI_STATUS__READ_CLEAN_MASK | - UVD_LMI_STATUS__WRITE_CLEAN_MASK | - UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; - r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp); - if (r) - return r; + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7); + if (r) + goto done; - /* block LMI UMC channel */ - tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2); - tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; - WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp); + if (r) + goto done; - tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| - UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; - r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp); - if (r) - return r; + /* block LMI UMC channel */ + tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp); + + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp); + if (r) + goto done; - /* block VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), - UVD_RB_ARB_CTRL__VCPU_DIS_MASK, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); - /* reset VCPU */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); - /* disable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, - ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); - /* clear status */ - WREG32_SOC15(VCN, i, mmUVD_STATUS, 0); + /* clear status */ + WREG32_SOC15(VCN, i, mmUVD_STATUS, 0); - vcn_v2_5_enable_clock_gating(adev); + vcn_v2_5_enable_clock_gating(vinst); - /* enable register anti-hang mechanism */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), - UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, - ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); - } + /* enable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); +done: if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + amdgpu_dpm_enable_vcn(adev, false, i); - return 0; + return r; } -static int vcn_v2_5_pause_dpg_mode(struct 
amdgpu_device *adev, - int inst_idx, struct dpg_pause_state *new_state) +static int vcn_v2_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; struct amdgpu_ring *ring; uint32_t reg_data = 0; int ret_code = 0; @@ -1639,8 +1791,8 @@ static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = { .insert_start = vcn_v2_0_dec_ring_insert_start, .insert_end = vcn_v2_0_dec_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, - .end_use = amdgpu_vcn_ring_end_use, + .begin_use = vcn_v2_5_ring_begin_use, + .end_use = vcn_v2_5_ring_end_use, .emit_wreg = vcn_v2_0_dec_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, @@ -1737,8 +1889,8 @@ static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = { .insert_nop = amdgpu_ring_insert_nop, .insert_end = vcn_v2_0_enc_ring_insert_end, .pad_ib = amdgpu_ring_generic_pad_ib, - .begin_use = amdgpu_vcn_ring_begin_use, - .end_use = amdgpu_vcn_ring_end_use, + .begin_use = vcn_v2_5_ring_begin_use, + .end_use = vcn_v2_5_ring_end_use, .emit_wreg = vcn_v2_0_enc_ring_emit_wreg, .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait, .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, @@ -1763,21 +1915,22 @@ static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev) for (j = 0; j < adev->vcn.num_vcn_inst; ++j) { if (adev->vcn.harvest_config & (1 << j)) continue; - for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + for (i = 0; i < adev->vcn.inst[j].num_enc_rings; ++i) { adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs; adev->vcn.inst[j].ring_enc[i].me = j; } } } -static bool vcn_v2_5_is_idle(void *handle) +static bool vcn_v2_5_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, ret = 1; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; + ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE); } @@ -1801,45 +1954,50 @@ static int vcn_v2_5_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int vcn_v2_5_set_clockgating_state(void *handle, +static int vcn_v2_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE); + int i; if (amdgpu_sriov_vf(adev)) return 0; - if (enable) { - if (!vcn_v2_5_is_idle(handle)) - return -EBUSY; - vcn_v2_5_enable_clock_gating(adev); - } else { - vcn_v2_5_disable_clock_gating(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + + if (enable) { + if (!vcn_v2_5_is_idle(ip_block)) + return -EBUSY; + vcn_v2_5_enable_clock_gating(vinst); + } else { + vcn_v2_5_disable_clock_gating(vinst); + } } return 0; } -static int vcn_v2_5_set_powergating_state(void *handle, - enum amd_powergating_state state) +static int vcn_v2_5_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = vinst->adev; int ret; if (amdgpu_sriov_vf(adev)) return 0; - if(state == adev->vcn.cur_state) + if (state == vinst->cur_state) return 0; if (state == 
AMD_PG_STATE_GATE) - ret = vcn_v2_5_stop(adev); + ret = vcn_v2_5_stop(vinst); else - ret = vcn_v2_5_start(adev); + ret = vcn_v2_5_start(vinst); - if(!ret) - adev->vcn.cur_state = state; + if (!ret) + vinst->cur_state = state; return ret; } @@ -1916,10 +2074,10 @@ static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { if (adev->vcn.harvest_config & (1 << i)) continue; - adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1; adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs; - adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.inst[i].num_enc_rings + 1; adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v2_6_ras_irq_funcs; } } @@ -1996,7 +2154,7 @@ static const struct amd_ip_funcs vcn_v2_5_ip_funcs = { .is_idle = vcn_v2_5_is_idle, .wait_for_idle = vcn_v2_5_wait_for_idle, .set_clockgating_state = vcn_v2_5_set_clockgating_state, - .set_powergating_state = vcn_v2_5_set_powergating_state, + .set_powergating_state = vcn_set_powergating_state, .dump_ip_state = vcn_v2_5_dump_ip_state, .print_ip_state = vcn_v2_5_print_ip_state, }; @@ -2013,7 +2171,7 @@ static const struct amd_ip_funcs vcn_v2_6_ip_funcs = { .is_idle = vcn_v2_5_is_idle, .wait_for_idle = vcn_v2_5_wait_for_idle, .set_clockgating_state = vcn_v2_5_set_clockgating_state, - .set_powergating_state = vcn_v2_5_set_powergating_state, + .set_powergating_state = vcn_set_powergating_state, .dump_ip_state = vcn_v2_5_dump_ip_state, .print_ip_state = vcn_v2_5_print_ip_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c index 6732ad7f16f5..22ae1939476f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c @@ -105,10 +105,10 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev); static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v3_0_set_powergating_state(void *handle, - enum amd_powergating_state state); -static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, - int inst_idx, struct dpg_pause_state *new_state); +static int vcn_v3_0_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state); +static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state); static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring); static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring); @@ -124,11 +124,13 @@ static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring); static int vcn_v3_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int i, r; if (amdgpu_sriov_vf(adev)) { adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID; adev->vcn.harvest_config = 0; - adev->vcn.num_enc_rings = 1; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) + adev->vcn.inst[i].num_enc_rings = 1; } else { if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 | @@ -136,18 +138,27 @@ static int vcn_v3_0_early_init(struct amdgpu_ip_block *ip_block) /* both instances are harvested, disable the block */ return -ENOENT; - if (amdgpu_ip_version(adev, UVD_HWIP, 0) == - IP_VERSION(3, 0, 33)) - adev->vcn.num_enc_rings = 0; - else - adev->vcn.num_enc_rings = 2; + for (i = 0; i < adev->vcn.num_vcn_inst; 
i++) { + if (amdgpu_ip_version(adev, UVD_HWIP, 0) == + IP_VERSION(3, 0, 33)) + adev->vcn.inst[i].num_enc_rings = 0; + else + adev->vcn.inst[i].num_enc_rings = 2; + } } vcn_v3_0_set_dec_ring_funcs(adev); vcn_v3_0_set_enc_ring_funcs(adev); vcn_v3_0_set_irq_funcs(adev); - return amdgpu_vcn_early_init(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + adev->vcn.inst[i].set_pg_state = vcn_v3_0_set_pg_state; + + r = amdgpu_vcn_early_init(adev, i); + if (r) + return r; + } + return 0; } /** @@ -166,16 +177,6 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) uint32_t *ptr; struct amdgpu_device *adev = ip_block->adev; - r = amdgpu_vcn_sw_init(adev); - if (r) - return r; - - amdgpu_vcn_setup_ucode(adev); - - r = amdgpu_vcn_resume(adev); - if (r) - return r; - /* * Note: doorbell assignment is fixed for SRIOV multiple VCN engines * Formula: @@ -195,22 +196,32 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) if (adev->vcn.harvest_config & (1 << i)) continue; - adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; - adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; - adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; - adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; - adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; - adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; + r = amdgpu_vcn_sw_init(adev, i); + if (r) + return r; - adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; + amdgpu_vcn_setup_ucode(adev, i); + + r = amdgpu_vcn_resume(adev, i); + if (r) + return r; + + adev->vcn.inst[i].internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET; + adev->vcn.inst[i].internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET; + adev->vcn.inst[i].internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET; + adev->vcn.inst[i].internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET; + adev->vcn.inst[i].internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET; + adev->vcn.inst[i].internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET; + + adev->vcn.inst[i].internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET; adev->vcn.inst[i].external.scratch9 = SOC15_REG_OFFSET(VCN, i, mmUVD_SCRATCH9); - adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; + adev->vcn.inst[i].internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET; adev->vcn.inst[i].external.data0 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA0); - adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; + adev->vcn.inst[i].internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET; adev->vcn.inst[i].external.data1 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA1); - adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; + adev->vcn.inst[i].internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET; adev->vcn.inst[i].external.cmd = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_CMD); - adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; + adev->vcn.inst[i].internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET; adev->vcn.inst[i].external.nop = SOC15_REG_OFFSET(VCN, i, mmUVD_NO_OP); /* VCN DEC TRAP */ @@ -224,7 +235,7 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) ring = &adev->vcn.inst[i].ring_dec; ring->use_doorbell = true; if (amdgpu_sriov_vf(adev)) { - ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1); + ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.inst[i].num_enc_rings + 1); } else { 
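The early-init and sw-init hunks above move VCN configuration from one driver-wide setup to a per-instance one: num_enc_rings, the internal register offsets and the power-gating handler now live in each struct amdgpu_vcn_inst, and amdgpu_vcn_early_init()/amdgpu_vcn_sw_init()/amdgpu_vcn_resume() are called once per instance index. A minimal stand-alone sketch of that pattern follows; every identifier in it is an illustrative stub, not the driver's real definition.

/* Illustrative model of the per-instance conversion; all names below
 * are stubs invented for this sketch, not amdgpu definitions. */
#include <stdio.h>

enum pg_state { PG_STATE_GATE, PG_STATE_UNGATE };

struct vcn_inst {
	int inst;			/* instance index */
	int num_enc_rings;		/* was a single driver-wide field */
	enum pg_state cur_state;	/* was tracked once for all instances */
	int (*set_pg_state)(struct vcn_inst *vinst, enum pg_state state);
};

static int v3_0_set_pg_state(struct vcn_inst *vinst, enum pg_state state)
{
	if (state == vinst->cur_state)
		return 0;
	printf("VCN%d %s\n", vinst->inst,
	       state == PG_STATE_GATE ? "stopping" : "starting");
	vinst->cur_state = state;
	return 0;
}

int main(void)
{
	struct vcn_inst inst[2] = { { .inst = 0 }, { .inst = 1 } };
	int i;

	/* early init now loops over instances, wiring each one's
	 * ring count and power-gating handler individually */
	for (i = 0; i < 2; i++) {
		inst[i].num_enc_rings = 2;
		inst[i].set_pg_state = v3_0_set_pg_state;
		inst[i].cur_state = PG_STATE_UNGATE;
	}

	for (i = 0; i < 2; i++)
		inst[i].set_pg_state(&inst[i], PG_STATE_GATE);
	return 0;
}

Keeping the state and the callback per instance is what allows the later hunks in this patch to start, stop or gate a single VCN instance without iterating over, or disturbing, its siblings.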
ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i; } @@ -236,7 +247,7 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) { enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(j); /* VCN ENC TRAP */ @@ -248,7 +259,7 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) ring = &adev->vcn.inst[i].ring_enc[j]; ring->use_doorbell = true; if (amdgpu_sriov_vf(adev)) { - ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1 + j; + ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.inst[i].num_enc_rings + 1) + 1 + j; } else { ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i; } @@ -274,6 +285,9 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.inst[i].pause_dpg_mode = vcn_v3_0_pause_dpg_mode; } if (amdgpu_sriov_vf(adev)) { @@ -281,8 +295,6 @@ static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block) if (r) return r; } - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - adev->vcn.pause_dpg_mode = vcn_v3_0_pause_dpg_mode; /* Allocate memory for VCN IP Dump buffer */ ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); @@ -325,14 +337,18 @@ static int vcn_v3_0_sw_fini(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) amdgpu_virt_free_mm_table(adev); - r = amdgpu_vcn_suspend(adev); - if (r) - return r; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(adev, i); + if (r) + return r; - r = amdgpu_vcn_sw_fini(adev); + r = amdgpu_vcn_sw_fini(adev, i); + if (r) + return r; + } kfree(adev->vcn.ip_dump); - return r; + return 0; } /** @@ -370,7 +386,7 @@ static int vcn_v3_0_hw_init(struct amdgpu_ip_block *ip_block) ring->sched.ready = true; } - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) { ring = &adev->vcn.inst[i].ring_enc[j]; if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) { ring->sched.ready = false; @@ -398,7 +414,7 @@ static int vcn_v3_0_hw_init(struct amdgpu_ip_block *ip_block) if (r) return r; - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) { ring = &adev->vcn.inst[i].ring_enc[j]; r = amdgpu_ring_test_helper(ring); if (r) @@ -422,17 +438,19 @@ static int vcn_v3_0_hw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int i; - cancel_delayed_work_sync(&adev->vcn.idle_work); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + if (adev->vcn.harvest_config & (1 << i)) continue; + cancel_delayed_work_sync(&vinst->idle_work); + if (!amdgpu_sriov_vf(adev)) { if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || - (adev->vcn.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(VCN, i, mmUVD_STATUS))) { - vcn_v3_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + (vinst->cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, mmUVD_STATUS))) { + vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); } } } @@ -449,15 +467,20 @@ static int vcn_v3_0_hw_fini(struct amdgpu_ip_block *ip_block) */ static int vcn_v3_0_suspend(struct amdgpu_ip_block *ip_block) { - int r; + struct amdgpu_device *adev = ip_block->adev; + int r, i; r = vcn_v3_0_hw_fini(ip_block); if (r) 
return r; - r = amdgpu_vcn_suspend(ip_block->adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(ip_block->adev, i); + if (r) + return r; + } - return r; + return 0; } /** @@ -469,11 +492,14 @@ static int vcn_v3_0_suspend(struct amdgpu_ip_block *ip_block) */ static int vcn_v3_0_resume(struct amdgpu_ip_block *ip_block) { - int r; + struct amdgpu_device *adev = ip_block->adev; + int r, i; - r = amdgpu_vcn_resume(ip_block->adev); - if (r) - return r; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_resume(ip_block->adev, i); + if (r) + return r; + } r = vcn_v3_0_hw_init(ip_block); @@ -483,14 +509,15 @@ static int vcn_v3_0_resume(struct amdgpu_ip_block *ip_block) /** * vcn_v3_0_mc_resume - memory controller programming * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Let the VCN memory controller know it's offsets */ -static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) +static void vcn_v3_0_mc_resume(struct amdgpu_vcn_inst *vinst) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst]->size + 4); + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -538,9 +565,12 @@ static void vcn_v3_0_mc_resume(struct amdgpu_device *adev, int inst) AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared))); } -static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, + bool indirect) { - uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[inst_idx]->size + 4); + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4); uint32_t offset; /* cache window 0: fw */ @@ -634,8 +664,10 @@ static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx UVD, inst_idx, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); } -static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) +static void vcn_v3_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t data = 0; if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { @@ -685,8 +717,10 @@ static void vcn_v3_0_disable_static_power_gating(struct amdgpu_device *adev, int WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data); } -static void vcn_v3_0_enable_static_power_gating(struct amdgpu_device *adev, int inst) +static void vcn_v3_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t data; if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { @@ -733,13 +767,14 @@ static void vcn_v3_0_enable_static_power_gating(struct amdgpu_device *adev, int /** * vcn_v3_0_disable_clock_gating - disable VCN clock gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: Pointer to the VCN instance structure * * Disable clock gating for VCN block */ -static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst) +static void vcn_v3_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t data; /* VCN disable CGC */ @@ -866,9 +901,12 @@ static void vcn_v3_0_disable_clock_gating(struct amdgpu_device *adev, int inst) 
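A recurring shape in the hunks around here: helpers such as vcn_v3_0_mc_resume(), the static power-gating toggles and the clock-gating routines drop their (adev, inst) parameter pair and instead take the struct amdgpu_vcn_inst pointer, recovering the device pointer and instance index from it at the top of the function. A hedged sketch of that calling convention, using stub types rather than the real amdgpu ones:

/* Stub model of the "pass the instance, derive the rest" convention;
 * vcn_dev, vcn_inst and mc_resume() are invented for this sketch. */
#include <stdio.h>

struct vcn_dev;

struct vcn_inst {
	struct vcn_dev *adev;	/* back-pointer to the owning device */
	int inst;		/* this instance's index */
};

struct vcn_dev {
	struct vcn_inst inst[4];
	int num_vcn_inst;
};

/* before: helper(adev, inst);  after: helper(vinst) */
static void mc_resume(struct vcn_inst *vinst)
{
	struct vcn_dev *adev = vinst->adev;	/* device still reachable */
	int inst = vinst->inst;			/* index still reachable */

	printf("programming MC for instance %d of %d\n",
	       inst, adev->num_vcn_inst);
}

int main(void)
{
	struct vcn_dev dev = { .num_vcn_inst = 2 };
	int i;

	for (i = 0; i < dev.num_vcn_inst; i++) {
		dev.inst[i].adev = &dev;
		dev.inst[i].inst = i;
		mc_resume(&dev.inst[i]);
	}
	return 0;
}

The single-pointer signature keeps call sites short and makes the per-instance loops in sw_init/suspend/resume read uniformly, since every helper now consumes the same handle that carries both the device and the index.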
WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data); } -static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_device *adev, - uint8_t sram_sel, int inst_idx, uint8_t indirect) +static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst, + uint8_t sram_sel, + uint8_t indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t reg_data = 0; /* enable sw clock gating control */ @@ -917,13 +955,14 @@ static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_device *adev, /** * vcn_v3_0_enable_clock_gating - enable VCN clock gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: Pointer to the VCN instance structure * * Enable clock gating for VCN block */ -static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst) +static void vcn_v3_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t data; /* enable VCN CGC */ @@ -982,8 +1021,10 @@ static void vcn_v3_0_enable_clock_gating(struct amdgpu_device *adev, int inst) WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data); } -static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +static int vcn_v3_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; @@ -1001,7 +1042,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; /* enable clock gating */ - vcn_v3_0_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); + vcn_v3_0_clock_gating_dpg_mode(vinst, 0, indirect); /* enable VCPU clock */ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); @@ -1050,7 +1091,7 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); - vcn_v3_0_mc_resume_dpg_mode(adev, inst_idx, indirect); + vcn_v3_0_mc_resume_dpg_mode(vinst, indirect); WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect); @@ -1134,190 +1175,188 @@ static int vcn_v3_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo return 0; } -static int vcn_v3_0_start(struct amdgpu_device *adev) +static int vcn_v3_0_start(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; volatile struct amdgpu_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t rb_bufsz, tmp; - int i, j, k, r; + int j, k, r; + + if (adev->vcn.harvest_config & (1 << i)) + return 0; if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + amdgpu_dpm_enable_vcn(adev, true, i); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return vcn_v3_0_start_dpg_mode(vinst, vinst->indirect_sram); - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v3_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram); - continue; - } + /* disable VCN power gating */ + vcn_v3_0_disable_static_power_gating(vinst); - /* disable VCN power gating */ - vcn_v3_0_disable_static_power_gating(adev, i); + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | 
UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp); - /* set VCN status busy */ - tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; - WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp); + /* SW clock gating */ + vcn_v3_0_disable_clock_gating(vinst); - /*SW clock gating */ - vcn_v3_0_disable_clock_gating(adev, i); + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); - /* enable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), - UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); - - /* disable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0, - ~UVD_MASTINT_EN__VCPU_EN_MASK); - - /* enable LMI MC and UMC channels */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - - tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET); - tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; - tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; - WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp); - - /* setup mmUVD_LMI_CTRL */ - tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL); - WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | - UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__MASK_MC_URGENT_MASK | - UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); - - /* setup mmUVD_MPC_CNTL */ - tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL); - tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; - tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; - WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp); - - /* setup UVD_MPC_SET_MUXA0 */ - WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0, - ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); - - /* setup UVD_MPC_SET_MUXB0 */ - WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0, - ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); - - /* setup mmUVD_MPC_SET_MUX */ - WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX, - ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | - (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); - - vcn_v3_0_mc_resume(adev, i); - - /* VCN global tiling registers */ - WREG32_SOC15(VCN, i, mmUVD_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - - /* unblock VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); - - /* release VCPU reset to boot */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); - for (j = 0; j < 10; ++j) { - uint32_t status; + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - for (k = 0; k < 100; ++k) { - status = RREG32_SOC15(VCN, i, mmUVD_STATUS); - if (status & 2) - break; - mdelay(10); - } - r = 0; - if (status & 2) - break; + tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp); - DRM_ERROR("VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i); - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), - 
UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - mdelay(10); - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + /* setup mmUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL); + WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + /* setup mmUVD_MPC_CNTL */ + tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp); + + /* setup UVD_MPC_SET_MUXA0 */ + WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + /* setup UVD_MPC_SET_MUXB0 */ + WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + /* setup mmUVD_MPC_SET_MUX */ + WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + + vcn_v3_0_mc_resume(vinst); + + /* VCN global tiling registers */ + WREG32_SOC15(VCN, i, mmUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + + for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, i, mmUVD_STATUS); + if (status & 2) + break; mdelay(10); - r = -1; } + r = 0; + if (status & 2) + break; - if (r) { - DRM_ERROR("VCN[%d] decode not responding, giving up!!!\n", i); - return r; - } + DRM_ERROR("VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); - /* enable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), - UVD_MASTINT_EN__VCPU_EN_MASK, - ~UVD_MASTINT_EN__VCPU_EN_MASK); + mdelay(10); + r = -1; + } - /* clear the busy bit of VCN_STATUS */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0, - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + if (r) { + DRM_ERROR("VCN[%d] decode not responding, giving up!!!\n", i); + return r; + } - WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0); + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); - ring = &adev->vcn.inst[i].ring_dec; - /* force RBC into idle state */ - rb_bufsz = order_base_2(ring->ring_size); - tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); - WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp); + /* clear the busy bit of 
VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); + WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0); - /* programm the RB_BASE for ring buffer */ - WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, - upper_32_bits(ring->gpu_addr)); + ring = &adev->vcn.inst[i].ring_dec; + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp); - /* Initialize the ring buffer's read and write pointers */ - WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0); + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); - WREG32_SOC15(VCN, i, mmUVD_SCRATCH2, 0); - ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR); - WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR, - lower_32_bits(ring->wptr)); - fw_shared->rb.wptr = lower_32_bits(ring->wptr); - fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); - - if (amdgpu_ip_version(adev, UVD_HWIP, 0) != - IP_VERSION(3, 0, 33)) { - fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); - ring = &adev->vcn.inst[i].ring_enc[0]; - WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4); - fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); - - fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); - ring = &adev->vcn.inst[i].ring_enc[1]; - WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); - WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); - fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); - } + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0); + + WREG32_SOC15(VCN, i, mmUVD_SCRATCH2, 0); + ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + fw_shared->rb.wptr = lower_32_bits(ring->wptr); + fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + + if (amdgpu_ip_version(adev, UVD_HWIP, 0) != + IP_VERSION(3, 0, 33)) { + fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); + ring = 
&adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4); + fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); + + fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET); + ring = &adev->vcn.inst[i].ring_enc[1]; + WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4); + fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET); } return 0; @@ -1373,7 +1412,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) mmUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, @@ -1432,7 +1471,7 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE); - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) { ring = &adev->vcn.inst[i].ring_enc[j]; ring->wptr = 0; rb_addr = ring->gpu_addr; @@ -1532,12 +1571,14 @@ static int vcn_v3_0_start_sriov(struct amdgpu_device *adev) return 0; } -static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +static int vcn_v3_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; - vcn_v3_0_pause_dpg_mode(adev, inst_idx, &state); + vcn_v3_0_pause_dpg_mode(vinst, &state); /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1, @@ -1563,84 +1604,87 @@ static int vcn_v3_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) return 0; } -static int vcn_v3_0_stop(struct amdgpu_device *adev) +static int vcn_v3_0_stop(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; uint32_t tmp; - int i, r = 0; + int r = 0; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v3_0_stop_dpg_mode(adev, i); - continue; - } + if (adev->vcn.harvest_config & (1 << i)) + return 0; - /* wait for vcn idle */ - r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7); - if (r) - return r; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + r = vcn_v3_0_stop_dpg_mode(vinst); + goto done; + } - tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | - UVD_LMI_STATUS__READ_CLEAN_MASK | - UVD_LMI_STATUS__WRITE_CLEAN_MASK | - UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; - r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp); - if (r) - return r; + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7); + if (r) + goto done; - /* disable LMI UMC channel */ - tmp = RREG32_SOC15(VCN, 
i, mmUVD_LMI_CTRL2); - tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; - WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp); - tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| - UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; - r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp); - if (r) - return r; + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp); + if (r) + goto done; + + /* disable LMI UMC channel */ + tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK| + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp); + if (r) + goto done; - /* block VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), - UVD_RB_ARB_CTRL__VCPU_DIS_MASK, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); - /* reset VCPU */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); - /* disable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, - ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); - /* apply soft reset */ - tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET); - tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; - WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp); - tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET); - tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; - WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp); + /* apply soft reset */ + tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp); + tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp); - /* clear status */ - WREG32_SOC15(VCN, i, mmUVD_STATUS, 0); + /* clear status */ + WREG32_SOC15(VCN, i, mmUVD_STATUS, 0); - /* apply HW clock gating */ - vcn_v3_0_enable_clock_gating(adev, i); + /* apply HW clock gating */ + vcn_v3_0_enable_clock_gating(vinst); - /* enable VCN power gating */ - vcn_v3_0_enable_static_power_gating(adev, i); - } + /* enable VCN power gating */ + vcn_v3_0_enable_static_power_gating(vinst); +done: if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + amdgpu_dpm_enable_vcn(adev, false, i); - return 0; + return r; } -static int vcn_v3_0_pause_dpg_mode(struct amdgpu_device *adev, - int inst_idx, struct dpg_pause_state *new_state) +static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; volatile struct amdgpu_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t reg_data = 0; @@ -1924,11 +1968,11 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, uint32_t reg = amdgpu_ib_get_value(ib, i); uint32_t val = amdgpu_ib_get_value(ib, i + 1); - if (reg == PACKET0(p->adev->vcn.internal.data0, 0)) { + if (reg == 
PACKET0(p->adev->vcn.inst[ring->me].internal.data0, 0)) { msg_lo = val; - } else if (reg == PACKET0(p->adev->vcn.internal.data1, 0)) { + } else if (reg == PACKET0(p->adev->vcn.inst[ring->me].internal.data1, 0)) { msg_hi = val; - } else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) && + } else if (reg == PACKET0(p->adev->vcn.inst[ring->me].internal.cmd, 0) && val == 0) { r = vcn_v3_0_dec_msg(p, job, ((u64)msg_hi) << 32 | msg_lo); @@ -2092,16 +2136,16 @@ static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; - for (j = 0; j < adev->vcn.num_enc_rings; ++j) { + for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) { adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v3_0_enc_ring_vm_funcs; adev->vcn.inst[i].ring_enc[j].me = i; } } } -static bool vcn_v3_0_is_idle(void *handle) +static bool vcn_v3_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, ret = 1; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -2132,54 +2176,55 @@ static int vcn_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block) return ret; } -static int vcn_v3_0_set_clockgating_state(void *handle, +static int vcn_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; if (adev->vcn.harvest_config & (1 << i)) continue; if (enable) { if (RREG32_SOC15(VCN, i, mmUVD_STATUS) != UVD_STATUS__IDLE) return -EBUSY; - vcn_v3_0_enable_clock_gating(adev, i); + vcn_v3_0_enable_clock_gating(vinst); } else { - vcn_v3_0_disable_clock_gating(adev, i); + vcn_v3_0_disable_clock_gating(vinst); } } return 0; } -static int vcn_v3_0_set_powergating_state(void *handle, - enum amd_powergating_state state) +static int vcn_v3_0_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int ret; + struct amdgpu_device *adev = vinst->adev; + int ret = 0; /* for SRIOV, guest should not control VCN Power-gating * MMSCH FW should control Power-gating and clock-gating * guest should avoid touching CGC and PG */ if (amdgpu_sriov_vf(adev)) { - adev->vcn.cur_state = AMD_PG_STATE_UNGATE; + vinst->cur_state = AMD_PG_STATE_UNGATE; return 0; } - if (state == adev->vcn.cur_state) + if (state == vinst->cur_state) return 0; if (state == AMD_PG_STATE_GATE) - ret = vcn_v3_0_stop(adev); + ret = vcn_v3_0_stop(vinst); else - ret = vcn_v3_0_start(adev); + ret = vcn_v3_0_start(vinst); if (!ret) - adev->vcn.cur_state = state; + vinst->cur_state = state; return ret; } @@ -2244,7 +2289,7 @@ static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; - adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1; adev->vcn.inst[i].irq.funcs = &vcn_v3_0_irq_funcs; } } @@ -2322,7 +2367,7 @@ static const struct amd_ip_funcs vcn_v3_0_ip_funcs = { .is_idle = vcn_v3_0_is_idle, .wait_for_idle = vcn_v3_0_wait_for_idle, .set_clockgating_state = vcn_v3_0_set_clockgating_state, - .set_powergating_state = vcn_v3_0_set_powergating_state, + .set_powergating_state = vcn_set_powergating_state, .dump_ip_state = 
vcn_v3_0_dump_ip_state, .print_ip_state = vcn_v3_0_print_ip_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index fcc8511e91ee..c6f6392c1c20 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -96,10 +96,10 @@ static int amdgpu_ih_clientid_vcns[] = { static int vcn_v4_0_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v4_0_set_powergating_state(void *handle, - enum amd_powergating_state state); -static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, - int inst_idx, struct dpg_pause_state *new_state); +static int vcn_v4_0_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state); +static int vcn_v4_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state); static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring); static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev); @@ -114,7 +114,7 @@ static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev); static int vcn_v4_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; - int i; + int i, r; if (amdgpu_sriov_vf(adev)) { adev->vcn.harvest_config = VCN_HARVEST_MMSCH; @@ -126,14 +126,23 @@ static int vcn_v4_0_early_init(struct amdgpu_ip_block *ip_block) } } - /* re-use enc ring as unified ring */ - adev->vcn.num_enc_rings = 1; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) + /* re-use enc ring as unified ring */ + adev->vcn.inst[i].num_enc_rings = 1; vcn_v4_0_set_unified_ring_funcs(adev); vcn_v4_0_set_irq_funcs(adev); vcn_v4_0_set_ras_funcs(adev); - return amdgpu_vcn_early_init(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst[i].set_pg_state = vcn_v4_0_set_pg_state; + + r = amdgpu_vcn_early_init(adev, i); + if (r) + return r; + } + + return 0; } static int vcn_v4_0_fw_shared_init(struct amdgpu_device *adev, int inst_idx) @@ -176,20 +185,20 @@ static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block) uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0); uint32_t *ptr; - r = amdgpu_vcn_sw_init(adev); - if (r) - return r; - - amdgpu_vcn_setup_ucode(adev); - - r = amdgpu_vcn_resume(adev); - if (r) - return r; - for (i = 0; i < adev->vcn.num_vcn_inst; i++) { if (adev->vcn.harvest_config & (1 << i)) continue; + r = amdgpu_vcn_sw_init(adev, i); + if (r) + return r; + + amdgpu_vcn_setup_ucode(adev, i); + + r = amdgpu_vcn_resume(adev, i); + if (r) + return r; + /* Init instance 0 sched_score to 1, so it's scheduled after other instances */ if (i == 0) atomic_set(&adev->vcn.inst[i].sched_score, 1); @@ -211,7 +220,8 @@ static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block) ring = &adev->vcn.inst[i].ring_enc[0]; ring->use_doorbell = true; if (amdgpu_sriov_vf(adev)) - ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * + (adev->vcn.inst[i].num_enc_rings + 1) + 1; else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; ring->vm_hub = AMDGPU_MMHUB0(0); @@ -223,6 +233,9 @@ static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block) return r; vcn_v4_0_fw_shared_init(adev, i); + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_pause_dpg_mode; } /* TODO: Add queue reset mask 
when FW fully supports it */ @@ -235,8 +248,6 @@ static int vcn_v4_0_sw_init(struct amdgpu_ip_block *ip_block) return r; } - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - adev->vcn.pause_dpg_mode = vcn_v4_0_pause_dpg_mode; r = amdgpu_vcn_ras_sw_init(adev); if (r) @@ -288,16 +299,23 @@ static int vcn_v4_0_sw_fini(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) amdgpu_virt_free_mm_table(adev); - r = amdgpu_vcn_suspend(adev); - if (r) - return r; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(adev, i); + if (r) + return r; + } amdgpu_vcn_sysfs_reset_mask_fini(adev); - r = amdgpu_vcn_sw_fini(adev); + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_sw_fini(adev, i); + if (r) + return r; + } kfree(adev->vcn.ip_dump); - return r; + return 0; } /** @@ -359,20 +377,23 @@ static int vcn_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int i; - cancel_delayed_work_sync(&adev->vcn.idle_work); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + if (adev->vcn.harvest_config & (1 << i)) continue; + + cancel_delayed_work_sync(&vinst->idle_work); + if (!amdgpu_sriov_vf(adev)) { if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || - (adev->vcn.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(VCN, i, regUVD_STATUS))) { - vcn_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + (vinst->cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, regUVD_STATUS))) { + vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); } } if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) - amdgpu_irq_put(adev, &adev->vcn.inst[i].ras_poison_irq, 0); + amdgpu_irq_put(adev, &vinst->ras_poison_irq, 0); } return 0; @@ -387,15 +408,20 @@ static int vcn_v4_0_hw_fini(struct amdgpu_ip_block *ip_block) */ static int vcn_v4_0_suspend(struct amdgpu_ip_block *ip_block) { - int r; + struct amdgpu_device *adev = ip_block->adev; + int r, i; r = vcn_v4_0_hw_fini(ip_block); if (r) return r; - r = amdgpu_vcn_suspend(ip_block->adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(ip_block->adev, i); + if (r) + return r; + } - return r; + return 0; } /** @@ -407,11 +433,14 @@ static int vcn_v4_0_suspend(struct amdgpu_ip_block *ip_block) */ static int vcn_v4_0_resume(struct amdgpu_ip_block *ip_block) { - int r; + struct amdgpu_device *adev = ip_block->adev; + int r, i; - r = amdgpu_vcn_resume(ip_block->adev); - if (r) - return r; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_resume(ip_block->adev, i); + if (r) + return r; + } r = vcn_v4_0_hw_init(ip_block); @@ -421,17 +450,18 @@ static int vcn_v4_0_resume(struct amdgpu_ip_block *ip_block) /** * vcn_v4_0_mc_resume - memory controller programming * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Let the VCN memory controller know it's offsets */ -static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst) +static void vcn_v4_0_mc_resume(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -481,17 +511,19 @@ static void vcn_v4_0_mc_resume(struct amdgpu_device *adev, int inst) /** * 
vcn_v4_0_mc_resume_dpg_mode - memory controller programming for dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * @indirect: indirectly write sram * * Let the VCN memory controller know it's offsets with dpg mode */ -static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, + bool indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -588,13 +620,14 @@ static void vcn_v4_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx /** * vcn_v4_0_disable_static_power_gating - disable VCN static power gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Disable static power gating for VCN block */ -static void vcn_v4_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) +static void vcn_v4_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t data = 0; if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { @@ -653,13 +686,14 @@ static void vcn_v4_0_disable_static_power_gating(struct amdgpu_device *adev, int /** * vcn_v4_0_enable_static_power_gating - enable VCN static power gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Enable static power gating for VCN block */ -static void vcn_v4_0_enable_static_power_gating(struct amdgpu_device *adev, int inst) +static void vcn_v4_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t data; if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { @@ -708,13 +742,14 @@ static void vcn_v4_0_enable_static_power_gating(struct amdgpu_device *adev, int /** * vcn_v4_0_disable_clock_gating - disable VCN clock gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Disable clock gating for VCN block */ -static void vcn_v4_0_disable_clock_gating(struct amdgpu_device *adev, int inst) +static void vcn_v4_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t data; if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) @@ -819,16 +854,18 @@ static void vcn_v4_0_disable_clock_gating(struct amdgpu_device *adev, int inst) /** * vcn_v4_0_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * @sram_sel: sram select - * @inst_idx: instance number index * @indirect: indirectly write sram * * Disable clock gating for VCN block with dpg mode */ -static void vcn_v4_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel, - int inst_idx, uint8_t indirect) +static void vcn_v4_0_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst, + uint8_t sram_sel, + uint8_t indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t reg_data = 0; if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) @@ -876,13 +913,14 @@ static void vcn_v4_0_disable_clock_gating_dpg_mode(struct 
amdgpu_device *adev, u /** * vcn_v4_0_enable_clock_gating - enable VCN clock gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Enable clock gating for VCN block */ -static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst) +static void vcn_v4_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t data; if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) @@ -932,9 +970,11 @@ static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst) WREG32_SOC15(VCN, inst, regUVD_SUVD_CGC_CTRL, data); } -static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx, +static void vcn_v4_0_enable_ras(struct amdgpu_vcn_inst *vinst, bool indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t tmp; if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) @@ -957,14 +997,15 @@ static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx, /** * vcn_v4_0_start_dpg_mode - VCN start with dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * @indirect: indirectly write sram * * Start VCN block with dpg mode */ -static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +static int vcn_v4_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; @@ -982,7 +1023,7 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; /* enable clock gating */ - vcn_v4_0_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); + vcn_v4_0_disable_clock_gating_dpg_mode(vinst, 0, indirect); /* enable VCPU clock */ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); @@ -1030,7 +1071,7 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); - vcn_v4_0_mc_resume_dpg_mode(adev, inst_idx, indirect); + vcn_v4_0_mc_resume_dpg_mode(vinst, indirect); tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; @@ -1042,7 +1083,7 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect); - vcn_v4_0_enable_ras(adev, inst_idx, indirect); + vcn_v4_0_enable_ras(vinst, indirect); /* enable master interrupt */ WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET( @@ -1086,181 +1127,179 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo /** * vcn_v4_0_start - VCN start * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Start VCN block */ -static int vcn_v4_0_start(struct amdgpu_device *adev) +static int vcn_v4_0_start(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; volatile struct amdgpu_vcn4_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t tmp; - int i, j, k, r; + int j, k, r; + + if (adev->vcn.harvest_config & (1 << i)) + return 0; if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + 
amdgpu_dpm_enable_vcn(adev, true, i); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return vcn_v4_0_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram); - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v4_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram); - continue; - } + /* disable VCN power gating */ + vcn_v4_0_disable_static_power_gating(vinst); - /* disable VCN power gating */ - vcn_v4_0_disable_static_power_gating(adev, i); - - /* set VCN status busy */ - tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; - WREG32_SOC15(VCN, i, regUVD_STATUS, tmp); - - /*SW clock gating */ - vcn_v4_0_disable_clock_gating(adev, i); - - /* enable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); - - /* disable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0, - ~UVD_MASTINT_EN__VCPU_EN_MASK); - - /* enable LMI MC and UMC channels */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - - tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); - tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; - tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; - WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); - - /* setup regUVD_LMI_CTRL */ - tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL); - WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp | - UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__MASK_MC_URGENT_MASK | - UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); - - /* setup regUVD_MPC_CNTL */ - tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL); - tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; - tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; - WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp); - - /* setup UVD_MPC_SET_MUXA0 */ - WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0, - ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); - - /* setup UVD_MPC_SET_MUXB0 */ - WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0, - ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); - - /* setup UVD_MPC_SET_MUX */ - WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX, - ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | - (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); - - vcn_v4_0_mc_resume(adev, i); - - /* VCN global tiling registers */ - WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - - /* unblock VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); - - /* release VCPU reset to boot */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - - for (j = 0; j < 10; ++j) { - uint32_t status; - - for (k = 0; k < 100; ++k) { - status = RREG32_SOC15(VCN, i, regUVD_STATUS); - if (status & 2) - break; - mdelay(10); - if (amdgpu_emu_mode == 1) - msleep(1); - } + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, i, regUVD_STATUS, tmp); + + /*SW clock gating */ + 
vcn_v4_0_disable_clock_gating(vinst); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + + /* setup regUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL); + WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + /* setup regUVD_MPC_CNTL */ + tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp); + + /* setup UVD_MPC_SET_MUXA0 */ + WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + /* setup UVD_MPC_SET_MUXB0 */ + WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + /* setup UVD_MPC_SET_MUX */ + WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + + vcn_v4_0_mc_resume(vinst); - if (amdgpu_emu_mode == 1) { - r = -1; - if (status & 2) { - r = 0; - break; - } - } else { + /* VCN global tiling registers */ + WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + + for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, i, regUVD_STATUS); + if (status & 2) + break; + mdelay(10); + if (amdgpu_emu_mode == 1) + msleep(1); + } + + if (amdgpu_emu_mode == 1) { + r = -1; + if (status & 2) { r = 0; - if (status & 2) - break; - - dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - mdelay(10); - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - - mdelay(10); - r = -1; + break; } + } else { + r = 0; + if (status & 2) + break; + + dev_err(adev->dev, "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; } + } - if (r) { - dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); - return r; - 
} + if (r) { + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); + return r; + } - /* enable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), - UVD_MASTINT_EN__VCPU_EN_MASK, - ~UVD_MASTINT_EN__VCPU_EN_MASK); + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); - /* clear the busy bit of VCN_STATUS */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); - ring = &adev->vcn.inst[i].ring_enc[0]; - WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, - ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); - - WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); - - tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); - tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); - WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); - fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; - WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); - WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); - - tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); - WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp); - ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); - - tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); - tmp |= VCN_RB_ENABLE__RB1_EN_MASK; - WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); - fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); - } + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); return 0; } @@ -1341,7 +1380,7 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) regUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i, @@ -1516,17 +1555,18 @@ static int vcn_v4_0_start_sriov(struct amdgpu_device *adev) /** * vcn_v4_0_stop_dpg_mode - VCN stop with dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * * Stop VCN block with dpg mode */ -static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +static void vcn_v4_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int 
inst_idx = vinst->inst; struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; - vcn_v4_0_pause_dpg_mode(adev, inst_idx, &state); + vcn_v4_0_pause_dpg_mode(vinst, &state); /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); @@ -1546,85 +1586,87 @@ static void vcn_v4_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) /** * vcn_v4_0_stop - VCN stop * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Stop VCN block */ -static int vcn_v4_0_stop(struct amdgpu_device *adev) +static int vcn_v4_0_stop(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; volatile struct amdgpu_vcn4_fw_shared *fw_shared; uint32_t tmp; - int i, r = 0; + int r = 0; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; - - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + if (adev->vcn.harvest_config & (1 << i)) + return 0; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - vcn_v4_0_stop_dpg_mode(adev, i); - continue; - } + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; - /* wait for vcn idle */ - r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); - if (r) - return r; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + vcn_v4_0_stop_dpg_mode(vinst); + r = 0; + goto done; + } - tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | - UVD_LMI_STATUS__READ_CLEAN_MASK | - UVD_LMI_STATUS__WRITE_CLEAN_MASK | - UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; - r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); - if (r) - return r; + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); + if (r) + goto done; - /* disable LMI UMC channel */ - tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2); - tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; - WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp); - tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | - UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; - r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); - if (r) - return r; + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + if (r) + goto done; + + /* disable LMI UMC channel */ + tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + if (r) + goto done; - /* block VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), - UVD_RB_ARB_CTRL__VCPU_DIS_MASK, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); - /* reset VCPU */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); - /* disable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - 
~(UVD_VCPU_CNTL__CLK_EN_MASK)); + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); - /* apply soft reset */ - tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); - tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; - WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); - tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); - tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; - WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + /* apply soft reset */ + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); - /* clear status */ - WREG32_SOC15(VCN, i, regUVD_STATUS, 0); + /* clear status */ + WREG32_SOC15(VCN, i, regUVD_STATUS, 0); - /* apply HW clock gating */ - vcn_v4_0_enable_clock_gating(adev, i); + /* apply HW clock gating */ + vcn_v4_0_enable_clock_gating(vinst); - /* enable VCN power gating */ - vcn_v4_0_enable_static_power_gating(adev, i); - } + /* enable VCN power gating */ + vcn_v4_0_enable_static_power_gating(vinst); +done: if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + amdgpu_dpm_enable_vcn(adev, false, i); return 0; } @@ -1632,15 +1674,16 @@ static int vcn_v4_0_stop(struct amdgpu_device *adev) /** * vcn_v4_0_pause_dpg_mode - VCN pause with dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * @new_state: pause state * * Pause dpg mode for VCN block */ -static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, - struct dpg_pause_state *new_state) +static int vcn_v4_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t reg_data = 0; int ret_code; @@ -1960,13 +2003,13 @@ static void vcn_v4_0_set_unified_ring_funcs(struct amdgpu_device *adev) /** * vcn_v4_0_is_idle - check VCN block is idle * - * @handle: amdgpu_device pointer + * @ip_block: Pointer to the amdgpu_ip_block structure * * Check whether VCN block is idle */ -static bool vcn_v4_0_is_idle(void *handle) +static bool vcn_v4_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, ret = 1; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -2007,65 +2050,61 @@ static int vcn_v4_0_wait_for_idle(struct amdgpu_ip_block *ip_block) /** * vcn_v4_0_set_clockgating_state - set VCN block clockgating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: clock gating state * * Set VCN block clockgating state */ -static int vcn_v4_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) +static int vcn_v4_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + if (adev->vcn.harvest_config & (1 << i)) continue; if (enable) { if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE) return -EBUSY; - vcn_v4_0_enable_clock_gating(adev, i); + vcn_v4_0_enable_clock_gating(vinst); } else { - 
vcn_v4_0_disable_clock_gating(adev, i); + vcn_v4_0_disable_clock_gating(vinst); } } return 0; } -/** - * vcn_v4_0_set_powergating_state - set VCN block powergating state - * - * @handle: amdgpu_device pointer - * @state: power gating state - * - * Set VCN block powergating state - */ -static int vcn_v4_0_set_powergating_state(void *handle, enum amd_powergating_state state) +static int vcn_v4_0_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int ret; + struct amdgpu_device *adev = vinst->adev; + int ret = 0; /* for SRIOV, guest should not control VCN Power-gating * MMSCH FW should control Power-gating and clock-gating * guest should avoid touching CGC and PG */ if (amdgpu_sriov_vf(adev)) { - adev->vcn.cur_state = AMD_PG_STATE_UNGATE; + vinst->cur_state = AMD_PG_STATE_UNGATE; return 0; } - if (state == adev->vcn.cur_state) + if (state == vinst->cur_state) return 0; if (state == AMD_PG_STATE_GATE) - ret = vcn_v4_0_stop(adev); + ret = vcn_v4_0_stop(vinst); else - ret = vcn_v4_0_start(adev); + ret = vcn_v4_0_start(vinst); if (!ret) - adev->vcn.cur_state = state; + vinst->cur_state = state; return ret; } @@ -2157,10 +2196,10 @@ static void vcn_v4_0_set_irq_funcs(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; - adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1; adev->vcn.inst[i].irq.funcs = &vcn_v4_0_irq_funcs; - adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst[i].ras_poison_irq.num_types = adev->vcn.inst[i].num_enc_rings + 1; adev->vcn.inst[i].ras_poison_irq.funcs = &vcn_v4_0_ras_irq_funcs; } } @@ -2238,7 +2277,7 @@ static const struct amd_ip_funcs vcn_v4_0_ip_funcs = { .is_idle = vcn_v4_0_is_idle, .wait_for_idle = vcn_v4_0_wait_for_idle, .set_clockgating_state = vcn_v4_0_set_clockgating_state, - .set_powergating_state = vcn_v4_0_set_powergating_state, + .set_powergating_state = vcn_set_powergating_state, .dump_ip_state = vcn_v4_0_dump_ip_state, .print_ip_state = vcn_v4_0_print_ip_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c index 3f69b9b2bcd0..7446ecc55714 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c @@ -31,6 +31,7 @@ #include "soc15d.h" #include "soc15_hw_ip.h" #include "vcn_v2_0.h" +#include "vcn_v4_0_3.h" #include "mmsch_v4_0_3.h" #include "vcn/vcn_4_0_3_offset.h" @@ -87,10 +88,10 @@ static const struct amdgpu_hwip_reg_entry vcn_reg_list_4_0_3[] = { static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev); static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_3_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v4_0_3_set_powergating_state(void *handle, - enum amd_powergating_state state); -static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev, - int inst_idx, struct dpg_pause_state *new_state); +static int vcn_v4_0_3_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state); +static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state); static void vcn_v4_0_3_unified_ring_set_wptr(struct amdgpu_ring *ring); static void vcn_v4_0_3_set_ras_funcs(struct amdgpu_device *adev); static void vcn_v4_0_3_enable_ras(struct amdgpu_device *adev, @@ -98,8 +99,7 @@ static void vcn_v4_0_3_enable_ras(struct 
amdgpu_device *adev, static inline bool vcn_v4_0_3_normalizn_reqd(struct amdgpu_device *adev) { - return (amdgpu_sriov_vf(adev) || - (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4))); + return (adev->vcn.caps & AMDGPU_VCN_CAPS(RRMT_ENABLED)) == 0; } /** @@ -112,15 +112,25 @@ static inline bool vcn_v4_0_3_normalizn_reqd(struct amdgpu_device *adev) static int vcn_v4_0_3_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int i, r; - /* re-use enc ring as unified ring */ - adev->vcn.num_enc_rings = 1; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) + /* re-use enc ring as unified ring */ + adev->vcn.inst[i].num_enc_rings = 1; vcn_v4_0_3_set_unified_ring_funcs(adev); vcn_v4_0_3_set_irq_funcs(adev); vcn_v4_0_3_set_ras_funcs(adev); - return amdgpu_vcn_early_init(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst[i].set_pg_state = vcn_v4_0_3_set_pg_state; + + r = amdgpu_vcn_early_init(adev, i); + if (r) + return r; + } + + return 0; } static int vcn_v4_0_3_fw_shared_init(struct amdgpu_device *adev, int inst_idx) @@ -152,16 +162,6 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_3); uint32_t *ptr; - r = amdgpu_vcn_sw_init(adev); - if (r) - return r; - - amdgpu_vcn_setup_ucode(adev); - - r = amdgpu_vcn_resume(adev); - if (r) - return r; - /* VCN DEC TRAP */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq); @@ -169,6 +169,17 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) return r; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + + r = amdgpu_vcn_sw_init(adev, i); + if (r) + return r; + + amdgpu_vcn_setup_ucode(adev, i); + + r = amdgpu_vcn_resume(adev, i); + if (r) + return r; + vcn_inst = GET_INST(VCN, i); ring = &adev->vcn.inst[i].ring_enc[0]; @@ -192,6 +203,9 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) return r; vcn_v4_0_3_fw_shared_init(adev, i); + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode; } /* TODO: Add queue reset mask when FW fully supports it */ @@ -204,9 +218,6 @@ static int vcn_v4_0_3_sw_init(struct amdgpu_ip_block *ip_block) return r; } - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - adev->vcn.pause_dpg_mode = vcn_v4_0_3_pause_dpg_mode; - if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) { r = amdgpu_vcn_ras_sw_init(adev); if (r) { @@ -257,16 +268,23 @@ static int vcn_v4_0_3_sw_fini(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) amdgpu_virt_free_mm_table(adev); - r = amdgpu_vcn_suspend(adev); - if (r) - return r; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(adev, i); + if (r) + return r; + } amdgpu_vcn_sysfs_reset_mask_fini(adev); - r = amdgpu_vcn_sw_fini(adev); + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_sw_fini(adev, i); + if (r) + return r; + } kfree(adev->vcn.ip_dump); - return r; + return 0; } /** @@ -295,6 +313,11 @@ static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) ring->sched.ready = true; } } else { + /* This flag is not set for VF, assumed to be disabled always */ + if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & + 0x100) + adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { struct amdgpu_vcn4_fw_shared *fw_shared; @@ -345,11 +368,16 @@ static int vcn_v4_0_3_hw_init(struct amdgpu_ip_block *ip_block) static int 
vcn_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int i; - cancel_delayed_work_sync(&adev->vcn.idle_work); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; - if (adev->vcn.cur_state != AMD_PG_STATE_GATE) - vcn_v4_0_3_set_powergating_state(adev, AMD_PG_STATE_GATE); + cancel_delayed_work_sync(&vinst->idle_work); + + if (vinst->cur_state != AMD_PG_STATE_GATE) + vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); + } return 0; } @@ -363,15 +391,20 @@ static int vcn_v4_0_3_hw_fini(struct amdgpu_ip_block *ip_block) */ static int vcn_v4_0_3_suspend(struct amdgpu_ip_block *ip_block) { - int r; + struct amdgpu_device *adev = ip_block->adev; + int r, i; r = vcn_v4_0_3_hw_fini(ip_block); if (r) return r; - r = amdgpu_vcn_suspend(ip_block->adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(adev, i); + if (r) + return r; + } - return r; + return 0; } /** @@ -383,11 +416,14 @@ static int vcn_v4_0_3_suspend(struct amdgpu_ip_block *ip_block) */ static int vcn_v4_0_3_resume(struct amdgpu_ip_block *ip_block) { - int r; + struct amdgpu_device *adev = ip_block->adev; + int r, i; - r = amdgpu_vcn_resume(ip_block->adev); - if (r) - return r; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_resume(ip_block->adev, i); + if (r) + return r; + } r = vcn_v4_0_3_hw_init(ip_block); @@ -397,17 +433,18 @@ static int vcn_v4_0_3_resume(struct amdgpu_ip_block *ip_block) /** * vcn_v4_0_3_mc_resume - memory controller programming * - * @adev: amdgpu_device pointer - * @inst_idx: instance number + * @vinst: VCN instance * * Let the VCN memory controller know it's offsets */ -static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx) +static void vcn_v4_0_3_mc_resume(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t offset, size, vcn_inst; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); vcn_inst = GET_INST(VCN, inst_idx); @@ -471,18 +508,20 @@ static void vcn_v4_0_3_mc_resume(struct amdgpu_device *adev, int inst_idx) /** * vcn_v4_0_3_mc_resume_dpg_mode - memory controller programming for dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * @indirect: indirectly write sram * * Let the VCN memory controller know it's offsets with dpg mode */ -static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, + bool indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -585,13 +624,14 @@ static void vcn_v4_0_3_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i /** * vcn_v4_0_3_disable_clock_gating - disable VCN clock gating * - * @adev: amdgpu_device pointer - * @inst_idx: instance number + * @vinst: VCN instance * * Disable clock gating for VCN block */ -static void 
vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst_idx) +static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t data; int vcn_inst; @@ -678,16 +718,18 @@ static void vcn_v4_0_3_disable_clock_gating(struct amdgpu_device *adev, int inst /** * vcn_v4_0_3_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * @sram_sel: sram select - * @inst_idx: instance number index * @indirect: indirectly write sram * * Disable clock gating for VCN block with dpg mode */ -static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel, - int inst_idx, uint8_t indirect) +static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst, + uint8_t sram_sel, + uint8_t indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t reg_data = 0; if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) @@ -729,13 +771,14 @@ static void vcn_v4_0_3_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, /** * vcn_v4_0_3_enable_clock_gating - enable VCN clock gating * - * @adev: amdgpu_device pointer - * @inst_idx: instance number + * @vinst: VCN instance * * Enable clock gating for VCN block */ -static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_idx) +static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t data; int vcn_inst; @@ -780,14 +823,16 @@ static void vcn_v4_0_3_enable_clock_gating(struct amdgpu_device *adev, int inst_ /** * vcn_v4_0_3_start_dpg_mode - VCN start with dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * @indirect: indirectly write sram * * Start VCN block with dpg mode */ -static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_vcn_inst *vinst, + bool indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; @@ -815,7 +860,7 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b } /* enable clock gating */ - vcn_v4_0_3_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); + vcn_v4_0_3_disable_clock_gating_dpg_mode(vinst, 0, indirect); /* enable VCPU clock */ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); @@ -865,7 +910,7 @@ static int vcn_v4_0_3_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); - vcn_v4_0_3_mc_resume_dpg_mode(adev, inst_idx, indirect); + vcn_v4_0_3_mc_resume_dpg_mode(vinst, indirect); tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; @@ -957,6 +1002,8 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev) for (i = 0; i < adev->vcn.num_vcn_inst; i++) { vcn_inst = GET_INST(VCN, i); + vcn_v4_0_3_fw_shared_init(adev, vcn_inst); + memset(&header, 0, sizeof(struct mmsch_v4_0_3_init_header)); header.version = MMSCH_VERSION; header.total_size = sizeof(struct mmsch_v4_0_3_init_header) >> 2; @@ -969,7 +1016,7 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev) 
MMSCH_V4_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, 0, regUVD_STATUS), ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY); - cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw[i]->size + 4); + cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4); if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { MMSCH_V4_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, 0, @@ -1110,189 +1157,185 @@ static int vcn_v4_0_3_start_sriov(struct amdgpu_device *adev) /** * vcn_v4_0_3_start - VCN start * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Start VCN block */ -static int vcn_v4_0_3_start(struct amdgpu_device *adev) +static int vcn_v4_0_3_start(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; volatile struct amdgpu_vcn4_fw_shared *fw_shared; struct amdgpu_ring *ring; - int i, j, k, r, vcn_inst; + int j, k, r, vcn_inst; uint32_t tmp; - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return vcn_v4_0_3_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v4_0_3_start_dpg_mode(adev, i, adev->vcn.indirect_sram); - continue; - } + vcn_inst = GET_INST(VCN, i); + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | + UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp); - vcn_inst = GET_INST(VCN, i); - /* set VCN status busy */ - tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | - UVD_STATUS__UVD_BUSY; - WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp); - - /*SW clock gating */ - vcn_v4_0_3_disable_clock_gating(adev, i); - - /* enable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__CLK_EN_MASK, - ~UVD_VCPU_CNTL__CLK_EN_MASK); - - /* disable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0, - ~UVD_MASTINT_EN__VCPU_EN_MASK); - - /* enable LMI MC and UMC channels */ - WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - - tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); - tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; - tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; - WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); - - /* setup regUVD_LMI_CTRL */ - tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL); - WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, - tmp | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__MASK_MC_URGENT_MASK | - UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); - - /* setup regUVD_MPC_CNTL */ - tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL); - tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; - tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; - WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL, tmp); - - /* setup UVD_MPC_SET_MUXA0 */ - WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXA0, - ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); - - /* setup UVD_MPC_SET_MUXB0 */ - WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXB0, - ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); - - /* setup UVD_MPC_SET_MUX */ - WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUX, - ((0x0 << 
UVD_MPC_SET_MUX__SET_0__SHIFT) | - (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); - - vcn_v4_0_3_mc_resume(adev, i); - - /* VCN global tiling registers */ - WREG32_SOC15(VCN, vcn_inst, regUVD_GFX8_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - - /* unblock VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); - - /* release VCPU reset to boot */ - WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + /* SW clock gating */ + vcn_v4_0_3_disable_clock_gating(vinst); - for (j = 0; j < 10; ++j) { - uint32_t status; + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, + ~UVD_VCPU_CNTL__CLK_EN_MASK); - for (k = 0; k < 100; ++k) { - status = RREG32_SOC15(VCN, vcn_inst, - regUVD_STATUS); - if (status & 2) - break; - mdelay(10); - } - r = 0; - if (status & 2) - break; + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); - DRM_DEV_ERROR(adev->dev, - "VCN decode not responding, trying to reset the VCPU!!!\n"); - WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, - regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - mdelay(10); - WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, - regUVD_VCPU_CNTL), - 0, ~UVD_VCPU_CNTL__BLK_RST_MASK); + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + + /* setup regUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, + tmp | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + /* setup regUVD_MPC_CNTL */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_CNTL, tmp); + + /* setup UVD_MPC_SET_MUXA0 */ + WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + /* setup UVD_MPC_SET_MUXB0 */ + WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + /* setup UVD_MPC_SET_MUX */ + WREG32_SOC15(VCN, vcn_inst, regUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + + vcn_v4_0_3_mc_resume(vinst); + /* VCN global tiling registers */ + WREG32_SOC15(VCN, vcn_inst, regUVD_GFX8_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, 
regUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + + for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, vcn_inst, + regUVD_STATUS); + if (status & 2) + break; mdelay(10); - r = -1; } + r = 0; + if (status & 2) + break; - if (r) { - DRM_DEV_ERROR(adev->dev, "VCN decode not responding, giving up!!!\n"); - return r; - } + DRM_DEV_ERROR(adev->dev, + "VCN decode not responding, trying to reset the VCPU!!!\n"); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, + regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, + regUVD_VCPU_CNTL), + 0, ~UVD_VCPU_CNTL__BLK_RST_MASK); - /* enable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), - UVD_MASTINT_EN__VCPU_EN_MASK, - ~UVD_MASTINT_EN__VCPU_EN_MASK); + mdelay(10); + r = -1; + } - /* clear the busy bit of VCN_STATUS */ - WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0, - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + if (r) { + DRM_DEV_ERROR(adev->dev, "VCN decode not responding, giving up!!!\n"); + return r; + } - ring = &adev->vcn.inst[i].ring_enc[0]; - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); - /* program the RB_BASE for ring buffer */ - WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, - lower_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, - upper_32_bits(ring->gpu_addr)); + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); - WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, - ring->ring_size / sizeof(uint32_t)); + ring = &adev->vcn.inst[i].ring_enc[0]; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - /* resetting ring, fw should not check RB ring */ - tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); - tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK); - WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + /* program the RB_BASE for ring buffer */ + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, + upper_32_bits(ring->gpu_addr)); - /* Initialize the ring buffer's read and write pointers */ - WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); - WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, + ring->ring_size / sizeof(uint32_t)); - tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); - tmp |= VCN_RB_ENABLE__RB_EN_MASK; - WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + /* resetting ring, fw should not check RB ring */ + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB_EN_MASK); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); - ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); - fw_shared->sq.queue_mode &= - cpu_to_le32(~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF)); + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB_EN_MASK; + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + + ring->wptr = 
RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + fw_shared->sq.queue_mode &= + cpu_to_le32(~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF)); - } return 0; } /** * vcn_v4_0_3_stop_dpg_mode - VCN stop with dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * * Stop VCN block with dpg mode */ -static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t tmp; int vcn_inst; @@ -1318,100 +1361,97 @@ static int vcn_v4_0_3_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) /** * vcn_v4_0_3_stop - VCN stop * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Stop VCN block */ -static int vcn_v4_0_3_stop(struct amdgpu_device *adev) +static int vcn_v4_0_3_stop(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; volatile struct amdgpu_vcn4_fw_shared *fw_shared; - int i, r = 0, vcn_inst; + int r = 0, vcn_inst; uint32_t tmp; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - vcn_inst = GET_INST(VCN, i); + vcn_inst = GET_INST(VCN, i); - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - vcn_v4_0_3_stop_dpg_mode(adev, i); - continue; - } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + vcn_v4_0_3_stop_dpg_mode(vinst); + goto Done; + } - /* wait for vcn idle */ - r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, - UVD_STATUS__IDLE, 0x7); - if (r) - goto Done; - - tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | - UVD_LMI_STATUS__READ_CLEAN_MASK | - UVD_LMI_STATUS__WRITE_CLEAN_MASK | - UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; - r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, - tmp); - if (r) - goto Done; - - /* stall UMC channel */ - tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2); - tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; - WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp); - tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | - UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; - r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, - tmp); - if (r) - goto Done; + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, + UVD_STATUS__IDLE, 0x7); + if (r) + goto Done; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, + tmp); + if (r) + goto Done; + + /* stall UMC channel */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, + tmp); + if (r) + goto Done; - /* Unblock VCPU Register access */ - WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), - UVD_RB_ARB_CTRL__VCPU_DIS_MASK, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + /* Unblock VCPU Register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); - /* release VCPU reset to boot */ - 
WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); - /* disable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, - ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); - /* reset LMI UMC/LMI/VCPU */ - tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); - tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; - WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + /* reset LMI UMC/LMI/VCPU */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); - tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); - tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; - WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); - /* clear VCN status */ - WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0); + /* clear VCN status */ + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0); - /* apply HW clock gating */ - vcn_v4_0_3_enable_clock_gating(adev, i); - } -Done: - if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + /* apply HW clock gating */ + vcn_v4_0_3_enable_clock_gating(vinst); +Done: return 0; } /** * vcn_v4_0_3_pause_dpg_mode - VCN pause with dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * @new_state: pause state * * Pause dpg mode for VCN block */ -static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, - struct dpg_pause_state *new_state) +static int vcn_v4_0_3_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) { return 0; @@ -1455,8 +1495,8 @@ static uint64_t vcn_v4_0_3_unified_ring_get_wptr(struct amdgpu_ring *ring) regUVD_RB_WPTR); } -static void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, - uint32_t val, uint32_t mask) +void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask) { /* Use normalized offsets when required */ if (vcn_v4_0_3_normalizn_reqd(ring->adev)) @@ -1468,7 +1508,8 @@ static void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t amdgpu_ring_write(ring, val); } -static void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, uint32_t val) +void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val) { /* Use normalized offsets when required */ if (vcn_v4_0_3_normalizn_reqd(ring->adev)) @@ -1479,8 +1520,8 @@ static void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg amdgpu_ring_write(ring, val); } -static void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, - unsigned int vmid, uint64_t pd_addr) +void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr) { struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->vm_hub]; @@ -1492,7 +1533,7 @@ static void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, lower_32_bits(pd_addr), 0xffffffff); } -static void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) +void 
vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring) { /* VCN engine access for HDP flush doesn't work when RRMT is enabled. * This is a workaround to avoid any HDP flush through VCN ring. @@ -1575,13 +1616,13 @@ static void vcn_v4_0_3_set_unified_ring_funcs(struct amdgpu_device *adev) /** * vcn_v4_0_3_is_idle - check VCN block is idle * - * @handle: amdgpu_device pointer + * @ip_block: Pointer to the amdgpu_ip_block structure * * Check whether VCN block is idle */ -static bool vcn_v4_0_3_is_idle(void *handle) +static bool vcn_v4_0_3_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, ret = 1; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -1616,64 +1657,58 @@ static int vcn_v4_0_3_wait_for_idle(struct amdgpu_ip_block *ip_block) /* vcn_v4_0_3_set_clockgating_state - set VCN block clockgating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: clock gating state * * Set VCN block clockgating state */ -static int vcn_v4_0_3_set_clockgating_state(void *handle, +static int vcn_v4_0_3_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = state == AMD_CG_STATE_GATE; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + if (enable) { if (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) != UVD_STATUS__IDLE) return -EBUSY; - vcn_v4_0_3_enable_clock_gating(adev, i); + vcn_v4_0_3_enable_clock_gating(vinst); } else { - vcn_v4_0_3_disable_clock_gating(adev, i); + vcn_v4_0_3_disable_clock_gating(vinst); } } return 0; } -/** - * vcn_v4_0_3_set_powergating_state - set VCN block powergating state - * - * @handle: amdgpu_device pointer - * @state: power gating state - * - * Set VCN block powergating state - */ -static int vcn_v4_0_3_set_powergating_state(void *handle, - enum amd_powergating_state state) +static int vcn_v4_0_3_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int ret; + struct amdgpu_device *adev = vinst->adev; + int ret = 0; /* for SRIOV, guest should not control VCN Power-gating * MMSCH FW should control Power-gating and clock-gating * guest should avoid touching CGC and PG */ if (amdgpu_sriov_vf(adev)) { - adev->vcn.cur_state = AMD_PG_STATE_UNGATE; + vinst->cur_state = AMD_PG_STATE_UNGATE; return 0; } - if (state == adev->vcn.cur_state) + if (state == vinst->cur_state) return 0; if (state == AMD_PG_STATE_GATE) - ret = vcn_v4_0_3_stop(adev); + ret = vcn_v4_0_3_stop(vinst); else - ret = vcn_v4_0_3_start(adev); + ret = vcn_v4_0_3_start(vinst); if (!ret) - adev->vcn.cur_state = state; + vinst->cur_state = state; return ret; } @@ -1835,7 +1870,7 @@ static const struct amd_ip_funcs vcn_v4_0_3_ip_funcs = { .is_idle = vcn_v4_0_3_is_idle, .wait_for_idle = vcn_v4_0_3_wait_for_idle, .set_clockgating_state = vcn_v4_0_3_set_clockgating_state, - .set_powergating_state = vcn_v4_0_3_set_powergating_state, + .set_powergating_state = vcn_set_powergating_state, .dump_ip_state = vcn_v4_0_3_dump_ip_state, .print_ip_state = vcn_v4_0_3_print_ip_state, }; @@ -1911,9 +1946,96 @@ static const struct amdgpu_ras_block_hw_ops vcn_v4_0_3_ras_hw_ops = { .reset_ras_error_count = vcn_v4_0_3_reset_ras_error_count, }; +static int 
vcn_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + struct aca_bank_info info; + u64 misc0; + int ret; + + ret = aca_bank_info_decode(bank, &info); + if (ret) + return ret; + + misc0 = bank->regs[ACA_REG_IDX_MISC0]; + switch (type) { + case ACA_SMU_TYPE_UE: + bank->aca_err_type = ACA_ERROR_TYPE_UE; + ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_UE, + 1ULL); + break; + case ACA_SMU_TYPE_CE: + bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank); + ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, + ACA_REG__MISC0__ERRCNT(misc0)); + break; + default: + return -EINVAL; + } + + return ret; +} + +/* reference to smu driver if header file */ +static int vcn_v4_0_3_err_codes[] = { + 14, 15, /* VCN */ +}; + +static bool vcn_v4_0_3_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank, + enum aca_smu_type type, void *data) +{ + u32 instlo; + + instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]); + instlo &= GENMASK(31, 1); + + if (instlo != mmSMNAID_AID0_MCA_SMU) + return false; + + if (aca_bank_check_error_codes(handle->adev, bank, + vcn_v4_0_3_err_codes, + ARRAY_SIZE(vcn_v4_0_3_err_codes))) + return false; + + return true; +} + +static const struct aca_bank_ops vcn_v4_0_3_aca_bank_ops = { + .aca_bank_parser = vcn_v4_0_3_aca_bank_parser, + .aca_bank_is_valid = vcn_v4_0_3_aca_bank_is_valid, +}; + +static const struct aca_info vcn_v4_0_3_aca_info = { + .hwip = ACA_HWIP_TYPE_SMU, + .mask = ACA_ERROR_UE_MASK, + .bank_ops = &vcn_v4_0_3_aca_bank_ops, +}; + +static int vcn_v4_0_3_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block) +{ + int r; + + r = amdgpu_ras_block_late_init(adev, ras_block); + if (r) + return r; + + r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__VCN, + &vcn_v4_0_3_aca_info, NULL); + if (r) + goto late_fini; + + return 0; + +late_fini: + amdgpu_ras_block_late_fini(adev, ras_block); + + return r; +} + static struct amdgpu_vcn_ras vcn_v4_0_3_ras = { .ras_block = { .hw_ops = &vcn_v4_0_3_ras_hw_ops, + .ras_late_init = vcn_v4_0_3_ras_late_init, }, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h index 0b046114373a..03572a1d0c9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.h @@ -26,4 +26,13 @@ extern const struct amdgpu_ip_block_version vcn_v4_0_3_ip_block; +void vcn_v4_0_3_enc_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val, uint32_t mask); + +void vcn_v4_0_3_enc_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, + uint32_t val); +void vcn_v4_0_3_enc_ring_emit_vm_flush(struct amdgpu_ring *ring, + unsigned int vmid, uint64_t pd_addr); +void vcn_v4_0_3_ring_emit_hdp_flush(struct amdgpu_ring *ring); + #endif /* __VCN_V4_0_3_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c index 71961fb3f7ff..ba603b2246e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c @@ -95,10 +95,10 @@ static int amdgpu_ih_clientid_vcns[] = { static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v4_0_5_set_powergating_state(void *handle, - enum amd_powergating_state state); -static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_device *adev, - int inst_idx, struct dpg_pause_state *new_state); +static int vcn_v4_0_5_set_pg_state(struct 
amdgpu_vcn_inst *vinst, + enum amd_powergating_state state); +static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state); static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring); /** @@ -112,13 +112,26 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring); static int vcn_v4_0_5_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int i, r; + + if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(4, 0, 6)) + adev->vcn.per_inst_fw = true; - /* re-use enc ring as unified ring */ - adev->vcn.num_enc_rings = 1; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) + /* re-use enc ring as unified ring */ + adev->vcn.inst[i].num_enc_rings = 1; vcn_v4_0_5_set_unified_ring_funcs(adev); vcn_v4_0_5_set_irq_funcs(adev); - return amdgpu_vcn_early_init(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst[i].set_pg_state = vcn_v4_0_5_set_pg_state; + + r = amdgpu_vcn_early_init(adev, i); + if (r) + return r; + } + + return 0; } /** @@ -136,15 +149,6 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_4_0_5); uint32_t *ptr; - r = amdgpu_vcn_sw_init(adev); - if (r) - return r; - - amdgpu_vcn_setup_ucode(adev); - - r = amdgpu_vcn_resume(adev); - if (r) - return r; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn4_fw_shared *fw_shared; @@ -152,6 +156,16 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) if (adev->vcn.harvest_config & (1 << i)) continue; + r = amdgpu_vcn_sw_init(adev, i); + if (r) + return r; + + amdgpu_vcn_setup_ucode(adev, i); + + r = amdgpu_vcn_resume(adev, i); + if (r) + return r; + atomic_set(&adev->vcn.inst[i].sched_score, 0); /* VCN UNIFIED TRAP */ @@ -170,7 +184,7 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) ring->use_doorbell = true; if (amdgpu_sriov_vf(adev)) ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + - i * (adev->vcn.num_enc_rings + 1) + 1; + i * (adev->vcn.inst[i].num_enc_rings + 1) + 1; else ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i; @@ -195,6 +209,9 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.inst[i].pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode; } if (amdgpu_sriov_vf(adev)) { @@ -203,9 +220,6 @@ static int vcn_v4_0_5_sw_init(struct amdgpu_ip_block *ip_block) return r; } - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - adev->vcn.pause_dpg_mode = vcn_v4_0_5_pause_dpg_mode; - /* Allocate memory for VCN IP Dump buffer */ ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); if (!ptr) { @@ -247,15 +261,19 @@ static int vcn_v4_0_5_sw_fini(struct amdgpu_ip_block *ip_block) if (amdgpu_sriov_vf(adev)) amdgpu_virt_free_mm_table(adev); - r = amdgpu_vcn_suspend(adev); - if (r) - return r; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(adev, i); + if (r) + return r; - r = amdgpu_vcn_sw_fini(adev); + r = amdgpu_vcn_sw_fini(adev, i); + if (r) + return r; + } kfree(adev->vcn.ip_dump); - return r; + return 0; } /** @@ -300,16 +318,19 @@ static int vcn_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int i; - cancel_delayed_work_sync(&adev->vcn.idle_work); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = 
&adev->vcn.inst[i]; + if (adev->vcn.harvest_config & (1 << i)) continue; + + cancel_delayed_work_sync(&vinst->idle_work); + if (!amdgpu_sriov_vf(adev)) { if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || - (adev->vcn.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(VCN, i, regUVD_STATUS))) { - vcn_v4_0_5_set_powergating_state(adev, AMD_PG_STATE_GATE); + (vinst->cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, regUVD_STATUS))) { + vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); } } } @@ -326,13 +347,18 @@ static int vcn_v4_0_5_hw_fini(struct amdgpu_ip_block *ip_block) */ static int vcn_v4_0_5_suspend(struct amdgpu_ip_block *ip_block) { - int r; + struct amdgpu_device *adev = ip_block->adev; + int r, i; r = vcn_v4_0_5_hw_fini(ip_block); if (r) return r; - r = amdgpu_vcn_suspend(ip_block->adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(ip_block->adev, i); + if (r) + return r; + } return r; } @@ -346,11 +372,14 @@ static int vcn_v4_0_5_suspend(struct amdgpu_ip_block *ip_block) */ static int vcn_v4_0_5_resume(struct amdgpu_ip_block *ip_block) { - int r; + struct amdgpu_device *adev = ip_block->adev; + int r, i; - r = amdgpu_vcn_resume(ip_block->adev); - if (r) - return r; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_resume(ip_block->adev, i); + if (r) + return r; + } r = vcn_v4_0_5_hw_init(ip_block); @@ -360,17 +389,18 @@ static int vcn_v4_0_5_resume(struct amdgpu_ip_block *ip_block) /** * vcn_v4_0_5_mc_resume - memory controller programming * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Let the VCN memory controller know it's offsets */ -static void vcn_v4_0_5_mc_resume(struct amdgpu_device *adev, int inst) +static void vcn_v4_0_5_mc_resume(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -420,18 +450,20 @@ static void vcn_v4_0_5_mc_resume(struct amdgpu_device *adev, int inst) /** * vcn_v4_0_5_mc_resume_dpg_mode - memory controller programming for dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * @indirect: indirectly write sram * * Let the VCN memory controller know it's offsets with dpg mode */ -static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, + bool indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -534,13 +566,14 @@ static void vcn_v4_0_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i /** * vcn_v4_0_5_disable_static_power_gating - disable VCN static power gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Disable static power gating for VCN block */ -static void vcn_v4_0_5_disable_static_power_gating(struct amdgpu_device *adev, int inst) 
+static void vcn_v4_0_5_disable_static_power_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t data = 0; if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { @@ -593,13 +626,14 @@ static void vcn_v4_0_5_disable_static_power_gating(struct amdgpu_device *adev, i /** * vcn_v4_0_5_enable_static_power_gating - enable VCN static power gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Enable static power gating for VCN block */ -static void vcn_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev, int inst) +static void vcn_v4_0_5_enable_static_power_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t data; if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { @@ -635,13 +669,14 @@ static void vcn_v4_0_5_enable_static_power_gating(struct amdgpu_device *adev, in /** * vcn_v4_0_5_disable_clock_gating - disable VCN clock gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Disable clock gating for VCN block */ -static void vcn_v4_0_5_disable_clock_gating(struct amdgpu_device *adev, int inst) +static void vcn_v4_0_5_disable_clock_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t data; if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) @@ -746,16 +781,18 @@ static void vcn_v4_0_5_disable_clock_gating(struct amdgpu_device *adev, int inst /** * vcn_v4_0_5_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * @sram_sel: sram select - * @inst_idx: instance number index * @indirect: indirectly write sram * * Disable clock gating for VCN block with dpg mode */ -static void vcn_v4_0_5_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel, - int inst_idx, uint8_t indirect) +static void vcn_v4_0_5_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst, + uint8_t sram_sel, + uint8_t indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t reg_data = 0; if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) @@ -803,13 +840,14 @@ static void vcn_v4_0_5_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, /** * vcn_v4_0_5_enable_clock_gating - enable VCN clock gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Enable clock gating for VCN block */ -static void vcn_v4_0_5_enable_clock_gating(struct amdgpu_device *adev, int inst) +static void vcn_v4_0_5_enable_clock_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t data; if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) @@ -862,14 +900,16 @@ static void vcn_v4_0_5_enable_clock_gating(struct amdgpu_device *adev, int inst) /** * vcn_v4_0_5_start_dpg_mode - VCN start with dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * @indirect: indirectly write sram * * Start VCN block with dpg mode */ -static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst, + bool indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; volatile struct amdgpu_vcn4_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; @@ -888,7 +928,7 @@ 
static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; /* enable clock gating */ - vcn_v4_0_5_disable_clock_gating_dpg_mode(adev, 0, inst_idx, indirect); + vcn_v4_0_5_disable_clock_gating_dpg_mode(vinst, 0, indirect); /* enable VCPU clock */ tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); @@ -936,7 +976,7 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect); - vcn_v4_0_5_mc_resume_dpg_mode(adev, inst_idx, indirect); + vcn_v4_0_5_mc_resume_dpg_mode(vinst, indirect); tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; @@ -989,182 +1029,180 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b /** * vcn_v4_0_5_start - VCN start * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Start VCN block */ -static int vcn_v4_0_5_start(struct amdgpu_device *adev) +static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; volatile struct amdgpu_vcn4_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t tmp; - int i, j, k, r; + int j, k, r; + + if (adev->vcn.harvest_config & (1 << i)) + return 0; if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + amdgpu_dpm_enable_vcn(adev, true, i); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return vcn_v4_0_5_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram); - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v4_0_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram); - continue; - } + /* disable VCN power gating */ + vcn_v4_0_5_disable_static_power_gating(vinst); - /* disable VCN power gating */ - vcn_v4_0_5_disable_static_power_gating(adev, i); - - /* set VCN status busy */ - tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; - WREG32_SOC15(VCN, i, regUVD_STATUS, tmp); - - /*SW clock gating */ - vcn_v4_0_5_disable_clock_gating(adev, i); - - /* enable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); - - /* disable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0, - ~UVD_MASTINT_EN__VCPU_EN_MASK); - - /* enable LMI MC and UMC channels */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - - tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); - tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; - tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; - WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); - - /* setup regUVD_LMI_CTRL */ - tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL); - WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp | - UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__MASK_MC_URGENT_MASK | - UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); - - /* setup regUVD_MPC_CNTL */ - tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL); - tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; - tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; - WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp); - - /* setup UVD_MPC_SET_MUXA0 */ - WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0, - ((0x1 << 
UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); - - /* setup UVD_MPC_SET_MUXB0 */ - WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0, - ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | - (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | - (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); - - /* setup UVD_MPC_SET_MUX */ - WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX, - ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | - (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | - (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); - - vcn_v4_0_5_mc_resume(adev, i); - - /* VCN global tiling registers */ - WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - - /* unblock VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); - - /* release VCPU reset to boot */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - - for (j = 0; j < 10; ++j) { - uint32_t status; - - for (k = 0; k < 100; ++k) { - status = RREG32_SOC15(VCN, i, regUVD_STATUS); - if (status & 2) - break; - mdelay(10); - if (amdgpu_emu_mode == 1) - msleep(1); - } + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, i, regUVD_STATUS, tmp); + + /* SW clock gating */ + vcn_v4_0_5_disable_clock_gating(vinst); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + + /* setup regUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL); + WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + /* setup regUVD_MPC_CNTL */ + tmp = RREG32_SOC15(VCN, i, regUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(VCN, i, regUVD_MPC_CNTL, tmp); + + /* setup UVD_MPC_SET_MUXA0 */ + WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + /* setup UVD_MPC_SET_MUXB0 */ + WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + /* setup UVD_MPC_SET_MUX */ + WREG32_SOC15(VCN, i, regUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + + vcn_v4_0_5_mc_resume(vinst); + + /* VCN global tiling registers */ + WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register access */ + 
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + + for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, i, regUVD_STATUS); + if (status & 2) + break; + mdelay(10); + if (amdgpu_emu_mode == 1) + msleep(1); + } - if (amdgpu_emu_mode == 1) { - r = -1; - if (status & 2) { - r = 0; - break; - } - } else { + if (amdgpu_emu_mode == 1) { + r = -1; + if (status & 2) { r = 0; - if (status & 2) - break; - - dev_err(adev->dev, - "VCN[%d] is not responding, trying to reset VCPU!!!\n", i); - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - mdelay(10); - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - - mdelay(10); - r = -1; + break; } + } else { + r = 0; + if (status & 2) + break; + + dev_err(adev->dev, + "VCN[%d] is not responding, trying to reset VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; } + } - if (r) { - dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); - return r; - } + if (r) { + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); + return r; + } - /* enable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), - UVD_MASTINT_EN__VCPU_EN_MASK, - ~UVD_MASTINT_EN__VCPU_EN_MASK); + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); - /* clear the busy bit of VCN_STATUS */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); - ring = &adev->vcn.inst[i].ring_enc[0]; - WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, - ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); - - WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); - - tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); - tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); - WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); - fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; - WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); - WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); - - tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); - WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp); - ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); - - tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); - tmp |= VCN_RB_ENABLE__RB1_EN_MASK; - WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); - fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); - } + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, i, 
regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); return 0; } @@ -1172,13 +1210,14 @@ static int vcn_v4_0_5_start(struct amdgpu_device *adev) /** * vcn_v4_0_5_stop_dpg_mode - VCN stop with dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * * Stop VCN block with dpg mode */ -static void vcn_v4_0_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +static void vcn_v4_0_5_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t tmp; /* Wait for power status to be 1 */ @@ -1200,101 +1239,104 @@ static void vcn_v4_0_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) /** * vcn_v4_0_5_stop - VCN stop * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Stop VCN block */ -static int vcn_v4_0_5_stop(struct amdgpu_device *adev) +static int vcn_v4_0_5_stop(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; volatile struct amdgpu_vcn4_fw_shared *fw_shared; uint32_t tmp; - int i, r = 0; + int r = 0; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + if (adev->vcn.harvest_config & (1 << i)) + return 0; - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - vcn_v4_0_5_stop_dpg_mode(adev, i); - continue; - } - - /* wait for vcn idle */ - r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); - if (r) - return r; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + vcn_v4_0_5_stop_dpg_mode(vinst); + r = 0; + goto done; + } - tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | - UVD_LMI_STATUS__READ_CLEAN_MASK | - UVD_LMI_STATUS__WRITE_CLEAN_MASK | - UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; - r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); - if (r) - return r; + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); + if (r) + goto done; - /* disable LMI UMC channel */ - tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2); - tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; - WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp); - tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | - UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; - r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); - if (r) - return r; + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + if (r) + goto done; + + /* disable LMI UMC channel */ + tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + 
UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + if (r) + goto done; - /* block VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), - UVD_RB_ARB_CTRL__VCPU_DIS_MASK, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); - /* reset VCPU */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); - /* disable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); - /* apply soft reset */ - tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); - tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; - WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); - tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); - tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; - WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + /* apply soft reset */ + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); - /* clear status */ - WREG32_SOC15(VCN, i, regUVD_STATUS, 0); + /* clear status */ + WREG32_SOC15(VCN, i, regUVD_STATUS, 0); - /* apply HW clock gating */ - vcn_v4_0_5_enable_clock_gating(adev, i); + /* apply HW clock gating */ + vcn_v4_0_5_enable_clock_gating(vinst); - /* enable VCN power gating */ - vcn_v4_0_5_enable_static_power_gating(adev, i); - } + /* enable VCN power gating */ + vcn_v4_0_5_enable_static_power_gating(vinst); +done: if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + amdgpu_dpm_enable_vcn(adev, false, i); - return 0; + return r; } /** * vcn_v4_0_5_pause_dpg_mode - VCN pause with dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * @new_state: pause state * * Pause dpg mode for VCN block */ -static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, - struct dpg_pause_state *new_state) +static int vcn_v4_0_5_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t reg_data = 0; int ret_code; @@ -1393,7 +1435,7 @@ static void vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring) } } -static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { +static struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs = { .type = AMDGPU_RING_TYPE_VCN_ENC, .align_mask = 0x3f, .nop = VCN_ENC_CMD_NO_OP, @@ -1437,6 +1479,9 @@ static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; + if (amdgpu_ip_version(adev, VCN_HWIP, 0) == IP_VERSION(4, 0, 5)) + vcn_v4_0_5_unified_ring_vm_funcs.secure_submission_supported = true; + adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v4_0_5_unified_ring_vm_funcs; adev->vcn.inst[i].ring_enc[0].me = i; } @@ -1445,13 +1490,13 @@ static void vcn_v4_0_5_set_unified_ring_funcs(struct amdgpu_device *adev) /** * 
vcn_v4_0_5_is_idle - check VCN block is idle * - * @handle: amdgpu_device pointer + * @ip_block: Pointer to the amdgpu_ip_block structure * * Check whether VCN block is idle */ -static bool vcn_v4_0_5_is_idle(void *handle) +static bool vcn_v4_0_5_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, ret = 1; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -1492,56 +1537,51 @@ static int vcn_v4_0_5_wait_for_idle(struct amdgpu_ip_block *ip_block) /** * vcn_v4_0_5_set_clockgating_state - set VCN block clockgating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: clock gating state * * Set VCN block clockgating state */ -static int vcn_v4_0_5_set_clockgating_state(void *handle, enum amd_clockgating_state state) +static int vcn_v4_0_5_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE) ? true : false; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + if (adev->vcn.harvest_config & (1 << i)) continue; if (enable) { if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE) return -EBUSY; - vcn_v4_0_5_enable_clock_gating(adev, i); + vcn_v4_0_5_enable_clock_gating(vinst); } else { - vcn_v4_0_5_disable_clock_gating(adev, i); + vcn_v4_0_5_disable_clock_gating(vinst); } } return 0; } -/** - * vcn_v4_0_5_set_powergating_state - set VCN block powergating state - * - * @handle: amdgpu_device pointer - * @state: power gating state - * - * Set VCN block powergating state - */ -static int vcn_v4_0_5_set_powergating_state(void *handle, enum amd_powergating_state state) +static int vcn_v4_0_5_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int ret; + int ret = 0; - if (state == adev->vcn.cur_state) + if (state == vinst->cur_state) return 0; if (state == AMD_PG_STATE_GATE) - ret = vcn_v4_0_5_stop(adev); + ret = vcn_v4_0_5_stop(vinst); else - ret = vcn_v4_0_5_start(adev); + ret = vcn_v4_0_5_start(vinst); if (!ret) - adev->vcn.cur_state = state; + vinst->cur_state = state; return ret; } @@ -1609,7 +1649,7 @@ static void vcn_v4_0_5_set_irq_funcs(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; - adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1; adev->vcn.inst[i].irq.funcs = &vcn_v4_0_5_irq_funcs; } } @@ -1687,7 +1727,7 @@ static const struct amd_ip_funcs vcn_v4_0_5_ip_funcs = { .is_idle = vcn_v4_0_5_is_idle, .wait_for_idle = vcn_v4_0_5_wait_for_idle, .set_clockgating_state = vcn_v4_0_5_set_clockgating_state, - .set_powergating_state = vcn_v4_0_5_set_powergating_state, + .set_powergating_state = vcn_set_powergating_state, .dump_ip_state = vcn_v4_0_5_dump_ip_state, .print_ip_state = vcn_v4_0_5_print_ip_state, }; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c index bd3d2bbdc16b..d99d05f42f1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.c @@ -32,7 +32,7 @@ #include "vcn/vcn_5_0_0_offset.h" #include "vcn/vcn_5_0_0_sh_mask.h" -#include "ivsrcid/vcn/irqsrcs_vcn_4_0.h" +#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" 
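Both the v4.0.5 and v5.0.0 hunks above retire the per-version set_powergating_state handlers: each struct amdgpu_vcn_inst now carries its own cur_state plus a set_pg_state callback installed at early_init time, and the IP funcs point at a common vcn_set_powergating_state helper instead. The body of that common helper is not part of this diff; the following is only a minimal sketch of how such a dispatcher could look, with the prototype assumed to mirror the new clock-gating callback and the loop and field names taken from the hunks above.

/* Hypothetical sketch of the shared dispatcher implied by the hunks above;
 * the real vcn_set_powergating_state() lives outside this diff. */
static int vcn_set_powergating_state_sketch(struct amdgpu_ip_block *ip_block,
					    enum amd_powergating_state state)
{
	struct amdgpu_device *adev = ip_block->adev;
	int i, r;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];

		/* e.g. vcn_v5_0_0_set_pg_state(): starts or stops only this
		 * instance and records vinst->cur_state on success */
		r = vinst->set_pg_state(vinst, state);
		if (r)
			return r;
	}

	return 0;
}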
#include "vcn_v5_0_0.h" #include <drm/drm_drv.h> @@ -78,10 +78,10 @@ static int amdgpu_ih_clientid_vcns[] = { static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev); static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev); -static int vcn_v5_0_0_set_powergating_state(void *handle, - enum amd_powergating_state state); -static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_device *adev, - int inst_idx, struct dpg_pause_state *new_state); +static int vcn_v5_0_0_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state); +static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state); static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring); /** @@ -95,14 +95,39 @@ static void vcn_v5_0_0_unified_ring_set_wptr(struct amdgpu_ring *ring); static int vcn_v5_0_0_early_init(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; + int i, r; - /* re-use enc ring as unified ring */ - adev->vcn.num_enc_rings = 1; + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) + /* re-use enc ring as unified ring */ + adev->vcn.inst[i].num_enc_rings = 1; vcn_v5_0_0_set_unified_ring_funcs(adev); vcn_v5_0_0_set_irq_funcs(adev); - return amdgpu_vcn_early_init(adev); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst[i].set_pg_state = vcn_v5_0_0_set_pg_state; + + r = amdgpu_vcn_early_init(adev, i); + if (r) + return r; + } + + return 0; +} + +void vcn_v5_0_0_alloc_ip_dump(struct amdgpu_device *adev) +{ + uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); + uint32_t *ptr; + + /* Allocate memory for VCN IP Dump buffer */ + ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); + if (!ptr) { + DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); + adev->vcn.ip_dump = NULL; + } else { + adev->vcn.ip_dump = ptr; + } } /** @@ -117,18 +142,6 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) struct amdgpu_ring *ring; struct amdgpu_device *adev = ip_block->adev; int i, r; - uint32_t reg_count = ARRAY_SIZE(vcn_reg_list_5_0); - uint32_t *ptr; - - r = amdgpu_vcn_sw_init(adev); - if (r) - return r; - - amdgpu_vcn_setup_ucode(adev); - - r = amdgpu_vcn_resume(adev); - if (r) - return r; for (i = 0; i < adev->vcn.num_vcn_inst; i++) { volatile struct amdgpu_vcn5_fw_shared *fw_shared; @@ -136,17 +149,27 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) if (adev->vcn.harvest_config & (1 << i)) continue; + r = amdgpu_vcn_sw_init(adev, i); + if (r) + return r; + + amdgpu_vcn_setup_ucode(adev, i); + + r = amdgpu_vcn_resume(adev, i); + if (r) + return r; + atomic_set(&adev->vcn.inst[i].sched_score, 0); /* VCN UNIFIED TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], - VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); + VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq); if (r) return r; /* VCN POISON TRAP */ r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], - VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq); + VCN_5_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq); if (r) return r; @@ -168,23 +191,16 @@ static int vcn_v5_0_0_sw_init(struct amdgpu_ip_block *ip_block) if (amdgpu_vcnfw_log) amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]); + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + adev->vcn.inst[i].pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; } /* TODO: Add queue reset mask when FW fully supports it */ adev->vcn.supported_reset = amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); - if 
(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) - adev->vcn.pause_dpg_mode = vcn_v5_0_0_pause_dpg_mode; - - /* Allocate memory for VCN IP Dump buffer */ - ptr = kcalloc(adev->vcn.num_vcn_inst * reg_count, sizeof(uint32_t), GFP_KERNEL); - if (!ptr) { - DRM_ERROR("Failed to allocate memory for VCN IP Dump\n"); - adev->vcn.ip_dump = NULL; - } else { - adev->vcn.ip_dump = ptr; - } + vcn_v5_0_0_alloc_ip_dump(adev); r = amdgpu_vcn_sysfs_reset_mask_init(adev); if (r) @@ -220,16 +236,23 @@ static int vcn_v5_0_0_sw_fini(struct amdgpu_ip_block *ip_block) drm_dev_exit(idx); } - r = amdgpu_vcn_suspend(adev); - if (r) - return r; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(adev, i); + if (r) + return r; + } amdgpu_vcn_sysfs_reset_mask_fini(adev); - r = amdgpu_vcn_sw_fini(adev); + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_sw_fini(adev, i); + if (r) + return r; + } kfree(adev->vcn.ip_dump); - return r; + return 0; } /** @@ -274,16 +297,19 @@ static int vcn_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block) struct amdgpu_device *adev = ip_block->adev; int i; - cancel_delayed_work_sync(&adev->vcn.idle_work); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + if (adev->vcn.harvest_config & (1 << i)) continue; + + cancel_delayed_work_sync(&vinst->idle_work); + if (!amdgpu_sriov_vf(adev)) { if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || - (adev->vcn.cur_state != AMD_PG_STATE_GATE && - RREG32_SOC15(VCN, i, regUVD_STATUS))) { - vcn_v5_0_0_set_powergating_state(adev, AMD_PG_STATE_GATE); + (vinst->cur_state != AMD_PG_STATE_GATE && + RREG32_SOC15(VCN, i, regUVD_STATUS))) { + vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); } } } @@ -300,13 +326,18 @@ static int vcn_v5_0_0_hw_fini(struct amdgpu_ip_block *ip_block) */ static int vcn_v5_0_0_suspend(struct amdgpu_ip_block *ip_block) { - int r; + struct amdgpu_device *adev = ip_block->adev; + int r, i; r = vcn_v5_0_0_hw_fini(ip_block); if (r) return r; - r = amdgpu_vcn_suspend(ip_block->adev); + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(ip_block->adev, i); + if (r) + return r; + } return r; } @@ -320,11 +351,14 @@ static int vcn_v5_0_0_suspend(struct amdgpu_ip_block *ip_block) */ static int vcn_v5_0_0_resume(struct amdgpu_ip_block *ip_block) { - int r; + struct amdgpu_device *adev = ip_block->adev; + int r, i; - r = amdgpu_vcn_resume(ip_block->adev); - if (r) - return r; + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_resume(ip_block->adev, i); + if (r) + return r; + } r = vcn_v5_0_0_hw_init(ip_block); @@ -334,17 +368,18 @@ static int vcn_v5_0_0_resume(struct amdgpu_ip_block *ip_block) /** * vcn_v5_0_0_mc_resume - memory controller programming * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Let the VCN memory controller know it's offsets */ -static void vcn_v5_0_0_mc_resume(struct amdgpu_device *adev, int inst) +static void vcn_v5_0_0_mc_resume(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -394,18 +429,20 @@ static void vcn_v5_0_0_mc_resume(struct amdgpu_device *adev, int inst) /** * vcn_v5_0_0_mc_resume_dpg_mode - 
memory controller programming for dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * @indirect: indirectly write sram * * Let the VCN memory controller know it's offsets with dpg mode */ -static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst, + bool indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t offset, size; const struct common_firmware_header *hdr; - hdr = (const struct common_firmware_header *)adev->vcn.fw[inst_idx]->data; + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); /* cache window 0: fw */ @@ -504,13 +541,14 @@ static void vcn_v5_0_0_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_i /** * vcn_v5_0_0_disable_static_power_gating - disable VCN static power gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Disable static power gating for VCN block */ -static void vcn_v5_0_0_disable_static_power_gating(struct amdgpu_device *adev, int inst) +static void vcn_v5_0_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t data = 0; if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { @@ -571,13 +609,14 @@ static void vcn_v5_0_0_disable_static_power_gating(struct amdgpu_device *adev, i /** * vcn_v5_0_0_enable_static_power_gating - enable VCN static power gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Enable static power gating for VCN block */ -static void vcn_v5_0_0_enable_static_power_gating(struct amdgpu_device *adev, int inst) +static void vcn_v5_0_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; uint32_t data; if (adev->pg_flags & AMD_PG_SUPPORT_VCN) { @@ -617,12 +656,11 @@ static void vcn_v5_0_0_enable_static_power_gating(struct amdgpu_device *adev, in /** * vcn_v5_0_0_disable_clock_gating - disable VCN clock gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Disable clock gating for VCN block */ -static void vcn_v5_0_0_disable_clock_gating(struct amdgpu_device *adev, int inst) +static void vcn_v5_0_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst) { return; } @@ -631,15 +669,15 @@ static void vcn_v5_0_0_disable_clock_gating(struct amdgpu_device *adev, int inst /** * vcn_v5_0_0_disable_clock_gating_dpg_mode - disable VCN clock gating dpg mode * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * @sram_sel: sram select - * @inst_idx: instance number index * @indirect: indirectly write sram * * Disable clock gating for VCN block with dpg mode */ -static void vcn_v5_0_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel, - int inst_idx, uint8_t indirect) +static void vcn_v5_0_0_disable_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst, + uint8_t sram_sel, + uint8_t indirect) { return; } @@ -648,12 +686,11 @@ static void vcn_v5_0_0_disable_clock_gating_dpg_mode(struct amdgpu_device *adev, /** * vcn_v5_0_0_enable_clock_gating - enable VCN clock gating * - * @adev: amdgpu_device pointer - * @inst: instance number + * @vinst: VCN instance * * Enable clock gating for VCN block */ -static void vcn_v5_0_0_enable_clock_gating(struct 
amdgpu_device *adev, int inst) +static void vcn_v5_0_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst) { return; } @@ -661,14 +698,16 @@ static void vcn_v5_0_0_enable_clock_gating(struct amdgpu_device *adev, int inst) /** * vcn_v5_0_0_start_dpg_mode - VCN start with dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * @indirect: indirectly write sram * * Start VCN block with dpg mode */ -static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect) +static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, + bool indirect) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; volatile struct amdgpu_vcn5_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; struct amdgpu_ring *ring; uint32_t tmp; @@ -708,7 +747,7 @@ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( VCN, inst_idx, regUVD_LMI_CTRL), tmp, 0, indirect); - vcn_v5_0_0_mc_resume_dpg_mode(adev, inst_idx, indirect); + vcn_v5_0_0_mc_resume_dpg_mode(vinst, indirect); tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; @@ -760,153 +799,151 @@ static int vcn_v5_0_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, b /** * vcn_v5_0_0_start - VCN start * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Start VCN block */ -static int vcn_v5_0_0_start(struct amdgpu_device *adev) +static int vcn_v5_0_0_start(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; volatile struct amdgpu_vcn5_fw_shared *fw_shared; struct amdgpu_ring *ring; uint32_t tmp; - int i, j, k, r; + int j, k, r; + + if (adev->vcn.harvest_config & (1 << i)) + return 0; if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, true); + amdgpu_dpm_enable_vcn(adev, true, i); - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return vcn_v5_0_0_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram); - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - r = vcn_v5_0_0_start_dpg_mode(adev, i, adev->vcn.indirect_sram); - continue; - } + /* disable VCN power gating */ + vcn_v5_0_0_disable_static_power_gating(vinst); - /* disable VCN power gating */ - vcn_v5_0_0_disable_static_power_gating(adev, i); - - /* set VCN status busy */ - tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; - WREG32_SOC15(VCN, i, regUVD_STATUS, tmp); - - /* enable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); - - /* disable master interrupt */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0, - ~UVD_MASTINT_EN__VCPU_EN_MASK); - - /* enable LMI MC and UMC channels */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - - tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); - tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; - tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; - WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); - - /* setup regUVD_LMI_CTRL */ - tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL); - WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp | - UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__MASK_MC_URGENT_MASK | - 
UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); - - vcn_v5_0_0_mc_resume(adev, i); - - /* VCN global tiling registers */ - WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG, - adev->gfx.config.gb_addr_config); - - /* unblock VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); - - /* release VCPU reset to boot */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - - for (j = 0; j < 10; ++j) { - uint32_t status; - - for (k = 0; k < 100; ++k) { - status = RREG32_SOC15(VCN, i, regUVD_STATUS); - if (status & 2) - break; - mdelay(10); - if (amdgpu_emu_mode == 1) - msleep(1); - } + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, i, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, i, regUVD_STATUS, tmp); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); - if (amdgpu_emu_mode == 1) { - r = -1; - if (status & 2) { - r = 0; - break; - } - } else { + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + + /* setup regUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL); + WREG32_SOC15(VCN, i, regUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + vcn_v5_0_0_mc_resume(vinst); + + /* VCN global tiling registers */ + WREG32_SOC15(VCN, i, regUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + + for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, i, regUVD_STATUS); + if (status & 2) + break; + mdelay(10); + if (amdgpu_emu_mode == 1) + msleep(1); + } + + if (amdgpu_emu_mode == 1) { + r = -1; + if (status & 2) { r = 0; - if (status & 2) - break; - - dev_err(adev->dev, - "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - mdelay(10); - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - - mdelay(10); - r = -1; + break; } + } else { + r = 0; + if (status & 2) + break; + + dev_err(adev->dev, + "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; } + } - if (r) { - dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); - return r; - } + if (r) { + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); + return r; + } - /* enable master interrupt */ - 
WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), - UVD_MASTINT_EN__VCPU_EN_MASK, - ~UVD_MASTINT_EN__VCPU_EN_MASK); + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); - /* clear the busy bit of VCN_STATUS */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); - ring = &adev->vcn.inst[i].ring_enc[0]; - WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, - ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | - VCN_RB1_DB_CTRL__EN_MASK); - - WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); - WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); - WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); - - tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); - tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); - WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); - fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; - WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); - WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); - - tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); - WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp); - ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); - - tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); - tmp |= VCN_RB_ENABLE__RB1_EN_MASK; - WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); - fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); - } + ring = &adev->vcn.inst[i].ring_enc[0]; + WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, i, regUVD_RB_RPTR); + WREG32_SOC15(VCN, i, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); return 0; } @@ -914,17 +951,18 @@ static int vcn_v5_0_0_start(struct amdgpu_device *adev) /** * vcn_v5_0_0_stop_dpg_mode - VCN stop with dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * * Stop VCN block with dpg mode */ -static void vcn_v5_0_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) +static void vcn_v5_0_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE}; uint32_t tmp; - vcn_v5_0_0_pause_dpg_mode(adev, inst_idx, &state); + vcn_v5_0_0_pause_dpg_mode(vinst, &state); /* Wait for power status to be 1 */ SOC15_WAIT_ON_RREG(VCN, inst_idx, regUVD_POWER_STATUS, 1, @@ -944,98 +982,101 @@ static void vcn_v5_0_0_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx) /** * vcn_v5_0_0_stop - VCN stop * - * @adev: amdgpu_device pointer + * @vinst: VCN instance * * Stop VCN block */ 
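The tail of vcn_v5_0_0_start() programs the unified encode ring in an order that is easy to lose in the flattened hunk above: set up the doorbell and ring buffer, hold RB1 disabled while FW_QUEUE_RING_RESET is flagged in the shared buffer and both pointers are zeroed and resynchronized, then re-enable RB1 and clear the reset/hold-off flags. A condensed restatement of that sequence, using the same registers and flags as the hunk above and shown only for readability:

	/* point the unified ring's doorbell at this instance and program RB1 */
	WREG32_SOC15(VCN, i, regVCN_RB1_DB_CTRL,
		     ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
		     VCN_RB1_DB_CTRL__EN_MASK);
	WREG32_SOC15(VCN, i, regUVD_RB_BASE_LO, ring->gpu_addr);
	WREG32_SOC15(VCN, i, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
	WREG32_SOC15(VCN, i, regUVD_RB_SIZE, ring->ring_size / 4);

	/* keep RB1 disabled and tell firmware a ring reset is in progress */
	tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE) & ~VCN_RB_ENABLE__RB1_EN_MASK;
	WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
	fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET;

	/* zero both pointers, then mirror the hardware read pointer back */
	WREG32_SOC15(VCN, i, regUVD_RB_RPTR, 0);
	WREG32_SOC15(VCN, i, regUVD_RB_WPTR, 0);
	WREG32_SOC15(VCN, i, regUVD_RB_WPTR, RREG32_SOC15(VCN, i, regUVD_RB_RPTR));
	ring->wptr = RREG32_SOC15(VCN, i, regUVD_RB_WPTR);

	/* finally re-enable RB1 and clear the reset/hold-off flags */
	tmp = RREG32_SOC15(VCN, i, regVCN_RB_ENABLE) | VCN_RB_ENABLE__RB1_EN_MASK;
	WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
	fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);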
-static int vcn_v5_0_0_stop(struct amdgpu_device *adev) +static int vcn_v5_0_0_stop(struct amdgpu_vcn_inst *vinst) { + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; volatile struct amdgpu_vcn5_fw_shared *fw_shared; uint32_t tmp; - int i, r = 0; + int r = 0; - for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { - if (adev->vcn.harvest_config & (1 << i)) - continue; + if (adev->vcn.harvest_config & (1 << i)) + return 0; - fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; - fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { - vcn_v5_0_0_stop_dpg_mode(adev, i); - continue; - } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + vcn_v5_0_0_stop_dpg_mode(vinst); + r = 0; + goto done; + } - /* wait for vcn idle */ - r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); - if (r) - return r; + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); + if (r) + goto done; - tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | - UVD_LMI_STATUS__READ_CLEAN_MASK | - UVD_LMI_STATUS__WRITE_CLEAN_MASK | - UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; - r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); - if (r) - return r; + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + if (r) + goto done; + + /* disable LMI UMC channel */ + tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); + if (r) + goto done; - /* disable LMI UMC channel */ - tmp = RREG32_SOC15(VCN, i, regUVD_LMI_CTRL2); - tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; - WREG32_SOC15(VCN, i, regUVD_LMI_CTRL2, tmp); - tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | - UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; - r = SOC15_WAIT_ON_RREG(VCN, i, regUVD_LMI_STATUS, tmp, tmp); - if (r) - return r; + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); - /* block VCPU register access */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_RB_ARB_CTRL), - UVD_RB_ARB_CTRL__VCPU_DIS_MASK, - ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); - - /* reset VCPU */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), - UVD_VCPU_CNTL__BLK_RST_MASK, - ~UVD_VCPU_CNTL__BLK_RST_MASK); - - /* disable VCPU clock */ - WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), 0, - ~(UVD_VCPU_CNTL__CLK_EN_MASK)); - - /* apply soft reset */ - tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); - tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; - WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); - tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); - tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; - WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); - - /* clear status */ - WREG32_SOC15(VCN, i, regUVD_STATUS, 0); - - /* enable VCN power gating */ - vcn_v5_0_0_enable_static_power_gating(adev, i); - } + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, i, 
regUVD_VCPU_CNTL), 0, + ~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* apply soft reset */ + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + tmp = RREG32_SOC15(VCN, i, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, i, regUVD_SOFT_RESET, tmp); + + /* clear status */ + WREG32_SOC15(VCN, i, regUVD_STATUS, 0); + + /* enable VCN power gating */ + vcn_v5_0_0_enable_static_power_gating(vinst); +done: if (adev->pm.dpm_enabled) - amdgpu_dpm_enable_uvd(adev, false); + amdgpu_dpm_enable_vcn(adev, false, i); - return 0; + return r; } /** * vcn_v5_0_0_pause_dpg_mode - VCN pause with dpg mode * - * @adev: amdgpu_device pointer - * @inst_idx: instance number index + * @vinst: VCN instance * @new_state: pause state * * Pause dpg mode for VCN block */ -static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_device *adev, int inst_idx, - struct dpg_pause_state *new_state) +static int vcn_v5_0_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst, + struct dpg_pause_state *new_state) { + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; uint32_t reg_data = 0; int ret_code; @@ -1182,13 +1223,13 @@ static void vcn_v5_0_0_set_unified_ring_funcs(struct amdgpu_device *adev) /** * vcn_v5_0_0_is_idle - check VCN block is idle * - * @handle: amdgpu_device pointer + * @ip_block: Pointer to the amdgpu_ip_block structure * * Check whether VCN block is idle */ -static bool vcn_v5_0_0_is_idle(void *handle) +static bool vcn_v5_0_0_is_idle(struct amdgpu_ip_block *ip_block) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int i, ret = 1; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { @@ -1229,56 +1270,51 @@ static int vcn_v5_0_0_wait_for_idle(struct amdgpu_ip_block *ip_block) /** * vcn_v5_0_0_set_clockgating_state - set VCN block clockgating state * - * @handle: amdgpu_device pointer + * @ip_block: amdgpu_ip_block pointer * @state: clock gating state * * Set VCN block clockgating state */ -static int vcn_v5_0_0_set_clockgating_state(void *handle, enum amd_clockgating_state state) +static int vcn_v5_0_0_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; bool enable = (state == AMD_CG_STATE_GATE) ? 
true : false; int i; for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + if (adev->vcn.harvest_config & (1 << i)) continue; if (enable) { if (RREG32_SOC15(VCN, i, regUVD_STATUS) != UVD_STATUS__IDLE) return -EBUSY; - vcn_v5_0_0_enable_clock_gating(adev, i); + vcn_v5_0_0_enable_clock_gating(vinst); } else { - vcn_v5_0_0_disable_clock_gating(adev, i); + vcn_v5_0_0_disable_clock_gating(vinst); } } return 0; } -/** - * vcn_v5_0_0_set_powergating_state - set VCN block powergating state - * - * @handle: amdgpu_device pointer - * @state: power gating state - * - * Set VCN block powergating state - */ -static int vcn_v5_0_0_set_powergating_state(void *handle, enum amd_powergating_state state) +static int vcn_v5_0_0_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int ret; + int ret = 0; - if (state == adev->vcn.cur_state) + if (state == vinst->cur_state) return 0; if (state == AMD_PG_STATE_GATE) - ret = vcn_v5_0_0_stop(adev); + ret = vcn_v5_0_0_stop(vinst); else - ret = vcn_v5_0_0_start(adev); + ret = vcn_v5_0_0_start(vinst); if (!ret) - adev->vcn.cur_state = state; + vinst->cur_state = state; return ret; } @@ -1312,10 +1348,10 @@ static int vcn_v5_0_0_process_interrupt(struct amdgpu_device *adev, struct amdgp DRM_DEBUG("IH: VCN TRAP\n"); switch (entry->src_id) { - case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE: amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]); break; - case VCN_4_0__SRCID_UVD_POISON: + case VCN_5_0__SRCID_UVD_POISON: amdgpu_vcn_process_poison_irq(adev, source, entry); break; default: @@ -1346,12 +1382,13 @@ static void vcn_v5_0_0_set_irq_funcs(struct amdgpu_device *adev) if (adev->vcn.harvest_config & (1 << i)) continue; - adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1; adev->vcn.inst[i].irq.funcs = &vcn_v5_0_0_irq_funcs; } } -static void vcn_v5_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm_printer *p) +void vcn_v5_0_0_print_ip_state(struct amdgpu_ip_block *ip_block, + struct drm_printer *p) { struct amdgpu_device *adev = ip_block->adev; int i, j; @@ -1383,7 +1420,7 @@ static void vcn_v5_0_print_ip_state(struct amdgpu_ip_block *ip_block, struct drm } } -static void vcn_v5_0_dump_ip_state(struct amdgpu_ip_block *ip_block) +void vcn_v5_0_0_dump_ip_state(struct amdgpu_ip_block *ip_block) { struct amdgpu_device *adev = ip_block->adev; int i, j; @@ -1423,9 +1460,9 @@ static const struct amd_ip_funcs vcn_v5_0_0_ip_funcs = { .is_idle = vcn_v5_0_0_is_idle, .wait_for_idle = vcn_v5_0_0_wait_for_idle, .set_clockgating_state = vcn_v5_0_0_set_clockgating_state, - .set_powergating_state = vcn_v5_0_0_set_powergating_state, - .dump_ip_state = vcn_v5_0_dump_ip_state, - .print_ip_state = vcn_v5_0_print_ip_state, + .set_powergating_state = vcn_set_powergating_state, + .dump_ip_state = vcn_v5_0_0_dump_ip_state, + .print_ip_state = vcn_v5_0_0_print_ip_state, }; const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h index 51bbccd4360f..b8927652bc50 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_0.h @@ -32,6 +32,11 @@ #define VCN_VID_IP_ADDRESS 0x0 #define VCN_AON_IP_ADDRESS 0x30000 +void vcn_v5_0_0_alloc_ip_dump(struct amdgpu_device *adev); +void 
vcn_v5_0_0_print_ip_state(struct amdgpu_ip_block *ip_block, + struct drm_printer *p); +void vcn_v5_0_0_dump_ip_state(struct amdgpu_ip_block *ip_block); + extern const struct amdgpu_ip_block_version vcn_v5_0_0_ip_block; #endif /* __VCN_V5_0_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c new file mode 100644 index 000000000000..581d8629b9d9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.c @@ -0,0 +1,1157 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_vcn.h" +#include "amdgpu_pm.h" +#include "soc15.h" +#include "soc15d.h" +#include "soc15_hw_ip.h" +#include "vcn_v2_0.h" +#include "vcn_v4_0_3.h" + +#include "vcn/vcn_5_0_0_offset.h" +#include "vcn/vcn_5_0_0_sh_mask.h" +#include "ivsrcid/vcn/irqsrcs_vcn_5_0.h" +#include "vcn_v5_0_0.h" +#include "vcn_v5_0_1.h" + +#include <drm/drm_drv.h> + +static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev); +static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev); +static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state); +static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring); + +/** + * vcn_v5_0_1_early_init - set function pointers and load microcode + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * + * Set ring and irq function pointers + * Load microcode from filesystem + */ +static int vcn_v5_0_1_early_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, r; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) + /* re-use enc ring as unified ring */ + adev->vcn.inst[i].num_enc_rings = 1; + + vcn_v5_0_1_set_unified_ring_funcs(adev); + vcn_v5_0_1_set_irq_funcs(adev); + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst[i].set_pg_state = vcn_v5_0_1_set_pg_state; + + r = amdgpu_vcn_early_init(adev, i); + if (r) + return r; + } + + return 0; +} + +static void vcn_v5_0_1_fw_shared_init(struct amdgpu_device *adev, int inst_idx) +{ + struct amdgpu_vcn5_fw_shared *fw_shared; + + fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + + if (fw_shared->sq.is_enabled) + return; + fw_shared->present_flag_0 = + cpu_to_le32(AMDGPU_FW_SHARED_FLAG_0_UNIFIED_QUEUE); + fw_shared->sq.is_enabled = 1; + + if (amdgpu_vcnfw_log) + amdgpu_vcn_fwlog_init(&adev->vcn.inst[inst_idx]); +} + +/** + * vcn_v5_0_1_sw_init - sw init for VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Load firmware and sw initialization + */ +static int vcn_v5_0_1_sw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, r, vcn_inst; + + /* VCN UNIFIED TRAP */ + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, + VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst->irq); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + vcn_inst = GET_INST(VCN, i); + + r = amdgpu_vcn_sw_init(adev, i); + if (r) + return r; + + amdgpu_vcn_setup_ucode(adev, i); + + r = amdgpu_vcn_resume(adev, i); + if (r) + return r; + + ring = &adev->vcn.inst[i].ring_enc[0]; + ring->use_doorbell = true; + ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 9 * vcn_inst; + + ring->vm_hub = AMDGPU_MMHUB0(adev->vcn.inst[i].aid_id); + sprintf(ring->name, "vcn_unified_%d", adev->vcn.inst[i].aid_id); + + r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0, + AMDGPU_RING_PRIO_DEFAULT, &adev->vcn.inst[i].sched_score); + if (r) + return r; + + vcn_v5_0_1_fw_shared_init(adev, i); + } + + /* TODO: Add queue reset mask when FW fully supports it */ + adev->vcn.supported_reset = + amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]); + + vcn_v5_0_0_alloc_ip_dump(adev); + + return amdgpu_vcn_sysfs_reset_mask_init(adev); +} + +/** + * vcn_v5_0_1_sw_fini - sw fini for VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * + * VCN suspend and free up sw allocation + */ +static int vcn_v5_0_1_sw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, r, idx; + + if (drm_dev_enter(adev_to_drm(adev), &idx)) { + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + volatile struct amdgpu_vcn5_fw_shared *fw_shared; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->present_flag_0 = 0; + fw_shared->sq.is_enabled = 0; + } + + drm_dev_exit(idx); + } + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(adev, i); + if (r) + return r; + } + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_sw_fini(adev, i); + if (r) + return r; + } + + amdgpu_vcn_sysfs_reset_mask_fini(adev); + + kfree(adev->vcn.ip_dump); + + return 0; +} + +/** + * vcn_v5_0_1_hw_init - start and test VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Initialize the hardware, boot up the VCPU and do some testing + */ +static int vcn_v5_0_1_hw_init(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + struct amdgpu_ring *ring; + int i, r, vcn_inst; + + if (RREG32_SOC15(VCN, GET_INST(VCN, 0), regVCN_RRMT_CNTL) & 0x100) + adev->vcn.caps |= AMDGPU_VCN_CAPS(RRMT_ENABLED); + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + vcn_inst = GET_INST(VCN, i); + ring = &adev->vcn.inst[i].ring_enc[0]; + + if (ring->use_doorbell) + adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell, + ((adev->doorbell_index.vcn.vcn_ring0_1 << 1) + + 9 * vcn_inst), + adev->vcn.inst[i].aid_id); + + /* Re-init fw_shared, if required */ + vcn_v5_0_1_fw_shared_init(adev, i); + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + } + + return 0; +} + +/** + * vcn_v5_0_1_hw_fini - stop the hardware block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Stop the VCN block, mark ring as not ready any more + */ +static int vcn_v5_0_1_hw_fini(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + + cancel_delayed_work_sync(&adev->vcn.inst[i].idle_work); + if (vinst->cur_state != AMD_PG_STATE_GATE) + vinst->set_pg_state(vinst, AMD_PG_STATE_GATE); + } + + return 0; +} + +/** + * vcn_v5_0_1_suspend - suspend VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * HW fini and suspend VCN block + */ +static int vcn_v5_0_1_suspend(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r, i; + + r = vcn_v5_0_1_hw_fini(ip_block); + if (r) + return r; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + r = amdgpu_vcn_suspend(ip_block->adev, i); + if (r) + return r; + } + + return r; +} + +/** + * vcn_v5_0_1_resume - resume VCN block + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. 
+ * + * Resume firmware and hw init VCN block + */ +static int vcn_v5_0_1_resume(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int r, i; + + for (i = 0; i < adev->vcn.num_vcn_inst; i++) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + + if (amdgpu_in_reset(adev)) + vinst->cur_state = AMD_PG_STATE_GATE; + + r = amdgpu_vcn_resume(ip_block->adev, i); + if (r) + return r; + } + + r = vcn_v5_0_1_hw_init(ip_block); + + return r; +} + +/** + * vcn_v5_0_1_mc_resume - memory controller programming + * + * @vinst: VCN instance + * + * Let the VCN memory controller know it's offsets + */ +static void vcn_v5_0_1_mc_resume(struct amdgpu_vcn_inst *vinst) +{ + struct amdgpu_device *adev = vinst->adev; + int inst = vinst->inst; + uint32_t offset, size, vcn_inst; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst].fw->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + vcn_inst = GET_INST(VCN, inst); + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, 0); + offset = 0; + } else { + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr)); + offset = size; + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); + } + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE0, size); + + /* cache window 1: stack */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET1, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + + /* cache window 2: context */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_OFFSET2, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); + + /* non-cache window */ + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_OFFSET0, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_VCPU_NONCACHE_SIZE0, + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared))); +} + +/** + * vcn_v5_0_1_mc_resume_dpg_mode - memory controller programming for dpg mode + * + * @vinst: VCN instance + * @indirect: indirectly write sram + * + * Let the VCN memory controller know it's offsets with dpg mode + */ +static void vcn_v5_0_1_mc_resume_dpg_mode(struct amdgpu_vcn_inst 
*vinst, + bool indirect) +{ + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; + uint32_t offset, size; + const struct common_firmware_header *hdr; + + hdr = (const struct common_firmware_header *)adev->vcn.inst[inst_idx].fw->data; + size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + + inst_idx].tmr_mc_addr_lo), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + + inst_idx].tmr_mc_addr_hi), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect); + } + offset = 0; + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect); + offset = size; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET0), + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect); + } + + if (!indirect) + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE0), size, 0, indirect); + else + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE0), 0, 0, indirect); + + /* cache window 1: stack */ + if (!indirect) { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } else { + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect); + } + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect); + + /* cache window 2: context */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + + AMDGPU_VCN_STACK_SIZE), 0, 
indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect); + + /* non-cache window */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_LOW), + lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH), + upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_NONCACHE_SIZE0), + AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_vcn5_fw_shared)), 0, indirect); + + /* VCN global tiling registers */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect); +} + +/** + * vcn_v5_0_1_disable_clock_gating - disable VCN clock gating + * + * @vinst: VCN instance + * + * Disable clock gating for VCN block + */ +static void vcn_v5_0_1_disable_clock_gating(struct amdgpu_vcn_inst *vinst) +{ +} + +/** + * vcn_v5_0_1_enable_clock_gating - enable VCN clock gating + * + * @vinst: VCN instance + * + * Enable clock gating for VCN block + */ +static void vcn_v5_0_1_enable_clock_gating(struct amdgpu_vcn_inst *vinst) +{ +} + +/** + * vcn_v5_0_1_start_dpg_mode - VCN start with dpg mode + * + * @vinst: VCN instance + * @indirect: indirectly write sram + * + * Start VCN block with dpg mode + */ +static int vcn_v5_0_1_start_dpg_mode(struct amdgpu_vcn_inst *vinst, + bool indirect) +{ + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; + volatile struct amdgpu_vcn5_fw_shared *fw_shared = + adev->vcn.inst[inst_idx].fw_shared.cpu_addr; + struct amdgpu_ring *ring; + int vcn_inst; + uint32_t tmp; + + vcn_inst = GET_INST(VCN, inst_idx); + + /* disable register anti-hang mechanism */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 1, + ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_POWER_STATUS, tmp); + + if (indirect) { + adev->vcn.inst[inst_idx].dpg_sram_curr_addr = + (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr; + /* Use dummy register 0xDEADBEEF passing AID selection to PSP FW */ + WREG32_SOC24_DPG_MODE(inst_idx, 0xDEADBEEF, + adev->vcn.inst[inst_idx].aid_id, 0, true); + } + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK | UVD_VCPU_CNTL__BLK_RST_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* disable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_MASTINT_EN), 0, 0, indirect); + + /* setup regUVD_LMI_CTRL */ + tmp = (UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + 0x00100000L); + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 
0, regUVD_LMI_CTRL), tmp, 0, indirect); + + vcn_v5_0_1_mc_resume_dpg_mode(vinst, indirect); + + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_VCPU_CNTL), tmp, 0, indirect); + + /* enable LMI MC and UMC channels */ + tmp = 0x1f << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT; + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_LMI_CTRL2), tmp, 0, indirect); + + /* enable master interrupt */ + WREG32_SOC24_DPG_MODE(inst_idx, SOC24_DPG_MODE_OFFSET( + VCN, 0, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect); + + if (indirect) + amdgpu_vcn_psp_update_sram(adev, inst_idx, AMDGPU_UCODE_ID_VCN0_RAM); + + ring = &adev->vcn.inst[inst_idx].ring_enc[0]; + + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / sizeof(uint32_t)); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + /* Read DB_CTRL to flush the write DB_CTRL command. 
*/ + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); + + return 0; +} + +/** + * vcn_v5_0_1_start - VCN start + * + * @vinst: VCN instance + * + * Start VCN block + */ +static int vcn_v5_0_1_start(struct amdgpu_vcn_inst *vinst) +{ + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; + volatile struct amdgpu_vcn5_fw_shared *fw_shared; + struct amdgpu_ring *ring; + uint32_t tmp; + int j, k, r, vcn_inst; + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + return vcn_v5_0_1_start_dpg_mode(vinst, adev->vcn.inst[i].indirect_sram); + + vcn_inst = GET_INST(VCN, i); + + /* set VCN status busy */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, tmp); + + /* enable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK); + + /* disable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), 0, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* enable LMI MC and UMC channels */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_LMI_CTRL2), 0, + ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + + /* setup regUVD_LMI_CTRL */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL); + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); + + vcn_v5_0_1_mc_resume(vinst); + + /* VCN global tiling registers */ + WREG32_SOC15(VCN, vcn_inst, regUVD_GFX10_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + + /* unblock VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), 0, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* release VCPU reset to boot */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + for (j = 0; j < 10; ++j) { + uint32_t status; + + for (k = 0; k < 100; ++k) { + status = RREG32_SOC15(VCN, vcn_inst, regUVD_STATUS); + if (status & 2) + break; + mdelay(100); + if (amdgpu_emu_mode == 1) + msleep(20); + } + + if (amdgpu_emu_mode == 1) { + r = -1; + if (status & 2) { + r = 0; + break; + } + } else { + r = 0; + if (status & 2) + break; + + dev_err(adev->dev, + "VCN[%d] is not responding, trying to reset the VCPU!!!\n", i); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + mdelay(10); + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + mdelay(10); + r = -1; + } + } + + if (r) { + dev_err(adev->dev, "VCN[%d] is not responding, giving up!!!\n", i); + return r; + } + + /* enable master interrupt */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_MASTINT_EN), + UVD_MASTINT_EN__VCPU_EN_MASK, + ~UVD_MASTINT_EN__VCPU_EN_MASK); + + /* clear the busy bit of VCN_STATUS */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_STATUS), 0, + ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + + ring = &adev->vcn.inst[i].ring_enc[0]; + + WREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL, + ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT | + VCN_RB1_DB_CTRL__EN_MASK); + + /* Read DB_CTRL to flush the write DB_CTRL command. 
*/ + RREG32_SOC15(VCN, vcn_inst, regVCN_RB1_DB_CTRL); + + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_SIZE, ring->ring_size / 4); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp &= ~(VCN_RB_ENABLE__RB1_EN_MASK); + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode |= FW_QUEUE_RING_RESET; + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR, 0); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, 0); + + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_RPTR); + WREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR, tmp); + ring->wptr = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + + tmp = RREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE); + tmp |= VCN_RB_ENABLE__RB1_EN_MASK; + WREG32_SOC15(VCN, vcn_inst, regVCN_RB_ENABLE, tmp); + fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF); + + return 0; +} + +/** + * vcn_v5_0_1_stop_dpg_mode - VCN stop with dpg mode + * + * @vinst: VCN instance + * + * Stop VCN block with dpg mode + */ +static void vcn_v5_0_1_stop_dpg_mode(struct amdgpu_vcn_inst *vinst) +{ + struct amdgpu_device *adev = vinst->adev; + int inst_idx = vinst->inst; + uint32_t tmp; + int vcn_inst; + + vcn_inst = GET_INST(VCN, inst_idx); + + /* Wait for power status to be 1 */ + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_POWER_STATUS, 1, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_RB_RPTR, tmp, 0xFFFFFFFF); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); +} + +/** + * vcn_v5_0_1_stop - VCN stop + * + * @vinst: VCN instance + * + * Stop VCN block + */ +static int vcn_v5_0_1_stop(struct amdgpu_vcn_inst *vinst) +{ + struct amdgpu_device *adev = vinst->adev; + int i = vinst->inst; + volatile struct amdgpu_vcn5_fw_shared *fw_shared; + uint32_t tmp; + int r = 0, vcn_inst; + + vcn_inst = GET_INST(VCN, i); + + fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr; + fw_shared->sq.queue_mode |= FW_QUEUE_DPG_HOLD_OFF; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + vcn_v5_0_1_stop_dpg_mode(vinst); + return 0; + } + + /* wait for vcn idle */ + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_STATUS, UVD_STATUS__IDLE, 0x7); + if (r) + return r; + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* disable LMI UMC channel */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2); + tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_LMI_CTRL2, tmp); + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + r = SOC15_WAIT_ON_RREG(VCN, vcn_inst, regUVD_LMI_STATUS, tmp, tmp); + if (r) + return r; + + /* block VCPU register access */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_RB_ARB_CTRL), + UVD_RB_ARB_CTRL__VCPU_DIS_MASK, + ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK); + + /* reset VCPU */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), + UVD_VCPU_CNTL__BLK_RST_MASK, + ~UVD_VCPU_CNTL__BLK_RST_MASK); + + /* disable VCPU clock */ + WREG32_P(SOC15_REG_OFFSET(VCN, vcn_inst, regUVD_VCPU_CNTL), 0, + 
~(UVD_VCPU_CNTL__CLK_EN_MASK)); + + /* apply soft reset */ + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + tmp = RREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET); + tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + WREG32_SOC15(VCN, vcn_inst, regUVD_SOFT_RESET, tmp); + + /* clear status */ + WREG32_SOC15(VCN, vcn_inst, regUVD_STATUS, 0); + + return 0; +} + +/** + * vcn_v5_0_1_unified_ring_get_rptr - get unified read pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified read pointer + */ +static uint64_t vcn_v5_0_1_unified_ring_get_rptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_RPTR); +} + +/** + * vcn_v5_0_1_unified_ring_get_wptr - get unified write pointer + * + * @ring: amdgpu_ring pointer + * + * Returns the current hardware unified write pointer + */ +static uint64_t vcn_v5_0_1_unified_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) + return *ring->wptr_cpu_addr; + else + return RREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR); +} + +/** + * vcn_v5_0_1_unified_ring_set_wptr - set enc write pointer + * + * @ring: amdgpu_ring pointer + * + * Commits the enc write pointer to the hardware + */ +static void vcn_v5_0_1_unified_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring != &adev->vcn.inst[ring->me].ring_enc[0]) + DRM_ERROR("wrong ring id is identified in %s", __func__); + + if (ring->use_doorbell) { + *ring->wptr_cpu_addr = lower_32_bits(ring->wptr); + WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); + } else { + WREG32_SOC15(VCN, GET_INST(VCN, ring->me), regUVD_RB_WPTR, + lower_32_bits(ring->wptr)); + } +} + +static const struct amdgpu_ring_funcs vcn_v5_0_1_unified_ring_vm_funcs = { + .type = AMDGPU_RING_TYPE_VCN_ENC, + .align_mask = 0x3f, + .nop = VCN_ENC_CMD_NO_OP, + .get_rptr = vcn_v5_0_1_unified_ring_get_rptr, + .get_wptr = vcn_v5_0_1_unified_ring_get_wptr, + .set_wptr = vcn_v5_0_1_unified_ring_set_wptr, + .emit_frame_size = SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + + 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */ + 5 + + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */ + 1, /* vcn_v2_0_enc_ring_insert_end */ + .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */ + .emit_ib = vcn_v2_0_enc_ring_emit_ib, + .emit_fence = vcn_v2_0_enc_ring_emit_fence, + .emit_vm_flush = vcn_v4_0_3_enc_ring_emit_vm_flush, + .emit_hdp_flush = vcn_v4_0_3_ring_emit_hdp_flush, + .test_ring = amdgpu_vcn_enc_ring_test_ring, + .test_ib = amdgpu_vcn_unified_ring_test_ib, + .insert_nop = amdgpu_ring_insert_nop, + .insert_end = vcn_v2_0_enc_ring_insert_end, + .pad_ib = amdgpu_ring_generic_pad_ib, + .begin_use = amdgpu_vcn_ring_begin_use, + .end_use = amdgpu_vcn_ring_end_use, + .emit_wreg = vcn_v4_0_3_enc_ring_emit_wreg, + .emit_reg_wait = vcn_v4_0_3_enc_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + +/** + * vcn_v5_0_1_set_unified_ring_funcs - set unified ring functions + * + * @adev: amdgpu_device pointer + * + * Set unified ring 
functions + */ +static void vcn_v5_0_1_set_unified_ring_funcs(struct amdgpu_device *adev) +{ + int i, vcn_inst; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + adev->vcn.inst[i].ring_enc[0].funcs = &vcn_v5_0_1_unified_ring_vm_funcs; + adev->vcn.inst[i].ring_enc[0].me = i; + vcn_inst = GET_INST(VCN, i); + adev->vcn.inst[i].aid_id = vcn_inst / adev->vcn.num_inst_per_aid; + } +} + +/** + * vcn_v5_0_1_is_idle - check VCN block is idle + * + * @ip_block: Pointer to the amdgpu_ip_block structure + * + * Check whether VCN block is idle + */ +static bool vcn_v5_0_1_is_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, ret = 1; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) + ret &= (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) == UVD_STATUS__IDLE); + + return ret; +} + +/** + * vcn_v5_0_1_wait_for_idle - wait for VCN block idle + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * + * Wait for VCN block idle + */ +static int vcn_v5_0_1_wait_for_idle(struct amdgpu_ip_block *ip_block) +{ + struct amdgpu_device *adev = ip_block->adev; + int i, ret = 0; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + ret = SOC15_WAIT_ON_RREG(VCN, GET_INST(VCN, i), regUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE); + if (ret) + return ret; + } + + return ret; +} + +/** + * vcn_v5_0_1_set_clockgating_state - set VCN block clockgating state + * + * @ip_block: Pointer to the amdgpu_ip_block for this hw instance. + * @state: clock gating state + * + * Set VCN block clockgating state + */ +static int vcn_v5_0_1_set_clockgating_state(struct amdgpu_ip_block *ip_block, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = ip_block->adev; + bool enable = state == AMD_CG_STATE_GATE; + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) { + struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i]; + + if (enable) { + if (RREG32_SOC15(VCN, GET_INST(VCN, i), regUVD_STATUS) != UVD_STATUS__IDLE) + return -EBUSY; + vcn_v5_0_1_enable_clock_gating(vinst); + } else { + vcn_v5_0_1_disable_clock_gating(vinst); + } + } + + return 0; +} + +static int vcn_v5_0_1_set_pg_state(struct amdgpu_vcn_inst *vinst, + enum amd_powergating_state state) +{ + int ret = 0; + + if (state == vinst->cur_state) + return 0; + + if (state == AMD_PG_STATE_GATE) + ret = vcn_v5_0_1_stop(vinst); + else + ret = vcn_v5_0_1_start(vinst); + + if (!ret) + vinst->cur_state = state; + + return ret; +} + +/** + * vcn_v5_0_1_process_interrupt - process VCN block interrupt + * + * @adev: amdgpu_device pointer + * @source: interrupt sources + * @entry: interrupt entry from clients and sources + * + * Process VCN block interrupt + */ +static int vcn_v5_0_1_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t i, inst; + + i = node_id_to_phys_map[entry->node_id]; + + DRM_DEV_DEBUG(adev->dev, "IH: VCN TRAP\n"); + + for (inst = 0; inst < adev->vcn.num_vcn_inst; ++inst) + if (adev->vcn.inst[inst].aid_id == i) + break; + if (inst >= adev->vcn.num_vcn_inst) { + dev_WARN_ONCE(adev->dev, 1, + "Interrupt received for unknown VCN instance %d", + entry->node_id); + return 0; + } + + switch (entry->src_id) { + case VCN_5_0__SRCID__UVD_ENC_GENERAL_PURPOSE: + amdgpu_fence_process(&adev->vcn.inst[inst].ring_enc[0]); + break; + default: + DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n", + entry->src_id, entry->src_data[0]); + break; + } + + return 0; +} + +static const struct amdgpu_irq_src_funcs 
vcn_v5_0_1_irq_funcs = { + .process = vcn_v5_0_1_process_interrupt, +}; + +/** + * vcn_v5_0_1_set_irq_funcs - set VCN block interrupt irq functions + * + * @adev: amdgpu_device pointer + * + * Set VCN block interrupt irq functions + */ +static void vcn_v5_0_1_set_irq_funcs(struct amdgpu_device *adev) +{ + int i; + + for (i = 0; i < adev->vcn.num_vcn_inst; ++i) + adev->vcn.inst->irq.num_types++; + adev->vcn.inst->irq.funcs = &vcn_v5_0_1_irq_funcs; +} + +static const struct amd_ip_funcs vcn_v5_0_1_ip_funcs = { + .name = "vcn_v5_0_1", + .early_init = vcn_v5_0_1_early_init, + .late_init = NULL, + .sw_init = vcn_v5_0_1_sw_init, + .sw_fini = vcn_v5_0_1_sw_fini, + .hw_init = vcn_v5_0_1_hw_init, + .hw_fini = vcn_v5_0_1_hw_fini, + .suspend = vcn_v5_0_1_suspend, + .resume = vcn_v5_0_1_resume, + .is_idle = vcn_v5_0_1_is_idle, + .wait_for_idle = vcn_v5_0_1_wait_for_idle, + .check_soft_reset = NULL, + .pre_soft_reset = NULL, + .soft_reset = NULL, + .post_soft_reset = NULL, + .set_clockgating_state = vcn_v5_0_1_set_clockgating_state, + .set_powergating_state = vcn_set_powergating_state, + .dump_ip_state = vcn_v5_0_0_dump_ip_state, + .print_ip_state = vcn_v5_0_0_print_ip_state, +}; + +const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block = { + .type = AMD_IP_BLOCK_TYPE_VCN, + .major = 5, + .minor = 0, + .rev = 1, + .funcs = &vcn_v5_0_1_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h new file mode 100644 index 000000000000..8fd90bd10807 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v5_0_1.h @@ -0,0 +1,32 @@ +/* + * Copyright 2024 Advanced Micro Devices, Inc. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VCN_v5_0_1_H__ +#define __VCN_v5_0_1_H__ + +#define regVCN_RRMT_CNTL 0x0940 +#define regVCN_RRMT_CNTL_BASE_IDX 1 + +extern const struct amdgpu_ip_block_version vcn_v5_0_1_ip_block; + +#endif /* __VCN_v5_0_1_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 0fedadd0a6a4..eb16916c6473 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -364,9 +364,8 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, * this should allow us to catchup. 
*/ tmp = (wptr + 32) & ih->ptr_mask; - dev_warn(adev->dev, "IH ring buffer overflow " - "(0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, tmp); + dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp); ih->rptr = tmp; tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); @@ -556,7 +555,7 @@ static int vega10_ih_resume(struct amdgpu_ip_block *ip_block) return vega10_ih_hw_init(ip_block); } -static bool vega10_ih_is_idle(void *handle) +static bool vega10_ih_is_idle(struct amdgpu_ip_block *ip_block) { /* todo */ return true; @@ -605,10 +604,10 @@ static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev, } } -static int vega10_ih_set_clockgating_state(void *handle, +static int vega10_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; vega10_ih_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); @@ -616,7 +615,7 @@ static int vega10_ih_set_clockgating_state(void *handle, } -static int vega10_ih_set_powergating_state(void *handle, +static int vega10_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h b/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h index 8de4ccce5e38..3ca8a417c6d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h +++ b/drivers/gpu/drm/amd/amdgpu/vega10_sdma_pkt_open.h @@ -64,6 +64,9 @@ #define HEADER_BARRIER 5 #define SDMA_OP_AQL_COPY 0 #define SDMA_OP_AQL_BARRIER_OR 0 +/* vm invalidation is only available for GC9.4.3/GC9.4.4/GC9.5.0 */ +#define SDMA_OP_VM_INVALIDATE 8 +#define SDMA_SUBOP_VM_INVALIDATE 4 /*define for op field*/ #define SDMA_PKT_HEADER_op_offset 0 @@ -3331,5 +3334,72 @@ #define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift 0 #define SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_COMPLETION_SIGNAL_63_32(x) (((x) & SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_mask) << SDMA_AQL_PKT_BARRIER_OR_COMPLETION_SIGNAL_HI_completion_signal_63_32_shift) +/* +** Definitions for SDMA_PKT_VM_INVALIDATION packet +*/ + +/*define for HEADER word*/ +/*define for op field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_op_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_op_mask 0x000000FF +#define SDMA_PKT_VM_INVALIDATION_HEADER_op_shift 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_OP(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_op_shift) + +/*define for sub_op field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask 0x000000FF +#define SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift 8 +#define SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_sub_op_shift) + +/*define for xcc0_eng_id field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift 16 +#define SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc0_eng_id_shift) + +/*define for xcc1_eng_id field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_offset 0 +#define 
SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift 21 +#define SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_xcc1_eng_id_shift) + +/*define for mmhub_eng_id field*/ +#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_offset 0 +#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift 26 +#define SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(x) ((x & SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_mask) << SDMA_PKT_VM_INVALIDATION_HEADER_mmhub_eng_id_shift) + +/*define for INVALIDATEREQ word*/ +/*define for invalidatereq field*/ +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_offset 1 +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask 0xFFFFFFFF +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift 0 +#define SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_INVALIDATEREQ(x) ((x & SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_mask) << SDMA_PKT_VM_INVALIDATION_INVALIDATEREQ_invalidatereq_shift) + +/*define for ADDRESSRANGELO word*/ +/*define for addressrangelo field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_offset 2 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask 0xFFFFFFFF +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift 0 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_ADDRESSRANGELO(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGELO_addressrangelo_shift) + +/*define for ADDRESSRANGEHI word*/ +/*define for invalidateack field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_offset 3 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask 0x0000FFFF +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift 0 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_INVALIDATEACK(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_invalidateack_shift) + +/*define for addressrangehi field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_offset 3 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask 0x0000001F +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift 16 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_ADDRESSRANGEHI(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_addressrangehi_shift) + +/*define for reserved field*/ +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_offset 3 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask 0x000001FF +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift 23 +#define SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_RESERVED(x) ((x & SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_mask) << SDMA_PKT_VM_INVALIDATION_ADDRESSRANGEHI_reserved_shift) #endif /* __SDMA_PKT_OPEN_H_ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 1c9aff742e43..faa0dd75dd6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -366,6 +366,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev) /* Enable IH Retry CAM */ if (amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 0) || amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 2) || + 
amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 4) || amdgpu_ip_version(adev, OSSSYS_HWIP, 0) == IP_VERSION(4, 4, 5)) WREG32_FIELD15(OSSSYS, 0, IH_RETRY_INT_CAM_CNTL_ALDEBARAN, ENABLE, 1); @@ -443,9 +444,8 @@ static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, * this should allow us to catchup. */ tmp = (wptr + 32) & ih->ptr_mask; - dev_warn(adev->dev, "IH ring buffer overflow " - "(0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, tmp); + dev_warn_ratelimited(adev->dev, "%s ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + amdgpu_ih_ring_name(adev, ih), wptr, ih->rptr, tmp); ih->rptr = tmp; tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); @@ -651,7 +651,7 @@ static int vega20_ih_resume(struct amdgpu_ip_block *ip_block) return vega20_ih_hw_init(ip_block); } -static bool vega20_ih_is_idle(void *handle) +static bool vega20_ih_is_idle(struct amdgpu_ip_block *ip_block) { /* todo */ return true; @@ -697,10 +697,10 @@ static void vega20_ih_update_clockgating_state(struct amdgpu_device *adev, } } -static int vega20_ih_set_clockgating_state(void *handle, +static int vega20_ih_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; vega20_ih_update_clockgating_state(adev, state == AMD_CG_STATE_GATE); @@ -708,7 +708,7 @@ static int vega20_ih_set_clockgating_state(void *handle, } -static int vega20_ih_set_powergating_state(void *handle, +static int vega20_ih_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index a83505815d39..86d8bc10d90a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -167,16 +167,16 @@ static const struct amdgpu_video_codec_info tonga_video_codecs_decode_array[] = { { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 3, }, { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 5, }, { @@ -188,9 +188,9 @@ static const struct amdgpu_video_codec_info tonga_video_codecs_decode_array[] = }, { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 4, }, }; @@ -206,16 +206,16 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] = { { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 3, }, { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 5, }, { @@ -227,9 +227,9 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] = }, { .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 
4096 * 4096, + .max_width = 1920, + .max_height = 1088, + .max_pixels_per_frame = 1920 * 1088, .max_level = 4, }, { @@ -239,13 +239,6 @@ static const struct amdgpu_video_codec_info cz_video_codecs_decode_array[] = .max_pixels_per_frame = 4096 * 4096, .max_level = 186, }, - { - .codec_type = AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, - .max_width = 4096, - .max_height = 4096, - .max_pixels_per_frame = 4096 * 4096, - .max_level = 0, - }, }; static const struct amdgpu_video_codecs cz_video_codecs_decode = @@ -1736,7 +1729,7 @@ static int vi_common_resume(struct amdgpu_ip_block *ip_block) return vi_common_hw_init(ip_block); } -static bool vi_common_is_idle(void *handle) +static bool vi_common_is_idle(struct amdgpu_ip_block *ip_block) { return true; } @@ -1945,10 +1938,10 @@ static int vi_common_set_clockgating_state_by_smu(void *handle, return 0; } -static int vi_common_set_clockgating_state(void *handle, +static int vi_common_set_clockgating_state(struct amdgpu_ip_block *ip_block, enum amd_clockgating_state state) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; if (amdgpu_sriov_vf(adev)) return 0; @@ -1988,15 +1981,15 @@ static int vi_common_set_clockgating_state(void *handle, return 0; } -static int vi_common_set_powergating_state(void *handle, +static int vi_common_set_powergating_state(struct amdgpu_ip_block *ip_block, enum amd_powergating_state state) { return 0; } -static void vi_common_get_clockgating_state(void *handle, u64 *flags) +static void vi_common_get_clockgating_state(struct amdgpu_ip_block *ip_block, u64 *flags) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + struct amdgpu_device *adev = ip_block->adev; int data; if (amdgpu_sriov_vf(adev)) |
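In vcn_v5_0_1_hw_init() above, the offset handed to vcn_doorbell_range() is (vcn_ring0_1 << 1) + 9 * vcn_inst, i.e. nine doorbell slots are reserved per physical VCN instance. A tiny standalone sketch of that arithmetic with an assumed base index (the real base comes from adev->doorbell_index.vcn.vcn_ring0_1 and is not part of this example):

    /* Illustrative arithmetic only; the base index of 8 is an assumption. */
    #include <stdio.h>

    int main(void)
    {
    	unsigned int vcn_ring0_1 = 8; /* assumed base doorbell index */
    	unsigned int vcn_inst;

    	for (vcn_inst = 0; vcn_inst < 4; ++vcn_inst)
    		printf("VCN inst %u -> doorbell offset %u\n",
    		       vcn_inst, (vcn_ring0_1 << 1) + 9 * vcn_inst);
    	return 0;
    }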
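The IH changes in vega10_ih.c and vega20_ih.c above switch the ring buffer overflow message to dev_warn_ratelimited() while keeping the existing catch-up behaviour: the read pointer is jumped to 32 bytes past the write pointer, wrapping on the ring mask. A standalone sketch of that wrap-around arithmetic, with an assumed ptr_mask (the real value depends on the ring size):

    /* Illustrative only: the overflow catch-up arithmetic used by
     * vega10_ih_get_wptr()/vega20_ih_get_wptr().  The mask and pointer
     * values below are assumptions for the example. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    	uint32_t ptr_mask = 0xFFFF;            /* assumed ring_size - 1 */
    	uint32_t wptr = 0xFFF0, rptr = 0x1234; /* assumed pointers */
    	uint32_t new_rptr = (wptr + 32) & ptr_mask;

    	/* prints: wptr=0x0000FFF0 old rptr=0x00001234 new rptr=0x00000010 */
    	printf("wptr=0x%08X old rptr=0x%08X new rptr=0x%08X\n",
    	       wptr, rptr, new_rptr);
    	return 0;
    }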
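For context on the SDMA_PKT_VM_INVALIDATION definitions added to vega10_sdma_pkt_open.h above (per the header comment, the packet is only available on GC 9.4.3/9.4.4/9.5.0), here is a minimal, hypothetical sketch of how the HEADER dword could be assembled from those macros. The helper name and the zero engine ids are assumptions for illustration only, not code from this patch.

    /* Illustrative only: pack the first dword of an SDMA VM invalidation
     * packet using the macros defined above.  Engine ids of 0 are
     * placeholders, not values taken from the patch. */
    #include <stdint.h>
    #include "vega10_sdma_pkt_open.h"

    static uint32_t example_vm_invalidation_header(void)
    {
    	return SDMA_PKT_VM_INVALIDATION_HEADER_OP(SDMA_OP_VM_INVALIDATE) |
    	       SDMA_PKT_VM_INVALIDATION_HEADER_SUB_OP(SDMA_SUBOP_VM_INVALIDATE) |
    	       SDMA_PKT_VM_INVALIDATION_HEADER_XCC0_ENG_ID(0) |
    	       SDMA_PKT_VM_INVALIDATION_HEADER_XCC1_ENG_ID(0) |
    	       SDMA_PKT_VM_INVALIDATION_HEADER_MMHUB_ENG_ID(0);
    }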