diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 313 | 
1 files changed, 308 insertions, 5 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index b2841195bd3b..6c76bb2a6843 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -29,6 +29,7 @@  #include "amdgpu.h"  #include "amdgpu_psp.h"  #include "ta_ras_if.h" +#include "amdgpu_ras_eeprom.h"  enum amdgpu_ras_block {  	AMDGPU_RAS_BLOCK__UMC = 0, @@ -52,6 +53,236 @@ enum amdgpu_ras_block {  #define AMDGPU_RAS_BLOCK_COUNT	AMDGPU_RAS_BLOCK__LAST  #define AMDGPU_RAS_BLOCK_MASK	((1ULL << AMDGPU_RAS_BLOCK_COUNT) - 1) +enum amdgpu_ras_gfx_subblock { +	/* CPC */ +	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START = 0, +	AMDGPU_RAS_BLOCK__GFX_CPC_SCRATCH = +		AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_CPC_UCODE, +	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME1, +	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME1, +	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME1, +	AMDGPU_RAS_BLOCK__GFX_DC_STATE_ME2, +	AMDGPU_RAS_BLOCK__GFX_DC_CSINVOC_ME2, +	AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, +	AMDGPU_RAS_BLOCK__GFX_CPC_INDEX_END = +		AMDGPU_RAS_BLOCK__GFX_DC_RESTORE_ME2, +	/* CPF */ +	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME2 = +		AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_CPF_ROQ_ME1, +	AMDGPU_RAS_BLOCK__GFX_CPF_TAG, +	AMDGPU_RAS_BLOCK__GFX_CPF_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPF_TAG, +	/* CPG */ +	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_ROQ = +		AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_CPG_DMA_TAG, +	AMDGPU_RAS_BLOCK__GFX_CPG_TAG, +	AMDGPU_RAS_BLOCK__GFX_CPG_INDEX_END = AMDGPU_RAS_BLOCK__GFX_CPG_TAG, +	/* GDS */ +	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_GDS_MEM = AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_GDS_INPUT_QUEUE, +	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_CMD_RAM_MEM, +	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PHY_DATA_RAM_MEM, +	AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, +	AMDGPU_RAS_BLOCK__GFX_GDS_INDEX_END = +		AMDGPU_RAS_BLOCK__GFX_GDS_OA_PIPE_MEM, +	/* SPI */ +	AMDGPU_RAS_BLOCK__GFX_SPI_SR_MEM, +	/* SQ */ +	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_SQ_SGPR = AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_D, +	AMDGPU_RAS_BLOCK__GFX_SQ_LDS_I, +	AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, +	AMDGPU_RAS_BLOCK__GFX_SQ_INDEX_END = AMDGPU_RAS_BLOCK__GFX_SQ_VGPR, +	/* SQC (3 ranges) */ +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, +	/* SQC range 0 */ +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START = +		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_UTCL1_LFIFO = +		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_START, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_WRITE_DATA_BUF, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU0_UTCL1_LFIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_WRITE_DATA_BUF, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU1_UTCL1_LFIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_WRITE_DATA_BUF, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX0_END = +		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_CU2_UTCL1_LFIFO, +	/* SQC range 1 */ +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_TAG_RAM = +		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_START, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_UTCL1_MISS_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_MISS_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKA_BANK_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_TAG_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_HIT_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_MISS_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_DIRTY_BIT_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX1_END = +		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKA_BANK_RAM, +	/* SQC range 2 */ +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_TAG_RAM = +		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_START, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_UTCL1_MISS_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_MISS_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_INST_BANKB_BANK_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_TAG_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_HIT_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_MISS_FIFO, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_DIRTY_BIT_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END = +		AMDGPU_RAS_BLOCK__GFX_SQC_DATA_BANKB_BANK_RAM, +	AMDGPU_RAS_BLOCK__GFX_SQC_INDEX_END = +		AMDGPU_RAS_BLOCK__GFX_SQC_INDEX2_END, +	/* TA */ +	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TA_FS_DFIFO = +		AMDGPU_RAS_BLOCK__GFX_TA_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TA_FS_AFIFO, +	AMDGPU_RAS_BLOCK__GFX_TA_FL_LFIFO, +	AMDGPU_RAS_BLOCK__GFX_TA_FX_LFIFO, +	AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, +	AMDGPU_RAS_BLOCK__GFX_TA_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TA_FS_CFIFO, +	/* TCA */ +	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TCA_HOLE_FIFO = +		AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, +	AMDGPU_RAS_BLOCK__GFX_TCA_INDEX_END = +		AMDGPU_RAS_BLOCK__GFX_TCA_REQ_FIFO, +	/* TCC (5 sub-ranges) */ +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, +	/* TCC range 0 */ +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START = +		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA = +		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_0_1, +	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_0, +	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DATA_BANK_1_1, +	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_0, +	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_DIRTY_BANK_1, +	AMDGPU_RAS_BLOCK__GFX_TCC_HIGH_RATE_TAG, +	AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX0_END = +		AMDGPU_RAS_BLOCK__GFX_TCC_LOW_RATE_TAG, +	/* TCC range 1 */ +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_DEC = +		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX1_END = +		AMDGPU_RAS_BLOCK__GFX_TCC_IN_USE_TRANSFER, +	/* TCC range 2 */ +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_DATA = +		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_RETURN_CONTROL, +	AMDGPU_RAS_BLOCK__GFX_TCC_UC_ATOMIC_FIFO, +	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_RETURN, +	AMDGPU_RAS_BLOCK__GFX_TCC_WRITE_CACHE_READ, +	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO, +	AMDGPU_RAS_BLOCK__GFX_TCC_SRC_FIFO_NEXT_RAM, +	AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX2_END = +		AMDGPU_RAS_BLOCK__GFX_TCC_CACHE_TAG_PROBE_FIFO, +	/* TCC range 3 */ +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO = +		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX3_END = +		AMDGPU_RAS_BLOCK__GFX_TCC_LATENCY_FIFO_NEXT_RAM, +	/* TCC range 4 */ +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_WRRET_TAG_WRITE_RETURN = +		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_START, +	AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END = +		AMDGPU_RAS_BLOCK__GFX_TCC_ATOMIC_RETURN_BUFFER, +	AMDGPU_RAS_BLOCK__GFX_TCC_INDEX_END = +		AMDGPU_RAS_BLOCK__GFX_TCC_INDEX4_END, +	/* TCI */ +	AMDGPU_RAS_BLOCK__GFX_TCI_WRITE_RAM, +	/* TCP */ +	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TCP_CACHE_RAM = +		AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TCP_LFIFO_RAM, +	AMDGPU_RAS_BLOCK__GFX_TCP_CMD_FIFO, +	AMDGPU_RAS_BLOCK__GFX_TCP_VM_FIFO, +	AMDGPU_RAS_BLOCK__GFX_TCP_DB_RAM, +	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO0, +	AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, +	AMDGPU_RAS_BLOCK__GFX_TCP_INDEX_END = +		AMDGPU_RAS_BLOCK__GFX_TCP_UTCL1_LFIFO1, +	/* TD */ +	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_LO = +		AMDGPU_RAS_BLOCK__GFX_TD_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_TD_SS_FIFO_HI, +	AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, +	AMDGPU_RAS_BLOCK__GFX_TD_INDEX_END = AMDGPU_RAS_BLOCK__GFX_TD_CS_FIFO, +	/* EA (3 sub-ranges) */ +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, +	/* EA range 0 */ +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START = +		AMDGPU_RAS_BLOCK__GFX_EA_INDEX_START, +	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_CMDMEM = +		AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_START, +	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_CMDMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_DATAMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_RRET_TAGMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_WRET_TAGMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_CMDMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_CMDMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX0_END = +		AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_DATAMEM, +	/* EA range 1 */ +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, +	AMDGPU_RAS_BLOCK__GFX_EA_DRAMRD_PAGEMEM = +		AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_START, +	AMDGPU_RAS_BLOCK__GFX_EA_DRAMWR_PAGEMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_IORD_CMDMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_CMDMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_IOWR_DATAMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_GMIRD_PAGEMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX1_END = +		AMDGPU_RAS_BLOCK__GFX_EA_GMIWR_PAGEMEM, +	/* EA range 2 */ +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, +	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D0MEM = +		AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_START, +	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D1MEM, +	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D2MEM, +	AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END = +		AMDGPU_RAS_BLOCK__GFX_EA_MAM_D3MEM, +	AMDGPU_RAS_BLOCK__GFX_EA_INDEX_END = +		AMDGPU_RAS_BLOCK__GFX_EA_INDEX2_END, +	/* UTC VM L2 bank */ +	AMDGPU_RAS_BLOCK__UTC_VML2_BANK_CACHE, +	/* UTC VM walker */ +	AMDGPU_RAS_BLOCK__UTC_VML2_WALKER, +	/* UTC ATC L2 2MB cache */ +	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_2M_BANK, +	/* UTC ATC L2 4KB cache */ +	AMDGPU_RAS_BLOCK__UTC_ATCL2_CACHE_4K_BANK, +	AMDGPU_RAS_BLOCK__GFX_MAX +}; +  enum amdgpu_ras_error_type {  	AMDGPU_RAS_ERROR__NONE							= 0,  	AMDGPU_RAS_ERROR__PARITY						= 1, @@ -76,9 +307,6 @@ struct ras_common_if {  	char name[32];  }; -typedef int (*ras_ih_cb)(struct amdgpu_device *adev, -		struct amdgpu_iv_entry *entry); -  struct amdgpu_ras {  	/* ras infrastructure */  	/* for ras itself. */ @@ -106,10 +334,85 @@ struct amdgpu_ras {  	struct mutex recovery_lock;  	uint32_t flags; + +	struct amdgpu_ras_eeprom_control eeprom_control;  }; -/* interfaces for IP */ +struct ras_fs_data { +	char sysfs_name[32]; +	char debugfs_name[32]; +}; + +struct ras_err_data { +	unsigned long ue_count; +	unsigned long ce_count; +	unsigned long err_addr_cnt; +	uint64_t *err_addr; +}; +struct ras_err_handler_data { +	/* point to bad pages array */ +	struct { +		unsigned long bp; +		struct amdgpu_bo *bo; +	} *bps; +	/* the count of entries */ +	int count; +	/* the space can place new entries */ +	int space_left; +	/* last reserved entry's index + 1 */ +	int last_reserved; +}; + +typedef int (*ras_ih_cb)(struct amdgpu_device *adev, +		struct ras_err_data *err_data, +		struct amdgpu_iv_entry *entry); + +struct ras_ih_data { +	/* interrupt bottom half */ +	struct work_struct ih_work; +	int inuse; +	/* IP callback */ +	ras_ih_cb cb; +	/* full of entries */ +	unsigned char *ring; +	unsigned int ring_size; +	unsigned int element_size; +	unsigned int aligned_element_size; +	unsigned int rptr; +	unsigned int wptr; +}; + +struct ras_manager { +	struct ras_common_if head; +	/* reference count */ +	int use; +	/* ras block link */ +	struct list_head node; +	/* the device */ +	struct amdgpu_device *adev; +	/* debugfs */ +	struct dentry *ent; +	/* sysfs */ +	struct device_attribute sysfs_attr; +	int attr_inuse; + +	/* fs node name */ +	struct ras_fs_data fs_data; + +	/* IH data */ +	struct ras_ih_data ih_data; + +	struct ras_err_data err_data; +}; + +struct ras_badpage { +	unsigned int bp; +	unsigned int size; +	unsigned int flags; +}; + +/* interfaces for IP */  struct ras_fs_if {  	struct ras_common_if head;  	char sysfs_name[32]; @@ -184,7 +487,7 @@ int amdgpu_ras_request_reset_on_boot(struct amdgpu_device *adev,  void amdgpu_ras_resume(struct amdgpu_device *adev);  void amdgpu_ras_suspend(struct amdgpu_device *adev); -int amdgpu_ras_query_error_count(struct amdgpu_device *adev, +unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,  		bool is_ce);  /* error handling functions */ | 
