summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVitaly Prosyak <vitaly.prosyak@amd.com>2025-05-06 16:45:33 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2025-06-19 15:40:41 +0200
commitc29a9dc44cbd53156c85dce671b1ac8dabf9eb1b (patch)
treedb4ba66ab8840adf0191e17b0a3df6a2c7be2486
parent620ab4d6215de0b25227f9fff1a8c7fb66837cb8 (diff)
drm/amdgpu/gfx10: Refine Cleaner Shader for GFX10.1.10
[ Upstream commit d26625d034fb8d596f0488472969493fa02d03f3 ] This patch updates the cleaner shader, which is responsible for initializing GPU resources such as Local Data Share (LDS), Vector General Purpose Registers (VGPRs), and Scalar General Purpose Registers (SGPRs). Changes include adjustments to register clearing and shader configuration. - Updated GPU resource initialization addresses in the cleaner shader from `be803080` to `be803000`. - Simplified the logic in the SGPR clearing section, ensuring all SGPRs are set to zero. Fixes: 25961bad9212 ("drm/amdgpu/gfx10: Add cleaner shader for GFX10.1.10") Cc: Christian König <christian.koenig@amd.com> Cc: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Manu Rastogi <manu.rastogi@amd.com> Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com> Acked-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> Signed-off-by: Sasha Levin <sashal@kernel.org>
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm13
2 files changed, 9 insertions, 10 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
index 5255378af53c..f67569ccf9f6 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0_cleaner_shader.h
@@ -43,9 +43,9 @@ static const u32 gfx_10_1_10_cleaner_shader_hex[] = {
0xd70f6a01, 0x000202ff,
0x00000400, 0x80828102,
0xbf84fff7, 0xbefc03ff,
- 0x00000068, 0xbe803080,
- 0xbe813080, 0xbe823080,
- 0xbe833080, 0x80fc847c,
+ 0x00000068, 0xbe803000,
+ 0xbe813000, 0xbe823000,
+ 0xbe833000, 0x80fc847c,
0xbf84fffa, 0xbeea0480,
0xbeec0480, 0xbeee0480,
0xbef00480, 0xbef20480,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
index 9ba3359253c9..54f7ed9e2801 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_1_10_cleaner_shader.asm
@@ -40,7 +40,6 @@ shader main
type(CS)
wave_size(32)
// Note: original source code from SQ team
-
//
// Create 32 waves in a threadgroup (CS waves)
// Each allocates 64 VGPRs
@@ -71,8 +70,8 @@ label_0005:
s_sub_u32 s2, s2, 8
s_cbranch_scc0 label_0005
//
- s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
- s_and_b32 s2, s2, s0 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
+ s_mov_b32 s2, 0x80000000 // Bit31 is first_wave
+ s_and_b32 s2, s2, s1 // sgpr0 has tg_size (first_wave) term as in ucode only COMPUTE_PGM_RSRC2.tg_size_en is set
s_cbranch_scc0 label_0023 // Clean LDS if its first wave of ThreadGroup/WorkGroup
// CLEAR LDS
//
@@ -99,10 +98,10 @@ label_001F:
label_0023:
s_mov_b32 m0, 0x00000068 // Loop 108/4=27 times (loop unrolled for performance)
label_sgpr_loop:
- s_movreld_b32 s0, 0
- s_movreld_b32 s1, 0
- s_movreld_b32 s2, 0
- s_movreld_b32 s3, 0
+ s_movreld_b32 s0, s0
+ s_movreld_b32 s1, s0
+ s_movreld_b32 s2, s0
+ s_movreld_b32 s3, s0
s_sub_u32 m0, m0, 4
s_cbranch_scc0 label_sgpr_loop