diff options
| author | Christian König <christian.koenig@amd.com> | 2018-01-26 15:00:43 +0100 | 
|---|---|---|
| committer | Alex Deucher <alexander.deucher@amd.com> | 2018-02-19 14:19:12 -0500 | 
| commit | f732b6b3c0e62bf889702d6af2b1e5436e4e9a0a (patch) | |
| tree | 81a0dab49e09323452d2788b31d40d63c6918980 /drivers/gpu/drm/amd/amdgpu | |
| parent | 2b124b0b7085ac2216ac4703e3054963cadc7f47 (diff) | |
drm/amdgpu: move waiting for VM flush into gmc_v9_0_emit_flush_gpu_tlb
Keep the wait for the VM flush in one common place instead of spreading it across all engines.
Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 19 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 4 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 18 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/soc15.h | 3 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c | 20 | ||||
| -rwxr-xr-x | drivers/gpu/drm/amd/amdgpu/vce_v4_0.c | 9 | ||||
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c | 20 | 
7 files changed, 33 insertions, 60 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 801d4a1dd7db..f7363f821cff 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3679,15 +3679,8 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,  					unsigned vmid, unsigned pasid,  					uint64_t pd_addr)  { -	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; -	unsigned eng = ring->vm_inv_eng; -  	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr); -	/* wait for the invalidate to complete */ -	gfx_v9_0_wait_reg_mem(ring, 0, 0, 0, hub->vm_inv_eng0_ack + eng, -			      0, 1 << vmid, 1 << vmid, 0x20); -  	/* compute doesn't have PFP */  	if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {  		/* sync PFP to ME, otherwise we might get invalid PFP reads */ @@ -4295,7 +4288,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {  	.emit_frame_size = /* totally 242 maximum if 16 IBs */  		5 +  /* COND_EXEC */  		7 +  /* PIPELINE_SYNC */ -		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* VM_FLUSH */ +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + +		2 + /* VM_FLUSH */  		8 +  /* FENCE for VM_FLUSH */  		20 + /* GDS switch */  		4 + /* double SWITCH_BUFFER, @@ -4344,7 +4339,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {  		7 + /* gfx_v9_0_ring_emit_hdp_flush */  		5 + /* hdp invalidate */  		7 + /* gfx_v9_0_ring_emit_pipeline_sync */ -		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* gfx_v9_0_ring_emit_vm_flush */ +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + +		2 + /* gfx_v9_0_ring_emit_vm_flush */  		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */  	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */  	.emit_ib = gfx_v9_0_ring_emit_ib_compute, @@ -4376,7 +4373,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {  		7 + /* gfx_v9_0_ring_emit_hdp_flush 
*/  		5 + /* hdp invalidate */  		7 + /* gfx_v9_0_ring_emit_pipeline_sync */ -		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + 9 + /* gfx_v9_0_ring_emit_vm_flush */ +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + +		2 + /* gfx_v9_0_ring_emit_vm_flush */  		8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */  	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */  	.emit_ib = gfx_v9_0_ring_emit_ib_compute, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 4a82526652cc..0f4a9a8575a4 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -396,6 +396,10 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring,  	amdgpu_ring_emit_wreg(ring, hub->vm_inv_eng0_req + eng, req); +	/* wait for the invalidate to complete */ +	amdgpu_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, +				  1 << vmid, 1 << vmid); +  	return pd_addr;  } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index e1ae39f86adf..ce599fd24412 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1126,21 +1126,7 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,  					 unsigned vmid, unsigned pasid,  					 uint64_t pd_addr)  { -	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; -	unsigned eng = ring->vm_inv_eng; -  	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr); - -	/* wait for flush */ -	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) | -			  SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) | -			  SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* equal */ -	amdgpu_ring_write(ring, (hub->vm_inv_eng0_ack + eng) << 2); -	amdgpu_ring_write(ring, 0); -	amdgpu_ring_write(ring, 1 << vmid); /* reference */ -	amdgpu_ring_write(ring, 1 << vmid); /* mask */ -	amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) | -			
  SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10));  }  static void sdma_v4_0_ring_emit_wreg(struct amdgpu_ring *ring, @@ -1589,7 +1575,9 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {  		6 + /* sdma_v4_0_ring_emit_hdp_flush */  		3 + /* hdp invalidate */  		6 + /* sdma_v4_0_ring_emit_pipeline_sync */ -		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 6 + /* sdma_v4_0_ring_emit_vm_flush */ +		/* sdma_v4_0_ring_emit_vm_flush */ +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 +  		10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */  	.emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */  	.emit_ib = sdma_v4_0_ring_emit_ib, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 0f63ed674d0b..f70da8a29f86 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -27,7 +27,8 @@  #include "nbio_v6_1.h"  #include "nbio_v7_0.h" -#define SOC15_FLUSH_GPU_TLB_NUM_WREG	4 +#define SOC15_FLUSH_GPU_TLB_NUM_WREG		4 +#define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT	1  extern const struct amd_ip_funcs soc15_common_ip_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index fcec0bea101d..bf16440e7258 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -1265,7 +1265,6 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,  					uint64_t pd_addr)  {  	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; -	unsigned eng = ring->vm_inv_eng;  	uint32_t data0, data1, mask;  	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr); @@ -1275,12 +1274,6 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,  	data1 = lower_32_bits(pd_addr);  	mask = 0xffffffff;  	uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask); - -	/* wait for flush */ -	data0 = hub->vm_inv_eng0_ack + eng; -	data1 = 1 << vmid; -	mask =  1 << vmid; -	
uvd_v7_0_ring_emit_reg_wait(ring, data0, data1, mask);  }  static void uvd_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) @@ -1313,17 +1306,12 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,  					    uint64_t pd_addr)  {  	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; -	unsigned eng = ring->vm_inv_eng;  	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);  	/* wait for reg writes */  	uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,  					lower_32_bits(pd_addr), 0xffffffff); - -	/* wait for flush */ -	uvd_v7_0_enc_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, -					1 << vmid, 1 << vmid);  }  static void uvd_v7_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, @@ -1669,7 +1657,9 @@ static const struct amdgpu_ring_funcs uvd_v7_0_ring_vm_funcs = {  	.set_wptr = uvd_v7_0_ring_set_wptr,  	.emit_frame_size =  		6 + 6 + /* hdp flush / invalidate */ -		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + 16 + /* uvd_v7_0_ring_emit_vm_flush */ +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + +		8 + /* uvd_v7_0_ring_emit_vm_flush */  		14 + 14, /* uvd_v7_0_ring_emit_fence x2 vm fence */  	.emit_ib_size = 8, /* uvd_v7_0_ring_emit_ib */  	.emit_ib = uvd_v7_0_ring_emit_ib, @@ -1696,7 +1686,9 @@ static const struct amdgpu_ring_funcs uvd_v7_0_enc_ring_vm_funcs = {  	.set_wptr = uvd_v7_0_enc_ring_set_wptr,  	.emit_frame_size =  		3 + 3 + /* hdp flush / invalidate */ -		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 8 + /* uvd_v7_0_enc_ring_emit_vm_flush */ +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + +		4 + /* uvd_v7_0_enc_ring_emit_vm_flush */  		5 + 5 + /* uvd_v7_0_enc_ring_emit_fence x2 vm fence */  		1, /* uvd_v7_0_enc_ring_insert_end */  	.emit_ib_size = 5, /* uvd_v7_0_enc_ring_emit_ib */ diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 2a4f73ddea97..22c2067bd849 100755 --- 
a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -979,17 +979,12 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,  				   uint64_t pd_addr)  {  	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; -	unsigned eng = ring->vm_inv_eng;  	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);  	/* wait for reg writes */  	vce_v4_0_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,  			       lower_32_bits(pd_addr), 0xffffffff); - -	/* wait for flush */ -	vce_v4_0_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, -			       1 << vmid, 1 << vmid);  }  static void vce_v4_0_emit_wreg(struct amdgpu_ring *ring, @@ -1069,7 +1064,9 @@ static const struct amdgpu_ring_funcs vce_v4_0_ring_vm_funcs = {  	.set_wptr = vce_v4_0_ring_set_wptr,  	.parse_cs = amdgpu_vce_ring_parse_cs_vm,  	.emit_frame_size = -		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 8 + /* vce_v4_0_emit_vm_flush */ +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + +		4 + /* vce_v4_0_emit_vm_flush */  		5 + 5 + /* amdgpu_vce_ring_emit_fence x2 vm fence */  		1, /* vce_v4_0_ring_insert_end */  	.emit_ib_size = 5, /* vce_v4_0_ring_emit_ib */ diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 294a1bfb59df..d9f597c36b63 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -863,7 +863,6 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,  					    uint64_t pd_addr)  {  	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; -	unsigned eng = ring->vm_inv_eng;  	uint32_t data0, data1, mask;  	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr); @@ -873,12 +872,6 @@ static void vcn_v1_0_dec_ring_emit_vm_flush(struct amdgpu_ring *ring,  	data1 = lower_32_bits(pd_addr);  	mask = 0xffffffff;  	vcn_v1_0_dec_ring_emit_reg_wait(ring, data0, data1, mask); - -	/* wait for flush */ -	data0 = 
hub->vm_inv_eng0_ack + eng; -	data1 = 1 << vmid; -	mask =  1 << vmid; -	vcn_v1_0_dec_ring_emit_reg_wait(ring, data0, data1, mask);  }  static void vcn_v1_0_dec_ring_emit_wreg(struct amdgpu_ring *ring, @@ -1008,17 +1001,12 @@ static void vcn_v1_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,  					    uint64_t pd_addr)  {  	struct amdgpu_vmhub *hub = &ring->adev->vmhub[ring->funcs->vmhub]; -	unsigned eng = ring->vm_inv_eng;  	pd_addr = amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pasid, pd_addr);  	/* wait for reg writes */  	vcn_v1_0_enc_ring_emit_reg_wait(ring, hub->ctx0_ptb_addr_lo32 + vmid * 2,  					lower_32_bits(pd_addr), 0xffffffff); - -	/* wait for flush */ -	vcn_v1_0_enc_ring_emit_reg_wait(ring, hub->vm_inv_eng0_ack + eng, -					1 << vmid, 1 << vmid);  }  static void vcn_v1_0_enc_ring_emit_wreg(struct amdgpu_ring *ring, @@ -1104,7 +1092,9 @@ static const struct amdgpu_ring_funcs vcn_v1_0_dec_ring_vm_funcs = {  	.set_wptr = vcn_v1_0_dec_ring_set_wptr,  	.emit_frame_size =  		6 + 6 + /* hdp invalidate / flush */ -		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + 16 + /* vcn_v1_0_dec_ring_emit_vm_flush */ +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + +		8 + /* vcn_v1_0_dec_ring_emit_vm_flush */  		14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */  		6,  	.emit_ib_size = 8, /* vcn_v1_0_dec_ring_emit_ib */ @@ -1133,7 +1123,9 @@ static const struct amdgpu_ring_funcs vcn_v1_0_enc_ring_vm_funcs = {  	.get_wptr = vcn_v1_0_enc_ring_get_wptr,  	.set_wptr = vcn_v1_0_enc_ring_set_wptr,  	.emit_frame_size = -		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + 8 + /* vcn_v1_0_enc_ring_emit_vm_flush */ +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 + +		4 + /* vcn_v1_0_enc_ring_emit_vm_flush */  		5 + 5 + /* vcn_v1_0_enc_ring_emit_fence x2 vm fence */  		1, /* vcn_v1_0_enc_ring_insert_end */  	.emit_ib_size = 5, /* vcn_v1_0_enc_ring_emit_ib */ | 
