diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 209 | 
1 files changed, 186 insertions, 23 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index d78bd9732543..2dce338b0f1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -32,6 +32,7 @@  #include "amdgpu.h"  #include "amdgpu_gmc.h"  #include "amdgpu_ras.h" +#include "amdgpu_reset.h"  #include "amdgpu_xgmi.h"  #include <drm/drm_drv.h> @@ -263,12 +264,14 @@ void amdgpu_gmc_sysvm_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc   *   * @adev: amdgpu device structure holding all necessary information   * @mc: memory controller structure holding memory information + * @gart_placement: GART placement policy with respect to VRAM   *   * Function will place try to place GART before or after VRAM.   * If GART size is bigger than space left then we ajust GART size.   * Thus function will never fails.   */ -void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) +void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, +			      enum amdgpu_gart_placement gart_placement)  {  	const uint64_t four_gb = 0x100000000ULL;  	u64 size_af, size_bf; @@ -286,11 +289,22 @@ void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)  		mc->gart_size = max(size_bf, size_af);  	} -	if ((size_bf >= mc->gart_size && size_bf < size_af) || -	    (size_af < mc->gart_size)) -		mc->gart_start = 0; -	else +	switch (gart_placement) { +	case AMDGPU_GART_PLACEMENT_HIGH:  		mc->gart_start = max_mc_address - mc->gart_size + 1; +		break; +	case AMDGPU_GART_PLACEMENT_LOW: +		mc->gart_start = 0; +		break; +	case AMDGPU_GART_PLACEMENT_BEST_FIT: +	default: +		if ((size_bf >= mc->gart_size && size_bf < size_af) || +		    (size_af < mc->gart_size)) +			mc->gart_start = 0; +		else +			mc->gart_start = max_mc_address - mc->gart_size + 1; +		break; +	}  	mc->gart_start &= ~(four_gb - 1);  	mc->gart_end = mc->gart_start + mc->gart_size - 1; @@ -315,14 +329,6 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)  	const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1);  	u64 size_af, size_bf; -	if (amdgpu_sriov_vf(adev)) { -		mc->agp_start = 0xffffffffffff; -		mc->agp_end = 0x0; -		mc->agp_size = 0; - -		return; -	} -  	if (mc->fb_start > mc->gart_start) {  		size_bf = (mc->fb_start & sixteen_gb_mask) -  			ALIGN(mc->gart_end + 1, sixteen_gb); @@ -347,6 +353,25 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)  }  /** + * amdgpu_gmc_set_agp_default - Set the default AGP aperture value. + * @adev: amdgpu device structure holding all necessary information + * @mc: memory controller structure holding memory information + * + * To disable the AGP aperture, you need to set the start to a larger + * value than the end.  This function sets the default value which + * can then be overridden using amdgpu_gmc_agp_location() if you want + * to enable the AGP aperture on a specific chip. + * + */ +void amdgpu_gmc_set_agp_default(struct amdgpu_device *adev, +				struct amdgpu_gmc *mc) +{ +	mc->agp_start = 0xffffffffffff; +	mc->agp_end = 0; +	mc->agp_size = 0; +} + +/**   * amdgpu_gmc_fault_key - get hask key from vm fault address and pasid   *   * @addr: 48 bit physical address, page aligned (36 significant bits) @@ -452,7 +477,10 @@ void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,  	uint32_t hash;  	uint64_t tmp; -	ih = adev->irq.retry_cam_enabled ? &adev->irq.ih_soft : &adev->irq.ih1; +	if (adev->irq.retry_cam_enabled) +		return; + +	ih = &adev->irq.ih1;  	/* Get the WPTR of the last entry in IH ring */  	last_wptr = amdgpu_ih_get_wptr(adev, ih);  	/* Order wptr with ring data. */ @@ -549,13 +577,17 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)  		/* reserve engine 5 for firmware */  		if (adev->enable_mes)  			vm_inv_engs[i] &= ~(1 << 5); +		/* reserve mmhub engine 3 for firmware */ +		if (adev->enable_umsch_mm) +			vm_inv_engs[i] &= ~(1 << 3);  	}  	for (i = 0; i < adev->num_rings; ++i) {  		ring = adev->rings[i];  		vmhub = ring->vm_hub; -		if (ring == &adev->mes.ring) +		if (ring == &adev->mes.ring || +		    ring == &adev->umsch_mm.ring)  			continue;  		inv_eng = ffs(vm_inv_engs[vmhub]); @@ -575,6 +607,142 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)  	return 0;  } +void amdgpu_gmc_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, +			      uint32_t vmhub, uint32_t flush_type) +{ +	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; +	struct amdgpu_vmhub *hub = &adev->vmhub[vmhub]; +	struct dma_fence *fence; +	struct amdgpu_job *job; +	int r; + +	if (!hub->sdma_invalidation_workaround || vmid || +	    !adev->mman.buffer_funcs_enabled || +	    !adev->ib_pool_ready || amdgpu_in_reset(adev) || +	    !ring->sched.ready) { + +		/* +		 * A GPU reset should flush all TLBs anyway, so no need to do +		 * this while one is ongoing. +		 */ +		if (!down_read_trylock(&adev->reset_domain->sem)) +			return; + +		if (adev->gmc.flush_tlb_needs_extra_type_2) +			adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, +							   vmhub, 2); + +		if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2) +			adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, +							   vmhub, 0); + +		adev->gmc.gmc_funcs->flush_gpu_tlb(adev, vmid, vmhub, +						   flush_type); +		up_read(&adev->reset_domain->sem); +		return; +	} + +	/* The SDMA on Navi 1x has a bug which can theoretically result in memory +	 * corruption if an invalidation happens at the same time as an VA +	 * translation. Avoid this by doing the invalidation from the SDMA +	 * itself at least for GART. +	 */ +	mutex_lock(&adev->mman.gtt_window_lock); +	r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.high_pr, +				     AMDGPU_FENCE_OWNER_UNDEFINED, +				     16 * 4, AMDGPU_IB_POOL_IMMEDIATE, +				     &job); +	if (r) +		goto error_alloc; + +	job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); +	job->vm_needs_flush = true; +	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop; +	amdgpu_ring_pad_ib(ring, &job->ibs[0]); +	fence = amdgpu_job_submit(job); +	mutex_unlock(&adev->mman.gtt_window_lock); + +	dma_fence_wait(fence, false); +	dma_fence_put(fence); + +	return; + +error_alloc: +	mutex_unlock(&adev->mman.gtt_window_lock); +	dev_err(adev->dev, "Error flushing GPU TLB using the SDMA (%d)!\n", r); +} + +int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid, +				   uint32_t flush_type, bool all_hub, +				   uint32_t inst) +{ +	u32 usec_timeout = amdgpu_sriov_vf(adev) ? SRIOV_USEC_TIMEOUT : +		adev->usec_timeout; +	struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring; +	struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst]; +	unsigned int ndw; +	signed long r; +	uint32_t seq; + +	if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready || +	    !down_read_trylock(&adev->reset_domain->sem)) { + +		if (adev->gmc.flush_tlb_needs_extra_type_2) +			adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, +								 2, all_hub, +								 inst); + +		if (adev->gmc.flush_tlb_needs_extra_type_0 && flush_type == 2) +			adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, +								 0, all_hub, +								 inst); + +		adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid, +							 flush_type, all_hub, +							 inst); +		return 0; +	} + +	/* 2 dwords flush + 8 dwords fence */ +	ndw = kiq->pmf->invalidate_tlbs_size + 8; + +	if (adev->gmc.flush_tlb_needs_extra_type_2) +		ndw += kiq->pmf->invalidate_tlbs_size; + +	if (adev->gmc.flush_tlb_needs_extra_type_0) +		ndw += kiq->pmf->invalidate_tlbs_size; + +	spin_lock(&adev->gfx.kiq[inst].ring_lock); +	amdgpu_ring_alloc(ring, ndw); +	if (adev->gmc.flush_tlb_needs_extra_type_2) +		kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub); + +	if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0) +		kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub); + +	kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub); +	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT); +	if (r) { +		amdgpu_ring_undo(ring); +		spin_unlock(&adev->gfx.kiq[inst].ring_lock); +		goto error_unlock_reset; +	} + +	amdgpu_ring_commit(ring); +	spin_unlock(&adev->gfx.kiq[inst].ring_lock); +	r = amdgpu_fence_wait_polling(ring, seq, usec_timeout); +	if (r < 1) { +		dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r); +		r = -ETIME; +		goto error_unlock_reset; +	} +	r = 0; + +error_unlock_reset: +	up_read(&adev->reset_domain->sem); +	return r; +} +  /**   * amdgpu_gmc_tmz_set -- check and set if a device supports TMZ   * @adev: amdgpu_device pointer @@ -584,7 +752,7 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)   */  void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)  { -	switch (adev->ip_versions[GC_HWIP][0]) { +	switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {  	/* RAVEN */  	case IP_VERSION(9, 2, 2):  	case IP_VERSION(9, 1, 0): @@ -618,6 +786,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)  	/* YELLOW_CARP*/  	case IP_VERSION(10, 3, 3):  	case IP_VERSION(11, 0, 4): +	case IP_VERSION(11, 5, 0):  		/* Don't enable it by default yet.  		 */  		if (amdgpu_tmz < 1) { @@ -648,7 +817,7 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)  void amdgpu_gmc_noretry_set(struct amdgpu_device *adev)  {  	struct amdgpu_gmc *gmc = &adev->gmc; -	uint32_t gc_ver = adev->ip_versions[GC_HWIP][0]; +	uint32_t gc_ver = amdgpu_ip_version(adev, GC_HWIP, 0);  	bool noretry_default = (gc_ver == IP_VERSION(9, 0, 1) ||  				gc_ver == IP_VERSION(9, 3, 0) ||  				gc_ver == IP_VERSION(9, 4, 0) || @@ -721,12 +890,6 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)  	case CHIP_RENOIR:  		adev->mman.keep_stolen_vga_memory = true;  		break; -	case CHIP_YELLOW_CARP: -		if (amdgpu_discovery == 0) { -			adev->mman.stolen_reserved_offset = 0x1ffb0000; -			adev->mman.stolen_reserved_size = 64 * PAGE_SIZE; -		} -		break;  	default:  		adev->mman.keep_stolen_vga_memory = false;  		break; | 
