Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
21 files changed, 181 insertions, 123 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index fbe7616555c8..a2879d2b7c8e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -250,16 +250,24 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 
 void amdgpu_amdkfd_suspend(struct amdgpu_device *adev, bool suspend_proc)
 {
-	if (adev->kfd.dev)
-		kgd2kfd_suspend(adev->kfd.dev, suspend_proc);
+	if (adev->kfd.dev) {
+		if (adev->in_s0ix)
+			kgd2kfd_stop_sched_all_nodes(adev->kfd.dev);
+		else
+			kgd2kfd_suspend(adev->kfd.dev, suspend_proc);
+	}
 }
 
 int amdgpu_amdkfd_resume(struct amdgpu_device *adev, bool resume_proc)
 {
 	int r = 0;
 
-	if (adev->kfd.dev)
-		r = kgd2kfd_resume(adev->kfd.dev, resume_proc);
+	if (adev->kfd.dev) {
+		if (adev->in_s0ix)
+			r = kgd2kfd_start_sched_all_nodes(adev->kfd.dev);
+		else
+			r = kgd2kfd_resume(adev->kfd.dev, resume_proc);
+	}
 
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 33eb4826b58b..aa88bad7416b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -426,7 +426,9 @@ void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
 int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd);
 void kgd2kfd_unlock_kfd(struct kfd_dev *kfd);
 int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id);
+int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd);
 int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id);
+int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd);
 bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id);
 bool kgd2kfd_vmfault_fast_path(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry,
 			       bool retry_fault);
@@ -516,11 +518,21 @@ static inline int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id)
 	return 0;
 }
 
+static inline int kgd2kfd_start_sched_all_nodes(struct kfd_dev *kfd)
+{
+	return 0;
+}
+
 static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
 {
 	return 0;
 }
 
+static inline int kgd2kfd_stop_sched_all_nodes(struct kfd_dev *kfd)
+{
+	return 0;
+}
+
 static inline bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
 {
 	return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 260165bbe373..b16cce7c22c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -213,19 +213,35 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
 
 	if (kfd_mem_limit.system_mem_used + system_mem_needed >
-	    kfd_mem_limit.max_system_mem_limit)
+	    kfd_mem_limit.max_system_mem_limit) {
 		pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
+		if (!no_system_mem_limit) {
+			ret = -ENOMEM;
+			goto release;
+		}
+	}
 
-	if ((kfd_mem_limit.system_mem_used + system_mem_needed >
-	     kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
-	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
-	     kfd_mem_limit.max_ttm_mem_limit) ||
-	    (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
-	     vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size))) {
+	if (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
+		kfd_mem_limit.max_ttm_mem_limit) {
 		ret = -ENOMEM;
 		goto release;
 	}
 
+	/* if is_app_apu is false and apu_prefer_gtt is true, it is an APU with
+	 * carve out < gtt. In that case, VRAM allocation will go to gtt domain, skip
+	 * VRAM check since ttm_mem_limit check already cover this allocation
+	 */
+
+	if (adev && xcp_id >= 0 && (!adev->apu_prefer_gtt || adev->gmc.is_app_apu)) {
+		uint64_t vram_available =
+			vram_size - reserved_for_pt - reserved_for_ras -
+			atomic64_read(&adev->vram_pin_size);
+		if (adev->kfd.vram_used[xcp_id] + vram_needed > vram_available) {
+			ret = -ENOMEM;
+			goto release;
+		}
+	}
+
 	/* Update memory accounting by decreasing available system
 	 * memory, TTM memory and GPU memory as computed above
 	 */
@@ -1626,11 +1642,15 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
 	uint64_t vram_available, system_mem_available, ttm_mem_available;
 
 	spin_lock(&kfd_mem_limit.mem_limit_lock);
-	vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
-		- adev->kfd.vram_used_aligned[xcp_id]
-		- atomic64_read(&adev->vram_pin_size)
-		- reserved_for_pt
-		- reserved_for_ras;
+	if (adev->apu_prefer_gtt && !adev->gmc.is_app_apu)
+		vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+			- adev->kfd.vram_used_aligned[xcp_id];
+	else
+		vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
+			- adev->kfd.vram_used_aligned[xcp_id]
+			- atomic64_read(&adev->vram_pin_size)
+			- reserved_for_pt
+			- reserved_for_ras;
 
 	if (adev->apu_prefer_gtt) {
 		system_mem_available = no_system_mem_limit ?
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index dfb6cfd83760..02138aa55793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -88,8 +88,8 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	}
 
 	r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size,
-			     AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
-			     AMDGPU_VM_PAGE_EXECUTABLE);
+			     AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE |
+			     AMDGPU_PTE_EXECUTABLE);
 
 	if (r) {
 		DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 01d234cf8156..c8459337fcb8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5136,7 +5136,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
 	adev->in_suspend = true;
 
 	if (amdgpu_sriov_vf(adev)) {
-		if (!adev->in_s0ix && !adev->in_runpm)
+		if (!adev->in_runpm)
 			amdgpu_amdkfd_suspend_process(adev);
 		amdgpu_virt_fini_data_exchange(adev);
 		r = amdgpu_virt_request_full_gpu(adev, false);
@@ -5156,10 +5156,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
 
 	amdgpu_device_ip_suspend_phase1(adev);
 
-	if (!adev->in_s0ix) {
-		amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
-		amdgpu_userq_suspend(adev);
-	}
+	amdgpu_amdkfd_suspend(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+	amdgpu_userq_suspend(adev);
 
 	r = amdgpu_device_evict_resources(adev);
 	if (r)
@@ -5254,15 +5252,13 @@ int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
 		goto exit;
 	}
 
-	if (!adev->in_s0ix) {
-		r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
-		if (r)
-			goto exit;
+	r = amdgpu_amdkfd_resume(adev, !amdgpu_sriov_vf(adev) && !adev->in_runpm);
+	if (r)
+		goto exit;
 
-		r = amdgpu_userq_resume(adev);
-		if (r)
-			goto exit;
-	}
+	r = amdgpu_userq_resume(adev);
+	if (r)
+		goto exit;
 
 	r = amdgpu_device_ip_late_init(adev);
 	if (r)
@@ -5275,7 +5271,7 @@ exit:
 		amdgpu_virt_init_data_exchange(adev);
 		amdgpu_virt_release_full_gpu(adev, true);
 
-		if (!adev->in_s0ix && !r && !adev->in_runpm)
+		if (!r && !adev->in_runpm)
 			r = amdgpu_amdkfd_resume_process(adev);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 5743ebb2f1b7..ce27cb5bb05e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -285,6 +285,36 @@ static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
 	return ret;
 }
 
+static int amdgpu_dma_buf_vmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+	struct drm_gem_object *obj = dma_buf->priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+	int ret;
+
+	/*
+	 * Pin to keep buffer in place while it's vmap'ed. The actual
+	 * domain is not that important as long as it's mapable. Using
+	 * GTT and VRAM should be compatible with most use cases.
+	 */
+	ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT | AMDGPU_GEM_DOMAIN_VRAM);
+	if (ret)
+		return ret;
+	ret = drm_gem_dmabuf_vmap(dma_buf, map);
+	if (ret)
+		amdgpu_bo_unpin(bo);
+
+	return ret;
+}
+
+static void amdgpu_dma_buf_vunmap(struct dma_buf *dma_buf, struct iosys_map *map)
+{
+	struct drm_gem_object *obj = dma_buf->priv;
+	struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+
+	drm_gem_dmabuf_vunmap(dma_buf, map);
+	amdgpu_bo_unpin(bo);
+}
+
 const struct dma_buf_ops amdgpu_dmabuf_ops = {
 	.attach = amdgpu_dma_buf_attach,
 	.pin = amdgpu_dma_buf_pin,
@@ -294,8 +324,8 @@ const struct dma_buf_ops amdgpu_dmabuf_ops = {
 	.release = drm_gem_dmabuf_release,
 	.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
 	.mmap = drm_gem_dmabuf_mmap,
-	.vmap = drm_gem_dmabuf_vmap,
-	.vunmap = drm_gem_dmabuf_vunmap,
+	.vmap = amdgpu_dma_buf_vmap,
+	.vunmap = amdgpu_dma_buf_vunmap,
 };
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 23484317a5fa..693357caa9a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -448,7 +448,7 @@ static int psp_sw_init(struct amdgpu_ip_block *ip_block)
 	psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
 	if (!psp->cmd) {
 		dev_err(adev->dev, "Failed to allocate memory to command buffer!\n");
-		ret = -ENOMEM;
+		return -ENOMEM;
 	}
 
 	adev->psp.xgmi_context.supports_extended_data =
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 6379bb25bf5c..486c3646710c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -421,8 +421,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 	dma_fence_put(ring->vmid_wait);
 	ring->vmid_wait = NULL;
 	ring->me = 0;
-
-	ring->adev->rings[ring->idx] = NULL;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
index c3ace8030530..8190c24a649a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_userq.c
@@ -471,6 +471,7 @@ amdgpu_userq_create(struct drm_file *filp, union drm_amdgpu_userq *args)
 	if (index == (uint64_t)-EINVAL) {
 		drm_file_err(uq_mgr->file, "Failed to get doorbell for queue\n");
 		kfree(queue);
+		r = -EINVAL;
 		goto unlock;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index bf7c22f81cda..ba73518f5cdf 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1462,17 +1462,12 @@ static int dce_v10_0_audio_init(struct amdgpu_device *adev)
 
 static void dce_v10_0_audio_fini(struct amdgpu_device *adev)
 {
-	int i;
-
 	if (!amdgpu_audio)
 		return;
 
 	if (!adev->mode_info.audio.enabled)
 		return;
 
-	for (i = 0; i < adev->mode_info.audio.num_pins; i++)
-		dce_v10_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
-
 	adev->mode_info.audio.enabled = false;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 47e05783c4a0..b01d88d078fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -1511,17 +1511,12 @@ static int dce_v11_0_audio_init(struct amdgpu_device *adev)
 
 static void dce_v11_0_audio_fini(struct amdgpu_device *adev)
 {
-	int i;
-
 	if (!amdgpu_audio)
 		return;
 
 	if (!adev->mode_info.audio.enabled)
 		return;
 
-	for (i = 0; i < adev->mode_info.audio.num_pins; i++)
-		dce_v11_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
-
 	adev->mode_info.audio.enabled = false;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 276c025c4c03..81760a26f2ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -1451,17 +1451,12 @@ static int dce_v6_0_audio_init(struct amdgpu_device *adev)
 
 static void dce_v6_0_audio_fini(struct amdgpu_device *adev)
 {
-	int i;
-
 	if (!amdgpu_audio)
 		return;
 
 	if (!adev->mode_info.audio.enabled)
 		return;
 
-	for (i = 0; i < adev->mode_info.audio.num_pins; i++)
-		dce_v6_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
-
 	adev->mode_info.audio.enabled = false;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index e62ccf9eb73d..19a265bd4d19 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -1443,17 +1443,12 @@ static int dce_v8_0_audio_init(struct amdgpu_device *adev)
 
 static void dce_v8_0_audio_fini(struct amdgpu_device *adev)
 {
-	int i;
-
 	if (!amdgpu_audio)
 		return;
 
 	if (!adev->mode_info.audio.enabled)
 		return;
 
-	for (i = 0; i < adev->mode_info.audio.num_pins; i++)
-		dce_v8_0_audio_enable(adev, &adev->mode_info.audio.pin[i], false);
-
 	adev->mode_info.audio.enabled = false;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index c01c241a1b06..c37527704d43 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1612,9 +1612,9 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
 	case IP_VERSION(11, 0, 2):
 	case IP_VERSION(11, 0, 3):
 		if (!adev->gfx.disable_uq &&
-		    adev->gfx.me_fw_version  >= 2390 &&
-		    adev->gfx.pfp_fw_version >= 2530 &&
-		    adev->gfx.mec_fw_version >= 2600 &&
+		    adev->gfx.me_fw_version  >= 2420 &&
+		    adev->gfx.pfp_fw_version >= 2580 &&
+		    adev->gfx.mec_fw_version >= 2650 &&
 		    adev->mes.fw_version[0] >= 120) {
 			adev->userq_funcs[AMDGPU_HW_IP_GFX] = &userq_mes_funcs;
 			adev->userq_funcs[AMDGPU_HW_IP_COMPUTE] = &userq_mes_funcs;
@@ -1654,6 +1654,21 @@ static int gfx_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
 			}
 		}
 		break;
+	case IP_VERSION(11, 0, 1):
+	case IP_VERSION(11, 0, 4):
+		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
+		adev->gfx.cleaner_shader_size = sizeof(gfx_11_0_3_cleaner_shader_hex);
+		if (adev->gfx.pfp_fw_version >= 102 &&
+		    adev->gfx.mec_fw_version >= 66 &&
+		    adev->mes.fw_version[0] >= 128) {
+			adev->gfx.enable_cleaner_shader = true;
+			r = amdgpu_gfx_cleaner_shader_sw_init(adev, adev->gfx.cleaner_shader_size);
+			if (r) {
+				adev->gfx.enable_cleaner_shader = false;
+				dev_err(adev->dev, "Failed to initialize cleaner shader\n");
+			}
+		}
+		break;
 	case IP_VERSION(11, 5, 0):
 	case IP_VERSION(11, 5, 1):
 		adev->gfx.cleaner_shader_ptr = gfx_11_0_3_cleaner_shader_hex;
@@ -4129,6 +4144,8 @@ static int gfx_v11_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
 #endif
 	if (prop->tmz_queue)
 		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
+	if (!prop->kernel_queue)
+		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
 	mqd->cp_gfx_hqd_cntl = tmp;
 
 	/* set up cp_doorbell_control */
@@ -4281,8 +4298,10 @@ static int gfx_v11_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH,
 			    prop->allow_tunneling);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+	if (prop->kernel_queue) {
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+	}
 	if (prop->tmz_queue)
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
 	mqd->cp_hqd_pq_control = tmp;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
index 3e138527d534..fd44d5503e28 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c
@@ -3026,6 +3026,8 @@ static int gfx_v12_0_gfx_mqd_init(struct amdgpu_device *adev, void *m,
 #endif
 	if (prop->tmz_queue)
 		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, TMZ_MATCH, 1);
+	if (!prop->kernel_queue)
+		tmp = REG_SET_FIELD(tmp, CP_GFX_HQD_CNTL, RB_NON_PRIV, 1);
 	mqd->cp_gfx_hqd_cntl = tmp;
 
 	/* set up cp_doorbell_control */
@@ -3175,8 +3177,10 @@ static int gfx_v12_0_compute_mqd_init(struct amdgpu_device *adev, void *m,
 			    (order_base_2(AMDGPU_GPU_PAGE_SIZE / 4) - 1));
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, UNORD_DISPATCH, 1);
 	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TUNNEL_DISPATCH, 0);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
-	tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+	if (prop->kernel_queue) {
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, PRIV_STATE, 1);
+		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, KMD_QUEUE, 1);
+	}
 	if (prop->tmz_queue)
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_CONTROL, TMZ, 1);
 	mqd->cp_hqd_pq_control = tmp;
diff --git a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
index a887df520414..4258d3e0b706 100644
--- a/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/isp_v4_1_1.c
@@ -29,6 +29,8 @@
 #include "amdgpu.h"
 #include "isp_v4_1_1.h"
 
+MODULE_FIRMWARE("amdgpu/isp_4_1_1.bin");
+
 #define ISP_PERFORMANCE_STATE_LOW 0
 #define ISP_PERFORMANCE_STATE_HIGH 1
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 28eb846280dd..3f6a828cad8a 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -641,8 +641,9 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
 		break;
 	case MES_MISC_OP_CHANGE_CONFIG:
 		if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) {
-			dev_err(mes->adev->dev, "MES FW version must be larger than 0x63 to support limit single process feature.\n");
-			return -EINVAL;
+			dev_warn_once(mes->adev->dev,
+				      "MES FW version must be larger than 0x63 to support limit single process feature.\n");
+			return 0;
 		}
 		misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG;
 		misc_pkt.change_config.opcode =
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 6cc05d36e359..64b240b51f1a 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -149,12 +149,12 @@ static int psp_v11_0_wait_for_bootloader(struct psp_context *psp)
 	int ret;
 	int retry_loop;
 
-	for (retry_loop = 0; retry_loop < 10; retry_loop++) {
+	for (retry_loop = 0; retry_loop < 20; retry_loop++) {
 		/* Wait for bootloader to signify that is
 		    ready having bit 31 of C2PMSG_35 set to 1 */
 		ret = psp_wait_for(
 			psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
-			0x80000000, 0x80000000, PSP_WAITREG_NOVERBOSE);
+			0x80000000, 0x8000FFFF, PSP_WAITREG_NOVERBOSE);
 
 		if (ret == 0)
 			return 0;
@@ -397,18 +397,6 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
 
 	msleep(500);
 
-	offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
-
-	ret = psp_wait_for(psp, offset, MBOX_TOS_RESP_FLAG, MBOX_TOS_RESP_MASK,
-			   0);
-
-	if (ret) {
-		DRM_INFO("psp mode 1 reset failed!\n");
-		return -EINVAL;
-	}
-
-	DRM_INFO("psp mode1 reset succeed \n");
-
 	return 0;
 }
 
@@ -665,7 +653,8 @@ static const struct psp_funcs psp_v11_0_funcs = {
 	.ring_get_wptr = psp_v11_0_ring_get_wptr,
 	.ring_set_wptr = psp_v11_0_ring_set_wptr,
 	.load_usbc_pd_fw = psp_v11_0_load_usbc_pd_fw,
-	.read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw
+	.read_usbc_pd_fw = psp_v11_0_read_usbc_pd_fw,
+	.wait_for_bootloader = psp_v11_0_wait_for_bootloader
 };
 
 void psp_v11_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index e6d8eddda2bf..db6e41967f12 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -1377,7 +1377,7 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
 
 	switch (amdgpu_ip_version(adev, SDMA0_HWIP, 0)) {
 	case IP_VERSION(6, 0, 0):
-		if ((adev->sdma.instance[0].fw_version >= 24) && !adev->sdma.disable_uq)
+		if ((adev->sdma.instance[0].fw_version >= 27) && !adev->sdma.disable_uq)
 			adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
 		break;
 	case IP_VERSION(6, 0, 1):
@@ -1385,11 +1385,11 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
 			adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
 		break;
 	case IP_VERSION(6, 0, 2):
-		if ((adev->sdma.instance[0].fw_version >= 21) && !adev->sdma.disable_uq)
+		if ((adev->sdma.instance[0].fw_version >= 23) && !adev->sdma.disable_uq)
 			adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
 		break;
 	case IP_VERSION(6, 0, 3):
-		if ((adev->sdma.instance[0].fw_version >= 25) && !adev->sdma.disable_uq)
+		if ((adev->sdma.instance[0].fw_version >= 27) && !adev->sdma.disable_uq)
 			adev->userq_funcs[AMDGPU_HW_IP_DMA] = &userq_mes_funcs;
 		break;
 	case IP_VERSION(6, 1, 0):
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 4b8f4407047f..2811226b0ea5 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -1888,15 +1888,19 @@ static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p,
 				struct amdgpu_job *job)
 {
 	struct drm_gpu_scheduler **scheds;
-
-	/* The create msg must be in the first IB submitted */
-	if (atomic_read(&job->base.entity->fence_seq))
-		return -EINVAL;
+	struct dma_fence *fence;
 
 	/* if VCN0 is harvested, we can't support AV1 */
 	if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
 		return -EINVAL;
 
+	/* wait for all jobs to finish before switching to instance 0 */
+	fence = amdgpu_ctx_get_fence(p->ctx, job->base.entity, ~0ull);
+	if (fence) {
+		dma_fence_wait(fence, false);
+		dma_fence_put(fence);
+	}
+
 	scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
 		[AMDGPU_RING_PRIO_DEFAULT].sched;
 	drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 1924e075b66f..706f3b2f484f 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -1808,15 +1808,19 @@ static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p,
 				struct amdgpu_job *job)
 {
 	struct drm_gpu_scheduler **scheds;
-
-	/* The create msg must be in the first IB submitted */
-	if (atomic_read(&job->base.entity->fence_seq))
-		return -EINVAL;
+	struct dma_fence *fence;
 
 	/* if VCN0 is harvested, we can't support AV1 */
 	if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
 		return -EINVAL;
 
+	/* wait for all jobs to finish before switching to instance 0 */
+	fence = amdgpu_ctx_get_fence(p->ctx, job->base.entity, ~0ull);
+	if (fence) {
+		dma_fence_wait(fence, false);
+		dma_fence_put(fence);
+	}
+
 	scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
 		[AMDGPU_RING_PRIO_0].sched;
 	drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
@@ -1907,22 +1911,16 @@ out:
 
 #define RADEON_VCN_ENGINE_TYPE_ENCODE			(0x00000002)
 #define RADEON_VCN_ENGINE_TYPE_DECODE			(0x00000003)
-
 #define RADEON_VCN_ENGINE_INFO				(0x30000001)
-#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET		16
-
 #define RENCODE_ENCODE_STANDARD_AV1			2
 #define RENCODE_IB_PARAM_SESSION_INIT			0x00000003
-#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET	64
 
-/* return the offset in ib if id is found, -1 otherwise
- * to speed up the searching we only search upto max_offset
- */
-static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset)
+/* return the offset in ib if id is found, -1 otherwise */
+static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int start)
 {
 	int i;
 
-	for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) {
+	for (i = start; i < ib->length_dw && ib->ptr[i] >= 8; i += ib->ptr[i] / 4) {
 		if (ib->ptr[i + 1] == id)
 			return i;
 	}
@@ -1937,33 +1935,29 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
 	struct amdgpu_vcn_decode_buffer *decode_buffer;
 	uint64_t addr;
 	uint32_t val;
-	int idx;
+	int idx = 0, sidx;
 
 	/* The first instance can decode anything */
 	if (!ring->me)
 		return 0;
 
-	/* RADEON_VCN_ENGINE_INFO is at the top of ib block */
-	idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO,
-			RADEON_VCN_ENGINE_INFO_MAX_OFFSET);
-	if (idx < 0) /* engine info is missing */
-		return 0;
-
-	val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */
-	if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
-		decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6];
-
-		if (!(decode_buffer->valid_buf_flag  & 0x1))
-			return 0;
-
-		addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
-			decode_buffer->msg_buffer_address_lo;
-		return vcn_v4_0_dec_msg(p, job, addr);
-	} else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
-		idx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT,
-			RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
-		if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1)
-			return vcn_v4_0_limit_sched(p, job);
+	while ((idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, idx)) >= 0) {
+		val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */
+		if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
+			decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6];
+
+			if (!(decode_buffer->valid_buf_flag & 0x1))
+				return 0;
+
+			addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
+				decode_buffer->msg_buffer_address_lo;
+			return vcn_v4_0_dec_msg(p, job, addr);
+		} else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
+			sidx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, idx);
+			if (sidx >= 0 && ib->ptr[sidx + 2] == RENCODE_ENCODE_STANDARD_AV1)
+				return vcn_v4_0_limit_sched(p, job);
+		}
+		idx += ib->ptr[idx] / 4;
 	}
 	return 0;
 }
