Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
123 files changed, 3257 insertions, 1588 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index 7777d55275de..5fcd510f1abb 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -1,4 +1,33 @@  # SPDX-License-Identifier: MIT + +config DRM_AMDGPU +	tristate "AMD GPU" +	depends on DRM && PCI && MMU +	select FW_LOADER +	select DRM_DISPLAY_DP_HELPER +	select DRM_DISPLAY_HDMI_HELPER +	select DRM_DISPLAY_HELPER +	select DRM_KMS_HELPER +	select DRM_SCHED +	select DRM_TTM +	select DRM_TTM_HELPER +	select POWER_SUPPLY +	select HWMON +	select BACKLIGHT_CLASS_DEVICE +	select INTERVAL_TREE +	select DRM_BUDDY +	# amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work +	# ACPI_VIDEO's dependencies must also be selected. +	select INPUT if ACPI +	select ACPI_VIDEO if ACPI +	# On x86 ACPI_VIDEO also needs ACPI_WMI +	select X86_PLATFORM_DEVICES if ACPI && X86 +	select ACPI_WMI if ACPI && X86 +	help +	  Choose this option if you have a recent AMD Radeon graphics card. + +	  If M is selected, the module will be called amdgpu. +  config DRM_AMDGPU_SI  	bool "Enable amdgpu support for SI parts"  	depends on DRM_AMDGPU diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 6ad39cf71bdd..798d0e9a60b7 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \  	amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \  	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \  	amdgpu_fw_attestation.o amdgpu_securedisplay.o \ -	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o +	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \ +	amdgpu_ring_mux.o  amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o @@ -250,7 +251,7 @@ endif  amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o  amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o  amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o -amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o +amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_hmm.o  include $(FULL_AMD_PATH)/pm/Makefile diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 2eca58220550..e3e2e6e3b485 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -82,7 +82,6 @@  #include "amdgpu_vce.h"  #include "amdgpu_vcn.h"  #include "amdgpu_jpeg.h" -#include "amdgpu_mn.h"  #include "amdgpu_gmc.h"  #include "amdgpu_gfx.h"  #include "amdgpu_sdma.h" @@ -196,6 +195,7 @@ extern int amdgpu_emu_mode;  extern uint amdgpu_smu_memory_pool_size;  extern int amdgpu_smu_pptable_id;  extern uint amdgpu_dc_feature_mask; +extern uint amdgpu_freesync_vid_mode;  extern uint amdgpu_dc_debug_mask;  extern uint amdgpu_dc_visual_confirm;  extern uint amdgpu_dm_abm_level; @@ -219,10 +219,12 @@ extern int amdgpu_use_xgmi_p2p;  extern int sched_policy;  extern bool debug_evictions;  extern bool no_system_mem_limit; +extern int halt_if_hws_hang;  #else  static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;  static const bool __maybe_unused debug_evictions; /* = false */  static const bool __maybe_unused no_system_mem_limit; +static const int __maybe_unused halt_if_hws_hang;  #endif  #ifdef CONFIG_HSA_AMD_P2P  extern bool pcie_p2p; @@ -675,7 +677,7 @@ enum amd_hw_ip_block_type {  	MAX_HWIP  }; -#define HWIP_MAX_INSTANCE	11 +#define HWIP_MAX_INSTANCE	28  #define HW_ID_MAX		300  #define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv)) @@ -1063,6 +1065,7 @@ struct 
amdgpu_device {  	struct work_struct		reset_work;  	bool                            job_hang; +	bool                            dc_enabled;  };  static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev) @@ -1120,6 +1123,8 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,  bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);  bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); +void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev); +  int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,  				 struct amdgpu_reset_context *reset_context); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index b14800ac179e..57b5e11446c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -847,7 +847,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)  	struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;  	if (atif->notifications.brightness_change) { -		if (amdgpu_device_has_dc_support(adev)) { +		if (adev->dc_enabled) {  #if defined(CONFIG_DRM_AMD_DC)  			struct amdgpu_display_manager *dm = &adev->dm; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 5d9a34601a1a..f99d4873bf22 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -195,7 +195,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)  		}  		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev, -						adev_to_drm(adev), &gpu_resources); +							&gpu_resources);  		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; @@ -673,7 +673,7 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,  		goto err;  	} -	ret = amdgpu_job_alloc(adev, 1, &job, NULL); +	ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job);  	if (ret)  		goto err; @@ -760,9 +760,7 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)  void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)  { -	struct ras_err_data err_data = {0, 0, 0, NULL}; - -	amdgpu_umc_poison_handler(adev, &err_data, reset); +	amdgpu_umc_poison_handler(adev, reset);  }  bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 647220a8762d..0040deaf8a83 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -29,6 +29,7 @@  #include <linux/mm.h>  #include <linux/kthread.h>  #include <linux/workqueue.h> +#include <linux/mmu_notifier.h>  #include <kgd_kfd_interface.h>  #include <drm/ttm/ttm_execbuf_util.h>  #include "amdgpu_sync.h" @@ -65,6 +66,7 @@ struct kgd_mem {  	struct mutex lock;  	struct amdgpu_bo *bo;  	struct dma_buf *dmabuf; +	struct hmm_range *range;  	struct list_head attachments;  	/* protected by amdkfd_process_info.lock */  	struct ttm_validate_buffer validate_list; @@ -75,7 +77,7 @@ struct kgd_mem {  	uint32_t alloc_flags; -	atomic_t invalid; +	uint32_t invalid;  	struct amdkfd_process_info *process_info;  	struct amdgpu_sync sync; @@ -131,7 +133,8 @@ struct amdkfd_process_info {  	struct amdgpu_amdkfd_fence *eviction_fence;  	/* MMU-notifier related fields */ -	atomic_t evicted_bos; +	struct mutex notifier_lock; +	uint32_t evicted_bos;  	struct delayed_work restore_userptr_work;  	struct pid *pid;  	bool block_mmu_notifications; @@ -180,7 +183,8 @@ int 
kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);  bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);  struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);  int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo); -int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm); +int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, +				unsigned long cur_seq, struct kgd_mem *mem);  #else  static inline  bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) @@ -201,7 +205,8 @@ int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)  }  static inline -int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) +int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, +				unsigned long cur_seq, struct kgd_mem *mem)  {  	return 0;  } @@ -265,8 +270,10 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_  	(&((struct amdgpu_fpriv *)					\  		((struct drm_file *)(drm_priv))->driver_priv)->vm) +int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev, +				     struct file *filp, u32 pasid);  int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, -					struct file *filp, u32 pasid, +					struct file *filp,  					void **process_info,  					struct dma_fence **ef);  void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, @@ -353,7 +360,6 @@ int kgd2kfd_init(void);  void kgd2kfd_exit(void);  struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf);  bool kgd2kfd_device_init(struct kfd_dev *kfd, -			 struct drm_device *ddev,  			 const struct kgd2kfd_shared_resources *gpu_resources);  void kgd2kfd_device_exit(struct kfd_dev *kfd);  void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm); @@ -381,7 +387,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)  }  static inline -bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev, +bool kgd2kfd_device_init(struct kfd_dev *kfd,  				const struct kgd2kfd_shared_resources *gpu_resources)  {  	return false; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 81e3b528bbc9..e92b93557c13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -787,7 +787,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,  	for (se_idx = 0; se_idx < se_cnt; se_idx++) {  		for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { -			gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff); +			amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);  			queue_map = RREG32_SOC15(GC, 0, mmSPI_CSQ_WF_ACTIVE_STATUS);  			/* @@ -820,7 +820,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,  		}  	} -	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); +	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);  	soc15_grbm_select(adev, 0, 0, 0, 0);  	unlock_spi_csq_mutexes(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 1f76e27f1a35..3b5c53712d31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -29,6 +29,7 @@  #include "amdgpu_object.h"  #include "amdgpu_gem.h"  #include "amdgpu_vm.h" +#include "amdgpu_hmm.h"  #include "amdgpu_amdkfd.h"  #include "amdgpu_dma_buf.h"  #include 
<uapi/linux/kfd_ioctl.h> @@ -403,63 +404,15 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)  static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)  { -	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev); -	bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT; -	bool uncached = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED; -	uint32_t mapping_flags; -	uint64_t pte_flags; -	bool snoop = false; +	uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE | +				 AMDGPU_VM_MTYPE_DEFAULT; -	mapping_flags = AMDGPU_VM_PAGE_READABLE;  	if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)  		mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;  	if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)  		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE; -	switch (adev->asic_type) { -	case CHIP_ARCTURUS: -	case CHIP_ALDEBARAN: -		if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { -			if (bo_adev == adev) { -				if (uncached) -					mapping_flags |= AMDGPU_VM_MTYPE_UC; -				else if (coherent) -					mapping_flags |= AMDGPU_VM_MTYPE_CC; -				else -					mapping_flags |= AMDGPU_VM_MTYPE_RW; -				if (adev->asic_type == CHIP_ALDEBARAN && -				    adev->gmc.xgmi.connected_to_cpu) -					snoop = true; -			} else { -				if (uncached || coherent) -					mapping_flags |= AMDGPU_VM_MTYPE_UC; -				else -					mapping_flags |= AMDGPU_VM_MTYPE_NC; -				if (amdgpu_xgmi_same_hive(adev, bo_adev)) -					snoop = true; -			} -		} else { -			if (uncached || coherent) -				mapping_flags |= AMDGPU_VM_MTYPE_UC; -			else -				mapping_flags |= AMDGPU_VM_MTYPE_NC; -			snoop = true; -		} -		break; -	default: -		if (uncached || coherent) -			mapping_flags |= AMDGPU_VM_MTYPE_UC; -		else -			mapping_flags |= AMDGPU_VM_MTYPE_NC; - -		if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) -			snoop = true; -	} - -	pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags); -	pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0; - -	return pte_flags; +	return amdgpu_gem_va_map_flags(adev, mapping_flags);  }  /** @@ -997,7 +950,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,  		goto out;  	} -	ret = amdgpu_mn_register(bo, user_addr); +	ret = amdgpu_hmm_register(bo, user_addr);  	if (ret) {  		pr_err("%s: Failed to register MMU notifier: %d\n",  		       __func__, ret); @@ -1011,7 +964,9 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,  		 * later stage when it is scheduled by another ioctl called by  		 * CRIU master process for the target pid for restore.  		 
*/ -		atomic_inc(&mem->invalid); +		mutex_lock(&process_info->notifier_lock); +		mem->invalid++; +		mutex_unlock(&process_info->notifier_lock);  		mutex_unlock(&process_info->lock);  		return 0;  	} @@ -1037,7 +992,7 @@ release_out:  	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);  unregister_out:  	if (ret) -		amdgpu_mn_unregister(bo); +		amdgpu_hmm_unregister(bo);  out:  	mutex_unlock(&process_info->lock);  	return ret; @@ -1348,6 +1303,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,  			return -ENOMEM;  		mutex_init(&info->lock); +		mutex_init(&info->notifier_lock);  		INIT_LIST_HEAD(&info->vm_list_head);  		INIT_LIST_HEAD(&info->kfd_bo_list);  		INIT_LIST_HEAD(&info->userptr_valid_list); @@ -1364,7 +1320,6 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,  		}  		info->pid = get_task_pid(current->group_leader, PIDTYPE_PID); -		atomic_set(&info->evicted_bos, 0);  		INIT_DELAYED_WORK(&info->restore_userptr_work,  				  amdgpu_amdkfd_restore_userptr_worker); @@ -1419,6 +1374,7 @@ reserve_pd_fail:  		put_pid(info->pid);  create_evict_fence_fail:  		mutex_destroy(&info->lock); +		mutex_destroy(&info->notifier_lock);  		kfree(info);  	}  	return ret; @@ -1473,10 +1429,9 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)  	amdgpu_bo_unreserve(bo);  } -int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, -					   struct file *filp, u32 pasid, -					   void **process_info, -					   struct dma_fence **ef) +int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev, +				     struct file *filp, u32 pasid) +  {  	struct amdgpu_fpriv *drv_priv;  	struct amdgpu_vm *avm; @@ -1487,10 +1442,6 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,  		return ret;  	avm = &drv_priv->vm; -	/* Already a compute VM? */ -	if (avm->process_info) -		return -EINVAL; -  	/* Free the original amdgpu allocated pasid,  	 * will be replaced with kfd allocated pasid.  	 */ @@ -1499,14 +1450,36 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,  		amdgpu_vm_set_pasid(adev, avm, 0);  	} -	/* Convert VM into a compute VM */ -	ret = amdgpu_vm_make_compute(adev, avm); +	ret = amdgpu_vm_set_pasid(adev, avm, pasid);  	if (ret)  		return ret; -	ret = amdgpu_vm_set_pasid(adev, avm, pasid); +	return 0; +} + +int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, +					   struct file *filp, +					   void **process_info, +					   struct dma_fence **ef) +{ +	struct amdgpu_fpriv *drv_priv; +	struct amdgpu_vm *avm; +	int ret; + +	ret = amdgpu_file_to_fpriv(filp, &drv_priv); +	if (ret) +		return ret; +	avm = &drv_priv->vm; + +	/* Already a compute VM? 
*/ +	if (avm->process_info) +		return -EINVAL; + +	/* Convert VM into a compute VM */ +	ret = amdgpu_vm_make_compute(adev, avm);  	if (ret)  		return ret; +  	/* Initialize KFD part of the VM and process info */  	ret = init_kfd_vm(avm, process_info, ef);  	if (ret) @@ -1543,6 +1516,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,  		cancel_delayed_work_sync(&process_info->restore_userptr_work);  		put_pid(process_info->pid);  		mutex_destroy(&process_info->lock); +		mutex_destroy(&process_info->notifier_lock);  		kfree(process_info);  	}  } @@ -1595,7 +1569,9 @@ int amdgpu_amdkfd_criu_resume(void *p)  	mutex_lock(&pinfo->lock);  	pr_debug("scheduling work\n"); -	atomic_inc(&pinfo->evicted_bos); +	mutex_lock(&pinfo->notifier_lock); +	pinfo->evicted_bos++; +	mutex_unlock(&pinfo->notifier_lock);  	if (!READ_ONCE(pinfo->block_mmu_notifications)) {  		ret = -EINVAL;  		goto out_unlock; @@ -1672,6 +1648,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(  		}  	} +	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) +		alloc_flags |= AMDGPU_GEM_CREATE_COHERENT; +	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED) +		alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED; +  	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);  	if (!*mem) {  		ret = -ENOMEM; @@ -1815,8 +1796,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(  	list_del(&bo_list_entry->head);  	mutex_unlock(&process_info->lock); -	/* No more MMU notifiers */ -	amdgpu_mn_unregister(mem->bo); +	/* Cleanup user pages and MMU notifiers */ +	if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) { +		amdgpu_hmm_unregister(mem->bo); +		mutex_lock(&process_info->notifier_lock); +		amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range); +		mutex_unlock(&process_info->notifier_lock); +	}  	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);  	if (unlikely(ret)) @@ -1906,14 +1892,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(  	 */  	mutex_lock(&mem->process_info->lock); -	/* Lock mmap-sem. If we find an invalid userptr BO, we can be +	/* Lock notifier lock. If we find an invalid userptr BO, we can be  	 * sure that the MMU notifier is no longer running  	 * concurrently and the queues are actually stopped  	 */  	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { -		mmap_write_lock(current->mm); -		is_invalid_userptr = atomic_read(&mem->invalid); -		mmap_write_unlock(current->mm); +		mutex_lock(&mem->process_info->notifier_lock); +		is_invalid_userptr = !!mem->invalid; +		mutex_unlock(&mem->process_info->notifier_lock);  	}  	mutex_lock(&mem->lock); @@ -2113,7 +2099,7 @@ int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_device *adev, struct amdgpu_b  	}  	amdgpu_amdkfd_remove_eviction_fence( -		bo, bo->kfd_bo->process_info->eviction_fence); +		bo, bo->vm_bo->vm->process_info->eviction_fence);  	amdgpu_bo_unreserve(bo); @@ -2256,7 +2242,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,  	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);  	if (ret) { -		kfree(mem); +		kfree(*mem);  		return ret;  	} @@ -2293,34 +2279,38 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,   *   * Runs in MMU notifier, may be in RECLAIM_FS context. This means it   * cannot do any memory allocations, and cannot take any locks that - * are held elsewhere while allocating memory. Therefore this is as - * simple as possible, using atomic counters. + * are held elsewhere while allocating memory.   *   * It doesn't do anything to the BO itself. The real work happens in   * restore, where we get updated page addresses. 
This function only   * ensures that GPU access to the BO is stopped.   */ -int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, -				struct mm_struct *mm) +int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni, +				unsigned long cur_seq, struct kgd_mem *mem)  {  	struct amdkfd_process_info *process_info = mem->process_info; -	int evicted_bos;  	int r = 0; -	/* Do not process MMU notifications until stage-4 IOCTL is received */ +	/* Do not process MMU notifications during CRIU restore until +	 * KFD_CRIU_OP_RESUME IOCTL is received +	 */  	if (READ_ONCE(process_info->block_mmu_notifications))  		return 0; -	atomic_inc(&mem->invalid); -	evicted_bos = atomic_inc_return(&process_info->evicted_bos); -	if (evicted_bos == 1) { +	mutex_lock(&process_info->notifier_lock); +	mmu_interval_set_seq(mni, cur_seq); + +	mem->invalid++; +	if (++process_info->evicted_bos == 1) {  		/* First eviction, stop the queues */ -		r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR); +		r = kgd2kfd_quiesce_mm(mni->mm, +				       KFD_QUEUE_EVICTION_TRIGGER_USERPTR);  		if (r)  			pr_err("Failed to quiesce KFD\n");  		schedule_delayed_work(&process_info->restore_userptr_work,  			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));  	} +	mutex_unlock(&process_info->notifier_lock);  	return r;  } @@ -2337,54 +2327,58 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,  	struct kgd_mem *mem, *tmp_mem;  	struct amdgpu_bo *bo;  	struct ttm_operation_ctx ctx = { false, false }; -	int invalid, ret; +	uint32_t invalid; +	int ret = 0; -	/* Move all invalidated BOs to the userptr_inval_list and -	 * release their user pages by migration to the CPU domain -	 */ +	mutex_lock(&process_info->notifier_lock); + +	/* Move all invalidated BOs to the userptr_inval_list */  	list_for_each_entry_safe(mem, tmp_mem,  				 &process_info->userptr_valid_list, -				 validate_list.head) { -		if (!atomic_read(&mem->invalid)) -			continue; /* BO is still valid */ - -		bo = mem->bo; - -		if (amdgpu_bo_reserve(bo, true)) -			return -EAGAIN; -		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); -		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); -		amdgpu_bo_unreserve(bo); -		if (ret) { -			pr_err("%s: Failed to invalidate userptr BO\n", -			       __func__); -			return -EAGAIN; -		} - -		list_move_tail(&mem->validate_list.head, -			       &process_info->userptr_inval_list); -	} - -	if (list_empty(&process_info->userptr_inval_list)) -		return 0; /* All evicted userptr BOs were freed */ +				 validate_list.head) +		if (mem->invalid) +			list_move_tail(&mem->validate_list.head, +				       &process_info->userptr_inval_list);  	/* Go through userptr_inval_list and update any invalid user_pages */  	list_for_each_entry(mem, &process_info->userptr_inval_list,  			    validate_list.head) { -		struct hmm_range *range; - -		invalid = atomic_read(&mem->invalid); +		invalid = mem->invalid;  		if (!invalid)  			/* BO hasn't been invalidated since the last -			 * revalidation attempt. Keep its BO list. +			 * revalidation attempt. Keep its page list.  			 
*/  			continue;  		bo = mem->bo; +		amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range); +		mem->range = NULL; + +		/* BO reservations and getting user pages (hmm_range_fault) +		 * must happen outside the notifier lock +		 */ +		mutex_unlock(&process_info->notifier_lock); + +		/* Move the BO to system (CPU) domain if necessary to unmap +		 * and free the SG table +		 */ +		if (bo->tbo.resource->mem_type != TTM_PL_SYSTEM) { +			if (amdgpu_bo_reserve(bo, true)) +				return -EAGAIN; +			amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); +			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); +			amdgpu_bo_unreserve(bo); +			if (ret) { +				pr_err("%s: Failed to invalidate userptr BO\n", +				       __func__); +				return -EAGAIN; +			} +		} +  		/* Get updated user pages */  		ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, -						   &range); +						   &mem->range);  		if (ret) {  			pr_debug("Failed %d to get user pages\n", ret); @@ -2397,30 +2391,32 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,  			 */  			if (ret != -EFAULT)  				return ret; -		} else { -			/* -			 * FIXME: Cannot ignore the return code, must hold -			 * notifier_lock -			 */ -			amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range); +			ret = 0;  		} +		mutex_lock(&process_info->notifier_lock); +  		/* Mark the BO as valid unless it was invalidated  		 * again concurrently.  		 */ -		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid) -			return -EAGAIN; +		if (mem->invalid != invalid) { +			ret = -EAGAIN; +			goto unlock_out; +		} +		mem->invalid = 0;  	} -	return 0; +unlock_out: +	mutex_unlock(&process_info->notifier_lock); + +	return ret;  }  /* Validate invalid userptr BOs   * - * Validates BOs on the userptr_inval_list, and moves them back to the - * userptr_valid_list. Also updates GPUVM page tables with new page - * addresses and waits for the page table updates to complete. + * Validates BOs on the userptr_inval_list. Also updates GPUVM page tables + * with new page addresses and waits for the page table updates to complete.   */  static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)  { @@ -2491,9 +2487,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)  			}  		} -		list_move_tail(&mem->validate_list.head, -			       &process_info->userptr_valid_list); -  		/* Update mapping. If the BO was not validated  		 * (because we couldn't get user pages), this will  		 * clear the page table entries, which will result in @@ -2509,7 +2502,9 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)  			if (ret) {  				pr_err("%s: update PTE failed\n", __func__);  				/* make sure this gets validated again */ -				atomic_inc(&mem->invalid); +				mutex_lock(&process_info->notifier_lock); +				mem->invalid++; +				mutex_unlock(&process_info->notifier_lock);  				goto unreserve_out;  			}  		} @@ -2529,6 +2524,36 @@ out_no_mem:  	return ret;  } +/* Confirm that all user pages are valid while holding the notifier lock + * + * Moves valid BOs from the userptr_inval_list back to userptr_val_list. 
+ */ +static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_info) +{ +	struct kgd_mem *mem, *tmp_mem; +	int ret = 0; + +	list_for_each_entry_safe(mem, tmp_mem, +				 &process_info->userptr_inval_list, +				 validate_list.head) { +		bool valid = amdgpu_ttm_tt_get_user_pages_done( +				mem->bo->tbo.ttm, mem->range); + +		mem->range = NULL; +		if (!valid) { +			WARN(!mem->invalid, "Invalid BO not marked invalid"); +			ret = -EAGAIN; +			continue; +		} +		WARN(mem->invalid, "Valid BO is marked invalid"); + +		list_move_tail(&mem->validate_list.head, +			       &process_info->userptr_valid_list); +	} + +	return ret; +} +  /* Worker callback to restore evicted userptr BOs   *   * Tries to update and validate all userptr BOs. If successful and no @@ -2543,9 +2568,11 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)  			     restore_userptr_work);  	struct task_struct *usertask;  	struct mm_struct *mm; -	int evicted_bos; +	uint32_t evicted_bos; -	evicted_bos = atomic_read(&process_info->evicted_bos); +	mutex_lock(&process_info->notifier_lock); +	evicted_bos = process_info->evicted_bos; +	mutex_unlock(&process_info->notifier_lock);  	if (!evicted_bos)  		return; @@ -2568,9 +2595,6 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)  	 * and we can just restart the queues.  	 */  	if (!list_empty(&process_info->userptr_inval_list)) { -		if (atomic_read(&process_info->evicted_bos) != evicted_bos) -			goto unlock_out; /* Concurrent eviction, try again */ -  		if (validate_invalid_user_pages(process_info))  			goto unlock_out;  	} @@ -2579,10 +2603,17 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)  	 * be a first eviction that calls quiesce_mm. The eviction  	 * reference counting inside KFD will handle this case.  	 */ -	if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) != -	    evicted_bos) -		goto unlock_out; -	evicted_bos = 0; +	mutex_lock(&process_info->notifier_lock); +	if (process_info->evicted_bos != evicted_bos) +		goto unlock_notifier_out; + +	if (confirm_valid_user_pages_locked(process_info)) { +		WARN(1, "User pages unexpectedly invalid"); +		goto unlock_notifier_out; +	} + +	process_info->evicted_bos = evicted_bos = 0; +  	if (kgd2kfd_resume_mm(mm)) {  		pr_err("%s: Failed to resume KFD\n", __func__);  		/* No recovery from this failure. 
Probably the CP is @@ -2590,6 +2621,8 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)  		 */  	} +unlock_notifier_out: +	mutex_unlock(&process_info->notifier_lock);  unlock_out:  	mutex_unlock(&process_info->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index b81b77a9efa6..ac6fe0ae4609 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -101,39 +101,101 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev)  	}  } +static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev, +	struct vram_usagebyfirmware_v2_1 *fw_usage, int *usage_bytes) +{ +	u32 start_addr, fw_size, drv_size; + +	start_addr = le32_to_cpu(fw_usage->start_address_in_kb); +	fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb); +	drv_size = le16_to_cpu(fw_usage->used_by_driver_in_kb); + +	DRM_DEBUG("atom firmware v2_1 requested %08x %dkb fw %dkb drv\n", +			  start_addr, +			  fw_size, +			  drv_size); + +	if ((start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) == +		(u32)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << +		ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { +		/* Firmware request VRAM reservation for SR-IOV */ +		adev->mman.fw_vram_usage_start_offset = (start_addr & +			(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; +		adev->mman.fw_vram_usage_size = fw_size << 10; +		/* Use the default scratch size */ +		*usage_bytes = 0; +	} else { +		*usage_bytes = drv_size << 10; +	} +	return 0; +} + +static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev, +		struct vram_usagebyfirmware_v2_2 *fw_usage, int *usage_bytes) +{ +	u32 fw_start_addr, fw_size, drv_start_addr, drv_size; + +	fw_start_addr = le32_to_cpu(fw_usage->fw_region_start_address_in_kb); +	fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb); + +	drv_start_addr = le32_to_cpu(fw_usage->driver_region0_start_address_in_kb); +	drv_size = le32_to_cpu(fw_usage->used_by_driver_region0_in_kb); + +	DRM_DEBUG("atom requested fw start at %08x %dkb and drv start at %08x %dkb\n", +			  fw_start_addr, +			  fw_size, +			  drv_start_addr, +			  drv_size); + +	if (amdgpu_sriov_vf(adev) && +	    ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << +		ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) { +		/* Firmware request VRAM reservation for SR-IOV */ +		adev->mman.fw_vram_usage_start_offset = (fw_start_addr & +			(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; +		adev->mman.fw_vram_usage_size = fw_size << 10; +	} + +	if (amdgpu_sriov_vf(adev) && +	    ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION << +		ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) { +		/* driver request VRAM reservation for SR-IOV */ +		adev->mman.drv_vram_usage_start_offset = (drv_start_addr & +			(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; +		adev->mman.drv_vram_usage_size = drv_size << 10; +	} + +	*usage_bytes = 0; +	return 0; +} +  int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)  {  	struct atom_context *ctx = adev->mode_info.atom_context;  	int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,  						vram_usagebyfirmware); -	struct vram_usagebyfirmware_v2_1 *firmware_usage; -	uint32_t start_addr, size; -	uint16_t data_offset; +	struct vram_usagebyfirmware_v2_1 *fw_usage_v2_1; +	struct vram_usagebyfirmware_v2_2 *fw_usage_v2_2; +	u16 data_offset; +	u8 frev, crev;  	int usage_bytes = 0; -	if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, 
&data_offset)) { -		firmware_usage = (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset); -		DRM_DEBUG("atom firmware requested %08x %dkb fw %dkb drv\n", -			  le32_to_cpu(firmware_usage->start_address_in_kb), -			  le16_to_cpu(firmware_usage->used_by_firmware_in_kb), -			  le16_to_cpu(firmware_usage->used_by_driver_in_kb)); - -		start_addr = le32_to_cpu(firmware_usage->start_address_in_kb); -		size = le16_to_cpu(firmware_usage->used_by_firmware_in_kb); - -		if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) == -			(uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << -			ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { -			/* Firmware request VRAM reservation for SR-IOV */ -			adev->mman.fw_vram_usage_start_offset = (start_addr & -				(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; -			adev->mman.fw_vram_usage_size = size << 10; -			/* Use the default scratch size */ -			usage_bytes = 0; -		} else { -			usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) << 10; +	if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) { +		if (frev == 2 && crev == 1) { +			fw_usage_v2_1 = +				(struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset); +			amdgpu_atomfirmware_allocate_fb_v2_1(adev, +					fw_usage_v2_1, +					&usage_bytes); +		} else if (frev >= 2 && crev >= 2) { +			fw_usage_v2_2 = +				(struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset); +			amdgpu_atomfirmware_allocate_fb_v2_2(adev, +					fw_usage_v2_2, +					&usage_bytes);  		}  	} +  	ctx->scratch_size_bytes = 0;  	if (usage_bytes == 0)  		usage_bytes = 20 * 1024; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index e363f56c72af..30c28a69e847 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -317,6 +317,7 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)  	if (!found)  		return false; +	pci_dev_put(pdev);  	adev->bios = kmalloc(size, GFP_KERNEL);  	if (!adev->bios) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index cfb262911bfc..2ebbc6382a06 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -26,7 +26,6 @@  #include <drm/display/drm_dp_helper.h>  #include <drm/drm_edid.h> -#include <drm/drm_fb_helper.h>  #include <drm/drm_probe_helper.h>  #include <drm/amdgpu_drm.h>  #include "amdgpu.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 365e3fb6a9e5..7b5ce00f0602 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -61,6 +61,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,  		amdgpu_ctx_put(p->ctx);  		return -ECANCELED;  	} + +	amdgpu_sync_create(&p->sync);  	return 0;  } @@ -294,12 +296,8 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,  	}  	for (i = 0; i < p->gang_size; ++i) { -		ret = amdgpu_job_alloc(p->adev, num_ibs[i], &p->jobs[i], vm); -		if (ret) -			goto free_all_kdata; - -		ret = drm_sched_job_init(&p->jobs[i]->base, p->entities[i], -					 &fpriv->vm); +		ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm, +				       num_ibs[i], &p->jobs[i]);  		if (ret)  			goto free_all_kdata;  	} @@ -433,7 +431,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,  			dma_fence_put(old);  		} -		r = amdgpu_sync_fence(&p->gang_leader->sync, fence); +		r = amdgpu_sync_fence(&p->sync, fence);  		
dma_fence_put(fence);  		if (r)  			return r; @@ -455,9 +453,8 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,  		return r;  	} -	r = amdgpu_sync_fence(&p->gang_leader->sync, fence); +	r = amdgpu_sync_fence(&p->sync, fence);  	dma_fence_put(fence); -  	return r;  } @@ -1106,7 +1103,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)  	if (r)  		return r; -	r = amdgpu_sync_fence(&job->sync, fpriv->prt_va->last_pt_update); +	r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);  	if (r)  		return r; @@ -1117,7 +1114,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)  		if (r)  			return r; -		r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update); +		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);  		if (r)  			return r;  	} @@ -1136,7 +1133,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)  		if (r)  			return r; -		r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update); +		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);  		if (r)  			return r;  	} @@ -1149,7 +1146,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)  	if (r)  		return r; -	r = amdgpu_sync_fence(&job->sync, vm->last_update); +	r = amdgpu_sync_fence(&p->sync, vm->last_update);  	if (r)  		return r; @@ -1181,11 +1178,19 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)  static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)  {  	struct amdgpu_fpriv *fpriv = p->filp->driver_priv; -	struct amdgpu_job *leader = p->gang_leader; +	struct drm_gpu_scheduler *sched;  	struct amdgpu_bo_list_entry *e; +	struct dma_fence *fence;  	unsigned int i;  	int r; +	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); +	if (r) { +		if (r != -ERESTARTSYS) +			DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); +		return r; +	} +  	list_for_each_entry(e, &p->validated, tv.head) {  		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);  		struct dma_resv *resv = bo->tbo.base.resv; @@ -1193,25 +1198,36 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)  		sync_mode = amdgpu_bo_explicit_sync(bo) ?  			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER; -		r = amdgpu_sync_resv(p->adev, &leader->sync, resv, sync_mode, +		r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,  				     &fpriv->vm);  		if (r)  			return r;  	}  	for (i = 0; i < p->gang_size; ++i) { -		if (p->jobs[i] == leader) +		r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]); +		if (r) +			return r; +	} + +	sched = p->gang_leader->base.entity->rq->sched; +	while ((fence = amdgpu_sync_get_fence(&p->sync))) { +		struct drm_sched_fence *s_fence = to_drm_sched_fence(fence); + +		/* +		 * When we have an dependency it might be necessary to insert a +		 * pipeline sync to make sure that all caches etc are flushed and the +		 * next job actually sees the results from the previous one +		 * before we start executing on the same scheduler ring. 
+		 */ +		if (!s_fence || s_fence->sched != sched)  			continue; -		r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync); +		r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);  		if (r)  			return r;  	} - -	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); -	if (r && r != -ERESTARTSYS) -		DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); -	return r; +	return 0;  }  static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) @@ -1251,9 +1267,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,  			continue;  		fence = &p->jobs[i]->base.s_fence->scheduled; -		r = amdgpu_sync_fence(&leader->sync, fence); -		if (r) +		dma_fence_get(fence); +		r = drm_sched_job_add_dependency(&leader->base, fence); +		if (r) { +			dma_fence_put(fence);  			goto error_cleanup; +		}  	}  	if (p->gang_size > 1) { @@ -1341,6 +1360,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)  {  	unsigned i; +	amdgpu_sync_free(&parser->sync);  	for (i = 0; i < parser->num_post_deps; i++) {  		drm_syncobj_put(parser->post_deps[i].syncobj);  		kfree(parser->post_deps[i].chain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h index f80adf9069ec..113f39510a72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h @@ -76,6 +76,8 @@ struct amdgpu_cs_parser {  	unsigned			num_post_deps;  	struct amdgpu_cs_post_dep	*post_deps; + +	struct amdgpu_sync		sync;  };  int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index de61a85c4b02..0f16d3c09309 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -1969,7 +1969,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)  	amdgpu_ta_if_debugfs_init(adev);  #if defined(CONFIG_DRM_AMD_DC) -	if (amdgpu_device_has_dc_support(adev)) +	if (adev->dc_enabled)  		dtn_debugfs_init(adev);  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index f1e9663b4051..2f28a8c02f64 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -36,7 +36,9 @@  #include <generated/utsrelease.h>  #include <linux/pci-p2pdma.h> +#include <drm/drm_aperture.h>  #include <drm/drm_atomic_helper.h> +#include <drm/drm_fb_helper.h>  #include <drm/drm_probe_helper.h>  #include <drm/amdgpu_drm.h>  #include <linux/vgaarb.h> @@ -89,6 +91,8 @@ MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");  #define AMDGPU_MAX_RETRY_LIMIT		2  #define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL) +static const struct drm_driver amdgpu_kms_driver; +  const char *amdgpu_asic_name[] = {  	"TAHITI",  	"PITCAIRN", @@ -1568,7 +1572,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)   * @pdev: pci dev pointer   * @state: vga_switcheroo state   * - * Callback for the switcheroo driver.  Suspends or resumes the + * Callback for the switcheroo driver.  Suspends or resumes   * the asics before or after it is powered up using ACPI methods.   
*/  static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, @@ -1915,6 +1919,16 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)  	}  } +void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev) +{ +	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) { +		adev->mode_info.num_crtc = 1; +		adev->enable_virtual_display = true; +		DRM_INFO("virtual_display:%d, num_crtc:%d\n", +			 adev->enable_virtual_display, adev->mode_info.num_crtc); +	} +} +  /**   * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware   * @@ -2397,7 +2411,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)  			adev->ip_blocks[i].status.hw = true;  			/* right after GMC hw init, we create CSA */ -			if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { +			if (amdgpu_mcbp) {  				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,  								AMDGPU_GEM_DOMAIN_VRAM,  								AMDGPU_CSA_SIZE); @@ -2462,6 +2476,11 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)  			if (!amdgpu_sriov_vf(adev)) {  				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); +				if (WARN_ON(!hive)) { +					r = -ENOENT; +					goto init_failed; +				} +  				if (!hive->reset_domain ||  				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {  					r = -ENOENT; @@ -3000,14 +3019,15 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)  			continue;  		} -		/* skip suspend of gfx and psp for S0ix +		/* skip suspend of gfx/mes and psp for S0ix  		 * gfx is in gfxoff state, so on resume it will exit gfxoff just  		 * like at runtime. PSP is also part of the always on hardware  		 * so no need to suspend it.  		 */  		if (adev->in_s0ix &&  		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP || -		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)) +		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || +		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))  			continue;  		/* XXX handle errors */ @@ -3347,8 +3367,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)   */  bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)  { -	if (amdgpu_sriov_vf(adev) || -	    adev->enable_virtual_display || +	if (adev->enable_virtual_display ||  	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))  		return false; @@ -3671,6 +3690,11 @@ int amdgpu_device_init(struct amdgpu_device *adev,  	if (r)  		return r; +	/* Get rid of things like offb */ +	r = drm_aperture_remove_conflicting_pci_framebuffers(adev->pdev, &amdgpu_kms_driver); +	if (r) +		return r; +  	/* Enable TMZ based on IP_VERSION */  	amdgpu_gmc_tmz_set(adev); @@ -4097,6 +4121,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)  	adev->in_suspend = true; +	/* Evict the majority of BOs before grabbing the full access */ +	r = amdgpu_device_evict_resources(adev); +	if (r) +		return r; +  	if (amdgpu_sriov_vf(adev)) {  		amdgpu_virt_fini_data_exchange(adev);  		r = amdgpu_virt_request_full_gpu(adev, false); @@ -4171,21 +4200,15 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)  	r = amdgpu_device_ip_resume(adev); -	/* no matter what r is, always need to properly release full GPU */ -	if (amdgpu_sriov_vf(adev)) { -		amdgpu_virt_init_data_exchange(adev); -		amdgpu_virt_release_full_gpu(adev, true); -	} -  	if (r) {  		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); -		return r; +		goto exit;  	}  	amdgpu_fence_driver_hw_init(adev);  	r = 
amdgpu_device_ip_late_init(adev);  	if (r) -		return r; +		goto exit;  	queue_delayed_work(system_wq, &adev->delayed_init_work,  			   msecs_to_jiffies(AMDGPU_RESUME_MS)); @@ -4193,9 +4216,18 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)  	if (!adev->in_s0ix) {  		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);  		if (r) -			return r; +			goto exit;  	} +exit: +	if (amdgpu_sriov_vf(adev)) { +		amdgpu_virt_init_data_exchange(adev); +		amdgpu_virt_release_full_gpu(adev, true); +	} + +	if (r) +		return r; +  	/* Make sure IB tests flushed */  	flush_delayed_work(&adev->delayed_init_work); @@ -4213,25 +4245,27 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)  	amdgpu_ras_resume(adev); -	/* -	 * Most of the connector probing functions try to acquire runtime pm -	 * refs to ensure that the GPU is powered on when connector polling is -	 * performed. Since we're calling this from a runtime PM callback, -	 * trying to acquire rpm refs will cause us to deadlock. -	 * -	 * Since we're guaranteed to be holding the rpm lock, it's safe to -	 * temporarily disable the rpm helpers so this doesn't deadlock us. -	 */ +	if (adev->mode_info.num_crtc) { +		/* +		 * Most of the connector probing functions try to acquire runtime pm +		 * refs to ensure that the GPU is powered on when connector polling is +		 * performed. Since we're calling this from a runtime PM callback, +		 * trying to acquire rpm refs will cause us to deadlock. +		 * +		 * Since we're guaranteed to be holding the rpm lock, it's safe to +		 * temporarily disable the rpm helpers so this doesn't deadlock us. +		 */  #ifdef CONFIG_PM -	dev->dev->power.disable_depth++; +		dev->dev->power.disable_depth++;  #endif -	if (!amdgpu_device_has_dc_support(adev)) -		drm_helper_hpd_irq_event(dev); -	else -		drm_kms_helper_hotplug_event(dev); +		if (!adev->dc_enabled) +			drm_helper_hpd_irq_event(dev); +		else +			drm_kms_helper_hotplug_event(dev);  #ifdef CONFIG_PM -	dev->dev->power.disable_depth--; +		dev->dev->power.disable_depth--;  #endif +	}  	adev->in_suspend = false;  	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0)) @@ -4580,6 +4614,10 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)  	if (amdgpu_gpu_recovery == 0)  		goto disabled; +	/* Skip soft reset check in fatal error mode */ +	if (!amdgpu_ras_is_poison_mode_supported(adev)) +		return true; +  	if (!amdgpu_device_ip_check_soft_reset(adev)) {  		dev_info(adev->dev,"Timeout, but no hardware hang detected.\n");  		return false; @@ -5027,6 +5065,8 @@ static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)  		pm_runtime_enable(&(p->dev));  		pm_runtime_resume(&(p->dev));  	} + +	pci_dev_put(p);  }  static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) @@ -5065,6 +5105,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)  		if (expires < ktime_get_mono_fast_ns()) {  			dev_warn(adev->dev, "failed to suspend display audio\n"); +			pci_dev_put(p);  			/* TODO: abort the succeeding gpu reset? 
*/  			return -ETIMEDOUT;  		} @@ -5072,97 +5113,10 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)  	pm_runtime_disable(&(p->dev)); +	pci_dev_put(p);  	return 0;  } -static void amdgpu_device_recheck_guilty_jobs( -	struct amdgpu_device *adev, struct list_head *device_list_handle, -	struct amdgpu_reset_context *reset_context) -{ -	int i, r = 0; - -	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { -		struct amdgpu_ring *ring = adev->rings[i]; -		int ret = 0; -		struct drm_sched_job *s_job; - -		if (!ring || !ring->sched.thread) -			continue; - -		s_job = list_first_entry_or_null(&ring->sched.pending_list, -				struct drm_sched_job, list); -		if (s_job == NULL) -			continue; - -		/* clear job's guilty and depend the folowing step to decide the real one */ -		drm_sched_reset_karma(s_job); -		drm_sched_resubmit_jobs_ext(&ring->sched, 1); - -		if (!s_job->s_fence->parent) { -			DRM_WARN("Failed to get a HW fence for job!"); -			continue; -		} - -		ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout); -		if (ret == 0) { /* timeout */ -			DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n", -						ring->sched.name, s_job->id); - - -			amdgpu_fence_driver_isr_toggle(adev, true); - -			/* Clear this failed job from fence array */ -			amdgpu_fence_driver_clear_job_fences(ring); - -			amdgpu_fence_driver_isr_toggle(adev, false); - -			/* Since the job won't signal and we go for -			 * another resubmit drop this parent pointer -			 */ -			dma_fence_put(s_job->s_fence->parent); -			s_job->s_fence->parent = NULL; - -			/* set guilty */ -			drm_sched_increase_karma(s_job); -			amdgpu_reset_prepare_hwcontext(adev, reset_context); -retry: -			/* do hw reset */ -			if (amdgpu_sriov_vf(adev)) { -				amdgpu_virt_fini_data_exchange(adev); -				r = amdgpu_device_reset_sriov(adev, false); -				if (r) -					adev->asic_reset_res = r; -			} else { -				clear_bit(AMDGPU_SKIP_HW_RESET, -					  &reset_context->flags); -				r = amdgpu_do_asic_reset(device_list_handle, -							 reset_context); -				if (r && r == -EAGAIN) -					goto retry; -			} - -			/* -			 * add reset counter so that the following -			 * resubmitted job could flush vmid -			 */ -			atomic_inc(&adev->gpu_reset_counter); -			continue; -		} - -		/* got the hw fence, signal finished fence */ -		atomic_dec(ring->sched.score); -		dma_fence_get(&s_job->s_fence->finished); -		dma_fence_signal(&s_job->s_fence->finished); -		dma_fence_put(&s_job->s_fence->finished); - -		/* remove node from list and free the job */ -		spin_lock(&ring->sched.job_list_lock); -		list_del_init(&s_job->list); -		spin_unlock(&ring->sched.job_list_lock); -		ring->sched.ops->free_job(s_job); -	} -} -  static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)  {  	struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -5183,7 +5137,6 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)  } -  /**   * amdgpu_device_gpu_recover - reset the asic and recover scheduler   * @@ -5206,7 +5159,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,  	int i, r = 0;  	bool need_emergency_restart = false;  	bool audio_suspended = false; -	int tmp_vram_lost_counter;  	bool gpu_reset_for_dev_remove = false;  	gpu_reset_for_dev_remove = @@ -5352,7 +5304,6 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. 
*/  		amdgpu_device_stop_pending_resets(tmp_adev);  	} -	tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));  	/* Actual ASIC resets if needed.*/  	/* Host driver will handle XGMI hive reset for SRIOV */  	if (amdgpu_sriov_vf(adev)) { @@ -5377,29 +5328,13 @@ skip_hw_reset:  	/* Post ASIC reset for all devs .*/  	list_for_each_entry(tmp_adev, device_list_handle, reset_list) { -		/* -		 * Sometimes a later bad compute job can block a good gfx job as gfx -		 * and compute ring share internal GC HW mutually. We add an additional -		 * guilty jobs recheck step to find the real guilty job, it synchronously -		 * submits and pends for the first job being signaled. If it gets timeout, -		 * we identify it as a real guilty job. -		 */ -		if (amdgpu_gpu_recovery == 2 && -			!(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter))) -			amdgpu_device_recheck_guilty_jobs( -				tmp_adev, device_list_handle, reset_context); -  		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {  			struct amdgpu_ring *ring = tmp_adev->rings[i];  			if (!ring || !ring->sched.thread)  				continue; -			/* No point to resubmit jobs if we didn't HW reset*/ -			if (!tmp_adev->asic_reset_res && !job_signaled) -				drm_sched_resubmit_jobs(&ring->sched); - -			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res); +			drm_sched_start(&ring->sched, true);  		}  		if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)) @@ -5441,6 +5376,8 @@ skip_sched_resume:  			amdgpu_device_resume_display_audio(tmp_adev);  		amdgpu_device_unset_mp1_state(tmp_adev); + +		amdgpu_ras_set_error_query_ready(tmp_adev, true);  	}  recover_end: @@ -5852,8 +5789,6 @@ void amdgpu_pci_resume(struct pci_dev *pdev)  		if (!ring || !ring->sched.thread)  			continue; - -		drm_sched_resubmit_jobs(&ring->sched);  		drm_sched_start(&ring->sched, true);  	} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index 3993e6134914..1bbd56029a4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -305,8 +305,13 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)  		goto out;  	} -	if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) { -		dev_warn(adev->dev, "get invalid ip discovery binary signature from vram\n"); +	if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin) || amdgpu_discovery == 2) { +		/* ignore the discovery binary from vram if discovery=2 in kernel module parameter */ +		if (amdgpu_discovery == 2) +			dev_info(adev->dev,"force read ip discovery binary from file"); +		else +			dev_warn(adev->dev, "get invalid ip discovery binary signature from vram\n"); +  		/* retry read ip discovery binary from file */  		r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin);  		if (r) { @@ -1507,6 +1512,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)  	case IP_VERSION(11, 0, 1):  	case IP_VERSION(11, 0, 2):  	case IP_VERSION(11, 0, 3): +	case IP_VERSION(11, 0, 4):  		amdgpu_device_ip_block_add(adev, &soc21_common_ip_block);  		break;  	default: @@ -1551,6 +1557,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)  	case IP_VERSION(11, 0, 1):  	case IP_VERSION(11, 0, 2):  	case IP_VERSION(11, 0, 3): +	case IP_VERSION(11, 0, 4):  		amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block);  		break;  	default: @@ -1636,6 +1643,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct 
amdgpu_device *adev)  	case IP_VERSION(13, 0, 7):  	case IP_VERSION(13, 0, 8):  	case IP_VERSION(13, 0, 10): +	case IP_VERSION(13, 0, 11):  		amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block);  		break;  	case IP_VERSION(13, 0, 4): @@ -1686,6 +1694,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)  	case IP_VERSION(13, 0, 7):  	case IP_VERSION(13, 0, 8):  	case IP_VERSION(13, 0, 10): +	case IP_VERSION(13, 0, 11):  		amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);  		break;  	default: @@ -1697,9 +1706,17 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)  	return 0;  } +#if defined(CONFIG_DRM_AMD_DC) +static void amdgpu_discovery_set_sriov_display(struct amdgpu_device *adev) +{ +	amdgpu_device_set_sriov_virtual_display(adev); +	amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block); +} +#endif +  static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)  { -	if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) { +	if (adev->enable_virtual_display) {  		amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);  		return 0;  	} @@ -1727,7 +1744,10 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)  		case IP_VERSION(3, 1, 6):  		case IP_VERSION(3, 2, 0):  		case IP_VERSION(3, 2, 1): -			amdgpu_device_ip_block_add(adev, &dm_ip_block); +			if (amdgpu_sriov_vf(adev)) +				amdgpu_discovery_set_sriov_display(adev); +			else +				amdgpu_device_ip_block_add(adev, &dm_ip_block);  			break;  		default:  			dev_err(adev->dev, @@ -1740,7 +1760,10 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)  		case IP_VERSION(12, 0, 0):  		case IP_VERSION(12, 0, 1):  		case IP_VERSION(12, 1, 0): -			amdgpu_device_ip_block_add(adev, &dm_ip_block); +			if (amdgpu_sriov_vf(adev)) +				amdgpu_discovery_set_sriov_display(adev); +			else +				amdgpu_device_ip_block_add(adev, &dm_ip_block);  			break;  		default:  			dev_err(adev->dev, @@ -1785,6 +1808,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)  	case IP_VERSION(11, 0, 1):  	case IP_VERSION(11, 0, 2):  	case IP_VERSION(11, 0, 3): +	case IP_VERSION(11, 0, 4):  		amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);  		break;  	default: @@ -1948,6 +1972,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)  	case IP_VERSION(11, 0, 1):  	case IP_VERSION(11, 0, 2):  	case IP_VERSION(11, 0, 3): +	case IP_VERSION(11, 0, 4):  		amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block);  		adev->enable_mes = true;  		adev->enable_mes_kiq = true; @@ -2161,6 +2186,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)  		break;  	case IP_VERSION(10, 3, 1):  		adev->family = AMDGPU_FAMILY_VGH; +		adev->apu_flags |= AMD_APU_IS_VANGOGH;  		break;  	case IP_VERSION(10, 3, 3):  		adev->family = AMDGPU_FAMILY_YC; @@ -2177,6 +2203,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)  		adev->family = AMDGPU_FAMILY_GC_11_0_0;  		break;  	case IP_VERSION(11, 0, 1): +	case IP_VERSION(11, 0, 4):  		adev->family = AMDGPU_FAMILY_GC_11_0_1;  		break;  	default: @@ -2194,6 +2221,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)  	case IP_VERSION(10, 3, 6):  	case IP_VERSION(10, 3, 7):  	case IP_VERSION(11, 0, 1): +	case IP_VERSION(11, 0, 4):  		adev->flags |= AMD_IS_APU;  		break;  	default: @@ -2250,6 +2278,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)  		adev->nbio.hdp_flush_reg = &nbio_v4_3_hdp_flush_reg;  		break;  	
case IP_VERSION(7, 7, 0): +	case IP_VERSION(7, 7, 1):  		adev->nbio.funcs = &nbio_v7_7_funcs;  		adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg;  		break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 1a06b8d724f3..b22471b3bd63 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -39,11 +39,46 @@  #include <linux/pm_runtime.h>  #include <drm/drm_crtc_helper.h>  #include <drm/drm_edid.h> -#include <drm/drm_gem_framebuffer_helper.h>  #include <drm/drm_fb_helper.h> +#include <drm/drm_gem_framebuffer_helper.h>  #include <drm/drm_fourcc.h>  #include <drm/drm_vblank.h> +/** + * amdgpu_display_hotplug_work_func - work handler for display hotplug event + * + * @work: work struct pointer + * + * This is the hotplug event work handler (all ASICs). + * The work gets scheduled from the IRQ handler if there + * was a hotplug interrupt.  It walks through the connector table + * and calls hotplug handler for each connector. After this, it sends + * a DRM hotplug event to alert userspace. + * + * This design approach is required in order to defer hotplug event handling + * from the IRQ handler to a work handler because hotplug handler has to use + * mutexes which cannot be locked in an IRQ handler (since &mutex_lock may + * sleep). + */ +void amdgpu_display_hotplug_work_func(struct work_struct *work) +{ +	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, +						  hotplug_work); +	struct drm_device *dev = adev_to_drm(adev); +	struct drm_mode_config *mode_config = &dev->mode_config; +	struct drm_connector *connector; +	struct drm_connector_list_iter iter; + +	mutex_lock(&mode_config->mutex); +	drm_connector_list_iter_begin(dev, &iter); +	drm_for_each_connector_iter(connector, &iter) +		amdgpu_connector_hotplug(connector); +	drm_connector_list_iter_end(&iter); +	mutex_unlock(&mode_config->mutex); +	/* Just fire off a uevent and let userspace tell us what to do */ +	drm_helper_hpd_irq_event(dev); +} +  static int amdgpu_display_framebuffer_init(struct drm_device *dev,  					   struct amdgpu_framebuffer *rfb,  					   const struct drm_mode_fb_cmd2 *mode_cmd, @@ -514,7 +549,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,  	 */  	if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&  	    amdgpu_bo_support_uswc(bo_flags) && -	    amdgpu_device_asic_has_dc_support(adev->asic_type) && +	    adev->dc_enabled &&  	    adev->mode_info.gpu_vm_support)  		domain |= AMDGPU_GEM_DOMAIN_GTT;  #endif @@ -1214,7 +1249,6 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev,  const struct drm_mode_config_funcs amdgpu_mode_funcs = {  	.fb_create = amdgpu_display_user_framebuffer_create, -	.output_poll_changed = drm_fb_helper_output_poll_changed,  };  static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] = @@ -1281,7 +1315,7 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)  					 "dither",  					 amdgpu_dither_enum_list, sz); -	if (amdgpu_device_has_dc_support(adev)) { +	if (adev->dc_enabled) {  		adev->mode_info.abm_level_property =  			drm_property_create_range(adev_to_drm(adev), 0,  						  "abm level", 0, 4); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index 560352f7c317..9d19940f73c8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -35,6 +35,7 @@  #define amdgpu_display_add_encoder(adev, e, s, 
c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))  #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) +void amdgpu_display_hotplug_work_func(struct work_struct *work);  void amdgpu_display_update_priority(struct amdgpu_device *adev);  uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,  					  uint64_t bo_flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 7bd8e33b14be..271e30e34d93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -328,7 +328,9 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)  	if (dma_buf->ops == &amdgpu_dmabuf_ops) {  		struct amdgpu_bo *other = gem_to_amdgpu_bo(dma_buf->priv); -		flags |= other->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC; +		flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC | +					 AMDGPU_GEM_CREATE_COHERENT | +					 AMDGPU_GEM_CREATE_UNCACHED);  	}  	ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index bf2d50c8c92a..cd4caaa29528 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -23,8 +23,8 @@   */  #include <drm/amdgpu_drm.h> -#include <drm/drm_aperture.h>  #include <drm/drm_drv.h> +#include <drm/drm_fbdev_generic.h>  #include <drm/drm_gem.h>  #include <drm/drm_vblank.h>  #include <drm/drm_managed.h> @@ -180,6 +180,7 @@ int amdgpu_mes_kiq;  int amdgpu_noretry = -1;  int amdgpu_force_asic_type = -1;  int amdgpu_tmz = -1; /* auto */ +uint amdgpu_freesync_vid_mode;  int amdgpu_reset_method = -1; /* auto */  int amdgpu_num_kcq = -1;  int amdgpu_smartshift_bias; @@ -230,17 +231,18 @@ module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);  /**   * DOC: gartsize (uint) - * Restrict the size of GART in Mib (32, 64, etc.) for testing. The default is -1 (The size depends on asic). + * Restrict the size of GART (for kernel use) in Mib (32, 64, etc.) for testing. + * The default is -1 (The size depends on asic).   */ -MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)"); +MODULE_PARM_DESC(gartsize, "Size of kernel GART to setup in megabytes (32, 64, etc., -1=auto)");  module_param_named(gartsize, amdgpu_gart_size, uint, 0600);  /**   * DOC: gttsize (int) - * Restrict the size of GTT domain in MiB for testing. The default is -1 (It's VRAM size if 3GB < VRAM < 3/4 RAM, - * otherwise 3/4 RAM size). + * Restrict the size of GTT domain (for userspace use) in MiB for testing. + * The default is -1 (Use 1/2 RAM, minimum value is 3GB).   */ -MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)"); +MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)");  module_param_named(gttsize, amdgpu_gtt_size, int, 0600);  /** @@ -533,7 +535,7 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);   * DOC: gpu_recovery (int)   * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).   
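 *
 * Usage example (illustrative, not part of the patch): GPU recovery can be
 * forced on at module load time with "modprobe amdgpu gpu_recovery=1" or via
 * the boot parameter amdgpu.gpu_recovery=1; the old advanced TDR mode (2) is
 * dropped from the parameter description below.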
*/ -MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (2 = advanced tdr mode, 1 = enable, 0 = disable, -1 = auto)"); +MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");  module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);  /** @@ -878,6 +880,32 @@ MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on)  module_param_named(tmz, amdgpu_tmz, int, 0444);  /** + * DOC: freesync_video (uint) + * Enable the optimization to adjust front porch timing to achieve seamless + * mode change experience when setting a freesync supported mode for which full + * modeset is not needed. + * + * The Display Core will add a set of modes derived from the base FreeSync + * video mode into the corresponding connector's mode list based on commonly + * used refresh rates and VRR range of the connected display, when users enable + * this feature. From the userspace perspective, they can see a seamless mode + * change experience when the change between different refresh rates under the + * same resolution. Additionally, userspace applications such as Video playback + * can read this modeset list and change the refresh rate based on the video + * frame rate. Finally, the userspace can also derive an appropriate mode for a + * particular refresh rate based on the FreeSync Mode and add it to the + * connector's mode list. + * + * Note: This is an experimental feature. + * + * The default value: 0 (off). + */ +MODULE_PARM_DESC( +	freesync_video, +	"Enable freesync modesetting optimization feature (0 = off (default), 1 = on)"); +module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444); + +/**   * DOC: reset_method (int)   * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)   */ @@ -1924,9 +1952,6 @@ static const struct pci_device_id pciidlist[] = {  	{0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},  	{0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID}, -	/* Van Gogh */ -	{0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU}, -  	/* Yellow Carp */  	{0x1002, 0x164D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},  	{0x1002, 0x1681, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU}, @@ -2040,6 +2065,15 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,  			 "See modparam exp_hw_support\n");  		return -ENODEV;  	} +	/* differentiate between P10 and P11 asics with the same DID */ +	if (pdev->device == 0x67FF && +	    (pdev->revision == 0xE3 || +	     pdev->revision == 0xE7 || +	     pdev->revision == 0xF3 || +	     pdev->revision == 0xF7)) { +		flags &= ~AMD_ASIC_MASK; +		flags |= CHIP_POLARIS10; +	}  	/* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping,  	 * however, SME requires an indirect IOMMU mapping because the encryption @@ -2087,11 +2121,6 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,  	}  #endif -	/* Get rid of things like offb */ -	ret = drm_aperture_remove_conflicting_pci_framebuffers(pdev, &amdgpu_kms_driver); -	if (ret) -		return ret; -  	adev = devm_drm_dev_alloc(&pdev->dev, &amdgpu_kms_driver, typeof(*adev), ddev);  	if (IS_ERR(adev))  		return PTR_ERR(adev); @@ -2109,12 +2138,12 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,  	pci_set_drvdata(pdev, ddev); -	ret = amdgpu_driver_load_kms(adev, ent->driver_data); +	ret = amdgpu_driver_load_kms(adev, flags);  	if (ret)  		goto err_pci;  retry_init: -	ret = drm_dev_register(ddev, 
ent->driver_data); +	ret = drm_dev_register(ddev, flags);  	if (ret == -EAGAIN && ++retry <= 3) {  		DRM_INFO("retry init %d\n", retry);  		/* Don't request EX mode too frequently which is attacking */ @@ -2471,7 +2500,7 @@ static int amdgpu_runtime_idle_check_display(struct device *dev)  		if (ret)  			return ret; -		if (amdgpu_device_has_dc_support(adev)) { +		if (adev->dc_enabled) {  			struct drm_crtc *crtc;  			drm_for_each_crtc(crtc, drm_dev) { @@ -2572,6 +2601,8 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)  		amdgpu_device_baco_enter(drm_dev);  	} +	dev_dbg(&pdev->dev, "asic/device is runtime suspended\n"); +  	return 0;  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c index 4d9eb0137f8c..7d2a908438e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c @@ -79,13 +79,15 @@   * That is, for an I2C EEPROM driver everything is controlled by   * the "eeprom_addr".   * + * See also top of amdgpu_ras_eeprom.c. + *   * P.S. If you need to write, lock and read the Identification Page,   * (M24M02-DR device only, which we do not use), change the "7" to   * "0xF" in the macro below, and let the client set bit 20 to 1 in   * "eeprom_addr", and set A10 to 0 to write into it, and A10 and A1 to   * 1 to lock it permanently.   */ -#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 7)) +#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 0xF))  static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,  				u8 *eeprom_buf, u16 buf_size, bool read) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index d0d99ed607dd..00444203220d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -55,6 +55,7 @@ struct amdgpu_fence {  	/* RB, DMA, etc. */  	struct amdgpu_ring		*ring; +	ktime_t				start_timestamp;  };  static struct kmem_cache *amdgpu_fence_slab; @@ -199,6 +200,8 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd  		}  	} +	to_amdgpu_fence(fence)->start_timestamp = ktime_get(); +  	/* This function can't be called concurrently anyway, otherwise  	 * emitting the fence would mess up the hardware ring buffer.  	 */ @@ -407,6 +410,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)  }  /** + * amdgpu_fence_last_unsignaled_time_us - the time fence emitted until now + * @ring: ring the fence is associated with + * + * Find the earliest fence unsignaled until now, calculate the time delta + * between the time fence emitted and now. + */ +u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring) +{ +	struct amdgpu_fence_driver *drv = &ring->fence_drv; +	struct dma_fence *fence; +	uint32_t last_seq, sync_seq; + +	last_seq = atomic_read(&ring->fence_drv.last_seq); +	sync_seq = READ_ONCE(ring->fence_drv.sync_seq); +	if (last_seq == sync_seq) +		return 0; + +	++last_seq; +	last_seq &= drv->num_fences_mask; +	fence = drv->fences[last_seq]; +	if (!fence) +		return 0; + +	return ktime_us_delta(ktime_get(), +		to_amdgpu_fence(fence)->start_timestamp); +} + +/** + * amdgpu_fence_update_start_timestamp - update the timestamp of the fence + * @ring: ring the fence is associated with + * @seq: the fence seq number to update. + * @timestamp: the start timestamp to update. + * + * The function called at the time the fence and related ib is about to + * resubmit to gpu in MCBP scenario. 
Thus we do not consider race condition + * with amdgpu_fence_process to modify the same fence. + */ +void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp) +{ +	struct amdgpu_fence_driver *drv = &ring->fence_drv; +	struct dma_fence *fence; + +	seq &= drv->num_fences_mask; +	fence = drv->fences[seq]; +	if (!fence) +		return; + +	to_amdgpu_fence(fence)->start_timestamp = timestamp; +} + +/**   * amdgpu_fence_driver_start_ring - make the fence driver   * ready for use on the requested ring.   * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c index e325150879df..4620c4712ce3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c @@ -29,9 +29,10 @@  #include "amdgpu_fru_eeprom.h"  #include "amdgpu_eeprom.h" -#define FRU_EEPROM_MADDR        0x60000 +#define FRU_EEPROM_MADDR_6      0x60000 +#define FRU_EEPROM_MADDR_8      0x80000 -static bool is_fru_eeprom_supported(struct amdgpu_device *adev) +static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)  {  	/* Only server cards have the FRU EEPROM  	 * TODO: See if we can figure this out dynamically instead of @@ -45,6 +46,11 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev)  	if (amdgpu_sriov_vf(adev))  		return false; +	/* The default I2C EEPROM address of the FRU. +	 */ +	if (fru_addr) +		*fru_addr = FRU_EEPROM_MADDR_8; +  	/* VBIOS is of the format ###-DXXXYYYY-##. For SKU identification,  	 * we can use just the "DXXX" portion. If there were more models, we  	 * could convert the 3 characters to a hex integer and use a switch @@ -57,21 +63,31 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev)  		if (strnstr(atom_ctx->vbios_version, "D161",  			    sizeof(atom_ctx->vbios_version)) ||  		    strnstr(atom_ctx->vbios_version, "D163", -			    sizeof(atom_ctx->vbios_version))) +			    sizeof(atom_ctx->vbios_version))) { +			if (fru_addr) +				*fru_addr = FRU_EEPROM_MADDR_6;  			return true; -		else +		} else {  			return false; +		}  	case CHIP_ALDEBARAN: -		/* All Aldebaran SKUs have the FRU */ +		/* All Aldebaran SKUs have an FRU */ +		if (!strnstr(atom_ctx->vbios_version, "D673", +			     sizeof(atom_ctx->vbios_version))) +			if (fru_addr) +				*fru_addr = FRU_EEPROM_MADDR_6;  		return true;  	case CHIP_SIENNA_CICHLID:  		if (strnstr(atom_ctx->vbios_version, "D603", -		    sizeof(atom_ctx->vbios_version))) { +			    sizeof(atom_ctx->vbios_version))) {  			if (strnstr(atom_ctx->vbios_version, "D603GLXE", -			    sizeof(atom_ctx->vbios_version))) +				    sizeof(atom_ctx->vbios_version))) {  				return false; -			else +			} else { +				if (fru_addr) +					*fru_addr = FRU_EEPROM_MADDR_6;  				return true; +			}  		} else {  			return false;  		} @@ -80,41 +96,14 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev)  	}  } -static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr, -				  unsigned char *buf, size_t buf_size) -{ -	int ret; -	u8 size; - -	ret = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addrptr, buf, 1); -	if (ret < 1) { -		DRM_WARN("FRU: Failed to get size field"); -		return ret; -	} - -	/* The size returned by the i2c requires subtraction of 0xC0 since the -	 * size apparently always reports as 0xC0+actual size. 
-	 */ -	size = buf[0] & 0x3F; -	size = min_t(size_t, size, buf_size); - -	ret = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addrptr + 1, -				 buf, size); -	if (ret < 1) { -		DRM_WARN("FRU: Failed to get data field"); -		return ret; -	} - -	return size; -} -  int amdgpu_fru_get_product_info(struct amdgpu_device *adev)  { -	unsigned char buf[AMDGPU_PRODUCT_NAME_LEN]; -	u32 addrptr; +	unsigned char buf[8], *pia; +	u32 addr, fru_addr;  	int size, len; +	u8 csum; -	if (!is_fru_eeprom_supported(adev)) +	if (!is_fru_eeprom_supported(adev, &fru_addr))  		return 0;  	/* If algo exists, it means that the i2c_adapter's initialized */ @@ -123,88 +112,102 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev)  		return -ENODEV;  	} -	/* There's a lot of repetition here. This is due to the FRU having -	 * variable-length fields. To get the information, we have to find the -	 * size of each field, and then keep reading along and reading along -	 * until we get all of the data that we want. We use addrptr to track -	 * the address as we go -	 */ - -	/* The first fields are all of size 1-byte, from 0-7 are offsets that -	 * contain information that isn't useful to us. -	 * Bytes 8-a are all 1-byte and refer to the size of the entire struct, -	 * and the language field, so just start from 0xb, manufacturer size -	 */ -	addrptr = FRU_EEPROM_MADDR + 0xb; -	size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf)); -	if (size < 1) { -		DRM_ERROR("Failed to read FRU Manufacturer, ret:%d", size); -		return -EINVAL; +	/* Read the IPMI Common header */ +	len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, fru_addr, buf, +				 sizeof(buf)); +	if (len != 8) { +		DRM_ERROR("Couldn't read the IPMI Common Header: %d", len); +		return len < 0 ? len : -EIO;  	} -	/* Increment the addrptr by the size of the field, and 1 due to the -	 * size field being 1 byte. This pattern continues below. -	 */ -	addrptr += size + 1; -	size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf)); -	if (size < 1) { -		DRM_ERROR("Failed to read FRU product name, ret:%d", size); -		return -EINVAL; +	if (buf[0] != 1) { +		DRM_ERROR("Bad IPMI Common Header version: 0x%02x", buf[0]); +		return -EIO;  	} -	len = size; -	if (len >= AMDGPU_PRODUCT_NAME_LEN) { -		DRM_WARN("FRU Product Name is larger than %d characters. This is likely a mistake", -				AMDGPU_PRODUCT_NAME_LEN); -		len = AMDGPU_PRODUCT_NAME_LEN - 1; -	} -	memcpy(adev->product_name, buf, len); -	adev->product_name[len] = '\0'; - -	addrptr += size + 1; -	size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf)); -	if (size < 1) { -		DRM_ERROR("Failed to read FRU product number, ret:%d", size); -		return -EINVAL; +	for (csum = 0; len > 0; len--) +		csum += buf[len - 1]; +	if (csum) { +		DRM_ERROR("Bad IPMI Common Header checksum: 0x%02x", csum); +		return -EIO;  	} -	len = size; -	/* Product number should only be 16 characters. Any more, -	 * and something could be wrong. Cap it at 16 to be safe -	 */ -	if (len >= sizeof(adev->product_number)) { -		DRM_WARN("FRU Product Number is larger than 16 characters. This is likely a mistake"); -		len = sizeof(adev->product_number) - 1; -	} -	memcpy(adev->product_number, buf, len); -	adev->product_number[len] = '\0'; +	/* Get the offset to the Product Info Area (PIA). */ +	addr = buf[4] * 8; +	if (!addr) +		return 0; -	addrptr += size + 1; -	size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf)); +	/* Get the absolute address to the PIA. 
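	 * Offsets in the IPMI Common Header are stored in multiples of 8 bytes,
	 * so with the default fru_addr of 0x80000 and, say, buf[4] == 1, the PIA
	 * would start at EEPROM address 0x80008 (illustrative values only).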
*/ +	addr += fru_addr; -	if (size < 1) { -		DRM_ERROR("Failed to read FRU product version, ret:%d", size); -		return -EINVAL; +	/* Read the header of the PIA. */ +	len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, buf, 3); +	if (len != 3) { +		DRM_ERROR("Couldn't read the Product Info Area header: %d", len); +		return len < 0 ? len : -EIO;  	} -	addrptr += size + 1; -	size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf)); +	if (buf[0] != 1) { +		DRM_ERROR("Bad IPMI Product Info Area version: 0x%02x", buf[0]); +		return -EIO; +	} -	if (size < 1) { -		DRM_ERROR("Failed to read FRU serial number, ret:%d", size); -		return -EINVAL; +	size = buf[1] * 8; +	pia = kzalloc(size, GFP_KERNEL); +	if (!pia) +		return -ENOMEM; + +	/* Read the whole PIA. */ +	len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, pia, size); +	if (len != size) { +		kfree(pia); +		DRM_ERROR("Couldn't read the Product Info Area: %d", len); +		return len < 0 ? len : -EIO;  	} -	len = size; -	/* Serial number should only be 16 characters. Any more, -	 * and something could be wrong. Cap it at 16 to be safe -	 */ -	if (len >= sizeof(adev->serial)) { -		DRM_WARN("FRU Serial Number is larger than 16 characters. This is likely a mistake"); -		len = sizeof(adev->serial) - 1; +	for (csum = 0; size > 0; size--) +		csum += pia[size - 1]; +	if (csum) { +		DRM_ERROR("Bad Product Info Area checksum: 0x%02x", csum); +		return -EIO;  	} -	memcpy(adev->serial, buf, len); -	adev->serial[len] = '\0'; +	/* Now extract useful information from the PIA. +	 * +	 * Skip the Manufacturer Name at [3] and go directly to +	 * the Product Name field. +	 */ +	addr = 3 + 1 + (pia[3] & 0x3F); +	if (addr + 1 >= len) +		goto Out; +	memcpy(adev->product_name, pia + addr + 1, +	       min_t(size_t, +		     sizeof(adev->product_name), +		     pia[addr] & 0x3F)); +	adev->product_name[sizeof(adev->product_name) - 1] = '\0'; + +	/* Go to the Product Part/Model Number field. */ +	addr += 1 + (pia[addr] & 0x3F); +	if (addr + 1 >= len) +		goto Out; +	memcpy(adev->product_number, pia + addr + 1, +	       min_t(size_t, +		     sizeof(adev->product_number), +		     pia[addr] & 0x3F)); +	adev->product_number[sizeof(adev->product_number) - 1] = '\0'; + +	/* Go to the Product Version field. */ +	addr += 1 + (pia[addr] & 0x3F); + +	/* Go to the Product Serial Number field. */ +	addr += 1 + (pia[addr] & 0x3F); +	if (addr + 1 >= len) +		goto Out; +	memcpy(adev->serial, pia + addr + 1, min_t(size_t, +						   sizeof(adev->serial), +						   pia[addr] & 0x3F)); +	adev->serial[sizeof(adev->serial) - 1] = '\0'; +Out: +	kfree(pia);  	return 0;  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 91571b1324f2..bb7350ea1d75 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -38,6 +38,7 @@  #include "amdgpu.h"  #include "amdgpu_display.h"  #include "amdgpu_dma_buf.h" +#include "amdgpu_hmm.h"  #include "amdgpu_xgmi.h"  static const struct drm_gem_object_funcs amdgpu_gem_object_funcs; @@ -87,7 +88,7 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj)  	struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);  	if (robj) { -		amdgpu_mn_unregister(robj); +		amdgpu_hmm_unregister(robj);  		amdgpu_bo_unref(&robj);  	}  } @@ -255,7 +256,7 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str  	 * becoming writable and makes is_cow_mapping(vm_flags) false.  	 
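	 * (VM_ACCESS_FLAGS is defined as VM_READ | VM_WRITE | VM_EXEC, so the
	 * substitution below only shortens the expression; behaviour is unchanged.)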
*/  	if (is_cow_mapping(vma->vm_flags) && -	    !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))) +	    !(vma->vm_flags & VM_ACCESS_FLAGS))  		vma->vm_flags &= ~VM_MAYWRITE;  	return drm_gem_ttm_mmap(obj, vma); @@ -414,7 +415,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,  	if (r)  		goto release_object; -	r = amdgpu_mn_register(bo, args->addr); +	r = amdgpu_hmm_register(bo, args->addr);  	if (r)  		goto release_object; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 9546adc8a76f..23692e5d4d13 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -583,10 +583,14 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)  		if (adev->gfx.gfx_off_req_count == 0 &&  		    !adev->gfx.gfx_off_state) {  			/* If going to s2idle, no need to wait */ -			if (adev->in_s0ix) -				delay = GFX_OFF_NO_DELAY; -			schedule_delayed_work(&adev->gfx.gfx_off_delay_work, +			if (adev->in_s0ix) { +				if (!amdgpu_dpm_set_powergating_by_smu(adev, +						AMD_IP_BLOCK_TYPE_GFX, true)) +					adev->gfx.gfx_off_state = true; +			} else { +				schedule_delayed_work(&adev->gfx.gfx_off_delay_work,  					      delay); +			}  		}  	} else {  		if (adev->gfx.gfx_off_req_count == 0) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 832b3807f1d6..b3df4787877e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -33,6 +33,7 @@  #include "amdgpu_imu.h"  #include "soc15.h"  #include "amdgpu_ras.h" +#include "amdgpu_ring_mux.h"  /* GFX current status */  #define AMDGPU_GFX_NORMAL_MODE			0x00000000L @@ -352,6 +353,9 @@ struct amdgpu_gfx {  	struct amdgpu_gfx_ras		*ras;  	bool				is_poweron; + +	struct amdgpu_ring		sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS]; +	struct amdgpu_ring_mux          muxer;  };  #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index 28612e56d0d4..02a4c93673ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -548,6 +548,8 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)  	case IP_VERSION(10, 3, 1):  	/* YELLOW_CARP*/  	case IP_VERSION(10, 3, 3): +	case IP_VERSION(11, 0, 1): +	case IP_VERSION(11, 0, 4):  		/* Don't enable it by default yet.  		 
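		 * With amdgpu_tmz left at its default of -1 this keeps TMZ off on
		 * these parts; passing amdgpu.tmz=1 still enables it explicitly.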
*/  		if (amdgpu_tmz < 1) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 1f3302aebeff..44367f03316f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -144,7 +144,7 @@ static int amdgpu_gtt_mgr_new(struct ttm_resource_manager *man,  		node->base.start = node->mm_nodes[0].start;  	} else {  		node->mm_nodes[0].start = 0; -		node->mm_nodes[0].size = node->base.num_pages; +		node->mm_nodes[0].size = PFN_UP(node->base.size);  		node->base.start = AMDGPU_BO_INVALID_OFFSET;  	} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index b86c0b8252a5..2dadcfe43d03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -49,9 +49,12 @@  #include "amdgpu.h"  #include "amdgpu_amdkfd.h" +#include "amdgpu_hmm.h" + +#define MAX_WALK_BYTE	(2UL << 30)  /** - * amdgpu_mn_invalidate_gfx - callback to notify about mm change + * amdgpu_hmm_invalidate_gfx - callback to notify about mm change   *   * @mni: the range (mm) is about to update   * @range: details on the invalidation @@ -60,9 +63,9 @@   * Block for operations on BOs to finish and mark pages as accessed and   * potentially dirty.   */ -static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni, -				     const struct mmu_notifier_range *range, -				     unsigned long cur_seq) +static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni, +				      const struct mmu_notifier_range *range, +				      unsigned long cur_seq)  {  	struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);  	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); @@ -83,12 +86,12 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,  	return true;  } -static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = { -	.invalidate = amdgpu_mn_invalidate_gfx, +static const struct mmu_interval_notifier_ops amdgpu_hmm_gfx_ops = { +	.invalidate = amdgpu_hmm_invalidate_gfx,  };  /** - * amdgpu_mn_invalidate_hsa - callback to notify about mm change + * amdgpu_hmm_invalidate_hsa - callback to notify about mm change   *   * @mni: the range (mm) is about to update   * @range: details on the invalidation @@ -97,32 +100,26 @@ static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = {   * We temporarily evict the BO attached to this range. This necessitates   * evicting all user-mode queues of the process.   
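 * The eviction itself is handed off to amdgpu_amdkfd_evict_userptr(), which
 * now receives the notifier and sequence number directly rather than this
 * callback taking adev->notifier_lock and setting the sequence itself.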
*/ -static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni, -				     const struct mmu_notifier_range *range, -				     unsigned long cur_seq) +static bool amdgpu_hmm_invalidate_hsa(struct mmu_interval_notifier *mni, +				      const struct mmu_notifier_range *range, +				      unsigned long cur_seq)  {  	struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier); -	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);  	if (!mmu_notifier_range_blockable(range))  		return false; -	mutex_lock(&adev->notifier_lock); - -	mmu_interval_set_seq(mni, cur_seq); - -	amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm); -	mutex_unlock(&adev->notifier_lock); +	amdgpu_amdkfd_evict_userptr(mni, cur_seq, bo->kfd_bo);  	return true;  } -static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = { -	.invalidate = amdgpu_mn_invalidate_hsa, +static const struct mmu_interval_notifier_ops amdgpu_hmm_hsa_ops = { +	.invalidate = amdgpu_hmm_invalidate_hsa,  };  /** - * amdgpu_mn_register - register a BO for notifier updates + * amdgpu_hmm_register - register a BO for notifier updates   *   * @bo: amdgpu buffer object   * @addr: userptr addr we should monitor @@ -130,25 +127,25 @@ static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = {   * Registers a mmu_notifier for the given BO at the specified address.   * Returns 0 on success, -ERRNO if anything goes wrong.   */ -int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) +int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)  {  	if (bo->kfd_bo)  		return mmu_interval_notifier_insert(&bo->notifier, current->mm,  						    addr, amdgpu_bo_size(bo), -						    &amdgpu_mn_hsa_ops); +						    &amdgpu_hmm_hsa_ops);  	return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,  					    amdgpu_bo_size(bo), -					    &amdgpu_mn_gfx_ops); +					    &amdgpu_hmm_gfx_ops);  }  /** - * amdgpu_mn_unregister - unregister a BO for notifier updates + * amdgpu_hmm_unregister - unregister a BO for notifier updates   *   * @bo: amdgpu buffer object   *   * Remove any registration of mmu notifier updates from the buffer object.   
*/ -void amdgpu_mn_unregister(struct amdgpu_bo *bo) +void amdgpu_hmm_unregister(struct amdgpu_bo *bo)  {  	if (!bo->notifier.mm)  		return; @@ -157,12 +154,12 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)  }  int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, -			       struct mm_struct *mm, struct page **pages, -			       uint64_t start, uint64_t npages, -			       struct hmm_range **phmm_range, bool readonly, -			       bool mmap_locked, void *owner) +			       uint64_t start, uint64_t npages, bool readonly, +			       void *owner, struct page **pages, +			       struct hmm_range **phmm_range)  {  	struct hmm_range *hmm_range; +	unsigned long end;  	unsigned long timeout;  	unsigned long i;  	unsigned long *pfns; @@ -184,32 +181,42 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,  		hmm_range->default_flags |= HMM_PFN_REQ_WRITE;  	hmm_range->hmm_pfns = pfns;  	hmm_range->start = start; -	hmm_range->end = start + npages * PAGE_SIZE; +	end = start + npages * PAGE_SIZE;  	hmm_range->dev_private_owner = owner; -	/* Assuming 512MB takes maxmium 1 second to fault page address */ -	timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT; -	timeout = jiffies + msecs_to_jiffies(timeout); +	do { +		hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end); + +		pr_debug("hmm range: start = 0x%lx, end = 0x%lx", +			hmm_range->start, hmm_range->end); + +		/* Assuming 512MB takes maxmium 1 second to fault page address */ +		timeout = max((hmm_range->end - hmm_range->start) >> 29, 1UL); +		timeout *= HMM_RANGE_DEFAULT_TIMEOUT; +		timeout = jiffies + msecs_to_jiffies(timeout);  retry: -	hmm_range->notifier_seq = mmu_interval_read_begin(notifier); - -	if (likely(!mmap_locked)) -		mmap_read_lock(mm); - -	r = hmm_range_fault(hmm_range); - -	if (likely(!mmap_locked)) -		mmap_read_unlock(mm); -	if (unlikely(r)) { -		/* -		 * FIXME: This timeout should encompass the retry from -		 * mmu_interval_read_retry() as well. -		 */ -		if (r == -EBUSY && !time_after(jiffies, timeout)) -			goto retry; -		goto out_free_pfns; -	} +		hmm_range->notifier_seq = mmu_interval_read_begin(notifier); +		r = hmm_range_fault(hmm_range); +		if (unlikely(r)) { +			/* +			 * FIXME: This timeout should encompass the retry from +			 * mmu_interval_read_retry() as well. 
+			 */ +			if (r == -EBUSY && !time_after(jiffies, timeout)) +				goto retry; +			goto out_free_pfns; +		} + +		if (hmm_range->end == end) +			break; +		hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT; +		hmm_range->start = hmm_range->end; +		schedule(); +	} while (hmm_range->end < end); + +	hmm_range->start = start; +	hmm_range->hmm_pfns = pfns;  	/*  	 * Due to default_flags, all pages are HMM_PFN_VALID or @@ -231,9 +238,9 @@ out_free_range:  	return r;  } -int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range) +bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)  { -	int r; +	bool r;  	r = mmu_interval_read_retry(hmm_range->notifier,  				    hmm_range->notifier_seq); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h index 14a3c1864085..e2edcd010ccc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h @@ -29,25 +29,25 @@  #include <linux/rwsem.h>  #include <linux/workqueue.h>  #include <linux/interval_tree.h> +#include <linux/mmu_notifier.h>  int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, -			       struct mm_struct *mm, struct page **pages, -			       uint64_t start, uint64_t npages, -			       struct hmm_range **phmm_range, bool readonly, -			       bool mmap_locked, void *owner); -int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); +			       uint64_t start, uint64_t npages, bool readonly, +			       void *owner, struct page **pages, +			       struct hmm_range **phmm_range); +bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);  #if defined(CONFIG_HMM_MIRROR) -int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); -void amdgpu_mn_unregister(struct amdgpu_bo *bo); +int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr); +void amdgpu_hmm_unregister(struct amdgpu_bo *bo);  #else -static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr) +static inline int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)  {  	DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "  		      "add CONFIG_ZONE_DEVICE=y in config file to fix this\n");  	return -ENODEV;  } -static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {} +static inline void amdgpu_hmm_unregister(struct amdgpu_bo *bo) {}  #endif  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 258cffe3c06a..bcccc348dbe2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -182,7 +182,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,  	need_ctx_switch = ring->current_ctx != fence_ctx;  	if (ring->funcs->emit_pipeline_sync && job && -	    ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || +	    ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||  	     (amdgpu_sriov_vf(adev) && need_ctx_switch) ||  	     amdgpu_vm_need_pipeline_sync(ring, job))) {  		need_pipe_sync = true; @@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,  		}  	} +	amdgpu_ring_ib_begin(ring);  	if (job && ring->funcs->init_cond_exec)  		patch_offset = amdgpu_ring_init_cond_exec(ring); @@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,  	    ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)  		ring->funcs->emit_wave_limit(ring, false); +	amdgpu_ring_ib_end(ring);  	amdgpu_ring_commit(ring);  	return 0;  } diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 03d115d2b5ed..fcb711a11a5b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -165,31 +165,52 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,  		atomic_read(&adev->gpu_reset_counter);  } +/* Check if we need to switch to another set of resources */ +static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id, +					  struct amdgpu_job *job) +{ +	return id->gds_base != job->gds_base || +		id->gds_size != job->gds_size || +		id->gws_base != job->gws_base || +		id->gws_size != job->gws_size || +		id->oa_base != job->oa_base || +		id->oa_size != job->oa_size; +} + +/* Check if the id is compatible with the job */ +static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id, +				   struct amdgpu_job *job) +{ +	return  id->pd_gpu_addr == job->vm_pd_addr && +		!amdgpu_vmid_gds_switch_needed(id, job); +} +  /**   * amdgpu_vmid_grab_idle - grab idle VMID   *   * @vm: vm to allocate id for   * @ring: ring we want to submit job to - * @sync: sync object where we add dependencies   * @idle: resulting idle VMID + * @fence: fence to wait for if no id could be grabbed   *   * Try to find an idle VMID, if none is idle add a fence to wait to the sync   * object. Returns -ENOMEM when we are out of memory.   */  static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,  				 struct amdgpu_ring *ring, -				 struct amdgpu_sync *sync, -				 struct amdgpu_vmid **idle) +				 struct amdgpu_vmid **idle, +				 struct dma_fence **fence)  {  	struct amdgpu_device *adev = ring->adev;  	unsigned vmhub = ring->funcs->vmhub;  	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];  	struct dma_fence **fences;  	unsigned i; -	int r; -	if (!dma_fence_is_signaled(ring->vmid_wait)) -		return amdgpu_sync_fence(sync, ring->vmid_wait); +	if (!dma_fence_is_signaled(ring->vmid_wait)) { +		*fence = dma_fence_get(ring->vmid_wait); +		return 0; +	}  	fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);  	if (!fences) @@ -228,10 +249,10 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,  			return -ENOMEM;  		} -		r = amdgpu_sync_fence(sync, &array->base); +		*fence = dma_fence_get(&array->base);  		dma_fence_put(ring->vmid_wait);  		ring->vmid_wait = &array->base; -		return r; +		return 0;  	}  	kfree(fences); @@ -243,30 +264,29 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,   *   * @vm: vm to allocate id for   * @ring: ring we want to submit job to - * @sync: sync object where we add dependencies - * @fence: fence protecting ID from reuse   * @job: job who wants to use the VMID   * @id: resulting VMID + * @fence: fence to wait for if no id could be grabbed   *   * Try to assign a reserved VMID.   
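 * With amdgpu_vmid_compatible() above, the reserved ID is only reused as-is
 * when both the page directory address and the GDS/GWS/OA configuration of
 * the job match what the ID last used; otherwise the submission is marked as
 * needing a flush.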
*/  static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,  				     struct amdgpu_ring *ring, -				     struct amdgpu_sync *sync, -				     struct dma_fence *fence,  				     struct amdgpu_job *job, -				     struct amdgpu_vmid **id) +				     struct amdgpu_vmid **id, +				     struct dma_fence **fence)  {  	struct amdgpu_device *adev = ring->adev;  	unsigned vmhub = ring->funcs->vmhub; +	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];  	uint64_t fence_context = adev->fence_context + ring->idx;  	bool needs_flush = vm->use_cpu_for_update;  	uint64_t updates = amdgpu_vm_tlb_seq(vm);  	int r; -	*id = vm->reserved_vmid[vmhub]; +	*id = id_mgr->reserved;  	if ((*id)->owner != vm->immediate.fence_context || -	    (*id)->pd_gpu_addr != job->vm_pd_addr || +	    !amdgpu_vmid_compatible(*id, job) ||  	    (*id)->flushed_updates < updates ||  	    !(*id)->last_flush ||  	    ((*id)->last_flush->context != fence_context && @@ -282,7 +302,8 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,  		tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);  		if (tmp) {  			*id = NULL; -			return amdgpu_sync_fence(sync, tmp); +			*fence = dma_fence_get(tmp); +			return 0;  		}  		needs_flush = true;  	} @@ -290,12 +311,12 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,  	/* Good we can use this VMID. Remember this submission as  	* user of the VMID.  	*/ -	r = amdgpu_sync_fence(&(*id)->active, fence); +	r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished);  	if (r)  		return r; -	(*id)->flushed_updates = updates;  	job->vm_needs_flush = needs_flush; +	job->spm_update_needed = true;  	return 0;  } @@ -304,19 +325,17 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,   *   * @vm: vm to allocate id for   * @ring: ring we want to submit job to - * @sync: sync object where we add dependencies - * @fence: fence protecting ID from reuse   * @job: job who wants to use the VMID   * @id: resulting VMID + * @fence: fence to wait for if no id could be grabbed   *   * Try to reuse a VMID for this submission.   */  static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,  				 struct amdgpu_ring *ring, -				 struct amdgpu_sync *sync, -				 struct dma_fence *fence,  				 struct amdgpu_job *job, -				 struct amdgpu_vmid **id) +				 struct amdgpu_vmid **id, +				 struct dma_fence **fence)  {  	struct amdgpu_device *adev = ring->adev;  	unsigned vmhub = ring->funcs->vmhub; @@ -335,7 +354,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,  		if ((*id)->owner != vm->immediate.fence_context)  			continue; -		if ((*id)->pd_gpu_addr != job->vm_pd_addr) +		if (!amdgpu_vmid_compatible(*id, job))  			continue;  		if (!(*id)->last_flush || @@ -352,11 +371,11 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,  		/* Good, we can use this VMID. Remember this submission as  		 * user of the VMID.  		 
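		 * (The job's own scheduler fence, job->base.s_fence->finished, is
		 * what gets added to the ID's active set from now on.)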
*/ -		r = amdgpu_sync_fence(&(*id)->active, fence); +		r = amdgpu_sync_fence(&(*id)->active, +				      &job->base.s_fence->finished);  		if (r)  			return r; -		(*id)->flushed_updates = updates;  		job->vm_needs_flush |= needs_flush;  		return 0;  	} @@ -370,15 +389,13 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,   *   * @vm: vm to allocate id for   * @ring: ring we want to submit job to - * @sync: sync object where we add dependencies - * @fence: fence protecting ID from reuse   * @job: job who wants to use the VMID + * @fence: fence to wait for if no id could be grabbed   *   * Allocate an id for the vm, adding fences to the sync obj as necessary.   */  int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, -		     struct amdgpu_sync *sync, struct dma_fence *fence, -		     struct amdgpu_job *job) +		     struct amdgpu_job *job, struct dma_fence **fence)  {  	struct amdgpu_device *adev = ring->adev;  	unsigned vmhub = ring->funcs->vmhub; @@ -388,16 +405,16 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,  	int r = 0;  	mutex_lock(&id_mgr->lock); -	r = amdgpu_vmid_grab_idle(vm, ring, sync, &idle); +	r = amdgpu_vmid_grab_idle(vm, ring, &idle, fence);  	if (r || !idle)  		goto error;  	if (vm->reserved_vmid[vmhub]) { -		r = amdgpu_vmid_grab_reserved(vm, ring, sync, fence, job, &id); +		r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);  		if (r || !id)  			goto error;  	} else { -		r = amdgpu_vmid_grab_used(vm, ring, sync, fence, job, &id); +		r = amdgpu_vmid_grab_used(vm, ring, job, &id, fence);  		if (r)  			goto error; @@ -406,26 +423,35 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,  			id = idle;  			/* Remember this submission as user of the VMID */ -			r = amdgpu_sync_fence(&id->active, fence); +			r = amdgpu_sync_fence(&id->active, +					      &job->base.s_fence->finished);  			if (r)  				goto error; -			id->flushed_updates = amdgpu_vm_tlb_seq(vm);  			job->vm_needs_flush = true;  		}  		list_move_tail(&id->list, &id_mgr->ids_lru);  	} -	id->pd_gpu_addr = job->vm_pd_addr; -	id->owner = vm->immediate.fence_context; - +	job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job);  	if (job->vm_needs_flush) { +		id->flushed_updates = amdgpu_vm_tlb_seq(vm);  		dma_fence_put(id->last_flush);  		id->last_flush = NULL;  	}  	job->vmid = id - id_mgr->ids;  	job->pasid = vm->pasid; + +	id->gds_base = job->gds_base; +	id->gds_size = job->gds_size; +	id->gws_base = job->gws_base; +	id->gws_size = job->gws_size; +	id->oa_base = job->oa_base; +	id->oa_size = job->oa_size; +	id->pd_gpu_addr = job->vm_pd_addr; +	id->owner = vm->immediate.fence_context; +  	trace_amdgpu_vm_grab_id(vm, ring, job);  error: @@ -437,31 +463,27 @@ int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,  			       struct amdgpu_vm *vm,  			       unsigned vmhub)  { -	struct amdgpu_vmid_mgr *id_mgr; -	struct amdgpu_vmid *idle; -	int r = 0; +	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; -	id_mgr = &adev->vm_manager.id_mgr[vmhub];  	mutex_lock(&id_mgr->lock);  	if (vm->reserved_vmid[vmhub])  		goto unlock; -	if (atomic_inc_return(&id_mgr->reserved_vmid_num) > -	    AMDGPU_VM_MAX_RESERVED_VMID) { -		DRM_ERROR("Over limitation of reserved vmid\n"); -		atomic_dec(&id_mgr->reserved_vmid_num); -		r = -EINVAL; -		goto unlock; + +	++id_mgr->reserved_use_count; +	if (!id_mgr->reserved) { +		struct amdgpu_vmid *id; + +		id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, +				      list); +		/* Remove from normal 
round robin handling */ +		list_del_init(&id->list); +		id_mgr->reserved = id;  	} -	/* Select the first entry VMID */ -	idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list); -	list_del_init(&idle->list); -	vm->reserved_vmid[vmhub] = idle; -	mutex_unlock(&id_mgr->lock); +	vm->reserved_vmid[vmhub] = true; -	return 0;  unlock:  	mutex_unlock(&id_mgr->lock); -	return r; +	return 0;  }  void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, @@ -471,12 +493,12 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,  	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];  	mutex_lock(&id_mgr->lock); -	if (vm->reserved_vmid[vmhub]) { -		list_add(&vm->reserved_vmid[vmhub]->list, -			&id_mgr->ids_lru); -		vm->reserved_vmid[vmhub] = NULL; -		atomic_dec(&id_mgr->reserved_vmid_num); +	if (vm->reserved_vmid[vmhub] && +	    !--id_mgr->reserved_use_count) { +		/* give the reserved ID back to normal round robin */ +		list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);  	} +	vm->reserved_vmid[vmhub] = false;  	mutex_unlock(&id_mgr->lock);  } @@ -543,7 +565,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)  		mutex_init(&id_mgr->lock);  		INIT_LIST_HEAD(&id_mgr->ids_lru); -		atomic_set(&id_mgr->reserved_vmid_num, 0); +		id_mgr->reserved_use_count = 0;  		/* manage only VMIDs not used by KFD */  		id_mgr->num_ids = adev->vm_manager.first_kfd_vmid; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index 06c8a0034fa5..d1cc09b45da4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -67,7 +67,8 @@ struct amdgpu_vmid_mgr {  	unsigned		num_ids;  	struct list_head	ids_lru;  	struct amdgpu_vmid	ids[AMDGPU_NUM_VMID]; -	atomic_t		reserved_vmid_num; +	struct amdgpu_vmid	*reserved; +	unsigned int		reserved_use_count;  };  int amdgpu_pasid_alloc(unsigned int bits); @@ -84,8 +85,7 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,  			       struct amdgpu_vm *vm,  			       unsigned vmhub);  int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, -		     struct amdgpu_sync *sync, struct dma_fence *fence, -		     struct amdgpu_job *job); +		     struct amdgpu_job *job, struct dma_fence **fence);  void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,  		       unsigned vmid);  void amdgpu_vmid_reset_all(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 89011bae7588..a6aef488a822 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -101,41 +101,6 @@ const char *soc15_ih_clientid_name[] = {  };  /** - * amdgpu_hotplug_work_func - work handler for display hotplug event - * - * @work: work struct pointer - * - * This is the hotplug event work handler (all ASICs). - * The work gets scheduled from the IRQ handler if there - * was a hotplug interrupt.  It walks through the connector table - * and calls hotplug handler for each connector. After this, it sends - * a DRM hotplug event to alert userspace. - * - * This design approach is required in order to defer hotplug event handling - * from the IRQ handler to a work handler because hotplug handler has to use - * mutexes which cannot be locked in an IRQ handler (since &mutex_lock may - * sleep). 
- */ -static void amdgpu_hotplug_work_func(struct work_struct *work) -{ -	struct amdgpu_device *adev = container_of(work, struct amdgpu_device, -						  hotplug_work); -	struct drm_device *dev = adev_to_drm(adev); -	struct drm_mode_config *mode_config = &dev->mode_config; -	struct drm_connector *connector; -	struct drm_connector_list_iter iter; - -	mutex_lock(&mode_config->mutex); -	drm_connector_list_iter_begin(dev, &iter); -	drm_for_each_connector_iter(connector, &iter) -		amdgpu_connector_hotplug(connector); -	drm_connector_list_iter_end(&iter); -	mutex_unlock(&mode_config->mutex); -	/* Just fire off a uevent and let userspace tell us what to do */ -	drm_helper_hpd_irq_event(dev); -} - -/**   * amdgpu_irq_disable_all - disable *all* interrupts   *   * @adev: amdgpu device pointer @@ -317,21 +282,6 @@ int amdgpu_irq_init(struct amdgpu_device *adev)  		}  	} -	if (!amdgpu_device_has_dc_support(adev)) { -		if (!adev->enable_virtual_display) -			/* Disable vblank IRQs aggressively for power-saving */ -			/* XXX: can this be enabled for DC? */ -			adev_to_drm(adev)->vblank_disable_immediate = true; - -		r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); -		if (r) -			return r; - -		/* Pre-DCE11 */ -		INIT_WORK(&adev->hotplug_work, -				amdgpu_hotplug_work_func); -	} -  	INIT_WORK(&adev->irq.ih1_work, amdgpu_irq_handle_ih1);  	INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2);  	INIT_WORK(&adev->irq.ih_soft_work, amdgpu_irq_handle_ih_soft); @@ -345,11 +295,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev)  	/* PCI devices require shared interrupts. */  	r = request_irq(irq, amdgpu_irq_handler, IRQF_SHARED, adev_to_drm(adev)->driver->name,  			adev_to_drm(adev)); -	if (r) { -		if (!amdgpu_device_has_dc_support(adev)) -			flush_work(&adev->hotplug_work); +	if (r)  		return r; -	}  	adev->irq.installed = true;  	adev->irq.irq = irq;  	adev_to_drm(adev)->max_vblank_count = 0x00ffffff; @@ -366,9 +313,6 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev)  		adev->irq.installed = false;  		if (adev->irq.msi_enabled)  			pci_free_irq_vectors(adev->pdev); - -		if (!amdgpu_device_has_dc_support(adev)) -			flush_work(&adev->hotplug_work);  	}  	amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index adac650cf544..9e549923622b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -88,8 +88,9 @@ exit:  	return DRM_GPU_SCHED_STAT_NOMINAL;  } -int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, -		     struct amdgpu_job **job, struct amdgpu_vm *vm) +int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, +		     struct drm_sched_entity *entity, void *owner, +		     unsigned int num_ibs, struct amdgpu_job **job)  {  	if (num_ibs == 0)  		return -EINVAL; @@ -105,28 +106,34 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,  	(*job)->base.sched = &adev->rings[0]->sched;  	(*job)->vm = vm; -	amdgpu_sync_create(&(*job)->sync); -	amdgpu_sync_create(&(*job)->sched_sync); +	amdgpu_sync_create(&(*job)->explicit_sync);  	(*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);  	(*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET; -	return 0; +	if (!entity) +		return 0; + +	return drm_sched_job_init(&(*job)->base, entity, owner);  } -int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, -		enum amdgpu_ib_pool_type pool_type, -		struct amdgpu_job **job) +int 
amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, +			     struct drm_sched_entity *entity, void *owner, +			     size_t size, enum amdgpu_ib_pool_type pool_type, +			     struct amdgpu_job **job)  {  	int r; -	r = amdgpu_job_alloc(adev, 1, job, NULL); +	r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job);  	if (r)  		return r;  	(*job)->num_ibs = 1;  	r = amdgpu_ib_get(adev, NULL, size, pool_type, &(*job)->ibs[0]); -	if (r) +	if (r) { +		if (entity) +			drm_sched_job_cleanup(&(*job)->base);  		kfree(*job); +	}  	return r;  } @@ -166,8 +173,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)  	drm_sched_job_cleanup(s_job); -	amdgpu_sync_free(&job->sync); -	amdgpu_sync_free(&job->sched_sync); +	amdgpu_sync_free(&job->explicit_sync);  	/* only put the hw fence if has embedded fence */  	if (!job->hw_fence.ops) @@ -194,9 +200,11 @@ void amdgpu_job_set_gang_leader(struct amdgpu_job *job,  void amdgpu_job_free(struct amdgpu_job *job)  { +	if (job->base.entity) +		drm_sched_job_cleanup(&job->base); +  	amdgpu_job_free_resources(job); -	amdgpu_sync_free(&job->sync); -	amdgpu_sync_free(&job->sched_sync); +	amdgpu_sync_free(&job->explicit_sync);  	if (job->gang_submit != &job->base.s_fence->scheduled)  		dma_fence_put(job->gang_submit); @@ -206,25 +214,16 @@ void amdgpu_job_free(struct amdgpu_job *job)  		dma_fence_put(&job->hw_fence);  } -int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, -		      void *owner, struct dma_fence **f) +struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)  { -	int r; - -	if (!f) -		return -EINVAL; - -	r = drm_sched_job_init(&job->base, entity, owner); -	if (r) -		return r; +	struct dma_fence *f;  	drm_sched_job_arm(&job->base); - -	*f = dma_fence_get(&job->base.s_fence->finished); +	f = dma_fence_get(&job->base.s_fence->finished);  	amdgpu_job_free_resources(job);  	drm_sched_entity_push_job(&job->base); -	return 0; +	return f;  }  int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring, @@ -242,33 +241,22 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,  	return 0;  } -static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, -					       struct drm_sched_entity *s_entity) +static struct dma_fence * +amdgpu_job_prepare_job(struct drm_sched_job *sched_job, +		      struct drm_sched_entity *s_entity)  {  	struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);  	struct amdgpu_job *job = to_amdgpu_job(sched_job); -	struct amdgpu_vm *vm = job->vm; -	struct dma_fence *fence; +	struct dma_fence *fence = NULL;  	int r; -	fence = amdgpu_sync_get_fence(&job->sync); -	if (fence && drm_sched_dependency_optimized(fence, s_entity)) { -		r = amdgpu_sync_fence(&job->sched_sync, fence); -		if (r) -			DRM_ERROR("Error adding fence (%d)\n", r); -	} -  	if (!fence && job->gang_submit)  		fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit); -	while (fence == NULL && vm && !job->vmid) { -		r = amdgpu_vmid_grab(vm, ring, &job->sync, -				     &job->base.s_fence->finished, -				     job); +	while (!fence && job->vm && !job->vmid) { +		r = amdgpu_vmid_grab(job->vm, ring, job, &fence);  		if (r)  			DRM_ERROR("Error getting VM ID (%d)\n", r); - -		fence = amdgpu_sync_get_fence(&job->sync);  	}  	return fence; @@ -285,8 +273,6 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)  	job = to_amdgpu_job(sched_job);  	finished = &job->base.s_fence->finished; -	BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); -  	
trace_amdgpu_sched_run_job(job);  	/* Skip job if VRAM is lost and never resubmit gangs */ @@ -345,7 +331,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)  }  const struct drm_sched_backend_ops amdgpu_sched_ops = { -	.dependency = amdgpu_job_dependency, +	.prepare_job = amdgpu_job_prepare_job,  	.run_job = amdgpu_job_run,  	.timedout_job = amdgpu_job_timedout,  	.free_job = amdgpu_job_free_cb diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index ab7b150e5d50..52f2e313ea17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -47,13 +47,14 @@ enum amdgpu_ib_pool_type;  struct amdgpu_job {  	struct drm_sched_job    base;  	struct amdgpu_vm	*vm; -	struct amdgpu_sync	sync; -	struct amdgpu_sync	sched_sync; +	struct amdgpu_sync	explicit_sync;  	struct dma_fence	hw_fence;  	struct dma_fence	*gang_submit;  	uint32_t		preamble_status;  	uint32_t                preemption_status;  	bool                    vm_needs_flush; +	bool			gds_switch_needed; +	bool			spm_update_needed;  	uint64_t		vm_pd_addr;  	unsigned		vmid;  	unsigned		pasid; @@ -78,18 +79,20 @@ static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job)  	return to_amdgpu_ring(job->base.entity->rq->sched);  } -int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, -		     struct amdgpu_job **job, struct amdgpu_vm *vm); -int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, -		enum amdgpu_ib_pool_type pool, struct amdgpu_job **job); +int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm, +		     struct drm_sched_entity *entity, void *owner, +		     unsigned int num_ibs, struct amdgpu_job **job); +int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, +			     struct drm_sched_entity *entity, void *owner, +			     size_t size, enum amdgpu_ib_pool_type pool_type, +			     struct amdgpu_job **job);  void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,  			      struct amdgpu_bo *gws, struct amdgpu_bo *oa);  void amdgpu_job_free_resources(struct amdgpu_job *job);  void amdgpu_job_set_gang_leader(struct amdgpu_job *job,  				struct amdgpu_job *leader);  void amdgpu_job_free(struct amdgpu_job *job); -int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity, -		      void *owner, struct dma_fence **f); +struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job);  int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,  			     struct dma_fence **fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c index 518eb0e40d32..6f81ed4fb0d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c @@ -150,14 +150,15 @@ static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle,  	const unsigned ib_size_dw = 16;  	int i, r; -	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, -					AMDGPU_IB_POOL_DIRECT, &job); +	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, +				     AMDGPU_IB_POOL_DIRECT, &job);  	if (r)  		return r;  	ib = &job->ibs[0]; -	ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0); +	ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, +			     PACKETJ_TYPE0);  	ib->ptr[1] = 0xDEADBEEF;  	for (i = 2; i < 16; i += 2) {  		ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); @@ -234,3 +235,20 @@ int amdgpu_jpeg_process_poison_irq(struct 
amdgpu_device *adev,  	return 0;  } + +void jpeg_set_ras_funcs(struct amdgpu_device *adev) +{ +	if (!adev->jpeg.ras) +		return; + +	amdgpu_ras_register_ras_block(adev, &adev->jpeg.ras->ras_block); + +	strcpy(adev->jpeg.ras->ras_block.ras_comm.name, "jpeg"); +	adev->jpeg.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG; +	adev->jpeg.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; +	adev->jpeg.ras_if = &adev->jpeg.ras->ras_block.ras_comm; + +	/* If don't define special ras_late_init function, use default ras_late_init */ +	if (!adev->jpeg.ras->ras_block.ras_late_init) +		adev->jpeg.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h index 635dca59a70a..e8ca3e32ad52 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h @@ -72,5 +72,6 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);  int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,  				struct amdgpu_irq_src *source,  				struct amdgpu_iv_entry *entry); +void jpeg_set_ras_funcs(struct amdgpu_device *adev);  #endif /*__AMDGPU_JPEG_H__*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 4e42dcb1950f..7aa7e52ca784 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -29,6 +29,7 @@  #include "amdgpu.h"  #include <drm/amdgpu_drm.h>  #include <drm/drm_drv.h> +#include <drm/drm_fb_helper.h>  #include "amdgpu_uvd.h"  #include "amdgpu_vce.h"  #include "atom.h" @@ -430,7 +431,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,  	case AMDGPU_HW_IP_VCN_DEC:  		type = AMD_IP_BLOCK_TYPE_VCN;  		for (i = 0; i < adev->vcn.num_vcn_inst; i++) { -			if (adev->uvd.harvest_config & (1 << i)) +			if (adev->vcn.harvest_config & (1 << i))  				continue;  			if (adev->vcn.inst[i].ring_dec.sched.ready) @@ -442,7 +443,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,  	case AMDGPU_HW_IP_VCN_ENC:  		type = AMD_IP_BLOCK_TYPE_VCN;  		for (i = 0; i < adev->vcn.num_vcn_inst; i++) { -			if (adev->uvd.harvest_config & (1 << i)) +			if (adev->vcn.harvest_config & (1 << i))  				continue;  			for (j = 0; j < adev->vcn.num_enc_rings; j++) @@ -796,7 +797,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)  		dev_info->ids_flags = 0;  		if (adev->flags & AMD_IS_APU)  			dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION; -		if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) +		if (amdgpu_mcbp)  			dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;  		if (amdgpu_is_tmz(adev))  			dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ; @@ -1172,7 +1173,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)  		goto error_vm;  	} -	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { +	if (amdgpu_mcbp) {  		uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;  		r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj, @@ -1236,7 +1237,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,  	if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)  		amdgpu_vce_free_handles(adev, file_priv); -	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) { +	if (amdgpu_mcbp) {  		/* TODO: how to handle reserve failure */  		BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));  		amdgpu_vm_bo_del(adev, fpriv->csa_va); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h index ad980f4b66e1..97c05d08a551 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h @@ -91,14 +91,12 @@ struct amdgpu_mes {  	struct amdgpu_bo		*ucode_fw_obj[AMDGPU_MAX_MES_PIPES];  	uint64_t			ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];  	uint32_t			*ucode_fw_ptr[AMDGPU_MAX_MES_PIPES]; -	uint32_t                        ucode_fw_version[AMDGPU_MAX_MES_PIPES];  	uint64_t                        uc_start_addr[AMDGPU_MAX_MES_PIPES];  	/* mes ucode data */  	struct amdgpu_bo		*data_fw_obj[AMDGPU_MAX_MES_PIPES];  	uint64_t			data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];  	uint32_t			*data_fw_ptr[AMDGPU_MAX_MES_PIPES]; -	uint32_t                        data_fw_version[AMDGPU_MAX_MES_PIPES];  	uint64_t                        data_start_addr[AMDGPU_MAX_MES_PIPES];  	/* eop gpu obj */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 37322550d750..8a39300b1a84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -36,7 +36,6 @@  #include <drm/drm_encoder.h>  #include <drm/drm_fixed.h>  #include <drm/drm_crtc_helper.h> -#include <drm/drm_fb_helper.h>  #include <drm/drm_framebuffer.h>  #include <drm/drm_probe_helper.h>  #include <linux/i2c.h> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 2e8f6cd7a729..25a68d8888e0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -346,17 +346,16 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,   * @adev: amdgpu device object   * @offset: offset of the BO   * @size: size of the BO - * @domain: where to place it   * @bo_ptr:  used to initialize BOs in structures   * @cpu_addr: optional CPU address mapping   * - * Creates a kernel BO at a specific offset in the address space of the domain. + * Creates a kernel BO at a specific offset in VRAM.   *   * Returns:   * 0 on success, negative error code otherwise.   */  int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, -			       uint64_t offset, uint64_t size, uint32_t domain, +			       uint64_t offset, uint64_t size,  			       struct amdgpu_bo **bo_ptr, void **cpu_addr)  {  	struct ttm_operation_ctx ctx = { false, false }; @@ -366,8 +365,9 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,  	offset &= PAGE_MASK;  	size = ALIGN(size, PAGE_SIZE); -	r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr, -				      NULL, cpu_addr); +	r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, +				      AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL, +				      cpu_addr);  	if (r)  		return r; @@ -422,6 +422,8 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,  	if (*bo == NULL)  		return; +	WARN_ON(amdgpu_ttm_adev((*bo)->tbo.bdev)->in_suspend); +  	if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {  		if (cpu_addr)  			amdgpu_bo_kunmap(*bo); @@ -446,33 +448,31 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,  	/*  	 * If GTT is part of requested domains the check must succeed to -	 * allow fall back to GTT +	 * allow fall back to GTT.  	 
*/  	if (domain & AMDGPU_GEM_DOMAIN_GTT) {  		man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); -		if (size < man->size) +		if (man && size < man->size)  			return true; -		else -			goto fail; -	} - -	if (domain & AMDGPU_GEM_DOMAIN_VRAM) { +		else if (!man) +			WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized"); +		goto fail; +	} else if (domain & AMDGPU_GEM_DOMAIN_VRAM) {  		man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); -		if (size < man->size) +		if (man && size < man->size)  			return true; -		else -			goto fail; +		goto fail;  	} -  	/* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */  	return true;  fail: -	DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, -		  man->size); +	if (man) +		DRM_DEBUG("BO size %lu > total memory in domain: %llu\n", size, +			  man->size);  	return false;  } @@ -542,6 +542,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,  		/* GWS and OA don't need any alignment. */  		page_align = bp->byte_align;  		size <<= PAGE_SHIFT; +  	} else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) {  		/* Both size and alignment must be a multiple of 4. */  		page_align = ALIGN(bp->byte_align, 4); @@ -776,7 +777,7 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)  		return 0;  	} -	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.resource->num_pages, &bo->kmap); +	r = ttm_bo_kmap(&bo->tbo, 0, PFN_UP(bo->tbo.base.size), &bo->kmap);  	if (r)  		return r; @@ -1509,7 +1510,8 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)  uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,  					    uint32_t domain)  { -	if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) { +	if ((domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) && +	    ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_STONEY))) {  		domain = AMDGPU_GEM_DOMAIN_VRAM;  		if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD)  			domain = AMDGPU_GEM_DOMAIN_GTT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 147b79c10cbb..93207badf83f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -284,7 +284,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,  			    u32 domain, struct amdgpu_bo **bo_ptr,  			    u64 *gpu_addr, void **cpu_addr);  int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev, -			       uint64_t offset, uint64_t size, uint32_t domain, +			       uint64_t offset, uint64_t size,  			       struct amdgpu_bo **bo_ptr, void **cpu_addr);  int amdgpu_bo_create_user(struct amdgpu_device *adev,  			  struct amdgpu_bo_param *bp, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 7978307e1d6d..7a2fc920739b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -52,6 +52,32 @@ static int psp_load_smu_fw(struct psp_context *psp);  static int psp_rap_terminate(struct psp_context *psp);  static int psp_securedisplay_terminate(struct psp_context *psp); +static int psp_ring_init(struct psp_context *psp, +			 enum psp_ring_type ring_type) +{ +	int ret = 0; +	struct psp_ring *ring; +	struct amdgpu_device *adev = psp->adev; + +	ring = &psp->km_ring; + +	ring->ring_type = ring_type; + +	/* allocate 4k Page of Local Frame Buffer memory for ring */ +	ring->ring_size = 0x1000; +	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, +				      AMDGPU_GEM_DOMAIN_VRAM, +				      &adev->firmware.rbuf, +	
			      &ring->ring_mem_mc_addr, +				      (void **)&ring->ring_mem); +	if (ret) { +		ring->ring_size = 0; +		return ret; +	} + +	return 0; +} +  /*   * Due to DF Cstate management centralized to PMFW, the firmware   * loading sequence will be updated as below: @@ -139,6 +165,7 @@ static int psp_early_init(void *handle)  	case IP_VERSION(13, 0, 5):  	case IP_VERSION(13, 0, 8):  	case IP_VERSION(13, 0, 10): +	case IP_VERSION(13, 0, 11):  		psp_v13_0_set_psp_funcs(psp);  		psp->autoload_supported = true;  		break; @@ -486,26 +513,22 @@ static int psp_sw_fini(void *handle)  	struct psp_gfx_cmd_resp *cmd = psp->cmd;  	psp_memory_training_fini(psp); -	if (psp->sos_fw) { -		release_firmware(psp->sos_fw); -		psp->sos_fw = NULL; -	} -	if (psp->asd_fw) { -		release_firmware(psp->asd_fw); -		psp->asd_fw = NULL; -	} -	if (psp->ta_fw) { -		release_firmware(psp->ta_fw); -		psp->ta_fw = NULL; -	} -	if (psp->cap_fw) { -		release_firmware(psp->cap_fw); -		psp->cap_fw = NULL; -	} -	if (psp->toc_fw) { -		release_firmware(psp->toc_fw); -		psp->toc_fw = NULL; -	} + +	release_firmware(psp->sos_fw); +	psp->sos_fw = NULL; + +	release_firmware(psp->asd_fw); +	psp->asd_fw = NULL; + +	release_firmware(psp->ta_fw); +	psp->ta_fw = NULL; + +	release_firmware(psp->cap_fw); +	psp->cap_fw = NULL; + +	release_firmware(psp->toc_fw); +	psp->toc_fw = NULL; +  	if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) ||  	    adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7))  		psp_sysfs_fini(adev); @@ -835,7 +858,7 @@ static int psp_tmr_unload(struct psp_context *psp)  	struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);  	psp_prep_tmr_unload_cmd_buf(psp, cmd); -	dev_info(psp->adev->dev, "free PSP TMR buffer\n"); +	dev_dbg(psp->adev->dev, "free PSP TMR buffer\n");  	ret = psp_cmd_submit_buf(psp, NULL, cmd,  				 psp->fence_buf_mc_addr); @@ -992,6 +1015,8 @@ int psp_ta_unload(struct psp_context *psp, struct ta_context *context)  	ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); +	context->resp_status = cmd->resp.status; +  	release_psp_cmd_buf(psp);  	return ret; @@ -1073,42 +1098,6 @@ int psp_ta_init_shared_buf(struct psp_context *psp,  				      &mem_ctx->shared_buf);  } -static void psp_prep_ta_invoke_indirect_cmd_buf(struct psp_gfx_cmd_resp *cmd, -				       uint32_t ta_cmd_id, -				       struct ta_context *context) -{ -	cmd->cmd_id                         = GFX_CMD_ID_INVOKE_CMD; -	cmd->cmd.cmd_invoke_cmd.session_id  = context->session_id; -	cmd->cmd.cmd_invoke_cmd.ta_cmd_id   = ta_cmd_id; - -	cmd->cmd.cmd_invoke_cmd.buf.num_desc   = 1; -	cmd->cmd.cmd_invoke_cmd.buf.total_size = context->mem_context.shared_mem_size; -	cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_size = context->mem_context.shared_mem_size; -	cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_lo = -				     lower_32_bits(context->mem_context.shared_mc_addr); -	cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_hi = -				     upper_32_bits(context->mem_context.shared_mc_addr); -} - -int psp_ta_invoke_indirect(struct psp_context *psp, -		  uint32_t ta_cmd_id, -		  struct ta_context *context) -{ -	int ret; -	struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp); - -	psp_prep_ta_invoke_indirect_cmd_buf(cmd, ta_cmd_id, context); - -	ret = psp_cmd_submit_buf(psp, NULL, cmd, -				 psp->fence_buf_mc_addr); - -	context->resp_status = cmd->resp.status; - -	release_psp_cmd_buf(psp); - -	return ret; -} -  static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,  				       uint32_t ta_cmd_id,  				       uint32_t 
session_id) @@ -1551,7 +1540,7 @@ int psp_ras_terminate(struct psp_context *psp)  	return ret;  } -static int psp_ras_initialize(struct psp_context *psp) +int psp_ras_initialize(struct psp_context *psp)  {  	int ret;  	uint32_t boot_cfg = 0xFF; @@ -1614,7 +1603,7 @@ static int psp_ras_initialize(struct psp_context *psp)  	psp->ras_context.context.mem_context.shared_mem_size = PSP_RAS_SHARED_MEM_SIZE;  	psp->ras_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA; -	if (!psp->ras_context.context.initialized) { +	if (!psp->ras_context.context.mem_context.shared_buf) {  		ret = psp_ta_init_shared_buf(psp, &psp->ras_context.context.mem_context);  		if (ret)  			return ret; @@ -1635,7 +1624,9 @@ static int psp_ras_initialize(struct psp_context *psp)  	else {  		if (ras_cmd->ras_status)  			dev_warn(psp->adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); -		amdgpu_ras_fini(psp->adev); + +		/* fail to load RAS TA */ +		psp->ras_context.context.initialized = false;  	}  	return ret; @@ -1942,10 +1933,15 @@ static int psp_securedisplay_initialize(struct psp_context *psp)  	} else  		return ret; +	mutex_lock(&psp->securedisplay_context.mutex); +  	psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,  			TA_SECUREDISPLAY_COMMAND__QUERY_TA);  	ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA); + +	mutex_unlock(&psp->securedisplay_context.mutex); +  	if (ret) {  		psp_securedisplay_terminate(psp);  		/* free securedisplay shared memory */ @@ -1994,12 +1990,8 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id)  	    ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC)  		return -EINVAL; -	mutex_lock(&psp->securedisplay_context.mutex); -  	ret = psp_ta_invoke(psp, ta_cmd_id, &psp->securedisplay_context.context); -	mutex_unlock(&psp->securedisplay_context.mutex); -  	return ret;  }  /* SECUREDISPLAY end */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 58ce3ebb446c..cf4f60c66122 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -118,7 +118,6 @@ struct psp_funcs  	int (*bootloader_load_dbg_drv)(struct psp_context *psp);  	int (*bootloader_load_ras_drv)(struct psp_context *psp);  	int (*bootloader_load_sos)(struct psp_context *psp); -	int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);  	int (*ring_create)(struct psp_context *psp,  			   enum psp_ring_type ring_type);  	int (*ring_stop)(struct psp_context *psp, @@ -136,6 +135,12 @@ struct psp_funcs  	int (*vbflash_stat)(struct psp_context *psp);  }; +struct ta_funcs { +	int (*fn_ta_initialize)(struct psp_context *psp); +	int (*fn_ta_invoke)(struct psp_context *psp, uint32_t ta_cmd_id); +	int (*fn_ta_terminate)(struct psp_context *psp); +}; +  #define AMDGPU_XGMI_MAX_CONNECTED_NODES		64  struct psp_xgmi_node_info {  	uint64_t				node_id; @@ -309,6 +314,7 @@ struct psp_context  	struct psp_gfx_cmd_resp		*cmd;  	const struct psp_funcs		*funcs; +	const struct ta_funcs		*ta_funcs;  	/* firmware buffer */  	struct amdgpu_bo		*fw_pri_bo; @@ -389,7 +395,6 @@ struct amdgpu_psp_funcs {  }; -#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))  #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))  #define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))  #define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type))) @@ -463,9 +468,6 @@ int psp_ta_load(struct psp_context *psp, struct ta_context *context);  int 
psp_ta_invoke(struct psp_context *psp,  			uint32_t ta_cmd_id,  			struct ta_context *context); -int psp_ta_invoke_indirect(struct psp_context *psp, -		  uint32_t ta_cmd_id, -		  struct ta_context *context);  int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta);  int psp_xgmi_terminate(struct psp_context *psp); @@ -479,7 +481,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,  int psp_xgmi_set_topology_info(struct psp_context *psp,  			       int number_devices,  			       struct psp_xgmi_topology_info *topology); - +int psp_ras_initialize(struct psp_context *psp);  int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);  int psp_ras_enable_features(struct psp_context *psp,  		union ta_ras_cmd_input *info, bool enable); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 0988e00612e5..468a67b302d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -41,30 +41,46 @@ static uint32_t get_bin_version(const uint8_t *bin)  	return hdr->ucode_version;  } -static void prep_ta_mem_context(struct psp_context *psp, -					     struct ta_context *context, +static int prep_ta_mem_context(struct ta_mem_context *mem_context,  					     uint8_t *shared_buf,  					     uint32_t shared_buf_len)  { -	context->mem_context.shared_mem_size = PAGE_ALIGN(shared_buf_len); -	psp_ta_init_shared_buf(psp, &context->mem_context); +	if (mem_context->shared_mem_size < shared_buf_len) +		return -EINVAL; +	memset(mem_context->shared_buf, 0, mem_context->shared_mem_size); +	memcpy((void *)mem_context->shared_buf, shared_buf, shared_buf_len); -	memcpy((void *)context->mem_context.shared_buf, shared_buf, shared_buf_len); +	return 0;  }  static bool is_ta_type_valid(enum ta_type_id ta_type)  { -	bool ret = false; +	switch (ta_type) { +	case TA_TYPE_RAS: +		return true; +	default: +		return false; +	} +} + +static const struct ta_funcs ras_ta_funcs = { +	.fn_ta_initialize = psp_ras_initialize, +	.fn_ta_invoke    = psp_ras_invoke, +	.fn_ta_terminate = psp_ras_terminate +}; +static void set_ta_context_funcs(struct psp_context *psp, +						      enum ta_type_id ta_type, +						      struct ta_context **pcontext) +{  	switch (ta_type) {  	case TA_TYPE_RAS: -		ret = true; +		*pcontext = &psp->ras_context.context; +		psp->ta_funcs = &ras_ta_funcs;  		break;  	default:  		break;  	} - -	return ret;  }  static const struct file_operations ta_load_debugfs_fops = { @@ -85,8 +101,7 @@ static const struct file_operations ta_invoke_debugfs_fops = {  	.owner  = THIS_MODULE  }; - -/** +/*   * DOC: AMDGPU TA debugfs interfaces   *   * Three debugfs interfaces can be opened by a program to @@ -111,15 +126,18 @@ static const struct file_operations ta_invoke_debugfs_fops = {   *   * - For TA invoke debugfs interface:   *   Transmit buffer: + *    - TA type (4bytes)   *    - TA ID (4bytes)   *    - TA CMD ID (4bytes) - *    - TA shard buf length (4bytes) + *    - TA shard buf length + *      (4bytes, value not beyond TA shared memory size)   *    - TA shared buf   *   Receive buffer:   *    - TA shared buf   *   * - For TA unload debugfs interface:   *   Transmit buffer: + *    - TA type (4bytes)   *    - TA ID (4bytes)   */ @@ -131,59 +149,92 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t  	uint32_t copy_pos   = 0;  	int      ret        = 0; -	struct amdgpu_device *adev   = (struct amdgpu_device *)file_inode(fp)->i_private; -	struct psp_context   
*psp    = &adev->psp; -	struct ta_context    context = {0}; +	struct amdgpu_device *adev    = (struct amdgpu_device *)file_inode(fp)->i_private; +	struct psp_context   *psp     = &adev->psp; +	struct ta_context    *context = NULL;  	if (!buf)  		return -EINVAL;  	ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));  	if (ret || (!is_ta_type_valid(ta_type))) -		return -EINVAL; +		return -EFAULT;  	copy_pos += sizeof(uint32_t);  	ret = copy_from_user((void *)&ta_bin_len, &buf[copy_pos], sizeof(uint32_t));  	if (ret) -		return -EINVAL; +		return -EFAULT;  	copy_pos += sizeof(uint32_t);  	ta_bin = kzalloc(ta_bin_len, GFP_KERNEL);  	if (!ta_bin) -		ret = -ENOMEM; +		return -ENOMEM;  	if (copy_from_user((void *)ta_bin, &buf[copy_pos], ta_bin_len)) {  		ret = -EFAULT;  		goto err_free_bin;  	} -	ret = psp_ras_terminate(psp); -	if (ret) { -		dev_err(adev->dev, "Failed to unload embedded RAS TA\n"); +	/* Set TA context and functions */ +	set_ta_context_funcs(psp, ta_type, &context); + +	if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) { +		dev_err(adev->dev, "Unsupported function to terminate TA\n"); +		ret = -EOPNOTSUPP;  		goto err_free_bin;  	} -	context.ta_type             = ta_type; -	context.ta_load_type        = GFX_CMD_ID_LOAD_TA; -	context.bin_desc.fw_version = get_bin_version(ta_bin); -	context.bin_desc.size_bytes = ta_bin_len; -	context.bin_desc.start_addr = ta_bin; +	/* +	 * Allocate TA shared buf in case shared buf was freed +	 * due to loading TA failed before. +	 */ +	if (!context->mem_context.shared_buf) { +		ret = psp_ta_init_shared_buf(psp, &context->mem_context); +		if (ret) { +			ret = -ENOMEM; +			goto err_free_bin; +		} +	} + +	ret = psp_fn_ta_terminate(psp); +	if (ret || context->resp_status) { +		dev_err(adev->dev, +			"Failed to unload embedded TA (%d) and status (0x%X)\n", +			ret, context->resp_status); +		if (!ret) +			ret = -EINVAL; +		goto err_free_ta_shared_buf; +	} + +	/* Prepare TA context for TA initialization */ +	context->ta_type                     = ta_type; +	context->bin_desc.fw_version         = get_bin_version(ta_bin); +	context->bin_desc.size_bytes         = ta_bin_len; +	context->bin_desc.start_addr         = ta_bin; -	ret = psp_ta_load(psp, &context); +	if (!psp->ta_funcs->fn_ta_initialize) { +		dev_err(adev->dev, "Unsupported function to initialize TA\n"); +		ret = -EOPNOTSUPP; +		goto err_free_ta_shared_buf; +	} -	if (ret || context.resp_status) { -		dev_err(adev->dev, "TA load via debugfs failed (%d) status %d\n", -			 ret, context.resp_status); +	ret = psp_fn_ta_initialize(psp); +	if (ret || context->resp_status) { +		dev_err(adev->dev, "Failed to load TA via debugfs (%d) and status (0x%X)\n", +			ret, context->resp_status);  		if (!ret)  			ret = -EINVAL; -		goto err_free_bin; +		goto err_free_ta_shared_buf;  	} -	context.initialized = true; -	if (copy_to_user((char *)buf, (void *)&context.session_id, sizeof(uint32_t))) +	if (copy_to_user((char *)buf, (void *)&context->session_id, sizeof(uint32_t)))  		ret = -EFAULT; +err_free_ta_shared_buf: +	/* Only free TA shared buf when returns error code */ +	if (ret && context->mem_context.shared_buf) +		psp_ta_free_shared_buf(&context->mem_context);  err_free_bin:  	kfree(ta_bin); @@ -192,58 +243,85 @@ err_free_bin:  static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)  { -	uint32_t ta_id  = 0; -	int      ret    = 0; +	uint32_t ta_type    = 0; +	uint32_t ta_id      = 0; +	uint32_t copy_pos   = 0; +	int      ret        = 0; -	struct 
amdgpu_device *adev   = (struct amdgpu_device *)file_inode(fp)->i_private; -	struct psp_context   *psp    = &adev->psp; -	struct ta_context    context = {0}; +	struct amdgpu_device *adev    = (struct amdgpu_device *)file_inode(fp)->i_private; +	struct psp_context   *psp     = &adev->psp; +	struct ta_context    *context = NULL;  	if (!buf)  		return -EINVAL; -	ret = copy_from_user((void *)&ta_id, buf, sizeof(uint32_t)); +	ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t)); +	if (ret || (!is_ta_type_valid(ta_type))) +		return -EFAULT; + +	copy_pos += sizeof(uint32_t); + +	ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t));  	if (ret) -		return -EINVAL; +		return -EFAULT; -	context.session_id = ta_id; +	set_ta_context_funcs(psp, ta_type, &context); +	context->session_id = ta_id; -	ret = psp_ta_unload(psp, &context); -	if (!ret) -		context.initialized = false; +	if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) { +		dev_err(adev->dev, "Unsupported function to terminate TA\n"); +		return -EOPNOTSUPP; +	} + +	ret = psp_fn_ta_terminate(psp); +	if (ret || context->resp_status) { +		dev_err(adev->dev, "Failed to unload TA via debugfs (%d) and status (0x%X)\n", +			ret, context->resp_status); +		if (!ret) +			ret = -EINVAL; +	} + +	if (context->mem_context.shared_buf) +		psp_ta_free_shared_buf(&context->mem_context);  	return ret;  }  static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)  { +	uint32_t ta_type        = 0;  	uint32_t ta_id          = 0;  	uint32_t cmd_id         = 0;  	uint32_t shared_buf_len = 0; -	uint8_t	 *shared_buf    = NULL; +	uint8_t *shared_buf     = NULL;  	uint32_t copy_pos       = 0;  	int      ret            = 0; -	struct amdgpu_device *adev   = (struct amdgpu_device *)file_inode(fp)->i_private; -	struct psp_context   *psp    = &adev->psp; -	struct ta_context    context = {0}; +	struct amdgpu_device *adev    = (struct amdgpu_device *)file_inode(fp)->i_private; +	struct psp_context   *psp     = &adev->psp; +	struct ta_context    *context = NULL;  	if (!buf)  		return -EINVAL; +	ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t)); +	if (ret) +		return -EFAULT; +	copy_pos += sizeof(uint32_t); +  	ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t));  	if (ret) -		return -EINVAL; +		return -EFAULT;  	copy_pos += sizeof(uint32_t);  	ret = copy_from_user((void *)&cmd_id, &buf[copy_pos], sizeof(uint32_t));  	if (ret) -		return -EINVAL; +		return -EFAULT;  	copy_pos += sizeof(uint32_t);  	ret = copy_from_user((void *)&shared_buf_len, &buf[copy_pos], sizeof(uint32_t));  	if (ret) -		return -EINVAL; +		return -EFAULT;  	copy_pos += sizeof(uint32_t);  	shared_buf = kzalloc(shared_buf_len, GFP_KERNEL); @@ -254,26 +332,39 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size  		goto err_free_shared_buf;  	} -	context.session_id = ta_id; +	set_ta_context_funcs(psp, ta_type, &context); + +	if (!context->initialized) { +		dev_err(adev->dev, "TA is not initialized\n"); +		ret = -EINVAL; +		goto err_free_shared_buf; +	} + +	if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_invoke) { +		dev_err(adev->dev, "Unsupported function to invoke TA\n"); +		ret = -EOPNOTSUPP; +		goto err_free_shared_buf; +	} -	prep_ta_mem_context(psp, &context, shared_buf, shared_buf_len); +	context->session_id = ta_id; -	ret = psp_ta_invoke_indirect(psp, cmd_id, &context); +	ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len); +	if (ret) 
+		goto err_free_shared_buf; -	if (ret || context.resp_status) { -		dev_err(adev->dev, "TA invoke via debugfs failed (%d) status %d\n", -			 ret, context.resp_status); -		if (!ret) +	ret = psp_fn_ta_invoke(psp, cmd_id); +	if (ret || context->resp_status) { +		dev_err(adev->dev, "Failed to invoke TA via debugfs (%d) and status (0x%X)\n", +			ret, context->resp_status); +		if (!ret) {  			ret = -EINVAL; -		goto err_free_ta_shared_buf; +			goto err_free_shared_buf; +		}  	} -	if (copy_to_user((char *)buf, context.mem_context.shared_buf, shared_buf_len)) +	if (copy_to_user((char *)buf, context->mem_context.shared_buf, shared_buf_len))  		ret = -EFAULT; -err_free_ta_shared_buf: -	psp_ta_free_shared_buf(&context.mem_context); -  err_free_shared_buf:  	kfree(shared_buf); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h index cfc1542f63ef..14cd1c81c3e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h @@ -24,6 +24,11 @@  #ifndef __AMDGPU_PSP_TA_H__  #define __AMDGPU_PSP_TA_H__ +/* Calling set_ta_context_funcs is required before using the following macros */ +#define psp_fn_ta_initialize(psp) ((psp)->ta_funcs->fn_ta_initialize((psp))) +#define psp_fn_ta_invoke(psp, ta_cmd_id) ((psp)->ta_funcs->fn_ta_invoke((psp), (ta_cmd_id))) +#define psp_fn_ta_terminate(psp) ((psp)->ta_funcs->fn_ta_terminate((psp))) +  void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev);  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index a4b47e1bd111..ad490c1e2f57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1267,7 +1267,7 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,  	struct amdgpu_ras *con =  		container_of(attr, struct amdgpu_ras, features_attr); -	return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features); +	return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);  }  static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) @@ -1561,7 +1561,6 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *  {  	bool poison_stat = false;  	struct amdgpu_device *adev = obj->adev; -	struct ras_err_data err_data = {0, 0, 0, NULL};  	struct amdgpu_ras_block_object *block_obj =  		amdgpu_ras_get_ras_block(adev, obj->head.block, 0); @@ -1584,7 +1583,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *  	}  	if (!adev->gmc.xgmi.connected_to_cpu) -		amdgpu_umc_poison_handler(adev, &err_data, false); +		amdgpu_umc_poison_handler(adev, false);  	if (block_obj->hw_ops->handle_poison_consumption)  		poison_stat = block_obj->hw_ops->handle_poison_consumption(adev); @@ -1949,7 +1948,12 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)  		reset_context.method = AMD_RESET_METHOD_NONE;  		reset_context.reset_req_dev = adev; -		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); + +		/* Perform full reset in fatal error mode */ +		if (!amdgpu_ras_is_poison_mode_supported(ras->adev)) +			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); +		else +			clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);  		amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);  	} @@ -2344,7 +2348,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)  				adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |  							    1 << AMDGPU_RAS_BLOCK__DF); -				if 
(adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0)) +				if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0) || +				    adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 0))  					adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |  							1 << AMDGPU_RAS_BLOCK__JPEG);  				else @@ -2848,7 +2853,6 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,  	struct amdgpu_device *adev = NULL;  	uint32_t gpu_id = 0;  	uint32_t umc_inst = 0, ch_inst = 0; -	struct ras_err_data err_data = {0, 0, 0, NULL};  	/*  	 * If the error was generated in UMC_V2, which belongs to GPU UMCs, @@ -2887,31 +2891,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,  	dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",  			     umc_inst, ch_inst); -	err_data.err_addr = -		kcalloc(adev->umc.max_ras_err_cnt_per_query, -			sizeof(struct eeprom_table_record), GFP_KERNEL); -	if (!err_data.err_addr) { -		dev_warn(adev->dev, -			"Failed to alloc memory for umc error record in mca notifier!\n"); +	if (!amdgpu_umc_page_retirement_mca(adev, m->addr, ch_inst, umc_inst)) +		return NOTIFY_OK; +	else  		return NOTIFY_DONE; -	} - -	/* -	 * Translate UMC channel address to Physical address -	 */ -	if (adev->umc.ras && -	    adev->umc.ras->convert_ras_error_address) -		adev->umc.ras->convert_ras_error_address(adev, -			&err_data, m->addr, ch_inst, umc_inst); - -	if (amdgpu_bad_page_threshold != 0) { -		amdgpu_ras_add_bad_pages(adev, err_data.err_addr, -						err_data.err_addr_cnt); -		amdgpu_ras_save_bad_pages(adev); -	} - -	kfree(err_data.err_addr); -	return NOTIFY_OK;  }  static struct notifier_block amdgpu_bad_page_nb = { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 84c241b9a2a1..2d9f3f4cd79e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -33,12 +33,29 @@  #include "amdgpu_reset.h" -#define EEPROM_I2C_MADDR_VEGA20         0x0 -#define EEPROM_I2C_MADDR_ARCTURUS       0x40000 -#define EEPROM_I2C_MADDR_ARCTURUS_D342  0x0 -#define EEPROM_I2C_MADDR_SIENNA_CICHLID 0x0 -#define EEPROM_I2C_MADDR_ALDEBARAN      0x0 -#define EEPROM_I2C_MADDR_SMU_13_0_0     (0x54UL << 16) +/* These are memory addresses as would be seen by one or more EEPROM + * chips strung on the I2C bus, usually by manipulating pins 1-3 of a + * set of EEPROM devices. They form a continuous memory space. + * + * The I2C device address includes the device type identifier, 1010b, + * which is a reserved value and indicates that this is an I2C EEPROM + * device. It also includes the top 3 bits of the 19 bit EEPROM memory + * address, namely bits 18, 17, and 16. This makes up the 7 bit + * address sent on the I2C bus with bit 0 being the direction bit, + * which is not represented here, and sent by the hardware directly. + * + * For instance, + *   50h = 1010000b => device type identifier 1010b, bits 18:16 = 000b, address 0. + *   54h = 1010100b => --"--, bits 18:16 = 100b, address 40000h. + *   56h = 1010110b => --"--, bits 18:16 = 110b, address 60000h. + * Depending on the size of the I2C EEPROM device(s), bits 18:16 may + * address memory in a device or a device on the I2C bus, depending on + * the status of pins 1-3. See top of amdgpu_eeprom.c. + * + * The RAS table lives either at address 0 or address 40000h of EEPROM. 
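+ * + * For example, the RAS table at EEPROM_I2C_MADDR_4 (memory address 40000h, + * defined below) is reached through the 7-bit I2C device address 54h, since + * (1010b << 3) | (0x40000 >> 16) = 0x50 | 0x4 = 0x54.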
+ */ +#define EEPROM_I2C_MADDR_0      0x0 +#define EEPROM_I2C_MADDR_4      0x40000  /*   * The 2 macros bellow represent the actual size in bytes that @@ -90,6 +107,16 @@  static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)  { +	if (adev->asic_type == CHIP_IP_DISCOVERY) { +		switch (adev->ip_versions[MP1_HWIP][0]) { +		case IP_VERSION(13, 0, 0): +		case IP_VERSION(13, 0, 10): +			return true; +		default: +			return false; +		} +	} +  	return  adev->asic_type == CHIP_VEGA20 ||  		adev->asic_type == CHIP_ARCTURUS ||  		adev->asic_type == CHIP_SIENNA_CICHLID || @@ -107,16 +134,30 @@ static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev,  	if (strnstr(atom_ctx->vbios_version,  	            "D342",  		    sizeof(atom_ctx->vbios_version))) -		control->i2c_address = EEPROM_I2C_MADDR_ARCTURUS_D342; +		control->i2c_address = EEPROM_I2C_MADDR_0;  	else -		control->i2c_address = EEPROM_I2C_MADDR_ARCTURUS; +		control->i2c_address = EEPROM_I2C_MADDR_4;  	return true;  } +static bool __get_eeprom_i2c_addr_ip_discovery(struct amdgpu_device *adev, +				       struct amdgpu_ras_eeprom_control *control) +{ +	switch (adev->ip_versions[MP1_HWIP][0]) { +	case IP_VERSION(13, 0, 0): +	case IP_VERSION(13, 0, 10): +		control->i2c_address = EEPROM_I2C_MADDR_4; +		return true; +	default: +		return false; +	} +} +  static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,  				  struct amdgpu_ras_eeprom_control *control)  { +	struct atom_context *atom_ctx = adev->mode_info.atom_context;  	u8 i2c_addr;  	if (!control) @@ -139,27 +180,34 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,  	switch (adev->asic_type) {  	case CHIP_VEGA20: -		control->i2c_address = EEPROM_I2C_MADDR_VEGA20; +		control->i2c_address = EEPROM_I2C_MADDR_0;  		break;  	case CHIP_ARCTURUS:  		return __get_eeprom_i2c_addr_arct(adev, control);  	case CHIP_SIENNA_CICHLID: -		control->i2c_address = EEPROM_I2C_MADDR_SIENNA_CICHLID; +		control->i2c_address = EEPROM_I2C_MADDR_0;  		break;  	case CHIP_ALDEBARAN: -		control->i2c_address = EEPROM_I2C_MADDR_ALDEBARAN; +		if (strnstr(atom_ctx->vbios_version, "D673", +			    sizeof(atom_ctx->vbios_version))) +			control->i2c_address = EEPROM_I2C_MADDR_4; +		else +			control->i2c_address = EEPROM_I2C_MADDR_0;  		break; +	case CHIP_IP_DISCOVERY: +		return __get_eeprom_i2c_addr_ip_discovery(adev, control); +  	default:  		return false;  	}  	switch (adev->ip_versions[MP1_HWIP][0]) {  	case IP_VERSION(13, 0, 0): -		control->i2c_address = EEPROM_I2C_MADDR_SMU_13_0_0; +		control->i2c_address = EEPROM_I2C_MADDR_4;  		break;  	default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h index 6546552e596c..5c4f93ee0c57 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_res_cursor.h @@ -62,7 +62,7 @@ static inline void amdgpu_res_first(struct ttm_resource *res,  	if (!res)  		goto fallback; -	BUG_ON(start + size > res->num_pages << PAGE_SHIFT); +	BUG_ON(start + size > res->size);  	cur->mem_type = res->mem_type; @@ -110,7 +110,7 @@ fallback:  	cur->size = size;  	cur->remaining = size;  	cur->node = NULL; -	WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT); +	WARN_ON(res && start + size > res->size);  	return;  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index d3558c34d406..dc474b809604 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -569,3 +569,15 @@ 
int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)  	return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);  } + +void amdgpu_ring_ib_begin(struct amdgpu_ring *ring) +{ +	if (ring->is_sw_ring) +		amdgpu_sw_ring_ib_begin(ring); +} + +void amdgpu_ring_ib_end(struct amdgpu_ring *ring) +{ +	if (ring->is_sw_ring) +		amdgpu_sw_ring_ib_end(ring); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 82c178a9033a..f752c7ae7f60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -39,6 +39,7 @@ struct amdgpu_vm;  #define AMDGPU_MAX_RINGS		28  #define AMDGPU_MAX_HWIP_RINGS		8  #define AMDGPU_MAX_GFX_RINGS		2 +#define AMDGPU_MAX_SW_GFX_RINGS         2  #define AMDGPU_MAX_COMPUTE_RINGS	8  #define AMDGPU_MAX_VCE_RINGS		3  #define AMDGPU_MAX_UVD_ENC_RINGS	2 @@ -59,6 +60,7 @@ enum amdgpu_ring_priority_level {  #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)  #define AMDGPU_FENCE_FLAG_INT           (1 << 1)  #define AMDGPU_FENCE_FLAG_TC_WB_ONLY    (1 << 2) +#define AMDGPU_FENCE_FLAG_EXEC          (1 << 3)  #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched) @@ -143,8 +145,13 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,  				      uint32_t wait_seq,  				      signed long timeout);  unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); +  void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop); +u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring); +void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, +					 ktime_t timestamp); +  /*   * Rings.   */ @@ -279,6 +286,10 @@ struct amdgpu_ring {  	bool			is_mes_queue;  	uint32_t		hw_queue_id;  	struct amdgpu_mes_ctx_data *mes_ctx; + +	bool            is_sw_ring; +	unsigned int    entry_index; +  };  #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib))) @@ -307,6 +318,9 @@ struct amdgpu_ring {  #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)  int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); +void amdgpu_ring_ib_begin(struct amdgpu_ring *ring); +void amdgpu_ring_ib_end(struct amdgpu_ring *ring); +  void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);  void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);  void amdgpu_ring_commit(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c new file mode 100644 index 000000000000..62079f0e3ee8 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c @@ -0,0 +1,514 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/slab.h> +#include <drm/drm_print.h> + +#include "amdgpu_ring_mux.h" +#include "amdgpu_ring.h" +#include "amdgpu.h" + +#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2) +#define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000 + +static const struct ring_info { +	unsigned int hw_pio; +	const char *ring_name; +} sw_ring_info[] = { +	{ AMDGPU_RING_PRIO_DEFAULT, "gfx_low"}, +	{ AMDGPU_RING_PRIO_2, "gfx_high"}, +}; + +static struct kmem_cache *amdgpu_mux_chunk_slab; + +static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct amdgpu_ring_mux *mux, +								struct amdgpu_ring *ring) +{ +	return ring->entry_index < mux->ring_entry_size ? +			&mux->ring_entry[ring->entry_index] : NULL; +} + +/* copy packages on sw ring range[begin, end) */ +static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux, +						  struct amdgpu_ring *ring, +						  u64 s_start, u64 s_end) +{ +	u64 start, end; +	struct amdgpu_ring *real_ring = mux->real_ring; + +	start = s_start & ring->buf_mask; +	end = s_end & ring->buf_mask; + +	if (start == end) { +		DRM_ERROR("no more data copied from sw ring\n"); +		return; +	} +	if (start > end) { +		amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - start); +		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], +					   (ring->ring_size >> 2) - start); +		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end); +	} else { +		amdgpu_ring_alloc(real_ring, end - start); +		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], end - start); +	} +} + +static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux) +{ +	struct amdgpu_mux_entry *e = NULL; +	struct amdgpu_mux_chunk *chunk; +	uint32_t seq, last_seq; +	int i; + +	/*find low priority entries:*/ +	if (!mux->s_resubmit) +		return; + +	for (i = 0; i < mux->num_ring_entries; i++) { +		if (mux->ring_entry[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) { +			e = &mux->ring_entry[i]; +			break; +		} +	} + +	if (!e) { +		DRM_ERROR("%s no low priority ring found\n", __func__); +		return; +	} + +	last_seq = atomic_read(&e->ring->fence_drv.last_seq); +	seq = mux->seqno_to_resubmit; +	if (last_seq < seq) { +		/*resubmit all the fences between (last_seq, seq]*/ +		list_for_each_entry(chunk, &e->list, entry) { +			if (chunk->sync_seq > last_seq && chunk->sync_seq <= seq) { +				amdgpu_fence_update_start_timestamp(e->ring, +								    chunk->sync_seq, +								    ktime_get()); +				amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring, +								      chunk->start, +								      chunk->end); +				mux->wptr_resubmit = chunk->end; +				amdgpu_ring_commit(mux->real_ring); +			} +		} +	} + +	del_timer(&mux->resubmit_timer); +	mux->s_resubmit = false; +} + +static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux) +{ +	mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT); +} + +static void amdgpu_mux_resubmit_fallback(struct timer_list *t) +{ +	struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer); + +	if (!spin_trylock(&mux->lock)) { +		amdgpu_ring_mux_schedule_resubmit(mux); +		DRM_ERROR("reschedule resubmit\n"); +		return; +	} +	amdgpu_mux_resubmit_chunks(mux); +	spin_unlock(&mux->lock); +} + +int 
amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, +			 unsigned int entry_size) +{ +	mux->real_ring = ring; +	mux->num_ring_entries = 0; + +	mux->ring_entry = kcalloc(entry_size, sizeof(struct amdgpu_mux_entry), GFP_KERNEL); +	if (!mux->ring_entry) +		return -ENOMEM; + +	mux->ring_entry_size = entry_size; +	mux->s_resubmit = false; + +	amdgpu_mux_chunk_slab = kmem_cache_create("amdgpu_mux_chunk", +						  sizeof(struct amdgpu_mux_chunk), 0, +						  SLAB_HWCACHE_ALIGN, NULL); +	if (!amdgpu_mux_chunk_slab) { +		DRM_ERROR("create amdgpu_mux_chunk cache failed\n"); +		return -ENOMEM; +	} + +	spin_lock_init(&mux->lock); +	timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0); + +	return 0; +} + +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux) +{ +	struct amdgpu_mux_entry *e; +	struct amdgpu_mux_chunk *chunk, *chunk2; +	int i; + +	for (i = 0; i < mux->num_ring_entries; i++) { +		e = &mux->ring_entry[i]; +		list_for_each_entry_safe(chunk, chunk2, &e->list, entry) { +			list_del(&chunk->entry); +			kmem_cache_free(amdgpu_mux_chunk_slab, chunk); +		} +	} +	kmem_cache_destroy(amdgpu_mux_chunk_slab); +	kfree(mux->ring_entry); +	mux->ring_entry = NULL; +	mux->num_ring_entries = 0; +	mux->ring_entry_size = 0; +} + +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ +	struct amdgpu_mux_entry *e; + +	if (mux->num_ring_entries >= mux->ring_entry_size) { +		DRM_ERROR("add sw ring exceeding max entry size\n"); +		return -ENOENT; +	} + +	e = &mux->ring_entry[mux->num_ring_entries]; +	ring->entry_index = mux->num_ring_entries; +	e->ring = ring; + +	INIT_LIST_HEAD(&e->list); +	mux->num_ring_entries += 1; +	return 0; +} + +void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr) +{ +	struct amdgpu_mux_entry *e; + +	spin_lock(&mux->lock); + +	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) +		amdgpu_mux_resubmit_chunks(mux); + +	e = amdgpu_ring_mux_sw_entry(mux, ring); +	if (!e) { +		DRM_ERROR("cannot find entry for sw ring\n"); +		spin_unlock(&mux->lock); +		return; +	} + +	/* We could skip this set wptr as preemption in process. 
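+	 * The pending packages are not lost; they are copied onto the real ring +	 * later by the resubmit path once the preemption has completed.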
*/ +	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && mux->pending_trailing_fence_signaled) { +		spin_unlock(&mux->lock); +		return; +	} + +	e->sw_cptr = e->sw_wptr; +	/* Update cptr if the package was already copied by the resubmit functions */ +	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && e->sw_cptr < mux->wptr_resubmit) +		e->sw_cptr = mux->wptr_resubmit; +	e->sw_wptr = wptr; +	e->start_ptr_in_hw_ring = mux->real_ring->wptr; + +	/* Skip copying for the packages already resubmitted. */ +	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT || mux->wptr_resubmit < wptr) { +		amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr); +		e->end_ptr_in_hw_ring = mux->real_ring->wptr; +		amdgpu_ring_commit(mux->real_ring); +	} else { +		e->end_ptr_in_hw_ring = mux->real_ring->wptr; +	} +	spin_unlock(&mux->lock); +} + +u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ +	struct amdgpu_mux_entry *e; + +	e = amdgpu_ring_mux_sw_entry(mux, ring); +	if (!e) { +		DRM_ERROR("cannot find entry for sw ring\n"); +		return 0; +	} + +	return e->sw_wptr; +} + +/** + * amdgpu_ring_mux_get_rptr - get the readptr of the software ring + * @mux: the multiplexer the software rings attach to + * @ring: the software ring of which we calculate the readptr + * + * The return value of the readptr is not precise while the other rings could + * write data onto the real ring buffer. After overwriting on the real ring, we + * cannot decide if our packages have been executed or not read yet. However, + * this function is only called by tools such as umr to collect the latest + * packages for hang analysis. We assume the hang happens near our latest + * submit. Thus we use the following logic to give a clue: + * If the readptr is between start and end, then we return the copy pointer + * plus the distance from start to readptr. If the readptr is before start, we + * return the copy pointer. Lastly, if the readptr is past end, we return the + * write pointer.
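+ * + * For example, with start = 0x100, end = 0x200 and a copy pointer of 0x40, a + * real ring readptr of 0x180 maps to a software rptr of 0x40 + (0x180 - 0x100) + * = 0xc0, a readptr of 0x80 reports 0x40, and a readptr of 0x240 reports the + * software write pointer.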
+ */ +u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ +	struct amdgpu_mux_entry *e; +	u64 readp, offset, start, end; + +	e = amdgpu_ring_mux_sw_entry(mux, ring); +	if (!e) { +		DRM_ERROR("no sw entry found!\n"); +		return 0; +	} + +	readp = amdgpu_ring_get_rptr(mux->real_ring); + +	start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask; +	end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask; +	if (start > end) { +		if (readp <= end) +			readp += mux->real_ring->ring_size >> 2; +		end += mux->real_ring->ring_size >> 2; +	} + +	if (start <= readp && readp <= end) { +		offset = readp - start; +		e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask; +	} else if (readp < start) { +		e->sw_rptr = e->sw_cptr; +	} else { +		/* end < readptr */ +		e->sw_rptr = e->sw_wptr; +	} + +	return e->sw_rptr; +} + +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + +	WARN_ON(!ring->is_sw_ring); +	return amdgpu_ring_mux_get_rptr(mux, ring); +} + +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + +	WARN_ON(!ring->is_sw_ring); +	return amdgpu_ring_mux_get_wptr(mux, ring); +} + +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + +	WARN_ON(!ring->is_sw_ring); +	amdgpu_ring_mux_set_wptr(mux, ring, ring->wptr); +} + +/* Override insert_nop to prevent emitting nops to the software rings */ +void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ +	WARN_ON(!ring->is_sw_ring); +} + +const char *amdgpu_sw_ring_name(int idx) +{ +	return idx < ARRAY_SIZE(sw_ring_info) ? +		sw_ring_info[idx].ring_name : NULL; +} + +unsigned int amdgpu_sw_ring_priority(int idx) +{ +	return idx < ARRAY_SIZE(sw_ring_info) ? +		sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT; +} + +/* Scan the low priority rings for an unsignaled fence while the high priority ring has no fence pending. */ +static int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux) +{ +	struct amdgpu_ring *ring; +	int i, need_preempt; + +	need_preempt = 0; +	for (i = 0; i < mux->num_ring_entries; i++) { +		ring = mux->ring_entry[i].ring; +		if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT && +		    amdgpu_fence_count_emitted(ring) > 0) +			return 0; +		if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && +		    amdgpu_fence_last_unsignaled_time_us(ring) > +		    AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US) +			need_preempt = 1; +	} +	return need_preempt && !mux->s_resubmit; +} + +/* Trigger Mid-Command Buffer Preemption (MCBP) and find if we need to resubmit.
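+ * The pending_trailing_fence_signaled flag is raised under mux->lock so that + * amdgpu_mcbp_handle_trailing_fence_irq() knows a preemption request is in + * flight when the trailing fence interrupt fires.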
*/ +static int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux) +{ +	int r; + +	spin_lock(&mux->lock); +	mux->pending_trailing_fence_signaled = true; +	r = amdgpu_ring_preempt_ib(mux->real_ring); +	spin_unlock(&mux->lock); +	return r; +} + +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + +	WARN_ON(!ring->is_sw_ring); +	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) { +		if (amdgpu_mcbp_scan(mux) > 0) +			amdgpu_mcbp_trigger_preempt(mux); +		return; +	} + +	amdgpu_ring_mux_start_ib(mux, ring); +} + +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring) +{ +	struct amdgpu_device *adev = ring->adev; +	struct amdgpu_ring_mux *mux = &adev->gfx.muxer; + +	WARN_ON(!ring->is_sw_ring); +	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) +		return; +	amdgpu_ring_mux_end_ib(mux, ring); +} + +void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ +	struct amdgpu_mux_entry *e; +	struct amdgpu_mux_chunk *chunk; + +	spin_lock(&mux->lock); +	amdgpu_mux_resubmit_chunks(mux); +	spin_unlock(&mux->lock); + +	e = amdgpu_ring_mux_sw_entry(mux, ring); +	if (!e) { +		DRM_ERROR("cannot find entry!\n"); +		return; +	} + +	chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL); +	if (!chunk) { +		DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n"); +		return; +	} + +	chunk->start = ring->wptr; +	list_add_tail(&chunk->entry, &e->list); +} + +static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ +	uint32_t last_seq = 0; +	struct amdgpu_mux_entry *e; +	struct amdgpu_mux_chunk *chunk, *tmp; + +	e = amdgpu_ring_mux_sw_entry(mux, ring); +	if (!e) { +		DRM_ERROR("cannot find entry!\n"); +		return; +	} + +	last_seq = atomic_read(&ring->fence_drv.last_seq); + +	list_for_each_entry_safe(chunk, tmp, &e->list, entry) { +		if (chunk->sync_seq <= last_seq) { +			list_del(&chunk->entry); +			kmem_cache_free(amdgpu_mux_chunk_slab, chunk); +		} +	} +} + +void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring) +{ +	struct amdgpu_mux_entry *e; +	struct amdgpu_mux_chunk *chunk; + +	e = amdgpu_ring_mux_sw_entry(mux, ring); +	if (!e) { +		DRM_ERROR("cannot find entry!\n"); +		return; +	} + +	chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry); +	if (!chunk) { +		DRM_ERROR("cannot find chunk!\n"); +		return; +	} + +	chunk->end = ring->wptr; +	chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq); + +	scan_and_remove_signaled_chunk(mux, ring); +} + +bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux) +{ +	struct amdgpu_mux_entry *e; +	struct amdgpu_ring *ring = NULL; +	int i; + +	if (!mux->pending_trailing_fence_signaled) +		return false; + +	if (mux->real_ring->trail_seq != le32_to_cpu(*mux->real_ring->trail_fence_cpu_addr)) +		return false; + +	for (i = 0; i < mux->num_ring_entries; i++) { +		e = &mux->ring_entry[i]; +		if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) { +			ring = e->ring; +			break; +		} +	} + +	if (!ring) { +		DRM_ERROR("cannot find low priority ring\n"); +		return false; +	} + +	amdgpu_fence_process(ring); +	if (amdgpu_fence_count_emitted(ring) > 0) { +		mux->s_resubmit = true; +		mux->seqno_to_resubmit = ring->fence_drv.sync_seq; +		amdgpu_ring_mux_schedule_resubmit(mux); +	} + +	mux->pending_trailing_fence_signaled = false; +	return true; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h new file mode 100644 
index 000000000000..4be45fc14954 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h @@ -0,0 +1,103 @@ +/* + * Copyright 2022 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_RING_MUX__ +#define __AMDGPU_RING_MUX__ + +#include <linux/timer.h> +#include <linux/spinlock.h> +#include "amdgpu_ring.h" + +struct amdgpu_ring; + +/** + * struct amdgpu_mux_entry - the entry recording software rings copying information. + * @ring: the pointer to the software ring. + * @start_ptr_in_hw_ring: last start location copied to in the hardware ring. + * @end_ptr_in_hw_ring: last end location copied to in the hardware ring. + * @sw_cptr: the position of the copy pointer in the sw ring. + * @sw_rptr: the read pointer in software ring. + * @sw_wptr: the write pointer in software ring. + * @list: list head for amdgpu_mux_chunk + */ +struct amdgpu_mux_entry { +	struct amdgpu_ring      *ring; +	u64                     start_ptr_in_hw_ring; +	u64                     end_ptr_in_hw_ring; +	u64                     sw_cptr; +	u64                     sw_rptr; +	u64                     sw_wptr; +	struct list_head        list; +}; + +struct amdgpu_ring_mux { +	struct amdgpu_ring      *real_ring; + +	struct amdgpu_mux_entry *ring_entry; +	unsigned int            num_ring_entries; +	unsigned int            ring_entry_size; +	/*the lock for copy data from different software rings*/ +	spinlock_t              lock; +	bool                    s_resubmit; +	uint32_t                seqno_to_resubmit; +	u64                     wptr_resubmit; +	struct timer_list       resubmit_timer; + +	bool                    pending_trailing_fence_signaled; +}; + +/** + * struct amdgpu_mux_chunk - save the location of indirect buffer's package on software rings. + * @entry: the list entry. + * @sync_seq: the fence seqno related with the saved IB. + * @start: start location on the software ring. + * @end: end location on the software ring.
+ */ +struct amdgpu_mux_chunk { +	struct list_head        entry; +	uint32_t                sync_seq; +	u64                     start; +	u64                     end; +}; + +int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, +			 unsigned int entry_size); +void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux); +int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr); +u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring); +bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux); + +u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring); +u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring); +void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring); +void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); +void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring); +void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring); +const char *amdgpu_sw_ring_name(int idx); +unsigned int amdgpu_sw_ring_priority(int idx); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c index cc7597a15fe9..2c1d82fc4c34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c @@ -121,6 +121,7 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u  	switch (op) {  	case 1: +		mutex_lock(&psp->securedisplay_context.mutex);  		psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,  			TA_SECUREDISPLAY_COMMAND__QUERY_TA);  		ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA); @@ -131,8 +132,10 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u  			else  				psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);  		} +		mutex_unlock(&psp->securedisplay_context.mutex);  		break;  	case 2: +		mutex_lock(&psp->securedisplay_context.mutex);  		psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,  			TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);  		securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_id; @@ -146,6 +149,7 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u  				psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);  			}  		} +		mutex_unlock(&psp->securedisplay_context.mutex);  		break;  	default:  		dev_err(adev->dev, "Invalid input: %s\n", str); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 090e66a1b284..dcd8c066bc1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -259,6 +259,14 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,  	return 0;  } +/* Free the entry back to the slab */ +static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e) +{ +	hash_del(&e->node); +	dma_fence_put(e->fence); +	kmem_cache_free(amdgpu_sync_slab, e); +} +  /**   * amdgpu_sync_peek_fence - get the next fence not signaled yet   * @@ -280,9 +288,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,  		struct 
drm_sched_fence *s_fence = to_drm_sched_fence(f);  		if (dma_fence_is_signaled(f)) { -			hash_del(&e->node); -			dma_fence_put(f); -			kmem_cache_free(amdgpu_sync_slab, e); +			amdgpu_sync_entry_free(e);  			continue;  		}  		if (ring && s_fence) { @@ -355,15 +361,44 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)  			if (r)  				return r;  		} else { -			hash_del(&e->node); -			dma_fence_put(f); -			kmem_cache_free(amdgpu_sync_slab, e); +			amdgpu_sync_entry_free(e);  		}  	}  	return 0;  } +/** + * amdgpu_sync_push_to_job - push fences into job + * @sync: sync object to get the fences from + * @job: job to push the fences into + * + * Add all unsignaled fences from sync to job. + */ +int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job) +{ +	struct amdgpu_sync_entry *e; +	struct hlist_node *tmp; +	struct dma_fence *f; +	int i, r; + +	hash_for_each_safe(sync->fences, i, tmp, e, node) { +		f = e->fence; +		if (dma_fence_is_signaled(f)) { +			amdgpu_sync_entry_free(e); +			continue; +		} + +		dma_fence_get(f); +		r = drm_sched_job_add_dependency(&job->base, f); +		if (r) { +			dma_fence_put(f); +			return r; +		} +	} +	return 0; +} +  int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)  {  	struct amdgpu_sync_entry *e; @@ -375,9 +410,7 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)  		if (r)  			return r; -		hash_del(&e->node); -		dma_fence_put(e->fence); -		kmem_cache_free(amdgpu_sync_slab, e); +		amdgpu_sync_entry_free(e);  	}  	return 0; @@ -396,11 +429,8 @@ void amdgpu_sync_free(struct amdgpu_sync *sync)  	struct hlist_node *tmp;  	unsigned int i; -	hash_for_each_safe(sync->fences, i, tmp, e, node) { -		hash_del(&e->node); -		dma_fence_put(e->fence); -		kmem_cache_free(amdgpu_sync_slab, e); -	} +	hash_for_each_safe(sync->fences, i, tmp, e, node) +		amdgpu_sync_entry_free(e);  }  /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index 2d5c613cda10..cf1e9e858efd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -30,6 +30,7 @@ struct dma_fence;  struct dma_resv;  struct amdgpu_device;  struct amdgpu_ring; +struct amdgpu_job;  enum amdgpu_sync_mode {  	AMDGPU_SYNC_ALWAYS, @@ -54,6 +55,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,  				     struct amdgpu_ring *ring);  struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);  int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); +int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job);  int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);  void amdgpu_sync_free(struct amdgpu_sync *sync);  int amdgpu_sync_init(void); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 5e6ddc7e101c..677ad2016976 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -127,7 +127,7 @@ TRACE_EVENT(amdgpu_bo_create,  	    TP_fast_assign(  			   __entry->bo = bo; -			   __entry->pages = bo->tbo.resource->num_pages; +			   __entry->pages = PFN_UP(bo->tbo.resource->size);  			   __entry->type = bo->tbo.resource->mem_type;  			   __entry->prefer = bo->preferred_domains;  			   __entry->allow = bo->allowed_domains; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index b64938ed8cb6..55e0284b2bdd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -58,6 +58,7 @@  #include "amdgpu_amdkfd.h"  #include "amdgpu_sdma.h"  #include "amdgpu_ras.h" +#include "amdgpu_hmm.h"  #include "amdgpu_atomfirmware.h"  #include "amdgpu_res_cursor.h"  #include "bif/bif_4_1_d.h" @@ -189,7 +190,6 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,  	struct amdgpu_device *adev = ring->adev;  	unsigned offset, num_pages, num_dw, num_bytes;  	uint64_t src_addr, dst_addr; -	struct dma_fence *fence;  	struct amdgpu_job *job;  	void *cpu_addr;  	uint64_t flags; @@ -229,7 +229,9 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,  	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);  	num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE; -	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, +	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, +				     AMDGPU_FENCE_OWNER_UNDEFINED, +				     num_dw * 4 + num_bytes,  				     AMDGPU_IB_POOL_DELAYED, &job);  	if (r)  		return r; @@ -269,18 +271,8 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,  		}  	} -	r = amdgpu_job_submit(job, &adev->mman.entity, -			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence); -	if (r) -		goto error_free; - -	dma_fence_put(fence); - -	return r; - -error_free: -	amdgpu_job_free(job); -	return r; +	dma_fence_put(amdgpu_job_submit(job)); +	return 0;  }  /** @@ -381,7 +373,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,  	dst.offset = 0;  	r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, -				       new_mem->num_pages << PAGE_SHIFT, +				       new_mem->size,  				       amdgpu_bo_encrypted(abo),  				       bo->base.resv, &fence);  	if (r) @@ -424,7 +416,7 @@ error:  static bool amdgpu_mem_visible(struct amdgpu_device *adev,  			       struct ttm_resource *mem)  { -	u64 mem_size = (u64)mem->num_pages << PAGE_SHIFT; +	u64 mem_size = (u64)mem->size;  	struct amdgpu_res_cursor cursor;  	u64 end; @@ -571,7 +563,7 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_device *bdev,  				     struct ttm_resource *mem)  {  	struct amdgpu_device *adev = amdgpu_ttm_adev(bdev); -	size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT; +	size_t bus_size = (size_t)mem->size;  	switch (mem->mem_type) {  	case TTM_PL_SYSTEM: @@ -691,9 +683,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,  	}  	readonly = amdgpu_ttm_tt_is_readonly(ttm); -	r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start, -				       ttm->num_pages, range, readonly, -				       true, NULL); +	r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages, +				       readonly, NULL, pages, range);  out_unlock:  	mmap_read_unlock(mm);  	if (r) @@ -704,8 +695,19 @@ out_unlock:  	return r;  } +/* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations + */ +void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, +				      struct hmm_range *range) +{ +	struct amdgpu_ttm_tt *gtt = (void *)ttm; + +	if (gtt && gtt->userptr && range) +		amdgpu_hmm_range_get_pages_done(range); +} +  /* - * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change + * amdgpu_ttm_tt_get_user_pages_done - stop HMM track the CPU page table change   * Check if the pages backing this ttm range have been invalidated   *   * Returns: true if pages are still valid @@ -723,10 +725,6 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,  	WARN_ONCE(!range->hmm_pfns, "No user pages to check\n"); -	/* -	 * FIXME: Must always hold notifier_lock for this, and must -	 * 
not ignore the return code. -	 */  	return !amdgpu_hmm_range_get_pages_done(range);  }  #endif @@ -1154,8 +1152,9 @@ int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,   * @addr:  The address in the current tasks VM space to use   * @flags: Requirements of userptr object.   * - * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages - * to current task + * Called by amdgpu_gem_userptr_ioctl() and kfd_ioctl_alloc_memory_of_gpu() to + * bind userptr pages to current task and by kfd_ioctl_acquire_vm() to + * initialize GPU VM for a KFD process.   */  int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,  			      uint64_t addr, uint32_t flags) @@ -1394,7 +1393,8 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,  }  static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo, -					unsigned long offset, void *buf, int len, int write) +					unsigned long offset, void *buf, +					int len, int write)  {  	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);  	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); @@ -1418,26 +1418,27 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,  		memcpy(adev->mman.sdma_access_ptr, buf, len);  	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); -	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, &job); +	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, +				     AMDGPU_FENCE_OWNER_UNDEFINED, +				     num_dw * 4, AMDGPU_IB_POOL_DELAYED, +				     &job);  	if (r)  		goto out;  	amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm); -	src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) + src_mm.start; +	src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) + +		src_mm.start;  	dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);  	if (write)  		swap(src_addr, dst_addr); -	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, PAGE_SIZE, false); +	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, +				PAGE_SIZE, false);  	amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);  	WARN_ON(job->ibs[0].length_dw > num_dw); -	r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence); -	if (r) { -		amdgpu_job_free(job); -		goto out; -	} +	fence = amdgpu_job_submit(job);  	if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))  		r = -ETIMEDOUT; @@ -1537,6 +1538,23 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)  		NULL, &adev->mman.fw_vram_usage_va);  } +/* + * Driver Reservation functions + */ +/** + * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram + * + * @adev: amdgpu_device pointer + * + * free drv reserved vram if it has been reserved. 
+ */ +static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev) +{ +	amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo, +						  NULL, +						  &adev->mman.drv_vram_usage_va); +} +  /**   * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw   * @@ -1558,11 +1576,35 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)  	return amdgpu_bo_create_kernel_at(adev,  					  adev->mman.fw_vram_usage_start_offset,  					  adev->mman.fw_vram_usage_size, -					  AMDGPU_GEM_DOMAIN_VRAM,  					  &adev->mman.fw_vram_usage_reserved_bo,  					  &adev->mman.fw_vram_usage_va);  } +/** + * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver + * + * @adev: amdgpu_device pointer + * + * create bo vram reservation from drv. + */ +static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev) +{ +	u64 vram_size = adev->gmc.visible_vram_size; + +	adev->mman.drv_vram_usage_va = NULL; +	adev->mman.drv_vram_usage_reserved_bo = NULL; + +	if (adev->mman.drv_vram_usage_size == 0 || +	    adev->mman.drv_vram_usage_size > vram_size) +		return 0; + +	return amdgpu_bo_create_kernel_at(adev, +					  adev->mman.drv_vram_usage_start_offset, +					  adev->mman.drv_vram_usage_size, +					  &adev->mman.drv_vram_usage_reserved_bo, +					  &adev->mman.drv_vram_usage_va); +} +  /*   * Memoy training reservation functions   */ @@ -1639,7 +1681,6 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)  		ret = amdgpu_bo_create_kernel_at(adev,  					 ctx->c2p_train_data_offset,  					 ctx->train_data_size, -					 AMDGPU_GEM_DOMAIN_VRAM,  					 &ctx->c2p_bo,  					 NULL);  		if (ret) { @@ -1653,7 +1694,6 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)  	ret = amdgpu_bo_create_kernel_at(adev,  				adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,  				adev->mman.discovery_tmr_size, -				AMDGPU_GEM_DOMAIN_VRAM,  				&adev->mman.discovery_memory,  				NULL);  	if (ret) { @@ -1731,6 +1771,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)  	}  	/* +	 *The reserved vram for driver must be pinned to the specified +	 *place on the VRAM, so reserve it early. +	 */ +	r = amdgpu_ttm_drv_reserve_vram_init(adev); +	if (r) +		return r; + +	/*  	 * only NAVI10 and onwards ASIC support for IP discovery.  	 * If IP discovery enabled, a block of memory should be  	 * reserved for IP discovey. @@ -1746,21 +1794,18 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)  	 * avoid display artifacts while transitioning between pre-OS  	 * and driver.  
*/  	r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size, -				       AMDGPU_GEM_DOMAIN_VRAM,  				       &adev->mman.stolen_vga_memory,  				       NULL);  	if (r)  		return r;  	r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,  				       adev->mman.stolen_extended_size, -				       AMDGPU_GEM_DOMAIN_VRAM,  				       &adev->mman.stolen_extended_memory,  				       NULL);  	if (r)  		return r;  	r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,  				       adev->mman.stolen_reserved_size, -				       AMDGPU_GEM_DOMAIN_VRAM,  				       &adev->mman.stolen_reserved_memory,  				       NULL);  	if (r) @@ -1855,6 +1900,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)  	amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,  					&adev->mman.sdma_access_ptr);  	amdgpu_ttm_fw_reserve_vram_fini(adev); +	amdgpu_ttm_drv_reserve_vram_fini(adev);  	if (drm_dev_enter(adev_to_drm(adev), &idx)) { @@ -1936,7 +1982,9 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,  		AMDGPU_IB_POOL_DELAYED;  	int r; -	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, job); +	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity, +				     AMDGPU_FENCE_OWNER_UNDEFINED, +				     num_dw * 4, pool, job);  	if (r)  		return r; @@ -1946,17 +1994,11 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,  							adev->gart.bo);  		(*job)->vm_needs_flush = true;  	} -	if (resv) { -		r = amdgpu_sync_resv(adev, &(*job)->sync, resv, -				     AMDGPU_SYNC_ALWAYS, -				     AMDGPU_FENCE_OWNER_UNDEFINED); -		if (r) { -			DRM_ERROR("sync failed (%d).\n", r); -			amdgpu_job_free(*job); -			return r; -		} -	} -	return 0; +	if (!resv) +		return 0; + +	return drm_sched_job_add_resv_dependencies(&(*job)->base, resv, +						   DMA_RESV_USAGE_BOOKKEEP);  }  int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, @@ -2001,8 +2043,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,  	if (direct_submit)  		r = amdgpu_job_submit_direct(job, ring, fence);  	else -		r = amdgpu_job_submit(job, &adev->mman.entity, -				      AMDGPU_FENCE_OWNER_UNDEFINED, fence); +		*fence = amdgpu_job_submit(job);  	if (r)  		goto error_free; @@ -2047,16 +2088,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,  	amdgpu_ring_pad_ib(ring, &job->ibs[0]);  	WARN_ON(job->ibs[0].length_dw > num_dw); -	r = amdgpu_job_submit(job, &adev->mman.entity, -			      AMDGPU_FENCE_OWNER_UNDEFINED, fence); -	if (r) -		goto error_free; - +	*fence = amdgpu_job_submit(job);  	return 0; - -error_free: -	amdgpu_job_free(job); -	return r;  }  int amdgpu_fill_buffer(struct amdgpu_bo *bo, @@ -2272,9 +2305,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,  		if (p->mapping != adev->mman.bdev.dev_mapping)  			return -EPERM; -		ptr = kmap(p); +		ptr = kmap_local_page(p);  		r = copy_to_user(buf, ptr + off, bytes); -		kunmap(p); +		kunmap_local(ptr);  		if (r)  			return -EFAULT; @@ -2323,9 +2356,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,  		if (p->mapping != adev->mman.bdev.dev_mapping)  			return -EPERM; -		ptr = kmap(p); +		ptr = kmap_local_page(p);  		r = copy_from_user(ptr + off, buf, bytes); -		kunmap(p); +		kunmap_local(ptr);  		if (r)  			return -EFAULT; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index a37207011a69..e2cd5894afc9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h 
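The amdgpu_ttm.c hunks above all apply the same conversion: the scheduler entity and fence owner now go to amdgpu_job_alloc_with_ib() at allocation time, and amdgpu_job_submit() returns the scheduled fence instead of an error code plus an out-parameter, so the amdgpu_job_free() unwind paths disappear. A condensed sketch of the resulting call pattern follows; the function name is hypothetical and the IB fill is elided, so treat it as illustrative rather than something that builds outside the driver tree.

/*
 * Illustrative extract only: mirrors the hunks above, not complete code.
 */
static int example_sdma_job(struct amdgpu_device *adev, unsigned int num_dw,
			    unsigned int num_bytes)
{
	struct amdgpu_job *job;
	int r;

	/* The entity and fence owner are now bound at allocation time. */
	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
				     AMDGPU_FENCE_OWNER_UNDEFINED,
				     num_dw * 4 + num_bytes,
				     AMDGPU_IB_POOL_DELAYED, &job);
	if (r)
		return r;

	/* ... emit copy/fill packets into job->ibs[0] here ... */

	/*
	 * Submission hands back the scheduled fence rather than an error
	 * code, so the old amdgpu_job_free() error path is gone.
	 */
	dma_fence_put(amdgpu_job_submit(job));
	return 0;
}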
@@ -86,6 +86,12 @@ struct amdgpu_mman {  	struct amdgpu_bo	*fw_vram_usage_reserved_bo;  	void		*fw_vram_usage_va; +	/* driver VRAM reservation */ +	u64		drv_vram_usage_start_offset; +	u64		drv_vram_usage_size; +	struct amdgpu_bo	*drv_vram_usage_reserved_bo; +	void		*drv_vram_usage_va; +  	/* PAGE_SIZE'd BO for process memory r/w over SDMA. */  	struct amdgpu_bo	*sdma_access_bo;  	void			*sdma_access_ptr; @@ -153,6 +159,8 @@ uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);  #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)  int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,  				 struct hmm_range **range); +void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, +				      struct hmm_range *range);  bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,  				       struct hmm_range *range);  #else @@ -162,6 +170,10 @@ static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,  {  	return -EPERM;  } +static inline void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm, +						    struct hmm_range *range) +{ +}  static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,  						     struct hmm_range *range)  { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index 1c36235b4539..552e06929229 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -125,6 +125,7 @@ enum psp_fw_type {  	PSP_FW_TYPE_PSP_INTF_DRV,  	PSP_FW_TYPE_PSP_DBG_DRV,  	PSP_FW_TYPE_PSP_RAS_DRV, +	PSP_FW_TYPE_MAX_INDEX,  };  /* version_major=2, version_minor=0 */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c index aad3c8b4c810..f76c19fc0392 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c @@ -22,6 +22,59 @@   */  #include "amdgpu.h" +#include "umc_v6_7.h" + +static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev, +				    struct ras_err_data *err_data, uint64_t err_addr, +				    uint32_t ch_inst, uint32_t umc_inst) +{ +	switch (adev->ip_versions[UMC_HWIP][0]) { +	case IP_VERSION(6, 7, 0): +		umc_v6_7_convert_error_address(adev, +				err_data, err_addr, ch_inst, umc_inst); +		break; +	default: +		dev_warn(adev->dev, +			 "UMC address to Physical address translation is not supported\n"); +		return AMDGPU_RAS_FAIL; +	} + +	return AMDGPU_RAS_SUCCESS; +} + +int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, +			uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst) +{ +	struct ras_err_data err_data = {0, 0, 0, NULL}; +	int ret = AMDGPU_RAS_FAIL; + +	err_data.err_addr = +		kcalloc(adev->umc.max_ras_err_cnt_per_query, +			sizeof(struct eeprom_table_record), GFP_KERNEL); +	if (!err_data.err_addr) { +		dev_warn(adev->dev, +			"Failed to alloc memory for umc error record in MCA notifier!\n"); +		return AMDGPU_RAS_FAIL; +	} + +	/* +	 * Translate UMC channel address to Physical address +	 */ +	ret = amdgpu_umc_convert_error_address(adev, &err_data, err_addr, +					ch_inst, umc_inst); +	if (ret) +		goto out; + +	if (amdgpu_bad_page_threshold != 0) { +		amdgpu_ras_add_bad_pages(adev, err_data.err_addr, +						err_data.err_addr_cnt); +		amdgpu_ras_save_bad_pages(adev); +	} + +out: +	kfree(err_data.err_addr); +	return ret; +}  static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,  		void *ras_error_status, @@ -112,23 +165,29 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,  	return AMDGPU_RAS_SUCCESS;  } 
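One addition above that is easy to miss in the churn: amdgpu_umc_page_retirement_mca() gives the MCA notifier its own retirement path, which allocates a bounded record buffer, translates the UMC-reported address, hands the records to the bad-page store, and frees the buffer on every exit. A small user-space analogue of that control flow follows; every name, size and the translation itself are placeholders for illustration.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Placeholder record, standing in for the driver's EEPROM table record. */
struct toy_record {
	uint64_t retired_page;
};

/* Placeholder translation from a reported error address to a physical page. */
static int toy_translate(uint64_t err_addr, struct toy_record *rec)
{
	rec->retired_page = err_addr >> 12; /* not the real translation */
	return 0;
}

/*
 * Same shape as the retirement path above: allocate a bounded record
 * buffer, translate, retire, and free the buffer on every exit path.
 */
static int toy_page_retirement(uint64_t err_addr, unsigned int max_records)
{
	struct toy_record *records;
	int ret;

	records = calloc(max_records, sizeof(*records));
	if (!records)
		return -1;

	ret = toy_translate(err_addr, &records[0]);
	if (ret)
		goto out;

	printf("retiring physical page 0x%llx\n",
	       (unsigned long long)records[0].retired_page);
out:
	free(records);
	return ret;
}

int main(void)
{
	return toy_page_retirement(0xdeadbeef000ULL, 4);
}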
-int amdgpu_umc_poison_handler(struct amdgpu_device *adev, -		void *ras_error_status, -		bool reset) +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset)  { -	int ret; -	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; -	struct ras_common_if head = { -		.block = AMDGPU_RAS_BLOCK__UMC, -	}; -	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); +	int ret = AMDGPU_RAS_SUCCESS; -	ret = -		amdgpu_umc_do_page_retirement(adev, ras_error_status, NULL, reset); +	if (!adev->gmc.xgmi.connected_to_cpu) { +		struct ras_err_data err_data = {0, 0, 0, NULL}; +		struct ras_common_if head = { +			.block = AMDGPU_RAS_BLOCK__UMC, +		}; +		struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head); -	if (ret == AMDGPU_RAS_SUCCESS && obj) { -		obj->err_data.ue_count += err_data->ue_count; -		obj->err_data.ce_count += err_data->ce_count; +		ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset); + +		if (ret == AMDGPU_RAS_SUCCESS && obj) { +			obj->err_data.ue_count += err_data.ue_count; +			obj->err_data.ce_count += err_data.ce_count; +		} +	} else if (reset) { +		/* MCA poison handler is only responsible for GPU reset, +		 * let MCA notifier do page retirement. +		 */ +		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev); +		amdgpu_ras_reset_gpu(adev);  	}  	return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h index e46439274f3a..a6951160f13a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h @@ -51,9 +51,6 @@ struct amdgpu_umc_ras {  	struct amdgpu_ras_block_object ras_block;  	void (*err_cnt_init)(struct amdgpu_device *adev);  	bool (*query_ras_poison_mode)(struct amdgpu_device *adev); -	void (*convert_ras_error_address)(struct amdgpu_device *adev, -				struct ras_err_data *err_data, uint64_t err_addr, -				uint32_t ch_inst, uint32_t umc_inst);  	void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev,  				      void *ras_error_status);  	void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev, @@ -86,9 +83,7 @@ struct amdgpu_umc {  };  int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block); -int amdgpu_umc_poison_handler(struct amdgpu_device *adev, -		void *ras_error_status, -		bool reset); +int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset);  int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,  		struct amdgpu_irq_src *source,  		struct amdgpu_iv_entry *entry); @@ -101,4 +96,6 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,  int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,  		void *ras_error_status,  		struct amdgpu_iv_entry *entry); +int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev, +			uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst);  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 6eac649499d3..e00bb654e24b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1132,7 +1132,9 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,  	unsigned offset_idx = 0;  	unsigned offset[3] = { UVD_BASE_SI, 0, 0 }; -	r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT : +	r = amdgpu_job_alloc_with_ib(ring->adev, &adev->uvd.entity, +				     AMDGPU_FENCE_OWNER_UNDEFINED, +				     64, direct ? 
AMDGPU_IB_POOL_DIRECT :  				     AMDGPU_IB_POOL_DELAYED, &job);  	if (r)  		return r; @@ -1175,16 +1177,13 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,  		if (r)  			goto err_free;  	} else { -		r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv, -				     AMDGPU_SYNC_ALWAYS, -				     AMDGPU_FENCE_OWNER_UNDEFINED); +		r = drm_sched_job_add_resv_dependencies(&job->base, +							bo->tbo.base.resv, +							DMA_RESV_USAGE_KERNEL);  		if (r)  			goto err_free; -		r = amdgpu_job_submit(job, &adev->uvd.entity, -				      AMDGPU_FENCE_OWNER_UNDEFINED, &f); -		if (r) -			goto err_free; +		f = amdgpu_job_submit(job);  	}  	amdgpu_bo_reserve(bo, true); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 02cb3a12dd76..b239e874f2d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -450,8 +450,10 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,  	uint64_t addr;  	int i, r; -	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, -				     AMDGPU_IB_POOL_DIRECT, &job); +	r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, +				     AMDGPU_FENCE_OWNER_UNDEFINED, +				     ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, +				     &job);  	if (r)  		return r; @@ -538,7 +540,9 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,  	struct dma_fence *f = NULL;  	int i, r; -	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, +	r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity, +				     AMDGPU_FENCE_OWNER_UNDEFINED, +				     ib_size_dw * 4,  				     direct ? AMDGPU_IB_POOL_DIRECT :  				     AMDGPU_IB_POOL_DELAYED, &job);  	if (r) @@ -570,8 +574,7 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,  	if (direct)  		r = amdgpu_job_submit_direct(job, ring, &f);  	else -		r = amdgpu_job_submit(job, &ring->adev->vce.entity, -				      AMDGPU_FENCE_OWNER_UNDEFINED, &f); +		f = amdgpu_job_submit(job);  	if (r)  		goto err; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index ce64ca1c6e66..b1622ac9949f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -603,15 +603,16 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,  				   struct amdgpu_ib *ib_msg,  				   struct dma_fence **fence)  { +	u64 addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);  	struct amdgpu_device *adev = ring->adev;  	struct dma_fence *f = NULL;  	struct amdgpu_job *job;  	struct amdgpu_ib *ib; -	uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);  	int i, r; -	r = amdgpu_job_alloc_with_ib(adev, 64, -					AMDGPU_IB_POOL_DIRECT, &job); +	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, +				     64, AMDGPU_IB_POOL_DIRECT, +				     &job);  	if (r)  		goto err; @@ -790,8 +791,9 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,  	if (sq)  		ib_size_dw += 8; -	r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4, -				AMDGPU_IB_POOL_DIRECT, &job); +	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, +				     ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, +				     &job);  	if (r)  		goto err; @@ -919,8 +921,9 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand  	if (sq)  		ib_size_dw += 8; -	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, -					AMDGPU_IB_POOL_DIRECT, &job); +	r = amdgpu_job_alloc_with_ib(ring->adev, 
NULL, NULL, +				     ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, +				     &job);  	if (r)  		return r; @@ -985,8 +988,9 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han  	if (sq)  		ib_size_dw += 8; -	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, -					AMDGPU_IB_POOL_DIRECT, &job); +	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, +				     ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT, +				     &job);  	if (r)  		return r; @@ -1251,3 +1255,20 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,  	return 0;  } + +void amdgpu_vcn_set_ras_funcs(struct amdgpu_device *adev) +{ +	if (!adev->vcn.ras) +		return; + +	amdgpu_ras_register_ras_block(adev, &adev->vcn.ras->ras_block); + +	strcpy(adev->vcn.ras->ras_block.ras_comm.name, "vcn"); +	adev->vcn.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN; +	adev->vcn.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; +	adev->vcn.ras_if = &adev->vcn.ras->ras_block.ras_comm; + +	/* If don't define special ras_late_init function, use default ras_late_init */ +	if (!adev->vcn.ras->ras_block.ras_late_init) +		adev->vcn.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 253ea6b159df..dbb8d68a30c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -399,5 +399,6 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev,  int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,  			struct amdgpu_irq_src *source,  			struct amdgpu_iv_entry *entry); +void amdgpu_vcn_set_ras_funcs(struct amdgpu_device *adev);  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index c73abe54d974..2994b9db196f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -64,6 +64,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)  	ddev->driver_features &= ~DRIVER_ATOMIC;  	adev->cg_flags = 0;  	adev->pg_flags = 0; + +	/* enable mcbp for sriov asic_type before soc21 */ +	amdgpu_mcbp = (adev->asic_type < CHIP_IP_DISCOVERY) ? 
1 : 0; +  }  void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, @@ -391,7 +395,6 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)  		 */  		if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,  					       AMDGPU_GPU_PAGE_SIZE, -					       AMDGPU_GEM_DOMAIN_VRAM,  					       &bo, NULL))  			DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp); @@ -424,11 +427,17 @@ static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,  	struct eeprom_table_record bp;  	uint64_t retired_page;  	uint32_t bp_idx, bp_cnt; +	void *vram_usage_va = NULL; + +	if (adev->mman.fw_vram_usage_va) +		vram_usage_va = adev->mman.fw_vram_usage_va; +	else +		vram_usage_va = adev->mman.drv_vram_usage_va;  	if (bp_block_size) {  		bp_cnt = bp_block_size / sizeof(uint64_t);  		for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) { -			retired_page = *(uint64_t *)(adev->mman.fw_vram_usage_va + +			retired_page = *(uint64_t *)(vram_usage_va +  					bp_block_offset + bp_idx * sizeof(uint64_t));  			bp.retired_page = retired_page; @@ -639,7 +648,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)  	adev->virt.fw_reserve.p_vf2pf = NULL;  	adev->virt.vf2pf_update_interval_ms = 0; -	if (adev->mman.fw_vram_usage_va != NULL) { +	if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) { +		DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!"); +	} else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {  		/* go through this logic in ip_init and reset to init workqueue*/  		amdgpu_virt_exchange_data(adev); @@ -662,32 +673,40 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)  	uint32_t bp_block_size = 0;  	struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL; -	if (adev->mman.fw_vram_usage_va != NULL) { - -		adev->virt.fw_reserve.p_pf2vf = -			(struct amd_sriov_msg_pf2vf_info_header *) -			(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); -		adev->virt.fw_reserve.p_vf2pf = -			(struct amd_sriov_msg_vf2pf_info_header *) -			(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); +	if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) { +		if (adev->mman.fw_vram_usage_va) { +			adev->virt.fw_reserve.p_pf2vf = +				(struct amd_sriov_msg_pf2vf_info_header *) +				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); +			adev->virt.fw_reserve.p_vf2pf = +				(struct amd_sriov_msg_vf2pf_info_header *) +				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); +		} else if (adev->mman.drv_vram_usage_va) { +			adev->virt.fw_reserve.p_pf2vf = +				(struct amd_sriov_msg_pf2vf_info_header *) +				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10)); +			adev->virt.fw_reserve.p_vf2pf = +				(struct amd_sriov_msg_vf2pf_info_header *) +				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10)); +		}  		amdgpu_virt_read_pf2vf_data(adev);  		amdgpu_virt_write_vf2pf_data(adev);  		/* bad page handling for version 2 */  		if (adev->virt.fw_reserve.p_pf2vf->version == 2) { -				pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf; +			pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf; -				bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) | -						((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000); -				bp_block_size = pf2vf_v2->bp_block_size; +			bp_block_offset = 
((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) | +				((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000); +			bp_block_size = pf2vf_v2->bp_block_size; -				if (bp_block_size && !adev->virt.ras_init_done) -					amdgpu_virt_init_ras_err_handler_data(adev); +			if (bp_block_size && !adev->virt.ras_init_done) +				amdgpu_virt_init_ras_err_handler_data(adev); -				if (adev->virt.ras_init_done) -					amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size); -			} +			if (adev->virt.ras_init_done) +				amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size); +		}  	}  } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 49c4347d154c..2b9d806e23af 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -75,6 +75,8 @@ struct amdgpu_vf_error_buffer {  	uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];  }; +enum idh_request; +  /**   * struct amdgpu_virt_ops - amdgpu device virt operations   */ @@ -84,7 +86,8 @@ struct amdgpu_virt_ops {  	int (*req_init_data)(struct amdgpu_device *adev);  	int (*reset_gpu)(struct amdgpu_device *adev);  	int (*wait_reset)(struct amdgpu_device *adev); -	void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); +	void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req, +			  u32 data1, u32 data2, u32 data3);  };  /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c index 500a1dc4fe02..53ff91fc6cf6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c @@ -498,7 +498,7 @@ static int amdgpu_vkms_sw_init(void *handle)  	adev_to_drm(adev)->mode_config.preferred_depth = 24;  	adev_to_drm(adev)->mode_config.prefer_shadow = 1; -	adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; +	adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;  	adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; @@ -513,6 +513,10 @@ static int amdgpu_vkms_sw_init(void *handle)  			return r;  	} +	r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); +	if (r) +		return r; +  	drm_kms_helper_poll_init(adev_to_drm(adev));  	adev->mode_info.mode_config_initialized = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 003aa9e47085..dc379dc22c77 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -45,22 +45,43 @@  /**   * DOC: GPUVM   * - * GPUVM is similar to the legacy gart on older asics, however - * rather than there being a single global gart table - * for the entire GPU, there are multiple VM page tables active - * at any given time.  The VM page tables can contain a mix - * vram pages and system memory pages and system memory pages + * GPUVM is the MMU functionality provided on the GPU. + * GPUVM is similar to the legacy GART on older asics, however + * rather than there being a single global GART table + * for the entire GPU, there can be multiple GPUVM page tables active + * at any given time.  The GPUVM page tables can contain a mix + * VRAM pages and system pages (both memory and MMIO) and system pages   * can be mapped as snooped (cached system pages) or unsnooped   * (uncached system pages). - * Each VM has an ID associated with it and there is a page table - * associated with each VMID.  
When executing a command buffer, - * the kernel tells the ring what VMID to use for that command + * + * Each active GPUVM has an ID associated with it and there is a page table + * linked with each VMID.  When executing a command buffer, + * the kernel tells the engine what VMID to use for that command   * buffer.  VMIDs are allocated dynamically as commands are submitted.   * The userspace drivers maintain their own address space and the kernel   * sets up their pages tables accordingly when they submit their   * command buffers and a VMID is assigned. - * Cayman/Trinity support up to 8 active VMs at any given time; - * SI supports 16. + * The hardware supports up to 16 active GPUVMs at any given time. + * + * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending + * on the ASIC family.  GPUVM supports RWX attributes on each page as well + * as other features such as encryption and caching attributes. + * + * VMID 0 is special.  It is the GPUVM used for the kernel driver.  In + * addition to an aperture managed by a page table, VMID 0 also has + * several other apertures.  There is an aperture for direct access to VRAM + * and there is a legacy AGP aperture which just forwards accesses directly + * to the matching system physical addresses (or IOVAs when an IOMMU is + * present).  These apertures provide direct access to these memories without + * incurring the overhead of a page table.  VMID 0 is used by the kernel + * driver for tasks like memory management. + * + * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory. + * For user applications, each application can have their own unique GPUVM + * address space.  The application manages the address space and the kernel + * driver manages the GPUVM page tables for each process.  If an GPU client + * accesses an invalid page, it will generate a GPU page fault, similar to + * accessing an invalid page on a CPU.   
*/  #define START(node) ((node)->start) @@ -463,25 +484,20 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,  	struct amdgpu_device *adev = ring->adev;  	unsigned vmhub = ring->funcs->vmhub;  	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub]; -	struct amdgpu_vmid *id; -	bool gds_switch_needed; -	bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;  	if (job->vmid == 0)  		return false; -	id = &id_mgr->ids[job->vmid]; -	gds_switch_needed = ring->funcs->emit_gds_switch && ( -		id->gds_base != job->gds_base || -		id->gds_size != job->gds_size || -		id->gws_base != job->gws_base || -		id->gws_size != job->gws_size || -		id->oa_base != job->oa_base || -		id->oa_size != job->oa_size); - -	if (amdgpu_vmid_had_gpu_reset(adev, id)) + +	if (job->vm_needs_flush || ring->has_compute_vm_bug)  		return true; -	return vm_flush_needed || gds_switch_needed; +	if (ring->funcs->emit_gds_switch && job->gds_switch_needed) +		return true; + +	if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid])) +		return true; + +	return false;  }  /** @@ -503,27 +519,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,  	unsigned vmhub = ring->funcs->vmhub;  	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];  	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid]; -	bool gds_switch_needed = ring->funcs->emit_gds_switch && ( -		id->gds_base != job->gds_base || -		id->gds_size != job->gds_size || -		id->gws_base != job->gws_base || -		id->gws_size != job->gws_size || -		id->oa_base != job->oa_base || -		id->oa_size != job->oa_size); +	bool spm_update_needed = job->spm_update_needed; +	bool gds_switch_needed = ring->funcs->emit_gds_switch && +		job->gds_switch_needed;  	bool vm_flush_needed = job->vm_needs_flush;  	struct dma_fence *fence = NULL;  	bool pasid_mapping_needed = false;  	unsigned patch_offset = 0; -	bool update_spm_vmid_needed = (job->vm && (job->vm->reserved_vmid[vmhub] != NULL));  	int r; -	if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid) -		adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid); -  	if (amdgpu_vmid_had_gpu_reset(adev, id)) {  		gds_switch_needed = true;  		vm_flush_needed = true;  		pasid_mapping_needed = true; +		spm_update_needed = true;  	}  	mutex_lock(&id_mgr->lock); @@ -541,6 +550,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,  	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)  		return 0; +	amdgpu_ring_ib_begin(ring);  	if (ring->funcs->init_cond_exec)  		patch_offset = amdgpu_ring_init_cond_exec(ring); @@ -555,6 +565,17 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,  	if (pasid_mapping_needed)  		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid); +	if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid) +		adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid); + +	if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && +	    gds_switch_needed) { +		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, +					    job->gds_size, job->gws_base, +					    job->gws_size, job->oa_base, +					    job->oa_size); +	} +  	if (vm_flush_needed || pasid_mapping_needed) {  		r = amdgpu_fence_emit(ring, &fence, NULL, 0);  		if (r) @@ -579,20 +600,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,  	}  	dma_fence_put(fence); -	if (!ring->is_mes_queue && ring->funcs->emit_gds_switch && -	    gds_switch_needed) { -		id->gds_base = job->gds_base; -		id->gds_size = job->gds_size; -		
id->gws_base = job->gws_base; -		id->gws_size = job->gws_size; -		id->oa_base = job->oa_base; -		id->oa_size = job->oa_size; -		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base, -					    job->gds_size, job->gws_base, -					    job->gws_size, job->oa_base, -					    job->oa_size); -	} -  	if (ring->funcs->patch_cond_exec)  		amdgpu_ring_patch_cond_exec(ring, patch_offset); @@ -601,6 +608,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,  		amdgpu_ring_emit_switch_buffer(ring);  		amdgpu_ring_emit_switch_buffer(ring);  	} +	amdgpu_ring_ib_end(ring);  	return 0;  } @@ -2360,7 +2368,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)  	union drm_amdgpu_vm *args = data;  	struct amdgpu_device *adev = drm_to_adev(dev);  	struct amdgpu_fpriv *fpriv = filp->driver_priv; -	long timeout = msecs_to_jiffies(2000);  	int r;  	switch (args->in.op) { @@ -2372,21 +2379,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)  			return r;  		break;  	case AMDGPU_VM_OP_UNRESERVE_VMID: -		if (amdgpu_sriov_runtime(adev)) -			timeout = 8 * timeout; - -		/* Wait vm idle to make sure the vmid set in SPM_VMID is -		 * not referenced anymore. -		 */ -		r = amdgpu_bo_reserve(fpriv->vm.root.bo, true); -		if (r) -			return r; - -		r = amdgpu_vm_wait_idle(&fpriv->vm, timeout); -		if (r < 0) -			return r; - -		amdgpu_bo_unreserve(fpriv->vm.root.bo);  		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);  		break;  	default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 6546e786bf00..094bb4807303 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -119,9 +119,6 @@ struct amdgpu_bo_vm;  /* Reserve 2MB at top/bottom of address space for kernel use */  #define AMDGPU_VA_RESERVED_SIZE			(2ULL << 20) -/* max vmids dedicated for process */ -#define AMDGPU_VM_MAX_RESERVED_VMID	1 -  /* See vm_update_mode */  #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)  #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) @@ -298,8 +295,7 @@ struct amdgpu_vm {  	struct dma_fence	*last_unlocked;  	unsigned int		pasid; -	/* dedicated to vm */ -	struct amdgpu_vmid	*reserved_vmid[AMDGPU_MAX_VMHUBS]; +	bool			reserved_vmid[AMDGPU_MAX_VMHUBS];  	/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */  	bool					use_cpu_for_update; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 69e105fa41f6..535cd6569bcc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -47,6 +47,32 @@ static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table)  	return r;  } +/* Allocate a new job for @count PTE updates */ +static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p, +				    unsigned int count) +{ +	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE +		: AMDGPU_IB_POOL_DELAYED; +	struct drm_sched_entity *entity = p->immediate ? 
&p->vm->immediate +		: &p->vm->delayed; +	unsigned int ndw; +	int r; + +	/* estimate how many dw we need */ +	ndw = AMDGPU_VM_SDMA_MIN_NUM_DW; +	if (p->pages_addr) +		ndw += count * 2; +	ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); + +	r = amdgpu_job_alloc_with_ib(p->adev, entity, AMDGPU_FENCE_OWNER_VM, +				     ndw * 4, pool, &p->job); +	if (r) +		return r; + +	p->num_dw_left = ndw; +	return 0; +} +  /**   * amdgpu_vm_sdma_prepare - prepare SDMA command submission   * @@ -61,21 +87,22 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,  				  struct dma_resv *resv,  				  enum amdgpu_sync_mode sync_mode)  { -	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE -		: AMDGPU_IB_POOL_DELAYED; -	unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW; +	struct amdgpu_sync sync;  	int r; -	r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, &p->job); +	r = amdgpu_vm_sdma_alloc_job(p, 0);  	if (r)  		return r; -	p->num_dw_left = ndw; -  	if (!resv)  		return 0; -	return amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode, p->vm); +	amdgpu_sync_create(&sync); +	r = amdgpu_sync_resv(p->adev, &sync, resv, sync_mode, p->vm); +	if (!r) +		r = amdgpu_sync_push_to_job(&sync, p->job); +	amdgpu_sync_free(&sync); +	return r;  }  /** @@ -91,20 +118,16 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,  				 struct dma_fence **fence)  {  	struct amdgpu_ib *ib = p->job->ibs; -	struct drm_sched_entity *entity;  	struct amdgpu_ring *ring;  	struct dma_fence *f; -	int r; -	entity = p->immediate ? &p->vm->immediate : &p->vm->delayed; -	ring = container_of(entity->rq->sched, struct amdgpu_ring, sched); +	ring = container_of(p->vm->delayed.rq->sched, struct amdgpu_ring, +			    sched);  	WARN_ON(ib->length_dw == 0);  	amdgpu_ring_pad_ib(ring, ib);  	WARN_ON(ib->length_dw > p->num_dw_left); -	r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f); -	if (r) -		goto error; +	f = amdgpu_job_submit(p->job);  	if (p->unlocked) {  		struct dma_fence *tmp = dma_fence_get(f); @@ -127,10 +150,6 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,  	}  	dma_fence_put(f);  	return 0; - -error: -	amdgpu_job_free(p->job); -	return r;  }  /** @@ -210,8 +229,6 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,  				 uint64_t flags)  {  	struct amdgpu_bo *bo = &vmbo->bo; -	enum amdgpu_ib_pool_type pool = p->immediate ? 
AMDGPU_IB_POOL_IMMEDIATE -		: AMDGPU_IB_POOL_DELAYED;  	struct dma_resv_iter cursor;  	unsigned int i, ndw, nptes;  	struct dma_fence *fence; @@ -221,8 +238,10 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,  	/* Wait for PD/PT moves to be completed */  	dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL);  	dma_resv_for_each_fence_unlocked(&cursor, fence) { -		r = amdgpu_sync_fence(&p->job->sync, fence); +		dma_fence_get(fence); +		r = drm_sched_job_add_dependency(&p->job->base, fence);  		if (r) { +			dma_fence_put(fence);  			dma_resv_iter_end(&cursor);  			return r;  		} @@ -238,19 +257,9 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,  			if (r)  				return r; -			/* estimate how many dw we need */ -			ndw = 32; -			if (p->pages_addr) -				ndw += count * 2; -			ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW); -			ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); - -			r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, -						     &p->job); +			r = amdgpu_vm_sdma_alloc_job(p, count);  			if (r)  				return r; - -			p->num_dw_left = ndw;  		}  		if (!p->pages_addr) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 80dd1343594c..9fa1d814508a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -439,7 +439,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,  		/* Allocate blocks in desired range */  		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION; -	remaining_size = (u64)vres->base.num_pages << PAGE_SHIFT; +	remaining_size = (u64)vres->base.size;  	mutex_lock(&mgr->lock);  	while (remaining_size) { @@ -498,7 +498,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,  		LIST_HEAD(temp);  		trim_list = &vres->blocks; -		original_size = (u64)vres->base.num_pages << PAGE_SHIFT; +		original_size = (u64)vres->base.size;  		/*  		 * If size value is rounded up to min_block_size, trim the last @@ -533,8 +533,8 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,  			amdgpu_vram_mgr_block_size(block);  		start >>= PAGE_SHIFT; -		if (start > vres->base.num_pages) -			start -= vres->base.num_pages; +		if (start > PFN_UP(vres->base.size)) +			start -= PFN_UP(vres->base.size);  		else  			start = 0;  		vres->base.start = max(vres->base.start, start); @@ -882,7 +882,7 @@ void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)  		kfree(rsv);  	list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) { -		drm_buddy_free_list(&mgr->mm, &rsv->blocks); +		drm_buddy_free_list(&mgr->mm, &rsv->allocated);  		kfree(rsv);  	}  	drm_buddy_fini(&mgr->mm); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index 47159e9a0884..4b9e7b050ccd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -386,7 +386,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)  	if (ret) {  		dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n");  		kobject_put(&hive->kobj); -		kfree(hive);  		hive = NULL;  		goto pro_end;  	} @@ -410,7 +409,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)  				dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");  				ret = -ENOMEM;  				kobject_put(&hive->kobj); -				kfree(hive);  				hive = NULL;  				goto pro_end;  			} diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c 
b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index 6be9ac2b9c5b..18ae9433e463 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -2081,8 +2081,11 @@ amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder)  						}  					}  					record += fake_edid_record->ucFakeEDIDLength ? -						fake_edid_record->ucFakeEDIDLength + 2 : -						sizeof(ATOM_FAKE_EDID_PATCH_RECORD); +						  struct_size(fake_edid_record, +							      ucFakeEDIDString, +							      fake_edid_record->ucFakeEDIDLength) : +						  /* empty fake edid record must be 3 bytes long */ +						  sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1;  					break;  				case LCD_PANEL_RESOLUTION_RECORD_TYPE:  					panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 288fce7dc0ed..248f1a4e915f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -21,6 +21,7 @@   *   */ +#include <drm/drm_fb_helper.h>  #include <drm/drm_fourcc.h>  #include <drm/drm_vblank.h> @@ -2800,8 +2801,6 @@ static int dce_v10_0_sw_init(void *handle)  	adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; -	adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; -  	r = amdgpu_display_modeset_create_props(adev);  	if (r)  		return r; @@ -2830,6 +2829,17 @@ static int dce_v10_0_sw_init(void *handle)  	if (r)  		return r; +	/* Disable vblank IRQs aggressively for power-saving */ +	/* XXX: can this be enabled for DC? */ +	adev_to_drm(adev)->vblank_disable_immediate = true; + +	r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); +	if (r) +		return r; + +	INIT_WORK(&adev->hotplug_work, +		  amdgpu_display_hotplug_work_func); +  	drm_kms_helper_poll_init(adev_to_drm(adev));  	adev->mode_info.mode_config_initialized = true; @@ -2892,6 +2902,8 @@ static int dce_v10_0_hw_fini(void *handle)  	dce_v10_0_pageflip_interrupt_fini(adev); +	flush_work(&adev->hotplug_work); +  	return 0;  } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index cbe5250b31cb..cd9c19060d89 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -21,6 +21,7 @@   *   */ +#include <drm/drm_fb_helper.h>  #include <drm/drm_fourcc.h>  #include <drm/drm_vblank.h> @@ -2918,8 +2919,6 @@ static int dce_v11_0_sw_init(void *handle)  	adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; -	adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; -  	r = amdgpu_display_modeset_create_props(adev);  	if (r)  		return r; @@ -2949,6 +2948,17 @@ static int dce_v11_0_sw_init(void *handle)  	if (r)  		return r; +	/* Disable vblank IRQs aggressively for power-saving */ +	/* XXX: can this be enabled for DC? 
*/ +	adev_to_drm(adev)->vblank_disable_immediate = true; + +	r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); +	if (r) +		return r; + +	INIT_WORK(&adev->hotplug_work, +		  amdgpu_display_hotplug_work_func); +  	drm_kms_helper_poll_init(adev_to_drm(adev));  	adev->mode_info.mode_config_initialized = true; @@ -3022,6 +3032,8 @@ static int dce_v11_0_hw_fini(void *handle)  	dce_v11_0_pageflip_interrupt_fini(adev); +	flush_work(&adev->hotplug_work); +  	return 0;  } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index b1c44fab074f..76323deecc58 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -23,6 +23,7 @@  #include <linux/pci.h> +#include <drm/drm_fb_helper.h>  #include <drm/drm_fourcc.h>  #include <drm/drm_vblank.h> @@ -2675,7 +2676,6 @@ static int dce_v6_0_sw_init(void *handle)  	adev_to_drm(adev)->mode_config.preferred_depth = 24;  	adev_to_drm(adev)->mode_config.prefer_shadow = 1;  	adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; -	adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;  	r = amdgpu_display_modeset_create_props(adev);  	if (r) @@ -2706,6 +2706,18 @@ static int dce_v6_0_sw_init(void *handle)  	if (r)  		return r; +	/* Disable vblank IRQs aggressively for power-saving */ +	/* XXX: can this be enabled for DC? */ +	adev_to_drm(adev)->vblank_disable_immediate = true; + +	r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); +	if (r) +		return r; + +	/* Pre-DCE11 */ +	INIT_WORK(&adev->hotplug_work, +		  amdgpu_display_hotplug_work_func); +  	drm_kms_helper_poll_init(adev_to_drm(adev));  	return r; @@ -2764,6 +2776,8 @@ static int dce_v6_0_hw_fini(void *handle)  	dce_v6_0_pageflip_interrupt_fini(adev); +	flush_work(&adev->hotplug_work); +  	return 0;  } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index a22b45c92792..01cf3ab111cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -21,6 +21,7 @@   *   */ +#include <drm/drm_fb_helper.h>  #include <drm/drm_fourcc.h>  #include <drm/drm_vblank.h> @@ -2701,8 +2702,6 @@ static int dce_v8_0_sw_init(void *handle)  	adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true; -	adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base; -  	r = amdgpu_display_modeset_create_props(adev);  	if (r)  		return r; @@ -2731,6 +2730,18 @@ static int dce_v8_0_sw_init(void *handle)  	if (r)  		return r; +	/* Disable vblank IRQs aggressively for power-saving */ +	/* XXX: can this be enabled for DC? 
*/ +	adev_to_drm(adev)->vblank_disable_immediate = true; + +	r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc); +	if (r) +		return r; + +	/* Pre-DCE11 */ +	INIT_WORK(&adev->hotplug_work, +		  amdgpu_display_hotplug_work_func); +  	drm_kms_helper_poll_init(adev_to_drm(adev));  	adev->mode_info.mode_config_initialized = true; @@ -2791,6 +2802,8 @@ static int dce_v8_0_hw_fini(void *handle)  	dce_v8_0_pageflip_interrupt_fini(adev); +	flush_work(&adev->hotplug_work); +  	return 0;  } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index af94ac580d3e..49d34c7bbf20 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -4453,8 +4453,6 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)  {  	u32 gb_addr_config; -	adev->gfx.funcs = &gfx_v10_0_gfx_funcs; -  	switch (adev->ip_versions[GC_HWIP][0]) {  	case IP_VERSION(10, 1, 10):  	case IP_VERSION(10, 1, 1): @@ -6911,6 +6909,8 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)  		mutex_unlock(&adev->srbm_mutex);  	} else {  		memset((void *)mqd, 0, sizeof(*mqd)); +		if (amdgpu_sriov_vf(adev) && adev->in_suspend) +			amdgpu_ring_clear_ring(ring);  		mutex_lock(&adev->srbm_mutex);  		nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);  		amdgpu_ring_init_mqd(ring); @@ -7593,6 +7593,8 @@ static int gfx_v10_0_early_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	adev->gfx.funcs = &gfx_v10_0_gfx_funcs; +  	switch (adev->ip_versions[GC_HWIP][0]) {  	case IP_VERSION(10, 1, 10):  	case IP_VERSION(10, 1, 1): @@ -8489,7 +8491,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,  	control |= ib->length_dw | (vmid << 24); -	if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { +	if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {  		control |= INDIRECT_BUFFER_PRE_ENB(1);  		if (flags & AMDGPU_IB_PREEMPTED) @@ -8664,7 +8666,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,  {  	uint32_t dw2 = 0; -	if (amdgpu_mcbp || amdgpu_sriov_vf(ring->adev)) +	if (amdgpu_mcbp)  		gfx_v10_0_ring_emit_ce_meta(ring,  				    (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? 
true : false); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 0fecc5bf45bc..a56c6e106d00 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -77,6 +77,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");  MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");  MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");  MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");  static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =  { @@ -262,6 +266,7 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)  {  	switch (adev->ip_versions[GC_HWIP][0]) {  	case IP_VERSION(11, 0, 1): +	case IP_VERSION(11, 0, 4):  		soc15_program_register_sequence(adev,  						golden_settings_gc_11_0_1,  						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1)); @@ -843,7 +848,6 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {  static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)  { -	adev->gfx.funcs = &gfx_v11_0_gfx_funcs;  	switch (adev->ip_versions[GC_HWIP][0]) {  	case IP_VERSION(11, 0, 0): @@ -856,6 +860,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)  		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;  		break;  	case IP_VERSION(11, 0, 1): +	case IP_VERSION(11, 0, 4):  		adev->gfx.config.max_hw_contexts = 8;  		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;  		adev->gfx.config.sc_prim_fifo_size_backend = 0x100; @@ -1285,6 +1290,7 @@ static int gfx_v11_0_sw_init(void *handle)  	case IP_VERSION(11, 0, 1):  	case IP_VERSION(11, 0, 2):  	case IP_VERSION(11, 0, 3): +	case IP_VERSION(11, 0, 4):  		adev->gfx.me.num_me = 1;  		adev->gfx.me.num_pipe_per_me = 1;  		adev->gfx.me.num_queue_per_pipe = 1; @@ -1626,7 +1632,8 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev)  	u32 tmp;  	int i; -	WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff); +	if (!amdgpu_sriov_vf(adev)) +		WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);  	gfx_v11_0_setup_rb(adev);  	gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info); @@ -2486,7 +2493,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)  	for (i = 0; i < adev->usec_timeout; i++) {  		cp_status = RREG32_SOC15(GC, 0, regCP_STAT); -		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1)) +		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) || +				adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4))  			bootload_status = RREG32_SOC15(GC, 0,  					regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);  		else @@ -4004,6 +4012,8 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)  		mutex_unlock(&adev->srbm_mutex);  	} else {  		memset((void *)mqd, 0, sizeof(*mqd)); +		if (amdgpu_sriov_vf(adev) && adev->in_suspend) +			amdgpu_ring_clear_ring(ring);  		mutex_lock(&adev->srbm_mutex);  		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);  		amdgpu_ring_init_mqd(ring); @@ -4390,7 +4400,6 @@ static int gfx_v11_0_hw_fini(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle;  	int r; -	uint32_t tmp;  	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);  	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); @@ -4409,15 +4418,14 @@ static int gfx_v11_0_hw_fini(void *handle)  		amdgpu_mes_kiq_hw_fini(adev);  	} -	if (amdgpu_sriov_vf(adev)) { -		
gfx_v11_0_cp_gfx_enable(adev, false); -		/* Program KIQ position of RLC_CP_SCHEDULERS during destroy */ -		tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS); -		tmp &= 0xffffff00; -		WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp); - +	if (amdgpu_sriov_vf(adev)) +		/* Remove the steps disabling CPG and clearing KIQ position, +		 * so that CP could perform IDLE-SAVE during switch. Those +		 * steps are necessary to avoid a DMAR error in gfx9 but it is +		 * not reproduced on gfx11. +		 */  		return 0; -	} +  	gfx_v11_0_cp_enable(adev, false);  	gfx_v11_0_enable_gui_idle_interrupt(adev, false); @@ -4656,6 +4664,8 @@ static int gfx_v11_0_early_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	adev->gfx.funcs = &gfx_v11_0_gfx_funcs; +  	adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;  	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),  					  AMDGPU_MAX_COMPUTE_RINGS); @@ -4673,6 +4683,26 @@ static int gfx_v11_0_early_init(void *handle)  	return 0;  } +static int gfx_v11_0_ras_late_init(void *handle) +{ +	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	struct ras_common_if *gfx_common_if; +	int ret; + +	gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL); +	if (!gfx_common_if) +		return -ENOMEM; + +	gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX; + +	ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true); +	if (ret) +		dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n"); + +	kfree(gfx_common_if); +	return 0; +} +  static int gfx_v11_0_late_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -4686,6 +4716,12 @@ static int gfx_v11_0_late_init(void *handle)  	if (r)  		return r; +	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) { +		r = gfx_v11_0_ras_late_init(handle); +		if (r) +			return r; +	} +  	return 0;  } @@ -5022,6 +5058,7 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)  	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {  		switch (adev->ip_versions[GC_HWIP][0]) {  		case IP_VERSION(11, 0, 1): +		case IP_VERSION(11, 0, 4):  			WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);  			break;  		default: @@ -5055,6 +5092,7 @@ static int gfx_v11_0_set_powergating_state(void *handle,  		amdgpu_gfx_off_ctrl(adev, enable);  		break;  	case IP_VERSION(11, 0, 1): +	case IP_VERSION(11, 0, 4):  		gfx_v11_cntl_pg(adev, enable);  		amdgpu_gfx_off_ctrl(adev, enable);  		break; @@ -5078,6 +5116,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle,  	case IP_VERSION(11, 0, 1):  	case IP_VERSION(11, 0, 2):  	case IP_VERSION(11, 0, 3): +	case IP_VERSION(11, 0, 4):  	        gfx_v11_0_update_gfx_clock_gating(adev,  	                        state ==  AMD_CG_STATE_GATE);  	        break; @@ -5299,7 +5338,7 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,  	control |= ib->length_dw | (vmid << 24); -	if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { +	if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {  		control |= INDIRECT_BUFFER_PRE_ENB(1);  		if (flags & AMDGPU_IB_PREEMPTED) @@ -6060,6 +6099,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {  	.align_mask = 0xff,  	.nop = PACKET3(PACKET3_NOP, 0x3FFF),  	.support_64bit_ptrs = true, +	.secure_submission_supported = true,  	.vmhub = AMDGPU_GFXHUB_0,  	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,  	.get_wptr = gfx_v11_0_ring_get_wptr_gfx, diff --git 
a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 7f0b18b0d4c4..d47135606e3e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4643,6 +4643,8 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)  		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));  		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;  		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; +		if (amdgpu_sriov_vf(adev) && adev->in_suspend) +			amdgpu_ring_clear_ring(ring);  		mutex_lock(&adev->srbm_mutex);  		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);  		gfx_v8_0_mqd_init(ring); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 0320be4a5fc6..f202b45c413c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -47,6 +47,7 @@  #include "amdgpu_ras.h" +#include "amdgpu_ring_mux.h"  #include "gfx_v9_4.h"  #include "gfx_v9_0.h"  #include "gfx_v9_4_2.h" @@ -56,6 +57,7 @@  #include "asic_reg/gc/gc_9_0_default.h"  #define GFX9_NUM_GFX_RINGS     1 +#define GFX9_NUM_SW_GFX_RINGS  2  #define GFX9_MEC_HPD_SIZE 4096  #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L  #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L @@ -753,7 +755,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);  static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,  				struct amdgpu_cu_info *cu_info);  static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring); +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);  static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);  static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,  					  void *ras_error_status); @@ -826,9 +828,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,  			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));  	if (action == PREEMPT_QUEUES_NO_UNMAP) { -		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr)); -		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr)); -		amdgpu_ring_write(kiq_ring, seq); +		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask)); +		amdgpu_ring_write(kiq_ring, 0); +		amdgpu_ring_write(kiq_ring, 0); +  	} else {  		amdgpu_ring_write(kiq_ring, 0);  		amdgpu_ring_write(kiq_ring, 0); @@ -1564,7 +1567,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)  			mask = 1;  			cu_bitmap = 0;  			counter = 0; -			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); +			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);  			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {  				if (cu_info->bitmap[i][j] & mask) { @@ -1583,7 +1586,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)  			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;  		}  	} -	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); +	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);  	mutex_unlock(&adev->grbm_idx_mutex);  } @@ -1605,7 +1608,7 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)  	mutex_lock(&adev->grbm_idx_mutex);  	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ -	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); +	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);  	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);  	
/* set mmRLC_LB_PARAMS = 0x003F_1006 */ @@ -1654,7 +1657,7 @@ static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)  	mutex_lock(&adev->grbm_idx_mutex);  	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ -	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); +	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);  	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);  	/* set mmRLC_LB_PARAMS = 0x003F_1006 */ @@ -1919,8 +1922,6 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)  	u32 gb_addr_config;  	int err; -	adev->gfx.funcs = &gfx_v9_0_gfx_funcs; -  	switch (adev->ip_versions[GC_HWIP][0]) {  	case IP_VERSION(9, 0, 1):  		adev->gfx.config.max_hw_contexts = 8; @@ -2105,6 +2106,7 @@ static int gfx_v9_0_sw_init(void *handle)  	struct amdgpu_ring *ring;  	struct amdgpu_kiq *kiq;  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	unsigned int hw_prio;  	switch (adev->ip_versions[GC_HWIP][0]) {  	case IP_VERSION(9, 0, 1): @@ -2188,6 +2190,9 @@ static int gfx_v9_0_sw_init(void *handle)  			sprintf(ring->name, "gfx_%d", i);  		ring->use_doorbell = true;  		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; + +		/* disable scheduler on the real ring */ +		ring->no_scheduler = true;  		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,  				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,  				     AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -2195,6 +2200,41 @@ static int gfx_v9_0_sw_init(void *handle)  			return r;  	} +	/* set up the software rings */ +	if (adev->gfx.num_gfx_rings) { +		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { +			ring = &adev->gfx.sw_gfx_ring[i]; +			ring->ring_obj = NULL; +			sprintf(ring->name, amdgpu_sw_ring_name(i)); +			ring->use_doorbell = true; +			ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; +			ring->is_sw_ring = true; +			hw_prio = amdgpu_sw_ring_priority(i); +			r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, +					     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, +					     NULL); +			if (r) +				return r; +			ring->wptr = 0; +		} + +		/* init the muxer and add software rings */ +		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0], +					 GFX9_NUM_SW_GFX_RINGS); +		if (r) { +			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r); +			return r; +		} +		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) { +			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer, +							&adev->gfx.sw_gfx_ring[i]); +			if (r) { +				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r); +				return r; +			} +		} +	} +  	/* set up the compute queues - allocate horizontally across pipes */  	ring_id = 0;  	for (i = 0; i < adev->gfx.mec.num_mec; ++i) { @@ -2245,6 +2285,12 @@ static int gfx_v9_0_sw_fini(void *handle)  	int i;  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	if (adev->gfx.num_gfx_rings) { +		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) +			amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]); +		amdgpu_ring_mux_fini(&adev->gfx.muxer); +	} +  	for (i = 0; i < adev->gfx.num_gfx_rings; i++)  		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);  	for (i = 0; i < adev->gfx.num_compute_rings; i++) @@ -2324,13 +2370,13 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)  	mutex_lock(&adev->grbm_idx_mutex);  	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {  		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { -			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); +			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);  			data = 
gfx_v9_0_get_rb_active_bitmap(adev);  			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *  					       rb_bitmap_width_per_sh);  		}  	} -	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); +	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);  	mutex_unlock(&adev->grbm_idx_mutex);  	adev->gfx.config.backend_enable_mask = active_rbs; @@ -2467,14 +2513,14 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)  	mutex_lock(&adev->grbm_idx_mutex);  	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {  		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { -			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); +			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);  			for (k = 0; k < adev->usec_timeout; k++) {  				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)  					break;  				udelay(1);  			}  			if (k == adev->usec_timeout) { -				gfx_v9_0_select_se_sh(adev, 0xffffffff, +				amdgpu_gfx_select_se_sh(adev, 0xffffffff,  						      0xffffffff, 0xffffffff);  				mutex_unlock(&adev->grbm_idx_mutex);  				DRM_INFO("Timeout wait for RLC serdes %u,%u\n", @@ -2483,7 +2529,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)  			}  		}  	} -	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); +	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);  	mutex_unlock(&adev->grbm_idx_mutex);  	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -3583,6 +3629,8 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)  		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));  		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;  		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; +		if (amdgpu_sriov_vf(adev) && adev->in_suspend) +			amdgpu_ring_clear_ring(ring);  		mutex_lock(&adev->srbm_mutex);  		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);  		gfx_v9_0_mqd_init(ring); @@ -4539,6 +4587,8 @@ static int gfx_v9_0_early_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; +	adev->gfx.funcs = &gfx_v9_0_gfx_funcs; +  	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||  	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))  		adev->gfx.num_gfx_rings = 0; @@ -5155,11 +5205,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,  	control |= ib->length_dw | (vmid << 24); -	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) { +	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {  		control |= INDIRECT_BUFFER_PRE_ENB(1); +		if (flags & AMDGPU_IB_PREEMPTED) +			control |= INDIRECT_BUFFER_PRE_RESUME(1); +  		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid) -			gfx_v9_0_ring_emit_de_meta(ring); +			gfx_v9_0_ring_emit_de_meta(ring, +						   (!amdgpu_sriov_vf(ring->adev) && +						   flags & AMDGPU_IB_PREEMPTED) ? +						   true : false);  	}  	amdgpu_ring_write(ring, header); @@ -5214,17 +5270,24 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,  	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;  	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;  	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY; +	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC; +	uint32_t dw2 = 0;  	/* RELEASE_MEM - flush caches, send int */  	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6)); -	amdgpu_ring_write(ring, ((writeback ? 
(EOP_TC_WB_ACTION_EN | -					       EOP_TC_NC_ACTION_EN) : -					      (EOP_TCL1_ACTION_EN | -					       EOP_TC_ACTION_EN | -					       EOP_TC_WB_ACTION_EN | -					       EOP_TC_MD_ACTION_EN)) | -				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | -				 EVENT_INDEX(5))); + +	if (writeback) { +		dw2 = EOP_TC_NC_ACTION_EN; +	} else { +		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN | +				EOP_TC_MD_ACTION_EN; +	} +	dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | +				EVENT_INDEX(5); +	if (exec) +		dw2 |= EOP_EXEC; + +	amdgpu_ring_write(ring, dw2);  	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));  	/* @@ -5329,33 +5392,135 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)  	amdgpu_ring_write(ring, 0);  } -static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring) +static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)  { +	struct amdgpu_device *adev = ring->adev;  	struct v9_ce_ib_state ce_payload = {0}; -	uint64_t csa_addr; +	uint64_t offset, ce_payload_gpu_addr; +	void *ce_payload_cpu_addr;  	int cnt;  	cnt = (sizeof(ce_payload) >> 2) + 4 - 2; -	csa_addr = amdgpu_csa_vaddr(ring->adev); + +	if (ring->is_mes_queue) { +		offset = offsetof(struct amdgpu_mes_ctx_meta_data, +				  gfx[0].gfx_meta_data) + +			offsetof(struct v9_gfx_meta_data, ce_payload); +		ce_payload_gpu_addr = +			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); +		ce_payload_cpu_addr = +			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); +	} else { +		offset = offsetof(struct v9_gfx_meta_data, ce_payload); +		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; +		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; +	}  	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));  	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |  				 WRITE_DATA_DST_SEL(8) |  				 WR_CONFIRM) |  				 WRITE_DATA_CACHE_POLICY(0)); -	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); -	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload))); -	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2); +	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr)); +	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr)); + +	if (resume) +		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr, +					   sizeof(ce_payload) >> 2); +	else +		amdgpu_ring_write_multiple(ring, (void *)&ce_payload, +					   sizeof(ce_payload) >> 2); +} + +static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) +{ +	int i, r = 0; +	struct amdgpu_device *adev = ring->adev; +	struct amdgpu_kiq *kiq = &adev->gfx.kiq; +	struct amdgpu_ring *kiq_ring = &kiq->ring; +	unsigned long flags; + +	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues) +		return -EINVAL; + +	spin_lock_irqsave(&kiq->ring_lock, flags); + +	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) { +		spin_unlock_irqrestore(&kiq->ring_lock, flags); +		return -ENOMEM; +	} + +	/* assert preemption condition */ +	amdgpu_ring_set_preempt_cond_exec(ring, false); + +	ring->trail_seq += 1; +	amdgpu_ring_alloc(ring, 13); +	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, +				 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT); +	/*reset the CP_VMID_PREEMPT after trailing fence*/ +	amdgpu_ring_emit_wreg(ring, +			      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), +			      0x0); + +	/* assert IB preemption, emit the trailing fence */ +	kiq->pmf->kiq_unmap_queues(kiq_ring, 
ring, PREEMPT_QUEUES_NO_UNMAP, +				   ring->trail_fence_gpu_addr, +				   ring->trail_seq); + +	amdgpu_ring_commit(kiq_ring); +	spin_unlock_irqrestore(&kiq->ring_lock, flags); + +	/* poll the trailing fence */ +	for (i = 0; i < adev->usec_timeout; i++) { +		if (ring->trail_seq == +			le32_to_cpu(*ring->trail_fence_cpu_addr)) +			break; +		udelay(1); +	} + +	if (i >= adev->usec_timeout) { +		r = -EINVAL; +		DRM_WARN("ring %d timeout to preempt ib\n", ring->idx); +	} + +	amdgpu_ring_commit(ring); + +	/* deassert preemption condition */ +	amdgpu_ring_set_preempt_cond_exec(ring, true); +	return r;  } -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring) +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)  { +	struct amdgpu_device *adev = ring->adev;  	struct v9_de_ib_state de_payload = {0}; -	uint64_t csa_addr, gds_addr; +	uint64_t offset, gds_addr, de_payload_gpu_addr; +	void *de_payload_cpu_addr;  	int cnt; -	csa_addr = amdgpu_csa_vaddr(ring->adev); -	gds_addr = csa_addr + 4096; +	if (ring->is_mes_queue) { +		offset = offsetof(struct amdgpu_mes_ctx_meta_data, +				  gfx[0].gfx_meta_data) + +			offsetof(struct v9_gfx_meta_data, de_payload); +		de_payload_gpu_addr = +			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); +		de_payload_cpu_addr = +			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset); + +		offset = offsetof(struct amdgpu_mes_ctx_meta_data, +				  gfx[0].gds_backup) + +			offsetof(struct v9_gfx_meta_data, de_payload); +		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset); +	} else { +		offset = offsetof(struct v9_gfx_meta_data, de_payload); +		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset; +		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset; + +		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) + +				 AMDGPU_CSA_SIZE - adev->gds.gds_size, +				 PAGE_SIZE); +	} +  	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);  	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); @@ -5365,9 +5530,15 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)  				 WRITE_DATA_DST_SEL(8) |  				 WR_CONFIRM) |  				 WRITE_DATA_CACHE_POLICY(0)); -	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); -	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload))); -	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2); +	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); +	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); + +	if (resume) +		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, +					   sizeof(de_payload) >> 2); +	else +		amdgpu_ring_write_multiple(ring, (void *)&de_payload, +					   sizeof(de_payload) >> 2);  }  static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, @@ -5383,8 +5554,9 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)  {  	uint32_t dw2 = 0; -	if (amdgpu_sriov_vf(ring->adev)) -		gfx_v9_0_ring_emit_ce_meta(ring); +	gfx_v9_0_ring_emit_ce_meta(ring, +				   (!amdgpu_sriov_vf(ring->adev) && +				   flags & AMDGPU_IB_PREEMPTED) ? 
true : false);  	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */  	if (flags & AMDGPU_HAVE_CTX_SWITCH) { @@ -5710,7 +5882,12 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,  	switch (me_id) {  	case 0: -		amdgpu_fence_process(&adev->gfx.gfx_ring[0]); +		if (adev->gfx.num_gfx_rings && +		    !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) { +			/* Fence signals are handled on the software rings*/ +			for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) +				amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]); +		}  		break;  	case 1:  	case 2: @@ -6482,7 +6659,7 @@ static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)  	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {  		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {  			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { -				gfx_v9_0_select_se_sh(adev, j, 0x0, k); +				amdgpu_gfx_select_se_sh(adev, j, 0x0, k);  				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));  			}  		} @@ -6544,7 +6721,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,  	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {  		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {  			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { -				gfx_v9_0_select_se_sh(adev, j, 0, k); +				amdgpu_gfx_select_se_sh(adev, j, 0, k);  				reg_value =  					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));  				if (reg_value) @@ -6559,7 +6736,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,  	err_data->ce_count += sec_count;  	err_data->ue_count += ded_count; -	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); +	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);  	mutex_unlock(&adev->grbm_idx_mutex);  	gfx_v9_0_query_utc_edc_status(adev, err_data); @@ -6707,6 +6884,62 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {  	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,  	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,  	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec, +	.preempt_ib = gfx_v9_0_ring_preempt_ib, +	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl, +	.emit_wreg = gfx_v9_0_ring_emit_wreg, +	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, +	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, +	.soft_recovery = gfx_v9_0_ring_soft_recovery, +	.emit_mem_sync = gfx_v9_0_emit_mem_sync, +}; + +static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { +	.type = AMDGPU_RING_TYPE_GFX, +	.align_mask = 0xff, +	.nop = PACKET3(PACKET3_NOP, 0x3FFF), +	.support_64bit_ptrs = true, +	.secure_submission_supported = true, +	.vmhub = AMDGPU_GFXHUB_0, +	.get_rptr = amdgpu_sw_ring_get_rptr_gfx, +	.get_wptr = amdgpu_sw_ring_get_wptr_gfx, +	.set_wptr = amdgpu_sw_ring_set_wptr_gfx, +	.emit_frame_size = /* totally 242 maximum if 16 IBs */ +		5 +  /* COND_EXEC */ +		7 +  /* PIPELINE_SYNC */ +		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 + +		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 + +		2 + /* VM_FLUSH */ +		8 +  /* FENCE for VM_FLUSH */ +		20 + /* GDS switch */ +		4 + /* double SWITCH_BUFFER, +		     * the first COND_EXEC jump to the place just +		     * prior to this double SWITCH_BUFFER +		     */ +		5 + /* COND_EXEC */ +		7 +	 /*	HDP_flush */ +		4 +	 /*	VGT_flush */ +		14 + /*	CE_META */ +		31 + /*	DE_META */ +		3 + /* CNTX_CTRL */ +		5 + /* HDP_INVL */ +		8 + 8 + /* FENCE x2 */ +		2 + /* SWITCH_BUFFER */ +		7, /* gfx_v9_0_emit_mem_sync */ +	
.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */ +	.emit_ib = gfx_v9_0_ring_emit_ib_gfx, +	.emit_fence = gfx_v9_0_ring_emit_fence, +	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync, +	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush, +	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch, +	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush, +	.test_ring = gfx_v9_0_ring_test_ring, +	.test_ib = gfx_v9_0_ring_test_ib, +	.insert_nop = amdgpu_sw_ring_insert_nop, +	.pad_ib = amdgpu_ring_generic_pad_ib, +	.emit_switch_buffer = gfx_v9_ring_emit_sb, +	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl, +	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec, +	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,  	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,  	.emit_wreg = gfx_v9_0_ring_emit_wreg,  	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, @@ -6792,6 +7025,11 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)  	for (i = 0; i < adev->gfx.num_gfx_rings; i++)  		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; +	if (adev->gfx.num_gfx_rings) { +		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) +			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx; +	} +  	for (i = 0; i < adev->gfx.num_compute_rings; i++)  		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;  } @@ -6963,7 +7201,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,  			mask = 1;  			ao_bitmap = 0;  			counter = 0; -			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); +			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);  			gfx_v9_0_set_user_cu_inactive_bitmap(  				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);  			bitmap = gfx_v9_0_get_cu_active_bitmap(adev); @@ -6996,7 +7234,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,  			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;  		}  	} -	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); +	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);  	mutex_unlock(&adev->grbm_idx_mutex);  	cu_info->number = active_cu_number; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c index 8cf53e039c11..3f8676d23a5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c @@ -397,6 +397,9 @@ static void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev)  			    ENABLE_ADVANCED_DRIVER_MODEL, 0);  	WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp); +	if (amdgpu_sriov_vf(adev)) +		return; +  	/* Setup L2 cache */  	WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);  	WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c index 5d3fffd4929f..080ff11ca305 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c @@ -154,6 +154,9 @@ static void gfxhub_v3_0_3_init_system_aperture_regs(struct amdgpu_device *adev)  {  	uint64_t value; +	if (amdgpu_sriov_vf(adev)) +		return; +  	/* Disable AGP. 
*/  	WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);  	WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, 0); @@ -354,18 +357,6 @@ static void gfxhub_v3_0_3_program_invalidation(struct amdgpu_device *adev)  static int gfxhub_v3_0_3_gart_enable(struct amdgpu_device *adev)  { -	if (amdgpu_sriov_vf(adev)) { -		/* -		 * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are -		 * VF copy registers so vbios post doesn't program them, for -		 * SRIOV driver need to program them -		 */ -		WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE, -			     adev->gmc.vram_start >> 24); -		WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP, -			     adev->gmc.vram_end >> 24); -	} -  	/* GART Enable. */  	gfxhub_v3_0_3_init_gart_aperture_regs(adev);  	gfxhub_v3_0_3_init_system_aperture_regs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index f513e2c2e964..21e46817d82d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -371,7 +371,9 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,  	 * translation. Avoid this by doing the invalidation from the SDMA  	 * itself.  	 */ -	r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE, +	r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.entity, +				     AMDGPU_FENCE_OWNER_UNDEFINED, +				     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,  				     &job);  	if (r)  		goto error_alloc; @@ -380,10 +382,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,  	job->vm_needs_flush = true;  	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;  	amdgpu_ring_pad_ib(ring, &job->ibs[0]); -	r = amdgpu_job_submit(job, &adev->mman.entity, -			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence); -	if (r) -		goto error_submit; +	fence = amdgpu_job_submit(job);  	mutex_unlock(&adev->mman.gtt_window_lock); @@ -392,9 +391,6 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,  	return; -error_submit: -	amdgpu_job_free(job); -  error_alloc:  	mutex_unlock(&adev->mman.gtt_window_lock);  	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r); @@ -612,6 +608,8 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,  				 struct amdgpu_bo_va_mapping *mapping,  				 uint64_t *flags)  { +	struct amdgpu_bo *bo = mapping->bo_va->base.bo; +  	*flags &= ~AMDGPU_PTE_EXECUTABLE;  	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; @@ -628,6 +626,11 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,  		*flags |= AMDGPU_PTE_SYSTEM;  		*flags &= ~AMDGPU_PTE_VALID;  	} + +	if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT | +			       AMDGPU_GEM_CREATE_UNCACHED)) +		*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) | +			 AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);  }  static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c index 66dfb574cc7d..4326078689cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c @@ -503,6 +503,8 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,  				 struct amdgpu_bo_va_mapping *mapping,  				 uint64_t *flags)  { +	struct amdgpu_bo *bo = mapping->bo_va->base.bo; +  	*flags &= ~AMDGPU_PTE_EXECUTABLE;  	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; @@ -519,6 +521,11 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,  		*flags |= AMDGPU_PTE_SYSTEM;  		*flags &= ~AMDGPU_PTE_VALID;  	} + +	if (bo && 
bo->flags & (AMDGPU_GEM_CREATE_COHERENT | +			       AMDGPU_GEM_CREATE_UNCACHED)) +		*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) | +			 AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);  }  static unsigned gmc_v11_0_get_vbios_fb_size(struct amdgpu_device *adev) @@ -551,7 +558,10 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev)  		adev->umc.node_inst_num = adev->gmc.num_umc;  		adev->umc.max_ras_err_cnt_per_query = UMC_V8_10_TOTAL_CHANNEL_NUM(adev);  		adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET; -		adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0]; +		if (adev->umc.node_inst_num == 4) +			adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl_ext0[0][0][0]; +		else +			adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0];  		adev->umc.ras = &umc_v8_10_ras;  		break;  	case IP_VERSION(8, 11, 0): @@ -749,6 +759,7 @@ static int gmc_v11_0_sw_init(void *handle)  	case IP_VERSION(11, 0, 1):  	case IP_VERSION(11, 0, 2):  	case IP_VERSION(11, 0, 3): +	case IP_VERSION(11, 0, 4):  		adev->num_vmhubs = 2;  		/*  		 * To fulfill 4-level page support, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 67ca16a8027c..08d6cf79fb15 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1113,10 +1113,80 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,  	}  } +static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev, +					 struct amdgpu_bo *bo, +					 struct amdgpu_bo_va_mapping *mapping, +					 uint64_t *flags) +{ +	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev); +	bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM; +	bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT; +	bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED; +	unsigned int mtype; +	bool snoop = false; + +	switch (adev->ip_versions[GC_HWIP][0]) { +	case IP_VERSION(9, 4, 1): +	case IP_VERSION(9, 4, 2): +		if (is_vram) { +			if (bo_adev == adev) { +				if (uncached) +					mtype = MTYPE_UC; +				else if (coherent) +					mtype = MTYPE_CC; +				else +					mtype = MTYPE_RW; +				/* FIXME: is this still needed? Or does +				 * amdgpu_ttm_tt_pde_flags already handle this? +				 */ +				if (adev->ip_versions[GC_HWIP][0] == +					IP_VERSION(9, 4, 2) && +				    adev->gmc.xgmi.connected_to_cpu) +					snoop = true; +			} else { +				if (uncached || coherent) +					mtype = MTYPE_UC; +				else +					mtype = MTYPE_NC; +				if (mapping->bo_va->is_xgmi) +					snoop = true; +			} +		} else { +			if (uncached || coherent) +				mtype = MTYPE_UC; +			else +				mtype = MTYPE_NC; +			/* FIXME: is this still needed? Or does +			 * amdgpu_ttm_tt_pde_flags already handle this? +			 */ +			snoop = true; +		} +		break; +	default: +		if (uncached || coherent) +			mtype = MTYPE_UC; +		else +			mtype = MTYPE_NC; + +		/* FIXME: is this still needed? Or does +		 * amdgpu_ttm_tt_pde_flags already handle this? +		 */ +		if (!is_vram) +			snoop = true; +	} + +	if (mtype != MTYPE_NC) +		*flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) | +			 AMDGPU_PTE_MTYPE_VG10(mtype); +	*flags |= snoop ? 
AMDGPU_PTE_SNOOPED : 0; +} +  static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,  				struct amdgpu_bo_va_mapping *mapping,  				uint64_t *flags)  { +	struct amdgpu_bo *bo = mapping->bo_va->base.bo; +  	*flags &= ~AMDGPU_PTE_EXECUTABLE;  	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE; @@ -1128,14 +1198,9 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,  		*flags &= ~AMDGPU_PTE_VALID;  	} -	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) || -	     adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) && -	    !(*flags & AMDGPU_PTE_SYSTEM) && -	    mapping->bo_va->is_xgmi) -		*flags |= AMDGPU_PTE_SNOOPED; - -	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) -		*flags |= mapping->flags & AMDGPU_PTE_SNOOPED; +	if (bo && bo->tbo.resource) +		gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.bo, +					     mapping, flags);  }  static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c index f87d0f6ffc93..f2b743a93915 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c @@ -807,16 +807,5 @@ static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev)  		break;  	} -	if (adev->jpeg.ras) { -		amdgpu_ras_register_ras_block(adev, &adev->jpeg.ras->ras_block); - -		strcpy(adev->jpeg.ras->ras_block.ras_comm.name, "jpeg"); -		adev->jpeg.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG; -		adev->jpeg.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; -		adev->jpeg.ras_if = &adev->jpeg.ras->ras_block.ras_comm; - -		/* If don't define special ras_late_init function, use default ras_late_init */ -		if (!adev->jpeg.ras->ras_block.ras_late_init) -			adev->jpeg.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; -	} +	jpeg_set_ras_funcs(adev);  } diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c index 63b0d0b810ec..3beb731b2ce5 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c @@ -27,6 +27,7 @@  #include "soc15.h"  #include "soc15d.h"  #include "jpeg_v2_0.h" +#include "jpeg_v4_0.h"  #include "vcn/vcn_4_0_0_offset.h"  #include "vcn/vcn_4_0_0_sh_mask.h" @@ -38,6 +39,7 @@ static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev);  static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev);  static int jpeg_v4_0_set_powergating_state(void *handle,  				enum amd_powergating_state state); +static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev);  /**   * jpeg_v4_0_early_init - set function pointers @@ -55,6 +57,7 @@ static int jpeg_v4_0_early_init(void *handle)  	jpeg_v4_0_set_dec_ring_funcs(adev);  	jpeg_v4_0_set_irq_funcs(adev); +	jpeg_v4_0_set_ras_funcs(adev);  	return 0;  } @@ -78,6 +81,18 @@ static int jpeg_v4_0_sw_init(void *handle)  	if (r)  		return r; +	/* JPEG DJPEG POISON EVENT */ +	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, +			VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->irq); +	if (r) +		return r; + +	/* JPEG EJPEG POISON EVENT */ +	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN, +			VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->irq); +	if (r) +		return r; +  	r = amdgpu_jpeg_sw_init(adev);  	if (r)  		return r; @@ -167,6 +182,8 @@ static int jpeg_v4_0_hw_fini(void *handle)  	      RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))  		jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE); +	amdgpu_irq_put(adev, &adev->jpeg.inst->irq, 0); +  	return 0;  } @@ 
-524,6 +541,10 @@ static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev,  	case VCN_4_0__SRCID__JPEG_DECODE:  		amdgpu_fence_process(&adev->jpeg.inst->ring_dec);  		break; +	case VCN_4_0__SRCID_DJPEG0_POISON: +	case VCN_4_0__SRCID_EJPEG0_POISON: +		amdgpu_jpeg_process_poison_irq(adev, source, entry); +		break;  	default:  		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",  			  entry->src_id, entry->src_data[0]); @@ -607,3 +628,63 @@ const struct amdgpu_ip_block_version jpeg_v4_0_ip_block = {  	.rev = 0,  	.funcs = &jpeg_v4_0_ip_funcs,  }; + +static uint32_t jpeg_v4_0_query_poison_by_instance(struct amdgpu_device *adev, +		uint32_t instance, uint32_t sub_block) +{ +	uint32_t poison_stat = 0, reg_value = 0; + +	switch (sub_block) { +	case AMDGPU_JPEG_V4_0_JPEG0: +		reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS); +		poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF); +		break; +	case AMDGPU_JPEG_V4_0_JPEG1: +		reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS); +		poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF); +		break; +	default: +		break; +	} + +	if (poison_stat) +		dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n", +			instance, sub_block); + +	return poison_stat; +} + +static bool jpeg_v4_0_query_ras_poison_status(struct amdgpu_device *adev) +{ +	uint32_t inst = 0, sub = 0, poison_stat = 0; + +	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++) +		for (sub = 0; sub < AMDGPU_JPEG_V4_0_MAX_SUB_BLOCK; sub++) +			poison_stat += +				jpeg_v4_0_query_poison_by_instance(adev, inst, sub); + +	return !!poison_stat; +} + +const struct amdgpu_ras_block_hw_ops jpeg_v4_0_ras_hw_ops = { +	.query_poison_status = jpeg_v4_0_query_ras_poison_status, +}; + +static struct amdgpu_jpeg_ras jpeg_v4_0_ras = { +	.ras_block = { +		.hw_ops = &jpeg_v4_0_ras_hw_ops, +	}, +}; + +static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev) +{ +	switch (adev->ip_versions[JPEG_HWIP][0]) { +	case IP_VERSION(4, 0, 0): +		adev->jpeg.ras = &jpeg_v4_0_ras; +		break; +	default: +		break; +	} + +	jpeg_set_ras_funcs(adev); +} diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h index f1ed6ccfedca..07d36c2abd6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h @@ -24,6 +24,13 @@  #ifndef __JPEG_V4_0_H__  #define __JPEG_V4_0_H__ +enum amdgpu_jpeg_v4_0_sub_block { +	AMDGPU_JPEG_V4_0_JPEG0 = 0, +	AMDGPU_JPEG_V4_0_JPEG1, + +	AMDGPU_JPEG_V4_0_MAX_SUB_BLOCK, +}; +  extern const struct amdgpu_ip_block_version jpeg_v4_0_ip_block;  #endif /* __JPEG_V4_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c index 067d10073a56..614394118a53 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c @@ -121,6 +121,10 @@ static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,  	if (r < 1) {  		DRM_ERROR("MES failed to response msg=%d\n",  			  x_pkt->header.opcode); + +		while (halt_if_hws_hang) +			schedule(); +  		return -ETIMEDOUT;  	} @@ -415,10 +419,6 @@ static int mes_v10_1_init_microcode(struct amdgpu_device *adev,  	mes_hdr = (const struct mes_firmware_header_v1_0 *)  		adev->mes.fw[pipe]->data; -	adev->mes.ucode_fw_version[pipe] = -		le32_to_cpu(mes_hdr->mes_ucode_version); -	adev->mes.ucode_fw_version[pipe] = -		le32_to_cpu(mes_hdr->mes_ucode_data_version);  	adev->mes.uc_start_addr[pipe] =  		
le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |  		((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index f141fadd2d86..970b066b37bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -129,6 +129,10 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,  	if (r < 1) {  		DRM_ERROR("MES failed to response msg=%d\n",  			  x_pkt->header.opcode); + +		while (halt_if_hws_hang) +			schedule(); +  		return -ETIMEDOUT;  	} @@ -384,6 +388,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)  	mes_set_hw_res_pkt.disable_reset = 1;  	mes_set_hw_res_pkt.disable_mes_log = 1;  	mes_set_hw_res_pkt.use_different_vmid_compute = 1; +	mes_set_hw_res_pkt.enable_reg_active_poll = 1;  	mes_set_hw_res_pkt.oversubscription_timer = 50;  	return mes_v11_0_submit_pkt_and_poll_completion(mes, @@ -485,10 +490,6 @@ static int mes_v11_0_init_microcode(struct amdgpu_device *adev,  	mes_hdr = (const struct mes_firmware_header_v1_0 *)  		adev->mes.fw[pipe]->data; -	adev->mes.ucode_fw_version[pipe] = -		le32_to_cpu(mes_hdr->mes_ucode_version); -	adev->mes.ucode_fw_version[pipe] = -		le32_to_cpu(mes_hdr->mes_ucode_data_version);  	adev->mes.uc_start_addr[pipe] =  		le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |  		((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32); @@ -1253,7 +1254,9 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev)  	if (adev->mes.ring.sched.ready)  		mes_v11_0_kiq_dequeue_sched(adev); -	mes_v11_0_enable(adev, false); +	if (!amdgpu_sriov_vf(adev)) +		mes_v11_0_enable(adev, false); +  	return 0;  } @@ -1339,7 +1342,8 @@ static int mes_v11_0_late_init(void *handle)  {  	struct amdgpu_device *adev = (struct amdgpu_device *)handle; -	if (!amdgpu_in_reset(adev) && +	/* it's only intended for use in mes_self_test case, not for s0ix and reset */ +	if (!amdgpu_in_reset(adev) && !adev->in_s0ix &&  	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)))  		amdgpu_mes_self_test(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c index 998b5d17b271..0e664d0cc8d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c @@ -319,7 +319,7 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)  	tmp = mmMMVM_L2_CNTL5_DEFAULT;  	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); -	WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp); +	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp);  }  static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index 1b027d069ab4..4638ea7c2eec 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -243,7 +243,7 @@ static void mmhub_v2_3_init_cache_regs(struct amdgpu_device *adev)  	tmp = mmMMVM_L2_CNTL5_DEFAULT;  	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); -	WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp); +	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp);  }  static void mmhub_v2_3_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c index a1d26c4d80b8..16cc82215e2e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c @@ -275,7 +275,7 @@ static 
void mmhub_v3_0_init_cache_regs(struct amdgpu_device *adev)  	tmp = regMMVM_L2_CNTL5_DEFAULT;  	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); -	WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp); +	WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);  }  static void mmhub_v3_0_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c index e8058edc1d10..6bdf2ef0298d 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c @@ -269,7 +269,7 @@ static void mmhub_v3_0_1_init_cache_regs(struct amdgpu_device *adev)  	tmp = regMMVM_L2_CNTL5_DEFAULT;  	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); -	WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp); +	WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);  }  static void mmhub_v3_0_1_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c index 770be0a8f7ce..45465acaa943 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c @@ -268,7 +268,7 @@ static void mmhub_v3_0_2_init_cache_regs(struct amdgpu_device *adev)  	tmp = regMMVM_L2_CNTL5_DEFAULT;  	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0); -	WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp); +	WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);  }  static void mmhub_v3_0_2_enable_system_domain(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index b3fba8dea63c..6853b93ac82e 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -82,10 +82,10 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode =  /* Navi1x */  static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] =  { -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -100,10 +100,10 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode =  /* Sienna Cichlid */  static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] =  { -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, +	
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -125,10 +125,10 @@ static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] =  static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] =  { -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -149,7 +149,7 @@ static struct amdgpu_video_codecs sriov_sc_video_codecs_decode =  /* Beige Goby*/  static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = { -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},  }; @@ -166,7 +166,7 @@ static const struct amdgpu_video_codecs bg_video_codecs_encode = {  /* Yellow Carp*/  static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = { -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)}, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index ed2293686f0d..9de46fa8f46c 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -126,32 +126,6 @@ out:  	return err;  } -static int psp_v10_0_ring_init(struct psp_context *psp, -			       enum psp_ring_type ring_type) -{ -	int ret = 0; -	struct psp_ring *ring; -	struct amdgpu_device *adev = psp->adev; - -	ring = &psp->km_ring; - -	ring->ring_type = ring_type; - -	/* allocate 4k Page of Local Frame Buffer memory for ring */ -	ring->ring_size = 0x1000; -	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, -				      AMDGPU_GEM_DOMAIN_VRAM, -				      &adev->firmware.rbuf, -				      &ring->ring_mem_mc_addr, -				      (void **)&ring->ring_mem); -	if (ret) { -		ring->ring_size = 0; -		return ret; -	} - -	return 0; -} -  static int psp_v10_0_ring_create(struct psp_context *psp,  				 enum psp_ring_type ring_type)  { @@ -245,7 +219,6 @@ static void psp_v10_0_ring_set_wptr(struct psp_context *psp, uint32_t value)  static const struct psp_funcs psp_v10_0_funcs = {  	
.init_microcode = psp_v10_0_init_microcode, -	.ring_init = psp_v10_0_ring_init,  	.ring_create = psp_v10_0_ring_create,  	.ring_stop = psp_v10_0_ring_stop,  	.ring_destroy = psp_v10_0_ring_destroy, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index 9518b4394a6e..bd3e3e23a939 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -360,32 +360,6 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)  	return ret;  } -static int psp_v11_0_ring_init(struct psp_context *psp, -			      enum psp_ring_type ring_type) -{ -	int ret = 0; -	struct psp_ring *ring; -	struct amdgpu_device *adev = psp->adev; - -	ring = &psp->km_ring; - -	ring->ring_type = ring_type; - -	/* allocate 4k Page of Local Frame Buffer memory for ring */ -	ring->ring_size = 0x1000; -	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, -				      AMDGPU_GEM_DOMAIN_VRAM, -				      &adev->firmware.rbuf, -				      &ring->ring_mem_mc_addr, -				      (void **)&ring->ring_mem); -	if (ret) { -		ring->ring_size = 0; -		return ret; -	} - -	return 0; -} -  static int psp_v11_0_ring_stop(struct psp_context *psp,  			      enum psp_ring_type ring_type)  { @@ -779,7 +753,6 @@ static const struct psp_funcs psp_v11_0_funcs = {  	.bootloader_load_spl = psp_v11_0_bootloader_load_spl,  	.bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv,  	.bootloader_load_sos = psp_v11_0_bootloader_load_sos, -	.ring_init = psp_v11_0_ring_init,  	.ring_create = psp_v11_0_ring_create,  	.ring_stop = psp_v11_0_ring_stop,  	.ring_destroy = psp_v11_0_ring_destroy, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c index ff13e1beb49b..5697760a819b 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c @@ -28,32 +28,6 @@  #include "mp/mp_11_0_8_offset.h" -static int psp_v11_0_8_ring_init(struct psp_context *psp, -			      enum psp_ring_type ring_type) -{ -	int ret = 0; -	struct psp_ring *ring; -	struct amdgpu_device *adev = psp->adev; - -	ring = &psp->km_ring; - -	ring->ring_type = ring_type; - -	/* allocate 4k Page of Local Frame Buffer memory for ring */ -	ring->ring_size = 0x1000; -	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, -				      AMDGPU_GEM_DOMAIN_VRAM, -				      &adev->firmware.rbuf, -				      &ring->ring_mem_mc_addr, -				      (void **)&ring->ring_mem); -	if (ret) { -		ring->ring_size = 0; -		return ret; -	} - -	return 0; -} -  static int psp_v11_0_8_ring_stop(struct psp_context *psp,  			       enum psp_ring_type ring_type)  { @@ -194,7 +168,6 @@ static void psp_v11_0_8_ring_set_wptr(struct psp_context *psp, uint32_t value)  }  static const struct psp_funcs psp_v11_0_8_funcs = { -	.ring_init = psp_v11_0_8_ring_init,  	.ring_create = psp_v11_0_8_ring_create,  	.ring_stop = psp_v11_0_8_ring_stop,  	.ring_destroy = psp_v11_0_8_ring_destroy, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c index 0b2ac418e4ac..8ed2281b6557 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c @@ -236,34 +236,6 @@ static void psp_v12_0_reroute_ih(struct psp_context *psp)  		     0x80000000, 0x8000FFFF, false);  } -static int psp_v12_0_ring_init(struct psp_context *psp, -			      enum psp_ring_type ring_type) -{ -	int ret = 0; -	struct psp_ring *ring; -	struct amdgpu_device *adev = psp->adev; - -	psp_v12_0_reroute_ih(psp); - -	ring = &psp->km_ring; - -	
ring->ring_type = ring_type; - -	/* allocate 4k Page of Local Frame Buffer memory for ring */ -	ring->ring_size = 0x1000; -	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, -				      AMDGPU_GEM_DOMAIN_VRAM, -				      &adev->firmware.rbuf, -				      &ring->ring_mem_mc_addr, -				      (void **)&ring->ring_mem); -	if (ret) { -		ring->ring_size = 0; -		return ret; -	} - -	return 0; -} -  static int psp_v12_0_ring_create(struct psp_context *psp,  				enum psp_ring_type ring_type)  { @@ -272,6 +244,8 @@ static int psp_v12_0_ring_create(struct psp_context *psp,  	struct psp_ring *ring = &psp->km_ring;  	struct amdgpu_device *adev = psp->adev; +	psp_v12_0_reroute_ih(psp); +  	if (amdgpu_sriov_vf(psp->adev)) {  		/* Write low address of the ring to C2PMSG_102 */  		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); @@ -425,7 +399,6 @@ static const struct psp_funcs psp_v12_0_funcs = {  	.init_microcode = psp_v12_0_init_microcode,  	.bootloader_load_sysdrv = psp_v12_0_bootloader_load_sysdrv,  	.bootloader_load_sos = psp_v12_0_bootloader_load_sos, -	.ring_init = psp_v12_0_ring_init,  	.ring_create = psp_v12_0_ring_create,  	.ring_stop = psp_v12_0_ring_stop,  	.ring_destroy = psp_v12_0_ring_destroy, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c index 88f9b327183a..e6a26a7e5e5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c @@ -46,6 +46,8 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");  MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");  MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin");  MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin"); +MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin");  /* For large FW files the time to complete can be very long */  #define USBC_PD_POLLING_LIMIT_S 240 @@ -102,6 +104,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)  	case IP_VERSION(13, 0, 3):  	case IP_VERSION(13, 0, 5):  	case IP_VERSION(13, 0, 8): +	case IP_VERSION(13, 0, 11):  		err = psp_init_toc_microcode(psp, chip_name);  		if (err)  			return err; @@ -268,32 +271,6 @@ static int psp_v13_0_bootloader_load_sos(struct psp_context *psp)  	return ret;  } -static int psp_v13_0_ring_init(struct psp_context *psp, -			      enum psp_ring_type ring_type) -{ -	int ret = 0; -	struct psp_ring *ring; -	struct amdgpu_device *adev = psp->adev; - -	ring = &psp->km_ring; - -	ring->ring_type = ring_type; - -	/* allocate 4k Page of Local Frame Buffer memory for ring */ -	ring->ring_size = 0x1000; -	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, -				      AMDGPU_GEM_DOMAIN_VRAM, -				      &adev->firmware.rbuf, -				      &ring->ring_mem_mc_addr, -				      (void **)&ring->ring_mem); -	if (ret) { -		ring->ring_size = 0; -		return ret; -	} - -	return 0; -} -  static int psp_v13_0_ring_stop(struct psp_context *psp,  			       enum psp_ring_type ring_type)  { @@ -729,7 +706,6 @@ static const struct psp_funcs psp_v13_0_funcs = {  	.bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv,  	.bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv,  	.bootloader_load_sos = psp_v13_0_bootloader_load_sos, -	.ring_init = psp_v13_0_ring_init,  	.ring_create = psp_v13_0_ring_create,  	.ring_stop = psp_v13_0_ring_stop,  	.ring_destroy = psp_v13_0_ring_destroy, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c index 321089dfa7db..9d4e24e518e8 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c +++ 
b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c @@ -199,32 +199,6 @@ static int psp_v13_0_4_bootloader_load_sos(struct psp_context *psp)  	return ret;  } -static int psp_v13_0_4_ring_init(struct psp_context *psp, -			      enum psp_ring_type ring_type) -{ -	int ret = 0; -	struct psp_ring *ring; -	struct amdgpu_device *adev = psp->adev; - -	ring = &psp->km_ring; - -	ring->ring_type = ring_type; - -	/* allocate 4k Page of Local Frame Buffer memory for ring */ -	ring->ring_size = 0x1000; -	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, -				      AMDGPU_GEM_DOMAIN_VRAM, -				      &adev->firmware.rbuf, -				      &ring->ring_mem_mc_addr, -				      (void **)&ring->ring_mem); -	if (ret) { -		ring->ring_size = 0; -		return ret; -	} - -	return 0; -} -  static int psp_v13_0_4_ring_stop(struct psp_context *psp,  			       enum psp_ring_type ring_type)  { @@ -373,7 +347,6 @@ static const struct psp_funcs psp_v13_0_4_funcs = {  	.bootloader_load_intf_drv = psp_v13_0_4_bootloader_load_intf_drv,  	.bootloader_load_dbg_drv = psp_v13_0_4_bootloader_load_dbg_drv,  	.bootloader_load_sos = psp_v13_0_4_bootloader_load_sos, -	.ring_init = psp_v13_0_4_ring_init,  	.ring_create = psp_v13_0_4_ring_create,  	.ring_stop = psp_v13_0_4_ring_stop,  	.ring_destroy = psp_v13_0_4_ring_destroy, diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 01f3bcc62a6c..157147c6c94e 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -160,32 +160,6 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)  	return ret;  } -static int psp_v3_1_ring_init(struct psp_context *psp, -			      enum psp_ring_type ring_type) -{ -	int ret = 0; -	struct psp_ring *ring; -	struct amdgpu_device *adev = psp->adev; - -	ring = &psp->km_ring; - -	ring->ring_type = ring_type; - -	/* allocate 4k Page of Local Frame Buffer memory for ring */ -	ring->ring_size = 0x1000; -	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, -				      AMDGPU_GEM_DOMAIN_VRAM, -				      &adev->firmware.rbuf, -				      &ring->ring_mem_mc_addr, -				      (void **)&ring->ring_mem); -	if (ret) { -		ring->ring_size = 0; -		return ret; -	} - -	return 0; -} -  static void psp_v3_1_reroute_ih(struct psp_context *psp)  {  	struct amdgpu_device *adev = psp->adev; @@ -401,7 +375,6 @@ static const struct psp_funcs psp_v3_1_funcs = {  	.init_microcode = psp_v3_1_init_microcode,  	.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,  	.bootloader_load_sos = psp_v3_1_bootloader_load_sos, -	.ring_init = psp_v3_1_ring_init,  	.ring_create = psp_v3_1_ring_create,  	.ring_stop = psp_v3_1_ring_stop,  	.ring_destroy = psp_v3_1_ring_destroy, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c index da3beb0bf2fa..049c26a45d85 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c @@ -455,6 +455,9 @@ static void sdma_v6_0_enable(struct amdgpu_device *adev, bool enable)  		sdma_v6_0_rlc_stop(adev);  	} +	if (amdgpu_sriov_vf(adev)) +		return; +  	for (i = 0; i < adev->sdma.num_instances; i++) {  		f32_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));  		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 
0 : 1); @@ -1523,6 +1526,7 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = {  	.align_mask = 0xf,  	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),  	.support_64bit_ptrs = true, +	.secure_submission_supported = true,  	.vmhub = AMDGPU_GFXHUB_0,  	.get_rptr = sdma_v6_0_ring_get_rptr,  	.get_wptr = sdma_v6_0_ring_get_wptr, diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index 4d5e718540aa..abca8b529721 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -112,14 +112,12 @@ static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,  static void si_dma_stop(struct amdgpu_device *adev)  { -	struct amdgpu_ring *ring;  	u32 rb_cntl;  	unsigned i;  	amdgpu_sdma_unset_buffer_funcs_helper(adev);  	for (i = 0; i < adev->sdma.num_instances; i++) { -		ring = &adev->sdma.instance[i].ring;  		/* dma0 */  		rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]);  		rb_cntl &= ~DMA_RB_ENABLE; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index e3b2b6b4f1a6..7cd17dda32ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -103,10 +103,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode =  /* Vega */  static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] =  { -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},  }; @@ -120,10 +120,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode =  /* Raven */  static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] =  { -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)}, @@ -138,10 +138,10 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode =  /* Renoir, Arcturus */  static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] =  { -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)}, -	
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h index 799925d22fc8..2357ff39323f 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15d.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h @@ -162,6 +162,7 @@  		 * 2 - Bypass  		 */  #define     INDIRECT_BUFFER_PRE_ENB(x)		 ((x) << 21) +#define     INDIRECT_BUFFER_PRE_RESUME(x)               ((x) << 30)  #define	PACKET3_COPY_DATA				0x40  #define	PACKET3_PFP_SYNC_ME				0x42  #define	PACKET3_COND_WRITE				0x45 @@ -184,6 +185,7 @@  #define		EOP_TC_ACTION_EN                        (1 << 17) /* L2 */  #define		EOP_TC_NC_ACTION_EN			(1 << 19)  #define		EOP_TC_MD_ACTION_EN			(1 << 21) /* L2 metadata */ +#define		EOP_EXEC				(1 << 28) /* For Trailing Fence */  #define		DATA_SEL(x)                             ((x) << 29)  		/* 0 - discard diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index e08044008186..5562670b7b52 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -43,6 +43,7 @@  #include "soc15.h"  #include "soc15_common.h"  #include "soc21.h" +#include "mxgpu_nv.h"  static const struct amd_ip_funcs soc21_common_ip_funcs; @@ -61,7 +62,7 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode =  static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array[] =  { -	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)}, +	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},  	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)}, @@ -322,8 +323,10 @@ soc21_asic_reset_method(struct amdgpu_device *adev)  	switch (adev->ip_versions[MP1_HWIP][0]) {  	case IP_VERSION(13, 0, 0):  	case IP_VERSION(13, 0, 7): +	case IP_VERSION(13, 0, 10):  		return AMD_RESET_METHOD_MODE1;  	case IP_VERSION(13, 0, 4): +	case IP_VERSION(13, 0, 11):  		return AMD_RESET_METHOD_MODE2;  	default:  		if (amdgpu_dpm_is_baco_supported(adev)) @@ -584,10 +587,6 @@ static int soc21_common_early_init(void *handle)  			AMD_PG_SUPPORT_JPEG |  			AMD_PG_SUPPORT_ATHUB |  			AMD_PG_SUPPORT_MMHUB; -		if (amdgpu_sriov_vf(adev)) { -			adev->cg_flags = 0; -			adev->pg_flags = 0; -		}  		adev->external_rev_id = adev->rev_id + 0x1; // TODO: need update  		break;  	case IP_VERSION(11, 0, 2): @@ -645,28 +644,64 @@ static int soc21_common_early_init(void *handle)  		adev->pg_flags = AMD_PG_SUPPORT_VCN |  			AMD_PG_SUPPORT_VCN_DPG |  			AMD_PG_SUPPORT_JPEG; -		if (amdgpu_sriov_vf(adev)) { -			/* hypervisor control CG and PG enablement */ -			adev->cg_flags = 0; -			adev->pg_flags = 0; -		}  		adev->external_rev_id = adev->rev_id + 0x20;  		break; +	case 
IP_VERSION(11, 0, 4): +		adev->cg_flags = +			AMD_CG_SUPPORT_GFX_CGCG | +			AMD_CG_SUPPORT_GFX_CGLS | +			AMD_CG_SUPPORT_GFX_MGCG | +			AMD_CG_SUPPORT_GFX_FGCG | +			AMD_CG_SUPPORT_REPEATER_FGCG | +			AMD_CG_SUPPORT_GFX_PERF_CLK | +			AMD_CG_SUPPORT_MC_MGCG | +			AMD_CG_SUPPORT_MC_LS | +			AMD_CG_SUPPORT_HDP_MGCG | +			AMD_CG_SUPPORT_HDP_LS | +			AMD_CG_SUPPORT_ATHUB_MGCG | +			AMD_CG_SUPPORT_ATHUB_LS | +			AMD_CG_SUPPORT_IH_CG | +			AMD_CG_SUPPORT_BIF_MGCG | +			AMD_CG_SUPPORT_BIF_LS | +			AMD_CG_SUPPORT_VCN_MGCG | +			AMD_CG_SUPPORT_JPEG_MGCG; +		adev->pg_flags = AMD_PG_SUPPORT_VCN | +			AMD_PG_SUPPORT_VCN_DPG | +			AMD_PG_SUPPORT_GFX_PG | +			AMD_PG_SUPPORT_JPEG; +		adev->external_rev_id = adev->rev_id + 0x1; +		break; +  	default:  		/* FIXME: not supported yet */  		return -EINVAL;  	} +	if (amdgpu_sriov_vf(adev)) { +		amdgpu_virt_init_setting(adev); +		xgpu_nv_mailbox_set_irq_funcs(adev); +	} +  	return 0;  }  static int soc21_common_late_init(void *handle)  { +	struct amdgpu_device *adev = (struct amdgpu_device *)handle; + +	if (amdgpu_sriov_vf(adev)) +		xgpu_nv_mailbox_get_irq(adev); +  	return 0;  }  static int soc21_common_sw_init(void *handle)  { +	struct amdgpu_device *adev = (struct amdgpu_device *)handle; + +	if (amdgpu_sriov_vf(adev)) +		xgpu_nv_mailbox_add_irq_id(adev); +  	return 0;  } @@ -704,6 +739,9 @@ static int soc21_common_hw_fini(void *handle)  	/* disable the doorbell aperture */  	soc21_enable_doorbell_aperture(adev, false); +	if (amdgpu_sriov_vf(adev)) +		xgpu_nv_mailbox_put_irq(adev); +  	return 0;  } diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c index 5d5d031c9e7d..72fd963f178b 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c @@ -187,9 +187,9 @@ static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,  	}  } -static void umc_v6_7_convert_error_address(struct amdgpu_device *adev, -					struct ras_err_data *err_data, uint64_t err_addr, -					uint32_t ch_inst, uint32_t umc_inst) +void umc_v6_7_convert_error_address(struct amdgpu_device *adev, +				    struct ras_err_data *err_data, uint64_t err_addr, +				    uint32_t ch_inst, uint32_t umc_inst)  {  	uint32_t channel_index;  	uint64_t soc_pa, retired_page, column; @@ -553,5 +553,4 @@ struct amdgpu_umc_ras umc_v6_7_ras = {  	.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,  	.ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count,  	.ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address, -	.convert_ras_error_address = umc_v6_7_convert_error_address,  }; diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h index fe41ed2f5945..105245d5b6e5 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h @@ -71,5 +71,7 @@ extern const uint32_t  	umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];  extern const uint32_t  	umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM]; - +void umc_v6_7_convert_error_address(struct amdgpu_device *adev, +                                    struct ras_err_data *err_data, uint64_t err_addr, +                                    uint32_t ch_inst, uint32_t umc_inst);  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c index 91235df54e22..b7da4528cf0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c +++ 
b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c @@ -46,6 +46,16 @@ const struct channelnum_map_colbit umc_v8_10_channelnum_map_colbit_table[] = {  };  const uint32_t +	umc_v8_10_channel_idx_tbl_ext0[] +				[UMC_V8_10_UMC_INSTANCE_NUM] +				[UMC_V8_10_CHANNEL_INSTANCE_NUM] = { +	   {{1,   5}, {7,  3}}, +	   {{14, 15}, {13, 12}}, +	   {{10, 11}, {9,  8}}, +	   {{6,   2}, {0,  4}} +	}; + +const uint32_t  	umc_v8_10_channel_idx_tbl[]  				[UMC_V8_10_UMC_INSTANCE_NUM]  				[UMC_V8_10_CHANNEL_INSTANCE_NUM] = { diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h index 849ede88e111..25eaf4af5fcf 100644 --- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h +++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h @@ -66,5 +66,9 @@ extern const uint32_t  				[UMC_V8_10_UMC_INSTANCE_NUM]  				[UMC_V8_10_CHANNEL_INSTANCE_NUM]; +extern const uint32_t +	umc_v8_10_channel_idx_tbl_ext0[] +				[UMC_V8_10_UMC_INSTANCE_NUM] +				[UMC_V8_10_CHANNEL_INSTANCE_NUM];  #endif diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 375c440957dc..5fe872f4bea7 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -216,8 +216,8 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle  	uint64_t addr;  	int i, r; -	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, -					AMDGPU_IB_POOL_DIRECT, &job); +	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, +				     AMDGPU_IB_POOL_DIRECT, &job);  	if (r)  		return r; @@ -280,8 +280,8 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,  	uint64_t addr;  	int i, r; -	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, -					AMDGPU_IB_POOL_DIRECT, &job); +	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, +				     AMDGPU_IB_POOL_DIRECT, &job);  	if (r)  		return r; diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index e668b3baa8c6..e407be6cb63c 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -213,7 +213,7 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)   *   * Open up a stream for HW test   */ -static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, +static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, u32 handle,  				       struct amdgpu_bo *bo,  				       struct dma_fence **fence)  { @@ -224,8 +224,8 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle  	uint64_t addr;  	int i, r; -	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, -					AMDGPU_IB_POOL_DIRECT, &job); +	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, +				     AMDGPU_IB_POOL_DIRECT, &job);  	if (r)  		return r; @@ -276,7 +276,7 @@ err:   *   * Close up a stream for HW test or if userspace failed to do so   */ -static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, +static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, u32 handle,  					struct amdgpu_bo *bo,  					struct dma_fence **fence)  { @@ -287,8 +287,8 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl  	uint64_t addr;  	int i, r; -	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, -					AMDGPU_IB_POOL_DIRECT, &job); +	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4, +				     AMDGPU_IB_POOL_DIRECT, &job);  	if (r)  		return r; diff --git 
a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c index 8a7006d62a87..ec87b00f2e05 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c @@ -770,6 +770,33 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)  	}  } +static void vcn_v2_6_enable_ras(struct amdgpu_device *adev, int inst_idx, +				bool indirect) +{ +	uint32_t tmp; + +	if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(2, 6, 0)) +		return; + +	tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK | +	      VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK | +	      VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK | +	      VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK; +	WREG32_SOC15_DPG_MODE(inst_idx, +			      SOC15_DPG_MODE_OFFSET(VCN, 0, mmVCN_RAS_CNTL), +			      tmp, 0, indirect); + +	tmp = UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; +	WREG32_SOC15_DPG_MODE(inst_idx, +			      SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_VCPU_INT_EN), +			      tmp, 0, indirect); + +	tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; +	WREG32_SOC15_DPG_MODE(inst_idx, +			      SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_SYS_INT_EN), +			      tmp, 0, indirect); +} +  static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)  {  	volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr; @@ -849,6 +876,8 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo  	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(  		VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect); +	vcn_v2_6_enable_ras(adev, inst_idx, indirect); +  	/* unblock VCPU register access */  	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(  		VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect); @@ -2002,16 +2031,5 @@ static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev)  		break;  	} -	if (adev->vcn.ras) { -		amdgpu_ras_register_ras_block(adev, &adev->vcn.ras->ras_block); - -		strcpy(adev->vcn.ras->ras_block.ras_comm.name, "vcn"); -		adev->vcn.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN; -		adev->vcn.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON; -		adev->vcn.ras_if = &adev->vcn.ras->ras_block.ras_comm; - -		/* If don't define special ras_late_init function, use default ras_late_init */ -		if (!adev->vcn.ras->ras_block.ras_late_init) -			adev->vcn.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init; -	} +	amdgpu_vcn_set_ras_funcs(adev);  } diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index dcc49b01bd59..1e2b22299975 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -31,6 +31,7 @@  #include "soc15_hw_ip.h"  #include "vcn_v2_0.h"  #include "mmsch_v4_0.h" +#include "vcn_v4_0.h"  #include "vcn/vcn_4_0_0_offset.h"  #include "vcn/vcn_4_0_0_sh_mask.h" @@ -64,6 +65,7 @@ static int vcn_v4_0_set_powergating_state(void *handle,  static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev,          int inst_idx, struct dpg_pause_state *new_state);  static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring); +static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev);  /**   * vcn_v4_0_early_init - set function pointers @@ -84,6 +86,7 @@ static int vcn_v4_0_early_init(void *handle)  	vcn_v4_0_set_unified_ring_funcs(adev);  	vcn_v4_0_set_irq_funcs(adev); +	vcn_v4_0_set_ras_funcs(adev);  	return 0;  } @@ -125,6 +128,12 @@ static int vcn_v4_0_sw_init(void *handle)  		if (r)  			return r; +		/* VCN POISON TRAP */ +		r = 
amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i], +				VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq); +		if (r) +			return r; +  		ring = &adev->vcn.inst[i].ring_enc[0];  		ring->use_doorbell = true;  		if (amdgpu_sriov_vf(adev)) @@ -289,6 +298,7 @@ static int vcn_v4_0_hw_fini(void *handle)  			}  		} +		amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0);  	}  	return 0; @@ -852,6 +862,28 @@ static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst)  	return;  } +static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx, +				bool indirect) +{ +	uint32_t tmp; + +	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN)) +		return; + +	tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK | +	      VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK | +	      VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK | +	      VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK; +	WREG32_SOC15_DPG_MODE(inst_idx, +			      SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL), +			      tmp, 0, indirect); + +	tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK; +	WREG32_SOC15_DPG_MODE(inst_idx, +			      SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN), +			      tmp, 0, indirect); +} +  /**   * vcn_v4_0_start_dpg_mode - VCN start with dpg mode   * @@ -940,6 +972,8 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo  	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(  		VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect); +	vcn_v4_0_enable_ras(adev, inst_idx, indirect); +  	/* enable master interrupt */  	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(  		VCN, inst_idx, regUVD_MASTINT_EN), @@ -1932,6 +1966,9 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_  	case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:  		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);  		break; +	case VCN_4_0__SRCID_UVD_POISON: +		amdgpu_vcn_process_poison_irq(adev, source, entry); +		break;  	default:  		DRM_ERROR("Unhandled interrupt: %d %d\n",  			  entry->src_id, entry->src_data[0]); @@ -1994,3 +2031,60 @@ const struct amdgpu_ip_block_version vcn_v4_0_ip_block =  	.rev = 0,  	.funcs = &vcn_v4_0_ip_funcs,  }; + +static uint32_t vcn_v4_0_query_poison_by_instance(struct amdgpu_device *adev, +			uint32_t instance, uint32_t sub_block) +{ +	uint32_t poison_stat = 0, reg_value = 0; + +	switch (sub_block) { +	case AMDGPU_VCN_V4_0_VCPU_VCODEC: +		reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS); +		poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF); +		break; +	default: +		break; +	} + +	if (poison_stat) +		dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n", +			instance, sub_block); + +	return poison_stat; +} + +static bool vcn_v4_0_query_ras_poison_status(struct amdgpu_device *adev) +{ +	uint32_t inst, sub; +	uint32_t poison_stat = 0; + +	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++) +		for (sub = 0; sub < AMDGPU_VCN_V4_0_MAX_SUB_BLOCK; sub++) +			poison_stat += +				vcn_v4_0_query_poison_by_instance(adev, inst, sub); + +	return !!poison_stat; +} + +const struct amdgpu_ras_block_hw_ops vcn_v4_0_ras_hw_ops = { +	.query_poison_status = vcn_v4_0_query_ras_poison_status, +}; + +static struct amdgpu_vcn_ras vcn_v4_0_ras = { +	.ras_block = { +		.hw_ops = &vcn_v4_0_ras_hw_ops, +	}, +}; + +static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev) +{ +	switch (adev->ip_versions[VCN_HWIP][0]) { +	case IP_VERSION(4, 0, 0): +		adev->vcn.ras = &vcn_v4_0_ras; +		break; +	default: +		break; +	} + +	
amdgpu_vcn_set_ras_funcs(adev); +} diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h index 7c5c9d91bb52..7d3d11f40f27 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h @@ -24,6 +24,12 @@  #ifndef __VCN_V4_0_H__  #define __VCN_V4_0_H__ +enum amdgpu_vcn_v4_0_sub_block { +	AMDGPU_VCN_V4_0_VCPU_VCODEC = 0, + +	AMDGPU_VCN_V4_0_MAX_SUB_BLOCK, +}; +  extern const struct amdgpu_ip_block_version vcn_v4_0_ip_block;  #endif /* __VCN_V4_0_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c index 59dfca093155..1706081d054d 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -267,7 +267,7 @@ static void vega20_ih_reroute_ih(struct amdgpu_device *adev)  	/* vega20 ih reroute will go through psp this  	 * function is used for newer asics starting arcturus  	 */ -	if (adev->asic_type >= CHIP_ARCTURUS) { +	if (adev->ip_versions[OSSSYS_HWIP][0] >= IP_VERSION(4, 2, 1)) {  		/* Reroute to IH ring 1 for VMC */  		WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x12);  		tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA); @@ -308,7 +308,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)  	adev->nbio.funcs->ih_control(adev); -	if (adev->asic_type == CHIP_ARCTURUS && +	if ((adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 2, 1)) &&  	    adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {  		ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);  		if (adev->irq.ih.use_bus_addr) { @@ -321,7 +321,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)  	/* psp firmware won't program IH_CHICKEN for aldebaran  	 * driver needs to program it properly according to  	 * MC_SPACE type in IH_RB_CNTL */ -	if (adev->asic_type == CHIP_ALDEBARAN) { +	if (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0)) {  		ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN);  		if (adev->irq.ih.use_bus_addr) {  			ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index f6ffd7c96ff9..12ef782eb478 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -2111,6 +2111,8 @@ void vi_set_virt_ops(struct amdgpu_device *adev)  int vi_set_ip_blocks(struct amdgpu_device *adev)  { +	amdgpu_device_set_sriov_virtual_display(adev); +  	switch (adev->asic_type) {  	case CHIP_TOPAZ:  		/* topaz has no DCE, UVD, VCE */ @@ -2130,7 +2132,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)  		amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);  		amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);  		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); -		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) +		if (adev->enable_virtual_display)  			amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);  #if defined(CONFIG_DRM_AMD_DC)  		else if (amdgpu_device_has_dc_support(adev)) @@ -2150,7 +2152,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)  		amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);  		amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);  		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); -		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) +		if (adev->enable_virtual_display)  			amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);  #if defined(CONFIG_DRM_AMD_DC)  		else if (amdgpu_device_has_dc_support(adev)) | 
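
The psp_vXX_0_ring_init() routines removed in the hunks above are byte-for-byte identical, and each psp_funcs table drops its .ring_init hook in the same patch, so the per-IP copies have presumably been folded into one shared helper in the common PSP code. A minimal sketch of what that helper would look like, assuming it keeps the removed logic unchanged (the name psp_ring_init and its placement in amdgpu_psp.c are assumptions, not shown in these hunks):

/*
 * Hedged sketch only: mirrors the deleted psp_vXX_0_ring_init() bodies.
 * The helper name and its location in the common PSP code are assumptions.
 */
static int psp_ring_init(struct psp_context *psp, enum psp_ring_type ring_type)
{
	int ret = 0;
	struct psp_ring *ring;
	struct amdgpu_device *adev = psp->adev;

	ring = &psp->km_ring;
	ring->ring_type = ring_type;

	/* allocate 4k Page of Local Frame Buffer memory for ring */
	ring->ring_size = 0x1000;
	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &adev->firmware.rbuf,
				      &ring->ring_mem_mc_addr,
				      (void **)&ring->ring_mem);
	if (ret) {
		ring->ring_size = 0;
		return ret;
	}

	return 0;
}

Whether the consolidated copy still allocates the ring buffer in VRAM, or reroutes the IH as psp_v12_0 now does from ring_create, is not visible in these hunks; the sketch simply reproduces the deleted bodies.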
