diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')
| -rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 113 | 
1 files changed, 81 insertions, 32 deletions
| diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 321032d3a51a..c241317edee7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -26,12 +26,12 @@  #include <drm/drm_drv.h>  #include <drm/drm_gem.h>  #include <drm/drm_vblank.h> +#include <drm/drm_managed.h>  #include "amdgpu_drv.h"  #include <drm/drm_pciids.h>  #include <linux/console.h>  #include <linux/module.h> -#include <linux/pci.h>  #include <linux/pm_runtime.h>  #include <linux/vga_switcheroo.h>  #include <drm/drm_probe_helper.h> @@ -88,9 +88,10 @@   * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness   * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC   * - 3.39.0 - DMABUF implicit sync does a full pipeline sync + * - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ   */  #define KMS_DRIVER_MAJOR	3 -#define KMS_DRIVER_MINOR	39 +#define KMS_DRIVER_MINOR	40  #define KMS_DRIVER_PATCHLEVEL	0  int amdgpu_vram_limit = 0; @@ -146,16 +147,18 @@ int amdgpu_async_gfx_ring = 1;  int amdgpu_mcbp = 0;  int amdgpu_discovery = -1;  int amdgpu_mes = 0; -int amdgpu_noretry; +int amdgpu_noretry = -1;  int amdgpu_force_asic_type = -1;  int amdgpu_tmz = 0;  int amdgpu_reset_method = -1; /* auto */ +int amdgpu_num_kcq = -1;  struct amdgpu_mgpu_info mgpu_info = {  	.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),  };  int amdgpu_ras_enable = -1;  uint amdgpu_ras_mask = 0xffffffff; +int amdgpu_bad_page_threshold = -1;  /**   * DOC: vramlimit (int) @@ -393,12 +396,12 @@ MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default  module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);  /** - * DOC: ppfeaturemask (uint) + * DOC: ppfeaturemask (hexint)   * Override power features enabled. See enum PP_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.   * The default is the current set of stable power features.   */  MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))"); -module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444); +module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, hexint, 0444);  /**   * DOC: forcelongtraining (uint) @@ -593,8 +596,13 @@ MODULE_PARM_DESC(mes,  	"Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)");  module_param_named(mes, amdgpu_mes, int, 0444); +/** + * DOC: noretry (int) + * Disable retry faults in the GPU memory controller. + * (0 = retry enabled, 1 = retry disabled, -1 auto (default)) + */  MODULE_PARM_DESC(noretry, -	"Disable retry faults (0 = retry enabled (default), 1 = retry disabled)"); +	"Disable retry faults (0 = retry enabled, 1 = retry disabled, -1 auto (default))");  module_param_named(noretry, amdgpu_noretry, int, 0644);  /** @@ -676,11 +684,14 @@ MODULE_PARM_DESC(debug_largebar,   * Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT   * table to get information about AMD APUs. This option can serve as a workaround on   * systems with a broken CRAT table. + * + * Default is auto (according to asic type, iommu_v2, and crat table, to decide + * whehter use CRAT)   */  int ignore_crat;  module_param(ignore_crat, int, 0444);  MODULE_PARM_DESC(ignore_crat, -	"Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); +	"Ignore CRAT table during KFD initialization (0 = auto (default), 1 = ignore CRAT)");  /**   * DOC: halt_if_hws_hang (int) @@ -715,6 +726,15 @@ MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1  bool debug_evictions;  module_param(debug_evictions, bool, 0644);  MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = default)"); + +/** + * DOC: no_system_mem_limit(bool) + * Disable system memory limit, to support multiple process shared memory + */ +bool no_system_mem_limit; +module_param(no_system_mem_limit, bool, 0644); +MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)"); +  #endif  /** @@ -765,6 +785,19 @@ module_param_named(tmz, amdgpu_tmz, int, 0444);  MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)");  module_param_named(reset_method, amdgpu_reset_method, int, 0444); +/** + * DOC: bad_page_threshold (int) + * Bad page threshold is to specify the threshold value of faulty pages + * detected by RAS ECC, that may result in GPU entering bad status if total + * faulty pages by ECC exceed threshold value and leave it for user's further + * check. + */ +MODULE_PARM_DESC(bad_page_threshold, "Bad page threshold(-1 = auto(default value), 0 = disable bad page retirement)"); +module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444); + +MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)"); +module_param_named(num_kcq, amdgpu_num_kcq, int, 0444); +  static const struct pci_device_id pciidlist[] = {  #ifdef  CONFIG_DRM_AMDGPU_SI  	{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -1065,7 +1098,7 @@ static struct drm_driver kms_driver;  static int amdgpu_pci_probe(struct pci_dev *pdev,  			    const struct pci_device_id *ent)  { -	struct drm_device *dev; +	struct drm_device *ddev;  	struct amdgpu_device *adev;  	unsigned long flags = ent->driver_data;  	int ret, retry = 0; @@ -1081,6 +1114,16 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,  		return -ENODEV;  	} +	/* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping, +	 * however, SME requires an indirect IOMMU mapping because the encryption +	 * bit is beyond the DMA mask of the chip. +	 */ +	if (mem_encrypt_active() && ((flags & AMD_ASIC_MASK) == CHIP_RAVEN)) { +		dev_info(&pdev->dev, +			 "SME is not compatible with RAVEN\n"); +		return -ENOTSUPP; +	} +  #ifdef CONFIG_DRM_AMDGPU_SI  	if (!amdgpu_si_support) {  		switch (flags & AMD_ASIC_MASK) { @@ -1121,36 +1164,39 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,  	if (ret)  		return ret; -	dev = drm_dev_alloc(&kms_driver, &pdev->dev); -	if (IS_ERR(dev)) -		return PTR_ERR(dev); +	adev = devm_drm_dev_alloc(&pdev->dev, &kms_driver, typeof(*adev), ddev); +	if (IS_ERR(adev)) +		return PTR_ERR(adev); + +	adev->dev  = &pdev->dev; +	adev->pdev = pdev; +	ddev = adev_to_drm(adev);  	if (!supports_atomic) -		dev->driver_features &= ~DRIVER_ATOMIC; +		ddev->driver_features &= ~DRIVER_ATOMIC;  	ret = pci_enable_device(pdev);  	if (ret) -		goto err_free; - -	dev->pdev = pdev; +		return ret; -	pci_set_drvdata(pdev, dev); +	ddev->pdev = pdev; +	pci_set_drvdata(pdev, ddev); -	ret = amdgpu_driver_load_kms(dev, ent->driver_data); +	ret = amdgpu_driver_load_kms(adev, ent->driver_data);  	if (ret)  		goto err_pci;  retry_init: -	ret = drm_dev_register(dev, ent->driver_data); +	ret = drm_dev_register(ddev, ent->driver_data);  	if (ret == -EAGAIN && ++retry <= 3) {  		DRM_INFO("retry init %d\n", retry);  		/* Don't request EX mode too frequently which is attacking */  		msleep(5000);  		goto retry_init; -	} else if (ret) +	} else if (ret) {  		goto err_pci; +	} -	adev = dev->dev_private;  	ret = amdgpu_debugfs_init(adev);  	if (ret)  		DRM_ERROR("Creating debugfs files failed (%d).\n", ret); @@ -1159,8 +1205,6 @@ retry_init:  err_pci:  	pci_disable_device(pdev); -err_free: -	drm_dev_put(dev);  	return ret;  } @@ -1177,14 +1221,13 @@ amdgpu_pci_remove(struct pci_dev *pdev)  	amdgpu_driver_unload_kms(dev);  	pci_disable_device(pdev);  	pci_set_drvdata(pdev, NULL); -	drm_dev_put(dev);  }  static void  amdgpu_pci_shutdown(struct pci_dev *pdev)  {  	struct drm_device *dev = pci_get_drvdata(pdev); -	struct amdgpu_device *adev = dev->dev_private; +	struct amdgpu_device *adev = drm_to_adev(dev);  	if (amdgpu_ras_intr_triggered())  		return; @@ -1217,7 +1260,7 @@ static int amdgpu_pmops_resume(struct device *dev)  static int amdgpu_pmops_freeze(struct device *dev)  {  	struct drm_device *drm_dev = dev_get_drvdata(dev); -	struct amdgpu_device *adev = drm_dev->dev_private; +	struct amdgpu_device *adev = drm_to_adev(drm_dev);  	int r;  	adev->in_hibernate = true; @@ -1253,7 +1296,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)  {  	struct pci_dev *pdev = to_pci_dev(dev);  	struct drm_device *drm_dev = pci_get_drvdata(pdev); -	struct amdgpu_device *adev = drm_dev->dev_private; +	struct amdgpu_device *adev = drm_to_adev(drm_dev);  	int ret, i;  	if (!adev->runpm) { @@ -1287,7 +1330,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)  		if (amdgpu_is_atpx_hybrid()) {  			pci_ignore_hotplug(pdev);  		} else { -			pci_save_state(pdev); +			amdgpu_device_cache_pci_state(pdev);  			pci_disable_device(pdev);  			pci_ignore_hotplug(pdev);  			pci_set_power_state(pdev, PCI_D3cold); @@ -1304,7 +1347,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)  {  	struct pci_dev *pdev = to_pci_dev(dev);  	struct drm_device *drm_dev = pci_get_drvdata(pdev); -	struct amdgpu_device *adev = drm_dev->dev_private; +	struct amdgpu_device *adev = drm_to_adev(drm_dev);  	int ret;  	if (!adev->runpm) @@ -1320,7 +1363,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)  			pci_set_master(pdev);  		} else {  			pci_set_power_state(pdev, PCI_D0); -			pci_restore_state(pdev); +			amdgpu_device_load_pci_state(pdev);  			ret = pci_enable_device(pdev);  			if (ret)  				return ret; @@ -1340,7 +1383,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)  static int amdgpu_pmops_runtime_idle(struct device *dev)  {  	struct drm_device *drm_dev = dev_get_drvdata(dev); -	struct amdgpu_device *adev = drm_dev->dev_private; +	struct amdgpu_device *adev = drm_to_adev(drm_dev);  	/* we don't want the main rpm_idle to call suspend - we want to autosuspend */  	int ret = 1; @@ -1499,6 +1542,13 @@ static struct drm_driver kms_driver = {  	.patchlevel = KMS_DRIVER_PATCHLEVEL,  }; +static struct pci_error_handlers amdgpu_pci_err_handler = { +	.error_detected	= amdgpu_pci_error_detected, +	.mmio_enabled	= amdgpu_pci_mmio_enabled, +	.slot_reset	= amdgpu_pci_slot_reset, +	.resume		= amdgpu_pci_resume, +}; +  static struct pci_driver amdgpu_kms_pci_driver = {  	.name = DRIVER_NAME,  	.id_table = pciidlist, @@ -1506,10 +1556,9 @@ static struct pci_driver amdgpu_kms_pci_driver = {  	.remove = amdgpu_pci_remove,  	.shutdown = amdgpu_pci_shutdown,  	.driver.pm = &amdgpu_pm_ops, +	.err_handler = &amdgpu_pci_err_handler,  }; - -  static int __init amdgpu_init(void)  {  	int r; | 
