summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-04-28 10:01:40 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-04-28 10:01:40 -0700
commit68a32ba14177d4a21c4a9a941cf1d7aea86d436f (patch)
tree945c20860766c22b19d1806d5b5db5b37bc65b65 /drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
parent3aa139aa9fdc138a84243dc49dc18d9b40e1c6e4 (diff)
parenta1a1ca70deb3ec600eeabb21de7f3f48aaae5695 (diff)
Merge tag 'drm-next-2021-04-28' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie: "The usual lots of work all over the place. i915 has gotten some Alderlake work and prelim DG1 code, along with a major locking rework over the GEM code, and brings back the property of timing out long running jobs using a watchdog. amdgpu has some Aldebaran support (new GPU), freesync HDMI support along with a lot of other fixes. Outside of the drm, there is a new printf specifier added which should have all the correct acks/sobs: - printk fourcc modifier support added %p4cc Summary: core: - drm_crtc_commit_wait - atomic plane state helpers reworked for full state - dma-buf heaps API rework - edid: rework and improvements for displayid dp-mst: - better topology logging bridge: - Chipone ICN6211 - Lontium LT8912B - anx7625 regulator support panel: - fix lt9611 4k panels handling simple-kms: - add plane state helpers ttm: - debugfs support - removal of unused sysfs - ignore signaled moved fences - ioremap buffer according to mem caching i915: - Alderlake S enablement - Conversion to dma_resv_locking - Bring back watchdog timeout support - legacy ioctl cleanups - add GEM TODO and RFC process - DG1 LMEM preparation work - intel_display.c refactoring - Gen9/TGL PCH combination support - eDP MSO Support - multiple PSR instance support - Link training debug updates - Disable PSR2 support on JSL/EHL - DDR5/LPDDR5 support for bw calcs - LSPCON limited to gen9/10 platforms - HSW/BDW async flip/VTd corruption workaround - SAGV watermark fixes - SNB hard hang on ring resume fix - Limit imported dma-buf size - move to use new tasklet API - refactor KBL/TGL/ADL-S display/gt steppings - refactoring legacy DP/HDMI, FB plane code out amdgpu: - uapi: add ioctl to query video capabilities - Initial AMD Freesync HDMI support - Initial Aldebaran support - 10bpc dithering improvements - DCN secure display support - Drop legacy IO BAR requirements - PCIE/S0ix/RAS/Prime/Reset fixes - Display ASSR support - SMU gfx busy queues for RV/PCO - Initial LTTPR 
display work amdkfd: - MMU notifier fixes - APU fixes radeon: - debugfs cleanups - fw error handling fix - Flexible array cleanups msm: - big DSI phy/pll cleanup - sc7280 initial support - common bandwidth scaling path - shrinker locking contention fixes - unpin/swap support for GEM objects ast: - cursor plane handling reworked tegra: - don't register DP AUX channels before connectors zynqmp: - fix OOB struct padding memset gma500: - drop ttm and medfield support exynos: - request_irq cleanup function mediatek: - fine tune line time for EOTp - MT8192 dpi support - atomic crtc config updates - don't support HDMI connector creation mxsfb: - imx8mm support panfrost: - MMU IRQ handling rework qxl: - locking fixes - resource deallocation changes sun4i: - add alpha properties to UI/VI layers vc4: - RPi4 CEC support vmwgfx: - doc cleanups arc: - moved to drm/tiny" * tag 'drm-next-2021-04-28' of git://anongit.freedesktop.org/drm/drm: (1390 commits) drm/ttm: Don't count pages in SG BOs against pages_limit drm/ttm: fix return value check drm/bridge: lt8912b: fix incorrect handling of of_* return values drm: bridge: fix LONTIUM use of mipi_dsi_() functions drm: bridge: fix ANX7625 use of mipi_dsi_() functions drm/amdgpu: page retire over debugfs mechanism drm/radeon: Fix a missing check bug in radeon_dp_mst_detect() drm/amd/display: Fix the Wunused-function warning drm/radeon/r600: Fix variables that are not used after assignment drm/amdgpu/smu7: fix CAC setting on TOPAZ drm/amd/display: Update DCN302 SR Exit Latency drm/amdgpu: enable ras eeprom on aldebaran drm/amdgpu: RAS harvest on driver load drm/amdgpu: add ras aldebaran ras eeprom driver drm/amd/pm: increase time out value when sending msg to SMU drm/amdgpu: add DMUB outbox event IRQ source define/complete/debug flag drm/amd/pm: add the callback to get vbios bootup values for vangogh drm/radeon: Fix size overflow drm/amdgpu: Fix size overflow drm/amdgpu: move mmhub ras_func init to ip specific file ...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c193
1 files changed, 172 insertions, 21 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index e92e7dea71da1..d8f131ed10cb5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -36,6 +36,7 @@
#include <linux/vga_switcheroo.h>
#include <drm/drm_probe_helper.h>
#include <linux/mmu_notifier.h>
+#include <linux/suspend.h>
#include "amdgpu.h"
#include "amdgpu_irq.h"
@@ -45,6 +46,8 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_ras.h"
+#include "amdgpu_xgmi.h"
+#include "amdgpu_reset.h"
/*
* KMS wrapper.
@@ -90,9 +93,10 @@
* - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
* - 3.39.0 - DMABUF implicit sync does a full pipeline sync
* - 3.40.0 - Add AMDGPU_IDS_FLAGS_TMZ
+ * - 3.41.0 - Add video codec query
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 40
+#define KMS_DRIVER_MINOR 41
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit;
@@ -145,6 +149,7 @@ int amdgpu_compute_multipipe = -1;
int amdgpu_gpu_recovery = -1; /* auto */
int amdgpu_emu_mode;
uint amdgpu_smu_memory_pool_size;
+int amdgpu_smu_pptable_id = -1;
/*
* FBC (bit 0) disabled by default
* MULTI_MON_PP_MCLK_SWITCH (bit 1) enabled by default
@@ -162,16 +167,26 @@ int amdgpu_discovery = -1;
int amdgpu_mes;
int amdgpu_noretry = -1;
int amdgpu_force_asic_type = -1;
-int amdgpu_tmz;
+int amdgpu_tmz = -1; /* auto */
+uint amdgpu_freesync_vid_mode;
int amdgpu_reset_method = -1; /* auto */
int amdgpu_num_kcq = -1;
+static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
+
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
+ .delayed_reset_work = __DELAYED_WORK_INITIALIZER(
+ mgpu_info.delayed_reset_work,
+ amdgpu_drv_delayed_reset_work_handler, 0),
};
int amdgpu_ras_enable = -1;
uint amdgpu_ras_mask = 0xffffffff;
int amdgpu_bad_page_threshold = -1;
+struct amdgpu_watchdog_timer amdgpu_watchdog_timer = {
+ .timeout_fatal_disable = false,
+ .period = 0x23, /* default to max. timeout = 1 << 0x23 cycles */
+};
/**
* DOC: vramlimit (int)
@@ -502,7 +517,7 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
* DOC: gpu_recovery (int)
* Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
*/
-MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
+MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (2 = advanced tdr mode, 1 = enable, 0 = disable, -1 = auto)");
module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
/**
@@ -528,6 +543,20 @@ MODULE_PARM_DESC(ras_mask, "Mask of RAS features to enable (default 0xffffffff),
module_param_named(ras_mask, amdgpu_ras_mask, uint, 0444);
/**
+ * DOC: timeout_fatal_disable (bool)
+ * Disable Watchdog timeout fatal error event
+ */
+MODULE_PARM_DESC(timeout_fatal_disable, "disable watchdog timeout fatal error (false = default)");
+module_param_named(timeout_fatal_disable, amdgpu_watchdog_timer.timeout_fatal_disable, bool, 0644);
+
+/**
+ * DOC: timeout_period (uint)
+ * Modify the watchdog timeout max_cycles as (1 << period)
+ */
+MODULE_PARM_DESC(timeout_period, "watchdog timeout period (1 to 0x23(default), timeout maxCycles = (1 << period)");
+module_param_named(timeout_period, amdgpu_watchdog_timer.period, uint, 0644);
+
+/**
* DOC: si_support (int)
* Set SI support driver. This parameter works after set config CONFIG_DRM_AMDGPU_SI. For SI asic, when radeon driver is enabled,
* set value 0 to use radeon driver, while set value 1 to use amdgpu driver. The default is using radeon driver when it available,
@@ -748,6 +777,13 @@ bool no_system_mem_limit;
module_param(no_system_mem_limit, bool, 0644);
MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = default)");
+/**
+ * DOC: no_queue_eviction_on_vm_fault (int)
+ * If set, process queues will not be evicted on gpuvm fault. This is to keep the wavefront context for debugging (0 = queue eviction, 1 = no queue eviction). The default is 0 (queue eviction).
+ */
+int amdgpu_no_queue_eviction_on_vm_fault = 0;
+MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)");
+module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444);
#endif
/**
@@ -792,10 +828,21 @@ module_param_named(backlight, amdgpu_backlight, bint, 0444);
*
* The default value: 0 (off). TODO: change to auto till it is completed.
*/
-MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)");
+MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto (default), 0 = off, 1 = on)");
module_param_named(tmz, amdgpu_tmz, int, 0444);
/**
+ * DOC: freesync_video (uint)
+ * Enable the optimization to adjust front porch timing to achieve a seamless mode change experience
+ * when setting a freesync supported mode for which full modeset is not needed.
+ * The default value: 0 (off).
+ */
+MODULE_PARM_DESC(
+ freesync_video,
+ "Enable freesync modesetting optimization feature (0 = off (default), 1 = on)");
+module_param_named(freesync_video, amdgpu_freesync_vid_mode, uint, 0444);
+
+/**
* DOC: reset_method (int)
* GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco, 5 = pci)
*/
@@ -815,6 +862,15 @@ module_param_named(bad_page_threshold, amdgpu_bad_page_threshold, int, 0444);
MODULE_PARM_DESC(num_kcq, "number of kernel compute queue user want to setup (8 if set to greater than 8 or less than 0, only affect gfx 8+)");
module_param_named(num_kcq, amdgpu_num_kcq, int, 0444);
+/**
+ * DOC: smu_pptable_id (int)
+ * Used to override pptable id. id = 0 use VBIOS pptable.
+ * id > 0 use the soft pptable with specified id.
+ */
+MODULE_PARM_DESC(smu_pptable_id,
+ "specify pptable id to be used (-1 = auto(default) value, 0 = use pptable from vbios, > 0 = soft pptable id)");
+module_param_named(smu_pptable_id, amdgpu_smu_pptable_id, int, 0444);
+
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@@ -1125,6 +1181,11 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x73E2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
{0x1002, 0x73FF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_DIMGREY_CAVEFISH},
+ /* Aldebaran */
+ {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
+
{0, 0, 0}
};
@@ -1279,6 +1340,98 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
adev->mp1_state = PP_MP1_STATE_NONE;
}
+/**
+ * amdgpu_drv_delayed_reset_work_handler - work handler for reset
+ *
+ * @work: work_struct; not referenced in the body, all state is taken from
+ *        the file-scope mgpu_info.
+ *
+ * Sequence performed by this handler:
+ *  1. Under mgpu_info.mutex, return early if mgpu_info.pending_reset is
+ *     already set, otherwise set it.
+ *  2. For each of the mgpu_info.num_dgpu devices, call
+ *     amdgpu_device_pre_asic_reset() (a failure is logged, the loop goes
+ *     on) and queue the device's xgmi_reset_work on system_unbound_wq.
+ *  3. Flush each device's xgmi_reset_work and clear its
+ *     gmc.xgmi.pending_reset flag.
+ *  4. Call amdgpu_xgmi_remove_device() on every device, collect the
+ *     devices on a local list and unregister each GPU instance.
+ *  5. Set AMDGPU_SKIP_HW_RESET in the shared context and call
+ *     amdgpu_do_asic_reset() on the list; on error, log and return.
+ *  6. For each device, call amdgpu_amdkfd_device_init() if
+ *     kfd.init_complete is not set, then
+ *     amdgpu_ttm_set_buffer_funcs_status(adev, true).
+ */
+static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work)
+{
+ struct list_head device_list;
+ struct amdgpu_device *adev;
+ int i, r;
+ struct amdgpu_reset_context reset_context;
+
+ memset(&reset_context, 0, sizeof(reset_context));
+
+ mutex_lock(&mgpu_info.mutex);
+ if (mgpu_info.pending_reset == true) { /* flag already set: nothing to do */
+ mutex_unlock(&mgpu_info.mutex);
+ return;
+ }
+ mgpu_info.pending_reset = true; /* NOTE(review): never set back to false in this function */
+ mutex_unlock(&mgpu_info.mutex);
+
+ /* Use a common context, just need to make sure full reset is done */
+ reset_context.method = AMD_RESET_METHOD_NONE;
+ set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+ for (i = 0; i < mgpu_info.num_dgpu; i++) {
+ adev = mgpu_info.gpu_ins[i].adev;
+ reset_context.reset_req_dev = adev;
+ r = amdgpu_device_pre_asic_reset(adev, &reset_context);
+ if (r) { /* log only; the work item is still queued below */
+ dev_err(adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
+ r, adev_to_drm(adev)->unique);
+ }
+ if (!queue_work(system_unbound_wq, &adev->xgmi_reset_work))
+ r = -EALREADY; /* NOTE(review): this value is never read; r is reassigned before its next use */
+ }
+ for (i = 0; i < mgpu_info.num_dgpu; i++) {
+ adev = mgpu_info.gpu_ins[i].adev;
+ flush_work(&adev->xgmi_reset_work); /* wait for the work queued in the loop above */
+ adev->gmc.xgmi.pending_reset = false;
+ }
+
+ /* reset function will rebuild the xgmi hive info, clear it now */
+ for (i = 0; i < mgpu_info.num_dgpu; i++)
+ amdgpu_xgmi_remove_device(mgpu_info.gpu_ins[i].adev);
+
+ INIT_LIST_HEAD(&device_list);
+
+ for (i = 0; i < mgpu_info.num_dgpu; i++)
+ list_add_tail(&mgpu_info.gpu_ins[i].adev->reset_list, &device_list);
+
+ /* unregister the GPU first, reset function will add them back */
+ list_for_each_entry(adev, &device_list, reset_list)
+ amdgpu_unregister_gpu_instance(adev);
+
+ /* Reuse the common context (AMDGPU_NEED_FULL_RESET is still set) and
+  * additionally flag AMDGPU_SKIP_HW_RESET before the reset call
+  */
+ set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags);
+ r = amdgpu_do_asic_reset(&device_list, &reset_context);
+
+ if (r) {
+ DRM_ERROR("reinit gpus failure");
+ return; /* the per-device KFD/TTM step below is skipped on failure */
+ }
+ for (i = 0; i < mgpu_info.num_dgpu; i++) {
+ adev = mgpu_info.gpu_ins[i].adev;
+ if (!adev->kfd.init_complete)
+ amdgpu_amdkfd_device_init(adev);
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+ }
+ return;
+}
+
+static int amdgpu_pmops_prepare(struct device *dev)
+{
+ struct drm_device *drm_dev = dev_get_drvdata(dev);
+
+ /* Return a positive number here so
+ * DPM_FLAG_SMART_SUSPEND works properly
+ *
+ * The result is non-zero only when amdgpu_device_supports_boco()
+ * is true and both pm_runtime_suspended(dev) and
+ * pm_suspend_via_firmware() are true; every other case returns 0.
+ */
+ if (amdgpu_device_supports_boco(drm_dev))
+ return pm_runtime_suspended(dev) &&
+ pm_suspend_via_firmware();
+
+ return 0;
+}
+
+static void amdgpu_pmops_complete(struct device *dev)
+{
+ /* nothing to do; dev is unused. Installed as the .complete
+  * callback of amdgpu_pm_ops, next to .prepare.
+  */
+}
+
static int amdgpu_pmops_suspend(struct device *dev)
{
struct drm_device *drm_dev = dev_get_drvdata(dev);
@@ -1364,7 +1517,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
}
adev->in_runpm = true;
- if (amdgpu_device_supports_atpx(drm_dev))
+ if (amdgpu_device_supports_px(drm_dev))
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
ret = amdgpu_device_suspend(drm_dev, false);
@@ -1373,16 +1526,14 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
return ret;
}
- if (amdgpu_device_supports_atpx(drm_dev)) {
+ if (amdgpu_device_supports_px(drm_dev)) {
/* Only need to handle PCI state in the driver for ATPX
* PCI core handles it for _PR3.
*/
- if (!amdgpu_is_atpx_hybrid()) {
- amdgpu_device_cache_pci_state(pdev);
- pci_disable_device(pdev);
- pci_ignore_hotplug(pdev);
- pci_set_power_state(pdev, PCI_D3cold);
- }
+ amdgpu_device_cache_pci_state(pdev);
+ pci_disable_device(pdev);
+ pci_ignore_hotplug(pdev);
+ pci_set_power_state(pdev, PCI_D3cold);
drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;
} else if (amdgpu_device_supports_baco(drm_dev)) {
amdgpu_device_baco_enter(drm_dev);
@@ -1401,19 +1552,17 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
if (!adev->runpm)
return -EINVAL;
- if (amdgpu_device_supports_atpx(drm_dev)) {
+ if (amdgpu_device_supports_px(drm_dev)) {
drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
/* Only need to handle PCI state in the driver for ATPX
* PCI core handles it for _PR3.
*/
- if (!amdgpu_is_atpx_hybrid()) {
- pci_set_power_state(pdev, PCI_D0);
- amdgpu_device_load_pci_state(pdev);
- ret = pci_enable_device(pdev);
- if (ret)
- return ret;
- }
+ pci_set_power_state(pdev, PCI_D0);
+ amdgpu_device_load_pci_state(pdev);
+ ret = pci_enable_device(pdev);
+ if (ret)
+ return ret;
pci_set_master(pdev);
} else if (amdgpu_device_supports_boco(drm_dev)) {
/* Only need to handle PCI state in the driver for ATPX
@@ -1424,7 +1573,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
amdgpu_device_baco_exit(drm_dev);
}
ret = amdgpu_device_resume(drm_dev, false);
- if (amdgpu_device_supports_atpx(drm_dev))
+ if (amdgpu_device_supports_px(drm_dev))
drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
adev->in_runpm = false;
return 0;
@@ -1505,6 +1654,8 @@ out:
}
static const struct dev_pm_ops amdgpu_pm_ops = {
+ .prepare = amdgpu_pmops_prepare,
+ .complete = amdgpu_pmops_complete,
.suspend = amdgpu_pmops_suspend,
.resume = amdgpu_pmops_resume,
.freeze = amdgpu_pmops_freeze,