summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKai-Heng Feng <kai.heng.feng@canonical.com>2022-08-26 08:25:30 +0800
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2022-09-20 11:51:31 +0200
commit101a952b47c1ba2b27ea1e2a91c240a6ceb49c45 (patch)
treec38a04b738e3a288d3d2f1b99222d4a9455ee155
parenta5e410be468ea8be6e3df7698fc52b51a507aa4b (diff)
tg3: Disable tg3 device on system reboot to avoid triggering AER
[ Upstream commit 2ca1c94ce0b65a2ce7512b718f3d8a0fe6224bca ] Commit d60cd06331a3 ("PM: ACPI: reboot: Use S5 for reboot") caused a reboot hang on one Dell servers so the commit was reverted. Someone managed to collect the AER log and it's caused by MSI: [ 148.762067] ACPI: Preparing to enter system sleep state S5 [ 148.794638] {1}[Hardware Error]: Hardware error from APEI Generic Hardware Error Source: 5 [ 148.803731] {1}[Hardware Error]: event severity: recoverable [ 148.810191] {1}[Hardware Error]: Error 0, type: fatal [ 148.816088] {1}[Hardware Error]: section_type: PCIe error [ 148.822391] {1}[Hardware Error]: port_type: 0, PCIe end point [ 148.829026] {1}[Hardware Error]: version: 3.0 [ 148.834266] {1}[Hardware Error]: command: 0x0006, status: 0x0010 [ 148.841140] {1}[Hardware Error]: device_id: 0000:04:00.0 [ 148.847309] {1}[Hardware Error]: slot: 0 [ 148.852077] {1}[Hardware Error]: secondary_bus: 0x00 [ 148.857876] {1}[Hardware Error]: vendor_id: 0x14e4, device_id: 0x165f [ 148.865145] {1}[Hardware Error]: class_code: 020000 [ 148.870845] {1}[Hardware Error]: aer_uncor_status: 0x00100000, aer_uncor_mask: 0x00010000 [ 148.879842] {1}[Hardware Error]: aer_uncor_severity: 0x000ef030 [ 148.886575] {1}[Hardware Error]: TLP Header: 40000001 0000030f 90028090 00000000 [ 148.894823] tg3 0000:04:00.0: AER: aer_status: 0x00100000, aer_mask: 0x00010000 [ 148.902795] tg3 0000:04:00.0: AER: [20] UnsupReq (First) [ 148.910234] tg3 0000:04:00.0: AER: aer_layer=Transaction Layer, aer_agent=Requester ID [ 148.918806] tg3 0000:04:00.0: AER: aer_uncor_severity: 0x000ef030 [ 148.925558] tg3 0000:04:00.0: AER: TLP Header: 40000001 0000030f 90028090 00000000 The MSI is probably raised by incoming packets, so power down the device and disable bus mastering to stop the traffic, as user confirmed this approach works. In addition to that, be extra safe and cancel reset task if it's running. Cc: Josef Bacik <josef@toxicpanda.com> Link: https://lore.kernel.org/all/b8db79e6857c41dab4ef08bdf826ea7c47e3bafc.1615947283.git.josef@toxicpanda.com/ BugLink: https://bugs.launchpad.net/bugs/1917471 Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com> Reviewed-by: Michael Chan <michael.chan@broadcom.com> Link: https://lore.kernel.org/r/20220826002530.1153296-1-kai.heng.feng@canonical.com Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Sasha Levin <sashal@kernel.org>
-rw-r--r--drivers/net/ethernet/broadcom/tg3.c8
1 files changed, 6 insertions, 2 deletions
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 480179ddc45b..3279a6e48f3b 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -18157,16 +18157,20 @@ static void tg3_shutdown(struct pci_dev *pdev)
struct net_device *dev = pci_get_drvdata(pdev);
struct tg3 *tp = netdev_priv(dev);
+ tg3_reset_task_cancel(tp);
+
rtnl_lock();
+
netif_device_detach(dev);
if (netif_running(dev))
dev_close(dev);
- if (system_state == SYSTEM_POWER_OFF)
- tg3_power_down(tp);
+ tg3_power_down(tp);
rtnl_unlock();
+
+ pci_disable_device(pdev);
}
/**