diff options
132 files changed, 1891 insertions, 3588 deletions
diff --git a/Documentation/arch/s390/driver-model.rst b/Documentation/arch/s390/driver-model.rst index ad4bc2dbea435..ad18f129fb0bc 100644 --- a/Documentation/arch/s390/driver-model.rst +++ b/Documentation/arch/s390/driver-model.rst @@ -244,7 +244,7 @@ information about the interrupt from the irb parameter. -------------------- The ccwgroup mechanism is designed to handle devices consisting of multiple ccw -devices, like lcs or ctc. +devices, like qeth or ctc. The ccw driver provides a 'group' attribute. Piping bus ids of ccw devices to this attributes creates a ccwgroup device consisting of these ccw devices (if diff --git a/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml b/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml index b2adc71741770..dca16e202da99 100644 --- a/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml +++ b/Documentation/devicetree/bindings/interrupt-controller/microchip,lan966x-oic.yaml @@ -14,9 +14,8 @@ allOf: description: | The Microchip LAN966x outband interrupt controller (OIC) maps the internal - interrupt sources of the LAN966x device to an external interrupt. - When the LAN966x device is used as a PCI device, the external interrupt is - routed to the PCI interrupt. + interrupt sources of the LAN966x device to a PCI interrupt when the LAN966x + device is used as a PCI device. properties: compatible: diff --git a/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml b/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml index 9bcbacb6640db..55d6a8379025b 100644 --- a/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml +++ b/Documentation/devicetree/bindings/net/faraday,ftgmac100.yaml @@ -44,6 +44,9 @@ properties: phy-mode: enum: - rgmii + - rgmii-id + - rgmii-rxid + - rgmii-txid - rmii phy-handle: true diff --git a/Documentation/netlink/genetlink-c.yaml b/Documentation/netlink/genetlink-c.yaml index 9660ffb1ed6ac..44f2226160cab 100644 --- a/Documentation/netlink/genetlink-c.yaml +++ b/Documentation/netlink/genetlink-c.yaml @@ -14,9 +14,10 @@ $defs: pattern: ^[0-9A-Za-z_-]+( - 1)?$ minimum: 0 len-or-limit: - # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc. + # literal int, const name, or limit based on fixed-width type + # e.g. u8-min, u16-max, etc. type: [ string, integer ] - pattern: ^[su](8|16|32|64)-(min|max)$ + pattern: ^[0-9A-Za-z_-]+$ minimum: 0 # Schema for specs diff --git a/Documentation/netlink/genetlink-legacy.yaml b/Documentation/netlink/genetlink-legacy.yaml index 16380e12cabe7..ed64acf1bef77 100644 --- a/Documentation/netlink/genetlink-legacy.yaml +++ b/Documentation/netlink/genetlink-legacy.yaml @@ -14,9 +14,10 @@ $defs: pattern: ^[0-9A-Za-z_-]+( - 1)?$ minimum: 0 len-or-limit: - # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc. + # literal int, const name, or limit based on fixed-width type + # e.g. u8-min, u16-max, etc. type: [ string, integer ] - pattern: ^[su](8|16|32|64)-(min|max)$ + pattern: ^[0-9A-Za-z_-]+$ minimum: 0 # Schema for specs diff --git a/Documentation/netlink/genetlink.yaml b/Documentation/netlink/genetlink.yaml index b036227b46f1b..e43e50dba2e48 100644 --- a/Documentation/netlink/genetlink.yaml +++ b/Documentation/netlink/genetlink.yaml @@ -14,9 +14,10 @@ $defs: pattern: ^[0-9A-Za-z_-]+( - 1)?$ minimum: 0 len-or-limit: - # literal int or limit based on fixed-width type e.g. u8-min, u16-max, etc. + # literal int, const name, or limit based on fixed-width type + # e.g. u8-min, u16-max, etc. type: [ string, integer ] - pattern: ^[su](8|16|32|64)-(min|max)$ + pattern: ^[0-9A-Za-z_-]+$ minimum: 0 # Schema for specs diff --git a/MAINTAINERS b/MAINTAINERS index 896a307fa0654..873aa2cce4d7f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16462,6 +16462,22 @@ F: include/net/dsa.h F: net/dsa/ F: tools/testing/selftests/drivers/net/dsa/ +NETWORKING [ETHTOOL] +M: Andrew Lunn <andrew@lunn.ch> +M: Jakub Kicinski <kuba@kernel.org> +F: Documentation/netlink/specs/ethtool.yaml +F: Documentation/networking/ethtool-netlink.rst +F: include/linux/ethtool* +F: include/uapi/linux/ethtool* +F: net/ethtool/ +F: tools/testing/selftests/drivers/net/*/ethtool* + +NETWORKING [ETHTOOL CABLE TEST] +M: Andrew Lunn <andrew@lunn.ch> +F: net/ethtool/cabletest.c +F: tools/testing/selftests/drivers/net/*/ethtool* +K: cable_test + NETWORKING [GENERAL] M: "David S. Miller" <davem@davemloft.net> M: Eric Dumazet <edumazet@google.com> @@ -16621,6 +16637,7 @@ F: tools/testing/selftests/net/mptcp/ NETWORKING [TCP] M: Eric Dumazet <edumazet@google.com> M: Neal Cardwell <ncardwell@google.com> +R: Kuniyuki Iwashima <kuniyu@amazon.com> L: netdev@vger.kernel.org S: Maintained F: Documentation/networking/net_cachelines/tcp_sock.rst @@ -16648,6 +16665,31 @@ F: include/net/tls.h F: include/uapi/linux/tls.h F: net/tls/* +NETWORKING [SOCKETS] +M: Eric Dumazet <edumazet@google.com> +M: Kuniyuki Iwashima <kuniyu@amazon.com> +M: Paolo Abeni <pabeni@redhat.com> +M: Willem de Bruijn <willemb@google.com> +S: Maintained +F: include/linux/sock_diag.h +F: include/linux/socket.h +F: include/linux/sockptr.h +F: include/net/sock.h +F: include/net/sock_reuseport.h +F: include/uapi/linux/socket.h +F: net/core/*sock* +F: net/core/scm.c +F: net/socket.c + +NETWORKING [UNIX SOCKETS] +M: Kuniyuki Iwashima <kuniyu@amazon.com> +S: Maintained +F: include/net/af_unix.h +F: include/net/netns/unix.h +F: include/uapi/linux/unix_diag.h +F: net/unix/ +F: tools/testing/selftests/net/af_unix/ + NETXEN (1/10) GbE SUPPORT M: Manish Chopra <manishc@marvell.com> M: Rahul Verma <rahulv@marvell.com> @@ -17713,6 +17755,7 @@ L: netdev@vger.kernel.org L: dev@openvswitch.org S: Maintained W: http://openvswitch.org +F: Documentation/networking/openvswitch.rst F: include/uapi/linux/openvswitch.h F: net/openvswitch/ F: tools/testing/selftests/net/openvswitch/ diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index d9e705f4a697e..bde6a496df5f8 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -54,7 +54,6 @@ enum interruption_class { IRQIO_C70, IRQIO_TAP, IRQIO_VMR, - IRQIO_LCS, IRQIO_CTC, IRQIO_ADM, IRQIO_CSC, diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index ef7be599e1f78..7ca157ffab30e 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -84,7 +84,6 @@ static const struct irq_class irqclass_sub_desc[] = { {.irq = IRQIO_C70, .name = "C70", .desc = "[I/O] 3270"}, {.irq = IRQIO_TAP, .name = "TAP", .desc = "[I/O] Tape"}, {.irq = IRQIO_VMR, .name = "VMR", .desc = "[I/O] Unit Record Devices"}, - {.irq = IRQIO_LCS, .name = "LCS", .desc = "[I/O] LCS"}, {.irq = IRQIO_CTC, .name = "CTC", .desc = "[I/O] CTC"}, {.irq = IRQIO_ADM, .name = "ADM", .desc = "[I/O] EADM Subchannel"}, {.irq = IRQIO_CSC, .name = "CSC", .desc = "[I/O] CHSC Subchannel"}, diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index be063bfb50c4b..c11b9965c4ad9 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -169,6 +169,7 @@ config IXP4XX_IRQ config LAN966X_OIC tristate "Microchip LAN966x OIC Support" + depends on MCHP_LAN966X_PCI || COMPILE_TEST select GENERIC_IRQ_CHIP select IRQ_DOMAIN help diff --git a/drivers/irqchip/irq-apple-aic.c b/drivers/irqchip/irq-apple-aic.c index da5250f0155cf..2b1684c60e3ca 100644 --- a/drivers/irqchip/irq-apple-aic.c +++ b/drivers/irqchip/irq-apple-aic.c @@ -577,7 +577,8 @@ static void __exception_irq_entry aic_handle_fiq(struct pt_regs *regs) AIC_FIQ_HWIRQ(AIC_TMR_EL02_VIRT)); } - if (read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & PMCR0_IACT) { + if ((read_sysreg_s(SYS_IMP_APL_PMCR0_EL1) & (PMCR0_IMODE | PMCR0_IACT)) == + (FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ) | PMCR0_IACT)) { int irq; if (cpumask_test_cpu(smp_processor_id(), &aic_irqc->fiq_aff[AIC_CPU_PMU_P]->aff)) diff --git a/drivers/irqchip/irq-mvebu-icu.c b/drivers/irqchip/irq-mvebu-icu.c index b337f6c05f184..4eebed39880a5 100644 --- a/drivers/irqchip/irq-mvebu-icu.c +++ b/drivers/irqchip/irq-mvebu-icu.c @@ -68,7 +68,8 @@ static int mvebu_icu_translate(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *hwirq, unsigned int *type) { unsigned int param_count = static_branch_unlikely(&legacy_bindings) ? 3 : 2; - struct mvebu_icu_msi_data *msi_data = d->host_data; + struct msi_domain_info *info = d->host_data; + struct mvebu_icu_msi_data *msi_data = info->chip_data; struct mvebu_icu *icu = msi_data->icu; /* Check the count of the parameters in dt */ diff --git a/drivers/irqchip/irq-riscv-imsic-early.c b/drivers/irqchip/irq-riscv-imsic-early.c index c5c2e6929a2f5..275df50057057 100644 --- a/drivers/irqchip/irq-riscv-imsic-early.c +++ b/drivers/irqchip/irq-riscv-imsic-early.c @@ -27,7 +27,7 @@ static void imsic_ipi_send(unsigned int cpu) { struct imsic_local_config *local = per_cpu_ptr(imsic->global.local, cpu); - writel_relaxed(IMSIC_IPI_ID, local->msi_va); + writel(IMSIC_IPI_ID, local->msi_va); } static void imsic_ipi_starting_cpu(void) diff --git a/drivers/irqchip/irq-thead-c900-aclint-sswi.c b/drivers/irqchip/irq-thead-c900-aclint-sswi.c index b0e366ade4271..8ff6e7a1363bd 100644 --- a/drivers/irqchip/irq-thead-c900-aclint-sswi.c +++ b/drivers/irqchip/irq-thead-c900-aclint-sswi.c @@ -31,7 +31,7 @@ static DEFINE_PER_CPU(void __iomem *, sswi_cpu_regs); static void thead_aclint_sswi_ipi_send(unsigned int cpu) { - writel_relaxed(0x1, per_cpu(sswi_cpu_regs, cpu)); + writel(0x1, per_cpu(sswi_cpu_regs, cpu)); } static void thead_aclint_sswi_ipi_clear(void) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index e45bba240cbcd..f6d0628a36d9f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -432,9 +432,6 @@ static struct net_device *bond_ipsec_dev(struct xfrm_state *xs) struct bonding *bond; struct slave *slave; - if (!bond_dev) - return NULL; - bond = netdev_priv(bond_dev); if (BOND_MODE(bond) != BOND_MODE_ACTIVEBACKUP) return NULL; diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index fe0e3e2a81171..71e50fc65c147 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -1441,7 +1441,9 @@ void aq_nic_deinit(struct aq_nic_s *self, bool link_down) aq_ptp_ring_free(self); aq_ptp_free(self); - if (likely(self->aq_fw_ops->deinit) && link_down) { + /* May be invoked during hot unplug. */ + if (pci_device_is_present(self->pdev) && + likely(self->aq_fw_ops->deinit) && link_down) { mutex_lock(&self->fwreq_mutex); self->aq_fw_ops->deinit(self->aq_hw); mutex_unlock(&self->fwreq_mutex); diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h index f5901f8e39077..f6b990b7f5b47 100644 --- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h +++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h @@ -226,7 +226,6 @@ struct __packed offload_info { struct offload_port_info ports; struct offload_ka_info kas; struct offload_rr_info rrs; - u8 buf[]; }; struct __packed hw_atl_utils_fw_rpc { diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c index 0715ea5bf13ed..3b082114f2e53 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c @@ -41,9 +41,12 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct bcmgenet_priv *priv = netdev_priv(dev); struct device *kdev = &priv->pdev->dev; + u32 phy_wolopts = 0; - if (dev->phydev) + if (dev->phydev) { phy_ethtool_get_wol(dev->phydev, wol); + phy_wolopts = wol->wolopts; + } /* MAC is not wake-up capable, return what the PHY does */ if (!device_can_wakeup(kdev)) @@ -51,9 +54,14 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) /* Overlay MAC capabilities with that of the PHY queried before */ wol->supported |= WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER; - wol->wolopts = priv->wolopts; - memset(wol->sopass, 0, sizeof(wol->sopass)); + wol->wolopts |= priv->wolopts; + /* Return the PHY configured magic password */ + if (phy_wolopts & WAKE_MAGICSECURE) + return; + + /* Otherwise the MAC one */ + memset(wol->sopass, 0, sizeof(wol->sopass)); if (wol->wolopts & WAKE_MAGICSECURE) memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass)); } @@ -70,7 +78,7 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) /* Try Wake-on-LAN from the PHY first */ if (dev->phydev) { ret = phy_ethtool_set_wol(dev->phydev, wol); - if (ret != -EOPNOTSUPP) + if (ret != -EOPNOTSUPP && wol->wolopts) return ret; } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 1c94bf1db7186..d9d675f1ebfe9 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -55,6 +55,7 @@ #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> #include <linux/crc32poly.h> +#include <linux/dmi.h> #include <net/checksum.h> #include <net/gso.h> @@ -18212,6 +18213,50 @@ unlock: static SIMPLE_DEV_PM_OPS(tg3_pm_ops, tg3_suspend, tg3_resume); +/* Systems where ACPI _PTS (Prepare To Sleep) S5 will result in a fatal + * PCIe AER event on the tg3 device if the tg3 device is not, or cannot + * be, powered down. + */ +static const struct dmi_system_id tg3_restart_aer_quirk_table[] = { + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R440"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R540"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R640"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R650"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R740"), + }, + }, + { + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge R750"), + }, + }, + {} +}; + static void tg3_shutdown(struct pci_dev *pdev) { struct net_device *dev = pci_get_drvdata(pdev); @@ -18228,6 +18273,19 @@ static void tg3_shutdown(struct pci_dev *pdev) if (system_state == SYSTEM_POWER_OFF) tg3_power_down(tp); + else if (system_state == SYSTEM_RESTART && + dmi_first_match(tg3_restart_aer_quirk_table) && + pdev->current_state != PCI_D3cold && + pdev->current_state != PCI_UNKNOWN) { + /* Disable PCIe AER on the tg3 to avoid a fatal + * error during this system restart. + */ + pcie_capability_clear_word(pdev, PCI_EXP_DEVCTL, + PCI_EXP_DEVCTL_CERE | + PCI_EXP_DEVCTL_NFERE | + PCI_EXP_DEVCTL_FERE | + PCI_EXP_DEVCTL_URRE); + } rtnl_unlock(); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 6b6cb73482d7f..1753bb87dfbd9 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -1433,22 +1433,6 @@ int octeon_wait_for_ddr_init(struct octeon_device *oct, u32 *timeout) } EXPORT_SYMBOL_GPL(octeon_wait_for_ddr_init); -/* Get the octeon id assigned to the octeon device passed as argument. - * This function is exported to other modules. - * @param dev - octeon device pointer passed as a void *. - * @return octeon device id - */ -int lio_get_device_id(void *dev) -{ - struct octeon_device *octeon_dev = (struct octeon_device *)dev; - u32 i; - - for (i = 0; i < MAX_OCTEON_DEVICES; i++) - if (octeon_device[i] == octeon_dev) - return octeon_dev->octeon_id; - return -1; -} - void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq) { u64 instr_cnt; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index d26364c2ac81c..19344b21f8fb9 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -705,13 +705,6 @@ octeon_get_dispatch(struct octeon_device *octeon_dev, u16 opcode, */ struct octeon_device *lio_get_device(u32 octeon_id); -/** Get the octeon id assigned to the octeon device passed as argument. - * This function is exported to other modules. - * @param dev - octeon device pointer passed as a void *. - * @return octeon device id - */ -int lio_get_device_id(void *dev); - /** Read windowed register. * @param oct - pointer to the Octeon device. * @param addr - Address of the register to read. diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index c7c2c15a18155..95e6f015a6af0 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1211,9 +1211,6 @@ struct adapter { struct timer_list flower_stats_timer; struct work_struct flower_stats_work; - /* Ethtool Dump */ - struct ethtool_dump eth_dump; - /* HMA */ struct hma_data hma; @@ -1233,6 +1230,10 @@ struct adapter { /* Ethtool n-tuple */ struct cxgb4_ethtool_filter *ethtool_filters; + + /* Ethtool Dump */ + /* Must be last - ends in a flex-array member. */ + struct ethtool_dump eth_dump; }; /* Support for "sched-class" command to allow a TX Scheduling Class to be diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink.c b/drivers/net/ethernet/intel/ice/devlink/devlink.c index d116e2b10bcea..dbdb83567364c 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink.c @@ -981,6 +981,9 @@ static int ice_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv /* preallocate memory for ice_sched_node */ node = devm_kzalloc(ice_hw_to_dev(pi->hw), sizeof(*node), GFP_KERNEL); + if (!node) + return -ENOMEM; + *priv = node; return 0; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 5d2d7736fd5f1..9c9ea4c1b93b7 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -527,15 +527,14 @@ err: * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action - * @rx_buf: Rx buffer to store the XDP action * @eop_desc: Last descriptor in packet to read metadata from * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ -static void +static u32 ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, - struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc) + union ice_32b_rx_flex_desc *eop_desc) { unsigned int ret = ICE_XDP_PASS; u32 act; @@ -574,7 +573,7 @@ out_failure: ret = ICE_XDP_CONSUMED; } exit: - ice_set_rx_bufs_act(xdp, rx_ring, ret); + return ret; } /** @@ -860,10 +859,8 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, xdp_buff_set_frags_flag(xdp); } - if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) { - ice_set_rx_bufs_act(xdp, rx_ring, ICE_XDP_CONSUMED); + if (unlikely(sinfo->nr_frags == MAX_SKB_FRAGS)) return -ENOMEM; - } __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page, rx_buf->page_offset, size); @@ -924,7 +921,6 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, struct ice_rx_buf *rx_buf; rx_buf = &rx_ring->rx_buf[ntc]; - rx_buf->pgcnt = page_count(rx_buf->page); prefetchw(rx_buf->page); if (!size) @@ -941,6 +937,31 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, } /** + * ice_get_pgcnts - grab page_count() for gathered fragments + * @rx_ring: Rx descriptor ring to store the page counts on + * + * This function is intended to be called right before running XDP + * program so that the page recycling mechanism will be able to take + * a correct decision regarding underlying pages; this is done in such + * way as XDP program can change the refcount of page + */ +static void ice_get_pgcnts(struct ice_rx_ring *rx_ring) +{ + u32 nr_frags = rx_ring->nr_frags + 1; + u32 idx = rx_ring->first_desc; + struct ice_rx_buf *rx_buf; + u32 cnt = rx_ring->count; + + for (int i = 0; i < nr_frags; i++) { + rx_buf = &rx_ring->rx_buf[idx]; + rx_buf->pgcnt = page_count(rx_buf->page); + + if (++idx == cnt) + idx = 0; + } +} + +/** * ice_build_skb - Build skb around an existing buffer * @rx_ring: Rx descriptor ring to transact packets on * @xdp: xdp_buff pointing to the data @@ -1051,12 +1072,12 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) rx_buf->page_offset + headlen, size, xdp->frame_sz); } else { - /* buffer is unused, change the act that should be taken later - * on; data was copied onto skb's linear part so there's no + /* buffer is unused, restore biased page count in Rx buffer; + * data was copied onto skb's linear part so there's no * need for adjusting page offset and we can reuse this buffer * as-is */ - rx_buf->act = ICE_SKB_CONSUMED; + rx_buf->pagecnt_bias++; } if (unlikely(xdp_buff_has_frags(xdp))) { @@ -1104,6 +1125,65 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) } /** + * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all frame frags + * @rx_ring: Rx ring with all the auxiliary data + * @xdp: XDP buffer carrying linear + frags part + * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage + * @ntc: a current next_to_clean value to be stored at rx_ring + * @verdict: return code from XDP program execution + * + * Walk through gathered fragments and satisfy internal page + * recycle mechanism; we take here an action related to verdict + * returned by XDP program; + */ +static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, + u32 *xdp_xmit, u32 ntc, u32 verdict) +{ + u32 nr_frags = rx_ring->nr_frags + 1; + u32 idx = rx_ring->first_desc; + u32 cnt = rx_ring->count; + u32 post_xdp_frags = 1; + struct ice_rx_buf *buf; + int i; + + if (unlikely(xdp_buff_has_frags(xdp))) + post_xdp_frags += xdp_get_shared_info_from_buff(xdp)->nr_frags; + + for (i = 0; i < post_xdp_frags; i++) { + buf = &rx_ring->rx_buf[idx]; + + if (verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) { + ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); + *xdp_xmit |= verdict; + } else if (verdict & ICE_XDP_CONSUMED) { + buf->pagecnt_bias++; + } else if (verdict == ICE_XDP_PASS) { + ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); + } + + ice_put_rx_buf(rx_ring, buf); + + if (++idx == cnt) + idx = 0; + } + /* handle buffers that represented frags released by XDP prog; + * for these we keep pagecnt_bias as-is; refcount from struct page + * has been decremented within XDP prog and we do not have to increase + * the biased refcnt + */ + for (; i < nr_frags; i++) { + buf = &rx_ring->rx_buf[idx]; + ice_put_rx_buf(rx_ring, buf); + if (++idx == cnt) + idx = 0; + } + + xdp->data = NULL; + rx_ring->first_desc = ntc; + rx_ring->nr_frags = 0; +} + +/** * ice_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf * @rx_ring: Rx descriptor ring to transact packets on * @budget: Total limit on number of packets to process @@ -1120,15 +1200,13 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) unsigned int total_rx_bytes = 0, total_rx_pkts = 0; unsigned int offset = rx_ring->rx_offset; struct xdp_buff *xdp = &rx_ring->xdp; - u32 cached_ntc = rx_ring->first_desc; struct ice_tx_ring *xdp_ring = NULL; struct bpf_prog *xdp_prog = NULL; u32 ntc = rx_ring->next_to_clean; + u32 cached_ntu, xdp_verdict; u32 cnt = rx_ring->count; u32 xdp_xmit = 0; - u32 cached_ntu; bool failure; - u32 first; xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (xdp_prog) { @@ -1190,6 +1268,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { + ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc, ICE_XDP_CONSUMED); break; } if (++ntc == cnt) @@ -1199,15 +1278,15 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) if (ice_is_non_eop(rx_ring, rx_desc)) continue; - ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc); - if (rx_buf->act == ICE_XDP_PASS) + ice_get_pgcnts(rx_ring); + xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc); + if (xdp_verdict == ICE_XDP_PASS) goto construct_skb; total_rx_bytes += xdp_get_buff_len(xdp); total_rx_pkts++; - xdp->data = NULL; - rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; + ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); + continue; construct_skb: if (likely(ice_ring_uses_build_skb(rx_ring))) @@ -1217,18 +1296,12 @@ construct_skb: /* exit if we failed to retrieve a buffer */ if (!skb) { rx_ring->ring_stats->rx_stats.alloc_page_failed++; - rx_buf->act = ICE_XDP_CONSUMED; - if (unlikely(xdp_buff_has_frags(xdp))) - ice_set_rx_bufs_act(xdp, rx_ring, - ICE_XDP_CONSUMED); - xdp->data = NULL; - rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; - break; + xdp_verdict = ICE_XDP_CONSUMED; } - xdp->data = NULL; - rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; + ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); + + if (!skb) + break; stat_err_bits = BIT(ICE_RX_FLEX_DESC_STATUS0_RXE_S); if (unlikely(ice_test_staterr(rx_desc->wb.status_error0, @@ -1257,23 +1330,6 @@ construct_skb: total_rx_pkts++; } - first = rx_ring->first_desc; - while (cached_ntc != first) { - struct ice_rx_buf *buf = &rx_ring->rx_buf[cached_ntc]; - - if (buf->act & (ICE_XDP_TX | ICE_XDP_REDIR)) { - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - xdp_xmit |= buf->act; - } else if (buf->act & ICE_XDP_CONSUMED) { - buf->pagecnt_bias++; - } else if (buf->act == ICE_XDP_PASS) { - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - } - - ice_put_rx_buf(rx_ring, buf); - if (++cached_ntc >= cnt) - cached_ntc = 0; - } rx_ring->next_to_clean = ntc; /* return up to cleaned_count buffers to hardware */ failure = ice_alloc_rx_bufs(rx_ring, ICE_RX_DESC_UNUSED(rx_ring)); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index cb347c852ba9e..806bce701df34 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -201,7 +201,6 @@ struct ice_rx_buf { struct page *page; unsigned int page_offset; unsigned int pgcnt; - unsigned int act; unsigned int pagecnt_bias; }; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 79f960c6680d1..6cf32b4041275 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -6,49 +6,6 @@ #include "ice.h" /** - * ice_set_rx_bufs_act - propagate Rx buffer action to frags - * @xdp: XDP buffer representing frame (linear and frags part) - * @rx_ring: Rx ring struct - * act: action to store onto Rx buffers related to XDP buffer parts - * - * Set action that should be taken before putting Rx buffer from first frag - * to the last. - */ -static inline void -ice_set_rx_bufs_act(struct xdp_buff *xdp, const struct ice_rx_ring *rx_ring, - const unsigned int act) -{ - u32 sinfo_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags; - u32 nr_frags = rx_ring->nr_frags + 1; - u32 idx = rx_ring->first_desc; - u32 cnt = rx_ring->count; - struct ice_rx_buf *buf; - - for (int i = 0; i < nr_frags; i++) { - buf = &rx_ring->rx_buf[idx]; - buf->act = act; - - if (++idx == cnt) - idx = 0; - } - - /* adjust pagecnt_bias on frags freed by XDP prog */ - if (sinfo_frags < rx_ring->nr_frags && act == ICE_XDP_CONSUMED) { - u32 delta = rx_ring->nr_frags - sinfo_frags; - - while (delta) { - if (idx == 0) - idx = cnt - 1; - else - idx--; - buf = &rx_ring->rx_buf[idx]; - buf->pagecnt_bias--; - delta--; - } - } -} - -/** * ice_test_staterr - tests bits in Rx descriptor status and error fields * @status_err_n: Rx descriptor status_error0 or status_error1 bits * @stat_err_bits: value to mask diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index b330020dc0d67..598df63518c55 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -526,28 +526,6 @@ out: return res; } -u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, u32 uid, u32 obj, u32 count) -{ - struct mlx4_zone_entry *zone; - int res = 0; - - spin_lock(&zones->lock); - - zone = __mlx4_find_zone_by_uid(zones, uid); - - if (NULL == zone) { - res = -1; - goto out; - } - - __mlx4_free_from_zone(zone, obj, count); - -out: - spin_unlock(&zones->lock); - - return res; -} - u32 mlx4_zone_free_entries_unique(struct mlx4_zone_allocator *zones, u32 obj, u32 count) { struct mlx4_zone_entry *zone; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index d7d856d1758a3..b213094ea30f3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1478,12 +1478,6 @@ void mlx4_zone_allocator_destroy(struct mlx4_zone_allocator *zone_alloc); u32 mlx4_zone_alloc_entries(struct mlx4_zone_allocator *zones, u32 uid, int count, int align, u32 skip_mask, u32 *puid); -/* Free <count> objects, start from <obj> of the uid <uid> from zone_allocator - * <zones>. - */ -u32 mlx4_zone_free_entries(struct mlx4_zone_allocator *zones, - u32 uid, u32 obj, u32 count); - /* If <zones> was allocated with MLX4_ZONE_ALLOC_FLAGS_NO_OVERLAP, instead of * specifying the uid when freeing an object, zone allocator could figure it by * itself. Other parameters are similar to mlx4_zone_free. diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 4e43f4a7d2466..e3d0b13c1610e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -147,26 +147,6 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port, return err; } -int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx) -{ - struct mlx4_port_info *info = &mlx4_priv(dev)->port[port]; - struct mlx4_mac_table *table = &info->mac_table; - int i; - - for (i = 0; i < MLX4_MAX_MAC_NUM; i++) { - if (!table->refs[i]) - continue; - - if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) { - *idx = i; - return 0; - } - } - - return -ENOENT; -} -EXPORT_SYMBOL_GPL(mlx4_find_cached_mac); - static bool mlx4_need_mf_bond(struct mlx4_dev *dev) { int i, num_eth_ports = 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 5f6a0605e4ae8..f62fbfb67a1ba 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -296,11 +296,16 @@ enum mlx5e_fec_supported_link_mode { MLX5E_FEC_SUPPORTED_LINK_MODE_200G_2X, MLX5E_FEC_SUPPORTED_LINK_MODE_400G_4X, MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X, + MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X, + MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X, + MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X, + MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X, MLX5E_MAX_FEC_SUPPORTED_LINK_MODE, }; #define MLX5E_FEC_FIRST_50G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_50G_1X #define MLX5E_FEC_FIRST_100G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_100G_1X +#define MLX5E_FEC_FIRST_200G_PER_LANE_MODE MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X #define MLX5E_FEC_OVERRIDE_ADMIN_POLICY(buf, policy, write, link) \ do { \ @@ -320,8 +325,10 @@ static bool mlx5e_is_fec_supported_link_mode(struct mlx5_core_dev *dev, return link_mode < MLX5E_FEC_FIRST_50G_PER_LANE_MODE || (link_mode < MLX5E_FEC_FIRST_100G_PER_LANE_MODE && MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm)) || - (link_mode >= MLX5E_FEC_FIRST_100G_PER_LANE_MODE && - MLX5_CAP_PCAM_FEATURE(dev, fec_100G_per_lane_in_pplm)); + (link_mode < MLX5E_FEC_FIRST_200G_PER_LANE_MODE && + MLX5_CAP_PCAM_FEATURE(dev, fec_100G_per_lane_in_pplm)) || + (link_mode >= MLX5E_FEC_FIRST_200G_PER_LANE_MODE && + MLX5_CAP_PCAM_FEATURE(dev, fec_200G_per_lane_in_pplm)); } /* get/set FEC admin field for a given speed */ @@ -368,6 +375,18 @@ static int mlx5e_fec_admin_field(u32 *pplm, u16 *fec_policy, bool write, case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X: MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 800g_8x); break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 200g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 400g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 800g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X: + MLX5E_FEC_OVERRIDE_ADMIN_POLICY(pplm, *fec_policy, write, 1600g_8x); + break; default: return -EINVAL; } @@ -421,6 +440,18 @@ static int mlx5e_get_fec_cap_field(u32 *pplm, u16 *fec_cap, case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_8X: *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 800g_8x); break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_200G_1X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 200g_1x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_400G_2X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 400g_2x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_800G_4X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 800g_4x); + break; + case MLX5E_FEC_SUPPORTED_LINK_MODE_1600G_8X: + *fec_cap = MLX5E_GET_FEC_OVERRIDE_CAP(pplm, 1600g_8x); + break; default: return -EINVAL; } @@ -494,6 +525,26 @@ out: return 0; } +static u16 mlx5e_remap_fec_conf_mode(enum mlx5e_fec_supported_link_mode link_mode, + u16 conf_fec) +{ + /* RS fec in ethtool is originally mapped to MLX5E_FEC_RS_528_514. + * For link modes up to 25G per lane, the value is kept. + * For 50G or 100G per lane, it's remapped to MLX5E_FEC_RS_544_514. + * For 200G per lane, remapped to MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD. + */ + if (conf_fec != BIT(MLX5E_FEC_RS_528_514)) + return conf_fec; + + if (link_mode >= MLX5E_FEC_FIRST_200G_PER_LANE_MODE) + return BIT(MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD); + + if (link_mode >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE) + return BIT(MLX5E_FEC_RS_544_514); + + return conf_fec; +} + int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) { bool fec_50g_per_lane = MLX5_CAP_PCAM_FEATURE(dev, fec_50G_per_lane_in_pplm); @@ -530,14 +581,7 @@ int mlx5e_set_fec_mode(struct mlx5_core_dev *dev, u16 fec_policy) if (!mlx5e_is_fec_supported_link_mode(dev, i)) break; - /* RS fec in ethtool is mapped to MLX5E_FEC_RS_528_514 - * to link modes up to 25G per lane and to - * MLX5E_FEC_RS_544_514 in the new link modes based on - * 50G or 100G per lane - */ - if (conf_fec == (1 << MLX5E_FEC_RS_528_514) && - i >= MLX5E_FEC_FIRST_50G_PER_LANE_MODE) - conf_fec = (1 << MLX5E_FEC_RS_544_514); + conf_fec = mlx5e_remap_fec_conf_mode(i, conf_fec); mlx5e_get_fec_cap_field(out, &fec_caps, i); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index d1da225f35dad..fa2283dd383b0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -61,6 +61,7 @@ enum { MLX5E_FEC_NOFEC, MLX5E_FEC_FIRECODE, MLX5E_FEC_RS_528_514, + MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD = 4, MLX5E_FEC_RS_544_514 = 7, MLX5E_FEC_LLRS_272_257_1 = 9, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index afd654583b6b1..131ed97ca997d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -326,7 +326,7 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, int node; sq->pdev = c->pdev; - sq->clock = &mdev->clock; + sq->clock = mdev->clock; sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; sq->priv = c->priv; @@ -696,7 +696,7 @@ static int mlx5e_init_ptp_rq(struct mlx5e_ptp *c, struct mlx5e_params *params, rq->pdev = c->pdev; rq->netdev = priv->netdev; rq->priv = priv; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->tstamp = &priv->tstamp; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h index d6c12d0ea55ba..2e528b2c34d64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/act/act.h @@ -73,11 +73,6 @@ struct mlx5e_tc_act { bool is_terminating_action; }; -struct mlx5e_tc_flow_action { - unsigned int num_entries; - struct flow_action_entry **entries; -}; - extern struct mlx5e_tc_act mlx5e_tc_act_drop; extern struct mlx5e_tc_act mlx5e_tc_act_trap; extern struct mlx5e_tc_act mlx5e_tc_act_accept; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index 53ca16cb9c41a..140606fcd23b1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -46,7 +46,7 @@ static void mlx5e_init_trap_rq(struct mlx5e_trap *t, struct mlx5e_params *params rq->pdev = t->pdev; rq->netdev = priv->netdev; rq->priv = priv; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->tstamp = &priv->tstamp; rq->mdev = mdev; rq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 94b2916620873..3cc4d55613bf2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -289,9 +289,9 @@ static u64 mlx5e_xsk_fill_timestamp(void *_priv) ts = get_cqe_ts(priv->cqe); if (mlx5_is_real_time_rq(priv->cq->mdev) || mlx5_is_real_time_sq(priv->cq->mdev)) - return mlx5_real_time_cyc2time(&priv->cq->mdev->clock, ts); + return mlx5_real_time_cyc2time(priv->cq->mdev->clock, ts); - return mlx5_timecounter_cyc2time(&priv->cq->mdev->clock, ts); + return mlx5_timecounter_cyc2time(priv->cq->mdev->clock, ts); } static void mlx5e_xsk_request_checksum(u16 csum_start, u16 csum_offset, void *priv) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index 9240cfe25d102..d743e823362ae 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -72,7 +72,7 @@ static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, rq->netdev = c->netdev; rq->priv = c->priv; rq->tstamp = c->tstamp; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; rq->channel = c; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index cae39198b4dbc..f9113cb13a0c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -237,6 +237,27 @@ void mlx5e_build_ptys2ethtool_map(void) ETHTOOL_LINK_MODE_800000baseDR8_2_Full_BIT, ETHTOOL_LINK_MODE_800000baseSR8_Full_BIT, ETHTOOL_LINK_MODE_800000baseVR8_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_200GAUI_1_200GBASE_CR1_KR1, ext, + ETHTOOL_LINK_MODE_200000baseCR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseKR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseDR_2_Full_BIT, + ETHTOOL_LINK_MODE_200000baseSR_Full_BIT, + ETHTOOL_LINK_MODE_200000baseVR_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_2_400GBASE_CR2_KR2, ext, + ETHTOOL_LINK_MODE_400000baseCR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseKR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseDR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseDR2_2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseSR2_Full_BIT, + ETHTOOL_LINK_MODE_400000baseVR2_Full_BIT); + MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_800GAUI_4_800GBASE_CR4_KR4, ext, + ETHTOOL_LINK_MODE_800000baseCR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseDR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT, + ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT, + ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT); } static void mlx5e_ethtool_get_speed_arr(struct mlx5_core_dev *mdev, @@ -931,6 +952,7 @@ static const u32 pplm_fec_2_ethtool[] = { [MLX5E_FEC_RS_528_514] = ETHTOOL_FEC_RS, [MLX5E_FEC_RS_544_514] = ETHTOOL_FEC_RS, [MLX5E_FEC_LLRS_272_257_1] = ETHTOOL_FEC_LLRS, + [MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD] = ETHTOOL_FEC_RS, }; static u32 pplm2ethtool_fec(u_long fec_mode, unsigned long size) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a814b63ed97e5..2fdc86432ac0f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -737,7 +737,7 @@ static int mlx5e_init_rxq_rq(struct mlx5e_channel *c, struct mlx5e_params *param rq->netdev = c->netdev; rq->priv = c->priv; rq->tstamp = c->tstamp; - rq->clock = &mdev->clock; + rq->clock = mdev->clock; rq->icosq = &c->icosq; rq->ix = c->ix; rq->channel = c; @@ -1614,7 +1614,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, int err; sq->pdev = c->pdev; - sq->clock = &mdev->clock; + sq->clock = mdev->clock; sq->mkey_be = c->mkey_be; sq->netdev = c->netdev; sq->mdev = c->mdev; @@ -3816,8 +3816,11 @@ static int mlx5e_setup_tc_mqprio(struct mlx5e_priv *priv, /* MQPRIO is another toplevel qdisc that can't be attached * simultaneously with the offloaded HTB. */ - if (WARN_ON(mlx5e_selq_is_htb_enabled(&priv->selq))) - return -EINVAL; + if (mlx5e_selq_is_htb_enabled(&priv->selq)) { + NL_SET_ERR_MSG_MOD(mqprio->extack, + "MQPRIO cannot be configured when HTB offload is enabled."); + return -EOPNOTSUPP; + } switch (mqprio->mode) { case TC_MQPRIO_MODE_DCB: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c index bde79cac33a97..d832a12ffec0e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/port_sel.c @@ -97,7 +97,7 @@ static int mlx5_lag_create_port_sel_table(struct mlx5_lag *ldev, mlx5_del_flow_rules(lag_definer->rules[idx]); } j = ldev->buckets; - }; + } goto destroy_fg; } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index d61a1a9297c90..65a94e46edcf1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -43,6 +43,8 @@ #include <linux/cpufeature.h> #endif /* CONFIG_X86 */ +#define MLX5_RT_CLOCK_IDENTITY_SIZE MLX5_FLD_SZ_BYTES(mrtcq_reg, rt_clock_identity) + enum { MLX5_PIN_MODE_IN = 0x0, MLX5_PIN_MODE_OUT = 0x1, @@ -77,6 +79,56 @@ enum { MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX = 200000, }; +struct mlx5_clock_dev_state { + struct mlx5_core_dev *mdev; + struct mlx5_devcom_comp_dev *compdev; + struct mlx5_nb pps_nb; + struct work_struct out_work; +}; + +struct mlx5_clock_priv { + struct mlx5_clock clock; + struct mlx5_core_dev *mdev; + struct mutex lock; /* protect mdev and used in PTP callbacks */ + struct mlx5_core_dev *event_mdev; +}; + +static struct mlx5_clock_priv *clock_priv(struct mlx5_clock *clock) +{ + return container_of(clock, struct mlx5_clock_priv, clock); +} + +static void mlx5_clock_lockdep_assert(struct mlx5_clock *clock) +{ + if (!clock->shared) + return; + + lockdep_assert(lockdep_is_held(&clock_priv(clock)->lock)); +} + +static struct mlx5_core_dev *mlx5_clock_mdev_get(struct mlx5_clock *clock) +{ + mlx5_clock_lockdep_assert(clock); + + return clock_priv(clock)->mdev; +} + +static void mlx5_clock_lock(struct mlx5_clock *clock) +{ + if (!clock->shared) + return; + + mutex_lock(&clock_priv(clock)->lock); +} + +static void mlx5_clock_unlock(struct mlx5_clock *clock) +{ + if (!clock->shared) + return; + + mutex_unlock(&clock_priv(clock)->lock); +} + static bool mlx5_real_time_mode(struct mlx5_core_dev *mdev) { return (mlx5_is_real_time_rq(mdev) || mlx5_is_real_time_sq(mdev)); @@ -94,6 +146,22 @@ static bool mlx5_modify_mtutc_allowed(struct mlx5_core_dev *mdev) return MLX5_CAP_MCAM_FEATURE(mdev, ptpcyc2realtime_modify); } +static int mlx5_clock_identity_get(struct mlx5_core_dev *mdev, + u8 identify[MLX5_RT_CLOCK_IDENTITY_SIZE]) +{ + u32 out[MLX5_ST_SZ_DW(mrtcq_reg)] = {}; + u32 in[MLX5_ST_SZ_DW(mrtcq_reg)] = {}; + int err; + + err = mlx5_core_access_reg(mdev, in, sizeof(in), + out, sizeof(out), MLX5_REG_MRTCQ, 0, 0); + if (!err) + memcpy(identify, MLX5_ADDR_OF(mrtcq_reg, out, rt_clock_identity), + MLX5_RT_CLOCK_IDENTITY_SIZE); + + return err; +} + static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz) { /* Optimal shift constant leads to corrections above just 1 scaled ppm. @@ -119,21 +187,30 @@ static u32 mlx5_ptp_shift_constant(u32 dev_freq_khz) ilog2((U32_MAX / NSEC_PER_MSEC) * dev_freq_khz)); } +static s32 mlx5_clock_getmaxphase(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ? + MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX : + MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX; +} + static s32 mlx5_ptp_getmaxphase(struct ptp_clock_info *ptp) { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct mlx5_core_dev *mdev; + s32 ret; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + ret = mlx5_clock_getmaxphase(mdev); + mlx5_clock_unlock(clock); - return MLX5_CAP_MCAM_FEATURE(mdev, mtutc_time_adjustment_extended_range) ? - MLX5_MTUTC_OPERATION_ADJUST_TIME_EXTENDED_MAX : - MLX5_MTUTC_OPERATION_ADJUST_TIME_MAX; + return ret; } static bool mlx5_is_mtutc_time_adj_cap(struct mlx5_core_dev *mdev, s64 delta) { - s64 max = mlx5_ptp_getmaxphase(&mdev->clock.ptp_info); + s64 max = mlx5_clock_getmaxphase(mdev); if (delta < -max || delta > max) return false; @@ -209,7 +286,7 @@ static int mlx5_mtctr_syncdevicetime(ktime_t *device_time, if (real_time_mode) *device_time = ns_to_ktime(REAL_TIME_TO_NS(device >> 32, device & U32_MAX)); else - *device_time = mlx5_timecounter_cyc2time(&mdev->clock, device); + *device_time = mlx5_timecounter_cyc2time(mdev->clock, device); return 0; } @@ -220,16 +297,23 @@ static int mlx5_ptp_getcrosststamp(struct ptp_clock_info *ptp, struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct system_time_snapshot history_begin = {0}; struct mlx5_core_dev *mdev; + int err; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); - if (!mlx5_is_ptm_source_time_available(mdev)) - return -EBUSY; + if (!mlx5_is_ptm_source_time_available(mdev)) { + err = -EBUSY; + goto unlock; + } ktime_get_snapshot(&history_begin); - return get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev, - &history_begin, cts); + err = get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev, + &history_begin, cts); +unlock: + mlx5_clock_unlock(clock); + return err; } #endif /* CONFIG_X86 */ @@ -263,8 +347,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc) { struct mlx5_timer *timer = container_of(cc, struct mlx5_timer, cycles); struct mlx5_clock *clock = container_of(timer, struct mlx5_clock, timer); - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, - clock); + struct mlx5_core_dev *mdev = mlx5_clock_mdev_get(clock); return mlx5_read_time(mdev, NULL, false) & cc->mask; } @@ -272,7 +355,7 @@ static u64 read_internal_timer(const struct cyclecounter *cc) static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) { struct mlx5_ib_clock_info *clock_info = mdev->clock_info; - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; struct mlx5_timer *timer; u32 sign; @@ -295,12 +378,10 @@ static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev) static void mlx5_pps_out(struct work_struct *work) { - struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps, - out_work); - struct mlx5_clock *clock = container_of(pps_info, struct mlx5_clock, - pps_info); - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, - clock); + struct mlx5_clock_dev_state *clock_state = container_of(work, struct mlx5_clock_dev_state, + out_work); + struct mlx5_core_dev *mdev = clock_state->mdev; + struct mlx5_clock *clock = mdev->clock; u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; unsigned long flags; int i; @@ -330,7 +411,8 @@ static long mlx5_timestamp_overflow(struct ptp_clock_info *ptp_info) unsigned long flags; clock = container_of(ptp_info, struct mlx5_clock, ptp_info); - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); timer = &clock->timer; if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) @@ -342,6 +424,7 @@ static long mlx5_timestamp_overflow(struct ptp_clock_info *ptp_info) write_sequnlock_irqrestore(&clock->lock, flags); out: + mlx5_clock_unlock(clock); return timer->overflow_period; } @@ -361,15 +444,12 @@ static int mlx5_ptp_settime_real_time(struct mlx5_core_dev *mdev, return mlx5_set_mtutc(mdev, in, sizeof(in)); } -static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) +static int mlx5_clock_settime(struct mlx5_core_dev *mdev, struct mlx5_clock *clock, + const struct timespec64 *ts) { - struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct mlx5_timer *timer = &clock->timer; - struct mlx5_core_dev *mdev; unsigned long flags; - mdev = container_of(clock, struct mlx5_core_dev, clock); - if (mlx5_modify_mtutc_allowed(mdev)) { int err = mlx5_ptp_settime_real_time(mdev, ts); @@ -385,6 +465,20 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 return 0; } +static int mlx5_ptp_settime(struct ptp_clock_info *ptp, const struct timespec64 *ts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); + struct mlx5_core_dev *mdev; + int err; + + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + err = mlx5_clock_settime(mdev, clock, ts); + mlx5_clock_unlock(clock); + + return err; +} + static struct timespec64 mlx5_ptp_gettimex_real_time(struct mlx5_core_dev *mdev, struct ptp_system_timestamp *sts) @@ -404,7 +498,8 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, struct mlx5_core_dev *mdev; u64 cycles, ns; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); if (mlx5_real_time_mode(mdev)) { *ts = mlx5_ptp_gettimex_real_time(mdev, sts); goto out; @@ -414,6 +509,7 @@ static int mlx5_ptp_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts, ns = mlx5_timecounter_cyc2time(clock, cycles); *ts = ns_to_timespec64(ns); out: + mlx5_clock_unlock(clock); return 0; } @@ -444,14 +540,16 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) struct mlx5_timer *timer = &clock->timer; struct mlx5_core_dev *mdev; unsigned long flags; + int err = 0; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); if (mlx5_modify_mtutc_allowed(mdev)) { - int err = mlx5_ptp_adjtime_real_time(mdev, delta); + err = mlx5_ptp_adjtime_real_time(mdev, delta); if (err) - return err; + goto unlock; } write_seqlock_irqsave(&clock->lock, flags); @@ -459,17 +557,23 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta) mlx5_update_clock_info_page(mdev); write_sequnlock_irqrestore(&clock->lock, flags); - return 0; +unlock: + mlx5_clock_unlock(clock); + return err; } static int mlx5_ptp_adjphase(struct ptp_clock_info *ptp, s32 delta) { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); struct mlx5_core_dev *mdev; + int err; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + err = mlx5_ptp_adjtime_real_time(mdev, delta); + mlx5_clock_unlock(clock); - return mlx5_ptp_adjtime_real_time(mdev, delta); + return err; } static int mlx5_ptp_freq_adj_real_time(struct mlx5_core_dev *mdev, long scaled_ppm) @@ -498,15 +602,17 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) struct mlx5_timer *timer = &clock->timer; struct mlx5_core_dev *mdev; unsigned long flags; + int err = 0; u32 mult; - mdev = container_of(clock, struct mlx5_core_dev, clock); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); if (mlx5_modify_mtutc_allowed(mdev)) { - int err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm); + err = mlx5_ptp_freq_adj_real_time(mdev, scaled_ppm); if (err) - return err; + goto unlock; } mult = (u32)adjust_by_scaled_ppm(timer->nominal_c_mult, scaled_ppm); @@ -518,7 +624,9 @@ static int mlx5_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) write_sequnlock_irqrestore(&clock->lock, flags); ptp_schedule_worker(clock->ptp, timer->overflow_period); - return 0; +unlock: + mlx5_clock_unlock(clock); + return err; } static int mlx5_extts_configure(struct ptp_clock_info *ptp, @@ -527,18 +635,14 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); - struct mlx5_core_dev *mdev = - container_of(clock, struct mlx5_core_dev, clock); u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + struct mlx5_core_dev *mdev; u32 field_select = 0; u8 pin_mode = 0; u8 pattern = 0; int pin = -1; int err = 0; - if (!MLX5_PPS_CAP(mdev)) - return -EOPNOTSUPP; - /* Reject requests with unsupported flags */ if (rq->extts.flags & ~(PTP_ENABLE_FEATURE | PTP_RISING_EDGE | @@ -569,6 +673,14 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, field_select = MLX5_MTPPS_FS_ENABLE; } + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + + if (!MLX5_PPS_CAP(mdev)) { + err = -EOPNOTSUPP; + goto unlock; + } + MLX5_SET(mtpps_reg, in, pin, pin); MLX5_SET(mtpps_reg, in, pin_mode, pin_mode); MLX5_SET(mtpps_reg, in, pattern, pattern); @@ -577,15 +689,23 @@ static int mlx5_extts_configure(struct ptp_clock_info *ptp, err = mlx5_set_mtpps(mdev, in, sizeof(in)); if (err) - return err; + goto unlock; + + err = mlx5_set_mtppse(mdev, pin, 0, MLX5_EVENT_MODE_REPETETIVE & on); + if (err) + goto unlock; + + clock->pps_info.pin_armed[pin] = on; + clock_priv(clock)->event_mdev = mdev; - return mlx5_set_mtppse(mdev, pin, 0, - MLX5_EVENT_MODE_REPETETIVE & on); +unlock: + mlx5_clock_unlock(clock); + return err; } static u64 find_target_cycles(struct mlx5_core_dev *mdev, s64 target_ns) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; u64 cycles_now, cycles_delta; u64 nsec_now, nsec_delta; struct mlx5_timer *timer; @@ -644,7 +764,7 @@ static int mlx5_perout_conf_out_pulse_duration(struct mlx5_core_dev *mdev, struct ptp_clock_request *rq, u32 *out_pulse_duration_ns) { - struct mlx5_pps *pps_info = &mdev->clock.pps_info; + struct mlx5_pps *pps_info = &mdev->clock->pps_info; u32 out_pulse_duration; struct timespec64 ts; @@ -677,7 +797,7 @@ static int perout_conf_npps_real_time(struct mlx5_core_dev *mdev, struct ptp_clo u32 *field_select, u32 *out_pulse_duration_ns, u64 *period, u64 *time_stamp) { - struct mlx5_pps *pps_info = &mdev->clock.pps_info; + struct mlx5_pps *pps_info = &mdev->clock->pps_info; struct ptp_clock_time *time = &rq->perout.start; struct timespec64 ts; @@ -712,26 +832,18 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, { struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, ptp_info); - struct mlx5_core_dev *mdev = - container_of(clock, struct mlx5_core_dev, clock); - bool rt_mode = mlx5_real_time_mode(mdev); u32 in[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; u32 out_pulse_duration_ns = 0; + struct mlx5_core_dev *mdev; u32 field_select = 0; u64 npps_period = 0; u64 time_stamp = 0; u8 pin_mode = 0; u8 pattern = 0; + bool rt_mode; int pin = -1; int err = 0; - if (!MLX5_PPS_CAP(mdev)) - return -EOPNOTSUPP; - - /* Reject requests with unsupported flags */ - if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) - return -EOPNOTSUPP; - if (rq->perout.index >= clock->ptp_info.n_pins) return -EINVAL; @@ -740,14 +852,29 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, if (pin < 0) return -EBUSY; - if (on) { - bool rt_mode = mlx5_real_time_mode(mdev); + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + rt_mode = mlx5_real_time_mode(mdev); + + if (!MLX5_PPS_CAP(mdev)) { + err = -EOPNOTSUPP; + goto unlock; + } + + /* Reject requests with unsupported flags */ + if (mlx5_perout_verify_flags(mdev, rq->perout.flags)) { + err = -EOPNOTSUPP; + goto unlock; + } + if (on) { pin_mode = MLX5_PIN_MODE_OUT; pattern = MLX5_OUT_PATTERN_PERIODIC; - if (rt_mode && rq->perout.start.sec > U32_MAX) - return -EINVAL; + if (rt_mode && rq->perout.start.sec > U32_MAX) { + err = -EINVAL; + goto unlock; + } field_select |= MLX5_MTPPS_FS_PIN_MODE | MLX5_MTPPS_FS_PATTERN | @@ -760,7 +887,7 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, else err = perout_conf_1pps(mdev, rq, &time_stamp, rt_mode); if (err) - return err; + goto unlock; } MLX5_SET(mtpps_reg, in, pin, pin); @@ -773,13 +900,16 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp, MLX5_SET(mtpps_reg, in, out_pulse_duration_ns, out_pulse_duration_ns); err = mlx5_set_mtpps(mdev, in, sizeof(in)); if (err) - return err; + goto unlock; if (rt_mode) - return 0; + goto unlock; + + err = mlx5_set_mtppse(mdev, pin, 0, MLX5_EVENT_MODE_REPETETIVE & on); - return mlx5_set_mtppse(mdev, pin, 0, - MLX5_EVENT_MODE_REPETETIVE & on); +unlock: + mlx5_clock_unlock(clock); + return err; } static int mlx5_pps_configure(struct ptp_clock_info *ptp, @@ -866,10 +996,8 @@ static int mlx5_query_mtpps_pin_mode(struct mlx5_core_dev *mdev, u8 pin, mtpps_size, MLX5_REG_MTPPS, 0, 0); } -static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) +static int mlx5_get_pps_pin_mode(struct mlx5_core_dev *mdev, u8 pin) { - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); - u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {}; u8 mode; int err; @@ -888,8 +1016,9 @@ static int mlx5_get_pps_pin_mode(struct mlx5_clock *clock, u8 pin) return PTP_PF_NONE; } -static void mlx5_init_pin_config(struct mlx5_clock *clock) +static void mlx5_init_pin_config(struct mlx5_core_dev *mdev) { + struct mlx5_clock *clock = mdev->clock; int i; if (!clock->ptp_info.n_pins) @@ -910,15 +1039,15 @@ static void mlx5_init_pin_config(struct mlx5_clock *clock) sizeof(clock->ptp_info.pin_config[i].name), "mlx5_pps%d", i); clock->ptp_info.pin_config[i].index = i; - clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(clock, i); + clock->ptp_info.pin_config[i].func = mlx5_get_pps_pin_mode(mdev, i); clock->ptp_info.pin_config[i].chan = 0; } } static void mlx5_get_pps_caps(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; u32 out[MLX5_ST_SZ_DW(mtpps_reg)] = {0}; + struct mlx5_clock *clock = mdev->clock; mlx5_query_mtpps(mdev, out, sizeof(out)); @@ -968,16 +1097,16 @@ static u64 perout_conf_next_event_timer(struct mlx5_core_dev *mdev, static int mlx5_pps_event(struct notifier_block *nb, unsigned long type, void *data) { - struct mlx5_clock *clock = mlx5_nb_cof(nb, struct mlx5_clock, pps_nb); + struct mlx5_clock_dev_state *clock_state = mlx5_nb_cof(nb, struct mlx5_clock_dev_state, + pps_nb); + struct mlx5_core_dev *mdev = clock_state->mdev; + struct mlx5_clock *clock = mdev->clock; struct ptp_clock_event ptp_event; struct mlx5_eqe *eqe = data; int pin = eqe->data.pps.pin; - struct mlx5_core_dev *mdev; unsigned long flags; u64 ns; - mdev = container_of(clock, struct mlx5_core_dev, clock); - switch (clock->ptp_info.pin_config[pin].func) { case PTP_PF_EXTTS: ptp_event.index = pin; @@ -997,11 +1126,15 @@ static int mlx5_pps_event(struct notifier_block *nb, ptp_clock_event(clock->ptp, &ptp_event); break; case PTP_PF_PEROUT: + if (clock->shared) { + mlx5_core_warn(mdev, " Received unexpected PPS out event\n"); + break; + } ns = perout_conf_next_event_timer(mdev, clock); write_seqlock_irqsave(&clock->lock, flags); clock->pps_info.start[pin] = ns; write_sequnlock_irqrestore(&clock->lock, flags); - schedule_work(&clock->pps_info.out_work); + schedule_work(&clock_state->out_work); break; default: mlx5_core_err(mdev, " Unhandled clock PPS event, func %d\n", @@ -1013,7 +1146,7 @@ static int mlx5_pps_event(struct notifier_block *nb, static void mlx5_timecounter_init(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; struct mlx5_timer *timer = &clock->timer; u32 dev_freq; @@ -1029,10 +1162,10 @@ static void mlx5_timecounter_init(struct mlx5_core_dev *mdev) ktime_to_ns(ktime_get_real())); } -static void mlx5_init_overflow_period(struct mlx5_clock *clock) +static void mlx5_init_overflow_period(struct mlx5_core_dev *mdev) { - struct mlx5_core_dev *mdev = container_of(clock, struct mlx5_core_dev, clock); struct mlx5_ib_clock_info *clock_info = mdev->clock_info; + struct mlx5_clock *clock = mdev->clock; struct mlx5_timer *timer = &clock->timer; u64 overflow_cycles; u64 frac = 0; @@ -1065,7 +1198,7 @@ static void mlx5_init_overflow_period(struct mlx5_clock *clock) static void mlx5_init_clock_info(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; struct mlx5_ib_clock_info *info; struct mlx5_timer *timer; @@ -1088,7 +1221,7 @@ static void mlx5_init_clock_info(struct mlx5_core_dev *mdev) static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; u32 out[MLX5_ST_SZ_DW(mtutc_reg)] = {}; u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; u8 log_max_freq_adjustment = 0; @@ -1107,7 +1240,7 @@ static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev) static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; /* Configure the PHC */ clock->ptp_info = mlx5_ptp_clock_info; @@ -1123,38 +1256,30 @@ static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) mlx5_timecounter_init(mdev); mlx5_init_clock_info(mdev); - mlx5_init_overflow_period(clock); + mlx5_init_overflow_period(mdev); if (mlx5_real_time_mode(mdev)) { struct timespec64 ts; ktime_get_real_ts64(&ts); - mlx5_ptp_settime(&clock->ptp_info, &ts); + mlx5_clock_settime(mdev, clock, &ts); } } static void mlx5_init_pps(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; - if (!MLX5_PPS_CAP(mdev)) return; mlx5_get_pps_caps(mdev); - mlx5_init_pin_config(clock); + mlx5_init_pin_config(mdev); } -void mlx5_init_clock(struct mlx5_core_dev *mdev) +static void mlx5_init_clock_dev(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; - - if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) { - mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n"); - return; - } + struct mlx5_clock *clock = mdev->clock; seqlock_init(&clock->lock); - INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out); /* Initialize the device clock */ mlx5_init_timer_clock(mdev); @@ -1163,35 +1288,27 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev) mlx5_init_pps(mdev); clock->ptp = ptp_clock_register(&clock->ptp_info, - &mdev->pdev->dev); + clock->shared ? NULL : &mdev->pdev->dev); if (IS_ERR(clock->ptp)) { - mlx5_core_warn(mdev, "ptp_clock_register failed %ld\n", + mlx5_core_warn(mdev, "%sptp_clock_register failed %ld\n", + clock->shared ? "shared clock " : "", PTR_ERR(clock->ptp)); clock->ptp = NULL; } - MLX5_NB_INIT(&clock->pps_nb, mlx5_pps_event, PPS_EVENT); - mlx5_eq_notifier_register(mdev, &clock->pps_nb); - if (clock->ptp) ptp_schedule_worker(clock->ptp, 0); } -void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) +static void mlx5_destroy_clock_dev(struct mlx5_core_dev *mdev) { - struct mlx5_clock *clock = &mdev->clock; + struct mlx5_clock *clock = mdev->clock; - if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) - return; - - mlx5_eq_notifier_unregister(mdev, &clock->pps_nb); if (clock->ptp) { ptp_clock_unregister(clock->ptp); clock->ptp = NULL; } - cancel_work_sync(&clock->pps_info.out_work); - if (mdev->clock_info) { free_page((unsigned long)mdev->clock_info); mdev->clock_info = NULL; @@ -1199,3 +1316,248 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) kfree(clock->ptp_info.pin_config); } + +static void mlx5_clock_free(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock_priv *cpriv = clock_priv(mdev->clock); + + mlx5_destroy_clock_dev(mdev); + mutex_destroy(&cpriv->lock); + kfree(cpriv); + mdev->clock = NULL; +} + +static int mlx5_clock_alloc(struct mlx5_core_dev *mdev, bool shared) +{ + struct mlx5_clock_priv *cpriv; + struct mlx5_clock *clock; + + cpriv = kzalloc(sizeof(*cpriv), GFP_KERNEL); + if (!cpriv) + return -ENOMEM; + + mutex_init(&cpriv->lock); + cpriv->mdev = mdev; + clock = &cpriv->clock; + clock->shared = shared; + mdev->clock = clock; + mlx5_clock_lock(clock); + mlx5_init_clock_dev(mdev); + mlx5_clock_unlock(clock); + + if (!clock->shared) + return 0; + + if (!clock->ptp) { + mlx5_core_warn(mdev, "failed to create ptp dev shared by multiple functions"); + mlx5_clock_free(mdev); + return -EINVAL; + } + + return 0; +} + +static void mlx5_shared_clock_register(struct mlx5_core_dev *mdev, u64 key) +{ + struct mlx5_core_dev *peer_dev, *next = NULL; + struct mlx5_devcom_comp_dev *pos; + + mdev->clock_state->compdev = mlx5_devcom_register_component(mdev->priv.devc, + MLX5_DEVCOM_SHARED_CLOCK, + key, NULL, mdev); + if (IS_ERR(mdev->clock_state->compdev)) + return; + + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) { + if (peer_dev->clock) { + next = peer_dev; + break; + } + } + + if (next) { + mdev->clock = next->clock; + /* clock info is shared among all the functions using the same clock */ + mdev->clock_info = next->clock_info; + } else { + mlx5_clock_alloc(mdev, true); + } + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); + + if (!mdev->clock) { + mlx5_devcom_unregister_component(mdev->clock_state->compdev); + mdev->clock_state->compdev = NULL; + } +} + +static void mlx5_shared_clock_unregister(struct mlx5_core_dev *mdev) +{ + struct mlx5_core_dev *peer_dev, *next = NULL; + struct mlx5_clock *clock = mdev->clock; + struct mlx5_devcom_comp_dev *pos; + + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) { + if (peer_dev->clock && peer_dev != mdev) { + next = peer_dev; + break; + } + } + + if (next) { + struct mlx5_clock_priv *cpriv = clock_priv(clock); + + mlx5_clock_lock(clock); + if (mdev == cpriv->mdev) + cpriv->mdev = next; + mlx5_clock_unlock(clock); + } else { + mlx5_clock_free(mdev); + } + + mdev->clock = NULL; + mdev->clock_info = NULL; + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); + + mlx5_devcom_unregister_component(mdev->clock_state->compdev); +} + +static void mlx5_clock_arm_pps_in_event(struct mlx5_clock *clock, + struct mlx5_core_dev *new_mdev, + struct mlx5_core_dev *old_mdev) +{ + struct ptp_clock_info *ptp_info = &clock->ptp_info; + struct mlx5_clock_priv *cpriv = clock_priv(clock); + int i; + + for (i = 0; i < ptp_info->n_pins; i++) { + if (ptp_info->pin_config[i].func != PTP_PF_EXTTS || + !clock->pps_info.pin_armed[i]) + continue; + + if (new_mdev) { + mlx5_set_mtppse(new_mdev, i, 0, MLX5_EVENT_MODE_REPETETIVE); + cpriv->event_mdev = new_mdev; + } else { + cpriv->event_mdev = NULL; + } + + if (old_mdev) + mlx5_set_mtppse(old_mdev, i, 0, MLX5_EVENT_MODE_DISABLE); + } +} + +void mlx5_clock_load(struct mlx5_core_dev *mdev) +{ + struct mlx5_clock *clock = mdev->clock; + struct mlx5_clock_priv *cpriv; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + INIT_WORK(&mdev->clock_state->out_work, mlx5_pps_out); + MLX5_NB_INIT(&mdev->clock_state->pps_nb, mlx5_pps_event, PPS_EVENT); + mlx5_eq_notifier_register(mdev, &mdev->clock_state->pps_nb); + + if (!clock->shared) { + mlx5_clock_arm_pps_in_event(clock, mdev, NULL); + return; + } + + cpriv = clock_priv(clock); + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_clock_lock(clock); + if (mdev == cpriv->mdev && mdev != cpriv->event_mdev) + mlx5_clock_arm_pps_in_event(clock, mdev, cpriv->event_mdev); + mlx5_clock_unlock(clock); + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); +} + +void mlx5_clock_unload(struct mlx5_core_dev *mdev) +{ + struct mlx5_core_dev *peer_dev, *next = NULL; + struct mlx5_clock *clock = mdev->clock; + struct mlx5_devcom_comp_dev *pos; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + if (!clock->shared) { + mlx5_clock_arm_pps_in_event(clock, NULL, mdev); + goto out; + } + + mlx5_devcom_comp_lock(mdev->clock_state->compdev); + mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) { + if (peer_dev->clock && peer_dev != mdev) { + next = peer_dev; + break; + } + } + + mlx5_clock_lock(clock); + if (mdev == clock_priv(clock)->event_mdev) + mlx5_clock_arm_pps_in_event(clock, next, mdev); + mlx5_clock_unlock(clock); + mlx5_devcom_comp_unlock(mdev->clock_state->compdev); + +out: + mlx5_eq_notifier_unregister(mdev, &mdev->clock_state->pps_nb); + cancel_work_sync(&mdev->clock_state->out_work); +} + +static struct mlx5_clock null_clock; + +int mlx5_init_clock(struct mlx5_core_dev *mdev) +{ + u8 identity[MLX5_RT_CLOCK_IDENTITY_SIZE]; + struct mlx5_clock_dev_state *clock_state; + u64 key; + int err; + + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) { + mdev->clock = &null_clock; + mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n"); + return 0; + } + + clock_state = kzalloc(sizeof(*clock_state), GFP_KERNEL); + if (!clock_state) + return -ENOMEM; + clock_state->mdev = mdev; + mdev->clock_state = clock_state; + + if (MLX5_CAP_MCAM_REG3(mdev, mrtcq) && mlx5_real_time_mode(mdev)) { + if (mlx5_clock_identity_get(mdev, identity)) { + mlx5_core_warn(mdev, "failed to get rt clock identity, create ptp dev per function\n"); + } else { + memcpy(&key, &identity, sizeof(key)); + mlx5_shared_clock_register(mdev, key); + } + } + + if (!mdev->clock) { + err = mlx5_clock_alloc(mdev, false); + if (err) { + kfree(clock_state); + mdev->clock_state = NULL; + return err; + } + } + + return 0; +} + +void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, device_frequency_khz)) + return; + + if (mdev->clock->shared) + mlx5_shared_clock_unregister(mdev); + else + mlx5_clock_free(mdev); + kfree(mdev->clock_state); + mdev->clock_state = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h index bd95b9f8d1439..c18a652c0faa1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h @@ -33,6 +33,35 @@ #ifndef __LIB_CLOCK_H__ #define __LIB_CLOCK_H__ +#include <linux/ptp_clock_kernel.h> + +#define MAX_PIN_NUM 8 +struct mlx5_pps { + u8 pin_caps[MAX_PIN_NUM]; + u64 start[MAX_PIN_NUM]; + u8 enabled; + u64 min_npps_period; + u64 min_out_pulse_duration_ns; + bool pin_armed[MAX_PIN_NUM]; +}; + +struct mlx5_timer { + struct cyclecounter cycles; + struct timecounter tc; + u32 nominal_c_mult; + unsigned long overflow_period; +}; + +struct mlx5_clock { + seqlock_t lock; + struct hwtstamp_config hwtstamp_config; + struct ptp_clock *ptp; + struct ptp_clock_info ptp_info; + struct mlx5_pps pps_info; + struct mlx5_timer timer; + bool shared; +}; + static inline bool mlx5_is_real_time_rq(struct mlx5_core_dev *mdev) { u8 rq_ts_format_cap = MLX5_CAP_GEN(mdev, rq_ts_format); @@ -54,12 +83,14 @@ static inline bool mlx5_is_real_time_sq(struct mlx5_core_dev *mdev) typedef ktime_t (*cqe_ts_to_ns)(struct mlx5_clock *, u64); #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) -void mlx5_init_clock(struct mlx5_core_dev *mdev); +int mlx5_init_clock(struct mlx5_core_dev *mdev); void mlx5_cleanup_clock(struct mlx5_core_dev *mdev); +void mlx5_clock_load(struct mlx5_core_dev *mdev); +void mlx5_clock_unload(struct mlx5_core_dev *mdev); static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { - return mdev->clock.ptp ? ptp_clock_index(mdev->clock.ptp) : -1; + return mdev->clock->ptp ? ptp_clock_index(mdev->clock->ptp) : -1; } static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock, @@ -87,8 +118,10 @@ static inline ktime_t mlx5_real_time_cyc2time(struct mlx5_clock *clock, return ns_to_ktime(time); } #else -static inline void mlx5_init_clock(struct mlx5_core_dev *mdev) {} +static inline int mlx5_init_clock(struct mlx5_core_dev *mdev) { return 0; } static inline void mlx5_cleanup_clock(struct mlx5_core_dev *mdev) {} +static inline void mlx5_clock_load(struct mlx5_core_dev *mdev) {} +static inline void mlx5_clock_unload(struct mlx5_core_dev *mdev) {} static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev) { return -1; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index d58032dd0df74..c79699b94a025 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -11,6 +11,7 @@ enum mlx5_devcom_component { MLX5_DEVCOM_MPV, MLX5_DEVCOM_HCA_PORTS, MLX5_DEVCOM_SD_GROUP, + MLX5_DEVCOM_SHARED_CLOCK, MLX5_DEVCOM_NUM_COMPONENTS, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index ec956c4bcebdb..710633d5fdbe0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1038,7 +1038,11 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) mlx5_init_reserved_gids(dev); - mlx5_init_clock(dev); + err = mlx5_init_clock(dev); + if (err) { + mlx5_core_err(dev, "failed to initialize hardware clock\n"); + goto err_tables_cleanup; + } dev->vxlan = mlx5_vxlan_create(dev); dev->geneve = mlx5_geneve_create(dev); @@ -1046,7 +1050,7 @@ static int mlx5_init_once(struct mlx5_core_dev *dev) err = mlx5_init_rl_table(dev); if (err) { mlx5_core_err(dev, "Failed to init rate limiting\n"); - goto err_tables_cleanup; + goto err_clock_cleanup; } err = mlx5_mpfs_init(dev); @@ -1123,10 +1127,11 @@ err_mpfs_cleanup: mlx5_mpfs_cleanup(dev); err_rl_cleanup: mlx5_cleanup_rl_table(dev); -err_tables_cleanup: +err_clock_cleanup: mlx5_geneve_destroy(dev->geneve); mlx5_vxlan_destroy(dev->vxlan); mlx5_cleanup_clock(dev); +err_tables_cleanup: mlx5_cleanup_reserved_gids(dev); mlx5_cq_debugfs_cleanup(dev); mlx5_fw_reset_cleanup(dev); @@ -1359,6 +1364,8 @@ static int mlx5_load(struct mlx5_core_dev *dev) goto err_eq_table; } + mlx5_clock_load(dev); + err = mlx5_fw_tracer_init(dev->tracer); if (err) { mlx5_core_err(dev, "Failed to init FW tracer %d\n", err); @@ -1442,6 +1449,7 @@ err_fpga_start: mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_reset_events_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); + mlx5_clock_unload(dev); mlx5_eq_table_destroy(dev); err_eq_table: mlx5_irq_table_destroy(dev); @@ -1468,6 +1476,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_hv_vhca_cleanup(dev->hv_vhca); mlx5_fw_reset_events_stop(dev); mlx5_fw_tracer_cleanup(dev->tracer); + mlx5_clock_unload(dev); mlx5_eq_table_destroy(dev); mlx5_irq_table_destroy(dev); mlx5_pagealloc_stop(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 50931584132b9..3995df064101d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -1105,6 +1105,9 @@ static const u32 mlx5e_ext_link_speed[MLX5E_EXT_LINK_MODES_NUMBER] = { [MLX5E_200GAUI_2_200GBASE_CR2_KR2] = 200000, [MLX5E_400GAUI_4_400GBASE_CR4_KR4] = 400000, [MLX5E_800GAUI_8_800GBASE_CR8_KR8] = 800000, + [MLX5E_200GAUI_1_200GBASE_CR1_KR1] = 200000, + [MLX5E_400GAUI_2_400GBASE_CR2_KR2] = 400000, + [MLX5E_800GAUI_4_800GBASE_CR4_KR4] = 800000, }; int mlx5_port_query_eth_proto(struct mlx5_core_dev *dev, u8 port, bool ext, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c index 60cb4527588a5..65740bb68b09b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_domain.c @@ -516,30 +516,6 @@ def_xa_destroy: return NULL; } -/* Assure synchronization of the device steering tables with updates made by SW - * insertion. - */ -int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags) -{ - int ret = 0; - - if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) { - mlx5dr_domain_lock(dmn); - ret = mlx5dr_send_ring_force_drain(dmn); - mlx5dr_domain_unlock(dmn); - if (ret) { - mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n", - flags, ret); - return ret; - } - } - - if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_HW) - ret = mlx5dr_cmd_sync_steering(dmn->mdev); - - return ret; -} - int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn) { if (WARN_ON_ONCE(refcount_read(&dmn->refcount) > 1)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c index f57c84e5128bc..4fd4e8483382c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c @@ -1331,36 +1331,3 @@ void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, kfree(send_ring->sync_buff); kfree(send_ring); } - -int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn) -{ - struct mlx5dr_send_ring *send_ring = dmn->send_ring; - struct postsend_info send_info = {}; - u8 data[DR_STE_SIZE]; - int num_of_sends_req; - int ret; - int i; - - /* Sending this amount of requests makes sure we will get drain */ - num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2; - - /* Send fake requests forcing the last to be signaled */ - send_info.write.addr = (uintptr_t)data; - send_info.write.length = DR_STE_SIZE; - send_info.write.lkey = 0; - /* Using the sync_mr in order to write/read */ - send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr; - send_info.rkey = send_ring->sync_mr->mkey; - - for (i = 0; i < num_of_sends_req; i++) { - ret = dr_postsend_icm_data(dmn, &send_info); - if (ret) - return ret; - } - - spin_lock(&send_ring->lock); - ret = dr_handle_pending_wc(dmn, send_ring); - spin_unlock(&send_ring->lock); - - return ret; -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h index 7618c6147f866..cc328292bf84a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_types.h @@ -1473,7 +1473,6 @@ struct mlx5dr_send_ring { int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn); void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn, struct mlx5dr_send_ring *send_ring); -int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn); int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste, u8 *data, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h index 0bb3724c10c26..fc8a2169d1a13 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/mlx5dr.h @@ -45,8 +45,6 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type); int mlx5dr_domain_destroy(struct mlx5dr_domain *domain); -int mlx5dr_domain_sync(struct mlx5dr_domain *domain, u32 flags); - void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn, struct mlx5dr_domain *peer_dmn, u16 peer_vhca_id); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index b10f80fc651b0..fa7082ee51839 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -754,9 +754,6 @@ void mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan); void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp, struct net_device *dev); -bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev); -u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev); u16 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp); int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id, enum mlxsw_sp_l3proto ul_proto, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 7d6d859cef3f9..464821dd492da 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -8184,41 +8184,6 @@ mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, return NULL; } -bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp, - const struct net_device *dev) -{ - struct mlxsw_sp_rif *rif; - - mutex_lock(&mlxsw_sp->router->lock); - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - mutex_unlock(&mlxsw_sp->router->lock); - - return rif; -} - -u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) -{ - struct mlxsw_sp_rif *rif; - u16 vid = 0; - - mutex_lock(&mlxsw_sp->router->lock); - rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev); - if (!rif) - goto out; - - /* We only return the VID for VLAN RIFs. Otherwise we return an - * invalid value (0). - */ - if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN) - goto out; - - vid = mlxsw_sp_fid_8021q_vid(rif->fid); - -out: - mutex_unlock(&mlxsw_sp->router->lock); - return vid; -} - static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif) { char ritr_pl[MLXSW_REG_RITR_LEN]; @@ -8417,19 +8382,6 @@ u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif) return lb_rif->common.rif_index; } -u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) -{ - struct net_device *dev = mlxsw_sp_rif_dev(&lb_rif->common); - u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(dev); - struct mlxsw_sp_vr *ul_vr; - - ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL); - if (WARN_ON(IS_ERR(ul_vr))) - return 0; - - return ul_vr->id; -} - u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif) { return lb_rif->ul_rif_id; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h index 0432c7cc6b07a..313efab5c324c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.h @@ -90,7 +90,6 @@ struct mlxsw_sp_ipip_entry; struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp, u16 rif_index); u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *rif); -u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *rif); u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif); u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev); int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c index 59951b5abdb78..ac80981f67c00 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c @@ -10,6 +10,40 @@ static struct dentry *fbnic_dbg_root; +static void fbnic_dbg_desc_break(struct seq_file *s, int i) +{ + while (i--) + seq_putc(s, '-'); + + seq_putc(s, '\n'); +} + +static int fbnic_dbg_mac_addr_show(struct seq_file *s, void *v) +{ + struct fbnic_dev *fbd = s->private; + char hdr[80]; + int i; + + /* Generate Header */ + snprintf(hdr, sizeof(hdr), "%3s %s %-17s %s\n", + "Idx", "S", "TCAM Bitmap", "Addr/Mask"); + seq_puts(s, hdr); + fbnic_dbg_desc_break(s, strnlen(hdr, sizeof(hdr))); + + for (i = 0; i < FBNIC_RPC_TCAM_MACDA_NUM_ENTRIES; i++) { + struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[i]; + + seq_printf(s, "%02d %d %64pb %pm\n", + i, mac_addr->state, mac_addr->act_tcam, + mac_addr->value.addr8); + seq_printf(s, " %pm\n", + mac_addr->mask.addr8); + } + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(fbnic_dbg_mac_addr); + static int fbnic_dbg_pcie_stats_show(struct seq_file *s, void *v) { struct fbnic_dev *fbd = s->private; @@ -48,6 +82,8 @@ void fbnic_dbg_fbd_init(struct fbnic_dev *fbd) fbd->dbg_fbd = debugfs_create_dir(name, fbnic_dbg_root); debugfs_create_file("pcie_stats", 0400, fbd->dbg_fbd, fbd, &fbnic_dbg_pcie_stats_fops); + debugfs_create_file("mac_addr", 0400, fbd->dbg_fbd, fbd, + &fbnic_dbg_mac_addr_fops); } void fbnic_dbg_fbd_exit(struct fbnic_dev *fbd) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index 7a96b6ee773f3..1db57c42333ef 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -628,6 +628,8 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd) fbnic_rss_key_fill(fbn->rss_key); fbnic_rss_init_en_mask(fbn); + netdev->priv_flags |= IFF_UNICAST_FLT; + netdev->features |= NETIF_F_RXHASH | NETIF_F_SG | diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index fa167b1aa0190..5222a035fd198 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -3033,7 +3033,7 @@ static void qed_iov_vf_mbx_vport_update(struct qed_hwfn *p_hwfn, u16 length; int rc; - /* Valiate PF can send such a request */ + /* Validate PF can send such a request */ if (!vf->vport_instance) { DP_VERBOSE(p_hwfn, QED_MSG_IOV, @@ -3312,7 +3312,7 @@ static void qed_iov_vf_mbx_ucast_filter(struct qed_hwfn *p_hwfn, goto out; } - /* Determine if the unicast filtering is acceptible by PF */ + /* Determine if the unicast filtering is acceptable by PF */ if ((p_bulletin->valid_bitmap & BIT(VLAN_ADDR_FORCED)) && (params.type == QED_FILTER_VLAN || params.type == QED_FILTER_MAC_VLAN)) { @@ -3729,7 +3729,7 @@ qed_iov_execute_vf_flr_cleanup(struct qed_hwfn *p_hwfn, rc = qed_iov_enable_vf_access(p_hwfn, p_ptt, p_vf); if (rc) { - DP_ERR(p_hwfn, "Failed to re-enable VF[%d] acces\n", + DP_ERR(p_hwfn, "Failed to re-enable VF[%d] access\n", vfid); return rc; } @@ -4480,7 +4480,7 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled) struct qed_ptt *ptt = qed_ptt_acquire(hwfn); /* Failure to acquire the ptt in 100g creates an odd error - * where the first engine has already relased IOV. + * where the first engine has already released IOV. */ if (!ptt) { DP_ERR(hwfn, "Failed to acquire ptt\n"); diff --git a/drivers/net/ethernet/realtek/Kconfig b/drivers/net/ethernet/realtek/Kconfig index 8a8ea51c639e9..fe136f61586fe 100644 --- a/drivers/net/ethernet/realtek/Kconfig +++ b/drivers/net/ethernet/realtek/Kconfig @@ -114,7 +114,8 @@ config R8169 will be called r8169. This is recommended. config R8169_LEDS - def_bool R8169 && LEDS_TRIGGER_NETDEV + bool "Support for controlling the NIC LEDs" + depends on R8169 && LEDS_TRIGGER_NETDEV depends on !(R8169=y && LEDS_CLASS=m) help Optional support for controlling the NIC LED's with the netdev diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 5a5eba49c6515..7306c8e323d72 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -5222,6 +5222,7 @@ static int r8169_mdio_register(struct rtl8169_private *tp) new_bus->priv = tp; new_bus->parent = &pdev->dev; new_bus->irq[0] = PHY_MAC_INTERRUPT; + new_bus->phy_mask = GENMASK(31, 1); snprintf(new_bus->id, MII_BUS_ID_SIZE, "r8169-%x-%x", pci_domain_nr(pdev->bus), pci_dev_id(pdev)); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index d04543e5697b0..b34ebb916b898 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -2424,6 +2424,11 @@ static void stmmac_dma_operation_mode(struct stmmac_priv *priv) u32 chan = 0; u8 qmode = 0; + if (rxfifosz == 0) + rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + /* Split up the shared Tx/Rx FIFO memory on DW QoS Eth and DW XGMAC */ if (priv->plat->has_gmac4 || priv->plat->has_xgmac) { rxfifosz /= rx_channels_count; @@ -2892,6 +2897,11 @@ static void stmmac_set_dma_operation_mode(struct stmmac_priv *priv, u32 txmode, int rxfifosz = priv->plat->rx_fifo_size; int txfifosz = priv->plat->tx_fifo_size; + if (rxfifosz == 0) + rxfifosz = priv->dma_cap.rx_fifo_size; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + /* Adjust for real per queue fifo size */ rxfifosz /= rx_channels_count; txfifosz /= tx_channels_count; @@ -5868,6 +5878,9 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) const int mtu = new_mtu; int ret; + if (txfifosz == 0) + txfifosz = priv->dma_cap.tx_fifo_size; + txfifosz /= priv->plat->tx_queues_to_use; if (stmmac_xdp_is_enabled(priv) && new_mtu > ETH_DATA_LEN) { @@ -7219,29 +7232,15 @@ static int stmmac_hw_init(struct stmmac_priv *priv) priv->plat->tx_queues_to_use = priv->dma_cap.number_tx_queues; } - if (!priv->plat->rx_fifo_size) { - if (priv->dma_cap.rx_fifo_size) { - priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size; - } else { - dev_err(priv->device, "Can't specify Rx FIFO size\n"); - return -ENODEV; - } - } else if (priv->dma_cap.rx_fifo_size && - priv->plat->rx_fifo_size > priv->dma_cap.rx_fifo_size) { + if (priv->dma_cap.rx_fifo_size && + priv->plat->rx_fifo_size > priv->dma_cap.rx_fifo_size) { dev_warn(priv->device, "Rx FIFO size (%u) exceeds dma capability\n", priv->plat->rx_fifo_size); priv->plat->rx_fifo_size = priv->dma_cap.rx_fifo_size; } - if (!priv->plat->tx_fifo_size) { - if (priv->dma_cap.tx_fifo_size) { - priv->plat->tx_fifo_size = priv->dma_cap.tx_fifo_size; - } else { - dev_err(priv->device, "Can't specify Tx FIFO size\n"); - return -ENODEV; - } - } else if (priv->dma_cap.tx_fifo_size && - priv->plat->tx_fifo_size > priv->dma_cap.tx_fifo_size) { + if (priv->dma_cap.tx_fifo_size && + priv->plat->tx_fifo_size > priv->dma_cap.tx_fifo_size) { dev_warn(priv->device, "Tx FIFO size (%u) exceeds dma capability\n", priv->plat->tx_fifo_size); diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index 6bf3ec985f3d3..f181f05cb4297 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -13,7 +13,7 @@ */ const char *phy_speed_to_str(int speed) { - BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 103, + BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 121, "Enum ethtool_link_mode_bit_indices and phylib are out of sync. " "If a speed or mode has been added please update phy_speed_to_str " "and the PHY settings array.\n"); @@ -169,6 +169,12 @@ static const struct phy_setting settings[] = { PHY_SETTING( 800000, FULL, 800000baseDR8_2_Full ), PHY_SETTING( 800000, FULL, 800000baseSR8_Full ), PHY_SETTING( 800000, FULL, 800000baseVR8_Full ), + PHY_SETTING( 800000, FULL, 800000baseCR4_Full ), + PHY_SETTING( 800000, FULL, 800000baseKR4_Full ), + PHY_SETTING( 800000, FULL, 800000baseDR4_Full ), + PHY_SETTING( 800000, FULL, 800000baseDR4_2_Full ), + PHY_SETTING( 800000, FULL, 800000baseSR4_Full ), + PHY_SETTING( 800000, FULL, 800000baseVR4_Full ), /* 400G */ PHY_SETTING( 400000, FULL, 400000baseCR8_Full ), PHY_SETTING( 400000, FULL, 400000baseKR8_Full ), @@ -180,6 +186,12 @@ static const struct phy_setting settings[] = { PHY_SETTING( 400000, FULL, 400000baseLR4_ER4_FR4_Full ), PHY_SETTING( 400000, FULL, 400000baseDR4_Full ), PHY_SETTING( 400000, FULL, 400000baseSR4_Full ), + PHY_SETTING( 400000, FULL, 400000baseCR2_Full ), + PHY_SETTING( 400000, FULL, 400000baseKR2_Full ), + PHY_SETTING( 400000, FULL, 400000baseDR2_Full ), + PHY_SETTING( 400000, FULL, 400000baseDR2_2_Full ), + PHY_SETTING( 400000, FULL, 400000baseSR2_Full ), + PHY_SETTING( 400000, FULL, 400000baseVR2_Full ), /* 200G */ PHY_SETTING( 200000, FULL, 200000baseCR4_Full ), PHY_SETTING( 200000, FULL, 200000baseKR4_Full ), @@ -191,6 +203,12 @@ static const struct phy_setting settings[] = { PHY_SETTING( 200000, FULL, 200000baseLR2_ER2_FR2_Full ), PHY_SETTING( 200000, FULL, 200000baseDR2_Full ), PHY_SETTING( 200000, FULL, 200000baseSR2_Full ), + PHY_SETTING( 200000, FULL, 200000baseCR_Full ), + PHY_SETTING( 200000, FULL, 200000baseKR_Full ), + PHY_SETTING( 200000, FULL, 200000baseDR_Full ), + PHY_SETTING( 200000, FULL, 200000baseDR_2_Full ), + PHY_SETTING( 200000, FULL, 200000baseSR_Full ), + PHY_SETTING( 200000, FULL, 200000baseVR_Full ), /* 100G */ PHY_SETTING( 100000, FULL, 100000baseCR4_Full ), PHY_SETTING( 100000, FULL, 100000baseKR4_Full ), diff --git a/drivers/net/phy/realtek/Kconfig b/drivers/net/phy/realtek/Kconfig index 31935f147d879..b05c2a1e90243 100644 --- a/drivers/net/phy/realtek/Kconfig +++ b/drivers/net/phy/realtek/Kconfig @@ -4,8 +4,12 @@ config REALTEK_PHY help Currently supports RTL821x/RTL822x and fast ethernet PHYs +if REALTEK_PHY + config REALTEK_PHY_HWMON - def_bool REALTEK_PHY && HWMON - depends on !(REALTEK_PHY=y && HWMON=m) + bool "HWMON support for Realtek PHYs" + depends on HWMON && !(REALTEK_PHY=y && HWMON=m) help Optional hwmon support for the temperature sensor + +endif # REALTEK_PHY diff --git a/drivers/net/phy/realtek/realtek_main.c b/drivers/net/phy/realtek/realtek_main.c index 572a933636b01..210fefac44d47 100644 --- a/drivers/net/phy/realtek/realtek_main.c +++ b/drivers/net/phy/realtek/realtek_main.c @@ -13,6 +13,7 @@ #include <linux/module.h> #include <linux/delay.h> #include <linux/clk.h> +#include <linux/string_choices.h> #include "realtek.h" @@ -422,11 +423,11 @@ static int rtl8211f_config_init(struct phy_device *phydev) } else if (ret) { dev_dbg(dev, "%s 2ns TX delay (and changing the value from pin-strapping RXD1 or the bootloader)\n", - val_txdly ? "Enabling" : "Disabling"); + str_enable_disable(val_txdly)); } else { dev_dbg(dev, "2ns TX delay was already %s (by pin-strapping RXD1 or bootloader configuration)\n", - val_txdly ? "enabled" : "disabled"); + str_enabled_disabled(val_txdly)); } ret = phy_modify_paged_changed(phydev, 0xd08, 0x15, RTL8211F_RX_DELAY, @@ -437,11 +438,11 @@ static int rtl8211f_config_init(struct phy_device *phydev) } else if (ret) { dev_dbg(dev, "%s 2ns RX delay (and changing the value from pin-strapping RXD0 or the bootloader)\n", - val_rxdly ? "Enabling" : "Disabling"); + str_enable_disable(val_rxdly)); } else { dev_dbg(dev, "2ns RX delay was already %s (by pin-strapping RXD0 or bootloader configuration)\n", - val_rxdly ? "enabled" : "disabled"); + str_enabled_disabled(val_rxdly)); } if (priv->has_phycr2) { diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 28624cca91f8d..acf96f2624887 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -574,18 +574,14 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb, return ret; } -static inline bool tun_capable(struct tun_struct *tun) +static inline bool tun_not_capable(struct tun_struct *tun) { const struct cred *cred = current_cred(); struct net *net = dev_net(tun->dev); - if (ns_capable(net->user_ns, CAP_NET_ADMIN)) - return 1; - if (uid_valid(tun->owner) && uid_eq(cred->euid, tun->owner)) - return 1; - if (gid_valid(tun->group) && in_egroup_p(tun->group)) - return 1; - return 0; + return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || + (gid_valid(tun->group) && !in_egroup_p(tun->group))) && + !ns_capable(net->user_ns, CAP_NET_ADMIN); } static void tun_set_real_num_queues(struct tun_struct *tun) @@ -2782,7 +2778,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) !!(tun->flags & IFF_MULTI_QUEUE)) return -EINVAL; - if (!tun_capable(tun)) + if (tun_not_capable(tun)) return -EPERM; err = security_tun_dev_open(tun->security); if (err < 0) diff --git a/drivers/net/vmxnet3/vmxnet3_xdp.c b/drivers/net/vmxnet3/vmxnet3_xdp.c index 1341374a4588a..616ecc38d1726 100644 --- a/drivers/net/vmxnet3/vmxnet3_xdp.c +++ b/drivers/net/vmxnet3/vmxnet3_xdp.c @@ -28,7 +28,7 @@ vmxnet3_xdp_get_tq(struct vmxnet3_adapter *adapter) if (likely(cpu < tq_number)) tq = &adapter->tx_queue[cpu]; else - tq = &adapter->tx_queue[reciprocal_scale(cpu, tq_number)]; + tq = &adapter->tx_queue[cpu % tq_number]; return tq; } @@ -124,6 +124,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, u32 buf_size; u32 dw2; + spin_lock_irq(&tq->tx_lock); dw2 = (tq->tx_ring.gen ^ 0x1) << VMXNET3_TXD_GEN_SHIFT; dw2 |= xdpf->len; ctx.sop_txd = tq->tx_ring.base + tq->tx_ring.next2fill; @@ -134,6 +135,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, if (vmxnet3_cmd_ring_desc_avail(&tq->tx_ring) == 0) { tq->stats.tx_ring_full++; + spin_unlock_irq(&tq->tx_lock); return -ENOSPC; } @@ -142,8 +144,10 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, tbi->dma_addr = dma_map_single(&adapter->pdev->dev, xdpf->data, buf_size, DMA_TO_DEVICE); - if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) + if (dma_mapping_error(&adapter->pdev->dev, tbi->dma_addr)) { + spin_unlock_irq(&tq->tx_lock); return -EFAULT; + } tbi->map_type |= VMXNET3_MAP_SINGLE; } else { /* XDP buffer from page pool */ page = virt_to_page(xdpf->data); @@ -182,6 +186,7 @@ vmxnet3_xdp_xmit_frame(struct vmxnet3_adapter *adapter, dma_wmb(); gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^ VMXNET3_TXD_GEN); + spin_unlock_irq(&tq->tx_lock); /* No need to handle the case when tx_num_deferred doesn't reach * threshold. Backend driver at hypervisor side will poll and reset @@ -225,6 +230,7 @@ vmxnet3_xdp_xmit(struct net_device *dev, { struct vmxnet3_adapter *adapter = netdev_priv(dev); struct vmxnet3_tx_queue *tq; + struct netdev_queue *nq; int i; if (unlikely(test_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))) @@ -236,6 +242,9 @@ vmxnet3_xdp_xmit(struct net_device *dev, if (tq->stopped) return -ENETDOWN; + nq = netdev_get_tx_queue(adapter->netdev, tq->qid); + + __netif_tx_lock(nq, smp_processor_id()); for (i = 0; i < n; i++) { if (vmxnet3_xdp_xmit_frame(adapter, frames[i], tq, true)) { tq->stats.xdp_xmit_err++; @@ -243,6 +252,7 @@ vmxnet3_xdp_xmit(struct net_device *dev, } } tq->stats.xdp_xmit += i; + __netif_tx_unlock(nq); return i; } diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c index 05c10acb2a57e..ece5415f9013d 100644 --- a/drivers/net/vxlan/vxlan_core.c +++ b/drivers/net/vxlan/vxlan_core.c @@ -227,9 +227,9 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, be32_to_cpu(fdb->vni))) goto nla_put_failure; - ci.ndm_used = jiffies_to_clock_t(now - fdb->used); + ci.ndm_used = jiffies_to_clock_t(now - READ_ONCE(fdb->used)); ci.ndm_confirmed = 0; - ci.ndm_updated = jiffies_to_clock_t(now - fdb->updated); + ci.ndm_updated = jiffies_to_clock_t(now - READ_ONCE(fdb->updated)); ci.ndm_refcnt = 0; if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) @@ -434,8 +434,12 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan, struct vxlan_fdb *f; f = __vxlan_find_mac(vxlan, mac, vni); - if (f && f->used != jiffies) - f->used = jiffies; + if (f) { + unsigned long now = jiffies; + + if (READ_ONCE(f->used) != now) + WRITE_ONCE(f->used, now); + } return f; } @@ -1009,12 +1013,10 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, !(f->flags & NTF_VXLAN_ADDED_BY_USER)) { if (f->state != state) { f->state = state; - f->updated = jiffies; notify = 1; } if (f->flags != fdb_flags) { f->flags = fdb_flags; - f->updated = jiffies; notify = 1; } } @@ -1048,12 +1050,13 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan, } if (ndm_flags & NTF_USE) - f->used = jiffies; + WRITE_ONCE(f->updated, jiffies); if (notify) { if (rd == NULL) rd = first_remote_rtnl(f); + WRITE_ONCE(f->updated, jiffies); err = vxlan_fdb_notify(vxlan, f, rd, RTM_NEWNEIGH, swdev_notify, extack); if (err) @@ -1292,7 +1295,7 @@ int __vxlan_fdb_delete(struct vxlan_dev *vxlan, struct vxlan_fdb *f; int err = -ENOENT; - f = vxlan_find_mac(vxlan, addr, src_vni); + f = __vxlan_find_mac(vxlan, addr, src_vni); if (!f) return err; @@ -1459,9 +1462,13 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev, ifindex = src_ifindex; #endif - f = vxlan_find_mac(vxlan, src_mac, vni); + f = __vxlan_find_mac(vxlan, src_mac, vni); if (likely(f)) { struct vxlan_rdst *rdst = first_remote_rcu(f); + unsigned long now = jiffies; + + if (READ_ONCE(f->updated) != now) + WRITE_ONCE(f->updated, now); if (likely(vxlan_addr_equal(&rdst->remote_ip, src_ip) && rdst->remote_ifindex == ifindex)) @@ -1481,7 +1488,6 @@ static enum skb_drop_reason vxlan_snoop(struct net_device *dev, src_mac, &rdst->remote_ip.sa, &src_ip->sa); rdst->remote_ip = *src_ip; - f->updated = jiffies; vxlan_fdb_notify(vxlan, f, rdst, RTM_NEWNEIGH, true, NULL); } else { u32 hash_index = fdb_head_index(vxlan, src_mac, vni); @@ -2852,7 +2858,7 @@ static void vxlan_cleanup(struct timer_list *t) if (f->flags & NTF_EXT_LEARNED) continue; - timeout = f->used + vxlan->cfg.age_interval * HZ; + timeout = READ_ONCE(f->updated) + vxlan->cfg.age_interval * HZ; if (time_before_eq(timeout, jiffies)) { netdev_dbg(vxlan->dev, "garbage collect %pM\n", @@ -4765,7 +4771,7 @@ vxlan_fdb_offloaded_set(struct net_device *dev, spin_lock_bh(&vxlan->hash_lock[hash_index]); - f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); + f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); if (!f) goto out; @@ -4821,7 +4827,7 @@ vxlan_fdb_external_learn_del(struct net_device *dev, hash_index = fdb_head_index(vxlan, fdb_info->eth_addr, fdb_info->vni); spin_lock_bh(&vxlan->hash_lock[hash_index]); - f = vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); + f = __vxlan_find_mac(vxlan, fdb_info->eth_addr, fdb_info->vni); if (!f) err = -ENOENT; else if (f->flags & NTF_EXT_LEARNED) diff --git a/drivers/platform/x86/ideapad-laptop.c b/drivers/platform/x86/ideapad-laptop.c index dfb5d4b8c0465..30bd366d7b58a 100644 --- a/drivers/platform/x86/ideapad-laptop.c +++ b/drivers/platform/x86/ideapad-laptop.c @@ -1121,7 +1121,7 @@ static int ideapad_dytc_profile_init(struct ideapad_private *priv) /* Create platform_profile structure and register */ priv->dytc->ppdev = devm_platform_profile_register(&priv->platform_device->dev, - "ideapad-laptop", &priv->dytc, + "ideapad-laptop", priv->dytc, &dytc_profile_ops); if (IS_ERR(priv->dytc->ppdev)) { err = PTR_ERR(priv->dytc->ppdev); diff --git a/drivers/platform/x86/intel/ifs/ifs.h b/drivers/platform/x86/intel/ifs/ifs.h index 5c3c0dfa1bf83..f369fb0d3d82f 100644 --- a/drivers/platform/x86/intel/ifs/ifs.h +++ b/drivers/platform/x86/intel/ifs/ifs.h @@ -23,12 +23,14 @@ * IFS Image * --------- * - * Intel provides a firmware file containing the scan tests via - * github [#f1]_. Similar to microcode there is a separate file for each + * Intel provides firmware files containing the scan tests via the webpage [#f1]_. + * Look under "In-Field Scan Test Images Download" section towards the + * end of the page. Similar to microcode, there are separate files for each * family-model-stepping. IFS Images are not applicable for some test types. * Wherever applicable the sysfs directory would provide a "current_batch" file * (see below) for loading the image. * + * .. [#f1] https://intel.com/InFieldScan * * IFS Image Loading * ----------------- @@ -125,9 +127,6 @@ * 2) Hardware allows for some number of cores to be tested in parallel. * The driver does not make use of this, it only tests one core at a time. * - * .. [#f1] https://github.com/intel/TBD - * - * * Structural Based Functional Test at Field (SBAF): * ------------------------------------------------- * diff --git a/drivers/platform/x86/intel/pmc/core.c b/drivers/platform/x86/intel/pmc/core.c index 10f04b9441174..1ee0fb5f8250b 100644 --- a/drivers/platform/x86/intel/pmc/core.c +++ b/drivers/platform/x86/intel/pmc/core.c @@ -626,8 +626,8 @@ static u32 convert_ltr_scale(u32 val) static int pmc_core_ltr_show(struct seq_file *s, void *unused) { struct pmc_dev *pmcdev = s->private; - u64 decoded_snoop_ltr, decoded_non_snoop_ltr; - u32 ltr_raw_data, scale, val; + u64 decoded_snoop_ltr, decoded_non_snoop_ltr, val; + u32 ltr_raw_data, scale; u16 snoop_ltr, nonsnoop_ltr; unsigned int i, index, ltr_index = 0; diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig index c61e6427384c3..9eb9e3c49f815 100644 --- a/drivers/s390/net/Kconfig +++ b/drivers/s390/net/Kconfig @@ -2,15 +2,6 @@ menu "S/390 network device drivers" depends on NETDEVICES && S390 -config LCS - def_tristate m - prompt "Lan Channel Station Interface" - depends on CCW && NETDEVICES && ETHERNET - help - Select this option if you want to use LCS networking on IBM System z. - To compile as a module, choose M. The module name is lcs. - If you do not use LCS, choose N. - config CTCM def_tristate m prompt "CTC and MPC SNA device support" @@ -98,7 +89,7 @@ config QETH_OSX config CCWGROUP tristate - default (LCS || CTCM || QETH || SMC) + default (CTCM || QETH || SMC) config ISM tristate "Support for ISM vPCI Adapter" diff --git a/drivers/s390/net/Makefile b/drivers/s390/net/Makefile index bc55ec316adb1..b5aaba2901274 100644 --- a/drivers/s390/net/Makefile +++ b/drivers/s390/net/Makefile @@ -8,7 +8,6 @@ obj-$(CONFIG_CTCM) += ctcm.o fsm.o obj-$(CONFIG_NETIUCV) += netiucv.o fsm.o obj-$(CONFIG_SMSGIUCV) += smsgiucv.o obj-$(CONFIG_SMSGIUCV_EVENT) += smsgiucv_app.o -obj-$(CONFIG_LCS) += lcs.o qeth-y += qeth_core_sys.o qeth_core_main.o qeth_core_mpc.o qeth_ethtool.o obj-$(CONFIG_QETH) += qeth.o qeth_l2-y += qeth_l2_main.o qeth_l2_sys.o diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c deleted file mode 100644 index 88db8378325a3..0000000000000 --- a/drivers/s390/net/lcs.c +++ /dev/null @@ -1,2385 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Linux for S/390 LAN channel station device driver - * - * Copyright IBM Corp. 1999, 2009 - * Author(s): Original Code written by - * DJ Barrow <djbarrow@de.ibm.com,barrow_dj@yahoo.com> - * Rewritten by - * Frank Pavlic <fpavlic@de.ibm.com> and - * Martin Schwidefsky <schwidefsky@de.ibm.com> - */ - -#define KMSG_COMPONENT "lcs" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include <linux/module.h> -#include <linux/if.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/inetdevice.h> -#include <linux/in.h> -#include <linux/igmp.h> -#include <linux/delay.h> -#include <linux/kthread.h> -#include <linux/slab.h> -#include <net/arp.h> -#include <net/ip.h> - -#include <asm/debug.h> -#include <asm/idals.h> -#include <asm/timex.h> -#include <linux/device.h> -#include <asm/ccwgroup.h> - -#include "lcs.h" - - -/* - * initialization string for output - */ - -static char version[] __initdata = "LCS driver"; - -/* - * the root device for lcs group devices - */ -static struct device *lcs_root_dev; - -/* - * Some prototypes. - */ -static void lcs_tasklet(unsigned long); -static void lcs_start_kernel_thread(struct work_struct *); -static void lcs_get_frames_cb(struct lcs_channel *, struct lcs_buffer *); -#ifdef CONFIG_IP_MULTICAST -static int lcs_send_delipm(struct lcs_card *, struct lcs_ipm_list *); -#endif /* CONFIG_IP_MULTICAST */ -static int lcs_recovery(void *ptr); - -/* - * Debug Facility Stuff - */ -static char debug_buffer[255]; -static debug_info_t *lcs_dbf_setup; -static debug_info_t *lcs_dbf_trace; - -/* - * LCS Debug Facility functions - */ -static void -lcs_unregister_debug_facility(void) -{ - debug_unregister(lcs_dbf_setup); - debug_unregister(lcs_dbf_trace); -} - -static int -lcs_register_debug_facility(void) -{ - lcs_dbf_setup = debug_register("lcs_setup", 2, 1, 8); - lcs_dbf_trace = debug_register("lcs_trace", 4, 1, 8); - if (lcs_dbf_setup == NULL || lcs_dbf_trace == NULL) { - pr_err("Not enough memory for debug facility.\n"); - lcs_unregister_debug_facility(); - return -ENOMEM; - } - debug_register_view(lcs_dbf_setup, &debug_hex_ascii_view); - debug_set_level(lcs_dbf_setup, 2); - debug_register_view(lcs_dbf_trace, &debug_hex_ascii_view); - debug_set_level(lcs_dbf_trace, 2); - return 0; -} - -/* - * Allocate io buffers. - */ -static int -lcs_alloc_channel(struct lcs_channel *channel) -{ - int cnt; - - LCS_DBF_TEXT(2, setup, "ichalloc"); - for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) { - /* alloc memory fo iobuffer */ - channel->iob[cnt].data = - kzalloc(LCS_IOBUFFERSIZE, GFP_DMA | GFP_KERNEL); - if (channel->iob[cnt].data == NULL) - break; - channel->iob[cnt].state = LCS_BUF_STATE_EMPTY; - } - if (cnt < LCS_NUM_BUFFS) { - /* Not all io buffers could be allocated. */ - LCS_DBF_TEXT(2, setup, "echalloc"); - while (cnt-- > 0) - kfree(channel->iob[cnt].data); - return -ENOMEM; - } - return 0; -} - -/* - * Free io buffers. - */ -static void -lcs_free_channel(struct lcs_channel *channel) -{ - int cnt; - - LCS_DBF_TEXT(2, setup, "ichfree"); - for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) { - kfree(channel->iob[cnt].data); - channel->iob[cnt].data = NULL; - } -} - -/* - * Cleanup channel. - */ -static void -lcs_cleanup_channel(struct lcs_channel *channel) -{ - LCS_DBF_TEXT(3, setup, "cleanch"); - /* Kill write channel tasklets. */ - tasklet_kill(&channel->irq_tasklet); - /* Free channel buffers. */ - lcs_free_channel(channel); -} - -/* - * LCS free memory for card and channels. - */ -static void -lcs_free_card(struct lcs_card *card) -{ - LCS_DBF_TEXT(2, setup, "remcard"); - LCS_DBF_HEX(2, setup, &card, sizeof(void*)); - kfree(card); -} - -/* - * LCS alloc memory for card and channels - */ -static struct lcs_card * -lcs_alloc_card(void) -{ - struct lcs_card *card; - int rc; - - LCS_DBF_TEXT(2, setup, "alloclcs"); - - card = kzalloc(sizeof(struct lcs_card), GFP_KERNEL | GFP_DMA); - if (card == NULL) - return NULL; - card->lan_type = LCS_FRAME_TYPE_AUTO; - card->pkt_seq = 0; - card->lancmd_timeout = LCS_LANCMD_TIMEOUT_DEFAULT; - /* Allocate io buffers for the read channel. */ - rc = lcs_alloc_channel(&card->read); - if (rc){ - LCS_DBF_TEXT(2, setup, "iccwerr"); - lcs_free_card(card); - return NULL; - } - /* Allocate io buffers for the write channel. */ - rc = lcs_alloc_channel(&card->write); - if (rc) { - LCS_DBF_TEXT(2, setup, "iccwerr"); - lcs_cleanup_channel(&card->read); - lcs_free_card(card); - return NULL; - } - -#ifdef CONFIG_IP_MULTICAST - INIT_LIST_HEAD(&card->ipm_list); -#endif - LCS_DBF_HEX(2, setup, &card, sizeof(void*)); - return card; -} - -/* - * Setup read channel. - */ -static void -lcs_setup_read_ccws(struct lcs_card *card) -{ - int cnt; - - LCS_DBF_TEXT(2, setup, "ireadccw"); - /* Setup read ccws. */ - memset(card->read.ccws, 0, sizeof (struct ccw1) * (LCS_NUM_BUFFS + 1)); - for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) { - card->read.ccws[cnt].cmd_code = LCS_CCW_READ; - card->read.ccws[cnt].count = LCS_IOBUFFERSIZE; - card->read.ccws[cnt].flags = - CCW_FLAG_CC | CCW_FLAG_SLI | CCW_FLAG_PCI; - /* - * Note: we have allocated the buffer with GFP_DMA, so - * we do not need to do set_normalized_cda. - */ - card->read.ccws[cnt].cda = - virt_to_dma32(card->read.iob[cnt].data); - ((struct lcs_header *) - card->read.iob[cnt].data)->offset = LCS_ILLEGAL_OFFSET; - card->read.iob[cnt].callback = lcs_get_frames_cb; - card->read.iob[cnt].state = LCS_BUF_STATE_READY; - card->read.iob[cnt].count = LCS_IOBUFFERSIZE; - } - card->read.ccws[0].flags &= ~CCW_FLAG_PCI; - card->read.ccws[LCS_NUM_BUFFS - 1].flags &= ~CCW_FLAG_PCI; - card->read.ccws[LCS_NUM_BUFFS - 1].flags |= CCW_FLAG_SUSPEND; - /* Last ccw is a tic (transfer in channel). */ - card->read.ccws[LCS_NUM_BUFFS].cmd_code = LCS_CCW_TRANSFER; - card->read.ccws[LCS_NUM_BUFFS].cda = virt_to_dma32(card->read.ccws); - /* Setg initial state of the read channel. */ - card->read.state = LCS_CH_STATE_INIT; - - card->read.io_idx = 0; - card->read.buf_idx = 0; -} - -static void -lcs_setup_read(struct lcs_card *card) -{ - LCS_DBF_TEXT(3, setup, "initread"); - - lcs_setup_read_ccws(card); - /* Initialize read channel tasklet. */ - card->read.irq_tasklet.data = (unsigned long) &card->read; - card->read.irq_tasklet.func = lcs_tasklet; - /* Initialize waitqueue. */ - init_waitqueue_head(&card->read.wait_q); -} - -/* - * Setup write channel. - */ -static void -lcs_setup_write_ccws(struct lcs_card *card) -{ - int cnt; - - LCS_DBF_TEXT(3, setup, "iwritccw"); - /* Setup write ccws. */ - memset(card->write.ccws, 0, sizeof(struct ccw1) * (LCS_NUM_BUFFS + 1)); - for (cnt = 0; cnt < LCS_NUM_BUFFS; cnt++) { - card->write.ccws[cnt].cmd_code = LCS_CCW_WRITE; - card->write.ccws[cnt].count = 0; - card->write.ccws[cnt].flags = - CCW_FLAG_SUSPEND | CCW_FLAG_CC | CCW_FLAG_SLI; - /* - * Note: we have allocated the buffer with GFP_DMA, so - * we do not need to do set_normalized_cda. - */ - card->write.ccws[cnt].cda = - virt_to_dma32(card->write.iob[cnt].data); - } - /* Last ccw is a tic (transfer in channel). */ - card->write.ccws[LCS_NUM_BUFFS].cmd_code = LCS_CCW_TRANSFER; - card->write.ccws[LCS_NUM_BUFFS].cda = virt_to_dma32(card->write.ccws); - /* Set initial state of the write channel. */ - card->read.state = LCS_CH_STATE_INIT; - - card->write.io_idx = 0; - card->write.buf_idx = 0; -} - -static void -lcs_setup_write(struct lcs_card *card) -{ - LCS_DBF_TEXT(3, setup, "initwrit"); - - lcs_setup_write_ccws(card); - /* Initialize write channel tasklet. */ - card->write.irq_tasklet.data = (unsigned long) &card->write; - card->write.irq_tasklet.func = lcs_tasklet; - /* Initialize waitqueue. */ - init_waitqueue_head(&card->write.wait_q); -} - -static void -lcs_set_allowed_threads(struct lcs_card *card, unsigned long threads) -{ - unsigned long flags; - - spin_lock_irqsave(&card->mask_lock, flags); - card->thread_allowed_mask = threads; - spin_unlock_irqrestore(&card->mask_lock, flags); - wake_up(&card->wait_q); -} -static int lcs_threads_running(struct lcs_card *card, unsigned long threads) -{ - unsigned long flags; - int rc = 0; - - spin_lock_irqsave(&card->mask_lock, flags); - rc = (card->thread_running_mask & threads); - spin_unlock_irqrestore(&card->mask_lock, flags); - return rc; -} - -static int -lcs_wait_for_threads(struct lcs_card *card, unsigned long threads) -{ - return wait_event_interruptible(card->wait_q, - lcs_threads_running(card, threads) == 0); -} - -static int lcs_set_thread_start_bit(struct lcs_card *card, unsigned long thread) -{ - unsigned long flags; - - spin_lock_irqsave(&card->mask_lock, flags); - if ( !(card->thread_allowed_mask & thread) || - (card->thread_start_mask & thread) ) { - spin_unlock_irqrestore(&card->mask_lock, flags); - return -EPERM; - } - card->thread_start_mask |= thread; - spin_unlock_irqrestore(&card->mask_lock, flags); - return 0; -} - -static void -lcs_clear_thread_running_bit(struct lcs_card *card, unsigned long thread) -{ - unsigned long flags; - - spin_lock_irqsave(&card->mask_lock, flags); - card->thread_running_mask &= ~thread; - spin_unlock_irqrestore(&card->mask_lock, flags); - wake_up(&card->wait_q); -} - -static int __lcs_do_run_thread(struct lcs_card *card, unsigned long thread) -{ - unsigned long flags; - int rc = 0; - - spin_lock_irqsave(&card->mask_lock, flags); - if (card->thread_start_mask & thread){ - if ((card->thread_allowed_mask & thread) && - !(card->thread_running_mask & thread)){ - rc = 1; - card->thread_start_mask &= ~thread; - card->thread_running_mask |= thread; - } else - rc = -EPERM; - } - spin_unlock_irqrestore(&card->mask_lock, flags); - return rc; -} - -static int -lcs_do_run_thread(struct lcs_card *card, unsigned long thread) -{ - int rc = 0; - wait_event(card->wait_q, - (rc = __lcs_do_run_thread(card, thread)) >= 0); - return rc; -} - -static int -lcs_do_start_thread(struct lcs_card *card, unsigned long thread) -{ - unsigned long flags; - int rc = 0; - - spin_lock_irqsave(&card->mask_lock, flags); - LCS_DBF_TEXT_(4, trace, " %02x%02x%02x", - (u8) card->thread_start_mask, - (u8) card->thread_allowed_mask, - (u8) card->thread_running_mask); - rc = (card->thread_start_mask & thread); - spin_unlock_irqrestore(&card->mask_lock, flags); - return rc; -} - -/* - * Initialize channels,card and state machines. - */ -static void -lcs_setup_card(struct lcs_card *card) -{ - LCS_DBF_TEXT(2, setup, "initcard"); - LCS_DBF_HEX(2, setup, &card, sizeof(void*)); - - lcs_setup_read(card); - lcs_setup_write(card); - /* Set cards initial state. */ - card->state = DEV_STATE_DOWN; - card->tx_buffer = NULL; - card->tx_emitted = 0; - - init_waitqueue_head(&card->wait_q); - spin_lock_init(&card->lock); - spin_lock_init(&card->ipm_lock); - spin_lock_init(&card->mask_lock); -#ifdef CONFIG_IP_MULTICAST - INIT_LIST_HEAD(&card->ipm_list); -#endif - INIT_LIST_HEAD(&card->lancmd_waiters); -} - -static void lcs_clear_multicast_list(struct lcs_card *card) -{ -#ifdef CONFIG_IP_MULTICAST - struct lcs_ipm_list *ipm; - unsigned long flags; - - /* Free multicast list. */ - LCS_DBF_TEXT(3, setup, "clmclist"); - spin_lock_irqsave(&card->ipm_lock, flags); - while (!list_empty(&card->ipm_list)){ - ipm = list_entry(card->ipm_list.next, - struct lcs_ipm_list, list); - list_del(&ipm->list); - if (ipm->ipm_state != LCS_IPM_STATE_SET_REQUIRED){ - spin_unlock_irqrestore(&card->ipm_lock, flags); - lcs_send_delipm(card, ipm); - spin_lock_irqsave(&card->ipm_lock, flags); - } - kfree(ipm); - } - spin_unlock_irqrestore(&card->ipm_lock, flags); -#endif -} - -/* - * Cleanup channels,card and state machines. - */ -static void -lcs_cleanup_card(struct lcs_card *card) -{ - - LCS_DBF_TEXT(3, setup, "cleancrd"); - LCS_DBF_HEX(2,setup,&card,sizeof(void*)); - - if (card->dev != NULL) - free_netdev(card->dev); - /* Cleanup channels. */ - lcs_cleanup_channel(&card->write); - lcs_cleanup_channel(&card->read); -} - -/* - * Start channel. - */ -static int -lcs_start_channel(struct lcs_channel *channel) -{ - unsigned long flags; - int rc; - - LCS_DBF_TEXT_(4, trace,"ssch%s", dev_name(&channel->ccwdev->dev)); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - rc = ccw_device_start(channel->ccwdev, - channel->ccws + channel->io_idx, 0, 0, - DOIO_DENY_PREFETCH | DOIO_ALLOW_SUSPEND); - if (rc == 0) - channel->state = LCS_CH_STATE_RUNNING; - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - if (rc) { - LCS_DBF_TEXT_(4,trace,"essh%s", - dev_name(&channel->ccwdev->dev)); - dev_err(&channel->ccwdev->dev, - "Starting an LCS device resulted in an error," - " rc=%d!\n", rc); - } - return rc; -} - -static int -lcs_clear_channel(struct lcs_channel *channel) -{ - unsigned long flags; - int rc; - - LCS_DBF_TEXT(4,trace,"clearch"); - LCS_DBF_TEXT_(4, trace, "%s", dev_name(&channel->ccwdev->dev)); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - rc = ccw_device_clear(channel->ccwdev, 0); - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - if (rc) { - LCS_DBF_TEXT_(4, trace, "ecsc%s", - dev_name(&channel->ccwdev->dev)); - return rc; - } - wait_event(channel->wait_q, (channel->state == LCS_CH_STATE_CLEARED)); - channel->state = LCS_CH_STATE_STOPPED; - return rc; -} - - -/* - * Stop channel. - */ -static int -lcs_stop_channel(struct lcs_channel *channel) -{ - unsigned long flags; - int rc; - - if (channel->state == LCS_CH_STATE_STOPPED) - return 0; - LCS_DBF_TEXT(4,trace,"haltsch"); - LCS_DBF_TEXT_(4, trace, "%s", dev_name(&channel->ccwdev->dev)); - channel->state = LCS_CH_STATE_INIT; - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - rc = ccw_device_halt(channel->ccwdev, 0); - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - if (rc) { - LCS_DBF_TEXT_(4, trace, "ehsc%s", - dev_name(&channel->ccwdev->dev)); - return rc; - } - /* Asynchronous halt initialted. Wait for its completion. */ - wait_event(channel->wait_q, (channel->state == LCS_CH_STATE_HALTED)); - lcs_clear_channel(channel); - return 0; -} - -/* - * start read and write channel - */ -static int -lcs_start_channels(struct lcs_card *card) -{ - int rc; - - LCS_DBF_TEXT(2, trace, "chstart"); - /* start read channel */ - rc = lcs_start_channel(&card->read); - if (rc) - return rc; - /* start write channel */ - rc = lcs_start_channel(&card->write); - if (rc) - lcs_stop_channel(&card->read); - return rc; -} - -/* - * stop read and write channel - */ -static int -lcs_stop_channels(struct lcs_card *card) -{ - LCS_DBF_TEXT(2, trace, "chhalt"); - lcs_stop_channel(&card->read); - lcs_stop_channel(&card->write); - return 0; -} - -/* - * Get empty buffer. - */ -static struct lcs_buffer * -__lcs_get_buffer(struct lcs_channel *channel) -{ - int index; - - LCS_DBF_TEXT(5, trace, "_getbuff"); - index = channel->io_idx; - do { - if (channel->iob[index].state == LCS_BUF_STATE_EMPTY) { - channel->iob[index].state = LCS_BUF_STATE_LOCKED; - return channel->iob + index; - } - index = (index + 1) & (LCS_NUM_BUFFS - 1); - } while (index != channel->io_idx); - return NULL; -} - -static struct lcs_buffer * -lcs_get_buffer(struct lcs_channel *channel) -{ - struct lcs_buffer *buffer; - unsigned long flags; - - LCS_DBF_TEXT(5, trace, "getbuff"); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - buffer = __lcs_get_buffer(channel); - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - return buffer; -} - -/* - * Resume channel program if the channel is suspended. - */ -static int -__lcs_resume_channel(struct lcs_channel *channel) -{ - int rc; - - if (channel->state != LCS_CH_STATE_SUSPENDED) - return 0; - if (channel->ccws[channel->io_idx].flags & CCW_FLAG_SUSPEND) - return 0; - LCS_DBF_TEXT_(5, trace, "rsch%s", dev_name(&channel->ccwdev->dev)); - rc = ccw_device_resume(channel->ccwdev); - if (rc) { - LCS_DBF_TEXT_(4, trace, "ersc%s", - dev_name(&channel->ccwdev->dev)); - dev_err(&channel->ccwdev->dev, - "Sending data from the LCS device to the LAN failed" - " with rc=%d\n",rc); - } else - channel->state = LCS_CH_STATE_RUNNING; - return rc; - -} - -/* - * Make a buffer ready for processing. - */ -static void __lcs_ready_buffer_bits(struct lcs_channel *channel, int index) -{ - int prev, next; - - LCS_DBF_TEXT(5, trace, "rdybits"); - prev = (index - 1) & (LCS_NUM_BUFFS - 1); - next = (index + 1) & (LCS_NUM_BUFFS - 1); - /* Check if we may clear the suspend bit of this buffer. */ - if (channel->ccws[next].flags & CCW_FLAG_SUSPEND) { - /* Check if we have to set the PCI bit. */ - if (!(channel->ccws[prev].flags & CCW_FLAG_SUSPEND)) - /* Suspend bit of the previous buffer is not set. */ - channel->ccws[index].flags |= CCW_FLAG_PCI; - /* Suspend bit of the next buffer is set. */ - channel->ccws[index].flags &= ~CCW_FLAG_SUSPEND; - } -} - -static int -lcs_ready_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer) -{ - unsigned long flags; - int index, rc; - - LCS_DBF_TEXT(5, trace, "rdybuff"); - BUG_ON(buffer->state != LCS_BUF_STATE_LOCKED && - buffer->state != LCS_BUF_STATE_PROCESSED); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - buffer->state = LCS_BUF_STATE_READY; - index = buffer - channel->iob; - /* Set length. */ - channel->ccws[index].count = buffer->count; - /* Check relevant PCI/suspend bits. */ - __lcs_ready_buffer_bits(channel, index); - rc = __lcs_resume_channel(channel); - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - return rc; -} - -/* - * Mark the buffer as processed. Take care of the suspend bit - * of the previous buffer. This function is called from - * interrupt context, so the lock must not be taken. - */ -static int -__lcs_processed_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer) -{ - int index, prev, next; - - LCS_DBF_TEXT(5, trace, "prcsbuff"); - BUG_ON(buffer->state != LCS_BUF_STATE_READY); - buffer->state = LCS_BUF_STATE_PROCESSED; - index = buffer - channel->iob; - prev = (index - 1) & (LCS_NUM_BUFFS - 1); - next = (index + 1) & (LCS_NUM_BUFFS - 1); - /* Set the suspend bit and clear the PCI bit of this buffer. */ - channel->ccws[index].flags |= CCW_FLAG_SUSPEND; - channel->ccws[index].flags &= ~CCW_FLAG_PCI; - /* Check the suspend bit of the previous buffer. */ - if (channel->iob[prev].state == LCS_BUF_STATE_READY) { - /* - * Previous buffer is in state ready. It might have - * happened in lcs_ready_buffer that the suspend bit - * has not been cleared to avoid an endless loop. - * Do it now. - */ - __lcs_ready_buffer_bits(channel, prev); - } - /* Clear PCI bit of next buffer. */ - channel->ccws[next].flags &= ~CCW_FLAG_PCI; - return __lcs_resume_channel(channel); -} - -/* - * Put a processed buffer back to state empty. - */ -static void -lcs_release_buffer(struct lcs_channel *channel, struct lcs_buffer *buffer) -{ - unsigned long flags; - - LCS_DBF_TEXT(5, trace, "relbuff"); - BUG_ON(buffer->state != LCS_BUF_STATE_LOCKED && - buffer->state != LCS_BUF_STATE_PROCESSED); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - buffer->state = LCS_BUF_STATE_EMPTY; - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); -} - -/* - * Get buffer for a lan command. - */ -static struct lcs_buffer * -lcs_get_lancmd(struct lcs_card *card, int count) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(4, trace, "getlncmd"); - /* Get buffer and wait if none is available. */ - wait_event(card->write.wait_q, - ((buffer = lcs_get_buffer(&card->write)) != NULL)); - count += sizeof(struct lcs_header); - *(__u16 *)(buffer->data + count) = 0; - buffer->count = count + sizeof(__u16); - buffer->callback = lcs_release_buffer; - cmd = (struct lcs_cmd *) buffer->data; - cmd->offset = count; - cmd->type = LCS_FRAME_TYPE_CONTROL; - cmd->slot = 0; - return buffer; -} - - -static void -lcs_get_reply(struct lcs_reply *reply) -{ - refcount_inc(&reply->refcnt); -} - -static void -lcs_put_reply(struct lcs_reply *reply) -{ - if (refcount_dec_and_test(&reply->refcnt)) - kfree(reply); -} - -static struct lcs_reply * -lcs_alloc_reply(struct lcs_cmd *cmd) -{ - struct lcs_reply *reply; - - LCS_DBF_TEXT(4, trace, "getreply"); - - reply = kzalloc(sizeof(struct lcs_reply), GFP_ATOMIC); - if (!reply) - return NULL; - refcount_set(&reply->refcnt, 1); - reply->sequence_no = cmd->sequence_no; - reply->received = 0; - reply->rc = 0; - init_waitqueue_head(&reply->wait_q); - - return reply; -} - -/* - * Notifier function for lancmd replies. Called from read irq. - */ -static void -lcs_notify_lancmd_waiters(struct lcs_card *card, struct lcs_cmd *cmd) -{ - struct list_head *l, *n; - struct lcs_reply *reply; - - LCS_DBF_TEXT(4, trace, "notiwait"); - spin_lock(&card->lock); - list_for_each_safe(l, n, &card->lancmd_waiters) { - reply = list_entry(l, struct lcs_reply, list); - if (reply->sequence_no == cmd->sequence_no) { - lcs_get_reply(reply); - list_del_init(&reply->list); - if (reply->callback != NULL) - reply->callback(card, cmd); - reply->received = 1; - reply->rc = cmd->return_code; - wake_up(&reply->wait_q); - lcs_put_reply(reply); - break; - } - } - spin_unlock(&card->lock); -} - -/* - * Emit buffer of a lan command. - */ -static void -lcs_lancmd_timeout(struct timer_list *t) -{ - struct lcs_reply *reply = from_timer(reply, t, timer); - struct lcs_reply *list_reply, *r; - unsigned long flags; - - LCS_DBF_TEXT(4, trace, "timeout"); - spin_lock_irqsave(&reply->card->lock, flags); - list_for_each_entry_safe(list_reply, r, - &reply->card->lancmd_waiters,list) { - if (reply == list_reply) { - lcs_get_reply(reply); - list_del_init(&reply->list); - spin_unlock_irqrestore(&reply->card->lock, flags); - reply->received = 1; - reply->rc = -ETIME; - wake_up(&reply->wait_q); - lcs_put_reply(reply); - return; - } - } - spin_unlock_irqrestore(&reply->card->lock, flags); -} - -static int -lcs_send_lancmd(struct lcs_card *card, struct lcs_buffer *buffer, - void (*reply_callback)(struct lcs_card *, struct lcs_cmd *)) -{ - struct lcs_reply *reply; - struct lcs_cmd *cmd; - unsigned long flags; - int rc; - - LCS_DBF_TEXT(4, trace, "sendcmd"); - cmd = (struct lcs_cmd *) buffer->data; - cmd->return_code = 0; - cmd->sequence_no = card->sequence_no++; - reply = lcs_alloc_reply(cmd); - if (!reply) - return -ENOMEM; - reply->callback = reply_callback; - reply->card = card; - spin_lock_irqsave(&card->lock, flags); - list_add_tail(&reply->list, &card->lancmd_waiters); - spin_unlock_irqrestore(&card->lock, flags); - - buffer->callback = lcs_release_buffer; - rc = lcs_ready_buffer(&card->write, buffer); - if (rc) - return rc; - timer_setup(&reply->timer, lcs_lancmd_timeout, 0); - mod_timer(&reply->timer, jiffies + HZ * card->lancmd_timeout); - wait_event(reply->wait_q, reply->received); - del_timer_sync(&reply->timer); - LCS_DBF_TEXT_(4, trace, "rc:%d",reply->rc); - rc = reply->rc; - lcs_put_reply(reply); - return rc ? -EIO : 0; -} - -/* - * LCS startup command - */ -static int -lcs_send_startup(struct lcs_card *card, __u8 initiator) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "startup"); - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_STARTUP; - cmd->initiator = initiator; - cmd->cmd.lcs_startup.buff_size = LCS_IOBUFFERSIZE; - return lcs_send_lancmd(card, buffer, NULL); -} - -/* - * LCS shutdown command - */ -static int -lcs_send_shutdown(struct lcs_card *card) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "shutdown"); - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_SHUTDOWN; - cmd->initiator = LCS_INITIATOR_TCPIP; - return lcs_send_lancmd(card, buffer, NULL); -} - -/* - * LCS lanstat command - */ -static void -__lcs_lanstat_cb(struct lcs_card *card, struct lcs_cmd *cmd) -{ - LCS_DBF_TEXT(2, trace, "statcb"); - memcpy(card->mac, cmd->cmd.lcs_lanstat_cmd.mac_addr, LCS_MAC_LENGTH); -} - -static int -lcs_send_lanstat(struct lcs_card *card) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2,trace, "cmdstat"); - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - /* Setup lanstat command. */ - cmd->cmd_code = LCS_CMD_LANSTAT; - cmd->initiator = LCS_INITIATOR_TCPIP; - cmd->cmd.lcs_std_cmd.lan_type = card->lan_type; - cmd->cmd.lcs_std_cmd.portno = card->portno; - return lcs_send_lancmd(card, buffer, __lcs_lanstat_cb); -} - -/* - * send stoplan command - */ -static int -lcs_send_stoplan(struct lcs_card *card, __u8 initiator) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "cmdstpln"); - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_STOPLAN; - cmd->initiator = initiator; - cmd->cmd.lcs_std_cmd.lan_type = card->lan_type; - cmd->cmd.lcs_std_cmd.portno = card->portno; - return lcs_send_lancmd(card, buffer, NULL); -} - -/* - * send startlan command - */ -static void -__lcs_send_startlan_cb(struct lcs_card *card, struct lcs_cmd *cmd) -{ - LCS_DBF_TEXT(2, trace, "srtlancb"); - card->lan_type = cmd->cmd.lcs_std_cmd.lan_type; - card->portno = cmd->cmd.lcs_std_cmd.portno; -} - -static int -lcs_send_startlan(struct lcs_card *card, __u8 initiator) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "cmdstaln"); - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_STARTLAN; - cmd->initiator = initiator; - cmd->cmd.lcs_std_cmd.lan_type = card->lan_type; - cmd->cmd.lcs_std_cmd.portno = card->portno; - return lcs_send_lancmd(card, buffer, __lcs_send_startlan_cb); -} - -#ifdef CONFIG_IP_MULTICAST -/* - * send setipm command (Multicast) - */ -static int -lcs_send_setipm(struct lcs_card *card,struct lcs_ipm_list *ipm_list) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "cmdsetim"); - buffer = lcs_get_lancmd(card, LCS_MULTICAST_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_SETIPM; - cmd->initiator = LCS_INITIATOR_TCPIP; - cmd->cmd.lcs_qipassist.lan_type = card->lan_type; - cmd->cmd.lcs_qipassist.portno = card->portno; - cmd->cmd.lcs_qipassist.version = 4; - cmd->cmd.lcs_qipassist.num_ip_pairs = 1; - memcpy(cmd->cmd.lcs_qipassist.lcs_ipass_ctlmsg.ip_mac_pair, - &ipm_list->ipm, sizeof (struct lcs_ip_mac_pair)); - LCS_DBF_TEXT_(2, trace, "%x",ipm_list->ipm.ip_addr); - return lcs_send_lancmd(card, buffer, NULL); -} - -/* - * send delipm command (Multicast) - */ -static int -lcs_send_delipm(struct lcs_card *card,struct lcs_ipm_list *ipm_list) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - - LCS_DBF_TEXT(2, trace, "cmddelim"); - buffer = lcs_get_lancmd(card, LCS_MULTICAST_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_DELIPM; - cmd->initiator = LCS_INITIATOR_TCPIP; - cmd->cmd.lcs_qipassist.lan_type = card->lan_type; - cmd->cmd.lcs_qipassist.portno = card->portno; - cmd->cmd.lcs_qipassist.version = 4; - cmd->cmd.lcs_qipassist.num_ip_pairs = 1; - memcpy(cmd->cmd.lcs_qipassist.lcs_ipass_ctlmsg.ip_mac_pair, - &ipm_list->ipm, sizeof (struct lcs_ip_mac_pair)); - LCS_DBF_TEXT_(2, trace, "%x",ipm_list->ipm.ip_addr); - return lcs_send_lancmd(card, buffer, NULL); -} - -/* - * check if multicast is supported by LCS - */ -static void -__lcs_check_multicast_cb(struct lcs_card *card, struct lcs_cmd *cmd) -{ - LCS_DBF_TEXT(2, trace, "chkmccb"); - card->ip_assists_supported = - cmd->cmd.lcs_qipassist.ip_assists_supported; - card->ip_assists_enabled = - cmd->cmd.lcs_qipassist.ip_assists_enabled; -} - -static int -lcs_check_multicast_support(struct lcs_card *card) -{ - struct lcs_buffer *buffer; - struct lcs_cmd *cmd; - int rc; - - LCS_DBF_TEXT(2, trace, "cmdqipa"); - /* Send query ipassist. */ - buffer = lcs_get_lancmd(card, LCS_STD_CMD_SIZE); - cmd = (struct lcs_cmd *) buffer->data; - cmd->cmd_code = LCS_CMD_QIPASSIST; - cmd->initiator = LCS_INITIATOR_TCPIP; - cmd->cmd.lcs_qipassist.lan_type = card->lan_type; - cmd->cmd.lcs_qipassist.portno = card->portno; - cmd->cmd.lcs_qipassist.version = 4; - cmd->cmd.lcs_qipassist.num_ip_pairs = 1; - rc = lcs_send_lancmd(card, buffer, __lcs_check_multicast_cb); - if (rc != 0) { - pr_err("Query IPAssist failed. Assuming unsupported!\n"); - return -EOPNOTSUPP; - } - if (card->ip_assists_supported & LCS_IPASS_MULTICAST_SUPPORT) - return 0; - return -EOPNOTSUPP; -} - -/* - * set or del multicast address on LCS card - */ -static void -lcs_fix_multicast_list(struct lcs_card *card) -{ - struct list_head failed_list; - struct lcs_ipm_list *ipm, *tmp; - unsigned long flags; - int rc; - - LCS_DBF_TEXT(4,trace, "fixipm"); - INIT_LIST_HEAD(&failed_list); - spin_lock_irqsave(&card->ipm_lock, flags); -list_modified: - list_for_each_entry_safe(ipm, tmp, &card->ipm_list, list){ - switch (ipm->ipm_state) { - case LCS_IPM_STATE_SET_REQUIRED: - /* del from ipm_list so no one else can tamper with - * this entry */ - list_del_init(&ipm->list); - spin_unlock_irqrestore(&card->ipm_lock, flags); - rc = lcs_send_setipm(card, ipm); - spin_lock_irqsave(&card->ipm_lock, flags); - if (rc) { - pr_info("Adding multicast address failed." - " Table possibly full!\n"); - /* store ipm in failed list -> will be added - * to ipm_list again, so a retry will be done - * during the next call of this function */ - list_add_tail(&ipm->list, &failed_list); - } else { - ipm->ipm_state = LCS_IPM_STATE_ON_CARD; - /* re-insert into ipm_list */ - list_add_tail(&ipm->list, &card->ipm_list); - } - goto list_modified; - case LCS_IPM_STATE_DEL_REQUIRED: - list_del(&ipm->list); - spin_unlock_irqrestore(&card->ipm_lock, flags); - lcs_send_delipm(card, ipm); - spin_lock_irqsave(&card->ipm_lock, flags); - kfree(ipm); - goto list_modified; - case LCS_IPM_STATE_ON_CARD: - break; - } - } - /* re-insert all entries from the failed_list into ipm_list */ - list_for_each_entry_safe(ipm, tmp, &failed_list, list) - list_move_tail(&ipm->list, &card->ipm_list); - - spin_unlock_irqrestore(&card->ipm_lock, flags); -} - -/* - * get mac address for the relevant Multicast address - */ -static void -lcs_get_mac_for_ipm(__be32 ipm, char *mac, struct net_device *dev) -{ - LCS_DBF_TEXT(4,trace, "getmac"); - ip_eth_mc_map(ipm, mac); -} - -/* - * function called by net device to handle multicast address relevant things - */ -static void lcs_remove_mc_addresses(struct lcs_card *card, - struct in_device *in4_dev) -{ - struct ip_mc_list *im4; - struct list_head *l; - struct lcs_ipm_list *ipm; - unsigned long flags; - char buf[MAX_ADDR_LEN]; - - LCS_DBF_TEXT(4, trace, "remmclst"); - spin_lock_irqsave(&card->ipm_lock, flags); - list_for_each(l, &card->ipm_list) { - ipm = list_entry(l, struct lcs_ipm_list, list); - for (im4 = rcu_dereference(in4_dev->mc_list); - im4 != NULL; im4 = rcu_dereference(im4->next_rcu)) { - lcs_get_mac_for_ipm(im4->multiaddr, buf, card->dev); - if ( (ipm->ipm.ip_addr == im4->multiaddr) && - (memcmp(buf, &ipm->ipm.mac_addr, - LCS_MAC_LENGTH) == 0) ) - break; - } - if (im4 == NULL) - ipm->ipm_state = LCS_IPM_STATE_DEL_REQUIRED; - } - spin_unlock_irqrestore(&card->ipm_lock, flags); -} - -static struct lcs_ipm_list *lcs_check_addr_entry(struct lcs_card *card, - struct ip_mc_list *im4, - char *buf) -{ - struct lcs_ipm_list *tmp, *ipm = NULL; - struct list_head *l; - unsigned long flags; - - LCS_DBF_TEXT(4, trace, "chkmcent"); - spin_lock_irqsave(&card->ipm_lock, flags); - list_for_each(l, &card->ipm_list) { - tmp = list_entry(l, struct lcs_ipm_list, list); - if ( (tmp->ipm.ip_addr == im4->multiaddr) && - (memcmp(buf, &tmp->ipm.mac_addr, - LCS_MAC_LENGTH) == 0) ) { - ipm = tmp; - break; - } - } - spin_unlock_irqrestore(&card->ipm_lock, flags); - return ipm; -} - -static void lcs_set_mc_addresses(struct lcs_card *card, - struct in_device *in4_dev) -{ - - struct ip_mc_list *im4; - struct lcs_ipm_list *ipm; - char buf[MAX_ADDR_LEN]; - unsigned long flags; - - LCS_DBF_TEXT(4, trace, "setmclst"); - for (im4 = rcu_dereference(in4_dev->mc_list); im4 != NULL; - im4 = rcu_dereference(im4->next_rcu)) { - lcs_get_mac_for_ipm(im4->multiaddr, buf, card->dev); - ipm = lcs_check_addr_entry(card, im4, buf); - if (ipm != NULL) - continue; /* Address already in list. */ - ipm = kzalloc(sizeof(struct lcs_ipm_list), GFP_ATOMIC); - if (ipm == NULL) { - pr_info("Not enough memory to add" - " new multicast entry!\n"); - break; - } - memcpy(&ipm->ipm.mac_addr, buf, LCS_MAC_LENGTH); - ipm->ipm.ip_addr = im4->multiaddr; - ipm->ipm_state = LCS_IPM_STATE_SET_REQUIRED; - spin_lock_irqsave(&card->ipm_lock, flags); - LCS_DBF_HEX(2,trace,&ipm->ipm.ip_addr,4); - list_add(&ipm->list, &card->ipm_list); - spin_unlock_irqrestore(&card->ipm_lock, flags); - } -} - -static int -lcs_register_mc_addresses(void *data) -{ - struct lcs_card *card; - struct in_device *in4_dev; - - card = (struct lcs_card *) data; - - if (!lcs_do_run_thread(card, LCS_SET_MC_THREAD)) - return 0; - LCS_DBF_TEXT(4, trace, "regmulti"); - - in4_dev = in_dev_get(card->dev); - if (in4_dev == NULL) - goto out; - rcu_read_lock(); - lcs_remove_mc_addresses(card,in4_dev); - lcs_set_mc_addresses(card, in4_dev); - rcu_read_unlock(); - in_dev_put(in4_dev); - - netif_carrier_off(card->dev); - netif_tx_disable(card->dev); - wait_event(card->write.wait_q, - (card->write.state != LCS_CH_STATE_RUNNING)); - lcs_fix_multicast_list(card); - if (card->state == DEV_STATE_UP) { - netif_carrier_on(card->dev); - netif_wake_queue(card->dev); - } -out: - lcs_clear_thread_running_bit(card, LCS_SET_MC_THREAD); - return 0; -} -#endif /* CONFIG_IP_MULTICAST */ - -/* - * function called by net device to - * handle multicast address relevant things - */ -static void -lcs_set_multicast_list(struct net_device *dev) -{ -#ifdef CONFIG_IP_MULTICAST - struct lcs_card *card; - - LCS_DBF_TEXT(4, trace, "setmulti"); - card = (struct lcs_card *) dev->ml_priv; - - if (!lcs_set_thread_start_bit(card, LCS_SET_MC_THREAD)) - schedule_work(&card->kernel_thread_starter); -#endif /* CONFIG_IP_MULTICAST */ -} - -static long -lcs_check_irb_error(struct ccw_device *cdev, struct irb *irb) -{ - if (!IS_ERR(irb)) - return 0; - - switch (PTR_ERR(irb)) { - case -EIO: - dev_warn(&cdev->dev, - "An I/O-error occurred on the LCS device\n"); - LCS_DBF_TEXT(2, trace, "ckirberr"); - LCS_DBF_TEXT_(2, trace, " rc%d", -EIO); - break; - case -ETIMEDOUT: - dev_warn(&cdev->dev, - "A command timed out on the LCS device\n"); - LCS_DBF_TEXT(2, trace, "ckirberr"); - LCS_DBF_TEXT_(2, trace, " rc%d", -ETIMEDOUT); - break; - default: - dev_warn(&cdev->dev, - "An error occurred on the LCS device, rc=%ld\n", - PTR_ERR(irb)); - LCS_DBF_TEXT(2, trace, "ckirberr"); - LCS_DBF_TEXT(2, trace, " rc???"); - } - return PTR_ERR(irb); -} - -static int -lcs_get_problem(struct ccw_device *cdev, struct irb *irb) -{ - int dstat, cstat; - char *sense; - - sense = (char *) irb->ecw; - cstat = irb->scsw.cmd.cstat; - dstat = irb->scsw.cmd.dstat; - - if (cstat & (SCHN_STAT_CHN_CTRL_CHK | SCHN_STAT_INTF_CTRL_CHK | - SCHN_STAT_CHN_DATA_CHK | SCHN_STAT_CHAIN_CHECK | - SCHN_STAT_PROT_CHECK | SCHN_STAT_PROG_CHECK)) { - LCS_DBF_TEXT(2, trace, "CGENCHK"); - return 1; - } - if (dstat & DEV_STAT_UNIT_CHECK) { - if (sense[LCS_SENSE_BYTE_1] & - LCS_SENSE_RESETTING_EVENT) { - LCS_DBF_TEXT(2, trace, "REVIND"); - return 1; - } - if (sense[LCS_SENSE_BYTE_0] & - LCS_SENSE_CMD_REJECT) { - LCS_DBF_TEXT(2, trace, "CMDREJ"); - return 0; - } - if ((!sense[LCS_SENSE_BYTE_0]) && - (!sense[LCS_SENSE_BYTE_1]) && - (!sense[LCS_SENSE_BYTE_2]) && - (!sense[LCS_SENSE_BYTE_3])) { - LCS_DBF_TEXT(2, trace, "ZEROSEN"); - return 0; - } - LCS_DBF_TEXT(2, trace, "DGENCHK"); - return 1; - } - return 0; -} - -static void -lcs_schedule_recovery(struct lcs_card *card) -{ - LCS_DBF_TEXT(2, trace, "startrec"); - if (!lcs_set_thread_start_bit(card, LCS_RECOVERY_THREAD)) - schedule_work(&card->kernel_thread_starter); -} - -/* - * IRQ Handler for LCS channels - */ -static void -lcs_irq(struct ccw_device *cdev, unsigned long intparm, struct irb *irb) -{ - struct lcs_card *card; - struct lcs_channel *channel; - int rc, index; - int cstat, dstat; - - if (lcs_check_irb_error(cdev, irb)) - return; - - card = CARD_FROM_DEV(cdev); - if (card->read.ccwdev == cdev) - channel = &card->read; - else - channel = &card->write; - - cstat = irb->scsw.cmd.cstat; - dstat = irb->scsw.cmd.dstat; - LCS_DBF_TEXT_(5, trace, "Rint%s", dev_name(&cdev->dev)); - LCS_DBF_TEXT_(5, trace, "%4x%4x", irb->scsw.cmd.cstat, - irb->scsw.cmd.dstat); - LCS_DBF_TEXT_(5, trace, "%4x%4x", irb->scsw.cmd.fctl, - irb->scsw.cmd.actl); - - /* Check for channel and device errors presented */ - rc = lcs_get_problem(cdev, irb); - if (rc || (dstat & DEV_STAT_UNIT_EXCEP)) { - dev_warn(&cdev->dev, - "The LCS device stopped because of an error," - " dstat=0x%X, cstat=0x%X \n", - dstat, cstat); - if (rc) { - channel->state = LCS_CH_STATE_ERROR; - } - } - if (channel->state == LCS_CH_STATE_ERROR) { - lcs_schedule_recovery(card); - wake_up(&card->wait_q); - return; - } - /* How far in the ccw chain have we processed? */ - if ((channel->state != LCS_CH_STATE_INIT) && - (irb->scsw.cmd.fctl & SCSW_FCTL_START_FUNC) && - (irb->scsw.cmd.cpa != 0)) { - index = (struct ccw1 *)dma32_to_virt(irb->scsw.cmd.cpa) - - channel->ccws; - if ((irb->scsw.cmd.actl & SCSW_ACTL_SUSPENDED) || - (irb->scsw.cmd.cstat & SCHN_STAT_PCI)) - /* Bloody io subsystem tells us lies about cpa... */ - index = (index - 1) & (LCS_NUM_BUFFS - 1); - while (channel->io_idx != index) { - __lcs_processed_buffer(channel, - channel->iob + channel->io_idx); - channel->io_idx = - (channel->io_idx + 1) & (LCS_NUM_BUFFS - 1); - } - } - - if ((irb->scsw.cmd.dstat & DEV_STAT_DEV_END) || - (irb->scsw.cmd.dstat & DEV_STAT_CHN_END) || - (irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK)) - /* Mark channel as stopped. */ - channel->state = LCS_CH_STATE_STOPPED; - else if (irb->scsw.cmd.actl & SCSW_ACTL_SUSPENDED) - /* CCW execution stopped on a suspend bit. */ - channel->state = LCS_CH_STATE_SUSPENDED; - if (irb->scsw.cmd.fctl & SCSW_FCTL_HALT_FUNC) { - if (irb->scsw.cmd.cc != 0) { - ccw_device_halt(channel->ccwdev, 0); - return; - } - /* The channel has been stopped by halt_IO. */ - channel->state = LCS_CH_STATE_HALTED; - } - if (irb->scsw.cmd.fctl & SCSW_FCTL_CLEAR_FUNC) - channel->state = LCS_CH_STATE_CLEARED; - /* Do the rest in the tasklet. */ - tasklet_schedule(&channel->irq_tasklet); -} - -/* - * Tasklet for IRQ handler - */ -static void -lcs_tasklet(unsigned long data) -{ - unsigned long flags; - struct lcs_channel *channel; - struct lcs_buffer *iob; - int buf_idx; - - channel = (struct lcs_channel *) data; - LCS_DBF_TEXT_(5, trace, "tlet%s", dev_name(&channel->ccwdev->dev)); - - /* Check for processed buffers. */ - iob = channel->iob; - buf_idx = channel->buf_idx; - while (iob[buf_idx].state == LCS_BUF_STATE_PROCESSED) { - /* Do the callback thing. */ - if (iob[buf_idx].callback != NULL) - iob[buf_idx].callback(channel, iob + buf_idx); - buf_idx = (buf_idx + 1) & (LCS_NUM_BUFFS - 1); - } - channel->buf_idx = buf_idx; - - if (channel->state == LCS_CH_STATE_STOPPED) - lcs_start_channel(channel); - spin_lock_irqsave(get_ccwdev_lock(channel->ccwdev), flags); - if (channel->state == LCS_CH_STATE_SUSPENDED && - channel->iob[channel->io_idx].state == LCS_BUF_STATE_READY) - __lcs_resume_channel(channel); - spin_unlock_irqrestore(get_ccwdev_lock(channel->ccwdev), flags); - - /* Something happened on the channel. Wake up waiters. */ - wake_up(&channel->wait_q); -} - -/* - * Finish current tx buffer and make it ready for transmit. - */ -static void -__lcs_emit_txbuffer(struct lcs_card *card) -{ - LCS_DBF_TEXT(5, trace, "emittx"); - *(__u16 *)(card->tx_buffer->data + card->tx_buffer->count) = 0; - card->tx_buffer->count += 2; - lcs_ready_buffer(&card->write, card->tx_buffer); - card->tx_buffer = NULL; - card->tx_emitted++; -} - -/* - * Callback for finished tx buffers. - */ -static void -lcs_txbuffer_cb(struct lcs_channel *channel, struct lcs_buffer *buffer) -{ - struct lcs_card *card; - - LCS_DBF_TEXT(5, trace, "txbuffcb"); - /* Put buffer back to pool. */ - lcs_release_buffer(channel, buffer); - card = container_of(channel, struct lcs_card, write); - if (netif_queue_stopped(card->dev) && netif_carrier_ok(card->dev)) - netif_wake_queue(card->dev); - spin_lock(&card->lock); - card->tx_emitted--; - if (card->tx_emitted <= 0 && card->tx_buffer != NULL) - /* - * Last running tx buffer has finished. Submit partially - * filled current buffer. - */ - __lcs_emit_txbuffer(card); - spin_unlock(&card->lock); -} - -/* - * Packet transmit function called by network stack - */ -static netdev_tx_t __lcs_start_xmit(struct lcs_card *card, struct sk_buff *skb, - struct net_device *dev) -{ - struct lcs_header *header; - int rc = NETDEV_TX_OK; - - LCS_DBF_TEXT(5, trace, "hardxmit"); - if (skb == NULL) { - card->stats.tx_dropped++; - card->stats.tx_errors++; - return NETDEV_TX_OK; - } - if (card->state != DEV_STATE_UP) { - dev_kfree_skb(skb); - card->stats.tx_dropped++; - card->stats.tx_errors++; - card->stats.tx_carrier_errors++; - return NETDEV_TX_OK; - } - if (skb->protocol == htons(ETH_P_IPV6)) { - dev_kfree_skb(skb); - return NETDEV_TX_OK; - } - netif_stop_queue(card->dev); - spin_lock(&card->lock); - if (card->tx_buffer != NULL && - card->tx_buffer->count + sizeof(struct lcs_header) + - skb->len + sizeof(u16) > LCS_IOBUFFERSIZE) - /* skb too big for current tx buffer. */ - __lcs_emit_txbuffer(card); - if (card->tx_buffer == NULL) { - /* Get new tx buffer */ - card->tx_buffer = lcs_get_buffer(&card->write); - if (card->tx_buffer == NULL) { - card->stats.tx_dropped++; - rc = NETDEV_TX_BUSY; - goto out; - } - card->tx_buffer->callback = lcs_txbuffer_cb; - card->tx_buffer->count = 0; - } - header = (struct lcs_header *) - (card->tx_buffer->data + card->tx_buffer->count); - card->tx_buffer->count += skb->len + sizeof(struct lcs_header); - header->offset = card->tx_buffer->count; - header->type = card->lan_type; - header->slot = card->portno; - skb_copy_from_linear_data(skb, header + 1, skb->len); - spin_unlock(&card->lock); - card->stats.tx_bytes += skb->len; - card->stats.tx_packets++; - dev_kfree_skb(skb); - netif_wake_queue(card->dev); - spin_lock(&card->lock); - if (card->tx_emitted <= 0 && card->tx_buffer != NULL) - /* If this is the first tx buffer emit it immediately. */ - __lcs_emit_txbuffer(card); -out: - spin_unlock(&card->lock); - return rc; -} - -static netdev_tx_t lcs_start_xmit(struct sk_buff *skb, struct net_device *dev) -{ - struct lcs_card *card; - int rc; - - LCS_DBF_TEXT(5, trace, "pktxmit"); - card = (struct lcs_card *) dev->ml_priv; - rc = __lcs_start_xmit(card, skb, dev); - return rc; -} - -/* - * send startlan and lanstat command to make LCS device ready - */ -static int -lcs_startlan_auto(struct lcs_card *card) -{ - int rc; - - LCS_DBF_TEXT(2, trace, "strtauto"); - card->lan_type = LCS_FRAME_TYPE_ENET; - rc = lcs_send_startlan(card, LCS_INITIATOR_TCPIP); - if (rc == 0) - return 0; - - return -EIO; -} - -static int -lcs_startlan(struct lcs_card *card) -{ - int rc, i; - - LCS_DBF_TEXT(2, trace, "startlan"); - rc = 0; - if (card->portno != LCS_INVALID_PORT_NO) { - if (card->lan_type == LCS_FRAME_TYPE_AUTO) - rc = lcs_startlan_auto(card); - else - rc = lcs_send_startlan(card, LCS_INITIATOR_TCPIP); - } else { - for (i = 0; i <= 16; i++) { - card->portno = i; - if (card->lan_type != LCS_FRAME_TYPE_AUTO) - rc = lcs_send_startlan(card, - LCS_INITIATOR_TCPIP); - else - /* autodetecting lan type */ - rc = lcs_startlan_auto(card); - if (rc == 0) - break; - } - } - if (rc == 0) - return lcs_send_lanstat(card); - return rc; -} - -/* - * LCS detect function - * setup channels and make them I/O ready - */ -static int -lcs_detect(struct lcs_card *card) -{ - int rc = 0; - - LCS_DBF_TEXT(2, setup, "lcsdetct"); - /* start/reset card */ - if (card->dev) - netif_stop_queue(card->dev); - rc = lcs_stop_channels(card); - if (rc == 0) { - rc = lcs_start_channels(card); - if (rc == 0) { - rc = lcs_send_startup(card, LCS_INITIATOR_TCPIP); - if (rc == 0) - rc = lcs_startlan(card); - } - } - if (rc == 0) { - card->state = DEV_STATE_UP; - } else { - card->state = DEV_STATE_DOWN; - card->write.state = LCS_CH_STATE_INIT; - card->read.state = LCS_CH_STATE_INIT; - } - return rc; -} - -/* - * LCS Stop card - */ -static int -lcs_stopcard(struct lcs_card *card) -{ - int rc; - - LCS_DBF_TEXT(3, setup, "stopcard"); - - if (card->read.state != LCS_CH_STATE_STOPPED && - card->write.state != LCS_CH_STATE_STOPPED && - card->read.state != LCS_CH_STATE_ERROR && - card->write.state != LCS_CH_STATE_ERROR && - card->state == DEV_STATE_UP) { - lcs_clear_multicast_list(card); - rc = lcs_send_stoplan(card,LCS_INITIATOR_TCPIP); - rc = lcs_send_shutdown(card); - } - rc = lcs_stop_channels(card); - card->state = DEV_STATE_DOWN; - - return rc; -} - -/* - * Kernel Thread helper functions for LGW initiated commands - */ -static void -lcs_start_kernel_thread(struct work_struct *work) -{ - struct lcs_card *card = container_of(work, struct lcs_card, kernel_thread_starter); - LCS_DBF_TEXT(5, trace, "krnthrd"); - if (lcs_do_start_thread(card, LCS_RECOVERY_THREAD)) - kthread_run(lcs_recovery, card, "lcs_recover"); -#ifdef CONFIG_IP_MULTICAST - if (lcs_do_start_thread(card, LCS_SET_MC_THREAD)) - kthread_run(lcs_register_mc_addresses, card, "regipm"); -#endif -} - -/* - * Process control frames. - */ -static void -lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd) -{ - LCS_DBF_TEXT(5, trace, "getctrl"); - if (cmd->initiator == LCS_INITIATOR_LGW) { - switch(cmd->cmd_code) { - case LCS_CMD_STARTUP: - case LCS_CMD_STARTLAN: - lcs_schedule_recovery(card); - break; - case LCS_CMD_STOPLAN: - if (card->dev) { - pr_warn("Stoplan for %s initiated by LGW\n", - card->dev->name); - netif_carrier_off(card->dev); - } - break; - default: - LCS_DBF_TEXT(5, trace, "noLGWcmd"); - break; - } - } else - lcs_notify_lancmd_waiters(card, cmd); -} - -/* - * Unpack network packet. - */ -static void -lcs_get_skb(struct lcs_card *card, char *skb_data, unsigned int skb_len) -{ - struct sk_buff *skb; - - LCS_DBF_TEXT(5, trace, "getskb"); - if (card->dev == NULL || - card->state != DEV_STATE_UP) - /* The card isn't up. Ignore the packet. */ - return; - - skb = dev_alloc_skb(skb_len); - if (skb == NULL) { - dev_err(&card->dev->dev, - " Allocating a socket buffer to interface %s failed\n", - card->dev->name); - card->stats.rx_dropped++; - return; - } - skb_put_data(skb, skb_data, skb_len); - skb->protocol = card->lan_type_trans(skb, card->dev); - card->stats.rx_bytes += skb_len; - card->stats.rx_packets++; - if (skb->protocol == htons(ETH_P_802_2)) - *((__u32 *)skb->cb) = ++card->pkt_seq; - netif_rx(skb); -} - -/* - * LCS main routine to get packets and lancmd replies from the buffers - */ -static void -lcs_get_frames_cb(struct lcs_channel *channel, struct lcs_buffer *buffer) -{ - struct lcs_card *card; - struct lcs_header *lcs_hdr; - __u16 offset; - - LCS_DBF_TEXT(5, trace, "lcsgtpkt"); - lcs_hdr = (struct lcs_header *) buffer->data; - if (lcs_hdr->offset == LCS_ILLEGAL_OFFSET) { - LCS_DBF_TEXT(4, trace, "-eiogpkt"); - return; - } - card = container_of(channel, struct lcs_card, read); - offset = 0; - while (lcs_hdr->offset != 0) { - if (lcs_hdr->offset <= 0 || - lcs_hdr->offset > LCS_IOBUFFERSIZE || - lcs_hdr->offset < offset) { - /* Offset invalid. */ - card->stats.rx_length_errors++; - card->stats.rx_errors++; - return; - } - if (lcs_hdr->type == LCS_FRAME_TYPE_CONTROL) - lcs_get_control(card, (struct lcs_cmd *) lcs_hdr); - else if (lcs_hdr->type == LCS_FRAME_TYPE_ENET) - lcs_get_skb(card, (char *)(lcs_hdr + 1), - lcs_hdr->offset - offset - - sizeof(struct lcs_header)); - else - dev_info_once(&card->dev->dev, - "Unknown frame type %d\n", - lcs_hdr->type); - offset = lcs_hdr->offset; - lcs_hdr->offset = LCS_ILLEGAL_OFFSET; - lcs_hdr = (struct lcs_header *) (buffer->data + offset); - } - /* The buffer is now empty. Make it ready again. */ - lcs_ready_buffer(&card->read, buffer); -} - -/* - * get network statistics for ifconfig and other user programs - */ -static struct net_device_stats * -lcs_getstats(struct net_device *dev) -{ - struct lcs_card *card; - - LCS_DBF_TEXT(4, trace, "netstats"); - card = (struct lcs_card *) dev->ml_priv; - return &card->stats; -} - -/* - * stop lcs device - * This function will be called by user doing ifconfig xxx down - */ -static int -lcs_stop_device(struct net_device *dev) -{ - struct lcs_card *card; - int rc; - - LCS_DBF_TEXT(2, trace, "stopdev"); - card = (struct lcs_card *) dev->ml_priv; - netif_carrier_off(dev); - netif_tx_disable(dev); - dev->flags &= ~IFF_UP; - wait_event(card->write.wait_q, - (card->write.state != LCS_CH_STATE_RUNNING)); - rc = lcs_stopcard(card); - if (rc) - dev_err(&card->dev->dev, - " Shutting down the LCS device failed\n"); - return rc; -} - -/* - * start lcs device and make it runnable - * This function will be called by user doing ifconfig xxx up - */ -static int -lcs_open_device(struct net_device *dev) -{ - struct lcs_card *card; - int rc; - - LCS_DBF_TEXT(2, trace, "opendev"); - card = (struct lcs_card *) dev->ml_priv; - /* initialize statistics */ - rc = lcs_detect(card); - if (rc) { - pr_err("Error in opening device!\n"); - - } else { - dev->flags |= IFF_UP; - netif_carrier_on(dev); - netif_wake_queue(dev); - card->state = DEV_STATE_UP; - } - return rc; -} - -/* - * show function for portno called by cat or similar things - */ -static ssize_t -lcs_portno_show (struct device *dev, struct device_attribute *attr, char *buf) -{ - struct lcs_card *card; - - card = dev_get_drvdata(dev); - - if (!card) - return 0; - - return sysfs_emit(buf, "%d\n", card->portno); -} - -/* - * store the value which is piped to file portno - */ -static ssize_t -lcs_portno_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct lcs_card *card; - int rc; - s16 value; - - card = dev_get_drvdata(dev); - - if (!card) - return 0; - - rc = kstrtos16(buf, 0, &value); - if (rc) - return -EINVAL; - /* TODO: sanity checks */ - card->portno = value; - if (card->dev) - card->dev->dev_port = card->portno; - - return count; - -} - -static DEVICE_ATTR(portno, 0644, lcs_portno_show, lcs_portno_store); - -static const char *lcs_type[] = { - "not a channel", - "2216 parallel", - "2216 channel", - "OSA LCS card", - "unknown channel type", - "unsupported channel type", -}; - -static ssize_t -lcs_type_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct ccwgroup_device *cgdev; - - cgdev = to_ccwgroupdev(dev); - if (!cgdev) - return -ENODEV; - - return sysfs_emit(buf, "%s\n", - lcs_type[cgdev->cdev[0]->id.driver_info]); -} - -static DEVICE_ATTR(type, 0444, lcs_type_show, NULL); - -static ssize_t -lcs_timeout_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct lcs_card *card; - - card = dev_get_drvdata(dev); - - return card ? sysfs_emit(buf, "%u\n", card->lancmd_timeout) : 0; -} - -static ssize_t -lcs_timeout_store (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) -{ - struct lcs_card *card; - unsigned int value; - int rc; - - card = dev_get_drvdata(dev); - - if (!card) - return 0; - - rc = kstrtouint(buf, 0, &value); - if (rc) - return -EINVAL; - /* TODO: sanity checks */ - card->lancmd_timeout = value; - - return count; - -} - -static DEVICE_ATTR(lancmd_timeout, 0644, lcs_timeout_show, lcs_timeout_store); - -static ssize_t -lcs_dev_recover_store(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct lcs_card *card = dev_get_drvdata(dev); - char *tmp; - int i; - - if (!card) - return -EINVAL; - if (card->state != DEV_STATE_UP) - return -EPERM; - i = simple_strtoul(buf, &tmp, 16); - if (i == 1) - lcs_schedule_recovery(card); - return count; -} - -static DEVICE_ATTR(recover, 0200, NULL, lcs_dev_recover_store); - -static struct attribute * lcs_attrs[] = { - &dev_attr_portno.attr, - &dev_attr_type.attr, - &dev_attr_lancmd_timeout.attr, - &dev_attr_recover.attr, - NULL, -}; -static struct attribute_group lcs_attr_group = { - .attrs = lcs_attrs, -}; -static const struct attribute_group *lcs_attr_groups[] = { - &lcs_attr_group, - NULL, -}; -static const struct device_type lcs_devtype = { - .name = "lcs", - .groups = lcs_attr_groups, -}; - -/* - * lcs_probe_device is called on establishing a new ccwgroup_device. - */ -static int -lcs_probe_device(struct ccwgroup_device *ccwgdev) -{ - struct lcs_card *card; - - if (!get_device(&ccwgdev->dev)) - return -ENODEV; - - LCS_DBF_TEXT(2, setup, "add_dev"); - card = lcs_alloc_card(); - if (!card) { - LCS_DBF_TEXT_(2, setup, " rc%d", -ENOMEM); - put_device(&ccwgdev->dev); - return -ENOMEM; - } - dev_set_drvdata(&ccwgdev->dev, card); - ccwgdev->cdev[0]->handler = lcs_irq; - ccwgdev->cdev[1]->handler = lcs_irq; - card->gdev = ccwgdev; - INIT_WORK(&card->kernel_thread_starter, lcs_start_kernel_thread); - card->thread_start_mask = 0; - card->thread_allowed_mask = 0; - card->thread_running_mask = 0; - ccwgdev->dev.type = &lcs_devtype; - - return 0; -} - -static int -lcs_register_netdev(struct ccwgroup_device *ccwgdev) -{ - struct lcs_card *card; - - LCS_DBF_TEXT(2, setup, "regnetdv"); - card = dev_get_drvdata(&ccwgdev->dev); - if (card->dev->reg_state != NETREG_UNINITIALIZED) - return 0; - SET_NETDEV_DEV(card->dev, &ccwgdev->dev); - return register_netdev(card->dev); -} - -/* - * lcs_new_device will be called by setting the group device online. - */ -static const struct net_device_ops lcs_netdev_ops = { - .ndo_open = lcs_open_device, - .ndo_stop = lcs_stop_device, - .ndo_get_stats = lcs_getstats, - .ndo_start_xmit = lcs_start_xmit, -}; - -static const struct net_device_ops lcs_mc_netdev_ops = { - .ndo_open = lcs_open_device, - .ndo_stop = lcs_stop_device, - .ndo_get_stats = lcs_getstats, - .ndo_start_xmit = lcs_start_xmit, - .ndo_set_rx_mode = lcs_set_multicast_list, -}; - -static int -lcs_new_device(struct ccwgroup_device *ccwgdev) -{ - struct lcs_card *card; - struct net_device *dev=NULL; - enum lcs_dev_states recover_state; - int rc; - - card = dev_get_drvdata(&ccwgdev->dev); - if (!card) - return -ENODEV; - - LCS_DBF_TEXT(2, setup, "newdev"); - LCS_DBF_HEX(3, setup, &card, sizeof(void*)); - card->read.ccwdev = ccwgdev->cdev[0]; - card->write.ccwdev = ccwgdev->cdev[1]; - - recover_state = card->state; - rc = ccw_device_set_online(card->read.ccwdev); - if (rc) - goto out_err; - rc = ccw_device_set_online(card->write.ccwdev); - if (rc) - goto out_werr; - - LCS_DBF_TEXT(3, setup, "lcsnewdv"); - - lcs_setup_card(card); - rc = lcs_detect(card); - if (rc) { - LCS_DBF_TEXT(2, setup, "dtctfail"); - dev_err(&ccwgdev->dev, - "Detecting a network adapter for LCS devices" - " failed with rc=%d (0x%x)\n", rc, rc); - lcs_stopcard(card); - goto out; - } - if (card->dev) { - LCS_DBF_TEXT(2, setup, "samedev"); - LCS_DBF_HEX(3, setup, &card, sizeof(void*)); - goto netdev_out; - } - switch (card->lan_type) { - case LCS_FRAME_TYPE_ENET: - card->lan_type_trans = eth_type_trans; - dev = alloc_etherdev(0); - break; - default: - LCS_DBF_TEXT(3, setup, "errinit"); - pr_err(" Initialization failed\n"); - goto out; - } - if (!dev) - goto out; - card->dev = dev; - card->dev->ml_priv = card; - card->dev->netdev_ops = &lcs_netdev_ops; - card->dev->dev_port = card->portno; - eth_hw_addr_set(card->dev, card->mac); -#ifdef CONFIG_IP_MULTICAST - if (!lcs_check_multicast_support(card)) - card->dev->netdev_ops = &lcs_mc_netdev_ops; -#endif -netdev_out: - lcs_set_allowed_threads(card,0xffffffff); - if (recover_state == DEV_STATE_RECOVER) { - lcs_set_multicast_list(card->dev); - card->dev->flags |= IFF_UP; - netif_carrier_on(card->dev); - netif_wake_queue(card->dev); - card->state = DEV_STATE_UP; - } else { - lcs_stopcard(card); - } - - if (lcs_register_netdev(ccwgdev) != 0) - goto out; - - /* Print out supported assists: IPv6 */ - pr_info("LCS device %s %s IPv6 support\n", card->dev->name, - (card->ip_assists_supported & LCS_IPASS_IPV6_SUPPORT) ? - "with" : "without"); - /* Print out supported assist: Multicast */ - pr_info("LCS device %s %s Multicast support\n", card->dev->name, - (card->ip_assists_supported & LCS_IPASS_MULTICAST_SUPPORT) ? - "with" : "without"); - return 0; -out: - - ccw_device_set_offline(card->write.ccwdev); -out_werr: - ccw_device_set_offline(card->read.ccwdev); -out_err: - return -ENODEV; -} - -/* - * lcs_shutdown_device, called when setting the group device offline. - */ -static int -__lcs_shutdown_device(struct ccwgroup_device *ccwgdev, int recovery_mode) -{ - struct lcs_card *card; - enum lcs_dev_states recover_state; - int ret = 0, ret2 = 0, ret3 = 0; - - LCS_DBF_TEXT(3, setup, "shtdndev"); - card = dev_get_drvdata(&ccwgdev->dev); - if (!card) - return -ENODEV; - if (recovery_mode == 0) { - lcs_set_allowed_threads(card, 0); - if (lcs_wait_for_threads(card, LCS_SET_MC_THREAD)) - return -ERESTARTSYS; - } - LCS_DBF_HEX(3, setup, &card, sizeof(void*)); - recover_state = card->state; - - ret = lcs_stop_device(card->dev); - ret2 = ccw_device_set_offline(card->read.ccwdev); - ret3 = ccw_device_set_offline(card->write.ccwdev); - if (!ret) - ret = (ret2) ? ret2 : ret3; - if (ret) - LCS_DBF_TEXT_(3, setup, "1err:%d", ret); - if (recover_state == DEV_STATE_UP) { - card->state = DEV_STATE_RECOVER; - } - return 0; -} - -static int -lcs_shutdown_device(struct ccwgroup_device *ccwgdev) -{ - return __lcs_shutdown_device(ccwgdev, 0); -} - -/* - * drive lcs recovery after startup and startlan initiated by Lan Gateway - */ -static int -lcs_recovery(void *ptr) -{ - struct lcs_card *card; - struct ccwgroup_device *gdev; - int rc; - - card = (struct lcs_card *) ptr; - - LCS_DBF_TEXT(4, trace, "recover1"); - if (!lcs_do_run_thread(card, LCS_RECOVERY_THREAD)) - return 0; - LCS_DBF_TEXT(4, trace, "recover2"); - gdev = card->gdev; - dev_warn(&gdev->dev, - "A recovery process has been started for the LCS device\n"); - rc = __lcs_shutdown_device(gdev, 1); - rc = lcs_new_device(gdev); - if (!rc) - pr_info("Device %s successfully recovered!\n", - card->dev->name); - else - pr_info("Device %s could not be recovered!\n", - card->dev->name); - lcs_clear_thread_running_bit(card, LCS_RECOVERY_THREAD); - return 0; -} - -/* - * lcs_remove_device, free buffers and card - */ -static void -lcs_remove_device(struct ccwgroup_device *ccwgdev) -{ - struct lcs_card *card; - - card = dev_get_drvdata(&ccwgdev->dev); - if (!card) - return; - - LCS_DBF_TEXT(3, setup, "remdev"); - LCS_DBF_HEX(3, setup, &card, sizeof(void*)); - if (ccwgdev->state == CCWGROUP_ONLINE) { - lcs_shutdown_device(ccwgdev); - } - if (card->dev) - unregister_netdev(card->dev); - lcs_cleanup_card(card); - lcs_free_card(card); - dev_set_drvdata(&ccwgdev->dev, NULL); - put_device(&ccwgdev->dev); -} - -static struct ccw_device_id lcs_ids[] = { - {CCW_DEVICE(0x3088, 0x08), .driver_info = lcs_channel_type_parallel}, - {CCW_DEVICE(0x3088, 0x1f), .driver_info = lcs_channel_type_2216}, - {CCW_DEVICE(0x3088, 0x60), .driver_info = lcs_channel_type_osa2}, - {}, -}; -MODULE_DEVICE_TABLE(ccw, lcs_ids); - -static struct ccw_driver lcs_ccw_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "lcs", - }, - .ids = lcs_ids, - .probe = ccwgroup_probe_ccwdev, - .remove = ccwgroup_remove_ccwdev, - .int_class = IRQIO_LCS, -}; - -/* - * LCS ccwgroup driver registration - */ -static struct ccwgroup_driver lcs_group_driver = { - .driver = { - .owner = THIS_MODULE, - .name = "lcs", - }, - .ccw_driver = &lcs_ccw_driver, - .setup = lcs_probe_device, - .remove = lcs_remove_device, - .set_online = lcs_new_device, - .set_offline = lcs_shutdown_device, -}; - -static ssize_t group_store(struct device_driver *ddrv, const char *buf, - size_t count) -{ - int err; - err = ccwgroup_create_dev(lcs_root_dev, &lcs_group_driver, 2, buf); - return err ? err : count; -} -static DRIVER_ATTR_WO(group); - -static struct attribute *lcs_drv_attrs[] = { - &driver_attr_group.attr, - NULL, -}; -static struct attribute_group lcs_drv_attr_group = { - .attrs = lcs_drv_attrs, -}; -static const struct attribute_group *lcs_drv_attr_groups[] = { - &lcs_drv_attr_group, - NULL, -}; - -/* - * LCS Module/Kernel initialization function - */ -static int -__init lcs_init_module(void) -{ - int rc; - - pr_info("Loading %s\n", version); - rc = lcs_register_debug_facility(); - LCS_DBF_TEXT(0, setup, "lcsinit"); - if (rc) - goto out_err; - lcs_root_dev = root_device_register("lcs"); - rc = PTR_ERR_OR_ZERO(lcs_root_dev); - if (rc) - goto register_err; - rc = ccw_driver_register(&lcs_ccw_driver); - if (rc) - goto ccw_err; - lcs_group_driver.driver.groups = lcs_drv_attr_groups; - rc = ccwgroup_driver_register(&lcs_group_driver); - if (rc) - goto ccwgroup_err; - return 0; - -ccwgroup_err: - ccw_driver_unregister(&lcs_ccw_driver); -ccw_err: - root_device_unregister(lcs_root_dev); -register_err: - lcs_unregister_debug_facility(); -out_err: - pr_err("Initializing the lcs device driver failed\n"); - return rc; -} - - -/* - * LCS module cleanup function - */ -static void -__exit lcs_cleanup_module(void) -{ - pr_info("Terminating lcs module.\n"); - LCS_DBF_TEXT(0, trace, "cleanup"); - ccwgroup_driver_unregister(&lcs_group_driver); - ccw_driver_unregister(&lcs_ccw_driver); - root_device_unregister(lcs_root_dev); - lcs_unregister_debug_facility(); -} - -module_init(lcs_init_module); -module_exit(lcs_cleanup_module); - -MODULE_AUTHOR("Frank Pavlic <fpavlic@de.ibm.com>"); -MODULE_DESCRIPTION("S/390 LAN channel station device driver"); -MODULE_LICENSE("GPL"); - diff --git a/drivers/s390/net/lcs.h b/drivers/s390/net/lcs.h deleted file mode 100644 index a2699b70b0502..0000000000000 --- a/drivers/s390/net/lcs.h +++ /dev/null @@ -1,342 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/*lcs.h*/ - -#include <linux/interrupt.h> -#include <linux/netdevice.h> -#include <linux/skbuff.h> -#include <linux/workqueue.h> -#include <linux/refcount.h> -#include <asm/ccwdev.h> - -#define LCS_DBF_TEXT(level, name, text) \ - do { \ - debug_text_event(lcs_dbf_##name, level, text); \ - } while (0) - -#define LCS_DBF_HEX(level,name,addr,len) \ -do { \ - debug_event(lcs_dbf_##name,level,(void*)(addr),len); \ -} while (0) - -#define LCS_DBF_TEXT_(level,name,text...) \ - do { \ - if (debug_level_enabled(lcs_dbf_##name, level)) { \ - scnprintf(debug_buffer, sizeof(debug_buffer), text); \ - debug_text_event(lcs_dbf_##name, level, debug_buffer); \ - } \ - } while (0) - -/** - * sysfs related stuff - */ -#define CARD_FROM_DEV(cdev) \ - (struct lcs_card *) dev_get_drvdata( \ - &((struct ccwgroup_device *)dev_get_drvdata(&cdev->dev))->dev); - -/** - * Enum for classifying detected devices. - */ -enum lcs_channel_types { - /* Device is not a channel */ - lcs_channel_type_none, - - /* Device is a 2216 channel */ - lcs_channel_type_parallel, - - /* Device is a 2216 channel */ - lcs_channel_type_2216, - - /* Device is a OSA2 card */ - lcs_channel_type_osa2 -}; - -/** - * CCW commands used in this driver - */ -#define LCS_CCW_WRITE 0x01 -#define LCS_CCW_READ 0x02 -#define LCS_CCW_TRANSFER 0x08 - -/** - * LCS device status primitives - */ -#define LCS_CMD_STARTLAN 0x01 -#define LCS_CMD_STOPLAN 0x02 -#define LCS_CMD_LANSTAT 0x04 -#define LCS_CMD_STARTUP 0x07 -#define LCS_CMD_SHUTDOWN 0x08 -#define LCS_CMD_QIPASSIST 0xb2 -#define LCS_CMD_SETIPM 0xb4 -#define LCS_CMD_DELIPM 0xb5 - -#define LCS_INITIATOR_TCPIP 0x00 -#define LCS_INITIATOR_LGW 0x01 -#define LCS_STD_CMD_SIZE 16 -#define LCS_MULTICAST_CMD_SIZE 404 - -/** - * LCS IPASSIST MASKS,only used when multicast is switched on - */ -/* Not supported by LCS */ -#define LCS_IPASS_ARP_PROCESSING 0x0001 -#define LCS_IPASS_IN_CHECKSUM_SUPPORT 0x0002 -#define LCS_IPASS_OUT_CHECKSUM_SUPPORT 0x0004 -#define LCS_IPASS_IP_FRAG_REASSEMBLY 0x0008 -#define LCS_IPASS_IP_FILTERING 0x0010 -/* Supported by lcs 3172 */ -#define LCS_IPASS_IPV6_SUPPORT 0x0020 -#define LCS_IPASS_MULTICAST_SUPPORT 0x0040 - -/** - * LCS sense byte definitions - */ -#define LCS_SENSE_BYTE_0 0 -#define LCS_SENSE_BYTE_1 1 -#define LCS_SENSE_BYTE_2 2 -#define LCS_SENSE_BYTE_3 3 -#define LCS_SENSE_INTERFACE_DISCONNECT 0x01 -#define LCS_SENSE_EQUIPMENT_CHECK 0x10 -#define LCS_SENSE_BUS_OUT_CHECK 0x20 -#define LCS_SENSE_INTERVENTION_REQUIRED 0x40 -#define LCS_SENSE_CMD_REJECT 0x80 -#define LCS_SENSE_RESETTING_EVENT 0x80 -#define LCS_SENSE_DEVICE_ONLINE 0x20 - -/** - * LCS packet type definitions - */ -#define LCS_FRAME_TYPE_CONTROL 0 -#define LCS_FRAME_TYPE_ENET 1 -#define LCS_FRAME_TYPE_TR 2 -#define LCS_FRAME_TYPE_FDDI 7 -#define LCS_FRAME_TYPE_AUTO -1 - -/** - * some more definitions,we will sort them later - */ -#define LCS_ILLEGAL_OFFSET 0xffff -#define LCS_IOBUFFERSIZE 0x5000 -#define LCS_NUM_BUFFS 32 /* needs to be power of 2 */ -#define LCS_MAC_LENGTH 6 -#define LCS_INVALID_PORT_NO -1 -#define LCS_LANCMD_TIMEOUT_DEFAULT 5 - -/** - * Multicast state - */ -#define LCS_IPM_STATE_SET_REQUIRED 0 -#define LCS_IPM_STATE_DEL_REQUIRED 1 -#define LCS_IPM_STATE_ON_CARD 2 - -/** - * LCS IP Assist declarations - * seems to be only used for multicast - */ -#define LCS_IPASS_ARP_PROCESSING 0x0001 -#define LCS_IPASS_INBOUND_CSUM_SUPP 0x0002 -#define LCS_IPASS_OUTBOUND_CSUM_SUPP 0x0004 -#define LCS_IPASS_IP_FRAG_REASSEMBLY 0x0008 -#define LCS_IPASS_IP_FILTERING 0x0010 -#define LCS_IPASS_IPV6_SUPPORT 0x0020 -#define LCS_IPASS_MULTICAST_SUPPORT 0x0040 - -/** - * LCS Buffer states - */ -enum lcs_buffer_states { - LCS_BUF_STATE_EMPTY, /* buffer is empty */ - LCS_BUF_STATE_LOCKED, /* buffer is locked, don't touch */ - LCS_BUF_STATE_READY, /* buffer is ready for read/write */ - LCS_BUF_STATE_PROCESSED, -}; - -/** - * LCS Channel State Machine declarations - */ -enum lcs_channel_states { - LCS_CH_STATE_INIT, - LCS_CH_STATE_HALTED, - LCS_CH_STATE_STOPPED, - LCS_CH_STATE_RUNNING, - LCS_CH_STATE_SUSPENDED, - LCS_CH_STATE_CLEARED, - LCS_CH_STATE_ERROR, -}; - -/** - * LCS device state machine - */ -enum lcs_dev_states { - DEV_STATE_DOWN, - DEV_STATE_UP, - DEV_STATE_RECOVER, -}; - -enum lcs_threads { - LCS_SET_MC_THREAD = 1, - LCS_RECOVERY_THREAD = 2, -}; - -/** - * LCS struct declarations - */ -struct lcs_header { - __u16 offset; - __u8 type; - __u8 slot; -} __attribute__ ((packed)); - -struct lcs_ip_mac_pair { - __be32 ip_addr; - __u8 mac_addr[LCS_MAC_LENGTH]; - __u8 reserved[2]; -} __attribute__ ((packed)); - -struct lcs_ipm_list { - struct list_head list; - struct lcs_ip_mac_pair ipm; - __u8 ipm_state; -}; - -struct lcs_cmd { - __u16 offset; - __u8 type; - __u8 slot; - __u8 cmd_code; - __u8 initiator; - __u16 sequence_no; - __u16 return_code; - union { - struct { - __u8 lan_type; - __u8 portno; - __u16 parameter_count; - __u8 operator_flags[3]; - __u8 reserved[3]; - } lcs_std_cmd; - struct { - __u16 unused1; - __u16 buff_size; - __u8 unused2[6]; - } lcs_startup; - struct { - __u8 lan_type; - __u8 portno; - __u8 unused[10]; - __u8 mac_addr[LCS_MAC_LENGTH]; - __u32 num_packets_deblocked; - __u32 num_packets_blocked; - __u32 num_packets_tx_on_lan; - __u32 num_tx_errors_detected; - __u32 num_tx_packets_disgarded; - __u32 num_packets_rx_from_lan; - __u32 num_rx_errors_detected; - __u32 num_rx_discarded_nobuffs_avail; - __u32 num_rx_packets_too_large; - } lcs_lanstat_cmd; -#ifdef CONFIG_IP_MULTICAST - struct { - __u8 lan_type; - __u8 portno; - __u16 num_ip_pairs; - __u16 ip_assists_supported; - __u16 ip_assists_enabled; - __u16 version; - struct { - struct lcs_ip_mac_pair - ip_mac_pair[32]; - __u32 response_data; - } lcs_ipass_ctlmsg __attribute ((packed)); - } lcs_qipassist __attribute__ ((packed)); -#endif /*CONFIG_IP_MULTICAST */ - } cmd __attribute__ ((packed)); -} __attribute__ ((packed)); - -/** - * Forward declarations. - */ -struct lcs_card; -struct lcs_channel; - -/** - * Definition of an lcs buffer. - */ -struct lcs_buffer { - enum lcs_buffer_states state; - void *data; - int count; - /* Callback for completion notification. */ - void (*callback)(struct lcs_channel *, struct lcs_buffer *); -}; - -struct lcs_reply { - struct list_head list; - __u16 sequence_no; - refcount_t refcnt; - /* Callback for completion notification. */ - void (*callback)(struct lcs_card *, struct lcs_cmd *); - wait_queue_head_t wait_q; - struct lcs_card *card; - struct timer_list timer; - int received; - int rc; -}; - -/** - * Definition of an lcs channel - */ -struct lcs_channel { - enum lcs_channel_states state; - struct ccw_device *ccwdev; - struct ccw1 ccws[LCS_NUM_BUFFS + 1]; - wait_queue_head_t wait_q; - struct tasklet_struct irq_tasklet; - struct lcs_buffer iob[LCS_NUM_BUFFS]; - int io_idx; - int buf_idx; -}; - - -/** - * definition of the lcs card - */ -struct lcs_card { - spinlock_t lock; - spinlock_t ipm_lock; - enum lcs_dev_states state; - struct net_device *dev; - struct net_device_stats stats; - __be16 (*lan_type_trans)(struct sk_buff *skb, - struct net_device *dev); - struct ccwgroup_device *gdev; - struct lcs_channel read; - struct lcs_channel write; - struct lcs_buffer *tx_buffer; - int tx_emitted; - struct list_head lancmd_waiters; - int lancmd_timeout; - - struct work_struct kernel_thread_starter; - spinlock_t mask_lock; - unsigned long thread_start_mask; - unsigned long thread_running_mask; - unsigned long thread_allowed_mask; - wait_queue_head_t wait_q; - -#ifdef CONFIG_IP_MULTICAST - struct list_head ipm_list; -#endif - __u8 mac[LCS_MAC_LENGTH]; - __u16 ip_assists_supported; - __u16 ip_assists_enabled; - __s8 lan_type; - __u32 pkt_seq; - __u16 sequence_no; - __s16 portno; - /* Some info copied from probeinfo */ - u8 device_forced; - u8 max_port_no; - u8 hint_port_no; - s16 port_protocol_no; -} __attribute__ ((aligned(8))); - diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 92071ca0655f0..3dc5a35dd19b3 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1496,6 +1496,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (!p->skip_locking) { btrfs_unlock_up_safe(p, parent_level + 1); + btrfs_maybe_reset_lockdep_class(root, tmp); tmp_locked = true; btrfs_tree_read_lock(tmp); btrfs_release_path(p); @@ -1539,6 +1540,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (!p->skip_locking) { ASSERT(ret == -EAGAIN); + btrfs_maybe_reset_lockdep_class(root, tmp); tmp_locked = true; btrfs_tree_read_lock(tmp); btrfs_release_path(p); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 30eceaf829a7e..4aca7475fd82c 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -1229,6 +1229,18 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent( */ if (WARN_ON_ONCE(len >= ordered->num_bytes)) return ERR_PTR(-EINVAL); + /* + * If our ordered extent had an error there's no point in continuing. + * The error may have come from a transaction abort done either by this + * task or some other concurrent task, and the transaction abort path + * iterates over all existing ordered extents and sets the flag + * BTRFS_ORDERED_IOERR on them. + */ + if (unlikely(flags & (1U << BTRFS_ORDERED_IOERR))) { + const int fs_error = BTRFS_FS_ERROR(fs_info); + + return fs_error ? ERR_PTR(fs_error) : ERR_PTR(-EIO); + } /* We cannot split partially completed ordered extents. */ if (ordered->bytes_left) { ASSERT(!(flags & ~BTRFS_ORDERED_TYPE_FLAGS)); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index b90fabe302e61..f9d3766c809b4 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1880,11 +1880,7 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su * Commit current transaction to make sure all the rfer/excl numbers * get updated. */ - trans = btrfs_start_transaction(fs_info->quota_root, 0); - if (IS_ERR(trans)) - return PTR_ERR(trans); - - ret = btrfs_commit_transaction(trans); + ret = btrfs_commit_current_transaction(fs_info->quota_root); if (ret < 0) return ret; @@ -1897,8 +1893,11 @@ int btrfs_qgroup_cleanup_dropped_subvolume(struct btrfs_fs_info *fs_info, u64 su /* * It's squota and the subvolume still has numbers needed for future * accounting, in this case we can not delete it. Just skip it. + * + * Or the qgroup is already removed by a qgroup rescan. For both cases we're + * safe to ignore them. */ - if (ret == -EBUSY) + if (ret == -EBUSY || ret == -ENOENT) ret = 0; return ret; } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 15312013f2a34..aca83a98b75a2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -274,8 +274,10 @@ loop: cur_trans = fs_info->running_transaction; if (cur_trans) { if (TRANS_ABORTED(cur_trans)) { + const int abort_error = cur_trans->aborted; + spin_unlock(&fs_info->trans_lock); - return cur_trans->aborted; + return abort_error; } if (btrfs_blocked_trans_types[cur_trans->state] & type) { spin_unlock(&fs_info->trans_lock); diff --git a/fs/dcache.c b/fs/dcache.c index 9cc0d47da321c..96b21a47312e6 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2966,11 +2966,11 @@ static int __d_unalias(struct dentry *dentry, struct dentry *alias) goto out_err; m2 = &alias->d_parent->d_inode->i_rwsem; out_unalias: - if (alias->d_op->d_unalias_trylock && + if (alias->d_op && alias->d_op->d_unalias_trylock && !alias->d_op->d_unalias_trylock(alias)) goto out_err; __d_move(alias, dentry, false); - if (alias->d_op->d_unalias_unlock) + if (alias->d_op && alias->d_op->d_unalias_unlock) alias->d_op->d_unalias_unlock(alias); ret = 0; out_err: diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 40ad22fb808b9..0ef19f1469ec9 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3563,12 +3563,12 @@ xfs_bmap_btalloc_at_eof( int error; /* - * If there are already extents in the file, try an exact EOF block - * allocation to extend the file as a contiguous extent. If that fails, - * or it's the first allocation in a file, just try for a stripe aligned - * allocation. + * If there are already extents in the file, and xfs_bmap_adjacent() has + * given a better blkno, try an exact EOF block allocation to extend the + * file as a contiguous extent. If that fails, or it's the first + * allocation in a file, just try for a stripe aligned allocation. */ - if (ap->offset) { + if (ap->eof) { xfs_extlen_t nextminlen = 0; /* @@ -3736,7 +3736,8 @@ xfs_bmap_btalloc_best_length( int error; ap->blkno = XFS_INO_TO_FSB(args->mp, ap->ip->i_ino); - xfs_bmap_adjacent(ap); + if (!xfs_bmap_adjacent(ap)) + ap->eof = false; /* * Search for an allocation group with a single extent large enough for diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index d1d4a0a22e137..15bb790359f81 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -41,8 +41,7 @@ struct kmem_cache *xfs_buf_cache; * * xfs_buf_rele: * b_lock - * pag_buf_lock - * lru_lock + * lru_lock * * xfs_buftarg_drain_rele * lru_lock @@ -220,23 +219,25 @@ _xfs_buf_alloc( */ flags &= ~(XBF_UNMAPPED | XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD); - spin_lock_init(&bp->b_lock); + /* + * A new buffer is held and locked by the owner. This ensures that the + * buffer is owned by the caller and racing RCU lookups right after + * inserting into the hash table are safe (and will have to wait for + * the unlock to do anything non-trivial). + */ bp->b_hold = 1; + sema_init(&bp->b_sema, 0); /* held, no waiters */ + + spin_lock_init(&bp->b_lock); atomic_set(&bp->b_lru_ref, 1); init_completion(&bp->b_iowait); INIT_LIST_HEAD(&bp->b_lru); INIT_LIST_HEAD(&bp->b_list); INIT_LIST_HEAD(&bp->b_li_list); - sema_init(&bp->b_sema, 0); /* held, no waiters */ bp->b_target = target; bp->b_mount = target->bt_mount; bp->b_flags = flags; - /* - * Set length and io_length to the same value initially. - * I/O routines should use io_length, which will be the same in - * most cases but may be reset (e.g. XFS recovery). - */ error = xfs_buf_get_maps(bp, nmaps); if (error) { kmem_cache_free(xfs_buf_cache, bp); @@ -502,7 +503,6 @@ int xfs_buf_cache_init( struct xfs_buf_cache *bch) { - spin_lock_init(&bch->bc_lock); return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params); } @@ -652,17 +652,20 @@ xfs_buf_find_insert( if (error) goto out_free_buf; - spin_lock(&bch->bc_lock); + /* The new buffer keeps the perag reference until it is freed. */ + new_bp->b_pag = pag; + + rcu_read_lock(); bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash, &new_bp->b_rhash_head, xfs_buf_hash_params); if (IS_ERR(bp)) { + rcu_read_unlock(); error = PTR_ERR(bp); - spin_unlock(&bch->bc_lock); goto out_free_buf; } if (bp && xfs_buf_try_hold(bp)) { /* found an existing buffer */ - spin_unlock(&bch->bc_lock); + rcu_read_unlock(); error = xfs_buf_find_lock(bp, flags); if (error) xfs_buf_rele(bp); @@ -670,10 +673,8 @@ xfs_buf_find_insert( *bpp = bp; goto out_free_buf; } + rcu_read_unlock(); - /* The new buffer keeps the perag reference until it is freed. */ - new_bp->b_pag = pag; - spin_unlock(&bch->bc_lock); *bpp = new_bp; return 0; @@ -1090,7 +1091,6 @@ xfs_buf_rele_cached( } /* we are asked to drop the last reference */ - spin_lock(&bch->bc_lock); __xfs_buf_ioacct_dec(bp); if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) { /* @@ -1102,7 +1102,6 @@ xfs_buf_rele_cached( bp->b_state &= ~XFS_BSTATE_DISPOSE; else bp->b_hold--; - spin_unlock(&bch->bc_lock); } else { bp->b_hold--; /* @@ -1120,7 +1119,6 @@ xfs_buf_rele_cached( ASSERT(!(bp->b_flags & _XBF_DELWRI_Q)); rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head, xfs_buf_hash_params); - spin_unlock(&bch->bc_lock); if (pag) xfs_perag_put(pag); freebuf = true; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 7e73663c5d4a5..3b4ed42e11c01 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -80,7 +80,6 @@ typedef unsigned int xfs_buf_flags_t; #define XFS_BSTATE_IN_FLIGHT (1 << 1) /* I/O in flight */ struct xfs_buf_cache { - spinlock_t bc_lock; struct rhashtable bc_hash; }; diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c index f340a2015c4c7..0b41bdfecdfbc 100644 --- a/fs/xfs/xfs_exchrange.c +++ b/fs/xfs/xfs_exchrange.c @@ -329,22 +329,6 @@ out_trans_cancel: * successfully but before locks are dropped. */ -/* Verify that we have security clearance to perform this operation. */ -static int -xfs_exchange_range_verify_area( - struct xfs_exchrange *fxr) -{ - int ret; - - ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length, - true); - if (ret) - return ret; - - return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length, - true); -} - /* * Performs necessary checks before doing a range exchange, having stabilized * mutable inode attributes via i_rwsem. @@ -355,11 +339,13 @@ xfs_exchange_range_checks( unsigned int alloc_unit) { struct inode *inode1 = file_inode(fxr->file1); + loff_t size1 = i_size_read(inode1); struct inode *inode2 = file_inode(fxr->file2); + loff_t size2 = i_size_read(inode2); uint64_t allocmask = alloc_unit - 1; int64_t test_len; uint64_t blen; - loff_t size1, size2, tmp; + loff_t tmp; int error; /* Don't touch certain kinds of inodes */ @@ -368,24 +354,25 @@ xfs_exchange_range_checks( if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2)) return -ETXTBSY; - size1 = i_size_read(inode1); - size2 = i_size_read(inode2); - /* Ranges cannot start after EOF. */ if (fxr->file1_offset > size1 || fxr->file2_offset > size2) return -EINVAL; - /* - * If the caller said to exchange to EOF, we set the length of the - * request large enough to cover everything to the end of both files. - */ if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) { + /* + * If the caller said to exchange to EOF, we set the length of + * the request large enough to cover everything to the end of + * both files. + */ fxr->length = max_t(int64_t, size1 - fxr->file1_offset, size2 - fxr->file2_offset); - - error = xfs_exchange_range_verify_area(fxr); - if (error) - return error; + } else { + /* + * Otherwise we require both ranges to end within EOF. + */ + if (fxr->file1_offset + fxr->length > size1 || + fxr->file2_offset + fxr->length > size2) + return -EINVAL; } /* @@ -402,15 +389,6 @@ xfs_exchange_range_checks( return -EINVAL; /* - * We require both ranges to end within EOF, unless we're exchanging - * to EOF. - */ - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) && - (fxr->file1_offset + fxr->length > size1 || - fxr->file2_offset + fxr->length > size2)) - return -EINVAL; - - /* * Make sure we don't hit any file size limits. If we hit any size * limits such that test_length was adjusted, we abort the whole * operation. @@ -747,6 +725,7 @@ xfs_exchange_range( { struct inode *inode1 = file_inode(fxr->file1); struct inode *inode2 = file_inode(fxr->file2); + loff_t check_len = fxr->length; int ret; BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS & @@ -779,14 +758,18 @@ xfs_exchange_range( return -EBADF; /* - * If we're not exchanging to EOF, we can check the areas before - * stabilizing both files' i_size. + * If we're exchanging to EOF we can't calculate the length until taking + * the iolock. Pass a 0 length to remap_verify_area similar to the + * FICLONE and FICLONERANGE ioctls that support cloning to EOF as well. */ - if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) { - ret = xfs_exchange_range_verify_area(fxr); - if (ret) - return ret; - } + if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) + check_len = 0; + ret = remap_verify_area(fxr->file1, fxr->file1_offset, check_len, true); + if (ret) + return ret; + ret = remap_verify_area(fxr->file2, fxr->file2_offset, check_len, true); + if (ret) + return ret; /* Update cmtime if the fd/inode don't forbid it. */ if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1)) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index c95fe1b1de4e6..b1f9f156ec888 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1404,8 +1404,11 @@ xfs_inactive( goto out; /* Try to clean out the cow blocks if there are any. */ - if (xfs_inode_has_cow_data(ip)) - xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); + if (xfs_inode_has_cow_data(ip)) { + error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); + if (error) + goto out; + } if (VFS_I(ip)->i_nlink != 0) { /* diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 50fa3ef89f6c9..d61460309a783 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -976,10 +976,8 @@ xfs_dax_write_iomap_end( if (!xfs_is_cow_inode(ip)) return 0; - if (!written) { - xfs_reflink_cancel_cow_range(ip, pos, length, true); - return 0; - } + if (!written) + return xfs_reflink_cancel_cow_range(ip, pos, length, true); return xfs_reflink_end_cow(ip, pos, written); } diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h index c3b4b7ed7c163..84a5045f80f36 100644 --- a/include/linux/hrtimer_defs.h +++ b/include/linux/hrtimer_defs.h @@ -125,6 +125,7 @@ struct hrtimer_cpu_base { ktime_t softirq_expires_next; struct hrtimer *softirq_next_timer; struct hrtimer_clock_base clock_base[HRTIMER_MAX_CLOCK_BASES]; + call_single_data_t csd; } ____cacheline_aligned; diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index ed945f42e064a..0ea8c9887429f 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -537,7 +537,7 @@ static __always_inline unsigned long msecs_to_jiffies(const unsigned int m) * * Return: jiffies value */ -#define secs_to_jiffies(_secs) ((_secs) * HZ) +#define secs_to_jiffies(_secs) (unsigned long)((_secs) * HZ) extern unsigned long __usecs_to_jiffies(const unsigned int u); #if !(USEC_PER_SEC % HZ) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 27f42f713c891..87edb7a8173b1 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1415,7 +1415,6 @@ int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, bool *vlan_offload_disabled); void mlx4_handle_eth_header_mcast_prio(struct mlx4_net_trans_rule_hw_ctrl *ctrl, struct _rule_hw *eth_header); -int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, u16 vlan); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index af86097641b0b..46bd7550adf81 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -54,7 +54,6 @@ #include <linux/mlx5/doorbell.h> #include <linux/mlx5/eq.h> #include <linux/timecounter.h> -#include <linux/ptp_clock_kernel.h> #include <net/devlink.h> #define MLX5_ADEV_NAME "mlx5_core" @@ -679,33 +678,8 @@ struct mlx5_rsvd_gids { struct ida ida; }; -#define MAX_PIN_NUM 8 -struct mlx5_pps { - u8 pin_caps[MAX_PIN_NUM]; - struct work_struct out_work; - u64 start[MAX_PIN_NUM]; - u8 enabled; - u64 min_npps_period; - u64 min_out_pulse_duration_ns; -}; - -struct mlx5_timer { - struct cyclecounter cycles; - struct timecounter tc; - u32 nominal_c_mult; - unsigned long overflow_period; -}; - -struct mlx5_clock { - struct mlx5_nb pps_nb; - seqlock_t lock; - struct hwtstamp_config hwtstamp_config; - struct ptp_clock *ptp; - struct ptp_clock_info ptp_info; - struct mlx5_pps pps_info; - struct mlx5_timer timer; -}; - +struct mlx5_clock; +struct mlx5_clock_dev_state; struct mlx5_dm; struct mlx5_fw_tracer; struct mlx5_vxlan; @@ -789,7 +763,8 @@ struct mlx5_core_dev { #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; #endif - struct mlx5_clock clock; + struct mlx5_clock *clock; + struct mlx5_clock_dev_state *clock_state; struct mlx5_ib_clock_info *clock_info; struct mlx5_fw_tracer *tracer; struct mlx5_rsc_dump *rsc_dump; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index e68d42b8ce652..fd625e0dd8691 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -115,9 +115,12 @@ enum mlx5e_ext_link_mode { MLX5E_100GAUI_1_100GBASE_CR_KR = 11, MLX5E_200GAUI_4_200GBASE_CR4_KR4 = 12, MLX5E_200GAUI_2_200GBASE_CR2_KR2 = 13, + MLX5E_200GAUI_1_200GBASE_CR1_KR1 = 14, MLX5E_400GAUI_8_400GBASE_CR8 = 15, MLX5E_400GAUI_4_400GBASE_CR4_KR4 = 16, + MLX5E_400GAUI_2_400GBASE_CR2_KR2 = 17, MLX5E_800GAUI_8_800GBASE_CR8_KR8 = 19, + MLX5E_800GAUI_4_800GBASE_CR4_KR4 = 20, MLX5E_EXT_LINK_MODES_NUMBER, }; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2a59034a5fa2f..5429581f22995 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -658,6 +658,7 @@ struct netdev_queue { struct Qdisc __rcu *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; + const struct attribute_group **groups; #endif unsigned long tx_maxrate; /* @@ -2904,9 +2905,9 @@ struct pcpu_sw_netstats { struct pcpu_dstats { u64_stats_t rx_packets; u64_stats_t rx_bytes; - u64_stats_t rx_drops; u64_stats_t tx_packets; u64_stats_t tx_bytes; + u64_stats_t rx_drops; u64_stats_t tx_drops; struct u64_stats_sync syncp; } __aligned(8 * sizeof(u64)); diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 4bc2ee0b10b05..ccaaf4c7d5f6a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -43,6 +43,7 @@ extern void rtnl_lock(void); extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); +extern int rtnl_lock_interruptible(void); extern int rtnl_lock_killable(void); extern bool refcount_dec_and_rtnl_lock(refcount_t *r); diff --git a/include/net/netdev_rx_queue.h b/include/net/netdev_rx_queue.h index 596836abf7bf4..af40842f229d9 100644 --- a/include/net/netdev_rx_queue.h +++ b/include/net/netdev_rx_queue.h @@ -16,6 +16,7 @@ struct netdev_rx_queue { struct rps_dev_flow_table __rcu *rps_flow_table; #endif struct kobject kobj; + const struct attribute_group **groups; struct net_device *dev; netdevice_tracker dev_tracker; diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index d635c5b47ebaf..d48c657191cd0 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -851,7 +851,7 @@ static inline int qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, } static inline void _bstats_update(struct gnet_stats_basic_sync *bstats, - __u64 bytes, __u32 packets) + __u64 bytes, __u64 packets) { u64_stats_update_begin(&bstats->syncp); u64_stats_add(&bstats->bytes, bytes); diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 2f119d18a061f..cad50d91077ef 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -219,6 +219,7 @@ EM(rxrpc_conn_get_conn_input, "GET inp-conn") \ EM(rxrpc_conn_get_idle, "GET idle ") \ EM(rxrpc_conn_get_poke_abort, "GET pk-abort") \ + EM(rxrpc_conn_get_poke_secured, "GET secured ") \ EM(rxrpc_conn_get_poke_timer, "GET poke ") \ EM(rxrpc_conn_get_service_conn, "GET svc-conn") \ EM(rxrpc_conn_new_client, "NEW client ") \ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index d1089b88efc7d..e0bd726f84c17 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -2057,6 +2057,24 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_10baseT1S_Half_BIT = 100, ETHTOOL_LINK_MODE_10baseT1S_P2MP_Half_BIT = 101, ETHTOOL_LINK_MODE_10baseT1BRR_Full_BIT = 102, + ETHTOOL_LINK_MODE_200000baseCR_Full_BIT = 103, + ETHTOOL_LINK_MODE_200000baseKR_Full_BIT = 104, + ETHTOOL_LINK_MODE_200000baseDR_Full_BIT = 105, + ETHTOOL_LINK_MODE_200000baseDR_2_Full_BIT = 106, + ETHTOOL_LINK_MODE_200000baseSR_Full_BIT = 107, + ETHTOOL_LINK_MODE_200000baseVR_Full_BIT = 108, + ETHTOOL_LINK_MODE_400000baseCR2_Full_BIT = 109, + ETHTOOL_LINK_MODE_400000baseKR2_Full_BIT = 110, + ETHTOOL_LINK_MODE_400000baseDR2_Full_BIT = 111, + ETHTOOL_LINK_MODE_400000baseDR2_2_Full_BIT = 112, + ETHTOOL_LINK_MODE_400000baseSR2_Full_BIT = 113, + ETHTOOL_LINK_MODE_400000baseVR2_Full_BIT = 114, + ETHTOOL_LINK_MODE_800000baseCR4_Full_BIT = 115, + ETHTOOL_LINK_MODE_800000baseKR4_Full_BIT = 116, + ETHTOOL_LINK_MODE_800000baseDR4_Full_BIT = 117, + ETHTOOL_LINK_MODE_800000baseDR4_2_Full_BIT = 118, + ETHTOOL_LINK_MODE_800000baseSR4_Full_BIT = 119, + ETHTOOL_LINK_MODE_800000baseVR4_Full_BIT = 120, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS diff --git a/kernel/kthread.c b/kernel/kthread.c index 4005b13ebd7ff..5dc5b0d7238e8 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -859,7 +859,7 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) struct kthread *kthread = to_kthread(p); cpumask_var_t affinity; unsigned long flags; - int ret; + int ret = 0; if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE) || kthread->started) { WARN_ON(1); @@ -892,7 +892,7 @@ int kthread_affine_preferred(struct task_struct *p, const struct cpumask *mask) out: free_cpumask_var(affinity); - return 0; + return ret; } /* diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 7304d7cf47f2d..77d9566d3aa68 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -382,7 +382,8 @@ void clocksource_verify_percpu(struct clocksource *cs) return; } testcpu = smp_processor_id(); - pr_warn("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", cs->name, testcpu, cpumask_pr_args(&cpus_chosen)); + pr_info("Checking clocksource %s synchronization from CPU %d to CPUs %*pbl.\n", + cs->name, testcpu, cpumask_pr_args(&cpus_chosen)); for_each_cpu(cpu, &cpus_chosen) { if (cpu == testcpu) continue; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index f6d8df94045c9..deb1aa32814e3 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -58,6 +58,8 @@ #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) +static void retrigger_next_event(void *arg); + /* * The timer bases: * @@ -111,7 +113,8 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = .clockid = CLOCK_TAI, .get_time = &ktime_get_clocktai, }, - } + }, + .csd = CSD_INIT(retrigger_next_event, NULL) }; static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { @@ -124,6 +127,14 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { [CLOCK_TAI] = HRTIMER_BASE_TAI, }; +static inline bool hrtimer_base_is_online(struct hrtimer_cpu_base *base) +{ + if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) + return true; + else + return likely(base->online); +} + /* * Functions and macros which are different for UP/SMP systems are kept in a * single place @@ -145,11 +156,6 @@ static struct hrtimer_cpu_base migration_cpu_base = { #define migration_base migration_cpu_base.clock_base[0] -static inline bool is_migration_base(struct hrtimer_clock_base *base) -{ - return base == &migration_base; -} - /* * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock * means that all timers which are tied to this base via timer->base are @@ -183,27 +189,54 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, } /* - * We do not migrate the timer when it is expiring before the next - * event on the target cpu. When high resolution is enabled, we cannot - * reprogram the target cpu hardware and we would cause it to fire - * late. To keep it simple, we handle the high resolution enabled and - * disabled case similar. + * Check if the elected target is suitable considering its next + * event and the hotplug state of the current CPU. + * + * If the elected target is remote and its next event is after the timer + * to queue, then a remote reprogram is necessary. However there is no + * guarantee the IPI handling the operation would arrive in time to meet + * the high resolution deadline. In this case the local CPU becomes a + * preferred target, unless it is offline. + * + * High and low resolution modes are handled the same way for simplicity. * * Called with cpu_base->lock of target cpu held. */ -static int -hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) +static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base, + struct hrtimer_cpu_base *new_cpu_base, + struct hrtimer_cpu_base *this_cpu_base) { ktime_t expires; + /* + * The local CPU clockevent can be reprogrammed. Also get_target_base() + * guarantees it is online. + */ + if (new_cpu_base == this_cpu_base) + return true; + + /* + * The offline local CPU can't be the default target if the + * next remote target event is after this timer. Keep the + * elected new base. An IPI will we issued to reprogram + * it as a last resort. + */ + if (!hrtimer_base_is_online(this_cpu_base)) + return true; + expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); - return expires < new_base->cpu_base->expires_next; + + return expires >= new_base->cpu_base->expires_next; } -static inline -struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, - int pinned) +static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned) { + if (!hrtimer_base_is_online(base)) { + int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER)); + + return &per_cpu(hrtimer_bases, cpu); + } + #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) if (static_branch_likely(&timers_migration_enabled) && !pinned) return &per_cpu(hrtimer_bases, get_nohz_timer_target()); @@ -254,8 +287,8 @@ again: raw_spin_unlock(&base->cpu_base->lock); raw_spin_lock(&new_base->cpu_base->lock); - if (new_cpu_base != this_cpu_base && - hrtimer_check_target(timer, new_base)) { + if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, + this_cpu_base)) { raw_spin_unlock(&new_base->cpu_base->lock); raw_spin_lock(&base->cpu_base->lock); new_cpu_base = this_cpu_base; @@ -264,8 +297,7 @@ again: } WRITE_ONCE(timer->base, new_base); } else { - if (new_cpu_base != this_cpu_base && - hrtimer_check_target(timer, new_base)) { + if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, this_cpu_base)) { new_cpu_base = this_cpu_base; goto again; } @@ -275,11 +307,6 @@ again: #else /* CONFIG_SMP */ -static inline bool is_migration_base(struct hrtimer_clock_base *base) -{ - return false; -} - static inline struct hrtimer_clock_base * lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) __acquires(&timer->base->cpu_base->lock) @@ -716,8 +743,6 @@ static inline int hrtimer_is_hres_enabled(void) return hrtimer_hres_enabled; } -static void retrigger_next_event(void *arg); - /* * Switch to high resolution mode */ @@ -1205,6 +1230,7 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, const enum hrtimer_mode mode, struct hrtimer_clock_base *base) { + struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases); struct hrtimer_clock_base *new_base; bool force_local, first; @@ -1216,10 +1242,16 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, * and enforce reprogramming after it is queued no matter whether * it is the new first expiring timer again or not. */ - force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases); + force_local = base->cpu_base == this_cpu_base; force_local &= base->cpu_base->next_timer == timer; /* + * Don't force local queuing if this enqueue happens on a unplugged + * CPU after hrtimer_cpu_dying() has been invoked. + */ + force_local &= this_cpu_base->online; + + /* * Remove an active timer from the queue. In case it is not queued * on the current CPU, make sure that remove_hrtimer() updates the * remote data correctly. @@ -1248,8 +1280,27 @@ static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, } first = enqueue_hrtimer(timer, new_base, mode); - if (!force_local) - return first; + if (!force_local) { + /* + * If the current CPU base is online, then the timer is + * never queued on a remote CPU if it would be the first + * expiring timer there. + */ + if (hrtimer_base_is_online(this_cpu_base)) + return first; + + /* + * Timer was enqueued remote because the current base is + * already offline. If the timer is the first to expire, + * kick the remote CPU to reprogram the clock event. + */ + if (first) { + struct hrtimer_cpu_base *new_cpu_base = new_base->cpu_base; + + smp_call_function_single_async(new_cpu_base->cpu, &new_cpu_base->csd); + } + return 0; + } /* * Timer was forced to stay on the current CPU to avoid @@ -1370,6 +1421,18 @@ static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, } } +#ifdef CONFIG_SMP +static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) +{ + return base == &migration_base; +} +#else +static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) +{ + return false; +} +#endif + /* * This function is called on PREEMPT_RT kernels when the fast path * deletion of a timer failed because the timer callback function was diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 1a52a0bca086d..7e1ad229e1330 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -1040,7 +1040,7 @@ static int br_mdb_add_group(const struct br_mdb_config *cfg, /* host join */ if (!port) { - if (mp->host_joined) { + if (mp->host_joined && !(cfg->nlflags & NLM_F_REPLACE)) { NL_SET_ERR_MSG_MOD(extack, "Group is already joined by host"); return -EEXIST; } diff --git a/net/core/dev.c b/net/core/dev.c index 384b9109932ad..d5ab9a4b318ea 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6120,16 +6120,18 @@ EXPORT_SYMBOL(netif_receive_skb_list); static void flush_backlog(struct work_struct *work) { struct sk_buff *skb, *tmp; + struct sk_buff_head list; struct softnet_data *sd; + __skb_queue_head_init(&list); local_bh_disable(); sd = this_cpu_ptr(&softnet_data); backlog_lock_irq_disable(sd); skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { - if (skb->dev->reg_state == NETREG_UNREGISTERING) { + if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); - dev_kfree_skb_irq(skb); + __skb_queue_tail(&list, skb); rps_input_queue_head_incr(sd); } } @@ -6137,14 +6139,16 @@ static void flush_backlog(struct work_struct *work) local_lock_nested_bh(&softnet_data.process_queue_bh_lock); skb_queue_walk_safe(&sd->process_queue, skb, tmp) { - if (skb->dev->reg_state == NETREG_UNREGISTERING) { + if (READ_ONCE(skb->dev->reg_state) == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->process_queue); - kfree_skb(skb); + __skb_queue_tail(&list, skb); rps_input_queue_head_incr(sd); } } local_unlock_nested_bh(&softnet_data.process_queue_bh_lock); local_bh_enable(); + + __skb_queue_purge_reason(&list, SKB_DROP_REASON_DEV_READY); } static bool flush_required(int cpu) @@ -7072,6 +7076,9 @@ void __netif_napi_del_locked(struct napi_struct *napi) if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state)) return; + /* Make sure NAPI is disabled (or was never enabled). */ + WARN_ON(!test_bit(NAPI_STATE_SCHED, &napi->state)); + if (napi->config) { napi->index = -1; napi->config = NULL; @@ -11287,6 +11294,20 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev, const struct net_device_ops *ops = dev->netdev_ops; const struct net_device_core_stats __percpu *p; + /* + * IPv{4,6} and udp tunnels share common stat helpers and use + * different stat type (NETDEV_PCPU_STAT_TSTATS vs + * NETDEV_PCPU_STAT_DSTATS). Ensure the accounting is consistent. + */ + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_bytes) != + offsetof(struct pcpu_dstats, rx_bytes)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, rx_packets) != + offsetof(struct pcpu_dstats, rx_packets)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_bytes) != + offsetof(struct pcpu_dstats, tx_bytes)); + BUILD_BUG_ON(offsetof(struct pcpu_sw_netstats, tx_packets) != + offsetof(struct pcpu_dstats, tx_packets)); + if (ops->ndo_get_stats64) { memset(storage, 0, sizeof(*storage)); ops->ndo_get_stats64(dev, storage); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 89656d180bc60..73260ca0fc223 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -832,12 +832,10 @@ static int pneigh_ifdown_and_unlock(struct neigh_table *tbl, return -ENOENT; } -static void neigh_parms_destroy(struct neigh_parms *parms); - static inline void neigh_parms_put(struct neigh_parms *parms) { if (refcount_dec_and_test(&parms->refcnt)) - neigh_parms_destroy(parms); + kfree(parms); } /* @@ -1713,11 +1711,6 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) } EXPORT_SYMBOL(neigh_parms_release); -static void neigh_parms_destroy(struct neigh_parms *parms) -{ - kfree(parms); -} - static struct lock_class_key neigh_table_proxy_queue_class; static struct neigh_table __rcu *neigh_tables[NEIGH_NR_TABLES] __read_mostly; diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 07cb99b114bdd..3fe2c521e5740 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -42,6 +42,87 @@ static inline int dev_isalive(const struct net_device *dev) return READ_ONCE(dev->reg_state) <= NETREG_REGISTERED; } +/* There is a possible ABBA deadlock between rtnl_lock and kernfs_node->active, + * when unregistering a net device and accessing associated sysfs files. The + * potential deadlock is as follow: + * + * CPU 0 CPU 1 + * + * rtnl_lock vfs_read + * unregister_netdevice_many kernfs_seq_start + * device_del / kobject_put kernfs_get_active (kn->active++) + * kernfs_drain sysfs_kf_seq_show + * wait_event( rtnl_lock + * kn->active == KN_DEACTIVATED_BIAS) -> waits on CPU 0 to release + * -> waits on CPU 1 to decrease kn->active the rtnl lock. + * + * The historical fix was to use rtnl_trylock with restart_syscall to bail out + * of sysfs operations when the lock couldn't be taken. This fixed the above + * issue as it allowed CPU 1 to bail out of the ABBA situation. + * + * But it came with performances issues, as syscalls are being restarted in + * loops when there was contention on the rtnl lock, with huge slow downs in + * specific scenarios (e.g. lots of virtual interfaces created and userspace + * daemons querying their attributes). + * + * The idea below is to bail out of the active kernfs_node protection + * (kn->active) while trying to take the rtnl lock. + * + * This replaces rtnl_lock() and still has to be used with rtnl_unlock(). The + * net device is guaranteed to be alive if this returns successfully. + */ +static int sysfs_rtnl_lock(struct kobject *kobj, struct attribute *attr, + struct net_device *ndev) +{ + struct kernfs_node *kn; + int ret = 0; + + /* First, we hold a reference to the net device as the unregistration + * path might run in parallel. This will ensure the net device and the + * associated sysfs objects won't be freed while we try to take the rtnl + * lock. + */ + dev_hold(ndev); + /* sysfs_break_active_protection was introduced to allow self-removal of + * devices and their associated sysfs files by bailing out of the + * sysfs/kernfs protection. We do this here to allow the unregistration + * path to complete in parallel. The following takes a reference on the + * kobject and the kernfs_node being accessed. + * + * This works because we hold a reference onto the net device and the + * unregistration path will wait for us eventually in netdev_run_todo + * (outside an rtnl lock section). + */ + kn = sysfs_break_active_protection(kobj, attr); + /* We can now try to take the rtnl lock. This can't deadlock us as the + * unregistration path is able to drain sysfs files (kernfs_node) thanks + * to the above dance. + */ + if (rtnl_lock_interruptible()) { + ret = -ERESTARTSYS; + goto unbreak; + } + /* Check dismantle on the device hasn't started, otherwise deny the + * operation. + */ + if (!dev_isalive(ndev)) { + rtnl_unlock(); + ret = -ENODEV; + goto unbreak; + } + /* We are now sure the device dismantle hasn't started nor that it can + * start before we exit the locking section as we hold the rtnl lock. + * There's no need to keep unbreaking the sysfs protection nor to hold + * a net device reference from that point; that was only needed to take + * the rtnl lock. + */ +unbreak: + sysfs_unbreak_active_protection(kn); + dev_put(ndev); + + return ret; +} + /* use same locking rules as GIF* ioctl's */ static ssize_t netdev_show(const struct device *dev, struct device_attribute *attr, char *buf, @@ -95,14 +176,14 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, if (ret) goto err; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + goto err; + + ret = (*set)(netdev, new); + if (ret == 0) + ret = len; - if (dev_isalive(netdev)) { - ret = (*set)(netdev, new); - if (ret == 0) - ret = len; - } rtnl_unlock(); err: return ret; @@ -220,7 +301,7 @@ static ssize_t carrier_store(struct device *dev, struct device_attribute *attr, struct net_device *netdev = to_net_dev(dev); /* The check is also done in change_carrier; this helps returning early - * without hitting the trylock/restart in netdev_store. + * without hitting the locking section in netdev_store. */ if (!netdev->netdev_ops->ndo_change_carrier) return -EOPNOTSUPP; @@ -234,8 +315,9 @@ static ssize_t carrier_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); int ret = -EINVAL; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; if (netif_running(netdev)) { /* Synchronize carrier state with link watch, @@ -245,8 +327,8 @@ static ssize_t carrier_show(struct device *dev, ret = sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev)); } - rtnl_unlock(); + rtnl_unlock(); return ret; } static DEVICE_ATTR_RW(carrier); @@ -258,13 +340,14 @@ static ssize_t speed_show(struct device *dev, int ret = -EINVAL; /* The check is also done in __ethtool_get_link_ksettings; this helps - * returning early without hitting the trylock/restart below. + * returning early without hitting the locking section below. */ if (!netdev->ethtool_ops->get_link_ksettings) return ret; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; if (netif_running(netdev)) { struct ethtool_link_ksettings cmd; @@ -284,13 +367,14 @@ static ssize_t duplex_show(struct device *dev, int ret = -EINVAL; /* The check is also done in __ethtool_get_link_ksettings; this helps - * returning early without hitting the trylock/restart below. + * returning early without hitting the locking section below. */ if (!netdev->ethtool_ops->get_link_ksettings) return ret; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; if (netif_running(netdev)) { struct ethtool_link_ksettings cmd; @@ -490,16 +574,15 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, if (len > 0 && buf[len - 1] == '\n') --count; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - ret = dev_set_alias(netdev, buf, count); - if (ret < 0) - goto err; - ret = len; - netdev_state_change(netdev); - } + ret = dev_set_alias(netdev, buf, count); + if (ret < 0) + goto err; + ret = len; + netdev_state_change(netdev); err: rtnl_unlock(); @@ -551,24 +634,23 @@ static ssize_t phys_port_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); + struct netdev_phys_item_id ppid; ssize_t ret = -EINVAL; /* The check is also done in dev_get_phys_port_id; this helps returning - * early without hitting the trylock/restart below. + * early without hitting the locking section below. */ if (!netdev->netdev_ops->ndo_get_phys_port_id) return -EOPNOTSUPP; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - struct netdev_phys_item_id ppid; + ret = dev_get_phys_port_id(netdev, &ppid); + if (!ret) + ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - ret = dev_get_phys_port_id(netdev, &ppid); - if (!ret) - ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - } rtnl_unlock(); return ret; @@ -580,24 +662,23 @@ static ssize_t phys_port_name_show(struct device *dev, { struct net_device *netdev = to_net_dev(dev); ssize_t ret = -EINVAL; + char name[IFNAMSIZ]; /* The checks are also done in dev_get_phys_port_name; this helps - * returning early without hitting the trylock/restart below. + * returning early without hitting the locking section below. */ if (!netdev->netdev_ops->ndo_get_phys_port_name && !netdev->devlink_port) return -EOPNOTSUPP; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - char name[IFNAMSIZ]; + ret = dev_get_phys_port_name(netdev, name, sizeof(name)); + if (!ret) + ret = sysfs_emit(buf, "%s\n", name); - ret = dev_get_phys_port_name(netdev, name, sizeof(name)); - if (!ret) - ret = sysfs_emit(buf, "%s\n", name); - } rtnl_unlock(); return ret; @@ -608,26 +689,25 @@ static ssize_t phys_switch_id_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); + struct netdev_phys_item_id ppid = { }; ssize_t ret = -EINVAL; /* The checks are also done in dev_get_phys_port_name; this helps - * returning early without hitting the trylock/restart below. This works + * returning early without hitting the locking section below. This works * because recurse is false when calling dev_get_port_parent_id. */ if (!netdev->netdev_ops->ndo_get_port_parent_id && !netdev->devlink_port) return -EOPNOTSUPP; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(&dev->kobj, &attr->attr, netdev); + if (ret) + return ret; - if (dev_isalive(netdev)) { - struct netdev_phys_item_id ppid = { }; + ret = dev_get_port_parent_id(netdev, &ppid, false); + if (!ret) + ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - ret = dev_get_port_parent_id(netdev, &ppid, false); - if (!ret) - ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); - } rtnl_unlock(); return ret; @@ -1108,7 +1188,6 @@ static void rx_queue_get_ownership(const struct kobject *kobj, static const struct kobj_type rx_queue_ktype = { .sysfs_ops = &rx_queue_sysfs_ops, .release = rx_queue_release, - .default_groups = rx_queue_default_groups, .namespace = rx_queue_namespace, .get_ownership = rx_queue_get_ownership, }; @@ -1131,6 +1210,22 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0; + /* Rx queues are cleared in rx_queue_release to allow later + * re-registration. This is triggered when their kobj refcount is + * dropped. + * + * If a queue is removed while both a read (or write) operation and a + * the re-addition of the same queue are pending (waiting on rntl_lock) + * it might happen that the re-addition will execute before the read, + * making the initial removal to never happen (queue's kobj refcount + * won't drop enough because of the pending read). In such rare case, + * return to allow the removal operation to complete. + */ + if (unlikely(kobj->state_initialized)) { + netdev_warn_once(dev, "Cannot re-add rx queues before their removal completed"); + return -EAGAIN; + } + /* Kobject_put later will trigger rx_queue_release call which * decreases dev refcount: Take that reference here */ @@ -1142,20 +1237,27 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) if (error) goto err; + queue->groups = rx_queue_default_groups; + error = sysfs_create_groups(kobj, queue->groups); + if (error) + goto err; + if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); if (error) - goto err; + goto err_default_groups; } error = rx_queue_default_mask(dev, queue); if (error) - goto err; + goto err_default_groups; kobject_uevent(kobj, KOBJ_ADD); return error; +err_default_groups: + sysfs_remove_groups(kobj, queue->groups); err: kobject_put(kobj); return error; @@ -1200,12 +1302,14 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) } while (--i >= new_num) { - struct kobject *kobj = &dev->_rx[i].kobj; + struct netdev_rx_queue *queue = &dev->_rx[i]; + struct kobject *kobj = &queue->kobj; if (!refcount_read(&dev_net(dev)->ns.count)) kobj->uevent_suppress = 1; if (dev->sysfs_rx_queue_group) sysfs_remove_group(kobj, dev->sysfs_rx_queue_group); + sysfs_remove_groups(kobj, queue->groups); kobject_put(kobj); } @@ -1244,9 +1348,11 @@ static int net_rx_queue_change_owner(struct net_device *dev, int num, */ struct netdev_queue_attribute { struct attribute attr; - ssize_t (*show)(struct netdev_queue *queue, char *buf); - ssize_t (*store)(struct netdev_queue *queue, - const char *buf, size_t len); + ssize_t (*show)(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf); + ssize_t (*store)(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len); }; #define to_netdev_queue_attr(_attr) \ container_of(_attr, struct netdev_queue_attribute, attr) @@ -1263,7 +1369,7 @@ static ssize_t netdev_queue_attr_show(struct kobject *kobj, if (!attribute->show) return -EIO; - return attribute->show(queue, buf); + return attribute->show(kobj, attr, queue, buf); } static ssize_t netdev_queue_attr_store(struct kobject *kobj, @@ -1277,7 +1383,7 @@ static ssize_t netdev_queue_attr_store(struct kobject *kobj, if (!attribute->store) return -EIO; - return attribute->store(queue, buf, count); + return attribute->store(kobj, attr, queue, buf, count); } static const struct sysfs_ops netdev_queue_sysfs_ops = { @@ -1285,7 +1391,8 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = { .store = netdev_queue_attr_store, }; -static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf) +static ssize_t tx_timeout_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout); @@ -1303,18 +1410,18 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue) return i; } -static ssize_t traffic_class_show(struct netdev_queue *queue, - char *buf) +static ssize_t traffic_class_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; - int num_tc, tc; - int index; + int num_tc, tc, index, ret; if (!netif_is_multiqueue(dev)) return -ENOENT; - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(kobj, attr, queue->dev); + if (ret) + return ret; index = get_netdev_queue_index(queue); @@ -1341,24 +1448,25 @@ static ssize_t traffic_class_show(struct netdev_queue *queue, } #ifdef CONFIG_XPS -static ssize_t tx_maxrate_show(struct netdev_queue *queue, - char *buf) +static ssize_t tx_maxrate_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { return sysfs_emit(buf, "%lu\n", queue->tx_maxrate); } -static ssize_t tx_maxrate_store(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t tx_maxrate_store(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { - struct net_device *dev = queue->dev; int err, index = get_netdev_queue_index(queue); + struct net_device *dev = queue->dev; u32 rate = 0; if (!capable(CAP_NET_ADMIN)) return -EPERM; /* The check is also done later; this helps returning early without - * hitting the trylock/restart below. + * hitting the locking section below. */ if (!dev->netdev_ops->ndo_set_tx_maxrate) return -EOPNOTSUPP; @@ -1367,18 +1475,21 @@ static ssize_t tx_maxrate_store(struct netdev_queue *queue, if (err < 0) return err; - if (!rtnl_trylock()) - return restart_syscall(); + err = sysfs_rtnl_lock(kobj, attr, dev); + if (err) + return err; err = -EOPNOTSUPP; if (dev->netdev_ops->ndo_set_tx_maxrate) err = dev->netdev_ops->ndo_set_tx_maxrate(dev, index, rate); - rtnl_unlock(); if (!err) { queue->tx_maxrate = rate; + rtnl_unlock(); return len; } + + rtnl_unlock(); return err; } @@ -1422,16 +1533,17 @@ static ssize_t bql_set(const char *buf, const size_t count, return count; } -static ssize_t bql_show_hold_time(struct netdev_queue *queue, - char *buf) +static ssize_t bql_show_hold_time(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time)); } -static ssize_t bql_set_hold_time(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t bql_set_hold_time(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { struct dql *dql = &queue->dql; unsigned int value; @@ -1450,15 +1562,17 @@ static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init = __ATTR(hold_time, 0644, bql_show_hold_time, bql_set_hold_time); -static ssize_t bql_show_stall_thrs(struct netdev_queue *queue, char *buf) +static ssize_t bql_show_stall_thrs(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->stall_thrs)); } -static ssize_t bql_set_stall_thrs(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t bql_set_stall_thrs(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { struct dql *dql = &queue->dql; unsigned int value; @@ -1484,13 +1598,15 @@ static ssize_t bql_set_stall_thrs(struct netdev_queue *queue, static struct netdev_queue_attribute bql_stall_thrs_attribute __ro_after_init = __ATTR(stall_thrs, 0644, bql_show_stall_thrs, bql_set_stall_thrs); -static ssize_t bql_show_stall_max(struct netdev_queue *queue, char *buf) +static ssize_t bql_show_stall_max(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { return sysfs_emit(buf, "%u\n", READ_ONCE(queue->dql.stall_max)); } -static ssize_t bql_set_stall_max(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t bql_set_stall_max(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { WRITE_ONCE(queue->dql.stall_max, 0); return len; @@ -1499,7 +1615,8 @@ static ssize_t bql_set_stall_max(struct netdev_queue *queue, static struct netdev_queue_attribute bql_stall_max_attribute __ro_after_init = __ATTR(stall_max, 0644, bql_show_stall_max, bql_set_stall_max); -static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf) +static ssize_t bql_show_stall_cnt(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; @@ -1509,8 +1626,8 @@ static ssize_t bql_show_stall_cnt(struct netdev_queue *queue, char *buf) static struct netdev_queue_attribute bql_stall_cnt_attribute __ro_after_init = __ATTR(stall_cnt, 0444, bql_show_stall_cnt, NULL); -static ssize_t bql_show_inflight(struct netdev_queue *queue, - char *buf) +static ssize_t bql_show_inflight(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct dql *dql = &queue->dql; @@ -1521,13 +1638,16 @@ static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init = __ATTR(inflight, 0444, bql_show_inflight, NULL); #define BQL_ATTR(NAME, FIELD) \ -static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \ - char *buf) \ +static ssize_t bql_show_ ## NAME(struct kobject *kobj, \ + struct attribute *attr, \ + struct netdev_queue *queue, char *buf) \ { \ return bql_show(buf, queue->dql.FIELD); \ } \ \ -static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \ +static ssize_t bql_set_ ## NAME(struct kobject *kobj, \ + struct attribute *attr, \ + struct netdev_queue *queue, \ const char *buf, size_t len) \ { \ return bql_set(buf, len, &queue->dql.FIELD); \ @@ -1613,19 +1733,21 @@ out_no_maps: return len < PAGE_SIZE ? len : -EINVAL; } -static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf) +static ssize_t xps_cpus_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; unsigned int index; - int len, tc; + int len, tc, ret; if (!netif_is_multiqueue(dev)) return -ENOENT; index = get_netdev_queue_index(queue); - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(kobj, attr, queue->dev); + if (ret) + return ret; /* If queue belongs to subordinate dev use its map */ dev = netdev_get_tx_queue(dev, index)->sb_dev ? : dev; @@ -1636,18 +1758,21 @@ static ssize_t xps_cpus_show(struct netdev_queue *queue, char *buf) return -EINVAL; } - /* Make sure the subordinate device can't be freed */ - get_device(&dev->dev); + /* Increase the net device refcnt to make sure it won't be freed while + * xps_queue_show is running. + */ + dev_hold(dev); rtnl_unlock(); len = xps_queue_show(dev, index, tc, buf, XPS_CPUS); - put_device(&dev->dev); + dev_put(dev); return len; } -static ssize_t xps_cpus_store(struct netdev_queue *queue, - const char *buf, size_t len) +static ssize_t xps_cpus_store(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, + size_t len) { struct net_device *dev = queue->dev; unsigned int index; @@ -1671,9 +1796,10 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue, return err; } - if (!rtnl_trylock()) { + err = sysfs_rtnl_lock(kobj, attr, dev); + if (err) { free_cpumask_var(mask); - return restart_syscall(); + return err; } err = netif_set_xps_queue(dev, mask, index); @@ -1687,26 +1813,34 @@ static ssize_t xps_cpus_store(struct netdev_queue *queue, static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init = __ATTR_RW(xps_cpus); -static ssize_t xps_rxqs_show(struct netdev_queue *queue, char *buf) +static ssize_t xps_rxqs_show(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; unsigned int index; - int tc; + int tc, ret; index = get_netdev_queue_index(queue); - if (!rtnl_trylock()) - return restart_syscall(); + ret = sysfs_rtnl_lock(kobj, attr, dev); + if (ret) + return ret; tc = netdev_txq_to_tc(dev, index); + + /* Increase the net device refcnt to make sure it won't be freed while + * xps_queue_show is running. + */ + dev_hold(dev); rtnl_unlock(); - if (tc < 0) - return -EINVAL; - return xps_queue_show(dev, index, tc, buf, XPS_RXQS); + ret = tc >= 0 ? xps_queue_show(dev, index, tc, buf, XPS_RXQS) : -EINVAL; + dev_put(dev); + return ret; } -static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, +static ssize_t xps_rxqs_store(struct kobject *kobj, struct attribute *attr, + struct netdev_queue *queue, const char *buf, size_t len) { struct net_device *dev = queue->dev; @@ -1730,9 +1864,10 @@ static ssize_t xps_rxqs_store(struct netdev_queue *queue, const char *buf, return err; } - if (!rtnl_trylock()) { + err = sysfs_rtnl_lock(kobj, attr, dev); + if (err) { bitmap_free(mask); - return restart_syscall(); + return err; } cpus_read_lock(); @@ -1792,7 +1927,6 @@ static void netdev_queue_get_ownership(const struct kobject *kobj, static const struct kobj_type netdev_queue_ktype = { .sysfs_ops = &netdev_queue_sysfs_ops, .release = netdev_queue_release, - .default_groups = netdev_queue_default_groups, .namespace = netdev_queue_namespace, .get_ownership = netdev_queue_get_ownership, }; @@ -1811,6 +1945,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) struct kobject *kobj = &queue->kobj; int error = 0; + /* Tx queues are cleared in netdev_queue_release to allow later + * re-registration. This is triggered when their kobj refcount is + * dropped. + * + * If a queue is removed while both a read (or write) operation and a + * the re-addition of the same queue are pending (waiting on rntl_lock) + * it might happen that the re-addition will execute before the read, + * making the initial removal to never happen (queue's kobj refcount + * won't drop enough because of the pending read). In such rare case, + * return to allow the removal operation to complete. + */ + if (unlikely(kobj->state_initialized)) { + netdev_warn_once(dev, "Cannot re-add tx queues before their removal completed"); + return -EAGAIN; + } + /* Kobject_put later will trigger netdev_queue_release call * which decreases dev refcount: Take that reference here */ @@ -1822,15 +1972,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) if (error) goto err; + queue->groups = netdev_queue_default_groups; + error = sysfs_create_groups(kobj, queue->groups); + if (error) + goto err; + if (netdev_uses_bql(dev)) { error = sysfs_create_group(kobj, &dql_group); if (error) - goto err; + goto err_default_groups; } kobject_uevent(kobj, KOBJ_ADD); return 0; +err_default_groups: + sysfs_remove_groups(kobj, queue->groups); err: kobject_put(kobj); return error; @@ -1885,6 +2042,7 @@ netdev_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) if (netdev_uses_bql(dev)) sysfs_remove_group(&queue->kobj, &dql_group); + sysfs_remove_groups(&queue->kobj, queue->groups); kobject_put(&queue->kobj); } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1f4d4b5570ab8..cb7fad8d1f95f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -80,6 +80,11 @@ void rtnl_lock(void) } EXPORT_SYMBOL(rtnl_lock); +int rtnl_lock_interruptible(void) +{ + return mutex_lock_interruptible(&rtnl_mutex); +} + int rtnl_lock_killable(void) { return mutex_lock_killable(&rtnl_mutex); diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 2bd77c94f9f1a..5489d0c9d13f9 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -213,6 +213,24 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(10, T1S, Half), __DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half), __DEFINE_LINK_MODE_NAME(10, T1BRR, Full), + __DEFINE_LINK_MODE_NAME(200000, CR, Full), + __DEFINE_LINK_MODE_NAME(200000, KR, Full), + __DEFINE_LINK_MODE_NAME(200000, DR, Full), + __DEFINE_LINK_MODE_NAME(200000, DR_2, Full), + __DEFINE_LINK_MODE_NAME(200000, SR, Full), + __DEFINE_LINK_MODE_NAME(200000, VR, Full), + __DEFINE_LINK_MODE_NAME(400000, CR2, Full), + __DEFINE_LINK_MODE_NAME(400000, KR2, Full), + __DEFINE_LINK_MODE_NAME(400000, DR2, Full), + __DEFINE_LINK_MODE_NAME(400000, DR2_2, Full), + __DEFINE_LINK_MODE_NAME(400000, SR2, Full), + __DEFINE_LINK_MODE_NAME(400000, VR2, Full), + __DEFINE_LINK_MODE_NAME(800000, CR4, Full), + __DEFINE_LINK_MODE_NAME(800000, KR4, Full), + __DEFINE_LINK_MODE_NAME(800000, DR4, Full), + __DEFINE_LINK_MODE_NAME(800000, DR4_2, Full), + __DEFINE_LINK_MODE_NAME(800000, SR4, Full), + __DEFINE_LINK_MODE_NAME(800000, VR4, Full), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); @@ -221,8 +239,11 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_CR4 4 #define __LINK_MODE_LANES_CR8 8 #define __LINK_MODE_LANES_DR 1 +#define __LINK_MODE_LANES_DR_2 1 #define __LINK_MODE_LANES_DR2 2 +#define __LINK_MODE_LANES_DR2_2 2 #define __LINK_MODE_LANES_DR4 4 +#define __LINK_MODE_LANES_DR4_2 4 #define __LINK_MODE_LANES_DR8 8 #define __LINK_MODE_LANES_KR 1 #define __LINK_MODE_LANES_KR2 2 @@ -251,6 +272,9 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); #define __LINK_MODE_LANES_T1L 1 #define __LINK_MODE_LANES_T1S 1 #define __LINK_MODE_LANES_T1S_P2MP 1 +#define __LINK_MODE_LANES_VR 1 +#define __LINK_MODE_LANES_VR2 2 +#define __LINK_MODE_LANES_VR4 4 #define __LINK_MODE_LANES_VR8 8 #define __LINK_MODE_LANES_DR8_2 8 #define __LINK_MODE_LANES_T1BRR 1 @@ -378,6 +402,24 @@ const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(10, T1S, Half), __DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half), __DEFINE_LINK_MODE_PARAMS(10, T1BRR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, CR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, KR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, DR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, DR_2, Full), + __DEFINE_LINK_MODE_PARAMS(200000, SR, Full), + __DEFINE_LINK_MODE_PARAMS(200000, VR, Full), + __DEFINE_LINK_MODE_PARAMS(400000, CR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, KR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, DR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, DR2_2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, SR2, Full), + __DEFINE_LINK_MODE_PARAMS(400000, VR2, Full), + __DEFINE_LINK_MODE_PARAMS(800000, CR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, KR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, DR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, DR4_2, Full), + __DEFINE_LINK_MODE_PARAMS(800000, SR4, Full), + __DEFINE_LINK_MODE_PARAMS(800000, VR4, Full), }; static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 34bee42e12470..7609ce2b2c5e2 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -993,7 +993,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, return rc; /* Nonzero ring with RSS only makes sense if NIC adds them together */ - if (cmd == ETHTOOL_SRXCLSRLINS && info.flow_type & FLOW_RSS && + if (cmd == ETHTOOL_SRXCLSRLINS && info.fs.flow_type & FLOW_RSS && !ops->cap_rss_rxnfc_adds && ethtool_get_flow_spec_ring(info.fs.ring_cookie)) return -EINVAL; diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index 7cb106b590aba..58df9ad02ce8a 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -107,6 +107,8 @@ rss_prepare_ctx(const struct rss_req_info *request, struct net_device *dev, u32 total_size, indir_bytes; u8 *rss_config; + data->no_key_fields = !dev->ethtool_ops->rxfh_per_ctx_key; + ctx = xa_load(&dev->ethtool->rss_ctx, request->rss_context); if (!ctx) return -ENOENT; @@ -153,7 +155,6 @@ rss_prepare_data(const struct ethnl_req_info *req_base, if (!ops->cap_rss_ctx_supported && !ops->create_rxfh_context) return -EOPNOTSUPP; - data->no_key_fields = !ops->rxfh_per_ctx_key; return rss_prepare_ctx(request, dev, data, info); } diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index ed1b6b44faf80..c9f11a046c263 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -141,7 +141,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info, const struct iphdr *iph; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; - unsigned int data_len = 0; struct ip_tunnel *t; if (tpi->proto == htons(ETH_P_TEB)) @@ -182,7 +181,6 @@ static int ipgre_err(struct sk_buff *skb, u32 info, case ICMP_TIME_EXCEEDED: if (code != ICMP_EXC_TTL) return 0; - data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ break; case ICMP_REDIRECT: @@ -190,10 +188,16 @@ static int ipgre_err(struct sk_buff *skb, u32 info, } #if IS_ENABLED(CONFIG_IPV6) - if (tpi->proto == htons(ETH_P_IPV6) && - !ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, - type, data_len)) - return 0; + if (tpi->proto == htons(ETH_P_IPV6)) { + unsigned int data_len = 0; + + if (type == ICMP_TIME_EXCEEDED) + data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */ + + if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len, + type, data_len)) + return 0; + } #endif if (t->parms.iph.daddr == 0 || diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c472c9a57cf68..a9bb9ce5438ea 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1141,9 +1141,9 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) { + if (hlen + min(datalen, cork->gso_size) > cork->fragsize) { kfree_skb(skb); - return -EINVAL; + return -EMSGSIZE; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c index 28e5a89dc2557..2c383c12a4315 100644 --- a/net/ipv6/ioam6_iptunnel.c +++ b/net/ipv6/ioam6_iptunnel.c @@ -336,7 +336,7 @@ static int ioam6_do_encap(struct net *net, struct sk_buff *skb, static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb) { - struct dst_entry *dst = skb_dst(skb), *cache_dst; + struct dst_entry *dst = skb_dst(skb), *cache_dst = NULL; struct in6_addr orig_daddr; struct ioam6_lwt *ilwt; int err = -EINVAL; @@ -407,13 +407,15 @@ do_encap: cache_dst = ip6_route_output(net, NULL, &fl6); if (cache_dst->error) { err = cache_dst->error; - dst_release(cache_dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (dst->lwtstate != cache_dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&ilwt->cache, cache_dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(cache_dst->dev)); if (unlikely(err)) @@ -426,8 +428,10 @@ do_encap: return dst_output(net, sk, skb); } out: + dst_release(cache_dst); return dst->lwtstate->orig_output(net, sk, skb); drop: + dst_release(cache_dst); kfree_skb(skb); return err; } diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c index 7ba22d2f2bfef..0ac4283acdf20 100644 --- a/net/ipv6/rpl_iptunnel.c +++ b/net/ipv6/rpl_iptunnel.c @@ -232,13 +232,15 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { err = dst->error; - dst_release(dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (orig_dst->lwtstate != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&rlwt->cache, dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) @@ -251,6 +253,7 @@ static int rpl_output(struct net *net, struct sock *sk, struct sk_buff *skb) return dst_output(net, sk, skb); drop: + dst_release(dst); kfree_skb(skb); return err; } @@ -269,8 +272,10 @@ static int rpl_input(struct sk_buff *skb) local_bh_enable(); err = rpl_do_srh(skb, rlwt, dst); - if (unlikely(err)) + if (unlikely(err)) { + dst_release(dst); goto drop; + } if (!dst) { ip6_route_input(skb); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 4bf937bfc2633..33833b2064c07 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -482,8 +482,10 @@ static int seg6_input_core(struct net *net, struct sock *sk, local_bh_enable(); err = seg6_do_srh(skb, dst); - if (unlikely(err)) + if (unlikely(err)) { + dst_release(dst); goto drop; + } if (!dst) { ip6_route_input(skb); @@ -571,13 +573,15 @@ static int seg6_output_core(struct net *net, struct sock *sk, dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { err = dst->error; - dst_release(dst); goto drop; } - local_bh_disable(); - dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); - local_bh_enable(); + /* cache only if we don't create a dst reference loop */ + if (orig_dst->lwtstate != dst->lwtstate) { + local_bh_disable(); + dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr); + local_bh_enable(); + } err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev)); if (unlikely(err)) @@ -593,6 +597,7 @@ static int seg6_output_core(struct net *net, struct sock *sk, return dst_output(net, sk, skb); drop: + dst_release(dst); kfree_skb(skb); return err; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 6671daa67f4fa..c6ea438b5c758 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1389,9 +1389,9 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, const int hlen = skb_network_header_len(skb) + sizeof(struct udphdr); - if (hlen + cork->gso_size > cork->fragsize) { + if (hlen + min(datalen, cork->gso_size) > cork->fragsize) { kfree_skb(skb); - return -EINVAL; + return -EMSGSIZE; } if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 72c65d938a150..a4a668b88a8f2 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -701,11 +701,9 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct net_device *dev; ax25_address *source; ax25_uid_assoc *user; + int err = -EINVAL; int n; - if (!sock_flag(sk, SOCK_ZAPPED)) - return -EINVAL; - if (addr_len != sizeof(struct sockaddr_rose) && addr_len != sizeof(struct full_sockaddr_rose)) return -EINVAL; @@ -718,8 +716,15 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if ((unsigned int) addr->srose_ndigis > ROSE_MAX_DIGIS) return -EINVAL; - if ((dev = rose_dev_get(&addr->srose_addr)) == NULL) - return -EADDRNOTAVAIL; + lock_sock(sk); + + if (!sock_flag(sk, SOCK_ZAPPED)) + goto out_release; + + err = -EADDRNOTAVAIL; + dev = rose_dev_get(&addr->srose_addr); + if (!dev) + goto out_release; source = &addr->srose_call; @@ -730,7 +735,8 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } else { if (ax25_uid_policy && !capable(CAP_NET_BIND_SERVICE)) { dev_put(dev); - return -EACCES; + err = -EACCES; + goto out_release; } rose->source_call = *source; } @@ -753,8 +759,10 @@ static int rose_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) rose_insert_socket(sk); sock_reset_flag(sk, SOCK_ZAPPED); - - return 0; + err = 0; +out_release: + release_sock(sk); + return err; } static int rose_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 718193df9d2e2..f251845fe532c 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -582,6 +582,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */ RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */ RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */ + RXRPC_CALL_CONN_CHALLENGING, /* The connection is being challenged */ }; /* @@ -602,7 +603,6 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ - RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 5a543c3f6fb08..c4c8b46a68c67 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -22,7 +22,6 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", - [RXRPC_CALL_SERVER_SECURING] = "SvSecure", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", [RXRPC_CALL_SERVER_ACK_REQUEST] = "SvAckReq", [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", @@ -453,17 +452,16 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx, call->cong_tstamp = skb->tstamp; __set_bit(RXRPC_CALL_EXPOSED, &call->flags); - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); spin_lock(&conn->state_lock); switch (conn->state) { case RXRPC_CONN_SERVICE_UNSECURED: case RXRPC_CONN_SERVICE_CHALLENGING: - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING); + __set_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags); break; case RXRPC_CONN_SERVICE: - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); break; case RXRPC_CONN_ABORTED: diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 713e04394ceb7..4d9c5e21ba785 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -228,10 +228,8 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn) */ static void rxrpc_call_is_secure(struct rxrpc_call *call) { - if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) { - rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST); + if (call && __test_and_clear_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) rxrpc_notify_socket(call); - } } /* @@ -272,6 +270,7 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, * we've already received the packet, put it on the * front of the queue. */ + sp->conn = rxrpc_get_connection(conn, rxrpc_conn_get_poke_secured); skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED; rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured); skb_queue_head(&conn->local->rx_queue, skb); @@ -437,14 +436,16 @@ void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb) if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events)) rxrpc_abort_calls(conn); - switch (skb->mark) { - case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: - if (conn->state != RXRPC_CONN_SERVICE) - break; + if (skb) { + switch (skb->mark) { + case RXRPC_SKB_MARK_SERVICE_CONN_SECURED: + if (conn->state != RXRPC_CONN_SERVICE) + break; - for (loop = 0; loop < RXRPC_MAXCALLS; loop++) - rxrpc_call_is_secure(conn->channels[loop].call); - break; + for (loop = 0; loop < RXRPC_MAXCALLS; loop++) + rxrpc_call_is_secure(conn->channels[loop].call); + break; + } } /* Process delayed ACKs whose time has come. */ diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 7eba4d7d9a380..2f1fd1e2e7e48 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -67,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet, INIT_WORK(&conn->destructor, rxrpc_clean_up_connection); INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); + INIT_LIST_HEAD(&conn->attend_link); mutex_init(&conn->security_lock); mutex_init(&conn->tx_data_alloc_lock); skb_queue_head_init(&conn->rx_queue); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 4974b5accafa3..9047ba13bd31e 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -448,11 +448,19 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); bool last = sp->hdr.flags & RXRPC_LAST_PACKET; - skb_queue_tail(&call->recvmsg_queue, skb); + spin_lock_irq(&call->recvmsg_queue.lock); + + __skb_queue_tail(&call->recvmsg_queue, skb); rxrpc_input_update_ack_window(call, window, wtop); trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq); if (last) + /* Change the state inside the lock so that recvmsg syncs + * correctly with it and using sendmsg() to send a reply + * doesn't race. + */ rxrpc_end_rx_phase(call, sp->hdr.serial); + + spin_unlock_irq(&call->recvmsg_queue.lock); } /* @@ -657,7 +665,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb rxrpc_propose_delay_ACK(call, sp->hdr.serial, rxrpc_propose_ack_input_data); } - if (notify) { + if (notify && !test_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) { trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial); rxrpc_notify_socket(call); } diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0e8da909d4f2f..584397aba4a07 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -707,7 +707,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) } else { switch (rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_AWAIT_CONN: - case RXRPC_CALL_SERVER_SECURING: + case RXRPC_CALL_SERVER_RECV_REQUEST: if (p.command == RXRPC_CMD_SEND_ABORT) break; fallthrough; diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index b50b2c2cc09bc..e6bfd39ff3396 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -40,6 +40,9 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, { unsigned int prev_backlog; + if (unlikely(READ_ONCE(sch->limit) == 0)) + return qdisc_drop(skb, sch, to_free); + if (likely(sch->q.qlen < READ_ONCE(sch->limit))) return qdisc_enqueue_tail(skb, sch); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 71ec9986ed37f..fdd79d3ccd8ce 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -749,9 +749,9 @@ deliver: if (err != NET_XMIT_SUCCESS) { if (net_xmit_drop_count(err)) qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, pkt_len); sch->qstats.backlog -= pkt_len; sch->q.qlen--; + qdisc_tree_reduce_backlog(sch, 1, pkt_len); } goto tfifo_dequeue; } diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index 0712b5e82eb79..d027a07c1e2c1 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -17,9 +17,11 @@ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2) CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h) CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h) CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \ - $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) + $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) \ + $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_GENERATED_H,ethtool_netlink_generated.h) CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h) +CFLAGS_net_shaper:=$(call get_hdr_inc,_LINUX_NET_SHAPER_H,net_shaper.h) CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) CFLAGS_nlctrl:=$(call get_hdr_inc,__LINUX_GENERIC_NETLINK_H,genetlink.h) CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py index c2eabc90dce8c..b22082fd660e9 100755 --- a/tools/net/ynl/pyynl/ynl_gen_c.py +++ b/tools/net/ynl/pyynl/ynl_gen_c.py @@ -100,7 +100,7 @@ class Type(SpecAttr): if isinstance(value, int): return value if value in self.family.consts: - raise Exception("Resolving family constants not implemented, yet") + return self.family.consts[value]["value"] return limit_to_number(value) def get_limit_str(self, limit, default=None, suffix=''): @@ -110,6 +110,9 @@ class Type(SpecAttr): if isinstance(value, int): return str(value) + suffix if value in self.family.consts: + const = self.family.consts[value] + if const.get('header'): + return c_upper(value) return c_upper(f"{self.family['name']}-{value}") return c_upper(value) @@ -2549,6 +2552,9 @@ def render_uapi(family, cw): defines = [] for const in family['definitions']: + if const.get('header'): + continue + if const['type'] != 'const': cw.writes_defines(defines) defines = [] diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 137470bdee0c7..c7f1c443f2af0 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -7,6 +7,7 @@ TEST_INCLUDES := $(wildcard lib/py/*.py) \ TEST_PROGS := \ netcons_basic.sh \ + netcons_fragmented_msg.sh \ netcons_overflow.sh \ ping.py \ queues.py \ diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index ca8a7edff3dda..319aaa004c407 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -252,6 +252,7 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True): try: # this targets queue 4, which doesn't exist ntuple2 = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}") except CmdExitFailure: pass else: @@ -259,7 +260,13 @@ def test_rss_queue_reconfigure(cfg, main_ctx=True): # change the table to target queues 0 and 2 ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 1 0") # ntuple rule therefore targets queues 1 and 3 - ntuple2 = ethtool_create(cfg, "-N", flow) + try: + ntuple2 = ethtool_create(cfg, "-N", flow) + except CmdExitFailure: + ksft_pr("Driver does not support rss + queue offset") + return + + defer(ethtool, f"-N {cfg.ifname} delete {ntuple2}") # should replace existing filter ksft_eq(ntuple, ntuple2) _send_traffic_check(cfg, port, ctx_ref, { 'target': (1, 3), diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 3acaba41ac7b2..0c262b123fdd3 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -110,6 +110,13 @@ function create_dynamic_target() { echo 1 > "${NETCONS_PATH}"/enabled } +# Do not append the release to the header of the message +function disable_release_append() { + echo 0 > "${NETCONS_PATH}"/enabled + echo 0 > "${NETCONS_PATH}"/release + echo 1 > "${NETCONS_PATH}"/enabled +} + function cleanup() { local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device" diff --git a/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh new file mode 100755 index 0000000000000..4a71e01a230c7 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_fragmented_msg.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# Test netconsole's message fragmentation functionality. +# +# When a message exceeds the maximum packet size, netconsole splits it into +# multiple fragments for transmission. This test verifies: +# - Correct fragmentation of large messages +# - Proper reassembly of fragments at the receiver +# - Preservation of userdata across fragments +# - Behavior with and without kernel release version appending +# +# Author: Breno Leitao <leitao@debian.org> + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# set userdata to a long value. In this case, it is "1-2-3-4...50-" +USERDATA_VALUE=$(printf -- '%.2s-' {1..60}) + +# Convert the header string in a regexp, so, we can remove +# the second header as well. +# A header looks like "13,468,514729715,-,ncfrag=0/1135;". If +# release is appended, you might find something like:L +# "6.13.0-04048-g4f561a87745a,13,468,514729715,-,ncfrag=0/1135;" +function header_to_regex() { + # header is everything before ; + local HEADER="${1}" + REGEX=$(echo "${HEADER}" | cut -d'=' -f1) + echo "${REGEX}=[0-9]*\/[0-9]*;" +} + +# We have two headers in the message. Remove both to get the full message, +# and extract the full message. +function extract_msg() { + local MSGFILE="${1}" + # Extract the header, which is the very first thing that arrives in the + # first list. + HEADER=$(sed -n '1p' "${MSGFILE}" | cut -d';' -f1) + HEADER_REGEX=$(header_to_regex "${HEADER}") + + # Remove the two headers from the received message + # This will return the message without any header, similarly to what + # was sent. + sed "s/""${HEADER_REGEX}""//g" "${MSGFILE}" +} + +# Validate the message, which has two messages glued together. +# unwrap them to make sure all the characters were transmitted. +# File will look like the following: +# 13,468,514729715,-,ncfrag=0/1135;<message> +# key=<part of key>-13,468,514729715,-,ncfrag=967/1135;<rest of the key> +function validate_fragmented_result() { + # Discard the netconsole headers, and assemble the full message + RCVMSG=$(extract_msg "${1}") + + # check for the main message + if ! echo "${RCVMSG}" | grep -q "${MSG}"; then + echo "Message body doesn't match." >&2 + echo "msg received=" "${RCVMSG}" >&2 + exit "${ksft_fail}" + fi + + # check userdata + if ! echo "${RCVMSG}" | grep -q "${USERDATA_VALUE}"; then + echo "message userdata doesn't match" >&2 + echo "msg received=" "${RCVMSG}" >&2 + exit "${ksft_fail}" + fi + # test passed. hooray +} + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network +# Create a dynamic target for netconsole +create_dynamic_target +# Set userdata "key" with the "value" value +set_user_data + + +# TEST 1: Send message and userdata. They will fragment +# ======= +MSG=$(printf -- 'MSG%.3s=' {1..150}) + +# Listen for netconsole port inside the namespace and destination interface +listen_port_and_save_to "${OUTPUT_FILE}" & +# Wait for socat to start and listen to the port. +wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +# Send the message +echo "${MSG}: ${TARGET}" > /dev/kmsg +# Wait until socat saves the file to disk +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +# Check if the message was not corrupted +validate_fragmented_result "${OUTPUT_FILE}" + +# TEST 2: Test with smaller message, and without release appended +# ======= +MSG=$(printf -- 'FOOBAR%.3s=' {1..100}) +# Let's disable release and test again. +disable_release_append + +listen_port_and_save_to "${OUTPUT_FILE}" & +wait_local_port_listen "${NAMESPACE}" "${PORT}" udp +echo "${MSG}: ${TARGET}" > /dev/kmsg +busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" +validate_fragmented_result "${OUTPUT_FILE}" +exit "${ksft_pass}" diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index e5d06fb402335..15601402dee65 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -306,7 +306,8 @@ function check_result { result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \ grep -e 'livepatch:' -e 'test_klp' | \ grep -v '\(tainting\|taints\) kernel' | \ - sed 's/^\[[ 0-9.]*\] //') + sed 's/^\[[ 0-9.]*\] //' | \ + sed 's/^\[[ ]*[CT][0-9]*\] //') if [[ "$expect" == "$result" ]] ; then echo "ok" diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index d9d587454d207..8c1597ebc2d38 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -149,7 +149,7 @@ cfg_test_host_common() check_err $? "Failed to add $name host entry" bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null - check_fail $? "Managed to replace $name host entry" + check_err $? "Failed to replace $name host entry" bridge mdb del dev br0 port br0 grp $grp $state vid 10 bridge mdb get dev br0 grp $grp vid 10 &> /dev/null diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 3f9d50f1ef9ec..180c5eca556fe 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -740,6 +740,8 @@ test_learning() vxlan_flood_test $mac $dst 0 10 0 + # The entry should age out when it only forwards traffic + $MZ $h1 -c 50 -d 1sec -p 64 -b $mac -B $dst -t icmp -q & sleep 60 bridge fdb show brport vx1 | grep $mac | grep -q self diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 414addef9a451..d240d02fa443a 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -1302,7 +1302,7 @@ again: return ret; if (cfg_truncate > 0) { - xdisconnect(fd); + shutdown(fd, SHUT_WR); } else if (--cfg_repeat > 0) { xdisconnect(fd); diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 3f2fca02fec53..36ff28af4b190 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -103,6 +103,19 @@ struct testcase testcases_v4[] = { .r_num_mss = 1, }, { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V4, + .gso_len = CONST_MSS_V4 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V4, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V4 + 1, + .gso_len = CONST_MSS_V4 + 2, + .tfail = true, + }, + { /* send a single MSS + 1B */ .tlen = CONST_MSS_V4 + 1, .gso_len = CONST_MSS_V4, @@ -206,6 +219,19 @@ struct testcase testcases_v6[] = { .r_num_mss = 1, }, { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V6, + .gso_len = CONST_MSS_V6 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V6, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V6 + 1, + .gso_len = CONST_MSS_V6 + 2, + .tfail = true + }, + { /* send a single MSS + 1B */ .tlen = CONST_MSS_V6 + 1, .gso_len = CONST_MSS_V6, diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk index 12e7cae251be3..e907c2751956f 100644 --- a/tools/testing/selftests/net/ynl.mk +++ b/tools/testing/selftests/net/ynl.mk @@ -27,7 +27,8 @@ $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig: $(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig $(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a - $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a + $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl \ + GENS="$(YNL_GENS)" RSTS="" libynl.a $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a EXTRA_CLEAN += \ diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index d3dd65b05b5f1..9044ac0541672 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -94,5 +94,37 @@ "$TC qdisc del dev $DUMMY ingress", "$IP addr del 10.10.10.10/24 dev $DUMMY" ] - } + }, + { + "id": "a4b9", + "name": "Test class qlen notification", + "category": [ + "qdisc" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: drr", + "$TC filter add dev $DUMMY parent 1: basic classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr", + "$TC qdisc add dev $DUMMY parent 1:1 handle 2: netem", + "$TC qdisc add dev $DUMMY parent 2: handle 3: drr", + "$TC filter add dev $DUMMY parent 3: basic action drop", + "$TC class add dev $DUMMY parent 3: classid 3:1 drr", + "$TC class del dev $DUMMY classid 1:1", + "$TC class add dev $DUMMY parent 1: classid 1:1 drr" + ], + "cmdUnderTest": "ping -c1 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC qdisc ls dev $DUMMY", + "matchPattern": "drr 1: root", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY root handle 1: drr", + "$IP addr del 10.10.10.10/24 dev $DUMMY" + ] + } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json index ae3d286a32b2e..6f20d033670d4 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json @@ -313,6 +313,29 @@ "matchPattern": "qdisc bfifo 1: root", "matchCount": "0", "teardown": [ + ] + }, + { + "id": "d774", + "name": "Check pfifo_head_drop qdisc enqueue behaviour when limit == 0", + "category": [ + "qdisc", + "pfifo_head_drop" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$IP addr add 10.10.10.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: pfifo_head_drop limit 0", + "$IP link set dev $DUMMY up || true" + ], + "cmdUnderTest": "ping -c2 -W0.01 -I $DUMMY 10.10.10.1", + "expExitCode": "1", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "dropped 2", + "matchCount": "1", + "teardown": [ ] } ] |