summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/intel/ice/ice_txrx.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-10-02 15:17:01 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-10-02 15:17:01 -0700
commit07fdad3a93756b872da7b53647715c48d0f4a2d0 (patch)
tree133af559ac91e6b24358b57a025abc060a782129 /drivers/net/ethernet/intel/ice/ice_txrx.c
parentf79e772258df311c2cb21594ca0996318e720d28 (diff)
parentf1455695d2d99894b65db233877acac9a0e120b9 (diff)
Merge tag 'net-next-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni: "Core & protocols: - Improve drop account scalability on NUMA hosts for RAW and UDP sockets and the backlog, almost doubling the Pps capacity under DoS - Optimize the UDP RX performance under stress, reducing contention, revisiting the binary layout of the involved data structs and implementing NUMA-aware locking. This improves UDP RX performance by an additional 50%, even more under extreme conditions - Add support for PSP encryption of TCP connections; this mechanism has some similarities with IPsec and TLS, but offers superior HW offloads capabilities - Ongoing work to support Accurate ECN for TCP. AccECN allows more than one congestion notification signal per RTT and is a building block for Low Latency, Low Loss, and Scalable Throughput (L4S) - Reorganize the TCP socket binary layout for data locality, reducing the number of touched cachelines in the fastpath - Refactor skb deferral free to better scale on large multi-NUMA hosts, this improves TCP and UDP RX performances significantly on such HW - Increase the default socket memory buffer limits from 256K to 4M to better fit modern link speeds - Improve handling of setups with a large number of nexthop, making dump operating scaling linearly and avoiding unneeded synchronize_rcu() on delete - Improve bridge handling of VLAN FDB, storing a single entry per bridge instead of one entry per port; this makes the dump order of magnitude faster on large switches - Restore IP ID correctly for encapsulated packets at GSO segmentation time, allowing GRO to merge packets in more scenarios - Improve netfilter matching performance on large sets - Improve MPTCP receive path performance by leveraging recently introduced core infrastructure (skb deferral free) and adopting recent TCP autotuning changes - Allow bridges to redirect to a backup port when the bridge port is administratively down - Introduce MPTCP 'laminar' endpoint that con be used only once per connection and simplify common MPTCP setups - Add RCU safety to dst->dev, closing a lot of possible races - A significant crypto library API for SCTP, MPTCP and IPv6 SR, reducing code duplication - Supports pulling data from an skb frag into the linear area of an XDP buffer Things we sprinkled into general kernel code: - Generate netlink documentation from YAML using an integrated YAML parser Driver API: - Support using IPv6 Flow Label in Rx hash computation and RSS queue selection - Introduce API for fetching the DMA device for a given queue, allowing TCP zerocopy RX on more H/W setups - Make XDP helpers compatible with unreadable memory, allowing more easily building DevMem-enabled drivers with a unified XDP/skbs datapath - Add a new dedicated ethtool callback enabling drivers to provide the number of RX rings directly, improving efficiency and clarity in RX ring queries and RSS configuration - Introduce a burst period for the health reporter, allowing better handling of multiple errors due to the same root cause - Support for DPLL phase offset exponential moving average, controlling the average smoothing factor Device drivers: - Add a new Huawei driver for 3rd gen NIC (hinic3) - Add a new SpacemiT driver for K1 ethernet MAC - Add a generic abstraction for shared memory communication devices (dibps) - Ethernet high-speed NICs: - nVidia/Mellanox: - Use multiple per-queue doorbell, to avoid MMIO contention issues - support adjacent functions, allowing them to delegate their SR-IOV VFs to sibling PFs - support RSS for IPSec offload - support exposing raw cycle counters in PTP and mlx5 - support for disabling host PFs. - Intel (100G, ice, idpf): - ice: support for SRIOV VFs over an Active-Active link aggregate - ice: support for firmware logging via debugfs - ice: support for Earliest TxTime First (ETF) hardware offload - idpf: support basic XDP functionalities and XSk - Broadcom (bnxt): - support Hyper-V VF ID - dynamic SRIOV resource allocations for RoCE - Meta (fbnic): - support queue API, zero-copy Rx and Tx - support basic XDP functionalities - devlink health support for FW crashes and OTP mem corruptions - expand hardware stats coverage to FEC, PHY, and Pause - Wangxun: - support ethtool coalesce options - support for multiple RSS contexts - Ethernet virtual: - Macsec: - replace custom netlink attribute checks with policy-level checks - Bonding: - support aggregator selection based on port priority - Microsoft vNIC: - use page pool fragments for RX buffers instead of full pages to improve memory efficiency - Ethernet NICs consumer, and embedded: - Qualcomm: support Ethernet function for IPQ9574 SoC - Airoha: implement wlan offloading via NPU - Freescale - enetc: add NETC timer PTP driver and add PTP support - fec: enable the Jumbo frame support for i.MX8QM - Renesas (R-Car S4): - support HW offloading for layer 2 switching - support for RZ/{T2H, N2H} SoCs - Cadence (macb): support TAPRIO traffic scheduling - TI: - support for Gigabit ICSS ethernet SoC (icssm-prueth) - Synopsys (stmmac): a lot of cleanups - Ethernet PHYs: - Support 10g-qxgmi phy-mode for AQR412C, Felix DSA and Lynx PCS driver - Support bcm63268 GPHY power control - Support for Micrel lan8842 PHY and PTP - Support for Aquantia AQR412 and AQR115 - CAN: - a large CAN-XL preparation work - reorganize raw_sock and uniqframe struct to minimize memory usage - rcar_canfd: update the CAN-FD handling - WiFi: - extended Neighbor Awareness Networking (NAN) support - S1G channel representation cleanup - improve S1G support - WiFi drivers: - Intel (iwlwifi): - major refactor and cleanup - Broadcom (brcm80211): - support for AP isolation - RealTek (rtw88/89) rtw88/89: - preparation work for RTL8922DE support - MediaTek (mt76): - HW restart improvements - MLO support - Qualcomm/Atheros (ath10k): - GTK rekey fixes - Bluetooth drivers: - btusb: support for several new IDs for MT7925 - btintel: support for BlazarIW core - btintel_pcie: support for _suspend() / _resume() - btintel_pcie: support for Scorpious, Panther Lake-H484 IDs" * tag 'net-next-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1536 commits) net: stmmac: Add support for Allwinner A523 GMAC200 dt-bindings: net: sun8i-emac: Add A523 GMAC200 compatible Revert "Documentation: net: add flow control guide and document ethtool API" octeontx2-pf: fix bitmap leak octeontx2-vf: fix bitmap leak net/mlx5e: Use extack in set rxfh callback net/mlx5e: Introduce mlx5e_rss_params for RSS configuration net/mlx5e: Introduce mlx5e_rss_init_params net/mlx5e: Remove unused mdev param from RSS indir init net/mlx5: Improve QoS error messages with actual depth values net/mlx5e: Prevent entering switchdev mode with inconsistent netns net/mlx5: HWS, Generalize complex matchers net/mlx5: Improve write-combining test reliability for ARM64 Grace CPUs selftests/net: add tcp_port_share to .gitignore Revert "net/mlx5e: Update and set Xon/Xoff upon MTU set" net: add NUMA awareness to skb_attempt_defer_free() net: use llist for sd->defer_list net: make softnet_data.defer_count an atomic selftests: drv-net: psp: add tests for destroying devices selftests: drv-net: psp: add test for auto-adjusting TCP MSS ...
Diffstat (limited to 'drivers/net/ethernet/intel/ice/ice_txrx.c')
-rw-r--r--drivers/net/ethernet/intel/ice/ice_txrx.c188
1 files changed, 177 insertions, 11 deletions
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 41e7e29879a3..73f08d02f9c7 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -144,6 +144,56 @@ static struct netdev_queue *txring_txq(const struct ice_tx_ring *ring)
}
/**
+ * ice_clean_tstamp_ring - clean time stamp ring
+ * @tx_ring: Tx ring to clean the Time Stamp ring for
+ */
+static void ice_clean_tstamp_ring(struct ice_tx_ring *tx_ring)
+{
+ struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring;
+ u32 size;
+
+ if (!tstamp_ring->desc)
+ return;
+
+ size = ALIGN(tstamp_ring->count * sizeof(struct ice_ts_desc),
+ PAGE_SIZE);
+ memset(tstamp_ring->desc, 0, size);
+ tstamp_ring->next_to_use = 0;
+}
+
+/**
+ * ice_free_tstamp_ring - free time stamp resources per queue
+ * @tx_ring: Tx ring to free the Time Stamp ring for
+ */
+void ice_free_tstamp_ring(struct ice_tx_ring *tx_ring)
+{
+ struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring;
+ u32 size;
+
+ if (!tstamp_ring->desc)
+ return;
+
+ ice_clean_tstamp_ring(tx_ring);
+ size = ALIGN(tstamp_ring->count * sizeof(struct ice_ts_desc),
+ PAGE_SIZE);
+ dmam_free_coherent(tx_ring->dev, size, tstamp_ring->desc,
+ tstamp_ring->dma);
+ tstamp_ring->desc = NULL;
+}
+
+/**
+ * ice_free_tx_tstamp_ring - free time stamp resources per Tx ring
+ * @tx_ring: Tx ring to free the Time Stamp ring for
+ */
+void ice_free_tx_tstamp_ring(struct ice_tx_ring *tx_ring)
+{
+ ice_free_tstamp_ring(tx_ring);
+ kfree_rcu(tx_ring->tstamp_ring, rcu);
+ tx_ring->tstamp_ring = NULL;
+ tx_ring->flags &= ~ICE_TX_FLAGS_TXTIME;
+}
+
+/**
* ice_clean_tx_ring - Free any empty Tx buffers
* @tx_ring: ring to be cleaned
*/
@@ -181,6 +231,9 @@ tx_skip_free:
/* cleanup Tx queue statistics */
netdev_tx_reset_queue(txring_txq(tx_ring));
+
+ if (ice_is_txtime_cfg(tx_ring))
+ ice_free_tx_tstamp_ring(tx_ring);
}
/**
@@ -332,6 +385,84 @@ static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget)
}
/**
+ * ice_alloc_tstamp_ring - allocate the Time Stamp ring
+ * @tx_ring: Tx ring to allocate the Time Stamp ring for
+ *
+ * Return: 0 on success, negative on error
+ */
+static int ice_alloc_tstamp_ring(struct ice_tx_ring *tx_ring)
+{
+ struct ice_tstamp_ring *tstamp_ring;
+
+ /* allocate with kzalloc(), free with kfree_rcu() */
+ tstamp_ring = kzalloc(sizeof(*tstamp_ring), GFP_KERNEL);
+ if (!tstamp_ring)
+ return -ENOMEM;
+
+ tstamp_ring->tx_ring = tx_ring;
+ tx_ring->tstamp_ring = tstamp_ring;
+ tstamp_ring->desc = NULL;
+ tstamp_ring->count = ice_calc_ts_ring_count(tx_ring);
+ tx_ring->flags |= ICE_TX_FLAGS_TXTIME;
+ return 0;
+}
+
+/**
+ * ice_setup_tstamp_ring - allocate the Time Stamp ring
+ * @tx_ring: Tx ring to set up the Time Stamp ring for
+ *
+ * Return: 0 on success, negative on error
+ */
+static int ice_setup_tstamp_ring(struct ice_tx_ring *tx_ring)
+{
+ struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring;
+ struct device *dev = tx_ring->dev;
+ u32 size;
+
+ /* round up to nearest page */
+ size = ALIGN(tstamp_ring->count * sizeof(struct ice_ts_desc),
+ PAGE_SIZE);
+ tstamp_ring->desc = dmam_alloc_coherent(dev, size, &tstamp_ring->dma,
+ GFP_KERNEL);
+ if (!tstamp_ring->desc) {
+ dev_err(dev, "Unable to allocate memory for Time stamp Ring, size=%d\n",
+ size);
+ return -ENOMEM;
+ }
+
+ tstamp_ring->next_to_use = 0;
+ return 0;
+}
+
+/**
+ * ice_alloc_setup_tstamp_ring - Allocate and setup the Time Stamp ring
+ * @tx_ring: Tx ring to allocate and setup the Time Stamp ring for
+ *
+ * Return: 0 on success, negative on error
+ */
+int ice_alloc_setup_tstamp_ring(struct ice_tx_ring *tx_ring)
+{
+ struct device *dev = tx_ring->dev;
+ int err;
+
+ err = ice_alloc_tstamp_ring(tx_ring);
+ if (err) {
+ dev_err(dev, "Unable to allocate Time stamp ring for Tx ring %d\n",
+ tx_ring->q_index);
+ return err;
+ }
+
+ err = ice_setup_tstamp_ring(tx_ring);
+ if (err) {
+ dev_err(dev, "Unable to setup Time stamp ring for Tx ring %d\n",
+ tx_ring->q_index);
+ ice_free_tx_tstamp_ring(tx_ring);
+ return err;
+ }
+ return 0;
+}
+
+/**
* ice_setup_tx_ring - Allocate the Tx descriptors
* @tx_ring: the Tx ring to set up
*
@@ -1031,10 +1162,9 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
skb_metadata_set(skb, metasize);
if (unlikely(xdp_buff_has_frags(xdp)))
- xdp_update_skb_shared_info(skb, nr_frags,
- sinfo->xdp_frags_size,
- nr_frags * xdp->frame_sz,
- xdp_buff_is_frag_pfmemalloc(xdp));
+ xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size,
+ nr_frags * xdp->frame_sz,
+ xdp_buff_get_skb_flags(xdp));
return skb;
}
@@ -1111,10 +1241,10 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp)
memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0],
sizeof(skb_frag_t) * nr_frags);
- xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags,
- sinfo->xdp_frags_size,
- nr_frags * xdp->frame_sz,
- xdp_buff_is_frag_pfmemalloc(xdp));
+ xdp_update_skb_frags_info(skb, skinfo->nr_frags + nr_frags,
+ sinfo->xdp_frags_size,
+ nr_frags * xdp->frame_sz,
+ xdp_buff_get_skb_flags(xdp));
}
return skb;
@@ -1823,10 +1953,46 @@ ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first,
/* notify HW of packet */
kick = __netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount,
netdev_xmit_more());
- if (kick)
- /* notify HW of packet */
- writel(i, tx_ring->tail);
+ if (!kick)
+ return;
+ if (ice_is_txtime_cfg(tx_ring)) {
+ struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring;
+ u32 tstamp_count = tstamp_ring->count;
+ u32 j = tstamp_ring->next_to_use;
+ struct ice_ts_desc *ts_desc;
+ struct timespec64 ts;
+ u32 tstamp;
+
+ ts = ktime_to_timespec64(first->skb->tstamp);
+ tstamp = ts.tv_nsec >> ICE_TXTIME_CTX_RESOLUTION_128NS;
+
+ ts_desc = ICE_TS_DESC(tstamp_ring, j);
+ ts_desc->tx_desc_idx_tstamp = ice_build_tstamp_desc(i, tstamp);
+
+ j++;
+ if (j == tstamp_count) {
+ u32 fetch = tstamp_count - tx_ring->count;
+
+ j = 0;
+
+ /* To prevent an MDD, when wrapping the tstamp ring
+ * create additional TS descriptors equal to the number
+ * of the fetch TS descriptors value. HW will merge the
+ * TS descriptors with the same timestamp value into a
+ * single descriptor.
+ */
+ for (; j < fetch; j++) {
+ ts_desc = ICE_TS_DESC(tstamp_ring, j);
+ ts_desc->tx_desc_idx_tstamp =
+ ice_build_tstamp_desc(i, tstamp);
+ }
+ }
+ tstamp_ring->next_to_use = j;
+ writel_relaxed(j, tstamp_ring->tail);
+ } else {
+ writel_relaxed(i, tx_ring->tail);
+ }
return;
dma_error: