summaryrefslogtreecommitdiff
path: root/net/smc/smc_loopback.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-10-02 15:17:01 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-10-02 15:17:01 -0700
commit07fdad3a93756b872da7b53647715c48d0f4a2d0 (patch)
tree133af559ac91e6b24358b57a025abc060a782129 /net/smc/smc_loopback.c
parentf79e772258df311c2cb21594ca0996318e720d28 (diff)
parentf1455695d2d99894b65db233877acac9a0e120b9 (diff)
Merge tag 'net-next-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni: "Core & protocols: - Improve drop account scalability on NUMA hosts for RAW and UDP sockets and the backlog, almost doubling the Pps capacity under DoS - Optimize the UDP RX performance under stress, reducing contention, revisiting the binary layout of the involved data structs and implementing NUMA-aware locking. This improves UDP RX performance by an additional 50%, even more under extreme conditions - Add support for PSP encryption of TCP connections; this mechanism has some similarities with IPsec and TLS, but offers superior HW offload capabilities - Ongoing work to support Accurate ECN for TCP. AccECN allows more than one congestion notification signal per RTT and is a building block for Low Latency, Low Loss, and Scalable Throughput (L4S) - Reorganize the TCP socket binary layout for data locality, reducing the number of touched cachelines in the fastpath - Refactor skb deferral free to better scale on large multi-NUMA hosts, this improves TCP and UDP RX performances significantly on such HW - Increase the default socket memory buffer limits from 256K to 4M to better fit modern link speeds - Improve handling of setups with a large number of nexthops, making dump operations scale linearly and avoiding unneeded synchronize_rcu() on delete - Improve bridge handling of VLAN FDB, storing a single entry per bridge instead of one entry per port; this makes the dump an order of magnitude faster on large switches - Restore IP ID correctly for encapsulated packets at GSO segmentation time, allowing GRO to merge packets in more scenarios - Improve netfilter matching performance on large sets - Improve MPTCP receive path performance by leveraging recently introduced core infrastructure (skb deferral free) and adopting recent TCP autotuning changes - Allow bridges to redirect to a backup port when the bridge port is administratively down - Introduce MPTCP 'laminar' endpoint that can be used only once per connection and simplify 
common MPTCP setups - Add RCU safety to dst->dev, closing a lot of possible races - A significant crypto library API for SCTP, MPTCP and IPv6 SR, reducing code duplication - Supports pulling data from an skb frag into the linear area of an XDP buffer Things we sprinkled into general kernel code: - Generate netlink documentation from YAML using an integrated YAML parser Driver API: - Support using IPv6 Flow Label in Rx hash computation and RSS queue selection - Introduce API for fetching the DMA device for a given queue, allowing TCP zerocopy RX on more H/W setups - Make XDP helpers compatible with unreadable memory, allowing more easily building DevMem-enabled drivers with a unified XDP/skbs datapath - Add a new dedicated ethtool callback enabling drivers to provide the number of RX rings directly, improving efficiency and clarity in RX ring queries and RSS configuration - Introduce a burst period for the health reporter, allowing better handling of multiple errors due to the same root cause - Support for DPLL phase offset exponential moving average, controlling the average smoothing factor Device drivers: - Add a new Huawei driver for 3rd gen NIC (hinic3) - Add a new SpacemiT driver for K1 ethernet MAC - Add a generic abstraction for shared memory communication devices (dibps) - Ethernet high-speed NICs: - nVidia/Mellanox: - Use multiple per-queue doorbell, to avoid MMIO contention issues - support adjacent functions, allowing them to delegate their SR-IOV VFs to sibling PFs - support RSS for IPSec offload - support exposing raw cycle counters in PTP and mlx5 - support for disabling host PFs. 
- Intel (100G, ice, idpf): - ice: support for SRIOV VFs over an Active-Active link aggregate - ice: support for firmware logging via debugfs - ice: support for Earliest TxTime First (ETF) hardware offload - idpf: support basic XDP functionalities and XSk - Broadcom (bnxt): - support Hyper-V VF ID - dynamic SRIOV resource allocations for RoCE - Meta (fbnic): - support queue API, zero-copy Rx and Tx - support basic XDP functionalities - devlink health support for FW crashes and OTP mem corruptions - expand hardware stats coverage to FEC, PHY, and Pause - Wangxun: - support ethtool coalesce options - support for multiple RSS contexts - Ethernet virtual: - Macsec: - replace custom netlink attribute checks with policy-level checks - Bonding: - support aggregator selection based on port priority - Microsoft vNIC: - use page pool fragments for RX buffers instead of full pages to improve memory efficiency - Ethernet NICs consumer, and embedded: - Qualcomm: support Ethernet function for IPQ9574 SoC - Airoha: implement wlan offloading via NPU - Freescale - enetc: add NETC timer PTP driver and add PTP support - fec: enable the Jumbo frame support for i.MX8QM - Renesas (R-Car S4): - support HW offloading for layer 2 switching - support for RZ/{T2H, N2H} SoCs - Cadence (macb): support TAPRIO traffic scheduling - TI: - support for Gigabit ICSS ethernet SoC (icssm-prueth) - Synopsys (stmmac): a lot of cleanups - Ethernet PHYs: - Support 10g-qxgmi phy-mode for AQR412C, Felix DSA and Lynx PCS driver - Support bcm63268 GPHY power control - Support for Micrel lan8842 PHY and PTP - Support for Aquantia AQR412 and AQR115 - CAN: - a large CAN-XL preparation work - reorganize raw_sock and uniqframe struct to minimize memory usage - rcar_canfd: update the CAN-FD handling - WiFi: - extended Neighbor Awareness Networking (NAN) support - S1G channel representation cleanup - improve S1G support - WiFi drivers: - Intel (iwlwifi): - major refactor and cleanup - Broadcom (brcm80211): - support 
for AP isolation - RealTek (rtw88/89) rtw88/89: - preparation work for RTL8922DE support - MediaTek (mt76): - HW restart improvements - MLO support - Qualcomm/Atheros (ath10k): - GTK rekey fixes - Bluetooth drivers: - btusb: support for several new IDs for MT7925 - btintel: support for BlazarIW core - btintel_pcie: support for _suspend() / _resume() - btintel_pcie: support for Scorpious, Panther Lake-H484 IDs" * tag 'net-next-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1536 commits) net: stmmac: Add support for Allwinner A523 GMAC200 dt-bindings: net: sun8i-emac: Add A523 GMAC200 compatible Revert "Documentation: net: add flow control guide and document ethtool API" octeontx2-pf: fix bitmap leak octeontx2-vf: fix bitmap leak net/mlx5e: Use extack in set rxfh callback net/mlx5e: Introduce mlx5e_rss_params for RSS configuration net/mlx5e: Introduce mlx5e_rss_init_params net/mlx5e: Remove unused mdev param from RSS indir init net/mlx5: Improve QoS error messages with actual depth values net/mlx5e: Prevent entering switchdev mode with inconsistent netns net/mlx5: HWS, Generalize complex matchers net/mlx5: Improve write-combining test reliability for ARM64 Grace CPUs selftests/net: add tcp_port_share to .gitignore Revert "net/mlx5e: Update and set Xon/Xoff upon MTU set" net: add NUMA awareness to skb_attempt_defer_free() net: use llist for sd->defer_list net: make softnet_data.defer_count an atomic selftests: drv-net: psp: add tests for destroying devices selftests: drv-net: psp: add test for auto-adjusting TCP MSS ...
Diffstat (limited to 'net/smc/smc_loopback.c')
-rw-r--r--net/smc/smc_loopback.c425
1 files changed, 0 insertions, 425 deletions
diff --git a/net/smc/smc_loopback.c b/net/smc/smc_loopback.c
deleted file mode 100644
index 77cc1c6dc3e9..000000000000
--- a/net/smc/smc_loopback.c
+++ /dev/null
@@ -1,425 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Shared Memory Communications Direct over loopback-ism device.
- *
- * Functions for loopback-ism device.
- *
- * Copyright (c) 2024, Alibaba Inc.
- *
- * Author: Wen Gu <guwen@linux.alibaba.com>
- * Tony Lu <tonylu@linux.alibaba.com>
- *
- */
-
-#include <linux/device.h>
-#include <linux/types.h>
-#include <net/smc.h>
-
-#include "smc_cdc.h"
-#include "smc_ism.h"
-#include "smc_loopback.h"
-
-#define SMC_LO_V2_CAPABLE 0x1 /* loopback-ism acts as ISMv2 */
-#define SMC_LO_SUPPORT_NOCOPY 0x1 /* value returned by smc_lo_support_dmb_nocopy() */
-#define SMC_DMA_ADDR_INVALID (~(dma_addr_t)0) /* stored as dma_addr of every DMB registered here */
-
-static const char smc_lo_dev_name[] = "loopback-ism"; /* name given to the struct device in smc_lo_dev_probe() */
-static struct smc_lo_dev *lo_dev; /* set by smc_lo_dev_probe() once init succeeded */
-
-/* Derive the local GID and extended GID from one freshly generated UUID
- * and assign the reserved loopback CHID to the device.
- */
-static void smc_lo_generate_ids(struct smc_lo_dev *ldev)
-{
-	struct smcd_gid *gid = &ldev->local_gid;
-	const u8 *raw;
-	uuid_t id;
-
-	uuid_gen(&id);
-	raw = (const u8 *)&id;
-
-	/* leading UUID bytes feed gid, the bytes right behind them gid_ext */
-	memcpy(&gid->gid, raw, sizeof(gid->gid));
-	memcpy(&gid->gid_ext, raw + sizeof(gid->gid), sizeof(gid->gid_ext));
-
-	ldev->chid = SMC_LO_RESERVED_CHID;
-}
-
-/* Check whether @rgid names this loopback device.
- *
- * Returns 0 when both halves of @rgid equal the local GID, -ENETUNREACH
- * otherwise (including when the device carries no private data).
- * @vid_valid and @vid are not used.
- */
-static int smc_lo_query_rgid(struct smcd_dev *smcd, struct smcd_gid *rgid,
-			     u32 vid_valid, u32 vid)
-{
-	struct smc_lo_dev *ldev = smcd->priv;
-
-	if (!ldev)
-		return -ENETUNREACH;
-
-	/* rgid should be the same as lgid */
-	if (rgid->gid == ldev->local_gid.gid &&
-	    rgid->gid_ext == ldev->local_gid.gid_ext)
-		return 0;
-
-	return -ENETUNREACH;
-}
-
-/* Allocate a DMB backed by a zeroed folio and publish it in the device's
- * token hash table.
- *
- * On success @dmb is filled with the slot index, token, CPU address, DMA
- * address and length of the new buffer and 0 is returned. Returns -ENOSPC
- * when no slot index is free and -ENOMEM when an allocation fails.
- * @client_priv is not used.
- */
-static int smc_lo_register_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb,
-			       void *client_priv)
-{
-	struct smc_lo_dev *ldev = smcd->priv;
-	struct smc_lo_dmb_node *node, *cur;
-	struct folio *folio;
-	bool collision;
-	int idx, rc;
-
-	/* claim the first slot index that is still clear */
-	for_each_clear_bit(idx, ldev->sba_idx_mask, SMC_LO_MAX_DMBS) {
-		if (!test_and_set_bit(idx, ldev->sba_idx_mask))
-			break;
-	}
-	if (idx == SMC_LO_MAX_DMBS)
-		return -ENOSPC;
-
-	node = kzalloc(sizeof(*node), GFP_KERNEL);
-	if (!node) {
-		rc = -ENOMEM;
-		goto release_idx;
-	}
-	node->sba_idx = idx;
-	node->len = dmb->dmb_len;
-
-	/* not critical; fail under memory pressure and fallback to TCP */
-	folio = folio_alloc(GFP_KERNEL | __GFP_NOWARN | __GFP_NOMEMALLOC |
-			    __GFP_NORETRY | __GFP_ZERO,
-			    get_order(node->len));
-	if (!folio) {
-		rc = -ENOMEM;
-		goto free_node;
-	}
-	node->cpu_addr = folio_address(folio);
-	node->dma_addr = SMC_DMA_ADDR_INVALID;
-	refcount_set(&node->refcnt, 1);
-
-	/* draw random tokens until one is not yet present in the hash
-	 * table, then insert the node under the same write lock
-	 */
-	do {
-		collision = false;
-		get_random_bytes(&node->token, sizeof(node->token));
-		write_lock_bh(&ldev->dmb_ht_lock);
-		hash_for_each_possible(ldev->dmb_ht, cur, list, node->token) {
-			if (cur->token == node->token) {
-				collision = true;
-				break;
-			}
-		}
-		if (!collision)
-			hash_add(ldev->dmb_ht, &node->list, node->token);
-		write_unlock_bh(&ldev->dmb_ht_lock);
-	} while (collision);
-	atomic_inc(&ldev->dmb_cnt);
-
-	/* hand the buffer description back to the caller */
-	dmb->sba_idx = node->sba_idx;
-	dmb->dmb_tok = node->token;
-	dmb->cpu_addr = node->cpu_addr;
-	dmb->dma_addr = node->dma_addr;
-	dmb->dmb_len = node->len;
-
-	return 0;
-
-free_node:
-	kfree(node);
-release_idx:
-	clear_bit(idx, ldev->sba_idx_mask);
-	return rc;
-}
-
-/* Tear down @dmb_node: unhash it, release its slot index, drop the backing
- * folio and free the node. Wakes waiters on ldev_release when the device's
- * DMB count reaches zero.
- */
-static void __smc_lo_unregister_dmb(struct smc_lo_dev *ldev,
-				    struct smc_lo_dmb_node *dmb_node)
-{
-	struct folio *folio;
-
-	/* remove dmb from hash table */
-	write_lock_bh(&ldev->dmb_ht_lock);
-	hash_del(&dmb_node->list);
-	write_unlock_bh(&ldev->dmb_ht_lock);
-
-	clear_bit(dmb_node->sba_idx, ldev->sba_idx_mask);
-	folio = virt_to_folio(dmb_node->cpu_addr);
-	folio_put(folio);
-	kfree(dmb_node);
-
-	if (!atomic_dec_and_test(&ldev->dmb_cnt))
-		return;
-
-	/* that was the last DMB of this device */
-	wake_up(&ldev->ldev_release);
-}
-
-/* Drop one reference on the DMB identified by dmb->dmb_tok and destroy the
- * DMB when that was the last reference.
- *
- * Returns 0 on success, -EINVAL when no DMB with that token is hashed.
- */
-static int smc_lo_unregister_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
-{
-	struct smc_lo_dev *ldev = smcd->priv;
-	struct smc_lo_dmb_node *found = NULL;
-	struct smc_lo_dmb_node *cur;
-
-	/* find dmb from hash table */
-	read_lock_bh(&ldev->dmb_ht_lock);
-	hash_for_each_possible(ldev->dmb_ht, cur, list, dmb->dmb_tok) {
-		if (cur->token != dmb->dmb_tok)
-			continue;
-		found = cur;
-		break;
-	}
-	read_unlock_bh(&ldev->dmb_ht_lock);
-
-	if (!found)
-		return -EINVAL;
-
-	if (refcount_dec_and_test(&found->refcnt))
-		__smc_lo_unregister_dmb(ldev, found);
-	return 0;
-}
-
-/* Report the no-copy capability flag of loopback-ism; @smcd is not used. */
-static int smc_lo_support_dmb_nocopy(struct smcd_dev *smcd)
-{
-	const int nocopy = SMC_LO_SUPPORT_NOCOPY;
-
-	return nocopy;
-}
-
-/* Attach to an already registered DMB identified by dmb->dmb_tok.
- *
- * Takes an additional reference on the DMB and fills @dmb with its slot
- * index, token, CPU address, DMA address and length.
- *
- * Returns 0 on success, -EINVAL when no DMB with that token is hashed or
- * when the DMB is already on its way out (refcount dropped to zero).
- */
-static int smc_lo_attach_dmb(struct smcd_dev *smcd, struct smcd_dmb *dmb)
-{
-	struct smc_lo_dmb_node *dmb_node = NULL, *tmp_node;
-	struct smc_lo_dev *ldev = smcd->priv;
-
-	/* find dmb_node according to dmb->dmb_tok */
-	read_lock_bh(&ldev->dmb_ht_lock);
-	hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb->dmb_tok) {
-		if (tmp_node->token == dmb->dmb_tok) {
-			dmb_node = tmp_node;
-			break;
-		}
-	}
-	/* Take the reference while still holding the lock: the node is only
-	 * freed after it was removed from the hash table under the write
-	 * lock, so it cannot go away underneath us here. A zero refcount
-	 * means the dmb is being unregistered but has not been removed from
-	 * the hash table yet.
-	 */
-	if (dmb_node && !refcount_inc_not_zero(&dmb_node->refcnt))
-		dmb_node = NULL;
-	read_unlock_bh(&ldev->dmb_ht_lock);
-
-	if (!dmb_node)
-		return -EINVAL;
-
-	/* provide dmb information */
-	dmb->sba_idx = dmb_node->sba_idx;
-	dmb->dmb_tok = dmb_node->token;
-	dmb->cpu_addr = dmb_node->cpu_addr;
-	dmb->dma_addr = dmb_node->dma_addr;
-	dmb->dmb_len = dmb_node->len;
-	return 0;
-}
-
-/* Drop one reference on the DMB identified by @token and destroy the DMB
- * when that was the last reference.
- *
- * Returns 0 on success, -EINVAL when no DMB with that token is hashed.
- */
-static int smc_lo_detach_dmb(struct smcd_dev *smcd, u64 token)
-{
-	struct smc_lo_dev *ldev = smcd->priv;
-	struct smc_lo_dmb_node *found = NULL;
-	struct smc_lo_dmb_node *cur;
-
-	/* look the node up by its token */
-	read_lock_bh(&ldev->dmb_ht_lock);
-	hash_for_each_possible(ldev->dmb_ht, cur, list, token) {
-		if (cur->token != token)
-			continue;
-		found = cur;
-		break;
-	}
-	read_unlock_bh(&ldev->dmb_ht_lock);
-
-	if (!found)
-		return -EINVAL;
-
-	if (refcount_dec_and_test(&found->refcnt))
-		__smc_lo_unregister_dmb(ldev, found);
-	return 0;
-}
-
-/* Copy @size bytes from @data into the DMB identified by @dmb_tok at
- * @offset and schedule the receive tasklet of the connection bound to that
- * DMB's slot index.
- *
- * Nothing is copied when @sf is false: since sndbuf is merged with the peer
- * DMB, there is no need to copy data from sndbuf to peer DMB. @idx is not
- * used.
- *
- * Returns 0 on success, -EINVAL when no DMB with that token is hashed and
- * -EPIPE when the slot has no connection or the connection was killed.
- *
- * NOTE(review): @offset + @size is not validated against the DMB length
- * here; presumably callers guarantee it - confirm.
- */
-static int smc_lo_move_data(struct smcd_dev *smcd, u64 dmb_tok,
-			    unsigned int idx, bool sf, unsigned int offset,
-			    void *data, unsigned int size)
-{
-	struct smc_lo_dmb_node *rmb_node = NULL, *tmp_node;
-	struct smc_lo_dev *ldev = smcd->priv;
-	struct smc_connection *conn;
-	u32 sba_idx;
-
-	if (!sf)
-		return 0;
-
-	read_lock_bh(&ldev->dmb_ht_lock);
-	hash_for_each_possible(ldev->dmb_ht, tmp_node, list, dmb_tok) {
-		if (tmp_node->token == dmb_tok) {
-			rmb_node = tmp_node;
-			break;
-		}
-	}
-	if (!rmb_node) {
-		read_unlock_bh(&ldev->dmb_ht_lock);
-		return -EINVAL;
-	}
-	memcpy((char *)rmb_node->cpu_addr + offset, data, size);
-	/* Read the slot index before dropping the lock: once the lock is
-	 * released the node may be unhashed and freed by an unregister.
-	 */
-	sba_idx = rmb_node->sba_idx;
-	read_unlock_bh(&ldev->dmb_ht_lock);
-
-	conn = smcd->conn[sba_idx];
-	if (!conn || conn->killed)
-		return -EPIPE;
-	tasklet_schedule(&conn->rx_tsklet);
-	return 0;
-}
-
-/* Copy both halves of the device's local GID into @smcd_gid. */
-static void smc_lo_get_local_gid(struct smcd_dev *smcd,
-				 struct smcd_gid *smcd_gid)
-{
-	const struct smc_lo_dev *ldev = smcd->priv;
-	const struct smcd_gid *local = &ldev->local_gid;
-
-	smcd_gid->gid = local->gid;
-	smcd_gid->gid_ext = local->gid_ext;
-}
-
-/* Return the CHID stored in the loopback device behind @smcd. */
-static u16 smc_lo_get_chid(struct smcd_dev *smcd)
-{
-	struct smc_lo_dev *ldev = smcd->priv;
-
-	return ldev->chid;
-}
-
-/* Return the struct device embedded in the loopback device behind @smcd. */
-static struct device *smc_lo_get_dev(struct smcd_dev *smcd)
-{
-	struct smc_lo_dev *ldev = smcd->priv;
-
-	return &ldev->dev;
-}
-
-/* SMC-D operations implemented by loopback-ism.
- *
- * The VLAN callbacks (add_vlan_id, del_vlan_id, set_vlan_required,
- * reset_vlan_required) and signal_event are intentionally left unset, i.e.
- * they stay NULL.
- */
-static const struct smcd_ops lo_ops = {
-	/* identity */
-	.query_remote_gid	= smc_lo_query_rgid,
-	.get_local_gid		= smc_lo_get_local_gid,
-	.get_chid		= smc_lo_get_chid,
-	.get_dev		= smc_lo_get_dev,
-	/* DMB life cycle */
-	.register_dmb		= smc_lo_register_dmb,
-	.unregister_dmb		= smc_lo_unregister_dmb,
-	.support_dmb_nocopy	= smc_lo_support_dmb_nocopy,
-	.attach_dmb		= smc_lo_attach_dmb,
-	.detach_dmb		= smc_lo_detach_dmb,
-	/* data path */
-	.move_data		= smc_lo_move_data,
-};
-
-/* Allocate and initialise an smcd_dev with room for @max_dmbs connection
- * pointers and @ops installed as its operations.
- *
- * Returns the new device, or NULL when an allocation fails.
- */
-static struct smcd_dev *smcd_lo_alloc_dev(const struct smcd_ops *ops,
-					  int max_dmbs)
-{
-	struct smcd_dev *dev = kzalloc(sizeof(*dev), GFP_KERNEL);
-
-	if (!dev)
-		return NULL;
-
-	/* one connection slot per possible DMB */
-	dev->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *),
-			    GFP_KERNEL);
-	if (!dev->conn) {
-		kfree(dev);
-		return NULL;
-	}
-
-	dev->ops = ops;
-
-	spin_lock_init(&dev->lock);
-	spin_lock_init(&dev->lgr_lock);
-	INIT_LIST_HEAD(&dev->vlan);
-	INIT_LIST_HEAD(&dev->lgr_list);
-	init_waitqueue_head(&dev->lgrs_deleted);
-
-	return dev;
-}
-
-/* Create the smcd_dev for @ldev, link the two together and add the new
- * device to the global smcd device list.
- *
- * Returns 0 on success, -ENOMEM when the smcd_dev cannot be allocated.
- */
-static int smcd_lo_register_dev(struct smc_lo_dev *ldev)
-{
-	struct smcd_dev *dev = smcd_lo_alloc_dev(&lo_ops, SMC_LO_MAX_DMBS);
-
-	if (!dev)
-		return -ENOMEM;
-
-	/* cross-link loopback device and smcd device */
-	dev->priv = ldev;
-	ldev->smcd = dev;
-
-	smc_ism_set_v2_capable();
-
-	mutex_lock(&smcd_dev_list.mutex);
-	list_add(&dev->list, &smcd_dev_list.list);
-	mutex_unlock(&smcd_dev_list.mutex);
-
-	pr_warn_ratelimited("smc: adding smcd device %s\n",
-			    dev_name(&ldev->dev));
-	return 0;
-}
-
-/* Mark the smcd_dev of @ldev as going away, terminate everything using it,
- * unlink it from the global smcd device list and free it.
- */
-static void smcd_lo_unregister_dev(struct smc_lo_dev *ldev)
-{
-	struct smcd_dev *dev = ldev->smcd;
-
-	pr_warn_ratelimited("smc: removing smcd device %s\n",
-			    dev_name(&ldev->dev));
-
-	/* flag the device first, then terminate its users */
-	dev->going_away = 1;
-	smc_smcd_terminate_all(dev);
-
-	mutex_lock(&smcd_dev_list.mutex);
-	list_del_init(&dev->list);
-	mutex_unlock(&smcd_dev_list.mutex);
-
-	kfree(dev->conn);
-	kfree(dev);
-}
-
-/* Initialise the IDs, DMB hash table, DMB counter and release waitqueue of
- * @ldev, then register its smcd device.
- *
- * Returns the result of smcd_lo_register_dev().
- */
-static int smc_lo_dev_init(struct smc_lo_dev *ldev)
-{
-	int rc;
-
-	smc_lo_generate_ids(ldev);
-
-	/* DMB bookkeeping */
-	rwlock_init(&ldev->dmb_ht_lock);
-	hash_init(ldev->dmb_ht);
-	atomic_set(&ldev->dmb_cnt, 0);
-	init_waitqueue_head(&ldev->ldev_release);
-
-	rc = smcd_lo_register_dev(ldev);
-	return rc;
-}
-
-/* Unregister the smcd device of @ldev and block until the device's DMB
- * count has dropped to zero.
- */
-static void smc_lo_dev_exit(struct smc_lo_dev *ldev)
-{
-	smcd_lo_unregister_dev(ldev);
-
-	/* nothing to wait for when no DMB is left */
-	if (!atomic_read(&ldev->dmb_cnt))
-		return;
-
-	wait_event(ldev->ldev_release, !atomic_read(&ldev->dmb_cnt));
-}
-
-/* Release callback of the embedded struct device: frees the enclosing
- * smc_lo_dev.
- */
-static void smc_lo_dev_release(struct device *dev)
-{
-	kfree(container_of(dev, struct smc_lo_dev, dev));
-}
-
-/* Allocate, name and initialise the loopback-ism device and publish it in
- * the file-scope lo_dev pointer.
- *
- * Returns 0 on success, -ENOMEM when the device cannot be allocated, or the
- * error reported by dev_set_name() or smc_lo_dev_init(). On failure after
- * device_initialize() the device is released through put_device(), which
- * ends up in smc_lo_dev_release().
- */
-static int smc_lo_dev_probe(void)
-{
-	struct smc_lo_dev *ldev;
-	int ret;
-
-	ldev = kzalloc(sizeof(*ldev), GFP_KERNEL);
-	if (!ldev)
-		return -ENOMEM;
-
-	ldev->dev.parent = NULL;
-	ldev->dev.release = smc_lo_dev_release;
-	device_initialize(&ldev->dev);
-
-	/* pass the name as an argument, never as the format string, and do
-	 * not continue with an unnamed device if setting the name fails
-	 */
-	ret = dev_set_name(&ldev->dev, "%s", smc_lo_dev_name);
-	if (ret)
-		goto free_dev;
-
-	ret = smc_lo_dev_init(ldev);
-	if (ret)
-		goto free_dev;
-
-	lo_dev = ldev; /* global loopback device */
-	return 0;
-
-free_dev:
-	put_device(&ldev->dev);
-	return ret;
-}
-
-/* Shut down and release the global loopback device, if there is one.
- *
- * The global pointer is cleared before the final put_device() so that it
- * never refers to freed memory and a repeated call is a no-op.
- */
-static void smc_lo_dev_remove(void)
-{
-	struct smc_lo_dev *ldev = lo_dev;
-
-	if (!ldev)
-		return;
-
-	smc_lo_dev_exit(ldev);
-	lo_dev = NULL;
-	put_device(&ldev->dev); /* device_initialize in smc_lo_dev_probe */
-}
-
-/* Entry point: set up the loopback-ism device.
- *
- * Returns the result of smc_lo_dev_probe().
- */
-int smc_loopback_init(void)
-{
-	int rc = smc_lo_dev_probe();
-
-	return rc;
-}
-
-/* Exit point: tear down the loopback-ism device via smc_lo_dev_remove(). */
-void smc_loopback_exit(void)
-{
- smc_lo_dev_remove();
-}