summaryrefslogtreecommitdiff
path: root/net/ipv4/proc.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-10-02 15:17:01 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-10-02 15:17:01 -0700
commit07fdad3a93756b872da7b53647715c48d0f4a2d0 (patch)
tree133af559ac91e6b24358b57a025abc060a782129 /net/ipv4/proc.c
parentf79e772258df311c2cb21594ca0996318e720d28 (diff)
parentf1455695d2d99894b65db233877acac9a0e120b9 (diff)
Merge tag 'net-next-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni: "Core & protocols: - Improve drop account scalability on NUMA hosts for RAW and UDP sockets and the backlog, almost doubling the Pps capacity under DoS - Optimize the UDP RX performance under stress, reducing contention, revisiting the binary layout of the involved data structs and implementing NUMA-aware locking. This improves UDP RX performance by an additional 50%, even more under extreme conditions - Add support for PSP encryption of TCP connections; this mechanism has some similarities with IPsec and TLS, but offers superior HW offload capabilities - Ongoing work to support Accurate ECN for TCP. AccECN allows more than one congestion notification signal per RTT and is a building block for Low Latency, Low Loss, and Scalable Throughput (L4S) - Reorganize the TCP socket binary layout for data locality, reducing the number of touched cachelines in the fastpath - Refactor skb deferral free to better scale on large multi-NUMA hosts, this improves TCP and UDP RX performance significantly on such HW - Increase the default socket memory buffer limits from 256K to 4M to better fit modern link speeds - Improve handling of setups with a large number of nexthops, making dump operations scale linearly and avoiding unneeded synchronize_rcu() on delete - Improve bridge handling of VLAN FDB, storing a single entry per bridge instead of one entry per port; this makes the dump an order of magnitude faster on large switches - Restore IP ID correctly for encapsulated packets at GSO segmentation time, allowing GRO to merge packets in more scenarios - Improve netfilter matching performance on large sets - Improve MPTCP receive path performance by leveraging recently introduced core infrastructure (skb deferral free) and adopting recent TCP autotuning changes - Allow bridges to redirect to a backup port when the bridge port is administratively down - Introduce MPTCP 'laminar' endpoint that can be used only once per connection and simplify 
common MPTCP setups - Add RCU safety to dst->dev, closing a lot of possible races - A significant crypto library API for SCTP, MPTCP and IPv6 SR, reducing code duplication - Supports pulling data from an skb frag into the linear area of an XDP buffer Things we sprinkled into general kernel code: - Generate netlink documentation from YAML using an integrated YAML parser Driver API: - Support using IPv6 Flow Label in Rx hash computation and RSS queue selection - Introduce API for fetching the DMA device for a given queue, allowing TCP zerocopy RX on more H/W setups - Make XDP helpers compatible with unreadable memory, allowing more easily building DevMem-enabled drivers with a unified XDP/skbs datapath - Add a new dedicated ethtool callback enabling drivers to provide the number of RX rings directly, improving efficiency and clarity in RX ring queries and RSS configuration - Introduce a burst period for the health reporter, allowing better handling of multiple errors due to the same root cause - Support for DPLL phase offset exponential moving average, controlling the average smoothing factor Device drivers: - Add a new Huawei driver for 3rd gen NIC (hinic3) - Add a new SpacemiT driver for K1 ethernet MAC - Add a generic abstraction for shared memory communication devices (dibps) - Ethernet high-speed NICs: - nVidia/Mellanox: - Use multiple per-queue doorbell, to avoid MMIO contention issues - support adjacent functions, allowing them to delegate their SR-IOV VFs to sibling PFs - support RSS for IPSec offload - support exposing raw cycle counters in PTP and mlx5 - support for disabling host PFs. 
- Intel (100G, ice, idpf): - ice: support for SRIOV VFs over an Active-Active link aggregate - ice: support for firmware logging via debugfs - ice: support for Earliest TxTime First (ETF) hardware offload - idpf: support basic XDP functionalities and XSk - Broadcom (bnxt): - support Hyper-V VF ID - dynamic SRIOV resource allocations for RoCE - Meta (fbnic): - support queue API, zero-copy Rx and Tx - support basic XDP functionalities - devlink health support for FW crashes and OTP mem corruptions - expand hardware stats coverage to FEC, PHY, and Pause - Wangxun: - support ethtool coalesce options - support for multiple RSS contexts - Ethernet virtual: - Macsec: - replace custom netlink attribute checks with policy-level checks - Bonding: - support aggregator selection based on port priority - Microsoft vNIC: - use page pool fragments for RX buffers instead of full pages to improve memory efficiency - Ethernet NICs consumer, and embedded: - Qualcomm: support Ethernet function for IPQ9574 SoC - Airoha: implement wlan offloading via NPU - Freescale - enetc: add NETC timer PTP driver and add PTP support - fec: enable the Jumbo frame support for i.MX8QM - Renesas (R-Car S4): - support HW offloading for layer 2 switching - support for RZ/{T2H, N2H} SoCs - Cadence (macb): support TAPRIO traffic scheduling - TI: - support for Gigabit ICSS ethernet SoC (icssm-prueth) - Synopsys (stmmac): a lot of cleanups - Ethernet PHYs: - Support 10g-qxgmi phy-mode for AQR412C, Felix DSA and Lynx PCS driver - Support bcm63268 GPHY power control - Support for Micrel lan8842 PHY and PTP - Support for Aquantia AQR412 and AQR115 - CAN: - a large CAN-XL preparation work - reorganize raw_sock and uniqframe struct to minimize memory usage - rcar_canfd: update the CAN-FD handling - WiFi: - extended Neighbor Awareness Networking (NAN) support - S1G channel representation cleanup - improve S1G support - WiFi drivers: - Intel (iwlwifi): - major refactor and cleanup - Broadcom (brcm80211): - support 
for AP isolation - RealTek (rtw88/89) rtw88/89: - preparation work for RTL8922DE support - MediaTek (mt76): - HW restart improvements - MLO support - Qualcomm/Atheros (ath10k): - GTK rekey fixes - Bluetooth drivers: - btusb: support for several new IDs for MT7925 - btintel: support for BlazarIW core - btintel_pcie: support for _suspend() / _resume() - btintel_pcie: support for Scorpious, Panther Lake-H484 IDs" * tag 'net-next-6.18' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1536 commits) net: stmmac: Add support for Allwinner A523 GMAC200 dt-bindings: net: sun8i-emac: Add A523 GMAC200 compatible Revert "Documentation: net: add flow control guide and document ethtool API" octeontx2-pf: fix bitmap leak octeontx2-vf: fix bitmap leak net/mlx5e: Use extack in set rxfh callback net/mlx5e: Introduce mlx5e_rss_params for RSS configuration net/mlx5e: Introduce mlx5e_rss_init_params net/mlx5e: Remove unused mdev param from RSS indir init net/mlx5: Improve QoS error messages with actual depth values net/mlx5e: Prevent entering switchdev mode with inconsistent netns net/mlx5: HWS, Generalize complex matchers net/mlx5: Improve write-combining test reliability for ARM64 Grace CPUs selftests/net: add tcp_port_share to .gitignore Revert "net/mlx5e: Update and set Xon/Xoff upon MTU set" net: add NUMA awareness to skb_attempt_defer_free() net: use llist for sd->defer_list net: make softnet_data.defer_count an atomic selftests: drv-net: psp: add tests for destroying devices selftests: drv-net: psp: add test for auto-adjusting TCP MSS ...
Diffstat (limited to 'net/ipv4/proc.c')
-rw-r--r--net/ipv4/proc.c65
1 files changed, 33 insertions, 32 deletions
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 65b0d0ab0084..974afc4ecbe2 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -95,7 +95,6 @@ static const struct snmp_mib snmp4_ipstats_list[] = {
SNMP_MIB_ITEM("FragFails", IPSTATS_MIB_FRAGFAILS),
SNMP_MIB_ITEM("FragCreates", IPSTATS_MIB_FRAGCREATES),
SNMP_MIB_ITEM("OutTransmits", IPSTATS_MIB_OUTPKTS),
- SNMP_MIB_SENTINEL
};
/* Following items are displayed in /proc/net/netstat */
@@ -119,7 +118,6 @@ static const struct snmp_mib snmp4_ipextstats_list[] = {
SNMP_MIB_ITEM("InECT0Pkts", IPSTATS_MIB_ECT0PKTS),
SNMP_MIB_ITEM("InCEPkts", IPSTATS_MIB_CEPKTS),
SNMP_MIB_ITEM("ReasmOverlaps", IPSTATS_MIB_REASM_OVERLAPS),
- SNMP_MIB_SENTINEL
};
static const struct {
@@ -157,7 +155,6 @@ static const struct snmp_mib snmp4_tcp_list[] = {
SNMP_MIB_ITEM("InErrs", TCP_MIB_INERRS),
SNMP_MIB_ITEM("OutRsts", TCP_MIB_OUTRSTS),
SNMP_MIB_ITEM("InCsumErrors", TCP_MIB_CSUMERRORS),
- SNMP_MIB_SENTINEL
};
static const struct snmp_mib snmp4_udp_list[] = {
@@ -170,7 +167,6 @@ static const struct snmp_mib snmp4_udp_list[] = {
SNMP_MIB_ITEM("InCsumErrors", UDP_MIB_CSUMERRORS),
SNMP_MIB_ITEM("IgnoredMulti", UDP_MIB_IGNOREDMULTI),
SNMP_MIB_ITEM("MemErrors", UDP_MIB_MEMERRORS),
- SNMP_MIB_SENTINEL
};
static const struct snmp_mib snmp4_net_list[] = {
@@ -309,7 +305,6 @@ static const struct snmp_mib snmp4_net_list[] = {
SNMP_MIB_ITEM("TCPAOKeyNotFound", LINUX_MIB_TCPAOKEYNOTFOUND),
SNMP_MIB_ITEM("TCPAOGood", LINUX_MIB_TCPAOGOOD),
SNMP_MIB_ITEM("TCPAODroppedIcmps", LINUX_MIB_TCPAODROPPEDICMPS),
- SNMP_MIB_SENTINEL
};
static void icmpmsg_put_line(struct seq_file *seq, unsigned long *vals,
@@ -389,14 +384,15 @@ static void icmp_put(struct seq_file *seq)
*/
static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
{
+ const int cnt = ARRAY_SIZE(snmp4_ipstats_list);
+ u64 buff64[ARRAY_SIZE(snmp4_ipstats_list)];
struct net *net = seq->private;
- u64 buff64[IPSTATS_MIB_MAX];
int i;
- memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64));
+ memset(buff64, 0, sizeof(buff64));
seq_puts(seq, "Ip: Forwarding DefaultTTL");
- for (i = 0; snmp4_ipstats_list[i].name; i++)
+ for (i = 0; i < cnt; i++)
seq_printf(seq, " %s", snmp4_ipstats_list[i].name);
seq_printf(seq, "\nIp: %d %d",
@@ -404,10 +400,10 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
- snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list,
- net->mib.ip_statistics,
- offsetof(struct ipstats_mib, syncp));
- for (i = 0; snmp4_ipstats_list[i].name; i++)
+ snmp_get_cpu_field64_batch_cnt(buff64, snmp4_ipstats_list, cnt,
+ net->mib.ip_statistics,
+ offsetof(struct ipstats_mib, syncp));
+ for (i = 0; i < cnt; i++)
seq_printf(seq, " %llu", buff64[i]);
return 0;
@@ -415,20 +411,23 @@ static int snmp_seq_show_ipstats(struct seq_file *seq, void *v)
static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v)
{
+ const int udp_cnt = ARRAY_SIZE(snmp4_udp_list);
+ const int tcp_cnt = ARRAY_SIZE(snmp4_tcp_list);
unsigned long buff[TCPUDP_MIB_MAX];
struct net *net = seq->private;
int i;
- memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
+ memset(buff, 0, tcp_cnt * sizeof(unsigned long));
seq_puts(seq, "\nTcp:");
- for (i = 0; snmp4_tcp_list[i].name; i++)
+ for (i = 0; i < tcp_cnt; i++)
seq_printf(seq, " %s", snmp4_tcp_list[i].name);
seq_puts(seq, "\nTcp:");
- snmp_get_cpu_field_batch(buff, snmp4_tcp_list,
- net->mib.tcp_statistics);
- for (i = 0; snmp4_tcp_list[i].name; i++) {
+ snmp_get_cpu_field_batch_cnt(buff, snmp4_tcp_list,
+ tcp_cnt,
+ net->mib.tcp_statistics);
+ for (i = 0; i < tcp_cnt; i++) {
/* MaxConn field is signed, RFC 2012 */
if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN)
seq_printf(seq, " %ld", buff[i]);
@@ -436,27 +435,29 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v)
seq_printf(seq, " %lu", buff[i]);
}
- memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
+ memset(buff, 0, udp_cnt * sizeof(unsigned long));
- snmp_get_cpu_field_batch(buff, snmp4_udp_list,
- net->mib.udp_statistics);
+ snmp_get_cpu_field_batch_cnt(buff, snmp4_udp_list,
+ udp_cnt,
+ net->mib.udp_statistics);
seq_puts(seq, "\nUdp:");
- for (i = 0; snmp4_udp_list[i].name; i++)
+ for (i = 0; i < udp_cnt; i++)
seq_printf(seq, " %s", snmp4_udp_list[i].name);
seq_puts(seq, "\nUdp:");
- for (i = 0; snmp4_udp_list[i].name; i++)
+ for (i = 0; i < udp_cnt; i++)
seq_printf(seq, " %lu", buff[i]);
- memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long));
+ memset(buff, 0, udp_cnt * sizeof(unsigned long));
/* the UDP and UDP-Lite MIBs are the same */
seq_puts(seq, "\nUdpLite:");
- snmp_get_cpu_field_batch(buff, snmp4_udp_list,
- net->mib.udplite_statistics);
- for (i = 0; snmp4_udp_list[i].name; i++)
+ snmp_get_cpu_field_batch_cnt(buff, snmp4_udp_list,
+ udp_cnt,
+ net->mib.udplite_statistics);
+ for (i = 0; i < udp_cnt; i++)
seq_printf(seq, " %s", snmp4_udp_list[i].name);
seq_puts(seq, "\nUdpLite:");
- for (i = 0; snmp4_udp_list[i].name; i++)
+ for (i = 0; i < udp_cnt; i++)
seq_printf(seq, " %lu", buff[i]);
seq_putc(seq, '\n');
@@ -480,8 +481,8 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
*/
static int netstat_seq_show(struct seq_file *seq, void *v)
{
- const int ip_cnt = ARRAY_SIZE(snmp4_ipextstats_list) - 1;
- const int tcp_cnt = ARRAY_SIZE(snmp4_net_list) - 1;
+ const int ip_cnt = ARRAY_SIZE(snmp4_ipextstats_list);
+ const int tcp_cnt = ARRAY_SIZE(snmp4_net_list);
struct net *net = seq->private;
unsigned long *buff;
int i;
@@ -494,8 +495,8 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
buff = kzalloc(max(tcp_cnt * sizeof(long), ip_cnt * sizeof(u64)),
GFP_KERNEL);
if (buff) {
- snmp_get_cpu_field_batch(buff, snmp4_net_list,
- net->mib.net_statistics);
+ snmp_get_cpu_field_batch_cnt(buff, snmp4_net_list, tcp_cnt,
+ net->mib.net_statistics);
for (i = 0; i < tcp_cnt; i++)
seq_printf(seq, " %lu", buff[i]);
} else {
@@ -513,7 +514,7 @@ static int netstat_seq_show(struct seq_file *seq, void *v)
u64 *buff64 = (u64 *)buff;
memset(buff64, 0, ip_cnt * sizeof(u64));
- snmp_get_cpu_field64_batch(buff64, snmp4_ipextstats_list,
+ snmp_get_cpu_field64_batch_cnt(buff64, snmp4_ipextstats_list, ip_cnt,
net->mib.ip_statistics,
offsetof(struct ipstats_mib, syncp));
for (i = 0; i < ip_cnt; i++)