summary refs log tree commit diff
diff options
context:
space:
mode:
author Jakub Kicinski <kuba@kernel.org> 2025-07-26 11:44:23 -0700
committer Jakub Kicinski <kuba@kernel.org> 2025-07-26 11:49:45 -0700
commit c58c18be8850d58fd61b0480d2355df89ce7ee59 (patch)
tree d0ef58fab37674a1b56fc0df0400898e58b60fd5
parent c471b90bb332dadd59744fb2c8407d67d815b6e6 (diff)
parent afd8c2c9e2e29c6c7705635bea2960593976dacc (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Merge in late fixes to prepare for the 6.17 net-next PR.

Conflicts:

net/core/neighbour.c
  1bbb76a89948 ("neighbour: Fix null-ptr-deref in neigh_flush_dev().")
  13a936bb99fb ("neighbour: Protect tbl->phash_buckets[] with a dedicated mutex.")
  03dc03fa0432 ("neighbor: Add NTF_EXT_VALIDATED flag for externally validated entries")

Adjacent changes:

drivers/net/usb/usbnet.c
  0d9cfc9b8cb1 ("net: usbnet: Avoid potential RCU stall on LINK_CHANGE event")
  2c04d279e857 ("net: usb: Convert tasklet API to new bottom half workqueue mechanism")

net/ipv6/route.c
  31d7d67ba127 ("ipv6: annotate data-races around rt->fib6_nsiblings")
  1caf27297215 ("ipv6: adopt dst_dev() helper")
  3b3ccf9ed05e ("net: Remove unnecessary NULL check for lwtunnel_fill_encap()")

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
-rw-r--r-- drivers/net/can/usb/peak_usb/pcan_usb_fd.c 17
-rw-r--r-- drivers/net/dsa/microchip/ksz8.c 3
-rw-r--r-- drivers/net/dsa/microchip/ksz8_reg.h 4
-rw-r--r-- drivers/net/ethernet/intel/igb/igb_xsk.c 3
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en.h 1
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c 3
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c 7
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c 4
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/en_main.c 26
-rw-r--r-- drivers/net/ethernet/stmicro/stmmac/stmmac_main.c 2
-rw-r--r-- drivers/net/phy/micrel.c 2
-rw-r--r-- drivers/net/usb/usbnet.c 11
-rw-r--r-- drivers/net/vrf.c 2
-rw-r--r-- include/linux/usb/usbnet.h 1
-rw-r--r-- net/core/neighbour.c 88
-rw-r--r-- net/ipv6/ip6_fib.c 24
-rw-r--r-- net/ipv6/route.c 71
-rwxr-xr-x tools/testing/selftests/drivers/net/hw/tso.py 99
18 files changed, 247 insertions, 121 deletions
diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
index 4d85b29a17b78..ebefc274b50a5 100644
--- a/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
+++ b/drivers/net/can/usb/peak_usb/pcan_usb_fd.c
@@ -49,7 +49,7 @@ struct __packed pcan_ufd_fw_info {
__le32 ser_no; /* S/N */
__le32 flags; /* special functions */
- /* extended data when type == PCAN_USBFD_TYPE_EXT */
+ /* extended data when type >= PCAN_USBFD_TYPE_EXT */
u8 cmd_out_ep; /* ep for cmd */
u8 cmd_in_ep; /* ep for replies */
u8 data_out_ep[2]; /* ep for CANx TX */
@@ -982,10 +982,11 @@ static int pcan_usb_fd_init(struct peak_usb_device *dev)
dev->can.ctrlmode |= CAN_CTRLMODE_FD_NON_ISO;
}
- /* if vendor rsp is of type 2, then it contains EP numbers to
- * use for cmds pipes. If not, then default EP should be used.
+ /* if vendor rsp type is greater than or equal to 2, then it
+ * contains EP numbers to use for cmds pipes. If not, then
+ * default EP should be used.
*/
- if (fw_info->type != cpu_to_le16(PCAN_USBFD_TYPE_EXT)) {
+ if (le16_to_cpu(fw_info->type) < PCAN_USBFD_TYPE_EXT) {
fw_info->cmd_out_ep = PCAN_USBPRO_EP_CMDOUT;
fw_info->cmd_in_ep = PCAN_USBPRO_EP_CMDIN;
}
@@ -1018,11 +1019,11 @@ static int pcan_usb_fd_init(struct peak_usb_device *dev)
dev->can_channel_id =
le32_to_cpu(pdev->usb_if->fw_info.dev_id[dev->ctrl_idx]);
- /* if vendor rsp is of type 2, then it contains EP numbers to
- * use for data pipes. If not, then statically defined EP are used
- * (see peak_usb_create_dev()).
+ /* if vendor rsp type is greater than or equal to 2, then it contains EP
+ * numbers to use for data pipes. If not, then statically defined EP are
+ * used (see peak_usb_create_dev()).
*/
- if (fw_info->type == cpu_to_le16(PCAN_USBFD_TYPE_EXT)) {
+ if (le16_to_cpu(fw_info->type) >= PCAN_USBFD_TYPE_EXT) {
dev->ep_msg_in = fw_info->data_in_ep;
dev->ep_msg_out = fw_info->data_out_ep[dev->ctrl_idx];
}
diff --git a/drivers/net/dsa/microchip/ksz8.c b/drivers/net/dsa/microchip/ksz8.c
index c400e1c0369e3..76e490070e9c2 100644
--- a/drivers/net/dsa/microchip/ksz8.c
+++ b/drivers/net/dsa/microchip/ksz8.c
@@ -384,6 +384,9 @@ static void ksz8863_r_mib_pkt(struct ksz_device *dev, int port, u16 addr,
addr -= dev->info->reg_mib_cnt;
ctrl_addr = addr ? KSZ8863_MIB_PACKET_DROPPED_TX_0 :
KSZ8863_MIB_PACKET_DROPPED_RX_0;
+ if (ksz_is_8895_family(dev) &&
+ ctrl_addr == KSZ8863_MIB_PACKET_DROPPED_RX_0)
+ ctrl_addr = KSZ8895_MIB_PACKET_DROPPED_RX_0;
ctrl_addr += port;
ctrl_addr |= IND_ACC_TABLE(TABLE_MIB | TABLE_READ);
diff --git a/drivers/net/dsa/microchip/ksz8_reg.h b/drivers/net/dsa/microchip/ksz8_reg.h
index 491aa1e50175b..332408567b473 100644
--- a/drivers/net/dsa/microchip/ksz8_reg.h
+++ b/drivers/net/dsa/microchip/ksz8_reg.h
@@ -833,7 +833,9 @@
#define KSZ8795_MIB_TOTAL_TX_1 0x105
#define KSZ8863_MIB_PACKET_DROPPED_TX_0 0x100
-#define KSZ8863_MIB_PACKET_DROPPED_RX_0 0x105
+#define KSZ8863_MIB_PACKET_DROPPED_RX_0 0x103
+
+#define KSZ8895_MIB_PACKET_DROPPED_RX_0 0x105
#define MIB_PACKET_DROPPED 0x0000FFFF
diff --git a/drivers/net/ethernet/intel/igb/igb_xsk.c b/drivers/net/ethernet/intel/igb/igb_xsk.c
index 5cf67ba292694..30ce5fbb5b776 100644
--- a/drivers/net/ethernet/intel/igb/igb_xsk.c
+++ b/drivers/net/ethernet/intel/igb/igb_xsk.c
@@ -482,7 +482,7 @@ bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool)
if (!nb_pkts)
return true;
- while (nb_pkts-- > 0) {
+ for (; i < nb_pkts; i++) {
dma = xsk_buff_raw_get_dma(xsk_pool, descs[i].addr);
xsk_buff_raw_dma_sync_for_device(xsk_pool, dma, descs[i].len);
@@ -512,7 +512,6 @@ bool igb_xmit_zc(struct igb_ring *tx_ring, struct xsk_buff_pool *xsk_pool)
total_bytes += descs[i].len;
- i++;
tx_ring->next_to_use++;
tx_buffer_info->next_to_watch = tx_desc;
if (tx_ring->next_to_use == tx_ring->count)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 60d24d8a22425..0dd3bc0f4caae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -726,6 +726,7 @@ struct mlx5e_rq {
struct xsk_buff_pool *xsk_pool;
struct work_struct recover_work;
+ struct work_struct rx_timeout_work;
/* control */
struct mlx5_wq_ctrl wq_ctrl;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
index 8e25f4ef5ccce..5ae787656a7ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c
@@ -331,6 +331,9 @@ static int port_set_buffer(struct mlx5e_priv *priv,
if (err)
goto out;
+ /* RO bits should be set to 0 on write */
+ MLX5_SET(pbmc_reg, in, port_buffer_size, 0);
+
err = mlx5e_port_set_pbmc(mdev, in);
out:
kfree(in);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
index e75759533ae0c..16c44d628eda6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c
@@ -170,16 +170,23 @@ static int mlx5e_rx_reporter_err_rq_cqe_recover(void *ctx)
static int mlx5e_rx_reporter_timeout_recover(void *ctx)
{
struct mlx5_eq_comp *eq;
+ struct mlx5e_priv *priv;
struct mlx5e_rq *rq;
int err;
rq = ctx;
+ priv = rq->priv;
+
+ mutex_lock(&priv->state_lock);
+
eq = rq->cq.mcq.eq;
err = mlx5e_health_channel_eq_recover(rq->netdev, eq, rq->cq.ch_stats);
if (err && rq->icosq)
clear_bit(MLX5E_SQ_STATE_ENABLED, &rq->icosq->state);
+ mutex_unlock(&priv->state_lock);
+
return err;
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 727fa7c185238..6056106edcc64 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -327,6 +327,10 @@ void mlx5e_ipsec_offload_handle_rx_skb(struct net_device *netdev,
if (unlikely(!sa_entry)) {
rcu_read_unlock();
atomic64_inc(&ipsec->sw_stats.ipsec_rx_drop_sadb_miss);
+ /* Clear secpath to prevent invalid dereference
+ * in downstream XFRM policy checks.
+ */
+ secpath_reset(skb);
return;
}
xfrm_state_hold(sa_entry->x);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 33bdb7f1e03fd..21bb88c5d3dce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -676,6 +676,27 @@ static void mlx5e_rq_err_cqe_work(struct work_struct *recover_work)
mlx5e_reporter_rq_cqe_err(rq);
}
+static void mlx5e_rq_timeout_work(struct work_struct *timeout_work)
+{
+ struct mlx5e_rq *rq = container_of(timeout_work,
+ struct mlx5e_rq,
+ rx_timeout_work);
+
+ /* Acquire netdev instance lock to synchronize with channel close and
+ * reopen flows. Either successfully obtain the lock, or detect that
+ * channels are closing for another reason, making this work no longer
+ * necessary.
+ */
+ while (!netdev_trylock(rq->netdev)) {
+ if (!test_bit(MLX5E_STATE_CHANNELS_ACTIVE, &rq->priv->state))
+ return;
+ msleep(20);
+ }
+
+ mlx5e_reporter_rx_timeout(rq);
+ netdev_unlock(rq->netdev);
+}
+
static int mlx5e_alloc_mpwqe_rq_drop_page(struct mlx5e_rq *rq)
{
rq->wqe_overflow.page = alloc_page(GFP_KERNEL);
@@ -876,6 +897,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params,
rqp->wq.db_numa_node = node;
INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);
+ INIT_WORK(&rq->rx_timeout_work, mlx5e_rq_timeout_work);
if (params->xdp_prog)
bpf_prog_inc(params->xdp_prog);
@@ -1261,7 +1283,8 @@ int mlx5e_wait_for_min_rx_wqes(struct mlx5e_rq *rq, int wait_time)
netdev_warn(rq->netdev, "Failed to get min RX wqes on Channel[%d] RQN[0x%x] wq cur_sz(%d) min_rx_wqes(%d)\n",
rq->ix, rq->rqn, mlx5e_rqwq_get_cur_sz(rq), min_wqes);
- mlx5e_reporter_rx_timeout(rq);
+ queue_work(rq->priv->wq, &rq->rx_timeout_work);
+
return -ETIMEDOUT;
}
@@ -1432,6 +1455,7 @@ void mlx5e_close_rq(struct mlx5e_rq *rq)
if (rq->dim)
cancel_work_sync(&rq->dim->work);
cancel_work_sync(&rq->recover_work);
+ cancel_work_sync(&rq->rx_timeout_work);
mlx5e_destroy_rq(rq);
mlx5e_free_rx_descs(rq);
mlx5e_free_rq(rq);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index f350a6662880a..f1abf4242cd2f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -2596,7 +2596,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget)
budget = min(budget, stmmac_tx_avail(priv, queue));
- while (budget-- > 0) {
+ for (; budget > 0; budget--) {
struct stmmac_metadata_request meta_req;
struct xsk_tx_metadata *meta = NULL;
dma_addr_t dma_addr;
diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index f678c1bdacdf0..605b0315b4cb0 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -477,6 +477,8 @@ static const struct kszphy_type ksz8051_type = {
static const struct kszphy_type ksz8081_type = {
.led_mode_reg = MII_KSZPHY_CTRL_2,
+ .cable_diag_reg = KSZ8081_LMD,
+ .pair_mask = KSZPHY_WIRE_PAIR_MASK,
.has_broadcast_disable = true,
.has_nand_tree_disable = true,
.has_rmii_ref_clk_sel = true,
diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
index a8d50dd93d12c..a38ffbf4b3f03 100644
--- a/drivers/net/usb/usbnet.c
+++ b/drivers/net/usb/usbnet.c
@@ -1129,6 +1129,9 @@ static void __handle_link_change(struct usbnet *dev)
* tx queue is stopped by netcore after link becomes off
*/
} else {
+ if (test_and_clear_bit(EVENT_LINK_CARRIER_ON, &dev->flags))
+ netif_carrier_on(dev->net);
+
/* submitting URBs for reading packets */
queue_work(system_bh_wq, &dev->bh_work);
}
@@ -2015,10 +2018,12 @@ EXPORT_SYMBOL(usbnet_manage_power);
void usbnet_link_change(struct usbnet *dev, bool link, bool need_reset)
{
/* update link after link is reseted */
- if (link && !need_reset)
- netif_carrier_on(dev->net);
- else
+ if (link && !need_reset) {
+ set_bit(EVENT_LINK_CARRIER_ON, &dev->flags);
+ } else {
+ clear_bit(EVENT_LINK_CARRIER_ON, &dev->flags);
netif_carrier_off(dev->net);
+ }
if (need_reset && link)
usbnet_defer_kevent(dev, EVENT_LINK_RESET);
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 9a4beea6ee0c2..3ccd649913b50 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -1302,6 +1302,8 @@ static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev,
struct net *net = dev_net(vrf_dev);
struct rt6_info *rt6;
+ skb_dst_drop(skb);
+
rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, skb,
RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE);
if (unlikely(!rt6))
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index 208682f771793..a2d54122823da 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -76,6 +76,7 @@ struct usbnet {
# define EVENT_LINK_CHANGE 11
# define EVENT_SET_RX_MODE 12
# define EVENT_NO_IP_ALIGN 13
+# define EVENT_LINK_CARRIER_ON 14
/* This one is special, as it indicates that the device is going away
* there are cyclic dependencies between tasklet, timer and bh
* that must be broken
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 4316ca3d98729..bddfa389effa7 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -379,6 +379,43 @@ static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net,
}
}
+static void neigh_flush_one(struct neighbour *n)
+{
+ hlist_del_rcu(&n->hash);
+ hlist_del_rcu(&n->dev_list);
+
+ write_lock(&n->lock);
+
+ neigh_del_timer(n);
+ neigh_mark_dead(n);
+
+ if (refcount_read(&n->refcnt) != 1) {
+ /* The most unpleasant situation.
+ * We must destroy neighbour entry,
+ * but someone still uses it.
+ *
+ * The destroy will be delayed until
+ * the last user releases us, but
+ * we must kill timers etc. and move
+ * it to safe state.
+ */
+ __skb_queue_purge(&n->arp_queue);
+ n->arp_queue_len_bytes = 0;
+ WRITE_ONCE(n->output, neigh_blackhole);
+
+ if (n->nud_state & NUD_VALID)
+ n->nud_state = NUD_NOARP;
+ else
+ n->nud_state = NUD_NONE;
+
+ neigh_dbg(2, "neigh %p is stray\n", n);
+ }
+
+ write_unlock(&n->lock);
+
+ neigh_cleanup_and_release(n);
+}
+
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
bool skip_perm)
{
@@ -394,32 +431,24 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
n->flags & NTF_EXT_VALIDATED))
continue;
- hlist_del_rcu(&n->hash);
- hlist_del_rcu(&n->dev_list);
- write_lock(&n->lock);
- neigh_del_timer(n);
- neigh_mark_dead(n);
- if (refcount_read(&n->refcnt) != 1) {
- /* The most unpleasant situation.
- * We must destroy neighbour entry,
- * but someone still uses it.
- *
- * The destroy will be delayed until
- * the last user releases us, but
- * we must kill timers etc. and move
- * it to safe state.
- */
- __skb_queue_purge(&n->arp_queue);
- n->arp_queue_len_bytes = 0;
- WRITE_ONCE(n->output, neigh_blackhole);
- if (n->nud_state & NUD_VALID)
- n->nud_state = NUD_NOARP;
- else
- n->nud_state = NUD_NONE;
- neigh_dbg(2, "neigh %p is stray\n", n);
- }
- write_unlock(&n->lock);
- neigh_cleanup_and_release(n);
+ neigh_flush_one(n);
+ }
+}
+
+static void neigh_flush_table(struct neigh_table *tbl)
+{
+ struct neigh_hash_table *nht;
+ int i;
+
+ nht = rcu_dereference_protected(tbl->nht,
+ lockdep_is_held(&tbl->lock));
+
+ for (i = 0; i < (1 << nht->hash_shift); i++) {
+ struct hlist_node *tmp;
+ struct neighbour *n;
+
+ neigh_for_each_in_bucket_safe(n, tmp, &nht->hash_heads[i])
+ neigh_flush_one(n);
}
}
@@ -435,7 +464,12 @@ static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
bool skip_perm)
{
write_lock_bh(&tbl->lock);
- neigh_flush_dev(tbl, dev, skip_perm);
+ if (likely(dev)) {
+ neigh_flush_dev(tbl, dev, skip_perm);
+ } else {
+ DEBUG_NET_WARN_ON_ONCE(skip_perm);
+ neigh_flush_table(tbl);
+ }
write_unlock_bh(&tbl->lock);
pneigh_ifdown(tbl, dev, skip_perm);
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 7272d7e0fc36b..02c16909f6182 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -445,15 +445,17 @@ struct fib6_dump_arg {
static int fib6_rt_dump(struct fib6_info *rt, struct fib6_dump_arg *arg)
{
enum fib_event_type fib_event = FIB_EVENT_ENTRY_REPLACE;
+ unsigned int nsiblings;
int err;
if (!rt || rt == arg->net->ipv6.fib6_null_entry)
return 0;
- if (rt->fib6_nsiblings)
+ nsiblings = READ_ONCE(rt->fib6_nsiblings);
+ if (nsiblings)
err = call_fib6_multipath_entry_notifier(arg->nb, fib_event,
rt,
- rt->fib6_nsiblings,
+ nsiblings,
arg->extack);
else
err = call_fib6_entry_notifier(arg->nb, fib_event, rt,
@@ -1126,7 +1128,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
if (rt6_duplicate_nexthop(iter, rt)) {
if (rt->fib6_nsiblings)
- rt->fib6_nsiblings = 0;
+ WRITE_ONCE(rt->fib6_nsiblings, 0);
if (!(iter->fib6_flags & RTF_EXPIRES))
return -EEXIST;
if (!(rt->fib6_flags & RTF_EXPIRES)) {
@@ -1155,7 +1157,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
*/
if (rt_can_ecmp &&
rt6_qualify_for_ecmp(iter))
- rt->fib6_nsiblings++;
+ WRITE_ONCE(rt->fib6_nsiblings,
+ rt->fib6_nsiblings + 1);
}
if (iter->fib6_metric > rt->fib6_metric)
@@ -1205,7 +1208,8 @@ next_iter:
fib6_nsiblings = 0;
list_for_each_entry_safe(sibling, temp_sibling,
&rt->fib6_siblings, fib6_siblings) {
- sibling->fib6_nsiblings++;
+ WRITE_ONCE(sibling->fib6_nsiblings,
+ sibling->fib6_nsiblings + 1);
BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
fib6_nsiblings++;
}
@@ -1252,8 +1256,9 @@ add:
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings,
fib6_siblings)
- sibling->fib6_nsiblings--;
- rt->fib6_nsiblings = 0;
+ WRITE_ONCE(sibling->fib6_nsiblings,
+ sibling->fib6_nsiblings - 1);
+ WRITE_ONCE(rt->fib6_nsiblings, 0);
list_del_rcu(&rt->fib6_siblings);
rcu_read_lock();
rt6_multipath_rebalance(next_sibling);
@@ -2002,8 +2007,9 @@ static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn,
notify_del = true;
list_for_each_entry_safe(sibling, next_sibling,
&rt->fib6_siblings, fib6_siblings)
- sibling->fib6_nsiblings--;
- rt->fib6_nsiblings = 0;
+ WRITE_ONCE(sibling->fib6_nsiblings,
+ sibling->fib6_nsiblings - 1);
+ WRITE_ONCE(rt->fib6_nsiblings, 0);
list_del_rcu(&rt->fib6_siblings);
rt6_multipath_rebalance(next_sibling);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3fbe0885c21c6..17c5b54ecad84 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5347,7 +5347,8 @@ static void ip6_route_mpath_notify(struct fib6_info *rt,
*/
rcu_read_lock();
- if ((nlflags & NLM_F_APPEND) && rt_last && rt_last->fib6_nsiblings) {
+ if ((nlflags & NLM_F_APPEND) && rt_last &&
+ READ_ONCE(rt_last->fib6_nsiblings)) {
rt = list_first_or_null_rcu(&rt_last->fib6_siblings,
struct fib6_info,
fib6_siblings);
@@ -5671,32 +5672,34 @@ static int rt6_nh_nlmsg_size(struct fib6_nh *nh, void *arg)
static size_t rt6_nlmsg_size(struct fib6_info *f6i)
{
+ struct fib6_info *sibling;
+ struct fib6_nh *nh;
int nexthop_len;
if (f6i->nh) {
nexthop_len = nla_total_size(4); /* RTA_NH_ID */
nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
&nexthop_len);
- } else {
- struct fib6_nh *nh = f6i->fib6_nh;
- struct fib6_info *sibling;
-
- nexthop_len = 0;
- if (f6i->fib6_nsiblings) {
- rt6_nh_nlmsg_size(nh, &nexthop_len);
-
- rcu_read_lock();
+ goto common;
+ }
- list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
- fib6_siblings) {
- rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
- }
+ rcu_read_lock();
+retry:
+ nh = f6i->fib6_nh;
+ nexthop_len = 0;
+ if (READ_ONCE(f6i->fib6_nsiblings)) {
+ rt6_nh_nlmsg_size(nh, &nexthop_len);
- rcu_read_unlock();
+ list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
+ fib6_siblings) {
+ rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
+ if (!READ_ONCE(f6i->fib6_nsiblings))
+ goto retry;
}
- nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
}
-
+ rcu_read_unlock();
+ nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
+common:
return NLMSG_ALIGN(sizeof(struct rtmsg))
+ nla_total_size(16) /* RTA_SRC */
+ nla_total_size(16) /* RTA_DST */
@@ -5857,7 +5860,7 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
if (lwtunnel_fill_encap(skb, dst->lwtstate, RTA_ENCAP, RTA_ENCAP_TYPE) < 0)
goto nla_put_failure;
- } else if (rt->fib6_nsiblings) {
+ } else if (READ_ONCE(rt->fib6_nsiblings)) {
struct fib6_info *sibling;
struct nlattr *mp;
@@ -5959,16 +5962,21 @@ static bool fib6_info_uses_dev(const struct fib6_info *f6i,
if (f6i->fib6_nh->fib_nh_dev == dev)
return true;
- if (f6i->fib6_nsiblings) {
- struct fib6_info *sibling, *next_sibling;
+ if (READ_ONCE(f6i->fib6_nsiblings)) {
+ const struct fib6_info *sibling;
- list_for_each_entry_safe(sibling, next_sibling,
- &f6i->fib6_siblings, fib6_siblings) {
- if (sibling->fib6_nh->fib_nh_dev == dev)
+ rcu_read_lock();
+ list_for_each_entry_rcu(sibling, &f6i->fib6_siblings,
+ fib6_siblings) {
+ if (sibling->fib6_nh->fib_nh_dev == dev) {
+ rcu_read_unlock();
return true;
+ }
+ if (!READ_ONCE(f6i->fib6_nsiblings))
+ break;
}
+ rcu_read_unlock();
}
-
return false;
}
@@ -6324,8 +6332,9 @@ errout:
void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
unsigned int nlm_flags)
{
- struct sk_buff *skb;
struct net *net = info->nl_net;
+ struct sk_buff *skb;
+ size_t sz;
u32 seq;
int err;
@@ -6333,17 +6342,21 @@ void inet6_rt_notify(int event, struct fib6_info *rt, struct nl_info *info,
seq = info->nlh ? info->nlh->nlmsg_seq : 0;
rcu_read_lock();
-
- skb = nlmsg_new(rt6_nlmsg_size(rt), GFP_ATOMIC);
+ sz = rt6_nlmsg_size(rt);
+retry:
+ skb = nlmsg_new(sz, GFP_ATOMIC);
if (!skb)
goto errout;
err = rt6_fill_node(net, skb, rt, NULL, NULL, NULL, 0,
event, info->portid, seq, nlm_flags);
if (err < 0) {
- /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
- WARN_ON(err == -EMSGSIZE);
kfree_skb(skb);
+ /* -EMSGSIZE implies needed space grew under us. */
+ if (err == -EMSGSIZE) {
+ sz = max(rt6_nlmsg_size(rt), sz << 1);
+ goto retry;
+ }
goto errout;
}
diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py
index 3500d8f1bac45..c13dd5efa27a1 100755
--- a/tools/testing/selftests/drivers/net/hw/tso.py
+++ b/tools/testing/selftests/drivers/net/hw/tso.py
@@ -102,7 +102,7 @@ def build_tunnel(cfg, outer_ipver, tun_info):
remote_addr = cfg.remote_addr_v[outer_ipver]
tun_type = tun_info[0]
- tun_arg = tun_info[2]
+ tun_arg = tun_info[1]
ip(f"link add {tun_type}-ksft type {tun_type} {tun_arg} local {local_addr} remote {remote_addr} dev {cfg.ifname}")
defer(ip, f"link del {tun_type}-ksft")
ip(f"link set dev {tun_type}-ksft up")
@@ -119,15 +119,30 @@ def build_tunnel(cfg, outer_ipver, tun_info):
return remote_v4, remote_v6
+def restore_wanted_features(cfg):
+ features_cmd = ""
+ for feature in cfg.hw_features:
+ setting = "on" if feature in cfg.wanted_features else "off"
+ features_cmd += f" {feature} {setting}"
+ try:
+ ethtool(f"-K {cfg.ifname} {features_cmd}")
+ except Exception as e:
+ ksft_pr(f"WARNING: failure restoring wanted features: {e}")
+
+
def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None):
"""Construct specific tests from the common template."""
def f(cfg):
cfg.require_ipver(outer_ipver)
+ defer(restore_wanted_features, cfg)
if not cfg.have_stat_super_count and \
not cfg.have_stat_wire_count:
raise KsftSkipEx(f"Device does not support LSO queue stats")
+ if feature not in cfg.hw_features:
+ raise KsftSkipEx(f"Device does not support {feature}")
+
ipver = outer_ipver
if tun:
remote_v4, remote_v6 = build_tunnel(cfg, ipver, tun)
@@ -136,36 +151,21 @@ def test_builder(name, cfg, outer_ipver, feature, tun=None, inner_ipver=None):
remote_v4 = cfg.remote_addr_v["4"]
remote_v6 = cfg.remote_addr_v["6"]
- tun_partial = tun and tun[1]
- # Tunnel which can silently fall back to gso-partial
- has_gso_partial = tun and 'tx-gso-partial' in cfg.features
-
- # For TSO4 via partial we need mangleid
- if ipver == "4" and feature in cfg.partial_features:
- ksft_pr("Testing with mangleid enabled")
- if 'tx-tcp-mangleid-segmentation' not in cfg.features:
- ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on")
- defer(ethtool, f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off")
-
# First test without the feature enabled.
ethtool(f"-K {cfg.ifname} {feature} off")
- if has_gso_partial:
- ethtool(f"-K {cfg.ifname} tx-gso-partial off")
run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=False)
- # Now test with the feature enabled.
- # For compatible tunnels only - just GSO partial, not specific feature.
- if has_gso_partial:
+ ethtool(f"-K {cfg.ifname} tx-gso-partial off")
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation off")
+ if feature in cfg.partial_features:
ethtool(f"-K {cfg.ifname} tx-gso-partial on")
- run_one_stream(cfg, ipver, remote_v4, remote_v6,
- should_lso=tun_partial)
+ if ipver == "4":
+ ksft_pr("Testing with mangleid enabled")
+ ethtool(f"-K {cfg.ifname} tx-tcp-mangleid-segmentation on")
# Full feature enabled.
- if feature in cfg.features:
- ethtool(f"-K {cfg.ifname} {feature} on")
- run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True)
- else:
- raise KsftXfailEx(f"Device does not support {feature}")
+ ethtool(f"-K {cfg.ifname} {feature} on")
+ run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso=True)
f.__name__ = name + ((outer_ipver + "_") if tun else "") + "ipv" + inner_ipver
return f
@@ -176,23 +176,39 @@ def query_nic_features(cfg) -> None:
cfg.have_stat_super_count = False
cfg.have_stat_wire_count = False
- cfg.features = set()
features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
- for f in features["active"]["bits"]["bit"]:
- cfg.features.add(f["name"])
+
+ cfg.wanted_features = set()
+ for f in features["wanted"]["bits"]["bit"]:
+ cfg.wanted_features.add(f["name"])
+
+ cfg.hw_features = set()
+ hw_all_features_cmd = ""
+ for f in features["hw"]["bits"]["bit"]:
+ if f.get("value", False):
+ feature = f["name"]
+ cfg.hw_features.add(feature)
+ hw_all_features_cmd += f" {feature} on"
+ try:
+ ethtool(f"-K {cfg.ifname} {hw_all_features_cmd}")
+ except Exception as e:
+ ksft_pr(f"WARNING: failure enabling all hw features: {e}")
+ ksft_pr("partial gso feature detection may be impacted")
# Check which features are supported via GSO partial
cfg.partial_features = set()
- if 'tx-gso-partial' in cfg.features:
+ if 'tx-gso-partial' in cfg.hw_features:
ethtool(f"-K {cfg.ifname} tx-gso-partial off")
no_partial = set()
features = cfg.ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
for f in features["active"]["bits"]["bit"]:
no_partial.add(f["name"])
- cfg.partial_features = cfg.features - no_partial
+ cfg.partial_features = cfg.hw_features - no_partial
ethtool(f"-K {cfg.ifname} tx-gso-partial on")
+ restore_wanted_features(cfg)
+
stats = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)
if stats:
if 'tx-hw-gso-packets' in stats[0]:
@@ -211,13 +227,14 @@ def main() -> None:
query_nic_features(cfg)
test_info = (
- # name, v4/v6 ethtool_feature tun:(type, partial, args)
- ("", "4", "tx-tcp-segmentation", None),
- ("", "6", "tx-tcp6-segmentation", None),
- ("vxlan", "", "tx-udp_tnl-segmentation", ("vxlan", True, "id 100 dstport 4789 noudpcsum")),
- ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", False, "id 100 dstport 4789 udpcsum")),
- ("gre", "4", "tx-gre-segmentation", ("gre", False, "")),
- ("gre", "6", "tx-gre-segmentation", ("ip6gre", False, "")),
+ # name, v4/v6 ethtool_feature tun:(type, args, inner ip versions)
+ ("", "4", "tx-tcp-segmentation", None),
+ ("", "6", "tx-tcp6-segmentation", None),
+ ("vxlan", "4", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 noudpcsum", ("4", "6"))),
+ ("vxlan", "6", "tx-udp_tnl-segmentation", ("vxlan", "id 100 dstport 4789 udp6zerocsumtx udp6zerocsumrx", ("4", "6"))),
+ ("vxlan_csum", "", "tx-udp_tnl-csum-segmentation", ("vxlan", "id 100 dstport 4789 udpcsum", ("4", "6"))),
+ ("gre", "4", "tx-gre-segmentation", ("gre", "", ("4", "6"))),
+ ("gre", "6", "tx-gre-segmentation", ("ip6gre","", ("4", "6"))),
)
cases = []
@@ -227,11 +244,13 @@ def main() -> None:
if info[1] and outer_ipver != info[1]:
continue
- cases.append(test_builder(info[0], cfg, outer_ipver, info[2],
- tun=info[3], inner_ipver="4"))
if info[3]:
- cases.append(test_builder(info[0], cfg, outer_ipver, info[2],
- tun=info[3], inner_ipver="6"))
+ cases += [
+ test_builder(info[0], cfg, outer_ipver, info[2], info[3], inner_ipver)
+ for inner_ipver in info[3][2]
+ ]
+ else:
+ cases.append(test_builder(info[0], cfg, outer_ipver, info[2], None, outer_ipver))
ksft_run(cases=cases, args=(cfg, ))
ksft_exit()