author     Paolo Abeni <pabeni@redhat.com>    2025-07-22 11:35:51 +0200
committer  Paolo Abeni <pabeni@redhat.com>    2025-07-22 11:35:51 +0200
commit     cdb794002d9059bf82acba6b68a7324db4451494 (patch)
tree       7db3465e86938a89b9643ec9e609f5d26939dc15
parent     3fc894728fb3a0d9282e81247b68c07468fe2985 (diff)
parent     c1fffc5d66a7147d557736c2341a511e0896d9ff (diff)
Merge branch 'gve-af_xdp-zero-copy-for-dqo-rda'
Joshua Washington says:
====================
gve: AF_XDP zero-copy for DQO RDA
This patch series adds support for AF_XDP zero-copy in the DQO RDA queue
format.
XSK infrastructure is updated to re-post buffers when an XSK pool is
added, because the XSK umem is posted directly to the NIC, a departure
from the bounce-buffer model used in GQI QPL. A registry of XSK pools
is introduced to prevent XSK pools from being used while in copy mode
(a sketch of this registry pattern follows the sign-off below).
v1: https://lore.kernel.org/netdev/20250714160451.124671-1-jeroendb@google.com/
====================
Link: https://patch.msgid.link/20250717152839.973004-1-jeroendb@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
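
For context while reading the diff below: the copy-mode guard mentioned
above is implemented as a per-device bitmap with one bit per RX queue.
The bit is set when an XSK pool is attached to a queue, cleared when it
is detached, and xsk_get_pool_from_qid() is only consulted when the bit
is set, so queues without a registered pool never take the zero-copy
path. The standalone sketch below restates that pattern; the field and
helper names mirror the driver (xsk_pools, gve_get_xsk_pool()), but the
surrounding struct is a simplified stand-in, not the real struct gve_priv.

/* Sketch of the per-queue XSK pool registry introduced in this series.
 * The struct is a placeholder; only the bitmap logic mirrors the diff.
 */
#include <linux/bitmap.h>
#include <linux/gfp.h>
#include <net/xdp_sock_drv.h>

struct pool_registry_sketch {
	struct net_device *dev;
	unsigned long *xsk_pools;	/* one bit per RX queue with an XSK pool */
	u32 max_rx_queues;
};

static int sketch_registry_init(struct pool_registry_sketch *reg, u32 max_queues)
{
	reg->max_rx_queues = max_queues;
	reg->xsk_pools = bitmap_zalloc(max_queues, GFP_KERNEL);
	return reg->xsk_pools ? 0 : -ENOMEM;
}

/* Called from the XDP_SETUP_XSK_POOL enable/disable paths. */
static void sketch_registry_mark(struct pool_registry_sketch *reg, u16 qid,
				 bool enable)
{
	if (enable)
		set_bit(qid, reg->xsk_pools);
	else
		clear_bit(qid, reg->xsk_pools);
}

/* Only hand out a pool for queues that were registered, so copy-mode
 * XSK sockets never reach the zero-copy RX/TX datapath.
 */
static struct xsk_buff_pool *sketch_registry_get(struct pool_registry_sketch *reg,
						 u16 qid)
{
	if (!test_bit(qid, reg->xsk_pools))
		return NULL;

	return xsk_get_pool_from_qid(reg->dev, qid);
}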
-rw-r--r--  drivers/net/ethernet/google/gve/gve.h                  |  24
-rw-r--r--  drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c  |  24
-rw-r--r--  drivers/net/ethernet/google/gve/gve_dqo.h              |   1
-rw-r--r--  drivers/net/ethernet/google/gve/gve_main.c             | 233
-rw-r--r--  drivers/net/ethernet/google/gve/gve_rx_dqo.c           |  94
-rw-r--r--  drivers/net/ethernet/google/gve/gve_tx_dqo.c           | 148
6 files changed, 423 insertions(+), 101 deletions(-)
diff --git a/drivers/net/ethernet/google/gve/gve.h b/drivers/net/ethernet/google/gve/gve.h index 53899096e89e..bceaf9b05cb4 100644 --- a/drivers/net/ethernet/google/gve/gve.h +++ b/drivers/net/ethernet/google/gve/gve.h @@ -190,6 +190,9 @@ struct gve_rx_buf_state_dqo { /* The page posted to HW. */ struct gve_rx_slot_page_info page_info; + /* XSK buffer */ + struct xdp_buff *xsk_buff; + /* The DMA address corresponding to `page_info`. */ dma_addr_t addr; @@ -331,7 +334,6 @@ struct gve_rx_ring { /* XDP stuff */ struct xdp_rxq_info xdp_rxq; - struct xdp_rxq_info xsk_rxq; struct xsk_buff_pool *xsk_pool; struct page_frag_cache page_cache; /* Page cache to allocate XDP frames */ }; @@ -400,11 +402,17 @@ enum gve_packet_state { GVE_PACKET_STATE_PENDING_REINJECT_COMPL, /* No valid completion received within the specified timeout. */ GVE_PACKET_STATE_TIMED_OUT_COMPL, + /* XSK pending packet has received a packet/reinjection completion, or + * has timed out. At this point, the pending packet can be counted by + * xsk_tx_complete and freed. + */ + GVE_PACKET_STATE_XSK_COMPLETE, }; enum gve_tx_pending_packet_dqo_type { GVE_TX_PENDING_PACKET_DQO_SKB, - GVE_TX_PENDING_PACKET_DQO_XDP_FRAME + GVE_TX_PENDING_PACKET_DQO_XDP_FRAME, + GVE_TX_PENDING_PACKET_DQO_XSK, }; struct gve_tx_pending_packet_dqo { @@ -441,10 +449,10 @@ struct gve_tx_pending_packet_dqo { /* Identifies the current state of the packet as defined in * `enum gve_packet_state`. */ - u8 state : 2; + u8 state : 3; /* gve_tx_pending_packet_dqo_type */ - u8 type : 1; + u8 type : 2; /* If packet is an outstanding miss completion, then the packet is * freed if the corresponding re-injection completion is not received @@ -513,6 +521,8 @@ struct gve_tx_ring { /* Cached value of `dqo_compl.free_tx_qpl_buf_cnt` */ u32 free_tx_qpl_buf_cnt; }; + + atomic_t xsk_reorder_queue_tail; } dqo_tx; }; @@ -546,6 +556,9 @@ struct gve_tx_ring { /* Last TX ring index fetched by HW */ atomic_t hw_tx_head; + u16 xsk_reorder_queue_head; + u16 xsk_reorder_queue_tail; + /* List to track pending packets which received a miss * completion but not a corresponding reinjection. */ @@ -599,6 +612,8 @@ struct gve_tx_ring { struct gve_tx_pending_packet_dqo *pending_packets; s16 num_pending_packets; + u16 *xsk_reorder_queue; + u32 complq_mask; /* complq size is complq_mask + 1 */ /* QPL fields */ @@ -803,6 +818,7 @@ struct gve_priv { struct gve_tx_queue_config tx_cfg; struct gve_rx_queue_config rx_cfg; + unsigned long *xsk_pools; /* bitmap of RX queues with XSK pools */ u32 num_ntfy_blks; /* split between TX and RX so must be even */ int numa_node; diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c index 6c3c459a1b5e..8f5021e59e0a 100644 --- a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c @@ -4,6 +4,7 @@ * Copyright (C) 2015-2024 Google, Inc. 
*/ +#include <net/xdp_sock_drv.h> #include "gve.h" #include "gve_utils.h" @@ -29,6 +30,10 @@ struct gve_rx_buf_state_dqo *gve_alloc_buf_state(struct gve_rx_ring *rx) /* Point buf_state to itself to mark it as allocated */ buf_state->next = buffer_id; + /* Clear the buffer pointers */ + buf_state->page_info.page = NULL; + buf_state->xsk_buff = NULL; + return buf_state; } @@ -286,7 +291,24 @@ int gve_alloc_buffer(struct gve_rx_ring *rx, struct gve_rx_desc_dqo *desc) { struct gve_rx_buf_state_dqo *buf_state; - if (rx->dqo.page_pool) { + if (rx->xsk_pool) { + buf_state = gve_alloc_buf_state(rx); + if (unlikely(!buf_state)) + return -ENOMEM; + + buf_state->xsk_buff = xsk_buff_alloc(rx->xsk_pool); + if (unlikely(!buf_state->xsk_buff)) { + xsk_set_rx_need_wakeup(rx->xsk_pool); + gve_free_buf_state(rx, buf_state); + return -ENOMEM; + } + /* Allocated xsk buffer. Clear wakeup in case it was set. */ + xsk_clear_rx_need_wakeup(rx->xsk_pool); + desc->buf_id = cpu_to_le16(buf_state - rx->dqo.buf_states); + desc->buf_addr = + cpu_to_le64(xsk_buff_xdp_get_dma(buf_state->xsk_buff)); + return 0; + } else if (rx->dqo.page_pool) { buf_state = gve_alloc_buf_state(rx); if (WARN_ON_ONCE(!buf_state)) return -ENOMEM; diff --git a/drivers/net/ethernet/google/gve/gve_dqo.h b/drivers/net/ethernet/google/gve/gve_dqo.h index bb278727f4d9..6eb442096e02 100644 --- a/drivers/net/ethernet/google/gve/gve_dqo.h +++ b/drivers/net/ethernet/google/gve/gve_dqo.h @@ -38,6 +38,7 @@ netdev_features_t gve_features_check_dqo(struct sk_buff *skb, netdev_features_t features); bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean); bool gve_xdp_poll_dqo(struct gve_notify_block *block); +bool gve_xsk_tx_poll_dqo(struct gve_notify_block *block, int budget); int gve_rx_poll_dqo(struct gve_notify_block *block, int budget); int gve_tx_alloc_rings_dqo(struct gve_priv *priv, struct gve_tx_alloc_rings_cfg *cfg); diff --git a/drivers/net/ethernet/google/gve/gve_main.c b/drivers/net/ethernet/google/gve/gve_main.c index be461751ff31..6ea306947417 100644 --- a/drivers/net/ethernet/google/gve/gve_main.c +++ b/drivers/net/ethernet/google/gve/gve_main.c @@ -4,6 +4,7 @@ * Copyright (C) 2015-2024 Google LLC */ +#include <linux/bitmap.h> #include <linux/bpf.h> #include <linux/cpumask.h> #include <linux/etherdevice.h> @@ -426,6 +427,12 @@ int gve_napi_poll_dqo(struct napi_struct *napi, int budget) if (block->rx) { work_done = gve_rx_poll_dqo(block, budget); + + /* Poll XSK TX as part of RX NAPI. Setup re-poll based on if + * either datapath has more work to do. 
+ */ + if (priv->xdp_prog) + reschedule |= gve_xsk_tx_poll_dqo(block, budget); reschedule |= work_done == budget; } @@ -1158,18 +1165,84 @@ static int gve_reset_recovery(struct gve_priv *priv, bool was_up); static void gve_turndown(struct gve_priv *priv); static void gve_turnup(struct gve_priv *priv); +static void gve_unreg_xsk_pool(struct gve_priv *priv, u16 qid) +{ + struct gve_rx_ring *rx; + + if (!priv->rx) + return; + + rx = &priv->rx[qid]; + rx->xsk_pool = NULL; + if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) + xdp_rxq_info_unreg_mem_model(&rx->xdp_rxq); + + if (!priv->tx) + return; + priv->tx[gve_xdp_tx_queue_id(priv, qid)].xsk_pool = NULL; +} + +static int gve_reg_xsk_pool(struct gve_priv *priv, struct net_device *dev, + struct xsk_buff_pool *pool, u16 qid) +{ + struct gve_rx_ring *rx; + u16 tx_qid; + int err; + + rx = &priv->rx[qid]; + err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, pool); + if (err) { + gve_unreg_xsk_pool(priv, qid); + return err; + } + + rx->xsk_pool = pool; + + tx_qid = gve_xdp_tx_queue_id(priv, qid); + priv->tx[tx_qid].xsk_pool = pool; + + return 0; +} + +static void gve_unreg_xdp_info(struct gve_priv *priv) +{ + int i; + + if (!priv->tx_cfg.num_xdp_queues || !priv->rx) + return; + + for (i = 0; i < priv->rx_cfg.num_queues; i++) { + struct gve_rx_ring *rx = &priv->rx[i]; + + if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) + xdp_rxq_info_unreg(&rx->xdp_rxq); + + gve_unreg_xsk_pool(priv, i); + } +} + +static struct xsk_buff_pool *gve_get_xsk_pool(struct gve_priv *priv, int qid) +{ + if (!test_bit(qid, priv->xsk_pools)) + return NULL; + + return xsk_get_pool_from_qid(priv->dev, qid); +} + static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) { struct napi_struct *napi; struct gve_rx_ring *rx; int err = 0; - int i, j; - u32 tx_qid; + int i; if (!priv->tx_cfg.num_xdp_queues) return 0; for (i = 0; i < priv->rx_cfg.num_queues; i++) { + struct xsk_buff_pool *xsk_pool; + rx = &priv->rx[i]; napi = &priv->ntfy_blocks[rx->ntfy_id].napi; @@ -1177,7 +1250,11 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) napi->napi_id); if (err) goto err; - if (gve_is_qpl(priv)) + + xsk_pool = gve_get_xsk_pool(priv, i); + if (xsk_pool) + err = gve_reg_xsk_pool(priv, dev, xsk_pool, i); + else if (gve_is_qpl(priv)) err = xdp_rxq_info_reg_mem_model(&rx->xdp_rxq, MEM_TYPE_PAGE_SHARED, NULL); @@ -1187,60 +1264,14 @@ static int gve_reg_xdp_info(struct gve_priv *priv, struct net_device *dev) rx->dqo.page_pool); if (err) goto err; - rx->xsk_pool = xsk_get_pool_from_qid(dev, i); - if (rx->xsk_pool) { - err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, i, - napi->napi_id); - if (err) - goto err; - err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, - MEM_TYPE_XSK_BUFF_POOL, NULL); - if (err) - goto err; - xsk_pool_set_rxq_info(rx->xsk_pool, - &rx->xsk_rxq); - } - } - - for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { - tx_qid = gve_xdp_tx_queue_id(priv, i); - priv->tx[tx_qid].xsk_pool = xsk_get_pool_from_qid(dev, i); } return 0; err: - for (j = i; j >= 0; j--) { - rx = &priv->rx[j]; - if (xdp_rxq_info_is_reg(&rx->xdp_rxq)) - xdp_rxq_info_unreg(&rx->xdp_rxq); - if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) - xdp_rxq_info_unreg(&rx->xsk_rxq); - } + gve_unreg_xdp_info(priv); return err; } -static void gve_unreg_xdp_info(struct gve_priv *priv) -{ - int i, tx_qid; - - if (!priv->tx_cfg.num_xdp_queues || !priv->rx || !priv->tx) - return; - - for (i = 0; i < priv->rx_cfg.num_queues; i++) { - struct gve_rx_ring *rx = &priv->rx[i]; - - 
xdp_rxq_info_unreg(&rx->xdp_rxq); - if (rx->xsk_pool) { - xdp_rxq_info_unreg(&rx->xsk_rxq); - rx->xsk_pool = NULL; - } - } - - for (i = 0; i < priv->tx_cfg.num_xdp_queues; i++) { - tx_qid = gve_xdp_tx_queue_id(priv, i); - priv->tx[tx_qid].xsk_pool = NULL; - } -} static void gve_drain_page_cache(struct gve_priv *priv) { @@ -1555,9 +1586,6 @@ static int gve_xsk_pool_enable(struct net_device *dev, u16 qid) { struct gve_priv *priv = netdev_priv(dev); - struct napi_struct *napi; - struct gve_rx_ring *rx; - int tx_qid; int err; if (qid >= priv->rx_cfg.num_queues) { @@ -1575,34 +1603,31 @@ static int gve_xsk_pool_enable(struct net_device *dev, if (err) return err; + set_bit(qid, priv->xsk_pools); + /* If XDP prog is not installed or interface is down, return. */ if (!priv->xdp_prog || !netif_running(dev)) return 0; - rx = &priv->rx[qid]; - napi = &priv->ntfy_blocks[rx->ntfy_id].napi; - err = xdp_rxq_info_reg(&rx->xsk_rxq, dev, qid, napi->napi_id); - if (err) - goto err; - - err = xdp_rxq_info_reg_mem_model(&rx->xsk_rxq, - MEM_TYPE_XSK_BUFF_POOL, NULL); + err = gve_reg_xsk_pool(priv, dev, pool, qid); if (err) - goto err; - - xsk_pool_set_rxq_info(pool, &rx->xsk_rxq); - rx->xsk_pool = pool; - - tx_qid = gve_xdp_tx_queue_id(priv, qid); - priv->tx[tx_qid].xsk_pool = pool; + goto err_xsk_pool_dma_mapped; + /* Stop and start RDA queues to repost buffers. */ + if (!gve_is_qpl(priv)) { + err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); + if (err) + goto err_xsk_pool_registered; + } return 0; -err: - if (xdp_rxq_info_is_reg(&rx->xsk_rxq)) - xdp_rxq_info_unreg(&rx->xsk_rxq); +err_xsk_pool_registered: + gve_unreg_xsk_pool(priv, qid); +err_xsk_pool_dma_mapped: + clear_bit(qid, priv->xsk_pools); xsk_pool_dma_unmap(pool, - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_WEAK_ORDERING); return err; } @@ -1614,18 +1639,28 @@ static int gve_xsk_pool_disable(struct net_device *dev, struct napi_struct *napi_tx; struct xsk_buff_pool *pool; int tx_qid; + int err; - pool = xsk_get_pool_from_qid(dev, qid); - if (!pool) - return -EINVAL; if (qid >= priv->rx_cfg.num_queues) return -EINVAL; - /* If XDP prog is not installed or interface is down, unmap DMA and - * return. - */ - if (!priv->xdp_prog || !netif_running(dev)) - goto done; + clear_bit(qid, priv->xsk_pools); + + pool = xsk_get_pool_from_qid(dev, qid); + if (pool) + xsk_pool_dma_unmap(pool, + DMA_ATTR_SKIP_CPU_SYNC | + DMA_ATTR_WEAK_ORDERING); + + if (!netif_running(dev) || !priv->tx_cfg.num_xdp_queues) + return 0; + + /* Stop and start RDA queues to repost buffers. 
*/ + if (!gve_is_qpl(priv) && priv->xdp_prog) { + err = gve_configure_rings_xdp(priv, priv->rx_cfg.num_queues); + if (err) + return err; + } napi_rx = &priv->ntfy_blocks[priv->rx[qid].ntfy_id].napi; napi_disable(napi_rx); /* make sure current rx poll is done */ @@ -1634,22 +1669,19 @@ static int gve_xsk_pool_disable(struct net_device *dev, napi_tx = &priv->ntfy_blocks[priv->tx[tx_qid].ntfy_id].napi; napi_disable(napi_tx); /* make sure current tx poll is done */ - priv->rx[qid].xsk_pool = NULL; - xdp_rxq_info_unreg(&priv->rx[qid].xsk_rxq); - priv->tx[tx_qid].xsk_pool = NULL; + gve_unreg_xsk_pool(priv, qid); smp_mb(); /* Make sure it is visible to the workers on datapath */ napi_enable(napi_rx); - if (gve_rx_work_pending(&priv->rx[qid])) - napi_schedule(napi_rx); - napi_enable(napi_tx); - if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) - napi_schedule(napi_tx); + if (gve_is_gqi(priv)) { + if (gve_rx_work_pending(&priv->rx[qid])) + napi_schedule(napi_rx); + + if (gve_tx_clean_pending(priv, &priv->tx[tx_qid])) + napi_schedule(napi_tx); + } -done: - xsk_pool_dma_unmap(pool, - DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_WEAK_ORDERING); return 0; } @@ -2275,6 +2307,7 @@ static void gve_set_netdev_xdp_features(struct gve_priv *priv) } else if (priv->queue_format == GVE_DQO_RDA_FORMAT) { xdp_features = NETDEV_XDP_ACT_BASIC; xdp_features |= NETDEV_XDP_ACT_REDIRECT; + xdp_features |= NETDEV_XDP_ACT_XSK_ZEROCOPY; } else { xdp_features = 0; } @@ -2370,10 +2403,22 @@ static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) priv->ts_config.rx_filter = HWTSTAMP_FILTER_NONE; setup_device: + priv->xsk_pools = bitmap_zalloc(priv->rx_cfg.max_queues, GFP_KERNEL); + if (!priv->xsk_pools) { + err = -ENOMEM; + goto err; + } + gve_set_netdev_xdp_features(priv); err = gve_setup_device_resources(priv); - if (!err) - return 0; + if (err) + goto err_free_xsk_bitmap; + + return 0; + +err_free_xsk_bitmap: + bitmap_free(priv->xsk_pools); + priv->xsk_pools = NULL; err: gve_adminq_free(&priv->pdev->dev, priv); return err; @@ -2383,6 +2428,8 @@ static void gve_teardown_priv_resources(struct gve_priv *priv) { gve_teardown_device_resources(priv); gve_adminq_free(&priv->pdev->dev, priv); + bitmap_free(priv->xsk_pools); + priv->xsk_pools = NULL; } static void gve_trigger_reset(struct gve_priv *priv) diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index afaa822b1227..7380c2b7a2d8 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -16,6 +16,7 @@ #include <net/ip6_checksum.h> #include <net/ipv6.h> #include <net/tcp.h> +#include <net/xdp_sock_drv.h> static void gve_rx_free_hdr_bufs(struct gve_priv *priv, struct gve_rx_ring *rx) { @@ -149,6 +150,10 @@ void gve_rx_free_ring_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, gve_free_to_page_pool(rx, bs, false); else gve_free_qpl_page_dqo(bs); + if (gve_buf_state_is_allocated(rx, bs) && bs->xsk_buff) { + xsk_buff_free(bs->xsk_buff); + bs->xsk_buff = NULL; + } } if (rx->dqo.qpl) { @@ -580,8 +585,11 @@ static int gve_xdp_tx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, int err; xdpf = xdp_convert_buff_to_frame(xdp); - if (unlikely(!xdpf)) + if (unlikely(!xdpf)) { + if (rx->xsk_pool) + xsk_buff_free(xdp); return -ENOSPC; + } tx_qid = gve_xdp_tx_queue_id(priv, rx->q_num); tx = &priv->tx[tx_qid]; @@ -592,6 +600,41 @@ static int gve_xdp_tx_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, return err; } +static void gve_xsk_done_dqo(struct gve_priv *priv, 
struct gve_rx_ring *rx, + struct xdp_buff *xdp, struct bpf_prog *xprog, + int xdp_act) +{ + switch (xdp_act) { + case XDP_ABORTED: + case XDP_DROP: + default: + xsk_buff_free(xdp); + break; + case XDP_TX: + if (unlikely(gve_xdp_tx_dqo(priv, rx, xdp))) + goto err; + break; + case XDP_REDIRECT: + if (unlikely(xdp_do_redirect(priv->dev, xdp, xprog))) + goto err; + break; + } + + u64_stats_update_begin(&rx->statss); + if ((u32)xdp_act < GVE_XDP_ACTIONS) + rx->xdp_actions[xdp_act]++; + u64_stats_update_end(&rx->statss); + return; + +err: + u64_stats_update_begin(&rx->statss); + if (xdp_act == XDP_TX) + rx->xdp_tx_errors++; + if (xdp_act == XDP_REDIRECT) + rx->xdp_redirect_errors++; + u64_stats_update_end(&rx->statss); +} + static void gve_xdp_done_dqo(struct gve_priv *priv, struct gve_rx_ring *rx, struct xdp_buff *xdp, struct bpf_prog *xprog, int xdp_act, @@ -633,6 +676,48 @@ err: return; } +static int gve_rx_xsk_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, int buf_len, + struct bpf_prog *xprog) +{ + struct xdp_buff *xdp = buf_state->xsk_buff; + struct gve_priv *priv = rx->gve; + int xdp_act; + + xdp->data_end = xdp->data + buf_len; + xsk_buff_dma_sync_for_cpu(xdp); + + if (xprog) { + xdp_act = bpf_prog_run_xdp(xprog, xdp); + buf_len = xdp->data_end - xdp->data; + if (xdp_act != XDP_PASS) { + gve_xsk_done_dqo(priv, rx, xdp, xprog, xdp_act); + gve_free_buf_state(rx, buf_state); + return 0; + } + } + + /* Copy the data to skb */ + rx->ctx.skb_head = gve_rx_copy_data(priv->dev, napi, + xdp->data, buf_len); + if (unlikely(!rx->ctx.skb_head)) { + xsk_buff_free(xdp); + gve_free_buf_state(rx, buf_state); + return -ENOMEM; + } + rx->ctx.skb_tail = rx->ctx.skb_head; + + /* Free XSK buffer and Buffer state */ + xsk_buff_free(xdp); + gve_free_buf_state(rx, buf_state); + + /* Update Stats */ + u64_stats_update_begin(&rx->statss); + rx->xdp_actions[XDP_PASS]++; + u64_stats_update_end(&rx->statss); + return 0; +} + /* Returns 0 if descriptor is completed successfully. * Returns -EINVAL if descriptor is invalid. * Returns -ENOMEM if data cannot be copied to skb. @@ -671,7 +756,11 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, buf_len = compl_desc->packet_len; hdr_len = compl_desc->header_len; - /* Page might have not been used for a while and was likely last written + xprog = READ_ONCE(priv->xdp_prog); + if (buf_state->xsk_buff) + return gve_rx_xsk_dqo(napi, rx, buf_state, buf_len, xprog); + + /* Page might have not been used for awhile and was likely last written * by a different thread. */ if (rx->dqo.page_pool) { @@ -721,7 +810,6 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, return 0; } - xprog = READ_ONCE(priv->xdp_prog); if (xprog) { struct xdp_buff xdp; void *old_data; diff --git a/drivers/net/ethernet/google/gve/gve_tx_dqo.c b/drivers/net/ethernet/google/gve/gve_tx_dqo.c index ce5370b741ec..6f1d515673d2 100644 --- a/drivers/net/ethernet/google/gve/gve_tx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_tx_dqo.c @@ -13,6 +13,7 @@ #include <linux/tcp.h> #include <linux/slab.h> #include <linux/skbuff.h> +#include <net/xdp_sock_drv.h> /* Returns true if tx_bufs are available. 
*/ static bool gve_has_free_tx_qpl_bufs(struct gve_tx_ring *tx, int count) @@ -241,6 +242,9 @@ static void gve_tx_free_ring_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, tx->dqo.tx_ring = NULL; } + kvfree(tx->dqo.xsk_reorder_queue); + tx->dqo.xsk_reorder_queue = NULL; + kvfree(tx->dqo.pending_packets); tx->dqo.pending_packets = NULL; @@ -345,6 +349,17 @@ static int gve_tx_alloc_ring_dqo(struct gve_priv *priv, tx->dqo.pending_packets[tx->dqo.num_pending_packets - 1].next = -1; atomic_set_release(&tx->dqo_compl.free_pending_packets, -1); + + /* Only alloc xsk pool for XDP queues */ + if (idx >= cfg->qcfg->num_queues && cfg->num_xdp_rings) { + tx->dqo.xsk_reorder_queue = + kvcalloc(tx->dqo.complq_mask + 1, + sizeof(tx->dqo.xsk_reorder_queue[0]), + GFP_KERNEL); + if (!tx->dqo.xsk_reorder_queue) + goto err; + } + tx->dqo_compl.miss_completions.head = -1; tx->dqo_compl.miss_completions.tail = -1; tx->dqo_compl.timed_out_completions.head = -1; @@ -992,6 +1007,38 @@ drop: return 0; } +static void gve_xsk_reorder_queue_push_dqo(struct gve_tx_ring *tx, + u16 completion_tag) +{ + u32 tail = atomic_read(&tx->dqo_tx.xsk_reorder_queue_tail); + + tx->dqo.xsk_reorder_queue[tail] = completion_tag; + tail = (tail + 1) & tx->dqo.complq_mask; + atomic_set_release(&tx->dqo_tx.xsk_reorder_queue_tail, tail); +} + +static struct gve_tx_pending_packet_dqo * +gve_xsk_reorder_queue_head(struct gve_tx_ring *tx) +{ + u32 head = tx->dqo_compl.xsk_reorder_queue_head; + + if (head == tx->dqo_compl.xsk_reorder_queue_tail) { + tx->dqo_compl.xsk_reorder_queue_tail = + atomic_read_acquire(&tx->dqo_tx.xsk_reorder_queue_tail); + + if (head == tx->dqo_compl.xsk_reorder_queue_tail) + return NULL; + } + + return &tx->dqo.pending_packets[tx->dqo.xsk_reorder_queue[head]]; +} + +static void gve_xsk_reorder_queue_pop_dqo(struct gve_tx_ring *tx) +{ + tx->dqo_compl.xsk_reorder_queue_head++; + tx->dqo_compl.xsk_reorder_queue_head &= tx->dqo.complq_mask; +} + /* Transmit a given skb and ring the doorbell. 
*/ netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev) { @@ -1015,6 +1062,62 @@ netdev_tx_t gve_tx_dqo(struct sk_buff *skb, struct net_device *dev) return NETDEV_TX_OK; } +static bool gve_xsk_tx_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, + int budget) +{ + struct xsk_buff_pool *pool = tx->xsk_pool; + struct xdp_desc desc; + bool repoll = false; + int sent = 0; + + spin_lock(&tx->dqo_tx.xdp_lock); + for (; sent < budget; sent++) { + struct gve_tx_pending_packet_dqo *pkt; + s16 completion_tag; + dma_addr_t addr; + u32 desc_idx; + + if (unlikely(!gve_has_avail_slots_tx_dqo(tx, 1, 1))) { + repoll = true; + break; + } + + if (!xsk_tx_peek_desc(pool, &desc)) + break; + + pkt = gve_alloc_pending_packet(tx); + pkt->type = GVE_TX_PENDING_PACKET_DQO_XSK; + pkt->num_bufs = 0; + completion_tag = pkt - tx->dqo.pending_packets; + + addr = xsk_buff_raw_get_dma(pool, desc.addr); + xsk_buff_raw_dma_sync_for_device(pool, addr, desc.len); + + desc_idx = tx->dqo_tx.tail; + gve_tx_fill_pkt_desc_dqo(tx, &desc_idx, + true, desc.len, + addr, completion_tag, true, + false); + ++pkt->num_bufs; + gve_tx_update_tail(tx, desc_idx); + tx->dqo_tx.posted_packet_desc_cnt += pkt->num_bufs; + gve_xsk_reorder_queue_push_dqo(tx, completion_tag); + } + + if (sent) { + gve_tx_put_doorbell_dqo(priv, tx->q_resources, tx->dqo_tx.tail); + xsk_tx_release(pool); + } + + spin_unlock(&tx->dqo_tx.xdp_lock); + + u64_stats_update_begin(&tx->statss); + tx->xdp_xsk_sent += sent; + u64_stats_update_end(&tx->statss); + + return (sent == budget) || repoll; +} + static void add_to_list(struct gve_tx_ring *tx, struct gve_index_list *list, struct gve_tx_pending_packet_dqo *pending_packet) { @@ -1152,6 +1255,9 @@ static void gve_handle_packet_completion(struct gve_priv *priv, pending_packet->xdpf = NULL; gve_free_pending_packet(tx, pending_packet); break; + case GVE_TX_PENDING_PACKET_DQO_XSK: + pending_packet->state = GVE_PACKET_STATE_XSK_COMPLETE; + break; default: WARN_ON_ONCE(1); } @@ -1251,8 +1357,34 @@ static void remove_timed_out_completions(struct gve_priv *priv, remove_from_list(tx, &tx->dqo_compl.timed_out_completions, pending_packet); + + /* Need to count XSK packets in xsk_tx_completed. 
*/ + if (pending_packet->type == GVE_TX_PENDING_PACKET_DQO_XSK) + pending_packet->state = GVE_PACKET_STATE_XSK_COMPLETE; + else + gve_free_pending_packet(tx, pending_packet); + } +} + +static void gve_tx_process_xsk_completions(struct gve_tx_ring *tx) +{ + u32 num_xsks = 0; + + while (true) { + struct gve_tx_pending_packet_dqo *pending_packet = + gve_xsk_reorder_queue_head(tx); + + if (!pending_packet || + pending_packet->state != GVE_PACKET_STATE_XSK_COMPLETE) + break; + + num_xsks++; + gve_xsk_reorder_queue_pop_dqo(tx); gve_free_pending_packet(tx, pending_packet); } + + if (num_xsks) + xsk_tx_completed(tx->xsk_pool, num_xsks); } int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, @@ -1333,6 +1465,9 @@ int gve_clean_tx_done_dqo(struct gve_priv *priv, struct gve_tx_ring *tx, remove_miss_completions(priv, tx); remove_timed_out_completions(priv, tx); + if (tx->xsk_pool) + gve_tx_process_xsk_completions(tx); + u64_stats_update_begin(&tx->statss); tx->bytes_done += pkt_compl_bytes + reinject_compl_bytes; tx->pkt_done += pkt_compl_pkts + reinject_compl_pkts; @@ -1365,6 +1500,19 @@ bool gve_tx_poll_dqo(struct gve_notify_block *block, bool do_clean) return compl_desc->generation != tx->dqo_compl.cur_gen_bit; } +bool gve_xsk_tx_poll_dqo(struct gve_notify_block *rx_block, int budget) +{ + struct gve_rx_ring *rx = rx_block->rx; + struct gve_priv *priv = rx->gve; + struct gve_tx_ring *tx; + + tx = &priv->tx[gve_xdp_tx_queue_id(priv, rx->q_num)]; + if (tx->xsk_pool) + return gve_xsk_tx_dqo(priv, tx, budget); + + return 0; +} + bool gve_xdp_poll_dqo(struct gve_notify_block *block) { struct gve_tx_compl_desc *compl_desc; |
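
A non-obvious piece of the TX changes above is the XSK reorder queue.
Packet completions can arrive out of order on DQO, but xsk_tx_completed()
only takes a count, so descriptors must be handed back to the XSK pool in
submission order. The diff does this with a ring of completion tags: the
TX path pushes each tag and publishes an atomic tail, the completion
handler flips the matching pending packet to GVE_PACKET_STATE_XSK_COMPLETE,
and a drain step pops tags from the head only while the head-of-line
packet is complete. The sketch below restates that single-producer /
single-consumer pattern outside the driver; the sketch_ names and the
fixed-size array are illustrative simplifications, not the gve code.

/* Sketch of the XSK completion reorder ring used by the TX path above.
 * One producer (the XSK TX poll) and one consumer (completion cleanup).
 */
#include <linux/atomic.h>
#include <linux/types.h>

#define SKETCH_RING_SIZE	256	/* power of two, like complq_mask + 1 */
#define SKETCH_RING_MASK	(SKETCH_RING_SIZE - 1)

struct sketch_xsk_reorder {
	u16 tags[SKETCH_RING_SIZE];	/* completion tags in submission order */
	atomic_t prod_tail;		/* published by the TX (producer) side */
	u16 cons_head;			/* owned by the completion (consumer) side */
	u16 cons_tail_cache;		/* consumer's snapshot of prod_tail */
};

/* Producer: remember the tag of a just-posted XSK descriptor. */
static void sketch_reorder_push(struct sketch_xsk_reorder *q, u16 tag)
{
	u32 tail = atomic_read(&q->prod_tail);

	q->tags[tail] = tag;
	atomic_set_release(&q->prod_tail, (tail + 1) & SKETCH_RING_MASK);
}

/* Consumer: pop tags only while the oldest outstanding packet is done,
 * so the count reported to xsk_tx_completed() stays in FIFO order.
 */
static u32 sketch_reorder_drain(struct sketch_xsk_reorder *q,
				bool (*tag_complete)(u16 tag))
{
	u32 drained = 0;

	while (true) {
		u16 head = q->cons_head;

		if (head == q->cons_tail_cache) {
			q->cons_tail_cache =
				atomic_read_acquire(&q->prod_tail);
			if (head == q->cons_tail_cache)
				break;
		}

		if (!tag_complete(q->tags[head]))
			break;

		q->cons_head = (head + 1) & SKETCH_RING_MASK;
		drained++;
	}

	return drained;	/* caller feeds this count to xsk_tx_completed() */
}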