summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/intel/igc/igc_main.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-04-29 11:57:23 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-04-29 11:57:23 -0700
commit9d31d2338950293ec19d9b095fbaa9030899dcb4 (patch)
treee688040d0557c24a2eeb9f6c9c223d949f6f7ef9 /drivers/net/ethernet/intel/igc/igc_main.c
parent635de956a7f5a6ffcb04f29d70630c64c717b56b (diff)
parent4a52dd8fefb45626dace70a63c0738dbd83b7edb (diff)
Merge tag 'net-next-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core: - bpf: - allow bpf programs calling kernel functions (initially to reuse TCP congestion control implementations) - enable task local storage for tracing programs - remove the need to store per-task state in hash maps, and allow tracing programs access to task local storage previously added for BPF_LSM - add bpf_for_each_map_elem() helper, allowing programs to walk all map elements in a more robust and easier to verify fashion - sockmap: support UDP and cross-protocol BPF_SK_SKB_VERDICT redirection - lpm: add support for batched ops in LPM trie - add BTF_KIND_FLOAT support - mostly to allow use of BTF on s390 which has floats in its headers files - improve BPF syscall documentation and extend the use of kdoc parsing scripts we already employ for bpf-helpers - libbpf, bpftool: support static linking of BPF ELF files - improve support for encapsulation of L2 packets - xdp: restructure redirect actions to avoid a runtime lookup, improving performance by 4-8% in microbenchmarks - xsk: build skb by page (aka generic zerocopy xmit) - improve performance of software AF_XDP path by 33% for devices which don't need headers in the linear skb part (e.g. virtio) - nexthop: resilient next-hop groups - improve path stability on next-hops group changes (incl. offload for mlxsw) - ipv6: segment routing: add support for IPv4 decapsulation - icmp: add support for RFC 8335 extended PROBE messages - inet: use bigger hash table for IP ID generation - tcp: deal better with delayed TX completions - make sure we don't give up on fast TCP retransmissions only because driver is slow in reporting that it completed transmitting the original - tcp: reorder tcp_congestion_ops for better cache locality - mptcp: - add sockopt support for common TCP options - add support for common TCP msg flags - include multiple address ids in RM_ADDR - add reset option support for resetting one subflow - udp: GRO L4 improvements - improve 'forward' / 'frag_list' co-existence with UDP tunnel GRO, allowing the first to take place correctly even for encapsulated UDP traffic - micro-optimize dev_gro_receive() and flow dissection, avoid retpoline overhead on VLAN and TEB GRO - use less memory for sysctls, add a new sysctl type, to allow using u8 instead of "int" and "long" and shrink networking sysctls - veth: allow GRO without XDP - this allows aggregating UDP packets before handing them off to routing, bridge, OvS, etc. - allow specifing ifindex when device is moved to another namespace - netfilter: - nft_socket: add support for cgroupsv2 - nftables: add catch-all set element - special element used to define a default action in case normal lookup missed - use net_generic infra in many modules to avoid allocating per-ns memory unnecessarily - xps: improve the xps handling to avoid potential out-of-bound accesses and use-after-free when XPS change race with other re-configuration under traffic - add a config knob to turn off per-cpu netdev refcnt to catch underflows in testing Device APIs: - add WWAN subsystem to organize the WWAN interfaces better and hopefully start driving towards more unified and vendor- independent APIs - ethtool: - add interface for reading IEEE MIB stats (incl. mlx5 and bnxt support) - allow network drivers to dump arbitrary SFP EEPROM data, current offset+length API was a poor fit for modern SFP which define EEPROM in terms of pages (incl. mlx5 support) - act_police, flow_offload: add support for packet-per-second policing (incl. offload for nfp) - psample: add additional metadata attributes like transit delay for packets sampled from switch HW (and corresponding egress and policy-based sampling in the mlxsw driver) - dsa: improve support for sandwiched LAGs with bridge and DSA - netfilter: - flowtable: use direct xmit in topologies with IP forwarding, bridging, vlans etc. - nftables: counter hardware offload support - Bluetooth: - improvements for firmware download w/ Intel devices - add support for reading AOSP vendor capabilities - add support for virtio transport driver - mac80211: - allow concurrent monitor iface and ethernet rx decap - set priority and queue mapping for injected frames - phy: add support for Clause-45 PHY Loopback - pci/iov: add sysfs MSI-X vector assignment interface to distribute MSI-X resources to VFs (incl. mlx5 support) New hardware/drivers: - dsa: mv88e6xxx: add support for Marvell mv88e6393x - 11-port Ethernet switch with 8x 1-Gigabit Ethernet and 3x 10-Gigabit interfaces. - dsa: support for legacy Broadcom tags used on BCM5325, BCM5365 and BCM63xx switches - Microchip KSZ8863 and KSZ8873; 3x 10/100Mbps Ethernet switches - ath11k: support for QCN9074 a 802.11ax device - Bluetooth: Broadcom BCM4330 and BMC4334 - phy: Marvell 88X2222 transceiver support - mdio: add BCM6368 MDIO mux bus controller - r8152: support RTL8153 and RTL8156 (USB Ethernet) chips - mana: driver for Microsoft Azure Network Adapter (MANA) - Actions Semi Owl Ethernet MAC - can: driver for ETAS ES58X CAN/USB interfaces Pure driver changes: - add XDP support to: enetc, igc, stmmac - add AF_XDP support to: stmmac - virtio: - page_to_skb() use build_skb when there's sufficient tailroom (21% improvement for 1000B UDP frames) - support XDP even without dedicated Tx queues - share the Tx queues with the stack when necessary - mlx5: - flow rules: add support for mirroring with conntrack, matching on ICMP, GTP, flex filters and more - support packet sampling with flow offloads - persist uplink representor netdev across eswitch mode changes - allow coexistence of CQE compression and HW time-stamping - add ethtool extended link error state reporting - ice, iavf: support flow filters, UDP Segmentation Offload - dpaa2-switch: - move the driver out of staging - add spanning tree (STP) support - add rx copybreak support - add tc flower hardware offload on ingress traffic - ionic: - implement Rx page reuse - support HW PTP time-stamping - octeon: support TC hardware offloads - flower matching on ingress and egress ratelimitting. - stmmac: - add RX frame steering based on VLAN priority in tc flower - support frame preemption (FPE) - intel: add cross time-stamping freq difference adjustment - ocelot: - support forwarding of MRP frames in HW - support multiple bridges - support PTP Sync one-step timestamping - dsa: mv88e6xxx, dpaa2-switch: offload bridge port flags like learning, flooding etc. - ipa: add IPA v4.5, v4.9 and v4.11 support (Qualcomm SDX55, SM8350, SC7280 SoCs) - mt7601u: enable TDLS support - mt76: - add support for 802.3 rx frames (mt7915/mt7615) - mt7915 flash pre-calibration support - mt7921/mt7663 runtime power management fixes" * tag 'net-next-5.13' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (2451 commits) net: selftest: fix build issue if INET is disabled net: netrom: nr_in: Remove redundant assignment to ns net: tun: Remove redundant assignment to ret net: phy: marvell: add downshift support for M88E1240 net: dsa: ksz: Make reg_mib_cnt a u8 as it never exceeds 255 net/sched: act_ct: Remove redundant ct get and check icmp: standardize naming of RFC 8335 PROBE constants bpf, selftests: Update array map tests for per-cpu batched ops bpf: Add batched ops support for percpu array bpf: Implement formatted output helpers with bstr_printf seq_file: Add a seq_bprintf function sfc: adjust efx->xdp_tx_queue_count with the real number of initialized queues net:nfc:digital: Fix a double free in digital_tg_recv_dep_req net: fix a concurrency bug in l2tp_tunnel_register() net/smc: Remove redundant assignment to rc mpls: Remove redundant assignment to err llc2: Remove redundant assignment to rc net/tls: Remove redundant initialization of record rds: Remove redundant assignment to nr_sig dt-bindings: net: mdio-gpio: add compatible for microchip,mdio-smi0 ...
Diffstat (limited to 'drivers/net/ethernet/intel/igc/igc_main.c')
-rw-r--r--drivers/net/ethernet/intel/igc/igc_main.c539
1 files changed, 464 insertions, 75 deletions
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 4d989ebc97134..069471b7ffb0a 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -10,17 +10,24 @@
#include <linux/ip.h>
#include <linux/pm_runtime.h>
#include <net/pkt_sched.h>
+#include <linux/bpf_trace.h>
#include <net/ipv6.h>
#include "igc.h"
#include "igc_hw.h"
#include "igc_tsn.h"
+#include "igc_xdp.h"
#define DRV_SUMMARY "Intel(R) 2.5G Ethernet Linux Driver"
#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
+#define IGC_XDP_PASS 0
+#define IGC_XDP_CONSUMED BIT(0)
+#define IGC_XDP_TX BIT(1)
+#define IGC_XDP_REDIRECT BIT(2)
+
static int debug = -1;
MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
@@ -176,8 +183,10 @@ static void igc_clean_tx_ring(struct igc_ring *tx_ring)
while (i != tx_ring->next_to_use) {
union igc_adv_tx_desc *eop_desc, *tx_desc;
- /* Free all the Tx ring sk_buffs */
- dev_kfree_skb_any(tx_buffer->skb);
+ if (tx_buffer->tx_flags & IGC_TX_FLAGS_XDP)
+ xdp_return_frame(tx_buffer->xdpf);
+ else
+ dev_kfree_skb_any(tx_buffer->skb);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -375,6 +384,8 @@ static void igc_clean_rx_ring(struct igc_ring *rx_ring)
i = 0;
}
+ clear_ring_uses_large_buffer(rx_ring);
+
rx_ring->next_to_alloc = 0;
rx_ring->next_to_clean = 0;
rx_ring->next_to_use = 0;
@@ -403,6 +414,8 @@ void igc_free_rx_resources(struct igc_ring *rx_ring)
{
igc_clean_rx_ring(rx_ring);
+ igc_xdp_unregister_rxq_info(rx_ring);
+
vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL;
@@ -440,7 +453,11 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring)
{
struct net_device *ndev = rx_ring->netdev;
struct device *dev = rx_ring->dev;
- int size, desc_len;
+ int size, desc_len, res;
+
+ res = igc_xdp_register_rxq_info(rx_ring);
+ if (res < 0)
+ return res;
size = sizeof(struct igc_rx_buffer) * rx_ring->count;
rx_ring->rx_buffer_info = vzalloc(size);
@@ -466,6 +483,7 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring)
return 0;
err:
+ igc_xdp_unregister_rxq_info(rx_ring);
vfree(rx_ring->rx_buffer_info);
rx_ring->rx_buffer_info = NULL;
netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n");
@@ -497,6 +515,11 @@ static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
return err;
}
+static bool igc_xdp_is_enabled(struct igc_adapter *adapter)
+{
+ return !!adapter->xdp_prog;
+}
+
/**
* igc_configure_rx_ring - Configure a receive ring after Reset
* @adapter: board private structure
@@ -513,6 +536,9 @@ static void igc_configure_rx_ring(struct igc_adapter *adapter,
u32 srrctl = 0, rxdctl = 0;
u64 rdba = ring->dma;
+ if (igc_xdp_is_enabled(adapter))
+ set_ring_uses_large_buffer(ring);
+
/* disable the queue */
wr32(IGC_RXDCTL(reg_idx), 0);
@@ -941,7 +967,7 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
ktime_t txtime = first->skb->tstamp;
- first->skb->tstamp = ktime_set(0, 0);
+ skb_txtime_consumed(first->skb);
context_desc->launch_time = igc_tx_launchtime(adapter,
txtime);
} else {
@@ -1029,7 +1055,7 @@ static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
((u32)((_input) & (_flag)) * ((_result) / (_flag))) : \
((u32)((_input) & (_flag)) / ((_flag) / (_result))))
-static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
+static u32 igc_tx_cmd_type(u32 tx_flags)
{
/* set type for advanced descriptor with frame checksum insertion */
u32 cmd_type = IGC_ADVTXD_DTYP_DATA |
@@ -1078,7 +1104,7 @@ static int igc_tx_map(struct igc_ring *tx_ring,
u16 i = tx_ring->next_to_use;
unsigned int data_len, size;
dma_addr_t dma;
- u32 cmd_type = igc_tx_cmd_type(skb, tx_flags);
+ u32 cmd_type = igc_tx_cmd_type(tx_flags);
tx_desc = IGC_TX_DESC(tx_ring, i);
@@ -1480,11 +1506,18 @@ static void igc_process_skb_fields(struct igc_ring *rx_ring,
}
static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
- const unsigned int size)
+ const unsigned int size,
+ int *rx_buffer_pgcnt)
{
struct igc_rx_buffer *rx_buffer;
rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+ *rx_buffer_pgcnt =
+#if (PAGE_SIZE < 8192)
+ page_count(rx_buffer->page);
+#else
+ 0;
+#endif
prefetchw(rx_buffer->page);
/* we are reusing so sync this buffer for CPU use */
@@ -1499,6 +1532,32 @@ static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
return rx_buffer;
}
+static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer,
+ unsigned int truesize)
+{
+#if (PAGE_SIZE < 8192)
+ buffer->page_offset ^= truesize;
+#else
+ buffer->page_offset += truesize;
+#endif
+}
+
+static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring,
+ unsigned int size)
+{
+ unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+ truesize = igc_rx_pg_size(ring) / 2;
+#else
+ truesize = ring_uses_build_skb(ring) ?
+ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
+ SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
+ SKB_DATA_ALIGN(size);
+#endif
+ return truesize;
+}
+
/**
* igc_add_rx_frag - Add contents of Rx buffer to sk_buff
* @rx_ring: rx descriptor ring to transact packets on
@@ -1513,20 +1572,19 @@ static void igc_add_rx_frag(struct igc_ring *rx_ring,
struct sk_buff *skb,
unsigned int size)
{
-#if (PAGE_SIZE < 8192)
- unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
+ unsigned int truesize;
- skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
- rx_buffer->page_offset, size, truesize);
- rx_buffer->page_offset ^= truesize;
+#if (PAGE_SIZE < 8192)
+ truesize = igc_rx_pg_size(rx_ring) / 2;
#else
- unsigned int truesize = ring_uses_build_skb(rx_ring) ?
- SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
- SKB_DATA_ALIGN(size);
+ truesize = ring_uses_build_skb(rx_ring) ?
+ SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
+ SKB_DATA_ALIGN(size);
+#endif
skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
rx_buffer->page_offset, size, truesize);
- rx_buffer->page_offset += truesize;
-#endif
+
+ igc_rx_buffer_flip(rx_buffer, truesize);
}
static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
@@ -1535,12 +1593,7 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
unsigned int size)
{
void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
-#if (PAGE_SIZE < 8192)
- unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
-#else
- unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
- SKB_DATA_ALIGN(IGC_SKB_PAD + size);
-#endif
+ unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
struct sk_buff *skb;
/* prefetch first cache line of first page */
@@ -1555,27 +1608,18 @@ static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
skb_reserve(skb, IGC_SKB_PAD);
__skb_put(skb, size);
- /* update buffer offset */
-#if (PAGE_SIZE < 8192)
- rx_buffer->page_offset ^= truesize;
-#else
- rx_buffer->page_offset += truesize;
-#endif
-
+ igc_rx_buffer_flip(rx_buffer, truesize);
return skb;
}
static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
struct igc_rx_buffer *rx_buffer,
- union igc_adv_rx_desc *rx_desc,
- unsigned int size)
+ struct xdp_buff *xdp,
+ ktime_t timestamp)
{
- void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
-#if (PAGE_SIZE < 8192)
- unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
-#else
- unsigned int truesize = SKB_DATA_ALIGN(size);
-#endif
+ unsigned int size = xdp->data_end - xdp->data;
+ unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
+ void *va = xdp->data;
unsigned int headlen;
struct sk_buff *skb;
@@ -1587,11 +1631,8 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
if (unlikely(!skb))
return NULL;
- if (unlikely(igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP))) {
- igc_ptp_rx_pktstamp(rx_ring->q_vector, va, skb);
- va += IGC_TS_HDR_LEN;
- size -= IGC_TS_HDR_LEN;
- }
+ if (timestamp)
+ skb_hwtstamps(skb)->hwtstamp = timestamp;
/* Determine available headroom for copy */
headlen = size;
@@ -1607,11 +1648,7 @@ static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
skb_add_rx_frag(skb, 0, rx_buffer->page,
(va + headlen) - page_address(rx_buffer->page),
size, truesize);
-#if (PAGE_SIZE < 8192)
- rx_buffer->page_offset ^= truesize;
-#else
- rx_buffer->page_offset += truesize;
-#endif
+ igc_rx_buffer_flip(rx_buffer, truesize);
} else {
rx_buffer->pagecnt_bias++;
}
@@ -1648,7 +1685,8 @@ static void igc_reuse_rx_page(struct igc_ring *rx_ring,
new_buff->pagecnt_bias = old_buff->pagecnt_bias;
}
-static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer)
+static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer,
+ int rx_buffer_pgcnt)
{
unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
struct page *page = rx_buffer->page;
@@ -1659,7 +1697,7 @@ static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer)
#if (PAGE_SIZE < 8192)
/* if we are only owner of page we can reuse it */
- if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
+ if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
return false;
#else
#define IGC_LAST_OFFSET \
@@ -1673,8 +1711,8 @@ static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer)
* the pagecnt_bias and page count so that we fully restock the
* number of references the driver holds.
*/
- if (unlikely(!pagecnt_bias)) {
- page_ref_add(page, USHRT_MAX);
+ if (unlikely(pagecnt_bias == 1)) {
+ page_ref_add(page, USHRT_MAX - 1);
rx_buffer->pagecnt_bias = USHRT_MAX;
}
@@ -1726,6 +1764,10 @@ static bool igc_cleanup_headers(struct igc_ring *rx_ring,
union igc_adv_rx_desc *rx_desc,
struct sk_buff *skb)
{
+ /* XDP packets use error pointer so abort at this point */
+ if (IS_ERR(skb))
+ return true;
+
if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) {
struct net_device *netdev = rx_ring->netdev;
@@ -1743,9 +1785,10 @@ static bool igc_cleanup_headers(struct igc_ring *rx_ring,
}
static void igc_put_rx_buffer(struct igc_ring *rx_ring,
- struct igc_rx_buffer *rx_buffer)
+ struct igc_rx_buffer *rx_buffer,
+ int rx_buffer_pgcnt)
{
- if (igc_can_reuse_rx_page(rx_buffer)) {
+ if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
/* hand second half of page back to the ring */
igc_reuse_rx_page(rx_ring, rx_buffer);
} else {
@@ -1765,7 +1808,14 @@ static void igc_put_rx_buffer(struct igc_ring *rx_ring,
static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
{
- return ring_uses_build_skb(rx_ring) ? IGC_SKB_PAD : 0;
+ struct igc_adapter *adapter = rx_ring->q_vector->adapter;
+
+ if (ring_uses_build_skb(rx_ring))
+ return IGC_SKB_PAD;
+ if (igc_xdp_is_enabled(adapter))
+ return XDP_PACKET_HEADROOM;
+
+ return 0;
}
static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
@@ -1804,7 +1854,8 @@ static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
bi->dma = dma;
bi->page = page;
bi->page_offset = igc_rx_offset(rx_ring);
- bi->pagecnt_bias = 1;
+ page_ref_add(page, USHRT_MAX - 1);
+ bi->pagecnt_bias = USHRT_MAX;
return true;
}
@@ -1879,17 +1930,196 @@ static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
}
}
+static int igc_xdp_init_tx_buffer(struct igc_tx_buffer *buffer,
+ struct xdp_frame *xdpf,
+ struct igc_ring *ring)
+{
+ dma_addr_t dma;
+
+ dma = dma_map_single(ring->dev, xdpf->data, xdpf->len, DMA_TO_DEVICE);
+ if (dma_mapping_error(ring->dev, dma)) {
+ netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
+ return -ENOMEM;
+ }
+
+ buffer->xdpf = xdpf;
+ buffer->tx_flags = IGC_TX_FLAGS_XDP;
+ buffer->protocol = 0;
+ buffer->bytecount = xdpf->len;
+ buffer->gso_segs = 1;
+ buffer->time_stamp = jiffies;
+ dma_unmap_len_set(buffer, len, xdpf->len);
+ dma_unmap_addr_set(buffer, dma, dma);
+ return 0;
+}
+
+/* This function requires __netif_tx_lock is held by the caller. */
+static int igc_xdp_init_tx_descriptor(struct igc_ring *ring,
+ struct xdp_frame *xdpf)
+{
+ struct igc_tx_buffer *buffer;
+ union igc_adv_tx_desc *desc;
+ u32 cmd_type, olinfo_status;
+ int err;
+
+ if (!igc_desc_unused(ring))
+ return -EBUSY;
+
+ buffer = &ring->tx_buffer_info[ring->next_to_use];
+ err = igc_xdp_init_tx_buffer(buffer, xdpf, ring);
+ if (err)
+ return err;
+
+ cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
+ IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
+ buffer->bytecount;
+ olinfo_status = buffer->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
+
+ desc = IGC_TX_DESC(ring, ring->next_to_use);
+ desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+ desc->read.olinfo_status = cpu_to_le32(olinfo_status);
+ desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(buffer, dma));
+
+ netdev_tx_sent_queue(txring_txq(ring), buffer->bytecount);
+
+ buffer->next_to_watch = desc;
+
+ ring->next_to_use++;
+ if (ring->next_to_use == ring->count)
+ ring->next_to_use = 0;
+
+ return 0;
+}
+
+static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter,
+ int cpu)
+{
+ int index = cpu;
+
+ if (unlikely(index < 0))
+ index = 0;
+
+ while (index >= adapter->num_tx_queues)
+ index -= adapter->num_tx_queues;
+
+ return adapter->tx_ring[index];
+}
+
+static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
+{
+ struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
+ int cpu = smp_processor_id();
+ struct netdev_queue *nq;
+ struct igc_ring *ring;
+ int res;
+
+ if (unlikely(!xdpf))
+ return -EFAULT;
+
+ ring = igc_xdp_get_tx_ring(adapter, cpu);
+ nq = txring_txq(ring);
+
+ __netif_tx_lock(nq, cpu);
+ res = igc_xdp_init_tx_descriptor(ring, xdpf);
+ __netif_tx_unlock(nq);
+ return res;
+}
+
+static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
+ struct xdp_buff *xdp)
+{
+ struct bpf_prog *prog;
+ int res;
+ u32 act;
+
+ rcu_read_lock();
+
+ prog = READ_ONCE(adapter->xdp_prog);
+ if (!prog) {
+ res = IGC_XDP_PASS;
+ goto unlock;
+ }
+
+ act = bpf_prog_run_xdp(prog, xdp);
+ switch (act) {
+ case XDP_PASS:
+ res = IGC_XDP_PASS;
+ break;
+ case XDP_TX:
+ if (igc_xdp_xmit_back(adapter, xdp) < 0)
+ res = IGC_XDP_CONSUMED;
+ else
+ res = IGC_XDP_TX;
+ break;
+ case XDP_REDIRECT:
+ if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0)
+ res = IGC_XDP_CONSUMED;
+ else
+ res = IGC_XDP_REDIRECT;
+ break;
+ default:
+ bpf_warn_invalid_xdp_action(act);
+ fallthrough;
+ case XDP_ABORTED:
+ trace_xdp_exception(adapter->netdev, prog, act);
+ fallthrough;
+ case XDP_DROP:
+ res = IGC_XDP_CONSUMED;
+ break;
+ }
+
+unlock:
+ rcu_read_unlock();
+ return ERR_PTR(-res);
+}
+
+/* This function assumes __netif_tx_lock is held by the caller. */
+static void igc_flush_tx_descriptors(struct igc_ring *ring)
+{
+ /* Once tail pointer is updated, hardware can fetch the descriptors
+ * any time so we issue a write membar here to ensure all memory
+ * writes are complete before the tail pointer is updated.
+ */
+ wmb();
+ writel(ring->next_to_use, ring->tail);
+}
+
+static void igc_finalize_xdp(struct igc_adapter *adapter, int status)
+{
+ int cpu = smp_processor_id();
+ struct netdev_queue *nq;
+ struct igc_ring *ring;
+
+ if (status & IGC_XDP_TX) {
+ ring = igc_xdp_get_tx_ring(adapter, cpu);
+ nq = txring_txq(ring);
+
+ __netif_tx_lock(nq, cpu);
+ igc_flush_tx_descriptors(ring);
+ __netif_tx_unlock(nq);
+ }
+
+ if (status & IGC_XDP_REDIRECT)
+ xdp_do_flush();
+}
+
static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
{
unsigned int total_bytes = 0, total_packets = 0;
+ struct igc_adapter *adapter = q_vector->adapter;
struct igc_ring *rx_ring = q_vector->rx.ring;
struct sk_buff *skb = rx_ring->skb;
u16 cleaned_count = igc_desc_unused(rx_ring);
+ int xdp_status = 0, rx_buffer_pgcnt;
while (likely(total_packets < budget)) {
union igc_adv_rx_desc *rx_desc;
struct igc_rx_buffer *rx_buffer;
- unsigned int size;
+ unsigned int size, truesize;
+ ktime_t timestamp = 0;
+ struct xdp_buff xdp;
+ int pkt_offset = 0;
+ void *pktbuf;
/* return some buffers to hardware, one at a time is too slow */
if (cleaned_count >= IGC_RX_BUFFER_WRITE) {
@@ -1908,16 +2138,52 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
*/
dma_rmb();
- rx_buffer = igc_get_rx_buffer(rx_ring, size);
+ rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
+ truesize = igc_get_rx_frame_truesize(rx_ring, size);
+
+ pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
- /* retrieve a buffer from the ring */
- if (skb)
+ if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
+ timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
+ pktbuf);
+ pkt_offset = IGC_TS_HDR_LEN;
+ size -= IGC_TS_HDR_LEN;
+ }
+
+ if (!skb) {
+ xdp.data = pktbuf + pkt_offset;
+ xdp.data_end = xdp.data + size;
+ xdp.data_hard_start = pktbuf - igc_rx_offset(rx_ring);
+ xdp_set_data_meta_invalid(&xdp);
+ xdp.frame_sz = truesize;
+ xdp.rxq = &rx_ring->xdp_rxq;
+
+ skb = igc_xdp_run_prog(adapter, &xdp);
+ }
+
+ if (IS_ERR(skb)) {
+ unsigned int xdp_res = -PTR_ERR(skb);
+
+ switch (xdp_res) {
+ case IGC_XDP_CONSUMED:
+ rx_buffer->pagecnt_bias++;
+ break;
+ case IGC_XDP_TX:
+ case IGC_XDP_REDIRECT:
+ igc_rx_buffer_flip(rx_buffer, truesize);
+ xdp_status |= xdp_res;
+ break;
+ }
+
+ total_packets++;
+ total_bytes += size;
+ } else if (skb)
igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
else if (ring_uses_build_skb(rx_ring))
skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size);
else
- skb = igc_construct_skb(rx_ring, rx_buffer,
- rx_desc, size);
+ skb = igc_construct_skb(rx_ring, rx_buffer, &xdp,
+ timestamp);
/* exit if we failed to retrieve a buffer */
if (!skb) {
@@ -1926,7 +2192,7 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
break;
}
- igc_put_rx_buffer(rx_ring, rx_buffer);
+ igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
cleaned_count++;
/* fetch next buffer in frame if non-eop */
@@ -1954,6 +2220,9 @@ static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
total_packets++;
}
+ if (xdp_status)
+ igc_finalize_xdp(adapter, xdp_status);
+
/* place incomplete frames back on ring for completion */
rx_ring->skb = skb;
@@ -2015,8 +2284,10 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
total_bytes += tx_buffer->bytecount;
total_packets += tx_buffer->gso_segs;
- /* free the skb */
- napi_consume_skb(tx_buffer->skb, napi_budget);
+ if (tx_buffer->tx_flags & IGC_TX_FLAGS_XDP)
+ xdp_return_frame(tx_buffer->xdpf);
+ else
+ napi_consume_skb(tx_buffer->skb, napi_budget);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
@@ -3580,7 +3851,7 @@ void igc_up(struct igc_adapter *adapter)
netif_tx_start_all_queues(adapter->netdev);
/* start the watchdog. */
- hw->mac.get_link_status = 1;
+ hw->mac.get_link_status = true;
schedule_work(&adapter->watchdog_task);
}
@@ -3858,6 +4129,11 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu)
int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
struct igc_adapter *adapter = netdev_priv(netdev);
+ if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) {
+ netdev_dbg(netdev, "Jumbo frames not supported with XDP");
+ return -EINVAL;
+ }
+
/* adjust max frame to be at least the size of a standard frame */
if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
@@ -3974,9 +4250,20 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev,
static void igc_tsync_interrupt(struct igc_adapter *adapter)
{
+ u32 ack, tsauxc, sec, nsec, tsicr;
struct igc_hw *hw = &adapter->hw;
- u32 tsicr = rd32(IGC_TSICR);
- u32 ack = 0;
+ struct ptp_clock_event event;
+ struct timespec64 ts;
+
+ tsicr = rd32(IGC_TSICR);
+ ack = 0;
+
+ if (tsicr & IGC_TSICR_SYS_WRAP) {
+ event.type = PTP_CLOCK_PPS;
+ if (adapter->ptp_caps.pps)
+ ptp_clock_event(adapter->ptp_clock, &event);
+ ack |= IGC_TSICR_SYS_WRAP;
+ }
if (tsicr & IGC_TSICR_TXTS) {
/* retrieve hardware timestamp */
@@ -3984,6 +4271,54 @@ static void igc_tsync_interrupt(struct igc_adapter *adapter)
ack |= IGC_TSICR_TXTS;
}
+ if (tsicr & IGC_TSICR_TT0) {
+ spin_lock(&adapter->tmreg_lock);
+ ts = timespec64_add(adapter->perout[0].start,
+ adapter->perout[0].period);
+ wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
+ wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec);
+ tsauxc = rd32(IGC_TSAUXC);
+ tsauxc |= IGC_TSAUXC_EN_TT0;
+ wr32(IGC_TSAUXC, tsauxc);
+ adapter->perout[0].start = ts;
+ spin_unlock(&adapter->tmreg_lock);
+ ack |= IGC_TSICR_TT0;
+ }
+
+ if (tsicr & IGC_TSICR_TT1) {
+ spin_lock(&adapter->tmreg_lock);
+ ts = timespec64_add(adapter->perout[1].start,
+ adapter->perout[1].period);
+ wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
+ wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec);
+ tsauxc = rd32(IGC_TSAUXC);
+ tsauxc |= IGC_TSAUXC_EN_TT1;
+ wr32(IGC_TSAUXC, tsauxc);
+ adapter->perout[1].start = ts;
+ spin_unlock(&adapter->tmreg_lock);
+ ack |= IGC_TSICR_TT1;
+ }
+
+ if (tsicr & IGC_TSICR_AUTT0) {
+ nsec = rd32(IGC_AUXSTMPL0);
+ sec = rd32(IGC_AUXSTMPH0);
+ event.type = PTP_CLOCK_EXTTS;
+ event.index = 0;
+ event.timestamp = sec * NSEC_PER_SEC + nsec;
+ ptp_clock_event(adapter->ptp_clock, &event);
+ ack |= IGC_TSICR_AUTT0;
+ }
+
+ if (tsicr & IGC_TSICR_AUTT1) {
+ nsec = rd32(IGC_AUXSTMPL1);
+ sec = rd32(IGC_AUXSTMPH1);
+ event.type = PTP_CLOCK_EXTTS;
+ event.index = 1;
+ event.timestamp = sec * NSEC_PER_SEC + nsec;
+ ptp_clock_event(adapter->ptp_clock, &event);
+ ack |= IGC_TSICR_AUTT1;
+ }
+
/* acknowledge the interrupts */
wr32(IGC_TSICR, ack);
}
@@ -4009,7 +4344,7 @@ static irqreturn_t igc_msix_other(int irq, void *data)
}
if (icr & IGC_ICR_LSC) {
- hw->mac.get_link_status = 1;
+ hw->mac.get_link_status = true;
/* guard against interrupt when we're going down */
if (!test_bit(__IGC_DOWN, &adapter->state))
mod_timer(&adapter->watchdog_timer, jiffies + 1);
@@ -4387,7 +4722,7 @@ static irqreturn_t igc_intr_msi(int irq, void *data)
}
if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
- hw->mac.get_link_status = 1;
+ hw->mac.get_link_status = true;
if (!test_bit(__IGC_DOWN, &adapter->state))
mod_timer(&adapter->watchdog_timer, jiffies + 1);
}
@@ -4429,7 +4764,7 @@ static irqreturn_t igc_intr(int irq, void *data)
}
if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
- hw->mac.get_link_status = 1;
+ hw->mac.get_link_status = true;
/* guard against interrupt when we're going down */
if (!test_bit(__IGC_DOWN, &adapter->state))
mod_timer(&adapter->watchdog_timer, jiffies + 1);
@@ -4583,7 +4918,7 @@ static int __igc_open(struct net_device *netdev, bool resuming)
netif_tx_start_all_queues(netdev);
/* start the watchdog. */
- hw->mac.get_link_status = 1;
+ hw->mac.get_link_status = true;
schedule_work(&adapter->watchdog_task);
return IGC_SUCCESS;
@@ -4844,6 +5179,58 @@ static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
}
}
+static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
+{
+ struct igc_adapter *adapter = netdev_priv(dev);
+
+ switch (bpf->command) {
+ case XDP_SETUP_PROG:
+ return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack);
+ default:
+ return -EOPNOTSUPP;
+ }
+}
+
+static int igc_xdp_xmit(struct net_device *dev, int num_frames,
+ struct xdp_frame **frames, u32 flags)
+{
+ struct igc_adapter *adapter = netdev_priv(dev);
+ int cpu = smp_processor_id();
+ struct netdev_queue *nq;
+ struct igc_ring *ring;
+ int i, drops;
+
+ if (unlikely(test_bit(__IGC_DOWN, &adapter->state)))
+ return -ENETDOWN;
+
+ if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
+ return -EINVAL;
+
+ ring = igc_xdp_get_tx_ring(adapter, cpu);
+ nq = txring_txq(ring);
+
+ __netif_tx_lock(nq, cpu);
+
+ drops = 0;
+ for (i = 0; i < num_frames; i++) {
+ int err;
+ struct xdp_frame *xdpf = frames[i];
+
+ err = igc_xdp_init_tx_descriptor(ring, xdpf);
+ if (err) {
+ xdp_return_frame_rx_napi(xdpf);
+ drops++;
+ }
+ }
+
+ if (flags & XDP_XMIT_FLUSH)
+ igc_flush_tx_descriptors(ring);
+
+ __netif_tx_unlock(nq);
+
+ return num_frames - drops;
+}
+
static const struct net_device_ops igc_netdev_ops = {
.ndo_open = igc_open,
.ndo_stop = igc_close,
@@ -4857,6 +5244,8 @@ static const struct net_device_ops igc_netdev_ops = {
.ndo_features_check = igc_features_check,
.ndo_do_ioctl = igc_ioctl,
.ndo_setup_tc = igc_setup_tc,
+ .ndo_bpf = igc_bpf,
+ .ndo_xdp_xmit = igc_xdp_xmit,
};
/* PCIe configuration access */
@@ -4924,7 +5313,7 @@ int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx)
{
struct igc_mac_info *mac = &adapter->hw.mac;
- mac->autoneg = 0;
+ mac->autoneg = false;
/* Make sure dplx is at most 1 bit and lsb of speed is not set
* for the switch() below to work
@@ -4946,13 +5335,13 @@ int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx)
mac->forced_speed_duplex = ADVERTISE_100_FULL;
break;
case SPEED_1000 + DUPLEX_FULL:
- mac->autoneg = 1;
+ mac->autoneg = true;
adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
break;
case SPEED_1000 + DUPLEX_HALF: /* not supported */
goto err_inval;
case SPEED_2500 + DUPLEX_FULL:
- mac->autoneg = 1;
+ mac->autoneg = true;
adapter->hw.phy.autoneg_advertised = ADVERTISE_2500_FULL;
break;
case SPEED_2500 + DUPLEX_HALF: /* not supported */