summaryrefslogtreecommitdiff
path: root/drivers/net/virtio_net.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/virtio_net.c')
-rw-r--r--drivers/net/virtio_net.c264
1 files changed, 159 insertions, 105 deletions
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index fb5e68ed3ec27..486b5849033dc 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -62,7 +62,8 @@ static const unsigned long guest_offloads[] = {
VIRTIO_NET_F_GUEST_UFO,
VIRTIO_NET_F_GUEST_CSUM,
VIRTIO_NET_F_GUEST_USO4,
- VIRTIO_NET_F_GUEST_USO6
+ VIRTIO_NET_F_GUEST_USO6,
+ VIRTIO_NET_F_GUEST_HDRLEN
};
#define GUEST_OFFLOAD_GRO_HW_MASK ((1ULL << VIRTIO_NET_F_GUEST_TSO4) | \
@@ -204,6 +205,8 @@ struct control_buf {
__virtio16 vid;
__virtio64 offloads;
struct virtio_net_ctrl_rss rss;
+ struct virtio_net_ctrl_coal_tx coal_tx;
+ struct virtio_net_ctrl_coal_rx coal_rx;
};
struct virtnet_info {
@@ -446,7 +449,8 @@ static unsigned int mergeable_ctx_to_truesize(void *mrg_ctx)
static struct sk_buff *page_to_skb(struct virtnet_info *vi,
struct receive_queue *rq,
struct page *page, unsigned int offset,
- unsigned int len, unsigned int truesize)
+ unsigned int len, unsigned int truesize,
+ unsigned int headroom)
{
struct sk_buff *skb;
struct virtio_net_hdr_mrg_rxbuf *hdr;
@@ -464,11 +468,11 @@ static struct sk_buff *page_to_skb(struct virtnet_info *vi,
else
hdr_padded_len = sizeof(struct padded_vnet_hdr);
- buf = p;
+ buf = p - headroom;
len -= hdr_len;
offset += hdr_padded_len;
p += hdr_padded_len;
- tailroom = truesize - hdr_padded_len - len;
+ tailroom = truesize - headroom - hdr_padded_len - len;
shinfo_size = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
@@ -545,6 +549,87 @@ ok:
return skb;
}
+static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
+{
+ unsigned int len;
+ unsigned int packets = 0;
+ unsigned int bytes = 0;
+ void *ptr;
+
+ while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
+ if (likely(!is_xdp_frame(ptr))) {
+ struct sk_buff *skb = ptr;
+
+ pr_debug("Sent skb %p\n", skb);
+
+ bytes += skb->len;
+ napi_consume_skb(skb, in_napi);
+ } else {
+ struct xdp_frame *frame = ptr_to_xdp(ptr);
+
+ bytes += xdp_get_frame_len(frame);
+ xdp_return_frame(frame);
+ }
+ packets++;
+ }
+
+ /* Avoid overhead when no packets have been processed
+ * happens when called speculatively from start_xmit.
+ */
+ if (!packets)
+ return;
+
+ u64_stats_update_begin(&sq->stats.syncp);
+ sq->stats.bytes += bytes;
+ sq->stats.packets += packets;
+ u64_stats_update_end(&sq->stats.syncp);
+}
+
+static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
+{
+ if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
+ return false;
+ else if (q < vi->curr_queue_pairs)
+ return true;
+ else
+ return false;
+}
+
+static void check_sq_full_and_disable(struct virtnet_info *vi,
+ struct net_device *dev,
+ struct send_queue *sq)
+{
+ bool use_napi = sq->napi.weight;
+ int qnum;
+
+ qnum = sq - vi->sq;
+
+ /* If running out of space, stop queue to avoid getting packets that we
+ * are then unable to transmit.
+ * An alternative would be to force queuing layer to requeue the skb by
+ * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
+ * returned in a normal path of operation: it means that driver is not
+ * maintaining the TX queue stop/start state properly, and causes
+ * the stack to do a non-trivial amount of useless work.
+ * Since most packets only take 1 or 2 ring slots, stopping the queue
+ * early means 16 slots are typically wasted.
+ */
+ if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
+ netif_stop_subqueue(dev, qnum);
+ if (use_napi) {
+ if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
+ virtqueue_napi_schedule(&sq->napi, sq->vq);
+ } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
+ /* More just got used, free them then recheck. */
+ free_old_xmit_skbs(sq, false);
+ if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
+ netif_start_subqueue(dev, qnum);
+ virtqueue_disable_cb(sq->vq);
+ }
+ }
+ }
+}
+
static int __virtnet_xdp_xmit_one(struct virtnet_info *vi,
struct send_queue *sq,
struct xdp_frame *xdpf)
@@ -686,6 +771,9 @@ static int virtnet_xdp_xmit(struct net_device *dev,
}
ret = nxmit;
+ if (!is_xdp_raw_buffer_queue(vi, sq - vi->sq))
+ check_sq_full_and_disable(vi, dev, sq);
+
if (flags & XDP_XMIT_FLUSH) {
if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq))
kicks = 1;
@@ -729,8 +817,13 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
int page_off,
unsigned int *len)
{
- struct page *page = alloc_page(GFP_ATOMIC);
+ int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+ struct page *page;
+
+ if (page_off + *len + tailroom > PAGE_SIZE)
+ return NULL;
+ page = alloc_page(GFP_ATOMIC);
if (!page)
return NULL;
@@ -738,7 +831,6 @@ static struct page *xdp_linearize_page(struct receive_queue *rq,
page_off += *len;
while (--*num_buf) {
- int tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
unsigned int buflen;
void *buf;
int off;
@@ -925,7 +1017,7 @@ static struct sk_buff *receive_big(struct net_device *dev,
{
struct page *page = buf;
struct sk_buff *skb =
- page_to_skb(vi, rq, page, 0, len, PAGE_SIZE);
+ page_to_skb(vi, rq, page, 0, len, PAGE_SIZE, 0);
stats->bytes += len - vi->hdr_len;
if (unlikely(!skb))
@@ -1188,9 +1280,12 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
switch (act) {
case XDP_PASS:
+ head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
+ if (unlikely(!head_skb))
+ goto err_xdp_frags;
+
if (unlikely(xdp_page != page))
put_page(page);
- head_skb = build_skb_from_xdp_buff(dev, vi, &xdp, xdp_frags_truesz);
rcu_read_unlock();
return head_skb;
case XDP_TX:
@@ -1248,7 +1343,7 @@ err_xdp_frags:
rcu_read_unlock();
skip_xdp:
- head_skb = page_to_skb(vi, rq, page, offset, len, truesize);
+ head_skb = page_to_skb(vi, rq, page, offset, len, truesize, headroom);
curr_skb = head_skb;
if (unlikely(!curr_skb))
@@ -1714,52 +1809,6 @@ static int virtnet_receive(struct receive_queue *rq, int budget,
return stats.packets;
}
-static void free_old_xmit_skbs(struct send_queue *sq, bool in_napi)
-{
- unsigned int len;
- unsigned int packets = 0;
- unsigned int bytes = 0;
- void *ptr;
-
- while ((ptr = virtqueue_get_buf(sq->vq, &len)) != NULL) {
- if (likely(!is_xdp_frame(ptr))) {
- struct sk_buff *skb = ptr;
-
- pr_debug("Sent skb %p\n", skb);
-
- bytes += skb->len;
- napi_consume_skb(skb, in_napi);
- } else {
- struct xdp_frame *frame = ptr_to_xdp(ptr);
-
- bytes += xdp_get_frame_len(frame);
- xdp_return_frame(frame);
- }
- packets++;
- }
-
- /* Avoid overhead when no packets have been processed
- * happens when called speculatively from start_xmit.
- */
- if (!packets)
- return;
-
- u64_stats_update_begin(&sq->stats.syncp);
- sq->stats.bytes += bytes;
- sq->stats.packets += packets;
- u64_stats_update_end(&sq->stats.syncp);
-}
-
-static bool is_xdp_raw_buffer_queue(struct virtnet_info *vi, int q)
-{
- if (q < (vi->curr_queue_pairs - vi->xdp_queue_pairs))
- return false;
- else if (q < vi->curr_queue_pairs)
- return true;
- else
- return false;
-}
-
static void virtnet_poll_cleantx(struct receive_queue *rq)
{
struct virtnet_info *vi = rq->vq->vdev->priv;
@@ -1821,6 +1870,38 @@ static int virtnet_poll(struct napi_struct *napi, int budget)
return received;
}
+static void virtnet_disable_queue_pair(struct virtnet_info *vi, int qp_index)
+{
+ virtnet_napi_tx_disable(&vi->sq[qp_index].napi);
+ napi_disable(&vi->rq[qp_index].napi);
+ xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
+}
+
+static int virtnet_enable_queue_pair(struct virtnet_info *vi, int qp_index)
+{
+ struct net_device *dev = vi->dev;
+ int err;
+
+ err = xdp_rxq_info_reg(&vi->rq[qp_index].xdp_rxq, dev, qp_index,
+ vi->rq[qp_index].napi.napi_id);
+ if (err < 0)
+ return err;
+
+ err = xdp_rxq_info_reg_mem_model(&vi->rq[qp_index].xdp_rxq,
+ MEM_TYPE_PAGE_SHARED, NULL);
+ if (err < 0)
+ goto err_xdp_reg_mem_model;
+
+ virtnet_napi_enable(vi->rq[qp_index].vq, &vi->rq[qp_index].napi);
+ virtnet_napi_tx_enable(vi, vi->sq[qp_index].vq, &vi->sq[qp_index].napi);
+
+ return 0;
+
+err_xdp_reg_mem_model:
+ xdp_rxq_info_unreg(&vi->rq[qp_index].xdp_rxq);
+ return err;
+}
+
static int virtnet_open(struct net_device *dev)
{
struct virtnet_info *vi = netdev_priv(dev);
@@ -1834,22 +1915,20 @@ static int virtnet_open(struct net_device *dev)
if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
schedule_delayed_work(&vi->refill, 0);
- err = xdp_rxq_info_reg(&vi->rq[i].xdp_rxq, dev, i, vi->rq[i].napi.napi_id);
+ err = virtnet_enable_queue_pair(vi, i);
if (err < 0)
- return err;
-
- err = xdp_rxq_info_reg_mem_model(&vi->rq[i].xdp_rxq,
- MEM_TYPE_PAGE_SHARED, NULL);
- if (err < 0) {
- xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
- return err;
- }
-
- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
- virtnet_napi_tx_enable(vi, vi->sq[i].vq, &vi->sq[i].napi);
+ goto err_enable_qp;
}
return 0;
+
+err_enable_qp:
+ disable_delayed_refill(vi);
+ cancel_delayed_work_sync(&vi->refill);
+
+ for (i--; i >= 0; i--)
+ virtnet_disable_queue_pair(vi, i);
+ return err;
}
static int virtnet_poll_tx(struct napi_struct *napi, int budget)
@@ -1989,30 +2068,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
nf_reset_ct(skb);
}
- /* If running out of space, stop queue to avoid getting packets that we
- * are then unable to transmit.
- * An alternative would be to force queuing layer to requeue the skb by
- * returning NETDEV_TX_BUSY. However, NETDEV_TX_BUSY should not be
- * returned in a normal path of operation: it means that driver is not
- * maintaining the TX queue stop/start state properly, and causes
- * the stack to do a non-trivial amount of useless work.
- * Since most packets only take 1 or 2 ring slots, stopping the queue
- * early means 16 slots are typically wasted.
- */
- if (sq->vq->num_free < 2+MAX_SKB_FRAGS) {
- netif_stop_subqueue(dev, qnum);
- if (use_napi) {
- if (unlikely(!virtqueue_enable_cb_delayed(sq->vq)))
- virtqueue_napi_schedule(&sq->napi, sq->vq);
- } else if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) {
- /* More just got used, free them then recheck. */
- free_old_xmit_skbs(sq, false);
- if (sq->vq->num_free >= 2+MAX_SKB_FRAGS) {
- netif_start_subqueue(dev, qnum);
- virtqueue_disable_cb(sq->vq);
- }
- }
- }
+ check_sq_full_and_disable(vi, dev, sq);
if (kick || netif_xmit_stopped(txq)) {
if (virtqueue_kick_prepare(sq->vq) && virtqueue_notify(sq->vq)) {
@@ -2281,11 +2337,8 @@ static int virtnet_close(struct net_device *dev)
/* Make sure refill_work doesn't re-enable napi! */
cancel_delayed_work_sync(&vi->refill);
- for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_tx_disable(&vi->sq[i].napi);
- napi_disable(&vi->rq[i].napi);
- xdp_rxq_info_unreg(&vi->rq[i].xdp_rxq);
- }
+ for (i = 0; i < vi->max_queue_pairs; i++)
+ virtnet_disable_queue_pair(vi, i);
return 0;
}
@@ -2883,12 +2936,10 @@ static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
struct ethtool_coalesce *ec)
{
struct scatterlist sgs_tx, sgs_rx;
- struct virtio_net_ctrl_coal_tx coal_tx;
- struct virtio_net_ctrl_coal_rx coal_rx;
- coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
- coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
- sg_init_one(&sgs_tx, &coal_tx, sizeof(coal_tx));
+ vi->ctrl->coal_tx.tx_usecs = cpu_to_le32(ec->tx_coalesce_usecs);
+ vi->ctrl->coal_tx.tx_max_packets = cpu_to_le32(ec->tx_max_coalesced_frames);
+ sg_init_one(&sgs_tx, &vi->ctrl->coal_tx, sizeof(vi->ctrl->coal_tx));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
VIRTIO_NET_CTRL_NOTF_COAL_TX_SET,
@@ -2899,9 +2950,9 @@ static int virtnet_send_notf_coal_cmds(struct virtnet_info *vi,
vi->tx_usecs = ec->tx_coalesce_usecs;
vi->tx_max_packets = ec->tx_max_coalesced_frames;
- coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
- coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
- sg_init_one(&sgs_rx, &coal_rx, sizeof(coal_rx));
+ vi->ctrl->coal_rx.rx_usecs = cpu_to_le32(ec->rx_coalesce_usecs);
+ vi->ctrl->coal_rx.rx_max_packets = cpu_to_le32(ec->rx_max_coalesced_frames);
+ sg_init_one(&sgs_rx, &vi->ctrl->coal_rx, sizeof(vi->ctrl->coal_rx));
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_NOTF_COAL,
VIRTIO_NET_CTRL_NOTF_COAL_RX_SET,
@@ -3536,12 +3587,14 @@ static void free_unused_bufs(struct virtnet_info *vi)
struct virtqueue *vq = vi->sq[i].vq;
while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
virtnet_sq_free_unused_buf(vq, buf);
+ cond_resched();
}
for (i = 0; i < vi->max_queue_pairs; i++) {
struct virtqueue *vq = vi->rq[i].vq;
while ((buf = virtqueue_detach_unused_buf(vq)) != NULL)
virtnet_rq_free_unused_buf(vq, buf);
+ cond_resched();
}
}
@@ -4213,7 +4266,8 @@ static struct virtio_device_id id_table[] = {
VIRTIO_NET_F_CTRL_MAC_ADDR, \
VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
- VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL
+ VIRTIO_NET_F_RSS, VIRTIO_NET_F_HASH_REPORT, VIRTIO_NET_F_NOTF_COAL, \
+ VIRTIO_NET_F_GUEST_HDRLEN
static unsigned int features[] = {
VIRTNET_FEATURES,