From 70181d51209cbcdf9ce2171eac3f3458281d2947 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 10 Apr 2013 20:50:48 +0000 Subject: vhost_net: remove tx polling state After commit 2b8b328b61c799957a456a5a8dab8cc7dea68575 (vhost_net: handle polling errors when setting backend), we in fact track the polling state through poll->wqh, so there's no need to duplicate the work with an extra vhost_net_poll_state. So this patch removes it and makes the code simpler. This patch also removes all the tx starting/stopping code in the tx path according to Michael's suggestion. Netperf test shows almost the same result in stream test, but gets improvements on TCP_RR tests (both zerocopy and copy) especially on low load cases. Tested between multiqueue kvm guest and external host with two directly connected 82599s. zerocopy disabled: sessions|transaction rates|normalize| before/after/+improvements 1 | 9510.24/11727.29/+23.3% | 693.54/887.68/+28.0% | 25| 192931.50/241729.87/+25.3% | 2376.80/2771.70/+16.6% | 50| 277634.64/291905.76/+5% | 3118.36/3230.11/+3.6% | zerocopy enabled: sessions|transaction rates|normalize| before/after/+improvements 1 | 7318.33/11929.76/+63.0% | 521.86/843.30/+61.6% | 25| 167264.88/242422.15/+44.9% | 2181.60/2788.16/+27.8% | 50| 272181.02/294347.04/+8.1% | 3071.56/3257.85/+6.1% | Signed-off-by: Jason Wang Acked-by: Michael S. Tsirkin Signed-off-by: David S. 
Miller --- drivers/vhost/net.c | 74 +++++------------------------------------------------ 1 file changed, 6 insertions(+), 68 deletions(-) (limited to 'drivers/vhost/net.c') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index ec6fb3fa59bb..87c216c1e54e 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,20 +64,10 @@ enum { VHOST_NET_VQ_MAX = 2, }; -enum vhost_net_poll_state { - VHOST_NET_POLL_DISABLED = 0, - VHOST_NET_POLL_STARTED = 1, - VHOST_NET_POLL_STOPPED = 2, -}; - struct vhost_net { struct vhost_dev dev; struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; - /* Tells us whether we are polling a socket for TX. - * We only do this when socket buffer fills up. - * Protected by tx vq lock. */ - enum vhost_net_poll_state tx_poll_state; /* Number of TX recently submitted. * Protected by tx vq lock. */ unsigned tx_packets; @@ -155,28 +145,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, } } -/* Caller must have TX VQ lock */ -static void tx_poll_stop(struct vhost_net *net) -{ - if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) - return; - vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); - net->tx_poll_state = VHOST_NET_POLL_STOPPED; -} - -/* Caller must have TX VQ lock */ -static int tx_poll_start(struct vhost_net *net, struct socket *sock) -{ - int ret; - - if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) - return 0; - ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); - if (!ret) - net->tx_poll_state = VHOST_NET_POLL_STARTED; - return ret; -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. 
Once lower device DMA done contiguously, we will signal KVM @@ -242,7 +210,7 @@ static void handle_tx(struct vhost_net *net) .msg_flags = MSG_DONTWAIT, }; size_t len, total_len = 0; - int err, wmem; + int err; size_t hdr_size; struct socket *sock; struct vhost_ubuf_ref *uninitialized_var(ubufs); @@ -253,19 +221,9 @@ static void handle_tx(struct vhost_net *net) if (!sock) return; - wmem = atomic_read(&sock->sk->sk_wmem_alloc); - if (wmem >= sock->sk->sk_sndbuf) { - mutex_lock(&vq->mutex); - tx_poll_start(net, sock); - mutex_unlock(&vq->mutex); - return; - } - mutex_lock(&vq->mutex); vhost_disable_notify(&net->dev, vq); - if (wmem < sock->sk->sk_sndbuf / 2) - tx_poll_stop(net); hdr_size = vq->vhost_hlen; zcopy = vq->ubufs; @@ -285,23 +243,14 @@ static void handle_tx(struct vhost_net *net) if (head == vq->num) { int num_pends; - wmem = atomic_read(&sock->sk->sk_wmem_alloc); - if (wmem >= sock->sk->sk_sndbuf * 3 / 4) { - tx_poll_start(net, sock); - set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); - break; - } /* If more outstanding DMAs, queue the work. * Handle upend_idx wrap around */ num_pends = likely(vq->upend_idx >= vq->done_idx) ? 
(vq->upend_idx - vq->done_idx) : (vq->upend_idx + UIO_MAXIOV - vq->done_idx); - if (unlikely(num_pends > VHOST_MAX_PEND)) { - tx_poll_start(net, sock); - set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); + if (unlikely(num_pends > VHOST_MAX_PEND)) break; - } if (unlikely(vhost_enable_notify(&net->dev, vq))) { vhost_disable_notify(&net->dev, vq); continue; @@ -364,8 +313,6 @@ static void handle_tx(struct vhost_net *net) UIO_MAXIOV; } vhost_discard_vq_desc(vq, 1); - if (err == -EAGAIN || err == -ENOBUFS) - tx_poll_start(net, sock); break; } if (err != len) @@ -628,7 +575,6 @@ static int vhost_net_open(struct inode *inode, struct file *f) vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev); vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev); - n->tx_poll_state = VHOST_NET_POLL_DISABLED; f->private_data = n; @@ -638,32 +584,24 @@ static int vhost_net_open(struct inode *inode, struct file *f) static void vhost_net_disable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { + struct vhost_poll *poll = n->poll + (vq - n->vqs); if (!vq->private_data) return; - if (vq == n->vqs + VHOST_NET_VQ_TX) { - tx_poll_stop(n); - n->tx_poll_state = VHOST_NET_POLL_DISABLED; - } else - vhost_poll_stop(n->poll + VHOST_NET_VQ_RX); + vhost_poll_stop(poll); } static int vhost_net_enable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { + struct vhost_poll *poll = n->poll + (vq - n->vqs); struct socket *sock; - int ret; sock = rcu_dereference_protected(vq->private_data, lockdep_is_held(&vq->mutex)); if (!sock) return 0; - if (vq == n->vqs + VHOST_NET_VQ_TX) { - n->tx_poll_state = VHOST_NET_POLL_STOPPED; - ret = tx_poll_start(n, sock); - } else - ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file); - return ret; + return vhost_poll_start(poll, sock->file); } static struct socket *vhost_net_stop_vq(struct vhost_net *n, -- cgit v1.2.3 From 3ab2e420ec1caf4ead233f3161ac7d86fe5d2a9f Mon Sep 17 00:00:00 2001 From: Asias He Date: Sat, 27 Apr 2013 
11:16:48 +0800 Subject: vhost: Allow device specific fields per vq This is useful for any device that wants device specific fields per vq. For example, tcm_vhost wants a per vq field to track requests which are in flight on the vq. Also, on top of this we can add patches to move things like ubufs from vhost.h out to net.c. Signed-off-by: Michael S. Tsirkin Signed-off-by: Asias He Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 64 +++++++++++++++++++++------------- drivers/vhost/tcm_vhost.c | 55 +++++++++++++++++++---------- drivers/vhost/vhost.c | 88 +++++++++++++++++++++++------------------------ drivers/vhost/vhost.h | 4 +-- 4 files changed, 124 insertions(+), 87 deletions(-) (limited to 'drivers/vhost/net.c') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 87c216c1e54e..176aa030dc5f 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,9 +64,13 @@ enum { VHOST_NET_VQ_MAX = 2, }; +struct vhost_net_virtqueue { + struct vhost_virtqueue vq; +}; + struct vhost_net { struct vhost_dev dev; - struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; + struct vhost_net_virtqueue vqs[VHOST_NET_VQ_MAX]; struct vhost_poll poll[VHOST_NET_VQ_MAX]; /* Number of TX recently submitted. * Protected by tx vq lock. */ @@ -198,7 +202,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) * read-size critical section for our kind of RCU. */ static void handle_tx(struct vhost_net *net) { - struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = &net->vqs[VHOST_NET_VQ_TX].vq; unsigned out, in, s; int head; struct msghdr msg = { @@ -417,7 +421,7 @@ err: * read-size critical section for our kind of RCU. 
*/ static void handle_rx(struct vhost_net *net) { - struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; + struct vhost_virtqueue *vq = &net->vqs[VHOST_NET_VQ_RX].vq; unsigned uninitialized_var(in), log; struct vhost_log *vq_log; struct msghdr msg = { @@ -559,17 +563,26 @@ static int vhost_net_open(struct inode *inode, struct file *f) { struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL); struct vhost_dev *dev; + struct vhost_virtqueue **vqs; int r; if (!n) return -ENOMEM; + vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL); + if (!vqs) { + kfree(n); + return -ENOMEM; + } dev = &n->dev; - n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick; - n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick; - r = vhost_dev_init(dev, n->vqs, VHOST_NET_VQ_MAX); + vqs[VHOST_NET_VQ_TX] = &n->vqs[VHOST_NET_VQ_TX].vq; + vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq; + n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick; + n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick; + r = vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); if (r < 0) { kfree(n); + kfree(vqs); return r; } @@ -584,7 +597,9 @@ static int vhost_net_open(struct inode *inode, struct file *f) static void vhost_net_disable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { - struct vhost_poll *poll = n->poll + (vq - n->vqs); + struct vhost_net_virtqueue *nvq = + container_of(vq, struct vhost_net_virtqueue, vq); + struct vhost_poll *poll = n->poll + (nvq - n->vqs); if (!vq->private_data) return; vhost_poll_stop(poll); @@ -593,7 +608,9 @@ static void vhost_net_disable_vq(struct vhost_net *n, static int vhost_net_enable_vq(struct vhost_net *n, struct vhost_virtqueue *vq) { - struct vhost_poll *poll = n->poll + (vq - n->vqs); + struct vhost_net_virtqueue *nvq = + container_of(vq, struct vhost_net_virtqueue, vq); + struct vhost_poll *poll = n->poll + (nvq - n->vqs); struct socket *sock; sock = rcu_dereference_protected(vq->private_data, @@ -621,30 +638,30 @@ static struct socket 
*vhost_net_stop_vq(struct vhost_net *n, static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, struct socket **rx_sock) { - *tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX); - *rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX); + *tx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_TX].vq); + *rx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_RX].vq); } static void vhost_net_flush_vq(struct vhost_net *n, int index) { vhost_poll_flush(n->poll + index); - vhost_poll_flush(&n->dev.vqs[index].poll); + vhost_poll_flush(&n->vqs[index].vq.poll); } static void vhost_net_flush(struct vhost_net *n) { vhost_net_flush_vq(n, VHOST_NET_VQ_TX); vhost_net_flush_vq(n, VHOST_NET_VQ_RX); - if (n->dev.vqs[VHOST_NET_VQ_TX].ubufs) { - mutex_lock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); + if (n->vqs[VHOST_NET_VQ_TX].vq.ubufs) { + mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = true; - mutex_unlock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); + mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); /* Wait for all lower device DMAs done. */ - vhost_ubuf_put_and_wait(n->dev.vqs[VHOST_NET_VQ_TX].ubufs); - mutex_lock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); + vhost_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].vq.ubufs); + mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = false; - kref_init(&n->dev.vqs[VHOST_NET_VQ_TX].ubufs->kref); - mutex_unlock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); + kref_init(&n->vqs[VHOST_NET_VQ_TX].vq.ubufs->kref); + mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); } } @@ -665,6 +682,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) /* We do an extra flush before freeing memory, * since jobs can re-queue themselves. 
*/ vhost_net_flush(n); + kfree(n->dev.vqs); kfree(n); return 0; } @@ -750,7 +768,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) r = -ENOBUFS; goto err; } - vq = n->vqs + index; + vq = &n->vqs[index].vq; mutex_lock(&vq->mutex); /* Verify that ring has been setup correctly. */ @@ -870,10 +888,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) n->dev.acked_features = features; smp_wmb(); for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { - mutex_lock(&n->vqs[i].mutex); - n->vqs[i].vhost_hlen = vhost_hlen; - n->vqs[i].sock_hlen = sock_hlen; - mutex_unlock(&n->vqs[i].mutex); + mutex_lock(&n->vqs[i].vq.mutex); + n->vqs[i].vq.vhost_hlen = vhost_hlen; + n->vqs[i].vq.sock_hlen = sock_hlen; + mutex_unlock(&n->vqs[i].vq.mutex); } vhost_net_flush(n); mutex_unlock(&n->dev.mutex); diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c index 1677238d281f..99d3480450e7 100644 --- a/drivers/vhost/tcm_vhost.c +++ b/drivers/vhost/tcm_vhost.c @@ -74,13 +74,17 @@ enum { #define VHOST_SCSI_MAX_VQ 128 #define VHOST_SCSI_MAX_EVENT 128 +struct vhost_scsi_virtqueue { + struct vhost_virtqueue vq; +}; + struct vhost_scsi { /* Protected by vhost_scsi->dev.mutex */ struct tcm_vhost_tpg **vs_tpg; char vs_vhost_wwpn[TRANSPORT_IQN_LEN]; struct vhost_dev dev; - struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ]; + struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ]; struct vhost_work vs_completion_work; /* cmd completion work item */ struct llist_head vs_completion_list; /* cmd completion queue */ @@ -366,7 +370,7 @@ static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt *evt) static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs, u32 event, u32 reason) { - struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; + struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; struct tcm_vhost_evt *evt; if (vs->vs_events_nr > VHOST_SCSI_MAX_EVENT) { @@ -409,7 +413,7 @@ static void 
vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd) static void tcm_vhost_do_evt_work(struct vhost_scsi *vs, struct tcm_vhost_evt *evt) { - struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; + struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; struct virtio_scsi_event *event = &evt->event; struct virtio_scsi_event __user *eventp; unsigned out, in; @@ -460,7 +464,7 @@ static void tcm_vhost_evt_work(struct vhost_work *work) { struct vhost_scsi *vs = container_of(work, struct vhost_scsi, vs_event_work); - struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; + struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; struct tcm_vhost_evt *evt; struct llist_node *llnode; @@ -511,8 +515,10 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) v_rsp.sense_len); ret = copy_to_user(tv_cmd->tvc_resp, &v_rsp, sizeof(v_rsp)); if (likely(ret == 0)) { + struct vhost_scsi_virtqueue *q; vhost_add_used(tv_cmd->tvc_vq, tv_cmd->tvc_vq_desc, 0); - vq = tv_cmd->tvc_vq - vs->vqs; + q = container_of(tv_cmd->tvc_vq, struct vhost_scsi_virtqueue, vq); + vq = q - vs->vqs; __set_bit(vq, signal); } else pr_err("Faulted on virtio_scsi_cmd_resp\n"); @@ -523,7 +529,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work) vq = -1; while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1)) < VHOST_SCSI_MAX_VQ) - vhost_signal(&vs->dev, &vs->vqs[vq]); + vhost_signal(&vs->dev, &vs->vqs[vq].vq); } static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd( @@ -938,7 +944,7 @@ static void vhost_scsi_handle_kick(struct vhost_work *work) static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index) { - vhost_poll_flush(&vs->dev.vqs[index].poll); + vhost_poll_flush(&vs->vqs[index].vq.poll); } static void vhost_scsi_flush(struct vhost_scsi *vs) @@ -975,7 +981,7 @@ static int vhost_scsi_set_endpoint( /* Verify that ring has been setup correctly. */ for (index = 0; index < vs->dev.nvqs; ++index) { /* Verify that ring has been setup correctly. 
*/ - if (!vhost_vq_access_ok(&vs->vqs[index])) { + if (!vhost_vq_access_ok(&vs->vqs[index].vq)) { ret = -EFAULT; goto out; } @@ -1022,7 +1028,7 @@ static int vhost_scsi_set_endpoint( memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn, sizeof(vs->vs_vhost_wwpn)); for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { - vq = &vs->vqs[i]; + vq = &vs->vqs[i].vq; /* Flushing the vhost_work acts as synchronize_rcu */ mutex_lock(&vq->mutex); rcu_assign_pointer(vq->private_data, vs_tpg); @@ -1063,7 +1069,7 @@ static int vhost_scsi_clear_endpoint( mutex_lock(&vs->dev.mutex); /* Verify that ring has been setup correctly. */ for (index = 0; index < vs->dev.nvqs; ++index) { - if (!vhost_vq_access_ok(&vs->vqs[index])) { + if (!vhost_vq_access_ok(&vs->vqs[index].vq)) { ret = -EFAULT; goto err_dev; } @@ -1103,7 +1109,7 @@ static int vhost_scsi_clear_endpoint( } if (match) { for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { - vq = &vs->vqs[i]; + vq = &vs->vqs[i].vq; /* Flushing the vhost_work acts as synchronize_rcu */ mutex_lock(&vq->mutex); rcu_assign_pointer(vq->private_data, NULL); @@ -1151,24 +1157,36 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features) static int vhost_scsi_open(struct inode *inode, struct file *f) { struct vhost_scsi *s; + struct vhost_virtqueue **vqs; int r, i; s = kzalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; + vqs = kmalloc(VHOST_SCSI_MAX_VQ * sizeof(*vqs), GFP_KERNEL); + if (!vqs) { + kfree(s); + return -ENOMEM; + } + vhost_work_init(&s->vs_completion_work, vhost_scsi_complete_cmd_work); vhost_work_init(&s->vs_event_work, tcm_vhost_evt_work); s->vs_events_nr = 0; s->vs_events_missed = false; - s->vqs[VHOST_SCSI_VQ_CTL].handle_kick = vhost_scsi_ctl_handle_kick; - s->vqs[VHOST_SCSI_VQ_EVT].handle_kick = vhost_scsi_evt_handle_kick; - for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) - s->vqs[i].handle_kick = vhost_scsi_handle_kick; - r = vhost_dev_init(&s->dev, s->vqs, VHOST_SCSI_MAX_VQ); + vqs[VHOST_SCSI_VQ_CTL] = &s->vqs[VHOST_SCSI_VQ_CTL].vq; + 
vqs[VHOST_SCSI_VQ_EVT] = &s->vqs[VHOST_SCSI_VQ_EVT].vq; + s->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick; + s->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick; + for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) { + vqs[i] = &s->vqs[i].vq; + s->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; + } + r = vhost_dev_init(&s->dev, vqs, VHOST_SCSI_MAX_VQ); if (r < 0) { + kfree(vqs); kfree(s); return r; } @@ -1190,6 +1208,7 @@ static int vhost_scsi_release(struct inode *inode, struct file *f) vhost_dev_cleanup(&s->dev, false); /* Jobs can re-queue themselves in evt kick handler. Do extra flush. */ vhost_scsi_flush(s); + kfree(s->dev.vqs); kfree(s); return 0; } @@ -1205,7 +1224,7 @@ static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl, u32 events_missed; u64 features; int r, abi_version = VHOST_SCSI_ABI_VERSION; - struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; + struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; switch (ioctl) { case VHOST_SCSI_SET_ENDPOINT: @@ -1333,7 +1352,7 @@ static void tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg, else reason = VIRTIO_SCSI_EVT_RESET_REMOVED; - vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; + vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq; mutex_lock(&vq->mutex); tcm_vhost_send_evt(vs, tpg, lun, VIRTIO_SCSI_T_TRANSPORT_RESET, reason); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 4eecdb867d53..bef8b6bae186 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -269,27 +269,27 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) bool zcopy; for (i = 0; i < dev->nvqs; ++i) { - dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect * + dev->vqs[i]->indirect = kmalloc(sizeof *dev->vqs[i]->indirect * UIO_MAXIOV, GFP_KERNEL); - dev->vqs[i].log = kmalloc(sizeof *dev->vqs[i].log * UIO_MAXIOV, + dev->vqs[i]->log = kmalloc(sizeof *dev->vqs[i]->log * UIO_MAXIOV, GFP_KERNEL); - dev->vqs[i].heads = kmalloc(sizeof *dev->vqs[i].heads * + 
dev->vqs[i]->heads = kmalloc(sizeof *dev->vqs[i]->heads * UIO_MAXIOV, GFP_KERNEL); zcopy = vhost_zcopy_mask & (0x1 << i); if (zcopy) - dev->vqs[i].ubuf_info = - kmalloc(sizeof *dev->vqs[i].ubuf_info * + dev->vqs[i]->ubuf_info = + kmalloc(sizeof *dev->vqs[i]->ubuf_info * UIO_MAXIOV, GFP_KERNEL); - if (!dev->vqs[i].indirect || !dev->vqs[i].log || - !dev->vqs[i].heads || - (zcopy && !dev->vqs[i].ubuf_info)) + if (!dev->vqs[i]->indirect || !dev->vqs[i]->log || + !dev->vqs[i]->heads || + (zcopy && !dev->vqs[i]->ubuf_info)) goto err_nomem; } return 0; err_nomem: for (; i >= 0; --i) - vhost_vq_free_iovecs(&dev->vqs[i]); + vhost_vq_free_iovecs(dev->vqs[i]); return -ENOMEM; } @@ -298,11 +298,11 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev) int i; for (i = 0; i < dev->nvqs; ++i) - vhost_vq_free_iovecs(&dev->vqs[i]); + vhost_vq_free_iovecs(dev->vqs[i]); } long vhost_dev_init(struct vhost_dev *dev, - struct vhost_virtqueue *vqs, int nvqs) + struct vhost_virtqueue **vqs, int nvqs) { int i; @@ -318,16 +318,16 @@ long vhost_dev_init(struct vhost_dev *dev, dev->worker = NULL; for (i = 0; i < dev->nvqs; ++i) { - dev->vqs[i].log = NULL; - dev->vqs[i].indirect = NULL; - dev->vqs[i].heads = NULL; - dev->vqs[i].ubuf_info = NULL; - dev->vqs[i].dev = dev; - mutex_init(&dev->vqs[i].mutex); - vhost_vq_reset(dev, dev->vqs + i); - if (dev->vqs[i].handle_kick) - vhost_poll_init(&dev->vqs[i].poll, - dev->vqs[i].handle_kick, POLLIN, dev); + dev->vqs[i]->log = NULL; + dev->vqs[i]->indirect = NULL; + dev->vqs[i]->heads = NULL; + dev->vqs[i]->ubuf_info = NULL; + dev->vqs[i]->dev = dev; + mutex_init(&dev->vqs[i]->mutex); + vhost_vq_reset(dev, dev->vqs[i]); + if (dev->vqs[i]->handle_kick) + vhost_poll_init(&dev->vqs[i]->poll, + dev->vqs[i]->handle_kick, POLLIN, dev); } return 0; @@ -430,9 +430,9 @@ void vhost_dev_stop(struct vhost_dev *dev) int i; for (i = 0; i < dev->nvqs; ++i) { - if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { - vhost_poll_stop(&dev->vqs[i].poll); - 
vhost_poll_flush(&dev->vqs[i].poll); + if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) { + vhost_poll_stop(&dev->vqs[i]->poll); + vhost_poll_flush(&dev->vqs[i]->poll); } } } @@ -443,17 +443,17 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) int i; for (i = 0; i < dev->nvqs; ++i) { - if (dev->vqs[i].error_ctx) - eventfd_ctx_put(dev->vqs[i].error_ctx); - if (dev->vqs[i].error) - fput(dev->vqs[i].error); - if (dev->vqs[i].kick) - fput(dev->vqs[i].kick); - if (dev->vqs[i].call_ctx) - eventfd_ctx_put(dev->vqs[i].call_ctx); - if (dev->vqs[i].call) - fput(dev->vqs[i].call); - vhost_vq_reset(dev, dev->vqs + i); + if (dev->vqs[i]->error_ctx) + eventfd_ctx_put(dev->vqs[i]->error_ctx); + if (dev->vqs[i]->error) + fput(dev->vqs[i]->error); + if (dev->vqs[i]->kick) + fput(dev->vqs[i]->kick); + if (dev->vqs[i]->call_ctx) + eventfd_ctx_put(dev->vqs[i]->call_ctx); + if (dev->vqs[i]->call) + fput(dev->vqs[i]->call); + vhost_vq_reset(dev, dev->vqs[i]); } vhost_dev_free_iovecs(dev); if (dev->log_ctx) @@ -524,14 +524,14 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, for (i = 0; i < d->nvqs; ++i) { int ok; - mutex_lock(&d->vqs[i].mutex); + mutex_lock(&d->vqs[i]->mutex); /* If ring is inactive, will check when it's enabled. 
*/ - if (d->vqs[i].private_data) - ok = vq_memory_access_ok(d->vqs[i].log_base, mem, + if (d->vqs[i]->private_data) + ok = vq_memory_access_ok(d->vqs[i]->log_base, mem, log_all); else ok = 1; - mutex_unlock(&d->vqs[i].mutex); + mutex_unlock(&d->vqs[i]->mutex); if (!ok) return 0; } @@ -641,7 +641,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) if (idx >= d->nvqs) return -ENOBUFS; - vq = d->vqs + idx; + vq = d->vqs[idx]; mutex_lock(&vq->mutex); @@ -852,7 +852,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) for (i = 0; i < d->nvqs; ++i) { struct vhost_virtqueue *vq; void __user *base = (void __user *)(unsigned long)p; - vq = d->vqs + i; + vq = d->vqs[i]; mutex_lock(&vq->mutex); /* If ring is inactive, will check when it's enabled. */ if (vq->private_data && !vq_log_access_ok(d, vq, base)) @@ -879,9 +879,9 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) } else filep = eventfp; for (i = 0; i < d->nvqs; ++i) { - mutex_lock(&d->vqs[i].mutex); - d->vqs[i].log_ctx = d->log_ctx; - mutex_unlock(&d->vqs[i].mutex); + mutex_lock(&d->vqs[i]->mutex); + d->vqs[i]->log_ctx = d->log_ctx; + mutex_unlock(&d->vqs[i]->mutex); } if (ctx) eventfd_ctx_put(ctx); diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 17261e277c02..f3afa8a41fe0 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -150,7 +150,7 @@ struct vhost_dev { struct mm_struct *mm; struct mutex mutex; unsigned acked_features; - struct vhost_virtqueue *vqs; + struct vhost_virtqueue **vqs; int nvqs; struct file *log_file; struct eventfd_ctx *log_ctx; @@ -159,7 +159,7 @@ struct vhost_dev { struct task_struct *worker; }; -long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs); +long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); long vhost_dev_check_owner(struct vhost_dev *); long vhost_dev_reset_owner(struct vhost_dev *); void 
vhost_dev_cleanup(struct vhost_dev *, bool locked); -- cgit v1.2.3 From 2839400f8fe28ce216eeeba3fb97bdf90977f7ad Mon Sep 17 00:00:00 2001 From: Asias He Date: Sat, 27 Apr 2013 15:07:46 +0800 Subject: vhost: move vhost-net zerocopy fields to net.c On top of 'vhost: Allow device specific fields per vq', we can move device specific fields to device virt queue from vhost virt queue. Signed-off-by: Asias He Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 164 +++++++++++++++++++++++++++++++++++++++++++------- drivers/vhost/vhost.c | 57 +----------------- drivers/vhost/vhost.h | 22 ------- 3 files changed, 142 insertions(+), 101 deletions(-) (limited to 'drivers/vhost/net.c') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 176aa030dc5f..8672e0538d59 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -64,8 +64,24 @@ enum { VHOST_NET_VQ_MAX = 2, }; +struct vhost_ubuf_ref { + struct kref kref; + wait_queue_head_t wait; + struct vhost_virtqueue *vq; +}; + struct vhost_net_virtqueue { struct vhost_virtqueue vq; + /* vhost zerocopy support fields below: */ + /* last used idx for outstanding DMA zerocopy buffers */ + int upend_idx; + /* first used idx for DMA done zerocopy buffers */ + int done_idx; + /* an array of userspace buffers info */ + struct ubuf_info *ubuf_info; + /* Reference counting for outstanding ubufs. + * Protected by vq mutex. Writers must also take device mutex. */ + struct vhost_ubuf_ref *ubufs; }; struct vhost_net { @@ -82,6 +98,88 @@ struct vhost_net { bool tx_flush; }; +static unsigned vhost_zcopy_mask __read_mostly; + +void vhost_enable_zcopy(int vq) +{ + vhost_zcopy_mask |= 0x1 << vq; +} + +static void vhost_zerocopy_done_signal(struct kref *kref) +{ + struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref, + kref); + wake_up(&ubufs->wait); +} + +struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq, + bool zcopy) +{ + struct vhost_ubuf_ref *ubufs; + /* No zero copy backend? 
Nothing to count. */ + if (!zcopy) + return NULL; + ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL); + if (!ubufs) + return ERR_PTR(-ENOMEM); + kref_init(&ubufs->kref); + init_waitqueue_head(&ubufs->wait); + ubufs->vq = vq; + return ubufs; +} + +void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs) +{ + kref_put(&ubufs->kref, vhost_zerocopy_done_signal); +} + +void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs) +{ + kref_put(&ubufs->kref, vhost_zerocopy_done_signal); + wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount)); + kfree(ubufs); +} + +int vhost_net_set_ubuf_info(struct vhost_net *n) +{ + bool zcopy; + int i; + + for (i = 0; i < n->dev.nvqs; ++i) { + zcopy = vhost_zcopy_mask & (0x1 << i); + if (!zcopy) + continue; + n->vqs[i].ubuf_info = kmalloc(sizeof(*n->vqs[i].ubuf_info) * + UIO_MAXIOV, GFP_KERNEL); + if (!n->vqs[i].ubuf_info) + goto err; + } + return 0; + +err: + while (i--) { + zcopy = vhost_zcopy_mask & (0x1 << i); + if (!zcopy) + continue; + kfree(n->vqs[i].ubuf_info); + } + return -ENOMEM; +} + +void vhost_net_reset_ubuf_info(struct vhost_net *n) +{ + int i; + + for (i = 0; i < VHOST_NET_VQ_MAX; i++) { + n->vqs[i].done_idx = 0; + n->vqs[i].upend_idx = 0; + n->vqs[i].ubufs = NULL; + kfree(n->vqs[i].ubuf_info); + n->vqs[i].ubuf_info = NULL; + } + +} + static void vhost_net_tx_packet(struct vhost_net *net) { ++net->tx_packets; @@ -157,10 +255,12 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, static int vhost_zerocopy_signal_used(struct vhost_net *net, struct vhost_virtqueue *vq) { + struct vhost_net_virtqueue *nvq = + container_of(vq, struct vhost_net_virtqueue, vq); int i; int j = 0; - for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) { + for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) { if (vq->heads[i].len == VHOST_DMA_FAILED_LEN) vhost_net_tx_err(net); if (VHOST_DMA_IS_DONE(vq->heads[i].len)) { @@ -172,7 +272,7 @@ static int vhost_zerocopy_signal_used(struct vhost_net *net, 
break; } if (j) - vq->done_idx = i; + nvq->done_idx = i; return j; } @@ -203,6 +303,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) static void handle_tx(struct vhost_net *net) { struct vhost_virtqueue *vq = &net->vqs[VHOST_NET_VQ_TX].vq; + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; unsigned out, in, s; int head; struct msghdr msg = { @@ -229,7 +330,7 @@ static void handle_tx(struct vhost_net *net) vhost_disable_notify(&net->dev, vq); hdr_size = vq->vhost_hlen; - zcopy = vq->ubufs; + zcopy = nvq->ubufs; for (;;) { /* Release DMAs done buffers first */ @@ -250,9 +351,10 @@ static void handle_tx(struct vhost_net *net) /* If more outstanding DMAs, queue the work. * Handle upend_idx wrap around */ - num_pends = likely(vq->upend_idx >= vq->done_idx) ? - (vq->upend_idx - vq->done_idx) : - (vq->upend_idx + UIO_MAXIOV - vq->done_idx); + num_pends = likely(nvq->upend_idx >= nvq->done_idx) ? + (nvq->upend_idx - nvq->done_idx) : + (nvq->upend_idx + UIO_MAXIOV - + nvq->done_idx); if (unlikely(num_pends > VHOST_MAX_PEND)) break; if (unlikely(vhost_enable_notify(&net->dev, vq))) { @@ -278,34 +380,34 @@ static void handle_tx(struct vhost_net *net) break; } zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN || - vq->upend_idx != vq->done_idx); + nvq->upend_idx != nvq->done_idx); /* use msg_control to pass vhost zerocopy ubuf info to skb */ if (zcopy_used) { - vq->heads[vq->upend_idx].id = head; + vq->heads[nvq->upend_idx].id = head; if (!vhost_net_tx_select_zcopy(net) || len < VHOST_GOODCOPY_LEN) { /* copy don't need to wait for DMA done */ - vq->heads[vq->upend_idx].len = + vq->heads[nvq->upend_idx].len = VHOST_DMA_DONE_LEN; msg.msg_control = NULL; msg.msg_controllen = 0; ubufs = NULL; } else { struct ubuf_info *ubuf; - ubuf = vq->ubuf_info + vq->upend_idx; + ubuf = nvq->ubuf_info + nvq->upend_idx; - vq->heads[vq->upend_idx].len = + vq->heads[nvq->upend_idx].len = VHOST_DMA_IN_PROGRESS; ubuf->callback = vhost_zerocopy_callback; - 
ubuf->ctx = vq->ubufs; - ubuf->desc = vq->upend_idx; + ubuf->ctx = nvq->ubufs; + ubuf->desc = nvq->upend_idx; msg.msg_control = ubuf; msg.msg_controllen = sizeof(ubuf); - ubufs = vq->ubufs; + ubufs = nvq->ubufs; kref_get(&ubufs->kref); } - vq->upend_idx = (vq->upend_idx + 1) % UIO_MAXIOV; + nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV; } /* TODO: Check specific error and bomb out unless ENOBUFS? */ err = sock->ops->sendmsg(NULL, sock, &msg, len); @@ -313,8 +415,8 @@ static void handle_tx(struct vhost_net *net) if (zcopy_used) { if (ubufs) vhost_ubuf_put(ubufs); - vq->upend_idx = ((unsigned)vq->upend_idx - 1) % - UIO_MAXIOV; + nvq->upend_idx = ((unsigned)nvq->upend_idx - 1) + % UIO_MAXIOV; } vhost_discard_vq_desc(vq, 1); break; @@ -564,7 +666,7 @@ static int vhost_net_open(struct inode *inode, struct file *f) struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL); struct vhost_dev *dev; struct vhost_virtqueue **vqs; - int r; + int r, i; if (!n) return -ENOMEM; @@ -579,6 +681,12 @@ static int vhost_net_open(struct inode *inode, struct file *f) vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq; n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick; n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick; + for (i = 0; i < VHOST_NET_VQ_MAX; i++) { + n->vqs[i].ubufs = NULL; + n->vqs[i].ubuf_info = NULL; + n->vqs[i].upend_idx = 0; + n->vqs[i].done_idx = 0; + } r = vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); if (r < 0) { kfree(n); @@ -652,15 +760,15 @@ static void vhost_net_flush(struct vhost_net *n) { vhost_net_flush_vq(n, VHOST_NET_VQ_TX); vhost_net_flush_vq(n, VHOST_NET_VQ_RX); - if (n->vqs[VHOST_NET_VQ_TX].vq.ubufs) { + if (n->vqs[VHOST_NET_VQ_TX].ubufs) { mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = true; mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); /* Wait for all lower device DMAs done. 
*/ - vhost_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].vq.ubufs); + vhost_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs); mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); n->tx_flush = false; - kref_init(&n->vqs[VHOST_NET_VQ_TX].vq.ubufs->kref); + kref_init(&n->vqs[VHOST_NET_VQ_TX].ubufs->kref); mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex); } } @@ -675,6 +783,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) vhost_net_flush(n); vhost_dev_stop(&n->dev); vhost_dev_cleanup(&n->dev, false); + vhost_net_reset_ubuf_info(n); if (tx_sock) fput(tx_sock->file); if (rx_sock) @@ -756,6 +865,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) { struct socket *sock, *oldsock; struct vhost_virtqueue *vq; + struct vhost_net_virtqueue *nvq; struct vhost_ubuf_ref *ubufs, *oldubufs = NULL; int r; @@ -769,6 +879,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) goto err; } vq = &n->vqs[index].vq; + nvq = &n->vqs[index]; mutex_lock(&vq->mutex); /* Verify that ring has been setup correctly. 
*/ @@ -801,8 +912,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) if (r) goto err_used; - oldubufs = vq->ubufs; - vq->ubufs = ubufs; + oldubufs = nvq->ubufs; + nvq->ubufs = ubufs; n->tx_packets = 0; n->tx_zcopy_err = 0; @@ -853,6 +964,7 @@ static long vhost_net_reset_owner(struct vhost_net *n) vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); err = vhost_dev_reset_owner(&n->dev); + vhost_net_reset_ubuf_info(n); done: mutex_unlock(&n->dev.mutex); if (tx_sock) @@ -928,11 +1040,17 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl, return vhost_net_reset_owner(n); default: mutex_lock(&n->dev.mutex); + if (ioctl == VHOST_SET_OWNER) { + r = vhost_net_set_ubuf_info(n); + if (r) + goto out; + } r = vhost_dev_ioctl(&n->dev, ioctl, argp); if (r == -ENOIOCTLCMD) r = vhost_vring_ioctl(&n->dev, ioctl, argp); else vhost_net_flush(n); +out: mutex_unlock(&n->dev.mutex); return r; } diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index bef8b6bae186..6644812e99b4 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -33,8 +33,6 @@ enum { VHOST_MEMORY_F_LOG = 0x1, }; -static unsigned vhost_zcopy_mask __read_mostly; - #define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num]) #define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num]) @@ -191,9 +189,6 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->call_ctx = NULL; vq->call = NULL; vq->log_ctx = NULL; - vq->upend_idx = 0; - vq->done_idx = 0; - vq->ubufs = NULL; } static int vhost_worker(void *data) @@ -253,20 +248,12 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) vq->log = NULL; kfree(vq->heads); vq->heads = NULL; - kfree(vq->ubuf_info); - vq->ubuf_info = NULL; -} - -void vhost_enable_zcopy(int vq) -{ - vhost_zcopy_mask |= 0x1 << vq; } /* Helper to allocate iovec buffers for all vqs. 
*/ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) { int i; - bool zcopy; for (i = 0; i < dev->nvqs; ++i) { dev->vqs[i]->indirect = kmalloc(sizeof *dev->vqs[i]->indirect * @@ -275,14 +262,8 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) GFP_KERNEL); dev->vqs[i]->heads = kmalloc(sizeof *dev->vqs[i]->heads * UIO_MAXIOV, GFP_KERNEL); - zcopy = vhost_zcopy_mask & (0x1 << i); - if (zcopy) - dev->vqs[i]->ubuf_info = - kmalloc(sizeof *dev->vqs[i]->ubuf_info * - UIO_MAXIOV, GFP_KERNEL); if (!dev->vqs[i]->indirect || !dev->vqs[i]->log || - !dev->vqs[i]->heads || - (zcopy && !dev->vqs[i]->ubuf_info)) + !dev->vqs[i]->heads) goto err_nomem; } return 0; @@ -321,7 +302,6 @@ long vhost_dev_init(struct vhost_dev *dev, dev->vqs[i]->log = NULL; dev->vqs[i]->indirect = NULL; dev->vqs[i]->heads = NULL; - dev->vqs[i]->ubuf_info = NULL; dev->vqs[i]->dev = dev; mutex_init(&dev->vqs[i]->mutex); vhost_vq_reset(dev, dev->vqs[i]); @@ -1551,38 +1531,3 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) &vq->used->flags, r); } } - -static void vhost_zerocopy_done_signal(struct kref *kref) -{ - struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref, - kref); - wake_up(&ubufs->wait); -} - -struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq, - bool zcopy) -{ - struct vhost_ubuf_ref *ubufs; - /* No zero copy backend? Nothing to count. 
*/ - if (!zcopy) - return NULL; - ubufs = kmalloc(sizeof *ubufs, GFP_KERNEL); - if (!ubufs) - return ERR_PTR(-ENOMEM); - kref_init(&ubufs->kref); - init_waitqueue_head(&ubufs->wait); - ubufs->vq = vq; - return ubufs; -} - -void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs) -{ - kref_put(&ubufs->kref, vhost_zerocopy_done_signal); -} - -void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs) -{ - kref_put(&ubufs->kref, vhost_zerocopy_done_signal); - wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount)); - kfree(ubufs); -} diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index f3afa8a41fe0..3a36712e0792 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -54,18 +54,6 @@ struct vhost_log { struct vhost_virtqueue; -struct vhost_ubuf_ref { - struct kref kref; - wait_queue_head_t wait; - struct vhost_virtqueue *vq; -}; - -struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *, bool zcopy); -void vhost_ubuf_put(struct vhost_ubuf_ref *); -void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *); - -struct ubuf_info; - /* The virtqueue structure describes a queue attached to a device. */ struct vhost_virtqueue { struct vhost_dev *dev; @@ -130,16 +118,6 @@ struct vhost_virtqueue { /* Log write descriptors */ void __user *log_base; struct vhost_log *log; - /* vhost zerocopy support fields below: */ - /* last used idx for outstanding DMA zerocopy buffers */ - int upend_idx; - /* first used idx for DMA done zerocopy buffers */ - int done_idx; - /* an array of userspace buffers info */ - struct ubuf_info *ubuf_info; - /* Reference counting for outstanding ubufs. - * Protected by vq mutex. Writers must also take device mutex. */ - struct vhost_ubuf_ref *ubufs; }; struct vhost_dev { -- cgit v1.2.3 From 81f95a55802be669b3191b2828c34006d0f04214 Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Sun, 28 Apr 2013 15:51:40 +0300 Subject: vhost: move per-vq net specific fields out to net This will remove the need for vhost scsi to pull in virtio-net.h. Signed-off-by: Michael S. Tsirkin --- drivers/vhost/net.c | 43 +++++++++++++++++++++++++++---------------- drivers/vhost/vhost.c | 2 -- drivers/vhost/vhost.h | 3 --- 3 files changed, 27 insertions(+), 21 deletions(-) (limited to 'drivers/vhost/net.c') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 8672e0538d59..e34e195b9cf6 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -72,6 +72,12 @@ struct vhost_ubuf_ref { struct vhost_net_virtqueue { struct vhost_virtqueue vq; + /* hdr is used to store the virtio header. + * Since each iovec has >= 1 byte length, we never need more than + * header length entries to store the header. */ + struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)]; + size_t vhost_hlen; + size_t sock_hlen; /* vhost zerocopy support fields below: */ /* last used idx for outstanding DMA zerocopy buffers */ int upend_idx; @@ -166,7 +172,7 @@ err: return -ENOMEM; } -void vhost_net_reset_ubuf_info(struct vhost_net *n) +void vhost_net_vq_reset(struct vhost_net *n) { int i; @@ -176,6 +182,8 @@ void vhost_net_reset_ubuf_info(struct vhost_net *n) n->vqs[i].ubufs = NULL; kfree(n->vqs[i].ubuf_info); n->vqs[i].ubuf_info = NULL; + n->vqs[i].vhost_hlen = 0; + n->vqs[i].sock_hlen = 0; } } @@ -302,8 +310,8 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success) * read-size critical section for our kind of RCU. 
*/ static void handle_tx(struct vhost_net *net) { - struct vhost_virtqueue *vq = &net->vqs[VHOST_NET_VQ_TX].vq; struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX]; + struct vhost_virtqueue *vq = &nvq->vq; unsigned out, in, s; int head; struct msghdr msg = { @@ -329,7 +337,7 @@ static void handle_tx(struct vhost_net *net) mutex_lock(&vq->mutex); vhost_disable_notify(&net->dev, vq); - hdr_size = vq->vhost_hlen; + hdr_size = nvq->vhost_hlen; zcopy = nvq->ubufs; for (;;) { @@ -369,14 +377,14 @@ static void handle_tx(struct vhost_net *net) break; } /* Skip header. TODO: support TSO. */ - s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out); + s = move_iovec_hdr(vq->iov, nvq->hdr, hdr_size, out); msg.msg_iovlen = out; len = iov_length(vq->iov, out); /* Sanity check */ if (!len) { vq_err(vq, "Unexpected header len for TX: " "%zd expected %zd\n", - iov_length(vq->hdr, s), hdr_size); + iov_length(nvq->hdr, s), hdr_size); break; } zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN || @@ -523,7 +531,8 @@ err: * read-size critical section for our kind of RCU. */ static void handle_rx(struct vhost_net *net) { - struct vhost_virtqueue *vq = &net->vqs[VHOST_NET_VQ_RX].vq; + struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX]; + struct vhost_virtqueue *vq = &nvq->vq; unsigned uninitialized_var(in), log; struct vhost_log *vq_log; struct msghdr msg = { @@ -551,8 +560,8 @@ static void handle_rx(struct vhost_net *net) mutex_lock(&vq->mutex); vhost_disable_notify(&net->dev, vq); - vhost_hlen = vq->vhost_hlen; - sock_hlen = vq->sock_hlen; + vhost_hlen = nvq->vhost_hlen; + sock_hlen = nvq->sock_hlen; vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? vq->log : NULL; @@ -582,11 +591,11 @@ static void handle_rx(struct vhost_net *net) /* We don't need to be notified again. */ if (unlikely((vhost_hlen))) /* Skip header. TODO: support TSO. 
*/ - move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in); + move_iovec_hdr(vq->iov, nvq->hdr, vhost_hlen, in); else /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF: * needed because recvmsg can modify msg_iov. */ - copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in); + copy_iovec_hdr(vq->iov, nvq->hdr, sock_hlen, in); msg.msg_iovlen = in; err = sock->ops->recvmsg(NULL, sock, &msg, sock_len, MSG_DONTWAIT | MSG_TRUNC); @@ -600,7 +609,7 @@ static void handle_rx(struct vhost_net *net) continue; } if (unlikely(vhost_hlen) && - memcpy_toiovecend(vq->hdr, (unsigned char *)&hdr, 0, + memcpy_toiovecend(nvq->hdr, (unsigned char *)&hdr, 0, vhost_hlen)) { vq_err(vq, "Unable to write vnet_hdr at addr %p\n", vq->iov->iov_base); @@ -608,7 +617,7 @@ static void handle_rx(struct vhost_net *net) } /* TODO: Should check and handle checksum. */ if (likely(mergeable) && - memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, + memcpy_toiovecend(nvq->hdr, (unsigned char *)&headcount, offsetof(typeof(hdr), num_buffers), sizeof hdr.num_buffers)) { vq_err(vq, "Failed num_buffers write"); @@ -686,6 +695,8 @@ static int vhost_net_open(struct inode *inode, struct file *f) n->vqs[i].ubuf_info = NULL; n->vqs[i].upend_idx = 0; n->vqs[i].done_idx = 0; + n->vqs[i].vhost_hlen = 0; + n->vqs[i].sock_hlen = 0; } r = vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); if (r < 0) { @@ -783,7 +794,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) vhost_net_flush(n); vhost_dev_stop(&n->dev); vhost_dev_cleanup(&n->dev, false); - vhost_net_reset_ubuf_info(n); + vhost_net_vq_reset(n); if (tx_sock) fput(tx_sock->file); if (rx_sock) @@ -964,7 +975,7 @@ static long vhost_net_reset_owner(struct vhost_net *n) vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); err = vhost_dev_reset_owner(&n->dev); - vhost_net_reset_ubuf_info(n); + vhost_net_vq_reset(n); done: mutex_unlock(&n->dev.mutex); if (tx_sock) @@ -1001,8 +1012,8 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features) 
smp_wmb(); for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { mutex_lock(&n->vqs[i].vq.mutex); - n->vqs[i].vq.vhost_hlen = vhost_hlen; - n->vqs[i].vq.sock_hlen = sock_hlen; + n->vqs[i].vhost_hlen = vhost_hlen; + n->vqs[i].sock_hlen = sock_hlen; mutex_unlock(&n->vqs[i].vq.mutex); } vhost_net_flush(n); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 6644812e99b4..6dcd81c87432 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -179,8 +179,6 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->used_flags = 0; vq->log_used = false; vq->log_addr = -1ull; - vq->vhost_hlen = 0; - vq->sock_hlen = 0; vq->private_data = NULL; vq->log_base = NULL; vq->error_ctx = NULL; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 3a36712e0792..1627eec0ca25 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -102,10 +102,7 @@ struct vhost_virtqueue { /* hdr is used to store the virtio header. * Since each iovec has >= 1 byte length, we never need more than * header length entries to store the header. */ - struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)]; struct iovec *indirect; - size_t vhost_hlen; - size_t sock_hlen; struct vring_used_elem *heads; /* We use a kind of RCU to access private pointer. * All readers access it from worker, which makes it possible to -- cgit v1.2.3 From 150b9e51ae975ca1fe468c565870fbc4a96e0574 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 28 Apr 2013 17:12:08 +0300 Subject: vhost: fix error handling in RESET_OWNER ioctl RESET_OWNER ioctl would leave the fd in a bad state if memory allocation failed: device is stopped but owner is not reset. Make state changes after allocating memory, such that a failed ioctl has no effect. Signed-off-by: Michael S. 
Tsirkin --- drivers/vhost/net.c | 8 +++++++- drivers/vhost/test.c | 9 ++++++++- drivers/vhost/vhost.c | 16 +++++++--------- drivers/vhost/vhost.h | 3 ++- 4 files changed, 24 insertions(+), 12 deletions(-) (limited to 'drivers/vhost/net.c') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index e34e195b9cf6..a3645bd163d8 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -967,14 +967,20 @@ static long vhost_net_reset_owner(struct vhost_net *n) struct socket *tx_sock = NULL; struct socket *rx_sock = NULL; long err; + struct vhost_memory *memory; mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); if (err) goto done; + memory = vhost_dev_reset_owner_prepare(); + if (!memory) { + err = -ENOMEM; + goto done; + } vhost_net_stop(n, &tx_sock, &rx_sock); vhost_net_flush(n); - err = vhost_dev_reset_owner(&n->dev); + vhost_dev_reset_owner(&n->dev, memory); vhost_net_vq_reset(n); done: mutex_unlock(&n->dev.mutex); diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 91d6f060aade..be65414d5bb1 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -219,13 +219,20 @@ static long vhost_test_reset_owner(struct vhost_test *n) { void *priv = NULL; long err; + struct vhost_memory *memory; + mutex_lock(&n->dev.mutex); err = vhost_dev_check_owner(&n->dev); if (err) goto done; + memory = vhost_dev_reset_owner_prepare(); + if (!memory) { + err = -ENOMEM; + goto done; + } vhost_test_stop(n, &priv); vhost_test_flush(n); - err = vhost_dev_reset_owner(&n->dev); + vhost_dev_reset_owner(&n->dev, memory); done: mutex_unlock(&n->dev.mutex); return err; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 6dcd81c87432..749b5ab5bfbb 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -386,21 +386,19 @@ err_mm: return err; } -/* Caller should have device mutex */ -long vhost_dev_reset_owner(struct vhost_dev *dev) +struct vhost_memory *vhost_dev_reset_owner_prepare(void) { - struct vhost_memory *memory; - - /* Restore memory 
to default empty mapping. */ - memory = kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL); - if (!memory) - return -ENOMEM; + return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL); +} +/* Caller should have device mutex */ +void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory) +{ vhost_dev_cleanup(dev, true); + /* Restore memory to default empty mapping. */ memory->nregions = 0; RCU_INIT_POINTER(dev->memory, memory); - return 0; } void vhost_dev_stop(struct vhost_dev *dev) diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index 1627eec0ca25..b58f4ae82cb8 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -136,7 +136,8 @@ struct vhost_dev { long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); long vhost_dev_check_owner(struct vhost_dev *); -long vhost_dev_reset_owner(struct vhost_dev *); +struct vhost_memory *vhost_dev_reset_owner_prepare(void); +void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_memory *); void vhost_dev_cleanup(struct vhost_dev *, bool locked); void vhost_dev_stop(struct vhost_dev *); long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp); -- cgit v1.2.3