diff options
Diffstat (limited to 'drivers/vhost/vhost.c')
| -rw-r--r-- | drivers/vhost/vhost.c | 213 | 
1 files changed, 166 insertions, 47 deletions
| diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index ea966b356352..c14c42b95ab8 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -37,6 +37,8 @@ enum {  	VHOST_MEMORY_F_LOG = 0x1,  }; +static unsigned vhost_zcopy_mask __read_mostly; +  #define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])  #define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num]) @@ -179,6 +181,9 @@ static void vhost_vq_reset(struct vhost_dev *dev,  	vq->call_ctx = NULL;  	vq->call = NULL;  	vq->log_ctx = NULL; +	vq->upend_idx = 0; +	vq->done_idx = 0; +	vq->ubufs = NULL;  }  static int vhost_worker(void *data) @@ -225,10 +230,28 @@ static int vhost_worker(void *data)  	return 0;  } +static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) +{ +	kfree(vq->indirect); +	vq->indirect = NULL; +	kfree(vq->log); +	vq->log = NULL; +	kfree(vq->heads); +	vq->heads = NULL; +	kfree(vq->ubuf_info); +	vq->ubuf_info = NULL; +} + +void vhost_enable_zcopy(int vq) +{ +	vhost_zcopy_mask |= 0x1 << vq; +} +  /* Helper to allocate iovec buffers for all vqs. */  static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)  {  	int i; +	bool zcopy;  	for (i = 0; i < dev->nvqs; ++i) {  		dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect * @@ -237,19 +260,21 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)  					  GFP_KERNEL);  		dev->vqs[i].heads = kmalloc(sizeof *dev->vqs[i].heads *  					    UIO_MAXIOV, GFP_KERNEL); - +		zcopy = vhost_zcopy_mask & (0x1 << i); +		if (zcopy) +			dev->vqs[i].ubuf_info = +				kmalloc(sizeof *dev->vqs[i].ubuf_info * +					UIO_MAXIOV, GFP_KERNEL);  		if (!dev->vqs[i].indirect || !dev->vqs[i].log || -			!dev->vqs[i].heads) +			!dev->vqs[i].heads || +			(zcopy && !dev->vqs[i].ubuf_info))  			goto err_nomem;  	}  	return 0;  err_nomem: -	for (; i >= 0; --i) { -		kfree(dev->vqs[i].indirect); -		kfree(dev->vqs[i].log); -		kfree(dev->vqs[i].heads); -	} +	for (; i >= 0; --i) +		vhost_vq_free_iovecs(&dev->vqs[i]);  	return -ENOMEM;  } @@ -257,14 +282,8 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev)  {  	int i; -	for (i = 0; i < dev->nvqs; ++i) { -		kfree(dev->vqs[i].indirect); -		dev->vqs[i].indirect = NULL; -		kfree(dev->vqs[i].log); -		dev->vqs[i].log = NULL; -		kfree(dev->vqs[i].heads); -		dev->vqs[i].heads = NULL; -	} +	for (i = 0; i < dev->nvqs; ++i) +		vhost_vq_free_iovecs(&dev->vqs[i]);  }  long vhost_dev_init(struct vhost_dev *dev, @@ -287,6 +306,7 @@ long vhost_dev_init(struct vhost_dev *dev,  		dev->vqs[i].log = NULL;  		dev->vqs[i].indirect = NULL;  		dev->vqs[i].heads = NULL; +		dev->vqs[i].ubuf_info = NULL;  		dev->vqs[i].dev = dev;  		mutex_init(&dev->vqs[i].mutex);  		vhost_vq_reset(dev, dev->vqs + i); @@ -390,6 +410,30 @@ long vhost_dev_reset_owner(struct vhost_dev *dev)  	return 0;  } +/* In case of DMA done not in order in lower device driver for some reason. + * upend_idx is used to track end of used idx, done_idx is used to track head + * of used idx. Once lower device DMA done contiguously, we will signal KVM + * guest used idx. + */ +int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq) +{ +	int i; +	int j = 0; + +	for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) { +		if ((vq->heads[i].len == VHOST_DMA_DONE_LEN)) { +			vq->heads[i].len = VHOST_DMA_CLEAR_LEN; +			vhost_add_used_and_signal(vq->dev, vq, +						  vq->heads[i].id, 0); +			++j; +		} else +			break; +	} +	if (j) +		vq->done_idx = i; +	return j; +} +  /* Caller should have device mutex */  void vhost_dev_cleanup(struct vhost_dev *dev)  { @@ -400,6 +444,13 @@ void vhost_dev_cleanup(struct vhost_dev *dev)  			vhost_poll_stop(&dev->vqs[i].poll);  			vhost_poll_flush(&dev->vqs[i].poll);  		} +		/* Wait for all lower device DMAs done. */ +		if (dev->vqs[i].ubufs) +			vhost_ubuf_put_and_wait(dev->vqs[i].ubufs); + +		/* Signal guest as appropriate. */ +		vhost_zerocopy_signal_used(&dev->vqs[i]); +  		if (dev->vqs[i].error_ctx)  			eventfd_ctx_put(dev->vqs[i].error_ctx);  		if (dev->vqs[i].error) @@ -578,17 +629,6 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)  	return 0;  } -static int init_used(struct vhost_virtqueue *vq, -		     struct vring_used __user *used) -{ -	int r = put_user(vq->used_flags, &used->flags); - -	if (r) -		return r; -	vq->signalled_used_valid = false; -	return get_user(vq->last_used_idx, &used->idx); -} -  static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)  {  	struct file *eventfp, *filep = NULL, @@ -701,10 +741,6 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)  			}  		} -		r = init_used(vq, (struct vring_used __user *)(unsigned long) -			      a.used_user_addr); -		if (r) -			break;  		vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG));  		vq->desc = (void __user *)(unsigned long)a.desc_user_addr;  		vq->avail = (void __user *)(unsigned long)a.avail_user_addr; @@ -959,6 +995,57 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,  	return 0;  } +static int vhost_update_used_flags(struct vhost_virtqueue *vq) +{ +	void __user *used; +	if (__put_user(vq->used_flags, &vq->used->flags) < 0) +		return -EFAULT; +	if (unlikely(vq->log_used)) { +		/* Make sure the flag is seen before log. */ +		smp_wmb(); +		/* Log used flag write. */ +		used = &vq->used->flags; +		log_write(vq->log_base, vq->log_addr + +			  (used - (void __user *)vq->used), +			  sizeof vq->used->flags); +		if (vq->log_ctx) +			eventfd_signal(vq->log_ctx, 1); +	} +	return 0; +} + +static int vhost_update_avail_event(struct vhost_virtqueue *vq, u16 avail_event) +{ +	if (__put_user(vq->avail_idx, vhost_avail_event(vq))) +		return -EFAULT; +	if (unlikely(vq->log_used)) { +		void __user *used; +		/* Make sure the event is seen before log. */ +		smp_wmb(); +		/* Log avail event write */ +		used = vhost_avail_event(vq); +		log_write(vq->log_base, vq->log_addr + +			  (used - (void __user *)vq->used), +			  sizeof *vhost_avail_event(vq)); +		if (vq->log_ctx) +			eventfd_signal(vq->log_ctx, 1); +	} +	return 0; +} + +int vhost_init_used(struct vhost_virtqueue *vq) +{ +	int r; +	if (!vq->private_data) +		return 0; + +	r = vhost_update_used_flags(vq); +	if (r) +		return r; +	vq->signalled_used_valid = false; +	return get_user(vq->last_used_idx, &vq->used->idx); +} +  static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,  			  struct iovec iov[], int iov_size)  { @@ -1430,34 +1517,20 @@ bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)  		return false;  	vq->used_flags &= ~VRING_USED_F_NO_NOTIFY;  	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { -		r = put_user(vq->used_flags, &vq->used->flags); +		r = vhost_update_used_flags(vq);  		if (r) {  			vq_err(vq, "Failed to enable notification at %p: %d\n",  			       &vq->used->flags, r);  			return false;  		}  	} else { -		r = put_user(vq->avail_idx, vhost_avail_event(vq)); +		r = vhost_update_avail_event(vq, vq->avail_idx);  		if (r) {  			vq_err(vq, "Failed to update avail event index at %p: %d\n",  			       vhost_avail_event(vq), r);  			return false;  		}  	} -	if (unlikely(vq->log_used)) { -		void __user *used; -		/* Make sure data is seen before log. */ -		smp_wmb(); -		used = vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX) ? -			&vq->used->flags : vhost_avail_event(vq); -		/* Log used flags or event index entry write. Both are 16 bit -		 * fields. */ -		log_write(vq->log_base, vq->log_addr + -			   (used - (void __user *)vq->used), -			  sizeof(u16)); -		if (vq->log_ctx) -			eventfd_signal(vq->log_ctx, 1); -	}  	/* They could have slipped one in as we were doing that: make  	 * sure it's written, then check again. */  	smp_mb(); @@ -1480,9 +1553,55 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)  		return;  	vq->used_flags |= VRING_USED_F_NO_NOTIFY;  	if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { -		r = put_user(vq->used_flags, &vq->used->flags); +		r = vhost_update_used_flags(vq);  		if (r)  			vq_err(vq, "Failed to enable notification at %p: %d\n",  			       &vq->used->flags, r);  	}  } + +static void vhost_zerocopy_done_signal(struct kref *kref) +{ +	struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref, +						    kref); +	wake_up(&ubufs->wait); +} + +struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq, +					bool zcopy) +{ +	struct vhost_ubuf_ref *ubufs; +	/* No zero copy backend? Nothing to count. */ +	if (!zcopy) +		return NULL; +	ubufs = kmalloc(sizeof *ubufs, GFP_KERNEL); +	if (!ubufs) +		return ERR_PTR(-ENOMEM); +	kref_init(&ubufs->kref); +	init_waitqueue_head(&ubufs->wait); +	ubufs->vq = vq; +	return ubufs; +} + +void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs) +{ +	kref_put(&ubufs->kref, vhost_zerocopy_done_signal); +} + +void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs) +{ +	kref_put(&ubufs->kref, vhost_zerocopy_done_signal); +	wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount)); +	kfree(ubufs); +} + +void vhost_zerocopy_callback(void *arg) +{ +	struct ubuf_info *ubuf = arg; +	struct vhost_ubuf_ref *ubufs = ubuf->arg; +	struct vhost_virtqueue *vq = ubufs->vq; + +	/* set len = 1 to mark this desc buffers done DMA */ +	vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; +	kref_put(&ubufs->kref, vhost_zerocopy_done_signal); +} | 
