diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/dlm/lowcomms.c | 10 | ||||
-rw-r--r-- | fs/netfs/iterator.c | 266 | ||||
-rw-r--r-- | fs/nfsd/vfs.c | 2 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 38 | ||||
-rw-r--r-- | fs/smb/client/smb2ops.c | 4 | ||||
-rw-r--r-- | fs/smb/client/smbdirect.c | 2 | ||||
-rw-r--r-- | fs/splice.c | 205 |
7 files changed, 197 insertions, 330 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 3d3802c47b8b..5c12d8cdfc16 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -1395,8 +1395,11 @@ int dlm_lowcomms_resend_msg(struct dlm_msg *msg) /* Send a message */ static int send_to_sock(struct connection *con) { - const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; struct writequeue_entry *e; + struct bio_vec bvec; + struct msghdr msg = { + .msg_flags = MSG_SPLICE_PAGES | MSG_DONTWAIT | MSG_NOSIGNAL, + }; int len, offset, ret; spin_lock_bh(&con->writequeue_lock); @@ -1412,8 +1415,9 @@ static int send_to_sock(struct connection *con) WARN_ON_ONCE(len == 0 && e->users == 0); spin_unlock_bh(&con->writequeue_lock); - ret = kernel_sendpage(con->sock, e->page, offset, len, - msg_flags); + bvec_set_page(&bvec, e->page, len, offset); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bvec, 1, len); + ret = sock_sendmsg(con->sock, &msg); trace_dlm_send(con->nodeid, ret); if (ret == -EAGAIN || ret == 0) { lock_sock(con->sock->sk); diff --git a/fs/netfs/iterator.c b/fs/netfs/iterator.c index 8a4c86687429..2ff07ba655a0 100644 --- a/fs/netfs/iterator.c +++ b/fs/netfs/iterator.c @@ -101,269 +101,3 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len, return npages; } EXPORT_SYMBOL_GPL(netfs_extract_user_iter); - -/* - * Extract and pin a list of up to sg_max pages from UBUF- or IOVEC-class - * iterators, and add them to the scatterlist. - */ -static ssize_t netfs_extract_user_to_sg(struct iov_iter *iter, - ssize_t maxsize, - struct sg_table *sgtable, - unsigned int sg_max, - iov_iter_extraction_t extraction_flags) -{ - struct scatterlist *sg = sgtable->sgl + sgtable->nents; - struct page **pages; - unsigned int npages; - ssize_t ret = 0, res; - size_t len, off; - - /* We decant the page list into the tail of the scatterlist */ - pages = (void *)sgtable->sgl + array_size(sg_max, sizeof(struct scatterlist)); - pages -= sg_max; - - do { - res = iov_iter_extract_pages(iter, &pages, maxsize, sg_max, - extraction_flags, &off); - if (res < 0) - goto failed; - - len = res; - maxsize -= len; - ret += len; - npages = DIV_ROUND_UP(off + len, PAGE_SIZE); - sg_max -= npages; - - for (; npages > 0; npages--) { - struct page *page = *pages; - size_t seg = min_t(size_t, PAGE_SIZE - off, len); - - *pages++ = NULL; - sg_set_page(sg, page, seg, off); - sgtable->nents++; - sg++; - len -= seg; - off = 0; - } - } while (maxsize > 0 && sg_max > 0); - - return ret; - -failed: - while (sgtable->nents > sgtable->orig_nents) - put_page(sg_page(&sgtable->sgl[--sgtable->nents])); - return res; -} - -/* - * Extract up to sg_max pages from a BVEC-type iterator and add them to the - * scatterlist. The pages are not pinned. - */ -static ssize_t netfs_extract_bvec_to_sg(struct iov_iter *iter, - ssize_t maxsize, - struct sg_table *sgtable, - unsigned int sg_max, - iov_iter_extraction_t extraction_flags) -{ - const struct bio_vec *bv = iter->bvec; - struct scatterlist *sg = sgtable->sgl + sgtable->nents; - unsigned long start = iter->iov_offset; - unsigned int i; - ssize_t ret = 0; - - for (i = 0; i < iter->nr_segs; i++) { - size_t off, len; - - len = bv[i].bv_len; - if (start >= len) { - start -= len; - continue; - } - - len = min_t(size_t, maxsize, len - start); - off = bv[i].bv_offset + start; - - sg_set_page(sg, bv[i].bv_page, len, off); - sgtable->nents++; - sg++; - sg_max--; - - ret += len; - maxsize -= len; - if (maxsize <= 0 || sg_max == 0) - break; - start = 0; - } - - if (ret > 0) - iov_iter_advance(iter, ret); - return ret; -} - -/* - * Extract up to sg_max pages from a KVEC-type iterator and add them to the - * scatterlist. This can deal with vmalloc'd buffers as well as kmalloc'd or - * static buffers. The pages are not pinned. - */ -static ssize_t netfs_extract_kvec_to_sg(struct iov_iter *iter, - ssize_t maxsize, - struct sg_table *sgtable, - unsigned int sg_max, - iov_iter_extraction_t extraction_flags) -{ - const struct kvec *kv = iter->kvec; - struct scatterlist *sg = sgtable->sgl + sgtable->nents; - unsigned long start = iter->iov_offset; - unsigned int i; - ssize_t ret = 0; - - for (i = 0; i < iter->nr_segs; i++) { - struct page *page; - unsigned long kaddr; - size_t off, len, seg; - - len = kv[i].iov_len; - if (start >= len) { - start -= len; - continue; - } - - kaddr = (unsigned long)kv[i].iov_base + start; - off = kaddr & ~PAGE_MASK; - len = min_t(size_t, maxsize, len - start); - kaddr &= PAGE_MASK; - - maxsize -= len; - ret += len; - do { - seg = min_t(size_t, len, PAGE_SIZE - off); - if (is_vmalloc_or_module_addr((void *)kaddr)) - page = vmalloc_to_page((void *)kaddr); - else - page = virt_to_page(kaddr); - - sg_set_page(sg, page, len, off); - sgtable->nents++; - sg++; - sg_max--; - - len -= seg; - kaddr += PAGE_SIZE; - off = 0; - } while (len > 0 && sg_max > 0); - - if (maxsize <= 0 || sg_max == 0) - break; - start = 0; - } - - if (ret > 0) - iov_iter_advance(iter, ret); - return ret; -} - -/* - * Extract up to sg_max folios from an XARRAY-type iterator and add them to - * the scatterlist. The pages are not pinned. - */ -static ssize_t netfs_extract_xarray_to_sg(struct iov_iter *iter, - ssize_t maxsize, - struct sg_table *sgtable, - unsigned int sg_max, - iov_iter_extraction_t extraction_flags) -{ - struct scatterlist *sg = sgtable->sgl + sgtable->nents; - struct xarray *xa = iter->xarray; - struct folio *folio; - loff_t start = iter->xarray_start + iter->iov_offset; - pgoff_t index = start / PAGE_SIZE; - ssize_t ret = 0; - size_t offset, len; - XA_STATE(xas, xa, index); - - rcu_read_lock(); - - xas_for_each(&xas, folio, ULONG_MAX) { - if (xas_retry(&xas, folio)) - continue; - if (WARN_ON(xa_is_value(folio))) - break; - if (WARN_ON(folio_test_hugetlb(folio))) - break; - - offset = offset_in_folio(folio, start); - len = min_t(size_t, maxsize, folio_size(folio) - offset); - - sg_set_page(sg, folio_page(folio, 0), len, offset); - sgtable->nents++; - sg++; - sg_max--; - - maxsize -= len; - ret += len; - if (maxsize <= 0 || sg_max == 0) - break; - } - - rcu_read_unlock(); - if (ret > 0) - iov_iter_advance(iter, ret); - return ret; -} - -/** - * netfs_extract_iter_to_sg - Extract pages from an iterator and add ot an sglist - * @iter: The iterator to extract from - * @maxsize: The amount of iterator to copy - * @sgtable: The scatterlist table to fill in - * @sg_max: Maximum number of elements in @sgtable that may be filled - * @extraction_flags: Flags to qualify the request - * - * Extract the page fragments from the given amount of the source iterator and - * add them to a scatterlist that refers to all of those bits, to a maximum - * addition of @sg_max elements. - * - * The pages referred to by UBUF- and IOVEC-type iterators are extracted and - * pinned; BVEC-, KVEC- and XARRAY-type are extracted but aren't pinned; PIPE- - * and DISCARD-type are not supported. - * - * No end mark is placed on the scatterlist; that's left to the caller. - * - * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA - * be allowed on the pages extracted. - * - * If successul, @sgtable->nents is updated to include the number of elements - * added and the number of bytes added is returned. @sgtable->orig_nents is - * left unaltered. - * - * The iov_iter_extract_mode() function should be used to query how cleanup - * should be performed. - */ -ssize_t netfs_extract_iter_to_sg(struct iov_iter *iter, size_t maxsize, - struct sg_table *sgtable, unsigned int sg_max, - iov_iter_extraction_t extraction_flags) -{ - if (maxsize == 0) - return 0; - - switch (iov_iter_type(iter)) { - case ITER_UBUF: - case ITER_IOVEC: - return netfs_extract_user_to_sg(iter, maxsize, sgtable, sg_max, - extraction_flags); - case ITER_BVEC: - return netfs_extract_bvec_to_sg(iter, maxsize, sgtable, sg_max, - extraction_flags); - case ITER_KVEC: - return netfs_extract_kvec_to_sg(iter, maxsize, sgtable, sg_max, - extraction_flags); - case ITER_XARRAY: - return netfs_extract_xarray_to_sg(iter, maxsize, sgtable, sg_max, - extraction_flags); - default: - pr_err("%s(%u) unsupported\n", __func__, iov_iter_type(iter)); - WARN_ON_ONCE(1); - return -EIO; - } -} -EXPORT_SYMBOL_GPL(netfs_extract_iter_to_sg); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 59b7d60ae33e..8a2321d19194 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -938,7 +938,7 @@ nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags, /* * Grab and keep cached pages associated with a file in the svc_rqst - * so that they can be passed to the network sendmsg/sendpage routines + * so that they can be passed to the network sendmsg routines * directly. They will be released after the sending has completed. * * Return values: Number of bytes consumed, or -EIO if there are no diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index aecbd712a00c..960080753d3b 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -930,19 +930,22 @@ out: } static void o2net_sendpage(struct o2net_sock_container *sc, - void *kmalloced_virt, - size_t size) + void *virt, size_t size) { struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); + struct msghdr msg = {}; + struct bio_vec bv; ssize_t ret; + bvec_set_virt(&bv, virt, size); + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, &bv, 1, size); + while (1) { + msg.msg_flags = MSG_DONTWAIT | MSG_SPLICE_PAGES; mutex_lock(&sc->sc_send_lock); - ret = sc->sc_sock->ops->sendpage(sc->sc_sock, - virt_to_page(kmalloced_virt), - offset_in_page(kmalloced_virt), - size, MSG_DONTWAIT); + ret = sock_sendmsg(sc->sc_sock, &msg); mutex_unlock(&sc->sc_send_lock); + if (ret == size) break; if (ret == (ssize_t)-EAGAIN) { @@ -2087,18 +2090,24 @@ void o2net_stop_listening(struct o2nm_node *node) int o2net_init(void) { + struct folio *folio; + void *p; unsigned long i; o2quo_init(); - o2net_debugfs_init(); - o2net_hand = kzalloc(sizeof(struct o2net_handshake), GFP_KERNEL); - o2net_keep_req = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); - o2net_keep_resp = kzalloc(sizeof(struct o2net_msg), GFP_KERNEL); - if (!o2net_hand || !o2net_keep_req || !o2net_keep_resp) + folio = folio_alloc(GFP_KERNEL | __GFP_ZERO, 0); + if (!folio) goto out; + p = folio_address(folio); + o2net_hand = p; + p += sizeof(struct o2net_handshake); + o2net_keep_req = p; + p += sizeof(struct o2net_msg); + o2net_keep_resp = p; + o2net_hand->protocol_version = cpu_to_be64(O2NET_PROTOCOL_VERSION); o2net_hand->connector_id = cpu_to_be64(1); @@ -2124,9 +2133,6 @@ int o2net_init(void) return 0; out: - kfree(o2net_hand); - kfree(o2net_keep_req); - kfree(o2net_keep_resp); o2net_debugfs_exit(); o2quo_exit(); return -ENOMEM; @@ -2135,8 +2141,6 @@ out: void o2net_exit(void) { o2quo_exit(); - kfree(o2net_hand); - kfree(o2net_keep_req); - kfree(o2net_keep_resp); o2net_debugfs_exit(); + folio_put(virt_to_folio(o2net_hand)); } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index a8bb9d00d33a..5639d8c48570 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -4373,8 +4373,8 @@ static void *smb2_get_aead_req(struct crypto_aead *tfm, struct smb_rqst *rqst, } sgtable.orig_nents = sgtable.nents; - rc = netfs_extract_iter_to_sg(iter, count, &sgtable, - num_sgs - sgtable.nents, 0); + rc = extract_iter_to_sg(iter, count, &sgtable, + num_sgs - sgtable.nents, 0); iov_iter_revert(iter, rc); sgtable.orig_nents = sgtable.nents; } diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index 0362ebd4fa0f..223e17c16b60 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -2227,7 +2227,7 @@ static int smbd_iter_to_mr(struct smbd_connection *info, memset(sgt->sgl, 0, max_sg * sizeof(struct scatterlist)); - ret = netfs_extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0); + ret = extract_iter_to_sg(iter, iov_iter_count(iter), sgt, max_sg, 0); WARN_ON(ret < 0); if (sgt->nents > 0) sg_mark_end(&sgt->sgl[sgt->nents - 1]); diff --git a/fs/splice.c b/fs/splice.c index 7a9565d8ec4f..004eb1c4ce31 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -33,6 +33,7 @@ #include <linux/fsnotify.h> #include <linux/security.h> #include <linux/gfp.h> +#include <linux/net.h> #include <linux/socket.h> #include <linux/sched/signal.h> @@ -415,30 +416,6 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = { }; EXPORT_SYMBOL(nosteal_pipe_buf_ops); -/* - * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' - * using sendpage(). Return the number of bytes sent. - */ -static int pipe_to_sendpage(struct pipe_inode_info *pipe, - struct pipe_buffer *buf, struct splice_desc *sd) -{ - struct file *file = sd->u.file; - loff_t pos = sd->pos; - int more; - - if (!likely(file->f_op->sendpage)) - return -EINVAL; - - more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; - - if (sd->len < sd->total_len && - pipe_occupancy(pipe->head, pipe->tail) > 1) - more |= MSG_SENDPAGE_NOTLAST; - - return file->f_op->sendpage(file, buf->page, buf->offset, - sd->len, &pos, more); -} - static void wakeup_pipe_writers(struct pipe_inode_info *pipe) { smp_mb(); @@ -619,7 +596,7 @@ static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_des * Description: * This function does little more than loop over the pipe and call * @actor to do the actual moving of a single struct pipe_buffer to - * the desired destination. See pipe_to_file, pipe_to_sendpage, or + * the desired destination. See pipe_to_file, pipe_to_sendmsg, or * pipe_to_user. * */ @@ -800,8 +777,9 @@ done: EXPORT_SYMBOL(iter_file_splice_write); +#ifdef CONFIG_NET /** - * generic_splice_sendpage - splice data from a pipe to a socket + * splice_to_socket - splice data from a pipe to a socket * @pipe: pipe to splice from * @out: socket to write to * @ppos: position in @out @@ -813,13 +791,129 @@ EXPORT_SYMBOL(iter_file_splice_write); * is involved. * */ -ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags) +ssize_t splice_to_socket(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) { - return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); -} + struct socket *sock = sock_from_file(out); + struct bio_vec bvec[16]; + struct msghdr msg = {}; + ssize_t ret = 0; + size_t spliced = 0; + bool need_wakeup = false; + + pipe_lock(pipe); + + while (len > 0) { + unsigned int head, tail, mask, bc = 0; + size_t remain = len; + + /* + * Check for signal early to make process killable when there + * are always buffers available + */ + ret = -ERESTARTSYS; + if (signal_pending(current)) + break; + + while (pipe_empty(pipe->head, pipe->tail)) { + ret = 0; + if (!pipe->writers) + goto out; + + if (spliced) + goto out; + + ret = -EAGAIN; + if (flags & SPLICE_F_NONBLOCK) + goto out; + + ret = -ERESTARTSYS; + if (signal_pending(current)) + goto out; + + if (need_wakeup) { + wakeup_pipe_writers(pipe); + need_wakeup = false; + } + + pipe_wait_readable(pipe); + } + + head = pipe->head; + tail = pipe->tail; + mask = pipe->ring_size - 1; + + while (!pipe_empty(head, tail)) { + struct pipe_buffer *buf = &pipe->bufs[tail & mask]; + size_t seg; + + if (!buf->len) { + tail++; + continue; + } + + seg = min_t(size_t, remain, buf->len); + + ret = pipe_buf_confirm(pipe, buf); + if (unlikely(ret)) { + if (ret == -ENODATA) + ret = 0; + break; + } -EXPORT_SYMBOL(generic_splice_sendpage); + bvec_set_page(&bvec[bc++], buf->page, seg, buf->offset); + remain -= seg; + if (remain == 0 || bc >= ARRAY_SIZE(bvec)) + break; + tail++; + } + + if (!bc) + break; + + msg.msg_flags = MSG_SPLICE_PAGES; + if (flags & SPLICE_F_MORE) + msg.msg_flags |= MSG_MORE; + if (remain && pipe_occupancy(pipe->head, tail) > 0) + msg.msg_flags |= MSG_MORE; + + iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, bc, + len - remain); + ret = sock_sendmsg(sock, &msg); + if (ret <= 0) + break; + + spliced += ret; + len -= ret; + tail = pipe->tail; + while (ret > 0) { + struct pipe_buffer *buf = &pipe->bufs[tail & mask]; + size_t seg = min_t(size_t, ret, buf->len); + + buf->offset += seg; + buf->len -= seg; + ret -= seg; + + if (!buf->len) { + pipe_buf_release(pipe, buf); + tail++; + } + } + + if (tail != pipe->tail) { + pipe->tail = tail; + if (pipe->files) + need_wakeup = true; + } + } + +out: + pipe_unlock(pipe); + if (need_wakeup) + wakeup_pipe_writers(pipe); + return spliced ?: ret; +} +#endif static int warn_unsupported(struct file *file, const char *op) { @@ -840,6 +934,17 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, return out->f_op->splice_write(pipe, out, ppos, len, flags); } +/* + * Indicate to the caller that there was a premature EOF when reading from the + * source and the caller didn't indicate they would be sending more data after + * this. + */ +static void do_splice_eof(struct splice_desc *sd) +{ + if (sd->splice_eof) + sd->splice_eof(sd); +} + /** * vfs_splice_read - Read data from a file and splice it into a pipe * @in: File to splice from @@ -944,13 +1049,17 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, */ bytes = 0; len = sd->total_len; + + /* Don't block on output, we have to drain the direct pipe. */ flags = sd->flags; + sd->flags &= ~SPLICE_F_NONBLOCK; /* - * Don't block on output, we have to drain the direct pipe. + * We signal MORE until we've read sufficient data to fulfill the + * request and we keep signalling it if the caller set it. */ - sd->flags &= ~SPLICE_F_NONBLOCK; more = sd->flags & SPLICE_F_MORE; + sd->flags |= SPLICE_F_MORE; WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail)); @@ -960,20 +1069,18 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, ret = vfs_splice_read(in, &pos, pipe, len, flags); if (unlikely(ret <= 0)) - goto out_release; + goto read_failure; read_len = ret; sd->total_len = read_len; /* - * If more data is pending, set SPLICE_F_MORE - * If this is the last data and SPLICE_F_MORE was not set - * initially, clears it. + * If we now have sufficient data to fulfill the request then + * we clear SPLICE_F_MORE if it was not set initially. */ - if (read_len < len) - sd->flags |= SPLICE_F_MORE; - else if (!more) + if (read_len >= len && !more) sd->flags &= ~SPLICE_F_MORE; + /* * NOTE: nonblocking mode only applies to the input. We * must not do the output in nonblocking mode as then we @@ -1000,6 +1107,15 @@ done: file_accessed(in); return bytes; +read_failure: + /* + * If the user did *not* set SPLICE_F_MORE *and* we didn't hit that + * "use all of len" case that cleared SPLICE_F_MORE, *and* we did a + * "->splice_in()" that returned EOF (ie zero) *and* we have sent at + * least 1 byte *then* we will also do the ->splice_eof() call. + */ + if (ret == 0 && !more && len > 0 && bytes) + do_splice_eof(sd); out_release: /* * If we did an incomplete transfer we must release @@ -1028,6 +1144,14 @@ static int direct_splice_actor(struct pipe_inode_info *pipe, sd->flags); } +static void direct_file_splice_eof(struct splice_desc *sd) +{ + struct file *file = sd->u.file; + + if (file->f_op->splice_eof) + file->f_op->splice_eof(file); +} + /** * do_splice_direct - splices data directly between two files * @in: file to splice from @@ -1053,6 +1177,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, .flags = flags, .pos = *ppos, .u.file = out, + .splice_eof = direct_file_splice_eof, .opos = opos, }; long ret; |