author     David Howells <dhowells@redhat.com>        2024-07-02 00:40:22 +0100
committer  Christian Brauner <brauner@kernel.org>     2024-09-12 12:20:41 +0200
commit     ee4cdf7ba857a894ad1650d6ab77669cbbfa329e (patch)
tree       8258e3b756adf109085d66a8b63cd08db03abad0 /fs/netfs/read_retry.c
parent     2e45b922977c07bb339d76fd45e68f9b907fef7d (diff)
netfs: Speed up buffered reading
Improve the efficiency of buffered reads in a number of ways:
(1) Overhaul the algorithm in general so that it's a lot more compact and
split the read submission code between buffered and unbuffered
versions. The unbuffered version can be vastly simplified.
(2) Read-result collection is handed off to a work queue rather than being
done in the I/O thread. Multiple subrequests can be processed
simultaneously.
(3) When a subrequest is collected, any folios it fully spans are
collected and "spare" data on either side is donated to either the
previous or the next subrequest in the sequence.
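To make point (3) concrete, here is a rough stand-alone sketch of the donation idea; it is not the netfs implementation. The toy_* names and the fixed 4 KiB folio size are invented for illustration, though the prev_donated/next_donated naming mirrors fields the real code uses: a finished subrequest keeps only the folios it covers completely and hands the ragged bytes at either edge to a neighbour so folio ownership never splits mid-folio.

        /* Toy model of collecting one subrequest and donating its spare edges. */
        #include <stdio.h>

        #define FOLIO_SIZE 4096ULL      /* assume 4 KiB folios for the example */

        struct toy_subreq {
                unsigned long long start;        /* file offset of this subrequest */
                unsigned long long len;          /* bytes it transferred */
                unsigned long long prev_donated; /* spare bytes handed to us by the previous subreq */
                unsigned long long next_donated; /* spare bytes handed to us by the next subreq */
        };

        static void toy_collect(struct toy_subreq *prev, struct toy_subreq *cur,
                                struct toy_subreq *next)
        {
                unsigned long long start = cur->start - cur->prev_donated;
                unsigned long long end   = cur->start + cur->len + cur->next_donated;
                unsigned long long first_whole = (start + FOLIO_SIZE - 1) / FOLIO_SIZE * FOLIO_SIZE;
                unsigned long long last_whole  = end / FOLIO_SIZE * FOLIO_SIZE;

                /* Release only the folios this subrequest spans completely. */
                if (first_whole < last_whole)
                        printf("release folios spanning [%llu, %llu)\n", first_whole, last_whole);

                /* Ragged leading edge is donated backwards... */
                if (prev && start < first_whole) {
                        prev->next_donated += first_whole - start;
                        printf("donate %llu leading bytes to the previous subreq\n",
                               first_whole - start);
                }
                /* ...ragged trailing edge is donated forwards. */
                if (next && last_whole < end) {
                        next->prev_donated += end - last_whole;
                        printf("donate %llu trailing bytes to the next subreq\n",
                               end - last_whole);
                }
        }

        int main(void)
        {
                struct toy_subreq a = { .start = 0,     .len = 5000 };
                struct toy_subreq b = { .start = 5000,  .len = 8000 };
                struct toy_subreq c = { .start = 13000, .len = 3384 };

                toy_collect(&a, &b, &c); /* b releases folio 2 only; scraps go to a and c */
                return 0;
        }

In this example b covers bytes 5000-12999, so it releases the one folio it fully spans (8192-12287), donates 3192 leading bytes back to a and 712 trailing bytes forward to c.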
Notes:
(*) Readahead expansion massively slows down fio, presumably because it
causes a load of extra allocations, both folio and xarray, up front
before RPC requests can be transmitted.
(*) RDMA with cifs does appear to work, both with SIW and RXE.
(*) PG_private_2-based reading and copy-to-cache is split out into its own
file and altered to use folio_queue. Note that the copy to the cache
now creates a new write transaction against the cache and adds the
folios to be copied into it. This allows it to use part of the
writeback I/O code.
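The folio_queue referred to in the last note is essentially a chain of buckets, each holding an array of folio slots, which the copy-to-cache path walks while attaching folios to a cache write. The stand-alone model below only mimics that shape; the toy_* types and the printf standing in for the cache write are invented and are not the kernel's folio_queue API (the real accessors, such as folioq_count() and folioq_folio(), can be seen in the diff further down).

        /* Toy model of walking a folio_queue-like chain of slot buckets. */
        #include <stdio.h>

        #define TOY_FOLIOQ_SLOTS 16

        struct toy_folio { unsigned long index; };

        struct toy_folio_queue {
                struct toy_folio_queue *next;              /* next bucket in the chain */
                struct toy_folio *slots[TOY_FOLIOQ_SLOTS]; /* folios held by this bucket */
                int nr;                                    /* occupied slots in this bucket */
        };

        /* Walk every folio in the queue; the printf stands in for adding the
         * folio to a cache write request.
         */
        static void toy_copy_to_cache(struct toy_folio_queue *q)
        {
                for (; q; q = q->next)
                        for (int slot = 0; slot < q->nr; slot++)
                                printf("add folio %lu to cache write\n", q->slots[slot]->index);
        }

        int main(void)
        {
                struct toy_folio f[3] = { { .index = 0 }, { .index = 1 }, { .index = 2 } };
                struct toy_folio_queue b2 = { .next = NULL, .slots = { &f[2] }, .nr = 1 };
                struct toy_folio_queue b1 = { .next = &b2,  .slots = { &f[0], &f[1] }, .nr = 2 };

                toy_copy_to_cache(&b1);
                return 0;
        }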
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Jeff Layton <jlayton@kernel.org>
cc: netfs@lists.linux.dev
cc: linux-fsdevel@vger.kernel.org
Link: https://lore.kernel.org/r/20240814203850.2240469-20-dhowells@redhat.com/ # v2
Signed-off-by: Christian Brauner <brauner@kernel.org>
Diffstat (limited to 'fs/netfs/read_retry.c')
-rw-r--r--  fs/netfs/read_retry.c  256
1 file changed, 256 insertions, 0 deletions
diff --git a/fs/netfs/read_retry.c b/fs/netfs/read_retry.c new file mode 100644 index 0000000000000..0350592ea8047 --- /dev/null +++ b/fs/netfs/read_retry.c @@ -0,0 +1,256 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Network filesystem read subrequest retrying. + * + * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include <linux/fs.h> +#include <linux/slab.h> +#include "internal.h" + +static void netfs_reissue_read(struct netfs_io_request *rreq, + struct netfs_io_subrequest *subreq) +{ + struct iov_iter *io_iter = &subreq->io_iter; + + if (iov_iter_is_folioq(io_iter)) { + subreq->curr_folioq = (struct folio_queue *)io_iter->folioq; + subreq->curr_folioq_slot = io_iter->folioq_slot; + subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot]; + } + + atomic_inc(&rreq->nr_outstanding); + __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags); + netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit); + subreq->rreq->netfs_ops->issue_read(subreq); +} + +/* + * Go through the list of failed/short reads, retrying all retryable ones. We + * need to switch failed cache reads to network downloads. + */ +static void netfs_retry_read_subrequests(struct netfs_io_request *rreq) +{ + struct netfs_io_subrequest *subreq; + struct netfs_io_stream *stream0 = &rreq->io_streams[0]; + LIST_HEAD(sublist); + LIST_HEAD(queue); + + _enter("R=%x", rreq->debug_id); + + if (list_empty(&rreq->subrequests)) + return; + + if (rreq->netfs_ops->retry_request) + rreq->netfs_ops->retry_request(rreq, NULL); + + /* If there's no renegotiation to do, just resend each retryable subreq + * up to the first permanently failed one. + */ + if (!rreq->netfs_ops->prepare_read && + !test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags)) { + struct netfs_io_subrequest *subreq; + + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { + if (test_bit(NETFS_SREQ_FAILED, &subreq->flags)) + break; + if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) { + netfs_reset_iter(subreq); + netfs_reissue_read(rreq, subreq); + } + } + return; + } + + /* Okay, we need to renegotiate all the download requests and flip any + * failed cache reads over to being download requests and negotiate + * those also. All fully successful subreqs have been removed from the + * list and any spare data from those has been donated. + * + * What we do is decant the list and rebuild it one subreq at a time so + * that we don't end up with donations jumping over a gap we're busy + * populating with smaller subrequests. In the event that the subreq + * we just launched finishes before we insert the next subreq, it'll + * fill in rreq->prev_donated instead. + + * Note: Alternatively, we could split the tail subrequest right before + * we reissue it and fix up the donations under lock. + */ + list_splice_init(&rreq->subrequests, &queue); + + do { + struct netfs_io_subrequest *from; + struct iov_iter source; + unsigned long long start, len; + size_t part, deferred_next_donated = 0; + bool boundary = false; + + /* Go through the subreqs and find the next span of contiguous + * buffer that we then rejig (cifs, for example, needs the + * rsize renegotiating) and reissue. 
+ */ + from = list_first_entry(&queue, struct netfs_io_subrequest, rreq_link); + list_move_tail(&from->rreq_link, &sublist); + start = from->start + from->transferred; + len = from->len - from->transferred; + + _debug("from R=%08x[%x] s=%llx ctl=%zx/%zx/%zx", + rreq->debug_id, from->debug_index, + from->start, from->consumed, from->transferred, from->len); + + if (test_bit(NETFS_SREQ_FAILED, &from->flags) || + !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags)) + goto abandon; + + deferred_next_donated = from->next_donated; + while ((subreq = list_first_entry_or_null( + &queue, struct netfs_io_subrequest, rreq_link))) { + if (subreq->start != start + len || + subreq->transferred > 0 || + !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) + break; + list_move_tail(&subreq->rreq_link, &sublist); + len += subreq->len; + deferred_next_donated = subreq->next_donated; + if (test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags)) + break; + } + + _debug(" - range: %llx-%llx %llx", start, start + len - 1, len); + + /* Determine the set of buffers we're going to use. Each + * subreq gets a subset of a single overall contiguous buffer. + */ + netfs_reset_iter(from); + source = from->io_iter; + source.count = len; + + /* Work through the sublist. */ + while ((subreq = list_first_entry_or_null( + &sublist, struct netfs_io_subrequest, rreq_link))) { + list_del(&subreq->rreq_link); + + subreq->source = NETFS_DOWNLOAD_FROM_SERVER; + subreq->start = start - subreq->transferred; + subreq->len = len + subreq->transferred; + stream0->sreq_max_len = subreq->len; + + __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); + __set_bit(NETFS_SREQ_RETRYING, &subreq->flags); + + spin_lock_bh(&rreq->lock); + list_add_tail(&subreq->rreq_link, &rreq->subrequests); + subreq->prev_donated += rreq->prev_donated; + rreq->prev_donated = 0; + trace_netfs_sreq(subreq, netfs_sreq_trace_retry); + spin_unlock_bh(&rreq->lock); + + BUG_ON(!len); + + /* Renegotiate max_len (rsize) */ + if (rreq->netfs_ops->prepare_read(subreq) < 0) { + trace_netfs_sreq(subreq, netfs_sreq_trace_reprep_failed); + __set_bit(NETFS_SREQ_FAILED, &subreq->flags); + } + + part = umin(len, stream0->sreq_max_len); + if (unlikely(rreq->io_streams[0].sreq_max_segs)) + part = netfs_limit_iter(&source, 0, part, stream0->sreq_max_segs); + subreq->len = subreq->transferred + part; + subreq->io_iter = source; + iov_iter_truncate(&subreq->io_iter, part); + iov_iter_advance(&source, part); + len -= part; + start += part; + if (!len) { + if (boundary) + __set_bit(NETFS_SREQ_BOUNDARY, &subreq->flags); + subreq->next_donated = deferred_next_donated; + } else { + __clear_bit(NETFS_SREQ_BOUNDARY, &subreq->flags); + subreq->next_donated = 0; + } + + netfs_reissue_read(rreq, subreq); + if (!len) + break; + + /* If we ran out of subrequests, allocate another. */ + if (list_empty(&sublist)) { + subreq = netfs_alloc_subrequest(rreq); + if (!subreq) + goto abandon; + subreq->source = NETFS_DOWNLOAD_FROM_SERVER; + subreq->start = start; + + /* We get two refs, but need just one. */ + netfs_put_subrequest(subreq, false, netfs_sreq_trace_new); + trace_netfs_sreq(subreq, netfs_sreq_trace_split); + list_add_tail(&subreq->rreq_link, &sublist); + } + } + + /* If we managed to use fewer subreqs, we can discard the + * excess. 
+ */ + while ((subreq = list_first_entry_or_null( + &sublist, struct netfs_io_subrequest, rreq_link))) { + trace_netfs_sreq(subreq, netfs_sreq_trace_discard); + list_del(&subreq->rreq_link); + netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done); + } + + } while (!list_empty(&queue)); + + return; + + /* If we hit ENOMEM, fail all remaining subrequests */ +abandon: + list_splice_init(&sublist, &queue); + list_for_each_entry(subreq, &queue, rreq_link) { + if (!subreq->error) + subreq->error = -ENOMEM; + __clear_bit(NETFS_SREQ_FAILED, &subreq->flags); + __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags); + __clear_bit(NETFS_SREQ_RETRYING, &subreq->flags); + } + spin_lock_bh(&rreq->lock); + list_splice_tail_init(&queue, &rreq->subrequests); + spin_unlock_bh(&rreq->lock); +} + +/* + * Retry reads. + */ +void netfs_retry_reads(struct netfs_io_request *rreq) +{ + trace_netfs_rreq(rreq, netfs_rreq_trace_resubmit); + + atomic_inc(&rreq->nr_outstanding); + + netfs_retry_read_subrequests(rreq); + + if (atomic_dec_and_test(&rreq->nr_outstanding)) + netfs_rreq_terminated(rreq, false); +} + +/* + * Unlock any the pages that haven't been unlocked yet due to abandoned + * subrequests. + */ +void netfs_unlock_abandoned_read_pages(struct netfs_io_request *rreq) +{ + struct folio_queue *p; + + for (p = rreq->buffer; p; p = p->next) { + for (int slot = 0; slot < folioq_count(p); slot++) { + struct folio *folio = folioq_folio(p, slot); + + if (folio && !folioq_is_marked2(p, slot)) { + trace_netfs_folio(folio, netfs_folio_trace_abandon); + folio_unlock(folio); + } + } + } +} |
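One detail of the retry path above is worth calling out: netfs_retry_reads() bumps rreq->nr_outstanding before reissuing and only calls netfs_rreq_terminated() if its own decrement takes the count to zero, so the request cannot be completed underneath the retries still being queued. A minimal user-space analogue of that counter bias, with invented toy_* names rather than the netfs types, might look like this:

        /* Toy model of the "bias the outstanding counter while queueing retries" pattern. */
        #include <stdatomic.h>
        #include <stdio.h>

        struct toy_request {
                atomic_int nr_outstanding; /* in-flight subrequests plus our bias */
        };

        static void toy_issue_retries(struct toy_request *rreq, int nr)
        {
                for (int i = 0; i < nr; i++)
                        atomic_fetch_add(&rreq->nr_outstanding, 1); /* one per reissued subreq */
        }

        static void toy_retry_reads(struct toy_request *rreq)
        {
                /* Bias the counter so the request can't complete while retries are queued. */
                atomic_fetch_add(&rreq->nr_outstanding, 1);

                toy_issue_retries(rreq, 2); /* pretend two subrequests were reissued */

                /* Drop the bias; only the decrement that reaches zero finalises the request. */
                if (atomic_fetch_sub(&rreq->nr_outstanding, 1) == 1)
                        printf("request complete\n");
                else
                        printf("retries still in flight\n");
        }

        int main(void)
        {
                struct toy_request rreq = { .nr_outstanding = 0 };

                toy_retry_reads(&rreq);
                return 0;
        }

In the real code the same effect is achieved with atomic_inc() and atomic_dec_and_test() on rreq->nr_outstanding, with netfs_rreq_terminated() invoked only on the final decrement.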