summaryrefslogtreecommitdiff
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2023-01-21 07:50:07 +0100
committerDavid Sterba <dsterba@suse.com>2023-02-15 19:38:51 +0100
commit7609afac677546b225d8327d726cc558d3666496 (patch)
tree03387fff02a0211bc38f33474d66f06f0216ed20 /fs/btrfs/inode.c
parente52190441bd6b268aed6ecc0efe3614c4222014e (diff)
btrfs: handle checksum validation and repair at the storage layer
Currently btrfs handles checksum validation and repair in the end I/O handler for the btrfs_bio. This leads to a lot of duplicate code plus issues with varying semantics or bugs, e.g. - the until recently broken repair for compressed extents - the fact that encoded reads validate the checksums but do not kick of read repair - the inconsistent checking of the BTRFS_FS_STATE_NO_CSUMS flag This commit revamps the checksum validation and repair code to instead work below the btrfs_submit_bio interfaces. In case of a checksum failure (or a plain old I/O error), the repair is now kicked off before the upper level ->end_io handler is invoked. Progress of an in-progress repair is tracked by a small structure that is allocated using a mempool for each original bio with failed sectors, which holds a reference to the original bio. This new structure is allocated using a mempool to guarantee forward progress even under memory pressure. The mempool will be replenished when the repair completes, just as the mempools backing the bios. There is one significant behavior change here: If repair fails or is impossible to start with, the whole bio will be failed to the upper layer. This is the behavior that all I/O submitters except for buffered I/O already emulated in their end_io handler. For buffered I/O this now means that a large readahead request can fail due to a single bad sector, but as readahead errors are ignored the following readpage if the sector is actually accessed will still be able to read. This also matches the I/O failure handling in other file systems. Reviewed-by: Josef Bacik <josef@toxicpanda.com> Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--fs/btrfs/inode.c81
1 files changed, 4 insertions, 77 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ba90f90e3d87..237513508fcc 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7941,39 +7941,6 @@ void btrfs_submit_dio_repair_bio(struct btrfs_inode *inode, struct bio *bio, int
btrfs_submit_bio(inode->root->fs_info, bio, mirror_num);
}
-static blk_status_t btrfs_check_read_dio_bio(struct btrfs_dio_private *dip,
- struct btrfs_bio *bbio,
- const bool uptodate)
-{
- struct inode *inode = &dip->inode->vfs_inode;
- struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
- const bool csum = !(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM);
- blk_status_t err = BLK_STS_OK;
- struct bvec_iter iter;
- struct bio_vec bv;
- u32 offset;
-
- btrfs_bio_for_each_sector(fs_info, bv, bbio, iter, offset) {
- u64 start = bbio->file_offset + offset;
-
- if (uptodate &&
- (!csum || !btrfs_check_data_csum(BTRFS_I(inode), bbio, offset,
- bv.bv_page, bv.bv_offset))) {
- btrfs_clean_io_failure(BTRFS_I(inode), start,
- bv.bv_page, bv.bv_offset);
- } else {
- int ret;
-
- ret = btrfs_repair_one_sector(BTRFS_I(inode), bbio, offset,
- bv.bv_page, bv.bv_offset, false);
- if (ret)
- err = errno_to_blk_status(ret);
- }
- }
-
- return err;
-}
-
blk_status_t btrfs_submit_bio_start_direct_io(struct btrfs_inode *inode,
struct bio *bio,
u64 dio_file_offset)
@@ -7987,18 +7954,14 @@ static void btrfs_end_dio_bio(struct btrfs_bio *bbio)
struct bio *bio = &bbio->bio;
blk_status_t err = bio->bi_status;
- if (err)
+ if (err) {
btrfs_warn(dip->inode->root->fs_info,
"direct IO failed ino %llu rw %d,%u sector %#Lx len %u err no %d",
btrfs_ino(dip->inode), bio_op(bio),
bio->bi_opf, bio->bi_iter.bi_sector,
bio->bi_iter.bi_size, err);
-
- if (bio_op(bio) == REQ_OP_READ)
- err = btrfs_check_read_dio_bio(dip, bbio, !err);
-
- if (err)
dip->bio.bi_status = err;
+ }
btrfs_record_physical_zoned(&dip->inode->vfs_inode, bbio->file_offset, bio);
@@ -10282,7 +10245,6 @@ struct btrfs_encoded_read_private {
wait_queue_head_t wait;
atomic_t pending;
blk_status_t status;
- bool skip_csum;
};
static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
@@ -10296,44 +10258,11 @@ static blk_status_t submit_encoded_read_bio(struct btrfs_inode *inode,
return BLK_STS_OK;
}
-static blk_status_t btrfs_encoded_read_verify_csum(struct btrfs_bio *bbio)
-{
- const bool uptodate = (bbio->bio.bi_status == BLK_STS_OK);
- struct btrfs_encoded_read_private *priv = bbio->private;
- struct btrfs_inode *inode = priv->inode;
- struct btrfs_fs_info *fs_info = inode->root->fs_info;
- u32 sectorsize = fs_info->sectorsize;
- struct bio_vec *bvec;
- struct bvec_iter_all iter_all;
- u32 bio_offset = 0;
-
- if (priv->skip_csum || !uptodate)
- return bbio->bio.bi_status;
-
- bio_for_each_segment_all(bvec, &bbio->bio, iter_all) {
- unsigned int i, nr_sectors, pgoff;
-
- nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
- pgoff = bvec->bv_offset;
- for (i = 0; i < nr_sectors; i++) {
- ASSERT(pgoff < PAGE_SIZE);
- if (btrfs_check_data_csum(inode, bbio, bio_offset,
- bvec->bv_page, pgoff))
- return BLK_STS_IOERR;
- bio_offset += sectorsize;
- pgoff += sectorsize;
- }
- }
- return BLK_STS_OK;
-}
-
static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
{
struct btrfs_encoded_read_private *priv = bbio->private;
- blk_status_t status;
- status = btrfs_encoded_read_verify_csum(bbio);
- if (status) {
+ if (bbio->bio.bi_status) {
/*
* The memory barrier implied by the atomic_dec_return() here
* pairs with the memory barrier implied by the
@@ -10342,11 +10271,10 @@ static void btrfs_encoded_read_endio(struct btrfs_bio *bbio)
* write is observed before the load of status in
* btrfs_encoded_read_regular_fill_pages().
*/
- WRITE_ONCE(priv->status, status);
+ WRITE_ONCE(priv->status, bbio->bio.bi_status);
}
if (!atomic_dec_return(&priv->pending))
wake_up(&priv->wait);
- btrfs_bio_free_csum(bbio);
bio_put(&bbio->bio);
}
@@ -10359,7 +10287,6 @@ int btrfs_encoded_read_regular_fill_pages(struct btrfs_inode *inode,
.inode = inode,
.file_offset = file_offset,
.pending = ATOMIC_INIT(1),
- .skip_csum = (inode->flags & BTRFS_INODE_NODATASUM),
};
unsigned long i = 0;
u64 cur = 0;