From 74cc3600e8a7599557cf684ba8760d6cb2c911d8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 20 Jan 2023 08:46:57 +0100 Subject: btrfs: raid56: no need for irqsafe locking These days all the operations that take locks in the raid56.c code are run from user context (mostly workqueues). Drop all the irqsafe locking that is not required any more. Reviewed-by: Qu Wenruo Signed-off-by: Christoph Hellwig Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 50 ++++++++++++++++++++++---------------------------- 1 file changed, 22 insertions(+), 28 deletions(-) (limited to 'fs/btrfs/raid56.c') diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 642828c1b299..a68fe51861ac 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -407,16 +407,15 @@ static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio) static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio) { struct btrfs_stripe_hash_table *table; - unsigned long flags; if (!test_bit(RBIO_CACHE_BIT, &rbio->flags)) return; table = rbio->bioc->fs_info->stripe_hash_table; - spin_lock_irqsave(&table->cache_lock, flags); + spin_lock(&table->cache_lock); __remove_rbio_from_cache(rbio); - spin_unlock_irqrestore(&table->cache_lock, flags); + spin_unlock(&table->cache_lock); } /* @@ -425,19 +424,18 @@ static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio) static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info) { struct btrfs_stripe_hash_table *table; - unsigned long flags; struct btrfs_raid_bio *rbio; table = info->stripe_hash_table; - spin_lock_irqsave(&table->cache_lock, flags); + spin_lock(&table->cache_lock); while (!list_empty(&table->stripe_cache)) { rbio = list_entry(table->stripe_cache.next, struct btrfs_raid_bio, stripe_cache); __remove_rbio_from_cache(rbio); } - spin_unlock_irqrestore(&table->cache_lock, flags); + spin_unlock(&table->cache_lock); } /* @@ -467,14 +465,13 @@ void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info) static void 
cache_rbio(struct btrfs_raid_bio *rbio) { struct btrfs_stripe_hash_table *table; - unsigned long flags; if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags)) return; table = rbio->bioc->fs_info->stripe_hash_table; - spin_lock_irqsave(&table->cache_lock, flags); + spin_lock(&table->cache_lock); spin_lock(&rbio->bio_list_lock); /* bump our ref if we were not in the list before */ @@ -501,7 +498,7 @@ static void cache_rbio(struct btrfs_raid_bio *rbio) __remove_rbio_from_cache(found); } - spin_unlock_irqrestore(&table->cache_lock, flags); + spin_unlock(&table->cache_lock); } /* @@ -530,15 +527,14 @@ static void run_xor(void **pages, int src_cnt, ssize_t len) */ static int rbio_is_full(struct btrfs_raid_bio *rbio) { - unsigned long flags; unsigned long size = rbio->bio_list_bytes; int ret = 1; - spin_lock_irqsave(&rbio->bio_list_lock, flags); + spin_lock(&rbio->bio_list_lock); if (size != rbio->nr_data * BTRFS_STRIPE_LEN) ret = 0; BUG_ON(size > rbio->nr_data * BTRFS_STRIPE_LEN); - spin_unlock_irqrestore(&rbio->bio_list_lock, flags); + spin_unlock(&rbio->bio_list_lock); return ret; } @@ -657,14 +653,13 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) struct btrfs_stripe_hash *h; struct btrfs_raid_bio *cur; struct btrfs_raid_bio *pending; - unsigned long flags; struct btrfs_raid_bio *freeit = NULL; struct btrfs_raid_bio *cache_drop = NULL; int ret = 0; h = rbio->bioc->fs_info->stripe_hash_table->table + rbio_bucket(rbio); - spin_lock_irqsave(&h->lock, flags); + spin_lock(&h->lock); list_for_each_entry(cur, &h->hash_list, hash_list) { if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0]) continue; @@ -724,7 +719,7 @@ lockit: refcount_inc(&rbio->refs); list_add(&rbio->hash_list, &h->hash_list); out: - spin_unlock_irqrestore(&h->lock, flags); + spin_unlock(&h->lock); if (cache_drop) remove_rbio_from_cache(cache_drop); if (freeit) @@ -742,7 +737,6 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) { int bucket; struct btrfs_stripe_hash *h; - 
unsigned long flags; int keep_cache = 0; bucket = rbio_bucket(rbio); @@ -751,7 +745,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) if (list_empty(&rbio->plug_list)) cache_rbio(rbio); - spin_lock_irqsave(&h->lock, flags); + spin_lock(&h->lock); spin_lock(&rbio->bio_list_lock); if (!list_empty(&rbio->hash_list)) { @@ -788,7 +782,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) list_add(&next->hash_list, &h->hash_list); refcount_inc(&next->refs); spin_unlock(&rbio->bio_list_lock); - spin_unlock_irqrestore(&h->lock, flags); + spin_unlock(&h->lock); if (next->operation == BTRFS_RBIO_READ_REBUILD) start_async_work(next, recover_rbio_work_locked); @@ -808,7 +802,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) } done: spin_unlock(&rbio->bio_list_lock); - spin_unlock_irqrestore(&h->lock, flags); + spin_unlock(&h->lock); done_nolock: if (!keep_cache) @@ -891,16 +885,16 @@ static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio, index = stripe_nr * rbio->stripe_nsectors + sector_nr; ASSERT(index >= 0 && index < rbio->nr_sectors); - spin_lock_irq(&rbio->bio_list_lock); + spin_lock(&rbio->bio_list_lock); sector = &rbio->bio_sectors[index]; if (sector->page || bio_list_only) { /* Don't return sector without a valid page pointer */ if (!sector->page) sector = NULL; - spin_unlock_irq(&rbio->bio_list_lock); + spin_unlock(&rbio->bio_list_lock); return sector; } - spin_unlock_irq(&rbio->bio_list_lock); + spin_unlock(&rbio->bio_list_lock); return &rbio->stripe_sectors[index]; } @@ -1148,11 +1142,11 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio) { struct bio *bio; - spin_lock_irq(&rbio->bio_list_lock); + spin_lock(&rbio->bio_list_lock); bio_list_for_each(bio, &rbio->bio_list) index_one_bio(rbio, bio); - spin_unlock_irq(&rbio->bio_list_lock); + spin_unlock(&rbio->bio_list_lock); } static void bio_get_trace_info(struct btrfs_raid_bio *rbio, struct bio *bio, @@ -1895,9 +1889,9 @@ static int 
recover_sectors(struct btrfs_raid_bio *rbio) if (rbio->operation == BTRFS_RBIO_READ_REBUILD || rbio->operation == BTRFS_RBIO_REBUILD_MISSING) { - spin_lock_irq(&rbio->bio_list_lock); + spin_lock(&rbio->bio_list_lock); set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); - spin_unlock_irq(&rbio->bio_list_lock); + spin_unlock(&rbio->bio_list_lock); } index_rbio_pages(rbio); @@ -2265,9 +2259,9 @@ static void rmw_rbio(struct btrfs_raid_bio *rbio) * bio list any more, anyone else that wants to change this stripe * needs to do their own rmw. */ - spin_lock_irq(&rbio->bio_list_lock); + spin_lock(&rbio->bio_list_lock); set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); - spin_unlock_irq(&rbio->bio_list_lock); + spin_unlock(&rbio->bio_list_lock); bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors); -- cgit v1.2.3 From 1faf3885067d5be65597d5dc682f0da505822104 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Tue, 7 Feb 2023 12:26:14 +0800 Subject: btrfs: use an efficient way to represent source of duplicated stripes For btrfs dev-replace, we have to duplicate writes to the source device into the target device. For non-RAID56, all writes into the same mapped ranges are sharing the same content, thus they don't really need to bother anything. (E.g. in btrfs_submit_bio() for non-RAID56 range we just submit the same write to all involved devices). But for RAID56, all stripes contain different content, thus we must have a clear mapping of which stripe is duplicated from which original stripe. Currently we use a complex way using tgtdev_map[] array, e.g: num_tgtdevs = 1 tgtdev_map[0] = 0 <- Means stripes[0] is not involved in replace. tgtdev_map[1] = 3 <- Means stripes[1] is involved in replace, and it's duplicated to stripes[3]. tgtdev_map[2] = 0 <- Means stripes[2] is not involved in replace. But this is wasting some space, and ignores one important thing for dev-replace, there is at most one running replace. 
Thus we can change it to fixed-size fields to represent the mapping: replace_nr_stripes = 1 replace_stripe_src = 1 <- Means stripes[1] is involved in replace, thus the extra stripe is a copy of stripes[1]. By this we can save some space for bioc on RAID56 chunks with many devices, and we get rid of one variable-sized array from bioc. Thus the patch involves the following changes: - Replace @num_tgtdevs and @tgtdev_map[] with @replace_nr_stripes and @replace_stripe_src. @num_tgtdevs is just renamed to @replace_nr_stripes, while the mapping is completely changed. - Add extra ASSERT()s for RAID56 code. - Only add two more extra stripes for dev-replace cases, as we have an upper limit on how many dev-replace stripes we can have. - Unify the behavior of handle_ops_on_dev_replace(). Previously handle_ops_on_dev_replace() took two different paths for WRITE and GET_READ_MIRRORS. Now unify them by always going the WRITE path first (with at most 2 replace stripes), then if we're doing GET_READ_MIRRORS and we have 2 extra stripes, just drop one stripe. - Remove the @real_stripes argument from alloc_btrfs_io_context(), as we don't need the old variable-length array any more. 
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 36 +++++++++--- fs/btrfs/scrub.c | 4 +- fs/btrfs/volumes.c | 162 +++++++++++++++++++++++------------------------------ fs/btrfs/volumes.h | 26 +++++---- 4 files changed, 115 insertions(+), 113 deletions(-) (limited to 'fs/btrfs/raid56.c') diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index a68fe51861ac..0ac1fc7896dd 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -906,7 +906,7 @@ static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio, static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info, struct btrfs_io_context *bioc) { - const unsigned int real_stripes = bioc->num_stripes - bioc->num_tgtdevs; + const unsigned int real_stripes = bioc->num_stripes - bioc->replace_nr_stripes; const unsigned int stripe_npages = BTRFS_STRIPE_LEN >> PAGE_SHIFT; const unsigned int num_pages = stripe_npages * real_stripes; const unsigned int stripe_nsectors = @@ -1276,10 +1276,16 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio, goto error; } - if (likely(!rbio->bioc->num_tgtdevs)) + if (likely(!rbio->bioc->replace_nr_stripes)) return 0; - /* Make a copy for the replace target device. */ + /* + * Make a copy for the replace target device. + * + * Thus the source stripe number (in replace_stripe_src) should be valid. + */ + ASSERT(rbio->bioc->replace_stripe_src >= 0); + for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors; total_sector_nr++) { struct sector_ptr *sector; @@ -1287,7 +1293,12 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio, stripe = total_sector_nr / rbio->stripe_nsectors; sectornr = total_sector_nr % rbio->stripe_nsectors; - if (!rbio->bioc->tgtdev_map[stripe]) { + /* + * For RAID56, there is only one device that can be replaced, + * and replace_stripe_src[0] indicates the stripe number we + * need to copy from. 
+ */ + if (stripe != rbio->bioc->replace_stripe_src) { /* * We can skip the whole stripe completely, note * total_sector_nr will be increased by one anyway. @@ -1310,7 +1321,7 @@ static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio, } ret = rbio_add_io_sector(rbio, bio_list, sector, - rbio->bioc->tgtdev_map[stripe], + rbio->real_stripes, sectornr, REQ_OP_WRITE); if (ret) goto error; @@ -2436,7 +2447,11 @@ static int finish_parity_scrub(struct btrfs_raid_bio *rbio, int need_check) else BUG(); - if (bioc->num_tgtdevs && bioc->tgtdev_map[rbio->scrubp]) { + /* + * Replace is running and our P/Q stripe is being replaced, then we + * need to duplicate the final write to replace target. + */ + if (bioc->replace_nr_stripes && bioc->replace_stripe_src == rbio->scrubp) { is_replace = 1; bitmap_copy(pbitmap, &rbio->dbitmap, rbio->stripe_nsectors); } @@ -2538,13 +2553,18 @@ writeback: if (!is_replace) goto submit_write; + /* + * Replace is running and our parity stripe needs to be duplicated to + * the target device. Check we have a valid source stripe number. 
+ */ + ASSERT(rbio->bioc->replace_stripe_src >= 0); for_each_set_bit(sectornr, pbitmap, rbio->stripe_nsectors) { struct sector_ptr *sector; sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr); ret = rbio_add_io_sector(rbio, &bio_list, sector, - bioc->tgtdev_map[rbio->scrubp], - sectornr, REQ_OP_WRITE); + rbio->real_stripes, + sectornr, REQ_OP_WRITE); if (ret) goto cleanup; } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index e1910a045c24..64b52be6bf0b 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1230,7 +1230,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) sblock_other = sblocks_for_recheck[mirror_index]; } else { struct scrub_recover *r = sblock_bad->sectors[0]->recover; - int max_allowed = r->bioc->num_stripes - r->bioc->num_tgtdevs; + int max_allowed = r->bioc->num_stripes - r->bioc->replace_nr_stripes; if (mirror_index >= max_allowed) break; @@ -1540,7 +1540,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, bioc->map_type, bioc->raid_map, bioc->num_stripes - - bioc->num_tgtdevs, + bioc->replace_nr_stripes, mirror_index, &stripe_index, &stripe_offset); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 2417f4fb8724..8f06f0e47ba8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5914,8 +5914,7 @@ static void sort_parity_stripes(struct btrfs_io_context *bioc, int num_stripes) } static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info, - u16 total_stripes, - u16 real_stripes) + u16 total_stripes) { struct btrfs_io_context *bioc; @@ -5924,8 +5923,6 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_ sizeof(struct btrfs_io_context) + /* Plus the variable array for the stripes */ sizeof(struct btrfs_io_stripe) * (total_stripes) + - /* Plus the variable array for the tgt dev */ - sizeof(u16) * (real_stripes) + /* * Plus the raid_map, which includes both the tgt dev * and the stripes. 
@@ -5939,8 +5936,8 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_ refcount_set(&bioc->refs, 1); bioc->fs_info = fs_info; - bioc->tgtdev_map = (u16 *)(bioc->stripes + total_stripes); - bioc->raid_map = (u64 *)(bioc->tgtdev_map + real_stripes); + bioc->raid_map = (u64 *)(bioc->stripes + total_stripes); + bioc->replace_stripe_src = -1; return bioc; } @@ -6204,93 +6201,74 @@ static void handle_ops_on_dev_replace(enum btrfs_map_op op, int *num_stripes_ret, int *max_errors_ret) { u64 srcdev_devid = dev_replace->srcdev->devid; - int tgtdev_indexes = 0; + /* + * At this stage, num_stripes is still the real number of stripes, + * excluding the duplicated stripes. + */ int num_stripes = *num_stripes_ret; + int nr_extra_stripes = 0; int max_errors = *max_errors_ret; int i; - if (op == BTRFS_MAP_WRITE) { - int index_where_to_add; + /* + * A block group which has "to_copy" set will eventually be copied by + * the dev-replace process. We can avoid cloning IO here. + */ + if (is_block_group_to_copy(dev_replace->srcdev->fs_info, logical)) + return; - /* - * A block group which have "to_copy" set will eventually - * copied by dev-replace process. We can avoid cloning IO here. - */ - if (is_block_group_to_copy(dev_replace->srcdev->fs_info, logical)) - return; + /* + * Duplicate the write operations while the dev-replace procedure is + * running. Since the copying of the old disk to the new disk takes + * place at run time while the filesystem is mounted writable, the + * regular write operations to the old disk have to be duplicated to go + * to the new disk as well. + * + * Note that device->missing is handled by the caller, and that the + * write to the old disk is already set up in the stripes array. 
+ */ + for (i = 0; i < num_stripes; i++) { + struct btrfs_io_stripe *old = &bioc->stripes[i]; + struct btrfs_io_stripe *new = &bioc->stripes[num_stripes + nr_extra_stripes]; - /* - * duplicate the write operations while the dev replace - * procedure is running. Since the copying of the old disk to - * the new disk takes place at run time while the filesystem is - * mounted writable, the regular write operations to the old - * disk have to be duplicated to go to the new disk as well. - * - * Note that device->missing is handled by the caller, and that - * the write to the old disk is already set up in the stripes - * array. - */ - index_where_to_add = num_stripes; - for (i = 0; i < num_stripes; i++) { - if (bioc->stripes[i].dev->devid == srcdev_devid) { - /* write to new disk, too */ - struct btrfs_io_stripe *new = - bioc->stripes + index_where_to_add; - struct btrfs_io_stripe *old = - bioc->stripes + i; - - new->physical = old->physical; - new->dev = dev_replace->tgtdev; - bioc->tgtdev_map[i] = index_where_to_add; - index_where_to_add++; - max_errors++; - tgtdev_indexes++; - } - } - num_stripes = index_where_to_add; - } else if (op == BTRFS_MAP_GET_READ_MIRRORS) { - int index_srcdev = 0; - int found = 0; - u64 physical_of_found = 0; + if (old->dev->devid != srcdev_devid) + continue; - /* - * During the dev-replace procedure, the target drive can also - * be used to read data in case it is needed to repair a corrupt - * block elsewhere. This is possible if the requested area is - * left of the left cursor. In this area, the target drive is a - * full copy of the source drive. 
- */ - for (i = 0; i < num_stripes; i++) { - if (bioc->stripes[i].dev->devid == srcdev_devid) { - /* - * In case of DUP, in order to keep it simple, - * only add the mirror with the lowest physical - * address - */ - if (found && - physical_of_found <= bioc->stripes[i].physical) - continue; - index_srcdev = i; - found = 1; - physical_of_found = bioc->stripes[i].physical; - } - } - if (found) { - struct btrfs_io_stripe *tgtdev_stripe = - bioc->stripes + num_stripes; + new->physical = old->physical; + new->dev = dev_replace->tgtdev; + if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) + bioc->replace_stripe_src = i; + nr_extra_stripes++; + } - tgtdev_stripe->physical = physical_of_found; - tgtdev_stripe->dev = dev_replace->tgtdev; - bioc->tgtdev_map[index_srcdev] = num_stripes; + /* We can only have at most 2 extra nr_stripes (for DUP). */ + ASSERT(nr_extra_stripes <= 2); + /* + * For GET_READ_MIRRORS, we can only return at most 1 extra stripe for + * replace. + * If we have 2 extra stripes, only choose the one with smaller physical. + */ + if (op == BTRFS_MAP_GET_READ_MIRRORS && nr_extra_stripes == 2) { + struct btrfs_io_stripe *first = &bioc->stripes[num_stripes]; + struct btrfs_io_stripe *second = &bioc->stripes[num_stripes + 1]; - tgtdev_indexes++; - num_stripes++; + /* Only DUP can have two extra stripes. */ + ASSERT(bioc->map_type & BTRFS_BLOCK_GROUP_DUP); + + /* + * Swap the last stripe stripes and reduce @nr_extra_stripes. + * The extra stripe would still be there, but won't be accessed. 
+ */ + if (first->physical > second->physical) { + swap(second->physical, first->physical); + swap(second->dev, first->dev); + nr_extra_stripes--; } } - *num_stripes_ret = num_stripes; - *max_errors_ret = max_errors; - bioc->num_tgtdevs = tgtdev_indexes; + *num_stripes_ret = num_stripes + nr_extra_stripes; + *max_errors_ret = max_errors + nr_extra_stripes; + bioc->replace_nr_stripes = nr_extra_stripes; } static bool need_full_stripe(enum btrfs_map_op op) @@ -6377,7 +6355,6 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, int dev_replace_is_ongoing = 0; int patch_the_first_stripe_for_dev_replace = 0; u16 num_alloc_stripes; - u16 tgtdev_indexes = 0; u64 physical_to_patch_in_first_stripe = 0; u64 raid56_full_stripe_start = (u64)-1; u64 max_len; @@ -6523,13 +6500,16 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, } num_alloc_stripes = num_stripes; - if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) { - if (op == BTRFS_MAP_WRITE) - num_alloc_stripes <<= 1; - if (op == BTRFS_MAP_GET_READ_MIRRORS) - num_alloc_stripes++; - tgtdev_indexes = num_stripes; - } + if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL && + op != BTRFS_MAP_READ) + /* + * For replace case, we need to add extra stripes for extra + * duplicated stripes. + * + * For both WRITE and GET_READ_MIRRORS, we may have at most + * 2 more stripes (DUP types, otherwise 1). 
+ */ + num_alloc_stripes += 2; /* * If this I/O maps to a single device, try to return the device and @@ -6554,11 +6534,12 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, goto out; } - bioc = alloc_btrfs_io_context(fs_info, num_alloc_stripes, tgtdev_indexes); + bioc = alloc_btrfs_io_context(fs_info, num_alloc_stripes); if (!bioc) { ret = -ENOMEM; goto out; } + bioc->map_type = map->type; for (i = 0; i < num_stripes; i++) { set_io_stripe(&bioc->stripes[i], map, stripe_index, stripe_offset, @@ -6599,7 +6580,6 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, } *bioc_ret = bioc; - bioc->map_type = map->type; bioc->num_stripes = num_stripes; bioc->max_errors = max_errors; bioc->mirror_num = mirror_num; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index da0f9a9eaf94..e86e9f25ba0f 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -427,14 +427,13 @@ struct btrfs_io_context { /* * The following two members are for dev-replace case only. * - * @num_tgtdevs: Number of duplicated stripes which need to be + * @replace_nr_stripes: Number of duplicated stripes which need to be * written to replace target. * Should be <= 2 (2 for DUP, otherwise <= 1). - * @tgtdev_map: The array indicates where the duplicated stripes - * are from. The size is the number of original - * stripes (num_stripes - num_tgtdevs). + * @replace_stripe_src: The array indicates where the duplicated stripes + * are from. * - * The @tgtdev_map[] array is mostly for RAID56 cases. + * The @replace_stripe_src[] array is mostly for RAID56 cases. * As non-RAID56 stripes share the same contents of the mapped range, * thus no need to bother where the duplicated ones are from. * @@ -449,14 +448,17 @@ struct btrfs_io_context { * stripes[2]: dev = devid 3, physical = Z * stripes[3]: dev = devid 0, physical = Y * - * num_tgtdevs = 1 - * tgtdev_map[0] = 0 <- Means stripes[0] is not involved in replace. 
- * tgtdev_map[1] = 3 <- Means stripes[1] is involved in replace, - * and it's duplicated to stripes[3]. - * tgtdev_map[2] = 0 <- Means stripes[2] is not involved in replace. + * replace_nr_stripes = 1 + * replace_stripe_src = 1 <- Means stripes[1] is involved in replace. + * The duplicated stripe index would be + * (@num_stripes - 1). + * + * Note, that we can still have cases replace_nr_stripes = 2 for DUP. + * In that case, all stripes share the same content, thus we don't + * need to bother @replace_stripe_src value at all. */ - u16 num_tgtdevs; - u16 *tgtdev_map; + u16 replace_nr_stripes; + s16 replace_stripe_src; /* * logical block numbers for the start of each stripe * The last one or two are p/q. These are sorted, -- cgit v1.2.3 From 18d758a2d81a97b9a54a37d535870ce3170cc208 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 17 Feb 2023 13:37:03 +0800 Subject: btrfs: replace btrfs_io_context::raid_map with a fixed u64 value In btrfs_io_context structure, we have a pointer raid_map, which indicates the logical bytenr for each stripe. But considering we always call sort_parity_stripes(), the result raid_map[] is always sorted, thus raid_map[0] is always the logical bytenr of the full stripe. So why we waste the space and time (for sorting) for raid_map? This patch will replace btrfs_io_context::raid_map with a single u64 number, full_stripe_start, by: - Replace btrfs_io_context::raid_map with full_stripe_start - Replace call sites using raid_map[0] to use full_stripe_start - Replace call sites using raid_map[i] to compare with nr_data_stripes. The benefits are: - Less memory wasted on raid_map It's sizeof(u64) * num_stripes vs sizeof(u64). It'll always save at least one u64, and the benefit grows larger with num_stripes. - No more weird alloc_btrfs_io_context() behavior As there is only one fixed size + one variable length array. 
Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 31 ++++++++-------- fs/btrfs/scrub.c | 25 +++++++------ fs/btrfs/volumes.c | 84 +++++++++++++++++--------------------------- fs/btrfs/volumes.h | 19 +++++++--- include/trace/events/btrfs.h | 2 +- 5 files changed, 78 insertions(+), 83 deletions(-) (limited to 'fs/btrfs/raid56.c') diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 0ac1fc7896dd..6cbbaa6c06ca 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -202,7 +202,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio) */ static int rbio_bucket(struct btrfs_raid_bio *rbio) { - u64 num = rbio->bioc->raid_map[0]; + u64 num = rbio->bioc->full_stripe_logical; /* * we shift down quite a bit. We're using byte @@ -567,7 +567,7 @@ static int rbio_can_merge(struct btrfs_raid_bio *last, test_bit(RBIO_CACHE_BIT, &cur->flags)) return 0; - if (last->bioc->raid_map[0] != cur->bioc->raid_map[0]) + if (last->bioc->full_stripe_logical != cur->bioc->full_stripe_logical) return 0; /* we can't merge with different operations */ @@ -661,7 +661,7 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) spin_lock(&h->lock); list_for_each_entry(cur, &h->hash_list, hash_list) { - if (cur->bioc->raid_map[0] != rbio->bioc->raid_map[0]) + if (cur->bioc->full_stripe_logical != rbio->bioc->full_stripe_logical) continue; spin_lock(&cur->bio_list_lock); @@ -1113,7 +1113,7 @@ static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio) struct bio_vec bvec; struct bvec_iter iter; u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - - rbio->bioc->raid_map[0]; + rbio->bioc->full_stripe_logical; bio_for_each_segment(bvec, bio, iter) { u32 bvec_offset; @@ -1337,7 +1337,7 @@ static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio) { struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) - - rbio->bioc->raid_map[0]; + rbio->bioc->full_stripe_logical; int 
total_nr_sector = offset >> fs_info->sectorsize_bits; ASSERT(total_nr_sector < rbio->nr_data * rbio->stripe_nsectors); @@ -1614,7 +1614,7 @@ static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio) { const struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT; - const u64 full_stripe_start = rbio->bioc->raid_map[0]; + const u64 full_stripe_start = rbio->bioc->full_stripe_logical; const u32 orig_len = orig_bio->bi_iter.bi_size; const u32 sectorsize = fs_info->sectorsize; u64 cur_logical; @@ -1801,9 +1801,8 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr, * here due to a crc mismatch and we can't give them the * data they want. */ - if (rbio->bioc->raid_map[failb] == RAID6_Q_STRIPE) { - if (rbio->bioc->raid_map[faila] == - RAID5_P_STRIPE) + if (failb == rbio->real_stripes - 1) { + if (faila == rbio->real_stripes - 2) /* * Only P and Q are corrupted. * We only care about data stripes recovery, @@ -1817,7 +1816,7 @@ static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr, goto pstripe; } - if (rbio->bioc->raid_map[failb] == RAID5_P_STRIPE) { + if (failb == rbio->real_stripes - 2) { raid6_datap_recov(rbio->real_stripes, sectorsize, faila, pointers); } else { @@ -2080,8 +2079,8 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio) { struct btrfs_fs_info *fs_info = rbio->bioc->fs_info; struct btrfs_root *csum_root = btrfs_csum_root(fs_info, - rbio->bioc->raid_map[0]); - const u64 start = rbio->bioc->raid_map[0]; + rbio->bioc->full_stripe_logical); + const u64 start = rbio->bioc->full_stripe_logical; const u32 len = (rbio->nr_data * rbio->stripe_nsectors) << fs_info->sectorsize_bits; int ret; @@ -2129,7 +2128,7 @@ error: */ btrfs_warn_rl(fs_info, "sub-stripe write for full stripe %llu is not safe, failed to get csum: %d", - rbio->bioc->raid_map[0], ret); + rbio->bioc->full_stripe_logical, ret); no_csum: kfree(rbio->csum_buf); 
bitmap_free(rbio->csum_bitmap); @@ -2385,10 +2384,10 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page, int stripe_offset; int index; - ASSERT(logical >= rbio->bioc->raid_map[0]); - ASSERT(logical + sectorsize <= rbio->bioc->raid_map[0] + + ASSERT(logical >= rbio->bioc->full_stripe_logical); + ASSERT(logical + sectorsize <= rbio->bioc->full_stripe_logical + BTRFS_STRIPE_LEN * rbio->nr_data); - stripe_offset = (int)(logical - rbio->bioc->raid_map[0]); + stripe_offset = (int)(logical - rbio->bioc->full_stripe_logical); index = stripe_offset / sectorsize; rbio->bio_sectors[index].page = page; rbio->bio_sectors[index].pgoff = pgoff; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 64b52be6bf0b..91aeac36ebc9 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -1430,7 +1430,7 @@ static inline int scrub_nr_raid_mirrors(struct btrfs_io_context *bioc) } static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type, - u64 *raid_map, + u64 full_stripe_logical, int nstripes, int mirror, int *stripe_index, u64 *stripe_offset) @@ -1438,19 +1438,22 @@ static inline void scrub_stripe_index_and_offset(u64 logical, u64 map_type, int i; if (map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) { + const int nr_data_stripes = (map_type & BTRFS_BLOCK_GROUP_RAID5) ? 
+ nstripes - 1 : nstripes - 2; + /* RAID5/6 */ - for (i = 0; i < nstripes; i++) { - if (raid_map[i] == RAID6_Q_STRIPE || - raid_map[i] == RAID5_P_STRIPE) - continue; + for (i = 0; i < nr_data_stripes; i++) { + const u64 data_stripe_start = full_stripe_logical + + (i * BTRFS_STRIPE_LEN); - if (logical >= raid_map[i] && - logical < raid_map[i] + BTRFS_STRIPE_LEN) + if (logical >= data_stripe_start && + logical < data_stripe_start + BTRFS_STRIPE_LEN) break; } *stripe_index = i; - *stripe_offset = logical - raid_map[i]; + *stripe_offset = (logical - full_stripe_logical) & + BTRFS_STRIPE_LEN_MASK; } else { /* The other RAID type */ *stripe_index = mirror; @@ -1538,7 +1541,7 @@ static int scrub_setup_recheck_block(struct scrub_block *original_sblock, scrub_stripe_index_and_offset(logical, bioc->map_type, - bioc->raid_map, + bioc->full_stripe_logical, bioc->num_stripes - bioc->replace_nr_stripes, mirror_index, @@ -2398,7 +2401,7 @@ static void scrub_missing_raid56_pages(struct scrub_block *sblock) btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, &length, &bioc); - if (ret || !bioc || !bioc->raid_map) + if (ret || !bioc) goto bioc_out; if (WARN_ON(!sctx->is_dev_replace || @@ -3007,7 +3010,7 @@ static void scrub_parity_check_and_repair(struct scrub_parity *sparity) btrfs_bio_counter_inc_blocked(fs_info); ret = btrfs_map_sblock(fs_info, BTRFS_MAP_WRITE, sparity->logic_start, &length, &bioc); - if (ret || !bioc || !bioc->raid_map) + if (ret || !bioc) goto bioc_out; bio = bio_alloc(NULL, BIO_MAX_VECS, REQ_OP_READ, GFP_NOFS); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8f06f0e47ba8..b7e1d7dc4509 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5894,25 +5894,6 @@ static int find_live_mirror(struct btrfs_fs_info *fs_info, return preferred_mirror; } -/* Bubble-sort the stripe set to put the parity/syndrome stripes last */ -static void sort_parity_stripes(struct btrfs_io_context *bioc, int 
num_stripes) -{ - int i; - int again = 1; - - while (again) { - again = 0; - for (i = 0; i < num_stripes - 1; i++) { - /* Swap if parity is on a smaller index */ - if (bioc->raid_map[i] > bioc->raid_map[i + 1]) { - swap(bioc->stripes[i], bioc->stripes[i + 1]); - swap(bioc->raid_map[i], bioc->raid_map[i + 1]); - again = 1; - } - } - } -} - static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info, u16 total_stripes) { @@ -5922,12 +5903,7 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_ /* The size of btrfs_io_context */ sizeof(struct btrfs_io_context) + /* Plus the variable array for the stripes */ - sizeof(struct btrfs_io_stripe) * (total_stripes) + - /* - * Plus the raid_map, which includes both the tgt dev - * and the stripes. - */ - sizeof(u64) * (total_stripes), + sizeof(struct btrfs_io_stripe) * (total_stripes), GFP_NOFS); if (!bioc) @@ -5936,8 +5912,8 @@ static struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_ refcount_set(&bioc->refs, 1); bioc->fs_info = fs_info; - bioc->raid_map = (u64 *)(bioc->stripes + total_stripes); bioc->replace_stripe_src = -1; + bioc->full_stripe_logical = (u64)-1; return bioc; } @@ -6541,33 +6517,39 @@ int __btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, } bioc->map_type = map->type; - for (i = 0; i < num_stripes; i++) { - set_io_stripe(&bioc->stripes[i], map, stripe_index, stripe_offset, - stripe_nr); - stripe_index++; - } - - /* Build raid_map */ + /* + * For RAID56 full map, we need to make sure the stripes[] follows the + * rule that data stripes are all ordered, then followed with P and Q + * (if we have). + * + * It's still mostly the same as other profiles, just with extra rotation. 
+ */ if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map && (need_full_stripe(op) || mirror_num > 1)) { - u64 tmp; - unsigned rot; - - /* Work out the disk rotation on this stripe-set */ - rot = stripe_nr % num_stripes; - - /* Fill in the logical address of each stripe */ - tmp = stripe_nr * data_stripes; - for (i = 0; i < data_stripes; i++) - bioc->raid_map[(i + rot) % num_stripes] = - em->start + ((tmp + i) << BTRFS_STRIPE_LEN_SHIFT); - - bioc->raid_map[(i + rot) % map->num_stripes] = RAID5_P_STRIPE; - if (map->type & BTRFS_BLOCK_GROUP_RAID6) - bioc->raid_map[(i + rot + 1) % num_stripes] = - RAID6_Q_STRIPE; - - sort_parity_stripes(bioc, num_stripes); + /* + * For RAID56 @stripe_nr is already the number of full stripes + * before us, which is also the rotation value (needs to modulo + * with num_stripes). + * + * In this case, we just add @stripe_nr with @i, then do the + * modulo, to reduce one modulo call. + */ + bioc->full_stripe_logical = em->start + + ((stripe_nr * data_stripes) << BTRFS_STRIPE_LEN_SHIFT); + for (i = 0; i < num_stripes; i++) + set_io_stripe(&bioc->stripes[i], map, + (i + stripe_nr) % num_stripes, + stripe_offset, stripe_nr); + } else { + /* + * For all other non-RAID56 profiles, just copy the target + * stripe into the bioc. + */ + for (i = 0; i < num_stripes; i++) { + set_io_stripe(&bioc->stripes[i], map, stripe_index, + stripe_offset, stripe_nr); + stripe_index++; + } } if (need_full_stripe(op)) diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index e86e9f25ba0f..650e131d079e 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -460,11 +460,22 @@ struct btrfs_io_context { u16 replace_nr_stripes; s16 replace_stripe_src; /* - * logical block numbers for the start of each stripe - * The last one or two are p/q. These are sorted, - * so raid_map[0] is the start of our full stripe + * Logical bytenr of the full stripe start, only for RAID56 cases. 
+ * + * When this value is set to other than (u64)-1, the stripes[] should + * follow this pattern: + * + * (real_stripes = num_stripes - replace_nr_stripes) + * (data_stripes = (is_raid6) ? (real_stripes - 2) : (real_stripes - 1)) + * + * stripes[0]: The first data stripe + * stripes[1]: The second data stripe + * ... + * stripes[data_stripes - 1]: The last data stripe + * stripes[data_stripes]: The P stripe + * stripes[data_stripes + 1]: The Q stripe (only for RAID6). */ - u64 *raid_map; + u64 full_stripe_logical; struct btrfs_io_stripe stripes[]; }; diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index 75d7d22c3a27..8ea9cea9bfeb 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -2422,7 +2422,7 @@ DECLARE_EVENT_CLASS(btrfs_raid56_bio, ), TP_fast_assign_btrfs(rbio->bioc->fs_info, - __entry->full_stripe = rbio->bioc->raid_map[0]; + __entry->full_stripe = rbio->bioc->full_stripe_logical; __entry->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT; __entry->len = bio->bi_iter.bi_size; __entry->opf = bio_op(bio); -- cgit v1.2.3 From cf32e41fa5f4fb88e4b4acf9cbf3acf9ac362553 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 30 Mar 2023 03:43:51 -0700 Subject: btrfs: use __bio_add_page to add a single page in rbio_add_io_sector The btrfs raid56 sector submission code uses bio_add_page() to add a page to a newly created bio. bio_add_page() can fail, but the return value is never checked. Use __bio_add_page() as adding a single page to a newly created bio is guaranteed to succeed. This brings us a step closer to marking bio_add_page() as __must_check.
Reviewed-by: Damien Le Moal Signed-off-by: Johannes Thumshirn Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/raid56.c') diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index 6cbbaa6c06ca..f4651b60b9e2 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -1102,7 +1102,7 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio, bio->bi_iter.bi_sector = disk_start >> 9; bio->bi_private = rbio; - bio_add_page(bio, sector->page, sectorsize, sector->pgoff); + __bio_add_page(bio, sector->page, sectorsize, sector->pgoff); bio_list_add(bio_list, bio); return 0; } -- cgit v1.2.3 From b979547513ff060ebe4a381b69d8478b18e1cc4e Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Mon, 20 Mar 2023 10:12:51 +0800 Subject: btrfs: scrub: introduce helper to find and fill sector info for a scrub_stripe The new helper will search the extent tree to find the first extent of a logical range, then fill the sectors array by two loops: - Loop 1 to fill common bits and metadata generation - Loop 2 to fill csum data (only for data bgs) This loop will use the new btrfs_lookup_csums_bitmap() to fill the full csum buffer, and set scrub_sector_verification::csum. With all the needed info filled by this function, later we only need to submit and verify the stripe. Here we temporarily export the helper to avoid a warning about an unused static function.
Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/file-item.c | 9 +++- fs/btrfs/file-item.h | 3 +- fs/btrfs/raid56.c | 2 +- fs/btrfs/scrub.c | 143 +++++++++++++++++++++++++++++++++++++++++++++++++++ fs/btrfs/scrub.h | 4 ++ 5 files changed, 158 insertions(+), 3 deletions(-) (limited to 'fs/btrfs/raid56.c') diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 1ce306cea690..018c711a0bc8 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -597,7 +597,8 @@ fail: * in is large enough to contain all csums. */ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end, - u8 *csum_buf, unsigned long *csum_bitmap) + u8 *csum_buf, unsigned long *csum_bitmap, + bool search_commit) { struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_key key; @@ -614,6 +615,12 @@ int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end, if (!path) return -ENOMEM; + if (search_commit) { + path->skip_locking = 1; + path->reada = READA_FORWARD; + path->search_commit_root = 1; + } + key.objectid = BTRFS_EXTENT_CSUM_OBJECTID; key.type = BTRFS_EXTENT_CSUM_KEY; key.offset = start; diff --git a/fs/btrfs/file-item.h b/fs/btrfs/file-item.h index cd7f2ae515c0..6be8725cd574 100644 --- a/fs/btrfs/file-item.h +++ b/fs/btrfs/file-item.h @@ -57,7 +57,8 @@ int btrfs_lookup_csums_list(struct btrfs_root *root, u64 start, u64 end, struct list_head *list, int search_commit, bool nowait); int btrfs_lookup_csums_bitmap(struct btrfs_root *root, u64 start, u64 end, - u8 *csum_buf, unsigned long *csum_bitmap); + u8 *csum_buf, unsigned long *csum_bitmap, + bool search_commit); void btrfs_extent_item_to_extent_map(struct btrfs_inode *inode, const struct btrfs_path *path, struct btrfs_file_extent_item *fi, diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index f4651b60b9e2..ed6343f566d4 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -2113,7 +2113,7 @@ static void fill_data_csums(struct btrfs_raid_bio *rbio) 
} ret = btrfs_lookup_csums_bitmap(csum_root, start, start + len - 1, - rbio->csum_buf, rbio->csum_bitmap); + rbio->csum_buf, rbio->csum_bitmap, false); if (ret < 0) goto error; if (bitmap_empty(rbio->csum_bitmap, len >> fs_info->sectorsize_bits)) diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 26763113b19b..56f8c54102b0 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3642,6 +3642,149 @@ static int sync_write_pointer_for_zoned(struct scrub_ctx *sctx, u64 logical, return ret; } +static void fill_one_extent_info(struct btrfs_fs_info *fs_info, + struct scrub_stripe *stripe, + u64 extent_start, u64 extent_len, + u64 extent_flags, u64 extent_gen) +{ + for (u64 cur_logical = max(stripe->logical, extent_start); + cur_logical < min(stripe->logical + BTRFS_STRIPE_LEN, + extent_start + extent_len); + cur_logical += fs_info->sectorsize) { + const int nr_sector = (cur_logical - stripe->logical) >> + fs_info->sectorsize_bits; + struct scrub_sector_verification *sector = + &stripe->sectors[nr_sector]; + + set_bit(nr_sector, &stripe->extent_sector_bitmap); + if (extent_flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { + sector->is_metadata = true; + sector->generation = extent_gen; + } + } +} + +static void scrub_stripe_reset_bitmaps(struct scrub_stripe *stripe) +{ + stripe->extent_sector_bitmap = 0; + stripe->init_error_bitmap = 0; + stripe->error_bitmap = 0; + stripe->io_error_bitmap = 0; + stripe->csum_error_bitmap = 0; + stripe->meta_error_bitmap = 0; +} + +/* + * Locate one stripe which has at least one extent in its range. + * + * Return 0 if found such stripe, and store its info into @stripe. + * Return >0 if there is no such stripe in the specified range. + * Return <0 for error. 
+ */ +int scrub_find_fill_first_stripe(struct btrfs_block_group *bg, + struct btrfs_device *dev, u64 physical, + int mirror_num, u64 logical_start, + u32 logical_len, struct scrub_stripe *stripe) +{ + struct btrfs_fs_info *fs_info = bg->fs_info; + struct btrfs_root *extent_root = btrfs_extent_root(fs_info, bg->start); + struct btrfs_root *csum_root = btrfs_csum_root(fs_info, bg->start); + const u64 logical_end = logical_start + logical_len; + struct btrfs_path path = { 0 }; + u64 cur_logical = logical_start; + u64 stripe_end; + u64 extent_start; + u64 extent_len; + u64 extent_flags; + u64 extent_gen; + int ret; + + memset(stripe->sectors, 0, sizeof(struct scrub_sector_verification) * + stripe->nr_sectors); + scrub_stripe_reset_bitmaps(stripe); + + /* The range must be inside the bg. */ + ASSERT(logical_start >= bg->start && logical_end <= bg->start + bg->length); + + path.search_commit_root = 1; + path.skip_locking = 1; + + ret = find_first_extent_item(extent_root, &path, logical_start, logical_len); + /* Either error or not found. */ + if (ret) + goto out; + get_extent_info(&path, &extent_start, &extent_len, &extent_flags, &extent_gen); + cur_logical = max(extent_start, cur_logical); + + /* + * Round down to stripe boundary. + * + * The extra calculation against bg->start is to handle block groups + * whose logical bytenr is not BTRFS_STRIPE_LEN aligned. + */ + stripe->logical = round_down(cur_logical - bg->start, BTRFS_STRIPE_LEN) + + bg->start; + stripe->physical = physical + stripe->logical - logical_start; + stripe->dev = dev; + stripe->bg = bg; + stripe->mirror_num = mirror_num; + stripe_end = stripe->logical + BTRFS_STRIPE_LEN - 1; + + /* Fill the first extent info into stripe->sectors[] array. */ + fill_one_extent_info(fs_info, stripe, extent_start, extent_len, + extent_flags, extent_gen); + cur_logical = extent_start + extent_len; + + /* Fill the extent info for the remaining sectors. 
*/ + while (cur_logical <= stripe_end) { + ret = find_first_extent_item(extent_root, &path, cur_logical, + stripe_end - cur_logical + 1); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + break; + } + get_extent_info(&path, &extent_start, &extent_len, + &extent_flags, &extent_gen); + fill_one_extent_info(fs_info, stripe, extent_start, extent_len, + extent_flags, extent_gen); + cur_logical = extent_start + extent_len; + } + + /* Now fill the data csum. */ + if (bg->flags & BTRFS_BLOCK_GROUP_DATA) { + int sector_nr; + unsigned long csum_bitmap = 0; + + /* Csum space should have already been allocated. */ + ASSERT(stripe->csums); + + /* + * Our csum bitmap should be large enough, as BTRFS_STRIPE_LEN + * should contain at most 16 sectors. + */ + ASSERT(BITS_PER_LONG >= BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits); + + ret = btrfs_lookup_csums_bitmap(csum_root, stripe->logical, + stripe_end, stripe->csums, + &csum_bitmap, true); + if (ret < 0) + goto out; + if (ret > 0) + ret = 0; + + for_each_set_bit(sector_nr, &csum_bitmap, stripe->nr_sectors) { + stripe->sectors[sector_nr].csum = stripe->csums + + sector_nr * fs_info->csum_size; + } + } + set_bit(SCRUB_STRIPE_FLAG_INITIALIZED, &stripe->state); +out: + btrfs_release_path(&path); + return ret; +} + /* * Scrub one range which can only has simple mirror based profile. 
* (Including all range in SINGLE/DUP/RAID1/RAID1C*, and each stripe in diff --git a/fs/btrfs/scrub.h b/fs/btrfs/scrub.h index e04764f8bb7e..27019d86b539 100644 --- a/fs/btrfs/scrub.h +++ b/fs/btrfs/scrub.h @@ -20,5 +20,9 @@ int btrfs_scrub_progress(struct btrfs_fs_info *fs_info, u64 devid, struct scrub_stripe; int init_scrub_stripe(struct btrfs_fs_info *fs_info, struct scrub_stripe *stripe); void wait_scrub_stripe_io(struct scrub_stripe *stripe); +int scrub_find_fill_first_stripe(struct btrfs_block_group *bg, + struct btrfs_device *dev, u64 physical, + int mirror_num, u64 logical_start, + u32 logical_len, struct scrub_stripe *stripe); #endif -- cgit v1.2.3 From aca43fe839e4b227ef7028305586429af69b0bcd Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Wed, 12 Apr 2023 14:47:50 +0800 Subject: btrfs: remove unused raid56 functions which were dedicated for scrub Since the scrub rework, the following RAID56 functions are no longer called: - raid56_add_scrub_pages() - raid56_alloc_missing_rbio() - raid56_submit_missing_rbio() Those functions were all utilized by scrub to handle missing device cases for RAID56. However the new scrub code handles those cases in a completely different way: - If it's a data stripe, go through the recovery path via btrfs_submit_bio() - If it's a P/Q stripe, it would be handled through raid56_parity_submit_scrub_rbio() And that function would handle dev-replace and repair properly. Thus we can safely remove those functions. Signed-off-by: Qu Wenruo Reviewed-by: David Sterba Signed-off-by: David Sterba --- fs/btrfs/raid56.c | 47 ----------------------------------------------- fs/btrfs/raid56.h | 7 ------- 2 files changed, 54 deletions(-) (limited to 'fs/btrfs/raid56.c') diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index ed6343f566d4..2fab37f062de 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -2376,23 +2376,6 @@ struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio, return rbio; } -/* Used for both parity scrub and missing.
*/ -void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page, - unsigned int pgoff, u64 logical) -{ - const u32 sectorsize = rbio->bioc->fs_info->sectorsize; - int stripe_offset; - int index; - - ASSERT(logical >= rbio->bioc->full_stripe_logical); - ASSERT(logical + sectorsize <= rbio->bioc->full_stripe_logical + - BTRFS_STRIPE_LEN * rbio->nr_data); - stripe_offset = (int)(logical - rbio->bioc->full_stripe_logical); - index = stripe_offset / sectorsize; - rbio->bio_sectors[index].page = page; - rbio->bio_sectors[index].pgoff = pgoff; -} - /* * We just scrub the parity that we have correct data on the same horizontal, * so we needn't allocate all pages for all the stripes. @@ -2764,33 +2747,3 @@ void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) if (!lock_stripe_add(rbio)) start_async_work(rbio, scrub_rbio_work_locked); } - -/* The following code is used for dev replace of a missing RAID 5/6 device. */ - -struct btrfs_raid_bio * -raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc) -{ - struct btrfs_fs_info *fs_info = bioc->fs_info; - struct btrfs_raid_bio *rbio; - - rbio = alloc_rbio(fs_info, bioc); - if (IS_ERR(rbio)) - return NULL; - - rbio->operation = BTRFS_RBIO_REBUILD_MISSING; - bio_list_add(&rbio->bio_list, bio); - /* - * This is a special bio which is used to hold the completion handler - * and make the scrub rbio is similar to the other types - */ - ASSERT(!bio->bi_iter.bi_size); - - set_rbio_range_error(rbio, bio); - - return rbio; -} - -void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio) -{ - start_async_work(rbio, recover_rbio_work); -} diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h index 6583c225b1bd..0f7f31c8cb98 100644 --- a/fs/btrfs/raid56.h +++ b/fs/btrfs/raid56.h @@ -187,19 +187,12 @@ void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc, int mirror_num); void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc); -void 
raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page, - unsigned int pgoff, u64 logical); - struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio, struct btrfs_io_context *bioc, struct btrfs_device *scrub_dev, unsigned long *dbitmap, int stripe_nsectors); void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio); -struct btrfs_raid_bio * -raid56_alloc_missing_rbio(struct bio *bio, struct btrfs_io_context *bioc); -void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio); - int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info); -- cgit v1.2.3