-rw-r--r--  arch/m68k/emu/nfblock.c | 2
-rw-r--r--  block/bio-integrity.c | 114
-rw-r--r--  block/bio.c | 2
-rw-r--r--  block/blk-core.c | 19
-rw-r--r--  block/blk-lib.c | 210
-rw-r--r--  block/blk-map.c | 2
-rw-r--r--  block/blk-merge.c | 25
-rw-r--r--  block/blk-mq.c | 32
-rw-r--r--  block/blk-settings.c | 51
-rw-r--r--  block/blk-sysfs.c | 395
-rw-r--r--  block/blk-throttle.c | 3
-rw-r--r--  block/blk-wbt.c | 18
-rw-r--r--  block/blk-zoned.c | 88
-rw-r--r--  block/blk.h | 13
-rw-r--r--  block/elevator.c | 9
-rw-r--r--  block/elevator.h | 4
-rw-r--r--  block/fops.c | 5
-rw-r--r--  block/genhd.c | 2
-rw-r--r--  block/ioctl.c | 2
-rw-r--r--  block/mq-deadline.c | 20
-rw-r--r--  block/t10-pi.c | 97
-rw-r--r--  drivers/ata/libata-scsi.c | 3
-rw-r--r--  drivers/ata/pata_macio.c | 4
-rw-r--r--  drivers/block/loop.c | 92
-rw-r--r--  drivers/block/null_blk/main.c | 13
-rw-r--r--  drivers/block/null_blk/null_blk.h | 1
-rw-r--r--  drivers/block/null_blk/zoned.c | 12
-rw-r--r--  drivers/block/rnbd/rnbd-clt.c | 2
-rw-r--r--  drivers/block/ublk_drv.c | 2
-rw-r--r--  drivers/block/virtio_blk.c | 2
-rw-r--r--  drivers/block/xen-blkfront.c | 16
-rw-r--r--  drivers/md/bcache/super.c | 4
-rw-r--r--  drivers/md/dm-zone.c | 50
-rw-r--r--  drivers/md/dm.c | 159
-rw-r--r--  drivers/md/dm.h | 3
-rw-r--r--  drivers/md/md-cluster.c | 2
-rw-r--r--  drivers/md/md.c | 26
-rw-r--r--  drivers/md/md.h | 5
-rw-r--r--  drivers/md/raid0.c | 2
-rw-r--r--  drivers/md/raid1.c | 2
-rw-r--r--  drivers/md/raid10.c | 2
-rw-r--r--  drivers/md/raid5.c | 78
-rw-r--r--  drivers/nvme/host/core.c | 3
-rw-r--r--  drivers/nvme/host/pci.c | 6
-rw-r--r--  drivers/nvme/host/zns.c | 2
-rw-r--r--  drivers/scsi/megaraid/megaraid_sas_base.c | 2
-rw-r--r--  drivers/scsi/mpt3sas/mpt3sas_scsih.c | 6
-rw-r--r--  drivers/scsi/scsi_lib.c | 4
-rw-r--r--  drivers/scsi/sd_zbc.c | 2
-rw-r--r--  drivers/ufs/core/ufshcd.c | 10
-rw-r--r--  include/linux/blk-integrity.h | 25
-rw-r--r--  include/linux/blkdev.h | 108
-rw-r--r--  include/linux/bvec.h | 14
-rw-r--r--  include/linux/device-mapper.h | 7
-rw-r--r--  include/linux/t10-pi.h | 10
-rw-r--r--  include/trace/events/block.h | 41
-rw-r--r--  rust/bindings/bindings_helper.h | 2
57 files changed, 944 insertions, 891 deletions
diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 8eea7ef91151..874fe9588773 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -71,7 +71,7 @@ static void nfhd_submit_bio(struct bio *bio)
len = bvec.bv_len;
len >>= 9;
nfhd_read_write(dev->id, 0, dir, sec >> shift, len >> shift,
- page_to_phys(bvec.bv_page) + bvec.bv_offset);
+ bvec_phys(&bvec));
sec += len;
}
bio_endio(bio);
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 173ffd4d6237..0dc22985fed6 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -76,7 +76,7 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
&bip->bip_max_vcnt, gfp_mask);
if (!bip->bip_vec)
goto err;
- } else {
+ } else if (nr_vecs) {
bip->bip_vec = bip->bip_inline_vecs;
}
@@ -254,6 +254,7 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
bip->bip_flags |= BIP_INTEGRITY_USER | BIP_COPY_USER;
bip->bip_iter.bi_sector = seed;
+ bip->bip_vcnt = nr_vecs;
return 0;
free_bip:
bio_integrity_free(bio);
@@ -275,6 +276,7 @@ static int bio_integrity_init_user(struct bio *bio, struct bio_vec *bvec,
bip->bip_flags |= BIP_INTEGRITY_USER;
bip->bip_iter.bi_sector = seed;
bip->bip_iter.bi_size = len;
+ bip->bip_vcnt = nr_vecs;
return 0;
}
@@ -312,7 +314,7 @@ int bio_integrity_map_user(struct bio *bio, void __user *ubuf, ssize_t bytes,
u32 seed)
{
struct request_queue *q = bdev_get_queue(bio->bi_bdev);
- unsigned int align = q->dma_pad_mask | queue_dma_alignment(q);
+ unsigned int align = blk_lim_dma_alignment_and_pad(&q->limits);
struct page *stack_pages[UIO_FASTIOV], **pages = stack_pages;
struct bio_vec stack_vec[UIO_FASTIOV], *bvec = stack_vec;
unsigned int direction, nr_bvecs;
@@ -375,44 +377,6 @@ free_bvec:
EXPORT_SYMBOL_GPL(bio_integrity_map_user);
/**
- * bio_integrity_process - Process integrity metadata for a bio
- * @bio: bio to generate/verify integrity metadata for
- * @proc_iter: iterator to process
- */
-static blk_status_t bio_integrity_process(struct bio *bio,
- struct bvec_iter *proc_iter)
-{
- struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
- struct blk_integrity_iter iter;
- struct bvec_iter bviter;
- struct bio_vec bv;
- struct bio_integrity_payload *bip = bio_integrity(bio);
- blk_status_t ret = BLK_STS_OK;
-
- iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
- iter.interval = 1 << bi->interval_exp;
- iter.seed = proc_iter->bi_sector;
- iter.prot_buf = bvec_virt(bip->bip_vec);
-
- __bio_for_each_segment(bv, bio, bviter, *proc_iter) {
- void *kaddr = bvec_kmap_local(&bv);
-
- iter.data_buf = kaddr;
- iter.data_size = bv.bv_len;
- if (bio_data_dir(bio) == WRITE)
- blk_integrity_generate(&iter, bi);
- else
- ret = blk_integrity_verify(&iter, bi);
- kunmap_local(kaddr);
-
- if (ret)
- break;
-
- }
- return ret;
-}
-
-/**
* bio_integrity_prep - Prepare bio for integrity I/O
* @bio: bio to prepare
*
@@ -428,18 +392,13 @@ bool bio_integrity_prep(struct bio *bio)
{
struct bio_integrity_payload *bip;
struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ unsigned int len;
void *buf;
- unsigned long start, end;
- unsigned int len, nr_pages;
- unsigned int bytes, offset, i;
gfp_t gfp = GFP_NOIO;
if (!bi)
return true;
- if (bio_op(bio) != REQ_OP_READ && bio_op(bio) != REQ_OP_WRITE)
- return true;
-
if (!bio_sectors(bio))
return true;
@@ -447,38 +406,36 @@ bool bio_integrity_prep(struct bio *bio)
if (bio_integrity(bio))
return true;
- if (bio_data_dir(bio) == READ) {
+ switch (bio_op(bio)) {
+ case REQ_OP_READ:
if (bi->flags & BLK_INTEGRITY_NOVERIFY)
return true;
- } else {
+ break;
+ case REQ_OP_WRITE:
if (bi->flags & BLK_INTEGRITY_NOGENERATE)
return true;
/*
* Zero the memory allocated to not leak uninitialized kernel
- * memory to disk. For PI this only affects the app tag, but
- * for non-integrity metadata it affects the entire metadata
- * buffer.
+ * memory to disk for non-integrity metadata where nothing else
+ * initializes the memory.
*/
- gfp |= __GFP_ZERO;
+ if (bi->csum_type == BLK_INTEGRITY_CSUM_NONE)
+ gfp |= __GFP_ZERO;
+ break;
+ default:
+ return true;
}
/* Allocate kernel buffer for protection data */
len = bio_integrity_bytes(bi, bio_sectors(bio));
buf = kmalloc(len, gfp);
if (unlikely(buf == NULL)) {
- printk(KERN_ERR "could not allocate integrity buffer\n");
goto err_end_io;
}
- end = (((unsigned long) buf) + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
- start = ((unsigned long) buf) >> PAGE_SHIFT;
- nr_pages = end - start;
-
- /* Allocate bio integrity payload and integrity vectors */
- bip = bio_integrity_alloc(bio, GFP_NOIO, nr_pages);
+ bip = bio_integrity_alloc(bio, GFP_NOIO, 1);
if (IS_ERR(bip)) {
- printk(KERN_ERR "could not allocate data integrity bioset\n");
kfree(buf);
goto err_end_io;
}
@@ -489,28 +446,15 @@ bool bio_integrity_prep(struct bio *bio)
if (bi->csum_type == BLK_INTEGRITY_CSUM_IP)
bip->bip_flags |= BIP_IP_CHECKSUM;
- /* Map it */
- offset = offset_in_page(buf);
- for (i = 0; i < nr_pages && len > 0; i++) {
- bytes = PAGE_SIZE - offset;
-
- if (bytes > len)
- bytes = len;
-
- if (bio_integrity_add_page(bio, virt_to_page(buf),
- bytes, offset) < bytes) {
- printk(KERN_ERR "could not attach integrity payload\n");
- goto err_end_io;
- }
-
- buf += bytes;
- len -= bytes;
- offset = 0;
+ if (bio_integrity_add_page(bio, virt_to_page(buf), len,
+ offset_in_page(buf)) < len) {
+ printk(KERN_ERR "could not attach integrity payload\n");
+ goto err_end_io;
}
/* Auto-generate integrity metadata if this is a write */
if (bio_data_dir(bio) == WRITE)
- bio_integrity_process(bio, &bio->bi_iter);
+ blk_integrity_generate(bio);
else
bip->bio_iter = bio->bi_iter;
return true;
@@ -536,12 +480,7 @@ static void bio_integrity_verify_fn(struct work_struct *work)
container_of(work, struct bio_integrity_payload, bip_work);
struct bio *bio = bip->bip_bio;
- /*
- * At the moment verify is called bio's iterator was advanced
- * during split and completion, we need to rewind iterator to
- * it's original position.
- */
- bio->bi_status = bio_integrity_process(bio, &bip->bio_iter);
+ blk_integrity_verify(bio);
bio_integrity_free(bio);
bio_endio(bio);
}
@@ -623,14 +562,11 @@ int bio_integrity_clone(struct bio *bio, struct bio *bio_src,
BUG_ON(bip_src == NULL);
- bip = bio_integrity_alloc(bio, gfp_mask, bip_src->bip_vcnt);
+ bip = bio_integrity_alloc(bio, gfp_mask, 0);
if (IS_ERR(bip))
return PTR_ERR(bip);
- memcpy(bip->bip_vec, bip_src->bip_vec,
- bip_src->bip_vcnt * sizeof(struct bio_vec));
-
- bip->bip_vcnt = bip_src->bip_vcnt;
+ bip->bip_vec = bip_src->bip_vec;
bip->bip_iter = bip_src->bip_iter;
bip->bip_flags = bip_src->bip_flags & ~BIP_BLOCK_INTEGRITY;
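(Worked example, figures assumed for illustration: a 256 KiB read on a device with a 4096-byte protection interval and 8-byte PI tuples needs 64 * 8 = 512 bytes of protection buffer. Because that buffer comes from a single kmalloc() and is therefore physically contiguous, bio_integrity_prep() above can attach it with one bio_integrity_add_page() call and size the payload for a single vector, instead of walking it page by page as the removed code did.)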
diff --git a/block/bio.c b/block/bio.c
index e9e809a63c59..a3b1b2266c50 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -953,7 +953,7 @@ bool bvec_try_merge_hw_page(struct request_queue *q, struct bio_vec *bv,
bool *same_page)
{
unsigned long mask = queue_segment_boundary(q);
- phys_addr_t addr1 = page_to_phys(bv->bv_page) + bv->bv_offset;
+ phys_addr_t addr1 = bvec_phys(bv);
phys_addr_t addr2 = page_to_phys(page) + offset + len - 1;
if ((addr1 | mask) != (addr2 | mask))
diff --git a/block/blk-core.c b/block/blk-core.c
index 6fc1a5a1980d..02bceeb36f2c 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -94,20 +94,6 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q)
}
EXPORT_SYMBOL(blk_queue_flag_clear);
-/**
- * blk_queue_flag_test_and_set - atomically test and set a queue flag
- * @flag: flag to be set
- * @q: request queue
- *
- * Returns the previous value of @flag - 0 if the flag was not set and 1 if
- * the flag was already set.
- */
-bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q)
-{
- return test_and_set_bit(flag, &q->queue_flags);
-}
-EXPORT_SYMBOL_GPL(blk_queue_flag_test_and_set);
-
#define REQ_OP_NAME(name) [REQ_OP_##name] = #name
static const char *const blk_op_name[] = {
REQ_OP_NAME(READ),
@@ -844,11 +830,8 @@ void submit_bio_noacct(struct bio *bio)
case REQ_OP_ZONE_OPEN:
case REQ_OP_ZONE_CLOSE:
case REQ_OP_ZONE_FINISH:
- if (!bdev_is_zoned(bio->bi_bdev))
- goto not_supported;
- break;
case REQ_OP_ZONE_RESET_ALL:
- if (!bdev_is_zoned(bio->bi_bdev) || !blk_queue_zone_resetall(q))
+ if (!bdev_is_zoned(bio->bi_bdev))
goto not_supported;
break;
case REQ_OP_DRV_IN:
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 442da9dad042..9f735efa6c94 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -103,38 +103,71 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL(blkdev_issue_discard);
-static int __blkdev_issue_write_zeroes(struct block_device *bdev,
- sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
- struct bio **biop, unsigned flags)
+static sector_t bio_write_zeroes_limit(struct block_device *bdev)
{
- struct bio *bio = *biop;
- unsigned int max_sectors;
-
- if (bdev_read_only(bdev))
- return -EPERM;
-
- /* Ensure that max_sectors doesn't overflow bi_size */
- max_sectors = bdev_write_zeroes_sectors(bdev);
+ sector_t bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if (max_sectors == 0)
- return -EOPNOTSUPP;
+ return min(bdev_write_zeroes_sectors(bdev),
+ (UINT_MAX >> SECTOR_SHIFT) & ~bs_mask);
+}
+static void __blkdev_issue_write_zeroes(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+ struct bio **biop, unsigned flags)
+{
while (nr_sects) {
- unsigned int len = min_t(sector_t, nr_sects, max_sectors);
+ unsigned int len = min_t(sector_t, nr_sects,
+ bio_write_zeroes_limit(bdev));
+ struct bio *bio;
+
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current))
+ break;
- bio = blk_next_bio(bio, bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
+ bio = bio_alloc(bdev, 0, REQ_OP_WRITE_ZEROES, gfp_mask);
bio->bi_iter.bi_sector = sector;
if (flags & BLKDEV_ZERO_NOUNMAP)
bio->bi_opf |= REQ_NOUNMAP;
bio->bi_iter.bi_size = len << SECTOR_SHIFT;
+ *biop = bio_chain_and_submit(*biop, bio);
+
nr_sects -= len;
sector += len;
cond_resched();
}
+}
- *biop = bio;
- return 0;
+static int blkdev_issue_write_zeroes(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp, unsigned flags)
+{
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+ int ret = 0;
+
+ blk_start_plug(&plug);
+ __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp, &bio, flags);
+ if (bio) {
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current)) {
+ bio_await_chain(bio);
+ blk_finish_plug(&plug);
+ return -EINTR;
+ }
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
+
+ /*
+ * For some devices there is no non-destructive way to verify whether
+ * WRITE ZEROES is actually supported. These will clear the capability
+ * on an I/O error, in which case we'll turn any error into
+ * "not supported" here.
+ */
+ if (ret && !bdev_write_zeroes_sectors(bdev))
+ return -EOPNOTSUPP;
+ return ret;
}
/*
@@ -150,35 +183,63 @@ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
return min(pages, (sector_t)BIO_MAX_VECS);
}
-static int __blkdev_issue_zero_pages(struct block_device *bdev,
+static void __blkdev_issue_zero_pages(struct block_device *bdev,
sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
- struct bio **biop)
+ struct bio **biop, unsigned int flags)
{
- struct bio *bio = *biop;
- int bi_size = 0;
- unsigned int sz;
-
- if (bdev_read_only(bdev))
- return -EPERM;
+ while (nr_sects) {
+ unsigned int nr_vecs = __blkdev_sectors_to_bio_pages(nr_sects);
+ struct bio *bio;
- while (nr_sects != 0) {
- bio = blk_next_bio(bio, bdev, __blkdev_sectors_to_bio_pages(nr_sects),
- REQ_OP_WRITE, gfp_mask);
+ bio = bio_alloc(bdev, nr_vecs, REQ_OP_WRITE, gfp_mask);
bio->bi_iter.bi_sector = sector;
- while (nr_sects != 0) {
- sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
- bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
- nr_sects -= bi_size >> 9;
- sector += bi_size >> 9;
- if (bi_size < sz)
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current))
+ break;
+
+ do {
+ unsigned int len, added;
+
+ len = min_t(sector_t,
+ PAGE_SIZE, nr_sects << SECTOR_SHIFT);
+ added = bio_add_page(bio, ZERO_PAGE(0), len, 0);
+ if (added < len)
break;
- }
+ nr_sects -= added >> SECTOR_SHIFT;
+ sector += added >> SECTOR_SHIFT;
+ } while (nr_sects);
+
+ *biop = bio_chain_and_submit(*biop, bio);
cond_resched();
}
+}
- *biop = bio;
- return 0;
+static int blkdev_issue_zero_pages(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp, unsigned flags)
+{
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+ int ret = 0;
+
+ if (flags & BLKDEV_ZERO_NOFALLBACK)
+ return -EOPNOTSUPP;
+
+ blk_start_plug(&plug);
+ __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp, &bio, flags);
+ if (bio) {
+ if ((flags & BLKDEV_ZERO_KILLABLE) &&
+ fatal_signal_pending(current)) {
+ bio_await_chain(bio);
+ blk_finish_plug(&plug);
+ return -EINTR;
+ }
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
+
+ return ret;
}
/**
@@ -204,20 +265,19 @@ int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
unsigned flags)
{
- int ret;
- sector_t bs_mask;
-
- bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if ((sector | nr_sects) & bs_mask)
- return -EINVAL;
-
- ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
- biop, flags);
- if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
- return ret;
+ if (bdev_read_only(bdev))
+ return -EPERM;
- return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
- biop);
+ if (bdev_write_zeroes_sectors(bdev)) {
+ __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+ gfp_mask, biop, flags);
+ } else {
+ if (flags & BLKDEV_ZERO_NOFALLBACK)
+ return -EOPNOTSUPP;
+ __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
+ biop, flags);
+ }
+ return 0;
}
EXPORT_SYMBOL(__blkdev_issue_zeroout);
@@ -237,51 +297,21 @@ EXPORT_SYMBOL(__blkdev_issue_zeroout);
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
- int ret = 0;
- sector_t bs_mask;
- struct bio *bio;
- struct blk_plug plug;
- bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);
+ int ret;
- bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
- if ((sector | nr_sects) & bs_mask)
+ if ((sector | nr_sects) & ((bdev_logical_block_size(bdev) >> 9) - 1))
return -EINVAL;
+ if (bdev_read_only(bdev))
+ return -EPERM;
-retry:
- bio = NULL;
- blk_start_plug(&plug);
- if (try_write_zeroes) {
- ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
- gfp_mask, &bio, flags);
- } else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
- ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
- gfp_mask, &bio);
- } else {
- /* No zeroing offload support */
- ret = -EOPNOTSUPP;
- }
- if (ret == 0 && bio) {
- ret = submit_bio_wait(bio);
- bio_put(bio);
- }
- blk_finish_plug(&plug);
- if (ret && try_write_zeroes) {
- if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
- try_write_zeroes = false;
- goto retry;
- }
- if (!bdev_write_zeroes_sectors(bdev)) {
- /*
- * Zeroing offload support was indicated, but the
- * device reported ILLEGAL REQUEST (for some devices
- * there is no non-destructive way to verify whether
- * WRITE ZEROES is actually supported).
- */
- ret = -EOPNOTSUPP;
- }
+ if (bdev_write_zeroes_sectors(bdev)) {
+ ret = blkdev_issue_write_zeroes(bdev, sector, nr_sects,
+ gfp_mask, flags);
+ if (ret != -EOPNOTSUPP)
+ return ret;
}
- return ret;
+ return blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask, flags);
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
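A minimal caller sketch, not taken from this patch (zero_range_example() is a made-up name), relying only on the flag semantics shown above: BLKDEV_ZERO_NOFALLBACK maps a missing WRITE ZEROES offload to -EOPNOTSUPP, and BLKDEV_ZERO_KILLABLE lets a fatal signal end the wait with -EINTR.

/* Illustrative sketch only: prefer the offload, then fall back to zero pages. */
static int zero_range_example(struct block_device *bdev, sector_t sector,
                              sector_t nr_sects)
{
        int ret;

        /* Try the WRITE ZEROES offload alone; allow fatal signals to interrupt. */
        ret = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
                                   BLKDEV_ZERO_NOFALLBACK |
                                   BLKDEV_ZERO_KILLABLE);
        if (ret == -EOPNOTSUPP)
                /* No usable offload; let the generic path write zero pages. */
                ret = blkdev_issue_zeroout(bdev, sector, nr_sects,
                                           GFP_KERNEL, 0);
        return ret;     /* -EINTR if a fatal signal interrupted the wait */
}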
diff --git a/block/blk-map.c b/block/blk-map.c
index 71210cdb3442..bce144091128 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -634,7 +634,7 @@ int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
const struct iov_iter *iter, gfp_t gfp_mask)
{
bool copy = false, map_bvec = false;
- unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
+ unsigned long align = blk_lim_dma_alignment_and_pad(&q->limits);
struct bio *bio = NULL;
struct iov_iter i;
int ret = -EINVAL;
diff --git a/block/blk-merge.c b/block/blk-merge.c
index cff20bcc0252..e41ea3318099 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -209,23 +209,22 @@ static inline unsigned get_max_io_size(struct bio *bio,
/**
* get_max_segment_size() - maximum number of bytes to add as a single segment
* @lim: Request queue limits.
- * @start_page: See below.
- * @offset: Offset from @start_page where to add a segment.
+ * @paddr: address of the range to add
+ * @max_len: maximum length available to add at @paddr
*
- * Returns the maximum number of bytes that can be added as a single segment.
+ * Returns the maximum number of bytes of the range starting at @paddr that can
+ * be added to a single segment.
*/
static inline unsigned get_max_segment_size(const struct queue_limits *lim,
- struct page *start_page, unsigned long offset)
+ phys_addr_t paddr, unsigned int len)
{
- unsigned long mask = lim->seg_boundary_mask;
-
- offset = mask & (page_to_phys(start_page) + offset);
-
/*
* Prevent an overflow if mask = ULONG_MAX and offset = 0 by adding 1
* after having calculated the minimum.
*/
- return min(mask - offset, (unsigned long)lim->max_segment_size - 1) + 1;
+ return min_t(unsigned long, len,
+ min(lim->seg_boundary_mask - (lim->seg_boundary_mask & paddr),
+ (unsigned long)lim->max_segment_size - 1) + 1);
}
/**
@@ -258,9 +257,7 @@ static bool bvec_split_segs(const struct queue_limits *lim,
unsigned seg_size = 0;
while (len && *nsegs < max_segs) {
- seg_size = get_max_segment_size(lim, bv->bv_page,
- bv->bv_offset + total_len);
- seg_size = min(seg_size, len);
+ seg_size = get_max_segment_size(lim, bvec_phys(bv) + total_len, len);
(*nsegs)++;
total_len += seg_size;
@@ -494,8 +491,8 @@ static unsigned blk_bvec_map_sg(struct request_queue *q,
while (nbytes > 0) {
unsigned offset = bvec->bv_offset + total;
- unsigned len = min(get_max_segment_size(&q->limits,
- bvec->bv_page, offset), nbytes);
+ unsigned len = get_max_segment_size(&q->limits, bvec_phys(bvec),
+ nbytes);
struct page *page = bvec->bv_page;
/*
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 47fe9d19b8f1..e3c3c0c21b55 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -448,6 +448,10 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
if (data->cmd_flags & REQ_NOWAIT)
data->flags |= BLK_MQ_REQ_NOWAIT;
+retry:
+ data->ctx = blk_mq_get_ctx(q);
+ data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
+
if (q->elevator) {
/*
* All requests use scheduler tags when an I/O scheduler is
@@ -469,13 +473,9 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
if (ops->limit_depth)
ops->limit_depth(data->cmd_flags, data);
}
- }
-
-retry:
- data->ctx = blk_mq_get_ctx(q);
- data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
- if (!(data->rq_flags & RQF_SCHED_TAGS))
+ } else {
blk_mq_tag_busy(data->hctx);
+ }
if (data->flags & BLK_MQ_REQ_RESERVED)
data->rq_flags |= RQF_RESV;
@@ -2909,6 +2909,17 @@ static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug,
INIT_LIST_HEAD(&rq->queuelist);
}
+static bool bio_unaligned(const struct bio *bio, struct request_queue *q)
+{
+ unsigned int bs_mask = queue_logical_block_size(q) - 1;
+
+ /* .bi_sector of any zero sized bio need to be initialized */
+ if ((bio->bi_iter.bi_size & bs_mask) ||
+ ((bio->bi_iter.bi_sector << SECTOR_SHIFT) & bs_mask))
+ return true;
+ return false;
+}
+
/**
* blk_mq_submit_bio - Create and send a request to block device.
* @bio: Bio pointer.
@@ -2961,6 +2972,15 @@ void blk_mq_submit_bio(struct bio *bio)
return;
}
+ /*
+ * Device reconfiguration may change logical block size, so alignment
+ * check has to be done with queue usage counter held
+ */
+ if (unlikely(bio_unaligned(bio, q))) {
+ bio_io_error(bio);
+ goto queue_exit;
+ }
+
if (unlikely(bio_may_exceed_limits(bio, &q->limits))) {
bio = __bio_split_to_limits(bio, &q->limits, &nr_segs);
if (!bio)
diff --git a/block/blk-settings.c b/block/blk-settings.c
index ec7dbe93d5c3..9fa4eed4df06 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -55,7 +55,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
}
EXPORT_SYMBOL(blk_set_stacking_limits);
-static void blk_apply_bdi_limits(struct backing_dev_info *bdi,
+void blk_apply_bdi_limits(struct backing_dev_info *bdi,
struct queue_limits *lim)
{
/*
@@ -142,8 +142,7 @@ static int blk_validate_integrity_limits(struct queue_limits *lim)
* so we assume that we can fit in at least PAGE_SIZE in a segment, apart from
* the first and last segments.
*/
-static
-unsigned int blk_queue_max_guaranteed_bio(struct queue_limits *lim)
+static unsigned int blk_queue_max_guaranteed_bio(struct queue_limits *lim)
{
unsigned int max_segments = min(BIO_MAX_VECS, lim->max_segments);
unsigned int length;
@@ -277,11 +276,10 @@ static int blk_validate_limits(struct queue_limits *lim)
if (lim->max_user_sectors < PAGE_SIZE / SECTOR_SIZE)
return -EINVAL;
lim->max_sectors = min(max_hw_sectors, lim->max_user_sectors);
- } else if (lim->io_opt) {
+ } else if (lim->io_opt > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) {
lim->max_sectors =
min(max_hw_sectors, lim->io_opt >> SECTOR_SHIFT);
- } else if (lim->io_min &&
- lim->io_min > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) {
+ } else if (lim->io_min > (BLK_DEF_MAX_SECTORS_CAP << SECTOR_SHIFT)) {
lim->max_sectors =
min(max_hw_sectors, lim->io_min >> SECTOR_SHIFT);
} else {
@@ -351,7 +349,7 @@ static int blk_validate_limits(struct queue_limits *lim)
if (lim->alignment_offset) {
lim->alignment_offset &= (lim->physical_block_size - 1);
- lim->features &= ~BLK_FEAT_MISALIGNED;
+ lim->flags &= ~BLK_FLAG_MISALIGNED;
}
if (!(lim->features & BLK_FEAT_WRITE_CACHE))
@@ -435,12 +433,6 @@ int queue_limits_set(struct request_queue *q, struct queue_limits *lim)
}
EXPORT_SYMBOL_GPL(queue_limits_set);
-void disk_update_readahead(struct gendisk *disk)
-{
- blk_apply_bdi_limits(disk->bdi, &disk->queue->limits);
-}
-EXPORT_SYMBOL_GPL(disk_update_readahead);
-
/**
* blk_limits_io_min - set minimum request size for a device
* @limits: the queue limits
@@ -564,7 +556,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
if (!(b->features & BLK_FEAT_POLL))
t->features &= ~BLK_FEAT_POLL;
- t->flags |= (b->flags & BLK_FEAT_MISALIGNED);
+ t->flags |= (b->flags & BLK_FLAG_MISALIGNED);
t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
t->max_user_sectors = min_not_zero(t->max_user_sectors,
@@ -603,7 +595,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
/* Verify that top and bottom intervals line up */
if (max(top, bottom) % min(top, bottom)) {
- t->flags |= BLK_FEAT_MISALIGNED;
+ t->flags |= BLK_FLAG_MISALIGNED;
ret = -1;
}
}
@@ -625,28 +617,28 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
/* Physical block size a multiple of the logical block size? */
if (t->physical_block_size & (t->logical_block_size - 1)) {
t->physical_block_size = t->logical_block_size;
- t->flags |= BLK_FEAT_MISALIGNED;
+ t->flags |= BLK_FLAG_MISALIGNED;
ret = -1;
}
/* Minimum I/O a multiple of the physical block size? */
if (t->io_min & (t->physical_block_size - 1)) {
t->io_min = t->physical_block_size;
- t->flags |= BLK_FEAT_MISALIGNED;
+ t->flags |= BLK_FLAG_MISALIGNED;
ret = -1;
}
/* Optimal I/O a multiple of the physical block size? */
if (t->io_opt & (t->physical_block_size - 1)) {
t->io_opt = 0;
- t->flags |= BLK_FEAT_MISALIGNED;
+ t->flags |= BLK_FLAG_MISALIGNED;
ret = -1;
}
/* chunk_sectors a multiple of the physical block size? */
if ((t->chunk_sectors << 9) & (t->physical_block_size - 1)) {
t->chunk_sectors = 0;
- t->flags |= BLK_FEAT_MISALIGNED;
+ t->flags |= BLK_FLAG_MISALIGNED;
ret = -1;
}
@@ -656,7 +648,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
/* Verify that new alignment_offset is on a logical block boundary */
if (t->alignment_offset & (t->logical_block_size - 1)) {
- t->flags |= BLK_FEAT_MISALIGNED;
+ t->flags |= BLK_FLAG_MISALIGNED;
ret = -1;
}
@@ -776,23 +768,6 @@ incompatible:
EXPORT_SYMBOL_GPL(queue_limits_stack_integrity);
/**
- * blk_queue_update_dma_pad - update pad mask
- * @q: the request queue for the device
- * @mask: pad mask
- *
- * Update dma pad mask.
- *
- * Appending pad buffer to a request modifies the last entry of a
- * scatter list such that it includes the pad buffer.
- **/
-void blk_queue_update_dma_pad(struct request_queue *q, unsigned int mask)
-{
- if (mask > q->dma_pad_mask)
- q->dma_pad_mask = mask;
-}
-EXPORT_SYMBOL(blk_queue_update_dma_pad);
-
-/**
* blk_set_queue_depth - tell the block layer about the device queue depth
* @q: the request queue for the device
* @depth: queue depth
@@ -809,7 +784,7 @@ int bdev_alignment_offset(struct block_device *bdev)
{
struct request_queue *q = bdev_get_queue(bdev);
- if (q->limits.flags & BLK_FEAT_MISALIGNED)
+ if (q->limits.flags & BLK_FLAG_MISALIGNED)
return -1;
if (bdev_is_partition(bdev))
return queue_limit_alignment_offset(&q->limits,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 1a984179f3ac..60116d13cb80 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -22,8 +22,8 @@
struct queue_sysfs_entry {
struct attribute attr;
- ssize_t (*show)(struct request_queue *, char *);
- ssize_t (*store)(struct request_queue *, const char *, size_t);
+ ssize_t (*show)(struct gendisk *disk, char *page);
+ ssize_t (*store)(struct gendisk *disk, const char *page, size_t count);
};
static ssize_t
@@ -47,18 +47,18 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
return count;
}
-static ssize_t queue_requests_show(struct request_queue *q, char *page)
+static ssize_t queue_requests_show(struct gendisk *disk, char *page)
{
- return queue_var_show(q->nr_requests, page);
+ return queue_var_show(disk->queue->nr_requests, page);
}
static ssize_t
-queue_requests_store(struct request_queue *q, const char *page, size_t count)
+queue_requests_store(struct gendisk *disk, const char *page, size_t count)
{
unsigned long nr;
int ret, err;
- if (!queue_is_mq(q))
+ if (!queue_is_mq(disk->queue))
return -EINVAL;
ret = queue_var_store(&nr, page, count);
@@ -68,135 +68,91 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count)
if (nr < BLKDEV_MIN_RQ)
nr = BLKDEV_MIN_RQ;
- err = blk_mq_update_nr_requests(q, nr);
+ err = blk_mq_update_nr_requests(disk->queue, nr);
if (err)
return err;
return ret;
}
-static ssize_t queue_ra_show(struct request_queue *q, char *page)
+static ssize_t queue_ra_show(struct gendisk *disk, char *page)
{
- unsigned long ra_kb;
-
- if (!q->disk)
- return -EINVAL;
- ra_kb = q->disk->bdi->ra_pages << (PAGE_SHIFT - 10);
- return queue_var_show(ra_kb, page);
+ return queue_var_show(disk->bdi->ra_pages << (PAGE_SHIFT - 10), page);
}
static ssize_t
-queue_ra_store(struct request_queue *q, const char *page, size_t count)
+queue_ra_store(struct gendisk *disk, const char *page, size_t count)
{
unsigned long ra_kb;
ssize_t ret;
- if (!q->disk)
- return -EINVAL;
ret = queue_var_store(&ra_kb, page, count);
if (ret < 0)
return ret;
- q->disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
+ disk->bdi->ra_pages = ra_kb >> (PAGE_SHIFT - 10);
return ret;
}
-static ssize_t queue_max_sectors_show(struct request_queue *q, char *page)
-{
- int max_sectors_kb = queue_max_sectors(q) >> 1;
-
- return queue_var_show(max_sectors_kb, page);
+#define QUEUE_SYSFS_LIMIT_SHOW(_field) \
+static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
+{ \
+ return queue_var_show(disk->queue->limits._field, page); \
}
-static ssize_t queue_max_segments_show(struct request_queue *q, char *page)
-{
- return queue_var_show(queue_max_segments(q), page);
-}
+QUEUE_SYSFS_LIMIT_SHOW(max_segments)
+QUEUE_SYSFS_LIMIT_SHOW(max_discard_segments)
+QUEUE_SYSFS_LIMIT_SHOW(max_integrity_segments)
+QUEUE_SYSFS_LIMIT_SHOW(max_segment_size)
+QUEUE_SYSFS_LIMIT_SHOW(logical_block_size)
+QUEUE_SYSFS_LIMIT_SHOW(physical_block_size)
+QUEUE_SYSFS_LIMIT_SHOW(chunk_sectors)
+QUEUE_SYSFS_LIMIT_SHOW(io_min)
+QUEUE_SYSFS_LIMIT_SHOW(io_opt)
+QUEUE_SYSFS_LIMIT_SHOW(discard_granularity)
+QUEUE_SYSFS_LIMIT_SHOW(zone_write_granularity)
+QUEUE_SYSFS_LIMIT_SHOW(virt_boundary_mask)
+QUEUE_SYSFS_LIMIT_SHOW(dma_alignment)
+QUEUE_SYSFS_LIMIT_SHOW(max_open_zones)
+QUEUE_SYSFS_LIMIT_SHOW(max_active_zones)
+QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_min)
+QUEUE_SYSFS_LIMIT_SHOW(atomic_write_unit_max)
-static ssize_t queue_max_discard_segments_show(struct request_queue *q,
- char *page)
-{
- return queue_var_show(queue_max_discard_segments(q), page);
+#define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(_field) \
+static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
+{ \
+ return sprintf(page, "%llu\n", \
+ (unsigned long long)disk->queue->limits._field << \
+ SECTOR_SHIFT); \
}
-static ssize_t queue_atomic_write_max_bytes_show(struct request_queue *q,
- char *page)
-{
- return queue_var_show(queue_atomic_write_max_bytes(q), page);
-}
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_discard_sectors)
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_discard_sectors)
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors)
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors)
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors)
-static ssize_t queue_atomic_write_boundary_show(struct request_queue *q,
- char *page)
-{
- return queue_var_show(queue_atomic_write_boundary_bytes(q), page);
+#define QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(_field) \
+static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
+{ \
+ return queue_var_show(disk->queue->limits._field >> 1, page); \
}
-static ssize_t queue_atomic_write_unit_min_show(struct request_queue *q,
- char *page)
-{
- return queue_var_show(queue_atomic_write_unit_min_bytes(q), page);
-}
-
-static ssize_t queue_atomic_write_unit_max_show(struct request_queue *q,
- char *page)
-{
- return queue_var_show(queue_atomic_write_unit_max_bytes(q), page);
-}
-
-static ssize_t queue_max_integrity_segments_show(struct request_queue *q, char *page)
-{
- return queue_var_show(q->limits.max_integrity_segments, page);
-}
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_sectors)
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_KB(max_hw_sectors)
-static ssize_t queue_max_segment_size_show(struct request_queue *q, char *page)
-{
- return queue_var_show(queue_max_segment_size(q), page);
+#define QUEUE_SYSFS_SHOW_CONST(_name, _val) \
+static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
+{ \
+ return sprintf(page, "%d\n", _val); \
}
-static ssize_t queue_logical_block_size_show(struct request_queue *q, char *page)
-{
- return queue_var_show(queue_logical_block_size(q), page);
-}
+/* deprecated fields */
+QUEUE_SYSFS_SHOW_CONST(discard_zeroes_data, 0)
+QUEUE_SYSFS_SHOW_CONST(write_same_max, 0)
+QUEUE_SYSFS_SHOW_CONST(poll_delay, -1)
-static ssize_t queue_physical_block_size_show(struct request_queue *q, char *page)
-{
- return queue_var_show(queue_physical_block_size(q), page);
-}
-
-static ssize_t queue_chunk_sectors_show(struct request_queue *q, char *page)
-{
- return queue_var_show(q->limits.chunk_sectors, page);
-}
-
-static ssize_t queue_io_min_show(struct request_queue *q, char *page)
-{
- return queue_var_show(queue_io_min(q), page);
-}
-
-static ssize_t queue_io_opt_show(struct request_queue *q, char *page)
-{
- return queue_var_show(queue_io_opt(q), page);
-}
-
-static ssize_t queue_discard_granularity_show(struct request_queue *q, char *page)
-{
- return queue_var_show(q->limits.discard_granularity, page);
-}
-
-static ssize_t queue_discard_max_hw_show(struct request_queue *q, char *page)
-{
-
- return sprintf(page, "%llu\n",
- (unsigned long long)q->limits.max_hw_discard_sectors << 9);
-}
-
-static ssize_t queue_discard_max_show(struct request_queue *q, char *page)
-{
- return sprintf(page, "%llu\n",
- (unsigned long long)q->limits.max_discard_sectors << 9);
-}
-
-static ssize_t queue_discard_max_store(struct request_queue *q,
- const char *page, size_t count)
+static ssize_t queue_max_discard_sectors_store(struct gendisk *disk,
+ const char *page, size_t count)
{
unsigned long max_discard_bytes;
struct queue_limits lim;
@@ -207,51 +163,34 @@ static ssize_t queue_discard_max_store(struct request_queue *q,
if (ret < 0)
return ret;
- if (max_discard_bytes & (q->limits.discard_granularity - 1))
+ if (max_discard_bytes & (disk->queue->limits.discard_granularity - 1))
return -EINVAL;
if ((max_discard_bytes >> SECTOR_SHIFT) > UINT_MAX)
return -EINVAL;
- lim = queue_limits_start_update(q);
+ lim = queue_limits_start_update(disk->queue);
lim.max_user_discard_sectors = max_discard_bytes >> SECTOR_SHIFT;
- err = queue_limits_commit_update(q, &lim);
+ err = queue_limits_commit_update(disk->queue, &lim);
if (err)
return err;
return ret;
}
-static ssize_t queue_discard_zeroes_data_show(struct request_queue *q, char *page)
-{
- return queue_var_show(0, page);
-}
-
-static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
-{
- return queue_var_show(0, page);
-}
-
-static ssize_t queue_write_zeroes_max_show(struct request_queue *q, char *page)
+/*
+ * For zone append queue_max_zone_append_sectors does not just return the
+ * underlying queue limits, but actually contains a calculation. Because of
+ * that we can't simply use QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES here.
+ */
+static ssize_t queue_zone_append_max_show(struct gendisk *disk, char *page)
{
return sprintf(page, "%llu\n",
- (unsigned long long)q->limits.max_write_zeroes_sectors << 9);
-}
-
-static ssize_t queue_zone_write_granularity_show(struct request_queue *q,
- char *page)
-{
- return queue_var_show(queue_zone_write_granularity(q), page);
-}
-
-static ssize_t queue_zone_append_max_show(struct request_queue *q, char *page)
-{
- unsigned long long max_sectors = queue_max_zone_append_sectors(q);
-
- return sprintf(page, "%llu\n", max_sectors << SECTOR_SHIFT);
+ (u64)queue_max_zone_append_sectors(disk->queue) <<
+ SECTOR_SHIFT);
}
static ssize_t
-queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
+queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count)
{
unsigned long max_sectors_kb;
struct queue_limits lim;
@@ -262,33 +201,16 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
if (ret < 0)
return ret;
- lim = queue_limits_start_update(q);
+ lim = queue_limits_start_update(disk->queue);
lim.max_user_sectors = max_sectors_kb << 1;
- err = queue_limits_commit_update(q, &lim);
+ err = queue_limits_commit_update(disk->queue, &lim);
if (err)
return err;
return ret;
}
-static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page)
-{
- int max_hw_sectors_kb = queue_max_hw_sectors(q) >> 1;
-
- return queue_var_show(max_hw_sectors_kb, page);
-}
-
-static ssize_t queue_virt_boundary_mask_show(struct request_queue *q, char *page)
-{
- return queue_var_show(q->limits.virt_boundary_mask, page);
-}
-
-static ssize_t queue_dma_alignment_show(struct request_queue *q, char *page)
-{
- return queue_var_show(queue_dma_alignment(q), page);
-}
-
-static ssize_t queue_feature_store(struct request_queue *q, const char *page,
- size_t count, unsigned int feature)
+static ssize_t queue_feature_store(struct gendisk *disk, const char *page,
+ size_t count, blk_features_t feature)
{
struct queue_limits lim;
unsigned long val;
@@ -298,26 +220,27 @@ static ssize_t queue_feature_store(struct request_queue *q, const char *page,
if (ret < 0)
return ret;
- lim = queue_limits_start_update(q);
+ lim = queue_limits_start_update(disk->queue);
if (val)
lim.features |= feature;
else
lim.features &= ~feature;
- ret = queue_limits_commit_update(q, &lim);
+ ret = queue_limits_commit_update(disk->queue, &lim);
if (ret)
return ret;
return count;
}
-#define QUEUE_SYSFS_FEATURE(_name, _feature) \
-static ssize_t queue_##_name##_show(struct request_queue *q, char *page) \
-{ \
- return sprintf(page, "%u\n", !!(q->limits.features & _feature)); \
-} \
-static ssize_t queue_##_name##_store(struct request_queue *q, \
- const char *page, size_t count) \
-{ \
- return queue_feature_store(q, page, count, _feature); \
+#define QUEUE_SYSFS_FEATURE(_name, _feature) \
+static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
+{ \
+ return sprintf(page, "%u\n", \
+ !!(disk->queue->limits.features & _feature)); \
+} \
+static ssize_t queue_##_name##_store(struct gendisk *disk, \
+ const char *page, size_t count) \
+{ \
+ return queue_feature_store(disk, page, count, _feature); \
}
QUEUE_SYSFS_FEATURE(rotational, BLK_FEAT_ROTATIONAL)
@@ -325,35 +248,36 @@ QUEUE_SYSFS_FEATURE(add_random, BLK_FEAT_ADD_RANDOM)
QUEUE_SYSFS_FEATURE(iostats, BLK_FEAT_IO_STAT)
QUEUE_SYSFS_FEATURE(stable_writes, BLK_FEAT_STABLE_WRITES);
-static ssize_t queue_zoned_show(struct request_queue *q, char *page)
-{
- if (blk_queue_is_zoned(q))
- return sprintf(page, "host-managed\n");
- return sprintf(page, "none\n");
+#define QUEUE_SYSFS_FEATURE_SHOW(_name, _feature) \
+static ssize_t queue_##_name##_show(struct gendisk *disk, char *page) \
+{ \
+ return sprintf(page, "%u\n", \
+ !!(disk->queue->limits.features & _feature)); \
}
-static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
-{
- return queue_var_show(disk_nr_zones(q->disk), page);
-}
+QUEUE_SYSFS_FEATURE_SHOW(poll, BLK_FEAT_POLL);
+QUEUE_SYSFS_FEATURE_SHOW(fua, BLK_FEAT_FUA);
+QUEUE_SYSFS_FEATURE_SHOW(dax, BLK_FEAT_DAX);
-static ssize_t queue_max_open_zones_show(struct request_queue *q, char *page)
+static ssize_t queue_zoned_show(struct gendisk *disk, char *page)
{
- return queue_var_show(bdev_max_open_zones(q->disk->part0), page);
+ if (blk_queue_is_zoned(disk->queue))
+ return sprintf(page, "host-managed\n");
+ return sprintf(page, "none\n");
}
-static ssize_t queue_max_active_zones_show(struct request_queue *q, char *page)
+static ssize_t queue_nr_zones_show(struct gendisk *disk, char *page)
{
- return queue_var_show(bdev_max_active_zones(q->disk->part0), page);
+ return queue_var_show(disk_nr_zones(disk), page);
}
-static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
+static ssize_t queue_nomerges_show(struct gendisk *disk, char *page)
{
- return queue_var_show((blk_queue_nomerges(q) << 1) |
- blk_queue_noxmerges(q), page);
+ return queue_var_show((blk_queue_nomerges(disk->queue) << 1) |
+ blk_queue_noxmerges(disk->queue), page);
}
-static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
+static ssize_t queue_nomerges_store(struct gendisk *disk, const char *page,
size_t count)
{
unsigned long nm;
@@ -362,29 +286,30 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
if (ret < 0)
return ret;
- blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
- blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, q);
+ blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, disk->queue);
+ blk_queue_flag_clear(QUEUE_FLAG_NOXMERGES, disk->queue);
if (nm == 2)
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q);
+ blk_queue_flag_set(QUEUE_FLAG_NOMERGES, disk->queue);
else if (nm)
- blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q);
+ blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, disk->queue);
return ret;
}
-static ssize_t queue_rq_affinity_show(struct request_queue *q, char *page)
+static ssize_t queue_rq_affinity_show(struct gendisk *disk, char *page)
{
- bool set = test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags);
- bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags);
+ bool set = test_bit(QUEUE_FLAG_SAME_COMP, &disk->queue->queue_flags);
+ bool force = test_bit(QUEUE_FLAG_SAME_FORCE, &disk->queue->queue_flags);
return queue_var_show(set << force, page);
}
static ssize_t
-queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
+queue_rq_affinity_store(struct gendisk *disk, const char *page, size_t count)
{
ssize_t ret = -EINVAL;
#ifdef CONFIG_SMP
+ struct request_queue *q = disk->queue;
unsigned long val;
ret = queue_var_store(&val, page, count);
@@ -405,38 +330,28 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count)
return ret;
}
-static ssize_t queue_poll_delay_show(struct request_queue *q, char *page)
-{
- return sprintf(page, "%d\n", -1);
-}
-
-static ssize_t queue_poll_delay_store(struct request_queue *q, const char *page,
+static ssize_t queue_poll_delay_store(struct gendisk *disk, const char *page,
size_t count)
{
return count;
}
-static ssize_t queue_poll_show(struct request_queue *q, char *page)
-{
- return queue_var_show(q->limits.features & BLK_FEAT_POLL, page);
-}
-
-static ssize_t queue_poll_store(struct request_queue *q, const char *page,
+static ssize_t queue_poll_store(struct gendisk *disk, const char *page,
size_t count)
{
- if (!(q->limits.features & BLK_FEAT_POLL))
+ if (!(disk->queue->limits.features & BLK_FEAT_POLL))
return -EINVAL;
pr_info_ratelimited("writes to the poll attribute are ignored.\n");
pr_info_ratelimited("please use driver specific parameters instead.\n");
return count;
}
-static ssize_t queue_io_timeout_show(struct request_queue *q, char *page)
+static ssize_t queue_io_timeout_show(struct gendisk *disk, char *page)
{
- return sprintf(page, "%u\n", jiffies_to_msecs(q->rq_timeout));
+ return sprintf(page, "%u\n", jiffies_to_msecs(disk->queue->rq_timeout));
}
-static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
+static ssize_t queue_io_timeout_store(struct gendisk *disk, const char *page,
size_t count)
{
unsigned int val;
@@ -446,19 +361,19 @@ static ssize_t queue_io_timeout_store(struct request_queue *q, const char *page,
if (err || val == 0)
return -EINVAL;
- blk_queue_rq_timeout(q, msecs_to_jiffies(val));
+ blk_queue_rq_timeout(disk->queue, msecs_to_jiffies(val));
return count;
}
-static ssize_t queue_wc_show(struct request_queue *q, char *page)
+static ssize_t queue_wc_show(struct gendisk *disk, char *page)
{
- if (q->limits.features & BLK_FLAG_WRITE_CACHE_DISABLED)
- return sprintf(page, "write through\n");
- return sprintf(page, "write back\n");
+ if (blk_queue_write_cache(disk->queue))
+ return sprintf(page, "write back\n");
+ return sprintf(page, "write through\n");
}
-static ssize_t queue_wc_store(struct request_queue *q, const char *page,
+static ssize_t queue_wc_store(struct gendisk *disk, const char *page,
size_t count)
{
struct queue_limits lim;
@@ -474,27 +389,17 @@ static ssize_t queue_wc_store(struct request_queue *q, const char *page,
return -EINVAL;
}
- lim = queue_limits_start_update(q);
+ lim = queue_limits_start_update(disk->queue);
if (disable)
lim.flags |= BLK_FLAG_WRITE_CACHE_DISABLED;
else
lim.flags &= ~BLK_FLAG_WRITE_CACHE_DISABLED;
- err = queue_limits_commit_update(q, &lim);
+ err = queue_limits_commit_update(disk->queue, &lim);
if (err)
return err;
return count;
}
-static ssize_t queue_fua_show(struct request_queue *q, char *page)
-{
- return sprintf(page, "%u\n", !!(q->limits.features & BLK_FEAT_FUA));
-}
-
-static ssize_t queue_dax_show(struct request_queue *q, char *page)
-{
- return queue_var_show(blk_queue_dax(q), page);
-}
-
#define QUEUE_RO_ENTRY(_prefix, _name) \
static struct queue_sysfs_entry _prefix##_entry = { \
.attr = { .name = _name, .mode = 0444 }, \
@@ -525,17 +430,18 @@ QUEUE_RO_ENTRY(queue_io_opt, "optimal_io_size");
QUEUE_RO_ENTRY(queue_max_discard_segments, "max_discard_segments");
QUEUE_RO_ENTRY(queue_discard_granularity, "discard_granularity");
-QUEUE_RO_ENTRY(queue_discard_max_hw, "discard_max_hw_bytes");
-QUEUE_RW_ENTRY(queue_discard_max, "discard_max_bytes");
+QUEUE_RO_ENTRY(queue_max_hw_discard_sectors, "discard_max_hw_bytes");
+QUEUE_RW_ENTRY(queue_max_discard_sectors, "discard_max_bytes");
QUEUE_RO_ENTRY(queue_discard_zeroes_data, "discard_zeroes_data");
-QUEUE_RO_ENTRY(queue_atomic_write_max_bytes, "atomic_write_max_bytes");
-QUEUE_RO_ENTRY(queue_atomic_write_boundary, "atomic_write_boundary_bytes");
+QUEUE_RO_ENTRY(queue_atomic_write_max_sectors, "atomic_write_max_bytes");
+QUEUE_RO_ENTRY(queue_atomic_write_boundary_sectors,
+ "atomic_write_boundary_bytes");
QUEUE_RO_ENTRY(queue_atomic_write_unit_max, "atomic_write_unit_max_bytes");
QUEUE_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes");
QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
-QUEUE_RO_ENTRY(queue_write_zeroes_max, "write_zeroes_max_bytes");
+QUEUE_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
QUEUE_RO_ENTRY(queue_zone_append_max, "zone_append_max_bytes");
QUEUE_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
@@ -580,20 +486,22 @@ static ssize_t queue_var_store64(s64 *var, const char *page)
return 0;
}
-static ssize_t queue_wb_lat_show(struct request_queue *q, char *page)
+static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
{
- if (!wbt_rq_qos(q))
+ if (!wbt_rq_qos(disk->queue))
return -EINVAL;
- if (wbt_disabled(q))
+ if (wbt_disabled(disk->queue))
return sprintf(page, "0\n");
- return sprintf(page, "%llu\n", div_u64(wbt_get_min_lat(q), 1000));
+ return sprintf(page, "%llu\n",
+ div_u64(wbt_get_min_lat(disk->queue), 1000));
}
-static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
+static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
size_t count)
{
+ struct request_queue *q = disk->queue;
struct rq_qos *rqos;
ssize_t ret;
s64 val;
@@ -606,7 +514,7 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
rqos = wbt_rq_qos(q);
if (!rqos) {
- ret = wbt_init(q->disk);
+ ret = wbt_init(disk);
if (ret)
return ret;
}
@@ -652,15 +560,15 @@ static struct attribute *queue_attrs[] = {
&queue_io_min_entry.attr,
&queue_io_opt_entry.attr,
&queue_discard_granularity_entry.attr,
- &queue_discard_max_entry.attr,
- &queue_discard_max_hw_entry.attr,
+ &queue_max_discard_sectors_entry.attr,
+ &queue_max_hw_discard_sectors_entry.attr,
&queue_discard_zeroes_data_entry.attr,
- &queue_atomic_write_max_bytes_entry.attr,
- &queue_atomic_write_boundary_entry.attr,
+ &queue_atomic_write_max_sectors_entry.attr,
+ &queue_atomic_write_boundary_sectors_entry.attr,
&queue_atomic_write_unit_min_entry.attr,
&queue_atomic_write_unit_max_entry.attr,
&queue_write_same_max_entry.attr,
- &queue_write_zeroes_max_entry.attr,
+ &queue_max_write_zeroes_sectors_entry.attr,
&queue_zone_append_max_entry.attr,
&queue_zone_write_granularity_entry.attr,
&queue_rotational_entry.attr,
@@ -740,14 +648,13 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
struct queue_sysfs_entry *entry = to_queue(attr);
struct gendisk *disk = container_of(kobj, struct gendisk, queue_kobj);
- struct request_queue *q = disk->queue;
ssize_t res;
if (!entry->show)
return -EIO;
- mutex_lock(&q->sysfs_lock);
- res = entry->show(q, page);
- mutex_unlock(&q->sysfs_lock);
+ mutex_lock(&disk->queue->sysfs_lock);
+ res = entry->show(disk, page);
+ mutex_unlock(&disk->queue->sysfs_lock);
return res;
}
@@ -765,7 +672,7 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
blk_mq_freeze_queue(q);
mutex_lock(&q->sysfs_lock);
- res = entry->store(q, page, length);
+ res = entry->store(disk, page, length);
mutex_unlock(&q->sysfs_lock);
blk_mq_unfreeze_queue(q);
return res;
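For reference, each QUEUE_SYSFS_LIMIT_SHOW(field) invocation added above expands to a trivial show helper that reads the value straight out of disk->queue->limits. For example, QUEUE_SYSFS_LIMIT_SHOW(io_min) expands to:

static ssize_t queue_io_min_show(struct gendisk *disk, char *page)
{
        return queue_var_show(disk->queue->limits.io_min, page);
}

which is the replacement for the hand-written queue_io_min_show() removed earlier in this file.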
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index c1bf73f8c75d..dc6140fa3de0 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -704,6 +704,9 @@ static unsigned long tg_within_iops_limit(struct throtl_grp *tg, struct bio *bio
/* Calc approx time to dispatch */
jiffy_wait = jiffy_elapsed_rnd - jiffy_elapsed;
+
+ /* make sure at least one io can be dispatched after waiting */
+ jiffy_wait = max(jiffy_wait, HZ / iops_limit + 1);
return jiffy_wait;
}
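(Worked example with assumed values: for HZ=1000 and iops_limit=100, the new floor is 1000/100 + 1 = 11 jiffies, just over the 10 ms spacing of one I/O at that limit, so at least one I/O's worth of budget accrues before the group is considered for dispatch again.)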
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 1a5e4b049ecd..6dfc659d22e2 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -37,7 +37,7 @@
enum wbt_flags {
WBT_TRACKED = 1, /* write, tracked for throttling */
WBT_READ = 2, /* read */
- WBT_KSWAPD = 4, /* write, from kswapd */
+ WBT_SWAP = 4, /* write, from swap_writepage() */
WBT_DISCARD = 8, /* discard */
WBT_NR_BITS = 4, /* number of bits */
@@ -45,7 +45,7 @@ enum wbt_flags {
enum {
WBT_RWQ_BG = 0,
- WBT_RWQ_KSWAPD,
+ WBT_RWQ_SWAP,
WBT_RWQ_DISCARD,
WBT_NUM_RWQ,
};
@@ -172,8 +172,8 @@ static bool wb_recent_wait(struct rq_wb *rwb)
static inline struct rq_wait *get_rq_wait(struct rq_wb *rwb,
enum wbt_flags wb_acct)
{
- if (wb_acct & WBT_KSWAPD)
- return &rwb->rq_wait[WBT_RWQ_KSWAPD];
+ if (wb_acct & WBT_SWAP)
+ return &rwb->rq_wait[WBT_RWQ_SWAP];
else if (wb_acct & WBT_DISCARD)
return &rwb->rq_wait[WBT_RWQ_DISCARD];
@@ -528,7 +528,7 @@ static bool close_io(struct rq_wb *rwb)
time_before(now, rwb->last_comp + HZ / 10);
}
-#define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO)
+#define REQ_HIPRIO (REQ_SYNC | REQ_META | REQ_PRIO | REQ_SWAP)
static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
{
@@ -539,13 +539,13 @@ static inline unsigned int get_limit(struct rq_wb *rwb, blk_opf_t opf)
/*
* At this point we know it's a buffered write. If this is
- * kswapd trying to free memory, or REQ_SYNC is set, then
+ * swap trying to free memory, or REQ_SYNC is set, then
* it's WB_SYNC_ALL writeback, and we'll use the max limit for
* that. If the write is marked as a background write, then use
* the idle limit, or go to normal if we haven't had competing
* IO for a bit.
*/
- if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb) || current_is_kswapd())
+ if ((opf & REQ_HIPRIO) || wb_recent_wait(rwb))
limit = rwb->rq_depth.max_depth;
else if ((opf & REQ_BACKGROUND) || close_io(rwb)) {
/*
@@ -622,8 +622,8 @@ static enum wbt_flags bio_to_wbt_flags(struct rq_wb *rwb, struct bio *bio)
if (bio_op(bio) == REQ_OP_READ) {
flags = WBT_READ;
} else if (wbt_should_throttle(bio)) {
- if (current_is_kswapd())
- flags |= WBT_KSWAPD;
+ if (bio->bi_opf & REQ_SWAP)
+ flags |= WBT_SWAP;
if (bio_op(bio) == REQ_OP_DISCARD)
flags |= WBT_DISCARD;
flags |= WBT_TRACKED;
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 601c21a224c9..7ace890136a3 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -150,77 +150,6 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
-static inline unsigned long *blk_alloc_zone_bitmap(int node,
- unsigned int nr_zones)
-{
- return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
- GFP_NOIO, node);
-}
-
-static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
- void *data)
-{
- /*
- * For an all-zones reset, ignore conventional, empty, read-only
- * and offline zones.
- */
- switch (zone->cond) {
- case BLK_ZONE_COND_NOT_WP:
- case BLK_ZONE_COND_EMPTY:
- case BLK_ZONE_COND_READONLY:
- case BLK_ZONE_COND_OFFLINE:
- return 0;
- default:
- set_bit(idx, (unsigned long *)data);
- return 0;
- }
-}
-
-static int blkdev_zone_reset_all_emulated(struct block_device *bdev)
-{
- struct gendisk *disk = bdev->bd_disk;
- sector_t capacity = bdev_nr_sectors(bdev);
- sector_t zone_sectors = bdev_zone_sectors(bdev);
- unsigned long *need_reset;
- struct bio *bio = NULL;
- sector_t sector = 0;
- int ret;
-
- need_reset = blk_alloc_zone_bitmap(disk->queue->node, disk->nr_zones);
- if (!need_reset)
- return -ENOMEM;
-
- ret = disk->fops->report_zones(disk, 0, disk->nr_zones,
- blk_zone_need_reset_cb, need_reset);
- if (ret < 0)
- goto out_free_need_reset;
-
- ret = 0;
- while (sector < capacity) {
- if (!test_bit(disk_zone_no(disk, sector), need_reset)) {
- sector += zone_sectors;
- continue;
- }
-
- bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC,
- GFP_KERNEL);
- bio->bi_iter.bi_sector = sector;
- sector += zone_sectors;
-
- /* This may take a while, so be nice to others */
- cond_resched();
- }
-
- if (bio) {
- ret = submit_bio_wait(bio);
- bio_put(bio);
- }
-
-out_free_need_reset:
- kfree(need_reset);
- return ret;
-}
-
static int blkdev_zone_reset_all(struct block_device *bdev)
{
struct bio bio;
@@ -247,7 +176,6 @@ static int blkdev_zone_reset_all(struct block_device *bdev)
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
sector_t sector, sector_t nr_sectors)
{
- struct request_queue *q = bdev_get_queue(bdev);
sector_t zone_sectors = bdev_zone_sectors(bdev);
sector_t capacity = bdev_nr_sectors(bdev);
sector_t end_sector = sector + nr_sectors;
@@ -275,16 +203,11 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
return -EINVAL;
/*
- * In the case of a zone reset operation over all zones,
- * REQ_OP_ZONE_RESET_ALL can be used with devices supporting this
- * command. For other devices, we emulate this command behavior by
- * identifying the zones needing a reset.
+ * In the case of a zone reset operation over all zones, use
+ * REQ_OP_ZONE_RESET_ALL.
*/
- if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) {
- if (!blk_queue_zone_resetall(q))
- return blkdev_zone_reset_all_emulated(bdev);
+ if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity)
return blkdev_zone_reset_all(bdev);
- }
while (sector < end_sector) {
bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, GFP_KERNEL);
@@ -1552,7 +1475,7 @@ void disk_free_zone_resources(struct gendisk *disk)
mempool_destroy(disk->zone_wplugs_pool);
disk->zone_wplugs_pool = NULL;
- kfree(disk->conv_zones_bitmap);
+ bitmap_free(disk->conv_zones_bitmap);
disk->conv_zones_bitmap = NULL;
disk->zone_capacity = 0;
disk->last_zone_capacity = 0;
@@ -1674,7 +1597,6 @@ static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
struct blk_revalidate_zone_args *args)
{
struct gendisk *disk = args->disk;
- struct request_queue *q = disk->queue;
if (zone->capacity != zone->len) {
pr_warn("%s: Invalid conventional zone capacity\n",
@@ -1690,7 +1612,7 @@ static int blk_revalidate_conv_zone(struct blk_zone *zone, unsigned int idx,
if (!args->conv_zones_bitmap) {
args->conv_zones_bitmap =
- blk_alloc_zone_bitmap(q->node, args->nr_zones);
+ bitmap_zalloc(args->nr_zones, GFP_NOIO);
if (!args->conv_zones_bitmap)
return -ENOMEM;
}
diff --git a/block/blk.h b/block/blk.h
index d0a986d8ee50..8e8936e97307 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -9,7 +9,6 @@
#include <xen/xen.h>
#include "blk-crypto-internal.h"
-struct blk_integrity_iter;
struct elevator_type;
/* Max future timer expiry for timeouts */
@@ -99,8 +98,8 @@ static inline bool biovec_phys_mergeable(struct request_queue *q,
struct bio_vec *vec1, struct bio_vec *vec2)
{
unsigned long mask = queue_segment_boundary(q);
- phys_addr_t addr1 = page_to_phys(vec1->bv_page) + vec1->bv_offset;
- phys_addr_t addr2 = page_to_phys(vec2->bv_page) + vec2->bv_offset;
+ phys_addr_t addr1 = bvec_phys(vec1);
+ phys_addr_t addr2 = bvec_phys(vec2);
/*
* Merging adjacent physical pages may not work correctly under KMSAN
@@ -358,6 +357,8 @@ bool blk_rq_merge_ok(struct request *rq, struct bio *bio);
enum elv_merge blk_try_merge(struct request *rq, struct bio *bio);
int blk_set_default_limits(struct queue_limits *lim);
+void blk_apply_bdi_limits(struct backing_dev_info *bdi,
+ struct queue_limits *lim);
int blk_dev_init(void);
/*
@@ -679,10 +680,8 @@ int bdev_open(struct block_device *bdev, blk_mode_t mode, void *holder,
const struct blk_holder_ops *hops, struct file *bdev_file);
int bdev_permission(dev_t dev, blk_mode_t mode, void *holder);
-void blk_integrity_generate(struct blk_integrity_iter *iter,
- struct blk_integrity *bi);
-blk_status_t blk_integrity_verify(struct blk_integrity_iter *iter,
- struct blk_integrity *bi);
+void blk_integrity_generate(struct bio *bio);
+void blk_integrity_verify(struct bio *bio);
void blk_integrity_prepare(struct request *rq);
void blk_integrity_complete(struct request *rq, unsigned int nr_bytes);
diff --git a/block/elevator.c b/block/elevator.c
index f64ebd726e58..f13d552a32c8 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -709,24 +709,25 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
return ret;
}
-ssize_t elv_iosched_store(struct request_queue *q, const char *buf,
+ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
size_t count)
{
char elevator_name[ELV_NAME_MAX];
int ret;
- if (!elv_support_iosched(q))
+ if (!elv_support_iosched(disk->queue))
return count;
strscpy(elevator_name, buf, sizeof(elevator_name));
- ret = elevator_change(q, strstrip(elevator_name));
+ ret = elevator_change(disk->queue, strstrip(elevator_name));
if (!ret)
return count;
return ret;
}
-ssize_t elv_iosched_show(struct request_queue *q, char *name)
+ssize_t elv_iosched_show(struct gendisk *disk, char *name)
{
+ struct request_queue *q = disk->queue;
struct elevator_queue *eq = q->elevator;
struct elevator_type *cur = NULL, *e;
int len = 0;
diff --git a/block/elevator.h b/block/elevator.h
index e9a050a96e53..3fe18e1a8692 100644
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -147,8 +147,8 @@ extern void elv_unregister(struct elevator_type *);
/*
* io scheduler sysfs switching
*/
-extern ssize_t elv_iosched_show(struct request_queue *, char *);
-extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t);
+ssize_t elv_iosched_show(struct gendisk *disk, char *page);
+ssize_t elv_iosched_store(struct gendisk *disk, const char *page, size_t count);
extern bool elv_bio_merge_ok(struct request *, struct bio *);
extern struct elevator_queue *elevator_alloc(struct request_queue *,
diff --git a/block/fops.c b/block/fops.c
index be36c9fbd500..9825c1713a49 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -394,10 +394,11 @@ static int blkdev_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
struct block_device *bdev = I_BDEV(inode);
loff_t isize = i_size_read(inode);
- iomap->bdev = bdev;
- iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
if (offset >= isize)
return -EIO;
+
+ iomap->bdev = bdev;
+ iomap->offset = ALIGN_DOWN(offset, bdev_logical_block_size(bdev));
iomap->type = IOMAP_MAPPED;
iomap->addr = iomap->offset;
iomap->length = isize - iomap->offset;
diff --git a/block/genhd.c b/block/genhd.c
index 8f1f3c6b4d67..4dc95a463505 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -524,7 +524,7 @@ int __must_check device_add_disk(struct device *parent, struct gendisk *disk,
disk->part0->bd_dev = MKDEV(disk->major, disk->first_minor);
}
- disk_update_readahead(disk);
+ blk_apply_bdi_limits(disk->bdi, &disk->queue->limits);
disk_add_events(disk);
set_bit(GD_ADDED, &disk->state);
return 0;
diff --git a/block/ioctl.c b/block/ioctl.c
index d570e1695896..e8e4a4190f18 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -224,7 +224,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
goto fail;
err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL,
- BLKDEV_ZERO_NOUNMAP);
+ BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE);
fail:
filemap_invalidate_unlock(bdev->bd_mapping);
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 94eede4fb9eb..acdc28756d9d 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -488,6 +488,20 @@ unlock:
}
/*
+ * 'depth' is a number in the range 1..INT_MAX representing a number of
+ * requests. Scale it with a factor (1 << bt->sb.shift) / q->nr_requests since
+ * 1..(1 << bt->sb.shift) is the range expected by sbitmap_get_shallow().
+ * Values larger than q->nr_requests have the same effect as q->nr_requests.
+ */
+static int dd_to_word_depth(struct blk_mq_hw_ctx *hctx, unsigned int qdepth)
+{
+ struct sbitmap_queue *bt = &hctx->sched_tags->bitmap_tags;
+ const unsigned int nrr = hctx->queue->nr_requests;
+
+ return ((qdepth << bt->sb.shift) + nrr - 1) / nrr;
+}
+
+/*
* Called by __blk_mq_alloc_request(). The shallow_depth value set by this
* function is used by __blk_mq_get_tag().
*/
@@ -503,7 +517,7 @@ static void dd_limit_depth(blk_opf_t opf, struct blk_mq_alloc_data *data)
* Throttle asynchronous requests and writes such that these requests
* do not block the allocation of synchronous requests.
*/
- data->shallow_depth = dd->async_depth;
+ data->shallow_depth = dd_to_word_depth(data->hctx, dd->async_depth);
}
/* Called by blk_mq_update_nr_requests(). */
@@ -513,9 +527,9 @@ static void dd_depth_updated(struct blk_mq_hw_ctx *hctx)
struct deadline_data *dd = q->elevator->elevator_data;
struct blk_mq_tags *tags = hctx->sched_tags;
- dd->async_depth = max(1UL, 3 * q->nr_requests / 4);
+ dd->async_depth = q->nr_requests;
- sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, dd->async_depth);
+ sbitmap_queue_min_shallow_depth(&tags->bitmap_tags, 1);
}
/* Called by blk_mq_init_hctx() and blk_mq_init_sched(). */
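Illustrative aside (hypothetical values, not part of the patch): dd_to_word_depth() is a ceiling division that maps a depth expressed in requests onto the 1..(1 << sb.shift) range expected by sbitmap_get_shallow(). A minimal standalone sketch:

#include <stdio.h>

/* Toy model of the dd_to_word_depth() scaling: round qdepth * 2^shift / nrr up. */
static unsigned int scale_depth(unsigned int qdepth, unsigned int shift,
				unsigned int nrr)
{
	return ((qdepth << shift) + nrr - 1) / nrr;
}

int main(void)
{
	/* assume 64-bit sbitmap words (shift = 6) and 256 schedulable requests */
	printf("%u\n", scale_depth(256, 6, 256));	/* 64: full depth maps to the full word range */
	printf("%u\n", scale_depth(192, 6, 256));	/* 48: three quarters of the queue depth */
	printf("%u\n", scale_depth(1, 6, 256));		/* 1: small depths never round down to zero */
	return 0;
}

With async_depth now defaulting to q->nr_requests, the scaled value covers the full per-word range, so asynchronous requests are effectively unthrottled unless the async_depth attribute is lowered.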
diff --git a/block/t10-pi.c b/block/t10-pi.c
index cd7fa60d63ff..425e2836f3e1 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -13,6 +13,15 @@
#include <asm/unaligned.h>
#include "blk.h"
+struct blk_integrity_iter {
+ void *prot_buf;
+ void *data_buf;
+ sector_t seed;
+ unsigned int data_size;
+ unsigned short interval;
+ const char *disk_name;
+};
+
static __be16 t10_pi_csum(__be16 csum, void *data, unsigned int len,
unsigned char csum_type)
{
@@ -364,33 +373,77 @@ static void ext_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
}
}
-void blk_integrity_generate(struct blk_integrity_iter *iter,
- struct blk_integrity *bi)
+void blk_integrity_generate(struct bio *bio)
{
- switch (bi->csum_type) {
- case BLK_INTEGRITY_CSUM_CRC64:
- ext_pi_crc64_generate(iter, bi);
- break;
- case BLK_INTEGRITY_CSUM_CRC:
- case BLK_INTEGRITY_CSUM_IP:
- t10_pi_generate(iter, bi);
- break;
- default:
- break;
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ struct bio_integrity_payload *bip = bio_integrity(bio);
+ struct blk_integrity_iter iter;
+ struct bvec_iter bviter;
+ struct bio_vec bv;
+
+ iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
+ iter.interval = 1 << bi->interval_exp;
+ iter.seed = bio->bi_iter.bi_sector;
+ iter.prot_buf = bvec_virt(bip->bip_vec);
+ bio_for_each_segment(bv, bio, bviter) {
+ void *kaddr = bvec_kmap_local(&bv);
+
+ iter.data_buf = kaddr;
+ iter.data_size = bv.bv_len;
+ switch (bi->csum_type) {
+ case BLK_INTEGRITY_CSUM_CRC64:
+ ext_pi_crc64_generate(&iter, bi);
+ break;
+ case BLK_INTEGRITY_CSUM_CRC:
+ case BLK_INTEGRITY_CSUM_IP:
+ t10_pi_generate(&iter, bi);
+ break;
+ default:
+ break;
+ }
+ kunmap_local(kaddr);
}
}
-blk_status_t blk_integrity_verify(struct blk_integrity_iter *iter,
- struct blk_integrity *bi)
+void blk_integrity_verify(struct bio *bio)
{
- switch (bi->csum_type) {
- case BLK_INTEGRITY_CSUM_CRC64:
- return ext_pi_crc64_verify(iter, bi);
- case BLK_INTEGRITY_CSUM_CRC:
- case BLK_INTEGRITY_CSUM_IP:
- return t10_pi_verify(iter, bi);
- default:
- return BLK_STS_OK;
+ struct blk_integrity *bi = blk_get_integrity(bio->bi_bdev->bd_disk);
+ struct bio_integrity_payload *bip = bio_integrity(bio);
+ struct blk_integrity_iter iter;
+ struct bvec_iter bviter;
+ struct bio_vec bv;
+
+ /*
+	 * At the moment verify is called, bi_iter has been advanced during split
+ * and completion, so use the copy created during submission here.
+ */
+ iter.disk_name = bio->bi_bdev->bd_disk->disk_name;
+ iter.interval = 1 << bi->interval_exp;
+ iter.seed = bip->bio_iter.bi_sector;
+ iter.prot_buf = bvec_virt(bip->bip_vec);
+ __bio_for_each_segment(bv, bio, bviter, bip->bio_iter) {
+ void *kaddr = bvec_kmap_local(&bv);
+ blk_status_t ret = BLK_STS_OK;
+
+ iter.data_buf = kaddr;
+ iter.data_size = bv.bv_len;
+ switch (bi->csum_type) {
+ case BLK_INTEGRITY_CSUM_CRC64:
+ ret = ext_pi_crc64_verify(&iter, bi);
+ break;
+ case BLK_INTEGRITY_CSUM_CRC:
+ case BLK_INTEGRITY_CSUM_IP:
+ ret = t10_pi_verify(&iter, bi);
+ break;
+ default:
+ break;
+ }
+ kunmap_local(kaddr);
+
+ if (ret) {
+ bio->bi_status = ret;
+ return;
+ }
}
}
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index cdf29b178ddc..682971c4cbe4 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -1024,7 +1024,6 @@ EXPORT_SYMBOL_GPL(ata_scsi_dma_need_drain);
int ata_scsi_dev_config(struct scsi_device *sdev, struct queue_limits *lim,
struct ata_device *dev)
{
- struct request_queue *q = sdev->request_queue;
int depth = 1;
if (!ata_id_has_unload(dev->id))
@@ -1038,7 +1037,7 @@ int ata_scsi_dev_config(struct scsi_device *sdev, struct queue_limits *lim,
sdev->sector_size = ATA_SECT_SIZE;
/* set DMA padding */
- blk_queue_update_dma_pad(q, ATA_DMA_PAD_SZ - 1);
+ lim->dma_pad_mask = ATA_DMA_PAD_SZ - 1;
/* make room for appending the drain */
lim->max_segments--;
diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index 3cb455a32d92..1b85e8bf4ef9 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c
@@ -816,7 +816,7 @@ static int pata_macio_device_configure(struct scsi_device *sdev,
/* OHare has issues with non cache aligned DMA on some chipsets */
if (priv->kind == controller_ohare) {
lim->dma_alignment = 31;
- blk_queue_update_dma_pad(sdev->request_queue, 31);
+ lim->dma_pad_mask = 31;
/* Tell the world about it */
ata_dev_info(dev, "OHare alignment limits applied\n");
@@ -831,7 +831,7 @@ static int pata_macio_device_configure(struct scsi_device *sdev,
if (priv->kind == controller_sh_ata6 || priv->kind == controller_k2_ata6) {
/* Allright these are bad, apply restrictions */
lim->dma_alignment = 15;
- blk_queue_update_dma_pad(sdev->request_queue, 15);
+ lim->dma_pad_mask = 15;
/* We enable MWI and hack cache line size directly here, this
* is specific to this chipset and not normal values, we happen
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 86b5d956dc4e..a6f2d6367898 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -211,13 +211,10 @@ static void __loop_update_dio(struct loop_device *lo, bool dio)
if (lo->lo_state == Lo_bound)
blk_mq_freeze_queue(lo->lo_queue);
lo->use_dio = use_dio;
- if (use_dio) {
- blk_queue_flag_clear(QUEUE_FLAG_NOMERGES, lo->lo_queue);
+ if (use_dio)
lo->lo_flags |= LO_FLAGS_DIRECT_IO;
- } else {
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
+ else
lo->lo_flags &= ~LO_FLAGS_DIRECT_IO;
- }
if (lo->lo_state == Lo_bound)
blk_mq_unfreeze_queue(lo->lo_queue);
}
@@ -999,7 +996,6 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
const struct loop_config *config)
{
struct file *file = fget(config->fd);
- struct inode *inode;
struct address_space *mapping;
int error;
loff_t size;
@@ -1036,7 +1032,6 @@ static int loop_configure(struct loop_device *lo, blk_mode_t mode,
goto out_unlock;
mapping = file->f_mapping;
- inode = mapping->host;
if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) {
error = -EINVAL;
@@ -1123,20 +1118,12 @@ out_putf:
return error;
}
-static void __loop_clr_fd(struct loop_device *lo, bool release)
+static void __loop_clr_fd(struct loop_device *lo)
{
struct queue_limits lim;
struct file *filp;
gfp_t gfp = lo->old_gfp_mask;
- /*
- * Freeze the request queue when unbinding on a live file descriptor and
- * thus an open device. When called from ->release we are guaranteed
- * that there is no I/O in progress already.
- */
- if (!release)
- blk_mq_freeze_queue(lo->lo_queue);
-
spin_lock_irq(&lo->lo_lock);
filp = lo->lo_backing_file;
lo->lo_backing_file = NULL;
@@ -1161,8 +1148,6 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
mapping_set_gfp_mask(filp->f_mapping, gfp);
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
- if (!release)
- blk_mq_unfreeze_queue(lo->lo_queue);
disk_force_media_change(lo->lo_disk);
@@ -1177,11 +1162,7 @@ static void __loop_clr_fd(struct loop_device *lo, bool release)
* must be at least one and it can only become zero when the
* current holder is released.
*/
- if (!release)
- mutex_lock(&lo->lo_disk->open_mutex);
err = bdev_disk_changed(lo->lo_disk, false);
- if (!release)
- mutex_unlock(&lo->lo_disk->open_mutex);
if (err)
pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
__func__, lo->lo_number, err);
@@ -1230,24 +1211,16 @@ static int loop_clr_fd(struct loop_device *lo)
return -ENXIO;
}
/*
- * If we've explicitly asked to tear down the loop device,
- * and it has an elevated reference count, set it for auto-teardown when
- * the last reference goes away. This stops $!~#$@ udev from
- * preventing teardown because it decided that it needs to run blkid on
- * the loopback device whenever they appear. xfstests is notorious for
- * failing tests because blkid via udev races with a losetup
- * <dev>/do something like mkfs/losetup -d <dev> causing the losetup -d
- * command to fail with EBUSY.
+ * Mark the device for removing the backing device on last close.
+	 * If we are the only opener, also switch the state to Lo_rundown here to
+ * prevent new openers from coming in.
*/
- if (disk_openers(lo->lo_disk) > 1) {
- lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
- loop_global_unlock(lo, true);
- return 0;
- }
- lo->lo_state = Lo_rundown;
+
+ lo->lo_flags |= LO_FLAGS_AUTOCLEAR;
+ if (disk_openers(lo->lo_disk) == 1)
+ lo->lo_state = Lo_rundown;
loop_global_unlock(lo, true);
- __loop_clr_fd(lo, false);
return 0;
}
@@ -1714,25 +1687,43 @@ static int lo_compat_ioctl(struct block_device *bdev, blk_mode_t mode,
}
#endif
+static int lo_open(struct gendisk *disk, blk_mode_t mode)
+{
+ struct loop_device *lo = disk->private_data;
+ int err;
+
+ err = mutex_lock_killable(&lo->lo_mutex);
+ if (err)
+ return err;
+
+ if (lo->lo_state == Lo_deleting || lo->lo_state == Lo_rundown)
+ err = -ENXIO;
+ mutex_unlock(&lo->lo_mutex);
+ return err;
+}
+
static void lo_release(struct gendisk *disk)
{
struct loop_device *lo = disk->private_data;
+ bool need_clear = false;
if (disk_openers(disk) > 0)
return;
+ /*
+ * Clear the backing device information if this is the last close of
+ * a device that's been marked for auto clear, or on which LOOP_CLR_FD
+ * has been called.
+ */
mutex_lock(&lo->lo_mutex);
- if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR)) {
+ if (lo->lo_state == Lo_bound && (lo->lo_flags & LO_FLAGS_AUTOCLEAR))
lo->lo_state = Lo_rundown;
- mutex_unlock(&lo->lo_mutex);
- /*
- * In autoclear mode, stop the loop thread
- * and remove configuration after last close.
- */
- __loop_clr_fd(lo, true);
- return;
- }
+
+ need_clear = (lo->lo_state == Lo_rundown);
mutex_unlock(&lo->lo_mutex);
+
+ if (need_clear)
+ __loop_clr_fd(lo);
}
static void lo_free_disk(struct gendisk *disk)
@@ -1749,6 +1740,7 @@ static void lo_free_disk(struct gendisk *disk)
static const struct block_device_operations lo_fops = {
.owner = THIS_MODULE,
+ .open = lo_open,
.release = lo_release,
.ioctl = lo_ioctl,
#ifdef CONFIG_COMPAT
@@ -2034,14 +2026,6 @@ static int loop_add(int i)
lo->lo_queue = lo->lo_disk->queue;
/*
- * By default, we do buffer IO, so it doesn't make sense to enable
- * merge because the I/O submitted to backing file is handled page by
- * page. For directio mode, merge does help to dispatch bigger request
- * to underlayer disk. We will enable merge once directio is enabled.
- */
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES, lo->lo_queue);
-
- /*
* Disable partition scanning by default. The in-kernel partition
* scanning can be requested individually per-device during its
* setup. Userspace can always add and remove partitions from all
diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 83a4ebe4763a..9d0f6da77601 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -77,7 +77,7 @@ enum {
NULL_IRQ_TIMER = 2,
};
-static bool g_virt_boundary = false;
+static bool g_virt_boundary;
module_param_named(virt_boundary, g_virt_boundary, bool, 0444);
MODULE_PARM_DESC(virt_boundary, "Require a virtual boundary for the device. Default: False");
@@ -227,7 +227,7 @@ MODULE_PARM_DESC(mbps, "Cache size in MiB for memory-backed device. Default: 0 (
static bool g_fua = true;
module_param_named(fua, g_fua, bool, 0444);
-MODULE_PARM_DESC(zoned, "Enable/disable FUA support when cache_size is used. Default: true");
+MODULE_PARM_DESC(fua, "Enable/disable FUA support when cache_size is used. Default: true");
static unsigned int g_mbps;
module_param_named(mbps, g_mbps, uint, 0444);
@@ -262,6 +262,10 @@ module_param_named(zone_append_max_sectors, g_zone_append_max_sectors, int, 0444
MODULE_PARM_DESC(zone_append_max_sectors,
"Maximum size of a zone append command (in 512B sectors). Specify 0 for zone append emulation");
+static bool g_zone_full;
+module_param_named(zone_full, g_zone_full, bool, S_IRUGO);
+MODULE_PARM_DESC(zone_full, "Initialize the sequential write required zones of a zoned device to be full. Default: false");
+
static struct nullb_device *null_alloc_dev(void);
static void null_free_dev(struct nullb_device *dev);
static void null_del_dev(struct nullb *nullb);
@@ -458,6 +462,7 @@ NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_open, uint, NULL);
NULLB_DEVICE_ATTR(zone_max_active, uint, NULL);
NULLB_DEVICE_ATTR(zone_append_max_sectors, uint, NULL);
+NULLB_DEVICE_ATTR(zone_full, bool, NULL);
NULLB_DEVICE_ATTR(virt_boundary, bool, NULL);
NULLB_DEVICE_ATTR(no_sched, bool, NULL);
NULLB_DEVICE_ATTR(shared_tags, bool, NULL);
@@ -610,6 +615,7 @@ static struct configfs_attribute *nullb_device_attrs[] = {
&nullb_device_attr_zone_append_max_sectors,
&nullb_device_attr_zone_readonly,
&nullb_device_attr_zone_offline,
+ &nullb_device_attr_zone_full,
&nullb_device_attr_virt_boundary,
&nullb_device_attr_no_sched,
&nullb_device_attr_shared_tags,
@@ -700,7 +706,7 @@ static ssize_t memb_group_features_show(struct config_item *item, char *page)
"shared_tags,size,submit_queues,use_per_node_hctx,"
"virt_boundary,zoned,zone_capacity,zone_max_active,"
"zone_max_open,zone_nr_conv,zone_offline,zone_readonly,"
- "zone_size,zone_append_max_sectors\n");
+ "zone_size,zone_append_max_sectors,zone_full\n");
}
CONFIGFS_ATTR_RO(memb_group_, features);
@@ -781,6 +787,7 @@ static struct nullb_device *null_alloc_dev(void)
dev->zone_max_open = g_zone_max_open;
dev->zone_max_active = g_zone_max_active;
dev->zone_append_max_sectors = g_zone_append_max_sectors;
+ dev->zone_full = g_zone_full;
dev->virt_boundary = g_virt_boundary;
dev->no_sched = g_no_sched;
dev->shared_tags = g_shared_tags;
diff --git a/drivers/block/null_blk/null_blk.h b/drivers/block/null_blk/null_blk.h
index 3234e6c85eed..a7bb32f73ec3 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -101,6 +101,7 @@ struct nullb_device {
bool memory_backed; /* if data is stored in memory */
bool discard; /* if support discard */
bool zoned; /* if device is zoned */
+ bool zone_full; /* Initialize zones to be full */
bool virt_boundary; /* virtual boundary on/off for the device */
bool no_sched; /* no IO scheduler for the device */
bool shared_tags; /* share tag set between devices for blk-mq */
diff --git a/drivers/block/null_blk/zoned.c b/drivers/block/null_blk/zoned.c
index 9f7151ad93cf..9bc768b2ca56 100644
--- a/drivers/block/null_blk/zoned.c
+++ b/drivers/block/null_blk/zoned.c
@@ -145,7 +145,7 @@ int null_init_zoned_dev(struct nullb_device *dev,
zone = &dev->zones[i];
null_init_zone_lock(dev, zone);
- zone->start = zone->wp = sector;
+ zone->start = sector;
if (zone->start + dev->zone_size_sects > dev_capacity_sects)
zone->len = dev_capacity_sects - zone->start;
else
@@ -153,12 +153,18 @@ int null_init_zoned_dev(struct nullb_device *dev,
zone->capacity =
min_t(sector_t, zone->len, zone_capacity_sects);
zone->type = BLK_ZONE_TYPE_SEQWRITE_REQ;
- zone->cond = BLK_ZONE_COND_EMPTY;
+ if (dev->zone_full) {
+ zone->cond = BLK_ZONE_COND_FULL;
+ zone->wp = zone->start + zone->capacity;
+		} else {
+ zone->cond = BLK_ZONE_COND_EMPTY;
+ zone->wp = zone->start;
+ }
sector += dev->zone_size_sects;
}
- lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL;
+ lim->features |= BLK_FEAT_ZONED;
lim->chunk_sectors = dev->zone_size_sects;
lim->max_zone_append_sectors = dev->zone_append_max_sectors;
lim->max_open_zones = dev->zone_max_open;
diff --git a/drivers/block/rnbd/rnbd-clt.c b/drivers/block/rnbd/rnbd-clt.c
index 4918b0f68b46..c34695d2eea7 100644
--- a/drivers/block/rnbd/rnbd-clt.c
+++ b/drivers/block/rnbd/rnbd-clt.c
@@ -1397,8 +1397,6 @@ static int rnbd_client_setup_device(struct rnbd_clt_dev *dev,
dev->queue = dev->gd->queue;
rnbd_init_mq_hw_queues(dev);
- blk_queue_flag_set(QUEUE_FLAG_SAME_COMP, dev->queue);
- blk_queue_flag_set(QUEUE_FLAG_SAME_FORCE, dev->queue);
return rnbd_clt_setup_gen_disk(dev, rsp, idx);
}
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 4fdff13fc23b..d10a2ea07292 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -2194,7 +2194,7 @@ static int ublk_ctrl_start_dev(struct ublk_device *ub, struct io_uring_cmd *cmd)
if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED))
return -EOPNOTSUPP;
- lim.features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL;
+ lim.features |= BLK_FEAT_ZONED;
lim.max_active_zones = p->max_active_zones;
lim.max_open_zones = p->max_open_zones;
lim.max_zone_append_sectors = p->max_zone_append_sectors;
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 6c64a67ab9c9..84c3efd0c611 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -728,7 +728,7 @@ static int virtblk_read_zoned_limits(struct virtio_blk *vblk,
dev_dbg(&vdev->dev, "probing host-managed zoned device\n");
- lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL;
+ lim->features |= BLK_FEAT_ZONED;
virtio_cread(vdev, struct virtio_blk_config,
zoned.max_open_zones, &v);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index fa3a2ba52545..59ce113b882a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -1070,8 +1070,7 @@ static char *encode_disk_name(char *ptr, unsigned int n)
}
static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
- struct blkfront_info *info, u16 sector_size,
- unsigned int physical_sector_size)
+ struct blkfront_info *info)
{
struct queue_limits lim = {};
struct gendisk *gd;
@@ -1165,8 +1164,6 @@ static int xlvbd_alloc_gendisk(blkif_sector_t capacity,
info->rq = gd->queue;
info->gd = gd;
- info->sector_size = sector_size;
- info->physical_sector_size = physical_sector_size;
xlvbd_flush(info);
@@ -2320,8 +2317,6 @@ static void blkfront_gather_backend_features(struct blkfront_info *info)
static void blkfront_connect(struct blkfront_info *info)
{
unsigned long long sectors;
- unsigned long sector_size;
- unsigned int physical_sector_size;
int err, i;
struct blkfront_ring_info *rinfo;
@@ -2360,7 +2355,7 @@ static void blkfront_connect(struct blkfront_info *info)
err = xenbus_gather(XBT_NIL, info->xbdev->otherend,
"sectors", "%llu", &sectors,
"info", "%u", &info->vdisk_info,
- "sector-size", "%lu", &sector_size,
+ "sector-size", "%lu", &info->sector_size,
NULL);
if (err) {
xenbus_dev_fatal(info->xbdev, err,
@@ -2374,9 +2369,9 @@ static void blkfront_connect(struct blkfront_info *info)
* provide this. Assume physical sector size to be the same as
* sector_size in that case.
*/
- physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend,
+ info->physical_sector_size = xenbus_read_unsigned(info->xbdev->otherend,
"physical-sector-size",
- sector_size);
+ info->sector_size);
blkfront_gather_backend_features(info);
for_each_rinfo(info, rinfo, i) {
err = blkfront_setup_indirect(rinfo);
@@ -2388,8 +2383,7 @@ static void blkfront_connect(struct blkfront_info *info)
}
}
- err = xlvbd_alloc_gendisk(sectors, info, sector_size,
- physical_sector_size);
+ err = xlvbd_alloc_gendisk(sectors, info);
if (err) {
xenbus_dev_fatal(info->xbdev, err, "xlvbd_add at %s",
info->xbdev->otherend);
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index 283b2511c6d2..b5d6ef430b86 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -1416,8 +1416,8 @@ static int cached_dev_init(struct cached_dev *dc, unsigned int block_size)
}
if (bdev_io_opt(dc->bdev))
- dc->partial_stripes_expensive = q->limits.features &
- BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE;
+ dc->partial_stripes_expensive = !!(q->limits.features &
+ BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE);
ret = bcache_device_init(&dc->disk, block_size,
bdev_nr_sectors(dc->bdev) - dc->sb.data_offset,
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 4d37e53b50ee..c0d41c36e06e 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -292,10 +292,12 @@ static int device_get_zone_resource_limits(struct dm_target *ti,
/*
* If the target does not map all sequential zones, the limits
- * will not be reliable.
+ * will not be reliable and we cannot use REQ_OP_ZONE_RESET_ALL.
*/
- if (zc.target_nr_seq_zones < zc.total_nr_seq_zones)
+ if (zc.target_nr_seq_zones < zc.total_nr_seq_zones) {
zlim->reliable_limits = false;
+ ti->zone_reset_all_supported = false;
+ }
/*
* If the target maps less sequential zones than the limit values, then
@@ -353,6 +355,14 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
for (unsigned int i = 0; i < t->num_targets; i++) {
struct dm_target *ti = dm_table_get_target(t, i);
+ /*
+ * Assume that the target can accept REQ_OP_ZONE_RESET_ALL.
+ * device_get_zone_resource_limits() may adjust this if one of
+		 * the devices used by the target does not have all its
+ * sequential write required zones mapped.
+ */
+ ti->zone_reset_all_supported = true;
+
if (!ti->type->iterate_devices ||
ti->type->iterate_devices(ti,
device_get_zone_resource_limits, &zlim)) {
@@ -420,3 +430,39 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone)
return;
}
+
+static int dm_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
+{
+ /*
+ * For an all-zones reset, ignore conventional, empty, read-only
+ * and offline zones.
+ */
+ switch (zone->cond) {
+ case BLK_ZONE_COND_NOT_WP:
+ case BLK_ZONE_COND_EMPTY:
+ case BLK_ZONE_COND_READONLY:
+ case BLK_ZONE_COND_OFFLINE:
+ return 0;
+ default:
+ set_bit(idx, (unsigned long *)data);
+ return 0;
+ }
+}
+
+int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t,
+ sector_t sector, unsigned int nr_zones,
+ unsigned long *need_reset)
+{
+ int ret;
+
+ ret = dm_blk_do_report_zones(md, t, sector, nr_zones,
+ dm_zone_need_reset_cb, need_reset);
+ if (ret != nr_zones) {
+		DMERR("Get %s zone reset bitmap failed",
+ md->disk->disk_name);
+ return -EIO;
+ }
+
+ return 0;
+}
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7d107ae06e1a..4b1b69e576a5 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1598,20 +1598,19 @@ static void __send_abnormal_io(struct clone_info *ci, struct dm_target *ti,
static bool is_abnormal_io(struct bio *bio)
{
- enum req_op op = bio_op(bio);
-
- if (op != REQ_OP_READ && op != REQ_OP_WRITE && op != REQ_OP_FLUSH) {
- switch (op) {
- case REQ_OP_DISCARD:
- case REQ_OP_SECURE_ERASE:
- case REQ_OP_WRITE_ZEROES:
- return true;
- default:
- break;
- }
+ switch (bio_op(bio)) {
+ case REQ_OP_READ:
+ case REQ_OP_WRITE:
+ case REQ_OP_FLUSH:
+ return false;
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
+ case REQ_OP_WRITE_ZEROES:
+ case REQ_OP_ZONE_RESET_ALL:
+ return true;
+ default:
+ return false;
}
-
- return false;
}
static blk_status_t __process_abnormal_io(struct clone_info *ci,
@@ -1776,6 +1775,119 @@ static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
{
return dm_emulate_zone_append(md) && blk_zone_plug_bio(bio, 0);
}
+
+static blk_status_t __send_zone_reset_all_emulated(struct clone_info *ci,
+ struct dm_target *ti)
+{
+ struct bio_list blist = BIO_EMPTY_LIST;
+ struct mapped_device *md = ci->io->md;
+ unsigned int zone_sectors = md->disk->queue->limits.chunk_sectors;
+ unsigned long *need_reset;
+ unsigned int i, nr_zones, nr_reset;
+ unsigned int num_bios = 0;
+ blk_status_t sts = BLK_STS_OK;
+ sector_t sector = ti->begin;
+ struct bio *clone;
+ int ret;
+
+ nr_zones = ti->len >> ilog2(zone_sectors);
+ need_reset = bitmap_zalloc(nr_zones, GFP_NOIO);
+ if (!need_reset)
+ return BLK_STS_RESOURCE;
+
+ ret = dm_zone_get_reset_bitmap(md, ci->map, ti->begin,
+ nr_zones, need_reset);
+ if (ret) {
+ sts = BLK_STS_IOERR;
+ goto free_bitmap;
+ }
+
+ /* If we have no zone to reset, we are done. */
+ nr_reset = bitmap_weight(need_reset, nr_zones);
+ if (!nr_reset)
+ goto free_bitmap;
+
+ atomic_add(nr_zones, &ci->io->io_count);
+
+ for (i = 0; i < nr_zones; i++) {
+
+ if (!test_bit(i, need_reset)) {
+ sector += zone_sectors;
+ continue;
+ }
+
+ if (bio_list_empty(&blist)) {
+ /* This may take a while, so be nice to others */
+ if (num_bios)
+ cond_resched();
+
+ /*
+ * We may need to reset thousands of zones, so let's
+ * not go crazy with the clone allocation.
+ */
+ alloc_multiple_bios(&blist, ci, ti, min(nr_reset, 32),
+ NULL, GFP_NOIO);
+ }
+
+ /* Get a clone and change it to a regular reset operation. */
+ clone = bio_list_pop(&blist);
+ clone->bi_opf &= ~REQ_OP_MASK;
+ clone->bi_opf |= REQ_OP_ZONE_RESET | REQ_SYNC;
+ clone->bi_iter.bi_sector = sector;
+ clone->bi_iter.bi_size = 0;
+ __map_bio(clone);
+
+ sector += zone_sectors;
+ num_bios++;
+ nr_reset--;
+ }
+
+ WARN_ON_ONCE(!bio_list_empty(&blist));
+ atomic_sub(nr_zones - num_bios, &ci->io->io_count);
+ ci->sector_count = 0;
+
+free_bitmap:
+ bitmap_free(need_reset);
+
+ return sts;
+}
+
+static void __send_zone_reset_all_native(struct clone_info *ci,
+ struct dm_target *ti)
+{
+ unsigned int bios;
+
+ atomic_add(1, &ci->io->io_count);
+ bios = __send_duplicate_bios(ci, ti, 1, NULL, GFP_NOIO);
+ atomic_sub(1 - bios, &ci->io->io_count);
+
+ ci->sector_count = 0;
+}
+
+static blk_status_t __send_zone_reset_all(struct clone_info *ci)
+{
+ struct dm_table *t = ci->map;
+ blk_status_t sts = BLK_STS_OK;
+
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+
+ if (ti->zone_reset_all_supported) {
+ __send_zone_reset_all_native(ci, ti);
+ continue;
+ }
+
+ sts = __send_zone_reset_all_emulated(ci, ti);
+ if (sts != BLK_STS_OK)
+ break;
+ }
+
+ /* Release the reference that alloc_io() took for submission. */
+ atomic_sub(1, &ci->io->io_count);
+
+ return sts;
+}
+
#else
static inline bool dm_zone_bio_needs_split(struct mapped_device *md,
struct bio *bio)
@@ -1786,6 +1898,10 @@ static inline bool dm_zone_plug_bio(struct mapped_device *md, struct bio *bio)
{
return false;
}
+static blk_status_t __send_zone_reset_all(struct clone_info *ci)
+{
+ return BLK_STS_NOTSUPP;
+}
#endif
/*
@@ -1799,9 +1915,14 @@ static void dm_split_and_process_bio(struct mapped_device *md,
blk_status_t error = BLK_STS_OK;
bool is_abnormal, need_split;
- need_split = is_abnormal = is_abnormal_io(bio);
- if (static_branch_unlikely(&zoned_enabled))
- need_split = is_abnormal || dm_zone_bio_needs_split(md, bio);
+ is_abnormal = is_abnormal_io(bio);
+ if (static_branch_unlikely(&zoned_enabled)) {
+ /* Special case REQ_OP_ZONE_RESET_ALL as it cannot be split. */
+ need_split = (bio_op(bio) != REQ_OP_ZONE_RESET_ALL) &&
+ (is_abnormal || dm_zone_bio_needs_split(md, bio));
+ } else {
+ need_split = is_abnormal;
+ }
if (unlikely(need_split)) {
/*
@@ -1842,6 +1963,12 @@ static void dm_split_and_process_bio(struct mapped_device *md,
goto out;
}
+ if (static_branch_unlikely(&zoned_enabled) &&
+ (bio_op(bio) == REQ_OP_ZONE_RESET_ALL)) {
+ error = __send_zone_reset_all(&ci);
+ goto out;
+ }
+
error = __split_and_process_bio(&ci);
if (error || !ci.sector_count)
goto out;
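Illustrative aside (made-up values, not part of the patch): the emulation above is a two-phase pattern, first collect a bitmap of zones whose state requires a reset, then issue REQ_OP_ZONE_RESET only for the set bits, so the work scales with the number of dirty zones rather than with the total zone count. The shape of it in plain C:

#include <stdio.h>
#include <stdbool.h>

#define NR_ZONES 8

/* hypothetical per-zone state: true means the zone is neither empty nor conventional */
static const bool zone_dirty[NR_ZONES] = { false, true, false, false, true, true, false, false };

int main(void)
{
	unsigned long need_reset = 0;	/* toy bitmap, one bit per zone */
	unsigned int i, nr_reset = 0;

	/* phase 1: report zones and mark the ones that need a reset */
	for (i = 0; i < NR_ZONES; i++)
		if (zone_dirty[i])
			need_reset |= 1UL << i;

	/* phase 2: issue a reset only for the marked zones */
	for (i = 0; i < NR_ZONES; i++) {
		if (!(need_reset & (1UL << i)))
			continue;
		printf("reset zone %u\n", i);
		nr_reset++;
	}
	printf("%u of %u zones reset\n", nr_reset, NR_ZONES);
	return 0;
}

The real code additionally batches clone allocation (at most 32 bios at a time) and drops the io_count references it took for zones that turned out not to need a reset.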
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index c984ecb64b1e..cc466ad5cb1d 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -110,6 +110,9 @@ int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
int dm_zone_map_bio(struct dm_target_io *io);
+int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t,
+ sector_t sector, unsigned int nr_zones,
+ unsigned long *need_reset);
#else
#define dm_blk_report_zones NULL
static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index 8e36a0feec09..139fe2019c1d 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -1570,7 +1570,7 @@ out:
return err;
}
-static struct md_cluster_operations cluster_ops = {
+static const struct md_cluster_operations cluster_ops = {
.join = join,
.leave = leave,
.slot_number = slot_number,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 69ea54aedd99..64693913ed18 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -85,7 +85,7 @@ static DEFINE_SPINLOCK(pers_lock);
static const struct kobj_type md_ktype;
-struct md_cluster_operations *md_cluster_ops;
+const struct md_cluster_operations *md_cluster_ops;
EXPORT_SYMBOL(md_cluster_ops);
static struct module *md_cluster_mod;
@@ -627,7 +627,7 @@ static void md_submit_flush_data(struct work_struct *ws)
* always is 0, make_request() will not be called here.
*/
if (WARN_ON_ONCE(!mddev->pers->make_request(mddev, bio)))
- bio_io_error(bio);;
+ bio_io_error(bio);
}
/* The pair is percpu_ref_get() from md_flush_request() */
@@ -5853,6 +5853,14 @@ static void mddev_delayed_delete(struct work_struct *ws)
kobject_put(&mddev->kobj);
}
+void md_init_stacking_limits(struct queue_limits *lim)
+{
+ blk_set_stacking_limits(lim);
+ lim->features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA |
+ BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT;
+}
+EXPORT_SYMBOL_GPL(md_init_stacking_limits);
+
struct mddev *md_alloc(dev_t dev, char *name)
{
/*
@@ -5871,10 +5879,6 @@ struct mddev *md_alloc(dev_t dev, char *name)
int shift;
int unit;
int error;
- struct queue_limits lim = {
- .features = BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA |
- BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT,
- };
/*
* Wait for any previous instance of this device to be completely
@@ -5914,7 +5918,7 @@ struct mddev *md_alloc(dev_t dev, char *name)
*/
mddev->hold_active = UNTIL_STOP;
- disk = blk_alloc_disk(&lim, NUMA_NO_NODE);
+ disk = blk_alloc_disk(NULL, NUMA_NO_NODE);
if (IS_ERR(disk)) {
error = PTR_ERR(disk);
goto out_free_mddev;
@@ -7761,12 +7765,6 @@ static int md_ioctl(struct block_device *bdev, blk_mode_t mode,
return get_bitmap_file(mddev, argp);
}
- if (cmd == HOT_REMOVE_DISK)
- /* need to ensure recovery thread has run */
- wait_event_interruptible_timeout(mddev->sb_wait,
- !test_bit(MD_RECOVERY_NEEDED,
- &mddev->recovery),
- msecs_to_jiffies(5000));
if (cmd == STOP_ARRAY || cmd == STOP_ARRAY_RO) {
/* Need to flush page cache, and ensure no-one else opens
* and writes
@@ -8539,7 +8537,7 @@ int unregister_md_personality(struct md_personality *p)
}
EXPORT_SYMBOL(unregister_md_personality);
-int register_md_cluster_operations(struct md_cluster_operations *ops,
+int register_md_cluster_operations(const struct md_cluster_operations *ops,
struct module *module)
{
int ret = 0;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index c4d7ebf9587d..a0d6827dced9 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -849,7 +849,7 @@ static inline void safe_put_page(struct page *p)
extern int register_md_personality(struct md_personality *p);
extern int unregister_md_personality(struct md_personality *p);
-extern int register_md_cluster_operations(struct md_cluster_operations *ops,
+extern int register_md_cluster_operations(const struct md_cluster_operations *ops,
struct module *module);
extern int unregister_md_cluster_operations(void);
extern int md_setup_cluster(struct mddev *mddev, int nodes);
@@ -893,6 +893,7 @@ extern int strict_strtoul_scaled(const char *cp, unsigned long *res, int scale);
extern int mddev_init(struct mddev *mddev);
extern void mddev_destroy(struct mddev *mddev);
+void md_init_stacking_limits(struct queue_limits *lim);
struct mddev *md_alloc(dev_t dev, char *name);
void mddev_put(struct mddev *mddev);
extern int md_run(struct mddev *mddev);
@@ -931,7 +932,7 @@ static inline void rdev_dec_pending(struct md_rdev *rdev, struct mddev *mddev)
}
}
-extern struct md_cluster_operations *md_cluster_ops;
+extern const struct md_cluster_operations *md_cluster_ops;
static inline int mddev_is_clustered(struct mddev *mddev)
{
return mddev->cluster_info && mddev->bitmap_info.nodes > 1;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 62634e2a33bd..32d587524778 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -379,7 +379,7 @@ static int raid0_set_limits(struct mddev *mddev)
struct queue_limits lim;
int err;
- blk_set_stacking_limits(&lim);
+ md_init_stacking_limits(&lim);
lim.max_hw_sectors = mddev->chunk_sectors;
lim.max_write_zeroes_sectors = mddev->chunk_sectors;
lim.io_min = mddev->chunk_sectors << 9;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 1a0eba65b8a9..04a0c2ca1732 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -3194,7 +3194,7 @@ static int raid1_set_limits(struct mddev *mddev)
struct queue_limits lim;
int err;
- blk_set_stacking_limits(&lim);
+ md_init_stacking_limits(&lim);
lim.max_write_zeroes_sectors = 0;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
if (err) {
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3334aa803c83..2a9c4ee982e0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -3974,7 +3974,7 @@ static int raid10_set_queue_limits(struct mddev *mddev)
struct queue_limits lim;
int err;
- blk_set_stacking_limits(&lim);
+ md_init_stacking_limits(&lim);
lim.max_write_zeroes_sectors = 0;
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0192a6323f09..c14cf2410365 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -155,7 +155,7 @@ static int raid6_idx_to_slot(int idx, struct stripe_head *sh,
return slot;
}
-static void print_raid5_conf (struct r5conf *conf);
+static void print_raid5_conf(struct r5conf *conf);
static int stripe_operations_active(struct stripe_head *sh)
{
@@ -5899,6 +5899,39 @@ out:
return ret;
}
+enum reshape_loc {
+ LOC_NO_RESHAPE,
+ LOC_AHEAD_OF_RESHAPE,
+ LOC_INSIDE_RESHAPE,
+ LOC_BEHIND_RESHAPE,
+};
+
+static enum reshape_loc get_reshape_loc(struct mddev *mddev,
+ struct r5conf *conf, sector_t logical_sector)
+{
+ sector_t reshape_progress, reshape_safe;
+ /*
+ * Spinlock is needed as reshape_progress may be
+ * 64bit on a 32bit platform, and so it might be
+ * possible to see a half-updated value
+ * Of course reshape_progress could change after
+ * the lock is dropped, so once we get a reference
+ * to the stripe that we think it is, we will have
+ * to check again.
+ */
+ spin_lock_irq(&conf->device_lock);
+ reshape_progress = conf->reshape_progress;
+ reshape_safe = conf->reshape_safe;
+ spin_unlock_irq(&conf->device_lock);
+ if (reshape_progress == MaxSector)
+ return LOC_NO_RESHAPE;
+ if (ahead_of_reshape(mddev, logical_sector, reshape_progress))
+ return LOC_AHEAD_OF_RESHAPE;
+ if (ahead_of_reshape(mddev, logical_sector, reshape_safe))
+ return LOC_INSIDE_RESHAPE;
+ return LOC_BEHIND_RESHAPE;
+}
+
static enum stripe_result make_stripe_request(struct mddev *mddev,
struct r5conf *conf, struct stripe_request_ctx *ctx,
sector_t logical_sector, struct bio *bi)
@@ -5913,28 +5946,14 @@ static enum stripe_result make_stripe_request(struct mddev *mddev,
seq = read_seqcount_begin(&conf->gen_lock);
if (unlikely(conf->reshape_progress != MaxSector)) {
- /*
- * Spinlock is needed as reshape_progress may be
- * 64bit on a 32bit platform, and so it might be
- * possible to see a half-updated value
- * Of course reshape_progress could change after
- * the lock is dropped, so once we get a reference
- * to the stripe that we think it is, we will have
- * to check again.
- */
- spin_lock_irq(&conf->device_lock);
- if (ahead_of_reshape(mddev, logical_sector,
- conf->reshape_progress)) {
- previous = 1;
- } else {
- if (ahead_of_reshape(mddev, logical_sector,
- conf->reshape_safe)) {
- spin_unlock_irq(&conf->device_lock);
- ret = STRIPE_SCHEDULE_AND_RETRY;
- goto out;
- }
+ enum reshape_loc loc = get_reshape_loc(mddev, conf,
+ logical_sector);
+ if (loc == LOC_INSIDE_RESHAPE) {
+ ret = STRIPE_SCHEDULE_AND_RETRY;
+ goto out;
}
- spin_unlock_irq(&conf->device_lock);
+ if (loc == LOC_AHEAD_OF_RESHAPE)
+ previous = 1;
}
new_sector = raid5_compute_sector(conf, logical_sector, previous,
@@ -6112,8 +6131,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
/* Bail out if conflicts with reshape and REQ_NOWAIT is set */
if ((bi->bi_opf & REQ_NOWAIT) &&
(conf->reshape_progress != MaxSector) &&
- !ahead_of_reshape(mddev, logical_sector, conf->reshape_progress) &&
- ahead_of_reshape(mddev, logical_sector, conf->reshape_safe)) {
+ get_reshape_loc(mddev, conf, logical_sector) == LOC_INSIDE_RESHAPE) {
bio_wouldblock_error(bi);
if (rw == WRITE)
md_write_end(mddev);
@@ -7568,11 +7586,11 @@ static struct r5conf *setup_conf(struct mddev *mddev)
if (test_bit(Replacement, &rdev->flags)) {
if (disk->replacement)
goto abort;
- RCU_INIT_POINTER(disk->replacement, rdev);
+ disk->replacement = rdev;
} else {
if (disk->rdev)
goto abort;
- RCU_INIT_POINTER(disk->rdev, rdev);
+ disk->rdev = rdev;
}
if (test_bit(In_sync, &rdev->flags)) {
@@ -7708,7 +7726,7 @@ static int raid5_set_limits(struct mddev *mddev)
*/
stripe = roundup_pow_of_two(data_disks * (mddev->chunk_sectors << 9));
- blk_set_stacking_limits(&lim);
+ md_init_stacking_limits(&lim);
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded);
lim.features |= BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE;
@@ -8054,7 +8072,7 @@ static void raid5_status(struct seq_file *seq, struct mddev *mddev)
seq_printf (seq, "]");
}
-static void print_raid5_conf (struct r5conf *conf)
+static void print_raid5_conf(struct r5conf *conf)
{
struct md_rdev *rdev;
int i;
@@ -8068,15 +8086,13 @@ static void print_raid5_conf (struct r5conf *conf)
conf->raid_disks,
conf->raid_disks - conf->mddev->degraded);
- rcu_read_lock();
for (i = 0; i < conf->raid_disks; i++) {
- rdev = rcu_dereference(conf->disks[i].rdev);
+ rdev = conf->disks[i].rdev;
if (rdev)
pr_debug(" disk %d, o:%d, dev:%pg\n",
i, !test_bit(Faulty, &rdev->flags),
rdev->bdev);
}
- rcu_read_unlock();
}
static int raid5_spare_active(struct mddev *mddev)
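Rough sketch (not part of the patch; it assumes a forward-growing reshape where "ahead of reshape" means the sector is at or beyond the given boundary): get_reshape_loc() classifies a logical sector into one of four regions, and only the window between reshape_safe and reshape_progress forces a retry.

#include <stdio.h>
#include <stdint.h>

typedef uint64_t sector_t;
#define MAX_SECTOR UINT64_MAX	/* stand-in for MaxSector */

enum reshape_loc {
	LOC_NO_RESHAPE,
	LOC_AHEAD_OF_RESHAPE,
	LOC_INSIDE_RESHAPE,
	LOC_BEHIND_RESHAPE,
};

/* toy classifier mirroring the structure of get_reshape_loc() */
static enum reshape_loc classify(sector_t sector, sector_t progress, sector_t safe)
{
	if (progress == MAX_SECTOR)
		return LOC_NO_RESHAPE;
	if (sector >= progress)		/* ahead_of_reshape(progress) */
		return LOC_AHEAD_OF_RESHAPE;
	if (sector >= safe)		/* ahead_of_reshape(safe) */
		return LOC_INSIDE_RESHAPE;
	return LOC_BEHIND_RESHAPE;
}

int main(void)
{
	/* hypothetical snapshot: reshape front at sector 1000, metadata safe up to 800 */
	printf("%d\n", classify(1200, 1000, 800));	/* 1: ahead of the reshape front */
	printf("%d\n", classify(900, 1000, 800));	/* 2: inside the in-flight window, retry */
	printf("%d\n", classify(500, 1000, 800));	/* 3: behind the front */
	return 0;
}

This single classification is what lets raid5_make_request() answer the REQ_NOWAIT conflict check with one call instead of open-coding the two ahead_of_reshape() tests.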
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index bfea6cccbc0a..448b8239bc99 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2024,7 +2024,8 @@ static bool nvme_update_disk_info(struct nvme_ns *ns, struct nvme_id_ns *id,
/* NPWG = Namespace Preferred Write Granularity */
phys_bs = bs * (1 + le16_to_cpu(id->npwg));
/* NOWS = Namespace Optimal Write Size */
- io_opt = bs * (1 + le16_to_cpu(id->nows));
+ if (id->nows)
+ io_opt = bs * (1 + le16_to_cpu(id->nows));
}
/*
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index c1e1ecd0f1df..21f8e1b9801f 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -826,9 +826,9 @@ static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
struct nvme_command *cmnd)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+ struct bio_vec bv = rq_integrity_vec(req);
- iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
- rq_dma_dir(req), 0);
+ iod->meta_dma = dma_map_bvec(dev->dev, &bv, rq_dma_dir(req), 0);
if (dma_mapping_error(dev->dev, iod->meta_dma))
return BLK_STS_IOERR;
cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
@@ -967,7 +967,7 @@ static __always_inline void nvme_pci_unmap_rq(struct request *req)
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
dma_unmap_page(dev->dev, iod->meta_dma,
- rq_integrity_vec(req)->bv_len, rq_dma_dir(req));
+ rq_integrity_vec(req).bv_len, rq_dma_dir(req));
}
if (blk_rq_nr_phys_segments(req))
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 99bb89c2495a..9a06f9d98cd6 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -108,7 +108,7 @@ free_data:
void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
struct nvme_zone_info *zi)
{
- lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL;
+ lim->features |= BLK_FEAT_ZONED;
lim->max_open_zones = zi->max_open_zones;
lim->max_active_zones = zi->max_active_zones;
lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c b/drivers/scsi/megaraid/megaraid_sas_base.c
index 88acefbf9aea..6c79c350a4d5 100644
--- a/drivers/scsi/megaraid/megaraid_sas_base.c
+++ b/drivers/scsi/megaraid/megaraid_sas_base.c
@@ -1981,8 +1981,6 @@ megasas_set_nvme_device_properties(struct scsi_device *sdev,
lim->max_hw_sectors = max_io_size / 512;
lim->virt_boundary_mask = mr_nvme_pg_size - 1;
-
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES, sdev->request_queue);
}
/*
diff --git a/drivers/scsi/mpt3sas/mpt3sas_scsih.c b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
index 12d08d8ba538..b050aedc9d43 100644
--- a/drivers/scsi/mpt3sas/mpt3sas_scsih.c
+++ b/drivers/scsi/mpt3sas/mpt3sas_scsih.c
@@ -2680,12 +2680,6 @@ scsih_device_configure(struct scsi_device *sdev, struct queue_limits *lim)
pcie_device_put(pcie_device);
spin_unlock_irqrestore(&ioc->pcie_device_lock, flags);
mpt3sas_scsih_change_queue_depth(sdev, qdepth);
- /* Enable QUEUE_FLAG_NOMERGES flag, so that IOs won't be
- ** merged and can eliminate holes created during merging
- ** operation.
- **/
- blk_queue_flag_set(QUEUE_FLAG_NOMERGES,
- sdev->request_queue);
lim->virt_boundary_mask = ioc->page_size - 1;
return 0;
}
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e2f7bfb2b9e4..3958a6d14bf4 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1139,9 +1139,9 @@ blk_status_t scsi_alloc_sgtables(struct scsi_cmnd *cmd)
*/
count = __blk_rq_map_sg(rq->q, rq, cmd->sdb.table.sgl, &last_sg);
- if (blk_rq_bytes(rq) & rq->q->dma_pad_mask) {
+ if (blk_rq_bytes(rq) & rq->q->limits.dma_pad_mask) {
unsigned int pad_len =
- (rq->q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
+ (rq->q->limits.dma_pad_mask & ~blk_rq_bytes(rq)) + 1;
last_sg->length += pad_len;
cmd->extra_len += pad_len;
diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c
index f7067afac79c..c8b9654d30f0 100644
--- a/drivers/scsi/sd_zbc.c
+++ b/drivers/scsi/sd_zbc.c
@@ -599,7 +599,7 @@ int sd_zbc_read_zones(struct scsi_disk *sdkp, struct queue_limits *lim,
if (sdkp->device->type != TYPE_ZBC)
return 0;
- lim->features |= BLK_FEAT_ZONED | BLK_FEAT_ZONE_RESETALL;
+ lim->features |= BLK_FEAT_ZONED;
/*
* Per ZBC and ZAC specifications, writes in sequential write required
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
index 0cf07194bbe8..b7957a431589 100644
--- a/drivers/ufs/core/ufshcd.c
+++ b/drivers/ufs/core/ufshcd.c
@@ -5193,17 +5193,19 @@ static int ufshcd_change_queue_depth(struct scsi_device *sdev, int depth)
}
/**
- * ufshcd_slave_configure - adjust SCSI device configurations
+ * ufshcd_device_configure - adjust SCSI device configurations
* @sdev: pointer to SCSI device
+ * @lim: queue limits
*
* Return: 0 (success).
*/
-static int ufshcd_slave_configure(struct scsi_device *sdev)
+static int ufshcd_device_configure(struct scsi_device *sdev,
+ struct queue_limits *lim)
{
struct ufs_hba *hba = shost_priv(sdev->host);
struct request_queue *q = sdev->request_queue;
- blk_queue_update_dma_pad(q, PRDT_DATA_BYTE_COUNT_PAD - 1);
+ lim->dma_pad_mask = PRDT_DATA_BYTE_COUNT_PAD - 1;
/*
* Block runtime-pm until all consumers are added.
@@ -8907,7 +8909,7 @@ static const struct scsi_host_template ufshcd_driver_template = {
.queuecommand = ufshcd_queuecommand,
.mq_poll = ufshcd_poll,
.slave_alloc = ufshcd_slave_alloc,
- .slave_configure = ufshcd_slave_configure,
+ .device_configure = ufshcd_device_configure,
.slave_destroy = ufshcd_slave_destroy,
.change_queue_depth = ufshcd_change_queue_depth,
.eh_abort_handler = ufshcd_abort,
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index d201140d77a3..804f856ed3e5 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -14,15 +14,6 @@ enum blk_integrity_flags {
BLK_INTEGRITY_STACKED = 1 << 4,
};
-struct blk_integrity_iter {
- void *prot_buf;
- void *data_buf;
- sector_t seed;
- unsigned int data_size;
- unsigned short interval;
- const char *disk_name;
-};
-
const char *blk_integrity_profile_name(struct blk_integrity *bi);
bool queue_limits_stack_integrity(struct queue_limits *t,
struct queue_limits *b);
@@ -90,14 +81,13 @@ static inline bool blk_integrity_rq(struct request *rq)
}
/*
- * Return the first bvec that contains integrity data. Only drivers that are
- * limited to a single integrity segment should use this helper.
+ * Return the current bvec that contains the integrity data. bip_iter may be
+ * advanced to iterate over the integrity data.
*/
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
+static inline struct bio_vec rq_integrity_vec(struct request *rq)
{
- if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1))
- return NULL;
- return rq->bio->bi_integrity->bip_vec;
+ return mp_bvec_iter_bvec(rq->bio->bi_integrity->bip_vec,
+ rq->bio->bi_integrity->bip_iter);
}
#else /* CONFIG_BLK_DEV_INTEGRITY */
static inline int blk_rq_count_integrity_sg(struct request_queue *q,
@@ -146,9 +136,10 @@ static inline int blk_integrity_rq(struct request *rq)
return 0;
}
-static inline struct bio_vec *rq_integrity_vec(struct request *rq)
+static inline struct bio_vec rq_integrity_vec(struct request *rq)
{
- return NULL;
+ /* the optimizer will remove all calls to this function */
+ return (struct bio_vec){ };
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b2f1362c4681..02e04df27282 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -283,55 +283,53 @@ static inline bool blk_op_is_passthrough(blk_opf_t op)
}
/* flags set by the driver in queue_limits.features */
-enum {
- /* supports a volatile write cache */
- BLK_FEAT_WRITE_CACHE = (1u << 0),
+typedef unsigned int __bitwise blk_features_t;
- /* supports passing on the FUA bit */
- BLK_FEAT_FUA = (1u << 1),
+/* supports a volatile write cache */
+#define BLK_FEAT_WRITE_CACHE ((__force blk_features_t)(1u << 0))
- /* rotational device (hard drive or floppy) */
- BLK_FEAT_ROTATIONAL = (1u << 2),
+/* supports passing on the FUA bit */
+#define BLK_FEAT_FUA ((__force blk_features_t)(1u << 1))
- /* contributes to the random number pool */
- BLK_FEAT_ADD_RANDOM = (1u << 3),
+/* rotational device (hard drive or floppy) */
+#define BLK_FEAT_ROTATIONAL ((__force blk_features_t)(1u << 2))
- /* do disk/partitions IO accounting */
- BLK_FEAT_IO_STAT = (1u << 4),
+/* contributes to the random number pool */
+#define BLK_FEAT_ADD_RANDOM ((__force blk_features_t)(1u << 3))
- /* don't modify data until writeback is done */
- BLK_FEAT_STABLE_WRITES = (1u << 5),
+/* do disk/partitions IO accounting */
+#define BLK_FEAT_IO_STAT ((__force blk_features_t)(1u << 4))
- /* always completes in submit context */
- BLK_FEAT_SYNCHRONOUS = (1u << 6),
+/* don't modify data until writeback is done */
+#define BLK_FEAT_STABLE_WRITES ((__force blk_features_t)(1u << 5))
- /* supports REQ_NOWAIT */
- BLK_FEAT_NOWAIT = (1u << 7),
+/* always completes in submit context */
+#define BLK_FEAT_SYNCHRONOUS ((__force blk_features_t)(1u << 6))
- /* supports DAX */
- BLK_FEAT_DAX = (1u << 8),
+/* supports REQ_NOWAIT */
+#define BLK_FEAT_NOWAIT ((__force blk_features_t)(1u << 7))
- /* supports I/O polling */
- BLK_FEAT_POLL = (1u << 9),
+/* supports DAX */
+#define BLK_FEAT_DAX ((__force blk_features_t)(1u << 8))
- /* is a zoned device */
- BLK_FEAT_ZONED = (1u << 10),
+/* supports I/O polling */
+#define BLK_FEAT_POLL ((__force blk_features_t)(1u << 9))
- /* supports Zone Reset All */
- BLK_FEAT_ZONE_RESETALL = (1u << 11),
+/* is a zoned device */
+#define BLK_FEAT_ZONED ((__force blk_features_t)(1u << 10))
- /* supports PCI(e) p2p requests */
- BLK_FEAT_PCI_P2PDMA = (1u << 12),
+/* supports PCI(e) p2p requests */
+#define BLK_FEAT_PCI_P2PDMA ((__force blk_features_t)(1u << 12))
- /* skip this queue in blk_mq_(un)quiesce_tagset */
- BLK_FEAT_SKIP_TAGSET_QUIESCE = (1u << 13),
+/* skip this queue in blk_mq_(un)quiesce_tagset */
+#define BLK_FEAT_SKIP_TAGSET_QUIESCE ((__force blk_features_t)(1u << 13))
- /* bounce all highmem pages */
- BLK_FEAT_BOUNCE_HIGH = (1u << 14),
+/* bounce all highmem pages */
+#define BLK_FEAT_BOUNCE_HIGH ((__force blk_features_t)(1u << 14))
- /* undocumented magic for bcache */
- BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE = (1u << 15),
-};
+/* undocumented magic for bcache */
+#define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \
+ ((__force blk_features_t)(1u << 15))
/*
* Flags automatically inherited when stacking limits.
@@ -342,17 +340,17 @@ enum {
BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE)
/* internal flags in queue_limits.flags */
-enum {
- /* do not send FLUSH/FUA commands despite advertising a write cache */
- BLK_FLAG_WRITE_CACHE_DISABLED = (1u << 0),
+typedef unsigned int __bitwise blk_flags_t;
- /* I/O topology is misaligned */
- BLK_FEAT_MISALIGNED = (1u << 1),
-};
+/* do not send FLUSH/FUA commands despite advertising a write cache */
+#define BLK_FLAG_WRITE_CACHE_DISABLED ((__force blk_flags_t)(1u << 0))
+
+/* I/O topology is misaligned */
+#define BLK_FLAG_MISALIGNED ((__force blk_flags_t)(1u << 1))
struct queue_limits {
- unsigned int features;
- unsigned int flags;
+ blk_features_t features;
+ blk_flags_t flags;
unsigned long seg_boundary_mask;
unsigned long virt_boundary_mask;
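A miniature of the __bitwise pattern adopted here (illustrative only; the demo_* names are invented): under sparse the typedef is annotated so that mixing a feature value with a plain integer, or with a differently typed flag, produces a warning, while an ordinary compile sees nothing but an unsigned int.

/* local stand-ins for the kernel's __bitwise/__force from compiler_types.h;
 * sparse defines __CHECKER__, a normal compile does not */
#ifdef __CHECKER__
#define __bitwise	__attribute__((bitwise))
#define __force		__attribute__((force))
#else
#define __bitwise
#define __force
#endif

typedef unsigned int __bitwise demo_features_t;

#define DEMO_FEAT_WRITE_CACHE	((__force demo_features_t)(1u << 0))
#define DEMO_FEAT_FUA		((__force demo_features_t)(1u << 1))

struct demo_limits {
	demo_features_t features;
};

static inline int demo_has_fua(const struct demo_limits *lim)
{
	/* fine: both operands are demo_features_t */
	return !!(lim->features & DEMO_FEAT_FUA);
}

/* under sparse, an expression such as "lim->features & 2u" would warn about
 * mixing the bitwise type with a plain integer constant */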
@@ -400,6 +398,7 @@ struct queue_limits {
* due to possible offsets.
*/
unsigned int dma_alignment;
+ unsigned int dma_pad_mask;
struct blk_integrity integrity;
};
@@ -508,8 +507,6 @@ struct request_queue {
*/
int id;
- unsigned int dma_pad_mask;
-
/*
* queue settings
*/
@@ -609,7 +606,6 @@ struct request_queue {
void blk_queue_flag_set(unsigned int flag, struct request_queue *q);
void blk_queue_flag_clear(unsigned int flag, struct request_queue *q);
-bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
@@ -619,8 +615,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
#define blk_queue_nonrot(q) (!((q)->limits.features & BLK_FEAT_ROTATIONAL))
#define blk_queue_io_stat(q) ((q)->limits.features & BLK_FEAT_IO_STAT)
-#define blk_queue_zone_resetall(q) \
- ((q)->limits.features & BLK_FEAT_ZONE_RESETALL)
#define blk_queue_dax(q) ((q)->limits.features & BLK_FEAT_DAX)
#define blk_queue_pci_p2pdma(q) ((q)->limits.features & BLK_FEAT_PCI_P2PDMA)
#ifdef CONFIG_BLK_RQ_ALLOC_TIME
@@ -972,7 +966,6 @@ static inline void blk_queue_disable_write_zeroes(struct request_queue *q)
/*
* Access functions for manipulating queue properties
*/
-void disk_update_readahead(struct gendisk *disk);
extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt);
extern void blk_set_queue_depth(struct request_queue *q, unsigned int depth);
@@ -981,7 +974,6 @@ extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
sector_t offset);
void queue_limits_stack_bdev(struct queue_limits *t, struct block_device *bdev,
sector_t offset, const char *pfx);
-extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int);
extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
struct blk_independent_access_ranges *
@@ -1103,6 +1095,7 @@ int blkdev_issue_secure_erase(struct block_device *bdev, sector_t sector,
#define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */
#define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */
+#define BLKDEV_ZERO_KILLABLE (1 << 2) /* interruptible by fatal signals */
extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
@@ -1230,12 +1223,7 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev)
static inline unsigned queue_logical_block_size(const struct request_queue *q)
{
- int retval = 512;
-
- if (q && q->limits.logical_block_size)
- retval = q->limits.logical_block_size;
-
- return retval;
+ return q->limits.logical_block_size;
}
static inline unsigned int bdev_logical_block_size(struct block_device *bdev)
@@ -1394,7 +1382,7 @@ static inline bool bdev_is_zone_start(struct block_device *bdev,
static inline int queue_dma_alignment(const struct request_queue *q)
{
- return q ? q->limits.dma_alignment : 511;
+ return q->limits.dma_alignment;
}
static inline unsigned int
@@ -1433,10 +1421,16 @@ static inline bool bdev_iter_is_aligned(struct block_device *bdev,
bdev_logical_block_size(bdev) - 1);
}
+static inline int blk_lim_dma_alignment_and_pad(struct queue_limits *lim)
+{
+ return lim->dma_alignment | lim->dma_pad_mask;
+}
+
static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
unsigned int len)
{
- unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;
+ unsigned int alignment = blk_lim_dma_alignment_and_pad(&q->limits);
+
return !(addr & alignment) && !(len & alignment);
}
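
A worked illustration of the combined mask, using hypothetical limits: with dma_alignment = 3 and dma_pad_mask = 511, blk_lim_dma_alignment_and_pad() returns 3 | 511 = 511, so

	addr = 0x1000, len = 4096:  (0x1000 & 511) == 0 && (4096 & 511) == 0  ->  aligned
	addr = 0x1004, len = 4096:  (0x1004 & 511) == 4                       ->  caller falls back to a bounce/copy path
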
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index bd1e361b351c..f41c7f0ef91e 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -280,4 +280,18 @@ static inline void *bvec_virt(struct bio_vec *bvec)
return page_address(bvec->bv_page) + bvec->bv_offset;
}
+/**
+ * bvec_phys - return the physical address for a bvec
+ * @bvec: bvec to return the physical address for
+ */
+static inline phys_addr_t bvec_phys(const struct bio_vec *bvec)
+{
+ /*
+ * Note this open codes page_to_phys because page_to_phys is defined in
+ * <asm/io.h>, which we don't want to pull in here. If it ever moves to
+ * a sensible place we should start using it.
+ */
+ return PFN_PHYS(page_to_pfn(bvec->bv_page)) + bvec->bv_offset;
+}
+
#endif /* __LINUX_BVEC_H */
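
A hedged usage sketch (hypothetical driver and DMA helper): bvec_phys() replaces the open-coded page_to_phys(bv_page) + bv_offset pattern when walking a bio's segments.

	static void foo_map_bio(struct foo_dev *dev, struct bio *bio)
	{
		struct bvec_iter iter;
		struct bio_vec bvec;

		bio_for_each_segment(bvec, bio, iter)
			foo_dma_program(dev, bvec_phys(&bvec), bvec.bv_len);
	}
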
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 82b2195efaca..15d28164bbbd 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -358,6 +358,13 @@ struct dm_target {
bool discards_supported:1;
/*
+ * Automatically set by dm-core if this target supports
+ * REQ_OP_ZONE_RESET_ALL. Otherwise, this operation will be emulated
+ * using REQ_OP_ZONE_RESET. Target drivers must not set this manually.
+ */
+ bool zone_reset_all_supported:1;
+
+ /*
* Set if this target requires that discards be split on
* 'max_discard_sectors' boundaries.
*/
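
A simplified, hypothetical sketch of how dm core might gate the pass-through on this bit (the actual dm implementation may structure this differently): REQ_OP_ZONE_RESET_ALL is only sent to the targets when every target in the table has set zone_reset_all_supported, otherwise it is emulated with per-zone REQ_OP_ZONE_RESET bios.

	static bool dm_table_supports_zone_reset_all(struct dm_table *t)
	{
		unsigned int i;

		for (i = 0; i < t->num_targets; i++) {
			struct dm_target *ti = dm_table_get_target(t, i);

			if (!ti->zone_reset_all_supported)
				return false;
		}
		return true;
	}
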
diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h
index 1773610010eb..2c59fe3efcd4 100644
--- a/include/linux/t10-pi.h
+++ b/include/linux/t10-pi.h
@@ -39,8 +39,11 @@ struct t10_pi_tuple {
static inline u32 t10_pi_ref_tag(struct request *rq)
{
- unsigned int shift = rq->q->limits.integrity.interval_exp;
+ unsigned int shift = ilog2(queue_logical_block_size(rq->q));
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+ rq->q->limits.integrity.interval_exp)
+ shift = rq->q->limits.integrity.interval_exp;
return blk_rq_pos(rq) >> (shift - SECTOR_SHIFT) & 0xffffffff;
}
@@ -61,8 +64,11 @@ static inline u64 lower_48_bits(u64 n)
static inline u64 ext_pi_ref_tag(struct request *rq)
{
- unsigned int shift = rq->q->limits.integrity.interval_exp;
+ unsigned int shift = ilog2(queue_logical_block_size(rq->q));
+ if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) &&
+ rq->q->limits.integrity.interval_exp)
+ shift = rq->q->limits.integrity.interval_exp;
return lower_48_bits(blk_rq_pos(rq) >> (shift - SECTOR_SHIFT));
}
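
A worked example of the new fallback (hypothetical device): with 4096-byte logical blocks and no integrity profile registered, interval_exp is 0, so

	shift   = ilog2(4096) = 12
	ref tag = blk_rq_pos(rq) >> (12 - SECTOR_SHIFT) = 512-byte sector >> 3, i.e. the 4k LBA

whereas a registered profile with 512-byte protection intervals gives interval_exp = 9, shift - SECTOR_SHIFT = 0, and the ref tag stays the raw 512-byte sector number as before.
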
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 0e128ad51460..1527d5d45e01 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -9,9 +9,17 @@
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/tracepoint.h>
+#include <uapi/linux/ioprio.h>
#define RWBS_LEN 8
+#define IOPRIO_CLASS_STRINGS \
+ { IOPRIO_CLASS_NONE, "none" }, \
+ { IOPRIO_CLASS_RT, "rt" }, \
+ { IOPRIO_CLASS_BE, "be" }, \
+ { IOPRIO_CLASS_IDLE, "idle" }, \
+ { IOPRIO_CLASS_INVALID, "invalid"}
+
#ifdef CONFIG_BUFFER_HEAD
DECLARE_EVENT_CLASS(block_buffer,
@@ -82,6 +90,7 @@ TRACE_EVENT(block_rq_requeue,
__field( dev_t, dev )
__field( sector_t, sector )
__field( unsigned int, nr_sector )
+ __field( unsigned short, ioprio )
__array( char, rwbs, RWBS_LEN )
__dynamic_array( char, cmd, 1 )
),
@@ -90,16 +99,20 @@ TRACE_EVENT(block_rq_requeue,
__entry->dev = rq->q->disk ? disk_devt(rq->q->disk) : 0;
__entry->sector = blk_rq_trace_sector(rq);
__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
+ __entry->ioprio = rq->ioprio;
blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
),
- TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+ TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __get_str(cmd),
- (unsigned long long)__entry->sector,
- __entry->nr_sector, 0)
+ (unsigned long long)__entry->sector, __entry->nr_sector,
+ __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio),
+ IOPRIO_CLASS_STRINGS),
+ IOPRIO_PRIO_HINT(__entry->ioprio),
+ IOPRIO_PRIO_LEVEL(__entry->ioprio), 0)
);
DECLARE_EVENT_CLASS(block_rq_completion,
@@ -113,6 +126,7 @@ DECLARE_EVENT_CLASS(block_rq_completion,
__field( sector_t, sector )
__field( unsigned int, nr_sector )
__field( int , error )
+ __field( unsigned short, ioprio )
__array( char, rwbs, RWBS_LEN )
__dynamic_array( char, cmd, 1 )
),
@@ -122,16 +136,20 @@ DECLARE_EVENT_CLASS(block_rq_completion,
__entry->sector = blk_rq_pos(rq);
__entry->nr_sector = nr_bytes >> 9;
__entry->error = blk_status_to_errno(error);
+ __entry->ioprio = rq->ioprio;
blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
),
- TP_printk("%d,%d %s (%s) %llu + %u [%d]",
+ TP_printk("%d,%d %s (%s) %llu + %u %s,%u,%u [%d]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __get_str(cmd),
- (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->error)
+ (unsigned long long)__entry->sector, __entry->nr_sector,
+ __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio),
+ IOPRIO_CLASS_STRINGS),
+ IOPRIO_PRIO_HINT(__entry->ioprio),
+ IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->error)
);
/**
@@ -180,6 +198,7 @@ DECLARE_EVENT_CLASS(block_rq,
__field( sector_t, sector )
__field( unsigned int, nr_sector )
__field( unsigned int, bytes )
+ __field( unsigned short, ioprio )
__array( char, rwbs, RWBS_LEN )
__array( char, comm, TASK_COMM_LEN )
__dynamic_array( char, cmd, 1 )
@@ -190,17 +209,21 @@ DECLARE_EVENT_CLASS(block_rq,
__entry->sector = blk_rq_trace_sector(rq);
__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
__entry->bytes = blk_rq_bytes(rq);
+ __entry->ioprio = rq->ioprio;
blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
__get_str(cmd)[0] = '\0';
memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
),
- TP_printk("%d,%d %s %u (%s) %llu + %u [%s]",
+ TP_printk("%d,%d %s %u (%s) %llu + %u %s,%u,%u [%s]",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->rwbs, __entry->bytes, __get_str(cmd),
- (unsigned long long)__entry->sector,
- __entry->nr_sector, __entry->comm)
+ (unsigned long long)__entry->sector, __entry->nr_sector,
+ __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio),
+ IOPRIO_CLASS_STRINGS),
+ IOPRIO_PRIO_HINT(__entry->ioprio),
+ IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->comm)
);
/**
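
For readers decoding traces, a hypothetical block_rq_issue line under the extended format, with the ioprio class, hint and level printed between the sector range and the task:

	8,0 WS 4096 () 2048 + 8 be,0,4 [fio]
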
diff --git a/rust/bindings/bindings_helper.h b/rust/bindings/bindings_helper.h
index 84f601d7068e..6deee85a29c8 100644
--- a/rust/bindings/bindings_helper.h
+++ b/rust/bindings/bindings_helper.h
@@ -9,6 +9,7 @@
#include <kunit/test.h>
#include <linux/blk_types.h>
#include <linux/blk-mq.h>
+#include <linux/blkdev.h>
#include <linux/errname.h>
#include <linux/ethtool.h>
#include <linux/jiffies.h>
@@ -28,3 +29,4 @@ const gfp_t RUST_CONST_HELPER_GFP_KERNEL = GFP_KERNEL;
const gfp_t RUST_CONST_HELPER_GFP_KERNEL_ACCOUNT = GFP_KERNEL_ACCOUNT;
const gfp_t RUST_CONST_HELPER_GFP_NOWAIT = GFP_NOWAIT;
const gfp_t RUST_CONST_HELPER___GFP_ZERO = __GFP_ZERO;
+const blk_features_t RUST_CONST_HELPER_BLK_FEAT_ROTATIONAL = BLK_FEAT_ROTATIONAL;