88 files changed, 1580 insertions, 726 deletions
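Before the per-file hunks, a short illustration of the headline change in this series: the block layer gains REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH, blkdev_reset_zones() is generalized into blkdev_zone_mgmt(), and the BLKOPENZONE/BLKCLOSEZONE/BLKFINISHZONE ioctls are routed through blkdev_zone_mgmt_ioctl(). The sketch below is a minimal, hedged userspace example of driving those ioctls; it assumes a <linux/blkzoned.h> that matches this kernel, and the device path and zone geometry are purely illustrative.

/*
 * Hedged sketch: issue one of the new zone management ioctls added by
 * blkdev_zone_mgmt_ioctl() in this series.  BLKOPENZONE, BLKCLOSEZONE,
 * BLKFINISHZONE and struct blk_zone_range are assumed to come from a
 * <linux/blkzoned.h> built against this kernel; /dev/nullb0 and the
 * 256 MiB zone size are made-up example values.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/blkzoned.h>

int main(void)
{
	struct blk_zone_range range;
	int fd, ret;

	fd = open("/dev/nullb0", O_RDWR);	/* hypothetical zoned device */
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* One zone: start sector and length must be zone aligned. */
	memset(&range, 0, sizeof(range));
	range.sector = 0;
	range.nr_sectors = 524288;		/* 256 MiB in 512-byte sectors */

	/* Could equally be BLKCLOSEZONE, BLKFINISHZONE or BLKRESETZONE. */
	ret = ioctl(fd, BLKOPENZONE, &range);
	if (ret)
		perror("BLKOPENZONE");

	close(fd);
	return ret ? 1 : 0;
}

As the blk-zoned.c hunks below show, the kernel side translates each of these ioctls into the matching REQ_OP_ZONE_* request opcode and hands it to blkdev_zone_mgmt(), with a whole-device REQ_OP_ZONE_RESET collapsing into a single REQ_OP_ZONE_RESET_ALL bio when the queue supports it.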
diff --git a/MAINTAINERS b/MAINTAINERS index 55199ef7fa74..b3bbb1784913 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13363,7 +13363,7 @@ S: Maintained F: drivers/scsi/qla1280.[ch] QLOGIC QLA2XXX FC-SCSI DRIVER -M: qla2xxx-upstream@qlogic.com +M: hmadhani@marvell.com L: linux-scsi@vger.kernel.org S: Supported F: Documentation/scsi/LICENSE.qla2xxx diff --git a/block/blk-core.c b/block/blk-core.c index d5e668ec751b..f0d82227a2fc 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -132,6 +132,9 @@ static const char *const blk_op_name[] = { REQ_OP_NAME(SECURE_ERASE), REQ_OP_NAME(ZONE_RESET), REQ_OP_NAME(ZONE_RESET_ALL), + REQ_OP_NAME(ZONE_OPEN), + REQ_OP_NAME(ZONE_CLOSE), + REQ_OP_NAME(ZONE_FINISH), REQ_OP_NAME(WRITE_SAME), REQ_OP_NAME(WRITE_ZEROES), REQ_OP_NAME(SCSI_IN), @@ -336,14 +339,14 @@ EXPORT_SYMBOL_GPL(blk_set_queue_dying); */ void blk_cleanup_queue(struct request_queue *q) { + WARN_ON_ONCE(blk_queue_registered(q)); + /* mark @q DYING, no new request or merges will be allowed afterwards */ - mutex_lock(&q->sysfs_lock); blk_set_queue_dying(q); blk_queue_flag_set(QUEUE_FLAG_NOMERGES, q); blk_queue_flag_set(QUEUE_FLAG_NOXMERGES, q); blk_queue_flag_set(QUEUE_FLAG_DYING, q); - mutex_unlock(&q->sysfs_lock); /* * Drain all requests queued before DYING marking. Set DEAD flag to @@ -849,10 +852,10 @@ static inline int blk_partition_remap(struct bio *bio) goto out; /* - * Zone reset does not include bi_size so bio_sectors() is always 0. - * Include a test for the reset op code and perform the remap if needed. + * Zone management bios do not have a sector count but they do have + * a start sector filled out and need to be remapped. */ - if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) { + if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio))) { if (bio_check_eod(bio, part_nr_sects_read(p))) goto out; bio->bi_iter.bi_sector += p->start_sect; @@ -936,6 +939,9 @@ generic_make_request_checks(struct bio *bio) goto not_supported; break; case REQ_OP_ZONE_RESET: + case REQ_OP_ZONE_OPEN: + case REQ_OP_ZONE_CLOSE: + case REQ_OP_ZONE_FINISH: if (!blk_queue_is_zoned(q)) goto not_supported; break; diff --git a/block/blk-exec.c b/block/blk-exec.c index 1db44ca0f4a6..e20a852ae432 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -55,6 +55,8 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, rq->rq_disk = bd_disk; rq->end_io = done; + blk_account_io_start(rq, true); + /* * don't check dying flag for MQ because the request won't * be reused after dying flag is set diff --git a/block/blk-merge.c b/block/blk-merge.c index 48e6725b32ee..f22cb6251d06 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -293,7 +293,7 @@ split: void __blk_queue_split(struct request_queue *q, struct bio **bio, unsigned int *nr_segs) { - struct bio *split; + struct bio *split = NULL; switch (bio_op(*bio)) { case REQ_OP_DISCARD: @@ -309,6 +309,20 @@ void __blk_queue_split(struct request_queue *q, struct bio **bio, nr_segs); break; default: + /* + * All drivers must accept single-segments bios that are <= + * PAGE_SIZE. This is a quick and dirty check that relies on + * the fact that bi_io_vec[0] is always valid if a bio has data. + * The check might lead to occasional false negatives when bios + * are cloned, but compared to the performance impact of cloned + * bios themselves the loop below doesn't matter anyway. 
+ */ + if (!q->limits.chunk_sectors && + (*bio)->bi_vcnt == 1 && + (*bio)->bi_io_vec[0].bv_len <= PAGE_SIZE) { + *nr_segs = 1; + break; + } split = blk_bio_segment_split(q, *bio, &q->bio_split, nr_segs); break; } diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index a0d3ce30fa08..062229395a50 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c @@ -74,10 +74,8 @@ static ssize_t blk_mq_sysfs_show(struct kobject *kobj, struct attribute *attr, if (!entry->show) return -EIO; - res = -ENOENT; mutex_lock(&q->sysfs_lock); - if (!blk_queue_dying(q)) - res = entry->show(ctx, page); + res = entry->show(ctx, page); mutex_unlock(&q->sysfs_lock); return res; } @@ -97,10 +95,8 @@ static ssize_t blk_mq_sysfs_store(struct kobject *kobj, struct attribute *attr, if (!entry->store) return -EIO; - res = -ENOENT; mutex_lock(&q->sysfs_lock); - if (!blk_queue_dying(q)) - res = entry->store(ctx, page, length); + res = entry->store(ctx, page, length); mutex_unlock(&q->sysfs_lock); return res; } @@ -120,10 +116,8 @@ static ssize_t blk_mq_hw_sysfs_show(struct kobject *kobj, if (!entry->show) return -EIO; - res = -ENOENT; mutex_lock(&q->sysfs_lock); - if (!blk_queue_dying(q)) - res = entry->show(hctx, page); + res = entry->show(hctx, page); mutex_unlock(&q->sysfs_lock); return res; } @@ -144,10 +138,8 @@ static ssize_t blk_mq_hw_sysfs_store(struct kobject *kobj, if (!entry->store) return -EIO; - res = -ENOENT; mutex_lock(&q->sysfs_lock); - if (!blk_queue_dying(q)) - res = entry->store(hctx, page, length); + res = entry->store(hctx, page, length); mutex_unlock(&q->sysfs_lock); return res; } @@ -166,20 +158,25 @@ static ssize_t blk_mq_hw_sysfs_nr_reserved_tags_show(struct blk_mq_hw_ctx *hctx, static ssize_t blk_mq_hw_sysfs_cpus_show(struct blk_mq_hw_ctx *hctx, char *page) { + const size_t size = PAGE_SIZE - 1; unsigned int i, first = 1; - ssize_t ret = 0; + int ret = 0, pos = 0; for_each_cpu(i, hctx->cpumask) { if (first) - ret += sprintf(ret + page, "%u", i); + ret = snprintf(pos + page, size - pos, "%u", i); else - ret += sprintf(ret + page, ", %u", i); + ret = snprintf(pos + page, size - pos, ", %u", i); + + if (ret >= size - pos) + break; first = 0; + pos += ret; } - ret += sprintf(ret + page, "\n"); - return ret; + ret = snprintf(pos + page, size + 1 - pos, "\n"); + return pos + ret; } static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_nr_tags = { diff --git a/block/blk-mq.c b/block/blk-mq.c index ec791156e9cc..5c9adcaa27ac 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -93,7 +93,7 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, struct mq_inflight { struct hd_struct *part; - unsigned int *inflight; + unsigned int inflight[2]; }; static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, @@ -102,45 +102,29 @@ static bool blk_mq_check_inflight(struct blk_mq_hw_ctx *hctx, { struct mq_inflight *mi = priv; - /* - * index[0] counts the specific partition that was asked for. 
- */ if (rq->part == mi->part) - mi->inflight[0]++; + mi->inflight[rq_data_dir(rq)]++; return true; } unsigned int blk_mq_in_flight(struct request_queue *q, struct hd_struct *part) { - unsigned inflight[2]; - struct mq_inflight mi = { .part = part, .inflight = inflight, }; + struct mq_inflight mi = { .part = part }; - inflight[0] = inflight[1] = 0; blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi); - return inflight[0]; -} - -static bool blk_mq_check_inflight_rw(struct blk_mq_hw_ctx *hctx, - struct request *rq, void *priv, - bool reserved) -{ - struct mq_inflight *mi = priv; - - if (rq->part == mi->part) - mi->inflight[rq_data_dir(rq)]++; - - return true; + return mi.inflight[0] + mi.inflight[1]; } void blk_mq_in_flight_rw(struct request_queue *q, struct hd_struct *part, unsigned int inflight[2]) { - struct mq_inflight mi = { .part = part, .inflight = inflight, }; + struct mq_inflight mi = { .part = part }; - inflight[0] = inflight[1] = 0; - blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight_rw, &mi); + blk_mq_queue_tag_busy_iter(q, blk_mq_check_inflight, &mi); + inflight[0] = mi.inflight[0]; + inflight[1] = mi.inflight[1]; } void blk_freeze_queue_start(struct request_queue *q) @@ -663,18 +647,6 @@ bool blk_mq_complete_request(struct request *rq) } EXPORT_SYMBOL(blk_mq_complete_request); -int blk_mq_request_started(struct request *rq) -{ - return blk_mq_rq_state(rq) != MQ_RQ_IDLE; -} -EXPORT_SYMBOL_GPL(blk_mq_request_started); - -int blk_mq_request_completed(struct request *rq) -{ - return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE; -} -EXPORT_SYMBOL_GPL(blk_mq_request_completed); - void blk_mq_start_request(struct request *rq) { struct request_queue *q = rq->q; @@ -1064,7 +1036,7 @@ bool blk_mq_get_driver_tag(struct request *rq) bool shared; if (rq->tag != -1) - goto done; + return true; if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag)) data.flags |= BLK_MQ_REQ_RESERVED; @@ -1079,7 +1051,6 @@ bool blk_mq_get_driver_tag(struct request *rq) data.hctx->tags->rqs[rq->tag] = rq; } -done: return rq->tag != -1; } @@ -1486,7 +1457,7 @@ void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs) } EXPORT_SYMBOL(blk_mq_delay_run_hw_queue); -bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) +void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) { int srcu_idx; bool need_run; @@ -1504,12 +1475,8 @@ bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async) blk_mq_hctx_has_pending(hctx); hctx_unlock(hctx, srcu_idx); - if (need_run) { + if (need_run) __blk_mq_delay_run_hw_queue(hctx, async, 0); - return true; - } - - return false; } EXPORT_SYMBOL(blk_mq_run_hw_queue); @@ -2789,6 +2756,23 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, int i, j, end; struct blk_mq_hw_ctx **hctxs = q->queue_hw_ctx; + if (q->nr_hw_queues < set->nr_hw_queues) { + struct blk_mq_hw_ctx **new_hctxs; + + new_hctxs = kcalloc_node(set->nr_hw_queues, + sizeof(*new_hctxs), GFP_KERNEL, + set->numa_node); + if (!new_hctxs) + return; + if (hctxs) + memcpy(new_hctxs, hctxs, q->nr_hw_queues * + sizeof(*hctxs)); + q->queue_hw_ctx = new_hctxs; + q->nr_hw_queues = set->nr_hw_queues; + kfree(hctxs); + hctxs = new_hctxs; + } + /* protect against switching io scheduler */ mutex_lock(&q->sysfs_lock); for (i = 0; i < set->nr_hw_queues; i++) { @@ -2844,19 +2828,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set, mutex_unlock(&q->sysfs_lock); } -/* - * Maximum number of hardware queues we support. 
For single sets, we'll never - * have more than the CPUs (software queues). For multiple sets, the tag_set - * user may have set ->nr_hw_queues larger. - */ -static unsigned int nr_hw_queues(struct blk_mq_tag_set *set) -{ - if (set->nr_maps == 1) - return nr_cpu_ids; - - return max(set->nr_hw_queues, nr_cpu_ids); -} - struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q, bool elevator_init) @@ -2876,12 +2847,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, /* init q->mq_kobj and sw queues' kobjects */ blk_mq_sysfs_init(q); - q->nr_queues = nr_hw_queues(set); - q->queue_hw_ctx = kcalloc_node(q->nr_queues, sizeof(*(q->queue_hw_ctx)), - GFP_KERNEL, set->numa_node); - if (!q->queue_hw_ctx) - goto err_sys_init; - INIT_LIST_HEAD(&q->unused_hctx_list); spin_lock_init(&q->unused_hctx_lock); @@ -2929,7 +2894,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, err_hctxs: kfree(q->queue_hw_ctx); q->nr_hw_queues = 0; -err_sys_init: blk_mq_sysfs_deinit(q); err_poll: blk_stat_free_callback(q->poll_cb); @@ -3030,6 +2994,29 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set) } } +static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set, + int cur_nr_hw_queues, int new_nr_hw_queues) +{ + struct blk_mq_tags **new_tags; + + if (cur_nr_hw_queues >= new_nr_hw_queues) + return 0; + + new_tags = kcalloc_node(new_nr_hw_queues, sizeof(struct blk_mq_tags *), + GFP_KERNEL, set->numa_node); + if (!new_tags) + return -ENOMEM; + + if (set->tags) + memcpy(new_tags, set->tags, cur_nr_hw_queues * + sizeof(*set->tags)); + kfree(set->tags); + set->tags = new_tags; + set->nr_hw_queues = new_nr_hw_queues; + + return 0; +} + /* * Alloc a tag set to be associated with one or more request queues. * May fail with EINVAL for various error conditions. May adjust the @@ -3083,9 +3070,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids) set->nr_hw_queues = nr_cpu_ids; - set->tags = kcalloc_node(nr_hw_queues(set), sizeof(struct blk_mq_tags *), - GFP_KERNEL, set->numa_node); - if (!set->tags) + if (blk_mq_realloc_tag_set_tags(set, 0, set->nr_hw_queues) < 0) return -ENOMEM; ret = -ENOMEM; @@ -3126,7 +3111,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) { int i, j; - for (i = 0; i < nr_hw_queues(set); i++) + for (i = 0; i < set->nr_hw_queues; i++) blk_mq_free_map_and_requests(set, i); for (j = 0; j < set->nr_maps; j++) { @@ -3271,10 +3256,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, list_for_each_entry(q, &set->tag_list, tag_set_list) blk_mq_freeze_queue(q); /* - * Sync with blk_mq_queue_tag_busy_iter. - */ - synchronize_rcu(); - /* * Switch IO scheduler to 'none', cleaning up the data associated * with the previous scheduler. We will switch back once we are done * updating the new sw to hw queue mappings. 
@@ -3288,6 +3269,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, blk_mq_sysfs_unregister(q); } + if (blk_mq_realloc_tag_set_tags(set, set->nr_hw_queues, nr_hw_queues) < + 0) + goto reregister; + prev_nr_hw_queues = set->nr_hw_queues; set->nr_hw_queues = nr_hw_queues; blk_mq_update_queue_map(set); @@ -3304,6 +3289,7 @@ fallback: blk_mq_map_swqueue(q); } +reregister: list_for_each_entry(q, &set->tag_list, tag_set_list) { blk_mq_sysfs_register(q); blk_mq_debugfs_register_hctxs(q); diff --git a/block/blk-mq.h b/block/blk-mq.h index 32c62c64e6c2..eaaca8fc1c28 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -128,15 +128,6 @@ extern void blk_mq_hctx_kobj_init(struct blk_mq_hw_ctx *hctx); void blk_mq_release(struct request_queue *q); -/** - * blk_mq_rq_state() - read the current MQ_RQ_* state of a request - * @rq: target request. - */ -static inline enum mq_rq_state blk_mq_rq_state(struct request *rq) -{ - return READ_ONCE(rq->state); -} - static inline struct blk_mq_ctx *__blk_mq_get_ctx(struct request_queue *q, unsigned int cpu) { diff --git a/block/blk-stat.c b/block/blk-stat.c index 940f15d600f8..7da302ff88d0 100644 --- a/block/blk-stat.c +++ b/block/blk-stat.c @@ -53,7 +53,7 @@ void blk_stat_add(struct request *rq, u64 now) struct request_queue *q = rq->q; struct blk_stat_callback *cb; struct blk_rq_stat *stat; - int bucket; + int bucket, cpu; u64 value; value = (now >= rq->io_start_time_ns) ? now - rq->io_start_time_ns : 0; @@ -61,6 +61,7 @@ void blk_stat_add(struct request *rq, u64 now) blk_throtl_stat_add(rq, value); rcu_read_lock(); + cpu = get_cpu(); list_for_each_entry_rcu(cb, &q->stats->callbacks, list) { if (!blk_stat_is_active(cb)) continue; @@ -69,10 +70,10 @@ void blk_stat_add(struct request *rq, u64 now) if (bucket < 0) continue; - stat = &get_cpu_ptr(cb->cpu_stat)[bucket]; + stat = &per_cpu_ptr(cb->cpu_stat, cpu)[bucket]; blk_rq_stat_add(stat, value); - put_cpu_ptr(cb->cpu_stat); } + put_cpu(); rcu_read_unlock(); } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 46f5198be017..fca9b158f4a0 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -801,10 +801,6 @@ queue_attr_show(struct kobject *kobj, struct attribute *attr, char *page) if (!entry->show) return -EIO; mutex_lock(&q->sysfs_lock); - if (blk_queue_dying(q)) { - mutex_unlock(&q->sysfs_lock); - return -ENOENT; - } res = entry->show(q, page); mutex_unlock(&q->sysfs_lock); return res; @@ -823,10 +819,6 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr, q = container_of(kobj, struct request_queue, kobj); mutex_lock(&q->sysfs_lock); - if (blk_queue_dying(q)) { - mutex_unlock(&q->sysfs_lock); - return -ENOENT; - } res = entry->store(q, page, length); mutex_unlock(&q->sysfs_lock); return res; diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 4bc5f260248a..481eaf7d04d4 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -202,32 +202,14 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector, } EXPORT_SYMBOL_GPL(blkdev_report_zones); -/* - * Special case of zone reset operation to reset all zones in one command, - * useful for applications like mkfs. 
- */ -static int __blkdev_reset_all_zones(struct block_device *bdev, gfp_t gfp_mask) -{ - struct bio *bio = bio_alloc(gfp_mask, 0); - int ret; - - /* across the zones operations, don't need any sectors */ - bio_set_dev(bio, bdev); - bio_set_op_attrs(bio, REQ_OP_ZONE_RESET_ALL, 0); - - ret = submit_bio_wait(bio); - bio_put(bio); - - return ret; -} - static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev, + sector_t sector, sector_t nr_sectors) { if (!blk_queue_zone_resetall(bdev_get_queue(bdev))) return false; - if (nr_sectors != part_nr_sects_read(bdev->bd_part)) + if (sector || nr_sectors != part_nr_sects_read(bdev->bd_part)) return false; /* * REQ_OP_ZONE_RESET_ALL can be executed only if the block device is @@ -239,26 +221,29 @@ static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev, } /** - * blkdev_reset_zones - Reset zones write pointer + * blkdev_zone_mgmt - Execute a zone management operation on a range of zones * @bdev: Target block device - * @sector: Start sector of the first zone to reset - * @nr_sectors: Number of sectors, at least the length of one zone + * @op: Operation to be performed on the zones + * @sector: Start sector of the first zone to operate on + * @nr_sectors: Number of sectors, should be at least the length of one zone and + * must be zone size aligned. * @gfp_mask: Memory allocation flags (for bio_alloc) * * Description: - * Reset the write pointer of the zones contained in the range + * Perform the specified operation on the range of zones specified by * @sector..@sector+@nr_sectors. Specifying the entire disk sector range * is valid, but the specified range should not contain conventional zones. + * The operation to execute on each zone can be a zone reset, open, close + * or finish request. */ -int blkdev_reset_zones(struct block_device *bdev, - sector_t sector, sector_t nr_sectors, - gfp_t gfp_mask) +int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, + sector_t sector, sector_t nr_sectors, + gfp_t gfp_mask) { struct request_queue *q = bdev_get_queue(bdev); - sector_t zone_sectors; + sector_t zone_sectors = blk_queue_zone_sectors(q); sector_t end_sector = sector + nr_sectors; struct bio *bio = NULL; - struct blk_plug plug; int ret; if (!blk_queue_is_zoned(q)) @@ -267,15 +252,14 @@ int blkdev_reset_zones(struct block_device *bdev, if (bdev_read_only(bdev)) return -EPERM; + if (!op_is_zone_mgmt(op)) + return -EOPNOTSUPP; + if (!nr_sectors || end_sector > bdev->bd_part->nr_sects) /* Out of range */ return -EINVAL; - if (blkdev_allow_reset_all_zones(bdev, nr_sectors)) - return __blkdev_reset_all_zones(bdev, gfp_mask); - /* Check alignment (handle eventual smaller last zone) */ - zone_sectors = blk_queue_zone_sectors(q); if (sector & (zone_sectors - 1)) return -EINVAL; @@ -283,29 +267,34 @@ int blkdev_reset_zones(struct block_device *bdev, end_sector != bdev->bd_part->nr_sects) return -EINVAL; - blk_start_plug(&plug); while (sector < end_sector) { - bio = blk_next_bio(bio, 0, gfp_mask); - bio->bi_iter.bi_sector = sector; bio_set_dev(bio, bdev); - bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0); + /* + * Special case for the zone reset operation that reset all + * zones, this is useful for applications like mkfs. 
+ */ + if (op == REQ_OP_ZONE_RESET && + blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) { + bio->bi_opf = REQ_OP_ZONE_RESET_ALL; + break; + } + + bio->bi_opf = op; + bio->bi_iter.bi_sector = sector; sector += zone_sectors; /* This may take a while, so be nice to others */ cond_resched(); - } ret = submit_bio_wait(bio); bio_put(bio); - blk_finish_plug(&plug); - return ret; } -EXPORT_SYMBOL_GPL(blkdev_reset_zones); +EXPORT_SYMBOL_GPL(blkdev_zone_mgmt); /* * BLKREPORTZONE ioctl processing. @@ -368,15 +357,16 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, } /* - * BLKRESETZONE ioctl processing. + * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing. * Called from blkdev_ioctl. */ -int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg) +int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; struct request_queue *q; struct blk_zone_range zrange; + enum req_opf op; if (!argp) return -EINVAL; @@ -397,8 +387,25 @@ int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range))) return -EFAULT; - return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors, - GFP_KERNEL); + switch (cmd) { + case BLKRESETZONE: + op = REQ_OP_ZONE_RESET; + break; + case BLKOPENZONE: + op = REQ_OP_ZONE_OPEN; + break; + case BLKCLOSEZONE: + op = REQ_OP_ZONE_CLOSE; + break; + case BLKFINISHZONE: + op = REQ_OP_ZONE_FINISH; + break; + default: + return -ENOTTY; + } + + return blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors, + GFP_KERNEL); } static inline unsigned long *blk_alloc_zone_bitmap(int node, diff --git a/block/blk.h b/block/blk.h index 47fba9362e60..2bea40180b6f 100644 --- a/block/blk.h +++ b/block/blk.h @@ -242,14 +242,11 @@ int blk_dev_init(void); * Contribute to IO statistics IFF: * * a) it's attached to a gendisk, and - * b) the queue had IO stats enabled when this request was started, and - * c) it's a file system request + * b) the queue had IO stats enabled when this request was started */ static inline bool blk_do_io_stat(struct request *rq) { - return rq->rq_disk && - (rq->rq_flags & RQF_IO_STAT) && - !blk_rq_is_passthrough(rq); + return rq->rq_disk && (rq->rq_flags & RQF_IO_STAT); } static inline void req_set_nomerge(struct request_queue *q, struct request *req) diff --git a/block/elevator.c b/block/elevator.c index 5437059c9261..0b1db9afb586 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -831,3 +831,12 @@ struct request *elv_rb_latter_request(struct request_queue *q, return NULL; } EXPORT_SYMBOL(elv_rb_latter_request); + +static int __init elevator_setup(char *str) +{ + pr_warn("Kernel parameter elevator= does not have any effect anymore.\n" + "Please use sysfs to set IO scheduler for individual devices.\n"); + return 1; +} + +__setup("elevator=", elevator_setup); diff --git a/block/ioctl.c b/block/ioctl.c index 15a0eb80ada9..8756efb1419e 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -532,7 +532,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKREPORTZONE: return blkdev_report_zones_ioctl(bdev, mode, cmd, arg); case BLKRESETZONE: - return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg); + case BLKOPENZONE: + case BLKCLOSEZONE: + case BLKFINISHZONE: + return blkdev_zone_mgmt_ioctl(bdev, mode, cmd, arg); case BLKGETZONESZ: return put_uint(arg, bdev_zone_sectors(bdev)); 
case BLKGETNRZONES: diff --git a/block/opal_proto.h b/block/opal_proto.h index 5532412d567c..736e67c3e7c5 100644 --- a/block/opal_proto.h +++ b/block/opal_proto.h @@ -76,7 +76,6 @@ enum opal_response_token { * Derived from: TCG_Storage_Architecture_Core_Spec_v2.01_r1.00 * Section: 6.3 Assigned UIDs */ -#define OPAL_UID_LENGTH 8 #define OPAL_METHOD_LENGTH 8 #define OPAL_MSID_KEYLEN 15 #define OPAL_UID_LENGTH_HALF 4 @@ -108,6 +107,7 @@ enum opal_uid { OPAL_C_PIN_TABLE, OPAL_LOCKING_INFO_TABLE, OPAL_ENTERPRISE_LOCKING_INFO_TABLE, + OPAL_DATASTORE, /* C_PIN_TABLE object ID's */ OPAL_C_PIN_MSID, OPAL_C_PIN_SID, diff --git a/block/sed-opal.c b/block/sed-opal.c index b4c761973ac1..b2cacc9ddd11 100644 --- a/block/sed-opal.c +++ b/block/sed-opal.c @@ -149,6 +149,8 @@ static const u8 opaluid[][OPAL_UID_LENGTH] = { { 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x01 }, [OPAL_ENTERPRISE_LOCKING_INFO_TABLE] = { 0x00, 0x00, 0x08, 0x01, 0x00, 0x00, 0x00, 0x00 }, + [OPAL_DATASTORE] = + { 0x00, 0x00, 0x10, 0x01, 0x00, 0x00, 0x00, 0x00 }, /* C_PIN_TABLE object ID's */ [OPAL_C_PIN_MSID] = @@ -1139,11 +1141,11 @@ static int generic_get_column(struct opal_dev *dev, const u8 *table, * * the result is provided in dev->resp->tok[4] */ -static int generic_get_table_info(struct opal_dev *dev, enum opal_uid table, +static int generic_get_table_info(struct opal_dev *dev, const u8 *table_uid, u64 column) { u8 uid[OPAL_UID_LENGTH]; - const unsigned int half = OPAL_UID_LENGTH/2; + const unsigned int half = OPAL_UID_LENGTH_HALF; /* sed-opal UIDs can be split in two halves: * first: actual table index @@ -1152,7 +1154,7 @@ static int generic_get_table_info(struct opal_dev *dev, enum opal_uid table, * first part of the target table as relative index into that table */ memcpy(uid, opaluid[OPAL_TABLE_TABLE], half); - memcpy(uid+half, opaluid[table], half); + memcpy(uid + half, table_uid, half); return generic_get_column(dev, uid, column); } @@ -1221,6 +1223,75 @@ static int get_active_key(struct opal_dev *dev, void *data) return get_active_key_cont(dev); } +static int generic_table_write_data(struct opal_dev *dev, const u64 data, + u64 offset, u64 size, const u8 *uid) +{ + const u8 __user *src = (u8 __user *)(uintptr_t)data; + u8 *dst; + u64 len; + size_t off = 0; + int err; + + /* do we fit in the available space? */ + err = generic_get_table_info(dev, uid, OPAL_TABLE_ROWS); + if (err) { + pr_debug("Couldn't get the table size\n"); + return err; + } + + len = response_get_u64(&dev->parsed, 4); + if (size > len || offset > len - size) { + pr_debug("Does not fit in the table (%llu vs. %llu)\n", + offset + size, len); + return -ENOSPC; + } + + /* do the actual transmission(s) */ + while (off < size) { + err = cmd_start(dev, uid, opalmethod[OPAL_SET]); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_WHERE); + add_token_u64(&err, dev, offset + off); + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_VALUES); + + /* + * The bytestring header is either 1 or 2 bytes, so assume 2. + * There also needs to be enough space to accommodate the + * trailing OPAL_ENDNAME (1 byte) and tokens added by + * cmd_finalize. 
+ */ + len = min(remaining_size(dev) - (2+1+CMD_FINALIZE_BYTES_NEEDED), + (size_t)(size - off)); + pr_debug("Write bytes %zu+%llu/%llu\n", off, len, size); + + dst = add_bytestring_header(&err, dev, len); + if (!dst) + break; + + if (copy_from_user(dst, src + off, len)) { + err = -EFAULT; + break; + } + + dev->pos += len; + + add_token_u8(&err, dev, OPAL_ENDNAME); + if (err) + break; + + err = finalize_and_send(dev, parse_and_check_status); + if (err) + break; + + off += len; + } + + return err; +} + static int generic_lr_enable_disable(struct opal_dev *dev, u8 *uid, bool rle, bool wle, bool rl, bool wl) @@ -1583,68 +1654,9 @@ static int set_mbr_enable_disable(struct opal_dev *dev, void *data) static int write_shadow_mbr(struct opal_dev *dev, void *data) { struct opal_shadow_mbr *shadow = data; - const u8 __user *src; - u8 *dst; - size_t off = 0; - u64 len; - int err = 0; - - /* do we fit in the available shadow mbr space? */ - err = generic_get_table_info(dev, OPAL_MBR, OPAL_TABLE_ROWS); - if (err) { - pr_debug("MBR: could not get shadow size\n"); - return err; - } - - len = response_get_u64(&dev->parsed, 4); - if (shadow->size > len || shadow->offset > len - shadow->size) { - pr_debug("MBR: does not fit in shadow (%llu vs. %llu)\n", - shadow->offset + shadow->size, len); - return -ENOSPC; - } - - /* do the actual transmission(s) */ - src = (u8 __user *)(uintptr_t)shadow->data; - while (off < shadow->size) { - err = cmd_start(dev, opaluid[OPAL_MBR], opalmethod[OPAL_SET]); - add_token_u8(&err, dev, OPAL_STARTNAME); - add_token_u8(&err, dev, OPAL_WHERE); - add_token_u64(&err, dev, shadow->offset + off); - add_token_u8(&err, dev, OPAL_ENDNAME); - - add_token_u8(&err, dev, OPAL_STARTNAME); - add_token_u8(&err, dev, OPAL_VALUES); - - /* - * The bytestring header is either 1 or 2 bytes, so assume 2. - * There also needs to be enough space to accommodate the - * trailing OPAL_ENDNAME (1 byte) and tokens added by - * cmd_finalize. - */ - len = min(remaining_size(dev) - (2+1+CMD_FINALIZE_BYTES_NEEDED), - (size_t)(shadow->size - off)); - pr_debug("MBR: write bytes %zu+%llu/%llu\n", - off, len, shadow->size); - - dst = add_bytestring_header(&err, dev, len); - if (!dst) - break; - if (copy_from_user(dst, src + off, len)) - err = -EFAULT; - dev->pos += len; - - add_token_u8(&err, dev, OPAL_ENDNAME); - if (err) - break; - - err = finalize_and_send(dev, parse_and_check_status); - if (err) - break; - - off += len; - } - return err; + return generic_table_write_data(dev, shadow->data, shadow->offset, + shadow->size, opaluid[OPAL_MBR]); } static int generic_pw_cmd(u8 *key, size_t key_len, u8 *cpin_uid, @@ -1957,6 +1969,113 @@ static int get_msid_cpin_pin(struct opal_dev *dev, void *data) return 0; } +static int write_table_data(struct opal_dev *dev, void *data) +{ + struct opal_read_write_table *write_tbl = data; + + return generic_table_write_data(dev, write_tbl->data, write_tbl->offset, + write_tbl->size, write_tbl->table_uid); +} + +static int read_table_data_cont(struct opal_dev *dev) +{ + int err; + const char *data_read; + + err = parse_and_check_status(dev); + if (err) + return err; + + dev->prev_d_len = response_get_string(&dev->parsed, 1, &data_read); + dev->prev_data = (void *)data_read; + if (!dev->prev_data) { + pr_debug("%s: Couldn't read data from the table.\n", __func__); + return OPAL_INVAL_PARAM; + } + + return 0; +} + +/* + * IO_BUFFER_LENGTH = 2048 + * sizeof(header) = 56 + * No. 
of Token Bytes in the Response = 11 + * MAX size of data that can be carried in response buffer + * at a time is : 2048 - (56 + 11) = 1981 = 0x7BD. + */ +#define OPAL_MAX_READ_TABLE (0x7BD) + +static int read_table_data(struct opal_dev *dev, void *data) +{ + struct opal_read_write_table *read_tbl = data; + int err; + size_t off = 0, max_read_size = OPAL_MAX_READ_TABLE; + u64 table_len, len; + u64 offset = read_tbl->offset, read_size = read_tbl->size - 1; + u8 __user *dst; + + err = generic_get_table_info(dev, read_tbl->table_uid, OPAL_TABLE_ROWS); + if (err) { + pr_debug("Couldn't get the table size\n"); + return err; + } + + table_len = response_get_u64(&dev->parsed, 4); + + /* Check if the user is trying to read from the table limits */ + if (read_size > table_len || offset > table_len - read_size) { + pr_debug("Read size exceeds the Table size limits (%llu vs. %llu)\n", + offset + read_size, table_len); + return -EINVAL; + } + + while (off < read_size) { + err = cmd_start(dev, read_tbl->table_uid, opalmethod[OPAL_GET]); + + add_token_u8(&err, dev, OPAL_STARTLIST); + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_STARTROW); + add_token_u64(&err, dev, offset + off); /* start row value */ + add_token_u8(&err, dev, OPAL_ENDNAME); + + add_token_u8(&err, dev, OPAL_STARTNAME); + add_token_u8(&err, dev, OPAL_ENDROW); + + len = min(max_read_size, (size_t)(read_size - off)); + add_token_u64(&err, dev, offset + off + len); /* end row value + */ + add_token_u8(&err, dev, OPAL_ENDNAME); + add_token_u8(&err, dev, OPAL_ENDLIST); + + if (err) { + pr_debug("Error building read table data command.\n"); + break; + } + + err = finalize_and_send(dev, read_table_data_cont); + if (err) + break; + + /* len+1: This includes the NULL terminator at the end*/ + if (dev->prev_d_len > len + 1) { + err = -EOVERFLOW; + break; + } + + dst = (u8 __user *)(uintptr_t)read_tbl->data; + if (copy_to_user(dst + off, dev->prev_data, dev->prev_d_len)) { + pr_debug("Error copying data to userspace\n"); + err = -EFAULT; + break; + } + dev->prev_data = NULL; + + off += len; + } + + return err; +} + static int end_opal_session(struct opal_dev *dev, void *data) { int err = 0; @@ -2443,6 +2562,68 @@ bool opal_unlock_from_suspend(struct opal_dev *dev) } EXPORT_SYMBOL(opal_unlock_from_suspend); +static int opal_read_table(struct opal_dev *dev, + struct opal_read_write_table *rw_tbl) +{ + const struct opal_step read_table_steps[] = { + { start_admin1LSP_opal_session, &rw_tbl->key }, + { read_table_data, rw_tbl }, + { end_opal_session, } + }; + int ret = 0; + + if (!rw_tbl->size) + return ret; + + return execute_steps(dev, read_table_steps, + ARRAY_SIZE(read_table_steps)); +} + +static int opal_write_table(struct opal_dev *dev, + struct opal_read_write_table *rw_tbl) +{ + const struct opal_step write_table_steps[] = { + { start_admin1LSP_opal_session, &rw_tbl->key }, + { write_table_data, rw_tbl }, + { end_opal_session, } + }; + int ret = 0; + + if (!rw_tbl->size) + return ret; + + return execute_steps(dev, write_table_steps, + ARRAY_SIZE(write_table_steps)); +} + +static int opal_generic_read_write_table(struct opal_dev *dev, + struct opal_read_write_table *rw_tbl) +{ + int ret, bit_set; + + mutex_lock(&dev->dev_lock); + setup_opal_dev(dev); + + bit_set = fls64(rw_tbl->flags) - 1; + switch (bit_set) { + case OPAL_READ_TABLE: + ret = opal_read_table(dev, rw_tbl); + break; + case OPAL_WRITE_TABLE: + ret = opal_write_table(dev, rw_tbl); + break; + default: + pr_debug("Invalid bit set in the flag (%016llx).\n", + 
rw_tbl->flags); + ret = -EINVAL; + break; + } + + mutex_unlock(&dev->dev_lock); + + return ret; +} + int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg) { void *p; @@ -2505,6 +2686,9 @@ int sed_ioctl(struct opal_dev *dev, unsigned int cmd, void __user *arg) case IOC_OPAL_PSID_REVERT_TPR: ret = opal_reverttper(dev, p, true); break; + case IOC_OPAL_GENERIC_TABLE_RW: + ret = opal_generic_read_write_table(dev, p); + break; default: break; } diff --git a/block/t10-pi.c b/block/t10-pi.c index 9803c7e0376e..f4907d941f03 100644 --- a/block/t10-pi.c +++ b/block/t10-pi.c @@ -235,16 +235,12 @@ static blk_status_t t10_pi_type3_verify_ip(struct blk_integrity_iter *iter) return t10_pi_verify(iter, t10_pi_ip_fn, T10_PI_TYPE3_PROTECTION); } -/** - * Type 3 does not have a reference tag so no remapping is required. - */ +/* Type 3 does not have a reference tag so no remapping is required. */ static void t10_pi_type3_prepare(struct request *rq) { } -/** - * Type 3 does not have a reference tag so no remapping is required. - */ +/* Type 3 does not have a reference tag so no remapping is required. */ static void t10_pi_type3_complete(struct request *rq, unsigned int nr_bytes) { } diff --git a/drivers/block/loop.c b/drivers/block/loop.c index f6f77eaa7217..ef6e251857c8 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -417,18 +417,20 @@ out_free_page: return ret; } -static int lo_discard(struct loop_device *lo, struct request *rq, loff_t pos) +static int lo_fallocate(struct loop_device *lo, struct request *rq, loff_t pos, + int mode) { /* - * We use punch hole to reclaim the free space used by the - * image a.k.a. discard. However we do not support discard if - * encryption is enabled, because it may give an attacker - * useful information. + * We use fallocate to manipulate the space mappings used by the image + * a.k.a. discard/zerorange. However we do not support this if + * encryption is enabled, because it may give an attacker useful + * information. */ struct file *file = lo->lo_backing_file; - int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; int ret; + mode |= FALLOC_FL_KEEP_SIZE; + if ((!file->f_op->fallocate) || lo->lo_encrypt_key_size) { ret = -EOPNOTSUPP; goto out; @@ -596,9 +598,17 @@ static int do_req_filebacked(struct loop_device *lo, struct request *rq) switch (req_op(rq)) { case REQ_OP_FLUSH: return lo_req_flush(lo, rq); - case REQ_OP_DISCARD: case REQ_OP_WRITE_ZEROES: - return lo_discard(lo, rq, pos); + /* + * If the caller doesn't want deallocation, call zeroout to + * write zeroes the range. Otherwise, punch them out. + */ + return lo_fallocate(lo, rq, pos, + (rq->cmd_flags & REQ_NOUNMAP) ? + FALLOC_FL_ZERO_RANGE : + FALLOC_FL_PUNCH_HOLE); + case REQ_OP_DISCARD: + return lo_fallocate(lo, rq, pos, FALLOC_FL_PUNCH_HOLE); case REQ_OP_WRITE: if (lo->transfer) return lo_write_transfer(lo, rq, pos); diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c index 964f78cfffa0..f6bafa9a68b9 100644 --- a/drivers/block/mtip32xx/mtip32xx.c +++ b/drivers/block/mtip32xx/mtip32xx.c @@ -129,7 +129,7 @@ struct mtip_compat_ide_task_request_s { /* * This function check_for_surprise_removal is called * while card is removed from the system and it will - * read the vendor id from the configration space + * read the vendor id from the configuration space * * @pdev Pointer to the pci_dev structure. 
* diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index a235c45e22a7..93c2a3d403da 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -96,6 +96,8 @@ int null_zone_report(struct gendisk *disk, sector_t sector, blk_status_t null_handle_zoned(struct nullb_cmd *cmd, enum req_opf op, sector_t sector, sector_t nr_sectors); +size_t null_zone_valid_read_len(struct nullb *nullb, + sector_t sector, unsigned int len); #else static inline int null_zone_init(struct nullb_device *dev) { @@ -115,5 +117,11 @@ static inline blk_status_t null_handle_zoned(struct nullb_cmd *cmd, { return BLK_STS_NOTSUPP; } +static inline size_t null_zone_valid_read_len(struct nullb *nullb, + sector_t sector, + unsigned int len) +{ + return len; +} #endif /* CONFIG_BLK_DEV_ZONED */ #endif /* __NULL_BLK_H */ diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index 0e7da5015ccd..ea7a4d6b7848 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -227,7 +227,7 @@ static ssize_t nullb_device_uint_attr_store(unsigned int *val, int result; result = kstrtouint(page, 0, &tmp); - if (result) + if (result < 0) return result; *val = tmp; @@ -241,7 +241,7 @@ static ssize_t nullb_device_ulong_attr_store(unsigned long *val, unsigned long tmp; result = kstrtoul(page, 0, &tmp); - if (result) + if (result < 0) return result; *val = tmp; @@ -255,7 +255,7 @@ static ssize_t nullb_device_bool_attr_store(bool *val, const char *page, int result; result = kstrtobool(page, &tmp); - if (result) + if (result < 0) return result; *val = tmp; @@ -263,7 +263,7 @@ static ssize_t nullb_device_bool_attr_store(bool *val, const char *page, } /* The following macro should only be used with TYPE = {uint, ulong, bool}. */ -#define NULLB_DEVICE_ATTR(NAME, TYPE) \ +#define NULLB_DEVICE_ATTR(NAME, TYPE, APPLY) \ static ssize_t \ nullb_device_##NAME##_show(struct config_item *item, char *page) \ { \ @@ -274,31 +274,57 @@ static ssize_t \ nullb_device_##NAME##_store(struct config_item *item, const char *page, \ size_t count) \ { \ - if (test_bit(NULLB_DEV_FL_CONFIGURED, &to_nullb_device(item)->flags)) \ - return -EBUSY; \ - return nullb_device_##TYPE##_attr_store( \ - &to_nullb_device(item)->NAME, page, count); \ + int (*apply_fn)(struct nullb_device *dev, TYPE new_value) = APPLY; \ + struct nullb_device *dev = to_nullb_device(item); \ + TYPE new_value; \ + int ret; \ + \ + ret = nullb_device_##TYPE##_attr_store(&new_value, page, count); \ + if (ret < 0) \ + return ret; \ + if (apply_fn) \ + ret = apply_fn(dev, new_value); \ + else if (test_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags)) \ + ret = -EBUSY; \ + if (ret < 0) \ + return ret; \ + dev->NAME = new_value; \ + return count; \ } \ CONFIGFS_ATTR(nullb_device_, NAME); -NULLB_DEVICE_ATTR(size, ulong); -NULLB_DEVICE_ATTR(completion_nsec, ulong); -NULLB_DEVICE_ATTR(submit_queues, uint); -NULLB_DEVICE_ATTR(home_node, uint); -NULLB_DEVICE_ATTR(queue_mode, uint); -NULLB_DEVICE_ATTR(blocksize, uint); -NULLB_DEVICE_ATTR(irqmode, uint); -NULLB_DEVICE_ATTR(hw_queue_depth, uint); -NULLB_DEVICE_ATTR(index, uint); -NULLB_DEVICE_ATTR(blocking, bool); -NULLB_DEVICE_ATTR(use_per_node_hctx, bool); -NULLB_DEVICE_ATTR(memory_backed, bool); -NULLB_DEVICE_ATTR(discard, bool); -NULLB_DEVICE_ATTR(mbps, uint); -NULLB_DEVICE_ATTR(cache_size, ulong); -NULLB_DEVICE_ATTR(zoned, bool); -NULLB_DEVICE_ATTR(zone_size, ulong); -NULLB_DEVICE_ATTR(zone_nr_conv, uint); +static int nullb_apply_submit_queues(struct nullb_device *dev, + unsigned int submit_queues) +{ + 
struct nullb *nullb = dev->nullb; + struct blk_mq_tag_set *set; + + if (!nullb) + return 0; + + set = nullb->tag_set; + blk_mq_update_nr_hw_queues(set, submit_queues); + return set->nr_hw_queues == submit_queues ? 0 : -ENOMEM; +} + +NULLB_DEVICE_ATTR(size, ulong, NULL); +NULLB_DEVICE_ATTR(completion_nsec, ulong, NULL); +NULLB_DEVICE_ATTR(submit_queues, uint, nullb_apply_submit_queues); +NULLB_DEVICE_ATTR(home_node, uint, NULL); +NULLB_DEVICE_ATTR(queue_mode, uint, NULL); +NULLB_DEVICE_ATTR(blocksize, uint, NULL); +NULLB_DEVICE_ATTR(irqmode, uint, NULL); +NULLB_DEVICE_ATTR(hw_queue_depth, uint, NULL); +NULLB_DEVICE_ATTR(index, uint, NULL); +NULLB_DEVICE_ATTR(blocking, bool, NULL); +NULLB_DEVICE_ATTR(use_per_node_hctx, bool, NULL); +NULLB_DEVICE_ATTR(memory_backed, bool, NULL); +NULLB_DEVICE_ATTR(discard, bool, NULL); +NULLB_DEVICE_ATTR(mbps, uint, NULL); +NULLB_DEVICE_ATTR(cache_size, ulong, NULL); +NULLB_DEVICE_ATTR(zoned, bool, NULL); +NULLB_DEVICE_ATTR(zone_size, ulong, NULL); +NULLB_DEVICE_ATTR(zone_nr_conv, uint, NULL); static ssize_t nullb_device_power_show(struct config_item *item, char *page) { @@ -996,6 +1022,16 @@ next: return 0; } +static void nullb_fill_pattern(struct nullb *nullb, struct page *page, + unsigned int len, unsigned int off) +{ + void *dst; + + dst = kmap_atomic(page); + memset(dst + off, 0xFF, len); + kunmap_atomic(dst); +} + static void null_handle_discard(struct nullb *nullb, sector_t sector, size_t n) { size_t temp; @@ -1036,10 +1072,24 @@ static int null_transfer(struct nullb *nullb, struct page *page, unsigned int len, unsigned int off, bool is_write, sector_t sector, bool is_fua) { + struct nullb_device *dev = nullb->dev; + unsigned int valid_len = len; int err = 0; if (!is_write) { - err = copy_from_nullb(nullb, page, off, sector, len); + if (dev->zoned) + valid_len = null_zone_valid_read_len(nullb, + sector, len); + + if (valid_len) { + err = copy_from_nullb(nullb, page, off, + sector, valid_len); + off += valid_len; + len -= valid_len; + } + + if (len) + nullb_fill_pattern(nullb, page, len, off); flush_dcache_page(page); } else { flush_dcache_page(page); diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index eabc116832a7..be7646205b8c 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -84,6 +84,24 @@ int null_zone_report(struct gendisk *disk, sector_t sector, return 0; } +size_t null_zone_valid_read_len(struct nullb *nullb, + sector_t sector, unsigned int len) +{ + struct nullb_device *dev = nullb->dev; + struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)]; + unsigned int nr_sectors = len >> SECTOR_SHIFT; + + /* Read must be below the write pointer position */ + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL || + sector + nr_sectors <= zone->wp) + return len; + + if (sector > zone->wp) + return 0; + + return (zone->wp - sector) << SECTOR_SHIFT; +} + static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, unsigned int nr_sectors) { @@ -118,14 +136,14 @@ static blk_status_t null_zone_write(struct nullb_cmd *cmd, sector_t sector, return BLK_STS_OK; } -static blk_status_t null_zone_reset(struct nullb_cmd *cmd, sector_t sector) +static blk_status_t null_zone_mgmt(struct nullb_cmd *cmd, enum req_opf op, + sector_t sector) { struct nullb_device *dev = cmd->nq->dev; - unsigned int zno = null_zone_no(dev, sector); - struct blk_zone *zone = &dev->zones[zno]; + struct blk_zone *zone = &dev->zones[null_zone_no(dev, sector)]; size_t i; - switch (req_op(cmd->rq)) { + switch (op) { 
case REQ_OP_ZONE_RESET_ALL: for (i = 0; i < dev->nr_zones; i++) { if (zone[i].type == BLK_ZONE_TYPE_CONVENTIONAL) @@ -141,6 +159,29 @@ static blk_status_t null_zone_reset(struct nullb_cmd *cmd, sector_t sector) zone->cond = BLK_ZONE_COND_EMPTY; zone->wp = zone->start; break; + case REQ_OP_ZONE_OPEN: + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + return BLK_STS_IOERR; + if (zone->cond == BLK_ZONE_COND_FULL) + return BLK_STS_IOERR; + + zone->cond = BLK_ZONE_COND_EXP_OPEN; + break; + case REQ_OP_ZONE_CLOSE: + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + return BLK_STS_IOERR; + if (zone->cond == BLK_ZONE_COND_FULL) + return BLK_STS_IOERR; + + zone->cond = BLK_ZONE_COND_CLOSED; + break; + case REQ_OP_ZONE_FINISH: + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + return BLK_STS_IOERR; + + zone->cond = BLK_ZONE_COND_FULL; + zone->wp = zone->start + zone->len; + break; default: cmd->error = BLK_STS_NOTSUPP; break; @@ -156,7 +197,10 @@ blk_status_t null_handle_zoned(struct nullb_cmd *cmd, enum req_opf op, return null_zone_write(cmd, sector, nr_sectors); case REQ_OP_ZONE_RESET: case REQ_OP_ZONE_RESET_ALL: - return null_zone_reset(cmd, sector); + case REQ_OP_ZONE_OPEN: + case REQ_OP_ZONE_CLOSE: + case REQ_OP_ZONE_FINISH: + return null_zone_mgmt(cmd, op, sector); default: return BLK_STS_OK; } diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 2900fbde89b3..76587e9af0ef 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -280,7 +280,7 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio) struct flakey_c *fc = ti->private; bio_set_dev(bio, fc->dev->bdev); - if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) + if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio))) bio->bi_iter.bi_sector = flakey_map_sector(ti, bio->bi_iter.bi_sector); } @@ -322,8 +322,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); pb->bio_submitted = false; - /* Do not fail reset zone */ - if (bio_op(bio) == REQ_OP_ZONE_RESET) + if (op_is_zone_mgmt(bio_op(bio))) goto map_bio; /* Are we alive ? 
*/ @@ -384,7 +383,7 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, struct flakey_c *fc = ti->private; struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data)); - if (bio_op(bio) == REQ_OP_ZONE_RESET) + if (op_is_zone_mgmt(bio_op(bio))) return DM_ENDIO_DONE; if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) { diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index ecefe6703736..97acafd48c85 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -90,7 +90,7 @@ static void linear_map_bio(struct dm_target *ti, struct bio *bio) struct linear_c *lc = ti->private; bio_set_dev(bio, lc->dev->bdev); - if (bio_sectors(bio) || bio_op(bio) == REQ_OP_ZONE_RESET) + if (bio_sectors(bio) || op_is_zone_mgmt(bio_op(bio))) bio->bi_iter.bi_sector = linear_map_sector(ti, bio->bi_iter.bi_sector); } diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 595a73110e17..feb4718ce6a6 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -1312,9 +1312,9 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone) if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) { struct dmz_dev *dev = zmd->dev; - ret = blkdev_reset_zones(dev->bdev, - dmz_start_sect(zmd, zone), - dev->zone_nr_sectors, GFP_NOIO); + ret = blkdev_zone_mgmt(dev->bdev, REQ_OP_ZONE_RESET, + dmz_start_sect(zmd, zone), + dev->zone_nr_sectors, GFP_NOIO); if (ret) { dmz_dev_err(dev, "Reset zone %u failed %d", dmz_id(zmd, zone), ret); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 1a5e328c443a..bc143c1b2333 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1174,7 +1174,8 @@ static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, /* * A target may call dm_accept_partial_bio only from the map routine. It is - * allowed for all bio types except REQ_PREFLUSH and REQ_OP_ZONE_RESET. + * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_RESET, + * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE and REQ_OP_ZONE_FINISH. 
* * dm_accept_partial_bio informs the dm that the target only wants to process * additional n_sectors sectors of the bio and the rest of the data should be @@ -1627,7 +1628,7 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, ci.sector_count = 0; error = __send_empty_flush(&ci); /* dec_pending submits any data associated with flush */ - } else if (bio_op(bio) == REQ_OP_ZONE_RESET) { + } else if (op_is_zone_mgmt(bio_op(bio))) { ci.bio = bio; ci.sector_count = 0; error = __split_and_process_non_flush(&ci); diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index b092c7b5282f..3ad18246fcb3 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -2139,6 +2139,7 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, memcpy(page_address(store.sb_page), page_address(bitmap->storage.sb_page), sizeof(bitmap_super_t)); + spin_lock_irq(&bitmap->counts.lock); md_bitmap_file_unmap(&bitmap->storage); bitmap->storage = store; @@ -2154,7 +2155,6 @@ int md_bitmap_resize(struct bitmap *bitmap, sector_t blocks, blocks = min(old_counts.chunks << old_counts.chunkshift, chunks << chunkshift); - spin_lock_irq(&bitmap->counts.lock); /* For cluster raid, need to pre-allocate bitmap */ if (mddev_is_clustered(bitmap->mddev)) { unsigned long page; diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c index c766c559d36d..26c75c0199fa 100644 --- a/drivers/md/md-linear.c +++ b/drivers/md/md-linear.c @@ -244,10 +244,9 @@ static bool linear_make_request(struct mddev *mddev, struct bio *bio) sector_t start_sector, end_sector, data_offset; sector_t bio_sector = bio->bi_iter.bi_sector; - if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { - md_flush_request(mddev, bio); + if (unlikely(bio->bi_opf & REQ_PREFLUSH) + && md_flush_request(mddev, bio)) return true; - } tmp_dev = which_dev(mddev, bio_sector); start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors; diff --git a/drivers/md/md-multipath.c b/drivers/md/md-multipath.c index 6780938d2991..152f9e65a226 100644 --- a/drivers/md/md-multipath.c +++ b/drivers/md/md-multipath.c @@ -104,10 +104,9 @@ static bool multipath_make_request(struct mddev *mddev, struct bio * bio) struct multipath_bh * mp_bh; struct multipath_info *multipath; - if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { - md_flush_request(mddev, bio); + if (unlikely(bio->bi_opf & REQ_PREFLUSH) + && md_flush_request(mddev, bio)) return true; - } mp_bh = mempool_alloc(&conf->pool, GFP_NOIO); diff --git a/drivers/md/md.c b/drivers/md/md.c index 1be7abeb24fd..6f0ecfe8eab2 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -550,7 +550,13 @@ static void md_submit_flush_data(struct work_struct *ws) } } -void md_flush_request(struct mddev *mddev, struct bio *bio) +/* + * Manages consolidation of flushes and submitting any flushes needed for + * a bio with REQ_PREFLUSH. Returns true if the bio is finished or is + * being finished in another context. Returns false if the flushing is + * complete but still needs the I/O portion of the bio to be processed. 
+ */ +bool md_flush_request(struct mddev *mddev, struct bio *bio) { ktime_t start = ktime_get_boottime(); spin_lock_irq(&mddev->lock); @@ -575,9 +581,10 @@ void md_flush_request(struct mddev *mddev, struct bio *bio) bio_endio(bio); else { bio->bi_opf &= ~REQ_PREFLUSH; - mddev->pers->make_request(mddev, bio); + return false; } } + return true; } EXPORT_SYMBOL(md_flush_request); @@ -1149,7 +1156,15 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor rdev->desc_nr = sb->this_disk.number; if (!refdev) { - ret = 1; + /* + * Insist on good event counter while assembling, except + * for spares (which don't need an event count) + */ + if (sb->disks[rdev->desc_nr].state & ( + (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE))) + ret = 1; + else + ret = 0; } else { __u64 ev1, ev2; mdp_super_t *refsb = page_address(refdev->sb_page); @@ -1165,7 +1180,14 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor } ev1 = md_event(sb); ev2 = md_event(refsb); - if (ev1 > ev2) + + /* + * Insist on good event counter while assembling, except + * for spares (which don't need an event count) + */ + if (sb->disks[rdev->desc_nr].state & ( + (1<<MD_DISK_SYNC) | (1 << MD_DISK_ACTIVE)) && + (ev1 > ev2)) ret = 1; else ret = 0; @@ -1525,6 +1547,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ sector_t sectors; char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; int bmask; + __u64 role; /* * Calculate the position of the superblock in 512byte sectors. @@ -1658,8 +1681,20 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ sb->level != 0) return -EINVAL; + role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]); + if (!refdev) { - ret = 1; + /* + * Insist of good event counter while assembling, except for + * spares (which don't need an event count) + */ + if (rdev->desc_nr >= 0 && + rdev->desc_nr < le32_to_cpu(sb->max_dev) && + (role < MD_DISK_ROLE_MAX || + role == MD_DISK_ROLE_JOURNAL)) + ret = 1; + else + ret = 0; } else { __u64 ev1, ev2; struct mdp_superblock_1 *refsb = page_address(refdev->sb_page); @@ -1676,7 +1711,14 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ ev1 = le64_to_cpu(sb->events); ev2 = le64_to_cpu(refsb->events); - if (ev1 > ev2) + /* + * Insist of good event counter while assembling, except for + * spares (which don't need an event count) + */ + if (rdev->desc_nr >= 0 && + rdev->desc_nr < le32_to_cpu(sb->max_dev) && + (role < MD_DISK_ROLE_MAX || + role == MD_DISK_ROLE_JOURNAL) && ev1 > ev2) ret = 1; else ret = 0; @@ -3597,7 +3639,7 @@ abort_free: * Check a full RAID array for plausibility */ -static void analyze_sbs(struct mddev *mddev) +static int analyze_sbs(struct mddev *mddev) { int i; struct md_rdev *rdev, *freshest, *tmp; @@ -3618,6 +3660,12 @@ static void analyze_sbs(struct mddev *mddev) md_kick_rdev_from_array(rdev); } + /* Cannot find a valid fresh disk */ + if (!freshest) { + pr_warn("md: cannot find a valid disk\n"); + return -EINVAL; + } + super_types[mddev->major_version]. validate_super(mddev, freshest); @@ -3652,6 +3700,8 @@ static void analyze_sbs(struct mddev *mddev) clear_bit(In_sync, &rdev->flags); } } + + return 0; } /* Read a fixed-point number. 
@@ -5570,7 +5620,9 @@ int md_run(struct mddev *mddev) if (!mddev->raid_disks) { if (!mddev->persistent) return -EINVAL; - analyze_sbs(mddev); + err = analyze_sbs(mddev); + if (err) + return -EINVAL; } if (mddev->level != LEVEL_NONE) diff --git a/drivers/md/md.h b/drivers/md/md.h index c5e3ff398b59..5f86f8adb0a4 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -550,7 +550,7 @@ struct md_personality int level; struct list_head list; struct module *owner; - bool (*make_request)(struct mddev *mddev, struct bio *bio); + bool __must_check (*make_request)(struct mddev *mddev, struct bio *bio); /* * start up works that do NOT require md_thread. tasks that * requires md_thread should go into start() @@ -703,7 +703,7 @@ extern void md_error(struct mddev *mddev, struct md_rdev *rdev); extern void md_finish_reshape(struct mddev *mddev); extern int mddev_congested(struct mddev *mddev, int bits); -extern void md_flush_request(struct mddev *mddev, struct bio *bio); +extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio); extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev, sector_t sector, int size, struct page *page); extern int md_super_wait(struct mddev *mddev); diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index f61693e59684..f2b83bd2fee6 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -575,10 +575,9 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio) unsigned chunk_sects; unsigned sectors; - if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { - md_flush_request(mddev, bio); + if (unlikely(bio->bi_opf & REQ_PREFLUSH) + && md_flush_request(mddev, bio)) return true; - } if (unlikely((bio_op(bio) == REQ_OP_DISCARD))) { raid0_handle_discard(mddev, bio); @@ -615,7 +614,7 @@ static bool raid0_make_request(struct mddev *mddev, struct bio *bio) tmp_dev = map_sector(mddev, zone, sector, §or); break; default: - WARN("md/raid0:%s: Invalid layout\n", mdname(mddev)); + WARN(1, "md/raid0:%s: Invalid layout\n", mdname(mddev)); bio_io_error(bio); return true; } diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 0466ee2453b4..bb29aeefcbd0 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -1567,10 +1567,9 @@ static bool raid1_make_request(struct mddev *mddev, struct bio *bio) { sector_t sectors; - if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { - md_flush_request(mddev, bio); + if (unlikely(bio->bi_opf & REQ_PREFLUSH) + && md_flush_request(mddev, bio)) return true; - } /* * There is a limit to the maximum size, but diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 299c7b1c9718..2eca0a81a8c9 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1525,10 +1525,9 @@ static bool raid10_make_request(struct mddev *mddev, struct bio *bio) int chunk_sects = chunk_mask + 1; int sectors = bio_sectors(bio); - if (unlikely(bio->bi_opf & REQ_PREFLUSH)) { - md_flush_request(mddev, bio); + if (unlikely(bio->bi_opf & REQ_PREFLUSH) + && md_flush_request(mddev, bio)) return true; - } if (!md_write_start(mddev, bio)) return false; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 223e97ab27e6..12a8ce83786e 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5592,8 +5592,8 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) if (ret == 0) return true; if (ret == -ENODEV) { - md_flush_request(mddev, bi); - return true; + if (md_flush_request(mddev, bi)) + return true; } /* ret == -EAGAIN, fallback */ /* diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 
fd7dea36c3b6..b4214e54f2d2 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -268,6 +268,8 @@ void nvme_complete_rq(struct request *req) trace_nvme_complete_rq(req); + nvme_cleanup_cmd(req); + if (nvme_req(req)->ctrl->kas) nvme_req(req)->ctrl->comp_seen = true; @@ -298,7 +300,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved) if (blk_mq_request_completed(req)) return true; - nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR; + nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD; blk_mq_complete_request(req); return true; } @@ -587,7 +589,7 @@ static blk_status_t nvme_setup_discard(struct nvme_ns *ns, struct request *req, } __rq_for_each_bio(bio, req) { - u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector); + u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector); u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift; if (n < segments) { @@ -628,7 +630,7 @@ static inline blk_status_t nvme_setup_write_zeroes(struct nvme_ns *ns, cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes; cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id); cmnd->write_zeroes.slba = - cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); + cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->write_zeroes.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); cmnd->write_zeroes.control = 0; @@ -652,7 +654,7 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns, cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read); cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id); - cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req))); + cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req))); cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1); if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams) @@ -1609,7 +1611,7 @@ static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) static void nvme_set_chunk_size(struct nvme_ns *ns) { - u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9)); + u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob); blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); } @@ -1646,8 +1648,7 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) { - u32 max_sectors; - unsigned short bs = 1 << ns->lba_shift; + u64 max_blocks; if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) || (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES)) @@ -1663,11 +1664,12 @@ static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns) * nvme_init_identify() if available. 
*/ if (ns->ctrl->max_hw_sectors == UINT_MAX) - max_sectors = ((u32)(USHRT_MAX + 1) * bs) >> 9; + max_blocks = (u64)USHRT_MAX + 1; else - max_sectors = ((u32)(ns->ctrl->max_hw_sectors + 1) * bs) >> 9; + max_blocks = ns->ctrl->max_hw_sectors + 1; - blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors); + blk_queue_max_write_zeroes_sectors(disk->queue, + nvme_lba_to_sect(ns, max_blocks)); } static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, @@ -1710,7 +1712,7 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { - sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9); + sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze)); unsigned short bs = 1 << ns->lba_shift; u32 atomic_bs, phys_bs, io_opt; diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 265f89e11d8b..679a721ae229 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1224,7 +1224,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, lsreq->rqstlen = sizeof(*assoc_rqst); lsreq->rspaddr = assoc_acc; lsreq->rsplen = sizeof(*assoc_acc); - lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; + lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) @@ -1264,7 +1264,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, if (fcret) { ret = -EBADF; dev_err(ctrl->dev, - "q %d connect failed: %s\n", + "q %d Create Association LS failed: %s\n", queue->qnum, validation_errors[fcret]); } else { ctrl->association_id = @@ -1332,7 +1332,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, lsreq->rqstlen = sizeof(*conn_rqst); lsreq->rspaddr = conn_acc; lsreq->rsplen = sizeof(*conn_acc); - lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; + lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; ret = nvme_fc_send_ls_req(ctrl->rport, lsop); if (ret) @@ -1363,7 +1363,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, if (fcret) { ret = -EBADF; dev_err(ctrl->dev, - "q %d connect failed: %s\n", + "q %d Create I/O Connection LS failed: %s\n", queue->qnum, validation_errors[fcret]); } else { queue->connection_id = @@ -1376,7 +1376,7 @@ out_free_buffer: out_no_memory: if (ret) dev_err(ctrl->dev, - "queue %d connect command failed (%d).\n", + "queue %d connect I/O queue failed (%d).\n", queue->qnum, ret); return ret; } @@ -1413,8 +1413,8 @@ nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) static void nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) { - struct fcnvme_ls_disconnect_rqst *discon_rqst; - struct fcnvme_ls_disconnect_acc *discon_acc; + struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst; + struct fcnvme_ls_disconnect_assoc_acc *discon_acc; struct nvmefc_ls_req_op *lsop; struct nvmefc_ls_req *lsreq; int ret; @@ -1430,11 +1430,11 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) lsreq = &lsop->ls_req; lsreq->private = (void *)&lsop[1]; - discon_rqst = (struct fcnvme_ls_disconnect_rqst *) + discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *) (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); - discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1]; + discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1]; - discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT; + discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC; discon_rqst->desc_list_len = cpu_to_be32( sizeof(struct fcnvme_lsdesc_assoc_id) + 
sizeof(struct fcnvme_lsdesc_disconn_cmd)); @@ -1451,22 +1451,17 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) discon_rqst->discon_cmd.desc_len = fcnvme_lsdesc_len( sizeof(struct fcnvme_lsdesc_disconn_cmd)); - discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION; - discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id); lsreq->rqstaddr = discon_rqst; lsreq->rqstlen = sizeof(*discon_rqst); lsreq->rspaddr = discon_acc; lsreq->rsplen = sizeof(*discon_acc); - lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; + lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, nvme_fc_disconnect_assoc_done); if (ret) kfree(lsop); - - /* only meaningful part to terminating the association */ - ctrl->association_id = 0; } @@ -1662,7 +1657,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) (freq->rcv_rsplen / 4) || be32_to_cpu(op->rsp_iu.xfrd_len) != freq->transferred_length || - op->rsp_iu.status_code || + op->rsp_iu.ersp_result || sqe->common.command_id != cqe->command_id)) { status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1); dev_info(ctrl->ctrl.device, @@ -1672,7 +1667,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) ctrl->cnum, be16_to_cpu(op->rsp_iu.iu_len), be32_to_cpu(op->rsp_iu.xfrd_len), freq->transferred_length, - op->rsp_iu.status_code, + op->rsp_iu.ersp_result, sqe->common.command_id, cqe->command_id); goto done; @@ -1731,9 +1726,14 @@ __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, op->rq = rq; op->rqno = rqno; - cmdiu->scsi_id = NVME_CMD_SCSI_ID; + cmdiu->format_id = NVME_CMD_FORMAT_ID; cmdiu->fc_id = NVME_CMD_FC_ID; cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); + if (queue->qnum) + cmdiu->rsv_cat = fccmnd_set_cat_css(0, + (NVME_CC_CSS_NVM >> NVME_CC_CSS_SHIFT)); + else + cmdiu->rsv_cat = fccmnd_set_cat_admin(0); op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev, &op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE); @@ -2173,8 +2173,6 @@ nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq, fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents, rq_dma_dir(rq)); - nvme_cleanup_cmd(rq); - sg_free_table_chained(&freq->sg_table, SG_CHUNK_SIZE); freq->sg_cnt = 0; @@ -2305,6 +2303,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, if (!(op->flags & FCOP_FLAGS_AEN)) nvme_fc_unmap_data(ctrl, op->rq, op); + nvme_cleanup_cmd(op->rq); nvme_fc_ctrl_put(ctrl); if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE && @@ -2695,7 +2694,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) /* warn if maxcmd is lower than queue_size */ dev_warn(ctrl->ctrl.device, "queue_size %zu > ctrl maxcmd %u, reducing " - "to queue_size\n", + "to maxcmd\n", opts->queue_size, ctrl->ctrl.maxcmd); opts->queue_size = ctrl->ctrl.maxcmd; } @@ -2703,7 +2702,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) if (opts->queue_size > ctrl->ctrl.sqsize + 1) { /* warn if sqsize is lower than queue_size */ dev_warn(ctrl->ctrl.device, - "queue_size %zu > ctrl sqsize %u, clamping down\n", + "queue_size %zu > ctrl sqsize %u, reducing " + "to sqsize\n", opts->queue_size, ctrl->ctrl.sqsize + 1); opts->queue_size = ctrl->ctrl.sqsize + 1; } @@ -2739,6 +2739,7 @@ out_term_aen_ops: out_disconnect_admin_queue: /* send a Disconnect(association) LS to fc-nvme target */ nvme_fc_xmt_disconnect_assoc(ctrl); + ctrl->association_id = 0; out_delete_hw_queue: __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); out_free_queue: @@ -2830,6 +2831,8 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) if 
(ctrl->association_id) nvme_fc_xmt_disconnect_assoc(ctrl); + ctrl->association_id = 0; + if (ctrl->ctrl.tagset) { nvme_fc_delete_hw_io_queues(ctrl); nvme_fc_free_io_queues(ctrl); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 30de7efef003..682be6195a95 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -95,6 +95,7 @@ void nvme_failover_req(struct request *req) } break; case NVME_SC_HOST_PATH_ERROR: + case NVME_SC_HOST_ABORTED_CMD: /* * Temporary transport disruption in talking to the controller. * Try to send on a new path. @@ -444,8 +445,14 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data, for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) { struct nvme_ana_group_desc *desc = base + offset; - u32 nr_nsids = le32_to_cpu(desc->nnsids); - size_t nsid_buf_size = nr_nsids * sizeof(__le32); + u32 nr_nsids; + size_t nsid_buf_size; + + if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc))) + return -EINVAL; + + nr_nsids = le32_to_cpu(desc->nnsids); + nsid_buf_size = nr_nsids * sizeof(__le32); if (WARN_ON_ONCE(desc->grpid == 0)) return -EINVAL; @@ -465,8 +472,6 @@ static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data, return error; offset += nsid_buf_size; - if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc))) - return -EINVAL; } return 0; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 38a83ef5bcd3..2637d9dd278f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -418,9 +418,20 @@ static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl) return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65); } -static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) +/* + * Convert a 512B sector number to a device logical block number. + */ +static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector) +{ + return sector >> (ns->lba_shift - SECTOR_SHIFT); +} + +/* + * Convert a device logical block number to a 512B sector number. + */ +static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba) { - return (sector >> (ns->lba_shift - 9)); + return lba << (ns->lba_shift - SECTOR_SHIFT); } static inline void nvme_end_request(struct request *req, __le16 status, @@ -445,6 +456,11 @@ static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl) put_device(ctrl->device); } +static inline bool nvme_is_aen_req(u16 qid, __u16 command_id) +{ + return !qid && command_id >= NVME_AQ_BLK_MQ_DEPTH; +} + void nvme_complete_rq(struct request *req); bool nvme_cancel_request(struct request *req, void *data, bool reserved); bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index bb88681f4dc3..1b1b0db45567 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -924,7 +924,6 @@ static void nvme_pci_complete_rq(struct request *req) struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_dev *dev = iod->nvmeq->dev; - nvme_cleanup_cmd(req); if (blk_integrity_rq(req)) dma_unmap_page(dev->dev, iod->meta_dma, rq_integrity_vec(req)->bv_len, rq_data_dir(req)); @@ -967,8 +966,7 @@ static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. 
*/ - if (unlikely(nvmeq->qid == 0 && - cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) { + if (unlikely(nvme_is_aen_req(nvmeq->qid, cqe->command_id))) { nvme_complete_async_event(&nvmeq->dev->ctrl, cqe->status, &cqe->result); return; @@ -2963,7 +2961,7 @@ static int nvme_suspend(struct device *dev) /* * Clearing npss forces a controller reset on resume. The - * correct value will be resdicovered then. + * correct value will be rediscovered then. */ nvme_dev_disable(ndev, true); ctrl->npss = 0; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 4d280160dd3f..05f2dfa3d218 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1160,8 +1160,6 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, } ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq)); - - nvme_cleanup_cmd(rq); sg_free_table_chained(&req->sg_table, SG_CHUNK_SIZE); } @@ -1501,8 +1499,8 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. */ - if (unlikely(nvme_rdma_queue_idx(queue) == 0 && - cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) + if (unlikely(nvme_is_aen_req(nvme_rdma_queue_idx(queue), + cqe->command_id))) nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, &cqe->result); else @@ -1760,7 +1758,6 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, if (unlikely(err < 0)) { dev_err(queue->ctrl->ctrl.device, "Failed to map data (%d)\n", err); - nvme_cleanup_cmd(rq); goto err; } @@ -1771,18 +1768,19 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, req->mr ? &req->reg_wr.wr : NULL); - if (unlikely(err)) { - nvme_rdma_unmap_data(queue, rq); - goto err; - } + if (unlikely(err)) + goto err_unmap; return BLK_STS_OK; +err_unmap: + nvme_rdma_unmap_data(queue, rq); err: if (err == -ENOMEM || err == -EAGAIN) ret = BLK_STS_RESOURCE; else ret = BLK_STS_IOERR; + nvme_cleanup_cmd(rq); unmap_qe: ib_dma_unmap_single(dev, req->sqe.dma, sizeof(struct nvme_command), DMA_TO_DEVICE); diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 385a5212c10f..124fda67613a 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -491,8 +491,8 @@ static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue, * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. 
*/ - if (unlikely(nvme_tcp_queue_id(queue) == 0 && - cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) + if (unlikely(nvme_is_aen_req(nvme_tcp_queue_id(queue), + cqe->command_id))) nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, &cqe->result); else diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 831a062d27cb..56c21b501185 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -31,7 +31,7 @@ u64 nvmet_get_log_page_offset(struct nvme_command *cmd) static void nvmet_execute_get_log_page_noop(struct nvmet_req *req) { - nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len)); + nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->transfer_len)); } static void nvmet_execute_get_log_page_error(struct nvmet_req *req) @@ -134,7 +134,7 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req) u16 status = NVME_SC_INTERNAL; unsigned long flags; - if (req->data_len != sizeof(*log)) + if (req->transfer_len != sizeof(*log)) goto out; log = kzalloc(sizeof(*log), GFP_KERNEL); @@ -196,7 +196,7 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req) u16 status = NVME_SC_INTERNAL; size_t len; - if (req->data_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32)) + if (req->transfer_len != NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32)) goto out; mutex_lock(&ctrl->lock); @@ -206,7 +206,7 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req) len = ctrl->nr_changed_ns * sizeof(__le32); status = nvmet_copy_to_sgl(req, 0, ctrl->changed_ns_list, len); if (!status) - status = nvmet_zero_sgl(req, len, req->data_len - len); + status = nvmet_zero_sgl(req, len, req->transfer_len - len); ctrl->nr_changed_ns = 0; nvmet_clear_aen_bit(req, NVME_AEN_BIT_NS_ATTR); mutex_unlock(&ctrl->lock); @@ -282,6 +282,36 @@ out: nvmet_req_complete(req, status); } +static void nvmet_execute_get_log_page(struct nvmet_req *req) +{ + if (!nvmet_check_data_len(req, nvmet_get_log_page_len(req->cmd))) + return; + + switch (req->cmd->get_log_page.lid) { + case NVME_LOG_ERROR: + return nvmet_execute_get_log_page_error(req); + case NVME_LOG_SMART: + return nvmet_execute_get_log_page_smart(req); + case NVME_LOG_FW_SLOT: + /* + * We only support a single firmware slot which always is + * active, so we can zero out the whole firmware slot log and + * still claim to fully implement this mandatory log page. 
+ */ + return nvmet_execute_get_log_page_noop(req); + case NVME_LOG_CHANGED_NS: + return nvmet_execute_get_log_changed_ns(req); + case NVME_LOG_CMD_EFFECTS: + return nvmet_execute_get_log_cmd_effects_ns(req); + case NVME_LOG_ANA: + return nvmet_execute_get_log_page_ana(req); + } + pr_err("unhandled lid %d on qid %d\n", + req->cmd->get_log_page.lid, req->sq->qid); + req->error_loc = offsetof(struct nvme_get_log_page_command, lid); + nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR); +} + static void nvmet_execute_identify_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; @@ -565,6 +595,28 @@ out: nvmet_req_complete(req, status); } +static void nvmet_execute_identify(struct nvmet_req *req) +{ + if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE)) + return; + + switch (req->cmd->identify.cns) { + case NVME_ID_CNS_NS: + return nvmet_execute_identify_ns(req); + case NVME_ID_CNS_CTRL: + return nvmet_execute_identify_ctrl(req); + case NVME_ID_CNS_NS_ACTIVE_LIST: + return nvmet_execute_identify_nslist(req); + case NVME_ID_CNS_NS_DESC_LIST: + return nvmet_execute_identify_desclist(req); + } + + pr_err("unhandled identify cns %d on qid %d\n", + req->cmd->identify.cns, req->sq->qid); + req->error_loc = offsetof(struct nvme_identify, cns); + nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR); +} + /* * A "minimum viable" abort implementation: the command is mandatory in the * spec, but we are not required to do any useful work. We couldn't really @@ -574,6 +626,8 @@ out: */ static void nvmet_execute_abort(struct nvmet_req *req) { + if (!nvmet_check_data_len(req, 0)) + return; nvmet_set_result(req, 1); nvmet_req_complete(req, 0); } @@ -658,6 +712,9 @@ static void nvmet_execute_set_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 status = 0; + if (!nvmet_check_data_len(req, 0)) + return; + switch (cdw10 & 0xff) { case NVME_FEAT_NUM_QUEUES: nvmet_set_result(req, @@ -721,6 +778,9 @@ static void nvmet_execute_get_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 status = 0; + if (!nvmet_check_data_len(req, 0)) + return; + switch (cdw10 & 0xff) { /* * These features are mandatory in the spec, but we don't @@ -785,6 +845,9 @@ void nvmet_execute_async_event(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; + if (!nvmet_check_data_len(req, 0)) + return; + mutex_lock(&ctrl->lock); if (ctrl->nr_async_event_cmds >= NVMET_ASYNC_EVENTS) { mutex_unlock(&ctrl->lock); @@ -801,6 +864,9 @@ void nvmet_execute_keep_alive(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; + if (!nvmet_check_data_len(req, 0)) + return; + pr_debug("ctrl %d update keep-alive timer for %d secs\n", ctrl->cntlid, ctrl->kato); @@ -813,77 +879,36 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) struct nvme_command *cmd = req->cmd; u16 ret; + if (nvme_is_fabrics(cmd)) + return nvmet_parse_fabrics_cmd(req); + if (req->sq->ctrl->subsys->type == NVME_NQN_DISC) + return nvmet_parse_discovery_cmd(req); + ret = nvmet_check_ctrl_status(req, cmd); if (unlikely(ret)) return ret; switch (cmd->common.opcode) { case nvme_admin_get_log_page: - req->data_len = nvmet_get_log_page_len(cmd); - - switch (cmd->get_log_page.lid) { - case NVME_LOG_ERROR: - req->execute = nvmet_execute_get_log_page_error; - return 0; - case NVME_LOG_SMART: - req->execute = nvmet_execute_get_log_page_smart; - return 0; - case NVME_LOG_FW_SLOT: - /* - * We only support a single firmware slot which always - * is active, so we can zero out 
the whole firmware slot - * log and still claim to fully implement this mandatory - * log page. - */ - req->execute = nvmet_execute_get_log_page_noop; - return 0; - case NVME_LOG_CHANGED_NS: - req->execute = nvmet_execute_get_log_changed_ns; - return 0; - case NVME_LOG_CMD_EFFECTS: - req->execute = nvmet_execute_get_log_cmd_effects_ns; - return 0; - case NVME_LOG_ANA: - req->execute = nvmet_execute_get_log_page_ana; - return 0; - } - break; + req->execute = nvmet_execute_get_log_page; + return 0; case nvme_admin_identify: - req->data_len = NVME_IDENTIFY_DATA_SIZE; - switch (cmd->identify.cns) { - case NVME_ID_CNS_NS: - req->execute = nvmet_execute_identify_ns; - return 0; - case NVME_ID_CNS_CTRL: - req->execute = nvmet_execute_identify_ctrl; - return 0; - case NVME_ID_CNS_NS_ACTIVE_LIST: - req->execute = nvmet_execute_identify_nslist; - return 0; - case NVME_ID_CNS_NS_DESC_LIST: - req->execute = nvmet_execute_identify_desclist; - return 0; - } - break; + req->execute = nvmet_execute_identify; + return 0; case nvme_admin_abort_cmd: req->execute = nvmet_execute_abort; - req->data_len = 0; return 0; case nvme_admin_set_features: req->execute = nvmet_execute_set_features; - req->data_len = 0; return 0; case nvme_admin_get_features: req->execute = nvmet_execute_get_features; - req->data_len = 0; return 0; case nvme_admin_async_event: req->execute = nvmet_execute_async_event; - req->data_len = 0; return 0; case nvme_admin_keep_alive: req->execute = nvmet_execute_keep_alive; - req->data_len = 0; return 0; } diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 3a67e244e568..28438b833c1b 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -892,14 +892,10 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, } if (unlikely(!req->sq->ctrl)) - /* will return an error for any Non-connect command: */ + /* will return an error for any non-connect command: */ status = nvmet_parse_connect_cmd(req); else if (likely(req->sq->qid != 0)) status = nvmet_parse_io_cmd(req); - else if (nvme_is_fabrics(req->cmd)) - status = nvmet_parse_fabrics_cmd(req); - else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC) - status = nvmet_parse_discovery_cmd(req); else status = nvmet_parse_admin_cmd(req); @@ -930,15 +926,17 @@ void nvmet_req_uninit(struct nvmet_req *req) } EXPORT_SYMBOL_GPL(nvmet_req_uninit); -void nvmet_req_execute(struct nvmet_req *req) +bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len) { - if (unlikely(req->data_len != req->transfer_len)) { + if (unlikely(data_len != req->transfer_len)) { req->error_loc = offsetof(struct nvme_common_command, dptr); nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR); - } else - req->execute(req); + return false; + } + + return true; } -EXPORT_SYMBOL_GPL(nvmet_req_execute); +EXPORT_SYMBOL_GPL(nvmet_check_data_len); int nvmet_req_alloc_sgl(struct nvmet_req *req) { @@ -966,7 +964,7 @@ int nvmet_req_alloc_sgl(struct nvmet_req *req) } req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt); - if (!req->sg) + if (unlikely(!req->sg)) return -ENOMEM; return 0; diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 3764a8900850..0c2274b21e15 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -157,7 +157,7 @@ static size_t discovery_log_entries(struct nvmet_req *req) return entries; } -static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) +static void nvmet_execute_disc_get_log_page(struct nvmet_req *req) { const 
int entry_size = sizeof(struct nvmf_disc_rsp_page_entry); struct nvmet_ctrl *ctrl = req->sq->ctrl; @@ -171,6 +171,16 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req) u16 status = 0; void *buffer; + if (!nvmet_check_data_len(req, data_len)) + return; + + if (req->cmd->get_log_page.lid != NVME_LOG_DISC) { + req->error_loc = + offsetof(struct nvme_get_log_page_command, lid); + status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR; + goto out; + } + /* Spec requires dword aligned offsets */ if (offset & 0x3) { status = NVME_SC_INVALID_FIELD | NVME_SC_DNR; @@ -227,20 +237,35 @@ out: nvmet_req_complete(req, status); } -static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req) +static void nvmet_execute_disc_identify(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; struct nvme_id_ctrl *id; + const char model[] = "Linux"; u16 status = 0; + if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE)) + return; + + if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) { + req->error_loc = offsetof(struct nvme_identify, cns); + status = NVME_SC_INVALID_OPCODE | NVME_SC_DNR; + goto out; + } + id = kzalloc(sizeof(*id), GFP_KERNEL); if (!id) { status = NVME_SC_INTERNAL; goto out; } + memset(id->sn, ' ', sizeof(id->sn)); + bin2hex(id->sn, &ctrl->subsys->serial, + min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2)); memset(id->fr, ' ', sizeof(id->fr)); - strncpy((char *)id->fr, UTS_RELEASE, sizeof(id->fr)); + memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' '); + memcpy_and_pad(id->fr, sizeof(id->fr), + UTS_RELEASE, strlen(UTS_RELEASE), ' '); /* no limit on data transfer sizes for now */ id->mdts = 0; @@ -273,6 +298,9 @@ static void nvmet_execute_disc_set_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 stat; + if (!nvmet_check_data_len(req, 0)) + return; + switch (cdw10 & 0xff) { case NVME_FEAT_KATO: stat = nvmet_set_feat_kato(req); @@ -296,6 +324,9 @@ static void nvmet_execute_disc_get_features(struct nvmet_req *req) u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); u16 stat = 0; + if (!nvmet_check_data_len(req, 0)) + return; + switch (cdw10 & 0xff) { case NVME_FEAT_KATO: nvmet_get_feat_kato(req); @@ -328,47 +359,22 @@ u16 nvmet_parse_discovery_cmd(struct nvmet_req *req) switch (cmd->common.opcode) { case nvme_admin_set_features: req->execute = nvmet_execute_disc_set_features; - req->data_len = 0; return 0; case nvme_admin_get_features: req->execute = nvmet_execute_disc_get_features; - req->data_len = 0; return 0; case nvme_admin_async_event: req->execute = nvmet_execute_async_event; - req->data_len = 0; return 0; case nvme_admin_keep_alive: req->execute = nvmet_execute_keep_alive; - req->data_len = 0; return 0; case nvme_admin_get_log_page: - req->data_len = nvmet_get_log_page_len(cmd); - - switch (cmd->get_log_page.lid) { - case NVME_LOG_DISC: - req->execute = nvmet_execute_get_disc_log_page; - return 0; - default: - pr_err("unsupported get_log_page lid %d\n", - cmd->get_log_page.lid); - req->error_loc = - offsetof(struct nvme_get_log_page_command, lid); - return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; - } + req->execute = nvmet_execute_disc_get_log_page; + return 0; case nvme_admin_identify: - req->data_len = NVME_IDENTIFY_DATA_SIZE; - switch (cmd->identify.cns) { - case NVME_ID_CNS_CTRL: - req->execute = - nvmet_execute_identify_disc_ctrl; - return 0; - default: - pr_err("unsupported identify cns %d\n", - cmd->identify.cns); - req->error_loc = offsetof(struct nvme_identify, cns); - return NVME_SC_INVALID_OPCODE | 
NVME_SC_DNR; - } + req->execute = nvmet_execute_disc_identify; + return 0; default: pr_err("unhandled cmd %d\n", cmd->common.opcode); req->error_loc = offsetof(struct nvme_common_command, opcode); diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index d16b55ffe79f..f7297473d9eb 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -12,6 +12,9 @@ static void nvmet_execute_prop_set(struct nvmet_req *req) u64 val = le64_to_cpu(req->cmd->prop_set.value); u16 status = 0; + if (!nvmet_check_data_len(req, 0)) + return; + if (req->cmd->prop_set.attrib & 1) { req->error_loc = offsetof(struct nvmf_property_set_command, attrib); @@ -38,6 +41,9 @@ static void nvmet_execute_prop_get(struct nvmet_req *req) u16 status = 0; u64 val = 0; + if (!nvmet_check_data_len(req, 0)) + return; + if (req->cmd->prop_get.attrib & 1) { switch (le32_to_cpu(req->cmd->prop_get.offset)) { case NVME_REG_CAP: @@ -82,11 +88,9 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req) switch (cmd->fabrics.fctype) { case nvme_fabrics_type_property_set: - req->data_len = 0; req->execute = nvmet_execute_prop_set; break; case nvme_fabrics_type_property_get: - req->data_len = 0; req->execute = nvmet_execute_prop_get; break; default: @@ -147,6 +151,9 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) struct nvmet_ctrl *ctrl = NULL; u16 status = 0; + if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data))) + return; + d = kmalloc(sizeof(*d), GFP_KERNEL); if (!d) { status = NVME_SC_INTERNAL; @@ -211,6 +218,9 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) u16 qid = le16_to_cpu(c->qid); u16 status = 0; + if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data))) + return; + d = kmalloc(sizeof(*d), GFP_KERNEL); if (!d) { status = NVME_SC_INTERNAL; @@ -281,7 +291,6 @@ u16 nvmet_parse_connect_cmd(struct nvmet_req *req) return NVME_SC_INVALID_OPCODE | NVME_SC_DNR; } - req->data_len = sizeof(struct nvmf_connect_data); if (cmd->connect.qid == 0) req->execute = nvmet_execute_admin_connect; else diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index ce8d819f86cc..a0db6371b43e 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1495,20 +1495,20 @@ static void nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_ls_iod *iod) { - struct fcnvme_ls_disconnect_rqst *rqst = - (struct fcnvme_ls_disconnect_rqst *)iod->rqstbuf; - struct fcnvme_ls_disconnect_acc *acc = - (struct fcnvme_ls_disconnect_acc *)iod->rspbuf; + struct fcnvme_ls_disconnect_assoc_rqst *rqst = + (struct fcnvme_ls_disconnect_assoc_rqst *)iod->rqstbuf; + struct fcnvme_ls_disconnect_assoc_acc *acc = + (struct fcnvme_ls_disconnect_assoc_acc *)iod->rspbuf; struct nvmet_fc_tgt_assoc *assoc; int ret = 0; memset(acc, 0, sizeof(*acc)); - if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_rqst)) + if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst)) ret = VERR_DISCONN_LEN; else if (rqst->desc_list_len != fcnvme_lsdesc_len( - sizeof(struct fcnvme_ls_disconnect_rqst))) + sizeof(struct fcnvme_ls_disconnect_assoc_rqst))) ret = VERR_DISCONN_RQST_LEN; else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) ret = VERR_ASSOC_ID; @@ -1523,8 +1523,11 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, fcnvme_lsdesc_len( sizeof(struct fcnvme_lsdesc_disconn_cmd))) ret = VERR_DISCONN_CMD_LEN; - else if ((rqst->discon_cmd.scope != FCNVME_DISCONN_ASSOCIATION) && - (rqst->discon_cmd.scope != 
FCNVME_DISCONN_CONNECTION)) + /* + * As the standard changed on the LS, check if old format and scope + * something other than Association (e.g. 0). + */ + else if (rqst->discon_cmd.rsvd8[0]) ret = VERR_DISCONN_SCOPE; else { /* match an active association */ @@ -1556,8 +1559,8 @@ nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, fcnvme_lsdesc_len( - sizeof(struct fcnvme_ls_disconnect_acc)), - FCNVME_LS_DISCONNECT); + sizeof(struct fcnvme_ls_disconnect_assoc_acc)), + FCNVME_LS_DISCONNECT_ASSOC); /* release get taken in nvmet_fc_find_target_assoc */ nvmet_fc_tgt_a_put(iod->assoc); @@ -1632,7 +1635,7 @@ nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, /* Creates an IO Queue/Connection */ nvmet_fc_ls_create_connection(tgtport, iod); break; - case FCNVME_LS_DISCONNECT: + case FCNVME_LS_DISCONNECT_ASSOC: /* Terminate a Queue/Connection or the Association */ nvmet_fc_ls_disconnect(tgtport, iod); break; @@ -2015,7 +2018,7 @@ nvmet_fc_fod_op_done(struct nvmet_fc_fcp_iod *fod) } /* data transfer complete, resume with nvmet layer */ - nvmet_req_execute(&fod->req); + fod->req.execute(&fod->req); break; case NVMET_FCOP_READDATA: @@ -2231,7 +2234,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, * can invoke the nvmet_layer now. If read data, cmd completion will * push the data */ - nvmet_req_execute(&fod->req); + fod->req.execute(&fod->req); return; transport_error: @@ -2299,7 +2302,7 @@ nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *target_port, /* validate iu, so the connection id can be used to find the queue */ if ((cmdiubuf_len != sizeof(*cmdiu)) || - (cmdiu->scsi_id != NVME_CMD_SCSI_ID) || + (cmdiu->format_id != NVME_CMD_FORMAT_ID) || (cmdiu->fc_id != NVME_CMD_FC_ID) || (be16_to_cpu(cmdiu->iu_len) != (sizeof(*cmdiu)/4))) return -EIO; diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index 32008d85172b..b6fca0e421ef 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -147,8 +147,12 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) int sg_cnt = req->sg_cnt; struct bio *bio; struct scatterlist *sg; + struct blk_plug plug; sector_t sector; - int op, op_flags = 0, i; + int op, i; + + if (!nvmet_check_data_len(req, nvmet_rw_len(req))) + return; if (!req->sg_cnt) { nvmet_req_complete(req, 0); @@ -156,21 +160,20 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) } if (req->cmd->rw.opcode == nvme_cmd_write) { - op = REQ_OP_WRITE; - op_flags = REQ_SYNC | REQ_IDLE; + op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) - op_flags |= REQ_FUA; + op |= REQ_FUA; } else { op = REQ_OP_READ; } if (is_pci_p2pdma_page(sg_page(req->sg))) - op_flags |= REQ_NOMERGE; + op |= REQ_NOMERGE; sector = le64_to_cpu(req->cmd->rw.slba); sector <<= (req->ns->blksize_shift - 9); - if (req->data_len <= NVMET_MAX_INLINE_DATA_LEN) { + if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) { bio = &req->b.inline_bio; bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); } else { @@ -180,8 +183,9 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) bio->bi_iter.bi_sector = sector; bio->bi_private = req; bio->bi_end_io = nvmet_bio_done; - bio_set_op_attrs(bio, op, op_flags); + bio->bi_opf = op; + blk_start_plug(&plug); for_each_sg(req->sg, sg, req->sg_cnt, i) { while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) != sg->length) { @@ -190,7 +194,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) bio = 
bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); bio_set_dev(bio, req->ns->bdev); bio->bi_iter.bi_sector = sector; - bio_set_op_attrs(bio, op, op_flags); + bio->bi_opf = op; bio_chain(bio, prev); submit_bio(prev); @@ -201,12 +205,16 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req) } submit_bio(bio); + blk_finish_plug(&plug); } static void nvmet_bdev_execute_flush(struct nvmet_req *req) { struct bio *bio = &req->b.inline_bio; + if (!nvmet_check_data_len(req, 0)) + return; + bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); bio_set_dev(bio, req->ns->bdev); bio->bi_private = req; @@ -261,12 +269,10 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req) if (bio) { bio->bi_private = req; bio->bi_end_io = nvmet_bio_done; - if (status) { - bio->bi_status = BLK_STS_IOERR; - bio_endio(bio); - } else { + if (status) + bio_io_error(bio); + else submit_bio(bio); - } } else { nvmet_req_complete(req, status); } @@ -274,6 +280,9 @@ static void nvmet_bdev_execute_discard(struct nvmet_req *req) static void nvmet_bdev_execute_dsm(struct nvmet_req *req) { + if (!nvmet_check_data_len(req, nvmet_dsm_len(req))) + return; + switch (le32_to_cpu(req->cmd->dsm.attributes)) { case NVME_DSMGMT_AD: nvmet_bdev_execute_discard(req); @@ -295,6 +304,9 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req) sector_t nr_sector; int ret; + if (!nvmet_check_data_len(req, 0)) + return; + sector = le64_to_cpu(write_zeroes->slba) << (req->ns->blksize_shift - 9); nr_sector = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) << @@ -319,20 +331,15 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req) case nvme_cmd_read: case nvme_cmd_write: req->execute = nvmet_bdev_execute_rw; - req->data_len = nvmet_rw_len(req); return 0; case nvme_cmd_flush: req->execute = nvmet_bdev_execute_flush; - req->data_len = 0; return 0; case nvme_cmd_dsm: req->execute = nvmet_bdev_execute_dsm; - req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) * - sizeof(struct nvme_dsm_range); return 0; case nvme_cmd_write_zeroes: req->execute = nvmet_bdev_execute_write_zeroes; - req->data_len = 0; return 0; default: pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode, diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 05453f5d1448..caebfce06605 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -126,7 +126,7 @@ static void nvmet_file_io_done(struct kiocb *iocb, long ret, long ret2) mempool_free(req->f.bvec, req->ns->bvec_pool); } - if (unlikely(ret != req->data_len)) + if (unlikely(ret != req->transfer_len)) status = errno_to_nvme_status(req, ret); nvmet_req_complete(req, status); } @@ -146,7 +146,7 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags) is_sync = true; pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift; - if (unlikely(pos + req->data_len > req->ns->size)) { + if (unlikely(pos + req->transfer_len > req->ns->size)) { nvmet_req_complete(req, errno_to_nvme_status(req, -ENOSPC)); return true; } @@ -173,7 +173,7 @@ static bool nvmet_file_execute_io(struct nvmet_req *req, int ki_flags) nr_bvec--; } - if (WARN_ON_ONCE(total_len != req->data_len)) { + if (WARN_ON_ONCE(total_len != req->transfer_len)) { ret = -EIO; goto complete; } @@ -232,6 +232,9 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) { ssize_t nr_bvec = req->sg_cnt; + if (!nvmet_check_data_len(req, nvmet_rw_len(req))) + return; + if (!req->sg_cnt || !nr_bvec) { nvmet_req_complete(req, 0); return; @@ -273,6 +276,8 @@ static void 
nvmet_file_flush_work(struct work_struct *w) static void nvmet_file_execute_flush(struct nvmet_req *req) { + if (!nvmet_check_data_len(req, 0)) + return; INIT_WORK(&req->f.work, nvmet_file_flush_work); schedule_work(&req->f.work); } @@ -331,6 +336,8 @@ static void nvmet_file_dsm_work(struct work_struct *w) static void nvmet_file_execute_dsm(struct nvmet_req *req) { + if (!nvmet_check_data_len(req, nvmet_dsm_len(req))) + return; INIT_WORK(&req->f.work, nvmet_file_dsm_work); schedule_work(&req->f.work); } @@ -359,6 +366,8 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w) static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) { + if (!nvmet_check_data_len(req, 0)) + return; INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work); schedule_work(&req->f.work); } @@ -371,20 +380,15 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req) case nvme_cmd_read: case nvme_cmd_write: req->execute = nvmet_file_execute_rw; - req->data_len = nvmet_rw_len(req); return 0; case nvme_cmd_flush: req->execute = nvmet_file_execute_flush; - req->data_len = 0; return 0; case nvme_cmd_dsm: req->execute = nvmet_file_execute_dsm; - req->data_len = (le32_to_cpu(cmd->dsm.nr) + 1) * - sizeof(struct nvme_dsm_range); return 0; case nvme_cmd_write_zeroes: req->execute = nvmet_file_execute_write_zeroes; - req->data_len = 0; return 0; default: pr_err("unhandled cmd for file ns %d on qid %d\n", diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 748a39fca771..856eb0652f89 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -76,7 +76,6 @@ static void nvme_loop_complete_rq(struct request *req) { struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); - nvme_cleanup_cmd(req); sg_free_table_chained(&iod->sg_table, SG_CHUNK_SIZE); nvme_complete_rq(req); } @@ -102,8 +101,8 @@ static void nvme_loop_queue_response(struct nvmet_req *req) * aborts. We don't even bother to allocate a struct request * for them but rather special case them here. 
*/ - if (unlikely(nvme_loop_queue_idx(queue) == 0 && - cqe->command_id >= NVME_AQ_BLK_MQ_DEPTH)) { + if (unlikely(nvme_is_aen_req(nvme_loop_queue_idx(queue), + cqe->command_id))) { nvme_complete_async_event(&queue->ctrl->ctrl, cqe->status, &cqe->result); } else { @@ -126,7 +125,7 @@ static void nvme_loop_execute_work(struct work_struct *work) struct nvme_loop_iod *iod = container_of(work, struct nvme_loop_iod, work); - nvmet_req_execute(&iod->req); + iod->req.execute(&iod->req); } static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index c51f8dd01dc4..46df45e837c9 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -304,8 +304,6 @@ struct nvmet_req { } f; }; int sg_cnt; - /* data length as parsed from the command: */ - size_t data_len; /* data length as parsed from the SGL descriptor: */ size_t transfer_len; @@ -375,7 +373,7 @@ u16 nvmet_parse_fabrics_cmd(struct nvmet_req *req); bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops); void nvmet_req_uninit(struct nvmet_req *req); -void nvmet_req_execute(struct nvmet_req *req); +bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len); void nvmet_req_complete(struct nvmet_req *req, u16 status); int nvmet_req_alloc_sgl(struct nvmet_req *req); void nvmet_req_free_sgl(struct nvmet_req *req); @@ -495,6 +493,12 @@ static inline u32 nvmet_rw_len(struct nvmet_req *req) req->ns->blksize_shift; } +static inline u32 nvmet_dsm_len(struct nvmet_req *req) +{ + return (le32_to_cpu(req->cmd->dsm.nr) + 1) * + sizeof(struct nvme_dsm_range); +} + u16 errno_to_nvme_status(struct nvmet_req *req, int errno); /* Convert a 32-bit number to a 16-bit 0's based number */ diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 36d906a7f70d..37d262a65877 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -603,7 +603,7 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) return; } - nvmet_req_execute(&rsp->req); + rsp->req.execute(&rsp->req); } static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len, @@ -672,13 +672,13 @@ static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, return 0; ret = nvmet_req_alloc_sgl(&rsp->req); - if (ret < 0) + if (unlikely(ret < 0)) goto error_out; ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, rsp->req.sg, rsp->req.sg_cnt, 0, addr, key, nvmet_data_dir(&rsp->req)); - if (ret < 0) + if (unlikely(ret < 0)) goto error_out; rsp->n_rdma += ret; @@ -746,7 +746,7 @@ static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp) queue->cm_id->port_num, &rsp->read_cqe, NULL)) nvmet_req_complete(&rsp->req, NVME_SC_DATA_XFER_ERROR); } else { - nvmet_req_execute(&rsp->req); + rsp->req.execute(&rsp->req); } return true; diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index d535080b781f..af674fc0bb1e 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -320,7 +320,7 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl; u32 len = le32_to_cpu(sgl->length); - if (!cmd->req.data_len) + if (!len) return 0; if (sgl->type == ((NVME_SGL_FMT_DATA_DESC << 4) | @@ -813,13 +813,11 @@ free_crypto: static void nvmet_tcp_handle_req_failure(struct nvmet_tcp_queue *queue, struct nvmet_tcp_cmd *cmd, struct nvmet_req *req) { + size_t data_len = 
le32_to_cpu(req->cmd->common.dptr.sgl.length); int ret; - /* recover the expected data transfer length */ - req->data_len = le32_to_cpu(req->cmd->common.dptr.sgl.length); - if (!nvme_is_write(cmd->req.cmd) || - req->data_len > cmd->req.port->inline_data_size) { + data_len > cmd->req.port->inline_data_size) { nvmet_prepare_receive_pdu(queue); return; } @@ -932,7 +930,7 @@ static int nvmet_tcp_done_recv_pdu(struct nvmet_tcp_queue *queue) goto out; } - nvmet_req_execute(&queue->cmd->req); + queue->cmd->req.execute(&queue->cmd->req); out: nvmet_prepare_receive_pdu(queue); return ret; @@ -1052,7 +1050,7 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue) nvmet_tcp_prep_recv_ddgst(cmd); return 0; } - nvmet_req_execute(&cmd->req); + cmd->req.execute(&cmd->req); } nvmet_prepare_receive_pdu(queue); @@ -1092,7 +1090,7 @@ static int nvmet_tcp_try_recv_ddgst(struct nvmet_tcp_queue *queue) if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) && cmd->rbytes_done == cmd->req.transfer_len) - nvmet_req_execute(&cmd->req); + cmd->req.execute(&cmd->req); ret = 0; out: nvmet_prepare_receive_pdu(queue); diff --git a/drivers/s390/scsi/zfcp_fsf.c b/drivers/s390/scsi/zfcp_fsf.c index 296bbc3c4606..cf63916814cc 100644 --- a/drivers/s390/scsi/zfcp_fsf.c +++ b/drivers/s390/scsi/zfcp_fsf.c @@ -27,6 +27,11 @@ struct kmem_cache *zfcp_fsf_qtcb_cache; +static bool ber_stop = true; +module_param(ber_stop, bool, 0600); +MODULE_PARM_DESC(ber_stop, + "Shuts down FCP devices for FCP channels that report a bit-error count in excess of its threshold (default on)"); + static void zfcp_fsf_request_timeout_handler(struct timer_list *t) { struct zfcp_fsf_req *fsf_req = from_timer(fsf_req, t, timer); @@ -236,10 +241,15 @@ static void zfcp_fsf_status_read_handler(struct zfcp_fsf_req *req) case FSF_STATUS_READ_SENSE_DATA_AVAIL: break; case FSF_STATUS_READ_BIT_ERROR_THRESHOLD: - dev_warn(&adapter->ccw_device->dev, - "The error threshold for checksum statistics " - "has been exceeded\n"); zfcp_dbf_hba_bit_err("fssrh_3", req); + if (ber_stop) { + dev_warn(&adapter->ccw_device->dev, + "All paths over this FCP device are disused because of excessive bit errors\n"); + zfcp_erp_adapter_shutdown(adapter, 0, "fssrh_b"); + } else { + dev_warn(&adapter->ccw_device->dev, + "The error threshold for checksum statistics has been exceeded\n"); + } break; case FSF_STATUS_READ_LINK_DOWN: zfcp_fsf_status_read_link_down(req); diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 1b92f3c19ff3..90cf4691b8c3 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -898,7 +898,7 @@ config SCSI_SNI_53C710 config 53C700_LE_ON_BE bool - depends on SCSI_LASI700 + depends on SCSI_LASI700 || SCSI_SNI_53C710 default y config SCSI_STEX diff --git a/drivers/scsi/ch.c b/drivers/scsi/ch.c index 5f8153c37f77..76751d6c7f0d 100644 --- a/drivers/scsi/ch.c +++ b/drivers/scsi/ch.c @@ -579,7 +579,6 @@ ch_release(struct inode *inode, struct file *file) scsi_changer *ch = file->private_data; scsi_device_put(ch->device); - ch->device = NULL; file->private_data = NULL; kref_put(&ch->ref, ch_destroy); return 0; diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c b/drivers/scsi/device_handler/scsi_dh_alua.c index 4971104b1817..f32da0ca529e 100644 --- a/drivers/scsi/device_handler/scsi_dh_alua.c +++ b/drivers/scsi/device_handler/scsi_dh_alua.c @@ -512,6 +512,7 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) unsigned int tpg_desc_tbl_off; unsigned char orig_transition_tmo; unsigned long flags; + bool transitioning_sense = 
false; if (!pg->expiry) { unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ; @@ -572,13 +573,19 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) goto retry; } /* - * Retry on ALUA state transition or if any - * UNIT ATTENTION occurred. + * If the array returns with 'ALUA state transition' + * sense code here it cannot return RTPG data during + * transition. So set the state to 'transitioning' directly. */ if (sense_hdr.sense_key == NOT_READY && - sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) - err = SCSI_DH_RETRY; - else if (sense_hdr.sense_key == UNIT_ATTENTION) + sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) { + transitioning_sense = true; + goto skip_rtpg; + } + /* + * Retry on any other UNIT ATTENTION occurred. + */ + if (sense_hdr.sense_key == UNIT_ATTENTION) err = SCSI_DH_RETRY; if (err == SCSI_DH_RETRY && pg->expiry != 0 && time_before(jiffies, pg->expiry)) { @@ -666,7 +673,11 @@ static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) off = 8 + (desc[7] * 4); } + skip_rtpg: spin_lock_irqsave(&pg->lock, flags); + if (transitioning_sense) + pg->state = SCSI_ACCESS_STATE_TRANSITIONING; + sdev_printk(KERN_INFO, sdev, "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n", ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state), diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c index ac39ed79ccaa..216e557f703e 100644 --- a/drivers/scsi/hpsa.c +++ b/drivers/scsi/hpsa.c @@ -5477,6 +5477,8 @@ static int hpsa_ciss_submit(struct ctlr_info *h, return SCSI_MLQUEUE_HOST_BUSY; } + c->device = dev; + enqueue_cmd_and_start_io(h, c); /* the cmd'll come back via intr handler in complete_scsi_command() */ return 0; @@ -5548,6 +5550,7 @@ static int hpsa_ioaccel_submit(struct ctlr_info *h, hpsa_cmd_init(h, c->cmdindex, c); c->cmd_type = CMD_SCSI; c->scsi_cmd = cmd; + c->device = dev; rc = hpsa_scsi_ioaccel_raid_map(h, c); if (rc < 0) /* scsi_dma_map failed. */ rc = SCSI_MLQUEUE_HOST_BUSY; @@ -5555,6 +5558,7 @@ static int hpsa_ioaccel_submit(struct ctlr_info *h, hpsa_cmd_init(h, c->cmdindex, c); c->cmd_type = CMD_SCSI; c->scsi_cmd = cmd; + c->device = dev; rc = hpsa_scsi_ioaccel_direct_map(h, c); if (rc < 0) /* scsi_dma_map failed. 
*/ rc = SCSI_MLQUEUE_HOST_BUSY; diff --git a/drivers/scsi/lpfc/lpfc_init.c b/drivers/scsi/lpfc/lpfc_init.c index e91377a4cafe..e8813d26e594 100644 --- a/drivers/scsi/lpfc/lpfc_init.c +++ b/drivers/scsi/lpfc/lpfc_init.c @@ -9055,7 +9055,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) } } -#if defined(BUILD_NVME) /* Clear NVME stats */ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME) { for (idx = 0; idx < phba->cfg_hdw_queue; idx++) { @@ -9063,7 +9062,6 @@ lpfc_sli4_queue_create(struct lpfc_hba *phba) sizeof(phba->sli4_hba.hdwq[idx].nvme_cstat)); } } -#endif /* Clear SCSI stats */ if (phba->cfg_enable_fc4_type & LPFC_ENABLE_FCP) { diff --git a/drivers/scsi/lpfc/lpfc_nportdisc.c b/drivers/scsi/lpfc/lpfc_nportdisc.c index f4b879d25fe9..fc6e4546d738 100644 --- a/drivers/scsi/lpfc/lpfc_nportdisc.c +++ b/drivers/scsi/lpfc/lpfc_nportdisc.c @@ -851,9 +851,9 @@ lpfc_disc_set_adisc(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp) if (!(vport->fc_flag & FC_PT2PT)) { /* Check config parameter use-adisc or FCP-2 */ - if ((vport->cfg_use_adisc && (vport->fc_flag & FC_RSCN_MODE)) || + if (vport->cfg_use_adisc && ((vport->fc_flag & FC_RSCN_MODE) || ((ndlp->nlp_fcp_info & NLP_FCP_2_DEVICE) && - (ndlp->nlp_type & NLP_FCP_TARGET))) { + (ndlp->nlp_type & NLP_FCP_TARGET)))) { spin_lock_irq(shost->host_lock); ndlp->nlp_flag |= NLP_NPR_ADISC; spin_unlock_irq(shost->host_lock); diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c index fe1097666de4..6822cd9ff8f1 100644 --- a/drivers/scsi/lpfc/lpfc_scsi.c +++ b/drivers/scsi/lpfc/lpfc_scsi.c @@ -528,7 +528,6 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba, list_del_init(&psb->list); psb->exch_busy = 0; psb->status = IOSTAT_SUCCESS; -#ifdef BUILD_NVME if (psb->cur_iocbq.iocb_flag == LPFC_IO_NVME) { qp->abts_nvme_io_bufs--; spin_unlock(&qp->abts_io_buf_list_lock); @@ -536,7 +535,6 @@ lpfc_sli4_io_xri_aborted(struct lpfc_hba *phba, lpfc_sli4_nvme_xri_aborted(phba, axri, psb); return; } -#endif qp->abts_scsi_io_bufs--; spin_unlock(&qp->abts_io_buf_list_lock); diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index a0c6945b8139..614f78dddafe 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -7866,7 +7866,7 @@ lpfc_sli4_process_missed_mbox_completions(struct lpfc_hba *phba) if (sli4_hba->hdwq) { for (eqidx = 0; eqidx < phba->cfg_irq_chann; eqidx++) { eq = phba->sli4_hba.hba_eq_hdl[eqidx].eq; - if (eq->queue_id == sli4_hba->mbx_cq->assoc_qid) { + if (eq && eq->queue_id == sli4_hba->mbx_cq->assoc_qid) { fpeq = eq; break; } diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c index 30bafd9d21e9..7259bce85e0e 100644 --- a/drivers/scsi/qla2xxx/qla_attr.c +++ b/drivers/scsi/qla2xxx/qla_attr.c @@ -440,9 +440,6 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, valid = 0; if (ha->optrom_size == OPTROM_SIZE_2300 && start == 0) valid = 1; - else if (start == (ha->flt_region_boot * 4) || - start == (ha->flt_region_fw * 4)) - valid = 1; else if (IS_QLA24XX_TYPE(ha) || IS_QLA25XX(ha)) valid = 1; if (!valid) { @@ -489,8 +486,10 @@ qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj, "Writing flash region -- 0x%x/0x%x.\n", ha->optrom_region_start, ha->optrom_region_size); - ha->isp_ops->write_optrom(vha, ha->optrom_buffer, + rval = ha->isp_ops->write_optrom(vha, ha->optrom_buffer, ha->optrom_region_start, ha->optrom_region_size); + if (rval) + rval = -EIO; break; default: rval = -EINVAL; diff --git a/drivers/scsi/qla2xxx/qla_bsg.c 
b/drivers/scsi/qla2xxx/qla_bsg.c index 28d587a89ba6..99f0a1a08143 100644 --- a/drivers/scsi/qla2xxx/qla_bsg.c +++ b/drivers/scsi/qla2xxx/qla_bsg.c @@ -253,7 +253,7 @@ qla2x00_process_els(struct bsg_job *bsg_job) srb_t *sp; const char *type; int req_sg_cnt, rsp_sg_cnt; - int rval = (DRIVER_ERROR << 16); + int rval = (DID_ERROR << 16); uint16_t nextlid = 0; if (bsg_request->msgcode == FC_BSG_RPT_ELS) { @@ -432,7 +432,7 @@ qla2x00_process_ct(struct bsg_job *bsg_job) struct Scsi_Host *host = fc_bsg_to_shost(bsg_job); scsi_qla_host_t *vha = shost_priv(host); struct qla_hw_data *ha = vha->hw; - int rval = (DRIVER_ERROR << 16); + int rval = (DID_ERROR << 16); int req_sg_cnt, rsp_sg_cnt; uint16_t loop_id; struct fc_port *fcport; @@ -1950,7 +1950,7 @@ qlafx00_mgmt_cmd(struct bsg_job *bsg_job) struct Scsi_Host *host = fc_bsg_to_shost(bsg_job); scsi_qla_host_t *vha = shost_priv(host); struct qla_hw_data *ha = vha->hw; - int rval = (DRIVER_ERROR << 16); + int rval = (DID_ERROR << 16); struct qla_mt_iocb_rqst_fx00 *piocb_rqst; srb_t *sp; int req_sg_cnt = 0, rsp_sg_cnt = 0; diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c index 4c26630c1c3e..009fd5a33fcd 100644 --- a/drivers/scsi/qla2xxx/qla_isr.c +++ b/drivers/scsi/qla2xxx/qla_isr.c @@ -2837,8 +2837,6 @@ qla2x00_status_cont_entry(struct rsp_que *rsp, sts_cont_entry_t *pkt) if (sense_len == 0) { rsp->status_srb = NULL; sp->done(sp, cp->result); - } else { - WARN_ON_ONCE(true); } } diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c index 1cc6913f76c4..4a1f21c11758 100644 --- a/drivers/scsi/qla2xxx/qla_mbx.c +++ b/drivers/scsi/qla2xxx/qla_mbx.c @@ -702,6 +702,7 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr) mcp->mb[2] = LSW(risc_addr); mcp->mb[3] = 0; mcp->mb[4] = 0; + mcp->mb[11] = 0; ha->flags.using_lr_setting = 0; if (IS_QLA25XX(ha) || IS_QLA81XX(ha) || IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) { @@ -746,7 +747,7 @@ qla2x00_execute_fw(scsi_qla_host_t *vha, uint32_t risc_addr) if (ha->flags.exchoffld_enabled) mcp->mb[4] |= ENABLE_EXCHANGE_OFFLD; - mcp->out_mb |= MBX_4|MBX_3|MBX_2|MBX_1; + mcp->out_mb |= MBX_4 | MBX_3 | MBX_2 | MBX_1 | MBX_11; mcp->in_mb |= MBX_3 | MBX_2 | MBX_1; } else { mcp->mb[1] = LSW(risc_addr); diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c index e6ff17f38178..726ad4cbf4a6 100644 --- a/drivers/scsi/qla2xxx/qla_os.c +++ b/drivers/scsi/qla2xxx/qla_os.c @@ -3226,6 +3226,10 @@ qla2x00_probe_one(struct pci_dev *pdev, const struct pci_device_id *id) req->req_q_in, req->req_q_out, rsp->rsp_q_in, rsp->rsp_q_out); ha->wq = alloc_workqueue("qla2xxx_wq", 0, 0); + if (unlikely(!ha->wq)) { + ret = -ENOMEM; + goto probe_failed; + } if (ha->isp_ops->initialize_adapter(base_vha)) { ql_log(ql_log_fatal, base_vha, 0x00d6, @@ -3533,6 +3537,10 @@ qla2x00_shutdown(struct pci_dev *pdev) qla2x00_try_to_stop_firmware(vha); } + /* Disable timer */ + if (vha->timer_active) + qla2x00_stop_timer(vha); + /* Turn adapter off line */ vha->flags.online = 0; diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 1c470e31ae81..ae2fa170f6ad 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -967,6 +967,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, ses->data_direction = scmd->sc_data_direction; ses->sdb = scmd->sdb; ses->result = scmd->result; + ses->resid_len = scmd->req.resid_len; ses->underflow = scmd->underflow; ses->prot_op = scmd->prot_op; ses->eh_eflags = scmd->eh_eflags; @@ -977,6 
+978,7 @@ void scsi_eh_prep_cmnd(struct scsi_cmnd *scmd, struct scsi_eh_save *ses, memset(scmd->cmnd, 0, BLK_MAX_CDB); memset(&scmd->sdb, 0, sizeof(scmd->sdb)); scmd->result = 0; + scmd->req.resid_len = 0; if (sense_bytes) { scmd->sdb.length = min_t(unsigned, SCSI_SENSE_BUFFERSIZE, @@ -1029,6 +1031,7 @@ void scsi_eh_restore_cmnd(struct scsi_cmnd* scmd, struct scsi_eh_save *ses) scmd->sc_data_direction = ses->data_direction; scmd->sdb = ses->sdb; scmd->result = ses->result; + scmd->req.resid_len = ses->resid_len; scmd->underflow = ses->underflow; scmd->prot_op = ses->prot_op; scmd->eh_eflags = ses->eh_eflags; diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 64c96c7828ee..6d7362e7367e 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -730,6 +730,14 @@ sdev_store_delete(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct kernfs_node *kn; + struct scsi_device *sdev = to_scsi_device(dev); + + /* + * We need to try to get module, avoiding the module been removed + * during delete. + */ + if (scsi_device_get(sdev)) + return -ENODEV; kn = sysfs_break_active_protection(&dev->kobj, &attr->attr); WARN_ON_ONCE(!kn); @@ -744,9 +752,10 @@ sdev_store_delete(struct device *dev, struct device_attribute *attr, * state into SDEV_DEL. */ device_remove_file(dev, attr); - scsi_remove_device(to_scsi_device(dev)); + scsi_remove_device(sdev); if (kn) sysfs_unbreak_active_protection(kn); + scsi_device_put(sdev); return count; }; static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete); diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 50928bc266eb..ebb40160539f 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1166,11 +1166,12 @@ static blk_status_t sd_setup_read_write_cmnd(struct scsi_cmnd *cmd) sector_t lba = sectors_to_logical(sdp, blk_rq_pos(rq)); sector_t threshold; unsigned int nr_blocks = sectors_to_logical(sdp, blk_rq_sectors(rq)); - bool dif, dix; unsigned int mask = logical_to_sectors(sdp, 1) - 1; bool write = rq_data_dir(rq) == WRITE; unsigned char protect, fua; blk_status_t ret; + unsigned int dif; + bool dix; ret = scsi_init_io(cmd); if (ret != BLK_STS_OK) @@ -1654,7 +1655,8 @@ static int sd_sync_cache(struct scsi_disk *sdkp, struct scsi_sense_hdr *sshdr) /* we need to evaluate the error return */ if (scsi_sense_valid(sshdr) && (sshdr->asc == 0x3a || /* medium not present */ - sshdr->asc == 0x20)) /* invalid command */ + sshdr->asc == 0x20 || /* invalid command */ + (sshdr->asc == 0x74 && sshdr->ascq == 0x71))) /* drive is password locked */ /* this is no error here */ return 0; diff --git a/drivers/scsi/sni_53c710.c b/drivers/scsi/sni_53c710.c index aef4881d8e21..a85d52b5dc32 100644 --- a/drivers/scsi/sni_53c710.c +++ b/drivers/scsi/sni_53c710.c @@ -66,10 +66,8 @@ static int snirm710_probe(struct platform_device *dev) base = res->start; hostdata = kzalloc(sizeof(*hostdata), GFP_KERNEL); - if (!hostdata) { - dev_printk(KERN_ERR, dev, "Failed to allocate host data\n"); + if (!hostdata) return -ENOMEM; - } hostdata->dev = &dev->dev; dma_set_mask(&dev->dev, DMA_BIT_MASK(32)); diff --git a/drivers/scsi/ufs/ufs_bsg.c b/drivers/scsi/ufs/ufs_bsg.c index a9344eb4e047..dc2f6d2b46ed 100644 --- a/drivers/scsi/ufs/ufs_bsg.c +++ b/drivers/scsi/ufs/ufs_bsg.c @@ -98,6 +98,8 @@ static int ufs_bsg_request(struct bsg_job *job) bsg_reply->reply_payload_rcv_len = 0; + pm_runtime_get_sync(hba->dev); + msgcode = bsg_request->msgcode; switch (msgcode) { case UPIU_TRANSACTION_QUERY_REQ: @@ -135,6 +137,8 @@ static int 
ufs_bsg_request(struct bsg_job *job) break; } + pm_runtime_put_sync(hba->dev); + if (!desc_buff) goto out; diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index c70caf4ea490..a2b5c796bbc4 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -1831,7 +1831,7 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb) while (credits) { struct sk_buff *p = cxgbit_sock_peek_wr(csk); - const u32 csum = (__force u32)p->csum; + u32 csum; if (unlikely(!p)) { pr_err("csk 0x%p,%u, cr %u,%u+%u, empty.\n", @@ -1840,6 +1840,7 @@ static void cxgbit_fw4_ack(struct cxgbit_sock *csk, struct sk_buff *skb) break; } + csum = (__force u32)p->csum; if (unlikely(credits < csum)) { pr_warn("csk 0x%p,%u, cr %u,%u+%u, < %u.\n", csk, csk->tid, diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 04bf2acd3800..2d19f0e332b0 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -1075,27 +1075,6 @@ passthrough_parse_cdb(struct se_cmd *cmd, unsigned int size; /* - * Clear a lun set in the cdb if the initiator talking to use spoke - * and old standards version, as we can't assume the underlying device - * won't choke up on it. - */ - switch (cdb[0]) { - case READ_10: /* SBC - RDProtect */ - case READ_12: /* SBC - RDProtect */ - case READ_16: /* SBC - RDProtect */ - case SEND_DIAGNOSTIC: /* SPC - SELF-TEST Code */ - case VERIFY: /* SBC - VRProtect */ - case VERIFY_16: /* SBC - VRProtect */ - case WRITE_VERIFY: /* SBC - VRProtect */ - case WRITE_VERIFY_12: /* SBC - VRProtect */ - case MAINTENANCE_IN: /* SPC - Parameter Data Format for SA RTPG */ - break; - default: - cdb[1] &= 0x1f; /* clear logical unit number */ - break; - } - - /* * For REPORT LUNS we always need to emulate the response, for everything * else, pass it up. */ diff --git a/fs/block_dev.c b/fs/block_dev.c index 9c073dbdc1b0..d612468ee66b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1403,11 +1403,7 @@ static void flush_disk(struct block_device *bdev, bool kill_dirty) "resized disk %s\n", bdev->bd_disk ? bdev->bd_disk->disk_name : ""); } - - if (!bdev->bd_disk) - return; - if (disk_part_scan_enabled(bdev->bd_disk)) - bdev->bd_invalidated = 1; + bdev->bd_invalidated = 1; } /** @@ -1512,6 +1508,19 @@ EXPORT_SYMBOL(bd_set_size); static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part); +static void bdev_disk_changed(struct block_device *bdev, bool invalidate) +{ + if (disk_part_scan_enabled(bdev->bd_disk)) { + if (invalidate) + invalidate_partitions(bdev->bd_disk, bdev); + else + rescan_partitions(bdev->bd_disk, bdev); + } else { + check_disk_size_change(bdev->bd_disk, bdev, !invalidate); + bdev->bd_invalidated = 0; + } +} + /* * bd_mutex locking: * @@ -1594,12 +1603,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) * The latter is necessary to prevent ghost * partitions on a removed medium. 
*/ - if (bdev->bd_invalidated) { - if (!ret) - rescan_partitions(disk, bdev); - else if (ret == -ENOMEDIUM) - invalidate_partitions(disk, bdev); - } + if (bdev->bd_invalidated && + (!ret || ret == -ENOMEDIUM)) + bdev_disk_changed(bdev, ret == -ENOMEDIUM); if (ret) goto out_clear; @@ -1632,12 +1638,9 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (bdev->bd_disk->fops->open) ret = bdev->bd_disk->fops->open(bdev, mode); /* the same as first opener case, read comment there */ - if (bdev->bd_invalidated) { - if (!ret) - rescan_partitions(bdev->bd_disk, bdev); - else if (ret == -ENOMEDIUM) - invalidate_partitions(bdev->bd_disk, bdev); - } + if (bdev->bd_invalidated && + (!ret || ret == -ENOMEDIUM)) + bdev_disk_changed(bdev, ret == -ENOMEDIUM); if (ret) goto out_unlock_bdev; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 808709581481..2c997f94a3b2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1771,7 +1771,8 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, return -EIO; } trace_f2fs_issue_reset_zone(bdev, blkstart); - return blkdev_reset_zones(bdev, sector, nr_sects, GFP_NOFS); + return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, + sector, nr_sects, GFP_NOFS); } /* For conventional zones, use regular discard if supported */ diff --git a/fs/fcntl.c b/fs/fcntl.c index 3d40771e8e7c..41b6438bd2d9 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -261,7 +261,7 @@ static int f_getowner_uids(struct file *filp, unsigned long arg) static bool rw_hint_valid(enum rw_hint hint) { switch (hint) { - case RWF_WRITE_LIFE_NOT_SET: + case RWH_WRITE_LIFE_NOT_SET: case RWH_WRITE_LIFE_NONE: case RWH_WRITE_LIFE_SHORT: case RWH_WRITE_LIFE_MEDIUM: diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 0bf056de5cc3..dc03e059fdff 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -10,103 +10,239 @@ struct blk_mq_tags; struct blk_flush_queue; /** - * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware block device + * struct blk_mq_hw_ctx - State for a hardware queue facing the hardware + * block device */ struct blk_mq_hw_ctx { struct { + /** @lock: Protects the dispatch list. */ spinlock_t lock; + /** + * @dispatch: Used for requests that are ready to be + * dispatched to the hardware but for some reason (e.g. lack of + * resources) could not be sent to the hardware. As soon as the + * driver can send new requests, requests at this list will + * be sent first for a fairer dispatch. + */ struct list_head dispatch; - unsigned long state; /* BLK_MQ_S_* flags */ + /** + * @state: BLK_MQ_S_* flags. Defines the state of the hw + * queue (active, scheduled to restart, stopped). + */ + unsigned long state; } ____cacheline_aligned_in_smp; + /** + * @run_work: Used for scheduling a hardware queue run at a later time. + */ struct delayed_work run_work; + /** @cpumask: Map of available CPUs where this hctx can run. */ cpumask_var_t cpumask; + /** + * @next_cpu: Used by blk_mq_hctx_next_cpu() for round-robin CPU + * selection from @cpumask. + */ int next_cpu; + /** + * @next_cpu_batch: Counter of how many works left in the batch before + * changing to the next CPU. + */ int next_cpu_batch; - unsigned long flags; /* BLK_MQ_F_* flags */ + /** @flags: BLK_MQ_F_* flags. Defines the behaviour of the queue. */ + unsigned long flags; + /** + * @sched_data: Pointer owned by the IO scheduler attached to a request + * queue. It's up to the IO scheduler how to use this pointer. 
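(Aside on the zone-management change above.) The fs/f2fs/segment.c hunk earlier in this patch replaces blkdev_reset_zones() with the unified blkdev_zone_mgmt() helper, which takes the REQ_OP_ZONE_* operation as a parameter. The fragment below is a minimal, hypothetical sketch of another in-kernel caller using the same interface to finish (transition to full) the zone containing a given sector instead of resetting it; the function name, the use of bdev_zone_sectors(), and the power-of-two zone-size assumption are illustrative and not part of the patch.

#include <linux/blkdev.h>

/*
 * Illustrative only: transition the zone containing @sector to FULL
 * using the new unified zone-management helper. Assumes a power-of-two
 * zone size, as the block layer requires for zoned devices.
 */
static int example_finish_zone(struct block_device *bdev, sector_t sector)
{
        sector_t zone_sectors = bdev_zone_sectors(bdev);
        sector_t zone_start = sector & ~(zone_sectors - 1);

        return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_FINISH, zone_start,
                                zone_sectors, GFP_NOFS);
}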
+ */ void *sched_data; + /** + * @queue: Pointer to the request queue that owns this hardware context. + */ struct request_queue *queue; + /** @fq: Queue of requests that need to perform a flush operation. */ struct blk_flush_queue *fq; + /** + * @driver_data: Pointer to data owned by the block driver that created + * this hctx + */ void *driver_data; + /** + * @ctx_map: Bitmap for each software queue. If bit is on, there is a + * pending request in that software queue. + */ struct sbitmap ctx_map; + /** + * @dispatch_from: Software queue to be used when no scheduler was + * selected. + */ struct blk_mq_ctx *dispatch_from; + /** + * @dispatch_busy: Number used by blk_mq_update_dispatch_busy() to + * decide if the hw_queue is busy using Exponential Weighted Moving + * Average algorithm. + */ unsigned int dispatch_busy; + /** @type: HCTX_TYPE_* flags. Type of hardware queue. */ unsigned short type; + /** @nr_ctx: Number of software queues. */ unsigned short nr_ctx; + /** @ctxs: Array of software queues. */ struct blk_mq_ctx **ctxs; + /** @dispatch_wait_lock: Lock for dispatch_wait queue. */ spinlock_t dispatch_wait_lock; + /** + * @dispatch_wait: Waitqueue to put requests when there is no tag + * available at the moment, to wait for another try in the future. + */ wait_queue_entry_t dispatch_wait; + + /** + * @wait_index: Index of next available dispatch_wait queue to insert + * requests. + */ atomic_t wait_index; + /** + * @tags: Tags owned by the block driver. A tag at this set is only + * assigned when a request is dispatched from a hardware queue. + */ struct blk_mq_tags *tags; + /** + * @sched_tags: Tags owned by I/O scheduler. If there is an I/O + * scheduler associated with a request queue, a tag is assigned when + * that request is allocated. Else, this member is not used. + */ struct blk_mq_tags *sched_tags; + /** @queued: Number of queued requests. */ unsigned long queued; + /** @run: Number of dispatched requests. */ unsigned long run; #define BLK_MQ_MAX_DISPATCH_ORDER 7 + /** @dispatched: Number of dispatch requests by queue. */ unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; + /** @numa_node: NUMA node the storage adapter has been connected to. */ unsigned int numa_node; + /** @queue_num: Index of this hardware queue. */ unsigned int queue_num; + /** + * @nr_active: Number of active requests. Only used when a tag set is + * shared across request queues. + */ atomic_t nr_active; + /** @cpuhp_dead: List to store request if some CPU die. */ struct hlist_node cpuhp_dead; + /** @kobj: Kernel object for sysfs. */ struct kobject kobj; + /** @poll_considered: Count times blk_poll() was called. */ unsigned long poll_considered; + /** @poll_invoked: Count how many requests blk_poll() polled. */ unsigned long poll_invoked; + /** @poll_success: Count how many polled requests were completed. */ unsigned long poll_success; #ifdef CONFIG_BLK_DEBUG_FS + /** + * @debugfs_dir: debugfs directory for this hardware queue. Named + * as cpu<cpu_number>. + */ struct dentry *debugfs_dir; + /** @sched_debugfs_dir: debugfs directory for the scheduler. */ struct dentry *sched_debugfs_dir; #endif + /** @hctx_list: List of all hardware queues. */ struct list_head hctx_list; - /* Must be the last member - see also blk_mq_hw_ctx_size(). */ + /** + * @srcu: Sleepable RCU. Use as lock when type of the hardware queue is + * blocking (BLK_MQ_F_BLOCKING). Must be the last member - see also + * blk_mq_hw_ctx_size(). 
+ */ struct srcu_struct srcu[0]; }; +/** + * struct blk_mq_queue_map - Map software queues to hardware queues + * @mq_map: CPU ID to hardware queue index map. This is an array + * with nr_cpu_ids elements. Each element has a value in the range + * [@queue_offset, @queue_offset + @nr_queues). + * @nr_queues: Number of hardware queues to map CPU IDs onto. + * @queue_offset: First hardware queue to map onto. Used by the PCIe NVMe + * driver to map each hardware queue type (enum hctx_type) onto a distinct + * set of hardware queues. + */ struct blk_mq_queue_map { unsigned int *mq_map; unsigned int nr_queues; unsigned int queue_offset; }; +/** + * enum hctx_type - Type of hardware queue + * @HCTX_TYPE_DEFAULT: All I/O not otherwise accounted for. + * @HCTX_TYPE_READ: Just for READ I/O. + * @HCTX_TYPE_POLL: Polled I/O of any kind. + * @HCTX_MAX_TYPES: Number of types of hctx. + */ enum hctx_type { - HCTX_TYPE_DEFAULT, /* all I/O not otherwise accounted for */ - HCTX_TYPE_READ, /* just for READ I/O */ - HCTX_TYPE_POLL, /* polled I/O of any kind */ + HCTX_TYPE_DEFAULT, + HCTX_TYPE_READ, + HCTX_TYPE_POLL, HCTX_MAX_TYPES, }; +/** + * struct blk_mq_tag_set - tag set that can be shared between request queues + * @map: One or more ctx -> hctx mappings. One map exists for each + * hardware queue type (enum hctx_type) that the driver wishes + * to support. There are no restrictions on maps being of the + * same size, and it's perfectly legal to share maps between + * types. + * @nr_maps: Number of elements in the @map array. A number in the range + * [1, HCTX_MAX_TYPES]. + * @ops: Pointers to functions that implement block driver behavior. + * @nr_hw_queues: Number of hardware queues supported by the block driver that + * owns this data structure. + * @queue_depth: Number of tags per hardware queue, reserved tags included. + * @reserved_tags: Number of tags to set aside for BLK_MQ_REQ_RESERVED tag + * allocations. + * @cmd_size: Number of additional bytes to allocate per request. The block + * driver owns these additional bytes. + * @numa_node: NUMA node the storage adapter has been connected to. + * @timeout: Request processing timeout in jiffies. + * @flags: Zero or more BLK_MQ_F_* flags. + * @driver_data: Pointer to data owned by the block driver that created this + * tag set. + * @tags: Tag sets. One tag set per hardware queue. Has @nr_hw_queues + * elements. + * @tag_list_lock: Serializes tag_list accesses. + * @tag_list: List of the request queues that use this tag set. See also + * request_queue.tag_set_list. + */ struct blk_mq_tag_set { - /* - * map[] holds ctx -> hctx mappings, one map exists for each type - * that the driver wishes to support. There are no restrictions - * on maps being of the same size, and it's perfectly legal to - * share maps between types. 
- */ struct blk_mq_queue_map map[HCTX_MAX_TYPES]; - unsigned int nr_maps; /* nr entries in map[] */ + unsigned int nr_maps; const struct blk_mq_ops *ops; - unsigned int nr_hw_queues; /* nr hw queues across maps */ - unsigned int queue_depth; /* max hw supported */ + unsigned int nr_hw_queues; + unsigned int queue_depth; unsigned int reserved_tags; - unsigned int cmd_size; /* per-request extra data */ + unsigned int cmd_size; int numa_node; unsigned int timeout; - unsigned int flags; /* BLK_MQ_F_* */ + unsigned int flags; void *driver_data; struct blk_mq_tags **tags; @@ -115,6 +251,12 @@ struct blk_mq_tag_set { struct list_head tag_list; }; +/** + * struct blk_mq_queue_data - Data about a request inserted in a queue + * + * @rq: Request pointer. + * @last: If it is the last request in the queue. + */ struct blk_mq_queue_data { struct request *rq; bool last; @@ -142,81 +284,101 @@ typedef bool (busy_fn)(struct request_queue *); typedef void (complete_fn)(struct request *); typedef void (cleanup_rq_fn)(struct request *); - +/** + * struct blk_mq_ops - Callback functions that implements block driver + * behaviour. + */ struct blk_mq_ops { - /* - * Queue request + /** + * @queue_rq: Queue a new request from block IO. */ queue_rq_fn *queue_rq; - /* - * If a driver uses bd->last to judge when to submit requests to - * hardware, it must define this function. In case of errors that - * make us stop issuing further requests, this hook serves the + /** + * @commit_rqs: If a driver uses bd->last to judge when to submit + * requests to hardware, it must define this function. In case of errors + * that make us stop issuing further requests, this hook serves the * purpose of kicking the hardware (which the last request otherwise * would have done). */ commit_rqs_fn *commit_rqs; - /* - * Reserve budget before queue request, once .queue_rq is + /** + * @get_budget: Reserve budget before queue request, once .queue_rq is * run, it is driver's responsibility to release the * reserved budget. Also we have to handle failure case * of .get_budget for avoiding I/O deadlock. */ get_budget_fn *get_budget; + /** + * @put_budget: Release the reserved budget. + */ put_budget_fn *put_budget; - /* - * Called on request timeout + /** + * @timeout: Called on request timeout. */ timeout_fn *timeout; - /* - * Called to poll for completion of a specific tag. + /** + * @poll: Called to poll for completion of a specific tag. */ poll_fn *poll; + /** + * @complete: Mark the request as complete. + */ complete_fn *complete; - /* - * Called when the block layer side of a hardware queue has been - * set up, allowing the driver to allocate/init matching structures. - * Ditto for exit/teardown. + /** + * @init_hctx: Called when the block layer side of a hardware queue has + * been set up, allowing the driver to allocate/init matching + * structures. */ init_hctx_fn *init_hctx; + /** + * @exit_hctx: Ditto for exit/teardown. + */ exit_hctx_fn *exit_hctx; - /* - * Called for every command allocated by the block layer to allow - * the driver to set up driver specific data. + /** + * @init_request: Called for every command allocated by the block layer + * to allow the driver to set up driver specific data. * * Tag greater than or equal to queue_depth is for setting up * flush request. - * - * Ditto for exit/teardown. */ init_request_fn *init_request; + /** + * @exit_request: Ditto for exit/teardown. 
+ */ exit_request_fn *exit_request; - /* Called from inside blk_get_request() */ + + /** + * @initialize_rq_fn: Called from inside blk_get_request(). + */ void (*initialize_rq_fn)(struct request *rq); - /* - * Called before freeing one request which isn't completed yet, - * and usually for freeing the driver private data + /** + * @cleanup_rq: Called before freeing one request which isn't completed + * yet, and usually for freeing the driver private data. */ cleanup_rq_fn *cleanup_rq; - /* - * If set, returns whether or not this queue currently is busy + /** + * @busy: If set, returns whether or not this queue currently is busy. */ busy_fn *busy; + /** + * @map_queues: This allows drivers specify their own queue mapping by + * overriding the setup-time function that builds the mq_map. + */ map_queues_fn *map_queues; #ifdef CONFIG_BLK_DEBUG_FS - /* - * Used by the debugfs implementation to show driver-specific + /** + * @show_rq: Used by the debugfs implementation to show driver-specific * information about a request. */ void (*show_rq)(struct seq_file *m, struct request *rq); @@ -301,9 +463,25 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) return unique_tag & BLK_MQ_UNIQUE_TAG_MASK; } +/** + * blk_mq_rq_state() - read the current MQ_RQ_* state of a request + * @rq: target request. + */ +static inline enum mq_rq_state blk_mq_rq_state(struct request *rq) +{ + return READ_ONCE(rq->state); +} + +static inline int blk_mq_request_started(struct request *rq) +{ + return blk_mq_rq_state(rq) != MQ_RQ_IDLE; +} + +static inline int blk_mq_request_completed(struct request *rq) +{ + return blk_mq_rq_state(rq) == MQ_RQ_COMPLETE; +} -int blk_mq_request_started(struct request *rq); -int blk_mq_request_completed(struct request *rq); void blk_mq_start_request(struct request *rq); void blk_mq_end_request(struct request *rq, blk_status_t error); void __blk_mq_end_request(struct request *rq, blk_status_t error); @@ -324,7 +502,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_quiesce_queue(struct request_queue *q); void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); -bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); +void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); @@ -343,14 +521,29 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q); unsigned int blk_mq_rq_cpu(struct request *rq); -/* +/** + * blk_mq_rq_from_pdu - cast a PDU to a request + * @pdu: the PDU (Protocol Data Unit) to be casted + * + * Return: request + * * Driver command data is immediately after the request. So subtract request - * size to get back to the original request, add request size to get the PDU. + * size to get back to the original request. */ static inline struct request *blk_mq_rq_from_pdu(void *pdu) { return pdu - sizeof(struct request); } + +/** + * blk_mq_rq_to_pdu - cast a request to a PDU + * @rq: the request to be casted + * + * Return: pointer to the PDU + * + * Driver command data is immediately after the request. So add request to get + * the PDU. 
+ */ static inline void *blk_mq_rq_to_pdu(struct request *rq) { return rq + 1; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d688b96d1d63..23a2fd534817 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -153,10 +153,10 @@ struct bio { unsigned short bi_write_hint; blk_status_t bi_status; u8 bi_partno; + atomic_t __bi_remaining; struct bvec_iter bi_iter; - atomic_t __bi_remaining; bio_end_io_t *bi_end_io; void *bi_private; @@ -290,6 +290,12 @@ enum req_opf { REQ_OP_ZONE_RESET_ALL = 8, /* write the zero filled sector many times */ REQ_OP_WRITE_ZEROES = 9, + /* Open a zone */ + REQ_OP_ZONE_OPEN = 10, + /* Close a zone */ + REQ_OP_ZONE_CLOSE = 11, + /* Transition a zone to full */ + REQ_OP_ZONE_FINISH = 12, /* SCSI passthrough using struct scsi_request */ REQ_OP_SCSI_IN = 32, @@ -417,6 +423,25 @@ static inline bool op_is_discard(unsigned int op) return (op & REQ_OP_MASK) == REQ_OP_DISCARD; } +/* + * Check if a bio or request operation is a zone management operation, with + * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case + * due to its different handling in the block layer and device response in + * case of command failure. + */ +static inline bool op_is_zone_mgmt(enum req_opf op) +{ + switch (op & REQ_OP_MASK) { + case REQ_OP_ZONE_RESET: + case REQ_OP_ZONE_OPEN: + case REQ_OP_ZONE_CLOSE: + case REQ_OP_ZONE_FINISH: + return true; + default: + return false; + } +} + static inline int op_stat_group(unsigned int op) { if (op_is_discard(op)) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3ea78b0c91c..6a4f7abbdcf7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -360,14 +360,15 @@ extern unsigned int blkdev_nr_zones(struct block_device *bdev); extern int blkdev_report_zones(struct block_device *bdev, sector_t sector, struct blk_zone *zones, unsigned int *nr_zones); -extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors, - sector_t nr_sectors, gfp_t gfp_mask); +extern int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op, + sector_t sectors, sector_t nr_sectors, + gfp_t gfp_mask); extern int blk_revalidate_disk_zones(struct gendisk *disk); extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd, unsigned long arg); -extern int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode, - unsigned int cmd, unsigned long arg); +extern int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode, + unsigned int cmd, unsigned long arg); #else /* CONFIG_BLK_DEV_ZONED */ @@ -388,9 +389,9 @@ static inline int blkdev_report_zones_ioctl(struct block_device *bdev, return -ENOTTY; } -static inline int blkdev_reset_zones_ioctl(struct block_device *bdev, - fmode_t mode, unsigned int cmd, - unsigned long arg) +static inline int blkdev_zone_mgmt_ioctl(struct block_device *bdev, + fmode_t mode, unsigned int cmd, + unsigned long arg) { return -ENOTTY; } @@ -411,7 +412,6 @@ struct request_queue { /* sw queues */ struct blk_mq_ctx __percpu *queue_ctx; - unsigned int nr_queues; unsigned int queue_depth; diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h index 067c9fea64fe..e8c30b39bb27 100644 --- a/include/linux/nvme-fc.h +++ b/include/linux/nvme-fc.h @@ -4,33 +4,60 @@ */ /* - * This file contains definitions relative to FC-NVME r1.14 (16-020vB). - * The fcnvme_lsdesc_cr_assoc_cmd struct reflects expected r1.16 content. + * This file contains definitions relative to FC-NVME-2 r1.06 + * (T11-2019-00210-v001). 
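Stepping back to the kernel-doc added to include/linux/blk-mq.h above: it documents how a driver ties a blk_mq_tag_set to its blk_mq_ops, and how the per-command driver data (the PDU) lives directly behind struct request, reachable with blk_mq_rq_to_pdu(). The fragment below is a compressed, hypothetical sketch of that wiring, not a buildable driver; every name prefixed with my_ is invented for illustration and error handling is omitted.

#include <linux/blk-mq.h>
#include <linux/string.h>

/* Per-command driver data ("PDU"), allocated immediately after each request. */
struct my_cmd {
        u32 flags;
};

/* @queue_rq: hand one request to the (imaginary) hardware. */
static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
                                const struct blk_mq_queue_data *bd)
{
        struct request *rq = bd->rq;
        struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);      /* rq + 1 */

        cmd->flags = 0;
        blk_mq_start_request(rq);
        /* A real driver would submit to hardware here and complete later. */
        blk_mq_end_request(rq, BLK_STS_OK);
        return BLK_STS_OK;
}

static const struct blk_mq_ops my_mq_ops = {
        .queue_rq       = my_queue_rq,
};

/* Tag set wiring: one hw queue, 64 tags, sizeof(struct my_cmd) extra bytes. */
static int my_init_tag_set(struct blk_mq_tag_set *set)
{
        memset(set, 0, sizeof(*set));
        set->ops = &my_mq_ops;
        set->nr_hw_queues = 1;
        set->queue_depth = 64;
        set->numa_node = NUMA_NO_NODE;
        set->cmd_size = sizeof(struct my_cmd);
        set->flags = BLK_MQ_F_SHOULD_MERGE;
        return blk_mq_alloc_tag_set(set);
}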
*/ #ifndef _NVME_FC_H #define _NVME_FC_H 1 +#include <uapi/scsi/fc/fc_fs.h> -#define NVME_CMD_SCSI_ID 0xFD +#define NVME_CMD_FORMAT_ID 0xFD #define NVME_CMD_FC_ID FC_TYPE_NVME /* FC-NVME Cmd IU Flags */ -#define FCNVME_CMD_FLAGS_DIRMASK 0x03 -#define FCNVME_CMD_FLAGS_WRITE 0x01 -#define FCNVME_CMD_FLAGS_READ 0x02 +enum { + FCNVME_CMD_FLAGS_DIRMASK = 0x03, + FCNVME_CMD_FLAGS_WRITE = (1 << 0), + FCNVME_CMD_FLAGS_READ = (1 << 1), + + FCNVME_CMD_FLAGS_PICWP = (1 << 2), +}; + +enum { + FCNVME_CMD_CAT_MASK = 0x0F, + FCNVME_CMD_CAT_ADMINQ = 0x01, + FCNVME_CMD_CAT_CSSMASK = 0x07, + FCNVME_CMD_CAT_CSSFLAG = 0x08, +}; + +static inline __u8 fccmnd_set_cat_admin(__u8 rsv_cat) +{ + return (rsv_cat & ~FCNVME_CMD_CAT_MASK) | FCNVME_CMD_CAT_ADMINQ; +} + +static inline __u8 fccmnd_set_cat_css(__u8 rsv_cat, __u8 css) +{ + return (rsv_cat & ~FCNVME_CMD_CAT_MASK) | FCNVME_CMD_CAT_CSSFLAG | + (css & FCNVME_CMD_CAT_CSSMASK); +} struct nvme_fc_cmd_iu { - __u8 scsi_id; + __u8 format_id; __u8 fc_id; __be16 iu_len; - __u8 rsvd4[3]; + __u8 rsvd4[2]; + __u8 rsv_cat; __u8 flags; __be64 connection_id; __be32 csn; __be32 data_len; struct nvme_command sqe; - __be32 rsvd88[2]; + __u8 dps; + __u8 lbads; + __be16 ms; + __be32 rsvd92; }; #define NVME_FC_SIZEOF_ZEROS_RSP 12 @@ -38,11 +65,12 @@ struct nvme_fc_cmd_iu { enum { FCNVME_SC_SUCCESS = 0, FCNVME_SC_INVALID_FIELD = 1, - FCNVME_SC_INVALID_CONNID = 2, + /* reserved 2 */ + FCNVME_SC_ILL_CONN_PARAMS = 3, }; struct nvme_fc_ersp_iu { - __u8 status_code; + __u8 ersp_result; __u8 rsvd1; __be16 iu_len; __be32 rsn; @@ -53,14 +81,44 @@ struct nvme_fc_ersp_iu { }; -/* FC-NVME Link Services */ +#define FCNVME_NVME_SR_OPCODE 0x01 + +struct nvme_fc_nvme_sr_iu { + __u8 fc_id; + __u8 opcode; + __u8 rsvd2; + __u8 retry_rctl; + __be32 rsvd4; +}; + + +enum { + FCNVME_SRSTAT_ACC = 0x0, + FCNVME_SRSTAT_INV_FCID = 0x1, + /* reserved 0x2 */ + FCNVME_SRSTAT_LOGICAL_ERR = 0x3, + FCNVME_SRSTAT_INV_QUALIF = 0x4, + FCNVME_SRSTAT_UNABL2PERFORM = 0x9, +}; + +struct nvme_fc_nvme_sr_rsp_iu { + __u8 fc_id; + __u8 opcode; + __u8 rsvd2; + __u8 status; + __be32 rsvd4; +}; + + +/* FC-NVME Link Services - LS cmd values (w0 bits 31:24) */ enum { FCNVME_LS_RSVD = 0, FCNVME_LS_RJT = 1, FCNVME_LS_ACC = 2, - FCNVME_LS_CREATE_ASSOCIATION = 3, - FCNVME_LS_CREATE_CONNECTION = 4, - FCNVME_LS_DISCONNECT = 5, + FCNVME_LS_CREATE_ASSOCIATION = 3, /* Create Association */ + FCNVME_LS_CREATE_CONNECTION = 4, /* Create I/O Connection */ + FCNVME_LS_DISCONNECT_ASSOC = 5, /* Disconnect Association */ + FCNVME_LS_DISCONNECT_CONN = 6, /* Disconnect Connection */ }; /* FC-NVME Link Service Descriptors */ @@ -117,14 +175,17 @@ enum fcnvme_ls_rjt_reason { FCNVME_RJT_RC_UNSUP = 0x0b, /* command not supported */ - FCNVME_RJT_RC_INPROG = 0x0e, - /* command already in progress */ - FCNVME_RJT_RC_INV_ASSOC = 0x40, - /* Invalid Association ID*/ + /* Invalid Association ID */ FCNVME_RJT_RC_INV_CONN = 0x41, - /* Invalid Connection ID*/ + /* Invalid Connection ID */ + + FCNVME_RJT_RC_INV_PARAM = 0x42, + /* Invalid Parameters */ + + FCNVME_RJT_RC_INSUF_RES = 0x43, + /* Insufficient Resources */ FCNVME_RJT_RC_VENDOR = 0xff, /* vendor specific error */ @@ -138,14 +199,32 @@ enum fcnvme_ls_rjt_explan { FCNVME_RJT_EXP_OXID_RXID = 0x17, /* invalid OX_ID-RX_ID combination */ - FCNVME_RJT_EXP_INSUF_RES = 0x29, - /* insufficient resources */ - FCNVME_RJT_EXP_UNAB_DATA = 0x2a, /* unable to supply requested data */ FCNVME_RJT_EXP_INV_LEN = 0x2d, /* Invalid payload length */ + + FCNVME_RJT_EXP_INV_ERSP_RAT = 0x40, + /* Invalid NVMe_ERSP Ratio */ + + 
FCNVME_RJT_EXP_INV_CTLR_ID = 0x41, + /* Invalid Controller ID */ + + FCNVME_RJT_EXP_INV_QUEUE_ID = 0x42, + /* Invalid Queue ID */ + + FCNVME_RJT_EXP_INV_SQSIZE = 0x43, + /* Invalid Submission Queue Size */ + + FCNVME_RJT_EXP_INV_HOSTID = 0x44, + /* Invalid HOST ID */ + + FCNVME_RJT_EXP_INV_HOSTNQN = 0x45, + /* Invalid HOSTNQN */ + + FCNVME_RJT_EXP_INV_SUBNQN = 0x46, + /* Invalid SUBNQN */ }; /* FCNVME_LSDESC_RJT */ @@ -209,21 +288,11 @@ struct fcnvme_lsdesc_cr_conn_cmd { __be32 rsvd52; }; -/* Disconnect Scope Values */ -enum { - FCNVME_DISCONN_ASSOCIATION = 0, - FCNVME_DISCONN_CONNECTION = 1, -}; - /* FCNVME_LSDESC_DISCONN_CMD */ struct fcnvme_lsdesc_disconn_cmd { __be32 desc_tag; /* FCNVME_LSDESC_xxx */ __be32 desc_len; - u8 rsvd8[3]; - /* note: scope is really a 1 bit field */ - u8 scope; /* FCNVME_DISCONN_xxx */ - __be32 rsvd12; - __be64 id; + __be32 rsvd8[4]; }; /* FCNVME_LSDESC_CONN_ID */ @@ -242,9 +311,14 @@ struct fcnvme_lsdesc_assoc_id { /* r_ctl values */ enum { - FCNVME_RS_RCTL_DATA = 1, - FCNVME_RS_RCTL_XFER_RDY = 5, - FCNVME_RS_RCTL_RSP = 8, + FCNVME_RS_RCTL_CMND = 0x6, + FCNVME_RS_RCTL_DATA = 0x1, + FCNVME_RS_RCTL_CONF = 0x3, + FCNVME_RS_RCTL_SR = 0x9, + FCNVME_RS_RCTL_XFER_RDY = 0x5, + FCNVME_RS_RCTL_RSP = 0x7, + FCNVME_RS_RCTL_ERSP = 0x8, + FCNVME_RS_RCTL_SR_RSP = 0xA, }; @@ -264,7 +338,10 @@ struct fcnvme_ls_acc_hdr { struct fcnvme_ls_rqst_w0 w0; __be32 desc_list_len; struct fcnvme_lsdesc_rqst rqst; - /* Followed by cmd-specific ACC descriptors, see next definitions */ + /* + * Followed by cmd-specific ACCEPT descriptors, see xxx_acc + * definitions below + */ }; /* FCNVME_LS_CREATE_ASSOCIATION */ @@ -302,25 +379,39 @@ struct fcnvme_ls_cr_conn_acc { struct fcnvme_lsdesc_conn_id connectid; }; -/* FCNVME_LS_DISCONNECT */ -struct fcnvme_ls_disconnect_rqst { +/* FCNVME_LS_DISCONNECT_ASSOC */ +struct fcnvme_ls_disconnect_assoc_rqst { struct fcnvme_ls_rqst_w0 w0; __be32 desc_list_len; struct fcnvme_lsdesc_assoc_id associd; struct fcnvme_lsdesc_disconn_cmd discon_cmd; }; -struct fcnvme_ls_disconnect_acc { +struct fcnvme_ls_disconnect_assoc_acc { + struct fcnvme_ls_acc_hdr hdr; +}; + + +/* FCNVME_LS_DISCONNECT_CONN */ +struct fcnvme_ls_disconnect_conn_rqst { + struct fcnvme_ls_rqst_w0 w0; + __be32 desc_list_len; + struct fcnvme_lsdesc_assoc_id associd; + struct fcnvme_lsdesc_disconn_cmd connectid; +}; + +struct fcnvme_ls_disconnect_conn_acc { struct fcnvme_ls_acc_hdr hdr; }; /* - * Yet to be defined in FC-NVME: + * Default R_A_TOV is pulled in from fc_fs.h but needs conversion + * from ms to seconds for our use. */ -#define NVME_FC_CONNECT_TIMEOUT_SEC 2 /* 2 seconds */ -#define NVME_FC_LS_TIMEOUT_SEC 2 /* 2 seconds */ -#define NVME_FC_TGTOP_TIMEOUT_SEC 2 /* 2 seconds */ +#define FC_TWO_TIMES_R_A_TOV (2 * (FC_DEF_R_A_TOV / 1000)) +#define NVME_FC_LS_TIMEOUT_SEC FC_TWO_TIMES_R_A_TOV +#define NVME_FC_TGTOP_TIMEOUT_SEC FC_TWO_TIMES_R_A_TOV /* * TRADDR string must be of form "nn-<16hexdigits>:pn-<16hexdigits>" @@ -328,6 +419,7 @@ struct fcnvme_ls_disconnect_acc { * infront of the <16hexdigits>. Without is considered the "min" string * and with is considered the "max" string. The hexdigits may be upper * or lower case. + * Note: FC-NVME-2 standard requires a "0x" prefix. 
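The TRADDR comment above describes the transport address format "nn-<16hexdigits>:pn-<16hexdigits>", with and without the "0x" prefix that FC-NVME-2 now requires. As a small illustration of that format, here is a standalone userspace sketch that accepts both spellings; it is not the kernel's parser, and the helper name and sample WWNs are made up.

#include <stdio.h>

/* Illustrative only: parse "nn-[0x]<16hexdigits>:pn-[0x]<16hexdigits>". */
static int parse_fc_traddr(const char *s, unsigned long long *wwnn,
                           unsigned long long *wwpn)
{
        if (sscanf(s, "nn-0x%16llx:pn-0x%16llx", wwnn, wwpn) == 2)
                return 0;       /* "max" string, with the 0x prefix */
        if (sscanf(s, "nn-%16llx:pn-%16llx", wwnn, wwpn) == 2)
                return 0;       /* "min" string, without the prefix */
        return -1;
}

int main(void)
{
        unsigned long long nn, pn;

        if (!parse_fc_traddr("nn-0x20000090fa942779:pn-0x10000090fa942779",
                             &nn, &pn))
                printf("wwnn=0x%016llx wwpn=0x%016llx\n", nn, pn);
        return 0;
}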
*/ #define NVME_FC_TRADDR_NNLEN 3 /* "?n-" */ #define NVME_FC_TRADDR_OXNNLEN 5 /* "?n-0x" */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index f61d6906e59d..3eca4f7d8510 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -107,8 +107,22 @@ enum { NVME_REG_AQA = 0x0024, /* Admin Queue Attributes */ NVME_REG_ASQ = 0x0028, /* Admin SQ Base Address */ NVME_REG_ACQ = 0x0030, /* Admin CQ Base Address */ - NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */ + NVME_REG_CMBLOC = 0x0038, /* Controller Memory Buffer Location */ NVME_REG_CMBSZ = 0x003c, /* Controller Memory Buffer Size */ + NVME_REG_BPINFO = 0x0040, /* Boot Partition Information */ + NVME_REG_BPRSEL = 0x0044, /* Boot Partition Read Select */ + NVME_REG_BPMBL = 0x0048, /* Boot Partition Memory Buffer + * Location + */ + NVME_REG_PMRCAP = 0x0e00, /* Persistent Memory Capabilities */ + NVME_REG_PMRCTL = 0x0e04, /* Persistent Memory Region Control */ + NVME_REG_PMRSTS = 0x0e08, /* Persistent Memory Region Status */ + NVME_REG_PMREBS = 0x0e0c, /* Persistent Memory Region Elasticity + * Buffer Size + */ + NVME_REG_PMRSWTP = 0x0e10, /* Persistent Memory Region Sustained + * Write Throughput + */ NVME_REG_DBS = 0x1000, /* SQ 0 Tail Doorbell */ }; @@ -295,6 +309,14 @@ enum { NVME_CTRL_OACS_DIRECTIVES = 1 << 5, NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, + NVME_CTRL_CTRATT_128_ID = 1 << 0, + NVME_CTRL_CTRATT_NON_OP_PSP = 1 << 1, + NVME_CTRL_CTRATT_NVM_SETS = 1 << 2, + NVME_CTRL_CTRATT_READ_RECV_LVLS = 1 << 3, + NVME_CTRL_CTRATT_ENDURANCE_GROUPS = 1 << 4, + NVME_CTRL_CTRATT_PREDICTABLE_LAT = 1 << 5, + NVME_CTRL_CTRATT_NAMESPACE_GRANULARITY = 1 << 7, + NVME_CTRL_CTRATT_UUID_LIST = 1 << 9, }; struct nvme_lbaf { @@ -352,6 +374,9 @@ enum { NVME_ID_CNS_NS_PRESENT = 0x11, NVME_ID_CNS_CTRL_NS_LIST = 0x12, NVME_ID_CNS_CTRL_LIST = 0x13, + NVME_ID_CNS_SCNDRY_CTRL_LIST = 0x15, + NVME_ID_CNS_NS_GRANULARITY = 0x16, + NVME_ID_CNS_UUID_LIST = 0x17, }; enum { @@ -409,7 +434,8 @@ struct nvme_smart_log { __u8 avail_spare; __u8 spare_thresh; __u8 percent_used; - __u8 rsvd6[26]; + __u8 endu_grp_crit_warn_sumry; + __u8 rsvd7[25]; __u8 data_units_read[16]; __u8 data_units_written[16]; __u8 host_reads[16]; @@ -423,7 +449,11 @@ struct nvme_smart_log { __le32 warning_temp_time; __le32 critical_comp_time; __le16 temp_sensor[8]; - __u8 rsvd216[296]; + __le32 thm_temp1_trans_count; + __le32 thm_temp2_trans_count; + __le32 thm_temp1_total_time; + __le32 thm_temp2_total_time; + __u8 rsvd232[280]; }; struct nvme_fw_slot_info_log { @@ -440,6 +470,7 @@ enum { NVME_CMD_EFFECTS_NIC = 1 << 3, NVME_CMD_EFFECTS_CCC = 1 << 4, NVME_CMD_EFFECTS_CSE_MASK = 3 << 16, + NVME_CMD_EFFECTS_UUID_SEL = 1 << 19, }; struct nvme_effects_log { @@ -563,6 +594,7 @@ enum nvme_opcode { nvme_cmd_compare = 0x05, nvme_cmd_write_zeroes = 0x08, nvme_cmd_dsm = 0x09, + nvme_cmd_verify = 0x0c, nvme_cmd_resv_register = 0x0d, nvme_cmd_resv_report = 0x0e, nvme_cmd_resv_acquire = 0x11, @@ -806,10 +838,14 @@ enum nvme_admin_opcode { nvme_admin_ns_mgmt = 0x0d, nvme_admin_activate_fw = 0x10, nvme_admin_download_fw = 0x11, + nvme_admin_dev_self_test = 0x14, nvme_admin_ns_attach = 0x15, nvme_admin_keep_alive = 0x18, nvme_admin_directive_send = 0x19, nvme_admin_directive_recv = 0x1a, + nvme_admin_virtual_mgmt = 0x1c, + nvme_admin_nvme_mi_send = 0x1d, + nvme_admin_nvme_mi_recv = 0x1e, nvme_admin_dbbuf = 0x7C, nvme_admin_format_nvm = 0x80, nvme_admin_security_send = 0x81, @@ -873,6 +909,7 @@ enum { NVME_FEAT_PLM_CONFIG = 0x13, NVME_FEAT_PLM_WINDOW = 
0x14, NVME_FEAT_HOST_BEHAVIOR = 0x16, + NVME_FEAT_SANITIZE = 0x17, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, NVME_FEAT_RESV_MASK = 0x82, @@ -883,6 +920,10 @@ enum { NVME_LOG_FW_SLOT = 0x03, NVME_LOG_CHANGED_NS = 0x04, NVME_LOG_CMD_EFFECTS = 0x05, + NVME_LOG_DEVICE_SELF_TEST = 0x06, + NVME_LOG_TELEMETRY_HOST = 0x07, + NVME_LOG_TELEMETRY_CTRL = 0x08, + NVME_LOG_ENDURANCE_GROUP = 0x09, NVME_LOG_ANA = 0x0c, NVME_LOG_DISC = 0x70, NVME_LOG_RESERVATION = 0x80, @@ -1290,7 +1331,11 @@ enum { NVME_SC_SGL_INVALID_OFFSET = 0x16, NVME_SC_SGL_INVALID_SUBTYPE = 0x17, + NVME_SC_SANITIZE_FAILED = 0x1C, + NVME_SC_SANITIZE_IN_PROGRESS = 0x1D, + NVME_SC_NS_WRITE_PROTECTED = 0x20, + NVME_SC_CMD_INTERRUPTED = 0x21, NVME_SC_LBA_RANGE = 0x80, NVME_SC_CAP_EXCEEDED = 0x81, @@ -1328,6 +1373,8 @@ enum { NVME_SC_NS_NOT_ATTACHED = 0x11a, NVME_SC_THIN_PROV_NOT_SUPP = 0x11b, NVME_SC_CTRL_LIST_INVALID = 0x11c, + NVME_SC_BP_WRITE_PROHIBITED = 0x11e, + NVME_SC_PMR_SAN_PROHIBITED = 0x123, /* * I/O Command Set Specific - NVM commands: @@ -1368,6 +1415,7 @@ enum { NVME_SC_ANA_INACCESSIBLE = 0x302, NVME_SC_ANA_TRANSITION = 0x303, NVME_SC_HOST_PATH_ERROR = 0x370, + NVME_SC_HOST_ABORTED_CMD = 0x371, NVME_SC_CRD = 0x1800, NVME_SC_DNR = 0x4000, diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 53c28d750a45..1ac0d712a9c3 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -42,6 +42,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_PSID_REVERT_TPR: case IOC_OPAL_MBR_DONE: case IOC_OPAL_WRITE_SHADOW_MBR: + case IOC_OPAL_GENERIC_TABLE_RW: return true; } return false; diff --git a/include/scsi/scsi_eh.h b/include/scsi/scsi_eh.h index 3810b340551c..6bd5ed695a5e 100644 --- a/include/scsi/scsi_eh.h +++ b/include/scsi/scsi_eh.h @@ -32,6 +32,7 @@ extern int scsi_ioctl_reset(struct scsi_device *, int __user *); struct scsi_eh_save { /* saved state */ int result; + unsigned int resid_len; int eh_eflags; enum dma_data_direction data_direction; unsigned underflow; diff --git a/include/trace/events/wbt.h b/include/trace/events/wbt.h index b048694070e2..37342a13c9cb 100644 --- a/include/trace/events/wbt.h +++ b/include/trace/events/wbt.h @@ -33,7 +33,8 @@ TRACE_EVENT(wbt_stat, ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strlcpy(__entry->name, dev_name(bdi->dev), + ARRAY_SIZE(__entry->name)); __entry->rmean = stat[0].mean; __entry->rmin = stat[0].min; __entry->rmax = stat[0].max; @@ -67,7 +68,8 @@ TRACE_EVENT(wbt_lat, ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strlcpy(__entry->name, dev_name(bdi->dev), + ARRAY_SIZE(__entry->name)); __entry->lat = div_u64(lat, 1000); ), @@ -103,7 +105,8 @@ TRACE_EVENT(wbt_step, ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strlcpy(__entry->name, dev_name(bdi->dev), + ARRAY_SIZE(__entry->name)); __entry->msg = msg; __entry->step = step; __entry->window = div_u64(window, 1000); @@ -138,7 +141,8 @@ TRACE_EVENT(wbt_timer, ), TP_fast_assign( - strncpy(__entry->name, dev_name(bdi->dev), 32); + strlcpy(__entry->name, dev_name(bdi->dev), + ARRAY_SIZE(__entry->name)); __entry->status = status; __entry->step = step; __entry->inflight = inflight; diff --git a/include/uapi/linux/blkzoned.h b/include/uapi/linux/blkzoned.h index 498eec813494..0cdef67135f0 100644 --- a/include/uapi/linux/blkzoned.h +++ b/include/uapi/linux/blkzoned.h @@ -120,9 +120,11 @@ struct blk_zone_report { }; /** - * struct blk_zone_range - BLKRESETZONE ioctl request - * @sector: starting sector of the 
first zone to issue reset write pointer - * @nr_sectors: Total number of sectors of 1 or more zones to reset + * struct blk_zone_range - BLKRESETZONE/BLKOPENZONE/ + * BLKCLOSEZONE/BLKFINISHZONE ioctl + * requests + * @sector: Starting sector of the first zone to operate on. + * @nr_sectors: Total number of sectors of all zones to operate on. */ struct blk_zone_range { __u64 sector; @@ -139,10 +141,19 @@ struct blk_zone_range { * sector range. The sector range must be zone aligned. * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors. * @BLKGETNRZONES: Get the total number of zones of the device. + * @BLKOPENZONE: Open the zones in the specified sector range. + * The 512 B sector range must be zone aligned. + * @BLKCLOSEZONE: Close the zones in the specified sector range. + * The 512 B sector range must be zone aligned. + * @BLKFINISHZONE: Mark the zones as full in the specified sector range. + * The 512 B sector range must be zone aligned. */ #define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report) #define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range) #define BLKGETZONESZ _IOR(0x12, 132, __u32) #define BLKGETNRZONES _IOR(0x12, 133, __u32) +#define BLKOPENZONE _IOW(0x12, 134, struct blk_zone_range) +#define BLKCLOSEZONE _IOW(0x12, 135, struct blk_zone_range) +#define BLKFINISHZONE _IOW(0x12, 136, struct blk_zone_range) #endif /* _UAPI_BLKZONED_H */ diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index 1d338357df8a..1f97b33c840e 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -58,7 +58,7 @@ * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be * used to clear any hints previously set. */ -#define RWF_WRITE_LIFE_NOT_SET 0 +#define RWH_WRITE_LIFE_NOT_SET 0 #define RWH_WRITE_LIFE_NONE 1 #define RWH_WRITE_LIFE_SHORT 2 #define RWH_WRITE_LIFE_MEDIUM 3 @@ -66,6 +66,13 @@ #define RWH_WRITE_LIFE_EXTREME 5 /* + * The originally introduced spelling is remained from the first + * versions of the patch set that introduced the feature, see commit + * v4.13-rc1~212^2~51. + */ +#define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET + +/* * Types of directory notifications that may be requested. 
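Returning to the new zone-management ioctls defined in include/uapi/linux/blkzoned.h above (BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE, all taking the same struct blk_zone_range as BLKRESETZONE), the following is a minimal userspace sketch. It assumes a kernel exposing these ioctls and a zoned block device; the device path and the 256 MiB zone geometry are placeholders.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/blkzoned.h>

int main(void)
{
        /* Placeholder device and zone geometry (512 B sectors). */
        const char *dev = "/dev/nullb0";
        struct blk_zone_range range = {
                .sector     = 0,        /* first zone of the device */
                .nr_sectors = 524288,   /* one 256 MiB zone */
        };
        int fd = open(dev, O_RDWR);

        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* Explicitly open the zone, then transition it to full. */
        if (ioctl(fd, BLKOPENZONE, &range) < 0)
                perror("BLKOPENZONE");
        if (ioctl(fd, BLKFINISHZONE, &range) < 0)
                perror("BLKFINISHZONE");

        close(fd);
        return 0;
}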
*/ #define DN_ACCESS 0x00000001 /* File accessed */ diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index c6d035fa1b6c..6f5af1a84213 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -113,6 +113,25 @@ struct opal_shadow_mbr { __u64 size; }; +/* Opal table operations */ +enum opal_table_ops { + OPAL_READ_TABLE, + OPAL_WRITE_TABLE, +}; + +#define OPAL_UID_LENGTH 8 +struct opal_read_write_table { + struct opal_key key; + const __u64 data; + const __u8 table_uid[OPAL_UID_LENGTH]; + __u64 offset; + __u64 size; +#define OPAL_TABLE_READ (1 << OPAL_READ_TABLE) +#define OPAL_TABLE_WRITE (1 << OPAL_WRITE_TABLE) + __u64 flags; + __u64 priv; +}; + #define IOC_OPAL_SAVE _IOW('p', 220, struct opal_lock_unlock) #define IOC_OPAL_LOCK_UNLOCK _IOW('p', 221, struct opal_lock_unlock) #define IOC_OPAL_TAKE_OWNERSHIP _IOW('p', 222, struct opal_key) @@ -128,5 +147,6 @@ struct opal_shadow_mbr { #define IOC_OPAL_PSID_REVERT_TPR _IOW('p', 232, struct opal_key) #define IOC_OPAL_MBR_DONE _IOW('p', 233, struct opal_mbr_done) #define IOC_OPAL_WRITE_SHADOW_MBR _IOW('p', 234, struct opal_shadow_mbr) +#define IOC_OPAL_GENERIC_TABLE_RW _IOW('p', 235, struct opal_read_write_table) #endif /* _UAPI_SED_OPAL_H */ diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h index 1d338357df8a..1f97b33c840e 100644 --- a/tools/include/uapi/linux/fcntl.h +++ b/tools/include/uapi/linux/fcntl.h @@ -58,7 +58,7 @@ * Valid hint values for F_{GET,SET}_RW_HINT. 0 is "not set", or can be * used to clear any hints previously set. */ -#define RWF_WRITE_LIFE_NOT_SET 0 +#define RWH_WRITE_LIFE_NOT_SET 0 #define RWH_WRITE_LIFE_NONE 1 #define RWH_WRITE_LIFE_SHORT 2 #define RWH_WRITE_LIFE_MEDIUM 3 @@ -66,6 +66,13 @@ #define RWH_WRITE_LIFE_EXTREME 5 /* + * The originally introduced spelling is remained from the first + * versions of the patch set that introduced the feature, see commit + * v4.13-rc1~212^2~51. + */ +#define RWF_WRITE_LIFE_NOT_SET RWH_WRITE_LIFE_NOT_SET + +/* + * Types of directory notifications that may be requested. */ #define DN_ACCESS 0x00000001 /* File accessed */
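Finally, the RWF_WRITE_LIFE_NOT_SET / RWH_WRITE_LIFE_NOT_SET rename above keeps the old spelling as an alias, so existing users of the per-file write-lifetime hints continue to build. Below is a small userspace sketch of setting such a hint through fcntl(2); the file path is a placeholder, and the numeric fallbacks mirror the UAPI values (F_SET_RW_HINT is F_LINUX_SPECIFIC_BASE + 12) in case the libc headers predate the feature.

#include <stdio.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>

/* Fallbacks for older userspace headers; values mirror the UAPI shown above. */
#ifndef F_SET_RW_HINT
#define F_SET_RW_HINT           (1024 + 12)
#endif
#ifndef RWH_WRITE_LIFE_SHORT
#define RWH_WRITE_LIFE_SHORT    2
#endif

int main(void)
{
        uint64_t hint = RWH_WRITE_LIFE_SHORT;   /* data expected to be short-lived */
        int fd = open("/tmp/example.dat", O_CREAT | O_WRONLY, 0644);

        if (fd < 0) {
                perror("open");
                return 1;
        }

        /* F_SET_RW_HINT takes a pointer to a 64-bit hint value. */
        if (fcntl(fd, F_SET_RW_HINT, &hint) < 0)
                perror("F_SET_RW_HINT");

        close(fd);
        return 0;
}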