Diffstat (limited to 'drivers')
-rw-r--r--	drivers/nvme/host/core.c	132
-rw-r--r--	drivers/nvme/host/nvme.h	2
-rw-r--r--	drivers/nvme/host/pci.c	20
-rw-r--r--	drivers/nvme/host/rdma.c	19
-rw-r--r--	drivers/nvme/host/tcp.c	4
-rw-r--r--	drivers/nvme/target/io-cmd-bdev.c	16
-rw-r--r--	drivers/nvme/target/tcp.c	12
7 files changed, 154 insertions, 51 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 108f60b46804..fd7dea36c3b6 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -102,10 +102,13 @@ static void nvme_set_queue_dying(struct nvme_ns *ns)
 	 */
 	if (!ns->disk || test_and_set_bit(NVME_NS_DEAD, &ns->flags))
 		return;
-	revalidate_disk(ns->disk);
 	blk_set_queue_dying(ns->queue);
 	/* Forcibly unquiesce queues to avoid blocking dispatch */
 	blk_mq_unquiesce_queue(ns->queue);
+	/*
+	 * Revalidate after unblocking dispatchers that may be holding bd_mutex
+	 */
+	revalidate_disk(ns->disk);
 }
 
 static void nvme_queue_scan(struct nvme_ctrl *ctrl)
@@ -847,7 +850,7 @@ out:
 static int nvme_submit_user_cmd(struct request_queue *q,
 		struct nvme_command *cmd, void __user *ubuffer,
 		unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
-		u32 meta_seed, u32 *result, unsigned timeout)
+		u32 meta_seed, u64 *result, unsigned timeout)
 {
 	bool write = nvme_is_write(cmd);
 	struct nvme_ns *ns = q->queuedata;
@@ -888,7 +891,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 	else
 		ret = nvme_req(req)->status;
 	if (result)
-		*result = le32_to_cpu(nvme_req(req)->result.u32);
+		*result = le64_to_cpu(nvme_req(req)->result.u64);
 	if (meta && !ret && !write) {
 		if (copy_to_user(meta_buffer, meta, meta_len))
 			ret = -EFAULT;
@@ -1335,6 +1338,54 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	struct nvme_command c;
 	unsigned timeout = 0;
 	u32 effects;
+	u64 result;
+	int status;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EACCES;
+	if (copy_from_user(&cmd, ucmd, sizeof(cmd)))
+		return -EFAULT;
+	if (cmd.flags)
+		return -EINVAL;
+
+	memset(&c, 0, sizeof(c));
+	c.common.opcode = cmd.opcode;
+	c.common.flags = cmd.flags;
+	c.common.nsid = cpu_to_le32(cmd.nsid);
+	c.common.cdw2[0] = cpu_to_le32(cmd.cdw2);
+	c.common.cdw2[1] = cpu_to_le32(cmd.cdw3);
+	c.common.cdw10 = cpu_to_le32(cmd.cdw10);
+	c.common.cdw11 = cpu_to_le32(cmd.cdw11);
+	c.common.cdw12 = cpu_to_le32(cmd.cdw12);
+	c.common.cdw13 = cpu_to_le32(cmd.cdw13);
+	c.common.cdw14 = cpu_to_le32(cmd.cdw14);
+	c.common.cdw15 = cpu_to_le32(cmd.cdw15);
+
+	if (cmd.timeout_ms)
+		timeout = msecs_to_jiffies(cmd.timeout_ms);
+
+	effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
+	status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
+			(void __user *)(uintptr_t)cmd.addr, cmd.data_len,
+			(void __user *)(uintptr_t)cmd.metadata,
+			cmd.metadata_len, 0, &result, timeout);
+	nvme_passthru_end(ctrl, effects);
+
+	if (status >= 0) {
+		if (put_user(result, &ucmd->result))
+			return -EFAULT;
+	}
+
+	return status;
+}
+
+static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+			struct nvme_passthru_cmd64 __user *ucmd)
+{
+	struct nvme_passthru_cmd64 cmd;
+	struct nvme_command c;
+	unsigned timeout = 0;
+	u32 effects;
 	int status;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -1405,6 +1456,41 @@ static void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx)
 		srcu_read_unlock(&head->srcu, idx);
 }
 
+static bool is_ctrl_ioctl(unsigned int cmd)
+{
+	if (cmd == NVME_IOCTL_ADMIN_CMD || cmd == NVME_IOCTL_ADMIN64_CMD)
+		return true;
+	if (is_sed_ioctl(cmd))
+		return true;
+	return false;
+}
+
+static int nvme_handle_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
+				  void __user *argp,
+				  struct nvme_ns_head *head,
+				  int srcu_idx)
+{
+	struct nvme_ctrl *ctrl = ns->ctrl;
+	int ret;
+
+	nvme_get_ctrl(ns->ctrl);
+	nvme_put_ns_from_disk(head, srcu_idx);
+
+	switch (cmd) {
+	case NVME_IOCTL_ADMIN_CMD:
+		ret = nvme_user_cmd(ctrl, NULL, argp);
+		break;
+	case NVME_IOCTL_ADMIN64_CMD:
+		ret = nvme_user_cmd64(ctrl, NULL, argp);
+		break;
+	default:
+		ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
+		break;
+	}
+	nvme_put_ctrl(ctrl);
+	return ret;
+}
+
 static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 		unsigned int cmd, unsigned long arg)
 {
@@ -1422,20 +1508,8 @@ static int nvme_ioctl(struct block_device *bdev, fmode_t mode,
 	 * separately and drop the ns SRCU reference early.  This avoids a
 	 * deadlock when deleting namespaces using the passthrough interface.
 	 */
-	if (cmd == NVME_IOCTL_ADMIN_CMD || is_sed_ioctl(cmd)) {
-		struct nvme_ctrl *ctrl = ns->ctrl;
-
-		nvme_get_ctrl(ns->ctrl);
-		nvme_put_ns_from_disk(head, srcu_idx);
-
-		if (cmd == NVME_IOCTL_ADMIN_CMD)
-			ret = nvme_user_cmd(ctrl, NULL, argp);
-		else
-			ret = sed_ioctl(ctrl->opal_dev, cmd, argp);
-
-		nvme_put_ctrl(ctrl);
-		return ret;
-	}
+	if (is_ctrl_ioctl(cmd))
+		return nvme_handle_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
 
 	switch (cmd) {
 	case NVME_IOCTL_ID:
@@ -1448,6 +1522,9 @@
 	case NVME_IOCTL_SUBMIT_IO:
 		ret = nvme_submit_io(ns, argp);
 		break;
+	case NVME_IOCTL_IO64_CMD:
+		ret = nvme_user_cmd64(ns->ctrl, ns, argp);
+		break;
 	default:
 		if (ns->ndev)
 			ret = nvme_nvm_ioctl(ns, cmd, arg);
@@ -2289,6 +2366,16 @@ static const struct nvme_core_quirk_entry core_quirks[] = {
 		.vid = 0x14a4,
 		.fr = "22301111",
 		.quirks = NVME_QUIRK_SIMPLE_SUSPEND,
+	},
+	{
+		/*
+		 * This Kingston E8FK11.T firmware version has no interrupt
+		 * after resume with actions related to suspend to idle
+		 * https://bugzilla.kernel.org/show_bug.cgi?id=204887
+		 */
+		.vid = 0x2646,
+		.fr = "E8FK11.T",
+		.quirks = NVME_QUIRK_SIMPLE_SUSPEND,
 	}
 };
 
@@ -2540,8 +2627,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 		list_add_tail(&subsys->entry, &nvme_subsystems);
 	}
 
-	if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
-			dev_name(ctrl->device))) {
+	ret = sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj,
+				dev_name(ctrl->device));
+	if (ret) {
 		dev_err(ctrl->device,
 			"failed to create sysfs link from subsystem.\n");
 		goto out_put_subsystem;
@@ -2838,6 +2926,8 @@ static long nvme_dev_ioctl(struct file *file, unsigned int cmd,
 	switch (cmd) {
 	case NVME_IOCTL_ADMIN_CMD:
 		return nvme_user_cmd(ctrl, NULL, argp);
+	case NVME_IOCTL_ADMIN64_CMD:
+		return nvme_user_cmd64(ctrl, NULL, argp);
 	case NVME_IOCTL_IO_CMD:
 		return nvme_dev_user_cmd(ctrl, argp);
 	case NVME_IOCTL_RESET:
@@ -3045,6 +3135,8 @@ static DEVICE_ATTR(field, S_IRUGO, field##_show, NULL);
 
 nvme_show_int_function(cntlid);
 nvme_show_int_function(numa_node);
+nvme_show_int_function(queue_count);
+nvme_show_int_function(sqsize);
 
 static ssize_t nvme_sysfs_delete(struct device *dev,
 				struct device_attribute *attr, const char *buf,
@@ -3125,6 +3217,8 @@ static struct attribute *nvme_dev_attrs[] = {
 	&dev_attr_address.attr,
 	&dev_attr_state.attr,
 	&dev_attr_numa_node.attr,
+	&dev_attr_queue_count.attr,
+	&dev_attr_sqsize.attr,
 	NULL
 };
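
The 64-bit passthrough interface added above (NVME_IOCTL_ADMIN64_CMD on controller nodes, NVME_IOCTL_IO64_CMD on namespaces) returns the full 64-bit completion result instead of truncating it to 32 bits. A minimal userspace sketch, assuming the matching UAPI definitions for struct nvme_passthru_cmd64 are available in <linux/nvme_ioctl.h>; the device path and the Identify opcode are illustrative only:

    /* Issue an Identify Controller admin command through the 64-bit
     * passthrough ioctl and read back the 64-bit completion result. */
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <unistd.h>
    #include <linux/nvme_ioctl.h>

    int main(void)
    {
            struct nvme_passthru_cmd64 cmd;
            void *data;
            int fd, status;

            fd = open("/dev/nvme0", O_RDONLY);
            if (fd < 0 || posix_memalign(&data, 4096, 4096))
                    return 1;

            memset(&cmd, 0, sizeof(cmd));
            cmd.opcode = 0x06;              /* Identify */
            cmd.cdw10 = 1;                  /* CNS 1: identify controller */
            cmd.addr = (__u64)(uintptr_t)data;
            cmd.data_len = 4096;

            status = ioctl(fd, NVME_IOCTL_ADMIN64_CMD, &cmd);
            if (status < 0)
                    perror("NVME_IOCTL_ADMIN64_CMD");
            else
                    printf("status %d, result 0x%llx\n", status,
                           (unsigned long long)cmd.result);
            free(data);
            close(fd);
            return status != 0;
    }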
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index b5013c101b35..38a83ef5bcd3 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -221,6 +221,7 @@ struct nvme_ctrl {
 	u16 oacs;
 	u16 nssa;
 	u16 nr_streams;
+	u16 sqsize;
 	u32 max_namespaces;
 	atomic_t abort_limit;
 	u8 vwc;
@@ -269,7 +270,6 @@ struct nvme_ctrl {
 	u16 hmmaxd;
 
 	/* Fabrics only */
-	u16 sqsize;
 	u32 ioccsz;
 	u32 iorcsz;
 	u16 icdoff;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index c0808f9eb8ab..bb88681f4dc3 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2946,11 +2946,21 @@ static int nvme_suspend(struct device *dev)
 	if (ret < 0)
 		goto unfreeze;
 
+	/*
+	 * A saved state prevents pci pm from generically controlling the
+	 * device's power. If we're using protocol specific settings, we don't
+	 * want pci interfering.
+	 */
+	pci_save_state(pdev);
+
 	ret = nvme_set_power_state(ctrl, ctrl->npss);
 	if (ret < 0)
 		goto unfreeze;
 
 	if (ret) {
+		/* discard the saved state */
+		pci_load_saved_state(pdev, NULL);
+
 		/*
 		 * Clearing npss forces a controller reset on resume. The
 		 * correct value will be rediscovered then.
@@ -2958,14 +2968,7 @@
 		nvme_dev_disable(ndev, true);
 		ctrl->npss = 0;
 		ret = 0;
-		goto unfreeze;
 	}
-	/*
-	 * A saved state prevents pci pm from generically controlling the
-	 * device's power. If we're using protocol specific settings, we don't
-	 * want pci interfering.
-	 */
-	pci_save_state(pdev);
 unfreeze:
 	nvme_unfreeze(ctrl);
 	return ret;
@@ -3090,6 +3093,9 @@ static const struct pci_device_id nvme_id_table[] = {
 		.driver_data = NVME_QUIRK_LIGHTNVM, },
 	{ PCI_DEVICE(0x10ec, 0x5762),   /* ADATA SX6000LNP */
 		.driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+	{ PCI_DEVICE(0x1cc1, 0x8201),   /* ADATA SX8200PNP 512GB */
+		.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
+				NVME_QUIRK_IGNORE_DEV_SUBNQN, },
 	{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
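
With sqsize moved out of the fabrics-only section of struct nvme_ctrl, it can be populated for every transport, and together with queue_count it is exported as a controller sysfs attribute by the core.c hunks above. A trivial reader, assuming a controller enumerated as nvme0 (the name is an assumption for illustration):

    /* Print the new queue_count and sqsize controller attributes. */
    #include <stdio.h>

    int main(void)
    {
            const char *attrs[] = {
                    "/sys/class/nvme/nvme0/queue_count",
                    "/sys/class/nvme/nvme0/sqsize",
            };

            for (int i = 0; i < 2; i++) {
                    char buf[64];
                    FILE *f = fopen(attrs[i], "r");

                    if (!f) {
                            perror(attrs[i]);
                            return 1;
                    }
                    if (fgets(buf, sizeof(buf), f))
                            printf("%s: %s", attrs[i], buf); /* ends in \n */
                    fclose(f);
            }
            return 0;
    }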
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index dfa07bb9dfeb..4d280160dd3f 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -427,7 +427,7 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
 {
 	return min_t(u32, NVME_RDMA_MAX_SEGMENTS,
-		     ibdev->attrs.max_fast_reg_page_list_len);
+		     ibdev->attrs.max_fast_reg_page_list_len - 1);
 }
 
 static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
@@ -437,7 +437,7 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 	const int cq_factor = send_wr_factor + 1;	/* + RECV */
 	int comp_vector, idx = nvme_rdma_queue_idx(queue);
 	enum ib_poll_context poll_ctx;
-	int ret;
+	int ret, pages_per_mr;
 
 	queue->device = nvme_rdma_find_get_device(queue->cm_id);
 	if (!queue->device) {
@@ -479,10 +479,16 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 		goto out_destroy_qp;
 	}
 
+	/*
+	 * Currently we don't use SG_GAPS MR's so if the first entry is
+	 * misaligned we'll end up using two entries for a single data page,
+	 * so one additional entry is required.
+	 */
+	pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1;
 	ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
 			      queue->queue_size,
 			      IB_MR_TYPE_MEM_REG,
-			      nvme_rdma_get_max_fr_pages(ibdev), 0);
+			      pages_per_mr, 0);
 	if (ret) {
 		dev_err(queue->ctrl->ctrl.device,
 			"failed to initialize MR pool sized %d for QID %d\n",
@@ -614,7 +620,8 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
 	if (!ret) {
 		set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
 	} else {
-		__nvme_rdma_stop_queue(queue);
+		if (test_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
+			__nvme_rdma_stop_queue(queue);
 		dev_info(ctrl->ctrl.device,
 			"failed to connect queue: %d ret=%d\n", idx, ret);
 	}
@@ -820,8 +827,8 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 	if (error)
 		goto out_stop_queue;
 
-	ctrl->ctrl.max_hw_sectors =
-		(ctrl->max_fr_pages - 1) << (ilog2(SZ_4K) - 9);
+	ctrl->ctrl.max_segments = ctrl->max_fr_pages;
+	ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9);
 
 	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
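
The rdma.c change sizes each MR for one page more than nvme_rdma_get_max_fr_pages() reports, because without SG_GAPS a misaligned first scatter entry spans one extra page boundary and so consumes two MR entries for a single data page. The arithmetic can be checked standalone; the page size, offset, and 255-page maximum below are assumptions for illustration:

    /* Entries needed to map "len" bytes starting at byte "offset" of the
     * first page: a misaligned start can add one entry. */
    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    static unsigned long entries_needed(unsigned long offset, unsigned long len)
    {
            return (offset + len + PAGE_SIZE - 1) / PAGE_SIZE;
    }

    int main(void)
    {
            unsigned long max_fr_pages = 255;       /* e.g. 256 - 1 */
            unsigned long len = max_fr_pages * PAGE_SIZE;

            printf("aligned:    %lu entries\n", entries_needed(0, len));
            printf("misaligned: %lu entries\n", entries_needed(512, len));
            return 0;       /* prints 255 vs 256: hence pages_per_mr + 1 */
    }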
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 4ffd5957637a..385a5212c10f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1042,7 +1042,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
 {
 	struct nvme_tcp_queue *queue =
 		container_of(w, struct nvme_tcp_queue, io_work);
-	unsigned long start = jiffies + msecs_to_jiffies(1);
+	unsigned long deadline = jiffies + msecs_to_jiffies(1);
 
 	do {
 		bool pending = false;
@@ -1067,7 +1067,7 @@ static void nvme_tcp_io_work(struct work_struct *w)
 		if (!pending)
 			return;
 
-	} while (time_after(jiffies, start)); /* quota is exhausted */
+	} while (!time_after(jiffies, deadline)); /* quota is exhausted */
 
 	queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
 }
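
The host tcp.c fix corrects an inverted deadline test: time_after(jiffies, start) stays false until the 1 ms budget has already expired, so the loop ran a single pass and rescheduled itself instead of polling until the budget was spent. The corrected pattern, transcribed to plain C with a monotonic clock standing in for jiffies (the 1 ms budget mirrors the driver; the loop body is a stand-in):

    /* Budgeted polling loop: keep working until the deadline passes,
     * mirroring nvme_tcp_io_work()'s corrected exit condition. */
    #include <stdio.h>
    #include <time.h>

    static long long now_ns(void)
    {
            struct timespec ts;

            clock_gettime(CLOCK_MONOTONIC, &ts);
            return ts.tv_sec * 1000000000LL + ts.tv_nsec;
    }

    int main(void)
    {
            long long deadline = now_ns() + 1000000;        /* 1 ms budget */
            unsigned long rounds = 0;

            do {
                    rounds++;       /* stand-in for one send/recv round */
            } while (now_ns() <= deadline);     /* i.e. !time_after() */

            printf("did %lu rounds within the budget\n", rounds);
            return 0;
    }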
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index de0bff70ebb6..32008d85172b 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -11,10 +11,10 @@
 void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
 {
 	const struct queue_limits *ql = &bdev_get_queue(bdev)->limits;
-	/* Number of physical blocks per logical block. */
-	const u32 ppl = ql->physical_block_size / ql->logical_block_size;
-	/* Physical blocks per logical block, 0's based. */
-	const __le16 ppl0b = to0based(ppl);
+	/* Number of logical blocks per physical block. */
+	const u32 lpp = ql->physical_block_size / ql->logical_block_size;
+	/* Logical blocks per physical block, 0's based. */
+	const __le16 lpp0b = to0based(lpp);
 
 	/*
 	 * For NVMe 1.2 and later, bit 1 indicates that the fields NAWUN,
@@ -25,9 +25,9 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
 	 * field from the identify controller data structure should be used.
 	 */
 	id->nsfeat |= 1 << 1;
-	id->nawun = ppl0b;
-	id->nawupf = ppl0b;
-	id->nacwu = ppl0b;
+	id->nawun = lpp0b;
+	id->nawupf = lpp0b;
+	id->nacwu = lpp0b;
 
 	/*
 	 * Bit 4 indicates that the fields NPWG, NPWA, NPDG, NPDA, and
@@ -36,7 +36,7 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
 	 */
 	id->nsfeat |= 1 << 4;
 	/* NPWG = Namespace Preferred Write Granularity. 0's based */
-	id->npwg = ppl0b;
+	id->npwg = lpp0b;
 	/* NPWA = Namespace Preferred Write Alignment. 0's based */
 	id->npwa = id->npwg;
 	/* NPDG = Namespace Preferred Deallocate Granularity. 0's based */
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index bf4f03474e89..d535080b781f 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -348,8 +348,7 @@ static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd)
 	return 0;
 err:
-	if (cmd->req.sg_cnt)
-		sgl_free(cmd->req.sg);
+	sgl_free(cmd->req.sg);
 	return NVME_SC_INTERNAL;
 }
 
@@ -554,8 +553,7 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd)
 	if (queue->nvme_sq.sqhd_disabled) {
 		kfree(cmd->iov);
-		if (cmd->req.sg_cnt)
-			sgl_free(cmd->req.sg);
+		sgl_free(cmd->req.sg);
 	}
 
 	return 1;
@@ -586,8 +584,7 @@ static int nvmet_try_send_response(struct nvmet_tcp_cmd *cmd,
 		return -EAGAIN;
 
 	kfree(cmd->iov);
-	if (cmd->req.sg_cnt)
-		sgl_free(cmd->req.sg);
+	sgl_free(cmd->req.sg);
 	cmd->queue->snd_cmd = NULL;
 	nvmet_tcp_put_cmd(cmd);
 	return 1;
@@ -1310,8 +1307,7 @@ static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd)
 	nvmet_req_uninit(&cmd->req);
 	nvmet_tcp_unmap_pdu_iovec(cmd);
 	kfree(cmd->iov);
-	if (cmd->req.sg_cnt)
-		sgl_free(cmd->req.sg);
+	sgl_free(cmd->req.sg);
 }
 
 static void nvmet_tcp_uninit_data_in_cmds(struct nvmet_tcp_queue *queue)
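
The io-cmd-bdev.c rename above deserves a worked example: with 512-byte logical blocks on a 4096-byte physical block, the ratio really is 8 logical blocks per physical block, reported 0's based as 7 in NAWUN/NAWUPF/NACWU/NPWG. A standalone sketch, with to0based() reimplemented here on the assumption that it clamps to 1..0xffff before subtracting one:

    /* 0's based logical-blocks-per-physical-block, as reported in the
     * identify namespace atomic-write and preferred-write fields. */
    #include <stdio.h>

    static unsigned int to0based(unsigned int a)
    {
            /* assumed behavior: clamp to [1, 0xffff], then make 0's based */
            if (a < 1)
                    a = 1;
            if (a > 0xffff)
                    a = 0xffff;
            return a - 1;
    }

    int main(void)
    {
            unsigned int physical = 4096, logical = 512;    /* illustrative */
            unsigned int lpp = physical / logical;

            printf("lpp=%u lpp0b=%u\n", lpp, to0based(lpp)); /* 8 and 7 */
            return 0;
    }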
