diff options
Diffstat (limited to 'drivers/nvme/host/core.c')
| -rw-r--r-- | drivers/nvme/host/core.c | 77 | 
1 files changed, 56 insertions, 21 deletions
| diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 84cb859a911d..855b42c92284 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -92,6 +92,17 @@ MODULE_PARM_DESC(apst_secondary_latency_tol_us,  	"secondary APST latency tolerance in us");  /* + * Older kernels didn't enable protection information if it was at an offset. + * Newer kernels do, so it breaks reads on the upgrade if such formats were + * used in prior kernels since the metadata written did not contain a valid + * checksum. + */ +static bool disable_pi_offsets = false; +module_param(disable_pi_offsets, bool, 0444); +MODULE_PARM_DESC(disable_pi_offsets, +	"disable protection information if it has an offset"); + +/*   * nvme_wq - hosts nvme related works that are not reset or delete   * nvme_reset_wq - hosts nvme reset works   * nvme_delete_wq - hosts nvme delete works @@ -1390,17 +1401,30 @@ static void nvme_update_keep_alive(struct nvme_ctrl *ctrl,  	nvme_start_keep_alive(ctrl);  } -/* - * In NVMe 1.0 the CNS field was just a binary controller or namespace - * flag, thus sending any new CNS opcodes has a big chance of not working. - * Qemu unfortunately had that bug after reporting a 1.1 version compliance - * (but not for any later version). - */ -static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl) +static bool nvme_id_cns_ok(struct nvme_ctrl *ctrl, u8 cns)  { -	if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS) -		return ctrl->vs < NVME_VS(1, 2, 0); -	return ctrl->vs < NVME_VS(1, 1, 0); +	/* +	 * The CNS field occupies a full byte starting with NVMe 1.2 +	 */ +	if (ctrl->vs >= NVME_VS(1, 2, 0)) +		return true; + +	/* +	 * NVMe 1.1 expanded the CNS value to two bits, which means values +	 * larger than that could get truncated and treated as an incorrect +	 * value. +	 * +	 * Qemu implemented 1.0 behavior for controllers claiming 1.1 +	 * compliance, so they need to be quirked here. +	 */ +	if (ctrl->vs >= NVME_VS(1, 1, 0) && +	    !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) +		return cns <= 3; + +	/* +	 * NVMe 1.0 used a single bit for the CNS value. +	 */ +	return cns <= 1;  }  static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) @@ -1913,8 +1937,12 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl,  	if (head->pi_size && head->ms >= head->pi_size)  		head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; -	if (!(id->dps & NVME_NS_DPS_PI_FIRST)) -		info->pi_offset = head->ms - head->pi_size; +	if (!(id->dps & NVME_NS_DPS_PI_FIRST)) { +		if (disable_pi_offsets) +			head->pi_type = 0; +		else +			info->pi_offset = head->ms - head->pi_size; +	}  	if (ctrl->ops->flags & NVME_F_FABRICS) {  		/* @@ -3104,7 +3132,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)  		ctrl->max_zeroes_sectors = 0;  	if (ctrl->subsys->subtype != NVME_NQN_NVME || -	    nvme_ctrl_limited_cns(ctrl) || +	    !nvme_id_cns_ok(ctrl, NVME_ID_CNS_CS_CTRL) ||  	    test_bit(NVME_CTRL_SKIP_ID_CNS_CS, &ctrl->flags))  		return 0; @@ -3767,7 +3795,8 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)  	int srcu_idx;  	srcu_idx = srcu_read_lock(&ctrl->srcu); -	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) { +	list_for_each_entry_srcu(ns, &ctrl->namespaces, list, +				 srcu_read_lock_held(&ctrl->srcu)) {  		if (ns->head->ns_id == nsid) {  			if (!nvme_get_ns(ns))  				continue; @@ -4200,7 +4229,7 @@ static void nvme_scan_work(struct work_struct *work)  	}  	mutex_lock(&ctrl->scan_lock); -	if (nvme_ctrl_limited_cns(ctrl)) { +	if (!nvme_id_cns_ok(ctrl, NVME_ID_CNS_NS_ACTIVE_LIST)) {  		nvme_scan_ns_sequential(ctrl);  	} else {  		/* @@ -4851,7 +4880,8 @@ void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl)  	int srcu_idx;  	srcu_idx = srcu_read_lock(&ctrl->srcu); -	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) +	list_for_each_entry_srcu(ns, &ctrl->namespaces, list, +				 srcu_read_lock_held(&ctrl->srcu))  		blk_mark_disk_dead(ns->disk);  	srcu_read_unlock(&ctrl->srcu, srcu_idx);  } @@ -4863,7 +4893,8 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl)  	int srcu_idx;  	srcu_idx = srcu_read_lock(&ctrl->srcu); -	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) +	list_for_each_entry_srcu(ns, &ctrl->namespaces, list, +				 srcu_read_lock_held(&ctrl->srcu))  		blk_mq_unfreeze_queue(ns->queue);  	srcu_read_unlock(&ctrl->srcu, srcu_idx);  	clear_bit(NVME_CTRL_FROZEN, &ctrl->flags); @@ -4876,7 +4907,8 @@ int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout)  	int srcu_idx;  	srcu_idx = srcu_read_lock(&ctrl->srcu); -	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) { +	list_for_each_entry_srcu(ns, &ctrl->namespaces, list, +				 srcu_read_lock_held(&ctrl->srcu)) {  		timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout);  		if (timeout <= 0)  			break; @@ -4892,7 +4924,8 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl)  	int srcu_idx;  	srcu_idx = srcu_read_lock(&ctrl->srcu); -	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) +	list_for_each_entry_srcu(ns, &ctrl->namespaces, list, +				 srcu_read_lock_held(&ctrl->srcu))  		blk_mq_freeze_queue_wait(ns->queue);  	srcu_read_unlock(&ctrl->srcu, srcu_idx);  } @@ -4905,7 +4938,8 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)  	set_bit(NVME_CTRL_FROZEN, &ctrl->flags);  	srcu_idx = srcu_read_lock(&ctrl->srcu); -	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) +	list_for_each_entry_srcu(ns, &ctrl->namespaces, list, +				 srcu_read_lock_held(&ctrl->srcu))  		blk_freeze_queue_start(ns->queue);  	srcu_read_unlock(&ctrl->srcu, srcu_idx);  } @@ -4953,7 +4987,8 @@ void nvme_sync_io_queues(struct nvme_ctrl *ctrl)  	int srcu_idx;  	srcu_idx = srcu_read_lock(&ctrl->srcu); -	list_for_each_entry_rcu(ns, &ctrl->namespaces, list) +	list_for_each_entry_srcu(ns, &ctrl->namespaces, list, +				 srcu_read_lock_held(&ctrl->srcu))  		blk_sync_queue(ns->queue);  	srcu_read_unlock(&ctrl->srcu, srcu_idx);  } | 
