summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-11-01 13:41:55 -1000
committerLinus Torvalds <torvalds@linux-foundation.org>2024-11-01 13:41:55 -1000
commitf4a1e8e36973e2034c9eac2b3538470f8b2748a4 (patch)
tree632018fae450e75e92e3bb4814154c01a7e6106d
parentf0d3699aef2b6f864c78ccfa8e2a7327f65b8841 (diff)
parentd0c6cc6c6a6164a853e86206309b5a5bc5e3e72b (diff)
downloadlinux-f4a1e8e36973e2034c9eac2b3538470f8b2748a4.tar.gz
linux-f4a1e8e36973e2034c9eac2b3538470f8b2748a4.tar.bz2
linux-f4a1e8e36973e2034c9eac2b3538470f8b2748a4.zip
Merge tag 'block-6.12-20241101' of git://git.kernel.dk/linux
Pull block fixes from Jens Axboe: - Fixup for a recent blk_rq_map_user_bvec() patch - NVMe pull request via Keith: - Spec compliant identification fix (Keith) - Module parameter to enable backward compatibility on unusual namespace formats (Keith) - Target double free fix when using keys (Vitaliy) - Passthrough command error handling fix (Keith) * tag 'block-6.12-20241101' of git://git.kernel.dk/linux: nvme: re-fix error-handling for io_uring nvme-passthrough nvmet-auth: assign dh_key to NULL after kfree_sensitive nvme: module parameter to disable pi with offsets block: fix queue limits checks in blk_rq_map_user_bvec for real nvme: enhance cns version checking
-rw-r--r--block/blk-map.c56
-rw-r--r--drivers/nvme/host/core.c56
-rw-r--r--drivers/nvme/host/ioctl.c7
-rw-r--r--drivers/nvme/target/auth.c1
4 files changed, 65 insertions, 55 deletions
diff --git a/block/blk-map.c b/block/blk-map.c
index 6ef2ec1f7d78..b5fd1d857461 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -561,55 +561,33 @@ EXPORT_SYMBOL(blk_rq_append_bio);
/* Prepare bio for passthrough IO given ITER_BVEC iter */
static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
{
- struct request_queue *q = rq->q;
- size_t nr_iter = iov_iter_count(iter);
- size_t nr_segs = iter->nr_segs;
- struct bio_vec *bvecs, *bvprvp = NULL;
- const struct queue_limits *lim = &q->limits;
- unsigned int nsegs = 0, bytes = 0;
+ const struct queue_limits *lim = &rq->q->limits;
+ unsigned int max_bytes = lim->max_hw_sectors << SECTOR_SHIFT;
+ unsigned int nsegs;
struct bio *bio;
- size_t i;
+ int ret;
- if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q))
- return -EINVAL;
- if (nr_segs > queue_max_segments(q))
+ if (!iov_iter_count(iter) || iov_iter_count(iter) > max_bytes)
return -EINVAL;
- /* no iovecs to alloc, as we already have a BVEC iterator */
+ /* reuse the bvecs from the iterator instead of allocating new ones */
bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
- if (bio == NULL)
+ if (!bio)
return -ENOMEM;
-
bio_iov_bvec_set(bio, (struct iov_iter *)iter);
- blk_rq_bio_prep(rq, bio, nr_segs);
-
- /* loop to perform a bunch of sanity checks */
- bvecs = (struct bio_vec *)iter->bvec;
- for (i = 0; i < nr_segs; i++) {
- struct bio_vec *bv = &bvecs[i];
-
- /*
- * If the queue doesn't support SG gaps and adding this
- * offset would create a gap, fallback to copy.
- */
- if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) {
- blk_mq_map_bio_put(bio);
- return -EREMOTEIO;
- }
- /* check full condition */
- if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
- goto put_bio;
- if (bytes + bv->bv_len > nr_iter)
- break;
- nsegs++;
- bytes += bv->bv_len;
- bvprvp = bv;
+ /* check that the data layout matches the hardware restrictions */
+ ret = bio_split_rw_at(bio, lim, &nsegs, max_bytes);
+ if (ret) {
+ /* if we would have to split the bio, copy instead */
+ if (ret > 0)
+ ret = -EREMOTEIO;
+ blk_mq_map_bio_put(bio);
+ return ret;
}
+
+ blk_rq_bio_prep(rq, bio, nsegs);
return 0;
-put_bio:
- blk_mq_map_bio_put(bio);
- return -EINVAL;
}
/**
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 84cb859a911d..b149b638453f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -92,6 +92,17 @@ MODULE_PARM_DESC(apst_secondary_latency_tol_us,
"secondary APST latency tolerance in us");
/*
+ * Older kernels didn't enable protection information if it was at an offset.
+ * Newer kernels do, so it breaks reads on the upgrade if such formats were
+ * used in prior kernels since the metadata written did not contain a valid
+ * checksum.
+ */
+static bool disable_pi_offsets = false;
+module_param(disable_pi_offsets, bool, 0444);
+MODULE_PARM_DESC(disable_pi_offsets,
+ "disable protection information if it has an offset");
+
+/*
* nvme_wq - hosts nvme related works that are not reset or delete
* nvme_reset_wq - hosts nvme reset works
* nvme_delete_wq - hosts nvme delete works
@@ -1390,17 +1401,30 @@ static void nvme_update_keep_alive(struct nvme_ctrl *ctrl,
nvme_start_keep_alive(ctrl);
}
-/*
- * In NVMe 1.0 the CNS field was just a binary controller or namespace
- * flag, thus sending any new CNS opcodes has a big chance of not working.
- * Qemu unfortunately had that bug after reporting a 1.1 version compliance
- * (but not for any later version).
- */
-static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl)
+static bool nvme_id_cns_ok(struct nvme_ctrl *ctrl, u8 cns)
{
- if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)
- return ctrl->vs < NVME_VS(1, 2, 0);
- return ctrl->vs < NVME_VS(1, 1, 0);
+ /*
+ * The CNS field occupies a full byte starting with NVMe 1.2
+ */
+ if (ctrl->vs >= NVME_VS(1, 2, 0))
+ return true;
+
+ /*
+ * NVMe 1.1 expanded the CNS value to two bits, which means values
+ * larger than that could get truncated and treated as an incorrect
+ * value.
+ *
+ * Qemu implemented 1.0 behavior for controllers claiming 1.1
+ * compliance, so they need to be quirked here.
+ */
+ if (ctrl->vs >= NVME_VS(1, 1, 0) &&
+ !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS))
+ return cns <= 3;
+
+ /*
+ * NVMe 1.0 used a single bit for the CNS value.
+ */
+ return cns <= 1;
}
static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id)
@@ -1913,8 +1937,12 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl,
if (head->pi_size && head->ms >= head->pi_size)
head->pi_type = id->dps & NVME_NS_DPS_PI_MASK;
- if (!(id->dps & NVME_NS_DPS_PI_FIRST))
- info->pi_offset = head->ms - head->pi_size;
+ if (!(id->dps & NVME_NS_DPS_PI_FIRST)) {
+ if (disable_pi_offsets)
+ head->pi_type = 0;
+ else
+ info->pi_offset = head->ms - head->pi_size;
+ }
if (ctrl->ops->flags & NVME_F_FABRICS) {
/*
@@ -3104,7 +3132,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl)
ctrl->max_zeroes_sectors = 0;
if (ctrl->subsys->subtype != NVME_NQN_NVME ||
- nvme_ctrl_limited_cns(ctrl) ||
+ !nvme_id_cns_ok(ctrl, NVME_ID_CNS_CS_CTRL) ||
test_bit(NVME_CTRL_SKIP_ID_CNS_CS, &ctrl->flags))
return 0;
@@ -4200,7 +4228,7 @@ static void nvme_scan_work(struct work_struct *work)
}
mutex_lock(&ctrl->scan_lock);
- if (nvme_ctrl_limited_cns(ctrl)) {
+ if (!nvme_id_cns_ok(ctrl, NVME_ID_CNS_NS_ACTIVE_LIST)) {
nvme_scan_ns_sequential(ctrl);
} else {
/*
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index b9b79ccfabf8..a96976b22fa7 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -421,10 +421,13 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
struct io_uring_cmd *ioucmd = req->end_io_data;
struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
- if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
+ if (nvme_req(req)->flags & NVME_REQ_CANCELLED) {
pdu->status = -EINTR;
- else
+ } else {
pdu->status = nvme_req(req)->status;
+ if (!pdu->status)
+ pdu->status = blk_status_to_errno(err);
+ }
pdu->result = le64_to_cpu(nvme_req(req)->result.u64);
/*
diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c
index 29f8639cfe7f..b47d675232d2 100644
--- a/drivers/nvme/target/auth.c
+++ b/drivers/nvme/target/auth.c
@@ -115,6 +115,7 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id)
pr_debug("%s: ctrl %d failed to generate private key, err %d\n",
__func__, ctrl->cntlid, ret);
kfree_sensitive(ctrl->dh_key);
+ ctrl->dh_key = NULL;
return ret;
}
ctrl->dh_keysize = crypto_kpp_maxsize(ctrl->dh_tfm);