From 43e2d08d0790a09ee1d2c838e33343ee1bcaf610 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Mon, 25 Feb 2019 13:00:04 +0200
Subject: nvme: avoid double dereference to convert le to cpu

Use le16_to_cpu instead of le16_to_cpup and le64_to_cpu instead of
le64_to_cpup. This will also align the code to nvme-core driver
convention.

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 470601980794..b5939112b9b6 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1588,7 +1588,7 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b)
 static void nvme_update_disk_info(struct gendisk *disk,
 		struct nvme_ns *ns, struct nvme_id_ns *id)
 {
-	sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9);
+	sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
 	unsigned short bs = 1 << ns->lba_shift;
 
 	blk_mq_freeze_queue(disk->queue);
@@ -2549,7 +2549,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl)
 	ctrl->crdt[2] = le16_to_cpu(id->crdt3);
 
 	ctrl->oacs = le16_to_cpu(id->oacs);
-	ctrl->oncs = le16_to_cpup(&id->oncs);
+	ctrl->oncs = le16_to_cpu(id->oncs);
 	ctrl->oaes = le32_to_cpu(id->oaes);
 	atomic_set(&ctrl->abort_limit, id->acl + 1);
 	ctrl->vwc = id->vwc;
-- 
cgit v1.2.3


From cfe03c2ec46200dfefa5167e6a2bb50c0426c5f4 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 12 Mar 2019 18:05:10 +0100
Subject: nvmet: avoid double errno conversions

Use errno_to_nvme_status to convert from a negative errno to a
nvme status field instead of going through a blk_status_t.

Also remove the pointless status variable in
nvmet_bdev_execute_write_zeroes.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
 drivers/nvme/target/io-cmd-bdev.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index a065dbfc43b1..3efc52f9c309 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -196,7 +196,7 @@ static u16 nvmet_bdev_discard_range(struct nvmet_req *req,
 			GFP_KERNEL, 0, bio);
 	if (ret && ret != -EOPNOTSUPP) {
 		req->error_slba = le64_to_cpu(range->slba);
-		return blk_to_nvme_status(req, errno_to_blk_status(ret));
+		return errno_to_nvme_status(req, ret);
 	}
 	return NVME_SC_SUCCESS;
 }
@@ -252,7 +252,6 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
 {
 	struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes;
 	struct bio *bio = NULL;
-	u16 status = NVME_SC_SUCCESS;
 	sector_t sector;
 	sector_t nr_sector;
 	int ret;
@@ -264,13 +263,12 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req)
 
 	ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector,
 			GFP_KERNEL, &bio, 0);
-	status = blk_to_nvme_status(req, errno_to_blk_status(ret));
 	if (bio) {
 		bio->bi_private = req;
 		bio->bi_end_io = nvmet_bio_done;
 		submit_bio(bio);
 	} else {
-		nvmet_req_complete(req, status);
+		nvmet_req_complete(req, errno_to_nvme_status(req, ret));
 	}
 }
 
-- 
cgit v1.2.3


From 6b80f1d2cc5a76f0121a43a612515bc8a8976e66 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Sat, 23 Feb 2019 12:51:08 -0600
Subject: nvmet-fc: use zero-sized array and struct_size() in kzalloc()

Update the code to use a zero-sized array instead of a pointer in
structure nvmet_fc_tgt_queue and use struct_size() in kzalloc().

Notice that one of the more common cases of allocation size calculations
is finding the size of a structure that has a zero-sized array at the end,
along with memory for some number of elements for that array. For example:

struct foo {
	int stuff;
	struct boo entry[];
};

instance = kzalloc(sizeof(struct foo) + sizeof(struct boo) * count, GFP_KERNEL);

Instead of leaving these open-coded and prone to type mistakes, we can now
use the new struct_size() helper:

instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL);

This code was detected with the help of Coccinelle.

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Reviewed-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/fc.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 98b7b1f4ee96..9369a11fe7a9 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -128,12 +128,12 @@ struct nvmet_fc_tgt_queue {
 	struct nvmet_cq			nvme_cq;
 	struct nvmet_sq			nvme_sq;
 	struct nvmet_fc_tgt_assoc	*assoc;
-	struct nvmet_fc_fcp_iod		*fod;		/* array of fcp_iods */
 	struct list_head		fod_list;
 	struct list_head		pending_cmd_list;
 	struct list_head		avail_defer_list;
 	struct workqueue_struct		*work_q;
 	struct kref			ref;
+	struct nvmet_fc_fcp_iod		fod[];		/* array of fcp_iods */
 } __aligned(sizeof(unsigned long long));
 
 struct nvmet_fc_tgt_assoc {
@@ -588,9 +588,7 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
 	if (qid > NVMET_NR_QUEUES)
 		return NULL;
 
-	queue = kzalloc((sizeof(*queue) +
-				(sizeof(struct nvmet_fc_fcp_iod) * sqsize)),
-				GFP_KERNEL);
+	queue = kzalloc(struct_size(queue, fod, sqsize), GFP_KERNEL);
 	if (!queue)
 		return NULL;
 
@@ -603,7 +601,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc,
 	if (!queue->work_q)
 		goto out_a_put;
 
-	queue->fod = (struct nvmet_fc_fcp_iod *)&queue[1];
 	queue->qid = qid;
 	queue->sqsize = sqsize;
 	queue->assoc = assoc;
-- 
cgit v1.2.3


From 70583295388a3ceabc22bddeb16e788f58225d70 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Fri, 8 Mar 2019 15:41:21 -0800
Subject: nvmet-tcp: implement C2HData SUCCESS optimization

TP 8000 says that the use of the SUCCESS flag depends on weather the
controller support disabling sq_head pointer updates. Given that we
support it by default, makes sense that we go the extra mile to actually
use the SUCCESS flag.

When we create the C2HData PDU header, we check if sqhd_disabled is set
on our queue, if so, we set the SUCCESS flag in the PDU header and
skip sending a completion response capsule.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Oliver Smith-Denny <osmithde@cisco.com>
Tested-by: Oliver Smith-Denny <osmithde@cisco.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/tcp.c | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index ad0df786fe93..0a941abf56ec 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -371,7 +371,8 @@ static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd)
 	cmd->state = NVMET_TCP_SEND_DATA_PDU;
 
 	pdu->hdr.type = nvme_tcp_c2h_data;
-	pdu->hdr.flags = NVME_TCP_F_DATA_LAST;
+	pdu->hdr.flags = NVME_TCP_F_DATA_LAST | (queue->nvme_sq.sqhd_disabled ?
+						NVME_TCP_F_DATA_SUCCESS : 0);
 	pdu->hdr.hlen = sizeof(*pdu);
 	pdu->hdr.pdo = pdu->hdr.hlen + hdgst;
 	pdu->hdr.plen =
@@ -542,8 +543,19 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd)
 		cmd->state = NVMET_TCP_SEND_DDGST;
 		cmd->offset = 0;
 	} else {
-		nvmet_setup_response_pdu(cmd);
+		if (queue->nvme_sq.sqhd_disabled) {
+			cmd->queue->snd_cmd = NULL;
+			nvmet_tcp_put_cmd(cmd);
+		} else {
+			nvmet_setup_response_pdu(cmd);
+		}
 	}
+
+	if (queue->nvme_sq.sqhd_disabled) {
+		kfree(cmd->iov);
+		sgl_free(cmd->req.sg);
+	}
+
 	return 1;
 
 }
@@ -619,7 +631,13 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd)
 		return ret;
 
 	cmd->offset += ret;
-	nvmet_setup_response_pdu(cmd);
+
+	if (queue->nvme_sq.sqhd_disabled) {
+		cmd->queue->snd_cmd = NULL;
+		nvmet_tcp_put_cmd(cmd);
+	} else {
+		nvmet_setup_response_pdu(cmd);
+	}
 	return 1;
 }
 
-- 
cgit v1.2.3


From 7c349dde26b75db3fa1863e36984ac2271cd797a Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 8 Mar 2019 10:43:06 -0700
Subject: nvme-pci: use a flag for polled queues

A negative value for the cq_vector used to mean the queue is either
disabled or a polled queue. However, we have a queue enabled flag,
so the cq_vector had been serving double duty.

Don't overload the meaning of cq_vector. Use a flag specific to the
polled queues instead.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 33 +++++++++++++--------------------
 1 file changed, 13 insertions(+), 20 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a90cf5d63aac..4c0461bd6cfc 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -189,7 +189,7 @@ struct nvme_queue {
 	dma_addr_t cq_dma_addr;
 	u32 __iomem *q_db;
 	u16 q_depth;
-	s16 cq_vector;
+	u16 cq_vector;
 	u16 sq_tail;
 	u16 last_sq_tail;
 	u16 cq_head;
@@ -200,6 +200,7 @@ struct nvme_queue {
 #define NVMEQ_ENABLED		0
 #define NVMEQ_SQ_CMB		1
 #define NVMEQ_DELETE_ERROR	2
+#define NVMEQ_POLLED		3
 	u32 *dbbuf_sq_db;
 	u32 *dbbuf_cq_db;
 	u32 *dbbuf_sq_ei;
@@ -1088,7 +1089,7 @@ static int nvme_poll_irqdisable(struct nvme_queue *nvmeq, unsigned int tag)
 	 * using the CQ lock.  For normal interrupt driven threads we have
 	 * to disable the interrupt to avoid racing with it.
 	 */
-	if (nvmeq->cq_vector == -1) {
+	if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) {
 		spin_lock(&nvmeq->cq_poll_lock);
 		found = nvme_process_cq(nvmeq, &start, &end, tag);
 		spin_unlock(&nvmeq->cq_poll_lock);
@@ -1148,7 +1149,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 	struct nvme_command c;
 	int flags = NVME_QUEUE_PHYS_CONTIG;
 
-	if (vector != -1)
+	if (!test_bit(NVMEQ_POLLED, &nvmeq->flags))
 		flags |= NVME_CQ_IRQ_ENABLED;
 
 	/*
@@ -1161,10 +1162,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid,
 	c.create_cq.cqid = cpu_to_le16(qid);
 	c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1);
 	c.create_cq.cq_flags = cpu_to_le16(flags);
-	if (vector != -1)
-		c.create_cq.irq_vector = cpu_to_le16(vector);
-	else
-		c.create_cq.irq_vector = 0;
+	c.create_cq.irq_vector = cpu_to_le16(vector);
 
 	return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0);
 }
@@ -1410,10 +1408,8 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
 	nvmeq->dev->online_queues--;
 	if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
 		blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
-	if (nvmeq->cq_vector == -1)
-		return 0;
-	pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq);
-	nvmeq->cq_vector = -1;
+	if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags))
+		pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq);
 	return 0;
 }
 
@@ -1507,7 +1503,6 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
 	nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride];
 	nvmeq->q_depth = depth;
 	nvmeq->qid = qid;
-	nvmeq->cq_vector = -1;
 	dev->ctrl.queue_count++;
 
 	return 0;
@@ -1552,7 +1547,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
 {
 	struct nvme_dev *dev = nvmeq->dev;
 	int result;
-	s16 vector;
+	u16 vector = 0;
 
 	clear_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags);
 
@@ -1563,7 +1558,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
 	if (!polled)
 		vector = dev->num_vecs == 1 ? 0 : qid;
 	else
-		vector = -1;
+		set_bit(NVMEQ_POLLED, &nvmeq->flags);
 
 	result = adapter_alloc_cq(dev, qid, nvmeq, vector);
 	if (result)
@@ -1578,7 +1573,8 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
 	nvmeq->cq_vector = vector;
 	nvme_init_queue(nvmeq, qid);
 
-	if (vector != -1) {
+	if (!polled) {
+		nvmeq->cq_vector = vector;
 		result = queue_request_irq(nvmeq);
 		if (result < 0)
 			goto release_sq;
@@ -1588,7 +1584,6 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled)
 	return result;
 
 release_sq:
-	nvmeq->cq_vector = -1;
 	dev->online_queues--;
 	adapter_delete_sq(dev, qid);
 release_cq:
@@ -1730,7 +1725,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
 	nvme_init_queue(nvmeq, 0);
 	result = queue_request_irq(nvmeq);
 	if (result) {
-		nvmeq->cq_vector = -1;
+		dev->online_queues--;
 		return result;
 	}
 
@@ -2171,10 +2166,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	 * number of interrupts.
 	 */
 	result = queue_request_irq(adminq);
-	if (result) {
-		adminq->cq_vector = -1;
+	if (result)
 		return result;
-	}
 	set_bit(NVMEQ_ENABLED, &adminq->flags);
 
 	result = nvme_create_io_queues(dev);
-- 
cgit v1.2.3


From 88a041f4c1f6a21284c70b491929ed35336a0ea9 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 8 Mar 2019 10:43:11 -0700
Subject: nvme-pci: remove q_dmadev from nvme_queue

We don't need to save the dma device as it's not used in the hot path
and hasn't in a long time. Shrink the struct nvme_queue removing this
unnecessary member.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 4c0461bd6cfc..8af2b10b4507 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -177,7 +177,6 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl)
  * commands and one for I/O commands).
  */
 struct nvme_queue {
-	struct device *q_dmadev;
 	struct nvme_dev *dev;
 	spinlock_t sq_lock;
 	struct nvme_command *sq_cmds;
@@ -1369,16 +1368,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 
 static void nvme_free_queue(struct nvme_queue *nvmeq)
 {
-	dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth),
+	dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq->q_depth),
 				(void *)nvmeq->cqes, nvmeq->cq_dma_addr);
 	if (!nvmeq->sq_cmds)
 		return;
 
 	if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) {
-		pci_free_p2pmem(to_pci_dev(nvmeq->q_dmadev),
+		pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev),
 				nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth));
 	} else {
-		dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth),
+		dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq->q_depth),
 				nvmeq->sq_cmds, nvmeq->sq_dma_addr);
 	}
 }
@@ -1494,7 +1493,6 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
 	if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth))
 		goto free_cqdma;
 
-	nvmeq->q_dmadev = dev->dev;
 	nvmeq->dev = dev;
 	spin_lock_init(&nvmeq->sq_lock);
 	spin_lock_init(&nvmeq->cq_poll_lock);
-- 
cgit v1.2.3


From 39f8e36401142d73e33a954ac4bdf844fb5de9ae Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 8 Mar 2019 10:43:13 -0700
Subject: nvme-pci: remove unused nvme_iod member

Signed-off-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 8af2b10b4507..0cba927224b4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -220,7 +220,6 @@ struct nvme_iod {
 	int aborted;
 	int npages;		/* In the PRP list. 0 means small pool in use */
 	int nents;		/* Used in scatterlist */
-	int length;		/* Of data, in bytes */
 	dma_addr_t first_dma;
 	struct scatterlist meta_sg; /* metadata requires single contiguous buffer */
 	struct scatterlist *sg;
@@ -603,7 +602,6 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
 	iod->aborted = 0;
 	iod->npages = -1;
 	iod->nents = 0;
-	iod->length = size;
 
 	return BLK_STS_OK;
 }
-- 
cgit v1.2.3


From 9b048119a153590b934ef49aae309b723587f527 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 3 Mar 2019 08:04:01 -0700
Subject: nvme-pci: remove nvme_init_iod

nvme_init_iod should really be split into two parts: initialize a few
general iod fields, which can easily be done at the beginning of
nvme_queue_rq, and allocating the scatterlist if needed, which logically
belongs into nvme_map_data with the code making use of it.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
 drivers/nvme/host/pci.c | 56 +++++++++++++++++++------------------------------
 1 file changed, 22 insertions(+), 34 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 0cba927224b4..2102a107e09b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -208,10 +208,10 @@ struct nvme_queue {
 };
 
 /*
- * The nvme_iod describes the data in an I/O, including the list of PRP
- * entries.  You can't see it in this data structure because C doesn't let
- * me express that.  Use nvme_init_iod to ensure there's enough space
- * allocated to store the PRP list.
+ * The nvme_iod describes the data in an I/O.
+ *
+ * The sg pointer contains the list of PRP/SGL chunk allocations in addition
+ * to the actual struct scatterlist.
  */
 struct nvme_iod {
 	struct nvme_request req;
@@ -583,29 +583,6 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
 	return true;
 }
 
-static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
-{
-	struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
-	int nseg = blk_rq_nr_phys_segments(rq);
-	unsigned int size = blk_rq_payload_bytes(rq);
-
-	iod->use_sgl = nvme_pci_use_sgls(dev, rq);
-
-	if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
-		iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
-		if (!iod->sg)
-			return BLK_STS_RESOURCE;
-	} else {
-		iod->sg = iod->inline_sg;
-	}
-
-	iod->aborted = 0;
-	iod->npages = -1;
-	iod->nents = 0;
-
-	return BLK_STS_OK;
-}
-
 static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
@@ -837,6 +814,17 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 	blk_status_t ret = BLK_STS_IOERR;
 	int nr_mapped;
 
+	if (blk_rq_payload_bytes(req) > NVME_INT_BYTES(dev) ||
+	    blk_rq_nr_phys_segments(req) > NVME_INT_PAGES) {
+		iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
+		if (!iod->sg)
+			return BLK_STS_RESOURCE;
+	} else {
+		iod->sg = iod->inline_sg;
+	}
+
+	iod->use_sgl = nvme_pci_use_sgls(dev, req);
+
 	sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
 	iod->nents = blk_rq_map_sg(q, req, iod->sg);
 	if (!iod->nents)
@@ -881,6 +869,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 out_unmap:
 	dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
 out:
+	nvme_free_iod(dev, req);
 	return ret;
 }
 
@@ -913,9 +902,14 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_queue *nvmeq = hctx->driver_data;
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *req = bd->rq;
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 	struct nvme_command cmnd;
 	blk_status_t ret;
 
+	iod->aborted = 0;
+	iod->npages = -1;
+	iod->nents = 0;
+
 	/*
 	 * We should not need to do this, but we're still using this to
 	 * ensure we can drain requests on a dying queue.
@@ -927,21 +921,15 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 	if (ret)
 		return ret;
 
-	ret = nvme_init_iod(req, dev);
-	if (ret)
-		goto out_free_cmd;
-
 	if (blk_rq_nr_phys_segments(req)) {
 		ret = nvme_map_data(dev, req, &cmnd);
 		if (ret)
-			goto out_cleanup_iod;
+			goto out_free_cmd;
 	}
 
 	blk_mq_start_request(req);
 	nvme_submit_cmd(nvmeq, &cmnd, bd->last);
 	return BLK_STS_OK;
-out_cleanup_iod:
-	nvme_free_iod(dev, req);
 out_free_cmd:
 	nvme_cleanup_cmd(req);
 	return ret;
-- 
cgit v1.2.3


From 915f04c93db4e3a7388c8ad8ddfc28830e4cbce3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 3 Mar 2019 08:13:03 -0700
Subject: nvme-pci: move the call to nvme_cleanup_cmd out of nvme_unmap_data

Cleaning up the command setup isn't related to unmapping data, and
disentangling them will simplify error handling a bit down the road.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
 drivers/nvme/host/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 2102a107e09b..2af6cfbd77ec 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -888,7 +888,6 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 			dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
 	}
 
-	nvme_cleanup_cmd(req);
 	nvme_free_iod(dev, req);
 }
 
@@ -939,6 +938,7 @@ static void nvme_pci_complete_rq(struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 
+	nvme_cleanup_cmd(req);
 	nvme_unmap_data(iod->nvmeq->dev, req);
 	nvme_complete_rq(req);
 }
-- 
cgit v1.2.3


From 7fe07d14f71fabef642a478626248a9121e95b7b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 3 Mar 2019 08:15:19 -0700
Subject: nvme-pci: merge nvme_free_iod into nvme_unmap_data

This means we now have a function that undoes everything nvme_map_data
does and we can simplify the error handling a bit.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
 drivers/nvme/host/pci.c | 44 +++++++++++++++++---------------------------
 1 file changed, 17 insertions(+), 27 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 2af6cfbd77ec..de199aff8d05 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -583,14 +583,24 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
 	return true;
 }
 
-static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
+static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	enum dma_data_direction dma_dir = rq_data_dir(req) ?
+			DMA_TO_DEVICE : DMA_FROM_DEVICE;
 	const int last_prp = dev->ctrl.page_size / sizeof(__le64) - 1;
 	dma_addr_t dma_addr = iod->first_dma, next_dma_addr;
-
 	int i;
 
+	if (iod->nents) {
+		/* P2PDMA requests do not need to be unmapped */
+		if (!is_pci_p2pdma_page(sg_page(iod->sg)))
+			dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
+
+		if (blk_integrity_rq(req))
+			dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
+	}
+
 	if (iod->npages == 0)
 		dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
 			dma_addr);
@@ -847,50 +857,30 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
 
 	if (ret != BLK_STS_OK)
-		goto out_unmap;
+		goto out;
 
 	ret = BLK_STS_IOERR;
 	if (blk_integrity_rq(req)) {
 		if (blk_rq_count_integrity_sg(q, req->bio) != 1)
-			goto out_unmap;
+			goto out;
 
 		sg_init_table(&iod->meta_sg, 1);
 		if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1)
-			goto out_unmap;
+			goto out;
 
 		if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir))
-			goto out_unmap;
+			goto out;
 
 		cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
 	}
 
 	return BLK_STS_OK;
 
-out_unmap:
-	dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
 out:
-	nvme_free_iod(dev, req);
+	nvme_unmap_data(dev, req);
 	return ret;
 }
 
-static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
-{
-	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	enum dma_data_direction dma_dir = rq_data_dir(req) ?
-			DMA_TO_DEVICE : DMA_FROM_DEVICE;
-
-	if (iod->nents) {
-		/* P2PDMA requests do not need to be unmapped */
-		if (!is_pci_p2pdma_page(sg_page(iod->sg)))
-			dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
-
-		if (blk_integrity_rq(req))
-			dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
-	}
-
-	nvme_free_iod(dev, req);
-}
-
 /*
  * NOTE: ns is NULL when called on the admin queue.
  */
-- 
cgit v1.2.3


From b15c592de37ed9d71499a3b8a750d1b235fcba3d Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 3 Mar 2019 08:52:21 -0700
Subject: nvme-pci: only call nvme_unmap_data for requests transferring data

This mirrors how nvme_map_pci is called and will allow simplifying some
checks in nvme_unmap_pci later on.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
 drivers/nvme/host/pci.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index de199aff8d05..030ee94452dd 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -929,7 +929,8 @@ static void nvme_pci_complete_rq(struct request *req)
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 
 	nvme_cleanup_cmd(req);
-	nvme_unmap_data(iod->nvmeq->dev, req);
+	if (blk_rq_nr_phys_segments(req))
+		nvme_unmap_data(iod->nvmeq->dev, req);
 	nvme_complete_rq(req);
 }
 
-- 
cgit v1.2.3


From 783b94bd9250478154904fa782d2cfc46336cdf6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 3 Mar 2019 08:19:18 -0700
Subject: nvme-pci: do not build a scatterlist to map metadata

We always have exactly one segment, so we can simply call dma_map_bvec.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
 drivers/nvme/host/pci.c | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 030ee94452dd..0679ac7fed19 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -221,7 +221,7 @@ struct nvme_iod {
 	int npages;		/* In the PRP list. 0 means small pool in use */
 	int nents;		/* Used in scatterlist */
 	dma_addr_t first_dma;
-	struct scatterlist meta_sg; /* metadata requires single contiguous buffer */
+	dma_addr_t meta_dma;
 	struct scatterlist *sg;
 	struct scatterlist inline_sg[0];
 };
@@ -592,13 +592,16 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 	dma_addr_t dma_addr = iod->first_dma, next_dma_addr;
 	int i;
 
+	if (blk_integrity_rq(req)) {
+		dma_unmap_page(dev->dev, iod->meta_dma,
+				rq_integrity_vec(req)->bv_len, dma_dir);
+	}
+
 	if (iod->nents) {
 		/* P2PDMA requests do not need to be unmapped */
 		if (!is_pci_p2pdma_page(sg_page(iod->sg)))
 			dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
 
-		if (blk_integrity_rq(req))
-			dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
 	}
 
 	if (iod->npages == 0)
@@ -861,17 +864,11 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 
 	ret = BLK_STS_IOERR;
 	if (blk_integrity_rq(req)) {
-		if (blk_rq_count_integrity_sg(q, req->bio) != 1)
-			goto out;
-
-		sg_init_table(&iod->meta_sg, 1);
-		if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1)
-			goto out;
-
-		if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir))
+		iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
+				dma_dir, 0);
+		if (dma_mapping_error(dev->dev, iod->meta_dma))
 			goto out;
-
-		cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg));
+		cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
 	}
 
 	return BLK_STS_OK;
-- 
cgit v1.2.3


From 4aedb705437f6f98b45f45c394e6803ca67abd33 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sun, 3 Mar 2019 09:46:28 -0700
Subject: nvme-pci: split metadata handling from nvme_map_data /
 nvme_unmap_data

This prepares for some bigger changes to the data mapping helpers.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
 drivers/nvme/host/pci.c | 48 +++++++++++++++++++++++++++---------------------
 1 file changed, 27 insertions(+), 21 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 0679ac7fed19..10e6b5d055e9 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -592,11 +592,6 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 	dma_addr_t dma_addr = iod->first_dma, next_dma_addr;
 	int i;
 
-	if (blk_integrity_rq(req)) {
-		dma_unmap_page(dev->dev, iod->meta_dma,
-				rq_integrity_vec(req)->bv_len, dma_dir);
-	}
-
 	if (iod->nents) {
 		/* P2PDMA requests do not need to be unmapped */
 		if (!is_pci_p2pdma_page(sg_page(iod->sg)))
@@ -858,24 +853,23 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped);
 	else
 		ret = nvme_pci_setup_prps(dev, req, &cmnd->rw);
-
+out:
 	if (ret != BLK_STS_OK)
-		goto out;
-
-	ret = BLK_STS_IOERR;
-	if (blk_integrity_rq(req)) {
-		iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
-				dma_dir, 0);
-		if (dma_mapping_error(dev->dev, iod->meta_dma))
-			goto out;
-		cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
-	}
+		nvme_unmap_data(dev, req);
+	return ret;
+}
 
-	return BLK_STS_OK;
+static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req,
+		struct nvme_command *cmnd)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
 
-out:
-	nvme_unmap_data(dev, req);
-	return ret;
+	iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req),
+			rq_dma_dir(req), 0);
+	if (dma_mapping_error(dev->dev, iod->meta_dma))
+		return BLK_STS_IOERR;
+	cmnd->rw.metadata = cpu_to_le64(iod->meta_dma);
+	return 0;
 }
 
 /*
@@ -913,9 +907,17 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
 			goto out_free_cmd;
 	}
 
+	if (blk_integrity_rq(req)) {
+		ret = nvme_map_metadata(dev, req, &cmnd);
+		if (ret)
+			goto out_unmap_data;
+	}
+
 	blk_mq_start_request(req);
 	nvme_submit_cmd(nvmeq, &cmnd, bd->last);
 	return BLK_STS_OK;
+out_unmap_data:
+	nvme_unmap_data(dev, req);
 out_free_cmd:
 	nvme_cleanup_cmd(req);
 	return ret;
@@ -924,10 +926,14 @@ out_free_cmd:
 static void nvme_pci_complete_rq(struct request *req)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	struct nvme_dev *dev = iod->nvmeq->dev;
 
 	nvme_cleanup_cmd(req);
+	if (blk_integrity_rq(req))
+		dma_unmap_page(dev->dev, iod->meta_dma,
+			       rq_integrity_vec(req)->bv_len, rq_data_dir(req));
 	if (blk_rq_nr_phys_segments(req))
-		nvme_unmap_data(iod->nvmeq->dev, req);
+		nvme_unmap_data(dev, req);
 	nvme_complete_rq(req);
 }
 
-- 
cgit v1.2.3


From d43f1ccfad053dbefba1d15443cdc36ca60958f0 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 5 Mar 2019 05:46:58 -0700
Subject: nvme-pci: remove the inline scatterlist optimization

We'll have a better way to optimize for small I/O that doesn't
require it soon, so remove the existing inline_sg case to make that
optimization easier to implement.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
 drivers/nvme/host/pci.c | 38 ++++++--------------------------------
 1 file changed, 6 insertions(+), 32 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 10e6b5d055e9..bd7e4209ab36 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -223,7 +223,6 @@ struct nvme_iod {
 	dma_addr_t first_dma;
 	dma_addr_t meta_dma;
 	struct scatterlist *sg;
-	struct scatterlist inline_sg[0];
 };
 
 /*
@@ -370,12 +369,6 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
 	return true;
 }
 
-/*
- * Max size of iod being embedded in the request payload
- */
-#define NVME_INT_PAGES		2
-#define NVME_INT_BYTES(dev)	(NVME_INT_PAGES * (dev)->ctrl.page_size)
-
 /*
  * Will slightly overestimate the number of pages needed.  This is OK
  * as it only leads to a small amount of wasted memory for the lifetime of
@@ -410,15 +403,6 @@ static unsigned int nvme_pci_iod_alloc_size(struct nvme_dev *dev,
 	return alloc_size + sizeof(struct scatterlist) * nseg;
 }
 
-static unsigned int nvme_pci_cmd_size(struct nvme_dev *dev, bool use_sgl)
-{
-	unsigned int alloc_size = nvme_pci_iod_alloc_size(dev,
-				    NVME_INT_BYTES(dev), NVME_INT_PAGES,
-				    use_sgl);
-
-	return sizeof(struct nvme_iod) + alloc_size;
-}
-
 static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
 				unsigned int hctx_idx)
 {
@@ -621,8 +605,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 		dma_addr = next_dma_addr;
 	}
 
-	if (iod->sg != iod->inline_sg)
-		mempool_free(iod->sg, dev->iod_mempool);
+	mempool_free(iod->sg, dev->iod_mempool);
 }
 
 static void nvme_print_sgl(struct scatterlist *sgl, int nents)
@@ -822,14 +805,9 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 	blk_status_t ret = BLK_STS_IOERR;
 	int nr_mapped;
 
-	if (blk_rq_payload_bytes(req) > NVME_INT_BYTES(dev) ||
-	    blk_rq_nr_phys_segments(req) > NVME_INT_PAGES) {
-		iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
-		if (!iod->sg)
-			return BLK_STS_RESOURCE;
-	} else {
-		iod->sg = iod->inline_sg;
-	}
+	iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
+	if (!iod->sg)
+		return BLK_STS_RESOURCE;
 
 	iod->use_sgl = nvme_pci_use_sgls(dev, req);
 
@@ -1612,7 +1590,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
 		dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
 		dev->admin_tagset.timeout = ADMIN_TIMEOUT;
 		dev->admin_tagset.numa_node = dev_to_node(dev->dev);
-		dev->admin_tagset.cmd_size = nvme_pci_cmd_size(dev, false);
+		dev->admin_tagset.cmd_size = sizeof(struct nvme_iod);
 		dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED;
 		dev->admin_tagset.driver_data = dev;
 
@@ -2257,11 +2235,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 		dev->tagset.numa_node = dev_to_node(dev->dev);
 		dev->tagset.queue_depth =
 				min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1;
-		dev->tagset.cmd_size = nvme_pci_cmd_size(dev, false);
-		if ((dev->ctrl.sgls & ((1 << 0) | (1 << 1))) && sgl_threshold) {
-			dev->tagset.cmd_size = max(dev->tagset.cmd_size,
-					nvme_pci_cmd_size(dev, true));
-		}
+		dev->tagset.cmd_size = sizeof(struct nvme_iod);
 		dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE;
 		dev->tagset.driver_data = dev;
 
-- 
cgit v1.2.3


From dff824b2aadb7808f50ceb0927acaec5ad750ce7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 5 Mar 2019 05:49:34 -0700
Subject: nvme-pci: optimize mapping of small single segment requests

If a request is single segment and fits into one or two PRP entries we
do not have to create a scatterlist for it, but can just map the bio_vec
directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
 drivers/nvme/host/pci.c | 45 ++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 40 insertions(+), 5 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index bd7e4209ab36..59731264b052 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -221,6 +221,7 @@ struct nvme_iod {
 	int npages;		/* In the PRP list. 0 means small pool in use */
 	int nents;		/* Used in scatterlist */
 	dma_addr_t first_dma;
+	unsigned int dma_len;	/* length of single DMA segment mapping */
 	dma_addr_t meta_dma;
 	struct scatterlist *sg;
 };
@@ -576,13 +577,18 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
 	dma_addr_t dma_addr = iod->first_dma, next_dma_addr;
 	int i;
 
-	if (iod->nents) {
-		/* P2PDMA requests do not need to be unmapped */
-		if (!is_pci_p2pdma_page(sg_page(iod->sg)))
-			dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
-
+	if (iod->dma_len) {
+		dma_unmap_page(dev->dev, dma_addr, iod->dma_len, dma_dir);
+		return;
 	}
 
+	WARN_ON_ONCE(!iod->nents);
+
+	/* P2PDMA requests do not need to be unmapped */
+	if (!is_pci_p2pdma_page(sg_page(iod->sg)))
+		dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req));
+
+
 	if (iod->npages == 0)
 		dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0],
 			dma_addr);
@@ -795,6 +801,24 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev,
 	return BLK_STS_OK;
 }
 
+static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
+		struct request *req, struct nvme_rw_command *cmnd,
+		struct bio_vec *bv)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+	unsigned int first_prp_len = dev->ctrl.page_size - bv->bv_offset;
+
+	iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
+	if (dma_mapping_error(dev->dev, iod->first_dma))
+		return BLK_STS_RESOURCE;
+	iod->dma_len = bv->bv_len;
+
+	cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma);
+	if (bv->bv_len > first_prp_len)
+		cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len);
+	return 0;
+}
+
 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		struct nvme_command *cmnd)
 {
@@ -805,6 +829,17 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 	blk_status_t ret = BLK_STS_IOERR;
 	int nr_mapped;
 
+	if (blk_rq_nr_phys_segments(req) == 1) {
+		struct bio_vec bv = req_bvec(req);
+
+		if (!is_pci_p2pdma_page(bv.bv_page)) {
+			if (bv.bv_offset + bv.bv_len <= dev->ctrl.page_size * 2)
+				return nvme_setup_prp_simple(dev, req,
+							     &cmnd->rw, &bv);
+		}
+	}
+
+	iod->dma_len = 0;
 	iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
 	if (!iod->sg)
 		return BLK_STS_RESOURCE;
-- 
cgit v1.2.3


From 297910571f08f1d7e398793df6e606ebb375a3f1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 5 Mar 2019 05:54:18 -0700
Subject: nvme-pci: optimize mapping single segment requests using SGLs

If the controller supports SGLs we can take another short cut for single
segment request, given that we can always map those without another
indirection structure, and thus don't need to create a scatterlist
structure.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
 drivers/nvme/host/pci.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 59731264b052..82aa5cb21828 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -819,6 +819,23 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
 	return 0;
 }
 
+static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev,
+		struct request *req, struct nvme_rw_command *cmnd,
+		struct bio_vec *bv)
+{
+	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
+
+	iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
+	if (dma_mapping_error(dev->dev, iod->first_dma))
+		return BLK_STS_RESOURCE;
+	iod->dma_len = bv->bv_len;
+
+	cmnd->dptr.sgl.addr = cpu_to_le64(iod->first_dma);
+	cmnd->dptr.sgl.length = cpu_to_le32(iod->dma_len);
+	cmnd->dptr.sgl.type = NVME_SGL_FMT_DATA_DESC << 4;
+	return 0;
+}
+
 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		struct nvme_command *cmnd)
 {
@@ -836,6 +853,11 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 			if (bv.bv_offset + bv.bv_len <= dev->ctrl.page_size * 2)
 				return nvme_setup_prp_simple(dev, req,
 							     &cmnd->rw, &bv);
+
+			if (iod->nvmeq->qid &&
+			    dev->ctrl.sgls & ((1 << 0) | (1 << 1)))
+				return nvme_setup_sgl_simple(dev, req,
+							     &cmnd->rw, &bv);
 		}
 	}
 
-- 
cgit v1.2.3


From 70479b71bc80ae6f63c8d6644cc76dff99f79686 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 5 Mar 2019 05:59:02 -0700
Subject: nvme-pci: tidy up nvme_map_data

Remove two pointless local variables, remove ret assignment that is
never used, move the use_sgl initialization closer to where it is used.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
 drivers/nvme/host/pci.c | 17 +++++------------
 1 file changed, 5 insertions(+), 12 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 82aa5cb21828..c1eecde6b853 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -840,10 +840,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 		struct nvme_command *cmnd)
 {
 	struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
-	struct request_queue *q = req->q;
-	enum dma_data_direction dma_dir = rq_data_dir(req) ?
-			DMA_TO_DEVICE : DMA_FROM_DEVICE;
-	blk_status_t ret = BLK_STS_IOERR;
+	blk_status_t ret = BLK_STS_RESOURCE;
 	int nr_mapped;
 
 	if (blk_rq_nr_phys_segments(req) == 1) {
@@ -865,25 +862,21 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
 	iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
 	if (!iod->sg)
 		return BLK_STS_RESOURCE;
-
-	iod->use_sgl = nvme_pci_use_sgls(dev, req);
-
 	sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
-	iod->nents = blk_rq_map_sg(q, req, iod->sg);
+	iod->nents = blk_rq_map_sg(req->q, req, iod->sg);
 	if (!iod->nents)
 		goto out;
 
-	ret = BLK_STS_RESOURCE;
-
 	if (is_pci_p2pdma_page(sg_page(iod->sg)))
 		nr_mapped = pci_p2pdma_map_sg(dev->dev, iod->sg, iod->nents,
-					  dma_dir);
+					      rq_dma_dir(req));
 	else
 		nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents,
-					     dma_dir,  DMA_ATTR_NO_WARN);
+					     rq_dma_dir(req), DMA_ATTR_NO_WARN);
 	if (!nr_mapped)
 		goto out;
 
+	iod->use_sgl = nvme_pci_use_sgls(dev, req);
 	if (iod->use_sgl)
 		ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped);
 	else
-- 
cgit v1.2.3


From e84c2091a45228b62867ec0565898ef5404706a2 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Tue, 2 Apr 2019 14:52:47 +0300
Subject: nvmet: never fail double namespace enablement

In case we create N namespaces while N < NVMET_MAX_NAMESPACES, we can
perform "echo 1 > <nsid>/enable" as much as we want. In case N ==
NVMET_MAX_NAMESPACES we fail. Make sure we have the same flow for any N.

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/core.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index b3e765a95af8..4dc388a2ecb0 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -494,13 +494,14 @@ int nvmet_ns_enable(struct nvmet_ns *ns)
 	int ret;
 
 	mutex_lock(&subsys->lock);
-	ret = -EMFILE;
-	if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
-		goto out_unlock;
 	ret = 0;
 	if (ns->enabled)
 		goto out_unlock;
 
+	ret = -EMFILE;
+	if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
+		goto out_unlock;
+
 	ret = nvmet_bdev_ns_enable(ns);
 	if (ret == -ENOTBLK)
 		ret = nvmet_file_ns_enable(ns);
-- 
cgit v1.2.3


From 013a63ef4edcd2366299225c3b081102171e8fa9 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Tue, 2 Apr 2019 14:51:54 +0300
Subject: nvmet: add safety check for subsystem lock during nvmet_ns_changed

we need to make sure that subsystem lock is taken during ctrl's list
traversing. nvmet_ns_changed function is not static and can be used from
various callers simultaneously.

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/core.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 4dc388a2ecb0..4d8dd29479c0 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -214,6 +214,8 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
 {
 	struct nvmet_ctrl *ctrl;
 
+	lockdep_assert_held(&subsys->lock);
+
 	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
 		nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
 		if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR))
-- 
cgit v1.2.3


From d0de579c043c3a2ab60ce75eb6cf4d414becc676 Mon Sep 17 00:00:00 2001
From: Kenneth Heitke <kenneth.heitke@intel.com>
Date: Thu, 4 Apr 2019 12:57:45 -0600
Subject: nvme: log the error status on Identify Namespace failure

Identify Namespace failures are logged as a warning but there is not
an indication of the cause for the failure. Update the log message to
include the error status.

Signed-off-by: Kenneth Heitke <kenneth.heitke@intel.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index b5939112b9b6..ddb943395118 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1105,7 +1105,7 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl,
 
 	error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
 	if (error) {
-		dev_warn(ctrl->device, "Identify namespace failed\n");
+		dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error);
 		kfree(id);
 		return NULL;
 	}
-- 
cgit v1.2.3


From 8dc2ed3f3e5ba245828ad89968f6818be8996e9d Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Mon, 8 Apr 2019 18:39:58 +0300
Subject: nvmet-rdma: remove p2p_client initialization from fast-path

Initialize it during command allocation.

Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Stephen Bates <sbates@raithlin.com>
Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/rdma.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index ef893addf341..b7275218dfa5 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -373,6 +373,7 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
 	if (ib_dma_mapping_error(ndev->device, r->send_sge.addr))
 		goto out_free_rsp;
 
+	r->req.p2p_client = &ndev->device->dev;
 	r->send_sge.length = sizeof(*r->req.rsp);
 	r->send_sge.lkey = ndev->pd->local_dma_lkey;
 
@@ -763,8 +764,6 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue,
 		cmd->send_sge.addr, cmd->send_sge.length,
 		DMA_TO_DEVICE);
 
-	cmd->req.p2p_client = &queue->dev->device->dev;
-
 	if (!nvmet_req_init(&cmd->req, &queue->nvme_cq,
 			&queue->nvme_sq, &nvmet_rdma_ops))
 		return;
-- 
cgit v1.2.3


From fc6c9730725d5cc57c851d0e261a5682bba913a7 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Mon, 8 Apr 2019 18:39:59 +0300
Subject: nvmet: rename nvme_completion instances from rsp to cqe

Use NVMe namings for improving code readability.

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by : Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/core.c        | 22 +++++++++++-----------
 drivers/nvme/target/fabrics-cmd.c | 16 ++++++++--------
 drivers/nvme/target/fc.c          |  2 +-
 drivers/nvme/target/loop.c        |  6 +++---
 drivers/nvme/target/nvmet.h       |  4 ++--
 drivers/nvme/target/rdma.c        | 18 +++++++++---------
 drivers/nvme/target/tcp.c         |  8 ++++----
 7 files changed, 38 insertions(+), 38 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 4d8dd29479c0..1c1776c3e316 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -647,7 +647,7 @@ static void nvmet_update_sq_head(struct nvmet_req *req)
 		} while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
 					old_sqhd);
 	}
-	req->rsp->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
+	req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF);
 }
 
 static void nvmet_set_error(struct nvmet_req *req, u16 status)
@@ -656,7 +656,7 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status)
 	struct nvme_error_slot *new_error_slot;
 	unsigned long flags;
 
-	req->rsp->status = cpu_to_le16(status << 1);
+	req->cqe->status = cpu_to_le16(status << 1);
 
 	if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC)
 		return;
@@ -676,15 +676,15 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status)
 	spin_unlock_irqrestore(&ctrl->error_lock, flags);
 
 	/* set the more bit for this request */
-	req->rsp->status |= cpu_to_le16(1 << 14);
+	req->cqe->status |= cpu_to_le16(1 << 14);
 }
 
 static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 {
 	if (!req->sq->sqhd_disabled)
 		nvmet_update_sq_head(req);
-	req->rsp->sq_id = cpu_to_le16(req->sq->qid);
-	req->rsp->command_id = req->cmd->common.command_id;
+	req->cqe->sq_id = cpu_to_le16(req->sq->qid);
+	req->cqe->command_id = req->cmd->common.command_id;
 
 	if (unlikely(status))
 		nvmet_set_error(req, status);
@@ -841,8 +841,8 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 	req->sg = NULL;
 	req->sg_cnt = 0;
 	req->transfer_len = 0;
-	req->rsp->status = 0;
-	req->rsp->sq_head = 0;
+	req->cqe->status = 0;
+	req->cqe->sq_head = 0;
 	req->ns = NULL;
 	req->error_loc = NVMET_NO_ERROR_LOC;
 	req->error_slba = 0;
@@ -1069,7 +1069,7 @@ u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
 	if (!subsys) {
 		pr_warn("connect request for invalid subsystem %s!\n",
 			subsysnqn);
-		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
+		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
 		return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
 	}
 
@@ -1090,7 +1090,7 @@ u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
 
 	pr_warn("could not find controller %d for subsys %s / host %s\n",
 		cntlid, subsysnqn, hostnqn);
-	req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
+	req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
 	status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
 
 out:
@@ -1188,7 +1188,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 	if (!subsys) {
 		pr_warn("connect request for invalid subsystem %s!\n",
 			subsysnqn);
-		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
+		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
 		goto out;
 	}
 
@@ -1197,7 +1197,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 	if (!nvmet_host_allowed(subsys, hostnqn)) {
 		pr_info("connect by host %s for subsystem %s not allowed\n",
 			hostnqn, subsysnqn);
-		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
+		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
 		up_read(&nvmet_config_sem);
 		status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
 		goto out_put_subsystem;
diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c
index 3a76ebc3d155..3b9f79aba98f 100644
--- a/drivers/nvme/target/fabrics-cmd.c
+++ b/drivers/nvme/target/fabrics-cmd.c
@@ -72,7 +72,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req)
 			offsetof(struct nvmf_property_get_command, attrib);
 	}
 
-	req->rsp->result.u64 = cpu_to_le64(val);
+	req->cqe->result.u64 = cpu_to_le64(val);
 	nvmet_req_complete(req, status);
 }
 
@@ -124,7 +124,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req)
 
 	if (c->cattr & NVME_CONNECT_DISABLE_SQFLOW) {
 		req->sq->sqhd_disabled = true;
-		req->rsp->sq_head = cpu_to_le16(0xffff);
+		req->cqe->sq_head = cpu_to_le16(0xffff);
 	}
 
 	if (ctrl->ops->install_queue) {
@@ -158,7 +158,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
 		goto out;
 
 	/* zero out initial completion result, assign values as needed */
-	req->rsp->result.u32 = 0;
+	req->cqe->result.u32 = 0;
 
 	if (c->recfmt != 0) {
 		pr_warn("invalid connect version (%d).\n",
@@ -172,7 +172,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
 		pr_warn("connect attempt for invalid controller ID %#x\n",
 			d->cntlid);
 		status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
-		req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
+		req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
 		goto out;
 	}
 
@@ -195,7 +195,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req)
 
 	pr_info("creating controller %d for subsystem %s for NQN %s.\n",
 		ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn);
-	req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid);
+	req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
 
 out:
 	kfree(d);
@@ -222,7 +222,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
 		goto out;
 
 	/* zero out initial completion result, assign values as needed */
-	req->rsp->result.u32 = 0;
+	req->cqe->result.u32 = 0;
 
 	if (c->recfmt != 0) {
 		pr_warn("invalid connect version (%d).\n",
@@ -240,14 +240,14 @@ static void nvmet_execute_io_connect(struct nvmet_req *req)
 	if (unlikely(qid > ctrl->subsys->max_qid)) {
 		pr_warn("invalid queue id (%d)\n", qid);
 		status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
-		req->rsp->result.u32 = IPO_IATTR_CONNECT_SQE(qid);
+		req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(qid);
 		goto out_ctrl_put;
 	}
 
 	status = nvmet_install_queue(ctrl, req);
 	if (status) {
 		/* pass back cntlid that had the issue of installing queue */
-		req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid);
+		req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid);
 		goto out_ctrl_put;
 	}
 
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 9369a11fe7a9..508661af0f50 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -2184,7 +2184,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport,
 	}
 
 	fod->req.cmd = &fod->cmdiubuf.sqe;
-	fod->req.rsp = &fod->rspiubuf.cqe;
+	fod->req.cqe = &fod->rspiubuf.cqe;
 	fod->req.port = tgtport->pe->port;
 
 	/* clear any response payload */
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index b9f623ab01f3..a3ae491fa20e 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -18,7 +18,7 @@
 struct nvme_loop_iod {
 	struct nvme_request	nvme_req;
 	struct nvme_command	cmd;
-	struct nvme_completion	rsp;
+	struct nvme_completion	cqe;
 	struct nvmet_req	req;
 	struct nvme_loop_queue	*queue;
 	struct work_struct	work;
@@ -94,7 +94,7 @@ static void nvme_loop_queue_response(struct nvmet_req *req)
 {
 	struct nvme_loop_queue *queue =
 		container_of(req->sq, struct nvme_loop_queue, nvme_sq);
-	struct nvme_completion *cqe = req->rsp;
+	struct nvme_completion *cqe = req->cqe;
 
 	/*
 	 * AEN requests are special as they don't time out and can
@@ -207,7 +207,7 @@ static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl,
 		struct nvme_loop_iod *iod, unsigned int queue_idx)
 {
 	iod->req.cmd = &iod->cmd;
-	iod->req.rsp = &iod->rsp;
+	iod->req.cqe = &iod->cqe;
 	iod->queue = &ctrl->queues[queue_idx];
 	INIT_WORK(&iod->work, nvme_loop_execute_work);
 	return 0;
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 1653d19b187f..c25d88fc9dec 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -284,7 +284,7 @@ struct nvmet_fabrics_ops {
 
 struct nvmet_req {
 	struct nvme_command	*cmd;
-	struct nvme_completion	*rsp;
+	struct nvme_completion	*cqe;
 	struct nvmet_sq		*sq;
 	struct nvmet_cq		*cq;
 	struct nvmet_ns		*ns;
@@ -322,7 +322,7 @@ extern struct workqueue_struct *buffered_io_wq;
 
 static inline void nvmet_set_result(struct nvmet_req *req, u32 result)
 {
-	req->rsp->result.u32 = cpu_to_le32(result);
+	req->cqe->result.u32 = cpu_to_le32(result);
 }
 
 /*
diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c
index b7275218dfa5..36d906a7f70d 100644
--- a/drivers/nvme/target/rdma.c
+++ b/drivers/nvme/target/rdma.c
@@ -160,7 +160,7 @@ static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp)
 {
 	return !nvme_is_write(rsp->req.cmd) &&
 		rsp->req.transfer_len &&
-		!rsp->req.rsp->status &&
+		!rsp->req.cqe->status &&
 		!(rsp->flags & NVMET_RDMA_REQ_INLINE_DATA);
 }
 
@@ -364,17 +364,17 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
 		struct nvmet_rdma_rsp *r)
 {
 	/* NVMe CQE / RDMA SEND */
-	r->req.rsp = kmalloc(sizeof(*r->req.rsp), GFP_KERNEL);
-	if (!r->req.rsp)
+	r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL);
+	if (!r->req.cqe)
 		goto out;
 
-	r->send_sge.addr = ib_dma_map_single(ndev->device, r->req.rsp,
-			sizeof(*r->req.rsp), DMA_TO_DEVICE);
+	r->send_sge.addr = ib_dma_map_single(ndev->device, r->req.cqe,
+			sizeof(*r->req.cqe), DMA_TO_DEVICE);
 	if (ib_dma_mapping_error(ndev->device, r->send_sge.addr))
 		goto out_free_rsp;
 
 	r->req.p2p_client = &ndev->device->dev;
-	r->send_sge.length = sizeof(*r->req.rsp);
+	r->send_sge.length = sizeof(*r->req.cqe);
 	r->send_sge.lkey = ndev->pd->local_dma_lkey;
 
 	r->send_cqe.done = nvmet_rdma_send_done;
@@ -389,7 +389,7 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
 	return 0;
 
 out_free_rsp:
-	kfree(r->req.rsp);
+	kfree(r->req.cqe);
 out:
 	return -ENOMEM;
 }
@@ -398,8 +398,8 @@ static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
 		struct nvmet_rdma_rsp *r)
 {
 	ib_dma_unmap_single(ndev->device, r->send_sge.addr,
-				sizeof(*r->req.rsp), DMA_TO_DEVICE);
-	kfree(r->req.rsp);
+				sizeof(*r->req.cqe), DMA_TO_DEVICE);
+	kfree(r->req.cqe);
 }
 
 static int
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 0a941abf56ec..17cf137dc88c 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -161,14 +161,14 @@ static inline bool nvmet_tcp_has_data_in(struct nvmet_tcp_cmd *cmd)
 
 static inline bool nvmet_tcp_need_data_in(struct nvmet_tcp_cmd *cmd)
 {
-	return nvmet_tcp_has_data_in(cmd) && !cmd->req.rsp->status;
+	return nvmet_tcp_has_data_in(cmd) && !cmd->req.cqe->status;
 }
 
 static inline bool nvmet_tcp_need_data_out(struct nvmet_tcp_cmd *cmd)
 {
 	return !nvme_is_write(cmd->req.cmd) &&
 		cmd->req.transfer_len > 0 &&
-		!cmd->req.rsp->status;
+		!cmd->req.cqe->status;
 }
 
 static inline bool nvmet_tcp_has_inline_data(struct nvmet_tcp_cmd *cmd)
@@ -378,7 +378,7 @@ static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd)
 	pdu->hdr.plen =
 		cpu_to_le32(pdu->hdr.hlen + hdgst +
 				cmd->req.transfer_len + ddgst);
-	pdu->command_id = cmd->req.rsp->command_id;
+	pdu->command_id = cmd->req.cqe->command_id;
 	pdu->data_length = cpu_to_le32(cmd->req.transfer_len);
 	pdu->data_offset = cpu_to_le32(cmd->wbytes_done);
 
@@ -1224,7 +1224,7 @@ static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue,
 			sizeof(*c->rsp_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
 	if (!c->rsp_pdu)
 		goto out_free_cmd;
-	c->req.rsp = &c->rsp_pdu->cqe;
+	c->req.cqe = &c->rsp_pdu->cqe;
 
 	c->data_pdu = page_frag_alloc(&queue->pf_cache,
 			sizeof(*c->data_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO);
-- 
cgit v1.2.3


From 6b7e631b927ca1266b2695307ab71ed7764af75e Mon Sep 17 00:00:00 2001
From: Minwoo Im <minwoo.im.dev@gmail.com>
Date: Sun, 7 Apr 2019 15:28:06 +0900
Subject: nvmet: return a specified error it subsys_alloc fails

nvmet_subsys_alloc() returns its pointer or NULL if it fails.  We can
see three different steps in this function:
  1. memory allocation
  2. argument check
  3. memory allocation for string

But now the callers of this function do not seem to handle case 2 by
returning -ENOMEM only even if it fails with an invalid parameter.

This patch specifies error codes so that caller can pass it to its own
caller.

Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/configfs.c  | 4 ++--
 drivers/nvme/target/core.c      | 6 +++---
 drivers/nvme/target/discovery.c | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index adb79545cdd7..08dd5af357f7 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -898,8 +898,8 @@ static struct config_group *nvmet_subsys_make(struct config_group *group,
 	}
 
 	subsys = nvmet_subsys_alloc(name, NVME_NQN_NVME);
-	if (!subsys)
-		return ERR_PTR(-ENOMEM);
+	if (IS_ERR(subsys))
+		return ERR_CAST(subsys);
 
 	config_group_init_type_name(&subsys->group, name, &nvmet_subsys_type);
 
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 1c1776c3e316..24e0a36392d9 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -1367,7 +1367,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 
 	subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
 	if (!subsys)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
 	subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
 	/* generate a random serial number as our controllers are ephemeral: */
@@ -1383,14 +1383,14 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 	default:
 		pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
 		kfree(subsys);
-		return NULL;
+		return ERR_PTR(-EINVAL);
 	}
 	subsys->type = type;
 	subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
 			GFP_KERNEL);
 	if (!subsys->subsysnqn) {
 		kfree(subsys);
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	}
 
 	kref_init(&subsys->ref);
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index 33ed95e72d6b..e8e09266bfa5 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -372,8 +372,8 @@ int __init nvmet_init_discovery(void)
 {
 	nvmet_disc_subsys =
 		nvmet_subsys_alloc(NVME_DISC_SUBSYS_NAME, NVME_NQN_DISC);
-	if (!nvmet_disc_subsys)
-		return -ENOMEM;
+	if (IS_ERR(nvmet_disc_subsys))
+		return PTR_ERR(nvmet_disc_subsys);
 	return 0;
 }
 
-- 
cgit v1.2.3


From a5dffbb66d250a7ef07e27a2e75b8d9d7af2ab41 Mon Sep 17 00:00:00 2001
From: "Enrico Weigelt, metux IT consult" <info@metux.net>
Date: Wed, 24 Apr 2019 12:34:39 +0200
Subject: nvmet: include <linux/scatterlist.h>

Build breaks:

    drivers/nvme/target/core.c: In function 'nvmet_req_alloc_sgl':
    drivers/nvme/target/core.c:939:12: error: implicit declaration of \
function 'sgl_alloc'; did you mean 'bio_alloc'? \
[-Werror=implicit-function-declaration]
      req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
                ^~~~~~~~~
                bio_alloc
    drivers/nvme/target/core.c:939:10: warning: assignment makes pointer \
from integer without a cast [-Wint-conversion]
      req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
              ^
    drivers/nvme/target/core.c: In function 'nvmet_req_free_sgl':
    drivers/nvme/target/core.c:952:3: error: implicit declaration of \
function 'sgl_free'; did you mean 'ida_free'? [-Werror=implicit-function-declaration]
       sgl_free(req->sg);
       ^~~~~~~~
       ida_free

Cause:

    1. missing include to <linux/scatterlist.h>
    2. SGL_ALLOC needs to be enabled

Therefore adding the missing include, as well as Kconfig dependency.

Signed-off-by: Enrico Weigelt, metux IT consult <info@metux.net>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Minwoo Im <minwoo.im@samsung.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/Kconfig | 1 +
 drivers/nvme/target/core.c  | 1 +
 2 files changed, 2 insertions(+)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index d94f25cde019..3ef0a4e5eed6 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -3,6 +3,7 @@ config NVME_TARGET
 	tristate "NVMe Target support"
 	depends on BLOCK
 	depends on CONFIGFS_FS
+	select SGL_ALLOC
 	help
 	  This enabled target side support for the NVMe protocol, that is
 	  it allows the Linux kernel to implement NVMe subsystems and
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 24e0a36392d9..7734a6acff85 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -8,6 +8,7 @@
 #include <linux/random.h>
 #include <linux/rculist.h>
 #include <linux/pci-p2pdma.h>
+#include <linux/scatterlist.h>
 
 #include "nvmet.h"
 
-- 
cgit v1.2.3


From 525ec495e021068aa8635a0e18ff60695f5b1f4f Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Wed, 24 Apr 2019 11:43:23 -0700
Subject: nvmet-file: clamp-down file namespace lba_shift

When the backing file is a tempfile for example, the inode i_blkbits
can be 1M in size which causes problems for hosts to support as the
disk block size. Instead, expose the minimum between i_blkbits and
12 (4K sector size).

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by:- Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/io-cmd-file.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index bc6ebb51b0bf..05453f5d1448 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -49,7 +49,12 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns)
 		goto err;
 
 	ns->size = stat.size;
-	ns->blksize_shift = file_inode(ns->file)->i_blkbits;
+	/*
+	 * i_blkbits can be greater than the universally accepted upper bound,
+	 * so make sure we export a sane namespace lba_shift.
+	 */
+	ns->blksize_shift = min_t(u8,
+			file_inode(ns->file)->i_blkbits, 12);
 
 	ns->bvec_cache = kmem_cache_create("nvmet-bvec",
 			NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec),
-- 
cgit v1.2.3


From 569b3d3db1aac8586a16df1745c9e5a99ff47253 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Wed, 24 Apr 2019 11:53:16 -0700
Subject: nvmet-tcp: don't fail maxr2t greater than 1

The host may support it, but nothing prevents us from
sending a single r2t at a time like we do anyways.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/tcp.c | 6 ------
 1 file changed, 6 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 17cf137dc88c..69b83fa0c76c 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -774,12 +774,6 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue)
 		return -EPROTO;
 	}
 
-	if (icreq->maxr2t != 0) {
-		pr_err("queue %d: unsupported maxr2t %d\n", queue->idx,
-			le32_to_cpu(icreq->maxr2t) + 1);
-		return -EPROTO;
-	}
-
 	queue->hdr_digest = !!(icreq->digest & NVME_TCP_HDR_DIGEST_ENABLE);
 	queue->data_digest = !!(icreq->digest & NVME_TCP_DATA_DIGEST_ENABLE);
 	if (queue->hdr_digest || queue->data_digest) {
-- 
cgit v1.2.3


From 7a42589654ae79e1177f0d74306a02d6cef7bddf Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Wed, 24 Apr 2019 11:53:17 -0700
Subject: nvme-tcp: fix a NULL deref when an admin connect times out

If we timeout the admin startup sequence we might not yet have
an I/O tagset allocated which causes the teardown sequence to crash.
Make nvme_tcp_teardown_io_queues safe by not iterating inflight tags
if the tagset wasn't allocated.

Fixes: 39d57757467b ("nvme-tcp: fix timeout handler")
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 68c49dd67210..aae5374d2b93 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1710,7 +1710,9 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl,
 {
 	blk_mq_quiesce_queue(ctrl->admin_q);
 	nvme_tcp_stop_queue(ctrl, 0);
-	blk_mq_tagset_busy_iter(ctrl->admin_tagset, nvme_cancel_request, ctrl);
+	if (ctrl->admin_tagset)
+		blk_mq_tagset_busy_iter(ctrl->admin_tagset,
+			nvme_cancel_request, ctrl);
 	blk_mq_unquiesce_queue(ctrl->admin_q);
 	nvme_tcp_destroy_admin_queue(ctrl, remove);
 }
@@ -1722,7 +1724,9 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl,
 		return;
 	nvme_stop_queues(ctrl);
 	nvme_tcp_stop_io_queues(ctrl);
-	blk_mq_tagset_busy_iter(ctrl->tagset, nvme_cancel_request, ctrl);
+	if (ctrl->tagset)
+		blk_mq_tagset_busy_iter(ctrl->tagset,
+			nvme_cancel_request, ctrl);
 	if (remove)
 		nvme_start_queues(ctrl);
 	nvme_tcp_destroy_io_queues(ctrl, remove);
-- 
cgit v1.2.3


From 1007709d7d06fab09bf2d007657575958676282b Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Wed, 24 Apr 2019 11:53:18 -0700
Subject: nvme-rdma: fix a NULL deref when an admin connect times out

If we timeout the admin startup sequence we might not yet have
an I/O tagset allocated which causes the teardown sequence to crash.
Make nvme_tcp_teardown_io_queues safe by not iterating inflight tags
if the tagset wasn't allocated.

Fixes: 4c174e636674 ("nvme-rdma: fix timeout handler")
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/rdma.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 11a5ecae78c8..e1824c2e0a1c 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -914,8 +914,9 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl,
 {
 	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_stop_queue(&ctrl->queues[0]);
-	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request,
-			&ctrl->ctrl);
+	if (ctrl->ctrl.admin_tagset)
+		blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset,
+			nvme_cancel_request, &ctrl->ctrl);
 	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 	nvme_rdma_destroy_admin_queue(ctrl, remove);
 }
@@ -926,8 +927,9 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl,
 	if (ctrl->ctrl.queue_count > 1) {
 		nvme_stop_queues(&ctrl->ctrl);
 		nvme_rdma_stop_io_queues(ctrl);
-		blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request,
-				&ctrl->ctrl);
+		if (ctrl->ctrl.tagset)
+			blk_mq_tagset_busy_iter(ctrl->ctrl.tagset,
+				nvme_cancel_request, &ctrl->ctrl);
 		if (remove)
 			nvme_start_queues(&ctrl->ctrl);
 		nvme_rdma_destroy_io_queues(ctrl, remove);
-- 
cgit v1.2.3


From efb973b19b88642bb7e08b8ce8e03b0bbd2a7e2a Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Wed, 24 Apr 2019 11:53:19 -0700
Subject: nvme-tcp: rename function to have nvme_tcp prefix

usually nvme_ prefix is for core functions.
While we're cleaning up, remove redundant empty lines

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Minwoo Im <minwoo.im@samsung.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index aae5374d2b93..2405bb9c63cc 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -473,7 +473,6 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue,
 	}
 
 	return 0;
-
 }
 
 static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue,
@@ -634,7 +633,6 @@ static inline void nvme_tcp_end_request(struct request *rq, u16 status)
 	nvme_end_request(rq, cpu_to_le16(status << 1), res);
 }
 
-
 static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb,
 			      unsigned int *offset, size_t *len)
 {
@@ -1535,7 +1533,7 @@ out_free_queue:
 	return ret;
 }
 
-static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
+static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 {
 	int i, ret;
 
@@ -1565,7 +1563,7 @@ static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
 	return nr_io_queues;
 }
 
-static int nvme_alloc_io_queues(struct nvme_ctrl *ctrl)
+static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 {
 	unsigned int nr_io_queues;
 	int ret;
@@ -1582,7 +1580,7 @@ static int nvme_alloc_io_queues(struct nvme_ctrl *ctrl)
 	dev_info(ctrl->device,
 		"creating %d I/O queues.\n", nr_io_queues);
 
-	return nvme_tcp_alloc_io_queues(ctrl);
+	return __nvme_tcp_alloc_io_queues(ctrl);
 }
 
 static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove)
@@ -1599,7 +1597,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new)
 {
 	int ret;
 
-	ret = nvme_alloc_io_queues(ctrl);
+	ret = nvme_tcp_alloc_io_queues(ctrl);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3


From 663d6fee66b555f6a080104751be0b54e0bca78a Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 15 Apr 2019 09:51:46 +0800
Subject: nvme-loop: kill timeout handler

Firstly it doesn't make sense to handle timeout for loop: 1) for admin
queue, the request is always completed in code path of queuing IO. 2)
for normal IO request, the timeout on these IOs have been handled by
underlying queue already.

Secondly nvme-loop's timeout handler is simply broken, and easy to
cause issue: 1) no any sync/protection between timeout and normal
completion, and now it is driver's responsibility to deal with
that; 2) bad reset implementation, blk_mq_update_nr_hw_queues()
is called after all NSs's queue is stopped(quiesced), and easy
to trigger deadlock.

So kill the timeout handler.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Reviewd-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/loop.c | 16 ----------------
 1 file changed, 16 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index a3ae491fa20e..9e211ad6bdd3 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -129,20 +129,6 @@ static void nvme_loop_execute_work(struct work_struct *work)
 	nvmet_req_execute(&iod->req);
 }
 
-static enum blk_eh_timer_return
-nvme_loop_timeout(struct request *rq, bool reserved)
-{
-	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq);
-
-	/* queue error recovery */
-	nvme_reset_ctrl(&iod->queue->ctrl->ctrl);
-
-	/* fail with DNR on admin cmd timeout */
-	nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;
-
-	return BLK_EH_DONE;
-}
-
 static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
@@ -253,7 +239,6 @@ static const struct blk_mq_ops nvme_loop_mq_ops = {
 	.complete	= nvme_loop_complete_rq,
 	.init_request	= nvme_loop_init_request,
 	.init_hctx	= nvme_loop_init_hctx,
-	.timeout	= nvme_loop_timeout,
 };
 
 static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
@@ -261,7 +246,6 @@ static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
 	.complete	= nvme_loop_complete_rq,
 	.init_request	= nvme_loop_init_request,
 	.init_hctx	= nvme_loop_init_admin_hctx,
-	.timeout	= nvme_loop_timeout,
 };
 
 static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
-- 
cgit v1.2.3


From 01fa017484ad98fccdeaab32db0077c574b6bd6f Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 11 Mar 2019 15:02:25 -0700
Subject: nvme: set 0 capacity if namespace block size exceeds PAGE_SIZE

If our target exposed a namespace with a block size that is greater
than PAGE_SIZE, set 0 capacity on the namespace as we do not support it.

This issue encountered when the nvmet namespace was backed by a tempfile.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/core.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 248ff3b48041..3dd043aa6d1f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1591,6 +1591,10 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
 	unsigned short bs = 1 << ns->lba_shift;
 
+	if (ns->lba_shift > PAGE_SHIFT) {
+		/* unsupported block size, set capacity to 0 later */
+		bs = (1 << 9);
+	}
 	blk_mq_freeze_queue(disk->queue);
 	blk_integrity_unregister(disk);
 
@@ -1601,7 +1605,8 @@ static void nvme_update_disk_info(struct gendisk *disk,
 	if (ns->ms && !ns->ext &&
 	    (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED))
 		nvme_init_integrity(disk, ns->ms, ns->pi_type);
-	if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk))
+	if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) ||
+	    ns->lba_shift > PAGE_SHIFT)
 		capacity = 0;
 
 	set_capacity(disk, capacity);
-- 
cgit v1.2.3


From f34e25898a608380a60135288019c4cb6013bec8 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 29 Apr 2019 16:25:48 -0700
Subject: nvme-tcp: fix possible null deref on a timed out io queue connect

If I/O queue connect times out, we might have freed the queue socket
already, so check for that on the error path in nvme_tcp_start_queue.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/tcp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 2405bb9c63cc..2b107a1d152b 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1423,7 +1423,8 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx)
 	if (!ret) {
 		set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags);
 	} else {
-		__nvme_tcp_stop_queue(&ctrl->queues[idx]);
+		if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags))
+			__nvme_tcp_stop_queue(&ctrl->queues[idx]);
 		dev_err(nctrl->device,
 			"failed to connect queue: %d ret=%d\n", idx, ret);
 	}
-- 
cgit v1.2.3


From 525aa5a705d86e193726ee465d1a975265fabf19 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.de>
Date: Tue, 30 Apr 2019 18:57:09 +0200
Subject: nvme-multipath: split bios with the ns_head bio_set before submitting

If the bio is moved to a different queue via blk_steal_bios() and
the original queue is destroyed in nvme_remove_ns() we'll be ending
with a crash in bio_endio() as the mempool for the split bio bvecs
had already been destroyed.
So split the bio using the original queue (which will remain during the
lifetime of the bio) before sending it down to the underlying device.

Signed-off-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/multipath.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index f0716f6ce41f..e6ddc83223df 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -232,6 +232,14 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
 	blk_qc_t ret = BLK_QC_T_NONE;
 	int srcu_idx;
 
+	/*
+	 * The namespace might be going away and the bio might
+	 * be moved to a different queue via blk_steal_bios(),
+	 * so we need to use the bio_split pool from the original
+	 * queue to allocate the bvecs from.
+	 */
+	blk_queue_split(q, &bio);
+
 	srcu_idx = srcu_read_lock(&head->srcu);
 	ns = nvme_find_path(head);
 	if (likely(ns)) {
-- 
cgit v1.2.3


From 592b6e7b0226b198c12439065f725be00c92d559 Mon Sep 17 00:00:00 2001
From: Hannes Reinecke <hare@suse.com>
Date: Sun, 28 Apr 2019 20:24:42 -0700
Subject: nvme-multipath: don't print ANA group state by default

Signed-off-by: Hannes Reinecke <hare@suse.com>
Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/multipath.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index e6ddc83223df..5c9429d41120 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -429,7 +429,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
 	unsigned *nr_change_groups = data;
 	struct nvme_ns *ns;
 
-	dev_info(ctrl->device, "ANA group %d: %s.\n",
+	dev_dbg(ctrl->device, "ANA group %d: %s.\n",
 			le32_to_cpu(desc->grpid),
 			nvme_ana_state_names[desc->state]);
 
-- 
cgit v1.2.3


From 049bf37262c61c99f45438910711b55054b24838 Mon Sep 17 00:00:00 2001
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
Date: Tue, 30 Apr 2019 18:53:29 +0200
Subject: nvme-pci: fix psdt field for single segment sgls

The shortcut for single segment SGL requests did not set the PSDT field
to mark the request as using SGLs.

Fixes: 297910571f08 ("nvme-pci: optimize mapping single segment requests using SGLs")
Signed-off-by: Klaus Birkelund Jensen <klaus.jensen@cnexlabs.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index c1eecde6b853..efc1da56521c 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -830,6 +830,7 @@ static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev,
 		return BLK_STS_RESOURCE;
 	iod->dma_len = bv->bv_len;
 
+	cmnd->flags = NVME_CMD_SGL_METABUF;
 	cmnd->dptr.sgl.addr = cpu_to_le64(iod->first_dma);
 	cmnd->dptr.sgl.length = cpu_to_le32(iod->dma_len);
 	cmnd->dptr.sgl.type = NVME_SGL_FMT_DATA_DESC << 4;
-- 
cgit v1.2.3


From 9dc1a38ef1925d23c2933c5867df816386d92ff8 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Tue, 30 Apr 2019 09:33:40 -0600
Subject: nvme-pci: shutdown on timeout during deletion

We do not restart a controller in a deleting state for timeout errors.
When in this state, unblock potential request dispatchers with failed
completions by shutting down the controller on timeout detection.

Reported-by: Yufen Yu <yuyufen@huawei.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index efc1da56521c..3df0f2b29427 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1277,6 +1277,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	struct nvme_dev *dev = nvmeq->dev;
 	struct request *abort_req;
 	struct nvme_command cmd;
+	bool shutdown = false;
 	u32 csts = readl(dev->bar + NVME_REG_CSTS);
 
 	/* If PCI error recovery process is happening, we cannot reset or
@@ -1313,12 +1314,14 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
 	 * shutdown, so we return BLK_EH_DONE.
 	 */
 	switch (dev->ctrl.state) {
+	case NVME_CTRL_DELETING:
+		shutdown = true;
 	case NVME_CTRL_CONNECTING:
 	case NVME_CTRL_RESETTING:
 		dev_warn_ratelimited(dev->ctrl.device,
 			 "I/O %d QID %d timeout, disable controller\n",
 			 req->tag, nvmeq->qid);
-		nvme_dev_disable(dev, false);
+		nvme_dev_disable(dev, shutdown);
 		nvme_req(req)->flags |= NVME_REQ_CANCELLED;
 		return BLK_EH_DONE;
 	default:
-- 
cgit v1.2.3


From c8e9e9b7646ebe1c5066ddc420d7630876277eb4 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Tue, 30 Apr 2019 09:33:41 -0600
Subject: nvme-pci: unquiesce admin queue on shutdown

Just like IO queues, the admin queue also will not be restarted after a
controller shutdown. Unquiesce this queue so that we do not block
request dispatch on a permanently disabled controller.

Reported-by: Yufen Yu <yuyufen@huawei.com>
Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 3df0f2b29427..ac10d3ad1e75 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2437,8 +2437,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
 	 * must flush all entered requests to their failed completion to avoid
 	 * deadlocking blk-mq hot-cpu notifier.
 	 */
-	if (shutdown)
+	if (shutdown) {
 		nvme_start_queues(&dev->ctrl);
+		if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q))
+			blk_mq_unquiesce_queue(dev->ctrl.admin_q);
+	}
 	mutex_unlock(&dev->shutdown_lock);
 }
 
-- 
cgit v1.2.3


From 665648673ef5384c7194ea6df4b55f2da98646cf Mon Sep 17 00:00:00 2001
From: Minwoo Im <minwoo.im.dev@gmail.com>
Date: Fri, 12 Apr 2019 00:52:39 +0900
Subject: nvme-pci: remove an unneeded variable initialization

Variable "n" will be assigned once kstrtoint() succeeds, otherwise it
will not be referred because kstrtoint() will return an error which
means go out from this function.

Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index ac10d3ad1e75..e2ff92de41a7 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -146,7 +146,7 @@ static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
 
 static int queue_count_set(const char *val, const struct kernel_param *kp)
 {
-	int n = 0, ret;
+	int n, ret;
 
 	ret = kstrtoint(val, 10, &n);
 	if (ret)
-- 
cgit v1.2.3


From a97234e1ff1ec9d5a41c6adff5632c61639dee6a Mon Sep 17 00:00:00 2001
From: Minwoo Im <minwoo.im.dev@gmail.com>
Date: Fri, 12 Apr 2019 00:18:32 +0900
Subject: nvme-pci: check more command sizes

All the NVMe command has 64bytes fixed size so that it has been assured
with BUILD_BUG_ON().  The remaining command structures in linux/nvme.h
also need to be checked here.

Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/pci.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index e2ff92de41a7..9c1a8fd68b3a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -231,19 +231,26 @@ struct nvme_iod {
  */
 static inline void _nvme_check_size(void)
 {
+	BUILD_BUG_ON(sizeof(struct nvme_common_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_identify) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
 	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
 	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
 	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
 	BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
 }
 
 static unsigned int max_io_queues(void)
-- 
cgit v1.2.3


From a2faf94e57c5237a9cad31f63eeaf2412ed0e951 Mon Sep 17 00:00:00 2001
From: Minwoo Im <minwoo.im.dev@gmail.com>
Date: Fri, 12 Apr 2019 00:18:31 +0900
Subject: nvme-fabrics: check more command sizes

struct common_command provides a common structure for NVMe-oF command
format.  It also needs to be checked for unintended size growth.

Signed-off-by: Minwoo Im <minwoo.im.dev@gmail.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/host/fabrics.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index d4cb826f58ff..592d1e61ef7e 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -1188,6 +1188,7 @@ static void __exit nvmf_exit(void)
 	class_destroy(nvmf_class);
 	nvmf_host_put(nvmf_default_host);
 
+	BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvmf_connect_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvmf_property_get_command) != 64);
 	BUILD_BUG_ON(sizeof(struct nvmf_property_set_command) != 64);
-- 
cgit v1.2.3


From 811015409fd4af80bbecb8e46b3aa24c8986fb74 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 30 Apr 2019 11:36:52 -0400
Subject: nvme: move command size checks to the core

Most command aren't PCIe specific, so move the size checking for them
to core.c

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
 drivers/nvme/host/core.c | 27 +++++++++++++++++++++++++++
 drivers/nvme/host/pci.c  | 31 +++----------------------------
 2 files changed, 30 insertions(+), 28 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3dd043aa6d1f..e970c5adee28 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3879,10 +3879,37 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
+/*
+ * Check we didn't inadvertently grow the command structure sizes:
+ */
+static inline void _nvme_check_size(void)
+{
+	BUILD_BUG_ON(sizeof(struct nvme_common_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_identify) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
+	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
+	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
+	BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
+}
+
+
 int __init nvme_core_init(void)
 {
 	int result = -ENOMEM;
 
+	_nvme_check_size();
+
 	nvme_wq = alloc_workqueue("nvme-wq",
 			WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
 	if (!nvme_wq)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 9c1a8fd68b3a..3e4fb891a95a 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -226,33 +226,6 @@ struct nvme_iod {
 	struct scatterlist *sg;
 };
 
-/*
- * Check we didin't inadvertently grow the command struct
- */
-static inline void _nvme_check_size(void)
-{
-	BUILD_BUG_ON(sizeof(struct nvme_common_command) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_identify) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_features) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_command) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE);
-	BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE);
-	BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512);
-	BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64);
-	BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64);
-}
-
 static unsigned int max_io_queues(void)
 {
 	return num_possible_cpus() + write_queues + poll_queues;
@@ -2988,6 +2961,9 @@ static struct pci_driver nvme_driver = {
 
 static int __init nvme_init(void)
 {
+	BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64);
+	BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64);
 	BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2);
 	return pci_register_driver(&nvme_driver);
 }
@@ -2996,7 +2972,6 @@ static void __exit nvme_exit(void)
 {
 	pci_unregister_driver(&nvme_driver);
 	flush_workqueue(nvme_wq);
-	_nvme_check_size();
 }
 
 MODULE_AUTHOR("Matthew Wilcox <willy@linux.intel.com>");
-- 
cgit v1.2.3


From 893a74b7a76e6e9c5c7199e6aae946f090622fa2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 30 Apr 2019 11:37:43 -0400
Subject: nvme: mark nvme_core_init and nvme_core_exit static

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
---
 drivers/nvme/host/core.c | 4 ++--
 drivers/nvme/host/nvme.h | 3 ---
 2 files changed, 2 insertions(+), 5 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index e970c5adee28..cd16d98d1f1a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3904,7 +3904,7 @@ static inline void _nvme_check_size(void)
 }
 
 
-int __init nvme_core_init(void)
+static int __init nvme_core_init(void)
 {
 	int result = -ENOMEM;
 
@@ -3956,7 +3956,7 @@ out:
 	return result;
 }
 
-void __exit nvme_core_exit(void)
+static void __exit nvme_core_exit(void)
 {
 	ida_destroy(&nvme_subsystems_ida);
 	class_destroy(nvme_subsys_class);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 527d64545023..5ee75b5ff83f 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -577,7 +577,4 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
 	return dev_to_disk(dev)->private_data;
 }
 
-int __init nvme_core_init(void);
-void __exit nvme_core_exit(void);
-
 #endif /* _NVME_H */
-- 
cgit v1.2.3


From 6f53e73b9ec5b3cd097077c5ffcb76df708ce3f8 Mon Sep 17 00:00:00 2001
From: Sagi Grimberg <sagi@grimberg.me>
Date: Mon, 29 Apr 2019 16:28:19 -0700
Subject: nvmet: protect discovery change log event list iteration

When we iterate on the discovery subsystem controllers
we need to protect against concurrent mutations to it.

Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Minwoo Im <minwoo.im@samsung.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/discovery.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index e8e09266bfa5..5baf269f3f8a 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -30,14 +30,17 @@ void nvmet_port_disc_changed(struct nvmet_port *port,
 {
 	struct nvmet_ctrl *ctrl;
 
+	lockdep_assert_held(&nvmet_config_sem);
 	nvmet_genctr++;
 
+	mutex_lock(&nvmet_disc_subsys->lock);
 	list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) {
 		if (subsys && !nvmet_host_allowed(subsys, ctrl->hostnqn))
 			continue;
 
 		__nvmet_disc_changed(port, ctrl);
 	}
+	mutex_unlock(&nvmet_disc_subsys->lock);
 }
 
 static void __nvmet_subsys_disc_changed(struct nvmet_port *port,
@@ -46,12 +49,14 @@ static void __nvmet_subsys_disc_changed(struct nvmet_port *port,
 {
 	struct nvmet_ctrl *ctrl;
 
+	mutex_lock(&nvmet_disc_subsys->lock);
 	list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) {
 		if (host && strcmp(nvmet_host_name(host), ctrl->hostnqn))
 			continue;
 
 		__nvmet_disc_changed(port, ctrl);
 	}
+	mutex_unlock(&nvmet_disc_subsys->lock);
 }
 
 void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys,
-- 
cgit v1.2.3