From d24de76af836260a99ca2ba281a937bd5bc55591 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 3 Jun 2020 07:14:43 +0200
Subject: block: remove the error argument to the block_bio_complete tracepoint

The status can be trivially derived from the bio itself.  That also avoid
callers like NVMe to incorrectly pass a blk_status_t instead of the errno,
and the overhead of translating the blk_status_t to the errno in the I/O
completion fast path when no tracing is enabled.

Fixes: 35fe0d12c8a3 ("nvme: trace bio completion")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/nvme.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index fa5c75501049..c0f4226d3299 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -599,8 +599,7 @@ static inline void nvme_trace_bio_complete(struct request *req,
 	struct nvme_ns *ns = req->q->queuedata;
 
 	if (req->cmd_flags & REQ_NVME_MPATH)
-		trace_block_bio_complete(ns->head->disk->queue,
-					 req->bio, status);
+		trace_block_bio_complete(ns->head->disk->queue, req->bio);
 }
 
 extern struct device_attribute dev_attr_ana_grpid;
-- 
cgit v1.2.3


From 108a58585b196e31822c597891f2c83cea7aeb4a Mon Sep 17 00:00:00 2001
From: Niklas Cassel <niklas.cassel@wdc.com>
Date: Sun, 7 Jun 2020 13:45:20 +0200
Subject: nvme: do not call del_gendisk() on a disk that was never added

device_add_disk() is negated by del_gendisk().
alloc_disk_node() is negated by put_disk().

In nvme_alloc_ns(), device_add_disk() is one of the last things being
called in the success case, and only void functions are being called
after this. Therefore this call should not be negated in the error path.

The superfluous call to del_gendisk() leads to the following prints:
[    7.839975] kobject: '(null)' (000000001ff73734): is not initialized, yet kobject_put() is being called.
[    7.840865] WARNING: CPU: 2 PID: 361 at lib/kobject.c:736 kobject_put+0x70/0x120

Fixes: 33cfdc2aa696 ("nvme: enforce extended LBA format for fabrics metadata")
Signed-off-by: Niklas Cassel <niklas.cassel@wdc.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/core.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 0585efa47d8f..c2c5bc4fb702 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3669,7 +3669,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	ns->disk = disk;
 
 	if (__nvme_revalidate_disk(disk, id))
-		goto out_free_disk;
+		goto out_put_disk;
 
 	if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
 		ret = nvme_nvm_register(ns, disk_name, node);
@@ -3696,8 +3696,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 	/* prevent double queue cleanup */
 	ns->disk->queue = NULL;
 	put_disk(ns->disk);
- out_free_disk:
-	del_gendisk(ns->disk);
  out_unlink_ns:
 	mutex_lock(&ctrl->subsys->lock);
 	list_del_rcu(&ns->siblings);
-- 
cgit v1.2.3


From 6acbd9619b153f2881c91e84cd36a9a2ce124565 Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Fri, 29 May 2020 00:25:07 +0200
Subject: nvme-tcp: constify nvme_tcp_mq_ops and nvme_tcp_admin_mq_ops

nvme_tcp_mq_ops and nvme_tcp_admin_mq_ops are never modified and can be
made const to allow the compiler to put them in read-only memory.

Before:
   text    data     bss     dec     hex filename
  53102    6885     576   60563    ec93 drivers/nvme/host/tcp.o

After:
   text    data     bss     dec     hex filename
  53422    6565     576   60563    ec93 drivers/nvme/host/tcp.o

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
Reviewed-by: Max Gurtovoy <maxg@mellanox.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/tcp.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 7c7c1886642f..7979fcf0c15f 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -131,8 +131,8 @@ struct nvme_tcp_ctrl {
 static LIST_HEAD(nvme_tcp_ctrl_list);
 static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
 static struct workqueue_struct *nvme_tcp_wq;
-static struct blk_mq_ops nvme_tcp_mq_ops;
-static struct blk_mq_ops nvme_tcp_admin_mq_ops;
+static const struct blk_mq_ops nvme_tcp_mq_ops;
+static const struct blk_mq_ops nvme_tcp_admin_mq_ops;
 static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
 
 static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
@@ -2338,7 +2338,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
 	return queue->nr_cqe;
 }
 
-static struct blk_mq_ops nvme_tcp_mq_ops = {
+static const struct blk_mq_ops nvme_tcp_mq_ops = {
 	.queue_rq	= nvme_tcp_queue_rq,
 	.complete	= nvme_complete_rq,
 	.init_request	= nvme_tcp_init_request,
@@ -2349,7 +2349,7 @@ static struct blk_mq_ops nvme_tcp_mq_ops = {
 	.poll		= nvme_tcp_poll,
 };
 
-static struct blk_mq_ops nvme_tcp_admin_mq_ops = {
+static const struct blk_mq_ops nvme_tcp_admin_mq_ops = {
 	.queue_rq	= nvme_tcp_queue_rq,
 	.complete	= nvme_complete_rq,
 	.init_request	= nvme_tcp_init_request,
-- 
cgit v1.2.3


From a40aae6bbf8dea09fbe6353a5635f4027c43c796 Mon Sep 17 00:00:00 2001
From: Max Gurtovoy <maxg@mellanox.com>
Date: Mon, 1 Jun 2020 20:05:20 +0300
Subject: nvmet-tcp: constify nvmet_tcp_ops

nvmet_tcp_ops is never modified and can be made const to allow the
compiler to put it in read-only memory, as done in other transports.

Before:
   text    data     bss     dec     hex filename
  16164     160      12   16336    3fd0 drivers/nvme/target/tcp.o

After:
   text    data     bss     dec     hex filename
  16277      64      12   16353    3fe1 drivers/nvme/target/tcp.o

Signed-off-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
Reviewed-by: Israel Rukshin <israelr@mellanox.com>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/tcp.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 6f557db0320d..9e4cb904ab27 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -153,7 +153,7 @@ static LIST_HEAD(nvmet_tcp_queue_list);
 static DEFINE_MUTEX(nvmet_tcp_queue_mutex);
 
 static struct workqueue_struct *nvmet_tcp_wq;
-static struct nvmet_fabrics_ops nvmet_tcp_ops;
+static const struct nvmet_fabrics_ops nvmet_tcp_ops;
 static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
 static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);
 
@@ -1747,7 +1747,7 @@ static void nvmet_tcp_disc_port_addr(struct nvmet_req *req,
 	}
 }
 
-static struct nvmet_fabrics_ops nvmet_tcp_ops = {
+static const struct nvmet_fabrics_ops nvmet_tcp_ops = {
 	.owner			= THIS_MODULE,
 	.type			= NVMF_TRTYPE_TCP,
 	.msdbd			= 1,
-- 
cgit v1.2.3


From c9c12e51b82b2bd0c59ac4e27ee5427f382a503f Mon Sep 17 00:00:00 2001
From: Daniel Wagner <dwagner@suse.de>
Date: Fri, 29 May 2020 13:37:40 +0200
Subject: nvme-fc: don't call nvme_cleanup_cmd() for AENs

Asynchronous event notifications do not have an associated request.
When fcp_io() fails we unconditionally call nvme_cleanup_cmd() which
leads to a crash.

Fixes: 16686f3a6c3c ("nvme: move common call to nvme_cleanup_cmd to core layer")
Signed-off-by: Daniel Wagner <dwagner@suse.de>
Reviewed-by: Himanshu Madhani <hmadhani2024@gmail.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Reviewed-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/fc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index cb0007592c12..e999a8c4b7e8 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2634,10 +2634,11 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 		opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
 		__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
 
-		if (!(op->flags & FCOP_FLAGS_AEN))
+		if (!(op->flags & FCOP_FLAGS_AEN)) {
 			nvme_fc_unmap_data(ctrl, op->rq, op);
+			nvme_cleanup_cmd(op->rq);
+		}
 
-		nvme_cleanup_cmd(op->rq);
 		nvme_fc_ctrl_put(ctrl);
 
 		if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE &&
-- 
cgit v1.2.3


From b97120b15ebd3de51325084136d3b9c3cce656d6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 3 Jun 2020 08:24:17 +0200
Subject: nvme-pci: use simple suspend when a HMB is enabled

While the NVMe specification allows the device to access the host memory
buffer in host DRAM from all power states, hosts will fail access to
DRAM during S3 and similar power states.

Fixes: d916b1be94b6 ("nvme-pci: use host managed power state for suspend")
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/host/pci.c | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d690d5593a80..e2bacd369a88 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2950,9 +2950,15 @@ static int nvme_suspend(struct device *dev)
 	 * the PCI bus layer to put it into D3 in order to take the PCIe link
 	 * down, so as to allow the platform to achieve its minimum low-power
 	 * state (which may not be possible if the link is up).
+	 *
+	 * If a host memory buffer is enabled, shut down the device as the NVMe
+	 * specification allows the device to access the host memory buffer in
+	 * host DRAM from all power states, but hosts will fail access to DRAM
+	 * during S3.
 	 */
 	if (pm_suspend_via_firmware() || !ctrl->npss ||
 	    !pcie_aspm_enabled(pdev) ||
+	    ndev->nr_host_mem_descs ||
 	    (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
 		return nvme_disable_prepare_reset(ndev, true);
 
-- 
cgit v1.2.3


From 819f7b88b48fd2bce932cfe3a38bf8fcefbcabe7 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Date: Tue, 9 Jun 2020 16:55:14 -0700
Subject: nvmet: fail outstanding host posted AEN req

In function nvmet_async_event_process() we only process AENs iff
there is an open slot on the ctrl->async_event_cmds[] && aen
event list posted by the target is not empty. This keeps host
posted AEN outstanding if target generated AEN list is empty.
We do cleanup the target generated entries from the aen list in
nvmet_ctrl_free()-> nvmet_async_events_free() but we don't
process AEN posted by the host. This leads to following problem :-

When processing admin sq at the time of nvmet_sq_destroy() holds
an extra percpu reference(atomic value = 1), so in the following code
path after switching to atomic rcu, release function (nvmet_sq_free())
is not getting called which blocks the sq->free_done in
nvmet_sq_destroy() :-

nvmet_sq_destroy()
 percpu_ref_kill_and_confirm()
 - __percpu_ref_switch_mode()
 --  __percpu_ref_switch_to_atomic()
 ---   call_rcu() -> percpu_ref_switch_to_atomic_rcu()
 ----     /* calls switch callback */
 - percpu_ref_put()
 -- percpu_ref_put_many(ref, 1)
 --- else if (unlikely(atomic_long_sub_and_test(nr, &ref->count)))
 ----   ref->release(ref); <---- Not called.

This results in indefinite hang:-

  void nvmet_sq_destroy(struct nvmet_sq *sq)
...
          if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) {
                  nvmet_async_events_process(ctrl, status);
                  percpu_ref_put(&sq->ref);
          }
          percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
          wait_for_completion(&sq->confirm_done);
          wait_for_completion(&sq->free_done); <-- Hang here

Which breaks the further disconnect sequence. This problem seems to be
introduced after commit 64f5e9cdd711b ("nvmet: fix memory leak when
removing namespaces and controllers concurrently").

This patch processes ctrl->async_event_cmds[] in the admin sq destroy()
context irrespetive of aen_list. Also we get rid of the controller's
aen_list processing in the nvmet_sq_destroy() context and just ignore
ctrl->aen_list.

This results in nvmet_async_events_process() being called from workqueue
context so we adjust the code accordingly.

Fixes: 64f5e9cdd711 ("nvmet: fix memory leak when removing namespaces and controllers concurrently ")
Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/nvme/target/core.c | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

(limited to 'drivers/nvme')

diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 6392bcd30bd7..6e2f623e472e 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -129,7 +129,22 @@ static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
 	return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
 }
 
-static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
+static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
+{
+	u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
+	struct nvmet_req *req;
+
+	mutex_lock(&ctrl->lock);
+	while (ctrl->nr_async_event_cmds) {
+		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
+		mutex_unlock(&ctrl->lock);
+		nvmet_req_complete(req, status);
+		mutex_lock(&ctrl->lock);
+	}
+	mutex_unlock(&ctrl->lock);
+}
+
+static void nvmet_async_events_process(struct nvmet_ctrl *ctrl)
 {
 	struct nvmet_async_event *aen;
 	struct nvmet_req *req;
@@ -139,15 +154,14 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
 		aen = list_first_entry(&ctrl->async_events,
 				       struct nvmet_async_event, entry);
 		req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
-		if (status == 0)
-			nvmet_set_result(req, nvmet_async_event_result(aen));
+		nvmet_set_result(req, nvmet_async_event_result(aen));
 
 		list_del(&aen->entry);
 		kfree(aen);
 
 		mutex_unlock(&ctrl->lock);
 		trace_nvmet_async_event(ctrl, req->cqe->result.u32);
-		nvmet_req_complete(req, status);
+		nvmet_req_complete(req, 0);
 		mutex_lock(&ctrl->lock);
 	}
 	mutex_unlock(&ctrl->lock);
@@ -170,7 +184,7 @@ static void nvmet_async_event_work(struct work_struct *work)
 	struct nvmet_ctrl *ctrl =
 		container_of(work, struct nvmet_ctrl, async_event_work);
 
-	nvmet_async_events_process(ctrl, 0);
+	nvmet_async_events_process(ctrl);
 }
 
 void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
@@ -779,7 +793,6 @@ static void nvmet_confirm_sq(struct percpu_ref *ref)
 
 void nvmet_sq_destroy(struct nvmet_sq *sq)
 {
-	u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
 	struct nvmet_ctrl *ctrl = sq->ctrl;
 
 	/*
@@ -787,7 +800,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
 	 * queue doesn't have outstanding requests on it.
 	 */
 	if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
-		nvmet_async_events_process(ctrl, status);
+		nvmet_async_events_failall(ctrl);
 	percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
 	wait_for_completion(&sq->confirm_done);
 	wait_for_completion(&sq->free_done);
-- 
cgit v1.2.3