From d24de76af836260a99ca2ba281a937bd5bc55591 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Jun 2020 07:14:43 +0200 Subject: block: remove the error argument to the block_bio_complete tracepoint The status can be trivially derived from the bio itself. That also avoid callers like NVMe to incorrectly pass a blk_status_t instead of the errno, and the overhead of translating the blk_status_t to the errno in the I/O completion fast path when no tracing is enabled. Fixes: 35fe0d12c8a3 ("nvme: trace bio completion") Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Jens Axboe --- drivers/nvme/host/nvme.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index fa5c75501049..c0f4226d3299 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -599,8 +599,7 @@ static inline void nvme_trace_bio_complete(struct request *req, struct nvme_ns *ns = req->q->queuedata; if (req->cmd_flags & REQ_NVME_MPATH) - trace_block_bio_complete(ns->head->disk->queue, - req->bio, status); + trace_block_bio_complete(ns->head->disk->queue, req->bio); } extern struct device_attribute dev_attr_ana_grpid; -- cgit v1.2.3 From 108a58585b196e31822c597891f2c83cea7aeb4a Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Sun, 7 Jun 2020 13:45:20 +0200 Subject: nvme: do not call del_gendisk() on a disk that was never added device_add_disk() is negated by del_gendisk(). alloc_disk_node() is negated by put_disk(). In nvme_alloc_ns(), device_add_disk() is one of the last things being called in the success case, and only void functions are being called after this. Therefore this call should not be negated in the error path. The superfluous call to del_gendisk() leads to the following prints: [ 7.839975] kobject: '(null)' (000000001ff73734): is not initialized, yet kobject_put() is being called. [ 7.840865] WARNING: CPU: 2 PID: 361 at lib/kobject.c:736 kobject_put+0x70/0x120 Fixes: 33cfdc2aa696 ("nvme: enforce extended LBA format for fabrics metadata") Signed-off-by: Niklas Cassel Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0585efa47d8f..c2c5bc4fb702 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3669,7 +3669,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns->disk = disk; if (__nvme_revalidate_disk(disk, id)) - goto out_free_disk; + goto out_put_disk; if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { ret = nvme_nvm_register(ns, disk_name, node); @@ -3696,8 +3696,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) /* prevent double queue cleanup */ ns->disk->queue = NULL; put_disk(ns->disk); - out_free_disk: - del_gendisk(ns->disk); out_unlink_ns: mutex_lock(&ctrl->subsys->lock); list_del_rcu(&ns->siblings); -- cgit v1.2.3 From 6acbd9619b153f2881c91e84cd36a9a2ce124565 Mon Sep 17 00:00:00 2001 From: Rikard Falkeborn Date: Fri, 29 May 2020 00:25:07 +0200 Subject: nvme-tcp: constify nvme_tcp_mq_ops and nvme_tcp_admin_mq_ops nvme_tcp_mq_ops and nvme_tcp_admin_mq_ops are never modified and can be made const to allow the compiler to put them in read-only memory. Before: text data bss dec hex filename 53102 6885 576 60563 ec93 drivers/nvme/host/tcp.o After: text data bss dec hex filename 53422 6565 576 60563 ec93 drivers/nvme/host/tcp.o Signed-off-by: Rikard Falkeborn Acked-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/tcp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 7c7c1886642f..7979fcf0c15f 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -131,8 +131,8 @@ struct nvme_tcp_ctrl { static LIST_HEAD(nvme_tcp_ctrl_list); static DEFINE_MUTEX(nvme_tcp_ctrl_mutex); static struct workqueue_struct *nvme_tcp_wq; -static struct blk_mq_ops nvme_tcp_mq_ops; -static struct blk_mq_ops nvme_tcp_admin_mq_ops; +static const struct blk_mq_ops nvme_tcp_mq_ops; +static const struct blk_mq_ops nvme_tcp_admin_mq_ops; static int nvme_tcp_try_send(struct nvme_tcp_queue *queue); static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl) @@ -2338,7 +2338,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx) return queue->nr_cqe; } -static struct blk_mq_ops nvme_tcp_mq_ops = { +static const struct blk_mq_ops nvme_tcp_mq_ops = { .queue_rq = nvme_tcp_queue_rq, .complete = nvme_complete_rq, .init_request = nvme_tcp_init_request, @@ -2349,7 +2349,7 @@ static struct blk_mq_ops nvme_tcp_mq_ops = { .poll = nvme_tcp_poll, }; -static struct blk_mq_ops nvme_tcp_admin_mq_ops = { +static const struct blk_mq_ops nvme_tcp_admin_mq_ops = { .queue_rq = nvme_tcp_queue_rq, .complete = nvme_complete_rq, .init_request = nvme_tcp_init_request, -- cgit v1.2.3 From a40aae6bbf8dea09fbe6353a5635f4027c43c796 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 1 Jun 2020 20:05:20 +0300 Subject: nvmet-tcp: constify nvmet_tcp_ops nvmet_tcp_ops is never modified and can be made const to allow the compiler to put it in read-only memory, as done in other transports. Before: text data bss dec hex filename 16164 160 12 16336 3fd0 drivers/nvme/target/tcp.o After: text data bss dec hex filename 16277 64 12 16353 3fe1 drivers/nvme/target/tcp.o Signed-off-by: Max Gurtovoy Reviewed-by: Himanshu Madhani Reviewed-by: Israel Rukshin Acked-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/tcp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 6f557db0320d..9e4cb904ab27 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -153,7 +153,7 @@ static LIST_HEAD(nvmet_tcp_queue_list); static DEFINE_MUTEX(nvmet_tcp_queue_mutex); static struct workqueue_struct *nvmet_tcp_wq; -static struct nvmet_fabrics_ops nvmet_tcp_ops; +static const struct nvmet_fabrics_ops nvmet_tcp_ops; static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c); static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd); @@ -1747,7 +1747,7 @@ static void nvmet_tcp_disc_port_addr(struct nvmet_req *req, } } -static struct nvmet_fabrics_ops nvmet_tcp_ops = { +static const struct nvmet_fabrics_ops nvmet_tcp_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_TCP, .msdbd = 1, -- cgit v1.2.3 From c9c12e51b82b2bd0c59ac4e27ee5427f382a503f Mon Sep 17 00:00:00 2001 From: Daniel Wagner Date: Fri, 29 May 2020 13:37:40 +0200 Subject: nvme-fc: don't call nvme_cleanup_cmd() for AENs Asynchronous event notifications do not have an associated request. When fcp_io() fails we unconditionally call nvme_cleanup_cmd() which leads to a crash. Fixes: 16686f3a6c3c ("nvme: move common call to nvme_cleanup_cmd to core layer") Signed-off-by: Daniel Wagner Reviewed-by: Himanshu Madhani Reviewed-by: Hannes Reinecke Reviewed-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/host/fc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index cb0007592c12..e999a8c4b7e8 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2634,10 +2634,11 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE); __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); - if (!(op->flags & FCOP_FLAGS_AEN)) + if (!(op->flags & FCOP_FLAGS_AEN)) { nvme_fc_unmap_data(ctrl, op->rq, op); + nvme_cleanup_cmd(op->rq); + } - nvme_cleanup_cmd(op->rq); nvme_fc_ctrl_put(ctrl); if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE && -- cgit v1.2.3 From b97120b15ebd3de51325084136d3b9c3cce656d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Jun 2020 08:24:17 +0200 Subject: nvme-pci: use simple suspend when a HMB is enabled While the NVMe specification allows the device to access the host memory buffer in host DRAM from all power states, hosts will fail access to DRAM during S3 and similar power states. Fixes: d916b1be94b6 ("nvme-pci: use host managed power state for suspend") Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- drivers/nvme/host/pci.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index d690d5593a80..e2bacd369a88 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2950,9 +2950,15 @@ static int nvme_suspend(struct device *dev) * the PCI bus layer to put it into D3 in order to take the PCIe link * down, so as to allow the platform to achieve its minimum low-power * state (which may not be possible if the link is up). + * + * If a host memory buffer is enabled, shut down the device as the NVMe + * specification allows the device to access the host memory buffer in + * host DRAM from all power states, but hosts will fail access to DRAM + * during S3. */ if (pm_suspend_via_firmware() || !ctrl->npss || !pcie_aspm_enabled(pdev) || + ndev->nr_host_mem_descs || (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND)) return nvme_disable_prepare_reset(ndev, true); -- cgit v1.2.3 From 819f7b88b48fd2bce932cfe3a38bf8fcefbcabe7 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Tue, 9 Jun 2020 16:55:14 -0700 Subject: nvmet: fail outstanding host posted AEN req In function nvmet_async_event_process() we only process AENs iff there is an open slot on the ctrl->async_event_cmds[] && aen event list posted by the target is not empty. This keeps host posted AEN outstanding if target generated AEN list is empty. We do cleanup the target generated entries from the aen list in nvmet_ctrl_free()-> nvmet_async_events_free() but we don't process AEN posted by the host. This leads to following problem :- When processing admin sq at the time of nvmet_sq_destroy() holds an extra percpu reference(atomic value = 1), so in the following code path after switching to atomic rcu, release function (nvmet_sq_free()) is not getting called which blocks the sq->free_done in nvmet_sq_destroy() :- nvmet_sq_destroy() percpu_ref_kill_and_confirm() - __percpu_ref_switch_mode() -- __percpu_ref_switch_to_atomic() --- call_rcu() -> percpu_ref_switch_to_atomic_rcu() ---- /* calls switch callback */ - percpu_ref_put() -- percpu_ref_put_many(ref, 1) --- else if (unlikely(atomic_long_sub_and_test(nr, &ref->count))) ---- ref->release(ref); <---- Not called. This results in indefinite hang:- void nvmet_sq_destroy(struct nvmet_sq *sq) ... if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) { nvmet_async_events_process(ctrl, status); percpu_ref_put(&sq->ref); } percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); wait_for_completion(&sq->confirm_done); wait_for_completion(&sq->free_done); <-- Hang here Which breaks the further disconnect sequence. This problem seems to be introduced after commit 64f5e9cdd711b ("nvmet: fix memory leak when removing namespaces and controllers concurrently"). This patch processes ctrl->async_event_cmds[] in the admin sq destroy() context irrespetive of aen_list. Also we get rid of the controller's aen_list processing in the nvmet_sq_destroy() context and just ignore ctrl->aen_list. This results in nvmet_async_events_process() being called from workqueue context so we adjust the code accordingly. Fixes: 64f5e9cdd711 ("nvmet: fix memory leak when removing namespaces and controllers concurrently ") Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/core.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 6392bcd30bd7..6e2f623e472e 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -129,7 +129,22 @@ static u32 nvmet_async_event_result(struct nvmet_async_event *aen) return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16); } -static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) +static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl) +{ + u16 status = NVME_SC_INTERNAL | NVME_SC_DNR; + struct nvmet_req *req; + + mutex_lock(&ctrl->lock); + while (ctrl->nr_async_event_cmds) { + req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; + mutex_unlock(&ctrl->lock); + nvmet_req_complete(req, status); + mutex_lock(&ctrl->lock); + } + mutex_unlock(&ctrl->lock); +} + +static void nvmet_async_events_process(struct nvmet_ctrl *ctrl) { struct nvmet_async_event *aen; struct nvmet_req *req; @@ -139,15 +154,14 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status) aen = list_first_entry(&ctrl->async_events, struct nvmet_async_event, entry); req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; - if (status == 0) - nvmet_set_result(req, nvmet_async_event_result(aen)); + nvmet_set_result(req, nvmet_async_event_result(aen)); list_del(&aen->entry); kfree(aen); mutex_unlock(&ctrl->lock); trace_nvmet_async_event(ctrl, req->cqe->result.u32); - nvmet_req_complete(req, status); + nvmet_req_complete(req, 0); mutex_lock(&ctrl->lock); } mutex_unlock(&ctrl->lock); @@ -170,7 +184,7 @@ static void nvmet_async_event_work(struct work_struct *work) struct nvmet_ctrl *ctrl = container_of(work, struct nvmet_ctrl, async_event_work); - nvmet_async_events_process(ctrl, 0); + nvmet_async_events_process(ctrl); } void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, @@ -779,7 +793,6 @@ static void nvmet_confirm_sq(struct percpu_ref *ref) void nvmet_sq_destroy(struct nvmet_sq *sq) { - u16 status = NVME_SC_INTERNAL | NVME_SC_DNR; struct nvmet_ctrl *ctrl = sq->ctrl; /* @@ -787,7 +800,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq) * queue doesn't have outstanding requests on it. */ if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) - nvmet_async_events_process(ctrl, status); + nvmet_async_events_failall(ctrl); percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); wait_for_completion(&sq->confirm_done); wait_for_completion(&sq->free_done); -- cgit v1.2.3