From 651438bb0af5213f1f70d66e75bf11d08cb5537a Mon Sep 17 00:00:00 2001 From: Wen Xiong Date: Thu, 15 Feb 2018 14:05:10 -0600 Subject: nvme-pci: Fix EEH failure on ppc Triggering PPC EEH detection and handling requires a memory mapped read failure. The NVMe driver removed the periodic health check MMIO, so there's no early detection mechanism to trigger the recovery. Instead, the detection now happens when the nvme driver handles an IO timeout event. This takes the pci channel offline, so we do not want the driver to proceed with escalating its own recovery efforts that may conflict with the EEH handler. This patch ensures the driver will observe the channel was set to offline after a failed MMIO read and resets the IO timer so the EEH handler has a chance to recover the device. Signed-off-by: Wen Xiong [updated change log] Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5933a5c732e8..e5ce07f4966f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1153,12 +1153,6 @@ static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) if (!(csts & NVME_CSTS_CFS) && !nssro) return false; - /* If PCI error recovery process is happening, we cannot reset or - * the recovery mechanism will surely fail. - */ - if (pci_channel_offline(to_pci_dev(dev->dev))) - return false; - return true; } @@ -1189,6 +1183,13 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) struct nvme_command cmd; u32 csts = readl(dev->bar + NVME_REG_CSTS); + /* If PCI error recovery process is happening, we cannot reset or + * the recovery mechanism will surely fail. + */ + mb(); + if (pci_channel_offline(to_pci_dev(dev->dev))) + return BLK_EH_RESET_TIMER; + /* * Reset immediately if the controller is failed */ -- cgit v1.2.3 From 16ccfff2897613007b5eda9e29d65303c6280026 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 6 Feb 2018 20:17:42 +0800 Subject: nvme: pci: pass max vectors as num_possible_cpus() to pci_alloc_irq_vectors 84676c1f21 ("genirq/affinity: assign vectors to all possible CPUs") has switched to do irq vectors spread among all possible CPUs, so pass num_possible_cpus() as max vecotrs to be assigned. For example, in a 8 cores system, 0~3 online, 4~8 offline/not present, see 'lscpu': [ming@box]$lscpu Architecture: x86_64 CPU op-mode(s): 32-bit, 64-bit Byte Order: Little Endian CPU(s): 4 On-line CPU(s) list: 0-3 Thread(s) per core: 1 Core(s) per socket: 2 Socket(s): 2 NUMA node(s): 2 ... NUMA node0 CPU(s): 0-3 NUMA node1 CPU(s): ... 1) before this patch, follows the allocated vectors and their affinity: irq 47, cpu list 0,4 irq 48, cpu list 1,6 irq 49, cpu list 2,5 irq 50, cpu list 3,7 2) after this patch, follows the allocated vectors and their affinity: irq 43, cpu list 0 irq 44, cpu list 1 irq 45, cpu list 2 irq 46, cpu list 3 irq 47, cpu list 4 irq 48, cpu list 6 irq 49, cpu list 5 irq 50, cpu list 7 Cc: Keith Busch Cc: Sagi Grimberg Cc: Thomas Gleixner Cc: Christoph Hellwig Signed-off-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e5ce07f4966f..b6f43b738f03 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1914,7 +1914,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) int result, nr_io_queues; unsigned long size; - nr_io_queues = num_present_cpus(); + nr_io_queues = num_possible_cpus(); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); if (result < 0) return result; -- cgit v1.2.3 From 8a30ecc6e0ecbb9ae95daf499b2680b885ed0349 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 7 Mar 2018 14:13:58 +0100 Subject: Revert "nvme: create 'slaves' and 'holders' entries for hidden controllers" This reverts commit e9a48034d7d1318ece7d4a235838a86c94db9d68. The slaves and holders link for the hidden gendisks confuse lsblk so that it errors out on, or doesn't report the nvme multipath devices. Given that we don't need holder relationships for something that can't even be directly accessed we should just stop creating those links. Signed-off-by: Christoph Hellwig Reported-by: Potnuri Bharat Teja Cc: stable@vger.kernel.org Signed-off-by: Keith Busch --- drivers/nvme/host/core.c | 2 -- drivers/nvme/host/multipath.c | 30 ------------------------------ drivers/nvme/host/nvme.h | 8 -------- 3 files changed, 40 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 817e5e2766da..7aeca5db7916 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3033,7 +3033,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ns->disk->disk_name); nvme_mpath_add_disk(ns->head); - nvme_mpath_add_disk_links(ns); return; out_unlink_ns: mutex_lock(&ctrl->subsys->lock); @@ -3053,7 +3052,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) return; if (ns->disk && ns->disk->flags & GENHD_FL_UP) { - nvme_mpath_remove_disk_links(ns); sysfs_remove_group(&disk_to_dev(ns->disk)->kobj, &nvme_ns_id_attr_group); if (ns->ndev) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index b7e5c6db4d92..060f69e03427 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -210,25 +210,6 @@ void nvme_mpath_add_disk(struct nvme_ns_head *head) mutex_unlock(&head->subsys->lock); } -void nvme_mpath_add_disk_links(struct nvme_ns *ns) -{ - struct kobject *slave_disk_kobj, *holder_disk_kobj; - - if (!ns->head->disk) - return; - - slave_disk_kobj = &disk_to_dev(ns->disk)->kobj; - if (sysfs_create_link(ns->head->disk->slave_dir, slave_disk_kobj, - kobject_name(slave_disk_kobj))) - return; - - holder_disk_kobj = &disk_to_dev(ns->head->disk)->kobj; - if (sysfs_create_link(ns->disk->part0.holder_dir, holder_disk_kobj, - kobject_name(holder_disk_kobj))) - sysfs_remove_link(ns->head->disk->slave_dir, - kobject_name(slave_disk_kobj)); -} - void nvme_mpath_remove_disk(struct nvme_ns_head *head) { if (!head->disk) @@ -243,14 +224,3 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head) blk_cleanup_queue(head->disk->queue); put_disk(head->disk); } - -void nvme_mpath_remove_disk_links(struct nvme_ns *ns) -{ - if (!ns->head->disk) - return; - - sysfs_remove_link(ns->disk->part0.holder_dir, - kobject_name(&disk_to_dev(ns->head->disk)->kobj)); - sysfs_remove_link(ns->head->disk->slave_dir, - kobject_name(&disk_to_dev(ns->disk)->kobj)); -} diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 0521e4707d1c..d733b14ede9d 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -410,9 +410,7 @@ bool nvme_req_needs_failover(struct request *req, blk_status_t error); void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl); int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,struct nvme_ns_head *head); void nvme_mpath_add_disk(struct nvme_ns_head *head); -void nvme_mpath_add_disk_links(struct nvme_ns *ns); void nvme_mpath_remove_disk(struct nvme_ns_head *head); -void nvme_mpath_remove_disk_links(struct nvme_ns *ns); static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) { @@ -454,12 +452,6 @@ static inline void nvme_mpath_add_disk(struct nvme_ns_head *head) static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head) { } -static inline void nvme_mpath_add_disk_links(struct nvme_ns *ns) -{ -} -static inline void nvme_mpath_remove_disk_links(struct nvme_ns *ns) -{ -} static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns) { } -- cgit v1.2.3 From 0475821e229cfd9954b7501113d1acbc57b68689 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Mon, 5 Mar 2018 11:59:53 -0800 Subject: nvme-fabrics: Ignore nr_io_queues option for discovery controllers This removes a dependency on the order options are passed when creating a fabrics controller. With the old code, if "nr_io_queues" appears before an "nqn" option specifying the discovery controller, then nr_io_queues is overridden with zero. If "nr_io_queues" appears after specifying the discovery controller, then the nr_io_queues option is used to set the number of queues, and the driver attempts to establish IO connections to the discovery controller (which doesn't work). It seems better to ignore (and warn about) the "nr_io_queues" option if userspace has already asked to connect to the discovery controller. Signed-off-by: Roland Dreier Reviewed-by: James Smart Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- drivers/nvme/host/fabrics.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index a1c58e35075e..8f0f34d06d46 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -650,6 +650,11 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, ret = -EINVAL; goto out; } + if (opts->discovery_nqn) { + pr_debug("Ignoring nr_io_queues value for discovery controller\n"); + break; + } + opts->nr_io_queues = min_t(unsigned int, num_online_cpus(), token); break; -- cgit v1.2.3 From d157e5343cb360a525e6b3e7924612a9a90df3b7 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 7 Mar 2018 15:59:36 -0800 Subject: nvme_fc: rework sqsize handling Corrected four outstanding issues in the transport around sqsize. 1: Create Connection LS is sending the 1's-based sqsize, should be sending the 0's-based value. 2: allocation of hw queue is using the 0's-base size. It should be using the 1's-based value. 3: normalization of ctrl.sqsize by MQES is using MQES+1 (1's-based value). It should be MQES (0's-based value). 4: Missing clause to ensure queue_count not larger than ctrl->sqsize. Corrected by: Clean up routines that pass queue size around. The queue size value is the actual count (1's-based) value and determined from ctrl->sqsize + 1. Routines that send 0's-based value adapt from queue size. Sset ctrl->sqsize properly for MQES. Added clause to nsure queue_count not larger than ctrl->sqsize + 1. Signed-off-by: James Smart Reviewed-by: Sagi Grimberg Signed-off-by: Keith Busch --- drivers/nvme/host/fc.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 7f51f8414b97..1dc1387b7134 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -1206,7 +1206,7 @@ nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); - assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize); + assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize - 1); /* Linux supports only Dynamic controllers */ assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff); uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id); @@ -1321,7 +1321,7 @@ nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum); - conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize); + conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize - 1); lsop->queue = queue; lsreq->rqstaddr = conn_rqst; @@ -2481,11 +2481,11 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) goto out_free_tag_set; } - ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) goto out_cleanup_blk_queue; - ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) goto out_delete_hw_queues; @@ -2532,11 +2532,11 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) if (ret) goto out_free_io_queues; - ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) goto out_free_io_queues; - ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); + ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); if (ret) goto out_delete_hw_queues; @@ -2632,13 +2632,12 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) nvme_fc_init_queue(ctrl, 0); ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, - NVME_AQ_BLK_MQ_DEPTH); + NVME_AQ_DEPTH); if (ret) goto out_free_queue; ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], - NVME_AQ_BLK_MQ_DEPTH, - (NVME_AQ_BLK_MQ_DEPTH / 4)); + NVME_AQ_DEPTH, (NVME_AQ_DEPTH / 4)); if (ret) goto out_delete_hw_queue; @@ -2666,7 +2665,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) } ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize); + min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize); ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); if (ret) @@ -2699,6 +2698,14 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) opts->queue_size = ctrl->ctrl.maxcmd; } + if (opts->queue_size > ctrl->ctrl.sqsize + 1) { + /* warn if sqsize is lower than queue_size */ + dev_warn(ctrl->ctrl.device, + "queue_size %zu > ctrl sqsize %u, clamping down\n", + opts->queue_size, ctrl->ctrl.sqsize + 1); + opts->queue_size = ctrl->ctrl.sqsize + 1; + } + ret = nvme_fc_init_aen_ops(ctrl); if (ret) goto out_term_aen_ops; -- cgit v1.2.3