summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2019-08-10 21:41:41 -0600
committerJens Axboe <axboe@kernel.dk>2019-08-10 21:41:41 -0600
commit0c9c83043d4957092f74173cfeeb5bc63e7a70e7 (patch)
treebc5178267fcb7a20e220e20068e86fe7c0638f95
parent20621fedb2a696e4dc60bc1c5de37cf21976abcb (diff)
parentbd46a90634302bfe791e93ad5496f98f165f7ae0 (diff)
downloadlinux-0c9c83043d4957092f74173cfeeb5bc63e7a70e7.tar.gz
linux-0c9c83043d4957092f74173cfeeb5bc63e7a70e7.tar.bz2
linux-0c9c83043d4957092f74173cfeeb5bc63e7a70e7.zip
Merge branch 'nvme-5.3-rc' of git://git.infradead.org/nvme into for-linus
Pull NVMe fixes from Sagi: "Few nvme fixes for the next rc round. - detect capacity changes on the mpath disk from Anthony - probe/remove fix from Keith - various fixes to pass blktests from Logan - deadlock in reset/scan race fix - nvme-rdma use-after-free fix - deadlock fix when passthru commands race mpath disk info update" * 'nvme-5.3-rc' of git://git.infradead.org/nvme: nvme-pci: Fix async probe remove race nvme: fix controller removal race with scan work nvme-rdma: fix possible use-after-free in connect error flow nvme: fix a possible deadlock when passthru commands sent to a multipath device nvme-core: Fix extra device_put() call on error path nvmet-file: fix nvmet_file_flush() always returning an error nvmet-loop: Flush nvme_delete_wq when removing the port nvmet: Fix use-after-free bug when a port is removed nvme-multipath: revalidate nvme_ns_head gendisk in nvme_validate_ns
-rw-r--r--drivers/nvme/host/core.c15
-rw-r--r--drivers/nvme/host/multipath.c76
-rw-r--r--drivers/nvme/host/nvme.h21
-rw-r--r--drivers/nvme/host/pci.c3
-rw-r--r--drivers/nvme/host/rdma.c16
-rw-r--r--drivers/nvme/target/configfs.c1
-rw-r--r--drivers/nvme/target/core.c15
-rw-r--r--drivers/nvme/target/loop.c8
-rw-r--r--drivers/nvme/target/nvmet.h3
9 files changed, 143 insertions, 15 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 8f3fbe5ca937..c258a1ce4b28 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1286,6 +1286,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
*/
if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
mutex_lock(&ctrl->scan_lock);
+ mutex_lock(&ctrl->subsys->lock);
+ nvme_mpath_start_freeze(ctrl->subsys);
+ nvme_mpath_wait_freeze(ctrl->subsys);
nvme_start_freeze(ctrl);
nvme_wait_freeze(ctrl);
}
@@ -1316,6 +1319,8 @@ static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
nvme_update_formats(ctrl);
if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
nvme_unfreeze(ctrl);
+ nvme_mpath_unfreeze(ctrl->subsys);
+ mutex_unlock(&ctrl->subsys->lock);
mutex_unlock(&ctrl->scan_lock);
}
if (effects & NVME_CMD_EFFECTS_CCC)
@@ -1715,6 +1720,7 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id)
if (ns->head->disk) {
nvme_update_disk_info(ns->head->disk, ns, id);
blk_queue_stack_limits(ns->head->disk->queue, ns->queue);
+ revalidate_disk(ns->head->disk);
}
#endif
}
@@ -2487,6 +2493,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
if (ret) {
dev_err(ctrl->device,
"failed to register subsystem device.\n");
+ put_device(&subsys->dev);
goto out_unlock;
}
ida_init(&subsys->ns_ida);
@@ -2509,7 +2516,6 @@ out_put_subsystem:
nvme_put_subsystem(subsys);
out_unlock:
mutex_unlock(&nvme_subsystems_lock);
- put_device(&subsys->dev);
return ret;
}
@@ -3571,6 +3577,13 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
struct nvme_ns *ns, *next;
LIST_HEAD(ns_list);
+ /*
+ * make sure to requeue I/O to all namespaces as these
+ * might result from the scan itself and must complete
+ * for the scan_work to make progress
+ */
+ nvme_mpath_clear_ctrl_paths(ctrl);
+
/* prevent racing with ns scanning */
flush_work(&ctrl->scan_work);
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 4f0d0d12744e..888d4543894e 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -12,6 +12,36 @@ module_param(multipath, bool, 0444);
MODULE_PARM_DESC(multipath,
"turn on native support for multiple controllers per subsystem");
+void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
+{
+ struct nvme_ns_head *h;
+
+ lockdep_assert_held(&subsys->lock);
+ list_for_each_entry(h, &subsys->nsheads, entry)
+ if (h->disk)
+ blk_mq_unfreeze_queue(h->disk->queue);
+}
+
+void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
+{
+ struct nvme_ns_head *h;
+
+ lockdep_assert_held(&subsys->lock);
+ list_for_each_entry(h, &subsys->nsheads, entry)
+ if (h->disk)
+ blk_mq_freeze_queue_wait(h->disk->queue);
+}
+
+void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
+{
+ struct nvme_ns_head *h;
+
+ lockdep_assert_held(&subsys->lock);
+ list_for_each_entry(h, &subsys->nsheads, entry)
+ if (h->disk)
+ blk_freeze_queue_start(h->disk->queue);
+}
+
/*
* If multipathing is enabled we need to always use the subsystem instance
* number for numbering our devices to avoid conflicts between subsystems that
@@ -104,18 +134,34 @@ static const char *nvme_ana_state_names[] = {
[NVME_ANA_CHANGE] = "change",
};
-void nvme_mpath_clear_current_path(struct nvme_ns *ns)
+bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
struct nvme_ns_head *head = ns->head;
+ bool changed = false;
int node;
if (!head)
- return;
+ goto out;
for_each_node(node) {
- if (ns == rcu_access_pointer(head->current_path[node]))
+ if (ns == rcu_access_pointer(head->current_path[node])) {
rcu_assign_pointer(head->current_path[node], NULL);
+ changed = true;
+ }
}
+out:
+ return changed;
+}
+
+void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
+{
+ struct nvme_ns *ns;
+
+ mutex_lock(&ctrl->scan_lock);
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ if (nvme_mpath_clear_current_path(ns))
+ kblockd_schedule_work(&ns->head->requeue_work);
+ mutex_unlock(&ctrl->scan_lock);
}
static bool nvme_path_is_disabled(struct nvme_ns *ns)
@@ -226,6 +272,24 @@ inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
return ns;
}
+static bool nvme_available_path(struct nvme_ns_head *head)
+{
+ struct nvme_ns *ns;
+
+ list_for_each_entry_rcu(ns, &head->list, siblings) {
+ switch (ns->ctrl->state) {
+ case NVME_CTRL_LIVE:
+ case NVME_CTRL_RESETTING:
+ case NVME_CTRL_CONNECTING:
+ /* fallthru */
+ return true;
+ default:
+ break;
+ }
+ }
+ return false;
+}
+
static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
struct bio *bio)
{
@@ -252,14 +316,14 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
disk_devt(ns->head->disk),
bio->bi_iter.bi_sector);
ret = direct_make_request(bio);
- } else if (!list_empty_careful(&head->list)) {
- dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");
+ } else if (nvme_available_path(head)) {
+ dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");
spin_lock_irq(&head->requeue_lock);
bio_list_add(&head->requeue_list, bio);
spin_unlock_irq(&head->requeue_lock);
} else {
- dev_warn_ratelimited(dev, "no path - failing I/O\n");
+ dev_warn_ratelimited(dev, "no available path - failing I/O\n");
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 26b563f9985b..778b3a0b6adb 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -490,6 +490,9 @@ static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
return ctrl->ana_log_buf != NULL;
}
+void nvme_mpath_unfreeze(struct nvme_subsystem *subsys);
+void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
+void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
struct nvme_ctrl *ctrl, int *flags);
void nvme_failover_req(struct request *req);
@@ -500,7 +503,8 @@ void nvme_mpath_remove_disk(struct nvme_ns_head *head);
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
-void nvme_mpath_clear_current_path(struct nvme_ns *ns);
+bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
+void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);
static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
@@ -548,7 +552,11 @@ static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
}
-static inline void nvme_mpath_clear_current_path(struct nvme_ns *ns)
+static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
+{
+ return false;
+}
+static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
{
}
static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
@@ -568,6 +576,15 @@ static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl)
{
}
+static inline void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
+{
+}
+static inline void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
+{
+}
+static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
+{
+}
#endif /* CONFIG_NVME_MULTIPATH */
#ifdef CONFIG_NVM
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index db160cee42ad..0c2c4b0c6655 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2695,7 +2695,7 @@ static void nvme_async_probe(void *data, async_cookie_t cookie)
{
struct nvme_dev *dev = data;
- nvme_reset_ctrl_sync(&dev->ctrl);
+ flush_work(&dev->ctrl.reset_work);
flush_work(&dev->ctrl.scan_work);
nvme_put_ctrl(&dev->ctrl);
}
@@ -2761,6 +2761,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
+ nvme_reset_ctrl(&dev->ctrl);
nvme_get_ctrl(&dev->ctrl);
async_schedule(nvme_async_probe, dev);
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index a249db528d54..1a6449bc547b 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -562,13 +562,17 @@ out_destroy_cm_id:
return ret;
}
+static void __nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
+{
+ rdma_disconnect(queue->cm_id);
+ ib_drain_qp(queue->qp);
+}
+
static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
return;
-
- rdma_disconnect(queue->cm_id);
- ib_drain_qp(queue->qp);
+ __nvme_rdma_stop_queue(queue);
}
static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
@@ -607,11 +611,13 @@ static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx)
else
ret = nvmf_connect_admin_queue(&ctrl->ctrl);
- if (!ret)
+ if (!ret) {
set_bit(NVME_RDMA_Q_LIVE, &queue->flags);
- else
+ } else {
+ __nvme_rdma_stop_queue(queue);
dev_info(ctrl->ctrl.device,
"failed to connect queue: %d ret=%d\n", idx, ret);
+ }
return ret;
}
diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c
index cd52b9f15376..98613a45bd3b 100644
--- a/drivers/nvme/target/configfs.c
+++ b/drivers/nvme/target/configfs.c
@@ -675,6 +675,7 @@ static void nvmet_port_subsys_drop_link(struct config_item *parent,
found:
list_del(&p->entry);
+ nvmet_port_del_ctrls(port, subsys);
nvmet_port_disc_changed(port, subsys);
if (list_empty(&port->subsystems))
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index dad0243c7c96..3a67e244e568 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -46,6 +46,9 @@ inline u16 errno_to_nvme_status(struct nvmet_req *req, int errno)
u16 status;
switch (errno) {
+ case 0:
+ status = NVME_SC_SUCCESS;
+ break;
case -ENOSPC:
req->error_loc = offsetof(struct nvme_rw_command, length);
status = NVME_SC_CAP_EXCEEDED | NVME_SC_DNR;
@@ -280,6 +283,18 @@ void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
}
EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
+void nvmet_port_del_ctrls(struct nvmet_port *port, struct nvmet_subsys *subsys)
+{
+ struct nvmet_ctrl *ctrl;
+
+ mutex_lock(&subsys->lock);
+ list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
+ if (ctrl->port == port)
+ ctrl->ops->delete_ctrl(ctrl);
+ }
+ mutex_unlock(&subsys->lock);
+}
+
int nvmet_enable_port(struct nvmet_port *port)
{
const struct nvmet_fabrics_ops *ops;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index b16dc3981c69..0940c5024a34 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -654,6 +654,14 @@ static void nvme_loop_remove_port(struct nvmet_port *port)
mutex_lock(&nvme_loop_ports_mutex);
list_del_init(&port->entry);
mutex_unlock(&nvme_loop_ports_mutex);
+
+ /*
+ * Ensure any ctrls that are in the process of being
+ * deleted are in fact deleted before we return
+ * and free the port. This is to prevent active
+ * ctrls from using a port after it's freed.
+ */
+ flush_workqueue(nvme_delete_wq);
}
static const struct nvmet_fabrics_ops nvme_loop_ops = {
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 6ee66c610739..c51f8dd01dc4 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -418,6 +418,9 @@ void nvmet_port_send_ana_event(struct nvmet_port *port);
int nvmet_register_transport(const struct nvmet_fabrics_ops *ops);
void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops);
+void nvmet_port_del_ctrls(struct nvmet_port *port,
+ struct nvmet_subsys *subsys);
+
int nvmet_enable_port(struct nvmet_port *port);
void nvmet_disable_port(struct nvmet_port *port);