summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKeith Busch <kbusch@kernel.org>2024-06-04 11:59:08 -0700
committerKeith Busch <kbusch@kernel.org>2024-06-24 12:53:42 -0700
commit1a9e218195a55d0a31e8cbe263aa2f618580ef1d (patch)
tree7d2a55dc57ecfdb8e7d67edb303ce0be621fd52e
parent72cded7573c8c038f999e49c77d1097efcfd15aa (diff)
downloadlinux-1a9e218195a55d0a31e8cbe263aa2f618580ef1d.tar.gz
linux-1a9e218195a55d0a31e8cbe263aa2f618580ef1d.tar.bz2
linux-1a9e218195a55d0a31e8cbe263aa2f618580ef1d.zip
nvme: split device add from initialization
Combining both creates an ambiguous cleanup scenario for the caller if an error is returned: does the device reference need to be dropped or did the error occur before the device was initialized? If an error occurs after the device is added, then the existing cleanup routines will leak memory. Furthermore, the nvme core is taking it upon itself to free the device's kobj name under certain conditions rather than go through the core device API. We shouldn't be peaking into these implementation details. Split the device initialization from the addition to make it easier to know the error handling actions, fix the existing memory leaks, and stop the device layering violations. Link: https://lore.kernel.org/linux-nvme/c4050a37-ecc9-462c-9772-65e25166f439@grimberg.me/ Tested-by: Yi Zhang <yi.zhang@redhat.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com> Signed-off-by: Keith Busch <kbusch@kernel.org>
-rw-r--r--drivers/nvme/host/apple.c5
-rw-r--r--drivers/nvme/host/core.c58
-rw-r--r--drivers/nvme/host/fc.c5
-rw-r--r--drivers/nvme/host/nvme.h1
-rw-r--r--drivers/nvme/host/pci.c5
-rw-r--r--drivers/nvme/host/rdma.c5
-rw-r--r--drivers/nvme/host/tcp.c5
-rw-r--r--drivers/nvme/target/loop.c5
8 files changed, 66 insertions, 23 deletions
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index c43ada920c3b..42ba7fbc749f 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -1531,6 +1531,10 @@ static int apple_nvme_probe(struct platform_device *pdev)
if (IS_ERR(anv))
return PTR_ERR(anv);
+ ret = nvme_add_ctrl(&anv->ctrl);
+ if (ret)
+ goto out_put_ctrl;
+
anv->ctrl.admin_q = blk_mq_alloc_queue(&anv->admin_tagset, NULL, NULL);
if (IS_ERR(anv->ctrl.admin_q)) {
ret = -ENOMEM;
@@ -1545,6 +1549,7 @@ static int apple_nvme_probe(struct platform_device *pdev)
out_uninit_ctrl:
nvme_uninit_ctrl(&anv->ctrl);
+out_put_ctrl:
nvme_put_ctrl(&anv->ctrl);
return ret;
}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 89ebfa89613e..80d8460bb92f 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4667,6 +4667,9 @@ static void nvme_free_ctrl(struct device *dev)
* Initialize a NVMe controller structures. This needs to be called during
* earliest initialization so that we have the initialized structured around
* during probing.
+ *
+ * On success, the caller must use the nvme_put_ctrl() to release this when
+ * needed, which also invokes the ops->free_ctrl() callback.
*/
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
const struct nvme_ctrl_ops *ops, unsigned long quirks)
@@ -4715,6 +4718,12 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
goto out;
ctrl->instance = ret;
+ ret = nvme_auth_init_ctrl(ctrl);
+ if (ret)
+ goto out_release_instance;
+
+ nvme_mpath_init_ctrl(ctrl);
+
device_initialize(&ctrl->ctrl_device);
ctrl->device = &ctrl->ctrl_device;
ctrl->device->devt = MKDEV(MAJOR(nvme_ctrl_base_chr_devt),
@@ -4727,16 +4736,36 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
ctrl->device->groups = nvme_dev_attr_groups;
ctrl->device->release = nvme_free_ctrl;
dev_set_drvdata(ctrl->device, ctrl);
+
+ return ret;
+
+out_release_instance:
+ ida_free(&nvme_instance_ida, ctrl->instance);
+out:
+ if (ctrl->discard_page)
+ __free_page(ctrl->discard_page);
+ cleanup_srcu_struct(&ctrl->srcu);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nvme_init_ctrl);
+
+/*
+ * On success, returns with an elevated controller reference and caller must
+ * use nvme_uninit_ctrl() to properly free resources associated with the ctrl.
+ */
+int nvme_add_ctrl(struct nvme_ctrl *ctrl)
+{
+ int ret;
+
ret = dev_set_name(ctrl->device, "nvme%d", ctrl->instance);
if (ret)
- goto out_release_instance;
+ return ret;
- nvme_get_ctrl(ctrl);
cdev_init(&ctrl->cdev, &nvme_dev_fops);
- ctrl->cdev.owner = ops->module;
+ ctrl->cdev.owner = ctrl->ops->module;
ret = cdev_device_add(&ctrl->cdev, ctrl->device);
if (ret)
- goto out_free_name;
+ return ret;
/*
* Initialize latency tolerance controls. The sysfs files won't
@@ -4747,28 +4776,11 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
min(default_ps_max_latency_us, (unsigned long)S32_MAX));
nvme_fault_inject_init(&ctrl->fault_inject, dev_name(ctrl->device));
- nvme_mpath_init_ctrl(ctrl);
- ret = nvme_auth_init_ctrl(ctrl);
- if (ret)
- goto out_free_cdev;
+ nvme_get_ctrl(ctrl);
return 0;
-out_free_cdev:
- nvme_fault_inject_fini(&ctrl->fault_inject);
- dev_pm_qos_hide_latency_tolerance(ctrl->device);
- cdev_device_del(&ctrl->cdev, ctrl->device);
-out_free_name:
- nvme_put_ctrl(ctrl);
- kfree_const(ctrl->device->kobj.name);
-out_release_instance:
- ida_free(&nvme_instance_ida, ctrl->instance);
-out:
- if (ctrl->discard_page)
- __free_page(ctrl->discard_page);
- cleanup_srcu_struct(&ctrl->srcu);
- return ret;
}
-EXPORT_SYMBOL_GPL(nvme_init_ctrl);
+EXPORT_SYMBOL_GPL(nvme_add_ctrl);
/* let I/O to all namespaces fail in preparation for surprise removal */
void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl)
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index c52446013388..d5a383766b34 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -3563,6 +3563,10 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
if (IS_ERR(ctrl))
return ERR_CAST(ctrl);
+ ret = nvme_add_ctrl(&ctrl->ctrl);
+ if (ret)
+ goto out_put_ctrl;
+
ret = nvme_alloc_admin_tag_set(&ctrl->ctrl, &ctrl->admin_tag_set,
&nvme_fc_admin_mq_ops,
struct_size_t(struct nvme_fcp_op_w_sgl, priv,
@@ -3607,6 +3611,7 @@ fail_ctrl:
/* initiate nvme ctrl ref counting teardown */
nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
/* Remove core ctrl ref. */
nvme_put_ctrl(&ctrl->ctrl);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index f3a41133ac3f..969349068086 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -792,6 +792,7 @@ int nvme_disable_ctrl(struct nvme_ctrl *ctrl, bool shutdown);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
const struct nvme_ctrl_ops *ops, unsigned long quirks);
+int nvme_add_ctrl(struct nvme_ctrl *ctrl);
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 102a9fb0c65f..c92125b0238d 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -3015,6 +3015,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (IS_ERR(dev))
return PTR_ERR(dev);
+ result = nvme_add_ctrl(&dev->ctrl);
+ if (result)
+ goto out_put_ctrl;
+
result = nvme_dev_map(dev);
if (result)
goto out_uninit_ctrl;
@@ -3101,6 +3105,7 @@ out_dev_unmap:
nvme_dev_unmap(dev);
out_uninit_ctrl:
nvme_uninit_ctrl(&dev->ctrl);
+out_put_ctrl:
nvme_put_ctrl(&dev->ctrl);
return result;
}
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 94d4f3dbac6b..5c44c7c5c688 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -2323,6 +2323,10 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
if (IS_ERR(ctrl))
return ERR_CAST(ctrl);
+ ret = nvme_add_ctrl(&ctrl->ctrl);
+ if (ret)
+ goto out_put_ctrl;
+
changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING);
WARN_ON_ONCE(!changed);
@@ -2341,6 +2345,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
out_uninit_ctrl:
nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
nvme_put_ctrl(&ctrl->ctrl);
if (ret > 0)
ret = -EIO;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 5ee3bbc67f41..3be67c98c906 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2779,6 +2779,10 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
if (IS_ERR(ctrl))
return ERR_CAST(ctrl);
+ ret = nvme_add_ctrl(&ctrl->ctrl);
+ if (ret)
+ goto out_put_ctrl;
+
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
WARN_ON_ONCE(1);
ret = -EINTR;
@@ -2800,6 +2804,7 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev,
out_uninit_ctrl:
nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
nvme_put_ctrl(&ctrl->ctrl);
if (ret > 0)
ret = -EIO;
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index e589915ddef8..e32790d8fc26 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -555,6 +555,10 @@ static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
goto out;
}
+ ret = nvme_add_ctrl(&ctrl->ctrl);
+ if (ret)
+ goto out_put_ctrl;
+
if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
WARN_ON_ONCE(1);
@@ -611,6 +615,7 @@ out_free_queues:
kfree(ctrl->queues);
out_uninit_ctrl:
nvme_uninit_ctrl(&ctrl->ctrl);
+out_put_ctrl:
nvme_put_ctrl(&ctrl->ctrl);
out:
if (ret > 0)