From 373282e7ab6840cd583a223fa90628f2d8293c26 Mon Sep 17 00:00:00 2001 From: John Pittman Date: Fri, 4 Jan 2019 12:06:37 -0500 Subject: null_blk: add zoned config support information If the kernel is built without CONFIG_BLK_DEV_ZONED, a modprobe of the null_blk driver with zoned=1 fails with 'Invalid argument'. This can be confusing to users, prompting a search as to why the parameter is invalid. To assist in that search, add a bit more information to the failure, additionally adding to the documentation that CONFIG_BLK_DEV_ZONED is needed for zoned=1. Reviewed-by: Bart Van Assche Signed-off-by: John Pittman Added null_blk prefix to error message. Signed-off-by: Jens Axboe --- drivers/block/null_blk.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers') diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index b3df2793e7cd..34b22d6523ba 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -97,6 +97,7 @@ void null_zone_reset(struct nullb_cmd *cmd, sector_t sector); #else static inline int null_zone_init(struct nullb_device *dev) { + pr_err("null_blk: CONFIG_BLK_DEV_ZONED not enabled\n"); return -EINVAL; } static inline void null_zone_exit(struct nullb_device *dev) {} -- cgit v1.2.3 From c61e678f30da733a1b7fdd5983d0770de2e6009c Mon Sep 17 00:00:00 2001 From: Jianchao Wang Date: Mon, 24 Dec 2018 11:15:53 +0800 Subject: nvme-pci: fix the wrong setting of nr_maps We only set the nr_maps to 3 if poll queues are supported. Signed-off-by: Jianchao Wang Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 5a0bf6a24d50..cc65fa8a537b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2294,7 +2294,6 @@ static int nvme_dev_add(struct nvme_dev *dev) dev->tagset.nr_maps = 2; /* default + read */ if (dev->io_queues[HCTX_TYPE_POLL]) dev->tagset.nr_maps++; - dev->tagset.nr_maps = HCTX_MAX_TYPES; dev->tagset.timeout = NVME_IO_TIMEOUT; dev->tagset.numa_node = dev_to_node(dev->dev); dev->tagset.queue_depth = -- cgit v1.2.3 From cc667f6d5de023ee131e96bb88e5cddca23272bd Mon Sep 17 00:00:00 2001 From: Liviu Dudau Date: Sat, 29 Dec 2018 17:23:43 +0000 Subject: nvme-pci: use the same attributes when freeing host_mem_desc_bufs. When using HMB the PCIe host driver allocates host_mem_desc_bufs using dma_alloc_attrs() but frees them using dma_free_coherent(). Use the correct dma_free_attrs() function to free the buffers. Signed-off-by: Liviu Dudau Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index cc65fa8a537b..efe46f518022 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1885,8 +1885,9 @@ static void nvme_free_host_mem(struct nvme_dev *dev) struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i]; size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size; - dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i], - le64_to_cpu(desc->addr)); + dma_free_attrs(dev->dev, size, dev->host_mem_desc_bufs[i], + le64_to_cpu(desc->addr), + DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN); } kfree(dev->host_mem_desc_bufs); @@ -1952,8 +1953,9 @@ out_free_bufs: while (--i >= 0) { size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size; - dma_free_coherent(dev->dev, size, bufs[i], - le64_to_cpu(descs[i].addr)); + dma_free_attrs(dev->dev, size, bufs[i], + le64_to_cpu(descs[i].addr), + DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN); } kfree(bufs); -- cgit v1.2.3 From 8fae268b40f5191227ae7050a99cb2cf1b914ddd Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 4 Jan 2019 15:04:33 -0700 Subject: nvme-pci: rerun irq setup on IO queue init errors If the driver is unable to create a subset of IO queues for any reason, the read/write and polled queue sets will not match the actual allocated hardware contexts. This leaves gaps in the CPU affinity mappings and causes the following kernel panic after blk_mq_map_queue_type() returns a NULL hctx. BUG: unable to handle kernel NULL pointer dereference at 0000000000000198 #PF error: [normal kernel read fault] PGD 0 P4D 0 Oops: 0000 [#1] SMP CPU: 64 PID: 1171 Comm: kworker/u259:1 Not tainted 4.20.0+ #241 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014 Workqueue: nvme-wq nvme_scan_work [nvme_core] RIP: 0010:blk_mq_init_allocated_queue+0x2d9/0x440 RSP: 0018:ffffb1bf0abc3cd0 EFLAGS: 00010286 RAX: 000000000000001f RBX: ffff8ea744cf0718 RCX: 0000000000000000 RDX: 0000000000000002 RSI: 000000000000007c RDI: ffffffff9109a820 RBP: ffff8ea7565f7008 R08: 000000000000001f R09: 000000000000003f R10: ffffb1bf0abc3c00 R11: 0000000000000000 R12: 000000000001d008 R13: ffff8ea7565f7008 R14: 000000000000003f R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff8ea757200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000198 CR3: 0000000013058000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: blk_mq_init_queue+0x35/0x60 nvme_validate_ns+0xc6/0x7c0 [nvme_core] ? nvme_identify_ctrl.isra.56+0x7e/0xc0 [nvme_core] nvme_scan_work+0xc8/0x340 [nvme_core] ? __wake_up_common+0x6d/0x120 ? try_to_wake_up+0x55/0x410 process_one_work+0x1e9/0x3d0 worker_thread+0x2d/0x3d0 ? process_one_work+0x3d0/0x3d0 kthread+0x111/0x130 ? kthread_park+0x90/0x90 ret_from_fork+0x1f/0x30 Modules linked in: nvme nvme_core serio_raw CR2: 0000000000000198 Fix by re-running the interrupt vector setup from scratch using a reduced count that may be successful until the created queues matches the irq affinity plus polling queue sets. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Ming Lei Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 50 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index efe46f518022..f891eb57f263 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -95,6 +95,7 @@ struct nvme_dev; struct nvme_queue; static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown); +static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode); /* * Represents an NVM Express device. Each nvme_dev is a PCI function. @@ -1420,6 +1421,14 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) return 0; } +static void nvme_suspend_io_queues(struct nvme_dev *dev) +{ + int i; + + for (i = dev->ctrl.queue_count - 1; i > 0; i--) + nvme_suspend_queue(&dev->queues[i]); +} + static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown) { struct nvme_queue *nvmeq = &dev->queues[0]; @@ -2134,6 +2143,12 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues) return result; } +static void nvme_disable_io_queues(struct nvme_dev *dev) +{ + if (__nvme_disable_io_queues(dev, nvme_admin_delete_sq)) + __nvme_disable_io_queues(dev, nvme_admin_delete_cq); +} + static int nvme_setup_io_queues(struct nvme_dev *dev) { struct nvme_queue *adminq = &dev->queues[0]; @@ -2170,6 +2185,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) } while (1); adminq->q_db = dev->dbs; + retry: /* Deregister the admin queue's interrupt */ pci_free_irq(pdev, 0, adminq); @@ -2187,25 +2203,34 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) result = max(result - 1, 1); dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL]; - dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n", - dev->io_queues[HCTX_TYPE_DEFAULT], - dev->io_queues[HCTX_TYPE_READ], - dev->io_queues[HCTX_TYPE_POLL]); - /* * Should investigate if there's a performance win from allocating * more queues than interrupt vectors; it might allow the submission * path to scale better, even if the receive path is limited by the * number of interrupts. */ - result = queue_request_irq(adminq); if (result) { adminq->cq_vector = -1; return result; } set_bit(NVMEQ_ENABLED, &adminq->flags); - return nvme_create_io_queues(dev); + + result = nvme_create_io_queues(dev); + if (result || dev->online_queues < 2) + return result; + + if (dev->online_queues - 1 < dev->max_qid) { + nr_io_queues = dev->online_queues - 1; + nvme_disable_io_queues(dev); + nvme_suspend_io_queues(dev); + goto retry; + } + dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n", + dev->io_queues[HCTX_TYPE_DEFAULT], + dev->io_queues[HCTX_TYPE_READ], + dev->io_queues[HCTX_TYPE_POLL]); + return 0; } static void nvme_del_queue_end(struct request *req, blk_status_t error) @@ -2250,7 +2275,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) return 0; } -static bool nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) +static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode) { int nr_queues = dev->online_queues - 1, sent = 0; unsigned long timeout; @@ -2411,7 +2436,6 @@ static void nvme_pci_disable(struct nvme_dev *dev) static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { - int i; bool dead = true; struct pci_dev *pdev = to_pci_dev(dev->dev); @@ -2438,13 +2462,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) nvme_stop_queues(&dev->ctrl); if (!dead && dev->ctrl.queue_count > 0) { - if (nvme_disable_io_queues(dev, nvme_admin_delete_sq)) - nvme_disable_io_queues(dev, nvme_admin_delete_cq); + nvme_disable_io_queues(dev); nvme_disable_admin_queue(dev, shutdown); } - for (i = dev->ctrl.queue_count - 1; i >= 0; i--) - nvme_suspend_queue(&dev->queues[i]); - + nvme_suspend_io_queues(dev); + nvme_suspend_queue(&dev->queues[0]); nvme_pci_disable(dev); blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl); -- cgit v1.2.3 From dcca1662727220d18fa351097ddff33f95f516c5 Mon Sep 17 00:00:00 2001 From: Hongbo Yao Date: Mon, 7 Jan 2019 10:22:07 +0800 Subject: nvme-pci: fix out of bounds access in nvme_cqe_pending There is an out of bounds array access in nvme_cqe_peding(). When enable irq_thread for nvme interrupt, there is racing between the nvmeq->cq_head updating and reading. nvmeq->cq_head is updated in nvme_update_cq_head(), if nvmeq->cq_head equals nvmeq->q_depth and before its value set to zero, nvme_cqe_pending() uses its value as an array index, the index will be out of bounds. Signed-off-by: Hongbo Yao [hch: slight coding style update] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index f891eb57f263..3f53a2c3042d 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1020,9 +1020,11 @@ static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end) static inline void nvme_update_cq_head(struct nvme_queue *nvmeq) { - if (++nvmeq->cq_head == nvmeq->q_depth) { + if (nvmeq->cq_head == nvmeq->q_depth - 1) { nvmeq->cq_head = 0; nvmeq->cq_phase = !nvmeq->cq_phase; + } else { + nvmeq->cq_head++; } } -- cgit v1.2.3 From e9c2edc098921173920df370c69b5c38fe52df56 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 31 Dec 2018 23:58:29 -0800 Subject: nvme-tcp: remove dead code We should never touch the opal device from the transport driver. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index de174912445e..7210b2d6df13 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1620,7 +1620,6 @@ static void nvme_tcp_destroy_admin_queue(struct nvme_ctrl *ctrl, bool remove) { nvme_tcp_stop_queue(ctrl, 0); if (remove) { - free_opal_dev(ctrl->opal_dev); blk_cleanup_queue(ctrl->admin_q); blk_mq_free_tag_set(ctrl->admin_tagset); } -- cgit v1.2.3 From e85037a2e90ac9aa448a08927d7a7436206c6000 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 31 Dec 2018 23:58:30 -0800 Subject: nvme-tcp: don't ask if controller is fabrics For sure we are a fabric driver. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 7210b2d6df13..265a0543b381 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1565,8 +1565,7 @@ static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) { nvme_tcp_stop_io_queues(ctrl); if (remove) { - if (ctrl->ops->flags & NVME_F_FABRICS) - blk_cleanup_queue(ctrl->connect_q); + blk_cleanup_queue(ctrl->connect_q); blk_mq_free_tag_set(ctrl->tagset); } nvme_tcp_free_io_queues(ctrl); @@ -1587,12 +1586,10 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) goto out_free_io_queues; } - if (ctrl->ops->flags & NVME_F_FABRICS) { - ctrl->connect_q = blk_mq_init_queue(ctrl->tagset); - if (IS_ERR(ctrl->connect_q)) { - ret = PTR_ERR(ctrl->connect_q); - goto out_free_tag_set; - } + ctrl->connect_q = blk_mq_init_queue(ctrl->tagset); + if (IS_ERR(ctrl->connect_q)) { + ret = PTR_ERR(ctrl->connect_q); + goto out_free_tag_set; } } else { blk_mq_update_nr_hw_queues(ctrl->tagset, @@ -1606,7 +1603,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) return 0; out_cleanup_connect_q: - if (new && (ctrl->ops->flags & NVME_F_FABRICS)) + if (new) blk_cleanup_queue(ctrl->connect_q); out_free_tag_set: if (new) -- cgit v1.2.3 From 9846ac0143fe9872e92fe2a1ddff868ad05bdbb6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 7 Jan 2019 23:54:23 -0800 Subject: nvme-fabrics: unset write/poll queues for discovery controllers Even if user-space sent it to us, it got it wrong so lets help by disallowing it. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index b2ab213f43de..3eb908c50e1a 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -874,6 +874,8 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, if (opts->discovery_nqn) { opts->kato = 0; opts->nr_io_queues = 0; + opts->nr_write_queues = 0; + opts->nr_poll_queues = 0; opts->duplicate_connect = true; } if (ctrl_loss_tmo < 0) -- cgit v1.2.3 From c7055fd15ff46d92eb0dd1c16a4fe010d58224c8 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 8 Jan 2019 12:46:58 +0100 Subject: nvme-multipath: zero out ANA log buffer When nvme_init_identify() fails the ANA log buffer is deallocated but _not_ set to NULL. This can cause double free oops when this controller is deleted without ever being reconnected. Signed-off-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 183ec17ba067..df4b3a6db51b 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -570,6 +570,7 @@ int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) return 0; out_free_ana_log_buf: kfree(ctrl->ana_log_buf); + ctrl->ana_log_buf = NULL; out: return error; } @@ -577,5 +578,6 @@ out: void nvme_mpath_uninit(struct nvme_ctrl *ctrl) { kfree(ctrl->ana_log_buf); + ctrl->ana_log_buf = NULL; } -- cgit v1.2.3 From 3da584f57133e51aeb84aaefae5e3d69531a1e4f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 8 Jan 2019 09:37:43 -0700 Subject: nvme: pad fake subsys NQN vid and ssvid with zeros We need to preserve the leading zeros in the vid and ssvid when generating a unique NQN. Truncating these may lead to naming collisions. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 08f2c92602f4..f81fde164d37 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2184,7 +2184,7 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */ off = snprintf(subsys->subnqn, NVMF_NQN_SIZE, - "nqn.2014.08.org.nvmexpress:%4x%4x", + "nqn.2014.08.org.nvmexpress:%04x%04x", le16_to_cpu(id->vid), le16_to_cpu(id->ssvid)); memcpy(subsys->subnqn + off, id->sn, sizeof(id->sn)); off += sizeof(id->sn); -- cgit v1.2.3 From 6299358d198a0635da2dd3c4b3ec37789e811e44 Mon Sep 17 00:00:00 2001 From: James Dingwall Date: Tue, 8 Jan 2019 10:20:51 -0700 Subject: nvme: introduce NVME_QUIRK_IGNORE_DEV_SUBNQN If a device provides an NQN it is expected to be globally unique. Unfortunately some firmware revisions for Intel 760p/Pro 7600p devices did not satisfy this requirement. In these circumstances if a system has >1 affected device then only one device is enabled. If this quirk is enabled then the device supplied subnqn is ignored and we fallback to generating one as if the field was empty. In this case we also suppress the version check so we don't print a warning when the quirk is enabled. Reviewed-by: Keith Busch Signed-off-by: James Dingwall Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 16 +++++++++------- drivers/nvme/host/nvme.h | 5 +++++ drivers/nvme/host/pci.c | 2 ++ 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f81fde164d37..8485be6bb895 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2173,14 +2173,16 @@ static void nvme_init_subnqn(struct nvme_subsystem *subsys, struct nvme_ctrl *ct size_t nqnlen; int off; - nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE); - if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) { - strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); - return; - } + if(!(ctrl->quirks & NVME_QUIRK_IGNORE_DEV_SUBNQN)) { + nqnlen = strnlen(id->subnqn, NVMF_NQN_SIZE); + if (nqnlen > 0 && nqnlen < NVMF_NQN_SIZE) { + strlcpy(subsys->subnqn, id->subnqn, NVMF_NQN_SIZE); + return; + } - if (ctrl->vs >= NVME_VS(1, 2, 1)) - dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n"); + if (ctrl->vs >= NVME_VS(1, 2, 1)) + dev_warn(ctrl->device, "missing or invalid SUBNQN field.\n"); + } /* Generate a "fake" NQN per Figure 254 in NVMe 1.3 + ECN 001 */ off = snprintf(subsys->subnqn, NVMF_NQN_SIZE, diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2b36ac922596..ab961bdeea89 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -90,6 +90,11 @@ enum nvme_quirks { * Set MEDIUM priority on SQ creation */ NVME_QUIRK_MEDIUM_PRIO_SQ = (1 << 7), + + /* + * Ignore device provided subnqn. + */ + NVME_QUIRK_IGNORE_DEV_SUBNQN = (1 << 8), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3f53a2c3042d..fc9d17c317b8 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2971,6 +2971,8 @@ static const struct pci_device_id nvme_id_table[] = { { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ .driver_data = NVME_QUIRK_NO_DEEPEST_PS | NVME_QUIRK_MEDIUM_PRIO_SQ }, + { PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */ + .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */ -- cgit v1.2.3 From b8a38ea64dc714a64f8fb76e311a4f15a3f67861 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Mon, 7 Jan 2019 19:08:49 -0800 Subject: nvme: don't initlialize ctrl->cntlid twice ctrl->cntlid will already be initialized from id->cntlid for non-NVME_F_FABRICS controllers few lines below. For NVME_F_FABRICS controllers this field should already be initialized, otherwise the check if (ctrl->cntlid != le16_to_cpu(id->cntlid)) below will always be a no-op. Signed-off-by: Andrey Smirnov Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 8485be6bb895..150e49723c15 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2502,7 +2502,6 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->oaes = le32_to_cpu(id->oaes); atomic_set(&ctrl->abort_limit, id->acl + 1); ctrl->vwc = id->vwc; - ctrl->cntlid = le16_to_cpup(&id->cntlid); if (id->mdts) max_hw_sectors = 1 << (id->mdts + page_shift - 9); else -- cgit v1.2.3 From 5db470e229e22b7eda6e23b5566e532c96fb5bc3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 9 Jan 2019 19:17:14 -0800 Subject: loop: drop caches if offset or block_size are changed If we don't drop caches used in old offset or block_size, we can get old data from new offset/block_size, which gives unexpected data to user. For example, Martijn found a loopback bug in the below scenario. 1) LOOP_SET_FD loads first two pages on loop file 2) LOOP_SET_STATUS64 changes the offset on the loop file 3) mount is failed due to the cached pages having wrong superblock Cc: Jens Axboe Cc: linux-block@vger.kernel.org Reported-by: Martijn Coenen Reviewed-by: Bart Van Assche Signed-off-by: Jaegeuk Kim Signed-off-by: Jens Axboe --- drivers/block/loop.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index b8a0720d3653..cf5538942834 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1190,6 +1190,12 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) goto out_unlock; } + if (lo->lo_offset != info->lo_offset || + lo->lo_sizelimit != info->lo_sizelimit) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } + /* I/O need to be drained during transfer transition */ blk_mq_freeze_queue(lo->lo_queue); @@ -1218,6 +1224,14 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (lo->lo_offset != info->lo_offset || lo->lo_sizelimit != info->lo_sizelimit) { + /* kill_bdev should have truncated all the pages */ + if (lo->lo_device->bd_inode->i_mapping->nrpages) { + err = -EAGAIN; + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + __func__, lo->lo_number, lo->lo_file_name, + lo->lo_device->bd_inode->i_mapping->nrpages); + goto out_unfreeze; + } if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { err = -EFBIG; goto out_unfreeze; @@ -1443,22 +1457,39 @@ static int loop_set_dio(struct loop_device *lo, unsigned long arg) static int loop_set_block_size(struct loop_device *lo, unsigned long arg) { + int err = 0; + if (lo->lo_state != Lo_bound) return -ENXIO; if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) return -EINVAL; + if (lo->lo_queue->limits.logical_block_size != arg) { + sync_blockdev(lo->lo_device); + kill_bdev(lo->lo_device); + } + blk_mq_freeze_queue(lo->lo_queue); + /* kill_bdev should have truncated all the pages */ + if (lo->lo_queue->limits.logical_block_size != arg && + lo->lo_device->bd_inode->i_mapping->nrpages) { + err = -EAGAIN; + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", + __func__, lo->lo_number, lo->lo_file_name, + lo->lo_device->bd_inode->i_mapping->nrpages); + goto out_unfreeze; + } + blk_queue_logical_block_size(lo->lo_queue, arg); blk_queue_physical_block_size(lo->lo_queue, arg); blk_queue_io_min(lo->lo_queue, arg); loop_update_dio(lo); - +out_unfreeze: blk_mq_unfreeze_queue(lo->lo_queue); - return 0; + return err; } static int lo_simple_ioctl(struct loop_device *lo, unsigned int cmd, -- cgit v1.2.3 From 49e54187ae0b2f9b5c0760e568a103baf4481610 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 4 Dec 2018 20:28:25 +0100 Subject: ata: libahci_platform: comply to PHY framework Current implementation of the libahci does not take into account the new PHY framework. Correct the situation by adding a call to phy_set_mode() before phy_power_on(). PHYs should also be handled at suspend/resume time. For this, call ahci_platform_enable/disable_phys() at suspend/resume_host() time. These calls are guarded by a HFLAG (AHCI_HFLAG_SUSPEND_PHYS) that the user of the libahci driver must set manually in hpriv->flags at probe time. This is to avoid breaking users that have not been tested with this change. Reviewed-by: Hans de Goede Suggested-by: Grzegorz Jaszczyk Signed-off-by: Miquel Raynal Signed-off-by: Jens Axboe --- drivers/ata/ahci.h | 2 ++ drivers/ata/libahci_platform.c | 13 +++++++++++++ 2 files changed, 15 insertions(+) (limited to 'drivers') diff --git a/drivers/ata/ahci.h b/drivers/ata/ahci.h index ef356e70e6de..8810475f307a 100644 --- a/drivers/ata/ahci.h +++ b/drivers/ata/ahci.h @@ -254,6 +254,8 @@ enum { AHCI_HFLAG_IS_MOBILE = (1 << 25), /* mobile chipset, use SATA_MOBILE_LPM_POLICY as default lpm_policy */ + AHCI_HFLAG_SUSPEND_PHYS = (1 << 26), /* handle PHYs during + suspend/resume */ /* ap->flags bits */ diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 4b900fc659f7..81b1a3332ed6 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -56,6 +56,12 @@ static int ahci_platform_enable_phys(struct ahci_host_priv *hpriv) if (rc) goto disable_phys; + rc = phy_set_mode(hpriv->phys[i], PHY_MODE_SATA); + if (rc) { + phy_exit(hpriv->phys[i]); + goto disable_phys; + } + rc = phy_power_on(hpriv->phys[i]); if (rc) { phy_exit(hpriv->phys[i]); @@ -738,6 +744,9 @@ int ahci_platform_suspend_host(struct device *dev) writel(ctl, mmio + HOST_CTL); readl(mmio + HOST_CTL); /* flush */ + if (hpriv->flags & AHCI_HFLAG_SUSPEND_PHYS) + ahci_platform_disable_phys(hpriv); + return ata_host_suspend(host, PMSG_SUSPEND); } EXPORT_SYMBOL_GPL(ahci_platform_suspend_host); @@ -756,6 +765,7 @@ EXPORT_SYMBOL_GPL(ahci_platform_suspend_host); int ahci_platform_resume_host(struct device *dev) { struct ata_host *host = dev_get_drvdata(dev); + struct ahci_host_priv *hpriv = host->private_data; int rc; if (dev->power.power_state.event == PM_EVENT_SUSPEND) { @@ -766,6 +776,9 @@ int ahci_platform_resume_host(struct device *dev) ahci_init_controller(host); } + if (hpriv->flags & AHCI_HFLAG_SUSPEND_PHYS) + ahci_platform_enable_phys(hpriv); + ata_host_resume(host); return 0; -- cgit v1.2.3 From c9bc136791ba0eefe07ed57d3850b8c5cee6471b Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 4 Dec 2018 20:28:26 +0100 Subject: ata: ahci: mvebu: remove stale comment For Armada-38x (32-bit) SoCs, PM platform support has been added since: commit 32f9494c9dfd ("ARM: mvebu: prepare pm-board.c for the introduction of Armada 38x support") commit 3cbd6a6ca81c ("ARM: mvebu: Add standby support") For Armada 64-bit SoCs, like the A3700 also using this AHCI driver, PM platform support has always existed. There are even suspend/resume hooks in this driver since: commit d6ecf15814888 ("ata: ahci_mvebu: add suspend/resume support") Remove the stale comment at the end of this driver stating that all the above does not exist yet. Fixes: d6ecf15814888 ("ata: ahci_mvebu: add suspend/resume support") Signed-off-by: Miquel Raynal Signed-off-by: Jens Axboe --- drivers/ata/ahci_mvebu.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers') diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c index f9cb51be38eb..128d6f22926d 100644 --- a/drivers/ata/ahci_mvebu.c +++ b/drivers/ata/ahci_mvebu.c @@ -197,11 +197,6 @@ static const struct of_device_id ahci_mvebu_of_match[] = { }; MODULE_DEVICE_TABLE(of, ahci_mvebu_of_match); -/* - * We currently don't provide power management related operations, - * since there is no suspend/resume support at the platform level for - * Armada 38x for the moment. - */ static struct platform_driver ahci_mvebu_driver = { .probe = ahci_mvebu_probe, .remove = ata_platform_remove_one, -- cgit v1.2.3 From 96dbcb40e4b1a387cdb9b21f43638c759aebb5a4 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 4 Dec 2018 20:28:27 +0100 Subject: ata: ahci: mvebu: do Armada 38x configuration only on relevant SoCs At the beginning, only Armada 38x SoCs where supported by the ahci_mvebu.c driver. Commit 15d3ce7b63bd ("ata: ahci_mvebu: add support for Armada 3700 variant") introduced Armada 3700 support. As opposed to Armada 38x SoCs, the 3700 variants do not have to configure mbus and the regret option. This patch took care of avoiding such configuration when not needed in the probe function, but failed to do the same in the resume path. While doing so looks harmless by experience, let's clean the driver logic and avoid doing this useless configuration with Armada 3700 SoCs. Because the logic is very similar between these two places, it has been decided to factorize this code and put it in a "Armada 38x configuration function". This function is part of a new (per-compatible) platform data structure, so that the addition of such configuration function for Armada 3700 will be eased. Fixes: 15d3ce7b63bd ("ata: ahci_mvebu: add support for Armada 3700 variant") Signed-off-by: Miquel Raynal Signed-off-by: Jens Axboe --- drivers/ata/ahci_mvebu.c | 68 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c index 128d6f22926d..7839a5df1fd2 100644 --- a/drivers/ata/ahci_mvebu.c +++ b/drivers/ata/ahci_mvebu.c @@ -28,6 +28,10 @@ #define AHCI_WINDOW_BASE(win) (0x64 + ((win) << 4)) #define AHCI_WINDOW_SIZE(win) (0x68 + ((win) << 4)) +struct ahci_mvebu_plat_data { + int (*plat_config)(struct ahci_host_priv *hpriv); +}; + static void ahci_mvebu_mbus_config(struct ahci_host_priv *hpriv, const struct mbus_dram_target_info *dram) { @@ -62,6 +66,22 @@ static void ahci_mvebu_regret_option(struct ahci_host_priv *hpriv) writel(0x80, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA); } +static int ahci_mvebu_armada_380_config(struct ahci_host_priv *hpriv) +{ + const struct mbus_dram_target_info *dram; + int rc = 0; + + dram = mv_mbus_dram_info(); + if (dram) + ahci_mvebu_mbus_config(hpriv, dram); + else + rc = -ENODEV; + + ahci_mvebu_regret_option(hpriv); + + return rc; +} + /** * ahci_mvebu_stop_engine * @@ -126,13 +146,10 @@ static int ahci_mvebu_resume(struct platform_device *pdev) { struct ata_host *host = platform_get_drvdata(pdev); struct ahci_host_priv *hpriv = host->private_data; - const struct mbus_dram_target_info *dram; + const struct ahci_mvebu_plat_data *pdata = hpriv->plat_data; - dram = mv_mbus_dram_info(); - if (dram) - ahci_mvebu_mbus_config(hpriv, dram); - - ahci_mvebu_regret_option(hpriv); + if (pdata->plat_config) + pdata->plat_config(hpriv); return ahci_platform_resume_host(&pdev->dev); } @@ -154,28 +171,31 @@ static struct scsi_host_template ahci_platform_sht = { static int ahci_mvebu_probe(struct platform_device *pdev) { + const struct ahci_mvebu_plat_data *pdata; struct ahci_host_priv *hpriv; - const struct mbus_dram_target_info *dram; int rc; + pdata = of_device_get_match_data(&pdev->dev); + if (!pdata) + return -EINVAL; + hpriv = ahci_platform_get_resources(pdev, 0); if (IS_ERR(hpriv)) return PTR_ERR(hpriv); + hpriv->plat_data = (void *)pdata; + rc = ahci_platform_enable_resources(hpriv); if (rc) return rc; hpriv->stop_engine = ahci_mvebu_stop_engine; - if (of_device_is_compatible(pdev->dev.of_node, - "marvell,armada-380-ahci")) { - dram = mv_mbus_dram_info(); - if (!dram) - return -ENODEV; - - ahci_mvebu_mbus_config(hpriv, dram); - ahci_mvebu_regret_option(hpriv); + pdata = hpriv->plat_data; + if (pdata->plat_config) { + rc = pdata->plat_config(hpriv); + if (rc) + goto disable_resources; } rc = ahci_platform_init_host(pdev, hpriv, &ahci_mvebu_port_info, @@ -190,9 +210,23 @@ disable_resources: return rc; } +static const struct ahci_mvebu_plat_data ahci_mvebu_armada_380_plat_data = { + .plat_config = ahci_mvebu_armada_380_config, +}; + +static const struct ahci_mvebu_plat_data ahci_mvebu_armada_3700_plat_data = { + .plat_config = NULL, +}; + static const struct of_device_id ahci_mvebu_of_match[] = { - { .compatible = "marvell,armada-380-ahci", }, - { .compatible = "marvell,armada-3700-ahci", }, + { + .compatible = "marvell,armada-380-ahci", + .data = &ahci_mvebu_armada_380_plat_data, + }, + { + .compatible = "marvell,armada-3700-ahci", + .data = &ahci_mvebu_armada_3700_plat_data, + }, { }, }; MODULE_DEVICE_TABLE(of, ahci_mvebu_of_match); -- cgit v1.2.3 From 2f558bc3f33ca344489cec2218545741028b6a70 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 4 Dec 2018 20:28:28 +0100 Subject: ata: ahci: mvebu: add Armada 3700 initialization needed for S2RAM A3700 comphy initialization is done in the firmware (TF-A). Looking at the SATA PHY initialization routine, there is a comment about "vendor specific" registers. Two registers are mentioned. They are not initialized there in the firmware because they are AHCI related, while the firmware at this location does only PHY configuration. The solution to avoid doing such initialization is relying on U-Boot. While this work at boot time, U-Boot is definitely not going to run during a resume after suspending to RAM. Two possible solutions were considered: * Fixing the firmware. * Fixing the kernel driver. The first solution would take ages to propagate, while the second solution is easy to implement as the driver as been a little bit reworked to prepare for such platform configuration. Hence, this patch adds an Armada 3700 configuration function to set these two registers both at boot time (in the probe) and after a suspend (in the resume path). Signed-off-by: Miquel Raynal Signed-off-by: Jens Axboe --- drivers/ata/ahci_mvebu.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'drivers') diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c index 7839a5df1fd2..bbab688d3c34 100644 --- a/drivers/ata/ahci_mvebu.c +++ b/drivers/ata/ahci_mvebu.c @@ -82,6 +82,19 @@ static int ahci_mvebu_armada_380_config(struct ahci_host_priv *hpriv) return rc; } +static int ahci_mvebu_armada_3700_config(struct ahci_host_priv *hpriv) +{ + u32 reg; + + writel(0, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_ADDR); + + reg = readl(hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA); + reg |= BIT(6); + writel(reg, hpriv->mmio + AHCI_VENDOR_SPECIFIC_0_DATA); + + return 0; +} + /** * ahci_mvebu_stop_engine * @@ -148,8 +161,7 @@ static int ahci_mvebu_resume(struct platform_device *pdev) struct ahci_host_priv *hpriv = host->private_data; const struct ahci_mvebu_plat_data *pdata = hpriv->plat_data; - if (pdata->plat_config) - pdata->plat_config(hpriv); + pdata->plat_config(hpriv); return ahci_platform_resume_host(&pdev->dev); } @@ -191,12 +203,9 @@ static int ahci_mvebu_probe(struct platform_device *pdev) hpriv->stop_engine = ahci_mvebu_stop_engine; - pdata = hpriv->plat_data; - if (pdata->plat_config) { - rc = pdata->plat_config(hpriv); - if (rc) - goto disable_resources; - } + rc = pdata->plat_config(hpriv); + if (rc) + goto disable_resources; rc = ahci_platform_init_host(pdev, hpriv, &ahci_mvebu_port_info, &ahci_platform_sht); @@ -215,7 +224,7 @@ static const struct ahci_mvebu_plat_data ahci_mvebu_armada_380_plat_data = { }; static const struct ahci_mvebu_plat_data ahci_mvebu_armada_3700_plat_data = { - .plat_config = NULL, + .plat_config = ahci_mvebu_armada_3700_config, }; static const struct of_device_id ahci_mvebu_of_match[] = { -- cgit v1.2.3 From bde0b5c109e8b22b57745e3b9914f9e87ad857ea Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 4 Dec 2018 20:28:29 +0100 Subject: ata: ahci: mvebu: request PHY suspend/resume for Armada 3700 A feature has been added in the libahci driver: the possibility to set a new flag in hpriv->flags to let the core handle PHY suspend/resume automatically. Make use of this feature to make suspend to RAM work with SATA drives on A3700. Signed-off-by: Miquel Raynal Signed-off-by: Jens Axboe --- drivers/ata/ahci_mvebu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/ata/ahci_mvebu.c b/drivers/ata/ahci_mvebu.c index bbab688d3c34..d4bba3ace45d 100644 --- a/drivers/ata/ahci_mvebu.c +++ b/drivers/ata/ahci_mvebu.c @@ -30,6 +30,7 @@ struct ahci_mvebu_plat_data { int (*plat_config)(struct ahci_host_priv *hpriv); + unsigned int flags; }; static void ahci_mvebu_mbus_config(struct ahci_host_priv *hpriv, @@ -195,6 +196,7 @@ static int ahci_mvebu_probe(struct platform_device *pdev) if (IS_ERR(hpriv)) return PTR_ERR(hpriv); + hpriv->flags |= pdata->flags; hpriv->plat_data = (void *)pdata; rc = ahci_platform_enable_resources(hpriv); @@ -225,6 +227,7 @@ static const struct ahci_mvebu_plat_data ahci_mvebu_armada_380_plat_data = { static const struct ahci_mvebu_plat_data ahci_mvebu_armada_3700_plat_data = { .plat_config = ahci_mvebu_armada_3700_config, + .flags = AHCI_HFLAG_SUSPEND_PHYS, }; static const struct of_device_id ahci_mvebu_of_match[] = { -- cgit v1.2.3