From 43e2d08d0790a09ee1d2c838e33343ee1bcaf610 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 25 Feb 2019 13:00:04 +0200 Subject: nvme: avoid double dereference to convert le to cpu Use le16_to_cpu instead of le16_to_cpup and le64_to_cpu instead of le64_to_cpup. This will also align the code to nvme-core driver convention. Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 470601980794..b5939112b9b6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1588,7 +1588,7 @@ static bool nvme_ns_ids_equal(struct nvme_ns_ids *a, struct nvme_ns_ids *b) static void nvme_update_disk_info(struct gendisk *disk, struct nvme_ns *ns, struct nvme_id_ns *id) { - sector_t capacity = le64_to_cpup(&id->nsze) << (ns->lba_shift - 9); + sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9); unsigned short bs = 1 << ns->lba_shift; blk_mq_freeze_queue(disk->queue); @@ -2549,7 +2549,7 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->crdt[2] = le16_to_cpu(id->crdt3); ctrl->oacs = le16_to_cpu(id->oacs); - ctrl->oncs = le16_to_cpup(&id->oncs); + ctrl->oncs = le16_to_cpu(id->oncs); ctrl->oaes = le32_to_cpu(id->oaes); atomic_set(&ctrl->abort_limit, id->acl + 1); ctrl->vwc = id->vwc; -- cgit v1.2.3 From cfe03c2ec46200dfefa5167e6a2bb50c0426c5f4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 12 Mar 2019 18:05:10 +0100 Subject: nvmet: avoid double errno conversions Use errno_to_nvme_status to convert from a negative errno to a nvme status field instead of going through a blk_status_t. Also remove the pointless status variable in nvmet_bdev_execute_write_zeroes. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/target/io-cmd-bdev.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c index a065dbfc43b1..3efc52f9c309 100644 --- a/drivers/nvme/target/io-cmd-bdev.c +++ b/drivers/nvme/target/io-cmd-bdev.c @@ -196,7 +196,7 @@ static u16 nvmet_bdev_discard_range(struct nvmet_req *req, GFP_KERNEL, 0, bio); if (ret && ret != -EOPNOTSUPP) { req->error_slba = le64_to_cpu(range->slba); - return blk_to_nvme_status(req, errno_to_blk_status(ret)); + return errno_to_nvme_status(req, ret); } return NVME_SC_SUCCESS; } @@ -252,7 +252,6 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req) { struct nvme_write_zeroes_cmd *write_zeroes = &req->cmd->write_zeroes; struct bio *bio = NULL; - u16 status = NVME_SC_SUCCESS; sector_t sector; sector_t nr_sector; int ret; @@ -264,13 +263,12 @@ static void nvmet_bdev_execute_write_zeroes(struct nvmet_req *req) ret = __blkdev_issue_zeroout(req->ns->bdev, sector, nr_sector, GFP_KERNEL, &bio, 0); - status = blk_to_nvme_status(req, errno_to_blk_status(ret)); if (bio) { bio->bi_private = req; bio->bi_end_io = nvmet_bio_done; submit_bio(bio); } else { - nvmet_req_complete(req, status); + nvmet_req_complete(req, errno_to_nvme_status(req, ret)); } } -- cgit v1.2.3 From 6b80f1d2cc5a76f0121a43a612515bc8a8976e66 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sat, 23 Feb 2019 12:51:08 -0600 Subject: nvmet-fc: use zero-sized array and struct_size() in kzalloc() Update the code to use a zero-sized array instead of a pointer in structure nvmet_fc_tgt_queue and use struct_size() in kzalloc(). Notice that one of the more common cases of allocation size calculations is finding the size of a structure that has a zero-sized array at the end, along with memory for some number of elements for that array. For example: struct foo { int stuff; struct boo entry[]; }; instance = kzalloc(sizeof(struct foo) + sizeof(struct boo) * count, GFP_KERNEL); Instead of leaving these open-coded and prone to type mistakes, we can now use the new struct_size() helper: instance = kzalloc(struct_size(instance, entry, count), GFP_KERNEL); This code was detected with the help of Coccinelle. Signed-off-by: Gustavo A. R. Silva Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 98b7b1f4ee96..9369a11fe7a9 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -128,12 +128,12 @@ struct nvmet_fc_tgt_queue { struct nvmet_cq nvme_cq; struct nvmet_sq nvme_sq; struct nvmet_fc_tgt_assoc *assoc; - struct nvmet_fc_fcp_iod *fod; /* array of fcp_iods */ struct list_head fod_list; struct list_head pending_cmd_list; struct list_head avail_defer_list; struct workqueue_struct *work_q; struct kref ref; + struct nvmet_fc_fcp_iod fod[]; /* array of fcp_iods */ } __aligned(sizeof(unsigned long long)); struct nvmet_fc_tgt_assoc { @@ -588,9 +588,7 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, if (qid > NVMET_NR_QUEUES) return NULL; - queue = kzalloc((sizeof(*queue) + - (sizeof(struct nvmet_fc_fcp_iod) * sqsize)), - GFP_KERNEL); + queue = kzalloc(struct_size(queue, fod, sqsize), GFP_KERNEL); if (!queue) return NULL; @@ -603,7 +601,6 @@ nvmet_fc_alloc_target_queue(struct nvmet_fc_tgt_assoc *assoc, if (!queue->work_q) goto out_a_put; - queue->fod = (struct nvmet_fc_fcp_iod *)&queue[1]; queue->qid = qid; queue->sqsize = sqsize; queue->assoc = assoc; -- cgit v1.2.3 From 70583295388a3ceabc22bddeb16e788f58225d70 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 8 Mar 2019 15:41:21 -0800 Subject: nvmet-tcp: implement C2HData SUCCESS optimization TP 8000 says that the use of the SUCCESS flag depends on weather the controller support disabling sq_head pointer updates. Given that we support it by default, makes sense that we go the extra mile to actually use the SUCCESS flag. When we create the C2HData PDU header, we check if sqhd_disabled is set on our queue, if so, we set the SUCCESS flag in the PDU header and skip sending a completion response capsule. Signed-off-by: Sagi Grimberg Reviewed-by: Oliver Smith-Denny Tested-by: Oliver Smith-Denny Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index ad0df786fe93..0a941abf56ec 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -371,7 +371,8 @@ static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd) cmd->state = NVMET_TCP_SEND_DATA_PDU; pdu->hdr.type = nvme_tcp_c2h_data; - pdu->hdr.flags = NVME_TCP_F_DATA_LAST; + pdu->hdr.flags = NVME_TCP_F_DATA_LAST | (queue->nvme_sq.sqhd_disabled ? + NVME_TCP_F_DATA_SUCCESS : 0); pdu->hdr.hlen = sizeof(*pdu); pdu->hdr.pdo = pdu->hdr.hlen + hdgst; pdu->hdr.plen = @@ -542,8 +543,19 @@ static int nvmet_try_send_data(struct nvmet_tcp_cmd *cmd) cmd->state = NVMET_TCP_SEND_DDGST; cmd->offset = 0; } else { - nvmet_setup_response_pdu(cmd); + if (queue->nvme_sq.sqhd_disabled) { + cmd->queue->snd_cmd = NULL; + nvmet_tcp_put_cmd(cmd); + } else { + nvmet_setup_response_pdu(cmd); + } } + + if (queue->nvme_sq.sqhd_disabled) { + kfree(cmd->iov); + sgl_free(cmd->req.sg); + } + return 1; } @@ -619,7 +631,13 @@ static int nvmet_try_send_ddgst(struct nvmet_tcp_cmd *cmd) return ret; cmd->offset += ret; - nvmet_setup_response_pdu(cmd); + + if (queue->nvme_sq.sqhd_disabled) { + cmd->queue->snd_cmd = NULL; + nvmet_tcp_put_cmd(cmd); + } else { + nvmet_setup_response_pdu(cmd); + } return 1; } -- cgit v1.2.3 From 7c349dde26b75db3fa1863e36984ac2271cd797a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 8 Mar 2019 10:43:06 -0700 Subject: nvme-pci: use a flag for polled queues A negative value for the cq_vector used to mean the queue is either disabled or a polled queue. However, we have a queue enabled flag, so the cq_vector had been serving double duty. Don't overload the meaning of cq_vector. Use a flag specific to the polled queues instead. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 33 +++++++++++++-------------------- 1 file changed, 13 insertions(+), 20 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index a90cf5d63aac..4c0461bd6cfc 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -189,7 +189,7 @@ struct nvme_queue { dma_addr_t cq_dma_addr; u32 __iomem *q_db; u16 q_depth; - s16 cq_vector; + u16 cq_vector; u16 sq_tail; u16 last_sq_tail; u16 cq_head; @@ -200,6 +200,7 @@ struct nvme_queue { #define NVMEQ_ENABLED 0 #define NVMEQ_SQ_CMB 1 #define NVMEQ_DELETE_ERROR 2 +#define NVMEQ_POLLED 3 u32 *dbbuf_sq_db; u32 *dbbuf_cq_db; u32 *dbbuf_sq_ei; @@ -1088,7 +1089,7 @@ static int nvme_poll_irqdisable(struct nvme_queue *nvmeq, unsigned int tag) * using the CQ lock. For normal interrupt driven threads we have * to disable the interrupt to avoid racing with it. */ - if (nvmeq->cq_vector == -1) { + if (test_bit(NVMEQ_POLLED, &nvmeq->flags)) { spin_lock(&nvmeq->cq_poll_lock); found = nvme_process_cq(nvmeq, &start, &end, tag); spin_unlock(&nvmeq->cq_poll_lock); @@ -1148,7 +1149,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, struct nvme_command c; int flags = NVME_QUEUE_PHYS_CONTIG; - if (vector != -1) + if (!test_bit(NVMEQ_POLLED, &nvmeq->flags)) flags |= NVME_CQ_IRQ_ENABLED; /* @@ -1161,10 +1162,7 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, c.create_cq.cqid = cpu_to_le16(qid); c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); c.create_cq.cq_flags = cpu_to_le16(flags); - if (vector != -1) - c.create_cq.irq_vector = cpu_to_le16(vector); - else - c.create_cq.irq_vector = 0; + c.create_cq.irq_vector = cpu_to_le16(vector); return nvme_submit_sync_cmd(dev->ctrl.admin_q, &c, NULL, 0); } @@ -1410,10 +1408,8 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) nvmeq->dev->online_queues--; if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q); - if (nvmeq->cq_vector == -1) - return 0; - pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq); - nvmeq->cq_vector = -1; + if (!test_and_clear_bit(NVMEQ_POLLED, &nvmeq->flags)) + pci_free_irq(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector, nvmeq); return 0; } @@ -1507,7 +1503,6 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; nvmeq->q_depth = depth; nvmeq->qid = qid; - nvmeq->cq_vector = -1; dev->ctrl.queue_count++; return 0; @@ -1552,7 +1547,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) { struct nvme_dev *dev = nvmeq->dev; int result; - s16 vector; + u16 vector = 0; clear_bit(NVMEQ_DELETE_ERROR, &nvmeq->flags); @@ -1563,7 +1558,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) if (!polled) vector = dev->num_vecs == 1 ? 0 : qid; else - vector = -1; + set_bit(NVMEQ_POLLED, &nvmeq->flags); result = adapter_alloc_cq(dev, qid, nvmeq, vector); if (result) @@ -1578,7 +1573,8 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) nvmeq->cq_vector = vector; nvme_init_queue(nvmeq, qid); - if (vector != -1) { + if (!polled) { + nvmeq->cq_vector = vector; result = queue_request_irq(nvmeq); if (result < 0) goto release_sq; @@ -1588,7 +1584,6 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid, bool polled) return result; release_sq: - nvmeq->cq_vector = -1; dev->online_queues--; adapter_delete_sq(dev, qid); release_cq: @@ -1730,7 +1725,7 @@ static int nvme_pci_configure_admin_queue(struct nvme_dev *dev) nvme_init_queue(nvmeq, 0); result = queue_request_irq(nvmeq); if (result) { - nvmeq->cq_vector = -1; + dev->online_queues--; return result; } @@ -2171,10 +2166,8 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * number of interrupts. */ result = queue_request_irq(adminq); - if (result) { - adminq->cq_vector = -1; + if (result) return result; - } set_bit(NVMEQ_ENABLED, &adminq->flags); result = nvme_create_io_queues(dev); -- cgit v1.2.3 From 88a041f4c1f6a21284c70b491929ed35336a0ea9 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 8 Mar 2019 10:43:11 -0700 Subject: nvme-pci: remove q_dmadev from nvme_queue We don't need to save the dma device as it's not used in the hot path and hasn't in a long time. Shrink the struct nvme_queue removing this unnecessary member. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 4c0461bd6cfc..8af2b10b4507 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -177,7 +177,6 @@ static inline struct nvme_dev *to_nvme_dev(struct nvme_ctrl *ctrl) * commands and one for I/O commands). */ struct nvme_queue { - struct device *q_dmadev; struct nvme_dev *dev; spinlock_t sq_lock; struct nvme_command *sq_cmds; @@ -1369,16 +1368,16 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) static void nvme_free_queue(struct nvme_queue *nvmeq) { - dma_free_coherent(nvmeq->q_dmadev, CQ_SIZE(nvmeq->q_depth), + dma_free_coherent(nvmeq->dev->dev, CQ_SIZE(nvmeq->q_depth), (void *)nvmeq->cqes, nvmeq->cq_dma_addr); if (!nvmeq->sq_cmds) return; if (test_and_clear_bit(NVMEQ_SQ_CMB, &nvmeq->flags)) { - pci_free_p2pmem(to_pci_dev(nvmeq->q_dmadev), + pci_free_p2pmem(to_pci_dev(nvmeq->dev->dev), nvmeq->sq_cmds, SQ_SIZE(nvmeq->q_depth)); } else { - dma_free_coherent(nvmeq->q_dmadev, SQ_SIZE(nvmeq->q_depth), + dma_free_coherent(nvmeq->dev->dev, SQ_SIZE(nvmeq->q_depth), nvmeq->sq_cmds, nvmeq->sq_dma_addr); } } @@ -1494,7 +1493,6 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth) if (nvme_alloc_sq_cmds(dev, nvmeq, qid, depth)) goto free_cqdma; - nvmeq->q_dmadev = dev->dev; nvmeq->dev = dev; spin_lock_init(&nvmeq->sq_lock); spin_lock_init(&nvmeq->cq_poll_lock); -- cgit v1.2.3 From 39f8e36401142d73e33a954ac4bdf844fb5de9ae Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 8 Mar 2019 10:43:13 -0700 Subject: nvme-pci: remove unused nvme_iod member Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8af2b10b4507..0cba927224b4 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -220,7 +220,6 @@ struct nvme_iod { int aborted; int npages; /* In the PRP list. 0 means small pool in use */ int nents; /* Used in scatterlist */ - int length; /* Of data, in bytes */ dma_addr_t first_dma; struct scatterlist meta_sg; /* metadata requires single contiguous buffer */ struct scatterlist *sg; @@ -603,7 +602,6 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev) iod->aborted = 0; iod->npages = -1; iod->nents = 0; - iod->length = size; return BLK_STS_OK; } -- cgit v1.2.3 From 9b048119a153590b934ef49aae309b723587f527 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2019 08:04:01 -0700 Subject: nvme-pci: remove nvme_init_iod nvme_init_iod should really be split into two parts: initialize a few general iod fields, which can easily be done at the beginning of nvme_queue_rq, and allocating the scatterlist if needed, which logically belongs into nvme_map_data with the code making use of it. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 56 +++++++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 34 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 0cba927224b4..2102a107e09b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -208,10 +208,10 @@ struct nvme_queue { }; /* - * The nvme_iod describes the data in an I/O, including the list of PRP - * entries. You can't see it in this data structure because C doesn't let - * me express that. Use nvme_init_iod to ensure there's enough space - * allocated to store the PRP list. + * The nvme_iod describes the data in an I/O. + * + * The sg pointer contains the list of PRP/SGL chunk allocations in addition + * to the actual struct scatterlist. */ struct nvme_iod { struct nvme_request req; @@ -583,29 +583,6 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) return true; } -static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev) -{ - struct nvme_iod *iod = blk_mq_rq_to_pdu(rq); - int nseg = blk_rq_nr_phys_segments(rq); - unsigned int size = blk_rq_payload_bytes(rq); - - iod->use_sgl = nvme_pci_use_sgls(dev, rq); - - if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { - iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); - if (!iod->sg) - return BLK_STS_RESOURCE; - } else { - iod->sg = iod->inline_sg; - } - - iod->aborted = 0; - iod->npages = -1; - iod->nents = 0; - - return BLK_STS_OK; -} - static void nvme_free_iod(struct nvme_dev *dev, struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); @@ -837,6 +814,17 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, blk_status_t ret = BLK_STS_IOERR; int nr_mapped; + if (blk_rq_payload_bytes(req) > NVME_INT_BYTES(dev) || + blk_rq_nr_phys_segments(req) > NVME_INT_PAGES) { + iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); + if (!iod->sg) + return BLK_STS_RESOURCE; + } else { + iod->sg = iod->inline_sg; + } + + iod->use_sgl = nvme_pci_use_sgls(dev, req); + sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); iod->nents = blk_rq_map_sg(q, req, iod->sg); if (!iod->nents) @@ -881,6 +869,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, out_unmap: dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); out: + nvme_free_iod(dev, req); return ret; } @@ -913,9 +902,14 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, struct nvme_queue *nvmeq = hctx->driver_data; struct nvme_dev *dev = nvmeq->dev; struct request *req = bd->rq; + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); struct nvme_command cmnd; blk_status_t ret; + iod->aborted = 0; + iod->npages = -1; + iod->nents = 0; + /* * We should not need to do this, but we're still using this to * ensure we can drain requests on a dying queue. @@ -927,21 +921,15 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, if (ret) return ret; - ret = nvme_init_iod(req, dev); - if (ret) - goto out_free_cmd; - if (blk_rq_nr_phys_segments(req)) { ret = nvme_map_data(dev, req, &cmnd); if (ret) - goto out_cleanup_iod; + goto out_free_cmd; } blk_mq_start_request(req); nvme_submit_cmd(nvmeq, &cmnd, bd->last); return BLK_STS_OK; -out_cleanup_iod: - nvme_free_iod(dev, req); out_free_cmd: nvme_cleanup_cmd(req); return ret; -- cgit v1.2.3 From 915f04c93db4e3a7388c8ad8ddfc28830e4cbce3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2019 08:13:03 -0700 Subject: nvme-pci: move the call to nvme_cleanup_cmd out of nvme_unmap_data Cleaning up the command setup isn't related to unmapping data, and disentangling them will simplify error handling a bit down the road. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2102a107e09b..2af6cfbd77ec 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -888,7 +888,6 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir); } - nvme_cleanup_cmd(req); nvme_free_iod(dev, req); } @@ -939,6 +938,7 @@ static void nvme_pci_complete_rq(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + nvme_cleanup_cmd(req); nvme_unmap_data(iod->nvmeq->dev, req); nvme_complete_rq(req); } -- cgit v1.2.3 From 7fe07d14f71fabef642a478626248a9121e95b7b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2019 08:15:19 -0700 Subject: nvme-pci: merge nvme_free_iod into nvme_unmap_data This means we now have a function that undoes everything nvme_map_data does and we can simplify the error handling a bit. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 44 +++++++++++++++++--------------------------- 1 file changed, 17 insertions(+), 27 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 2af6cfbd77ec..de199aff8d05 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -583,14 +583,24 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req) return true; } -static void nvme_free_iod(struct nvme_dev *dev, struct request *req) +static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + enum dma_data_direction dma_dir = rq_data_dir(req) ? + DMA_TO_DEVICE : DMA_FROM_DEVICE; const int last_prp = dev->ctrl.page_size / sizeof(__le64) - 1; dma_addr_t dma_addr = iod->first_dma, next_dma_addr; - int i; + if (iod->nents) { + /* P2PDMA requests do not need to be unmapped */ + if (!is_pci_p2pdma_page(sg_page(iod->sg))) + dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); + + if (blk_integrity_rq(req)) + dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir); + } + if (iod->npages == 0) dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], dma_addr); @@ -847,50 +857,30 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); if (ret != BLK_STS_OK) - goto out_unmap; + goto out; ret = BLK_STS_IOERR; if (blk_integrity_rq(req)) { if (blk_rq_count_integrity_sg(q, req->bio) != 1) - goto out_unmap; + goto out; sg_init_table(&iod->meta_sg, 1); if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1) - goto out_unmap; + goto out; if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir)) - goto out_unmap; + goto out; cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg)); } return BLK_STS_OK; -out_unmap: - dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); out: - nvme_free_iod(dev, req); + nvme_unmap_data(dev, req); return ret; } -static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) -{ - struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - enum dma_data_direction dma_dir = rq_data_dir(req) ? - DMA_TO_DEVICE : DMA_FROM_DEVICE; - - if (iod->nents) { - /* P2PDMA requests do not need to be unmapped */ - if (!is_pci_p2pdma_page(sg_page(iod->sg))) - dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); - - if (blk_integrity_rq(req)) - dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir); - } - - nvme_free_iod(dev, req); -} - /* * NOTE: ns is NULL when called on the admin queue. */ -- cgit v1.2.3 From b15c592de37ed9d71499a3b8a750d1b235fcba3d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2019 08:52:21 -0700 Subject: nvme-pci: only call nvme_unmap_data for requests transferring data This mirrors how nvme_map_pci is called and will allow simplifying some checks in nvme_unmap_pci later on. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index de199aff8d05..030ee94452dd 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -929,7 +929,8 @@ static void nvme_pci_complete_rq(struct request *req) struct nvme_iod *iod = blk_mq_rq_to_pdu(req); nvme_cleanup_cmd(req); - nvme_unmap_data(iod->nvmeq->dev, req); + if (blk_rq_nr_phys_segments(req)) + nvme_unmap_data(iod->nvmeq->dev, req); nvme_complete_rq(req); } -- cgit v1.2.3 From 783b94bd9250478154904fa782d2cfc46336cdf6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2019 08:19:18 -0700 Subject: nvme-pci: do not build a scatterlist to map metadata We always have exactly one segment, so we can simply call dma_map_bvec. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 030ee94452dd..0679ac7fed19 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -221,7 +221,7 @@ struct nvme_iod { int npages; /* In the PRP list. 0 means small pool in use */ int nents; /* Used in scatterlist */ dma_addr_t first_dma; - struct scatterlist meta_sg; /* metadata requires single contiguous buffer */ + dma_addr_t meta_dma; struct scatterlist *sg; struct scatterlist inline_sg[0]; }; @@ -592,13 +592,16 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) dma_addr_t dma_addr = iod->first_dma, next_dma_addr; int i; + if (blk_integrity_rq(req)) { + dma_unmap_page(dev->dev, iod->meta_dma, + rq_integrity_vec(req)->bv_len, dma_dir); + } + if (iod->nents) { /* P2PDMA requests do not need to be unmapped */ if (!is_pci_p2pdma_page(sg_page(iod->sg))) dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); - if (blk_integrity_rq(req)) - dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir); } if (iod->npages == 0) @@ -861,17 +864,11 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, ret = BLK_STS_IOERR; if (blk_integrity_rq(req)) { - if (blk_rq_count_integrity_sg(q, req->bio) != 1) - goto out; - - sg_init_table(&iod->meta_sg, 1); - if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1) - goto out; - - if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir)) + iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req), + dma_dir, 0); + if (dma_mapping_error(dev->dev, iod->meta_dma)) goto out; - - cmnd->rw.metadata = cpu_to_le64(sg_dma_address(&iod->meta_sg)); + cmnd->rw.metadata = cpu_to_le64(iod->meta_dma); } return BLK_STS_OK; -- cgit v1.2.3 From 4aedb705437f6f98b45f45c394e6803ca67abd33 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2019 09:46:28 -0700 Subject: nvme-pci: split metadata handling from nvme_map_data / nvme_unmap_data This prepares for some bigger changes to the data mapping helpers. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 48 +++++++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 21 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 0679ac7fed19..10e6b5d055e9 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -592,11 +592,6 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) dma_addr_t dma_addr = iod->first_dma, next_dma_addr; int i; - if (blk_integrity_rq(req)) { - dma_unmap_page(dev->dev, iod->meta_dma, - rq_integrity_vec(req)->bv_len, dma_dir); - } - if (iod->nents) { /* P2PDMA requests do not need to be unmapped */ if (!is_pci_p2pdma_page(sg_page(iod->sg))) @@ -858,24 +853,23 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped); else ret = nvme_pci_setup_prps(dev, req, &cmnd->rw); - +out: if (ret != BLK_STS_OK) - goto out; - - ret = BLK_STS_IOERR; - if (blk_integrity_rq(req)) { - iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req), - dma_dir, 0); - if (dma_mapping_error(dev->dev, iod->meta_dma)) - goto out; - cmnd->rw.metadata = cpu_to_le64(iod->meta_dma); - } + nvme_unmap_data(dev, req); + return ret; +} - return BLK_STS_OK; +static blk_status_t nvme_map_metadata(struct nvme_dev *dev, struct request *req, + struct nvme_command *cmnd) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); -out: - nvme_unmap_data(dev, req); - return ret; + iod->meta_dma = dma_map_bvec(dev->dev, rq_integrity_vec(req), + rq_dma_dir(req), 0); + if (dma_mapping_error(dev->dev, iod->meta_dma)) + return BLK_STS_IOERR; + cmnd->rw.metadata = cpu_to_le64(iod->meta_dma); + return 0; } /* @@ -913,9 +907,17 @@ static blk_status_t nvme_queue_rq(struct blk_mq_hw_ctx *hctx, goto out_free_cmd; } + if (blk_integrity_rq(req)) { + ret = nvme_map_metadata(dev, req, &cmnd); + if (ret) + goto out_unmap_data; + } + blk_mq_start_request(req); nvme_submit_cmd(nvmeq, &cmnd, bd->last); return BLK_STS_OK; +out_unmap_data: + nvme_unmap_data(dev, req); out_free_cmd: nvme_cleanup_cmd(req); return ret; @@ -924,10 +926,14 @@ out_free_cmd: static void nvme_pci_complete_rq(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + struct nvme_dev *dev = iod->nvmeq->dev; nvme_cleanup_cmd(req); + if (blk_integrity_rq(req)) + dma_unmap_page(dev->dev, iod->meta_dma, + rq_integrity_vec(req)->bv_len, rq_data_dir(req)); if (blk_rq_nr_phys_segments(req)) - nvme_unmap_data(iod->nvmeq->dev, req); + nvme_unmap_data(dev, req); nvme_complete_rq(req); } -- cgit v1.2.3 From d43f1ccfad053dbefba1d15443cdc36ca60958f0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Mar 2019 05:46:58 -0700 Subject: nvme-pci: remove the inline scatterlist optimization We'll have a better way to optimize for small I/O that doesn't require it soon, so remove the existing inline_sg case to make that optimization easier to implement. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 38 ++++++-------------------------------- 1 file changed, 6 insertions(+), 32 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 10e6b5d055e9..bd7e4209ab36 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -223,7 +223,6 @@ struct nvme_iod { dma_addr_t first_dma; dma_addr_t meta_dma; struct scatterlist *sg; - struct scatterlist inline_sg[0]; }; /* @@ -370,12 +369,6 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db, return true; } -/* - * Max size of iod being embedded in the request payload - */ -#define NVME_INT_PAGES 2 -#define NVME_INT_BYTES(dev) (NVME_INT_PAGES * (dev)->ctrl.page_size) - /* * Will slightly overestimate the number of pages needed. This is OK * as it only leads to a small amount of wasted memory for the lifetime of @@ -410,15 +403,6 @@ static unsigned int nvme_pci_iod_alloc_size(struct nvme_dev *dev, return alloc_size + sizeof(struct scatterlist) * nseg; } -static unsigned int nvme_pci_cmd_size(struct nvme_dev *dev, bool use_sgl) -{ - unsigned int alloc_size = nvme_pci_iod_alloc_size(dev, - NVME_INT_BYTES(dev), NVME_INT_PAGES, - use_sgl); - - return sizeof(struct nvme_iod) + alloc_size; -} - static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { @@ -621,8 +605,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) dma_addr = next_dma_addr; } - if (iod->sg != iod->inline_sg) - mempool_free(iod->sg, dev->iod_mempool); + mempool_free(iod->sg, dev->iod_mempool); } static void nvme_print_sgl(struct scatterlist *sgl, int nents) @@ -822,14 +805,9 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, blk_status_t ret = BLK_STS_IOERR; int nr_mapped; - if (blk_rq_payload_bytes(req) > NVME_INT_BYTES(dev) || - blk_rq_nr_phys_segments(req) > NVME_INT_PAGES) { - iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); - if (!iod->sg) - return BLK_STS_RESOURCE; - } else { - iod->sg = iod->inline_sg; - } + iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); + if (!iod->sg) + return BLK_STS_RESOURCE; iod->use_sgl = nvme_pci_use_sgls(dev, req); @@ -1612,7 +1590,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) dev->admin_tagset.queue_depth = NVME_AQ_MQ_TAG_DEPTH; dev->admin_tagset.timeout = ADMIN_TIMEOUT; dev->admin_tagset.numa_node = dev_to_node(dev->dev); - dev->admin_tagset.cmd_size = nvme_pci_cmd_size(dev, false); + dev->admin_tagset.cmd_size = sizeof(struct nvme_iod); dev->admin_tagset.flags = BLK_MQ_F_NO_SCHED; dev->admin_tagset.driver_data = dev; @@ -2257,11 +2235,7 @@ static int nvme_dev_add(struct nvme_dev *dev) dev->tagset.numa_node = dev_to_node(dev->dev); dev->tagset.queue_depth = min_t(int, dev->q_depth, BLK_MQ_MAX_DEPTH) - 1; - dev->tagset.cmd_size = nvme_pci_cmd_size(dev, false); - if ((dev->ctrl.sgls & ((1 << 0) | (1 << 1))) && sgl_threshold) { - dev->tagset.cmd_size = max(dev->tagset.cmd_size, - nvme_pci_cmd_size(dev, true)); - } + dev->tagset.cmd_size = sizeof(struct nvme_iod); dev->tagset.flags = BLK_MQ_F_SHOULD_MERGE; dev->tagset.driver_data = dev; -- cgit v1.2.3 From dff824b2aadb7808f50ceb0927acaec5ad750ce7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Mar 2019 05:49:34 -0700 Subject: nvme-pci: optimize mapping of small single segment requests If a request is single segment and fits into one or two PRP entries we do not have to create a scatterlist for it, but can just map the bio_vec directly. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index bd7e4209ab36..59731264b052 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -221,6 +221,7 @@ struct nvme_iod { int npages; /* In the PRP list. 0 means small pool in use */ int nents; /* Used in scatterlist */ dma_addr_t first_dma; + unsigned int dma_len; /* length of single DMA segment mapping */ dma_addr_t meta_dma; struct scatterlist *sg; }; @@ -576,13 +577,18 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) dma_addr_t dma_addr = iod->first_dma, next_dma_addr; int i; - if (iod->nents) { - /* P2PDMA requests do not need to be unmapped */ - if (!is_pci_p2pdma_page(sg_page(iod->sg))) - dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); - + if (iod->dma_len) { + dma_unmap_page(dev->dev, dma_addr, iod->dma_len, dma_dir); + return; } + WARN_ON_ONCE(!iod->nents); + + /* P2PDMA requests do not need to be unmapped */ + if (!is_pci_p2pdma_page(sg_page(iod->sg))) + dma_unmap_sg(dev->dev, iod->sg, iod->nents, rq_dma_dir(req)); + + if (iod->npages == 0) dma_pool_free(dev->prp_small_pool, nvme_pci_iod_list(req)[0], dma_addr); @@ -795,6 +801,24 @@ static blk_status_t nvme_pci_setup_sgls(struct nvme_dev *dev, return BLK_STS_OK; } +static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, + struct request *req, struct nvme_rw_command *cmnd, + struct bio_vec *bv) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + unsigned int first_prp_len = dev->ctrl.page_size - bv->bv_offset; + + iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0); + if (dma_mapping_error(dev->dev, iod->first_dma)) + return BLK_STS_RESOURCE; + iod->dma_len = bv->bv_len; + + cmnd->dptr.prp1 = cpu_to_le64(iod->first_dma); + if (bv->bv_len > first_prp_len) + cmnd->dptr.prp2 = cpu_to_le64(iod->first_dma + first_prp_len); + return 0; +} + static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, struct nvme_command *cmnd) { @@ -805,6 +829,17 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, blk_status_t ret = BLK_STS_IOERR; int nr_mapped; + if (blk_rq_nr_phys_segments(req) == 1) { + struct bio_vec bv = req_bvec(req); + + if (!is_pci_p2pdma_page(bv.bv_page)) { + if (bv.bv_offset + bv.bv_len <= dev->ctrl.page_size * 2) + return nvme_setup_prp_simple(dev, req, + &cmnd->rw, &bv); + } + } + + iod->dma_len = 0; iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); if (!iod->sg) return BLK_STS_RESOURCE; -- cgit v1.2.3 From 297910571f08f1d7e398793df6e606ebb375a3f1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Mar 2019 05:54:18 -0700 Subject: nvme-pci: optimize mapping single segment requests using SGLs If the controller supports SGLs we can take another short cut for single segment request, given that we can always map those without another indirection structure, and thus don't need to create a scatterlist structure. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 59731264b052..82aa5cb21828 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -819,6 +819,23 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev, return 0; } +static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev, + struct request *req, struct nvme_rw_command *cmnd, + struct bio_vec *bv) +{ + struct nvme_iod *iod = blk_mq_rq_to_pdu(req); + + iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0); + if (dma_mapping_error(dev->dev, iod->first_dma)) + return BLK_STS_RESOURCE; + iod->dma_len = bv->bv_len; + + cmnd->dptr.sgl.addr = cpu_to_le64(iod->first_dma); + cmnd->dptr.sgl.length = cpu_to_le32(iod->dma_len); + cmnd->dptr.sgl.type = NVME_SGL_FMT_DATA_DESC << 4; + return 0; +} + static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, struct nvme_command *cmnd) { @@ -836,6 +853,11 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, if (bv.bv_offset + bv.bv_len <= dev->ctrl.page_size * 2) return nvme_setup_prp_simple(dev, req, &cmnd->rw, &bv); + + if (iod->nvmeq->qid && + dev->ctrl.sgls & ((1 << 0) | (1 << 1))) + return nvme_setup_sgl_simple(dev, req, + &cmnd->rw, &bv); } } -- cgit v1.2.3 From 70479b71bc80ae6f63c8d6644cc76dff99f79686 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 5 Mar 2019 05:59:02 -0700 Subject: nvme-pci: tidy up nvme_map_data Remove two pointless local variables, remove ret assignment that is never used, move the use_sgl initialization closer to where it is used. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/pci.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 82aa5cb21828..c1eecde6b853 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -840,10 +840,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, struct nvme_command *cmnd) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - struct request_queue *q = req->q; - enum dma_data_direction dma_dir = rq_data_dir(req) ? - DMA_TO_DEVICE : DMA_FROM_DEVICE; - blk_status_t ret = BLK_STS_IOERR; + blk_status_t ret = BLK_STS_RESOURCE; int nr_mapped; if (blk_rq_nr_phys_segments(req) == 1) { @@ -865,25 +862,21 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); if (!iod->sg) return BLK_STS_RESOURCE; - - iod->use_sgl = nvme_pci_use_sgls(dev, req); - sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); - iod->nents = blk_rq_map_sg(q, req, iod->sg); + iod->nents = blk_rq_map_sg(req->q, req, iod->sg); if (!iod->nents) goto out; - ret = BLK_STS_RESOURCE; - if (is_pci_p2pdma_page(sg_page(iod->sg))) nr_mapped = pci_p2pdma_map_sg(dev->dev, iod->sg, iod->nents, - dma_dir); + rq_dma_dir(req)); else nr_mapped = dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, - dma_dir, DMA_ATTR_NO_WARN); + rq_dma_dir(req), DMA_ATTR_NO_WARN); if (!nr_mapped) goto out; + iod->use_sgl = nvme_pci_use_sgls(dev, req); if (iod->use_sgl) ret = nvme_pci_setup_sgls(dev, req, &cmnd->rw, nr_mapped); else -- cgit v1.2.3 From e84c2091a45228b62867ec0565898ef5404706a2 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 2 Apr 2019 14:52:47 +0300 Subject: nvmet: never fail double namespace enablement In case we create N namespaces while N < NVMET_MAX_NAMESPACES, we can perform "echo 1 > /enable" as much as we want. In case N == NVMET_MAX_NAMESPACES we fail. Make sure we have the same flow for any N. Signed-off-by: Max Gurtovoy Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index b3e765a95af8..4dc388a2ecb0 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -494,13 +494,14 @@ int nvmet_ns_enable(struct nvmet_ns *ns) int ret; mutex_lock(&subsys->lock); - ret = -EMFILE; - if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES) - goto out_unlock; ret = 0; if (ns->enabled) goto out_unlock; + ret = -EMFILE; + if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES) + goto out_unlock; + ret = nvmet_bdev_ns_enable(ns); if (ret == -ENOTBLK) ret = nvmet_file_ns_enable(ns); -- cgit v1.2.3 From 013a63ef4edcd2366299225c3b081102171e8fa9 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Tue, 2 Apr 2019 14:51:54 +0300 Subject: nvmet: add safety check for subsystem lock during nvmet_ns_changed we need to make sure that subsystem lock is taken during ctrl's list traversing. nvmet_ns_changed function is not static and can be used from various callers simultaneously. Signed-off-by: Max Gurtovoy Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 4dc388a2ecb0..4d8dd29479c0 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -214,6 +214,8 @@ void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) { struct nvmet_ctrl *ctrl; + lockdep_assert_held(&subsys->lock); + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid)); if (nvmet_aen_bit_disabled(ctrl, NVME_AEN_BIT_NS_ATTR)) -- cgit v1.2.3 From d0de579c043c3a2ab60ce75eb6cf4d414becc676 Mon Sep 17 00:00:00 2001 From: Kenneth Heitke Date: Thu, 4 Apr 2019 12:57:45 -0600 Subject: nvme: log the error status on Identify Namespace failure Identify Namespace failures are logged as a warning but there is not an indication of the cause for the failure. Update the log message to include the error status. Signed-off-by: Kenneth Heitke Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b5939112b9b6..ddb943395118 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1105,7 +1105,7 @@ static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id)); if (error) { - dev_warn(ctrl->device, "Identify namespace failed\n"); + dev_warn(ctrl->device, "Identify namespace failed (%d)\n", error); kfree(id); return NULL; } -- cgit v1.2.3 From 8dc2ed3f3e5ba245828ad89968f6818be8996e9d Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 8 Apr 2019 18:39:58 +0300 Subject: nvmet-rdma: remove p2p_client initialization from fast-path Initialize it during command allocation. Cc: Logan Gunthorpe Cc: Stephen Bates Signed-off-by: Max Gurtovoy Reviewed-by: Logan Gunthorpe Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index ef893addf341..b7275218dfa5 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -373,6 +373,7 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, if (ib_dma_mapping_error(ndev->device, r->send_sge.addr)) goto out_free_rsp; + r->req.p2p_client = &ndev->device->dev; r->send_sge.length = sizeof(*r->req.rsp); r->send_sge.lkey = ndev->pd->local_dma_lkey; @@ -763,8 +764,6 @@ static void nvmet_rdma_handle_command(struct nvmet_rdma_queue *queue, cmd->send_sge.addr, cmd->send_sge.length, DMA_TO_DEVICE); - cmd->req.p2p_client = &queue->dev->device->dev; - if (!nvmet_req_init(&cmd->req, &queue->nvme_cq, &queue->nvme_sq, &nvmet_rdma_ops)) return; -- cgit v1.2.3 From fc6c9730725d5cc57c851d0e261a5682bba913a7 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 8 Apr 2019 18:39:59 +0300 Subject: nvmet: rename nvme_completion instances from rsp to cqe Use NVMe namings for improving code readability. Signed-off-by: Max Gurtovoy Reviewed-by : Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 22 +++++++++++----------- drivers/nvme/target/fabrics-cmd.c | 16 ++++++++-------- drivers/nvme/target/fc.c | 2 +- drivers/nvme/target/loop.c | 6 +++--- drivers/nvme/target/nvmet.h | 4 ++-- drivers/nvme/target/rdma.c | 18 +++++++++--------- drivers/nvme/target/tcp.c | 8 ++++---- 7 files changed, 38 insertions(+), 38 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 4d8dd29479c0..1c1776c3e316 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -647,7 +647,7 @@ static void nvmet_update_sq_head(struct nvmet_req *req) } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) != old_sqhd); } - req->rsp->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF); + req->cqe->sq_head = cpu_to_le16(req->sq->sqhd & 0x0000FFFF); } static void nvmet_set_error(struct nvmet_req *req, u16 status) @@ -656,7 +656,7 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status) struct nvme_error_slot *new_error_slot; unsigned long flags; - req->rsp->status = cpu_to_le16(status << 1); + req->cqe->status = cpu_to_le16(status << 1); if (!ctrl || req->error_loc == NVMET_NO_ERROR_LOC) return; @@ -676,15 +676,15 @@ static void nvmet_set_error(struct nvmet_req *req, u16 status) spin_unlock_irqrestore(&ctrl->error_lock, flags); /* set the more bit for this request */ - req->rsp->status |= cpu_to_le16(1 << 14); + req->cqe->status |= cpu_to_le16(1 << 14); } static void __nvmet_req_complete(struct nvmet_req *req, u16 status) { if (!req->sq->sqhd_disabled) nvmet_update_sq_head(req); - req->rsp->sq_id = cpu_to_le16(req->sq->qid); - req->rsp->command_id = req->cmd->common.command_id; + req->cqe->sq_id = cpu_to_le16(req->sq->qid); + req->cqe->command_id = req->cmd->common.command_id; if (unlikely(status)) nvmet_set_error(req, status); @@ -841,8 +841,8 @@ bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, req->sg = NULL; req->sg_cnt = 0; req->transfer_len = 0; - req->rsp->status = 0; - req->rsp->sq_head = 0; + req->cqe->status = 0; + req->cqe->sq_head = 0; req->ns = NULL; req->error_loc = NVMET_NO_ERROR_LOC; req->error_slba = 0; @@ -1069,7 +1069,7 @@ u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, if (!subsys) { pr_warn("connect request for invalid subsystem %s!\n", subsysnqn); - req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); + req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; } @@ -1090,7 +1090,7 @@ u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid, pr_warn("could not find controller %d for subsys %s / host %s\n", cntlid, subsysnqn, hostnqn); - req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); + req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; out: @@ -1188,7 +1188,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, if (!subsys) { pr_warn("connect request for invalid subsystem %s!\n", subsysnqn); - req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); + req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn); goto out; } @@ -1197,7 +1197,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, if (!nvmet_host_allowed(subsys, hostnqn)) { pr_info("connect by host %s for subsystem %s not allowed\n", hostnqn, subsysnqn); - req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn); + req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn); up_read(&nvmet_config_sem); status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR; goto out_put_subsystem; diff --git a/drivers/nvme/target/fabrics-cmd.c b/drivers/nvme/target/fabrics-cmd.c index 3a76ebc3d155..3b9f79aba98f 100644 --- a/drivers/nvme/target/fabrics-cmd.c +++ b/drivers/nvme/target/fabrics-cmd.c @@ -72,7 +72,7 @@ static void nvmet_execute_prop_get(struct nvmet_req *req) offsetof(struct nvmf_property_get_command, attrib); } - req->rsp->result.u64 = cpu_to_le64(val); + req->cqe->result.u64 = cpu_to_le64(val); nvmet_req_complete(req, status); } @@ -124,7 +124,7 @@ static u16 nvmet_install_queue(struct nvmet_ctrl *ctrl, struct nvmet_req *req) if (c->cattr & NVME_CONNECT_DISABLE_SQFLOW) { req->sq->sqhd_disabled = true; - req->rsp->sq_head = cpu_to_le16(0xffff); + req->cqe->sq_head = cpu_to_le16(0xffff); } if (ctrl->ops->install_queue) { @@ -158,7 +158,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) goto out; /* zero out initial completion result, assign values as needed */ - req->rsp->result.u32 = 0; + req->cqe->result.u32 = 0; if (c->recfmt != 0) { pr_warn("invalid connect version (%d).\n", @@ -172,7 +172,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) pr_warn("connect attempt for invalid controller ID %#x\n", d->cntlid); status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; - req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); + req->cqe->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid); goto out; } @@ -195,7 +195,7 @@ static void nvmet_execute_admin_connect(struct nvmet_req *req) pr_info("creating controller %d for subsystem %s for NQN %s.\n", ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn); - req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); + req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); out: kfree(d); @@ -222,7 +222,7 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) goto out; /* zero out initial completion result, assign values as needed */ - req->rsp->result.u32 = 0; + req->cqe->result.u32 = 0; if (c->recfmt != 0) { pr_warn("invalid connect version (%d).\n", @@ -240,14 +240,14 @@ static void nvmet_execute_io_connect(struct nvmet_req *req) if (unlikely(qid > ctrl->subsys->max_qid)) { pr_warn("invalid queue id (%d)\n", qid); status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR; - req->rsp->result.u32 = IPO_IATTR_CONNECT_SQE(qid); + req->cqe->result.u32 = IPO_IATTR_CONNECT_SQE(qid); goto out_ctrl_put; } status = nvmet_install_queue(ctrl, req); if (status) { /* pass back cntlid that had the issue of installing queue */ - req->rsp->result.u16 = cpu_to_le16(ctrl->cntlid); + req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); goto out_ctrl_put; } diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 9369a11fe7a9..508661af0f50 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -2184,7 +2184,7 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, } fod->req.cmd = &fod->cmdiubuf.sqe; - fod->req.rsp = &fod->rspiubuf.cqe; + fod->req.cqe = &fod->rspiubuf.cqe; fod->req.port = tgtport->pe->port; /* clear any response payload */ diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index b9f623ab01f3..a3ae491fa20e 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -18,7 +18,7 @@ struct nvme_loop_iod { struct nvme_request nvme_req; struct nvme_command cmd; - struct nvme_completion rsp; + struct nvme_completion cqe; struct nvmet_req req; struct nvme_loop_queue *queue; struct work_struct work; @@ -94,7 +94,7 @@ static void nvme_loop_queue_response(struct nvmet_req *req) { struct nvme_loop_queue *queue = container_of(req->sq, struct nvme_loop_queue, nvme_sq); - struct nvme_completion *cqe = req->rsp; + struct nvme_completion *cqe = req->cqe; /* * AEN requests are special as they don't time out and can @@ -207,7 +207,7 @@ static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, struct nvme_loop_iod *iod, unsigned int queue_idx) { iod->req.cmd = &iod->cmd; - iod->req.rsp = &iod->rsp; + iod->req.cqe = &iod->cqe; iod->queue = &ctrl->queues[queue_idx]; INIT_WORK(&iod->work, nvme_loop_execute_work); return 0; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 1653d19b187f..c25d88fc9dec 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -284,7 +284,7 @@ struct nvmet_fabrics_ops { struct nvmet_req { struct nvme_command *cmd; - struct nvme_completion *rsp; + struct nvme_completion *cqe; struct nvmet_sq *sq; struct nvmet_cq *cq; struct nvmet_ns *ns; @@ -322,7 +322,7 @@ extern struct workqueue_struct *buffered_io_wq; static inline void nvmet_set_result(struct nvmet_req *req, u32 result) { - req->rsp->result.u32 = cpu_to_le32(result); + req->cqe->result.u32 = cpu_to_le32(result); } /* diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index b7275218dfa5..36d906a7f70d 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -160,7 +160,7 @@ static inline bool nvmet_rdma_need_data_out(struct nvmet_rdma_rsp *rsp) { return !nvme_is_write(rsp->req.cmd) && rsp->req.transfer_len && - !rsp->req.rsp->status && + !rsp->req.cqe->status && !(rsp->flags & NVMET_RDMA_REQ_INLINE_DATA); } @@ -364,17 +364,17 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, struct nvmet_rdma_rsp *r) { /* NVMe CQE / RDMA SEND */ - r->req.rsp = kmalloc(sizeof(*r->req.rsp), GFP_KERNEL); - if (!r->req.rsp) + r->req.cqe = kmalloc(sizeof(*r->req.cqe), GFP_KERNEL); + if (!r->req.cqe) goto out; - r->send_sge.addr = ib_dma_map_single(ndev->device, r->req.rsp, - sizeof(*r->req.rsp), DMA_TO_DEVICE); + r->send_sge.addr = ib_dma_map_single(ndev->device, r->req.cqe, + sizeof(*r->req.cqe), DMA_TO_DEVICE); if (ib_dma_mapping_error(ndev->device, r->send_sge.addr)) goto out_free_rsp; r->req.p2p_client = &ndev->device->dev; - r->send_sge.length = sizeof(*r->req.rsp); + r->send_sge.length = sizeof(*r->req.cqe); r->send_sge.lkey = ndev->pd->local_dma_lkey; r->send_cqe.done = nvmet_rdma_send_done; @@ -389,7 +389,7 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev, return 0; out_free_rsp: - kfree(r->req.rsp); + kfree(r->req.cqe); out: return -ENOMEM; } @@ -398,8 +398,8 @@ static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev, struct nvmet_rdma_rsp *r) { ib_dma_unmap_single(ndev->device, r->send_sge.addr, - sizeof(*r->req.rsp), DMA_TO_DEVICE); - kfree(r->req.rsp); + sizeof(*r->req.cqe), DMA_TO_DEVICE); + kfree(r->req.cqe); } static int diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 0a941abf56ec..17cf137dc88c 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -161,14 +161,14 @@ static inline bool nvmet_tcp_has_data_in(struct nvmet_tcp_cmd *cmd) static inline bool nvmet_tcp_need_data_in(struct nvmet_tcp_cmd *cmd) { - return nvmet_tcp_has_data_in(cmd) && !cmd->req.rsp->status; + return nvmet_tcp_has_data_in(cmd) && !cmd->req.cqe->status; } static inline bool nvmet_tcp_need_data_out(struct nvmet_tcp_cmd *cmd) { return !nvme_is_write(cmd->req.cmd) && cmd->req.transfer_len > 0 && - !cmd->req.rsp->status; + !cmd->req.cqe->status; } static inline bool nvmet_tcp_has_inline_data(struct nvmet_tcp_cmd *cmd) @@ -378,7 +378,7 @@ static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd) pdu->hdr.plen = cpu_to_le32(pdu->hdr.hlen + hdgst + cmd->req.transfer_len + ddgst); - pdu->command_id = cmd->req.rsp->command_id; + pdu->command_id = cmd->req.cqe->command_id; pdu->data_length = cpu_to_le32(cmd->req.transfer_len); pdu->data_offset = cpu_to_le32(cmd->wbytes_done); @@ -1224,7 +1224,7 @@ static int nvmet_tcp_alloc_cmd(struct nvmet_tcp_queue *queue, sizeof(*c->rsp_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); if (!c->rsp_pdu) goto out_free_cmd; - c->req.rsp = &c->rsp_pdu->cqe; + c->req.cqe = &c->rsp_pdu->cqe; c->data_pdu = page_frag_alloc(&queue->pf_cache, sizeof(*c->data_pdu) + hdgst, GFP_KERNEL | __GFP_ZERO); -- cgit v1.2.3 From 6b7e631b927ca1266b2695307ab71ed7764af75e Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sun, 7 Apr 2019 15:28:06 +0900 Subject: nvmet: return a specified error it subsys_alloc fails nvmet_subsys_alloc() returns its pointer or NULL if it fails. We can see three different steps in this function: 1. memory allocation 2. argument check 3. memory allocation for string But now the callers of this function do not seem to handle case 2 by returning -ENOMEM only even if it fails with an invalid parameter. This patch specifies error codes so that caller can pass it to its own caller. Signed-off-by: Minwoo Im Reviewed-by: Chaitanya Kulkarni . Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 4 ++-- drivers/nvme/target/core.c | 6 +++--- drivers/nvme/target/discovery.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index adb79545cdd7..08dd5af357f7 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -898,8 +898,8 @@ static struct config_group *nvmet_subsys_make(struct config_group *group, } subsys = nvmet_subsys_alloc(name, NVME_NQN_NVME); - if (!subsys) - return ERR_PTR(-ENOMEM); + if (IS_ERR(subsys)) + return ERR_CAST(subsys); config_group_init_type_name(&subsys->group, name, &nvmet_subsys_type); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 1c1776c3e316..24e0a36392d9 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1367,7 +1367,7 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, subsys = kzalloc(sizeof(*subsys), GFP_KERNEL); if (!subsys) - return NULL; + return ERR_PTR(-ENOMEM); subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */ /* generate a random serial number as our controllers are ephemeral: */ @@ -1383,14 +1383,14 @@ struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn, default: pr_err("%s: Unknown Subsystem type - %d\n", __func__, type); kfree(subsys); - return NULL; + return ERR_PTR(-EINVAL); } subsys->type = type; subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE, GFP_KERNEL); if (!subsys->subsysnqn) { kfree(subsys); - return NULL; + return ERR_PTR(-ENOMEM); } kref_init(&subsys->ref); diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 33ed95e72d6b..e8e09266bfa5 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -372,8 +372,8 @@ int __init nvmet_init_discovery(void) { nvmet_disc_subsys = nvmet_subsys_alloc(NVME_DISC_SUBSYS_NAME, NVME_NQN_DISC); - if (!nvmet_disc_subsys) - return -ENOMEM; + if (IS_ERR(nvmet_disc_subsys)) + return PTR_ERR(nvmet_disc_subsys); return 0; } -- cgit v1.2.3 From a5dffbb66d250a7ef07e27a2e75b8d9d7af2ab41 Mon Sep 17 00:00:00 2001 From: "Enrico Weigelt, metux IT consult" Date: Wed, 24 Apr 2019 12:34:39 +0200 Subject: nvmet: include Build breaks: drivers/nvme/target/core.c: In function 'nvmet_req_alloc_sgl': drivers/nvme/target/core.c:939:12: error: implicit declaration of \ function 'sgl_alloc'; did you mean 'bio_alloc'? \ [-Werror=implicit-function-declaration] req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt); ^~~~~~~~~ bio_alloc drivers/nvme/target/core.c:939:10: warning: assignment makes pointer \ from integer without a cast [-Wint-conversion] req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt); ^ drivers/nvme/target/core.c: In function 'nvmet_req_free_sgl': drivers/nvme/target/core.c:952:3: error: implicit declaration of \ function 'sgl_free'; did you mean 'ida_free'? [-Werror=implicit-function-declaration] sgl_free(req->sg); ^~~~~~~~ ida_free Cause: 1. missing include to 2. SGL_ALLOC needs to be enabled Therefore adding the missing include, as well as Kconfig dependency. Signed-off-by: Enrico Weigelt, metux IT consult Reviewed-by: Sagi Grimberg Reviewed-by: Minwoo Im Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/Kconfig | 1 + drivers/nvme/target/core.c | 1 + 2 files changed, 2 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig index d94f25cde019..3ef0a4e5eed6 100644 --- a/drivers/nvme/target/Kconfig +++ b/drivers/nvme/target/Kconfig @@ -3,6 +3,7 @@ config NVME_TARGET tristate "NVMe Target support" depends on BLOCK depends on CONFIGFS_FS + select SGL_ALLOC help This enabled target side support for the NVMe protocol, that is it allows the Linux kernel to implement NVMe subsystems and diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 24e0a36392d9..7734a6acff85 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "nvmet.h" -- cgit v1.2.3 From 525ec495e021068aa8635a0e18ff60695f5b1f4f Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 24 Apr 2019 11:43:23 -0700 Subject: nvmet-file: clamp-down file namespace lba_shift When the backing file is a tempfile for example, the inode i_blkbits can be 1M in size which causes problems for hosts to support as the disk block size. Instead, expose the minimum between i_blkbits and 12 (4K sector size). Signed-off-by: Sagi Grimberg Reviewed-by:- Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-file.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index bc6ebb51b0bf..05453f5d1448 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -49,7 +49,12 @@ int nvmet_file_ns_enable(struct nvmet_ns *ns) goto err; ns->size = stat.size; - ns->blksize_shift = file_inode(ns->file)->i_blkbits; + /* + * i_blkbits can be greater than the universally accepted upper bound, + * so make sure we export a sane namespace lba_shift. + */ + ns->blksize_shift = min_t(u8, + file_inode(ns->file)->i_blkbits, 12); ns->bvec_cache = kmem_cache_create("nvmet-bvec", NVMET_MAX_MPOOL_BVEC * sizeof(struct bio_vec), -- cgit v1.2.3 From 569b3d3db1aac8586a16df1745c9e5a99ff47253 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 24 Apr 2019 11:53:16 -0700 Subject: nvmet-tcp: don't fail maxr2t greater than 1 The host may support it, but nothing prevents us from sending a single r2t at a time like we do anyways. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/tcp.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 17cf137dc88c..69b83fa0c76c 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -774,12 +774,6 @@ static int nvmet_tcp_handle_icreq(struct nvmet_tcp_queue *queue) return -EPROTO; } - if (icreq->maxr2t != 0) { - pr_err("queue %d: unsupported maxr2t %d\n", queue->idx, - le32_to_cpu(icreq->maxr2t) + 1); - return -EPROTO; - } - queue->hdr_digest = !!(icreq->digest & NVME_TCP_HDR_DIGEST_ENABLE); queue->data_digest = !!(icreq->digest & NVME_TCP_DATA_DIGEST_ENABLE); if (queue->hdr_digest || queue->data_digest) { -- cgit v1.2.3 From 7a42589654ae79e1177f0d74306a02d6cef7bddf Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 24 Apr 2019 11:53:17 -0700 Subject: nvme-tcp: fix a NULL deref when an admin connect times out If we timeout the admin startup sequence we might not yet have an I/O tagset allocated which causes the teardown sequence to crash. Make nvme_tcp_teardown_io_queues safe by not iterating inflight tags if the tagset wasn't allocated. Fixes: 39d57757467b ("nvme-tcp: fix timeout handler") Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 68c49dd67210..aae5374d2b93 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1710,7 +1710,9 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, { blk_mq_quiesce_queue(ctrl->admin_q); nvme_tcp_stop_queue(ctrl, 0); - blk_mq_tagset_busy_iter(ctrl->admin_tagset, nvme_cancel_request, ctrl); + if (ctrl->admin_tagset) + blk_mq_tagset_busy_iter(ctrl->admin_tagset, + nvme_cancel_request, ctrl); blk_mq_unquiesce_queue(ctrl->admin_q); nvme_tcp_destroy_admin_queue(ctrl, remove); } @@ -1722,7 +1724,9 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, return; nvme_stop_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); - blk_mq_tagset_busy_iter(ctrl->tagset, nvme_cancel_request, ctrl); + if (ctrl->tagset) + blk_mq_tagset_busy_iter(ctrl->tagset, + nvme_cancel_request, ctrl); if (remove) nvme_start_queues(ctrl); nvme_tcp_destroy_io_queues(ctrl, remove); -- cgit v1.2.3 From 1007709d7d06fab09bf2d007657575958676282b Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 24 Apr 2019 11:53:18 -0700 Subject: nvme-rdma: fix a NULL deref when an admin connect times out If we timeout the admin startup sequence we might not yet have an I/O tagset allocated which causes the teardown sequence to crash. Make nvme_tcp_teardown_io_queues safe by not iterating inflight tags if the tagset wasn't allocated. Fixes: 4c174e636674 ("nvme-rdma: fix timeout handler") Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 11a5ecae78c8..e1824c2e0a1c 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -914,8 +914,9 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, { blk_mq_quiesce_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); - blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, - &ctrl->ctrl); + if (ctrl->ctrl.admin_tagset) + blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset, + nvme_cancel_request, &ctrl->ctrl); blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); nvme_rdma_destroy_admin_queue(ctrl, remove); } @@ -926,8 +927,9 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); - blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, - &ctrl->ctrl); + if (ctrl->ctrl.tagset) + blk_mq_tagset_busy_iter(ctrl->ctrl.tagset, + nvme_cancel_request, &ctrl->ctrl); if (remove) nvme_start_queues(&ctrl->ctrl); nvme_rdma_destroy_io_queues(ctrl, remove); -- cgit v1.2.3 From efb973b19b88642bb7e08b8ce8e03b0bbd2a7e2a Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 24 Apr 2019 11:53:19 -0700 Subject: nvme-tcp: rename function to have nvme_tcp prefix usually nvme_ prefix is for core functions. While we're cleaning up, remove redundant empty lines Signed-off-by: Sagi Grimberg Reviewed-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index aae5374d2b93..2405bb9c63cc 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -473,7 +473,6 @@ static int nvme_tcp_handle_c2h_data(struct nvme_tcp_queue *queue, } return 0; - } static int nvme_tcp_handle_comp(struct nvme_tcp_queue *queue, @@ -634,7 +633,6 @@ static inline void nvme_tcp_end_request(struct request *rq, u16 status) nvme_end_request(rq, cpu_to_le16(status << 1), res); } - static int nvme_tcp_recv_data(struct nvme_tcp_queue *queue, struct sk_buff *skb, unsigned int *offset, size_t *len) { @@ -1535,7 +1533,7 @@ out_free_queue: return ret; } -static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) +static int __nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) { int i, ret; @@ -1565,7 +1563,7 @@ static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl) return nr_io_queues; } -static int nvme_alloc_io_queues(struct nvme_ctrl *ctrl) +static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl) { unsigned int nr_io_queues; int ret; @@ -1582,7 +1580,7 @@ static int nvme_alloc_io_queues(struct nvme_ctrl *ctrl) dev_info(ctrl->device, "creating %d I/O queues.\n", nr_io_queues); - return nvme_tcp_alloc_io_queues(ctrl); + return __nvme_tcp_alloc_io_queues(ctrl); } static void nvme_tcp_destroy_io_queues(struct nvme_ctrl *ctrl, bool remove) @@ -1599,7 +1597,7 @@ static int nvme_tcp_configure_io_queues(struct nvme_ctrl *ctrl, bool new) { int ret; - ret = nvme_alloc_io_queues(ctrl); + ret = nvme_tcp_alloc_io_queues(ctrl); if (ret) return ret; -- cgit v1.2.3 From 663d6fee66b555f6a080104751be0b54e0bca78a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 15 Apr 2019 09:51:46 +0800 Subject: nvme-loop: kill timeout handler Firstly it doesn't make sense to handle timeout for loop: 1) for admin queue, the request is always completed in code path of queuing IO. 2) for normal IO request, the timeout on these IOs have been handled by underlying queue already. Secondly nvme-loop's timeout handler is simply broken, and easy to cause issue: 1) no any sync/protection between timeout and normal completion, and now it is driver's responsibility to deal with that; 2) bad reset implementation, blk_mq_update_nr_hw_queues() is called after all NSs's queue is stopped(quiesced), and easy to trigger deadlock. So kill the timeout handler. Signed-off-by: Ming Lei Reviewd-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/loop.c | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index a3ae491fa20e..9e211ad6bdd3 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -129,20 +129,6 @@ static void nvme_loop_execute_work(struct work_struct *work) nvmet_req_execute(&iod->req); } -static enum blk_eh_timer_return -nvme_loop_timeout(struct request *rq, bool reserved) -{ - struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq); - - /* queue error recovery */ - nvme_reset_ctrl(&iod->queue->ctrl->ctrl); - - /* fail with DNR on admin cmd timeout */ - nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR; - - return BLK_EH_DONE; -} - static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -253,7 +239,6 @@ static const struct blk_mq_ops nvme_loop_mq_ops = { .complete = nvme_loop_complete_rq, .init_request = nvme_loop_init_request, .init_hctx = nvme_loop_init_hctx, - .timeout = nvme_loop_timeout, }; static const struct blk_mq_ops nvme_loop_admin_mq_ops = { @@ -261,7 +246,6 @@ static const struct blk_mq_ops nvme_loop_admin_mq_ops = { .complete = nvme_loop_complete_rq, .init_request = nvme_loop_init_request, .init_hctx = nvme_loop_init_admin_hctx, - .timeout = nvme_loop_timeout, }; static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) -- cgit v1.2.3 From 01fa017484ad98fccdeaab32db0077c574b6bd6f Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 11 Mar 2019 15:02:25 -0700 Subject: nvme: set 0 capacity if namespace block size exceeds PAGE_SIZE If our target exposed a namespace with a block size that is greater than PAGE_SIZE, set 0 capacity on the namespace as we do not support it. This issue encountered when the nvmet namespace was backed by a tempfile. Signed-off-by: Sagi Grimberg Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 248ff3b48041..3dd043aa6d1f 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1591,6 +1591,10 @@ static void nvme_update_disk_info(struct gendisk *disk, sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9); unsigned short bs = 1 << ns->lba_shift; + if (ns->lba_shift > PAGE_SHIFT) { + /* unsupported block size, set capacity to 0 later */ + bs = (1 << 9); + } blk_mq_freeze_queue(disk->queue); blk_integrity_unregister(disk); @@ -1601,7 +1605,8 @@ static void nvme_update_disk_info(struct gendisk *disk, if (ns->ms && !ns->ext && (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) nvme_init_integrity(disk, ns->ms, ns->pi_type); - if (ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) + if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) || + ns->lba_shift > PAGE_SHIFT) capacity = 0; set_capacity(disk, capacity); -- cgit v1.2.3 From f34e25898a608380a60135288019c4cb6013bec8 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 29 Apr 2019 16:25:48 -0700 Subject: nvme-tcp: fix possible null deref on a timed out io queue connect If I/O queue connect times out, we might have freed the queue socket already, so check for that on the error path in nvme_tcp_start_queue. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/tcp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 2405bb9c63cc..2b107a1d152b 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -1423,7 +1423,8 @@ static int nvme_tcp_start_queue(struct nvme_ctrl *nctrl, int idx) if (!ret) { set_bit(NVME_TCP_Q_LIVE, &ctrl->queues[idx].flags); } else { - __nvme_tcp_stop_queue(&ctrl->queues[idx]); + if (test_bit(NVME_TCP_Q_ALLOCATED, &ctrl->queues[idx].flags)) + __nvme_tcp_stop_queue(&ctrl->queues[idx]); dev_err(nctrl->device, "failed to connect queue: %d ret=%d\n", idx, ret); } -- cgit v1.2.3 From 525aa5a705d86e193726ee465d1a975265fabf19 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Tue, 30 Apr 2019 18:57:09 +0200 Subject: nvme-multipath: split bios with the ns_head bio_set before submitting If the bio is moved to a different queue via blk_steal_bios() and the original queue is destroyed in nvme_remove_ns() we'll be ending with a crash in bio_endio() as the mempool for the split bio bvecs had already been destroyed. So split the bio using the original queue (which will remain during the lifetime of the bio) before sending it down to the underlying device. Signed-off-by: Hannes Reinecke Reviewed-by: Ming Lei Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index f0716f6ce41f..e6ddc83223df 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -232,6 +232,14 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, blk_qc_t ret = BLK_QC_T_NONE; int srcu_idx; + /* + * The namespace might be going away and the bio might + * be moved to a different queue via blk_steal_bios(), + * so we need to use the bio_split pool from the original + * queue to allocate the bvecs from. + */ + blk_queue_split(q, &bio); + srcu_idx = srcu_read_lock(&head->srcu); ns = nvme_find_path(head); if (likely(ns)) { -- cgit v1.2.3 From 592b6e7b0226b198c12439065f725be00c92d559 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Sun, 28 Apr 2019 20:24:42 -0700 Subject: nvme-multipath: don't print ANA group state by default Signed-off-by: Hannes Reinecke Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index e6ddc83223df..5c9429d41120 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -429,7 +429,7 @@ static int nvme_update_ana_state(struct nvme_ctrl *ctrl, unsigned *nr_change_groups = data; struct nvme_ns *ns; - dev_info(ctrl->device, "ANA group %d: %s.\n", + dev_dbg(ctrl->device, "ANA group %d: %s.\n", le32_to_cpu(desc->grpid), nvme_ana_state_names[desc->state]); -- cgit v1.2.3 From 049bf37262c61c99f45438910711b55054b24838 Mon Sep 17 00:00:00 2001 From: Klaus Birkelund Jensen Date: Tue, 30 Apr 2019 18:53:29 +0200 Subject: nvme-pci: fix psdt field for single segment sgls The shortcut for single segment SGL requests did not set the PSDT field to mark the request as using SGLs. Fixes: 297910571f08 ("nvme-pci: optimize mapping single segment requests using SGLs") Signed-off-by: Klaus Birkelund Jensen Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index c1eecde6b853..efc1da56521c 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -830,6 +830,7 @@ static blk_status_t nvme_setup_sgl_simple(struct nvme_dev *dev, return BLK_STS_RESOURCE; iod->dma_len = bv->bv_len; + cmnd->flags = NVME_CMD_SGL_METABUF; cmnd->dptr.sgl.addr = cpu_to_le64(iod->first_dma); cmnd->dptr.sgl.length = cpu_to_le32(iod->dma_len); cmnd->dptr.sgl.type = NVME_SGL_FMT_DATA_DESC << 4; -- cgit v1.2.3 From 9dc1a38ef1925d23c2933c5867df816386d92ff8 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 30 Apr 2019 09:33:40 -0600 Subject: nvme-pci: shutdown on timeout during deletion We do not restart a controller in a deleting state for timeout errors. When in this state, unblock potential request dispatchers with failed completions by shutting down the controller on timeout detection. Reported-by: Yufen Yu Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index efc1da56521c..3df0f2b29427 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1277,6 +1277,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) struct nvme_dev *dev = nvmeq->dev; struct request *abort_req; struct nvme_command cmd; + bool shutdown = false; u32 csts = readl(dev->bar + NVME_REG_CSTS); /* If PCI error recovery process is happening, we cannot reset or @@ -1313,12 +1314,14 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) * shutdown, so we return BLK_EH_DONE. */ switch (dev->ctrl.state) { + case NVME_CTRL_DELETING: + shutdown = true; case NVME_CTRL_CONNECTING: case NVME_CTRL_RESETTING: dev_warn_ratelimited(dev->ctrl.device, "I/O %d QID %d timeout, disable controller\n", req->tag, nvmeq->qid); - nvme_dev_disable(dev, false); + nvme_dev_disable(dev, shutdown); nvme_req(req)->flags |= NVME_REQ_CANCELLED; return BLK_EH_DONE; default: -- cgit v1.2.3 From c8e9e9b7646ebe1c5066ddc420d7630876277eb4 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 30 Apr 2019 09:33:41 -0600 Subject: nvme-pci: unquiesce admin queue on shutdown Just like IO queues, the admin queue also will not be restarted after a controller shutdown. Unquiesce this queue so that we do not block request dispatch on a permanently disabled controller. Reported-by: Yufen Yu Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3df0f2b29427..ac10d3ad1e75 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2437,8 +2437,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) * must flush all entered requests to their failed completion to avoid * deadlocking blk-mq hot-cpu notifier. */ - if (shutdown) + if (shutdown) { nvme_start_queues(&dev->ctrl); + if (dev->ctrl.admin_q && !blk_queue_dying(dev->ctrl.admin_q)) + blk_mq_unquiesce_queue(dev->ctrl.admin_q); + } mutex_unlock(&dev->shutdown_lock); } -- cgit v1.2.3 From 665648673ef5384c7194ea6df4b55f2da98646cf Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Fri, 12 Apr 2019 00:52:39 +0900 Subject: nvme-pci: remove an unneeded variable initialization Variable "n" will be assigned once kstrtoint() succeeds, otherwise it will not be referred because kstrtoint() will return an error which means go out from this function. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index ac10d3ad1e75..e2ff92de41a7 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -146,7 +146,7 @@ static int io_queue_depth_set(const char *val, const struct kernel_param *kp) static int queue_count_set(const char *val, const struct kernel_param *kp) { - int n = 0, ret; + int n, ret; ret = kstrtoint(val, 10, &n); if (ret) -- cgit v1.2.3 From a97234e1ff1ec9d5a41c6adff5632c61639dee6a Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Fri, 12 Apr 2019 00:18:32 +0900 Subject: nvme-pci: check more command sizes All the NVMe command has 64bytes fixed size so that it has been assured with BUILD_BUG_ON(). The remaining command structures in linux/nvme.h also need to be checked here. Signed-off-by: Minwoo Im Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e2ff92de41a7..9c1a8fd68b3a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -231,19 +231,26 @@ struct nvme_iod { */ static inline void _nvme_check_size(void) { + BUILD_BUG_ON(sizeof(struct nvme_common_command) != 64); BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64); + BUILD_BUG_ON(sizeof(struct nvme_identify) != 64); BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64); BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); BUILD_BUG_ON(sizeof(struct nvme_features) != 64); + BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64); BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64); + BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64); + BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64); BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64); + BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64); BUILD_BUG_ON(sizeof(struct nvme_command) != 64); BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); + BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64); } static unsigned int max_io_queues(void) -- cgit v1.2.3 From a2faf94e57c5237a9cad31f63eeaf2412ed0e951 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Fri, 12 Apr 2019 00:18:31 +0900 Subject: nvme-fabrics: check more command sizes struct common_command provides a common structure for NVMe-oF command format. It also needs to be checked for unintended size growth. Signed-off-by: Minwoo Im Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index d4cb826f58ff..592d1e61ef7e 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -1188,6 +1188,7 @@ static void __exit nvmf_exit(void) class_destroy(nvmf_class); nvmf_host_put(nvmf_default_host); + BUILD_BUG_ON(sizeof(struct nvmf_common_command) != 64); BUILD_BUG_ON(sizeof(struct nvmf_connect_command) != 64); BUILD_BUG_ON(sizeof(struct nvmf_property_get_command) != 64); BUILD_BUG_ON(sizeof(struct nvmf_property_set_command) != 64); -- cgit v1.2.3 From 811015409fd4af80bbecb8e46b3aa24c8986fb74 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 30 Apr 2019 11:36:52 -0400 Subject: nvme: move command size checks to the core Most command aren't PCIe specific, so move the size checking for them to core.c Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 27 +++++++++++++++++++++++++++ drivers/nvme/host/pci.c | 31 +++---------------------------- 2 files changed, 30 insertions(+), 28 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3dd043aa6d1f..e970c5adee28 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3879,10 +3879,37 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_start_queues); +/* + * Check we didn't inadvertently grow the command structure sizes: + */ +static inline void _nvme_check_size(void) +{ + BUILD_BUG_ON(sizeof(struct nvme_common_command) != 64); + BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64); + BUILD_BUG_ON(sizeof(struct nvme_identify) != 64); + BUILD_BUG_ON(sizeof(struct nvme_features) != 64); + BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64); + BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64); + BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64); + BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64); + BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64); + BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64); + BUILD_BUG_ON(sizeof(struct nvme_command) != 64); + BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); + BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); + BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); + BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); + BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64); +} + + int __init nvme_core_init(void) { int result = -ENOMEM; + _nvme_check_size(); + nvme_wq = alloc_workqueue("nvme-wq", WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0); if (!nvme_wq) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9c1a8fd68b3a..3e4fb891a95a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -226,33 +226,6 @@ struct nvme_iod { struct scatterlist *sg; }; -/* - * Check we didin't inadvertently grow the command struct - */ -static inline void _nvme_check_size(void) -{ - BUILD_BUG_ON(sizeof(struct nvme_common_command) != 64); - BUILD_BUG_ON(sizeof(struct nvme_rw_command) != 64); - BUILD_BUG_ON(sizeof(struct nvme_identify) != 64); - BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64); - BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); - BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); - BUILD_BUG_ON(sizeof(struct nvme_features) != 64); - BUILD_BUG_ON(sizeof(struct nvme_download_firmware) != 64); - BUILD_BUG_ON(sizeof(struct nvme_format_cmd) != 64); - BUILD_BUG_ON(sizeof(struct nvme_dsm_cmd) != 64); - BUILD_BUG_ON(sizeof(struct nvme_write_zeroes_cmd) != 64); - BUILD_BUG_ON(sizeof(struct nvme_abort_cmd) != 64); - BUILD_BUG_ON(sizeof(struct nvme_get_log_page_command) != 64); - BUILD_BUG_ON(sizeof(struct nvme_command) != 64); - BUILD_BUG_ON(sizeof(struct nvme_id_ctrl) != NVME_IDENTIFY_DATA_SIZE); - BUILD_BUG_ON(sizeof(struct nvme_id_ns) != NVME_IDENTIFY_DATA_SIZE); - BUILD_BUG_ON(sizeof(struct nvme_lba_range_type) != 64); - BUILD_BUG_ON(sizeof(struct nvme_smart_log) != 512); - BUILD_BUG_ON(sizeof(struct nvme_dbbuf) != 64); - BUILD_BUG_ON(sizeof(struct nvme_directive_cmd) != 64); -} - static unsigned int max_io_queues(void) { return num_possible_cpus() + write_queues + poll_queues; @@ -2988,6 +2961,9 @@ static struct pci_driver nvme_driver = { static int __init nvme_init(void) { + BUILD_BUG_ON(sizeof(struct nvme_create_cq) != 64); + BUILD_BUG_ON(sizeof(struct nvme_create_sq) != 64); + BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); return pci_register_driver(&nvme_driver); } @@ -2996,7 +2972,6 @@ static void __exit nvme_exit(void) { pci_unregister_driver(&nvme_driver); flush_workqueue(nvme_wq); - _nvme_check_size(); } MODULE_AUTHOR("Matthew Wilcox "); -- cgit v1.2.3 From 893a74b7a76e6e9c5c7199e6aae946f090622fa2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 30 Apr 2019 11:37:43 -0400 Subject: nvme: mark nvme_core_init and nvme_core_exit static Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni --- drivers/nvme/host/core.c | 4 ++-- drivers/nvme/host/nvme.h | 3 --- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/nvme') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index e970c5adee28..cd16d98d1f1a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3904,7 +3904,7 @@ static inline void _nvme_check_size(void) } -int __init nvme_core_init(void) +static int __init nvme_core_init(void) { int result = -ENOMEM; @@ -3956,7 +3956,7 @@ out: return result; } -void __exit nvme_core_exit(void) +static void __exit nvme_core_exit(void) { ida_destroy(&nvme_subsystems_ida); class_destroy(nvme_subsys_class); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 527d64545023..5ee75b5ff83f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -577,7 +577,4 @@ static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev) return dev_to_disk(dev)->private_data; } -int __init nvme_core_init(void); -void __exit nvme_core_exit(void); - #endif /* _NVME_H */ -- cgit v1.2.3 From 6f53e73b9ec5b3cd097077c5ffcb76df708ce3f8 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 29 Apr 2019 16:28:19 -0700 Subject: nvmet: protect discovery change log event list iteration When we iterate on the discovery subsystem controllers we need to protect against concurrent mutations to it. Signed-off-by: Sagi Grimberg Reviewed-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/target/discovery.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/nvme') diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index e8e09266bfa5..5baf269f3f8a 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -30,14 +30,17 @@ void nvmet_port_disc_changed(struct nvmet_port *port, { struct nvmet_ctrl *ctrl; + lockdep_assert_held(&nvmet_config_sem); nvmet_genctr++; + mutex_lock(&nvmet_disc_subsys->lock); list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) { if (subsys && !nvmet_host_allowed(subsys, ctrl->hostnqn)) continue; __nvmet_disc_changed(port, ctrl); } + mutex_unlock(&nvmet_disc_subsys->lock); } static void __nvmet_subsys_disc_changed(struct nvmet_port *port, @@ -46,12 +49,14 @@ static void __nvmet_subsys_disc_changed(struct nvmet_port *port, { struct nvmet_ctrl *ctrl; + mutex_lock(&nvmet_disc_subsys->lock); list_for_each_entry(ctrl, &nvmet_disc_subsys->ctrls, subsys_entry) { if (host && strcmp(nvmet_host_name(host), ctrl->hostnqn)) continue; __nvmet_disc_changed(port, ctrl); } + mutex_unlock(&nvmet_disc_subsys->lock); } void nvmet_subsys_disc_changed(struct nvmet_subsys *subsys, -- cgit v1.2.3