From 0866bf0c3778661e65f68a5c93df8e0a1e9e43cc Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 11 Jun 2018 13:40:07 -0400 Subject: nvmet: add commands supported and effects log page This patch adds support for the Commands Supported and Effects log page (Log Identifier 05h) for NVMeOF. This also makes it easier to find which commands are supported, e.g. (here as reported by nvme-cli) :- subnqn : testnqn1 Admin Command Set ACS2 [Get Log Page ] 00000001 ACS6 [Identify ] 00000001 ACS8 [Abort ] 00000001 ACS9 [Set Features ] 00000001 ACS10 [Get Features ] 00000001 ACS12 [Asynchronous Event Request ] 00000001 ACS24 [Keep Alive ] 00000001 NVM Command Set IOCS0 [Flush ] 00000001 IOCS1 [Write ] 00000001 IOCS2 [Read ] 00000001 IOCS8 [Write Zeroes ] 00000001 IOCS9 [Dataset Management ] 00000001 This particular functionality can be used from the host side to examine the NVMeOF ctrl commands supported. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 38803576d5e1..e2c6f8b39388 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -128,6 +128,36 @@ out: nvmet_req_complete(req, status); } +static void nvmet_execute_get_log_cmd_effects_ns(struct nvmet_req *req) +{ + u16 status = NVME_SC_INTERNAL; + struct nvme_effects_log *log; + + log = kzalloc(sizeof(*log), GFP_KERNEL); + if (!log) + goto out; + + log->acs[nvme_admin_get_log_page] = cpu_to_le32(1 << 0); + log->acs[nvme_admin_identify] = cpu_to_le32(1 << 0); + log->acs[nvme_admin_abort_cmd] = cpu_to_le32(1 << 0); + log->acs[nvme_admin_set_features] = cpu_to_le32(1 << 0); + log->acs[nvme_admin_get_features] = cpu_to_le32(1 << 0); + log->acs[nvme_admin_async_event] = cpu_to_le32(1 << 0); + log->acs[nvme_admin_keep_alive] = cpu_to_le32(1 << 0); + + log->iocs[nvme_cmd_read] = cpu_to_le32(1 << 0); + log->iocs[nvme_cmd_write] = cpu_to_le32(1 << 0); + log->iocs[nvme_cmd_flush] = cpu_to_le32(1 << 0); + log->iocs[nvme_cmd_dsm] = cpu_to_le32(1 << 0); + log->iocs[nvme_cmd_write_zeroes] = cpu_to_le32(1 << 0); + + status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log)); + + kfree(log); +out: + nvmet_req_complete(req, status); +} + static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; @@ -208,7 +238,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) /* first slot is read-only, only one slot supported */ id->frmw = (1 << 0) | (1 << 1); - id->lpa = (1 << 0) | (1 << 2); + id->lpa = (1 << 0) | (1 << 1) | (1 << 2); id->elpe = NVMET_ERROR_LOG_SLOTS - 1; id->npss = 0; @@ -586,6 +616,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) case NVME_LOG_CHANGED_NS: req->execute = nvmet_execute_get_log_changed_ns; return 0; + case NVME_LOG_CMD_EFFECTS: + req->execute = nvmet_execute_get_log_cmd_effects_ns; + return 0; } break; case nvme_admin_identify: -- cgit v1.2.3 From 55eb942eda2ccbbbea61db4c1a774ba22b618046 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 20 Jun 2018 00:01:41 -0400 Subject: nvmet: add buffered I/O support for file backed ns Add a new "buffered_io" attribute which, when enabled, disables direct I/O and thus enables page cache based caching. The attribute can only be changed while the namespace is disabled, as the file has to be reopened for the change to take effect.
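A minimal sketch of toggling the attribute through configfs (assuming the standard nvmet configfs layout under /sys/kernel/config and an already configured namespace; the subsystem and namespace names are illustrative):

	cd /sys/kernel/config/nvmet/subsystems/testnqn1/namespaces/1
	echo 0 > enable        # the namespace must be disabled first
	echo 1 > buffered_io   # drop O_DIRECT, use the page cache
	echo 1 > enable        # re-enable; the backing file is reopened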
The possibly blocking read/write are deferred to a newly introduced global workqueue. Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/configfs.c | 29 +++++++++++++++++++++++++++++ drivers/nvme/target/core.c | 9 +++++++++ drivers/nvme/target/io-cmd-file.c | 31 ++++++++++++++++++++++++++----- drivers/nvme/target/nvmet.h | 3 +++ 4 files changed, 67 insertions(+), 5 deletions(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index d3f3b3ec4d1a..fee56b3a23bc 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -407,11 +407,40 @@ static ssize_t nvmet_ns_enable_store(struct config_item *item, CONFIGFS_ATTR(nvmet_ns_, enable); +static ssize_t nvmet_ns_buffered_io_show(struct config_item *item, char *page) +{ + return sprintf(page, "%d\n", to_nvmet_ns(item)->buffered_io); +} + +static ssize_t nvmet_ns_buffered_io_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_ns *ns = to_nvmet_ns(item); + bool val; + + if (strtobool(page, &val)) + return -EINVAL; + + mutex_lock(&ns->subsys->lock); + if (ns->enabled) { + pr_err("disable ns before setting buffered_io value.\n"); + mutex_unlock(&ns->subsys->lock); + return -EINVAL; + } + + ns->buffered_io = val; + mutex_unlock(&ns->subsys->lock); + return count; +} + +CONFIGFS_ATTR(nvmet_ns_, buffered_io); + static struct configfs_attribute *nvmet_ns_attrs[] = { &nvmet_ns_attr_device_path, &nvmet_ns_attr_device_nguid, &nvmet_ns_attr_device_uuid, &nvmet_ns_attr_enable, + &nvmet_ns_attr_buffered_io, NULL, }; diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 74d4b785d2da..96eafbd419e7 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -18,6 +18,7 @@ #include "nvmet.h" +struct workqueue_struct *buffered_io_wq; static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; static DEFINE_IDA(cntlid_ida); @@ -437,6 +438,7 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) ns->nsid = nsid; ns->subsys = subsys; uuid_gen(&ns->uuid); + ns->buffered_io = false; return ns; } @@ -1109,6 +1111,12 @@ static int __init nvmet_init(void) { int error; + buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq", + WQ_MEM_RECLAIM, 0); + if (!buffered_io_wq) { + error = -ENOMEM; + goto out; + } error = nvmet_init_discovery(); if (error) goto out; @@ -1129,6 +1137,7 @@ static void __exit nvmet_exit(void) nvmet_exit_configfs(); nvmet_exit_discovery(); ida_destroy(&cntlid_ida); + destroy_workqueue(buffered_io_wq); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024); BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024); diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 8c42b3a8c420..57c660e3245d 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -16,6 +16,8 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns) { if (ns->file) { + if (ns->buffered_io) + flush_workqueue(buffered_io_wq); mempool_destroy(ns->bvec_pool); ns->bvec_pool = NULL; kmem_cache_destroy(ns->bvec_cache); @@ -27,11 +29,14 @@ void nvmet_file_ns_disable(struct nvmet_ns *ns) int nvmet_file_ns_enable(struct nvmet_ns *ns) { - int ret; + int flags = O_RDWR | O_LARGEFILE; struct kstat stat; + int ret; + + if (!ns->buffered_io) + flags |= O_DIRECT; - ns->file = filp_open(ns->device_path, - O_RDWR | O_LARGEFILE | O_DIRECT, 0); + ns->file = filp_open(ns->device_path, flags, 0); if 
(IS_ERR(ns->file)) { pr_err("failed to open file %s: (%ld)\n", ns->device_path, PTR_ERR(ns->file)); @@ -100,7 +105,7 @@ static ssize_t nvmet_file_submit_bvec(struct nvmet_req *req, loff_t pos, iocb->ki_pos = pos; iocb->ki_filp = req->ns->file; - iocb->ki_flags = IOCB_DIRECT | ki_flags; + iocb->ki_flags = ki_flags | iocb_flags(req->ns->file); ret = call_iter(iocb, &iter); @@ -189,6 +194,19 @@ out: nvmet_file_submit_bvec(req, pos, bv_cnt, total_len); } +static void nvmet_file_buffered_io_work(struct work_struct *w) +{ + struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); + + nvmet_file_execute_rw(req); +} + +static void nvmet_file_execute_rw_buffered_io(struct nvmet_req *req) +{ + INIT_WORK(&req->f.work, nvmet_file_buffered_io_work); + queue_work(buffered_io_wq, &req->f.work); +} + static void nvmet_file_flush_work(struct work_struct *w) { struct nvmet_req *req = container_of(w, struct nvmet_req, f.work); @@ -280,7 +298,10 @@ u16 nvmet_file_parse_io_cmd(struct nvmet_req *req) switch (cmd->common.opcode) { case nvme_cmd_read: case nvme_cmd_write: - req->execute = nvmet_file_execute_rw; + if (req->ns->buffered_io) + req->execute = nvmet_file_execute_rw_buffered_io; + else + req->execute = nvmet_file_execute_rw; req->data_len = nvmet_rw_len(req); return 0; case nvme_cmd_flush: diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 480dfe10fad9..5efb98ec95df 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -65,6 +65,7 @@ struct nvmet_ns { u8 nguid[16]; uuid_t uuid; + bool buffered_io; bool enabled; struct nvmet_subsys *subsys; const char *device_path; @@ -269,6 +270,8 @@ struct nvmet_req { const struct nvmet_fabrics_ops *ops; }; +extern struct workqueue_struct *buffered_io_wq; + static inline void nvmet_set_status(struct nvmet_req *req, u16 status) { req->rsp->status = cpu_to_le16(status << 1); -- cgit v1.2.3 From 0d5ee2b2ab4f6776c361bc975c2323bc8b5cf349 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Wed, 20 Jun 2018 07:15:10 -0700 Subject: nvmet-rdma: support max(16KB, PAGE_SIZE) inline data This patch enables larger inline data sizes, using up to 4 recv SGEs and capping the size at 16KB or at least one page. So on a 4K page system up to 16KB is supported, and on a 64K page system one page of 64KB is supported. We avoid order > 0 page allocations for the inline buffers by using multiple recv SGEs, one for each page. If the device cannot support the configured inline data size due to a lack of recv SGEs, log a warning and reduce the inline size. Add a new configfs port attribute, called param_inline_data_size, to allow configuring the size of inline data for a given nvmf port. The maximum size allowed is still enforced by nvmet-rdma with NVMET_RDMA_MAX_INLINE_DATA_SIZE, which is now max(16KB, PAGE_SIZE). The default size, if not specified via configfs, is still PAGE_SIZE. This preserves the existing behavior, but allows larger inline sizes on small page systems. If the configured inline data size exceeds NVMET_RDMA_MAX_INLINE_DATA_SIZE, a warning is logged and the size is reduced. If param_inline_data_size is set to 0, inline data is disabled for that nvmf port.
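As a usage sketch, the attribute could be set before the port is enabled (again assuming the standard nvmet configfs layout; the port number is illustrative):

	cd /sys/kernel/config/nvmet/ports/1
	echo 16384 > param_inline_data_size   # allow up to 16KB of inline data
	echo 0 > param_inline_data_size       # or disable inline data for this port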
Reviewed-by: Sagi Grimberg Reviewed-by: Max Gurtovoy Signed-off-by: Steve Wise Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 4 +- drivers/nvme/target/configfs.c | 31 ++++++++ drivers/nvme/target/core.c | 4 + drivers/nvme/target/discovery.c | 2 +- drivers/nvme/target/nvmet.h | 2 +- drivers/nvme/target/rdma.c | 169 ++++++++++++++++++++++++++++++---------- 6 files changed, 169 insertions(+), 43 deletions(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index e2c6f8b39388..837bbdbfaa4b 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -268,14 +268,14 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */ if (ctrl->ops->has_keyed_sgls) id->sgls |= cpu_to_le32(1 << 2); - if (ctrl->ops->sqe_inline_size) + if (req->port->inline_data_size) id->sgls |= cpu_to_le32(1 << 20); strcpy(id->subnqn, ctrl->subsys->subsysnqn); /* Max command capsule size is sqe + single page of in-capsule data */ id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) + - ctrl->ops->sqe_inline_size) / 16); + req->port->inline_data_size) / 16); /* Max response capsule size is cqe */ id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16); diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index fee56b3a23bc..3ba5ea5c4376 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -218,6 +218,35 @@ static ssize_t nvmet_addr_trsvcid_store(struct config_item *item, CONFIGFS_ATTR(nvmet_, addr_trsvcid); +static ssize_t nvmet_param_inline_data_size_show(struct config_item *item, + char *page) +{ + struct nvmet_port *port = to_nvmet_port(item); + + return snprintf(page, PAGE_SIZE, "%d\n", port->inline_data_size); +} + +static ssize_t nvmet_param_inline_data_size_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_port *port = to_nvmet_port(item); + int ret; + + if (port->enabled) { + pr_err("Cannot modify inline_data_size while port enabled\n"); + pr_err("Disable the port before modifying\n"); + return -EACCES; + } + ret = kstrtoint(page, 0, &port->inline_data_size); + if (ret) { + pr_err("Invalid value '%s' for inline_data_size\n", page); + return -EINVAL; + } + return count; +} + +CONFIGFS_ATTR(nvmet_, param_inline_data_size); + static ssize_t nvmet_addr_trtype_show(struct config_item *item, char *page) { @@ -903,6 +932,7 @@ static struct configfs_attribute *nvmet_port_attrs[] = { &nvmet_attr_addr_traddr, &nvmet_attr_addr_trsvcid, &nvmet_attr_addr_trtype, + &nvmet_attr_param_inline_data_size, NULL, }; @@ -932,6 +962,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group, INIT_LIST_HEAD(&port->entry); INIT_LIST_HEAD(&port->subsystems); INIT_LIST_HEAD(&port->referrals); + port->inline_data_size = -1; /* < 0 == let the transport choose */ port->disc_addr.portid = cpu_to_le16(portid); config_group_init_type_name(&port->group, name, &nvmet_port_type); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 96eafbd419e7..ddd85715a00a 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -242,6 +242,10 @@ int nvmet_enable_port(struct nvmet_port *port) return ret; } + /* If the transport didn't set inline_data_size, then disable it. 
*/ + if (port->inline_data_size < 0) + port->inline_data_size = 0; + port->enabled = true; return 0; } diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c index 08656b849bd6..eae29f493a07 100644 --- a/drivers/nvme/target/discovery.c +++ b/drivers/nvme/target/discovery.c @@ -171,7 +171,7 @@ static void nvmet_execute_identify_disc_ctrl(struct nvmet_req *req) id->sgls = cpu_to_le32(1 << 0); /* we always support SGLs */ if (ctrl->ops->has_keyed_sgls) id->sgls |= cpu_to_le32(1 << 2); - if (ctrl->ops->sqe_inline_size) + if (req->port->inline_data_size) id->sgls |= cpu_to_le32(1 << 20); strcpy(id->subnqn, ctrl->subsys->subsysnqn); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 5efb98ec95df..688993855402 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -117,6 +117,7 @@ struct nvmet_port { struct list_head referrals; void *priv; bool enabled; + int inline_data_size; }; static inline struct nvmet_port *to_nvmet_port(struct config_item *item) @@ -226,7 +227,6 @@ struct nvmet_req; struct nvmet_fabrics_ops { struct module *owner; unsigned int type; - unsigned int sqe_inline_size; unsigned int msdbd; bool has_keyed_sgls : 1; void (*queue_response)(struct nvmet_req *req); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 52e0c5d579a7..2106ae2ec177 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -33,16 +33,17 @@ #include "nvmet.h" /* - * We allow up to a page of inline data to go with the SQE + * We allow at least 1 page, up to 4 SGEs, and up to 16KB of inline data */ -#define NVMET_RDMA_INLINE_DATA_SIZE PAGE_SIZE +#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE PAGE_SIZE +#define NVMET_RDMA_MAX_INLINE_SGE 4 +#define NVMET_RDMA_MAX_INLINE_DATA_SIZE max_t(int, SZ_16K, PAGE_SIZE) struct nvmet_rdma_cmd { - struct ib_sge sge[2]; + struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1]; struct ib_cqe cqe; struct ib_recv_wr wr; - struct scatterlist inline_sg; - struct page *inline_page; + struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE]; struct nvme_command *nvme_cmd; struct nvmet_rdma_queue *queue; }; @@ -116,6 +117,8 @@ struct nvmet_rdma_device { size_t srq_size; struct kref ref; struct list_head entry; + int inline_data_size; + int inline_page_count; }; static bool nvmet_rdma_use_srq; @@ -138,6 +141,11 @@ static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue); static const struct nvmet_fabrics_ops nvmet_rdma_ops; +static int num_pages(int len) +{ + return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT); +} + /* XXX: really should move to a generic header sooner or later.. 
*/ static inline u32 get_unaligned_le24(const u8 *p) { @@ -184,6 +192,71 @@ nvmet_rdma_put_rsp(struct nvmet_rdma_rsp *rsp) spin_unlock_irqrestore(&rsp->queue->rsps_lock, flags); } +static void nvmet_rdma_free_inline_pages(struct nvmet_rdma_device *ndev, + struct nvmet_rdma_cmd *c) +{ + struct scatterlist *sg; + struct ib_sge *sge; + int i; + + if (!ndev->inline_data_size) + return; + + sg = c->inline_sg; + sge = &c->sge[1]; + + for (i = 0; i < ndev->inline_page_count; i++, sg++, sge++) { + if (sge->length) + ib_dma_unmap_page(ndev->device, sge->addr, + sge->length, DMA_FROM_DEVICE); + if (sg_page(sg)) + __free_page(sg_page(sg)); + } +} + +static int nvmet_rdma_alloc_inline_pages(struct nvmet_rdma_device *ndev, + struct nvmet_rdma_cmd *c) +{ + struct scatterlist *sg; + struct ib_sge *sge; + struct page *pg; + int len; + int i; + + if (!ndev->inline_data_size) + return 0; + + sg = c->inline_sg; + sg_init_table(sg, ndev->inline_page_count); + sge = &c->sge[1]; + len = ndev->inline_data_size; + + for (i = 0; i < ndev->inline_page_count; i++, sg++, sge++) { + pg = alloc_page(GFP_KERNEL); + if (!pg) + goto out_err; + sg_assign_page(sg, pg); + sge->addr = ib_dma_map_page(ndev->device, + pg, 0, PAGE_SIZE, DMA_FROM_DEVICE); + if (ib_dma_mapping_error(ndev->device, sge->addr)) + goto out_err; + sge->length = min_t(int, len, PAGE_SIZE); + sge->lkey = ndev->pd->local_dma_lkey; + len -= sge->length; + } + + return 0; +out_err: + for (; i >= 0; i--, sg--, sge--) { + if (sge->length) + ib_dma_unmap_page(ndev->device, sge->addr, + sge->length, DMA_FROM_DEVICE); + if (sg_page(sg)) + __free_page(sg_page(sg)); + } + return -ENOMEM; +} + static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev, struct nvmet_rdma_cmd *c, bool admin) { @@ -200,33 +273,17 @@ static int nvmet_rdma_alloc_cmd(struct nvmet_rdma_device *ndev, c->sge[0].length = sizeof(*c->nvme_cmd); c->sge[0].lkey = ndev->pd->local_dma_lkey; - if (!admin) { - c->inline_page = alloc_pages(GFP_KERNEL, - get_order(NVMET_RDMA_INLINE_DATA_SIZE)); - if (!c->inline_page) - goto out_unmap_cmd; - c->sge[1].addr = ib_dma_map_page(ndev->device, - c->inline_page, 0, NVMET_RDMA_INLINE_DATA_SIZE, - DMA_FROM_DEVICE); - if (ib_dma_mapping_error(ndev->device, c->sge[1].addr)) - goto out_free_inline_page; - c->sge[1].length = NVMET_RDMA_INLINE_DATA_SIZE; - c->sge[1].lkey = ndev->pd->local_dma_lkey; - } + if (!admin && nvmet_rdma_alloc_inline_pages(ndev, c)) + goto out_unmap_cmd; c->cqe.done = nvmet_rdma_recv_done; c->wr.wr_cqe = &c->cqe; c->wr.sg_list = c->sge; - c->wr.num_sge = admin ? 1 : 2; + c->wr.num_sge = admin ? 
1 : ndev->inline_page_count + 1; return 0; -out_free_inline_page: - if (!admin) { - __free_pages(c->inline_page, - get_order(NVMET_RDMA_INLINE_DATA_SIZE)); - } out_unmap_cmd: ib_dma_unmap_single(ndev->device, c->sge[0].addr, sizeof(*c->nvme_cmd), DMA_FROM_DEVICE); @@ -240,12 +297,8 @@ out: static void nvmet_rdma_free_cmd(struct nvmet_rdma_device *ndev, struct nvmet_rdma_cmd *c, bool admin) { - if (!admin) { - ib_dma_unmap_page(ndev->device, c->sge[1].addr, - NVMET_RDMA_INLINE_DATA_SIZE, DMA_FROM_DEVICE); - __free_pages(c->inline_page, - get_order(NVMET_RDMA_INLINE_DATA_SIZE)); - } + if (!admin) + nvmet_rdma_free_inline_pages(ndev, c); ib_dma_unmap_single(ndev->device, c->sge[0].addr, sizeof(*c->nvme_cmd), DMA_FROM_DEVICE); kfree(c->nvme_cmd); @@ -429,7 +482,7 @@ static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); } - if (rsp->req.sg != &rsp->cmd->inline_sg) + if (rsp->req.sg != rsp->cmd->inline_sg) sgl_free(rsp->req.sg); if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list))) @@ -529,10 +582,25 @@ static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc) static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len, u64 off) { - sg_init_table(&rsp->cmd->inline_sg, 1); - sg_set_page(&rsp->cmd->inline_sg, rsp->cmd->inline_page, len, off); - rsp->req.sg = &rsp->cmd->inline_sg; - rsp->req.sg_cnt = 1; + int sg_count = num_pages(len); + struct scatterlist *sg; + int i; + + sg = rsp->cmd->inline_sg; + for (i = 0; i < sg_count; i++, sg++) { + if (i < sg_count - 1) + sg_unmark_end(sg); + else + sg_mark_end(sg); + sg->offset = off; + sg->length = min_t(int, len, PAGE_SIZE - off); + len -= sg->length; + if (!i) + off = 0; + } + + rsp->req.sg = rsp->cmd->inline_sg; + rsp->req.sg_cnt = sg_count; } static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp) @@ -544,7 +612,7 @@ static u16 nvmet_rdma_map_sgl_inline(struct nvmet_rdma_rsp *rsp) if (!nvme_is_write(rsp->req.cmd)) return NVME_SC_INVALID_FIELD | NVME_SC_DNR; - if (off + len > NVMET_RDMA_INLINE_DATA_SIZE) { + if (off + len > rsp->queue->dev->inline_data_size) { pr_err("invalid inline data offset!\n"); return NVME_SC_SGL_INVALID_OFFSET | NVME_SC_DNR; } @@ -743,7 +811,7 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) srq_size = 4095; /* XXX: tune */ srq_attr.attr.max_wr = srq_size; - srq_attr.attr.max_sge = 2; + srq_attr.attr.max_sge = 1 + ndev->inline_page_count; srq_attr.attr.srq_limit = 0; srq_attr.srq_type = IB_SRQT_BASIC; srq = ib_create_srq(ndev->pd, &srq_attr); @@ -793,7 +861,10 @@ static void nvmet_rdma_free_dev(struct kref *ref) static struct nvmet_rdma_device * nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) { + struct nvmet_port *port = cm_id->context; struct nvmet_rdma_device *ndev; + int inline_page_count; + int inline_sge_count; int ret; mutex_lock(&device_list_mutex); @@ -807,6 +878,18 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) if (!ndev) goto out_err; + inline_page_count = num_pages(port->inline_data_size); + inline_sge_count = max(cm_id->device->attrs.max_sge_rd, + cm_id->device->attrs.max_sge) - 1; + if (inline_page_count > inline_sge_count) { + pr_warn("inline_data_size %d cannot be supported by device %s. 
Reducing to %lu.\n", + port->inline_data_size, cm_id->device->name, + inline_sge_count * PAGE_SIZE); + port->inline_data_size = inline_sge_count * PAGE_SIZE; + inline_page_count = inline_sge_count; + } + ndev->inline_data_size = port->inline_data_size; + ndev->inline_page_count = inline_page_count; ndev->device = cm_id->device; kref_init(&ndev->ref); @@ -881,7 +964,7 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) } else { /* +1 for drain */ qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size; - qp_attr.cap.max_recv_sge = 2; + qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count; } ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr); @@ -1379,6 +1462,15 @@ static int nvmet_rdma_add_port(struct nvmet_port *port) return -EINVAL; } + if (port->inline_data_size < 0) { + port->inline_data_size = NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE; + } else if (port->inline_data_size > NVMET_RDMA_MAX_INLINE_DATA_SIZE) { + pr_warn("inline_data_size %u is too large, reducing to %u\n", + port->inline_data_size, + NVMET_RDMA_MAX_INLINE_DATA_SIZE); + port->inline_data_size = NVMET_RDMA_MAX_INLINE_DATA_SIZE; + } + ret = inet_pton_with_scope(&init_net, af, port->disc_addr.traddr, port->disc_addr.trsvcid, &addr); if (ret) { @@ -1456,7 +1548,6 @@ static void nvmet_rdma_disc_port_addr(struct nvmet_req *req, static const struct nvmet_fabrics_ops nvmet_rdma_ops = { .owner = THIS_MODULE, .type = NVMF_TRTYPE_RDMA, - .sqe_inline_size = NVMET_RDMA_INLINE_DATA_SIZE, .msdbd = 1, .has_keyed_sgls = 1, .add_port = nvmet_rdma_add_port, -- cgit v1.2.3 From 2fc464e2162c2b2f7faf7404fa9c35d1cf70aa00 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 27 Jun 2018 14:58:02 +0300 Subject: nvmet-rdma: add unlikely check in the fast path The ib_post_send operation should succeed unless something unusual has happened to the IB device. Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 2106ae2ec177..4ca09456bbbb 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -546,7 +546,7 @@ static void nvmet_rdma_queue_response(struct nvmet_req *req) rsp->send_sge.addr, rsp->send_sge.length, DMA_TO_DEVICE); - if (ib_post_send(cm_id->qp, first_wr, &bad_wr)) { + if (unlikely(ib_post_send(cm_id->qp, first_wr, &bad_wr))) { pr_err("sending cmd response failed\n"); nvmet_rdma_release_rsp(rsp); } -- cgit v1.2.3 From 202093848cac2da7d92ae666b51b7109bbab633c Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 1 Jul 2018 12:20:24 +0300 Subject: nvmet-rdma: add an error flow for post_recv failures The post receive buffer operation can fail, so we should make sure to have an error flow during the initialization phase. While we're here, add a debug print in case of a failure.
Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/target/rdma.c | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 4ca09456bbbb..e7f43d1e1779 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -436,14 +436,21 @@ static int nvmet_rdma_post_recv(struct nvmet_rdma_device *ndev, struct nvmet_rdma_cmd *cmd) { struct ib_recv_wr *bad_wr; + int ret; ib_dma_sync_single_for_device(ndev->device, cmd->sge[0].addr, cmd->sge[0].length, DMA_FROM_DEVICE); if (ndev->srq) - return ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr); - return ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr); + ret = ib_post_srq_recv(ndev->srq, &cmd->wr, &bad_wr); + else + ret = ib_post_recv(cmd->queue->cm_id->qp, &cmd->wr, &bad_wr); + + if (unlikely(ret)) + pr_err("post_recv cmd failed\n"); + + return ret; } static void nvmet_rdma_process_wr_wait_list(struct nvmet_rdma_queue *queue) @@ -833,11 +840,16 @@ static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) ndev->srq = srq; ndev->srq_size = srq_size; - for (i = 0; i < srq_size; i++) - nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]); + for (i = 0; i < srq_size; i++) { + ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]); + if (ret) + goto out_free_cmds; + } return 0; +out_free_cmds: + nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false); out_destroy_srq: ib_destroy_srq(srq); return ret; @@ -982,13 +994,17 @@ static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue) if (!ndev->srq) { for (i = 0; i < queue->recv_queue_size; i++) { queue->cmds[i].queue = queue; - nvmet_rdma_post_recv(ndev, &queue->cmds[i]); + ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]); + if (ret) + goto err_destroy_qp; } } out: return ret; +err_destroy_qp: + rdma_destroy_qp(queue->cm_id); err_destroy_cq: ib_free_cq(queue->cq); goto out; -- cgit v1.2.3 From 59e29ce66bc52ebd6d0cb450f13079c7e913430d Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 29 Jun 2018 16:50:00 -0600 Subject: nvme: cache struct nvme_ctrl reference to struct nvme_request We will need to reference the controller at setup and completion time for tracing and future traffic-based keep-alive support. Reviewed-by: Johannes Thumshirn Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/loop.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index d8d91f04bd7e..af7fbf4132b0 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -227,6 +227,7 @@ static int nvme_loop_init_request(struct blk_mq_tag_set *set, { struct nvme_loop_ctrl *ctrl = set->driver_data; + nvme_req(req)->ctrl = &ctrl->ctrl; return nvme_loop_init_iod(ctrl, blk_mq_rq_to_pdu(req), (set == &ctrl->tag_set) ? hctx_idx + 1 : 0); } -- cgit v1.2.3 From 1b72b71faccee986e2128a271125177dfe91f7b7 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Jul 2018 12:43:16 +0300 Subject: nvmet: fix file discard return status If nvmet_copy_from_sgl fails, we falsely return a successful completion status.
Fixes: d5eff33ee6f8 ("nvmet: add simple file backed ns support") Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-file.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 57c660e3245d..dad8d44bf90e 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -227,22 +227,24 @@ static void nvmet_file_execute_discard(struct nvmet_req *req) { int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; struct nvme_dsm_range range; - loff_t offset; - loff_t len; - int i, ret; + loff_t offset, len; + u16 ret; + int i; for (i = 0; i <= le32_to_cpu(req->cmd->dsm.nr); i++) { - if (nvmet_copy_from_sgl(req, i * sizeof(range), &range, - sizeof(range))) + ret = nvmet_copy_from_sgl(req, i * sizeof(range), &range, + sizeof(range)); + if (ret) break; offset = le64_to_cpu(range.slba) << req->ns->blksize_shift; len = le32_to_cpu(range.nlb) << req->ns->blksize_shift; - ret = vfs_fallocate(req->ns->file, mode, offset, len); - if (ret) + if (vfs_fallocate(req->ns->file, mode, offset, len)) { + ret = NVME_SC_INTERNAL | NVME_SC_DNR; break; + } } - nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); + nvmet_req_complete(req, ret); } static void nvmet_file_dsm_work(struct work_struct *w) -- cgit v1.2.3 From 9c891c139894ce2ec0ca585a00e0bec5e6b4ccab Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Jul 2018 16:13:04 +0300 Subject: nvmet: check fileio lba range access boundaries Fail out-of-bounds with a proper status code. Fixes: d5eff33ee6f8 ("nvmet: add simple file backed ns support") Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/io-cmd-file.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index dad8d44bf90e..c2d0d08b59c8 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -145,6 +145,12 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) return; } + pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift; + if (unlikely(pos + req->data_len > req->ns->size)) { + nvmet_req_complete(req, NVME_SC_LBA_RANGE | NVME_SC_DNR); + return; + } + if (nr_bvec > NVMET_MAX_INLINE_BIOVEC) req->f.bvec = kmalloc_array(nr_bvec, sizeof(struct bio_vec), GFP_KERNEL); @@ -160,8 +166,6 @@ static void nvmet_file_execute_rw(struct nvmet_req *req) is_sync = true; } - pos = le64_to_cpu(req->cmd->rw.slba) << req->ns->blksize_shift; - memset(&req->f.iocb, 0, sizeof(struct kiocb)); for_each_sg_page(req->sg, &sg_pg_iter, req->sg_cnt, 0) { nvmet_file_init_bvec(&req->f.bvec[bv_cnt], &sg_pg_iter); @@ -236,8 +240,14 @@ static void nvmet_file_execute_discard(struct nvmet_req *req) sizeof(range)); if (ret) break; + offset = le64_to_cpu(range.slba) << req->ns->blksize_shift; len = le32_to_cpu(range.nlb) << req->ns->blksize_shift; + if (offset + len > req->ns->size) { + ret = NVME_SC_LBA_RANGE | NVME_SC_DNR; + break; + } + if (vfs_fallocate(req->ns->file, mode, offset, len)) { ret = NVME_SC_INTERNAL | NVME_SC_DNR; break; @@ -283,6 +293,11 @@ static void nvmet_file_write_zeroes_work(struct work_struct *w) len = (((sector_t)le16_to_cpu(write_zeroes->length) + 1) << req->ns->blksize_shift); + if (unlikely(offset + len > 
req->ns->size)) { + nvmet_req_complete(req, NVME_SC_LBA_RANGE | NVME_SC_DNR); + return; + } + ret = vfs_fallocate(req->ns->file, mode, offset, len); nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0); } -- cgit v1.2.3 From 1b0d274523df5ef1caedc834da055ff721e4d4f0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 17 Jul 2018 17:17:36 +0300 Subject: nvmet: don't use uuid_le type Don't use sizeof(uuid_le) where none of the parameters is of type uuid_le. Since both arguments are u8 [16], use the size of the destination there. Moreover, uuid_le is a deprecated type, and nvmet is using uuid_t already. Signed-off-by: Andy Shevchenko Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 837bbdbfaa4b..16a9b24270f9 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -338,7 +338,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) */ id->nmic = (1 << 0); - memcpy(&id->nguid, &ns->nguid, sizeof(uuid_le)); + memcpy(&id->nguid, &ns->nguid, sizeof(id->nguid)); id->lbaf[0].ds = ns->blksize_shift; -- cgit v1.2.3 From 4ee43280488b0f6cbd74702725a32f47d03d690b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 7 Jun 2018 15:09:50 +0200 Subject: nvmet: keep a port pointer in nvmet_ctrl This will be needed for the ANA AEN code. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn --- drivers/nvme/target/core.c | 2 ++ drivers/nvme/target/nvmet.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index ddd85715a00a..cbcd19f52121 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -876,6 +876,8 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, nvmet_init_cap(ctrl); + ctrl->port = req->port; + INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work); INIT_LIST_HEAD(&ctrl->async_events); diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 688993855402..de12dcbfd3f3 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -140,6 +140,8 @@ struct nvmet_ctrl { u16 cntlid; u32 kato; + struct nvmet_port *port; + u32 aen_enabled; unsigned long aen_masked; struct nvmet_req *async_event_cmds[NVMET_ASYNC_EVENTS]; -- cgit v1.2.3 From 793c7cfce02ce88b7bd67d43834c052d16c096e3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 13 May 2018 19:00:13 +0200 Subject: nvmet: track and limit the number of namespaces per subsystem TP 4004 introduces a new 'Maximum Number of Allocated Namespaces' field in the Identify controller data to help the host size resources. Put an upper limit on the number of supported namespaces to be able to support this value, as supporting 32 bits worth of namespaces would lead to very large buffers. The limit is completely arbitrary at this point. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Martin K.
Petersen Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn --- drivers/nvme/target/admin-cmd.c | 1 + drivers/nvme/target/core.c | 8 +++++++- drivers/nvme/target/nvmet.h | 8 ++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 16a9b24270f9..55f2bf4b5d07 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -252,6 +252,7 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) id->maxcmd = cpu_to_le16(NVMET_MAX_CMD); id->nn = cpu_to_le32(ctrl->subsys->max_nsid); + id->mnan = cpu_to_le32(NVMET_MAX_NAMESPACES); id->oncs = cpu_to_le16(NVME_CTRL_ONCS_DSM | NVME_CTRL_ONCS_WRITE_ZEROES); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index cbcd19f52121..42e8565015d5 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -337,9 +337,13 @@ static void nvmet_ns_dev_disable(struct nvmet_ns *ns) int nvmet_ns_enable(struct nvmet_ns *ns) { struct nvmet_subsys *subsys = ns->subsys; - int ret = 0; + int ret; mutex_lock(&subsys->lock); + ret = -EMFILE; + if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES) + goto out_unlock; + ret = 0; if (ns->enabled) goto out_unlock; @@ -374,6 +378,7 @@ int nvmet_ns_enable(struct nvmet_ns *ns) list_add_tail_rcu(&ns->dev_link, &old->dev_link); } + subsys->nr_namespaces++; nvmet_ns_changed(subsys, ns->nsid); ns->enabled = true; @@ -414,6 +419,7 @@ void nvmet_ns_disable(struct nvmet_ns *ns) percpu_ref_exit(&ns->ref); mutex_lock(&subsys->lock); + subsys->nr_namespaces--; nvmet_ns_changed(subsys, ns->nsid); nvmet_ns_dev_disable(ns); out_unlock: diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index de12dcbfd3f3..701017f7f3df 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -170,6 +170,7 @@ struct nvmet_subsys { struct kref ref; struct list_head namespaces; + unsigned int nr_namespaces; unsigned int max_nsid; struct list_head ctrls; @@ -362,6 +363,13 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd); #define NVMET_QUEUE_SIZE 1024 #define NVMET_NR_QUEUES 128 #define NVMET_MAX_CMD NVMET_QUEUE_SIZE + +/* + * Nice round number that makes a list of nsids fit into a page. + * Should become tunable at some point in the future. + */ +#define NVMET_MAX_NAMESPACES 1024 + #define NVMET_KAS 10 #define NVMET_DISC_KATO 120 -- cgit v1.2.3 From 72efd25dcf4f6310e9e6fa85620aa443b27c23fe Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 19 Jul 2018 07:35:20 -0700 Subject: nvmet: add minimal ANA support Add support for Asymmetric Namespace Access as specified in NVMe 1.3 TP 4004. Just add a default ANA group 1 that is optimized on all ports. This is (and will remain) the default assignment for any namespace not explicitly assigned to another ANA group. The ANA state can be manually changed through the configfs interface, including the change state. Includes fixes and improvements from Hannes Reinecke. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Martin K.
Petersen Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn --- drivers/nvme/target/admin-cmd.c | 87 +++++++++++++++++++++++++++++++++++++++-- drivers/nvme/target/configfs.c | 10 +++++ drivers/nvme/target/core.c | 34 ++++++++++++++++ drivers/nvme/target/nvmet.h | 15 +++++++ 4 files changed, 142 insertions(+), 4 deletions(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 55f2bf4b5d07..b98d38c4e579 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -182,6 +182,69 @@ out: nvmet_req_complete(req, status); } +static u32 nvmet_format_ana_group(struct nvmet_req *req, u32 grpid, + struct nvme_ana_group_desc *desc) +{ + struct nvmet_ctrl *ctrl = req->sq->ctrl; + struct nvmet_ns *ns; + u32 count = 0; + + if (!(req->cmd->get_log_page.lsp & NVME_ANA_LOG_RGO)) { + rcu_read_lock(); + list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) + if (ns->anagrpid == grpid) + desc->nsids[count++] = cpu_to_le32(ns->nsid); + rcu_read_unlock(); + } + + desc->grpid = cpu_to_le32(grpid); + desc->nnsids = cpu_to_le32(count); + desc->chgcnt = cpu_to_le64(nvmet_ana_chgcnt); + desc->state = req->port->ana_state[grpid]; + memset(desc->rsvd17, 0, sizeof(desc->rsvd17)); + return sizeof(struct nvme_ana_group_desc) + count * sizeof(__le32); +} + +static void nvmet_execute_get_log_page_ana(struct nvmet_req *req) +{ + struct nvme_ana_rsp_hdr hdr = { 0, }; + struct nvme_ana_group_desc *desc; + size_t offset = sizeof(struct nvme_ana_rsp_hdr); /* start beyond hdr */ + size_t len; + u32 grpid; + u16 ngrps = 0; + u16 status; + + status = NVME_SC_INTERNAL; + desc = kmalloc(sizeof(struct nvme_ana_group_desc) + + NVMET_MAX_NAMESPACES * sizeof(__le32), GFP_KERNEL); + if (!desc) + goto out; + + down_read(&nvmet_ana_sem); + for (grpid = 1; grpid <= NVMET_MAX_ANAGRPS; grpid++) { + if (!nvmet_ana_group_enabled[grpid]) + continue; + len = nvmet_format_ana_group(req, grpid, desc); + status = nvmet_copy_to_sgl(req, offset, desc, len); + if (status) + break; + offset += len; + ngrps++; + } + + hdr.chgcnt = cpu_to_le64(nvmet_ana_chgcnt); + hdr.ngrps = cpu_to_le16(ngrps); + up_read(&nvmet_ana_sem); + + kfree(desc); + + /* copy the header last once we know the number of groups */ + status = nvmet_copy_to_sgl(req, 0, &hdr, sizeof(hdr)); +out: + nvmet_req_complete(req, status); +} + static void nvmet_execute_identify_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; @@ -213,8 +276,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) * the safest is to leave it as zeroes. */ - /* we support multiple ports and multiples hosts: */ - id->cmic = (1 << 0) | (1 << 1); + /* we support multiple ports, multiples hosts and ANA: */ + id->cmic = (1 << 0) | (1 << 1) | (1 << 3); /* no limit on data transfer sizes for now */ id->mdts = 0; @@ -282,6 +345,11 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) id->msdbd = ctrl->ops->msdbd; + id->anacap = (1 << 0) | (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4); + id->anatt = 10; /* random value */ + id->anagrpmax = cpu_to_le32(NVMET_MAX_ANAGRPS); + id->nanagrpid = cpu_to_le32(NVMET_MAX_ANAGRPS); + /* * Meh, we don't really support any power state. Fake up the same * values that qemu does. @@ -323,8 +391,15 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) * nuse = ncap = nsze isn't always true, but we have no way to find * that out from the underlying device. 
*/ - id->ncap = id->nuse = id->nsze = - cpu_to_le64(ns->size >> ns->blksize_shift); + id->ncap = id->nsze = cpu_to_le64(ns->size >> ns->blksize_shift); + switch (req->port->ana_state[ns->anagrpid]) { + case NVME_ANA_INACCESSIBLE: + case NVME_ANA_PERSISTENT_LOSS: + break; + default: + id->nuse = id->nsze; + break; + } /* * We just provide a single LBA format that matches what the @@ -338,6 +413,7 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req) * controllers, but also with any other user of the block device. */ id->nmic = (1 << 0); + id->anagrpid = cpu_to_le32(ns->anagrpid); memcpy(&id->nguid, &ns->nguid, sizeof(id->nguid)); @@ -620,6 +696,9 @@ u16 nvmet_parse_admin_cmd(struct nvmet_req *req) case NVME_LOG_CMD_EFFECTS: req->execute = nvmet_execute_get_log_cmd_effects_ns; return 0; + case NVME_LOG_ANA: + req->execute = nvmet_execute_get_log_page_ana; + return 0; } break; case nvme_admin_identify: diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 3ba5ea5c4376..b3c62b41b2ae 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -923,6 +923,7 @@ static void nvmet_port_release(struct config_item *item) { struct nvmet_port *port = to_nvmet_port(item); + kfree(port->ana_state); kfree(port); } @@ -959,6 +960,15 @@ static struct config_group *nvmet_ports_make(struct config_group *group, if (!port) return ERR_PTR(-ENOMEM); + port->ana_state = kcalloc(NVMET_MAX_ANAGRPS + 1, + sizeof(*port->ana_state), GFP_KERNEL); + if (!port->ana_state) { + kfree(port); + return ERR_PTR(-ENOMEM); + } + + port->ana_state[NVMET_DEFAULT_ANA_GRPID] = NVME_ANA_OPTIMIZED; + INIT_LIST_HEAD(&port->entry); INIT_LIST_HEAD(&port->subsystems); INIT_LIST_HEAD(&port->referrals); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 42e8565015d5..43a755f7baa5 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -40,6 +40,10 @@ static DEFINE_IDA(cntlid_ida); */ DECLARE_RWSEM(nvmet_config_sem); +u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1]; +u64 nvmet_ana_chgcnt; +DECLARE_RWSEM(nvmet_ana_sem); + static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port, const char *subsysnqn); @@ -430,6 +434,10 @@ void nvmet_ns_free(struct nvmet_ns *ns) { nvmet_ns_disable(ns); + down_write(&nvmet_ana_sem); + nvmet_ana_group_enabled[ns->anagrpid]--; + up_write(&nvmet_ana_sem); + kfree(ns->device_path); kfree(ns); } @@ -447,6 +455,12 @@ struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid) ns->nsid = nsid; ns->subsys = subsys; + + down_write(&nvmet_ana_sem); + ns->anagrpid = NVMET_DEFAULT_ANA_GRPID; + nvmet_ana_group_enabled[ns->anagrpid]++; + up_write(&nvmet_ana_sem); + uuid_gen(&ns->uuid); ns->buffered_io = false; @@ -554,6 +568,20 @@ int nvmet_sq_init(struct nvmet_sq *sq) } EXPORT_SYMBOL_GPL(nvmet_sq_init); +static inline u16 nvmet_check_ana_state(struct nvmet_port *port, + struct nvmet_ns *ns) +{ + enum nvme_ana_state state = port->ana_state[ns->anagrpid]; + + if (unlikely(state == NVME_ANA_INACCESSIBLE)) + return NVME_SC_ANA_INACCESSIBLE; + if (unlikely(state == NVME_ANA_PERSISTENT_LOSS)) + return NVME_SC_ANA_PERSISTENT_LOSS; + if (unlikely(state == NVME_ANA_CHANGE)) + return NVME_SC_ANA_TRANSITION; + return 0; +} + static u16 nvmet_parse_io_cmd(struct nvmet_req *req) { struct nvme_command *cmd = req->cmd; @@ -566,6 +594,9 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req) req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid); if (unlikely(!req->ns)) return NVME_SC_INVALID_NS | 
NVME_SC_DNR; + ret = nvmet_check_ana_state(req->port, req->ns); + if (unlikely(ret)) + return ret; if (req->ns->file) return nvmet_file_parse_io_cmd(req); @@ -1123,12 +1154,15 @@ static int __init nvmet_init(void) { int error; + nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1; + buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq", WQ_MEM_RECLAIM, 0); if (!buffered_io_wq) { error = -ENOMEM; goto out; } + error = nvmet_init_discovery(); if (error) goto out; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index 701017f7f3df..f7d622fc1aa7 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -64,6 +64,7 @@ struct nvmet_ns { loff_t size; u8 nguid[16]; uuid_t uuid; + u32 anagrpid; bool buffered_io; bool enabled; @@ -115,6 +116,7 @@ struct nvmet_port { struct list_head subsystems; struct config_group referrals_group; struct list_head referrals; + enum nvme_ana_state *ana_state; void *priv; bool enabled; int inline_data_size; @@ -370,6 +372,15 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd); */ #define NVMET_MAX_NAMESPACES 1024 +/* + * 0 is not a valid ANA group ID, so we start numbering at 1. + * + * ANA Group 1 exists without manual intervention, has namespaces assigned to it + * by default, and is available in an optimized state through all ports. + */ +#define NVMET_MAX_ANAGRPS 1 +#define NVMET_DEFAULT_ANA_GRPID 1 + #define NVMET_KAS 10 #define NVMET_DISC_KATO 120 @@ -383,6 +394,10 @@ extern struct nvmet_subsys *nvmet_disc_subsys; extern u64 nvmet_genctr; extern struct rw_semaphore nvmet_config_sem; +extern u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1]; +extern u64 nvmet_ana_chgcnt; +extern struct rw_semaphore nvmet_ana_sem; + bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys, const char *hostnqn); -- cgit v1.2.3 From 62ac0d32f74ea511d5813be728dc589d03f866a3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 1 Jun 2018 08:59:25 +0200 Subject: nvmet: support configuring ANA groups Allow creating non-default ANA groups (group ID > 1). Groups are created either by assigning the group ID to a namespace, or by creating a configfs group object under a specific port. All namespaces assigned to a group that doesn't have a configfs object for a given port are marked as inaccessible. Allow changing the ANA state on a per-port basis by creating an ana_groups directory under each port, and another directory with an ana_state file in it. The default ANA group 1 directory is created automatically for each port. For all changes in ANA configuration the ANA change AEN is sent. We only keep a global changecount instead of additional per-group changecounts to keep the implementation as simple as possible. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn --- drivers/nvme/target/admin-cmd.c | 1 + drivers/nvme/target/configfs.c | 182 +++++++++++++++++++++++++++++++++++++++- drivers/nvme/target/core.c | 27 ++++++ drivers/nvme/target/nvmet.h | 30 ++++++- 4 files changed, 236 insertions(+), 4 deletions(-) (limited to 'drivers/nvme/target') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index b98d38c4e579..d1de639786ee 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -235,6 +235,7 @@ static void nvmet_execute_get_log_page_ana(struct nvmet_req *req) hdr.chgcnt = cpu_to_le64(nvmet_ana_chgcnt); hdr.ngrps = cpu_to_le16(ngrps); + clear_bit(NVME_AEN_CFG_ANA_CHANGE, &req->sq->ctrl->aen_masked); up_read(&nvmet_ana_sem); kfree(desc); diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index b3c62b41b2ae..51f5a8c092b4 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -411,6 +411,39 @@ out_unlock: CONFIGFS_ATTR(nvmet_ns_, device_nguid); +static ssize_t nvmet_ns_ana_grpid_show(struct config_item *item, char *page) +{ + return sprintf(page, "%u\n", to_nvmet_ns(item)->anagrpid); +} + +static ssize_t nvmet_ns_ana_grpid_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_ns *ns = to_nvmet_ns(item); + u32 oldgrpid, newgrpid; + int ret; + + ret = kstrtou32(page, 0, &newgrpid); + if (ret) + return ret; + + if (newgrpid < 1 || newgrpid > NVMET_MAX_ANAGRPS) + return -EINVAL; + + down_write(&nvmet_ana_sem); + oldgrpid = ns->anagrpid; + nvmet_ana_group_enabled[newgrpid]++; + ns->anagrpid = newgrpid; + nvmet_ana_group_enabled[oldgrpid]--; + nvmet_ana_chgcnt++; + up_write(&nvmet_ana_sem); + + nvmet_send_ana_event(ns->subsys, NULL); + return count; +} + +CONFIGFS_ATTR(nvmet_ns_, ana_grpid); + static ssize_t nvmet_ns_enable_show(struct config_item *item, char *page) { return sprintf(page, "%d\n", to_nvmet_ns(item)->enabled); @@ -468,6 +501,7 @@ static struct configfs_attribute *nvmet_ns_attrs[] = { &nvmet_ns_attr_device_path, &nvmet_ns_attr_device_nguid, &nvmet_ns_attr_device_uuid, + &nvmet_ns_attr_ana_grpid, &nvmet_ns_attr_enable, &nvmet_ns_attr_buffered_io, NULL, @@ -916,6 +950,134 @@ static const struct config_item_type nvmet_referrals_type = { .ct_group_ops = &nvmet_referral_group_ops, }; +static struct { + enum nvme_ana_state state; + const char *name; +} nvmet_ana_state_names[] = { + { NVME_ANA_OPTIMIZED, "optimized" }, + { NVME_ANA_NONOPTIMIZED, "non-optimized" }, + { NVME_ANA_INACCESSIBLE, "inaccessible" }, + { NVME_ANA_PERSISTENT_LOSS, "persistent-loss" }, + { NVME_ANA_CHANGE, "change" }, +}; + +static ssize_t nvmet_ana_group_ana_state_show(struct config_item *item, + char *page) +{ + struct nvmet_ana_group *grp = to_ana_group(item); + enum nvme_ana_state state = grp->port->ana_state[grp->grpid]; + int i; + + for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) { + if (state != nvmet_ana_state_names[i].state) + continue; + return sprintf(page, "%s\n", nvmet_ana_state_names[i].name); + } + + return sprintf(page, "\n"); +} + +static ssize_t nvmet_ana_group_ana_state_store(struct config_item *item, + const char *page, size_t count) +{ + struct nvmet_ana_group *grp = to_ana_group(item); + int i; + + for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) { + if (sysfs_streq(page, nvmet_ana_state_names[i].name)) + goto found; + } + + pr_err("Invalid value '%s' for ana_state\n", page); + return -EINVAL; + +found: + 
down_write(&nvmet_ana_sem); + grp->port->ana_state[grp->grpid] = nvmet_ana_state_names[i].state; + nvmet_ana_chgcnt++; + up_write(&nvmet_ana_sem); + + nvmet_port_send_ana_event(grp->port); + return count; +} + +CONFIGFS_ATTR(nvmet_ana_group_, ana_state); + +static struct configfs_attribute *nvmet_ana_group_attrs[] = { + &nvmet_ana_group_attr_ana_state, + NULL, +}; + +static void nvmet_ana_group_release(struct config_item *item) +{ + struct nvmet_ana_group *grp = to_ana_group(item); + + if (grp == &grp->port->ana_default_group) + return; + + down_write(&nvmet_ana_sem); + grp->port->ana_state[grp->grpid] = NVME_ANA_INACCESSIBLE; + nvmet_ana_group_enabled[grp->grpid]--; + up_write(&nvmet_ana_sem); + + nvmet_port_send_ana_event(grp->port); + kfree(grp); +} + +static struct configfs_item_operations nvmet_ana_group_item_ops = { + .release = nvmet_ana_group_release, +}; + +static const struct config_item_type nvmet_ana_group_type = { + .ct_item_ops = &nvmet_ana_group_item_ops, + .ct_attrs = nvmet_ana_group_attrs, + .ct_owner = THIS_MODULE, +}; + +static struct config_group *nvmet_ana_groups_make_group( + struct config_group *group, const char *name) +{ + struct nvmet_port *port = ana_groups_to_port(&group->cg_item); + struct nvmet_ana_group *grp; + u32 grpid; + int ret; + + ret = kstrtou32(name, 0, &grpid); + if (ret) + goto out; + + ret = -EINVAL; + if (grpid <= 1 || grpid > NVMET_MAX_ANAGRPS) + goto out; + + ret = -ENOMEM; + grp = kzalloc(sizeof(*grp), GFP_KERNEL); + if (!grp) + goto out; + grp->port = port; + grp->grpid = grpid; + + down_write(&nvmet_ana_sem); + nvmet_ana_group_enabled[grpid]++; + up_write(&nvmet_ana_sem); + + nvmet_port_send_ana_event(grp->port); + + config_group_init_type_name(&grp->group, name, &nvmet_ana_group_type); + return &grp->group; +out: + return ERR_PTR(ret); +} + +static struct configfs_group_operations nvmet_ana_groups_group_ops = { + .make_group = nvmet_ana_groups_make_group, +}; + +static const struct config_item_type nvmet_ana_groups_type = { + .ct_group_ops = &nvmet_ana_groups_group_ops, + .ct_owner = THIS_MODULE, +}; + /* * Ports definitions. 
*/ @@ -952,6 +1114,7 @@ static struct config_group *nvmet_ports_make(struct config_group *group, { struct nvmet_port *port; u16 portid; + u32 i; if (kstrtou16(name, 0, &portid)) return ERR_PTR(-EINVAL); @@ -967,7 +1130,12 @@ static struct config_group *nvmet_ports_make(struct config_group *group, return ERR_PTR(-ENOMEM); } - port->ana_state[NVMET_DEFAULT_ANA_GRPID] = NVME_ANA_OPTIMIZED; + for (i = 1; i <= NVMET_MAX_ANAGRPS; i++) { + if (i == NVMET_DEFAULT_ANA_GRPID) + port->ana_state[1] = NVME_ANA_OPTIMIZED; + else + port->ana_state[i] = NVME_ANA_INACCESSIBLE; + } INIT_LIST_HEAD(&port->entry); INIT_LIST_HEAD(&port->subsystems); @@ -985,6 +1153,18 @@ static struct config_group *nvmet_ports_make(struct config_group *group, "referrals", &nvmet_referrals_type); configfs_add_default_group(&port->referrals_group, &port->group); + config_group_init_type_name(&port->ana_groups_group, + "ana_groups", &nvmet_ana_groups_type); + configfs_add_default_group(&port->ana_groups_group, &port->group); + + port->ana_default_group.port = port; + port->ana_default_group.grpid = NVMET_DEFAULT_ANA_GRPID; + config_group_init_type_name(&port->ana_default_group.group, + __stringify(NVMET_DEFAULT_ANA_GRPID), + &nvmet_ana_group_type); + configfs_add_default_group(&port->ana_default_group.group, + &port->ana_groups_group); + return &port->group; } diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 43a755f7baa5..3ceb7a03bb2a 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -194,6 +194,33 @@ static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid) } } +void nvmet_send_ana_event(struct nvmet_subsys *subsys, + struct nvmet_port *port) +{ + struct nvmet_ctrl *ctrl; + + mutex_lock(&subsys->lock); + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { + if (port && ctrl->port != port) + continue; + if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_ANA_CHANGE)) + continue; + nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, + NVME_AER_NOTICE_ANA, NVME_LOG_ANA); + } + mutex_unlock(&subsys->lock); +} + +void nvmet_port_send_ana_event(struct nvmet_port *port) +{ + struct nvmet_subsys_link *p; + + down_read(&nvmet_config_sem); + list_for_each_entry(p, &port->subsystems, entry) + nvmet_send_ana_event(p->subsys, port); + up_read(&nvmet_config_sem); +} + int nvmet_register_transport(const struct nvmet_fabrics_ops *ops) { int ret = 0; diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index f7d622fc1aa7..22941045f46e 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -30,12 +30,11 @@ #define NVMET_ASYNC_EVENTS 4 #define NVMET_ERROR_LOG_SLOTS 128 - /* * Supported optional AENs: */ #define NVMET_AEN_CFG_OPTIONAL \ - NVME_AEN_CFG_NS_ATTR + (NVME_AEN_CFG_NS_ATTR | NVME_AEN_CFG_ANA_CHANGE) /* * Plus mandatory SMART AENs (we'll never send them, but allow enabling them): @@ -99,6 +98,18 @@ struct nvmet_sq { struct completion confirm_done; }; +struct nvmet_ana_group { + struct config_group group; + struct nvmet_port *port; + u32 grpid; +}; + +static inline struct nvmet_ana_group *to_ana_group(struct config_item *item) +{ + return container_of(to_config_group(item), struct nvmet_ana_group, + group); +} + /** * struct nvmet_port - Common structure to keep port * information for the target. 
From b369b30cf510fe94d8884837039362e2ec223cec Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni
Date: Thu, 26 Jul 2018 14:00:41 -0700
Subject: nvmet: use Retain Async Event bit to clear AEN

In the current implementation, we clear the AEN bit when we get the
"get log page" command if the given log page is associated with an AEN.
This patch optionally retains the AEN for the ctrl under consideration
when the Retain Asynchronous Event (RAE) bit is set as a part of the
"get log page" command.

This allows the host to read the log page and optionally retain the AEN
associated with this log page when using userspace tools like nvme-cli.

Signed-off-by: Chaitanya Kulkarni
[hch: also use the new helper in the just merged ANA code]
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/target/admin-cmd.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'drivers/nvme/target')

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index d1de639786ee..f517bc562d26 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -19,6 +19,19 @@
 #include 
 #include "nvmet.h"
 
+/*
+ * This helper allows us to clear the AEN based on the RAE bit. Use it
+ * when processing log pages that are associated with an AEN.
+ */
+static inline void nvmet_clear_aen(struct nvmet_req *req, u32 aen_bit)
+{
+	int rae = le32_to_cpu(req->cmd->common.cdw10[0]) & 1 << 15;
+
+	if (!rae)
+		clear_bit(aen_bit, &req->sq->ctrl->aen_masked);
+}
+
 u32 nvmet_get_log_page_len(struct nvme_command *cmd)
 {
 	u32 len = le16_to_cpu(cmd->get_log_page.numdu);
@@ -176,7 +189,7 @@ static void nvmet_execute_get_log_changed_ns(struct nvmet_req *req)
 	if (!status)
 		status = nvmet_zero_sgl(req, len, req->data_len - len);
 	ctrl->nr_changed_ns = 0;
-	clear_bit(NVME_AEN_CFG_NS_ATTR, &ctrl->aen_masked);
+	nvmet_clear_aen(req, NVME_AEN_CFG_NS_ATTR);
 	mutex_unlock(&ctrl->lock);
 out:
 	nvmet_req_complete(req, status);
@@ -235,7 +248,7 @@ static void nvmet_execute_get_log_page_ana(struct nvmet_req *req)
 
 	hdr.chgcnt = cpu_to_le64(nvmet_ana_chgcnt);
 	hdr.ngrps = cpu_to_le16(ngrps);
-	clear_bit(NVME_AEN_CFG_ANA_CHANGE, &req->sq->ctrl->aen_masked);
+	nvmet_clear_aen(req, NVME_AEN_CFG_ANA_CHANGE);
 	up_read(&nvmet_ana_sem);
 
 	kfree(desc);
--
cgit v1.2.3
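For Get Log Page, the log identifier sits in bits 7:0 of CDW10 and RAE in bit 15, which is exactly what nvmet_clear_aen() masks against. A standalone userspace illustration of that decoding (values are hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Get Log Page CDW10: LID in [07:00], RAE in bit 15, NUMDL in [31:16]. */
static int rae_is_set(uint32_t cdw10)
{
	return (cdw10 >> 15) & 1;
}

int main(void)
{
	/* ANA log (LID 0x0c) with RAE set and a NUMDL of 0x3ff. */
	uint32_t cdw10 = (0x3ffu << 16) | (1u << 15) | 0x0c;

	printf("lid=0x%02x rae=%d\n", cdw10 & 0xff, rae_is_set(cdw10));
	return 0;
}

When RAE is set the corresponding aen_masked bit stays set, so no new AEN of that type is generated; a later read of the same log page with RAE clear is what re-arms the event.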
From dedf0be544614b6d9d395e78d72cc8c30d03e440 Mon Sep 17 00:00:00 2001
From: Chaitanya Kulkarni
Date: Tue, 7 Aug 2018 23:01:07 -0700
Subject: nvmet: add ns write protect support

This patch implements the Namespace Write Protect feature described in
"NVMe TP 4005a Namespace Write Protect". In this version, we implement
the No Write Protect and Write Protect states for a target ns, which
can be toggled by Set Features commands from the host side.

For a write-protect state transition, we need to flush the ns specified
as a part of the command, so we also add helpers for carrying out
synchronous flush operations.

Signed-off-by: Chaitanya Kulkarni
[hch: fixed an incorrect endianness conversion, minor cleanups]
Signed-off-by: Christoph Hellwig
---
 drivers/nvme/target/admin-cmd.c   | 76 +++++++++++++++++++++++++++++++++++++++
 drivers/nvme/target/core.c        | 20 ++++++++++-
 drivers/nvme/target/io-cmd-bdev.c |  7 ++++
 drivers/nvme/target/io-cmd-file.c | 12 ++++---
 drivers/nvme/target/nvmet.h       |  4 +++
 5 files changed, 114 insertions(+), 5 deletions(-)

(limited to 'drivers/nvme/target')

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index f517bc562d26..a21caea1e080 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -372,6 +372,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 	id->psd[0].entry_lat = cpu_to_le32(0x10);
 	id->psd[0].exit_lat = cpu_to_le32(0x4);
 
+	id->nwpc = 1 << 0; /* write protect and no write protect */
+
 	status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
 
 	kfree(id);
@@ -433,6 +435,8 @@ static void nvmet_execute_identify_ns(struct nvmet_req *req)
 
 	id->lbaf[0].ds = ns->blksize_shift;
 
+	if (ns->readonly)
+		id->nsattr |= (1 << 0);
 	nvmet_put_namespace(ns);
 done:
 	status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
@@ -545,6 +549,52 @@ static void nvmet_execute_abort(struct nvmet_req *req)
 	nvmet_req_complete(req, 0);
 }
 
+static u16 nvmet_write_protect_flush_sync(struct nvmet_req *req)
+{
+	u16 status;
+
+	if (req->ns->file)
+		status = nvmet_file_flush(req);
+	else
+		status = nvmet_bdev_flush(req);
+
+	if (status)
+		pr_err("write protect flush failed nsid: %u\n", req->ns->nsid);
+	return status;
+}
+
+static u16 nvmet_set_feat_write_protect(struct nvmet_req *req)
+{
+	u32 write_protect = le32_to_cpu(req->cmd->common.cdw10[1]);
+	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
+	u16 status = NVME_SC_FEATURE_NOT_CHANGEABLE;
+
+	req->ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->rw.nsid);
+	if (unlikely(!req->ns))
+		return status;
+
+	mutex_lock(&subsys->lock);
+	switch (write_protect) {
+	case NVME_NS_WRITE_PROTECT:
+		req->ns->readonly = true;
+		status = nvmet_write_protect_flush_sync(req);
+		if (status)
+			req->ns->readonly = false;
+		break;
+	case NVME_NS_NO_WRITE_PROTECT:
+		req->ns->readonly = false;
+		status = 0;
+		break;
+	default:
+		break;
+	}
+
+	if (!status)
+		nvmet_ns_changed(subsys, req->ns->nsid);
+	mutex_unlock(&subsys->lock);
+	return status;
+}
+
 static void nvmet_execute_set_features(struct nvmet_req *req)
 {
 	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
@@ -575,6 +625,9 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
 	case NVME_FEAT_HOST_ID:
 		status = NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
 		break;
+	case NVME_FEAT_WRITE_PROTECT:
+		status = nvmet_set_feat_write_protect(req);
+		break;
 	default:
 		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 		break;
@@ -583,6 +636,26 @@ static void nvmet_execute_set_features(struct nvmet_req *req)
 	nvmet_req_complete(req, status);
 }
 
+static u16 nvmet_get_feat_write_protect(struct nvmet_req *req)
+{
+	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
+	u32 result;
+
+	req->ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->common.nsid);
+	if (!req->ns)
+		return NVME_SC_INVALID_NS | NVME_SC_DNR;
+
+	mutex_lock(&subsys->lock);
+	if (req->ns->readonly == true)
+		result = NVME_NS_WRITE_PROTECT;
+	else
+		result = NVME_NS_NO_WRITE_PROTECT;
+	nvmet_set_result(req, result);
+	mutex_unlock(&subsys->lock);
+
+	return 0;
+}
+
 static void nvmet_execute_get_features(struct nvmet_req *req)
 {
 	struct nvmet_subsys *subsys = req->sq->ctrl->subsys;
@@ -634,6 +707,9 @@
 		status = nvmet_copy_to_sgl(req, 0, &req->sq->ctrl->hostid,
 				sizeof(req->sq->ctrl->hostid));
 		break;
+	case NVME_FEAT_WRITE_PROTECT:
+		status = nvmet_get_feat_write_protect(req);
+		break;
 	default:
 		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 		break;
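The enable path in nvmet_set_feat_write_protect() is deliberately ordered: mark the namespace read-only first so the I/O path starts rejecting writes, then flush, and undo the flag only if the flush fails. A userspace toy model of that transition (all names are made up, illustrative only):

#include <stdbool.h>
#include <stdio.h>

struct ns_model {
	bool readonly;
};

static int flush_ok = 1;	/* flip to 0 to simulate a failed flush */

static int model_flush(void)
{
	return flush_ok ? 0 : -1;
}

static int model_set_write_protect(struct ns_model *ns)
{
	ns->readonly = true;		/* reject new writes first */
	if (model_flush()) {		/* then persist what was accepted */
		ns->readonly = false;	/* roll back on failure */
		return -1;
	}
	return 0;			/* state change is now visible */
}

int main(void)
{
	struct ns_model ns = { .readonly = false };

	printf("transition %s, readonly=%d\n",
	       model_set_write_protect(&ns) ? "failed" : "ok", ns.readonly);
	return 0;
}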
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 3ceb7a03bb2a..14b4c4916a8e 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -180,7 +180,7 @@ out_unlock:
 	mutex_unlock(&ctrl->lock);
 }
 
-static void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
+void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
 {
 	struct nvmet_ctrl *ctrl;
 
@@ -609,6 +609,21 @@ static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
 	return 0;
 }
 
+static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
+{
+	if (unlikely(req->ns->readonly)) {
+		switch (req->cmd->common.opcode) {
+		case nvme_cmd_read:
+		case nvme_cmd_flush:
+			break;
+		default:
+			return NVME_SC_NS_WRITE_PROTECTED;
+		}
+	}
+
+	return 0;
+}
+
 static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 {
 	struct nvme_command *cmd = req->cmd;
@@ -622,6 +637,9 @@ static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 	if (unlikely(!req->ns))
 		return NVME_SC_INVALID_NS | NVME_SC_DNR;
 	ret = nvmet_check_ana_state(req->port, req->ns);
+	if (unlikely(ret))
+		return ret;
+	ret = nvmet_io_cmd_check_access(req);
 	if (unlikely(ret))
 		return ret;
 
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index e0b0f7df70c2..7bc9f6240432 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -124,6 +124,13 @@ static void nvmet_bdev_execute_flush(struct nvmet_req *req)
 	submit_bio(bio);
 }
 
+u16 nvmet_bdev_flush(struct nvmet_req *req)
+{
+	if (blkdev_issue_flush(req->ns->bdev, GFP_KERNEL, NULL))
+		return NVME_SC_INTERNAL | NVME_SC_DNR;
+	return 0;
+}
+
 static u16 nvmet_bdev_discard_range(struct nvmet_ns *ns,
 		struct nvme_dsm_range *range, struct bio **bio)
 {
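On the host, the id->nsattr bit set in nvmet_execute_identify_ns() above surfaces in the Identify Namespace data; NSATTR is byte 99 of that structure in the spec revision that added TP 4005, with bit 0 meaning "currently write protected". A hypothetical host-side check (the byte offset is an assumption from the spec, not from this patch):

#include <stdint.h>
#include <stdio.h>

/* NSATTR: byte 99 of the 4096-byte Identify Namespace data structure. */
static int ns_write_protected(const uint8_t *id_ns)
{
	return id_ns[99] & 0x1;
}

int main(void)
{
	uint8_t id_ns[4096] = { 0 };

	id_ns[99] = 0x1;	/* pretend the target reported write protect */
	printf("write protected: %d\n", ns_write_protected(id_ns));
	return 0;
}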
diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c
index c2d0d08b59c8..81a9dc5290a8 100644
--- a/drivers/nvme/target/io-cmd-file.c
+++ b/drivers/nvme/target/io-cmd-file.c
@@ -211,14 +211,18 @@ static void nvmet_file_execute_rw_buffered_io(struct nvmet_req *req)
 	queue_work(buffered_io_wq, &req->f.work);
 }
 
+u16 nvmet_file_flush(struct nvmet_req *req)
+{
+	if (vfs_fsync(req->ns->file, 1) < 0)
+		return NVME_SC_INTERNAL | NVME_SC_DNR;
+	return 0;
+}
+
 static void nvmet_file_flush_work(struct work_struct *w)
 {
 	struct nvmet_req *req = container_of(w, struct nvmet_req, f.work);
-	int ret;
-
-	ret = vfs_fsync(req->ns->file, 1);
-	nvmet_req_complete(req, ret < 0 ? NVME_SC_INTERNAL | NVME_SC_DNR : 0);
+	nvmet_req_complete(req, nvmet_file_flush(req));
 }
 
 static void nvmet_file_execute_flush(struct nvmet_req *req)
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 22941045f46e..ec9af4ee03b6 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -58,6 +58,7 @@ struct nvmet_ns {
 	struct percpu_ref	ref;
 	struct block_device	*bdev;
 	struct file		*file;
+	bool			readonly;
 	u32			nsid;
 	u32			blksize_shift;
 	loff_t			size;
@@ -429,6 +430,9 @@ int nvmet_bdev_ns_enable(struct nvmet_ns *ns);
 int nvmet_file_ns_enable(struct nvmet_ns *ns);
 void nvmet_bdev_ns_disable(struct nvmet_ns *ns);
 void nvmet_file_ns_disable(struct nvmet_ns *ns);
+u16 nvmet_bdev_flush(struct nvmet_req *req);
+u16 nvmet_file_flush(struct nvmet_req *req);
+void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid);
 
 static inline u32 nvmet_rw_len(struct nvmet_req *req)
 {
--
cgit v1.2.3
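nvmet_file_flush() asks vfs_fsync() for a datasync-only flush (the second argument is 1), which corresponds to fdatasync() in userspace. A minimal standalone analogue of the helper (the backing path is made up):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Userspace analogue of nvmet_file_flush(): nonzero on failure. */
static int file_flush(int fd)
{
	return fdatasync(fd) < 0 ? -1 : 0;
}

int main(void)
{
	int fd = open("/tmp/ns-backing-file", O_RDWR | O_CREAT, 0600);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	printf("flush %s\n", file_flush(fd) ? "failed" : "ok");
	close(fd);
	return 0;
}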