summaryrefslogtreecommitdiffstats
path: root/drivers/nvme/host
diff options
context:
space:
mode:
author Jens Axboe <axboe@kernel.dk> 2021-10-18 06:47:18 -0600
committer Jens Axboe <axboe@kernel.dk> 2021-10-19 12:41:09 -0600
commit a9a7e30fd918588bc312ba782426e3a1282df359 (patch)
tree 469362f97239ca9fb84910917ab683d26038da12 /drivers/nvme/host
parent 9c3d29296fe4c297447d2055e7a9535c981a8370 (diff)
download linux-stable-a9a7e30fd918588bc312ba782426e3a1282df359.tar.gz
linux-stable-a9a7e30fd918588bc312ba782426e3a1282df359.tar.bz2
linux-stable-a9a7e30fd918588bc312ba782426e3a1282df359.zip
nvme: don't memset() the normal read/write command
This memset in the fast path costs a lot of cycles on my setup. Here's a
top-of-profile of doing ~6.7M IOPS:

+    5.90%  io_uring  [nvme]            [k] nvme_queue_rq
+    5.32%  io_uring  [nvme_core]       [k] nvme_setup_cmd
+    5.17%  io_uring  [kernel.vmlinux]  [k] io_submit_sqes
+    4.97%  io_uring  [kernel.vmlinux]  [k] blkdev_direct_IO

and a perf diff with this patch:

     0.92%     +4.40%  [nvme_core]       [k] nvme_setup_cmd

reducing it from 5.3% to only 0.9%. This takes it from the 2nd most
cycle consumer to something that's mostly irrelevant.

Reviewed-by: Chaitanya Kulkarni <kch@nvidia.com>
Reviewed-by: Keith Busch <kbusch@kernel.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- drivers/nvme/host/core.c | 8
1 files changed, 6 insertions, 2 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 824790bed2f5..c415c3faf420 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -929,8 +929,6 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
u16 control = 0;
u32 dsmgmt = 0;
- memset(cmnd, 0, sizeof(*cmnd));
-
if (req->cmd_flags & REQ_FUA)
control |= NVME_RW_FUA;
if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
@@ -940,9 +938,15 @@ static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
cmnd->rw.opcode = op;
+ cmnd->rw.flags = 0;
cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
+ cmnd->rw.rsvd2 = 0;
+ cmnd->rw.metadata = 0;
cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
+ cmnd->rw.reftag = 0;
+ cmnd->rw.apptag = 0;
+ cmnd->rw.appmask = 0;
if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);