summaryrefslogtreecommitdiffstats
path: root/drivers/block
diff options
context:
space:
mode:
authorMing Lei <ming.lei@redhat.com>2023-05-19 14:50:30 +0800
committerJens Axboe <axboe@kernel.dk>2023-05-19 19:59:17 -0600
commit1172d5b8beca6b899deb9f7f2850e7e47ec16198 (patch)
treeb4e9738db856c741a58af05f3d4172508fb81039 /drivers/block
parent62fe99cef94a5900cac3bf15fd03ee8baad1a99c (diff)
downloadlinux-1172d5b8beca6b899deb9f7f2850e7e47ec16198.tar.gz
linux-1172d5b8beca6b899deb9f7f2850e7e47ec16198.tar.bz2
linux-1172d5b8beca6b899deb9f7f2850e7e47ec16198.zip
ublk: support user copy
Currently copy between io request buffer(pages) and userspace buffer is done inside ublk_map_io() or ublk_unmap_io(). This way performs very well in case of pre-allocated userspace io buffer. For dynamically allocated or external userspace backend io buffer, UBLK_F_NEED_GET_DATA is added for ublk server to provide buffer by one extra command communication for WRITE request. For READ, userspace simply provides buffer, but can't know when the buffer is done[1]. Add UBLK_F_USER_COPY by moving io data copy out of kernel by providing read()/write() on /dev/ublkcN, and simply let ublk server do the io data copy. This way makes both side cleaner, the cost is that one extra syscall for copy io data between request and backend buffer. With UBLK_F_USER_COPY, it actually becomes possible to run per-io zero copy now, such as, only do zero copy for big size IO, so it can be thought as one prep patch for supporting zero copy. Meantime zero copy still needs to expose read()/write() buffer for some corner case, such as passthrough IO. [1] READ buffer in UBLK_F_NEED_GET_DATA https://lore.kernel.org/linux-block/116d8a56-0881-56d3-9bcc-78ff3e1dc4e5@linux.alibaba.com/T/#m23bd4b8634c0a054e6797063167b469949a247bb ublksrv loop usercopy code: https://github.com/ming1/ubdsrv/commits/usercopy Signed-off-by: Ming Lei <ming.lei@redhat.com> Link: https://lore.kernel.org/r/20230519065030.351216-8-ming.lei@redhat.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'drivers/block')
-rw-r--r--drivers/block/ublk_drv.c58
1 files changed, 47 insertions, 11 deletions
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index ec40ac4f9af3..e00733b6fea8 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -55,7 +55,8 @@
| UBLK_F_USER_RECOVERY \
| UBLK_F_USER_RECOVERY_REISSUE \
| UBLK_F_UNPRIVILEGED_DEV \
- | UBLK_F_CMD_IOCTL_ENCODE)
+ | UBLK_F_CMD_IOCTL_ENCODE \
+ | UBLK_F_USER_COPY)
/* All UBLK_PARAM_TYPE_* should be included here */
#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | \
@@ -312,9 +313,18 @@ static int ublk_apply_params(struct ublk_device *ub)
return 0;
}
+static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
+{
+ return ubq->flags & UBLK_F_USER_COPY;
+}
+
static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
{
- return false;
+ /*
+ * read()/write() is involved in user copy, so request reference
+ * has to be grabbed
+ */
+ return ublk_support_user_copy(ubq);
}
static inline void ublk_init_req_ref(const struct ublk_queue *ubq,
@@ -591,6 +601,9 @@ static int ublk_map_io(const struct ublk_queue *ubq, const struct request *req,
{
const unsigned int rq_bytes = blk_rq_bytes(req);
+ if (ublk_support_user_copy(ubq))
+ return rq_bytes;
+
/*
* no zero copy, we delay copy WRITE request data into ublksrv
* context and the big benefit is that pinning pages in current
@@ -615,6 +628,9 @@ static int ublk_unmap_io(const struct ublk_queue *ubq,
{
const unsigned int rq_bytes = blk_rq_bytes(req);
+ if (ublk_support_user_copy(ubq))
+ return rq_bytes;
+
if (ublk_need_unmap_req(req)) {
struct iov_iter iter;
struct iovec iov;
@@ -1390,6 +1406,11 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
^ (_IOC_NR(cmd_op) == UBLK_IO_NEED_GET_DATA))
goto out;
+ if (ublk_support_user_copy(ubq) && ub_cmd->addr) {
+ ret = -EINVAL;
+ goto out;
+ }
+
ret = ublk_check_cmd_op(cmd_op);
if (ret)
goto out;
@@ -1408,23 +1429,34 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
*/
if (io->flags & UBLK_IO_FLAG_OWNED_BY_SRV)
goto out;
- /* FETCH_RQ has to provide IO buffer if NEED GET DATA is not enabled */
- if (!ub_cmd->addr && !ublk_need_get_data(ubq))
- goto out;
+
+ if (!ublk_support_user_copy(ubq)) {
+ /*
+ * FETCH_RQ has to provide IO buffer if NEED GET
+ * DATA is not enabled
+ */
+ if (!ub_cmd->addr && !ublk_need_get_data(ubq))
+ goto out;
+ }
ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
ublk_mark_io_ready(ub, ubq);
break;
case UBLK_IO_COMMIT_AND_FETCH_REQ:
req = blk_mq_tag_to_rq(ub->tag_set.tags[ub_cmd->q_id], tag);
- /*
- * COMMIT_AND_FETCH_REQ has to provide IO buffer if NEED GET DATA is
- * not enabled or it is Read IO.
- */
- if (!ub_cmd->addr && (!ublk_need_get_data(ubq) || req_op(req) == REQ_OP_READ))
- goto out;
+
if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
goto out;
+
+ if (!ublk_support_user_copy(ubq)) {
+ /*
+ * COMMIT_AND_FETCH_REQ has to provide IO buffer if
+ * NEED GET DATA is not enabled or it is Read IO.
+ */
+ if (!ub_cmd->addr && (!ublk_need_get_data(ubq) ||
+ req_op(req) == REQ_OP_READ))
+ goto out;
+ }
ublk_fill_io_cmd(io, cmd, ub_cmd->addr);
ublk_commit_completion(ub, ub_cmd);
break;
@@ -1996,6 +2028,10 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
ub->dev_info.flags |= UBLK_F_CMD_IOCTL_ENCODE |
UBLK_F_URING_CMD_COMP_IN_TASK;
+ /* GET_DATA isn't needed any more with USER_COPY */
+ if (ub->dev_info.flags & UBLK_F_USER_COPY)
+ ub->dev_info.flags &= ~UBLK_F_NEED_GET_DATA;
+
/* We are not ready to support zero copy */
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;