Diffstat (limited to 'drivers/block/ublk_drv.c')
-rw-r--r--  drivers/block/ublk_drv.c  348
1 file changed, 303 insertions(+), 45 deletions(-)
diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
index 3f1906965ac8..2b7d1db5c4a7 100644
--- a/drivers/block/ublk_drv.c
+++ b/drivers/block/ublk_drv.c
@@ -47,7 +47,12 @@
#define UBLK_MINORS (1U << MINORBITS)
/* All UBLK_F_* have to be included into UBLK_F_ALL */
-#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_URING_CMD_COMP_IN_TASK)
+#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \
+ | UBLK_F_URING_CMD_COMP_IN_TASK \
+ | UBLK_F_NEED_GET_DATA)
+
+/* All UBLK_PARAM_TYPE_* should be included here */
+#define UBLK_PARAM_TYPE_ALL (UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD)
struct ublk_rq_data {
struct callback_head work;
@@ -86,6 +91,15 @@ struct ublk_uring_cmd_pdu {
*/
#define UBLK_IO_FLAG_ABORTED 0x04
+/*
+ * UBLK_IO_FLAG_NEED_GET_DATA is set when the IO command needs to get the
+ * data buffer address from ublksrv.
+ *
+ * Then bio data can be copied into this data buffer for a WRITE request
+ * after the IO command is issued again and UBLK_IO_FLAG_NEED_GET_DATA is unset.
+ */
+#define UBLK_IO_FLAG_NEED_GET_DATA 0x08
+
struct ublk_io {
/* userspace buffer address from io cmd */
__u64 addr;
@@ -119,7 +133,6 @@ struct ublk_device {
char *__queues;
unsigned short queue_size;
- unsigned short bs_shift;
struct ublksrv_ctrl_dev_info dev_info;
struct blk_mq_tag_set tag_set;
@@ -137,6 +150,8 @@ struct ublk_device {
spinlock_t mm_lock;
struct mm_struct *mm;
+ struct ublk_params params;
+
struct completion completion;
unsigned int nr_queues_ready;
atomic_t nr_aborted_queues;
@@ -149,6 +164,12 @@ struct ublk_device {
struct work_struct stop_work;
};
+/* header of ublk_params */
+struct ublk_params_header {
+ __u32 len;
+ __u32 types;
+};
+
static dev_t ublk_chr_devt;
static struct class *ublk_chr_class;
@@ -160,6 +181,90 @@ static DEFINE_MUTEX(ublk_ctl_mutex);
static struct miscdevice ublk_misc;
+static void ublk_dev_param_basic_apply(struct ublk_device *ub)
+{
+ struct request_queue *q = ub->ub_disk->queue;
+ const struct ublk_param_basic *p = &ub->params.basic;
+
+ blk_queue_logical_block_size(q, 1 << p->logical_bs_shift);
+ blk_queue_physical_block_size(q, 1 << p->physical_bs_shift);
+ blk_queue_io_min(q, 1 << p->io_min_shift);
+ blk_queue_io_opt(q, 1 << p->io_opt_shift);
+
+ blk_queue_write_cache(q, p->attrs & UBLK_ATTR_VOLATILE_CACHE,
+ p->attrs & UBLK_ATTR_FUA);
+ if (p->attrs & UBLK_ATTR_ROTATIONAL)
+ blk_queue_flag_clear(QUEUE_FLAG_NONROT, q);
+ else
+ blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
+
+ blk_queue_max_hw_sectors(q, p->max_sectors);
+ blk_queue_chunk_sectors(q, p->chunk_sectors);
+ blk_queue_virt_boundary(q, p->virt_boundary_mask);
+
+ if (p->attrs & UBLK_ATTR_READ_ONLY)
+ set_disk_ro(ub->ub_disk, true);
+
+ set_capacity(ub->ub_disk, p->dev_sectors);
+}
+
+static void ublk_dev_param_discard_apply(struct ublk_device *ub)
+{
+ struct request_queue *q = ub->ub_disk->queue;
+ const struct ublk_param_discard *p = &ub->params.discard;
+
+ q->limits.discard_alignment = p->discard_alignment;
+ q->limits.discard_granularity = p->discard_granularity;
+ blk_queue_max_discard_sectors(q, p->max_discard_sectors);
+ blk_queue_max_write_zeroes_sectors(q,
+ p->max_write_zeroes_sectors);
+ blk_queue_max_discard_segments(q, p->max_discard_segments);
+}
+
+static int ublk_validate_params(const struct ublk_device *ub)
+{
+ /* basic param is the only one which must be set */
+ if (ub->params.types & UBLK_PARAM_TYPE_BASIC) {
+ const struct ublk_param_basic *p = &ub->params.basic;
+
+ if (p->logical_bs_shift > PAGE_SHIFT)
+ return -EINVAL;
+
+ if (p->logical_bs_shift > p->physical_bs_shift)
+ return -EINVAL;
+
+ if (p->max_sectors > (ub->dev_info.max_io_buf_bytes >> 9))
+ return -EINVAL;
+ } else
+ return -EINVAL;
+
+ if (ub->params.types & UBLK_PARAM_TYPE_DISCARD) {
+ const struct ublk_param_discard *p = &ub->params.discard;
+
+ /* So far, only support single segment discard */
+ if (p->max_discard_sectors && p->max_discard_segments != 1)
+ return -EINVAL;
+
+ if (!p->discard_granularity)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int ublk_apply_params(struct ublk_device *ub)
+{
+ if (!(ub->params.types & UBLK_PARAM_TYPE_BASIC))
+ return -EINVAL;
+
+ ublk_dev_param_basic_apply(ub);
+
+ if (ub->params.types & UBLK_PARAM_TYPE_DISCARD)
+ ublk_dev_param_discard_apply(ub);
+
+ return 0;
+}
+
static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
{
if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) &&
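The checks in ublk_validate_params() translate directly into what a ublk server has to fill in before starting the device. As a concrete illustration (field names are the ones used above from struct ublk_params; the values are invented for this sketch and assume max_io_buf_bytes of at least 512 KiB), the following set would pass validation:

#include <linux/ublk_cmd.h>

static const struct ublk_params example_params = {
	.len	= sizeof(struct ublk_params),
	.types	= UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD,
	.basic	= {
		.logical_bs_shift	= 9,		/* 512B, must not exceed PAGE_SHIFT */
		.physical_bs_shift	= 12,		/* 4KB, must be >= logical_bs_shift */
		.io_min_shift		= 12,
		.io_opt_shift		= 12,
		.max_sectors		= 1024,		/* must fit in max_io_buf_bytes >> 9 */
		.dev_sectors		= 2097152,	/* 1 GiB in 512B sectors */
	},
	.discard = {
		.discard_granularity	= 4096,		/* zero granularity is rejected */
		.max_discard_sectors	= 1024,
		.max_write_zeroes_sectors = 1024,
		.max_discard_segments	= 1,		/* only single-segment discard so far */
	},
};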
@@ -168,6 +273,13 @@ static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
return false;
}
+static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
+{
+ if (ubq->flags & UBLK_F_NEED_GET_DATA)
+ return true;
+ return false;
+}
+
static struct ublk_device *ublk_get_device(struct ublk_device *ub)
{
if (kobject_get_unless_zero(&ub->cdev_dev.kobj))
@@ -509,6 +621,21 @@ static void __ublk_fail_req(struct ublk_io *io, struct request *req)
}
}
+static void ubq_complete_io_cmd(struct ublk_io *io, int res)
+{
+ /* mark this cmd owned by ublksrv */
+ io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
+
+ /*
+ * clear ACTIVE since we are done with this sqe/cmd slot
+ * We can only accept io cmd in case of being not active.
+ */
+ io->flags &= ~UBLK_IO_FLAG_ACTIVE;
+
+ /* tell ublksrv one io request is coming */
+ io_uring_cmd_done(io->cmd, res, 0);
+}
+
#define UBLK_REQUEUE_DELAY_MS 3
static inline void __ublk_rq_task_work(struct request *req)
@@ -531,6 +658,30 @@ static inline void __ublk_rq_task_work(struct request *req)
return;
}
+ if (ublk_need_get_data(ubq) &&
+ (req_op(req) == REQ_OP_WRITE ||
+ req_op(req) == REQ_OP_FLUSH)) {
+ /*
+ * We have not handled UBLK_IO_NEED_GET_DATA command yet,
+ * so immediately pass UBLK_IO_RES_NEED_GET_DATA to ublksrv
+ * and notify it.
+ */
+ if (!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) {
+ io->flags |= UBLK_IO_FLAG_NEED_GET_DATA;
+ pr_devel("%s: need get data. op %d, qid %d tag %d io_flags %x\n",
+ __func__, io->cmd->cmd_op, ubq->q_id,
+ req->tag, io->flags);
+ ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA);
+ return;
+ }
+ /*
+ * We have handled UBLK_IO_NEED_GET_DATA command,
+ * so clear UBLK_IO_FLAG_NEED_GET_DATA now and just
+ * do the copy work.
+ */
+ io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA;
+ }
+
mapped_bytes = ublk_map_io(ubq, req, io);
/* partially mapped, update io descriptor */
@@ -553,17 +704,7 @@ static inline void __ublk_rq_task_work(struct request *req)
mapped_bytes >> 9;
}
- /* mark this cmd owned by ublksrv */
- io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
-
- /*
- * clear ACTIVE since we are done with this sqe/cmd slot
- * We can only accept io cmd in case of being not active.
- */
- io->flags &= ~UBLK_IO_FLAG_ACTIVE;
-
- /* tell ublksrv one io request is coming */
- io_uring_cmd_done(io->cmd, UBLK_IO_RES_OK, 0);
+ ubq_complete_io_cmd(io, UBLK_IO_RES_OK);
}
static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
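For a server that enables UBLK_F_NEED_GET_DATA, WRITE handling becomes a two-step handshake: the fetched command first completes with UBLK_IO_RES_NEED_GET_DATA, the server answers with UBLK_IO_NEED_GET_DATA carrying a buffer address, and only then does the same request come back with the data copied in. A minimal sketch of that loop follows (abort handling omitted); queue_io_cmd() and handle_io_desc() are hypothetical helpers standing in for the server's io_uring submission path and its actual I/O handler:

#include <stdint.h>
#include <linux/ublk_cmd.h>

/* hypothetical: submit an IORING_OP_URING_CMD carrying struct ublksrv_io_cmd */
extern int queue_io_cmd(int q_id, int tag, unsigned int cmd_op, __u64 addr);
/* hypothetical: serve the I/O described by the io descriptor of (q_id, tag) */
extern void handle_io_desc(int q_id, int tag, void *buf);

static void handle_io_cqe(int q_id, int tag, int res, void *buf)
{
	if (res == UBLK_IO_RES_NEED_GET_DATA) {
		/* the driver wants a buffer before copying the WRITE payload */
		queue_io_cmd(q_id, tag, UBLK_IO_NEED_GET_DATA,
			     (__u64)(uintptr_t)buf);
		return;
	}

	/* UBLK_IO_RES_OK: the request (and, for a WRITE, its data) is ready */
	handle_io_desc(q_id, tag, buf);

	/* commit the result and fetch the next request in one command */
	queue_io_cmd(q_id, tag, UBLK_IO_COMMIT_AND_FETCH_REQ,
		     (__u64)(uintptr_t)buf);
}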
@@ -788,16 +929,27 @@ static void ublk_daemon_monitor_work(struct work_struct *work)
UBLK_DAEMON_MONITOR_PERIOD);
}
+static inline bool ublk_queue_ready(struct ublk_queue *ubq)
+{
+ return ubq->nr_io_ready == ubq->q_depth;
+}
+
static void ublk_cancel_queue(struct ublk_queue *ubq)
{
int i;
+ if (!ublk_queue_ready(ubq))
+ return;
+
for (i = 0; i < ubq->q_depth; i++) {
struct ublk_io *io = &ubq->ios[i];
if (io->flags & UBLK_IO_FLAG_ACTIVE)
io_uring_cmd_done(io->cmd, UBLK_IO_RES_ABORT, 0);
}
+
+ /* all io commands are canceled */
+ ubq->nr_io_ready = 0;
}
/* Cancel all pending commands, must be called after del_gendisk() returns */
@@ -818,19 +970,14 @@ static void ublk_stop_dev(struct ublk_device *ub)
del_gendisk(ub->ub_disk);
ub->dev_info.state = UBLK_S_DEV_DEAD;
ub->dev_info.ublksrv_pid = -1;
- ublk_cancel_dev(ub);
put_disk(ub->ub_disk);
ub->ub_disk = NULL;
unlock:
+ ublk_cancel_dev(ub);
mutex_unlock(&ub->mutex);
cancel_delayed_work_sync(&ub->monitor_work);
}
-static inline bool ublk_queue_ready(struct ublk_queue *ubq)
-{
- return ubq->nr_io_ready == ubq->q_depth;
-}
-
/* device can only be started after all IOs are ready */
static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
{
@@ -846,6 +993,25 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
mutex_unlock(&ub->mutex);
}
+static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
+ int tag, struct io_uring_cmd *cmd)
+{
+ struct ublk_queue *ubq = ublk_get_queue(ub, q_id);
+ struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
+
+ if (ublk_can_use_task_work(ubq)) {
+ struct ublk_rq_data *data = blk_mq_rq_to_pdu(req);
+
+ /* should not fail since we call it just in ubq->ubq_daemon */
+ task_work_add(ubq->ubq_daemon, &data->work, TWA_SIGNAL_NO_IPI);
+ } else {
+ struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
+
+ pdu->req = req;
+ io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
+ }
+}
+
static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
{
struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd;
@@ -884,6 +1050,14 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
goto out;
}
+ /*
+ * ensure that the user issues UBLK_IO_NEED_GET_DATA
+ * iff the driver has set UBLK_IO_FLAG_NEED_GET_DATA.
+ */
+ if ((!!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA))
+ ^ (cmd_op == UBLK_IO_NEED_GET_DATA))
+ goto out;
+
switch (cmd_op) {
case UBLK_IO_FETCH_REQ:
/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
@@ -917,6 +1091,14 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
io->cmd = cmd;
ublk_commit_completion(ub, ub_cmd);
break;
+ case UBLK_IO_NEED_GET_DATA:
+ if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
+ goto out;
+ io->addr = ub_cmd->addr;
+ io->cmd = cmd;
+ io->flags |= UBLK_IO_FLAG_ACTIVE;
+ ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag, cmd);
+ break;
default:
goto out;
}
@@ -1083,13 +1265,13 @@ static void ublk_stop_work_fn(struct work_struct *work)
ublk_stop_dev(ub);
}
-/* align maximum I/O size to PAGE_SIZE */
+/* align max io buffer size with PAGE_SIZE */
static void ublk_align_max_io_size(struct ublk_device *ub)
{
- unsigned int max_rq_bytes = ub->dev_info.rq_max_blocks << ub->bs_shift;
+ unsigned int max_io_bytes = ub->dev_info.max_io_buf_bytes;
- ub->dev_info.rq_max_blocks =
- round_down(max_rq_bytes, PAGE_SIZE) >> ub->bs_shift;
+ ub->dev_info.max_io_buf_bytes =
+ round_down(max_io_bytes, PAGE_SIZE);
}
static int ublk_add_tag_set(struct ublk_device *ub)
@@ -1132,7 +1314,6 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
{
struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
int ublksrv_pid = (int)header->data[0];
- unsigned long dev_blocks = header->data[1];
struct ublk_device *ub;
struct gendisk *disk;
int ret = -EINVAL;
@@ -1155,10 +1336,6 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
goto out_unlock;
}
- /* We may get disk size updated */
- if (dev_blocks)
- ub->dev_info.dev_blocks = dev_blocks;
-
disk = blk_mq_alloc_disk(&ub->tag_set, ub);
if (IS_ERR(disk)) {
ret = PTR_ERR(disk);
@@ -1168,27 +1345,28 @@ static int ublk_ctrl_start_dev(struct io_uring_cmd *cmd)
disk->fops = &ub_fops;
disk->private_data = ub;
- blk_queue_logical_block_size(disk->queue, ub->dev_info.block_size);
- blk_queue_physical_block_size(disk->queue, ub->dev_info.block_size);
- blk_queue_io_min(disk->queue, ub->dev_info.block_size);
- blk_queue_max_hw_sectors(disk->queue,
- ub->dev_info.rq_max_blocks << (ub->bs_shift - 9));
- disk->queue->limits.discard_granularity = PAGE_SIZE;
- blk_queue_max_discard_sectors(disk->queue, UINT_MAX >> 9);
- blk_queue_max_write_zeroes_sectors(disk->queue, UINT_MAX >> 9);
-
- set_capacity(disk, ub->dev_info.dev_blocks << (ub->bs_shift - 9));
-
ub->dev_info.ublksrv_pid = ublksrv_pid;
ub->ub_disk = disk;
+
+ ret = ublk_apply_params(ub);
+ if (ret)
+ goto out_put_disk;
+
get_device(&ub->cdev_dev);
ret = add_disk(disk);
if (ret) {
- put_disk(disk);
- goto out_unlock;
+ /*
+ * We have to drop the reference since ->free_disk won't be
+ * called in case of add_disk failure.
+ */
+ ublk_put_device(ub);
+ goto out_put_disk;
}
set_bit(UB_STATE_USED, &ub->state);
ub->dev_info.state = UBLK_S_DEV_LIVE;
+out_put_disk:
+ if (ret)
+ put_disk(disk);
out_unlock:
mutex_unlock(&ub->mutex);
ublk_put_device(ub);
@@ -1250,9 +1428,8 @@ static inline void ublk_dump_dev_info(struct ublksrv_ctrl_dev_info *info)
{
pr_devel("%s: dev id %d flags %llx\n", __func__,
info->dev_id, info->flags);
- pr_devel("\t nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
- info->nr_hw_queues, info->queue_depth,
- info->block_size, info->dev_blocks);
+ pr_devel("\t nr_hw_queues %d queue_depth %d\n",
+ info->nr_hw_queues, info->queue_depth);
}
static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
@@ -1312,7 +1489,6 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
/* We are not ready to support zero copy */
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
- ub->bs_shift = ilog2(ub->dev_info.block_size);
ub->dev_info.nr_hw_queues = min_t(unsigned int,
ub->dev_info.nr_hw_queues, nr_cpu_ids);
ublk_align_max_io_size(ub);
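With block_size and dev_blocks gone from ublksrv_ctrl_dev_info, the ADD_DEV payload is reduced to queue geometry and buffer limits; the disk geometry itself is supplied later through UBLK_CMD_SET_PARAMS. An illustrative dev_info after this change (values invented for the sketch):

#include <linux/ublk_cmd.h>

static const struct ublksrv_ctrl_dev_info example_info = {
	.nr_hw_queues		= 1,		/* clamped to nr_cpu_ids by the driver */
	.queue_depth		= 128,
	.max_io_buf_bytes	= 512 << 10,	/* rounded down to a PAGE_SIZE multiple */
	.flags			= UBLK_F_URING_CMD_COMP_IN_TASK | UBLK_F_NEED_GET_DATA,
};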
@@ -1436,6 +1612,82 @@ static int ublk_ctrl_get_dev_info(struct io_uring_cmd *cmd)
return ret;
}
+static int ublk_ctrl_get_params(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ void __user *argp = (void __user *)(unsigned long)header->addr;
+ struct ublk_params_header ph;
+ struct ublk_device *ub;
+ int ret;
+
+ if (header->len <= sizeof(ph) || !header->addr)
+ return -EINVAL;
+
+ if (copy_from_user(&ph, argp, sizeof(ph)))
+ return -EFAULT;
+
+ if (ph.len > header->len || !ph.len)
+ return -EINVAL;
+
+ if (ph.len > sizeof(struct ublk_params))
+ ph.len = sizeof(struct ublk_params);
+
+ ub = ublk_get_device_from_id(header->dev_id);
+ if (!ub)
+ return -EINVAL;
+
+ mutex_lock(&ub->mutex);
+ if (copy_to_user(argp, &ub->params, ph.len))
+ ret = -EFAULT;
+ else
+ ret = 0;
+ mutex_unlock(&ub->mutex);
+
+ ublk_put_device(ub);
+ return ret;
+}
+
+static int ublk_ctrl_set_params(struct io_uring_cmd *cmd)
+{
+ struct ublksrv_ctrl_cmd *header = (struct ublksrv_ctrl_cmd *)cmd->cmd;
+ void __user *argp = (void __user *)(unsigned long)header->addr;
+ struct ublk_params_header ph;
+ struct ublk_device *ub;
+ int ret = -EFAULT;
+
+ if (header->len <= sizeof(ph) || !header->addr)
+ return -EINVAL;
+
+ if (copy_from_user(&ph, argp, sizeof(ph)))
+ return -EFAULT;
+
+ if (ph.len > header->len || !ph.len || !ph.types)
+ return -EINVAL;
+
+ if (ph.len > sizeof(struct ublk_params))
+ ph.len = sizeof(struct ublk_params);
+
+ ub = ublk_get_device_from_id(header->dev_id);
+ if (!ub)
+ return -EINVAL;
+
+ /* parameters can only be changed when device isn't live */
+ mutex_lock(&ub->mutex);
+ if (ub->dev_info.state == UBLK_S_DEV_LIVE) {
+ ret = -EACCES;
+ } else if (copy_from_user(&ub->params, argp, ph.len)) {
+ ret = -EFAULT;
+ } else {
+ /* clear all we don't support yet */
+ ub->params.types &= UBLK_PARAM_TYPE_ALL;
+ ret = ublk_validate_params(ub);
+ }
+ mutex_unlock(&ub->mutex);
+ ublk_put_device(ub);
+
+ return ret;
+}
+
static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
unsigned int issue_flags)
{
@@ -1471,6 +1723,12 @@ static int ublk_ctrl_uring_cmd(struct io_uring_cmd *cmd,
case UBLK_CMD_GET_QUEUE_AFFINITY:
ret = ublk_ctrl_get_queue_affinity(cmd);
break;
+ case UBLK_CMD_GET_PARAMS:
+ ret = ublk_ctrl_get_params(cmd);
+ break;
+ case UBLK_CMD_SET_PARAMS:
+ ret = ublk_ctrl_set_params(cmd);
+ break;
default:
break;
}
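Both new control commands move the whole struct ublk_params by pointer: header->addr points at the user buffer while the embedded len field bounds how much the driver copies in either direction. A minimal userspace sketch follows; ctrl_cmd() is a hypothetical helper that wraps struct ublksrv_ctrl_cmd into an IORING_OP_URING_CMD SQE on the ublk control device and waits for its completion:

#include <stdint.h>
#include <linux/ublk_cmd.h>

/* hypothetical helper: issue one control command and return the cqe result */
extern int ctrl_cmd(unsigned int cmd_op, struct ublksrv_ctrl_cmd *hdr);

static int set_params(int dev_id, const struct ublk_params *p)
{
	struct ublksrv_ctrl_cmd hdr = {
		.dev_id	= dev_id,
		.addr	= (__u64)(uintptr_t)p,
		.len	= sizeof(*p),	/* driver requires this to exceed the 8-byte len+types header */
	};

	/* rejected with -EACCES once the device is live */
	return ctrl_cmd(UBLK_CMD_SET_PARAMS, &hdr);
}

static int get_params(int dev_id, struct ublk_params *p)
{
	struct ublksrv_ctrl_cmd hdr = {
		.dev_id	= dev_id,
		.addr	= (__u64)(uintptr_t)p,
		.len	= sizeof(*p),
	};

	p->len = sizeof(*p);	/* tell the driver how much it may copy back */
	return ctrl_cmd(UBLK_CMD_GET_PARAMS, &hdr);
}

UBLK_CMD_SET_PARAMS has to run before UBLK_CMD_START_DEV, since ublk_ctrl_start_dev() now applies the stashed parameters and fails the start when UBLK_PARAM_TYPE_BASIC is missing.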