diff options
Diffstat (limited to 'block/ioctl.c')
-rw-r--r-- | block/ioctl.c | 243 |
1 files changed, 202 insertions, 41 deletions
diff --git a/block/ioctl.c b/block/ioctl.c index a9028a2c2db5..6554b728bae6 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -11,6 +11,9 @@ #include <linux/blktrace_api.h> #include <linux/pr.h> #include <linux/uaccess.h> +#include <linux/pagemap.h> +#include <linux/io_uring/cmd.h> +#include <uapi/linux/blkdev.h> #include "blk.h" static int blkpg_do_ioctl(struct block_device *bdev, @@ -33,7 +36,7 @@ static int blkpg_do_ioctl(struct block_device *bdev, if (op == BLKPG_DEL_PARTITION) return bdev_del_partition(disk, p.pno); - if (p.start < 0 || p.length <= 0 || p.start + p.length < 0) + if (p.start < 0 || p.length <= 0 || LLONG_MAX - p.length < p.start) return -EINVAL; /* Check that the partition is aligned to the block size */ if (!IS_ALIGNED(p.start | p.length, bdev_logical_block_size(bdev))) @@ -92,49 +95,91 @@ static int compat_blkpg_ioctl(struct block_device *bdev, } #endif +/* + * Check that [start, start + len) is a valid range from the block device's + * perspective, including verifying that it can be correctly translated into + * logical block addresses. + */ +static int blk_validate_byte_range(struct block_device *bdev, + uint64_t start, uint64_t len) +{ + unsigned int bs_mask = bdev_logical_block_size(bdev) - 1; + uint64_t end; + + if ((start | len) & bs_mask) + return -EINVAL; + if (!len) + return -EINVAL; + if (check_add_overflow(start, len, &end) || end > bdev_nr_bytes(bdev)) + return -EINVAL; + + return 0; +} + static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode, unsigned long arg) { - uint64_t range[2]; - uint64_t start, len, end; - struct inode *inode = bdev->bd_inode; + uint64_t range[2], start, len; + struct bio *prev = NULL, *bio; + sector_t sector, nr_sects; + struct blk_plug plug; int err; - if (!(mode & BLK_OPEN_WRITE)) - return -EBADF; - - if (!bdev_max_discard_sectors(bdev)) - return -EOPNOTSUPP; - if (copy_from_user(range, (void __user *)arg, sizeof(range))) return -EFAULT; - start = range[0]; len = range[1]; - if (start & 511) - return -EINVAL; - if (len & 511) - return -EINVAL; + if (!bdev_max_discard_sectors(bdev)) + return -EOPNOTSUPP; - if (check_add_overflow(start, len, &end) || - end > bdev_nr_bytes(bdev)) - return -EINVAL; + if (!(mode & BLK_OPEN_WRITE)) + return -EBADF; + if (bdev_read_only(bdev)) + return -EPERM; + err = blk_validate_byte_range(bdev, start, len); + if (err) + return err; - filemap_invalidate_lock(inode->i_mapping); + filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, start + len - 1); if (err) goto fail; - err = blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); + + sector = start >> SECTOR_SHIFT; + nr_sects = len >> SECTOR_SHIFT; + + blk_start_plug(&plug); + while (1) { + if (fatal_signal_pending(current)) { + if (prev) + bio_await_chain(prev); + err = -EINTR; + goto out_unplug; + } + bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, + GFP_KERNEL); + if (!bio) + break; + prev = bio_chain_and_submit(prev, bio); + } + if (prev) { + err = submit_bio_wait(prev); + if (err == -EOPNOTSUPP) + err = 0; + bio_put(prev); + } +out_unplug: + blk_finish_plug(&plug); fail: - filemap_invalidate_unlock(inode->i_mapping); + filemap_invalidate_unlock(bdev->bd_mapping); return err; } static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, void __user *argp) { - uint64_t start, len; + uint64_t start, len, end; uint64_t range[2]; int err; @@ -149,15 +194,16 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode, len = range[1]; if ((start & 511) || (len & 511)) return -EINVAL; - if (start + len > bdev_nr_bytes(bdev)) + if (check_add_overflow(start, len, &end) || + end > bdev_nr_bytes(bdev)) return -EINVAL; - filemap_invalidate_lock(bdev->bd_inode->i_mapping); - err = truncate_bdev_range(bdev, mode, start, start + len - 1); + filemap_invalidate_lock(bdev->bd_mapping); + err = truncate_bdev_range(bdev, mode, start, end - 1); if (!err) err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9, GFP_KERNEL); - filemap_invalidate_unlock(bdev->bd_inode->i_mapping); + filemap_invalidate_unlock(bdev->bd_mapping); return err; } @@ -167,7 +213,6 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, { uint64_t range[2]; uint64_t start, end, len; - struct inode *inode = bdev->bd_inode; int err; if (!(mode & BLK_OPEN_WRITE)) @@ -190,16 +235,16 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode, return -EINVAL; /* Invalidate the page cache, including dirty pages */ - filemap_invalidate_lock(inode->i_mapping); + filemap_invalidate_lock(bdev->bd_mapping); err = truncate_bdev_range(bdev, mode, start, end); if (err) goto fail; err = blkdev_issue_zeroout(bdev, start >> 9, len >> 9, GFP_KERNEL, - BLKDEV_ZERO_NOUNMAP); + BLKDEV_ZERO_NOUNMAP | BLKDEV_ZERO_KILLABLE); fail: - filemap_invalidate_unlock(inode->i_mapping); + filemap_invalidate_unlock(bdev->bd_mapping); return err; } @@ -403,7 +448,10 @@ static int blkdev_roset(struct block_device *bdev, unsigned cmd, if (ret) return ret; } - bdev->bd_read_only = n; + if (n) + bdev_set_flag(bdev, BD_READ_ONLY); + else + bdev_clear_flag(bdev, BD_READ_ONLY); return 0; } @@ -473,11 +521,14 @@ static int compat_hdio_getgeo(struct block_device *bdev, #endif /* set the logical block size */ -static int blkdev_bszset(struct block_device *bdev, blk_mode_t mode, +static int blkdev_bszset(struct file *file, blk_mode_t mode, int __user *argp) { + // this one might be file_inode(file)->i_rdev - a rare valid + // use of file_inode() for those. + dev_t dev = I_BDEV(file->f_mapping->host)->bd_dev; + struct file *excl_file; int ret, n; - struct file *file; if (!capable(CAP_SYS_ADMIN)) return -EACCES; @@ -487,13 +538,13 @@ static int blkdev_bszset(struct block_device *bdev, blk_mode_t mode, return -EFAULT; if (mode & BLK_OPEN_EXCL) - return set_blocksize(bdev, n); + return set_blocksize(file, n); - file = bdev_file_open_by_dev(bdev->bd_dev, mode, &bdev, NULL); - if (IS_ERR(file)) + excl_file = bdev_file_open_by_dev(dev, mode, &dev, NULL); + if (IS_ERR(excl_file)) return -EBUSY; - ret = set_blocksize(bdev, n); - fput(file); + ret = set_blocksize(excl_file, n); + fput(excl_file); return ret; } @@ -563,7 +614,8 @@ static int blkdev_common_ioctl(struct block_device *bdev, blk_mode_t mode, return -EACCES; if (bdev_is_partition(bdev)) return -EINVAL; - return disk_scan_partitions(bdev->bd_disk, mode); + return disk_scan_partitions(bdev->bd_disk, + mode | BLK_OPEN_STRICT_SCAN); case BLKTRACESTART: case BLKTRACESTOP: case BLKTRACETEARDOWN: @@ -621,7 +673,7 @@ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ return put_int(argp, block_size(bdev)); case BLKBSZSET: - return blkdev_bszset(bdev, mode, argp); + return blkdev_bszset(file, mode, argp); case BLKGETSIZE64: return put_u64(argp, bdev_nr_bytes(bdev)); @@ -681,7 +733,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKBSZGET_32: /* get the logical block size (cf. BLKSSZGET) */ return put_int(argp, bdev_logical_block_size(bdev)); case BLKBSZSET_32: - return blkdev_bszset(bdev, mode, argp); + return blkdev_bszset(file, mode, argp); case BLKGETSIZE64_32: return put_u64(argp, bdev_nr_bytes(bdev)); @@ -699,3 +751,112 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) return ret; } #endif + +struct blk_iou_cmd { + int res; + bool nowait; +}; + +static void blk_cmd_complete(struct io_uring_cmd *cmd, unsigned int issue_flags) +{ + struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); + + if (bic->res == -EAGAIN && bic->nowait) + io_uring_cmd_issue_blocking(cmd); + else + io_uring_cmd_done(cmd, bic->res, 0, issue_flags); +} + +static void bio_cmd_bio_end_io(struct bio *bio) +{ + struct io_uring_cmd *cmd = bio->bi_private; + struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); + + if (unlikely(bio->bi_status) && !bic->res) + bic->res = blk_status_to_errno(bio->bi_status); + + io_uring_cmd_do_in_task_lazy(cmd, blk_cmd_complete); + bio_put(bio); +} + +static int blkdev_cmd_discard(struct io_uring_cmd *cmd, + struct block_device *bdev, + uint64_t start, uint64_t len, bool nowait) +{ + struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); + gfp_t gfp = nowait ? GFP_NOWAIT : GFP_KERNEL; + sector_t sector = start >> SECTOR_SHIFT; + sector_t nr_sects = len >> SECTOR_SHIFT; + struct bio *prev = NULL, *bio; + int err; + + if (!bdev_max_discard_sectors(bdev)) + return -EOPNOTSUPP; + if (!(file_to_blk_mode(cmd->file) & BLK_OPEN_WRITE)) + return -EBADF; + if (bdev_read_only(bdev)) + return -EPERM; + err = blk_validate_byte_range(bdev, start, len); + if (err) + return err; + + err = filemap_invalidate_pages(bdev->bd_mapping, start, + start + len - 1, nowait); + if (err) + return err; + + while (true) { + bio = blk_alloc_discard_bio(bdev, §or, &nr_sects, gfp); + if (!bio) + break; + if (nowait) { + /* + * Don't allow multi-bio non-blocking submissions as + * subsequent bios may fail but we won't get a direct + * indication of that. Normally, the caller should + * retry from a blocking context. + */ + if (unlikely(nr_sects)) { + bio_put(bio); + return -EAGAIN; + } + bio->bi_opf |= REQ_NOWAIT; + } + + prev = bio_chain_and_submit(prev, bio); + } + if (unlikely(!prev)) + return -EAGAIN; + if (unlikely(nr_sects)) + bic->res = -EAGAIN; + + prev->bi_private = cmd; + prev->bi_end_io = bio_cmd_bio_end_io; + submit_bio(prev); + return -EIOCBQUEUED; +} + +int blkdev_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) +{ + struct block_device *bdev = I_BDEV(cmd->file->f_mapping->host); + struct blk_iou_cmd *bic = io_uring_cmd_to_pdu(cmd, struct blk_iou_cmd); + const struct io_uring_sqe *sqe = cmd->sqe; + u32 cmd_op = cmd->cmd_op; + uint64_t start, len; + + if (unlikely(sqe->ioprio || sqe->__pad1 || sqe->len || + sqe->rw_flags || sqe->file_index)) + return -EINVAL; + + bic->res = 0; + bic->nowait = issue_flags & IO_URING_F_NONBLOCK; + + start = READ_ONCE(sqe->addr); + len = READ_ONCE(sqe->addr3); + + switch (cmd_op) { + case BLOCK_URING_CMD_DISCARD: + return blkdev_cmd_discard(cmd, bdev, start, len, bic->nowait); + } + return -EINVAL; +} |