From 99c79f6692ccdc42e04deea8a36e22bb48168a62 Mon Sep 17 00:00:00 2001 From: Aleix Roca Nonell Date: Thu, 15 Aug 2019 14:03:22 +0200 Subject: io_uring: fix manual setup of iov_iter for fixed buffers Commit bd11b3a391e3 ("io_uring: don't use iov_iter_advance() for fixed buffers") introduced an optimization to avoid using the slow iov_iter_advance by manually populating the iov_iter iterator in some cases. However, the computation of the iterator count field was erroneous: The first bvec was always accounted for an extent of page size even if the bvec length was smaller. In consequence, some I/O operations on fixed buffers were unable to operate on the full extent of the buffer, consistently skipping some bytes at the end of it. Fixes: bd11b3a391e3 ("io_uring: don't use iov_iter_advance() for fixed buffers") Cc: stable@vger.kernel.org Signed-off-by: Aleix Roca Nonell Signed-off-by: Jens Axboe --- fs/io_uring.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index d542f1cf4428..aa25b5bbd4ae 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1097,10 +1097,8 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, iter->bvec = bvec + seg_skip; iter->nr_segs -= seg_skip; - iter->count -= (seg_skip << PAGE_SHIFT); + iter->count -= bvec->bv_len + offset; iter->iov_offset = offset & ~PAGE_MASK; - if (iter->iov_offset) - iter->count -= iter->iov_offset; } } -- cgit v1.2.3 From 7b6620d7db566a46f49b4b9deab9fa061fd4b59b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Aug 2019 11:09:16 -0600 Subject: block: remove REQ_NOWAIT_INLINE We had a few issues with this code, and there's still a problem around how we deal with error handling for chained/split bios. For now, just revert the code and we'll try again with a thoroug solution. This reverts commits: e15c2ffa1091 ("block: fix O_DIRECT error handling for bio fragments") 0eb6ddfb865c ("block: Fix __blkdev_direct_IO() for bio fragments") 6a43074e2f46 ("block: properly handle IOCB_NOWAIT for async O_DIRECT IO") 893a1c97205a ("blk-mq: allow REQ_NOWAIT to return an error inline") Signed-off-by: Jens Axboe --- fs/block_dev.c | 49 +++++-------------------------------------------- 1 file changed, 5 insertions(+), 44 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index eb657ab94060..677cb364d33f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -345,24 +345,15 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) struct bio *bio; bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0; bool is_read = (iov_iter_rw(iter) == READ), is_sync; - bool nowait = (iocb->ki_flags & IOCB_NOWAIT) != 0; loff_t pos = iocb->ki_pos; blk_qc_t qc = BLK_QC_T_NONE; - gfp_t gfp; - int ret; + int ret = 0; if ((pos | iov_iter_alignment(iter)) & (bdev_logical_block_size(bdev) - 1)) return -EINVAL; - if (nowait) - gfp = GFP_NOWAIT; - else - gfp = GFP_KERNEL; - - bio = bio_alloc_bioset(gfp, nr_pages, &blkdev_dio_pool); - if (!bio) - return -EAGAIN; + bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool); dio = container_of(bio, struct blkdev_dio, bio); dio->is_sync = is_sync = is_sync_kiocb(iocb); @@ -384,7 +375,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) if (!is_poll) blk_start_plug(&plug); - ret = 0; for (;;) { bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = pos >> 9; @@ -409,14 +399,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) task_io_account_write(bio->bi_iter.bi_size); } - /* - * Tell underlying layer to not block for resource shortage. - * And if we would have blocked, return error inline instead - * of through the bio->bi_end_io() callback. - */ - if (nowait) - bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE); - + dio->size += bio->bi_iter.bi_size; pos += bio->bi_iter.bi_size; nr_pages = iov_iter_npages(iter, BIO_MAX_PAGES); @@ -428,13 +411,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) polled = true; } - dio->size += bio->bi_iter.bi_size; qc = submit_bio(bio); - if (qc == BLK_QC_T_EAGAIN) { - dio->size -= bio->bi_iter.bi_size; - ret = -EAGAIN; - goto error; - } if (polled) WRITE_ONCE(iocb->ki_cookie, qc); @@ -455,19 +432,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) atomic_inc(&dio->ref); } - dio->size += bio->bi_iter.bi_size; - qc = submit_bio(bio); - if (qc == BLK_QC_T_EAGAIN) { - dio->size -= bio->bi_iter.bi_size; - ret = -EAGAIN; - goto error; - } - - bio = bio_alloc(gfp, nr_pages); - if (!bio) { - ret = -EAGAIN; - goto error; - } + submit_bio(bio); + bio = bio_alloc(GFP_KERNEL, nr_pages); } if (!is_poll) @@ -487,7 +453,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) } __set_current_state(TASK_RUNNING); -out: if (!ret) ret = blk_status_to_errno(dio->bio.bi_status); if (likely(!ret)) @@ -495,10 +460,6 @@ out: bio_put(&dio->bio); return ret; -error: - if (!is_poll) - blk_finish_plug(&plug); - goto out; } static ssize_t -- cgit v1.2.3 From a982eeb09b6030e567b8b815277c8c9197168040 Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Wed, 14 Aug 2019 17:35:22 +0800 Subject: io_uring: fix an issue when IOSQE_IO_LINK is inserted into defer list This patch may fix two issues: First, when IOSQE_IO_DRAIN set, the next IOs need to be inserted into defer list to delay execution, but link io will be actively scheduled to run by calling io_queue_sqe. Second, when multiple LINK_IOs are inserted together with defer_list, the LINK_IO is no longer keep order. |-------------| | LINK_IO | ----> insert to defer_list ----------- |-------------| | | LINK_IO | ----> insert to defer_list ----------| |-------------| | | LINK_IO | ----> insert to defer_list ----------| |-------------| | | NORMAL_IO | ----> insert to defer_list ----------| |-------------| | | queue_work at same time <-----| Fixes: 9e645e1105c ("io_uring: add support for sqe links") Signed-off-by: Jackie Liu Signed-off-by: Jens Axboe --- fs/io_uring.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/io_uring.c b/fs/io_uring.c index aa25b5bbd4ae..24bbe3cb7ad4 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2023,6 +2023,15 @@ static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, { int ret; + ret = io_req_defer(ctx, req, s->sqe); + if (ret) { + if (ret != -EIOCBQUEUED) { + io_free_req(req); + io_cqring_add_event(ctx, s->sqe->user_data, ret); + } + return 0; + } + ret = __io_submit_sqe(ctx, req, s, true); if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { struct io_uring_sqe *sqe_copy; @@ -2095,13 +2104,6 @@ err: return; } - ret = io_req_defer(ctx, req, s->sqe); - if (ret) { - if (ret != -EIOCBQUEUED) - goto err_req; - return; - } - /* * If we already have a head request, queue this one for async * submittal once the head completes. If we don't have a head but -- cgit v1.2.3