diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/afs/fsclient.c | 51 | ||||
-rw-r--r-- | fs/afs/yfsclient.c | 54 | ||||
-rw-r--r-- | fs/block_dev.c | 58 | ||||
-rw-r--r-- | fs/btrfs/Kconfig | 1 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 1 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 24 | ||||
-rw-r--r-- | fs/btrfs/locking.c | 9 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 11 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 10 | ||||
-rw-r--r-- | fs/exec.c | 2 | ||||
-rw-r--r-- | fs/io_uring.c | 81 | ||||
-rw-r--r-- | fs/namespace.c | 4 | ||||
-rw-r--r-- | fs/open.c | 19 |
13 files changed, 248 insertions, 77 deletions
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 1ce73e014139..114f281f3687 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -339,8 +339,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) call->tmp_u = htonl(0); afs_extract_to_tmp(call); } + /* Fall through */ - /* Fall through - and extract the returned data length */ + /* extract the returned data length */ case 1: _debug("extract data length"); ret = afs_extract_data(call, true); @@ -366,8 +367,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) call->bvec[0].bv_page = req->pages[req->index]; iov_iter_bvec(&call->iter, READ, call->bvec, 1, size); ASSERTCMP(size, <=, PAGE_SIZE); + /* Fall through */ - /* Fall through - and extract the returned data */ + /* extract the returned data */ case 2: _debug("extract data %zu/%llu", iov_iter_count(&call->iter), req->remain); @@ -394,8 +396,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) /* Discard any excess data the server gave us */ iov_iter_discard(&call->iter, READ, req->actual_len - req->len); call->unmarshall = 3; - /* Fall through */ + case 3: _debug("extract discard %zu/%llu", iov_iter_count(&call->iter), req->actual_len - req->len); @@ -407,8 +409,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call) no_more_data: call->unmarshall = 4; afs_extract_to_buf(call, (21 + 3 + 6) * 4); + /* Fall through */ - /* Fall through - and extract the metadata */ + /* extract the metadata */ case 4: ret = afs_extract_data(call, false); if (ret < 0) @@ -1471,8 +1474,9 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) case 0: call->unmarshall++; afs_extract_to_buf(call, 12 * 4); + /* Fall through */ - /* Fall through - and extract the returned status record */ + /* extract the returned status record */ case 1: _debug("extract status"); ret = afs_extract_data(call, true); @@ -1483,8 +1487,9 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) xdr_decode_AFSFetchVolumeStatus(&bp, call->out_volstatus); call->unmarshall++; afs_extract_to_tmp(call); + /* Fall through */ - /* Fall through - and extract the volume name length */ + /* extract the volume name length */ case 2: ret = afs_extract_data(call, true); if (ret < 0) @@ -1498,8 +1503,9 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) size = (call->count + 3) & ~3; /* It's padded */ afs_extract_to_buf(call, size); call->unmarshall++; + /* Fall through */ - /* Fall through - and extract the volume name */ + /* extract the volume name */ case 3: _debug("extract volname"); ret = afs_extract_data(call, true); @@ -1511,8 +1517,9 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) _debug("volname '%s'", p); afs_extract_to_tmp(call); call->unmarshall++; + /* Fall through */ - /* Fall through - and extract the offline message length */ + /* extract the offline message length */ case 4: ret = afs_extract_data(call, true); if (ret < 0) @@ -1526,8 +1533,9 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) size = (call->count + 3) & ~3; /* It's padded */ afs_extract_to_buf(call, size); call->unmarshall++; + /* Fall through */ - /* Fall through - and extract the offline message */ + /* extract the offline message */ case 5: _debug("extract offline"); ret = afs_extract_data(call, true); @@ -1540,8 +1548,9 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) afs_extract_to_tmp(call); call->unmarshall++; + /* Fall through */ - /* Fall through - and extract the message of the day length */ + /* extract the message of the day length */ case 6: ret = afs_extract_data(call, true); if (ret < 0) @@ -1555,8 +1564,9 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call) size = (call->count + 3) & ~3; /* It's padded */ afs_extract_to_buf(call, size); call->unmarshall++; + /* Fall through */ - /* Fall through - and extract the message of the day */ + /* extract the message of the day */ case 7: _debug("extract motd"); ret = afs_extract_data(call, false); @@ -1850,8 +1860,9 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call) case 0: afs_extract_to_tmp(call); call->unmarshall++; + /* Fall through */ - /* Fall through - and extract the capabilities word count */ + /* Extract the capabilities word count */ case 1: ret = afs_extract_data(call, true); if (ret < 0) @@ -1863,8 +1874,9 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call) call->count2 = count; iov_iter_discard(&call->iter, READ, count * sizeof(__be32)); call->unmarshall++; + /* Fall through */ - /* Fall through - and extract capabilities words */ + /* Extract capabilities words */ case 2: ret = afs_extract_data(call, false); if (ret < 0) @@ -2020,9 +2032,9 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) case 0: afs_extract_to_tmp(call); call->unmarshall++; + /* Fall through */ /* Extract the file status count and array in two steps */ - /* Fall through */ case 1: _debug("extract status count"); ret = afs_extract_data(call, true); @@ -2039,8 +2051,8 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) call->unmarshall++; more_counts: afs_extract_to_buf(call, 21 * sizeof(__be32)); - /* Fall through */ + case 2: _debug("extract status array %u", call->count); ret = afs_extract_data(call, true); @@ -2060,9 +2072,9 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) call->count = 0; call->unmarshall++; afs_extract_to_tmp(call); + /* Fall through */ /* Extract the callback count and array in two steps */ - /* Fall through */ case 3: _debug("extract CB count"); ret = afs_extract_data(call, true); @@ -2078,8 +2090,8 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) call->unmarshall++; more_cbs: afs_extract_to_buf(call, 3 * sizeof(__be32)); - /* Fall through */ + case 4: _debug("extract CB array"); ret = afs_extract_data(call, true); @@ -2096,8 +2108,8 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call) afs_extract_to_buf(call, 6 * sizeof(__be32)); call->unmarshall++; - /* Fall through */ + case 5: ret = afs_extract_data(call, false); if (ret < 0) @@ -2193,6 +2205,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call) case 0: afs_extract_to_tmp(call); call->unmarshall++; + /* Fall through */ /* extract the returned data length */ case 1: @@ -2210,6 +2223,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call) acl->size = call->count2; afs_extract_begin(call, acl->data, size); call->unmarshall++; + /* Fall through */ /* extract the returned data */ case 2: @@ -2219,6 +2233,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call) afs_extract_to_buf(call, (21 + 6) * 4); call->unmarshall++; + /* Fall through */ /* extract the metadata */ case 3: diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c index 18722aaeda33..2575503170fc 100644 --- a/fs/afs/yfsclient.c +++ b/fs/afs/yfsclient.c @@ -450,8 +450,9 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) req->offset = req->pos & (PAGE_SIZE - 1); afs_extract_to_tmp64(call); call->unmarshall++; + /* Fall through */ - /* Fall through - and extract the returned data length */ + /* extract the returned data length */ case 1: _debug("extract data length"); ret = afs_extract_data(call, true); @@ -477,8 +478,9 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) call->bvec[0].bv_page = req->pages[req->index]; iov_iter_bvec(&call->iter, READ, call->bvec, 1, size); ASSERTCMP(size, <=, PAGE_SIZE); + /* Fall through */ - /* Fall through - and extract the returned data */ + /* extract the returned data */ case 2: _debug("extract data %zu/%llu", iov_iter_count(&call->iter), req->remain); @@ -505,8 +507,8 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) /* Discard any excess data the server gave us */ iov_iter_discard(&call->iter, READ, req->actual_len - req->len); call->unmarshall = 3; - /* Fall through */ + case 3: _debug("extract discard %zu/%llu", iov_iter_count(&call->iter), req->actual_len - req->len); @@ -521,8 +523,9 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSCallBack) + sizeof(struct yfs_xdr_YFSVolSync)); + /* Fall through */ - /* Fall through - and extract the metadata */ + /* extract the metadata */ case 4: ret = afs_extract_data(call, false); if (ret < 0) @@ -539,8 +542,8 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call) req->file_size = call->out_scb->status.size; call->unmarshall++; - /* Fall through */ + case 5: break; } @@ -1429,8 +1432,9 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) case 0: call->unmarshall++; afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchVolumeStatus)); + /* Fall through */ - /* Fall through - and extract the returned status record */ + /* extract the returned status record */ case 1: _debug("extract status"); ret = afs_extract_data(call, true); @@ -1441,8 +1445,9 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) xdr_decode_YFSFetchVolumeStatus(&bp, call->out_volstatus); call->unmarshall++; afs_extract_to_tmp(call); + /* Fall through */ - /* Fall through - and extract the volume name length */ + /* extract the volume name length */ case 2: ret = afs_extract_data(call, true); if (ret < 0) @@ -1456,8 +1461,9 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) size = (call->count + 3) & ~3; /* It's padded */ afs_extract_to_buf(call, size); call->unmarshall++; + /* Fall through */ - /* Fall through - and extract the volume name */ + /* extract the volume name */ case 3: _debug("extract volname"); ret = afs_extract_data(call, true); @@ -1469,8 +1475,9 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) _debug("volname '%s'", p); afs_extract_to_tmp(call); call->unmarshall++; + /* Fall through */ - /* Fall through - and extract the offline message length */ + /* extract the offline message length */ case 4: ret = afs_extract_data(call, true); if (ret < 0) @@ -1484,8 +1491,9 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) size = (call->count + 3) & ~3; /* It's padded */ afs_extract_to_buf(call, size); call->unmarshall++; + /* Fall through */ - /* Fall through - and extract the offline message */ + /* extract the offline message */ case 5: _debug("extract offline"); ret = afs_extract_data(call, true); @@ -1498,8 +1506,9 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) afs_extract_to_tmp(call); call->unmarshall++; + /* Fall through */ - /* Fall through - and extract the message of the day length */ + /* extract the message of the day length */ case 6: ret = afs_extract_data(call, true); if (ret < 0) @@ -1513,8 +1522,9 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) size = (call->count + 3) & ~3; /* It's padded */ afs_extract_to_buf(call, size); call->unmarshall++; + /* Fall through */ - /* Fall through - and extract the message of the day */ + /* extract the message of the day */ case 7: _debug("extract motd"); ret = afs_extract_data(call, false); @@ -1526,8 +1536,8 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call) _debug("motd '%s'", p); call->unmarshall++; - /* Fall through */ + case 8: break; } @@ -1805,9 +1815,9 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) case 0: afs_extract_to_tmp(call); call->unmarshall++; + /* Fall through */ /* Extract the file status count and array in two steps */ - /* Fall through */ case 1: _debug("extract status count"); ret = afs_extract_data(call, true); @@ -1824,8 +1834,8 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) call->unmarshall++; more_counts: afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchStatus)); - /* Fall through */ + case 2: _debug("extract status array %u", call->count); ret = afs_extract_data(call, true); @@ -1845,9 +1855,9 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) call->count = 0; call->unmarshall++; afs_extract_to_tmp(call); + /* Fall through */ /* Extract the callback count and array in two steps */ - /* Fall through */ case 3: _debug("extract CB count"); ret = afs_extract_data(call, true); @@ -1863,8 +1873,8 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) call->unmarshall++; more_cbs: afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSCallBack)); - /* Fall through */ + case 4: _debug("extract CB array"); ret = afs_extract_data(call, true); @@ -1881,8 +1891,8 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSVolSync)); call->unmarshall++; - /* Fall through */ + case 5: ret = afs_extract_data(call, false); if (ret < 0) @@ -1892,8 +1902,8 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call) xdr_decode_YFSVolSync(&bp, call->out_volsync); call->unmarshall++; - /* Fall through */ + case 6: break; } @@ -1978,6 +1988,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) case 0: afs_extract_to_tmp(call); call->unmarshall++; + /* Fall through */ /* Extract the file ACL length */ case 1: @@ -1999,6 +2010,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) iov_iter_discard(&call->iter, READ, size); } call->unmarshall++; + /* Fall through */ /* Extract the file ACL */ case 2: @@ -2008,6 +2020,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) afs_extract_to_tmp(call); call->unmarshall++; + /* Fall through */ /* Extract the volume ACL length */ case 3: @@ -2029,6 +2042,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) iov_iter_discard(&call->iter, READ, size); } call->unmarshall++; + /* Fall through */ /* Extract the volume ACL */ case 4: @@ -2041,6 +2055,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) sizeof(struct yfs_xdr_YFSFetchStatus) + sizeof(struct yfs_xdr_YFSVolSync)); call->unmarshall++; + /* Fall through */ /* extract the metadata */ case 5: @@ -2057,6 +2072,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call) xdr_decode_YFSVolSync(&bp, call->out_volsync); call->unmarshall++; + /* Fall through */ case 6: break; diff --git a/fs/block_dev.c b/fs/block_dev.c index 4707dfff991b..c2a85b587922 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -345,15 +345,24 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) struct bio *bio; bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0; bool is_read = (iov_iter_rw(iter) == READ), is_sync; + bool nowait = (iocb->ki_flags & IOCB_NOWAIT) != 0; loff_t pos = iocb->ki_pos; blk_qc_t qc = BLK_QC_T_NONE; - int ret = 0; + gfp_t gfp; + ssize_t ret; if ((pos | iov_iter_alignment(iter)) & (bdev_logical_block_size(bdev) - 1)) return -EINVAL; - bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool); + if (nowait) + gfp = GFP_NOWAIT; + else + gfp = GFP_KERNEL; + + bio = bio_alloc_bioset(gfp, nr_pages, &blkdev_dio_pool); + if (!bio) + return -EAGAIN; dio = container_of(bio, struct blkdev_dio, bio); dio->is_sync = is_sync = is_sync_kiocb(iocb); @@ -375,7 +384,10 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) if (!is_poll) blk_start_plug(&plug); + ret = 0; for (;;) { + int err; + bio_set_dev(bio, bdev); bio->bi_iter.bi_sector = pos >> 9; bio->bi_write_hint = iocb->ki_hint; @@ -383,8 +395,10 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) bio->bi_end_io = blkdev_bio_end_io; bio->bi_ioprio = iocb->ki_ioprio; - ret = bio_iov_iter_get_pages(bio, iter); - if (unlikely(ret)) { + err = bio_iov_iter_get_pages(bio, iter); + if (unlikely(err)) { + if (!ret) + ret = err; bio->bi_status = BLK_STS_IOERR; bio_endio(bio); break; @@ -399,6 +413,14 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) task_io_account_write(bio->bi_iter.bi_size); } + /* + * Tell underlying layer to not block for resource shortage. + * And if we would have blocked, return error inline instead + * of through the bio->bi_end_io() callback. + */ + if (nowait) + bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE); + dio->size += bio->bi_iter.bi_size; pos += bio->bi_iter.bi_size; @@ -412,6 +434,11 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) } qc = submit_bio(bio); + if (qc == BLK_QC_T_EAGAIN) { + if (!ret) + ret = -EAGAIN; + goto error; + } if (polled) WRITE_ONCE(iocb->ki_cookie, qc); @@ -432,8 +459,20 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) atomic_inc(&dio->ref); } - submit_bio(bio); - bio = bio_alloc(GFP_KERNEL, nr_pages); + qc = submit_bio(bio); + if (qc == BLK_QC_T_EAGAIN) { + if (!ret) + ret = -EAGAIN; + goto error; + } + ret += bio->bi_iter.bi_size; + + bio = bio_alloc(gfp, nr_pages); + if (!bio) { + if (!ret) + ret = -EAGAIN; + goto error; + } } if (!is_poll) @@ -453,13 +492,16 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) } __set_current_state(TASK_RUNNING); +out: if (!ret) ret = blk_status_to_errno(dio->bio.bi_status); - if (likely(!ret)) - ret = dio->size; bio_put(&dio->bio); return ret; +error: + if (!is_poll) + blk_finish_plug(&plug); + goto out; } static ssize_t diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig index 212b4a854f2c..38651fae7f21 100644 --- a/fs/btrfs/Kconfig +++ b/fs/btrfs/Kconfig @@ -4,6 +4,7 @@ config BTRFS_FS tristate "Btrfs filesystem support" select CRYPTO select CRYPTO_CRC32C + select LIBCRC32C select ZLIB_INFLATE select ZLIB_DEFLATE select LZO_COMPRESS diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 41a2bd2e0c56..5f7ee70b3d1a 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4106,6 +4106,7 @@ void close_ctree(struct btrfs_fs_info *fs_info) percpu_counter_destroy(&fs_info->dev_replace.bio_counter); cleanup_srcu_struct(&fs_info->subvol_srcu); + btrfs_free_csum_hash(fs_info); btrfs_free_stripe_hash_table(fs_info); btrfs_free_ref_cache(fs_info); } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 1af069a9a0c7..ee582a36653d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -395,10 +395,31 @@ static noinline int add_async_extent(struct async_chunk *cow, return 0; } +/* + * Check if the inode has flags compatible with compression + */ +static inline bool inode_can_compress(struct inode *inode) +{ + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW || + BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) + return false; + return true; +} + +/* + * Check if the inode needs to be submitted to compression, based on mount + * options, defragmentation, properties or heuristics. + */ static inline int inode_need_compress(struct inode *inode, u64 start, u64 end) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + if (!inode_can_compress(inode)) { + WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG), + KERN_ERR "BTRFS: unexpected compression for ino %llu\n", + btrfs_ino(BTRFS_I(inode))); + return 0; + } /* force compress */ if (btrfs_test_opt(fs_info, FORCE_COMPRESS)) return 1; @@ -1631,7 +1652,8 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page, } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) { ret = run_delalloc_nocow(inode, locked_page, start, end, page_started, 0, nr_written); - } else if (!inode_need_compress(inode, start, end)) { + } else if (!inode_can_compress(inode) || + !inode_need_compress(inode, start, end)) { ret = cow_file_range(inode, locked_page, start, end, end, page_started, nr_written, 1, NULL); } else { diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 98fccce4208c..393eceda57c8 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -346,9 +346,12 @@ void btrfs_tree_unlock(struct extent_buffer *eb) if (blockers) { btrfs_assert_no_spinning_writers(eb); eb->blocking_writers--; - /* Use the lighter barrier after atomic */ - smp_mb__after_atomic(); - cond_wake_up_nomb(&eb->write_lock_wq); + /* + * We need to order modifying blocking_writers above with + * actually waking up the sleepers to ensure they see the + * updated value of blocking_writers + */ + cond_wake_up(&eb->write_lock_wq); } else { btrfs_assert_spinning_writers_put(eb); write_unlock(&eb->lock); diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 1744ba8b2754..ae7f64a8facb 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -985,13 +985,14 @@ void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree, struct extent_state **cached_state) { struct btrfs_ordered_extent *ordered; - struct extent_state *cachedp = NULL; + struct extent_state *cache = NULL; + struct extent_state **cachedp = &cache; if (cached_state) - cachedp = *cached_state; + cachedp = cached_state; while (1) { - lock_extent_bits(tree, start, end, &cachedp); + lock_extent_bits(tree, start, end, cachedp); ordered = btrfs_lookup_ordered_range(inode, start, end - start + 1); if (!ordered) { @@ -1001,10 +1002,10 @@ void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree, * aren't exposing it outside of this function */ if (!cached_state) - refcount_dec(&cachedp->refs); + refcount_dec(&cache->refs); break; } - unlock_extent_cached(tree, start, end, &cachedp); + unlock_extent_cached(tree, start, end, cachedp); btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1); btrfs_put_ordered_extent(ordered); } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a13ddba1ebc3..d74b74ca07af 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5941,6 +5941,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, u64 stripe_len; u64 raid56_full_stripe_start = (u64)-1; int data_stripes; + int ret = 0; ASSERT(op != BTRFS_MAP_DISCARD); @@ -5961,8 +5962,8 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, btrfs_crit(fs_info, "stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu", stripe_offset, offset, em->start, logical, stripe_len); - free_extent_map(em); - return -EINVAL; + ret = -EINVAL; + goto out; } /* stripe_offset is the offset of this block in its stripe */ @@ -6009,7 +6010,10 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op, io_geom->stripe_offset = stripe_offset; io_geom->raid56_stripe_offset = raid56_full_stripe_start; - return 0; +out: + /* once for us */ + free_extent_map(em); + return ret; } static int __btrfs_map_block(struct btrfs_fs_info *fs_info, diff --git a/fs/exec.c b/fs/exec.c index c71cbfe6826a..f7f6a140856a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1828,7 +1828,7 @@ static int __do_execve_file(int fd, struct filename *filename, membarrier_execve(current); rseq_execve(current); acct_update_integrals(current); - task_numa_free(current); + task_numa_free(current, false); free_bprm(bprm); kfree(pathbuf); if (filename) diff --git a/fs/io_uring.c b/fs/io_uring.c index e2a66e12fbc6..012bc0efb9d3 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -202,7 +202,7 @@ struct async_list { struct file *file; off_t io_end; - size_t io_pages; + size_t io_len; }; struct io_ring_ctx { @@ -333,7 +333,8 @@ struct io_kiocb { #define REQ_F_IO_DRAIN 16 /* drain existing IO first */ #define REQ_F_IO_DRAINED 32 /* drain done */ #define REQ_F_LINK 64 /* linked sqes */ -#define REQ_F_FAIL_LINK 128 /* fail rest of links */ +#define REQ_F_LINK_DONE 128 /* linked sqes done */ +#define REQ_F_FAIL_LINK 256 /* fail rest of links */ u64 user_data; u32 result; u32 sequence; @@ -429,7 +430,7 @@ static inline bool io_sequence_defer(struct io_ring_ctx *ctx, if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN) return false; - return req->sequence > ctx->cached_cq_tail + ctx->sq_ring->dropped; + return req->sequence != ctx->cached_cq_tail + ctx->sq_ring->dropped; } static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx) @@ -632,6 +633,7 @@ static void io_req_link_next(struct io_kiocb *req) nxt->flags |= REQ_F_LINK; } + nxt->flags |= REQ_F_LINK_DONE; INIT_WORK(&nxt->work, io_sq_wq_submit_work); queue_work(req->ctx->sqo_wq, &nxt->work); } @@ -1064,8 +1066,44 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw, */ offset = buf_addr - imu->ubuf; iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); - if (offset) - iov_iter_advance(iter, offset); + + if (offset) { + /* + * Don't use iov_iter_advance() here, as it's really slow for + * using the latter parts of a big fixed buffer - it iterates + * over each segment manually. We can cheat a bit here, because + * we know that: + * + * 1) it's a BVEC iter, we set it up + * 2) all bvecs are PAGE_SIZE in size, except potentially the + * first and last bvec + * + * So just find our index, and adjust the iterator afterwards. + * If the offset is within the first bvec (or the whole first + * bvec, just use iov_iter_advance(). This makes it easier + * since we can just skip the first segment, which may not + * be PAGE_SIZE aligned. + */ + const struct bio_vec *bvec = imu->bvec; + + if (offset <= bvec->bv_len) { + iov_iter_advance(iter, offset); + } else { + unsigned long seg_skip; + + /* skip first vec */ + offset -= bvec->bv_len; + seg_skip = 1 + (offset >> PAGE_SHIFT); + + iter->bvec = bvec + seg_skip; + iter->nr_segs -= seg_skip; + iter->count -= (seg_skip << PAGE_SHIFT); + iter->iov_offset = offset & ~PAGE_MASK; + if (iter->iov_offset) + iter->count -= iter->iov_offset; + } + } + return 0; } @@ -1120,28 +1158,26 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len) off_t io_end = kiocb->ki_pos + len; if (filp == async_list->file && kiocb->ki_pos == async_list->io_end) { - unsigned long max_pages; + unsigned long max_bytes; /* Use 8x RA size as a decent limiter for both reads/writes */ - max_pages = filp->f_ra.ra_pages; - if (!max_pages) - max_pages = VM_READAHEAD_PAGES; - max_pages *= 8; - - /* If max pages are exceeded, reset the state */ - len >>= PAGE_SHIFT; - if (async_list->io_pages + len <= max_pages) { + max_bytes = filp->f_ra.ra_pages << (PAGE_SHIFT + 3); + if (!max_bytes) + max_bytes = VM_READAHEAD_PAGES << (PAGE_SHIFT + 3); + + /* If max len are exceeded, reset the state */ + if (async_list->io_len + len <= max_bytes) { req->flags |= REQ_F_SEQ_PREV; - async_list->io_pages += len; + async_list->io_len += len; } else { io_end = 0; - async_list->io_pages = 0; + async_list->io_len = 0; } } /* New file? Reset state. */ if (async_list->file != filp) { - async_list->io_pages = 0; + async_list->io_len = 0; async_list->file = filp; } async_list->io_end = io_end; @@ -1630,6 +1666,8 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) INIT_LIST_HEAD(&poll->wait.entry); init_waitqueue_func_entry(&poll->wait, io_poll_wake); + INIT_LIST_HEAD(&req->list); + mask = vfs_poll(poll->file, &ipt.pt) & poll->events; spin_lock_irq(&ctx->completion_lock); @@ -1844,6 +1882,10 @@ restart: /* async context always use a copy of the sqe */ kfree(sqe); + /* req from defer and link list needn't decrease async cnt */ + if (req->flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE)) + goto out; + if (!async_list) break; if (!list_empty(&req_list)) { @@ -1891,6 +1933,7 @@ restart: } } +out: if (cur_mm) { set_fs(old_fs); unuse_mm(cur_mm); @@ -1917,6 +1960,10 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req) ret = true; spin_lock(&list->lock); list_add_tail(&req->list, &list->list); + /* + * Ensure we see a simultaneous modification from io_sq_wq_submit_work() + */ + smp_mb(); if (!atomic_read(&list->cnt)) { list_del_init(&req->list); ret = false; diff --git a/fs/namespace.c b/fs/namespace.c index 6464ea4acba9..d28d30b13043 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1463,7 +1463,6 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; disconnect = disconnect_mount(p, how); - if (mnt_has_parent(p)) { mnt_add_count(p->mnt_parent, -1); if (!disconnect) { @@ -1471,10 +1470,11 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts); } else { umount_mnt(p); - hlist_add_head(&p->mnt_umount, &unmounted); } } change_mnt_propagation(p, MS_PRIVATE); + if (disconnect) + hlist_add_head(&p->mnt_umount, &unmounted); } } diff --git a/fs/open.c b/fs/open.c index b5b80469b93d..a59abe3c669a 100644 --- a/fs/open.c +++ b/fs/open.c @@ -374,6 +374,25 @@ long do_faccessat(int dfd, const char __user *filename, int mode) override_cred->cap_permitted; } + /* + * The new set of credentials can *only* be used in + * task-synchronous circumstances, and does not need + * RCU freeing, unless somebody then takes a separate + * reference to it. + * + * NOTE! This is _only_ true because this credential + * is used purely for override_creds() that installs + * it as the subjective cred. Other threads will be + * accessing ->real_cred, not the subjective cred. + * + * If somebody _does_ make a copy of this (using the + * 'get_current_cred()' function), that will clear the + * non_rcu field, because now that other user may be + * expecting RCU freeing. But normal thread-synchronous + * cred accesses will keep things non-RCY. + */ + override_cred->non_rcu = 1; + old_cred = override_creds(override_cred); retry: res = user_path_at(dfd, filename, lookup_flags, &path); |