diff options
author | Kalle Valo <kvalo@kernel.org> | 2023-01-17 13:36:25 +0200 |
---|---|---|
committer | Kalle Valo <kvalo@kernel.org> | 2023-01-17 13:36:25 +0200 |
commit | d0e99511834b6828c960e978d9a8cb6e5731250d (patch) | |
tree | e7b062c1f9d28a55083477a1462286a7923a57fd /fs/btrfs/disk-io.c | |
parent | de7d0ff301fccc75281d7d8eb98c4a47faacf32d (diff) | |
parent | 80f8a66dede0a4b4e9e846765a97809c6fe49ce5 (diff) | |
download | linux-d0e99511834b6828c960e978d9a8cb6e5731250d.tar.gz linux-d0e99511834b6828c960e978d9a8cb6e5731250d.tar.bz2 linux-d0e99511834b6828c960e978d9a8cb6e5731250d.zip |
Merge wireless into wireless-next
Due to the two cherry picked commits from wireless to wireless-next we have
several conflicts in mt76. To avoid any bugs with conflicts merge wireless into
wireless-next.
96f134dc1964 wifi: mt76: handle possible mt76_rx_token_consume failures
fe13dad8992b wifi: mt76: dma: do not increment queue head if mt76_dma_add_buf fails
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 247 |
1 files changed, 170 insertions, 77 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d99bf7c64611..0888d484df80 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -23,7 +23,7 @@ #include "disk-io.h" #include "transaction.h" #include "btrfs_inode.h" -#include "volumes.h" +#include "bio.h" #include "print-tree.h" #include "locking.h" #include "tree-log.h" @@ -43,6 +43,15 @@ #include "space-info.h" #include "zoned.h" #include "subpage.h" +#include "fs.h" +#include "accessors.h" +#include "extent-tree.h" +#include "root-tree.h" +#include "defrag.h" +#include "uuid-tree.h" +#include "relocation.h" +#include "scrub.h" +#include "super.h" #define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\ BTRFS_HEADER_FLAG_RELOC |\ @@ -75,12 +84,12 @@ static void btrfs_free_csum_hash(struct btrfs_fs_info *fs_info) * just before they are sent down the IO stack. */ struct async_submit_bio { - struct inode *inode; + struct btrfs_inode *inode; struct bio *bio; - extent_submit_bio_start_t *submit_bio_start; + enum btrfs_wq_submit_cmd submit_cmd; int mirror_num; - /* Optional parameter for submit_bio_start used by direct io */ + /* Optional parameter for used by direct io */ u64 dio_file_offset; struct btrfs_work work; blk_status_t status; @@ -246,40 +255,54 @@ int btrfs_verify_level_key(struct extent_buffer *eb, int level, return ret; } +static int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, + int mirror_num) +{ + struct btrfs_fs_info *fs_info = eb->fs_info; + u64 start = eb->start; + int i, num_pages = num_extent_pages(eb); + int ret = 0; + + if (sb_rdonly(fs_info->sb)) + return -EROFS; + + for (i = 0; i < num_pages; i++) { + struct page *p = eb->pages[i]; + + ret = btrfs_repair_io_failure(fs_info, 0, start, PAGE_SIZE, + start, p, start - page_offset(p), mirror_num); + if (ret) + break; + start += PAGE_SIZE; + } + + return ret; +} + /* * helper to read a given tree block, doing retries as required when * the checksums don't match and we have alternate mirrors to try. * - * @parent_transid: expected transid, skip check if 0 - * @level: expected level, mandatory check - * @first_key: expected key of first slot, skip check if NULL + * @check: expected tree parentness check, see the comments of the + * structure for details. */ int btrfs_read_extent_buffer(struct extent_buffer *eb, - u64 parent_transid, int level, - struct btrfs_key *first_key) + struct btrfs_tree_parent_check *check) { struct btrfs_fs_info *fs_info = eb->fs_info; - struct extent_io_tree *io_tree; int failed = 0; int ret; int num_copies = 0; int mirror_num = 0; int failed_mirror = 0; - io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree; + ASSERT(check); + while (1) { clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); - ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num); - if (!ret) { - if (verify_parent_transid(io_tree, eb, - parent_transid, 0)) - ret = -EIO; - else if (btrfs_verify_level_key(eb, level, - first_key, parent_transid)) - ret = -EUCLEAN; - else - break; - } + ret = read_extent_buffer_pages(eb, WAIT_COMPLETE, mirror_num, check); + if (!ret) + break; num_copies = btrfs_num_copies(fs_info, eb->start, eb->len); @@ -455,7 +478,8 @@ static int check_tree_block_fsid(struct extent_buffer *eb) } /* Do basic extent buffer checks at read time */ -static int validate_extent_buffer(struct extent_buffer *eb) +static int validate_extent_buffer(struct extent_buffer *eb, + struct btrfs_tree_parent_check *check) { struct btrfs_fs_info *fs_info = eb->fs_info; u64 found_start; @@ -465,6 +489,8 @@ static int validate_extent_buffer(struct extent_buffer *eb) const u8 *header_csum; int ret = 0; + ASSERT(check); + found_start = btrfs_header_bytenr(eb); if (found_start != eb->start) { btrfs_err_rl(fs_info, @@ -503,6 +529,45 @@ static int validate_extent_buffer(struct extent_buffer *eb) goto out; } + if (found_level != check->level) { + ret = -EIO; + goto out; + } + if (unlikely(check->transid && + btrfs_header_generation(eb) != check->transid)) { + btrfs_err_rl(eb->fs_info, +"parent transid verify failed on logical %llu mirror %u wanted %llu found %llu", + eb->start, eb->read_mirror, check->transid, + btrfs_header_generation(eb)); + ret = -EIO; + goto out; + } + if (check->has_first_key) { + struct btrfs_key *expect_key = &check->first_key; + struct btrfs_key found_key; + + if (found_level) + btrfs_node_key_to_cpu(eb, &found_key, 0); + else + btrfs_item_key_to_cpu(eb, &found_key, 0); + if (unlikely(btrfs_comp_cpu_keys(expect_key, &found_key))) { + btrfs_err(fs_info, +"tree first key mismatch detected, bytenr=%llu parent_transid=%llu key expected=(%llu,%u,%llu) has=(%llu,%u,%llu)", + eb->start, check->transid, + expect_key->objectid, + expect_key->type, expect_key->offset, + found_key.objectid, found_key.type, + found_key.offset); + ret = -EUCLEAN; + goto out; + } + } + if (check->owner_root) { + ret = btrfs_check_eb_owner(eb, check->owner_root); + if (ret < 0) + goto out; + } + /* * If this is a leaf block and it is corrupt, set the corrupt bit so * that we don't try and read the other copies of this block, just @@ -527,13 +592,15 @@ out: } static int validate_subpage_buffer(struct page *page, u64 start, u64 end, - int mirror) + int mirror, struct btrfs_tree_parent_check *check) { struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb); struct extent_buffer *eb; bool reads_done; int ret = 0; + ASSERT(check); + /* * We don't allow bio merge for subpage metadata read, so we should * only get one eb for each endio hook. @@ -557,7 +624,7 @@ static int validate_subpage_buffer(struct page *page, u64 start, u64 end, ret = -EIO; goto err; } - ret = validate_extent_buffer(eb); + ret = validate_extent_buffer(eb, check); if (ret < 0) goto err; @@ -587,7 +654,8 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio, ASSERT(page->private); if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE) - return validate_subpage_buffer(page, start, end, mirror); + return validate_subpage_buffer(page, start, end, mirror, + &bbio->parent_check); eb = (struct extent_buffer *)page->private; @@ -606,7 +674,7 @@ int btrfs_validate_metadata_buffer(struct btrfs_bio *bbio, ret = -EIO; goto err; } - ret = validate_extent_buffer(eb); + ret = validate_extent_buffer(eb, &bbio->parent_check); err: if (ret) { /* @@ -628,8 +696,18 @@ static void run_one_async_start(struct btrfs_work *work) blk_status_t ret; async = container_of(work, struct async_submit_bio, work); - ret = async->submit_bio_start(async->inode, async->bio, - async->dio_file_offset); + switch (async->submit_cmd) { + case WQ_SUBMIT_METADATA: + ret = btree_submit_bio_start(async->bio); + break; + case WQ_SUBMIT_DATA: + ret = btrfs_submit_bio_start(async->inode, async->bio); + break; + case WQ_SUBMIT_DATA_DIO: + ret = btrfs_submit_bio_start_direct_io(async->inode, + async->bio, async->dio_file_offset); + break; + } if (ret) async->status = ret; } @@ -646,7 +724,7 @@ static void run_one_async_done(struct btrfs_work *work) { struct async_submit_bio *async = container_of(work, struct async_submit_bio, work); - struct inode *inode = async->inode; + struct btrfs_inode *inode = async->inode; struct btrfs_bio *bbio = btrfs_bio(async->bio); /* If an error occurred we just want to clean up the bio and move on */ @@ -661,7 +739,7 @@ static void run_one_async_done(struct btrfs_work *work) * This changes nothing when cgroups aren't in use. */ async->bio->bi_opf |= REQ_CGROUP_PUNT; - btrfs_submit_bio(btrfs_sb(inode->i_sb), async->bio, async->mirror_num); + btrfs_submit_bio(inode->root->fs_info, async->bio, async->mirror_num); } static void run_one_async_free(struct btrfs_work *work) @@ -679,11 +757,10 @@ static void run_one_async_free(struct btrfs_work *work) * - true if the work has been succesfuly submitted * - false in case of error */ -bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num, - u64 dio_file_offset, - extent_submit_bio_start_t *submit_bio_start) +bool btrfs_wq_submit_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num, + u64 dio_file_offset, enum btrfs_wq_submit_cmd cmd) { - struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info; + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct async_submit_bio *async; async = kmalloc(sizeof(*async), GFP_NOFS); @@ -693,7 +770,7 @@ bool btrfs_wq_submit_bio(struct inode *inode, struct bio *bio, int mirror_num, async->inode = inode; async->bio = bio; async->mirror_num = mirror_num; - async->submit_bio_start = submit_bio_start; + async->submit_cmd = cmd; btrfs_init_work(&async->work, run_one_async_start, run_one_async_done, run_one_async_free); @@ -727,8 +804,7 @@ static blk_status_t btree_csum_one_bio(struct bio *bio) return errno_to_blk_status(ret); } -static blk_status_t btree_submit_bio_start(struct inode *inode, struct bio *bio, - u64 dio_file_offset) +blk_status_t btree_submit_bio_start(struct bio *bio) { /* * when we're called for a write, we're already in the async @@ -749,13 +825,14 @@ static bool should_async_write(struct btrfs_fs_info *fs_info, return true; } -void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_num) +void btrfs_submit_metadata_bio(struct btrfs_inode *inode, struct bio *bio, int mirror_num) { - struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); + struct btrfs_fs_info *fs_info = inode->root->fs_info; struct btrfs_bio *bbio = btrfs_bio(bio); blk_status_t ret; bio->bi_opf |= REQ_META; + bbio->is_metadata = 1; if (btrfs_op(bio) != BTRFS_MAP_WRITE) { btrfs_submit_bio(fs_info, bio, mirror_num); @@ -766,8 +843,8 @@ void btrfs_submit_metadata_bio(struct inode *inode, struct bio *bio, int mirror_ * Kthread helpers are used to submit writes so that checksumming can * happen in parallel across all CPUs. */ - if (should_async_write(fs_info, BTRFS_I(inode)) && - btrfs_wq_submit_bio(inode, bio, mirror_num, 0, btree_submit_bio_start)) + if (should_async_write(fs_info, inode) && + btrfs_wq_submit_bio(inode, bio, mirror_num, 0, WQ_SUBMIT_METADATA)) return; ret = btree_csum_one_bio(bio); @@ -919,28 +996,28 @@ struct extent_buffer *btrfs_find_create_tree_block( * Read tree block at logical address @bytenr and do variant basic but critical * verification. * - * @owner_root: the objectid of the root owner for this block. - * @parent_transid: expected transid of this tree block, skip check if 0 - * @level: expected level, mandatory check - * @first_key: expected key in slot 0, skip check if NULL + * @check: expected tree parentness check, see comments of the + * structure for details. */ struct extent_buffer *read_tree_block(struct btrfs_fs_info *fs_info, u64 bytenr, - u64 owner_root, u64 parent_transid, - int level, struct btrfs_key *first_key) + struct btrfs_tree_parent_check *check) { struct extent_buffer *buf = NULL; int ret; - buf = btrfs_find_create_tree_block(fs_info, bytenr, owner_root, level); + ASSERT(check); + + buf = btrfs_find_create_tree_block(fs_info, bytenr, check->owner_root, + check->level); if (IS_ERR(buf)) return buf; - ret = btrfs_read_extent_buffer(buf, parent_transid, level, first_key); + ret = btrfs_read_extent_buffer(buf, check); if (ret) { free_extent_buffer_stale(buf); return ERR_PTR(ret); } - if (btrfs_check_eb_owner(buf, owner_root)) { + if (btrfs_check_eb_owner(buf, check->owner_root)) { free_extent_buffer_stale(buf); return ERR_PTR(-EUCLEAN); } @@ -1027,9 +1104,9 @@ static void __setup_root(struct btrfs_root *root, struct btrfs_fs_info *fs_info, root->anon_dev = 0; if (!dummy) { extent_io_tree_init(fs_info, &root->dirty_log_pages, - IO_TREE_ROOT_DIRTY_LOG_PAGES, NULL); + IO_TREE_ROOT_DIRTY_LOG_PAGES); extent_io_tree_init(fs_info, &root->log_csum_range, - IO_TREE_LOG_CSUM_RANGE, NULL); + IO_TREE_LOG_CSUM_RANGE); } spin_lock_init(&root->root_item_lock); @@ -1167,6 +1244,13 @@ struct btrfs_root *btrfs_extent_root(struct btrfs_fs_info *fs_info, u64 bytenr) return btrfs_global_root(fs_info, &key); } +struct btrfs_root *btrfs_block_group_root(struct btrfs_fs_info *fs_info) +{ + if (btrfs_fs_compat_ro(fs_info, BLOCK_GROUP_TREE)) + return fs_info->block_group_root; + return btrfs_extent_root(fs_info, 0); +} + struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, u64 objectid) { @@ -1197,7 +1281,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, if (IS_ERR(leaf)) { ret = PTR_ERR(leaf); leaf = NULL; - goto fail_unlock; + goto fail; } root->node = leaf; @@ -1232,9 +1316,6 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans, return root; -fail_unlock: - if (leaf) - btrfs_tree_unlock(leaf); fail: btrfs_put_root(root); @@ -1352,6 +1433,7 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root, struct btrfs_key *key) { struct btrfs_root *root; + struct btrfs_tree_parent_check check = { 0 }; struct btrfs_fs_info *fs_info = tree_root->fs_info; u64 generation; int ret; @@ -1371,9 +1453,11 @@ static struct btrfs_root *read_tree_root_path(struct btrfs_root *tree_root, generation = btrfs_root_generation(&root->root_item); level = btrfs_root_level(&root->root_item); - root->node = read_tree_block(fs_info, - btrfs_root_bytenr(&root->root_item), - key->objectid, generation, level, NULL); + check.level = level; + check.transid = generation; + check.owner_root = key->objectid; + root->node = read_tree_block(fs_info, btrfs_root_bytenr(&root->root_item), + &check); if (IS_ERR(root->node)) { ret = PTR_ERR(root->node); root->node = NULL; @@ -2084,8 +2168,6 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->workers); if (fs_info->endio_workers) destroy_workqueue(fs_info->endio_workers); - if (fs_info->endio_raid56_workers) - destroy_workqueue(fs_info->endio_raid56_workers); if (fs_info->rmw_workers) destroy_workqueue(fs_info->rmw_workers); if (fs_info->compressed_write_workers) @@ -2231,7 +2313,7 @@ static void btrfs_init_btree_inode(struct btrfs_fs_info *fs_info) RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); extent_io_tree_init(fs_info, &BTRFS_I(inode)->io_tree, - IO_TREE_BTREE_INODE_IO, NULL); + IO_TREE_BTREE_INODE_IO); extent_map_tree_init(&BTRFS_I(inode)->extent_tree); BTRFS_I(inode)->root = btrfs_grab_root(fs_info->tree_root); @@ -2291,8 +2373,6 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) alloc_workqueue("btrfs-endio", flags, max_active); fs_info->endio_meta_workers = alloc_workqueue("btrfs-endio-meta", flags, max_active); - fs_info->endio_raid56_workers = - alloc_workqueue("btrfs-endio-raid56", flags, max_active); fs_info->rmw_workers = alloc_workqueue("btrfs-rmw", flags, max_active); fs_info->endio_write_workers = btrfs_alloc_workqueue(fs_info, "endio-write", flags, @@ -2314,7 +2394,7 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info) fs_info->delalloc_workers && fs_info->flush_workers && fs_info->endio_workers && fs_info->endio_meta_workers && fs_info->compressed_write_workers && - fs_info->endio_write_workers && fs_info->endio_raid56_workers && + fs_info->endio_write_workers && fs_info->endio_freespace_worker && fs_info->rmw_workers && fs_info->caching_workers && fs_info->fixup_workers && fs_info->delayed_workers && fs_info->qgroup_rescan_workers && @@ -2350,6 +2430,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, struct btrfs_fs_devices *fs_devices) { int ret; + struct btrfs_tree_parent_check check = { 0 }; struct btrfs_root *log_tree_root; struct btrfs_super_block *disk_super = fs_info->super_copy; u64 bytenr = btrfs_super_log_root(disk_super); @@ -2365,10 +2446,10 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info, if (!log_tree_root) return -ENOMEM; - log_tree_root->node = read_tree_block(fs_info, bytenr, - BTRFS_TREE_LOG_OBJECTID, - fs_info->generation + 1, level, - NULL); + check.level = level; + check.transid = fs_info->generation + 1; + check.owner_root = BTRFS_TREE_LOG_OBJECTID; + log_tree_root->node = read_tree_block(fs_info, bytenr, &check); if (IS_ERR(log_tree_root->node)) { btrfs_warn(fs_info, "failed to read log tree"); ret = PTR_ERR(log_tree_root->node); @@ -2846,10 +2927,14 @@ out: static int load_super_root(struct btrfs_root *root, u64 bytenr, u64 gen, int level) { + struct btrfs_tree_parent_check check = { + .level = level, + .transid = gen, + .owner_root = root->root_key.objectid + }; int ret = 0; - root->node = read_tree_block(root->fs_info, bytenr, - root->root_key.objectid, gen, level, NULL); + root->node = read_tree_block(root->fs_info, bytenr, &check); if (IS_ERR(root->node)) { ret = PTR_ERR(root->node); root->node = NULL; @@ -3057,7 +3142,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info) fs_info->block_group_cache_tree = RB_ROOT_CACHED; extent_io_tree_init(fs_info, &fs_info->excluded_extents, - IO_TREE_FS_EXCLUDED_EXTENTS, NULL); + IO_TREE_FS_EXCLUDED_EXTENTS); mutex_init(&fs_info->ordered_operations_mutex); mutex_init(&fs_info->tree_log_mutex); @@ -3743,10 +3828,18 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device } /* - * Mount does not set all options immediately, we can do it now and do - * not have to wait for transaction commit + * For devices supporting discard turn on discard=async automatically, + * unless it's already set or disabled. This could be turned off by + * nodiscard for the same mount. */ - btrfs_apply_pending_changes(fs_info); + if (!(btrfs_test_opt(fs_info, DISCARD_SYNC) || + btrfs_test_opt(fs_info, DISCARD_ASYNC) || + btrfs_test_opt(fs_info, NODISCARD)) && + fs_info->fs_devices->discardable) { + btrfs_set_and_info(fs_info, DISCARD_ASYNC, + "auto enabling async discard"); + btrfs_clear_opt(fs_info->mount_opt, NODISCARD); + } #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY if (btrfs_test_opt(fs_info, CHECK_INTEGRITY)) { @@ -3875,7 +3968,7 @@ static void btrfs_end_super_write(struct bio *bio) if (bio->bi_status) { btrfs_warn_rl_in_rcu(device->fs_info, "lost page write due to IO error on %s (%d)", - rcu_str_deref(device->name), + btrfs_dev_name(device), blk_status_to_errno(bio->bi_status)); ClearPageUptodate(page); SetPageError(page); |