From 581c1760415c48cca9349b198bba52dd38750765 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Thu, 29 Mar 2018 09:08:11 +0800 Subject: btrfs: Validate child tree block's level and first key We have several reports about node pointer points to incorrect child tree blocks, which could have even wrong owner and level but still with valid generation and checksum. Although btrfs check could handle it and print error message like: leaf parent key incorrect 60670574592 Kernel doesn't have enough check on this type of corruption correctly. At least add such check to read_tree_block() and btrfs_read_buffer(), where we need two new parameters @level and @first_key to verify the child tree block. The new @level check is mandatory and all call sites are already modified to extract expected level from its call chain. While @first_key is optional, the following call sites are skipping such check: 1) Root node/leaf As ROOT_ITEM doesn't contain the first key, skip @first_key check. 2) Direct backref Only parent bytenr and level is known and we need to resolve the key all by ourselves, skip @first_key check. Another note of this verification is, it needs extra info from nodeptr or ROOT_ITEM, so it can't fit into current tree-checker framework, which is limited to node/leaf boundary. Signed-off-by: Qu Wenruo Signed-off-by: David Sterba --- fs/btrfs/ctree.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/ctree.c') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1ef6b67f893a..7c8faeb868f4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1354,6 +1354,7 @@ get_old_root(struct btrfs_root *root, u64 time_seq) struct tree_mod_root *old_root = NULL; u64 old_generation = 0; u64 logical; + int level; eb_root = btrfs_read_lock_root_node(root); tm = __tree_mod_log_oldest_root(eb_root, time_seq); @@ -1364,15 +1365,17 @@ get_old_root(struct btrfs_root *root, u64 time_seq) old_root = &tm->old_root; old_generation = tm->generation; logical = old_root->logical; + level = old_root->level; } else { logical = eb_root->start; + level = btrfs_header_level(eb_root); } tm = tree_mod_log_search(fs_info, logical, time_seq); if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) { btrfs_tree_read_unlock(eb_root); free_extent_buffer(eb_root); - old = read_tree_block(fs_info, logical, 0); + old = read_tree_block(fs_info, logical, 0, level, NULL); if (WARN_ON(IS_ERR(old) || !extent_buffer_uptodate(old))) { if (!IS_ERR(old)) free_extent_buffer(old); @@ -1592,6 +1595,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, btrfs_set_lock_blocking(parent); for (i = start_slot; i <= end_slot; i++) { + struct btrfs_key first_key; int close = 1; btrfs_node_key(parent, &disk_key, i); @@ -1601,6 +1605,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, progress_passed = 1; blocknr = btrfs_node_blockptr(parent, i); gen = btrfs_node_ptr_generation(parent, i); + btrfs_node_key_to_cpu(parent, &first_key, i); if (last_block == 0) last_block = blocknr; @@ -1624,7 +1629,9 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, uptodate = 0; if (!cur || !uptodate) { if (!cur) { - cur = read_tree_block(fs_info, blocknr, gen); + cur = read_tree_block(fs_info, blocknr, gen, + parent_level - 1, + &first_key); if (IS_ERR(cur)) { return PTR_ERR(cur); } else if (!extent_buffer_uptodate(cur)) { @@ -1632,7 +1639,8 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, return -EIO; } } else if (!uptodate) { - err = btrfs_read_buffer(cur, gen); + err = btrfs_read_buffer(cur, gen, + parent_level - 1,&first_key); if (err) { free_extent_buffer(cur); return err; @@ -1785,14 +1793,17 @@ read_node_slot(struct btrfs_fs_info *fs_info, struct extent_buffer *parent, { int level = btrfs_header_level(parent); struct extent_buffer *eb; + struct btrfs_key first_key; if (slot < 0 || slot >= btrfs_header_nritems(parent)) return ERR_PTR(-ENOENT); BUG_ON(level == 0); + btrfs_node_key_to_cpu(parent, &first_key, slot); eb = read_tree_block(fs_info, btrfs_node_blockptr(parent, slot), - btrfs_node_ptr_generation(parent, slot)); + btrfs_node_ptr_generation(parent, slot), + level - 1, &first_key); if (!IS_ERR(eb) && !extent_buffer_uptodate(eb)) { free_extent_buffer(eb); eb = ERR_PTR(-EIO); @@ -2388,10 +2399,14 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, u64 gen; struct extent_buffer *b = *eb_ret; struct extent_buffer *tmp; + struct btrfs_key first_key; int ret; + int parent_level; blocknr = btrfs_node_blockptr(b, slot); gen = btrfs_node_ptr_generation(b, slot); + parent_level = btrfs_header_level(b); + btrfs_node_key_to_cpu(b, &first_key, slot); tmp = find_extent_buffer(fs_info, blocknr); if (tmp) { @@ -2410,7 +2425,7 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, btrfs_set_path_blocking(p); /* now we're allowed to do a blocking uptodate check */ - ret = btrfs_read_buffer(tmp, gen); + ret = btrfs_read_buffer(tmp, gen, parent_level - 1, &first_key); if (!ret) { *eb_ret = tmp; return 0; @@ -2437,7 +2452,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, btrfs_release_path(p); ret = -EAGAIN; - tmp = read_tree_block(fs_info, blocknr, 0); + tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1, + &first_key); if (!IS_ERR(tmp)) { /* * If the read above didn't mark this buffer up to date, -- cgit v1.2.3