From 8eab77ff167b62760d878f1d19312eb9f7d4c176 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 13 Nov 2015 23:57:16 +0000 Subject: Btrfs: use global reserve when deleting unused block group after ENOSPC It's possible to reach a state where the cleaner kthread isn't able to start a transaction to delete an unused block group due to lack of enough free metadata space and due to lack of unallocated device space to allocate a new metadata block group as well. If this happens try to use space from the global block group reserve just like we do for unlink operations, so that we don't reach a permanent state where starting a transaction for filesystem operations (file creation, renames, etc) keeps failing with -ENOSPC. Such an unfortunate state was observed on a machine where over a dozen unused data block groups existed and the cleaner kthread was failing to delete them due to ENOSPC error when attempting to start a transaction, and even running balance with a -dusage=0 filter failed with ENOSPC as well. Also unmounting and mounting again the filesystem didn't help. Allowing the cleaner kthread to use the global block reserve to delete the unused data block groups fixed the problem. Signed-off-by: Filipe Manana Signed-off-by: Jeff Mahoney Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index acf3ed11cfb6..78200932c1cf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10256,6 +10256,17 @@ out: return ret; } +struct btrfs_trans_handle * +btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info) +{ + /* + * 1 unit for adding the free space inode's orphan (located in the tree + * of tree roots). + */ + return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root, + 1, 1); +} + /* * Process the unused_bgs list and remove any that don't have any allocated * space inside of them. @@ -10322,8 +10333,7 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * Want to do this before we do anything else so we can recover * properly if we fail to join the transaction. */ - /* 1 for btrfs_orphan_reserve_metadata() */ - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_trans_remove_block_group(fs_info); if (IS_ERR(trans)) { btrfs_dec_block_group_ro(root, block_group); ret = PTR_ERR(trans); -- cgit v1.2.3 From 7fd01182d1a1412cd44a5474b9aa93548d4a73ae Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 13 Nov 2015 23:57:17 +0000 Subject: Btrfs: fix the number of transaction units needed to remove a block group We were using only 1 transaction unit when attempting to delete an unused block group but in reality we need 3 + N units, where N corresponds to the number of stripes. We were accounting only for the addition of the orphan item (for the block group's free space cache inode) but we were not accounting that we need to delete one block group item from the extent tree, one free space item from the tree of tree roots and N device extent items from the device tree. While one unit is not enough, it worked most of the time because for each single unit we are too pessimistic and assume an entire tree path, with the highest possible heigth (8), needs to be COWed with eventual node splits at every possible level in the tree, so there was usually enough reserved space for removing all the items and adding the orphan item. However after adding the orphan item, writepages() can by called by the VM subsystem against the btree inode when we are under memory pressure, which causes writeback to start for the nodes we COWed before, this forces the operation to remove the free space item to COW again some (or all of) the same nodes (in the tree of tree roots). Even without writepages() being called, we could fail with ENOSPC because these items are located in multiple trees and one of them might have a higher heigth and require node/leaf splits at many levels, exhausting all the reserved space before removing all the items and adding the orphan. In the kernel 4.0 release, commit 3d84be799194 ("Btrfs: fix BUG_ON in btrfs_orphan_add() when delete unused block group"), we attempted to fix a BUG_ON due to ENOSPC when trying to add the orphan item by making the cleaner kthread reserve one transaction unit before attempting to remove the block group, but this was not enough. We had a couple user reports still hitting the same BUG_ON after 4.0, like Stefan Priebe's report on a 4.2-rc6 kernel for example: http://www.spinics.net/lists/linux-btrfs/msg46070.html So fix this by reserving all the necessary units of metadata. Reported-by: Stefan Priebe Fixes: 3d84be799194 ("Btrfs: fix BUG_ON in btrfs_orphan_add() when delete unused block group") Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 3 ++- fs/btrfs/extent-tree.c | 37 ++++++++++++++++++++++++++++++++++--- fs/btrfs/volumes.c | 3 ++- 3 files changed, 38 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 1573be6f9518..d88994f71eae 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3480,7 +3480,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, u64 type, u64 chunk_objectid, u64 chunk_offset, u64 size); struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( - struct btrfs_fs_info *fs_info); + struct btrfs_fs_info *fs_info, + const u64 chunk_offset); int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 group_start, struct extent_map *em); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 78200932c1cf..e97d6d61cd42 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10257,14 +10257,44 @@ out: } struct btrfs_trans_handle * -btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info) +btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, + const u64 chunk_offset) { + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map *em; + struct map_lookup *map; + unsigned int num_items; + + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, chunk_offset, 1); + read_unlock(&em_tree->lock); + ASSERT(em && em->start == chunk_offset); + /* + * We need to reserve 3 + N units from the metadata space info in order + * to remove a block group (done at btrfs_remove_chunk() and at + * btrfs_remove_block_group()), which are used for: + * * 1 unit for adding the free space inode's orphan (located in the tree * of tree roots). + * 1 unit for deleting the block group item (located in the extent + * tree). + * 1 unit for deleting the free space item (located in tree of tree + * roots). + * N units for deleting N device extent items corresponding to each + * stripe (located in the device tree). + * + * In order to remove a block group we also need to reserve units in the + * system space info in order to update the chunk tree (update one or + * more device items and remove one chunk item), but this is done at + * btrfs_remove_chunk() through a call to check_system_chunk(). */ + map = (struct map_lookup *)em->bdev; + num_items = 3 + map->num_stripes; + free_extent_map(em); + return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root, - 1, 1); + num_items, 1); } /* @@ -10333,7 +10363,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * Want to do this before we do anything else so we can recover * properly if we fail to join the transaction. */ - trans = btrfs_start_trans_remove_block_group(fs_info); + trans = btrfs_start_trans_remove_block_group(fs_info, + block_group->key.objectid); if (IS_ERR(trans)) { btrfs_dec_block_group_ro(root, block_group); ret = PTR_ERR(trans); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index e0bd364e958d..45f20252efed 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2853,7 +2853,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset) if (ret) return ret; - trans = btrfs_start_trans_remove_block_group(root->fs_info); + trans = btrfs_start_trans_remove_block_group(root->fs_info, + chunk_offset); if (IS_ERR(trans)) { ret = PTR_ERR(trans); btrfs_std_error(root->fs_info, ret, NULL); -- cgit v1.2.3 From 758f2dfcf8a249b1f1510aa32e625c2ec20642a3 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Thu, 19 Nov 2015 11:45:48 +0000 Subject: Btrfs: fix scrub preventing unused block groups from being deleted Currently scrub can race with the cleaner kthread when the later attempts to delete an unused block group, and the result is preventing the cleaner kthread from ever deleting later the block group - unless the block group becomes used and unused again. The following diagram illustrates that race: CPU 1 CPU 2 cleaner kthread btrfs_delete_unused_bgs() gets block group X from fs_info->unused_bgs and removes it from that list scrub_enumerate_chunks() searches device tree using its commit root finds device extent for block group X gets block group X from the tree fs_info->block_group_cache_tree (via btrfs_lookup_block_group()) sets bg X to RO sees the block group is already RO and therefore doesn't delete it nor adds it back to unused list So fix this by making scrub add the block group again to the list of unused block groups if the block group is still unused when it finished scrubbing it and it hasn't been removed already. Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/ctree.h | 1 + fs/btrfs/extent-tree.c | 2 +- fs/btrfs/scrub.c | 22 ++++++++++++++++++++++ 3 files changed, 24 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index d88994f71eae..a0165c6e6243 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3416,6 +3416,7 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *btrfs_lookup_block_group( struct btrfs_fs_info *info, u64 bytenr); +void btrfs_get_block_group(struct btrfs_block_group_cache *cache); void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int get_block_group_index(struct btrfs_block_group_cache *cache); struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e97d6d61cd42..8fd14b6f1d33 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -124,7 +124,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) return (cache->flags & bits) == bits; } -static void btrfs_get_block_group(struct btrfs_block_group_cache *cache) +void btrfs_get_block_group(struct btrfs_block_group_cache *cache) { atomic_inc(&cache->count); } diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 68af3169d527..b091d94ceef6 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3641,6 +3641,28 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, if (ro_set) btrfs_dec_block_group_ro(root, cache); + /* + * We might have prevented the cleaner kthread from deleting + * this block group if it was already unused because we raced + * and set it to RO mode first. So add it back to the unused + * list, otherwise it might not ever be deleted unless a manual + * balance is triggered or it becomes used and unused again. + */ + spin_lock(&cache->lock); + if (!cache->removed && !cache->ro && cache->reserved == 0 && + btrfs_block_group_used(&cache->item) == 0) { + spin_unlock(&cache->lock); + spin_lock(&fs_info->unused_bgs_lock); + if (list_empty(&cache->bg_list)) { + btrfs_get_block_group(cache); + list_add_tail(&cache->bg_list, + &fs_info->unused_bgs); + } + spin_unlock(&fs_info->unused_bgs_lock); + } else { + spin_unlock(&cache->lock); + } + btrfs_put_block_group(cache); if (ret) break; -- cgit v1.2.3 From 036a9348dcd0060e3df8f5b5db2a59e7d7eaf1f5 Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Mon, 23 Nov 2015 15:25:16 +0000 Subject: Btrfs: fix race between cleaner kthread and space cache writeout When a block group becomes unused and the cleaner kthread is currently running, we can end up getting the current transaction aborted with error -ENOENT when we try to commit the transaction, leading to the following trace: [59779.258768] WARNING: CPU: 3 PID: 5990 at fs/btrfs/extent-tree.c:3740 btrfs_write_dirty_block_groups+0x17c/0x214 [btrfs]() [59779.272594] BTRFS: Transaction aborted (error -2) (...) [59779.291137] Call Trace: [59779.291621] [] dump_stack+0x4e/0x79 [59779.292543] [] warn_slowpath_common+0x9f/0xb8 [59779.293435] [] ? btrfs_write_dirty_block_groups+0x17c/0x214 [btrfs] [59779.295000] [] warn_slowpath_fmt+0x48/0x50 [59779.296138] [] ? write_one_cache_group.isra.32+0x77/0x82 [btrfs] [59779.297663] [] btrfs_write_dirty_block_groups+0x17c/0x214 [btrfs] [59779.299141] [] commit_cowonly_roots+0x1de/0x261 [btrfs] [59779.300359] [] btrfs_commit_transaction+0x4c4/0x99c [btrfs] [59779.301805] [] btrfs_sync_fs+0x145/0x1ad [btrfs] [59779.302893] [] sync_filesystem+0x7f/0x93 (...) [59779.318186] ---[ end trace 577e2daff90da33a ]--- The following diagram illustrates a sequence of steps leading to this problem: CPU 1 CPU 2 adds bg A to list fs_info->unused_bgs adds bg B to list fs_info->unused_bgs cleaner kthread delete_unused_bgs() sees bg A in list fs_info->unused_bgs btrfs_start_transaction() deletes bg A update_block_group(bg C) --> adds bg C to list fs_info->unused_bgs deletes bg B sees bg C in the list fs_info->unused_bgs btrfs_remove_chunk(bg C) btrfs_remove_block_group(bg C) --> checks if the block group is in a dirty list, and because it isn't now, it does nothing --> the block group item is deleted from the extent tree --> adds bg C to list transaction->dirty_bgs some task calls btrfs_commit_transaction(t N + 1) commit_cowonly_roots() btrfs_write_dirty_block_groups() --> sees bg C in cur_trans->dirty_bgs --> calls write_one_cache_group() which returns -ENOENT because it did not find the block group item in the extent tree --> transaction aborte with -ENOENT because write_one_cache_group() returned that error So fix this by adding a block group to the list of dirty block groups before adding it to the list of unused block groups. This happened on a stress test using fsstress plus concurrent calls to fallocate 20G and truncate (releasing part of the space allocated with fallocate). Signed-off-by: Filipe Manana Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8fd14b6f1d33..e563e5ab72b4 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -5915,19 +5915,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, set_extent_dirty(info->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); - /* - * No longer have used bytes in this block group, queue - * it for deletion. - */ - if (old_val == 0) { - spin_lock(&info->unused_bgs_lock); - if (list_empty(&cache->bg_list)) { - btrfs_get_block_group(cache); - list_add_tail(&cache->bg_list, - &info->unused_bgs); - } - spin_unlock(&info->unused_bgs_lock); - } } spin_lock(&trans->transaction->dirty_bgs_lock); @@ -5939,6 +5926,22 @@ static int update_block_group(struct btrfs_trans_handle *trans, } spin_unlock(&trans->transaction->dirty_bgs_lock); + /* + * No longer have used bytes in this block group, queue it for + * deletion. We do this after adding the block group to the + * dirty list to avoid races between cleaner kthread and space + * cache writeout. + */ + if (!alloc && old_val == 0) { + spin_lock(&info->unused_bgs_lock); + if (list_empty(&cache->bg_list)) { + btrfs_get_block_group(cache); + list_add_tail(&cache->bg_list, + &info->unused_bgs); + } + spin_unlock(&info->unused_bgs_lock); + } + btrfs_put_block_group(cache); total -= num_bytes; bytenr += num_bytes; -- cgit v1.2.3 From 82bd101b5240d3d1c4078a8017917a40c0dcc514 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Thu, 5 Nov 2015 14:38:00 -0800 Subject: btrfs: qgroup: account shared subtree during snapshot delete Commit 0ed4792 ('btrfs: qgroup: Switch to new extent-oriented qgroup mechanism.') removed our qgroup accounting during btrfs_drop_snapshot(). Predictably, this results in qgroup numbers going bad shortly after a snapshot is removed. Fix this by adding a dirty extent record when we encounter extents during our shared subtree walk. This effectively restores the functionality we had with the original shared subtree walking code in 1152651 (btrfs: qgroup: account shared subtrees during snapshot delete). The idea with the original patch (and this one) is that shared subtrees can get skipped during drop_snapshot. The shared subtree walk then allows us a chance to visit those extents and add them to the qgroup work for later processing. This ultimately makes the accounting for drop snapshot work. The new qgroup code nicely handles all the other extents during the tree walk via the ref dec/inc functions so we don't have to add actions beyond what we had originally. Signed-off-by: Mark Fasheh Signed-off-by: Chris Mason --- fs/btrfs/extent-tree.c | 47 ++++++++++++++++++++++++++++++++++++++++------- fs/btrfs/qgroup.c | 2 ++ 2 files changed, 42 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index e563e5ab72b4..4b89680a1923 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -8108,21 +8108,47 @@ reada: } /* - * TODO: Modify related function to add related node/leaf to dirty_extent_root, - * for later qgroup accounting. - * - * Current, this function does nothing. + * These may not be seen by the usual inc/dec ref code so we have to + * add them here. */ +static int record_one_subtree_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes) +{ + struct btrfs_qgroup_extent_record *qrecord; + struct btrfs_delayed_ref_root *delayed_refs; + + qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS); + if (!qrecord) + return -ENOMEM; + + qrecord->bytenr = bytenr; + qrecord->num_bytes = num_bytes; + qrecord->old_roots = NULL; + + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + if (btrfs_qgroup_insert_dirty_extent(delayed_refs, qrecord)) + kfree(qrecord); + spin_unlock(&delayed_refs->lock); + + return 0; +} + static int account_leaf_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb) { int nr = btrfs_header_nritems(eb); - int i, extent_type; + int i, extent_type, ret; struct btrfs_key key; struct btrfs_file_extent_item *fi; u64 bytenr, num_bytes; + /* We can be called directly from walk_up_proc() */ + if (!root->fs_info->quota_enabled) + return 0; + for (i = 0; i < nr; i++) { btrfs_item_key_to_cpu(eb, &key, i); @@ -8141,6 +8167,10 @@ static int account_leaf_items(struct btrfs_trans_handle *trans, continue; num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); + + ret = record_one_subtree_extent(trans, root, bytenr, num_bytes); + if (ret) + return ret; } return 0; } @@ -8209,8 +8239,6 @@ static int adjust_slots_upwards(struct btrfs_root *root, /* * root_eb is the subtree root and is locked before this function is called. - * TODO: Modify this function to mark all (including complete shared node) - * to dirty_extent_root to allow it get accounted in qgroup. */ static int account_shared_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -8288,6 +8316,11 @@ walk_down: btrfs_tree_read_lock(eb); btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); path->locks[level] = BTRFS_READ_LOCK_BLOCKING; + + ret = record_one_subtree_extent(trans, root, child_bytenr, + root->nodesize); + if (ret) + goto out; } if (level == 0) { diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index fd0a196c8f74..5279fdae7142 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1462,6 +1462,8 @@ struct btrfs_qgroup_extent_record struct btrfs_qgroup_extent_record *entry; u64 bytenr = record->bytenr; + assert_spin_locked(&delayed_refs->lock); + while (*p) { parent_node = *p; entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, -- cgit v1.2.3 From 348a0013d54acec35c22958480af054b97b5e4fe Mon Sep 17 00:00:00 2001 From: Filipe Manana Date: Fri, 27 Nov 2015 12:16:16 +0000 Subject: Btrfs: fix unprotected list move from unused_bgs to deleted_bgs list As of my previous change titled "Btrfs: fix scrub preventing unused block groups from being deleted", the following warning at extent-tree.c:btrfs_delete_unused_bgs() can be hit when we mount the a filesysten with "-o discard": 10263 void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) 10264 { (...) 10405 if (trimming) { 10406 WARN_ON(!list_empty(&block_group->bg_list)); 10407 spin_lock(&trans->transaction->deleted_bgs_lock); 10408 list_move(&block_group->bg_list, 10409 &trans->transaction->deleted_bgs); 10410 spin_unlock(&trans->transaction->deleted_bgs_lock); 10411 btrfs_get_block_group(block_group); 10412 } (...) This happens because scrub can now add back the block group to the list of unused block groups (fs_info->unused_bgs). This is dangerous because we are moving the block group from the unused block groups list to the list of deleted block groups without holding the lock that protects the source list (fs_info->unused_bgs_lock). The following diagram illustrates how this happens: CPU 1 CPU 2 cleaner_kthread() btrfs_delete_unused_bgs() sees bg X in list fs_info->unused_bgs deletes bg X from list fs_info->unused_bgs scrub_enumerate_chunks() searches device tree using its commit root finds device extent for block group X gets block group X from the tree fs_info->block_group_cache_tree (via btrfs_lookup_block_group()) sets bg X to RO (again) scrub_chunk(bg X) sets bg X back to RW mode adds bg X to the list fs_info->unused_bgs again, since it's still unused and currently not in that list sets bg X to RO mode btrfs_remove_chunk(bg X) --> discard is enabled and bg X is in the fs_info->unused_bgs list again so the warning is triggered --> we move it from that list into the transaction's delete_bgs list, but we can have another task currently manipulating the first list (fs_info->unused_bgs) Fix this by using the same lock (fs_info->unused_bgs_lock) to protect both the list of unused block groups and the list of deleted block groups. This makes it safe and there's not much worry for more lock contention, as this lock is seldom used and only the cleaner kthread adds elements to the list of deleted block groups. The warning goes away too, as this was previously an impossible case (and would have been better a BUG_ON/ASSERT) but it's not impossible anymore. Reproduced with fstest btrfs/073 (using MOUNT_OPTIONS="-o discard"). Signed-off-by: Filipe Manana --- fs/btrfs/extent-tree.c | 10 +++++++--- fs/btrfs/transaction.c | 1 - fs/btrfs/transaction.h | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent-tree.c') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4b89680a1923..c4661db2b72a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -10480,11 +10480,15 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * until transaction commit to do the actual discard. */ if (trimming) { - WARN_ON(!list_empty(&block_group->bg_list)); - spin_lock(&trans->transaction->deleted_bgs_lock); + spin_lock(&fs_info->unused_bgs_lock); + /* + * A concurrent scrub might have added us to the list + * fs_info->unused_bgs, so use a list_move operation + * to add the block group to the deleted_bgs list. + */ list_move(&block_group->bg_list, &trans->transaction->deleted_bgs); - spin_unlock(&trans->transaction->deleted_bgs_lock); + spin_unlock(&fs_info->unused_bgs_lock); btrfs_get_block_group(block_group); } end_trans: diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 3367a3c6f214..be8eae80ff65 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -274,7 +274,6 @@ loop: cur_trans->num_dirty_bgs = 0; spin_lock_init(&cur_trans->dirty_bgs_lock); INIT_LIST_HEAD(&cur_trans->deleted_bgs); - spin_lock_init(&cur_trans->deleted_bgs_lock); spin_lock_init(&cur_trans->dropped_roots_lock); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(&cur_trans->dirty_pages, diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 0da21ca9b3fb..64c8221b6165 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -77,8 +77,8 @@ struct btrfs_transaction { */ struct mutex cache_write_mutex; spinlock_t dirty_bgs_lock; + /* Protected by spin lock fs_info->unused_bgs_lock. */ struct list_head deleted_bgs; - spinlock_t deleted_bgs_lock; spinlock_t dropped_roots_lock; struct btrfs_delayed_ref_root delayed_refs; int aborted; -- cgit v1.2.3