diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2022-03-04 19:16:04 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:09:26 -0400 |
commit | 3098553776a16c08446c408005090423d62e6b54 (patch) | |
tree | 17cc317918e2fb51dd540d86650510928965cb5e /fs/bcachefs/btree_cache.c | |
parent | 5b3f780540aa5e39859a0c00ace61713da054a0f (diff) | |
download | linux-stable-3098553776a16c08446c408005090423d62e6b54.tar.gz linux-stable-3098553776a16c08446c408005090423d62e6b54.tar.bz2 linux-stable-3098553776a16c08446c408005090423d62e6b54.zip |
bcachefs: Fix usage of six lock's percpu mode
Six locks have a percpu mode, which we use for interior btree nodes, as
well as btree key cache keys for the subvolumes btree. We've been
switching locks back and forth between percpu and non-percpu mode as
needed, but it turns out this is racy - when we're reusing an existing
node, other threads could be attempting to lock it while we're switching
it between modes.
This patch fixes this by never switching 'struct btree' between the two
modes, and instead segregating them between two different freed lists.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs/btree_cache.c')
-rw-r--r-- | fs/bcachefs/btree_cache.c | 41 |
1 files changed, 27 insertions, 14 deletions
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 42253ca17f04..92a8cc704cab 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -40,6 +40,14 @@ static inline unsigned btree_cache_can_free(struct btree_cache *bc) return max_t(int, 0, bc->used - bc->reserve); } +static void btree_node_to_freedlist(struct btree_cache *bc, struct btree *b) +{ + if (b->c.lock.readers) + list_move(&b->list, &bc->freed_pcpu); + else + list_move(&b->list, &bc->freed_nonpcpu); +} + static void btree_node_data_free(struct bch_fs *c, struct btree *b) { struct btree_cache *bc = &c->btree_cache; @@ -56,7 +64,8 @@ static void btree_node_data_free(struct bch_fs *c, struct btree *b) b->aux_data = NULL; bc->used--; - list_move(&b->list, &bc->freed); + + btree_node_to_freedlist(bc, b); } static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg, @@ -162,11 +171,6 @@ int bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b, b->c.level = level; b->c.btree_id = id; - if (level) - six_lock_pcpu_alloc(&b->c.lock); - else - six_lock_pcpu_free_rcu(&b->c.lock); - mutex_lock(&bc->lock); ret = __bch2_btree_node_hash_insert(bc, b); if (!ret) @@ -432,8 +436,10 @@ void bch2_fs_btree_cache_exit(struct bch_fs *c) BUG_ON(atomic_read(&c->btree_cache.dirty)); - while (!list_empty(&bc->freed)) { - b = list_first_entry(&bc->freed, struct btree, list); + list_splice(&bc->freed_pcpu, &bc->freed_nonpcpu); + + while (!list_empty(&bc->freed_nonpcpu)) { + b = list_first_entry(&bc->freed_nonpcpu, struct btree, list); list_del(&b->list); six_lock_pcpu_free(&b->c.lock); kfree(b); @@ -487,7 +493,8 @@ void bch2_fs_btree_cache_init_early(struct btree_cache *bc) mutex_init(&bc->lock); INIT_LIST_HEAD(&bc->live); INIT_LIST_HEAD(&bc->freeable); - INIT_LIST_HEAD(&bc->freed); + INIT_LIST_HEAD(&bc->freed_pcpu); + INIT_LIST_HEAD(&bc->freed_nonpcpu); } /* @@ -562,9 +569,12 @@ static struct btree *btree_node_cannibalize(struct bch_fs *c) } } -struct btree 
*bch2_btree_node_mem_alloc(struct bch_fs *c) +struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c, bool pcpu_read_locks) { struct btree_cache *bc = &c->btree_cache; + struct list_head *freed = pcpu_read_locks + ? &bc->freed_pcpu + : &bc->freed_nonpcpu; struct btree *b, *b2; u64 start_time = local_clock(); unsigned flags; @@ -576,7 +586,7 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c) * We never free struct btree itself, just the memory that holds the on * disk node. Check the freed list before allocating a new one: */ - list_for_each_entry(b, &bc->freed, list) + list_for_each_entry(b, freed, list) if (!btree_node_reclaim(c, b)) { list_del_init(&b->list); goto got_node; @@ -586,6 +596,9 @@ struct btree *bch2_btree_node_mem_alloc(struct bch_fs *c) if (!b) goto err_locked; + if (pcpu_read_locks) + six_lock_pcpu_alloc(&b->c.lock); + BUG_ON(!six_trylock_intent(&b->c.lock)); BUG_ON(!six_trylock_write(&b->c.lock)); got_node: @@ -598,7 +611,7 @@ got_node: if (!btree_node_reclaim(c, b2)) { swap(b->data, b2->data); swap(b->aux_data, b2->aux_data); - list_move(&b2->list, &bc->freed); + btree_node_to_freedlist(bc, b2); six_unlock_write(&b2->c.lock); six_unlock_intent(&b2->c.lock); goto got_mem; @@ -643,7 +656,7 @@ err_locked: if (b) { swap(b->data, b2->data); swap(b->aux_data, b2->aux_data); - list_move(&b2->list, &bc->freed); + btree_node_to_freedlist(bc, b2); six_unlock_write(&b2->c.lock); six_unlock_intent(&b2->c.lock); } else { @@ -688,7 +701,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, return ERR_PTR(-EINTR); } - b = bch2_btree_node_mem_alloc(c); + b = bch2_btree_node_mem_alloc(c, level != 0); if (trans && b == ERR_PTR(-ENOMEM)) { trans->memory_allocation_failure = true; |