summaryrefslogtreecommitdiffstats
path: root/fs/bcachefs
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2023-09-10 23:33:08 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2023-10-22 17:09:05 -0400
commitee7570546e89ece9b56eaa22c94a6ec0941ec827 (patch)
tree7e454dc52b3f955a76cf46e849ee010ee7dafa24 /fs/bcachefs
parent9f2772c45460e420de9a88980129bef135c6b76f (diff)
downloadlinux-stable-ee7570546e89ece9b56eaa22c94a6ec0941ec827.tar.gz
linux-stable-ee7570546e89ece9b56eaa22c94a6ec0941ec827.tar.bz2
linux-stable-ee7570546e89ece9b56eaa22c94a6ec0941ec827.zip
bcachefs: Fix a deadlock
Waiting on a btree node write with btree locks held can deadlock if the write errors: the write error path has to do a btree update to drop the pointer to the replica that errored. The interior update path has to wait on in-flight btree writes before freeing nodes on disk. Previously, this was done in bch2_btree_interior_update_will_free_node(), and could deadlock; now, we just stash a pointer to the node and do it in btree_update_nodes_written(), just prior to the transactional part of the update. Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs')
-rw-r--r--fs/bcachefs/btree_io.c4
-rw-r--r--fs/bcachefs/btree_update_interior.c26
-rw-r--r--fs/bcachefs/btree_update_interior.h4
3 files changed, 27 insertions, 7 deletions
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 0f2a730e04b5..4ffdc11f4d9a 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1727,6 +1727,10 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
return;
if (old & (1 << BTREE_NODE_write_in_flight)) {
+ /*
+ * XXX waiting on btree writes with btree locks held -
+ * this can deadlock if we hit the write error path
+ */
btree_node_wait_on_io(b);
continue;
}
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 6eeb0ca58b6a..569db972f3bb 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -551,6 +551,22 @@ static void btree_update_nodes_written(struct btree_update *as)
BUG_ON(!journal_pin_active(&as->journal));
/*
+ * Wait for any in flight writes to finish before we free the old nodes
+ * on disk:
+ */
+ for (i = 0; i < as->nr_old_nodes; i++) {
+ struct btree *old = as->old_nodes[i];
+ __le64 seq;
+
+ six_lock_read(&old->c.lock, NULL, NULL);
+ seq = old->data ? old->data->keys.seq : 0;
+ six_unlock_read(&old->c.lock);
+
+ if (seq == as->old_nodes_seq[i])
+ btree_node_wait_on_io(old);
+ }
+
+ /*
* We did an update to a parent node where the pointers we added pointed
* to child nodes that weren't written yet: now, the child nodes have
* been written so we can write out the update to the interior node.
@@ -889,13 +905,9 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
btree_update_will_delete_key(as, &b->key);
- /*
- * XXX: Waiting on io with btree node locks held, we don't want to be
- * doing this. We can't have btree writes happening after the space has
- * been freed, but we really only need to block before
- * btree_update_nodes_written_trans() happens.
- */
- btree_node_wait_on_io(b);
+ as->old_nodes[as->nr_old_nodes] = b;
+ as->old_nodes_seq[as->nr_old_nodes] = b->data->keys.seq;
+ as->nr_old_nodes++;
}
void bch2_btree_update_done(struct btree_update *as)
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 7eef3dbb6ef1..7ed67b47e1b9 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -92,6 +92,10 @@ struct btree_update {
struct btree *new_nodes[BTREE_UPDATE_NODES_MAX];
unsigned nr_new_nodes;
+ struct btree *old_nodes[BTREE_UPDATE_NODES_MAX];
+ __le64 old_nodes_seq[BTREE_UPDATE_NODES_MAX];
+ unsigned nr_old_nodes;
+
open_bucket_idx_t open_buckets[BTREE_UPDATE_NODES_MAX *
BCH_REPLICAS_MAX];
open_bucket_idx_t nr_open_buckets;