path: root/fs
author	Kent Overstreet <kent.overstreet@gmail.com>	2022-02-15 00:06:59 -0500
committer	Kent Overstreet <kent.overstreet@linux.dev>	2023-10-22 17:09:24 -0400
commit	eb331fe5a4e801dc11d96ba7fbda0a91c8bd626c (patch)
tree	bb8144cd4214f0bf41f7e8f680c1a09d8d4adda7 /fs
parent	fcf01959eaa828b1005f8f30732949e64edb8c4d (diff)
download	linux-stable-eb331fe5a4e801dc11d96ba7fbda0a91c8bd626c.tar.gz
		linux-stable-eb331fe5a4e801dc11d96ba7fbda0a91c8bd626c.tar.bz2
		linux-stable-eb331fe5a4e801dc11d96ba7fbda0a91c8bd626c.zip
bcachefs: Check for stale dirty pointer before reads
Since we retry reads when we discover we read from a pointer that went stale, an erroneously stale dirty pointer would cause us to loop retrying that read forever - unless we check before issuing the read, while the btree is still locked, since we know a dirty pointer should never be stale.

This patch adds that check, along with printing some helpful debug info.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
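As a rough illustration (not the patch's exact control flow), the new check boils down to the condition below, written against the helpers that appear in the diff - bch_dev_bkey_exists() and ptr_stale(); the dirty_ptr_is_stale() wrapper is hypothetical and only exists here for readability:

/*
 * Cached pointers may legitimately go stale and are simply skipped on
 * retry; a dirty pointer should never be stale, so retrying the read
 * could never succeed - report the inconsistency instead.
 */
static bool dirty_ptr_is_stale(struct bch_fs *c, struct extent_ptr_decoded *pick)
{
	struct bch_dev *ca = bch_dev_bkey_exists(c, pick->ptr.dev);

	return !pick->ptr.cached && ptr_stale(ca, &pick->ptr);
}

In the patch itself this condition is only evaluated in the BCH_READ_IN_RETRY path, while the btree iterator is still locked, so a bad dirty pointer is reported once instead of bouncing through bch2_read_endio() forever.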
Diffstat (limited to 'fs')
-rw-r--r--	fs/bcachefs/fs-io.c	2
-rw-r--r--	fs/bcachefs/io.c	60
-rw-r--r--	fs/bcachefs/move.c	6
3 files changed, 54 insertions(+), 14 deletions(-)
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 5fce958bafc9..9161125aec17 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -1043,8 +1043,6 @@ retry:
sectors = min(sectors, k.k->size - offset_into_extent);
- bch2_trans_unlock(trans);
-
if (readpages_iter)
readpage_bio_extend(readpages_iter, &rbio->bio, sectors,
extent_partial_reads_expensive(k));
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 218934b4e19b..914e22c5c247 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -2032,6 +2032,33 @@ err:
return ret;
}
+static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
+ struct bkey_s_c k,
+ struct bch_extent_ptr ptr)
+{
+ struct bch_fs *c = trans->c;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev);
+ struct btree_iter iter;
+ char buf[200];
+ int ret;
+
+ bch2_bkey_val_to_text(&PBUF(buf), c, k);
+ bch2_fs_inconsistent(c, "Attempting to read from stale dirty pointer: %s", buf);
+
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
+ POS(ptr.dev, PTR_BUCKET_NR(ca, &ptr)),
+ BTREE_ITER_CACHED);
+
+ ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
+ if (ret)
+ return;
+
+ bch2_bkey_val_to_text(&PBUF(buf), c, k);
+ bch_err(c, "%s", buf);
+ bch_err(c, "memory gen: %u", *bucket_gen(ca, iter.pos.offset));
+ bch2_trans_iter_exit(trans, &iter);
+}
+
int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
struct bvec_iter iter, struct bpos read_pos,
enum btree_id data_btree, struct bkey_s_c k,
@@ -2041,7 +2068,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
struct bch_fs *c = trans->c;
struct extent_ptr_decoded pick;
struct bch_read_bio *rbio = NULL;
- struct bch_dev *ca;
+ struct bch_dev *ca = NULL;
struct promote_op *promote = NULL;
bool bounce = false, read_full = false, narrow_crcs = false;
struct bpos data_pos = bkey_start_pos(k.k);
@@ -2058,7 +2085,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
zero_fill_bio_iter(&orig->bio, iter);
goto out_read_done;
}
-
+retry_pick:
pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);
/* hole or reservation - just zero fill: */
@@ -2071,8 +2098,27 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
goto err;
}
- if (pick_ret > 0)
- ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+ ca = bch_dev_bkey_exists(c, pick.ptr.dev);
+
+ /*
+ * Stale dirty pointers are treated as IO errors, but @failed isn't
+ * allocated unless we're in the retry path - so if we're not in the
+ * retry path, don't check here, it'll be caught in bch2_read_endio()
+ * and we'll end up in the retry path:
+ */
+ if ((flags & BCH_READ_IN_RETRY) &&
+ !pick.ptr.cached &&
+ unlikely(ptr_stale(ca, &pick.ptr))) {
+ read_from_stale_dirty_pointer(trans, k, pick.ptr);
+ bch2_mark_io_failure(failed, &pick);
+ goto retry_pick;
+ }
+
+ /*
+ * Unlock the iterator while the btree node's lock is still in
+ * cache, before doing the IO:
+ */
+ bch2_trans_unlock(trans);
if (flags & BCH_READ_NODECODE) {
/*
@@ -2367,12 +2413,6 @@ retry:
*/
sectors = min(sectors, k.k->size - offset_into_extent);
- /*
- * Unlock the iterator while the btree node's lock is still in
- * cache, before doing the IO:
- */
- bch2_trans_unlock(&trans);
-
bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
swap(bvec_iter.bi_size, bytes);
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 04971bf847bf..4751d79219cb 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -752,10 +752,12 @@ static int __bch2_move_data(struct bch_fs *c,
BUG();
}
- /* unlock before doing IO: */
+ /*
+ * The iterator gets unlocked by __bch2_read_extent - need to
+ * save a copy of @k elsewhere:
+ */
bch2_bkey_buf_reassemble(&sk, c, k);
k = bkey_i_to_s_c(sk.k);
- bch2_trans_unlock(&trans);
ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k,
data_cmd, data_opts);