diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2021-02-02 17:09:10 -0500 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:09:12 -0400 |
commit | 7a7d17b2f7c23c0891b0cbd13fafd3bc805b1b29 (patch) | |
tree | dc4c22912655b6b21e721d05baa07646cbf52396 /fs | |
parent | 8c6d298ab22fc1b2912ccef4ffd4a01b35f9c5b4 (diff) | |
download | linux-stable-7a7d17b2f7c23c0891b0cbd13fafd3bc805b1b29.tar.gz linux-stable-7a7d17b2f7c23c0891b0cbd13fafd3bc805b1b29.tar.bz2 linux-stable-7a7d17b2f7c23c0891b0cbd13fafd3bc805b1b29.zip |
bcachefs: Whiteouts for snapshots
This patch adds KEY_TYPE_whiteout, a new type of whiteout for snapshots,
used when we're deleting a key and the key being deleted is in an ancestor
snapshot - and updates the transaction update/commit path to use it.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 4 | ||||
-rw-r--r-- | fs/bcachefs/bkey.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/bkey_methods.c | 26 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_leaf.c | 113 |
4 files changed, 127 insertions, 18 deletions
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index ae8f3a5bc787..f922302332ee 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -327,7 +327,7 @@ static inline void bkey_init(struct bkey *k) */ #define BCH_BKEY_TYPES() \ x(deleted, 0) \ - x(discard, 1) \ + x(whiteout, 1) \ x(error, 2) \ x(cookie, 3) \ x(hash_whiteout, 4) \ @@ -361,7 +361,7 @@ struct bch_deleted { struct bch_val v; }; -struct bch_discard { +struct bch_whiteout { struct bch_val v; }; diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index 904ceb67a029..6a637a408a9f 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -63,7 +63,7 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes) #define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted) #define bkey_whiteout(_k) \ - ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_discard) + ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_whiteout) enum bkey_lr_packed { BKEY_PACKED_BOTH, diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 42fdcc4487de..3133db236b7b 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -31,7 +31,7 @@ static const char *deleted_key_invalid(const struct bch_fs *c, .key_invalid = deleted_key_invalid, \ } -#define bch2_bkey_ops_discard (struct bkey_ops) { \ +#define bch2_bkey_ops_whiteout (struct bkey_ops) { \ .key_invalid = deleted_key_invalid, \ } @@ -101,6 +101,8 @@ const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k) static unsigned bch2_key_types_allowed[] = { [BKEY_TYPE_extents] = + (1U << KEY_TYPE_deleted)| + (1U << KEY_TYPE_whiteout)| (1U << KEY_TYPE_error)| (1U << KEY_TYPE_cookie)| (1U << KEY_TYPE_extent)| @@ -108,30 +110,43 @@ static unsigned bch2_key_types_allowed[] = { (1U << KEY_TYPE_reflink_p)| (1U << KEY_TYPE_inline_data), [BKEY_TYPE_inodes] = + (1U << KEY_TYPE_deleted)| + (1U << KEY_TYPE_whiteout)| (1U << KEY_TYPE_inode)| (1U << 
KEY_TYPE_inode_generation), [BKEY_TYPE_dirents] = + (1U << KEY_TYPE_deleted)| + (1U << KEY_TYPE_whiteout)| (1U << KEY_TYPE_hash_whiteout)| (1U << KEY_TYPE_dirent), [BKEY_TYPE_xattrs] = + (1U << KEY_TYPE_deleted)| + (1U << KEY_TYPE_whiteout)| (1U << KEY_TYPE_cookie)| (1U << KEY_TYPE_hash_whiteout)| (1U << KEY_TYPE_xattr), [BKEY_TYPE_alloc] = + (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_alloc)| (1U << KEY_TYPE_alloc_v2), [BKEY_TYPE_quotas] = + (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_quota), [BKEY_TYPE_stripes] = + (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_stripe), [BKEY_TYPE_reflink] = + (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_reflink_v)| (1U << KEY_TYPE_indirect_inline_data), [BKEY_TYPE_subvolumes] = + (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_subvolume), [BKEY_TYPE_snapshots] = + (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_snapshot), [BKEY_TYPE_btree] = + (1U << KEY_TYPE_deleted)| (1U << KEY_TYPE_btree_ptr)| (1U << KEY_TYPE_btree_ptr_v2), }; @@ -139,21 +154,18 @@ static unsigned bch2_key_types_allowed[] = { const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, enum btree_node_type type) { - unsigned key_types_allowed = (1U << KEY_TYPE_deleted)| - bch2_key_types_allowed[type] ; - if (k.k->u64s < BKEY_U64s) return "u64s too small"; - if (!(key_types_allowed & (1U << k.k->type))) + if (!(bch2_key_types_allowed[type] & (1U << k.k->type))) return "invalid key type for this btree"; if (type == BKEY_TYPE_btree && bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) return "value too big"; - if (btree_node_type_is_extents(type)) { - if ((k.k->size == 0) != bkey_deleted(k.k)) + if (btree_node_type_is_extents(type) && !bkey_whiteout(k.k)) { + if (k.k->size == 0) return "bad size field"; if (k.k->size > k.k->p.offset) diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 1922bf8236f7..2fc134e34572 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -1002,21 +1002,24 @@ static int 
bch2_trans_update_extent(struct btree_trans *trans, goto next; } - if (!bkey_cmp(k.k->p, bkey_start_pos(&insert->k))) + if (!bkey_cmp(k.k->p, start)) goto next; while (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) > 0) { + bool front_split = bkey_cmp(bkey_start_pos(k.k), start) < 0; + bool back_split = bkey_cmp(k.k->p, insert->k.p) > 0; + /* * If we're going to be splitting a compressed extent, note it * so that __bch2_trans_commit() can increase our disk * reservation: */ - if (bkey_cmp(bkey_start_pos(k.k), start) < 0 && - bkey_cmp(k.k->p, insert->k.p) > 0 && + if (((front_split && back_split) || + ((front_split || back_split) && k.k->p.snapshot != insert->k.p.snapshot)) && (compressed_sectors = bch2_bkey_sectors_compressed(k))) trans->extra_journal_res += compressed_sectors; - if (bkey_cmp(bkey_start_pos(k.k), start) < 0) { + if (front_split) { update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); if ((ret = PTR_ERR_OR_ZERO(update))) goto err; @@ -1027,6 +1030,32 @@ static int bch2_trans_update_extent(struct btree_trans *trans, bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p, BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_ALL_SNAPSHOTS| + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&update_iter) ?: + bch2_trans_update(trans, &update_iter, update, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| + flags); + bch2_trans_iter_exit(trans, &update_iter); + + if (ret) + goto err; + } + + if (k.k->p.snapshot != insert->k.p.snapshot && + (front_split || back_split)) { + update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); + if ((ret = PTR_ERR_OR_ZERO(update))) + goto err; + + bkey_reassemble(update, k); + + bch2_cut_front(start, update); + bch2_cut_back(insert->k.p, update); + + bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p, + BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_ALL_SNAPSHOTS| BTREE_ITER_INTENT); ret = bch2_btree_iter_traverse(&update_iter) ?: bch2_trans_update(trans, &update_iter, update, @@ -1038,12 +1067,32 @@ static int 
bch2_trans_update_extent(struct btree_trans *trans, } if (bkey_cmp(k.k->p, insert->k.p) <= 0) { - ret = bch2_btree_delete_at(trans, &iter, flags); + update = bch2_trans_kmalloc(trans, sizeof(*update)); + if ((ret = PTR_ERR_OR_ZERO(update))) + goto err; + + bkey_init(&update->k); + update->k.p = k.k->p; + + if (insert->k.p.snapshot != k.k->p.snapshot) { + update->k.p.snapshot = insert->k.p.snapshot; + update->k.type = KEY_TYPE_whiteout; + } + + bch2_trans_iter_init(trans, &update_iter, btree_id, update->k.p, + BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_INTENT); + ret = bch2_btree_iter_traverse(&update_iter) ?: + bch2_trans_update(trans, &update_iter, update, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| + flags); + bch2_trans_iter_exit(trans, &update_iter); + if (ret) goto err; } - if (bkey_cmp(k.k->p, insert->k.p) > 0) { + if (back_split) { update = bch2_trans_kmalloc(trans, bkey_bytes(k.k)); if ((ret = PTR_ERR_OR_ZERO(update))) goto err; @@ -1051,10 +1100,15 @@ static int bch2_trans_update_extent(struct btree_trans *trans, bkey_reassemble(update, k); bch2_cut_front(insert->k.p, update); - ret = bch2_trans_update(trans, &iter, update, flags); + bch2_trans_copy_iter(&update_iter, &iter); + update_iter.pos = update->k.p; + ret = bch2_trans_update(trans, &update_iter, update, + BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE| + flags); + bch2_trans_iter_exit(trans, &update_iter); + if (ret) goto err; - goto out; } next: @@ -1086,6 +1140,39 @@ err: return ret; } +/* + * When deleting, check if we need to emit a whiteout (because we're overwriting + * something in an ancestor snapshot) + */ +static int need_whiteout_for_snapshot(struct btree_trans *trans, + enum btree_id btree_id, struct bpos pos) +{ + struct btree_iter iter; + struct bkey_s_c k; + u32 snapshot = pos.snapshot; + int ret; + + if (!bch2_snapshot_parent(trans->c, pos.snapshot)) + return 0; + + pos.snapshot++; + + for_each_btree_key(trans, iter, btree_id, pos, + BTREE_ITER_ALL_SNAPSHOTS, k, ret) { + if (bkey_cmp(k.k->p, pos)) + 
break; + + if (bch2_snapshot_is_ancestor(trans->c, snapshot, + k.k->p.snapshot)) { + ret = !bkey_whiteout(k.k); + break; + } + } + bch2_trans_iter_exit(trans, &iter); + + return ret; +} + int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, struct bkey_i *k, enum btree_update_flags flags) { @@ -1118,6 +1205,16 @@ int bch2_trans_update(struct btree_trans *trans, struct btree_iter *iter, btree_insert_entry_cmp(i - 1, i) >= 0); #endif + if (bkey_deleted(&n.k->k) && + (iter->flags & BTREE_ITER_FILTER_SNAPSHOTS)) { + int ret = need_whiteout_for_snapshot(trans, n.btree_id, n.k->k.p); + if (unlikely(ret < 0)) + return ret; + + if (ret) + n.k->k.type = KEY_TYPE_whiteout; + } + /* * Pending updates are kept sorted: first, find position of new update, * then delete/trim any updates the new update overwrites: |