From 1889ad5a1285ba452f6a8cef3df663087611050a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sun, 14 Mar 2021 19:01:14 -0400 Subject: bcachefs: Add code to scan for/rewite old btree nodes This adds a new data job type to scan for btree nodes in the old extent format, and rewrite them. Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- fs/bcachefs/bcachefs_ioctl.h | 17 +++-- fs/bcachefs/btree_io.c | 5 ++ fs/bcachefs/btree_types.h | 1 + fs/bcachefs/btree_update_interior.c | 1 + fs/bcachefs/move.c | 131 +++++++++++++++++++++++++++++------- fs/bcachefs/move.h | 6 +- fs/bcachefs/movinggc.c | 6 +- fs/bcachefs/rebalance.c | 3 +- 8 files changed, 132 insertions(+), 38 deletions(-) diff --git a/fs/bcachefs/bcachefs_ioctl.h b/fs/bcachefs/bcachefs_ioctl.h index 38c6ac96e12f..1ef9907e07ad 100644 --- a/fs/bcachefs/bcachefs_ioctl.h +++ b/fs/bcachefs/bcachefs_ioctl.h @@ -171,10 +171,11 @@ struct bch_ioctl_disk_set_state { }; enum bch_data_ops { - BCH_DATA_OP_SCRUB = 0, - BCH_DATA_OP_REREPLICATE = 1, - BCH_DATA_OP_MIGRATE = 2, - BCH_DATA_OP_NR = 3, + BCH_DATA_OP_SCRUB = 0, + BCH_DATA_OP_REREPLICATE = 1, + BCH_DATA_OP_MIGRATE = 2, + BCH_DATA_OP_REWRITE_OLD_NODES = 3, + BCH_DATA_OP_NR = 4, }; /* @@ -187,11 +188,13 @@ enum bch_data_ops { * job. The file descriptor is O_CLOEXEC. */ struct bch_ioctl_data { - __u32 op; + __u16 op; + __u8 start_btree; + __u8 end_btree; __u32 flags; - struct bpos start; - struct bpos end; + struct bpos start_pos; + struct bpos end_pos; union { struct { diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 6e656ed6b32a..eac51c39fc6c 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -920,6 +920,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, unsigned u64s; int ret, retry_read = 0, write = READ; + b->version_ondisk = U16_MAX; + iter = mempool_alloc(&c->fill_iter, GFP_NOIO); sort_iter_init(iter, b); iter->size = (btree_blocks(c) + 1) * 2; @@ -1000,6 +1002,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca, sectors = vstruct_sectors(bne, c->block_bits); } + b->version_ondisk = min(b->version_ondisk, + le16_to_cpu(i->version)); + ret = validate_bset(c, ca, b, i, sectors, READ, have_retry); if (ret) diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 80bb31a53339..55d8d815a04a 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -76,6 +76,7 @@ struct btree { u16 written; u8 nsets; u8 nr_key_bits; + u16 version_ondisk; struct bkey_format format; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 285365ba7012..989ba81207c9 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -286,6 +286,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev bch2_bset_init_first(b, &b->data->keys); b->c.level = level; b->c.btree_id = as->btree_id; + b->version_ondisk = c->sb.version; memset(&b->nr, 0, sizeof(b->nr)); b->data->magic = cpu_to_le64(bset_magic(c)); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 28e2125c12ed..72958b867014 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -531,7 +531,7 @@ static int __bch2_move_data(struct bch_fs *c, stats->data_type = BCH_DATA_user; stats->btree_id = btree_id; - stats->pos = POS_MIN; + stats->pos = start; iter = bch2_trans_get_iter(&trans, btree_id, start, BTREE_ITER_PREFETCH); @@ -646,14 +646,15 @@ out: } int bch2_move_data(struct bch_fs *c, + enum btree_id start_btree_id, struct bpos start_pos, + enum btree_id end_btree_id, struct bpos end_pos, struct bch_ratelimit *rate, struct write_point_specifier wp, - struct bpos start, - struct bpos end, move_pred_fn pred, void *arg, struct bch_move_stats *stats) { struct moving_context ctxt = { .stats = stats }; + enum btree_id id; int ret; closure_init_stack(&ctxt.cl); @@ -662,10 +663,23 @@ int bch2_move_data(struct bch_fs *c, stats->data_type = BCH_DATA_user; - ret = __bch2_move_data(c, &ctxt, rate, wp, start, end, - pred, arg, stats, BTREE_ID_EXTENTS) ?: - __bch2_move_data(c, &ctxt, rate, wp, start, end, - pred, arg, stats, BTREE_ID_REFLINK); + for (id = start_btree_id; + id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1); + id++) { + stats->btree_id = id; + + if (id != BTREE_ID_EXTENTS && + id != BTREE_ID_REFLINK) + continue; + + ret = __bch2_move_data(c, &ctxt, rate, wp, + id == start_btree_id ? start_pos : POS_MIN, + id == end_btree_id ? end_pos : POS_MAX, + pred, arg, stats, id); + if (ret) + break; + } + move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads)); closure_sync(&ctxt.cl); @@ -679,16 +693,22 @@ int bch2_move_data(struct bch_fs *c, return ret; } +typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *, + struct btree *, struct bch_io_opts *, + struct data_opts *); + static int bch2_move_btree(struct bch_fs *c, - move_pred_fn pred, - void *arg, + enum btree_id start_btree_id, struct bpos start_pos, + enum btree_id end_btree_id, struct bpos end_pos, + move_btree_pred pred, void *arg, struct bch_move_stats *stats) { + bool kthread = (current->flags & PF_KTHREAD) != 0; struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); struct btree_trans trans; struct btree_iter *iter; struct btree *b; - unsigned id; + enum btree_id id; struct data_opts data_opts; enum data_cmd cmd; int ret = 0; @@ -697,16 +717,24 @@ static int bch2_move_btree(struct bch_fs *c, stats->data_type = BCH_DATA_btree; - for (id = 0; id < BTREE_ID_NR; id++) { + for (id = start_btree_id; + id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1); + id++) { stats->btree_id = id; - for_each_btree_node(&trans, iter, id, POS_MIN, + for_each_btree_node(&trans, iter, id, + id == start_btree_id ? start_pos : POS_MIN, BTREE_ITER_PREFETCH, b) { + if (kthread && kthread_should_stop()) + goto out; + + if ((cmp_int(id, end_btree_id) ?: + bkey_cmp(b->key.k.p, end_pos)) > 0) + break; + stats->pos = iter->pos; - switch ((cmd = pred(c, arg, - bkey_i_to_s_c(&b->key), - &io_opts, &data_opts))) { + switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) { case DATA_SKIP: goto next; case DATA_SCRUB: @@ -726,7 +754,7 @@ next: ret = bch2_trans_iter_free(&trans, iter) ?: ret; } - +out: bch2_trans_exit(&trans); return ret; @@ -785,6 +813,38 @@ static enum data_cmd migrate_pred(struct bch_fs *c, void *arg, return DATA_REWRITE; } +static enum data_cmd rereplicate_btree_pred(struct bch_fs *c, void *arg, + struct btree *b, + struct bch_io_opts *io_opts, + struct data_opts *data_opts) +{ + return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); +} + +static enum data_cmd migrate_btree_pred(struct bch_fs *c, void *arg, + struct btree *b, + struct bch_io_opts *io_opts, + struct data_opts *data_opts) +{ + return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts); +} + +static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg, + struct btree *b, + struct bch_io_opts *io_opts, + struct data_opts *data_opts) +{ + if (b->version_ondisk != c->sb.version || + btree_node_need_rewrite(b)) { + data_opts->target = 0; + data_opts->nr_replicas = 1; + data_opts->btree_insert_flags = 0; + return DATA_REWRITE; + } + + return DATA_SKIP; +} + int bch2_data_job(struct bch_fs *c, struct bch_move_stats *stats, struct bch_ioctl_data op) @@ -796,17 +856,20 @@ int bch2_data_job(struct bch_fs *c, stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, -1); - ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret; + ret = bch2_move_btree(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, + rereplicate_btree_pred, c, stats) ?: ret; closure_wait_event(&c->btree_interior_update_wait, !bch2_btree_interior_updates_nr_pending(c)); ret = bch2_replicas_gc2(c) ?: ret; - ret = bch2_move_data(c, NULL, - writepoint_hashed((unsigned long) current), - op.start, - op.end, + ret = bch2_move_data(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, + NULL, writepoint_hashed((unsigned long) current), rereplicate_pred, c, stats) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; break; @@ -817,16 +880,32 @@ int bch2_data_job(struct bch_fs *c, stats->data_type = BCH_DATA_journal; ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev); - ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret; + ret = bch2_move_btree(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, + migrate_btree_pred, &op, stats) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; - ret = bch2_move_data(c, NULL, - writepoint_hashed((unsigned long) current), - op.start, - op.end, + ret = bch2_move_data(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, + NULL, writepoint_hashed((unsigned long) current), migrate_pred, &op, stats) ?: ret; ret = bch2_replicas_gc2(c) ?: ret; break; + case BCH_DATA_OP_REWRITE_OLD_NODES: + ret = bch2_move_btree(c, + op.start_btree, op.start_pos, + op.end_btree, op.end_pos, + rewrite_old_nodes_pred, &op, stats) ?: ret; + + if (!ret) { + mutex_lock(&c->sb_lock); + c->disk_sb.sb->version_min = c->disk_sb.sb->version; + bch2_write_super(c); + mutex_unlock(&c->sb_lock); + } + break; default: ret = -EINVAL; } diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index b04bc669226d..403ca695c875 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -52,9 +52,11 @@ typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *, struct bkey_s_c, struct bch_io_opts *, struct data_opts *); -int bch2_move_data(struct bch_fs *, struct bch_ratelimit *, +int bch2_move_data(struct bch_fs *, + enum btree_id, struct bpos, + enum btree_id, struct bpos, + struct bch_ratelimit *, struct write_point_specifier, - struct bpos, struct bpos, move_pred_fn, void *, struct bch_move_stats *); diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index b61bbc18a0aa..65a8cd14ee75 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -219,9 +219,11 @@ static int bch2_copygc(struct bch_fs *c) sizeof(h->data[0]), bucket_offset_cmp, NULL); - ret = bch2_move_data(c, &c->copygc_pd.rate, + ret = bch2_move_data(c, + 0, POS_MIN, + BTREE_ID_NR, POS_MAX, + &c->copygc_pd.rate, writepoint_ptr(&c->copygc_write_point), - POS_MIN, POS_MAX, copygc_pred, NULL, &move_stats); diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c index c75411af4622..c83c12dbb0d2 100644 --- a/fs/bcachefs/rebalance.c +++ b/fs/bcachefs/rebalance.c @@ -239,10 +239,11 @@ static int bch2_rebalance_thread(void *arg) rebalance_work_reset(c); bch2_move_data(c, + 0, POS_MIN, + BTREE_ID_NR, POS_MAX, /* ratelimiting disabled for now */ NULL, /* &r->pd.rate, */ writepoint_ptr(&c->rebalance_write_point), - POS_MIN, POS_MAX, rebalance_pred, NULL, &r->move_stats); } -- cgit v1.2.3