summaryrefslogtreecommitdiffstats
path: root/fs/bcachefs/extents.c
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@linux.dev> 2023-10-20 13:33:14 -0400
committer: Kent Overstreet <kent.overstreet@linux.dev> 2023-11-01 21:11:05 -0400
commit: fb3f57bb1177ae4d5550bbb431f90ebf277329e8 (patch)
tree: e55c5b2899b5885cdd660914848eec727fcd6f13 /fs/bcachefs/extents.c
parent: 55c11a159d3ca4ca7f9d5c1275d0768474b12195 (diff)
downloadlinux-fb3f57bb1177ae4d5550bbb431f90ebf277329e8.tar.gz
linux-fb3f57bb1177ae4d5550bbb431f90ebf277329e8.tar.bz2
linux-fb3f57bb1177ae4d5550bbb431f90ebf277329e8.zip
bcachefs: rebalance_work
This adds a new btree, rebalance_work, to eliminate scanning required for finding extents that need work done on them in the background - i.e. for the background_target and background_compression options. rebalance_work is a bitset btree, where a KEY_TYPE_set corresponds to an extent in the extents or reflink btree at the same pos. A new extent field is added, bch_extent_rebalance, which indicates that this extent has work that needs to be done in the background - and which options to use. This allows per-inode options to be propagated to indirect extents - at least in some circumstances. In this patch, changing IO options on a file will not propagate the new options to indirect extents pointed to by that file. Updating (setting/clearing) the rebalance_work btree is done by the extent trigger, which looks at the bch_extent_rebalance field. Scanning is still required after changing IO path options - either just for a given inode, or for the whole filesystem. We indicate that scanning is required by adding a KEY_TYPE_cookie key to the rebalance_work btree: the cookie counter is so that we can detect that scanning is still required when an option has been flipped mid-way through an existing scan. Future possible work: (1) Propagate options to indirect extents when being changed; (2) Add other IO path options - nr_replicas, ec - to rebalance_work so they can be applied in the background when they change; (3) Add a counter, for bcachefs fs usage output, showing the pending amount of rebalance work: we'll probably want to do this after the disk space accounting rewrite (moving it to a new btree). Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs/extents.c')
-rw-r--r--fs/bcachefs/extents.c155
1 files changed, 142 insertions, 13 deletions
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index ccb62fa22b04..0c60d49c3599 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -13,6 +13,7 @@
#include "btree_iter.h"
#include "buckets.h"
#include "checksum.h"
+#include "compress.h"
#include "debug.h"
#include "disk_groups.h"
#include "error.h"
@@ -757,18 +758,6 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs,
return i;
}
-static void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry)
-{
- union bch_extent_entry *next = extent_entry_next(entry);
-
- /* stripes have ptrs, but their layout doesn't work with this code */
- BUG_ON(k.k->type == KEY_TYPE_stripe);
-
- memmove_u64s_down(entry, next,
- (u64 *) bkey_val_end(k) - (u64 *) next);
- k.k->u64s -= (u64 *) next - (u64 *) entry;
-}
-
/*
* Returns pointer to the next entry after the one being dropped:
*/
@@ -1048,6 +1037,18 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
(u64) ec->idx, ec->block);
break;
}
+ case BCH_EXTENT_ENTRY_rebalance: {
+ const struct bch_extent_rebalance *r = &entry->rebalance;
+
+ prt_str(out, "rebalance: target ");
+ if (c)
+ bch2_target_to_text(out, c, r->target);
+ else
+ prt_printf(out, "%u", r->target);
+ prt_str(out, " compression ");
+ bch2_compression_opt_to_text(out, r->compression);
+ break;
+ }
default:
prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
return;
@@ -1223,9 +1224,18 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
}
have_ec = true;
break;
- case BCH_EXTENT_ENTRY_rebalance:
+ case BCH_EXTENT_ENTRY_rebalance: {
+ const struct bch_extent_rebalance *r = &entry->rebalance;
+
+ if (!bch2_compression_opt_valid(r->compression)) {
+ struct bch_compression_opt opt = __bch2_compression_decode(r->compression);
+ prt_printf(err, "invalid compression opt %u:%u",
+ opt.type, opt.level);
+ return -BCH_ERR_invalid_bkey;
+ }
break;
}
+ }
}
if (!nr_ptrs) {
@@ -1289,6 +1299,125 @@ void bch2_ptr_swab(struct bkey_s k)
}
}
+const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ const union bch_extent_entry *entry;
+
+ bkey_extent_entry_for_each(ptrs, entry)
+ if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance)
+ return &entry->rebalance;
+
+ return NULL;
+}
+
+unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k,
+ unsigned target, unsigned compression)
+{
+ struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+ unsigned rewrite_ptrs = 0;
+
+ if (compression) {
+ unsigned compression_type = bch2_compression_opt_to_type(compression);
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ unsigned i = 0;
+
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible) {
+ rewrite_ptrs = 0;
+ goto incompressible;
+ }
+
+ if (!p.ptr.cached && p.crc.compression_type != compression_type)
+ rewrite_ptrs |= 1U << i;
+ i++;
+ }
+ }
+incompressible:
+ if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) {
+ const struct bch_extent_ptr *ptr;
+ unsigned i = 0;
+
+ bkey_for_each_ptr(ptrs, ptr) {
+ if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target))
+ rewrite_ptrs |= 1U << i;
+ i++;
+ }
+ }
+
+ return rewrite_ptrs;
+}
+
+bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k)
+{
+ const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k);
+
+ /*
+ * If it's an indirect extent, we don't delete the rebalance entry when
+ * done so that we know what options were applied - check if it still
+ * needs work done:
+ */
+ if (r &&
+ k.k->type == KEY_TYPE_reflink_v &&
+ !bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression))
+ r = NULL;
+
+ return r != NULL;
+}
+
+int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k,
+ unsigned target, unsigned compression)
+{
+ struct bkey_s k = bkey_i_to_s(_k);
+ struct bch_extent_rebalance *r;
+ bool needs_rebalance;
+
+ if (!bkey_extent_is_direct_data(k.k))
+ return 0;
+
+ /* get existing rebalance entry: */
+ r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c);
+ if (r) {
+ if (k.k->type == KEY_TYPE_reflink_v) {
+ /*
+ * indirect extents: existing options take precedence,
+ * so that we don't move extents back and forth if
+ * they're referenced by different inodes with different
+ * options:
+ */
+ if (r->target)
+ target = r->target;
+ if (r->compression)
+ compression = r->compression;
+ }
+
+ r->target = target;
+ r->compression = compression;
+ }
+
+ needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression);
+
+ if (needs_rebalance && !r) {
+ union bch_extent_entry *new = bkey_val_end(k);
+
+ new->rebalance.type = 1U << BCH_EXTENT_ENTRY_rebalance;
+ new->rebalance.compression = compression;
+ new->rebalance.target = target;
+ new->rebalance.unused = 0;
+ k.k->u64s += extent_entry_u64s(new);
+ } else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) {
+ /*
+ * For indirect extents, don't delete the rebalance entry when
+ * we're finished so that we know we specifically moved it or
+ * compressed it to its current location/compression type
+ */
+ extent_entry_drop(k, (union bch_extent_entry *) r);
+ }
+
+ return 0;
+}
+
/* Generic extent code: */
int bch2_cut_front_s(struct bpos where, struct bkey_s k)