summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/btrfs/backref.c122
-rw-r--r--fs/btrfs/backref.h17
-rw-r--r--fs/btrfs/ctree.h18
-rw-r--r--fs/btrfs/extent-tree.c10
-rw-r--r--fs/btrfs/extent_io.c11
5 files changed, 170 insertions, 8 deletions
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 7766a6717a7d..e0ff71159071 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1512,6 +1512,105 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
}
/*
+ * The caller has joined a transaction or is holding a read lock on the
+ * fs_info->commit_root_sem semaphore, so no need to worry about the root's last
+ * snapshot field changing while updating or checking the cache.
+ */
+static bool lookup_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
+ struct btrfs_root *root,
+ u64 bytenr, int level, bool *is_shared)
+{
+ struct btrfs_backref_shared_cache_entry *entry;
+
+ if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
+ return false;
+
+ /*
+ * Level -1 is used for the data extent, which is not reliable to cache
+ * because its reference count can increase or decrease without us
+ * realizing. We cache results only for extent buffers that lead from
+ * the root node down to the leaf with the file extent item.
+ */
+ ASSERT(level >= 0);
+
+ entry = &cache->entries[level];
+
+ /* Unused cache entry or being used for some other extent buffer. */
+ if (entry->bytenr != bytenr)
+ return false;
+
+ /*
+ * We cached a false result, but the last snapshot generation of the
+ * root changed, so we now have a snapshot. Don't trust the result.
+ */
+ if (!entry->is_shared &&
+ entry->gen != btrfs_root_last_snapshot(&root->root_item))
+ return false;
+
+ /*
+ * If we cached a true result and the last generation used for dropping
+ * a root changed, we can not trust the result, because the dropped root
+ * could be a snapshot sharing this extent buffer.
+ */
+ if (entry->is_shared &&
+ entry->gen != btrfs_get_last_root_drop_gen(root->fs_info))
+ return false;
+
+ *is_shared = entry->is_shared;
+
+ return true;
+}
+
+/*
+ * The caller has joined a transaction or is holding a read lock on the
+ * fs_info->commit_root_sem semaphore, so no need to worry about the root's last
+ * snapshot field changing while updating or checking the cache.
+ */
+static void store_backref_shared_cache(struct btrfs_backref_shared_cache *cache,
+ struct btrfs_root *root,
+ u64 bytenr, int level, bool is_shared)
+{
+ struct btrfs_backref_shared_cache_entry *entry;
+ u64 gen;
+
+ if (WARN_ON_ONCE(level >= BTRFS_MAX_LEVEL))
+ return;
+
+ /*
+ * Level -1 is used for the data extent, which is not reliable to cache
+ * because its reference count can increase or decrease without us
+ * realizing. We cache results only for extent buffers that lead from
+ * the root node down to the leaf with the file extent item.
+ */
+ ASSERT(level >= 0);
+
+ if (is_shared)
+ gen = btrfs_get_last_root_drop_gen(root->fs_info);
+ else
+ gen = btrfs_root_last_snapshot(&root->root_item);
+
+ entry = &cache->entries[level];
+ entry->bytenr = bytenr;
+ entry->is_shared = is_shared;
+ entry->gen = gen;
+
+ /*
+ * If we found an extent buffer is shared, set the cache result for all
+ * extent buffers below it to true. As nodes in the path are COWed,
+ * their sharedness is moved to their children, and if a leaf is COWed,
+ * then the sharedness of a data extent becomes direct, the refcount of
+ * data extent is increased in the extent item at the extent tree.
+ */
+ if (is_shared) {
+ for (int i = 0; i < level; i++) {
+ entry = &cache->entries[i];
+ entry->is_shared = is_shared;
+ entry->gen = gen;
+ }
+ }
+}
+
+/*
* Check if a data extent is shared or not.
*
* @root: root inode belongs to
@@ -1519,6 +1618,7 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
* @bytenr: logical bytenr of the extent we are checking
* @roots: list of roots this extent is shared among
* @tmp: temporary list used for iteration
+ * @cache: a backref lookup result cache
*
* btrfs_is_data_extent_shared uses the backref walking code but will short
* circuit as soon as it finds a root or inode that doesn't match the
@@ -1532,7 +1632,8 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans,
* Return: 0 if extent is not shared, 1 if it is shared, < 0 on error.
*/
int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
- struct ulist *roots, struct ulist *tmp)
+ struct ulist *roots, struct ulist *tmp,
+ struct btrfs_backref_shared_cache *cache)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_trans_handle *trans;
@@ -1545,6 +1646,7 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
.inum = inum,
.share_count = 0,
};
+ int level;
ulist_init(roots);
ulist_init(tmp);
@@ -1561,22 +1663,40 @@ int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
btrfs_get_tree_mod_seq(fs_info, &elem);
}
+ /* -1 means we are in the bytenr of the data extent. */
+ level = -1;
ULIST_ITER_INIT(&uiter);
while (1) {
+ bool is_shared;
+ bool cached;
+
ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp,
roots, NULL, &shared, false);
if (ret == BACKREF_FOUND_SHARED) {
/* this is the only condition under which we return 1 */
ret = 1;
+ if (level >= 0)
+ store_backref_shared_cache(cache, root, bytenr,
+ level, true);
break;
}
if (ret < 0 && ret != -ENOENT)
break;
ret = 0;
+ if (level >= 0)
+ store_backref_shared_cache(cache, root, bytenr,
+ level, false);
node = ulist_next(tmp, &uiter);
if (!node)
break;
bytenr = node->val;
+ level++;
+ cached = lookup_backref_shared_cache(cache, root, bytenr, level,
+ &is_shared);
+ if (cached) {
+ ret = (is_shared ? 1 : 0);
+ break;
+ }
shared.share_count = 0;
cond_resched();
}
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 08354394b1bb..fec77b30a249 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -17,6 +17,20 @@ struct inode_fs_paths {
struct btrfs_data_container *fspath;
};
+struct btrfs_backref_shared_cache_entry {
+ u64 bytenr;
+ u64 gen;
+ bool is_shared;
+};
+
+struct btrfs_backref_shared_cache {
+ /*
+ * A path from a root to a leaf that has a file extent item pointing to
+ * a given data extent should never exceed the maximum b+tree height.
+ */
+ struct btrfs_backref_shared_cache_entry entries[BTRFS_MAX_LEVEL];
+};
+
typedef int (iterate_extent_inodes_t)(u64 inum, u64 offset, u64 root,
void *ctx);
@@ -63,7 +77,8 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
struct btrfs_inode_extref **ret_extref,
u64 *found_off);
int btrfs_is_data_extent_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
- struct ulist *roots, struct ulist *tmp);
+ struct ulist *roots, struct ulist *tmp,
+ struct btrfs_backref_shared_cache *cache);
int __init btrfs_prelim_ref_init(void);
void __cold btrfs_prelim_ref_exit(void);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 7007c7974a2e..cb786a25a926 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1090,6 +1090,13 @@ struct btrfs_fs_info {
struct btrfs_commit_stats commit_stats;
/*
+ * Last generation where we dropped a non-relocation root.
+ * Use btrfs_set_last_root_drop_gen() and btrfs_get_last_root_drop_gen()
+ * to change it and to read it, respectively.
+ */
+ u64 last_root_drop_gen;
+
+ /*
* Annotations for transaction events (structures are empty when
* compiled without lockdep).
*/
@@ -1113,6 +1120,17 @@ struct btrfs_fs_info {
#endif
};
+static inline void btrfs_set_last_root_drop_gen(struct btrfs_fs_info *fs_info,
+ u64 gen)
+{
+ WRITE_ONCE(fs_info->last_root_drop_gen, gen);
+}
+
+static inline u64 btrfs_get_last_root_drop_gen(const struct btrfs_fs_info *fs_info)
+{
+ return READ_ONCE(fs_info->last_root_drop_gen);
+}
+
static inline struct btrfs_fs_info *btrfs_sb(struct super_block *sb)
{
return sb->s_fs_info;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index bcd0e72cded3..9818285dface 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5635,6 +5635,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
*/
int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
{
+ const bool is_reloc_root = (root->root_key.objectid ==
+ BTRFS_TREE_RELOC_OBJECTID);
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_path *path;
struct btrfs_trans_handle *trans;
@@ -5794,6 +5796,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
goto out_end_trans;
}
+ if (!is_reloc_root)
+ btrfs_set_last_root_drop_gen(fs_info, trans->transid);
+
btrfs_end_transaction_throttle(trans);
if (!for_reloc && btrfs_need_cleaner_sleep(fs_info)) {
btrfs_debug(fs_info,
@@ -5828,7 +5833,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
goto out_end_trans;
}
- if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
+ if (!is_reloc_root) {
ret = btrfs_find_root(tree_root, &root->root_key, path,
NULL, NULL);
if (ret < 0) {
@@ -5860,6 +5865,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref, int for_reloc)
btrfs_put_root(root);
root_dropped = true;
out_end_trans:
+ if (!is_reloc_root)
+ btrfs_set_last_root_drop_gen(fs_info, trans->transid);
+
btrfs_end_transaction_throttle(trans);
out_free:
kfree(wc);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index cf939f85ca47..618c6890699a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -5448,6 +5448,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
struct btrfs_path *path;
struct btrfs_root *root = inode->root;
struct fiemap_cache cache = { 0 };
+ struct btrfs_backref_shared_cache *backref_cache;
struct ulist *roots;
struct ulist *tmp_ulist;
int end = 0;
@@ -5455,13 +5456,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
u64 em_len = 0;
u64 em_end = 0;
+ backref_cache = kzalloc(sizeof(*backref_cache), GFP_KERNEL);
path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
-
roots = ulist_alloc(GFP_KERNEL);
tmp_ulist = ulist_alloc(GFP_KERNEL);
- if (!roots || !tmp_ulist) {
+ if (!backref_cache || !path || !roots || !tmp_ulist) {
ret = -ENOMEM;
goto out_free_ulist;
}
@@ -5587,7 +5586,8 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
*/
ret = btrfs_is_data_extent_shared(root, btrfs_ino(inode),
bytenr, roots,
- tmp_ulist);
+ tmp_ulist,
+ backref_cache);
if (ret < 0)
goto out_free;
if (ret)
@@ -5639,6 +5639,7 @@ out:
&cached_state);
out_free_ulist:
+ kfree(backref_cache);
btrfs_free_path(path);
ulist_free(roots);
ulist_free(tmp_ulist);