summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
authorJan Schmidt <list.btrfs@jan-o-sch.net>2013-04-24 16:57:33 +0000
committerJosef Bacik <jbacik@fusionio.com>2013-05-06 15:55:17 -0400
commitfc36ed7e0b13955ba66fc56dc5067e67ac105150 (patch)
treedea2b4ebd056c6e87e5dea53c8df20e0509b88db /fs/btrfs/ctree.c
parent6d49ba1b47b9c6822d08f90af6f1a2d8ca1cf533 (diff)
downloadlinux-stable-fc36ed7e0b13955ba66fc56dc5067e67ac105150.tar.gz
linux-stable-fc36ed7e0b13955ba66fc56dc5067e67ac105150.tar.bz2
linux-stable-fc36ed7e0b13955ba66fc56dc5067e67ac105150.zip
Btrfs: separate sequence numbers for delayed ref tracking and tree mod log
Sequence numbers for delayed refs have been introduced in the first version of the qgroup patch set. To solve the problem of find_all_roots on a busy file system, the tree mod log was introduced. The sequence numbers for that were simply shared between those two users. However, at one point in qgroup's quota accounting, there's a statement accessing the previous sequence number, that's still just doing (seq - 1) just as it would have to in the very first version. To satisfy that requirement, this patch makes the sequence number counter 64 bit and splits it into a major part (used for qgroup sequence number counting) and a minor part (incremented for each tree modification in the log). This enables us to go exactly one major step backwards, as required for qgroups, while still incrementing the sequence counter for tree mod log insertions to keep track of their order. Keeping them in a single variable means there's no need to change all the code dealing with comparisons of two sequence numbers. The sequence number is reset to 0 on commit (not new in this patch), which ensures we won't overflow the two 32 bit counters. Without this fix, the qgroup tracking can occasionally go wrong and WARN_ONs from the tree mod log code may happen. Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net> Signed-off-by: Josef Bacik <jbacik@fusionio.com>
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c47
1 files changed, 44 insertions, 3 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 2bc34408872d..a17d9991c333 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -361,6 +361,44 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
}
/*
+ * Increment the upper half of tree_mod_seq, set lower half zero.
+ *
+ * Must be called with fs_info->tree_mod_seq_lock held.
+ */
+static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
+{
+ u64 seq = atomic64_read(&fs_info->tree_mod_seq);
+ seq &= 0xffffffff00000000ull;
+ seq += 1ull << 32;
+ atomic64_set(&fs_info->tree_mod_seq, seq);
+ return seq;
+}
+
+/*
+ * Increment the lower half of tree_mod_seq.
+ *
+ * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
+ * are generated should not technically require a spin lock here. (Rationale:
+ * incrementing the minor while incrementing the major seq number is between its
+ * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it
+ * just returns a unique sequence number as usual.) We have decided to leave
+ * that requirement in here and rethink it once we notice it really imposes a
+ * problem on some workload.
+ */
+static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info)
+{
+ return atomic64_inc_return(&fs_info->tree_mod_seq);
+}
+
+/*
+ * return the last minor in the previous major tree_mod_seq number
+ */
+u64 btrfs_tree_mod_seq_prev(u64 seq)
+{
+ return (seq & 0xffffffff00000000ull) - 1ull;
+}
+
+/*
* This adds a new blocker to the tree mod log's blocker list if the @elem
* passed does not already have a sequence number set. So when a caller expects
* to record tree modifications, it should ensure to set elem->seq to zero
@@ -376,10 +414,10 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
tree_mod_log_write_lock(fs_info);
spin_lock(&fs_info->tree_mod_seq_lock);
if (!elem->seq) {
- elem->seq = btrfs_inc_tree_mod_seq(fs_info);
+ elem->seq = btrfs_inc_tree_mod_seq_major(fs_info);
list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
}
- seq = btrfs_inc_tree_mod_seq(fs_info);
+ seq = btrfs_inc_tree_mod_seq_minor(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
tree_mod_log_write_unlock(fs_info);
@@ -524,7 +562,10 @@ static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
if (!tm)
return -ENOMEM;
- tm->seq = btrfs_inc_tree_mod_seq(fs_info);
+ spin_lock(&fs_info->tree_mod_seq_lock);
+ tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
+ spin_unlock(&fs_info->tree_mod_seq_lock);
+
return tm->seq;
}