summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Josef Bacik <josef@toxicpanda.com> 2019-06-18 10:59:18 -0400
committer: David Sterba <dsterba@suse.com> 2019-07-02 12:30:50 +0200
commit: 63611e738a62bfb186a61e4b16b9fb72657144e5 (patch)
tree: 44265c54059f11c852f135b901cc011f8971f882
parent: 179006688a7e888cbff39577189f2e034786d06a (diff)
download: linux-63611e738a62bfb186a61e4b16b9fb72657144e5.tar.gz
linux-63611e738a62bfb186a61e4b16b9fb72657144e5.tar.bz2
linux-63611e738a62bfb186a61e4b16b9fb72657144e5.zip
btrfs: run delayed iput at unlink time
We have been seeing issues in production where a cleaner script will end up unlinking a bunch of files that have pending iputs. This means they will get their final iputs run at btrfs-cleaner time and thus are not throttled, which impacts the workload.

Since we are unlinking these files we can just drop the delayed iput at unlink time. We are already holding a reference to the inode so this will not be the final iput and thus is completely safe to do at this point. Doing this means we are more likely to be doing the final iput at unlink time, and thus will get the IO charged to the caller and get throttled appropriately without affecting the main workload.

Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: David Sterba <dsterba@suse.com>
-rw-r--r-- fs/btrfs/inode.c 40
1 files changed, 34 insertions, 6 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 89b9535cda19..525790a38f6d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3295,6 +3295,28 @@ void btrfs_add_delayed_iput(struct inode *inode)
wake_up_process(fs_info->cleaner_kthread);
}
+static void run_delayed_iput_locked(struct btrfs_fs_info *fs_info,
+ struct btrfs_inode *inode)
+{
+ list_del_init(&inode->delayed_iput);
+ spin_unlock(&fs_info->delayed_iput_lock);
+ iput(&inode->vfs_inode);
+ if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
+ wake_up(&fs_info->delayed_iputs_wait);
+ spin_lock(&fs_info->delayed_iput_lock);
+}
+
+static void btrfs_run_delayed_iput(struct btrfs_fs_info *fs_info,
+ struct btrfs_inode *inode)
+{
+ if (!list_empty(&inode->delayed_iput)) {
+ spin_lock(&fs_info->delayed_iput_lock);
+ if (!list_empty(&inode->delayed_iput))
+ run_delayed_iput_locked(fs_info, inode);
+ spin_unlock(&fs_info->delayed_iput_lock);
+ }
+}
+
void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
{
@@ -3304,12 +3326,7 @@ void btrfs_run_delayed_iputs(struct btrfs_fs_info *fs_info)
inode = list_first_entry(&fs_info->delayed_iputs,
struct btrfs_inode, delayed_iput);
- list_del_init(&inode->delayed_iput);
- spin_unlock(&fs_info->delayed_iput_lock);
- iput(&inode->vfs_inode);
- if (atomic_dec_and_test(&fs_info->nr_delayed_iputs))
- wake_up(&fs_info->delayed_iputs_wait);
- spin_lock(&fs_info->delayed_iput_lock);
+ run_delayed_iput_locked(fs_info, inode);
}
spin_unlock(&fs_info->delayed_iput_lock);
}
@@ -4014,6 +4031,17 @@ skip_backref:
ret = 0;
else if (ret)
btrfs_abort_transaction(trans, ret);
+
+ /*
+ * If we have a pending delayed iput we could end up with the final iput
+ * being run in btrfs-cleaner context. If we have enough of these built
+ * up we can end up burning a lot of time in btrfs-cleaner without any
+ * way to throttle the unlinks. Since we're currently holding a ref on
+ * the inode we can run the delayed iput here without any issues as the
+ * final iput won't be done until after we drop the ref we're currently
+ * holding.
+ */
+ btrfs_run_delayed_iput(fs_info, inode);
err:
btrfs_free_path(path);
if (ret)