summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-01-28 14:53:31 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2020-01-28 14:53:31 -0800
commit81a046b18b331ed6192e6fd9ff6d12a1f18058cf (patch)
tree1d20ebe76c82cc2be603a0a4836d08ba9ec63ee0 /fs/btrfs/disk-io.c
parent511fdb78442229ac11057b4a55c3f03c253c062f (diff)
parent4e19443da1941050b346f8fc4c368aa68413bc88 (diff)
downloadlinux-81a046b18b331ed6192e6fd9ff6d12a1f18058cf.tar.gz
linux-81a046b18b331ed6192e6fd9ff6d12a1f18058cf.tar.bz2
linux-81a046b18b331ed6192e6fd9ff6d12a1f18058cf.zip
Merge tag 'for-5.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba: "Features, highlights: - async discard - "mount -o discard=async" to enable it - freed extents are not discarded immediately, but grouped together and trimmed later, with IO rate limiting - the "sync" mode submits short extents that could have been ignored completely by the device, for SATA prior to 3.1 the requests are unqueued and have a big impact on performance - the actual discard IO requests have been moved out of transaction commit to a worker thread, improving commit latency - IO rate and request size can be tuned by sysfs files, for now enabled only with CONFIG_BTRFS_DEBUG as we might need to add/delete the files and don't have a stable-ish ABI for general use, defaults are conservative - export device state info in sysfs, e.g. missing, writeable - no discard of extents known to be untouched on disk (e.g. after reservation) - device stats reset is logged with process name and PID that called the ioctl Fixes: - fix missing hole after hole punching and fsync when using NO_HOLES - writeback: range cyclic mode could miss some dirty pages and lead to OOM - two more corner cases for metadata_uuid change after power loss during the change - fix infinite loop during fsync after mix of rename operations Core changes: - qgroup assign returns ENOTCONN when quotas not enabled, used to return EINVAL that was confusing - device closing does not need to allocate memory anymore - snapshot aware code got removed, disabled for years due to performance problems, reimplementation will allow selecting whether defrag breaks or does not break COW on shared extents - tree-checker: - check leaf chunk item size, cross check against number of stripes - verify location keys for DIR_ITEM, DIR_INDEX and XATTR items - new self test for physical -> logical mapping code, used for super block range exclusion - assertion helpers/macros updated to avoid objtool "unreachable code" reports on older compilers or config option combinations" * tag 'for-5.6-tag' 
of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (84 commits) btrfs: free block groups after free'ing fs trees btrfs: Fix split-brain handling when changing FSID to metadata uuid btrfs: Handle another split brain scenario with metadata uuid feature btrfs: Factor out metadata_uuid code from find_fsid. btrfs: Call find_fsid from find_fsid_inprogress Btrfs: fix infinite loop during fsync after rename operations btrfs: set trans->drity in btrfs_commit_transaction btrfs: drop log root for dropped roots btrfs: sysfs, add devid/dev_state kobject and device attributes btrfs: Refactor btrfs_rmap_block to improve readability btrfs: Add self-tests for btrfs_rmap_block btrfs: selftests: Add support for dummy devices btrfs: Move and unexport btrfs_rmap_block btrfs: separate definition of assertion failure handlers btrfs: device stats, log when stats are zeroed btrfs: fix improper setting of scanned for range cyclic write cache pages btrfs: safely advance counter when looking up bio csums btrfs: remove unused member btrfs_device::work btrfs: remove unnecessary wrapper get_alloc_profile btrfs: add correction to handle -1 edge case in async discard ...
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c37
1 files changed, 24 insertions, 13 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index e0edfdc9c82b..aea48d6ddc0c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -41,6 +41,7 @@
#include "tree-checker.h"
#include "ref-verify.h"
#include "block-group.h"
+#include "discard.h"
#define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\
BTRFS_HEADER_FLAG_RELOC |\
@@ -202,8 +203,8 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb,
* that covers the entire device
*/
struct extent_map *btree_get_extent(struct btrfs_inode *inode,
- struct page *page, size_t pg_offset, u64 start, u64 len,
- int create)
+ struct page *page, size_t pg_offset,
+ u64 start, u64 len)
{
struct extent_map_tree *em_tree = &inode->extent_tree;
struct extent_map *em;
@@ -1953,6 +1954,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
btrfs_destroy_workqueue(fs_info->readahead_workers);
btrfs_destroy_workqueue(fs_info->flush_workers);
btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers);
+ if (fs_info->discard_ctl.discard_workers)
+ destroy_workqueue(fs_info->discard_ctl.discard_workers);
/*
* Now that all other work queues are destroyed, we can safely destroy
* the queues used for metadata I/O, since tasks from those other work
@@ -2148,6 +2151,8 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
max_active, 2);
fs_info->qgroup_rescan_workers =
btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0);
+ fs_info->discard_ctl.discard_workers =
+ alloc_workqueue("btrfs_discard", WQ_UNBOUND | WQ_FREEZABLE, 1);
if (!(fs_info->workers && fs_info->delalloc_workers &&
fs_info->flush_workers &&
@@ -2158,7 +2163,8 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info,
fs_info->endio_freespace_worker && fs_info->rmw_workers &&
fs_info->caching_workers && fs_info->readahead_workers &&
fs_info->fixup_workers && fs_info->delayed_workers &&
- fs_info->qgroup_rescan_workers)) {
+ fs_info->qgroup_rescan_workers &&
+ fs_info->discard_ctl.discard_workers)) {
return -ENOMEM;
}
@@ -2792,6 +2798,7 @@ int __cold open_ctree(struct super_block *sb,
btrfs_init_dev_replace_locks(fs_info);
btrfs_init_qgroup(fs_info);
+ btrfs_discard_init(fs_info);
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -3082,20 +3089,13 @@ int __cold open_ctree(struct super_block *sb,
btrfs_free_extra_devids(fs_devices, 1);
- ret = btrfs_sysfs_add_fsid(fs_devices, NULL);
+ ret = btrfs_sysfs_add_fsid(fs_devices);
if (ret) {
btrfs_err(fs_info, "failed to init sysfs fsid interface: %d",
ret);
goto fail_block_groups;
}
- ret = btrfs_sysfs_add_device(fs_devices);
- if (ret) {
- btrfs_err(fs_info, "failed to init sysfs device interface: %d",
- ret);
- goto fail_fsdev_sysfs;
- }
-
ret = btrfs_sysfs_add_mounted(fs_info);
if (ret) {
btrfs_err(fs_info, "failed to init sysfs interface: %d", ret);
@@ -3262,6 +3262,7 @@ int __cold open_ctree(struct super_block *sb,
}
btrfs_qgroup_rescan_resume(fs_info);
+ btrfs_discard_resume(fs_info);
if (!fs_info->uuid_root) {
btrfs_info(fs_info, "creating UUID tree");
@@ -3978,6 +3979,9 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
cancel_work_sync(&fs_info->async_reclaim_work);
+ /* Cancel or finish ongoing discard work */
+ btrfs_discard_cleanup(fs_info);
+
if (!sb_rdonly(fs_info->sb)) {
/*
* The cleaner kthread is stopped, so do one final pass over
@@ -4026,11 +4030,18 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
btrfs_stop_all_workers(fs_info);
- btrfs_free_block_groups(fs_info);
-
clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
free_root_pointers(fs_info, true);
+ /*
+ * We must free the block groups after dropping the fs_roots as we could
+ * have had an IO error and have left over tree log blocks that aren't
+ * cleaned up until the fs roots are freed. This makes the block group
+ * accounting appear to be wrong because there's pending reserved bytes,
+ * so make sure we do the block group cleanup afterwards.
+ */
+ btrfs_free_block_groups(fs_info);
+
iput(fs_info->btree_inode);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY