diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-01-28 14:53:31 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-01-28 14:53:31 -0800 |
commit | 81a046b18b331ed6192e6fd9ff6d12a1f18058cf (patch) | |
tree | 1d20ebe76c82cc2be603a0a4836d08ba9ec63ee0 /fs/btrfs/disk-io.c | |
parent | 511fdb78442229ac11057b4a55c3f03c253c062f (diff) | |
parent | 4e19443da1941050b346f8fc4c368aa68413bc88 (diff) | |
download | linux-81a046b18b331ed6192e6fd9ff6d12a1f18058cf.tar.gz linux-81a046b18b331ed6192e6fd9ff6d12a1f18058cf.tar.bz2 linux-81a046b18b331ed6192e6fd9ff6d12a1f18058cf.zip |
Merge tag 'for-5.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"Features, highlights:
- async discard
- "mount -o discard=async" to enable it
- freed extents are not discarded immediatelly, but grouped
together and trimmed later, with IO rate limiting
- the "sync" mode submits short extents that could have been
ignored completely by the device, for SATA prior to 3.1 the
requests are unqueued and have a big impact on performance
- the actual discard IO requests have been moved out of
transaction commit to a worker thread, improving commit latency
- IO rate and request size can be tuned by sysfs files, for now
enabled only with CONFIG_BTRFS_DEBUG as we might need to
add/delete the files and don't have a stable-ish ABI for
general use, defaults are conservative
- export device state info in sysfs, eg. missing, writeable
- no discard of extents known to be untouched on disk (eg. after
reservation)
- device stats reset is logged with process name and PID that called
the ioctl
Fixes:
- fix missing hole after hole punching and fsync when using NO_HOLES
- writeback: range cyclic mode could miss some dirty pages and lead
to OOM
- two more corner cases for metadata_uuid change after power loss
during the change
- fix infinite loop during fsync after mix of rename operations
Core changes:
- qgroup assign returns ENOTCONN when quotas not enabled, used to
return EINVAL that was confusing
- device closing does not need to allocate memory anymore
- snapshot aware code got removed, disabled for years due to
performance problems, reimplmentation will allow to select wheter
defrag breaks or does not break COW on shared extents
- tree-checker:
- check leaf chunk item size, cross check against number of
stripes
- verify location keys for DIR_ITEM, DIR_INDEX and XATTR items
- new self test for physical -> logical mapping code, used for super
block range exclusion
- assertion helpers/macros updated to avoid objtool "unreachable
code" reports on older compilers or config option combinations"
* tag 'for-5.6-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (84 commits)
btrfs: free block groups after free'ing fs trees
btrfs: Fix split-brain handling when changing FSID to metadata uuid
btrfs: Handle another split brain scenario with metadata uuid feature
btrfs: Factor out metadata_uuid code from find_fsid.
btrfs: Call find_fsid from find_fsid_inprogress
Btrfs: fix infinite loop during fsync after rename operations
btrfs: set trans->drity in btrfs_commit_transaction
btrfs: drop log root for dropped roots
btrfs: sysfs, add devid/dev_state kobject and device attributes
btrfs: Refactor btrfs_rmap_block to improve readability
btrfs: Add self-tests for btrfs_rmap_block
btrfs: selftests: Add support for dummy devices
btrfs: Move and unexport btrfs_rmap_block
btrfs: separate definition of assertion failure handlers
btrfs: device stats, log when stats are zeroed
btrfs: fix improper setting of scanned for range cyclic write cache pages
btrfs: safely advance counter when looking up bio csums
btrfs: remove unused member btrfs_device::work
btrfs: remove unnecessary wrapper get_alloc_profile
btrfs: add correction to handle -1 edge case in async discard
...
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 37 |
1 files changed, 24 insertions, 13 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e0edfdc9c82b..aea48d6ddc0c 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -41,6 +41,7 @@ #include "tree-checker.h" #include "ref-verify.h" #include "block-group.h" +#include "discard.h" #define BTRFS_SUPER_FLAG_SUPP (BTRFS_HEADER_FLAG_WRITTEN |\ BTRFS_HEADER_FLAG_RELOC |\ @@ -202,8 +203,8 @@ void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, * that covers the entire device */ struct extent_map *btree_get_extent(struct btrfs_inode *inode, - struct page *page, size_t pg_offset, u64 start, u64 len, - int create) + struct page *page, size_t pg_offset, + u64 start, u64 len) { struct extent_map_tree *em_tree = &inode->extent_tree; struct extent_map *em; @@ -1953,6 +1954,8 @@ static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info) btrfs_destroy_workqueue(fs_info->readahead_workers); btrfs_destroy_workqueue(fs_info->flush_workers); btrfs_destroy_workqueue(fs_info->qgroup_rescan_workers); + if (fs_info->discard_ctl.discard_workers) + destroy_workqueue(fs_info->discard_ctl.discard_workers); /* * Now that all other work queues are destroyed, we can safely destroy * the queues used for metadata I/O, since tasks from those other work @@ -2148,6 +2151,8 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, max_active, 2); fs_info->qgroup_rescan_workers = btrfs_alloc_workqueue(fs_info, "qgroup-rescan", flags, 1, 0); + fs_info->discard_ctl.discard_workers = + alloc_workqueue("btrfs_discard", WQ_UNBOUND | WQ_FREEZABLE, 1); if (!(fs_info->workers && fs_info->delalloc_workers && fs_info->flush_workers && @@ -2158,7 +2163,8 @@ static int btrfs_init_workqueues(struct btrfs_fs_info *fs_info, fs_info->endio_freespace_worker && fs_info->rmw_workers && fs_info->caching_workers && fs_info->readahead_workers && fs_info->fixup_workers && fs_info->delayed_workers && - fs_info->qgroup_rescan_workers)) { + fs_info->qgroup_rescan_workers && + fs_info->discard_ctl.discard_workers)) { return -ENOMEM; } @@ -2792,6 +2798,7 @@ int __cold open_ctree(struct super_block *sb, btrfs_init_dev_replace_locks(fs_info); btrfs_init_qgroup(fs_info); + btrfs_discard_init(fs_info); btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); btrfs_init_free_cluster(&fs_info->data_alloc_cluster); @@ -3082,20 +3089,13 @@ int __cold open_ctree(struct super_block *sb, btrfs_free_extra_devids(fs_devices, 1); - ret = btrfs_sysfs_add_fsid(fs_devices, NULL); + ret = btrfs_sysfs_add_fsid(fs_devices); if (ret) { btrfs_err(fs_info, "failed to init sysfs fsid interface: %d", ret); goto fail_block_groups; } - ret = btrfs_sysfs_add_device(fs_devices); - if (ret) { - btrfs_err(fs_info, "failed to init sysfs device interface: %d", - ret); - goto fail_fsdev_sysfs; - } - ret = btrfs_sysfs_add_mounted(fs_info); if (ret) { btrfs_err(fs_info, "failed to init sysfs interface: %d", ret); @@ -3262,6 +3262,7 @@ int __cold open_ctree(struct super_block *sb, } btrfs_qgroup_rescan_resume(fs_info); + btrfs_discard_resume(fs_info); if (!fs_info->uuid_root) { btrfs_info(fs_info, "creating UUID tree"); @@ -3978,6 +3979,9 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) cancel_work_sync(&fs_info->async_reclaim_work); + /* Cancel or finish ongoing discard work */ + btrfs_discard_cleanup(fs_info); + if (!sb_rdonly(fs_info->sb)) { /* * The cleaner kthread is stopped, so do one final pass over @@ -4026,11 +4030,18 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info) invalidate_inode_pages2(fs_info->btree_inode->i_mapping); btrfs_stop_all_workers(fs_info); - btrfs_free_block_groups(fs_info); - clear_bit(BTRFS_FS_OPEN, &fs_info->flags); free_root_pointers(fs_info, true); + /* + * We must free the block groups after dropping the fs_roots as we could + * have had an IO error and have left over tree log blocks that aren't + * cleaned up until the fs roots are freed. This makes the block group + * accounting appear to be wrong because there's pending reserved bytes, + * so make sure we do the block group cleanup afterwards. + */ + btrfs_free_block_groups(fs_info); + iput(fs_info->btree_inode); #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY |