From 73ba39ab9307340dc98ec3622891314bbc09cc2e Mon Sep 17 00:00:00 2001
From: Pan Bian
Date: Sun, 4 Dec 2016 12:51:53 +0800
Subject: btrfs: return the actual error value from btrfs_uuid_tree_iterate

In btrfs_uuid_tree_iterate(), the error code is assigned to the
variable ret on errors, yet the function always returns 0. Return ret
instead. This patch also removes the warning, because the caller
already prints one.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=188731
Signed-off-by: Pan Bian
Reviewed-by: Omar Sandoval
[ edited subject ]
Signed-off-by: David Sterba
---
 fs/btrfs/uuid-tree.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
index 161342b73ce5..726f928238d0 100644
--- a/fs/btrfs/uuid-tree.c
+++ b/fs/btrfs/uuid-tree.c
@@ -352,7 +352,5 @@ skip:
 
 out:
 	btrfs_free_path(path);
-	if (ret)
-		btrfs_warn(fs_info, "btrfs_uuid_tree_iterate failed %d", ret);
-	return 0;
+	return ret;
 }
--
cgit v1.2.3

From aa7c8da35d1905d80e840d075f07d26ec90144b5 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney
Date: Tue, 20 Dec 2016 13:28:27 -0500
Subject: btrfs: fix error handling when run_delayed_extent_op fails

In __btrfs_run_delayed_refs, the error path when run_delayed_extent_op
fails sets locked_ref->processing = 0 but doesn't re-increment
delayed_refs->num_heads_ready. As a result, we end up triggering the
WARN_ON in btrfs_select_ref_head.

Fixes: d7df2c796d7 (Btrfs: attach delayed ref updates to delayed ref heads)
Reported-by: Jon Nelson
Signed-off-by: Jeff Mahoney
Reviewed-by: Liu Bo
Signed-off-by: David Sterba
---
 fs/btrfs/extent-tree.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e97302f437a1..5366e50c84c6 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2572,7 +2572,10 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 				 */
 				if (must_insert_reserved)
 					locked_ref->must_insert_reserved = 1;
+				spin_lock(&delayed_refs->lock);
 				locked_ref->processing = 0;
+				delayed_refs->num_heads_ready++;
+				spin_unlock(&delayed_refs->lock);
 				btrfs_debug(fs_info,
 					    "run_delayed_extent_op returned %d",
 					    ret);
--
cgit v1.2.3
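The invariant the fix above restores can be modeled outside the kernel
in a few lines. Below is a minimal userspace C sketch — heads_on_list,
abort_processing and friends are illustrative stand-ins, not kernel
API: whoever backs out of processing a ref head must restore the ready
count under the same lock, or the consumer-side assertion (the WARN_ON
analogue in btrfs_select_ref_head) trips.

#include <assert.h>
#include <pthread.h>

static pthread_mutex_t refs_lock = PTHREAD_MUTEX_INITIALIZER;
static int heads_on_list = 1;	/* one locked head being processed */
static int num_heads_ready;	/* it was taken off the ready count */

/* Error path, post-fix shape: both updates in one critical section. */
static void abort_processing(void)
{
	pthread_mutex_lock(&refs_lock);
	num_heads_ready++;	/* the re-increment the patch adds */
	pthread_mutex_unlock(&refs_lock);
}

/* Consumer side, the btrfs_select_ref_head analogue. */
static void select_ref_head(void)
{
	pthread_mutex_lock(&refs_lock);
	assert(!(heads_on_list > 0 && num_heads_ready == 0)); /* the WARN_ON */
	pthread_mutex_unlock(&refs_lock);
}

int main(void)
{
	abort_processing();
	select_ref_head();
	return 0;
}

Built with 'cc -pthread', the assertion holds; remove the increment
from abort_processing() and it fails the same way the kernel WARN_ON
fired.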
From d0280996437081dd12ed1e982ac8aeaa62835ec4 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney
Date: Tue, 20 Dec 2016 13:28:28 -0500
Subject: btrfs: fix locking when we put back a delayed ref that's too new

In __btrfs_run_delayed_refs, when we put back a delayed ref that's too
new, we have already dropped the lock on locked_ref when we set
->processing = 0. This patch keeps the lock to cover that assignment.

Fixes: d7df2c796d7 (Btrfs: attach delayed ref updates to delayed ref heads)
Signed-off-by: Jeff Mahoney
Reviewed-by: Liu Bo
Signed-off-by: David Sterba
---
 fs/btrfs/extent-tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 5366e50c84c6..ac7e6713033c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2522,11 +2522,11 @@ static noinline int __btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
 		if (ref && ref->seq &&
 		    btrfs_check_delayed_seq(fs_info, delayed_refs, ref->seq)) {
 			spin_unlock(&locked_ref->lock);
-			btrfs_delayed_ref_unlock(locked_ref);
 			spin_lock(&delayed_refs->lock);
 			locked_ref->processing = 0;
 			delayed_refs->num_heads_ready++;
 			spin_unlock(&delayed_refs->lock);
+			btrfs_delayed_ref_unlock(locked_ref);
 			locked_ref = NULL;
 			cond_resched();
 			count++;
--
cgit v1.2.3

From e321f8a801d7b4c40da8005257b05b9c2b51b072 Mon Sep 17 00:00:00 2001
From: Liu Bo
Date: Wed, 30 Nov 2016 16:11:04 -0800
Subject: Btrfs: use down_read_nested to make lockdep silent

If @block_group is not @used_bg, btrfs_lock_cluster() will try to take
@used_bg's lock without dropping @block_group's lock, and lockdep has
thrown a scary deadlock warning about it. Fix it by using
down_read_nested.

Signed-off-by: Liu Bo
Reviewed-by: David Sterba
Signed-off-by: David Sterba
---
 fs/btrfs/extent-tree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index ac7e6713033c..dcd2e798767e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7387,7 +7387,8 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
 
 		spin_unlock(&cluster->refill_lock);
 
-		down_read(&used_bg->data_rwsem);
+		/* We should only have one-level nested. */
+		down_read_nested(&used_bg->data_rwsem, SINGLE_DEPTH_NESTING);
 
 		spin_lock(&cluster->refill_lock);
 		if (used_bg == cluster->block_group)
--
cgit v1.2.3
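Some context on the annotation used above: lockdep classifies locks by
their initialization site, so the data_rwsem of two different block
groups share one lock class, and holding both at once looks like
recursive locking even though the objects differ. A sketch of the
pattern — struct bg and lock_pair() are invented for illustration,
while down_read_nested() and SINGLE_DEPTH_NESTING are the real kernel
interfaces:

#include <linux/lockdep.h>
#include <linux/rwsem.h>

/* Illustrative: both rwsems are initialized at the same call site,
 * so lockdep assigns them a single lock class, as with block groups. */
struct bg {
	struct rw_semaphore data_rwsem;
};

static void lock_pair(struct bg *outer, struct bg *inner)
{
	down_read(&outer->data_rwsem);
	/*
	 * Same lock class as outer->data_rwsem: without the nesting
	 * annotation, lockdep reports a possible recursive deadlock
	 * even though outer != inner.
	 */
	down_read_nested(&inner->data_rwsem, SINGLE_DEPTH_NESTING);

	/* ... work under both locks ... */

	up_read(&inner->data_rwsem);
	up_read(&outer->data_rwsem);
}

The annotation only silences lockdep; the code must still guarantee, as
btrfs_lock_cluster() does, that the two rwsems never belong to the same
object.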
From 781feef7e6befafd4d9787d1f7ada1f9ccd504e4 Mon Sep 17 00:00:00 2001
From: Liu Bo
Date: Wed, 30 Nov 2016 16:20:25 -0800
Subject: Btrfs: fix lockdep warning about log_mutex

While checking INODE_REF/INODE_EXTREF for a corner case, we may acquire
a different inode's log_mutex while holding the current inode's
log_mutex, and lockdep has complained about this with a possible
deadlock warning. Fix this by using mutex_lock_nested() when processing
the other inode's log_mutex.

Reviewed-by: Filipe Manana
Signed-off-by: Liu Bo
Signed-off-by: David Sterba
---
 fs/btrfs/tree-log.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index f10bf5213ed8..eeffff84f280 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -37,6 +37,7 @@
  */
 #define LOG_INODE_ALL 0
 #define LOG_INODE_EXISTS 1
+#define LOG_OTHER_INODE 2
 
 /*
  * directory trouble cases
@@ -4641,7 +4642,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	if (S_ISDIR(inode->i_mode) ||
 	    (!test_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
 		       &BTRFS_I(inode)->runtime_flags) &&
-	     inode_only == LOG_INODE_EXISTS))
+	     inode_only >= LOG_INODE_EXISTS))
 		max_key.type = BTRFS_XATTR_ITEM_KEY;
 	else
 		max_key.type = (u8)-1;
@@ -4665,7 +4666,13 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 		return ret;
 	}
 
-	mutex_lock(&BTRFS_I(inode)->log_mutex);
+	if (inode_only == LOG_OTHER_INODE) {
+		inode_only = LOG_INODE_EXISTS;
+		mutex_lock_nested(&BTRFS_I(inode)->log_mutex,
+				  SINGLE_DEPTH_NESTING);
+	} else {
+		mutex_lock(&BTRFS_I(inode)->log_mutex);
+	}
 
 	/*
 	 * a brute force approach to making sure we get the most uptodate
@@ -4817,7 +4824,7 @@ again:
 			 * unpin it.
 			 */
 			err = btrfs_log_inode(trans, root, other_inode,
-					      LOG_INODE_EXISTS,
+					      LOG_OTHER_INODE,
 					      0, LLONG_MAX, ctx);
 			iput(other_inode);
 			if (err)
--
cgit v1.2.3

From c2931667c83ded6504b3857e99cc45b21fa496fb Mon Sep 17 00:00:00 2001
From: Liu Bo
Date: Thu, 22 Dec 2016 17:13:54 -0800
Subject: Btrfs: adjust outstanding_extents counter properly when dio write is split

Currently, how btrfs dio deals with a split dio write is not good
enough: if a dio write is split into several segments due to the lack
of contiguous space, a large dio write like 'dd bs=1G count=1' can end
up with an incorrect outstanding_extents counter, and endio will
complain loudly with an assertion.

This fixes the problem by compensating the outstanding_extents counter
in the inode if a large dio write gets split.

Reported-by: Anand Jain
Tested-by: Anand Jain
Signed-off-by: Liu Bo
Signed-off-by: David Sterba
---
 fs/btrfs/inode.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a713d9d324b0..81b9d9d0450c 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7623,11 +7623,18 @@ static void adjust_dio_outstanding_extents(struct inode *inode,
 	 * within our reservation, otherwise we need to adjust our inode
 	 * counter appropriately.
 	 */
-	if (dio_data->outstanding_extents) {
+	if (dio_data->outstanding_extents >= num_extents) {
 		dio_data->outstanding_extents -= num_extents;
 	} else {
+		/*
+		 * If dio write length has been split due to no large enough
+		 * contiguous space, we need to compensate our inode counter
+		 * appropriately.
+		 */
+		u64 num_needed = num_extents - dio_data->outstanding_extents;
+
 		spin_lock(&BTRFS_I(inode)->lock);
-		BTRFS_I(inode)->outstanding_extents += num_extents;
+		BTRFS_I(inode)->outstanding_extents += num_needed;
 		spin_unlock(&BTRFS_I(inode)->lock);
 	}
 }
--
cgit v1.2.3
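The compensation arithmetic above is easier to see with concrete
numbers. A small userspace model — it assumes btrfs's 128 MiB maximum
extent size, and the segment count is made up for the example: a 1 GiB
dio write prepays 8 outstanding extents, but if the allocator splits it
into, say, 10 smaller segments, each submission still consumes one
extent, and the 2-extent shortfall has to be added back to the inode's
counter, which is what the else branch does.

#include <stdio.h>

#define MAX_EXTENT_SIZE (128ULL << 20)	/* btrfs max extent size: 128 MiB */

/* Extents needed to cover len bytes, rounding up, as the reservation does. */
static unsigned long long nr_extents(unsigned long long len)
{
	return (len + MAX_EXTENT_SIZE - 1) / MAX_EXTENT_SIZE;
}

int main(void)
{
	unsigned long long reserved = nr_extents(1ULL << 30);	/* 1 GiB -> 8 */
	unsigned long long inode_extra = 0;
	int i;

	/* The write gets split into 10 segments, each under 128 MiB. */
	for (i = 0; i < 10; i++) {
		if (reserved)
			reserved--;	/* covered by the reservation */
		else
			inode_extra++;	/* the compensation path */
	}
	printf("compensated inode counter by %llu extents\n", inode_extra);
	return 0;
}

This prints 'compensated inode counter by 2 extents'; without the
compensation, the counter underflows and the endio assertion fires.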
From ac0c7cf8be00f269f82964cf7b144ca3edc5dbc4 Mon Sep 17 00:00:00 2001
From: David Sterba
Date: Fri, 6 Jan 2017 14:12:51 +0100
Subject: btrfs: fix crash when tracepoint arguments are freed by wq callbacks

Enabling btrfs tracepoints leads to an instant crash, as reported. The
wq callbacks could free the memory, and the tracepoints then
dereferenced the freed members to get to fs_info.

The proposed fix https://marc.info/?l=linux-btrfs&m=148172436722606&w=2
removed the tracepoints, but we can preserve them by passing only the
required data in a safe way.

Fixes: bc074524e123 ("btrfs: prefix fsid to all trace events")
CC: stable@vger.kernel.org # 4.8+
Reported-by: Sebastian Andrzej Siewior
Reviewed-by: Qu Wenruo
Signed-off-by: David Sterba
---
 fs/btrfs/async-thread.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 63d197724519..ff0b0be92d61 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -273,6 +273,8 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
 	unsigned long flags;
 
 	while (1) {
+		void *wtag;
+
 		spin_lock_irqsave(lock, flags);
 		if (list_empty(list))
 			break;
@@ -299,11 +301,13 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
 		spin_unlock_irqrestore(lock, flags);
 
 		/*
-		 * we don't want to call the ordered free functions
-		 * with the lock held though
+		 * We don't want to call the ordered free functions with the
+		 * lock held though. Save the work as tag for the trace event,
+		 * because the callback could free the structure.
 		 */
+		wtag = work;
 		work->ordered_free(work);
-		trace_btrfs_all_work_done(work);
+		trace_btrfs_all_work_done(wq->fs_info, wtag);
 	}
 	spin_unlock_irqrestore(lock, flags);
 }
@@ -311,6 +315,7 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
 static void normal_work_helper(struct btrfs_work *work)
 {
 	struct __btrfs_workqueue *wq;
+	void *wtag;
 	int need_order = 0;
 
 	/*
@@ -324,6 +329,8 @@ static void normal_work_helper(struct btrfs_work *work)
 	if (work->ordered_func)
 		need_order = 1;
 	wq = work->wq;
+	/* Safe for tracepoints in case work gets freed by the callback */
+	wtag = work;
 
 	trace_btrfs_work_sched(work);
 	thresh_exec_hook(wq);
@@ -333,7 +340,7 @@ static void normal_work_helper(struct btrfs_work *work)
 		run_ordered_work(wq);
 	}
 	if (!need_order)
-		trace_btrfs_all_work_done(work);
+		trace_btrfs_all_work_done(wq->fs_info, wtag);
 }
 
 void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
--
cgit v1.2.3

From 92a1bf76a89ad338f00eb9a2c7689a3907fbcaad Mon Sep 17 00:00:00 2001
From: Liu Bo
Date: Thu, 17 Nov 2016 15:00:50 -0800
Subject: Btrfs: add 'inode' for extent map tracepoint

'inode' is an important field for btrfs_get_extent, let's trace it.

Signed-off-by: Liu Bo
Reviewed-by: David Sterba
Signed-off-by: David Sterba
---
 fs/btrfs/inode.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index a713d9d324b0..fab189c67eff 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7059,7 +7059,7 @@ insert:
 	write_unlock(&em_tree->lock);
 out:
 
-	trace_btrfs_get_extent(root, em);
+	trace_btrfs_get_extent(root, inode, em);
 
 	btrfs_free_path(path);
 	if (trans) {
--
cgit v1.2.3
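Returning to the async-thread fix above (trace_btrfs_all_work_done):
the pattern generalizes to any callback that may free its own argument.
A self-contained userspace sketch — the struct and the names are
invented for illustration — of why the pointer value must be saved
before the callback runs and then used only as an opaque tag:

#include <stdio.h>
#include <stdlib.h>

struct work {
	void (*ordered_free)(struct work *);
	int id;
};

static void work_free(struct work *w)
{
	free(w);
}

int main(void)
{
	struct work *w = malloc(sizeof(*w));
	void *wtag;

	if (!w)
		return 1;
	w->ordered_free = work_free;
	w->id = 42;

	wtag = w;		/* save the value before the callback */
	w->ordered_free(w);	/* may free w, like btrfs ordered_free */

	/* Safe: only the saved pointer value is printed. Reading w->id
	 * here would be the use-after-free the patch removes. */
	printf("work done: tag=%p\n", wtag);
	return 0;
}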