diff options
author | Filipe Manana <fdmanana@suse.com> | 2015-02-09 21:14:24 +0000 |
---|---|---|
committer | Chris Mason <clm@fb.com> | 2015-02-14 08:19:14 -0800 |
commit | f55985f4dda5cfb6967c17e96237f3c859076eb3 (patch) | |
tree | a821d5a175b47ecf2896ad0f0da451454c69f40e /fs/btrfs/scrub.c | |
parent | 575849ecf5d103ca9bbf0a6b9e89eba335d4e750 (diff) | |
download | linux-f55985f4dda5cfb6967c17e96237f3c859076eb3.tar.gz linux-f55985f4dda5cfb6967c17e96237f3c859076eb3.tar.bz2 linux-f55985f4dda5cfb6967c17e96237f3c859076eb3.zip |
Btrfs: scrub, fix sleep in atomic context
My previous patch "Btrfs: fix scrub race leading to use-after-free"
introduced the possibility to sleep in an atomic context, which happens
when the scrub_lock mutex is held at the time scrub_pending_bio_dec()
is called - this function can be called under an atomic context.
Chris ran into this in a debug kernel which gave the following trace:
[ 1928.950319] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:621
[ 1928.967334] in_atomic(): 1, irqs_disabled(): 0, pid: 149670, name: fsstress
[ 1928.981324] INFO: lockdep is turned off.
[ 1928.989244] CPU: 24 PID: 149670 Comm: fsstress Tainted: G W 3.19.0-rc7-mason+ #41
[ 1929.006418] Hardware name: ZTSYSTEMS Echo Ridge T4 /A9DRPF-10D, BIOS 1.07 05/10/2012
[ 1929.022207] ffffffff81a22cf8 ffff881076e03b78 ffffffff816b8dd9 ffff881076e03b78
[ 1929.037267] ffff880d8e828710 ffff881076e03ba8 ffffffff810856c4 ffff881076e03bc8
[ 1929.052315] 0000000000000000 000000000000026d ffffffff81a22cf8 ffff881076e03bd8
[ 1929.067381] Call Trace:
[ 1929.072344] <IRQ> [<ffffffff816b8dd9>] dump_stack+0x4f/0x6e
[ 1929.083968] [<ffffffff810856c4>] ___might_sleep+0x174/0x230
[ 1929.095352] [<ffffffff810857d2>] __might_sleep+0x52/0x90
[ 1929.106223] [<ffffffff816bb68f>] mutex_lock_nested+0x2f/0x3b0
[ 1929.117951] [<ffffffff810ab37d>] ? trace_hardirqs_on+0xd/0x10
[ 1929.129708] [<ffffffffa05dc838>] scrub_pending_bio_dec+0x38/0x70 [btrfs]
[ 1929.143370] [<ffffffffa05dd0e0>] scrub_parity_bio_endio+0x50/0x70 [btrfs]
[ 1929.157191] [<ffffffff812fa603>] bio_endio+0x53/0xa0
[ 1929.167382] [<ffffffffa05f96bc>] rbio_orig_end_io+0x7c/0xa0 [btrfs]
[ 1929.180161] [<ffffffffa05f97ba>] raid_write_parity_end_io+0x5a/0x80 [btrfs]
[ 1929.194318] [<ffffffff812fa603>] bio_endio+0x53/0xa0
[ 1929.204496] [<ffffffff8130401b>] blk_update_request+0x1eb/0x450
[ 1929.216569] [<ffffffff81096e58>] ? trigger_load_balance+0x78/0x500
[ 1929.229176] [<ffffffff8144c74d>] scsi_end_request+0x3d/0x1f0
[ 1929.240740] [<ffffffff8144ccac>] scsi_io_completion+0xac/0x5b0
[ 1929.252654] [<ffffffff81441c50>] scsi_finish_command+0xf0/0x150
[ 1929.264725] [<ffffffff8144d317>] scsi_softirq_done+0x147/0x170
[ 1929.276635] [<ffffffff8130ace6>] blk_done_softirq+0x86/0xa0
[ 1929.288014] [<ffffffff8105d92e>] __do_softirq+0xde/0x600
[ 1929.298885] [<ffffffff8105df6d>] irq_exit+0xbd/0xd0
(...)
Fix this by using a reference count on the scrub context structure
instead of locking the scrub_lock mutex.
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r-- | fs/btrfs/scrub.c | 39 |
1 files changed, 23 insertions, 16 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 6d6e155c8c8b..db21f17df996 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -193,6 +193,15 @@ struct scrub_ctx { */ struct btrfs_scrub_progress stat; spinlock_t stat_lock; + + /* + * Use a ref counter to avoid use-after-free issues. Scrub workers + * decrement bios_in_flight and workers_pending and then do a wakeup + * on the list_wait wait queue. We must ensure the main scrub task + * doesn't free the scrub context before or while the workers are + * doing the wakeup() call. + */ + atomic_t refs; }; struct scrub_fixup_nodatasum { @@ -297,26 +306,20 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, static void copy_nocow_pages_worker(struct btrfs_work *work); static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); +static void scrub_put_ctx(struct scrub_ctx *sctx); static void scrub_pending_bio_inc(struct scrub_ctx *sctx) { + atomic_inc(&sctx->refs); atomic_inc(&sctx->bios_in_flight); } static void scrub_pending_bio_dec(struct scrub_ctx *sctx) { - struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; - - /* - * Hold the scrub_lock while doing the wakeup to ensure the - * sctx (and its wait queue list_wait) isn't destroyed/freed - * during the wakeup. - */ - mutex_lock(&fs_info->scrub_lock); atomic_dec(&sctx->bios_in_flight); wake_up(&sctx->list_wait); - mutex_unlock(&fs_info->scrub_lock); + scrub_put_ctx(sctx); } static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) @@ -350,6 +353,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx) { struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; + atomic_inc(&sctx->refs); /* * increment scrubs_running to prevent cancel requests from * completing as long as a worker is running. we must also @@ -388,15 +392,11 @@ static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx) mutex_lock(&fs_info->scrub_lock); atomic_dec(&fs_info->scrubs_running); atomic_dec(&fs_info->scrubs_paused); + mutex_unlock(&fs_info->scrub_lock); atomic_dec(&sctx->workers_pending); wake_up(&fs_info->scrub_pause_wait); - /* - * Hold the scrub_lock while doing the wakeup to ensure the - * sctx (and its wait queue list_wait) isn't destroyed/freed - * during the wakeup. - */ wake_up(&sctx->list_wait); - mutex_unlock(&fs_info->scrub_lock); + scrub_put_ctx(sctx); } static void scrub_free_csums(struct scrub_ctx *sctx) @@ -442,6 +442,12 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx) kfree(sctx); } +static void scrub_put_ctx(struct scrub_ctx *sctx) +{ + if (atomic_dec_and_test(&sctx->refs)) + scrub_free_ctx(sctx); +} + static noinline_for_stack struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) { @@ -466,6 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) sctx = kzalloc(sizeof(*sctx), GFP_NOFS); if (!sctx) goto nomem; + atomic_set(&sctx->refs, 1); sctx->is_dev_replace = is_dev_replace; sctx->pages_per_rd_bio = pages_per_rd_bio; sctx->curr = -1; @@ -3739,7 +3746,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, scrub_workers_put(fs_info); mutex_unlock(&fs_info->scrub_lock); - scrub_free_ctx(sctx); + scrub_put_ctx(sctx); return ret; } |