From cc27b0c78c79680d128dbac79de0d40556d041bb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 5 Jun 2017 16:49:39 +1000 Subject: md: fix deadlock between mddev_suspend() and md_write_start() If mddev_suspend() races with md_write_start() we can deadlock with mddev_suspend() waiting for the request that is currently in md_write_start() to complete the ->make_request() call, and md_write_start() waiting for the metadata to be updated to mark the array as 'dirty'. As metadata updates done by md_check_recovery() only happen then the mddev_lock() can be claimed, and as mddev_suspend() is often called with the lock held, these threads wait indefinitely for each other. We fix this by having md_write_start() abort if mddev_suspend() is happening, and ->make_request() aborts if md_write_start() aborted. md_make_request() can detect this abort, decrease the ->active_io count, and wait for mddev_suspend(). Reported-by: Nix Fix: 68866e425be2(MD: no sync IO while suspended) Cc: stable@vger.kernel.org Signed-off-by: NeilBrown Signed-off-by: Shaohua Li --- drivers/md/md.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 87edc342ccb3..d7847014821a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -277,7 +277,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) bio_endio(bio); return BLK_QC_T_NONE; } - smp_rmb(); /* Ensure implications of 'active' are visible */ +check_suspended: rcu_read_lock(); if (mddev->suspended) { DEFINE_WAIT(__wait); @@ -302,7 +302,11 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio) sectors = bio_sectors(bio); /* bio could be mergeable after passing to underlayer */ bio->bi_opf &= ~REQ_NOMERGE; - mddev->pers->make_request(mddev, bio); + if (!mddev->pers->make_request(mddev, bio)) { + atomic_dec(&mddev->active_io); + wake_up(&mddev->sb_wait); + goto check_suspended; + } cpu = part_stat_lock(); part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); @@ -327,6 +331,7 @@ void mddev_suspend(struct mddev *mddev) if (mddev->suspended++) return; synchronize_rcu(); + wake_up(&mddev->sb_wait); wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0); mddev->pers->quiesce(mddev, 1); @@ -7950,12 +7955,14 @@ EXPORT_SYMBOL(md_done_sync); * If we need to update some array metadata (e.g. 'active' flag * in superblock) before writing, schedule a superblock update * and wait for it to complete. + * A return value of 'false' means that the write wasn't recorded + * and cannot proceed as the array is being suspend. */ -void md_write_start(struct mddev *mddev, struct bio *bi) +bool md_write_start(struct mddev *mddev, struct bio *bi) { int did_change = 0; if (bio_data_dir(bi) != WRITE) - return; + return true; BUG_ON(mddev->ro == 1); if (mddev->ro == 2) { @@ -7987,7 +7994,12 @@ void md_write_start(struct mddev *mddev, struct bio *bi) if (did_change) sysfs_notify_dirent_safe(mddev->sysfs_state); wait_event(mddev->sb_wait, - !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)); + !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) && !mddev->suspended); + if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) { + percpu_ref_put(&mddev->writes_pending); + return false; + } + return true; } EXPORT_SYMBOL(md_write_start); -- cgit v1.2.3