summaryrefslogtreecommitdiffstats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.com>2017-06-05 16:49:39 +1000
committerShaohua Li <shli@fb.com>2017-06-13 10:18:01 -0700
commitcc27b0c78c79680d128dbac79de0d40556d041bb (patch)
tree1f32d4acd29d825eecc1aef90d423cdacea8121c /drivers/md/md.c
parent63f700aab4c11d46626de3cd051dae56cf7e9056 (diff)
downloadlinux-cc27b0c78c79680d128dbac79de0d40556d041bb.tar.gz
linux-cc27b0c78c79680d128dbac79de0d40556d041bb.tar.bz2
linux-cc27b0c78c79680d128dbac79de0d40556d041bb.zip
md: fix deadlock between mddev_suspend() and md_write_start()
If mddev_suspend() races with md_write_start() we can deadlock with mddev_suspend() waiting for the request that is currently in md_write_start() to complete the ->make_request() call, and md_write_start() waiting for the metadata to be updated to mark the array as 'dirty'. As metadata updates done by md_check_recovery() only happen then the mddev_lock() can be claimed, and as mddev_suspend() is often called with the lock held, these threads wait indefinitely for each other. We fix this by having md_write_start() abort if mddev_suspend() is happening, and ->make_request() aborts if md_write_start() aborted. md_make_request() can detect this abort, decrease the ->active_io count, and wait for mddev_suspend(). Reported-by: Nix <nix@esperi.org.uk> Fix: 68866e425be2(MD: no sync IO while suspended) Cc: stable@vger.kernel.org Signed-off-by: NeilBrown <neilb@suse.com> Signed-off-by: Shaohua Li <shli@fb.com>
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c22
1 files changed, 17 insertions, 5 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 87edc342ccb3..d7847014821a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -277,7 +277,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
bio_endio(bio);
return BLK_QC_T_NONE;
}
- smp_rmb(); /* Ensure implications of 'active' are visible */
+check_suspended:
rcu_read_lock();
if (mddev->suspended) {
DEFINE_WAIT(__wait);
@@ -302,7 +302,11 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
sectors = bio_sectors(bio);
/* bio could be mergeable after passing to underlayer */
bio->bi_opf &= ~REQ_NOMERGE;
- mddev->pers->make_request(mddev, bio);
+ if (!mddev->pers->make_request(mddev, bio)) {
+ atomic_dec(&mddev->active_io);
+ wake_up(&mddev->sb_wait);
+ goto check_suspended;
+ }
cpu = part_stat_lock();
part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
@@ -327,6 +331,7 @@ void mddev_suspend(struct mddev *mddev)
if (mddev->suspended++)
return;
synchronize_rcu();
+ wake_up(&mddev->sb_wait);
wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
mddev->pers->quiesce(mddev, 1);
@@ -7950,12 +7955,14 @@ EXPORT_SYMBOL(md_done_sync);
* If we need to update some array metadata (e.g. 'active' flag
* in superblock) before writing, schedule a superblock update
* and wait for it to complete.
+ * A return value of 'false' means that the write wasn't recorded
+ * and cannot proceed as the array is being suspend.
*/
-void md_write_start(struct mddev *mddev, struct bio *bi)
+bool md_write_start(struct mddev *mddev, struct bio *bi)
{
int did_change = 0;
if (bio_data_dir(bi) != WRITE)
- return;
+ return true;
BUG_ON(mddev->ro == 1);
if (mddev->ro == 2) {
@@ -7987,7 +7994,12 @@ void md_write_start(struct mddev *mddev, struct bio *bi)
if (did_change)
sysfs_notify_dirent_safe(mddev->sysfs_state);
wait_event(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) && !mddev->suspended);
+ if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
+ percpu_ref_put(&mddev->writes_pending);
+ return false;
+ }
+ return true;
}
EXPORT_SYMBOL(md_write_start);