summaryrefslogtreecommitdiffstats
path: root/drivers/md/md.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c54
1 files changed, 43 insertions, 11 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 31bcbfb09fef..b01e458d31e9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -203,6 +203,14 @@ struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
}
EXPORT_SYMBOL_GPL(bio_alloc_mddev);
+static struct bio *md_bio_alloc_sync(struct mddev *mddev)
+{
+ if (!mddev || !mddev->sync_set)
+ return bio_alloc(GFP_NOIO, 1);
+
+ return bio_alloc_bioset(GFP_NOIO, 1, mddev->sync_set);
+}
+
/*
* We have a system wide 'event count' that is incremented
* on any 'interesting' event, and readers of /proc/mdstat
@@ -277,7 +285,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
bio_endio(bio);
return BLK_QC_T_NONE;
}
- smp_rmb(); /* Ensure implications of 'active' are visible */
+check_suspended:
rcu_read_lock();
if (mddev->suspended) {
DEFINE_WAIT(__wait);
@@ -302,7 +310,11 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
sectors = bio_sectors(bio);
/* bio could be mergeable after passing to underlayer */
bio->bi_opf &= ~REQ_NOMERGE;
- mddev->pers->make_request(mddev, bio);
+ if (!mddev->pers->make_request(mddev, bio)) {
+ atomic_dec(&mddev->active_io);
+ wake_up(&mddev->sb_wait);
+ goto check_suspended;
+ }
cpu = part_stat_lock();
part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
@@ -327,6 +339,7 @@ void mddev_suspend(struct mddev *mddev)
if (mddev->suspended++)
return;
synchronize_rcu();
+ wake_up(&mddev->sb_wait);
wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
mddev->pers->quiesce(mddev, 1);
@@ -462,7 +475,7 @@ static void mddev_delayed_delete(struct work_struct *ws);
static void mddev_put(struct mddev *mddev)
{
- struct bio_set *bs = NULL;
+ struct bio_set *bs = NULL, *sync_bs = NULL;
if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
return;
@@ -472,7 +485,9 @@ static void mddev_put(struct mddev *mddev)
* so destroy it */
list_del_init(&mddev->all_mddevs);
bs = mddev->bio_set;
+ sync_bs = mddev->sync_set;
mddev->bio_set = NULL;
+ mddev->sync_set = NULL;
if (mddev->gendisk) {
/* We did a probe so need to clean up. Call
* queue_work inside the spinlock so that
@@ -487,6 +502,8 @@ static void mddev_put(struct mddev *mddev)
spin_unlock(&all_mddevs_lock);
if (bs)
bioset_free(bs);
+ if (sync_bs)
+ bioset_free(sync_bs);
}
static void md_safemode_timeout(unsigned long data);
@@ -751,7 +768,7 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
if (test_bit(Faulty, &rdev->flags))
return;
- bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
+ bio = md_bio_alloc_sync(mddev);
atomic_inc(&rdev->nr_pending);
@@ -783,7 +800,7 @@ int md_super_wait(struct mddev *mddev)
int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
struct page *page, int op, int op_flags, bool metadata_op)
{
- struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
+ struct bio *bio = md_bio_alloc_sync(rdev->mddev);
int ret;
bio->bi_bdev = (metadata_op && rdev->meta_bdev) ?
@@ -1852,7 +1869,7 @@ retry:
max_dev = le32_to_cpu(sb->max_dev);
for (i=0; i<max_dev;i++)
- sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_FAULTY);
+ sb->dev_roles[i] = cpu_to_le16(MD_DISK_ROLE_SPARE);
if (test_bit(MD_HAS_JOURNAL, &mddev->flags))
sb->feature_map |= cpu_to_le32(MD_FEATURE_JOURNAL);
@@ -2270,7 +2287,7 @@ static void export_array(struct mddev *mddev)
static bool set_in_sync(struct mddev *mddev)
{
- WARN_ON_ONCE(!spin_is_locked(&mddev->lock));
+ WARN_ON_ONCE(NR_CPUS != 1 && !spin_is_locked(&mddev->lock));
if (!mddev->in_sync) {
mddev->sync_checkers++;
spin_unlock(&mddev->lock);
@@ -5432,6 +5449,11 @@ int md_run(struct mddev *mddev)
if (!mddev->bio_set)
return -ENOMEM;
}
+ if (mddev->sync_set == NULL) {
+ mddev->sync_set = bioset_create(BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+ if (!mddev->sync_set)
+ return -ENOMEM;
+ }
spin_lock(&pers_lock);
pers = find_pers(mddev->level, mddev->clevel);
@@ -7950,12 +7972,14 @@ EXPORT_SYMBOL(md_done_sync);
* If we need to update some array metadata (e.g. 'active' flag
* in superblock) before writing, schedule a superblock update
* and wait for it to complete.
+ * A return value of 'false' means that the write wasn't recorded
+ * and cannot proceed as the array is being suspend.
*/
-void md_write_start(struct mddev *mddev, struct bio *bi)
+bool md_write_start(struct mddev *mddev, struct bio *bi)
{
int did_change = 0;
if (bio_data_dir(bi) != WRITE)
- return;
+ return true;
BUG_ON(mddev->ro == 1);
if (mddev->ro == 2) {
@@ -7972,7 +7996,7 @@ void md_write_start(struct mddev *mddev, struct bio *bi)
if (mddev->safemode == 1)
mddev->safemode = 0;
/* sync_checkers is always 0 when writes_pending is in per-cpu mode */
- if (mddev->in_sync || !mddev->sync_checkers) {
+ if (mddev->in_sync || mddev->sync_checkers) {
spin_lock(&mddev->lock);
if (mddev->in_sync) {
mddev->in_sync = 0;
@@ -7987,7 +8011,12 @@ void md_write_start(struct mddev *mddev, struct bio *bi)
if (did_change)
sysfs_notify_dirent_safe(mddev->sysfs_state);
wait_event(mddev->sb_wait,
- !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags));
+ !test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags) && !mddev->suspended);
+ if (test_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags)) {
+ percpu_ref_put(&mddev->writes_pending);
+ return false;
+ }
+ return true;
}
EXPORT_SYMBOL(md_write_start);
@@ -8627,6 +8656,9 @@ void md_check_recovery(struct mddev *mddev)
if (mddev_trylock(mddev)) {
int spares = 0;
+ if (!mddev->external && mddev->safemode == 1)
+ mddev->safemode = 0;
+
if (mddev->ro) {
struct md_rdev *rdev;
if (!mddev->external && mddev->in_sync)