summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2012-04-04 08:31:06 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2012-04-04 08:31:06 -0700
commit36bbffc0d55fbb7ab2c2de2613e3bbb0a6749b9b (patch)
treec3091d2eb5cc6f451ce0fcee6bfb2fed63db56e9
parent20a2a811602b16c42ce88bada3d52712cdfb988b (diff)
parent5020ad7d143ccfcf8149974096220d59e5572120 (diff)
downloadlinux-36bbffc0d55fbb7ab2c2de2613e3bbb0a6749b9b.tar.gz
linux-36bbffc0d55fbb7ab2c2de2613e3bbb0a6749b9b.tar.bz2
linux-36bbffc0d55fbb7ab2c2de2613e3bbb0a6749b9b.zip
Merge tag 'md-3.4-fixes' of git://neil.brown.name/md
Pull assorted md fixes from Neil Brown: - some RAID levels didn't clear up properly if md_integrity_register failed - a 'check' of RAID5/RAID6 doesn't actually read any data since a recent patch - so fix that (and mark for -stable) - a couple of other minor bugs. * tag 'md-3.4-fixes' of git://neil.brown.name/md: md/raid1,raid10: don't compare excess byte during consistency check. md/raid5: Fix a bug about judging if the operation is syncing or replacing md/raid1:Remove unnecessary rcu_dereference(conf->mirrors[i].rdev). md: Avoid OOPS when reshaping raid1 to raid0 md/raid5: fix handling of bad blocks during recovery. md/raid1: If md_integrity_register() failed,run() must free the mem md/raid0: If md_integrity_register() fails, raid0_run() must free the mem. md/linear: If md_integrity_register() fails, linear_run() must free the mem.
-rw-r--r--drivers/md/linear.c9
-rw-r--r--drivers/md/raid0.c27
-rw-r--r--drivers/md/raid1.c13
-rw-r--r--drivers/md/raid10.c2
-rw-r--r--drivers/md/raid5.c59
5 files changed, 75 insertions, 35 deletions
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index b0fcc7d02adb..fa211d80fc0a 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -198,6 +198,7 @@ out:
static int linear_run (struct mddev *mddev)
{
struct linear_conf *conf;
+ int ret;
if (md_check_no_bitmap(mddev))
return -EINVAL;
@@ -211,7 +212,13 @@ static int linear_run (struct mddev *mddev)
blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec);
mddev->queue->backing_dev_info.congested_fn = linear_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
- return md_integrity_register(mddev);
+
+ ret = md_integrity_register(mddev);
+ if (ret) {
+ kfree(conf);
+ mddev->private = NULL;
+ }
+ return ret;
}
static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 6f31f5596e01..de63a1fc3737 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -407,6 +407,8 @@ static sector_t raid0_size(struct mddev *mddev, sector_t sectors, int raid_disks
return array_sectors;
}
+static int raid0_stop(struct mddev *mddev);
+
static int raid0_run(struct mddev *mddev)
{
struct r0conf *conf;
@@ -454,7 +456,12 @@ static int raid0_run(struct mddev *mddev)
blk_queue_merge_bvec(mddev->queue, raid0_mergeable_bvec);
dump_zones(mddev);
- return md_integrity_register(mddev);
+
+ ret = md_integrity_register(mddev);
+ if (ret)
+ raid0_stop(mddev);
+
+ return ret;
}
static int raid0_stop(struct mddev *mddev)
@@ -625,6 +632,7 @@ static void *raid0_takeover_raid10(struct mddev *mddev)
static void *raid0_takeover_raid1(struct mddev *mddev)
{
struct r0conf *priv_conf;
+ int chunksect;
/* Check layout:
* - (N - 1) mirror drives must be already faulty
@@ -635,10 +643,25 @@ static void *raid0_takeover_raid1(struct mddev *mddev)
return ERR_PTR(-EINVAL);
}
+ /*
+ * a raid1 doesn't have the notion of chunk size, so
+ * figure out the largest suitable size we can use.
+ */
+ chunksect = 64 * 2; /* 64K by default */
+
+ /* The array must be an exact multiple of chunksize */
+ while (chunksect && (mddev->array_sectors & (chunksect - 1)))
+ chunksect >>= 1;
+
+ if ((chunksect << 9) < PAGE_SIZE)
+ /* array size does not allow a suitable chunk size */
+ return ERR_PTR(-EINVAL);
+
/* Set new parameters */
mddev->new_level = 0;
mddev->new_layout = 0;
- mddev->new_chunk_sectors = 128; /* by default set chunk size to 64k */
+ mddev->new_chunk_sectors = chunksect;
+ mddev->chunk_sectors = chunksect;
mddev->delta_disks = 1 - mddev->raid_disks;
mddev->raid_disks = 1;
/* make sure it will be not marked as dirty */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 4a40a200d769..d35e4c991e38 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1738,7 +1738,7 @@ static int process_checks(struct r1bio *r1_bio)
s = sbio->bi_io_vec[j].bv_page;
if (memcmp(page_address(p),
page_address(s),
- PAGE_SIZE))
+ sbio->bi_io_vec[j].bv_len))
break;
}
} else
@@ -2386,8 +2386,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
int ok = 1;
for (i = 0 ; i < conf->raid_disks * 2 ; i++)
if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
- struct md_rdev *rdev =
- rcu_dereference(conf->mirrors[i].rdev);
+ struct md_rdev *rdev = conf->mirrors[i].rdev;
ok = rdev_set_badblocks(rdev, sector_nr,
min_bad, 0
) && ok;
@@ -2636,11 +2635,13 @@ static struct r1conf *setup_conf(struct mddev *mddev)
return ERR_PTR(err);
}
+static int stop(struct mddev *mddev);
static int run(struct mddev *mddev)
{
struct r1conf *conf;
int i;
struct md_rdev *rdev;
+ int ret;
if (mddev->level != 1) {
printk(KERN_ERR "md/raid1:%s: raid level not set to mirroring (%d)\n",
@@ -2705,7 +2706,11 @@ static int run(struct mddev *mddev)
mddev->queue->backing_dev_info.congested_data = mddev;
blk_queue_merge_bvec(mddev->queue, raid1_mergeable_bvec);
}
- return md_integrity_register(mddev);
+
+ ret = md_integrity_register(mddev);
+ if (ret)
+ stop(mddev);
+ return ret;
}
static int stop(struct mddev *mddev)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 3540316886f2..fff782189e48 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1821,7 +1821,7 @@ static void sync_request_write(struct mddev *mddev, struct r10bio *r10_bio)
for (j = 0; j < vcnt; j++)
if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
page_address(tbio->bi_io_vec[j].bv_page),
- PAGE_SIZE))
+ fbio->bi_io_vec[j].bv_len))
break;
if (j == vcnt)
continue;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 23ac880bba9a..f351422938e0 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2471,39 +2471,41 @@ handle_failed_sync(struct r5conf *conf, struct stripe_head *sh,
int abort = 0;
int i;
- md_done_sync(conf->mddev, STRIPE_SECTORS, 0);
clear_bit(STRIPE_SYNCING, &sh->state);
s->syncing = 0;
s->replacing = 0;
/* There is nothing more to do for sync/check/repair.
+ * Don't even need to abort as that is handled elsewhere
+ * if needed, and not always wanted e.g. if there is a known
+ * bad block here.
* For recover/replace we need to record a bad block on all
* non-sync devices, or abort the recovery
*/
- if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery))
- return;
- /* During recovery devices cannot be removed, so locking and
- * refcounting of rdevs is not needed
- */
- for (i = 0; i < conf->raid_disks; i++) {
- struct md_rdev *rdev = conf->disks[i].rdev;
- if (rdev
- && !test_bit(Faulty, &rdev->flags)
- && !test_bit(In_sync, &rdev->flags)
- && !rdev_set_badblocks(rdev, sh->sector,
- STRIPE_SECTORS, 0))
- abort = 1;
- rdev = conf->disks[i].replacement;
- if (rdev
- && !test_bit(Faulty, &rdev->flags)
- && !test_bit(In_sync, &rdev->flags)
- && !rdev_set_badblocks(rdev, sh->sector,
- STRIPE_SECTORS, 0))
- abort = 1;
- }
- if (abort) {
- conf->recovery_disabled = conf->mddev->recovery_disabled;
- set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery);
+ if (test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) {
+ /* During recovery devices cannot be removed, so
+ * locking and refcounting of rdevs is not needed
+ */
+ for (i = 0; i < conf->raid_disks; i++) {
+ struct md_rdev *rdev = conf->disks[i].rdev;
+ if (rdev
+ && !test_bit(Faulty, &rdev->flags)
+ && !test_bit(In_sync, &rdev->flags)
+ && !rdev_set_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS, 0))
+ abort = 1;
+ rdev = conf->disks[i].replacement;
+ if (rdev
+ && !test_bit(Faulty, &rdev->flags)
+ && !test_bit(In_sync, &rdev->flags)
+ && !rdev_set_badblocks(rdev, sh->sector,
+ STRIPE_SECTORS, 0))
+ abort = 1;
+ }
+ if (abort)
+ conf->recovery_disabled =
+ conf->mddev->recovery_disabled;
}
+ md_done_sync(conf->mddev, STRIPE_SECTORS, !abort);
}
static int want_replace(struct stripe_head *sh, int disk_idx)
@@ -3203,7 +3205,8 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
/* Not in-sync */;
else if (is_bad) {
/* also not in-sync */
- if (!test_bit(WriteErrorSeen, &rdev->flags)) {
+ if (!test_bit(WriteErrorSeen, &rdev->flags) &&
+ test_bit(R5_UPTODATE, &dev->flags)) {
/* treat as in-sync, but with a read error
* which we can now try to correct
*/
@@ -3276,12 +3279,14 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s)
/* If there is a failed device being replaced,
* we must be recovering.
* else if we are after recovery_cp, we must be syncing
+ * else if MD_RECOVERY_REQUESTED is set, we also are syncing.
* else we can only be replacing
* sync and recovery both need to read all devices, and so
* use the same flag.
*/
if (do_recovery ||
- sh->sector >= conf->mddev->recovery_cp)
+ sh->sector >= conf->mddev->recovery_cp ||
+ test_bit(MD_RECOVERY_REQUESTED, &(conf->mddev->recovery)))
s->syncing = 1;
else
s->replacing = 1;