summaryrefslogtreecommitdiffstats
path: root/drivers
diff options
context:
space:
mode:
authorJonathan Brassow <jbrassow@redhat.com>2013-05-02 14:19:24 -0500
committerNeilBrown <neilb@suse.de>2013-06-14 08:10:24 +1000
commit9092c02d943515b3c9ffd5d0003527f8cc1dd77b (patch)
tree686180c0788704a8cbde2080b83e7ace1255b277 /drivers
parent25e33ed9c711c8d64c403a17d4a2cdeac213800b (diff)
downloadlinux-stable-9092c02d943515b3c9ffd5d0003527f8cc1dd77b.tar.gz
linux-stable-9092c02d943515b3c9ffd5d0003527f8cc1dd77b.tar.bz2
linux-stable-9092c02d943515b3c9ffd5d0003527f8cc1dd77b.zip
DM RAID: Add ability to restore transiently failed devices on resume
DM RAID: Add ability to restore transiently failed devices on resume This patch adds code to the resume function to check over the devices in the RAID array. If any are found to be marked as failed and their superblocks can be read, an attempt is made to reintegrate them into the array. This allows the user to refresh the array with a simple suspend and resume of the array - rather than having to load a completely new table, allocate and initialize all the structures and throw away the old instantiation. Signed-off-by: Jonathan Brassow <jbrassow@redhat.com> Signed-off-by: NeilBrown <neilb@suse.de>
Diffstat (limited to 'drivers')
-rw-r--r--drivers/md/dm-raid.c44
-rw-r--r--drivers/md/raid1.c7
-rw-r--r--drivers/md/raid10.c10
3 files changed, 53 insertions, 8 deletions
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 1d3fe1a40a9b..facaf9142d5a 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -1574,12 +1574,54 @@ static void raid_postsuspend(struct dm_target *ti)
static void raid_resume(struct dm_target *ti)
{
+ int i;
+ uint64_t failed_devices, cleared_failed_devices = 0;
+ unsigned long flags;
+ struct dm_raid_superblock *sb;
struct raid_set *rs = ti->private;
+ struct md_rdev *r;
set_bit(MD_CHANGE_DEVS, &rs->md.flags);
if (!rs->bitmap_loaded) {
bitmap_load(&rs->md);
rs->bitmap_loaded = 1;
+ } else {
+ /*
+ * A secondary resume while the device is active.
+ * Take this opportunity to check whether any failed
+ * devices are reachable again.
+ */
+ for (i = 0; i < rs->md.raid_disks; i++) {
+ r = &rs->dev[i].rdev;
+ if (test_bit(Faulty, &r->flags) && r->sb_page &&
+ sync_page_io(r, 0, r->sb_size,
+ r->sb_page, READ, 1)) {
+ DMINFO("Faulty device #%d has readable super"
+ "block. Attempting to revive it.", i);
+ r->raid_disk = i;
+ r->saved_raid_disk = i;
+ flags = r->flags;
+ clear_bit(Faulty, &r->flags);
+ clear_bit(WriteErrorSeen, &r->flags);
+ clear_bit(In_sync, &r->flags);
+ if (r->mddev->pers->hot_add_disk(r->mddev, r)) {
+ r->raid_disk = -1;
+ r->saved_raid_disk = -1;
+ r->flags = flags;
+ } else {
+ r->recovery_offset = 0;
+ cleared_failed_devices |= 1 << i;
+ }
+ }
+ }
+ if (cleared_failed_devices) {
+ rdev_for_each(r, &rs->md) {
+ sb = page_address(r->sb_page);
+ failed_devices = le64_to_cpu(sb->failed_devices);
+ failed_devices &= ~cleared_failed_devices;
+ sb->failed_devices = cpu_to_le64(failed_devices);
+ }
+ }
}
clear_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
@@ -1588,7 +1630,7 @@ static void raid_resume(struct dm_target *ti)
static struct target_type raid_target = {
.name = "raid",
- .version = {1, 5, 0},
+ .version = {1, 5, 1},
.module = THIS_MODULE,
.ctr = raid_ctr,
.dtr = raid_dtr,
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 6e17f8181c4b..ec734588a1c6 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1519,8 +1519,9 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
p = conf->mirrors+mirror;
if (!p->rdev) {
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
+ if (mddev->gendisk)
+ disk_stack_limits(mddev->gendisk, rdev->bdev,
+ rdev->data_offset << 9);
p->head_position = 0;
rdev->raid_disk = mirror;
@@ -1559,7 +1560,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
clear_bit(Unmerged, &rdev->flags);
}
md_integrity_add_rdev(rdev, mddev);
- if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+ if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
print_conf(conf);
return err;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 6ddae2501b9a..3c6b193cefd5 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1819,15 +1819,17 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
set_bit(Replacement, &rdev->flags);
rdev->raid_disk = mirror;
err = 0;
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
+ if (mddev->gendisk)
+ disk_stack_limits(mddev->gendisk, rdev->bdev,
+ rdev->data_offset << 9);
conf->fullsync = 1;
rcu_assign_pointer(p->replacement, rdev);
break;
}
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
+ if (mddev->gendisk)
+ disk_stack_limits(mddev->gendisk, rdev->bdev,
+ rdev->data_offset << 9);
p->head_position = 0;
p->recovery_disabled = mddev->recovery_disabled - 1;