diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-29 14:04:23 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-01-29 14:04:23 -0800 |
commit | 31466f3ed710e5761077190809e694f55aed5deb (patch) | |
tree | 82b1313807242796e74a29d27282fc11f30f7cd0 /fs/btrfs/raid56.c | |
parent | 6787dc24b72b88404ae652c914014e51ddf1c4fa (diff) | |
parent | 3acbcbfc8f06d4ade2aab2ebba0a2542a05ce90c (diff) | |
download | linux-stable-31466f3ed710e5761077190809e694f55aed5deb.tar.gz linux-stable-31466f3ed710e5761077190809e694f55aed5deb.tar.bz2 linux-stable-31466f3ed710e5761077190809e694f55aed5deb.zip |
Merge tag 'for-4.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"Features or user visible changes:
- fallocate: implement zero range mode
- avoid losing data raid profile when deleting a device
- tree item checker: more checks for directory items and xattrs
Notable fixes:
- raid56 recovery: don't use cached stripes, that could be
potentially changed and a later RMW or recovery would lead to
corruptions or failures
- let raid56 try harder to rebuild damaged data, reading from all
stripes if necessary
- fix scrub to repair raid56 in a similar way as in the case above
Other:
- cleanups: device freeing, removed some call indirections, redundant
bio_put/_get, unused parameters, refactorings and renames
- RCU list traversal fixups
- simplify mount callchain, remove recursing back when mounting a
subvolume
- plug for fsync, may improve bio merging on multiple devices
- compression heuristic: replace heap sort with radix sort, gains some
performance
- add extent map selftests, buffered write vs dio"
* tag 'for-4.16-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (155 commits)
btrfs: drop devid as device_list_add() arg
btrfs: get device pointer from device_list_add()
btrfs: set the total_devices in device_list_add()
btrfs: move pr_info into device_list_add
btrfs: make btrfs_free_stale_devices() to match the path
btrfs: rename btrfs_free_stale_devices() arg to skip_dev
btrfs: make btrfs_free_stale_devices() argument optional
btrfs: make btrfs_free_stale_device() to iterate all stales
btrfs: no need to check for btrfs_fs_devices::seeding
btrfs: Use IS_ALIGNED in btrfs_truncate_block instead of opencoding it
Btrfs: noinline merge_extent_mapping
Btrfs: add WARN_ONCE to detect unexpected error from merge_extent_mapping
Btrfs: extent map selftest: dio write vs dio read
Btrfs: extent map selftest: buffered write vs dio read
Btrfs: add extent map selftests
Btrfs: move extent map specific code to extent_map.c
Btrfs: add helper for em merge logic
Btrfs: fix unexpected EEXIST from btrfs_get_extent
Btrfs: fix incorrect block_len in merge_extent_mapping
btrfs: Remove unused readahead spinlock
...
Diffstat (limited to 'fs/btrfs/raid56.c')
-rw-r--r-- | fs/btrfs/raid56.c | 119 |
1 files changed, 81 insertions, 38 deletions
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c index a7f79254ecca..dec0907dfb8a 100644 --- a/fs/btrfs/raid56.c +++ b/fs/btrfs/raid56.c @@ -231,7 +231,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info) cur = h + i; INIT_LIST_HEAD(&cur->hash_list); spin_lock_init(&cur->lock); - init_waitqueue_head(&cur->wait); } x = cmpxchg(&info->stripe_hash_table, NULL, table); @@ -595,14 +594,31 @@ static int rbio_can_merge(struct btrfs_raid_bio *last, * bio list here, anyone else that wants to * change this stripe needs to do their own rmw. */ - if (last->operation == BTRFS_RBIO_PARITY_SCRUB || - cur->operation == BTRFS_RBIO_PARITY_SCRUB) + if (last->operation == BTRFS_RBIO_PARITY_SCRUB) return 0; - if (last->operation == BTRFS_RBIO_REBUILD_MISSING || - cur->operation == BTRFS_RBIO_REBUILD_MISSING) + if (last->operation == BTRFS_RBIO_REBUILD_MISSING) return 0; + if (last->operation == BTRFS_RBIO_READ_REBUILD) { + int fa = last->faila; + int fb = last->failb; + int cur_fa = cur->faila; + int cur_fb = cur->failb; + + if (last->faila >= last->failb) { + fa = last->failb; + fb = last->faila; + } + + if (cur->faila >= cur->failb) { + cur_fa = cur->failb; + cur_fb = cur->faila; + } + + if (fa != cur_fa || fb != cur_fb) + return 0; + } return 1; } @@ -670,7 +686,6 @@ static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) struct btrfs_raid_bio *cur; struct btrfs_raid_bio *pending; unsigned long flags; - DEFINE_WAIT(wait); struct btrfs_raid_bio *freeit = NULL; struct btrfs_raid_bio *cache_drop = NULL; int ret = 0; @@ -816,15 +831,6 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) } goto done_nolock; - /* - * The barrier for this waitqueue_active is not needed, - * we're protected by h->lock and can't miss a wakeup. 
- */ - } else if (waitqueue_active(&h->wait)) { - spin_unlock(&rbio->bio_list_lock); - spin_unlock_irqrestore(&h->lock, flags); - wake_up(&h->wait); - goto done_nolock; } } done: @@ -858,10 +864,17 @@ static void __free_raid_bio(struct btrfs_raid_bio *rbio) kfree(rbio); } -static void free_raid_bio(struct btrfs_raid_bio *rbio) +static void rbio_endio_bio_list(struct bio *cur, blk_status_t err) { - unlock_stripe(rbio); - __free_raid_bio(rbio); + struct bio *next; + + while (cur) { + next = cur->bi_next; + cur->bi_next = NULL; + cur->bi_status = err; + bio_endio(cur); + cur = next; + } } /* @@ -871,20 +884,26 @@ static void free_raid_bio(struct btrfs_raid_bio *rbio) static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err) { struct bio *cur = bio_list_get(&rbio->bio_list); - struct bio *next; + struct bio *extra; if (rbio->generic_bio_cnt) btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt); - free_raid_bio(rbio); + /* + * At this moment, rbio->bio_list is empty, however since rbio does not + * always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the + * hash list, rbio may be merged with others so that rbio->bio_list + * becomes non-empty. + * Once unlock_stripe() is done, rbio->bio_list will not be updated any + * more and we can call bio_endio() on all queued bios. 
+ */ + unlock_stripe(rbio); + extra = bio_list_get(&rbio->bio_list); + __free_raid_bio(rbio); - while (cur) { - next = cur->bi_next; - cur->bi_next = NULL; - cur->bi_status = err; - bio_endio(cur); - cur = next; - } + rbio_endio_bio_list(cur, err); + if (extra) + rbio_endio_bio_list(extra, err); } /* @@ -1435,14 +1454,13 @@ static int fail_bio_stripe(struct btrfs_raid_bio *rbio, */ static void set_bio_pages_uptodate(struct bio *bio) { - struct bio_vec bvec; - struct bvec_iter iter; + struct bio_vec *bvec; + int i; - if (bio_flagged(bio, BIO_CLONED)) - bio->bi_iter = btrfs_io_bio(bio)->iter; + ASSERT(!bio_flagged(bio, BIO_CLONED)); - bio_for_each_segment(bvec, bio, iter) - SetPageUptodate(bvec.bv_page); + bio_for_each_segment_all(bvec, bio, i) + SetPageUptodate(bvec->bv_page); } /* @@ -1969,7 +1987,22 @@ cleanup: cleanup_io: if (rbio->operation == BTRFS_RBIO_READ_REBUILD) { - if (err == BLK_STS_OK) + /* + * - In case of two failures, where rbio->failb != -1: + * + * Do not cache this rbio since the above read reconstruction + * (raid6_datap_recov() or raid6_2data_recov()) may have + * changed some content of stripes which are not identical to + * on-disk content any more, otherwise, a later write/recover + * may steal stripe_pages from this rbio and end up with + * corruptions or rebuild failures. + * + * - In case of single failure, where rbio->failb == -1: + * + * Cache this rbio iff the above read reconstruction is + * excuted without problems. + */ + if (err == BLK_STS_OK && rbio->failb < 0) cache_rbio_pages(rbio); else clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); @@ -2170,11 +2203,21 @@ int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, } /* - * reconstruct from the q stripe if they are - * asking for mirror 3 + * Loop retry: + * for 'mirror == 2', reconstruct from all other stripes. + * for 'mirror_num > 2', select a stripe to fail on every retry. 
*/ - if (mirror_num == 3) - rbio->failb = rbio->real_stripes - 2; + if (mirror_num > 2) { + /* + * 'mirror == 3' is to fail the p stripe and + * reconstruct from the q stripe. 'mirror > 3' is to + * fail a data stripe and reconstruct from p+q stripe. + */ + rbio->failb = rbio->real_stripes - (mirror_num - 1); + ASSERT(rbio->failb > 0); + if (rbio->failb <= rbio->faila) + rbio->failb--; + } ret = lock_stripe_add(rbio); |