Diffstat (limited to 'fs/btrfs/extent_io.c')
-rw-r--r-- | fs/btrfs/extent_io.c | 482 |
1 file changed, 189 insertions, 293 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4dcf22e051ff..83dd3aa59663 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -20,7 +20,7 @@
 #include "extent_map.h"
 #include "ctree.h"
 #include "btrfs_inode.h"
-#include "volumes.h"
+#include "bio.h"
 #include "check-integrity.h"
 #include "locking.h"
 #include "rcu-string.h"
@@ -30,6 +30,12 @@
 #include "zoned.h"
 #include "block-group.h"
 #include "compression.h"
+#include "fs.h"
+#include "accessors.h"
+#include "file-item.h"
+#include "file.h"
+#include "dev-replace.h"
+#include "super.h"
 
 static struct kmem_cache *extent_buffer_cache;
 
@@ -96,24 +102,22 @@ struct btrfs_bio_ctrl {
 	u32 len_to_stripe_boundary;
 	u32 len_to_oe_boundary;
 	btrfs_bio_end_io_t end_io_func;
-};
 
-struct extent_page_data {
-	struct btrfs_bio_ctrl bio_ctrl;
-	/* tells writepage not to lock the state bits for this range
-	 * it still does the unlocking
+	/*
+	 * Tell writepage not to lock the state bits for this range, it still
+	 * does the unlocking.
 	 */
-	unsigned int extent_locked:1;
+	bool extent_locked;
 
-	/* tells the submit_bio code to use REQ_SYNC */
-	unsigned int sync_io:1;
+	/* Tell the submit_bio code to use REQ_SYNC */
+	bool sync_io;
 };
 
 static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
 {
 	struct bio *bio;
 	struct bio_vec *bv;
-	struct inode *inode;
+	struct btrfs_inode *inode;
 	int mirror_num;
 
 	if (!bio_ctrl->bio)
@@ -121,7 +125,7 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
 
 	bio = bio_ctrl->bio;
 	bv = bio_first_bvec_all(bio);
-	inode = bv->bv_page->mapping->host;
+	inode = BTRFS_I(bv->bv_page->mapping->host);
 	mirror_num = bio_ctrl->mirror_num;
 
 	/* Caller should ensure the bio has at least some range added */
@@ -129,7 +133,7 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
 
 	btrfs_bio(bio)->file_offset = page_offset(bv->bv_page) + bv->bv_offset;
 
-	if (!is_data_inode(inode))
+	if (!is_data_inode(&inode->vfs_inode))
 		btrfs_submit_metadata_bio(inode, bio, mirror_num);
 	else if (btrfs_op(bio) == BTRFS_MAP_WRITE)
 		btrfs_submit_data_write_bio(inode, bio, mirror_num);
@@ -142,11 +146,11 @@ static void submit_one_bio(struct btrfs_bio_ctrl *bio_ctrl)
 }
 
 /*
- * Submit or fail the current bio in an extent_page_data structure.
+ * Submit or fail the current bio in the bio_ctrl structure.
  */
-static void submit_write_bio(struct extent_page_data *epd, int ret)
+static void submit_write_bio(struct btrfs_bio_ctrl *bio_ctrl, int ret)
 {
-	struct bio *bio = epd->bio_ctrl.bio;
+	struct bio *bio = bio_ctrl->bio;
 
 	if (!bio)
 		return;
@@ -155,9 +159,9 @@ static void submit_write_bio(struct extent_page_data *epd, int ret)
 		ASSERT(ret < 0);
 		btrfs_bio_end_io(btrfs_bio(bio), errno_to_blk_status(ret));
 		/* The bio is owned by the end_io handler now */
-		epd->bio_ctrl.bio = NULL;
+		bio_ctrl->bio = NULL;
 	} else {
-		submit_one_bio(&epd->bio_ctrl);
+		submit_one_bio(bio_ctrl);
 	}
 }
 
@@ -527,120 +531,6 @@ static void free_io_failure(struct btrfs_inode *inode,
 	kfree(rec);
 }
 
-/*
- * this bypasses the standard btrfs submit functions deliberately, as
- * the standard behavior is to write all copies in a raid setup. here we only
- * want to write the one bad copy. so we do the mapping for ourselves and issue
- * submit_bio directly.
- * to avoid any synchronization issues, wait for the data after writing, which
- * actually prevents the read that triggered the error from finishing.
- * currently, there can be no more than two copies of every data bit. thus,
- * exactly one rewrite is required.
- */
-static int repair_io_failure(struct btrfs_fs_info *fs_info, u64 ino, u64 start,
-			     u64 length, u64 logical, struct page *page,
-			     unsigned int pg_offset, int mirror_num)
-{
-	struct btrfs_device *dev;
-	struct bio_vec bvec;
-	struct bio bio;
-	u64 map_length = 0;
-	u64 sector;
-	struct btrfs_io_context *bioc = NULL;
-	int ret = 0;
-
-	ASSERT(!(fs_info->sb->s_flags & SB_RDONLY));
-	BUG_ON(!mirror_num);
-
-	if (btrfs_repair_one_zone(fs_info, logical))
-		return 0;
-
-	map_length = length;
-
-	/*
-	 * Avoid races with device replace and make sure our bioc has devices
-	 * associated to its stripes that don't go away while we are doing the
-	 * read repair operation.
-	 */
-	btrfs_bio_counter_inc_blocked(fs_info);
-	if (btrfs_is_parity_mirror(fs_info, logical, length)) {
-		/*
-		 * Note that we don't use BTRFS_MAP_WRITE because it's supposed
-		 * to update all raid stripes, but here we just want to correct
-		 * bad stripe, thus BTRFS_MAP_READ is abused to only get the bad
-		 * stripe's dev and sector.
-		 */
-		ret = btrfs_map_block(fs_info, BTRFS_MAP_READ, logical,
-				      &map_length, &bioc, 0);
-		if (ret)
-			goto out_counter_dec;
-		ASSERT(bioc->mirror_num == 1);
-	} else {
-		ret = btrfs_map_block(fs_info, BTRFS_MAP_WRITE, logical,
-				      &map_length, &bioc, mirror_num);
-		if (ret)
-			goto out_counter_dec;
-		BUG_ON(mirror_num != bioc->mirror_num);
-	}
-
-	sector = bioc->stripes[bioc->mirror_num - 1].physical >> 9;
-	dev = bioc->stripes[bioc->mirror_num - 1].dev;
-	btrfs_put_bioc(bioc);
-
-	if (!dev || !dev->bdev ||
-	    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state)) {
-		ret = -EIO;
-		goto out_counter_dec;
-	}
-
-	bio_init(&bio, dev->bdev, &bvec, 1, REQ_OP_WRITE | REQ_SYNC);
-	bio.bi_iter.bi_sector = sector;
-	__bio_add_page(&bio, page, length, pg_offset);
-
-	btrfsic_check_bio(&bio);
-	ret = submit_bio_wait(&bio);
-	if (ret) {
-		/* try to remap that extent elsewhere? */
-		btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
-		goto out_bio_uninit;
-	}
-
-	btrfs_info_rl_in_rcu(fs_info,
-		"read error corrected: ino %llu off %llu (dev %s sector %llu)",
-			     ino, start,
-			     rcu_str_deref(dev->name), sector);
-	ret = 0;
-
-out_bio_uninit:
-	bio_uninit(&bio);
-out_counter_dec:
-	btrfs_bio_counter_dec(fs_info);
-	return ret;
-}
-
-int btrfs_repair_eb_io_failure(const struct extent_buffer *eb, int mirror_num)
-{
-	struct btrfs_fs_info *fs_info = eb->fs_info;
-	u64 start = eb->start;
-	int i, num_pages = num_extent_pages(eb);
-	int ret = 0;
-
-	if (sb_rdonly(fs_info->sb))
-		return -EROFS;
-
-	for (i = 0; i < num_pages; i++) {
-		struct page *p = eb->pages[i];
-
-		ret = repair_io_failure(fs_info, 0, start, PAGE_SIZE, start, p,
-					start - page_offset(p), mirror_num);
-		if (ret)
-			break;
-		start += PAGE_SIZE;
-	}
-
-	return ret;
-}
-
 static int next_mirror(const struct io_failure_record *failrec, int cur_mirror)
 {
 	if (cur_mirror == failrec->num_copies)
@@ -688,7 +578,7 @@ int btrfs_clean_io_failure(struct btrfs_inode *inode, u64 start,
 	mirror = failrec->this_mirror;
 	do {
 		mirror = prev_mirror(failrec, mirror);
-		repair_io_failure(fs_info, ino, start, failrec->len,
+		btrfs_repair_io_failure(fs_info, ino, start, failrec->len,
 				  failrec->logical, page, pg_offset, mirror);
 	} while (mirror != failrec->failed_mirror);
 
@@ -791,13 +681,13 @@ static struct io_failure_record *btrfs_get_io_failure_record(struct inode *inode
 	return failrec;
 }
 
-int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
+int btrfs_repair_one_sector(struct btrfs_inode *inode, struct btrfs_bio *failed_bbio,
 			    u32 bio_offset, struct page *page, unsigned int pgoff,
-			    submit_bio_hook_t *submit_bio_hook)
+			    bool submit_buffered)
 {
 	u64 start = failed_bbio->file_offset + bio_offset;
 	struct io_failure_record *failrec;
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	struct btrfs_fs_info *fs_info = inode->root->fs_info;
 	struct bio *failed_bio = &failed_bbio->bio;
 	const int icsum = bio_offset >> fs_info->sectorsize_bits;
 	struct bio *repair_bio;
@@ -808,7 +698,7 @@ int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
 
 	BUG_ON(bio_op(failed_bio) == REQ_OP_WRITE);
 
-	failrec = btrfs_get_io_failure_record(inode, failed_bbio, bio_offset);
+	failrec = btrfs_get_io_failure_record(&inode->vfs_inode, failed_bbio, bio_offset);
 	if (IS_ERR(failrec))
 		return PTR_ERR(failrec);
 
@@ -819,14 +709,14 @@ int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
 	 *
 	 * Since we're only doing repair for one sector, we only need to get
 	 * a good copy of the failed sector and if we succeed, we have setup
-	 * everything for repair_io_failure to do the rest for us.
+	 * everything for btrfs_repair_io_failure to do the rest for us.
 	 */
 	failrec->this_mirror = next_mirror(failrec, failrec->this_mirror);
 	if (failrec->this_mirror == failrec->failed_mirror) {
 		btrfs_debug(fs_info,
 			"failed to repair num_copies %d this_mirror %d failed_mirror %d",
 			failrec->num_copies, failrec->this_mirror, failrec->failed_mirror);
-		free_io_failure(BTRFS_I(inode), failrec);
+		free_io_failure(inode, failrec);
 		return -EIO;
 	}
 
@@ -847,16 +737,21 @@ int btrfs_repair_one_sector(struct inode *inode, struct btrfs_bio *failed_bbio,
 	bio_add_page(repair_bio, page, failrec->len, pgoff);
 	repair_bbio->iter = repair_bio->bi_iter;
 
-	btrfs_debug(btrfs_sb(inode->i_sb),
+	btrfs_debug(fs_info,
 		    "repair read error: submitting new read to mirror %d",
 		    failrec->this_mirror);
 
 	/*
-	 * At this point we have a bio, so any errors from submit_bio_hook()
-	 * will be handled by the endio on the repair_bio, so we can't return an
+	 * At this point we have a bio, so any errors from bio submission will
+	 * be handled by the endio on the repair_bio, so we can't return an
	 * error here.
	 */
-	submit_bio_hook(inode, repair_bio, failrec->this_mirror, 0);
+	if (submit_buffered)
+		btrfs_submit_data_read_bio(inode, repair_bio,
+					   failrec->this_mirror, 0);
+	else
+		btrfs_submit_dio_repair_bio(inode, repair_bio, failrec->this_mirror);
+
	return BLK_STS_OK;
 }
 
@@ -892,14 +787,9 @@ static void end_sector_io(struct page *page, u64 offset, bool uptodate)
 {
 	struct btrfs_inode *inode = BTRFS_I(page->mapping->host);
 	const u32 sectorsize = inode->root->fs_info->sectorsize;
-	struct extent_state *cached = NULL;
 
 	end_page_read(page, uptodate, offset, sectorsize);
-	if (uptodate)
-		set_extent_uptodate(&inode->io_tree, offset,
-				    offset + sectorsize - 1, &cached, GFP_ATOMIC);
-	unlock_extent_atomic(&inode->io_tree, offset, offset + sectorsize - 1,
-			     &cached);
+	unlock_extent(&inode->io_tree, offset, offset + sectorsize - 1, NULL);
 }
 
 static void submit_data_read_repair(struct inode *inode,
@@ -945,9 +835,9 @@ static void submit_data_read_repair(struct inode *inode,
 			goto next;
 		}
 
-		ret = btrfs_repair_one_sector(inode, failed_bbio,
+		ret = btrfs_repair_one_sector(BTRFS_I(inode), failed_bbio,
 				bio_offset + offset, page, pgoff + offset,
-				btrfs_submit_data_read_bio);
+				true);
 		if (!ret) {
 			/*
 			 * We have submitted the read repair, the page release
@@ -1103,7 +993,7 @@ static void endio_readpage_release_extent(struct processed_extent *processed,
 	 * Now we don't have range contiguous to the processed range, release
 	 * the processed range now.
 	 */
-	unlock_extent_atomic(tree, processed->start, processed->end, &cached);
+	unlock_extent(tree, processed->start, processed->end, &cached);
 
 update:
 	/* Update processed to current range */
@@ -1296,7 +1186,7 @@ static void end_bio_extent_readpage(struct btrfs_bio *bbio)
 	bio_put(bio);
 }
 
-/**
+/*
  * Populate every free slot in a provided array with pages.
  *
  * @nr_pages:   number of pages to allocate
@@ -1332,16 +1222,16 @@ int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array)
 	return 0;
 }
 
-/**
- * Attempt to add a page to bio
+/*
+ * Attempt to add a page to bio.
  *
- * @bio_ctrl:	record both the bio, and its bio_flags
- * @page:	page to add to the bio
- * @disk_bytenr:  offset of the new bio or to check whether we are adding
- *                a contiguous page to the previous one
- * @size:	portion of page that we want to write
- * @pg_offset:	starting offset in the page
- * @compress_type:   compression type of the current bio to see if we can merge them
+ * @bio_ctrl:       record both the bio, and its bio_flags
+ * @page:           page to add to the bio
+ * @disk_bytenr:    offset of the new bio or to check whether we are adding
+ *                  a contiguous page to the previous one
+ * @size:           portion of page that we want to write
+ * @pg_offset:      starting offset in the page
+ * @compress_type:  compression type of the current bio to see if we can merge them
  *
  * Attempt to add a page to bio considering stripe alignment etc.
  *
@@ -1772,13 +1662,9 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
 		ASSERT(IS_ALIGNED(cur, fs_info->sectorsize));
 		if (cur >= last_byte) {
-			struct extent_state *cached = NULL;
-
 			iosize = PAGE_SIZE - pg_offset;
 			memzero_page(page, pg_offset, iosize);
-			set_extent_uptodate(tree, cur, cur + iosize - 1,
-					    &cached, GFP_NOFS);
-			unlock_extent(tree, cur, cur + iosize - 1, &cached);
+			unlock_extent(tree, cur, cur + iosize - 1, NULL);
 			end_page_read(page, true, cur, iosize);
 			break;
 		}
@@ -1854,13 +1740,9 @@ static int btrfs_do_readpage(struct page *page, struct extent_map **em_cached,
 
 		/* we've found a hole, just zero and go on */
 		if (block_start == EXTENT_MAP_HOLE) {
-			struct extent_state *cached = NULL;
-
 			memzero_page(page, pg_offset, iosize);
 
-			set_extent_uptodate(tree, cur, cur + iosize - 1,
-					    &cached, GFP_NOFS);
-			unlock_extent(tree, cur, cur + iosize - 1, &cached);
+			unlock_extent(tree, cur, cur + iosize - 1, NULL);
 			end_page_read(page, true, cur, iosize);
 			cur = cur + iosize;
 			pg_offset += iosize;
@@ -2065,7 +1947,7 @@ static void find_next_dirty_byte(struct btrfs_fs_info *fs_info,
 static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 				 struct page *page,
 				 struct writeback_control *wbc,
-				 struct extent_page_data *epd,
+				 struct btrfs_bio_ctrl *bio_ctrl,
 				 loff_t i_size,
 				 int *nr_ret)
 {
@@ -2097,7 +1979,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
	 */
 	wbc->nr_to_write--;
 
-	epd->bio_ctrl.end_io_func = end_bio_extent_writepage;
+	bio_ctrl->end_io_func = end_bio_extent_writepage;
 	while (cur <= end) {
 		u64 disk_bytenr;
 		u64 em_end;
@@ -2191,7 +2073,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
 		btrfs_page_clear_dirty(fs_info, page, cur, iosize);
 
 		ret = submit_extent_page(op | write_flags, wbc,
-					 &epd->bio_ctrl, disk_bytenr,
+					 bio_ctrl, disk_bytenr,
 					 page, iosize,
 					 cur - page_offset(page),
 					 0, false);
@@ -2231,7 +2113,7 @@ static noinline_for_stack int __extent_writepage_io(struct btrfs_inode *inode,
  * Return <0 for error.
  */
 static int __extent_writepage(struct page *page, struct writeback_control *wbc,
-			      struct extent_page_data *epd)
+			      struct btrfs_bio_ctrl *bio_ctrl)
 {
 	struct folio *folio = page_folio(page);
 	struct inode *inode = page->mapping->host;
@@ -2268,7 +2150,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		goto done;
 	}
 
-	if (!epd->extent_locked) {
+	if (!bio_ctrl->extent_locked) {
 		ret = writepage_delalloc(BTRFS_I(inode), page, wbc);
 		if (ret == 1)
 			return 0;
@@ -2276,7 +2158,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		goto done;
 	}
 
-	ret = __extent_writepage_io(BTRFS_I(inode), page, wbc, epd, i_size,
+	ret = __extent_writepage_io(BTRFS_I(inode), page, wbc, bio_ctrl, i_size,
 				    &nr);
 	if (ret == 1)
 		return 0;
@@ -2320,9 +2202,9 @@ done:
	 */
 	if (PageError(page))
 		end_extent_writepage(page, ret, page_start, page_end);
-	if (epd->extent_locked) {
+	if (bio_ctrl->extent_locked) {
 		/*
-		 * If epd->extent_locked, it's from extent_write_locked_range(),
+		 * If bio_ctrl->extent_locked, it's from extent_write_locked_range(),
 		 * the page can either be locked by lock_page() or
 		 * process_one_page().
 		 * Let btrfs_page_unlock_writer() handle both cases.
@@ -2361,7 +2243,7 @@ static void end_extent_buffer_writeback(struct extent_buffer *eb)
  * Return <0 if something went wrong, no page is locked.
  */
 static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb,
-			  struct extent_page_data *epd)
+			  struct btrfs_bio_ctrl *bio_ctrl)
 {
 	struct btrfs_fs_info *fs_info = eb->fs_info;
 	int i, num_pages;
@@ -2369,17 +2251,17 @@ static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb
 	int ret = 0;
 
 	if (!btrfs_try_tree_write_lock(eb)) {
-		submit_write_bio(epd, 0);
+		submit_write_bio(bio_ctrl, 0);
 		flush = 1;
 		btrfs_tree_lock(eb);
 	}
 
 	if (test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags)) {
 		btrfs_tree_unlock(eb);
-		if (!epd->sync_io)
+		if (!bio_ctrl->sync_io)
 			return 0;
 		if (!flush) {
-			submit_write_bio(epd, 0);
+			submit_write_bio(bio_ctrl, 0);
 			flush = 1;
 		}
 		while (1) {
@@ -2426,7 +2308,7 @@ static noinline_for_stack int lock_extent_buffer_for_io(struct extent_buffer *eb
 
 		if (!trylock_page(p)) {
 			if (!flush) {
-				submit_write_bio(epd, 0);
+				submit_write_bio(bio_ctrl, 0);
 				flush = 1;
 			}
 			lock_page(p);
@@ -2647,15 +2529,19 @@ static void prepare_eb_write(struct extent_buffer *eb)
 	/* Set btree blocks beyond nritems with 0 to avoid stale content */
 	nritems = btrfs_header_nritems(eb);
 	if (btrfs_header_level(eb) > 0) {
-		end = btrfs_node_key_ptr_offset(nritems);
+		end = btrfs_node_key_ptr_offset(eb, nritems);
 		memzero_extent_buffer(eb, end, eb->len - end);
 	} else {
 		/*
		 * Leaf:
		 * header 0 1 2 .. N ... data_N .. data_2 data_1 data_0
		 */
-		start = btrfs_item_nr_offset(nritems);
-		end = BTRFS_LEAF_DATA_OFFSET + leaf_data_end(eb);
+		start = btrfs_item_nr_offset(eb, nritems);
+		end = btrfs_item_nr_offset(eb, 0);
+		if (nritems == 0)
+			end += BTRFS_LEAF_DATA_SIZE(eb->fs_info);
+		else
+			end += btrfs_item_offset(eb, nritems - 1);
 		memzero_extent_buffer(eb, start, end - start);
 	}
 }
@@ -2666,7 +2552,7 @@ static void prepare_eb_write(struct extent_buffer *eb)
  */
 static int write_one_subpage_eb(struct extent_buffer *eb,
 				struct writeback_control *wbc,
-				struct extent_page_data *epd)
+				struct btrfs_bio_ctrl *bio_ctrl)
 {
 	struct btrfs_fs_info *fs_info = eb->fs_info;
 	struct page *page = eb->pages[0];
@@ -2686,10 +2572,10 @@ static int write_one_subpage_eb(struct extent_buffer *eb,
 	if (no_dirty_ebs)
 		clear_page_dirty_for_io(page);
 
-	epd->bio_ctrl.end_io_func = end_bio_subpage_eb_writepage;
+	bio_ctrl->end_io_func = end_bio_subpage_eb_writepage;
 
 	ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
-			&epd->bio_ctrl, eb->start, page, eb->len,
+			bio_ctrl, eb->start, page, eb->len,
 			eb->start - page_offset(page), 0, false);
 	if (ret) {
 		btrfs_subpage_clear_writeback(fs_info, page, eb->start, eb->len);
@@ -2712,7 +2598,7 @@ static int write_one_subpage_eb(struct extent_buffer *eb,
 
 static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 			struct writeback_control *wbc,
-			struct extent_page_data *epd)
+			struct btrfs_bio_ctrl *bio_ctrl)
 {
 	u64 disk_bytenr = eb->start;
 	int i, num_pages;
@@ -2721,7 +2607,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 
 	prepare_eb_write(eb);
 
-	epd->bio_ctrl.end_io_func = end_bio_extent_buffer_writepage;
+	bio_ctrl->end_io_func = end_bio_extent_buffer_writepage;
 
 	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++) {
@@ -2730,7 +2616,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
 		clear_page_dirty_for_io(p);
 		set_page_writeback(p);
 		ret = submit_extent_page(REQ_OP_WRITE | write_flags, wbc,
-					 &epd->bio_ctrl, disk_bytenr, p,
+					 bio_ctrl, disk_bytenr, p,
 					 PAGE_SIZE, 0, 0, false);
 		if (ret) {
 			set_btree_ioerr(p, eb);
@@ -2773,7 +2659,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
  */
 static int submit_eb_subpage(struct page *page,
 			     struct writeback_control *wbc,
-			     struct extent_page_data *epd)
+			     struct btrfs_bio_ctrl *bio_ctrl)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(page->mapping->host->i_sb);
 	int submitted = 0;
@@ -2826,7 +2712,7 @@ static int submit_eb_subpage(struct page *page,
 		if (!eb)
 			continue;
 
-		ret = lock_extent_buffer_for_io(eb, epd);
+		ret = lock_extent_buffer_for_io(eb, bio_ctrl);
 		if (ret == 0) {
 			free_extent_buffer(eb);
 			continue;
@@ -2835,7 +2721,7 @@ static int submit_eb_subpage(struct page *page,
 			free_extent_buffer(eb);
 			goto cleanup;
 		}
-		ret = write_one_subpage_eb(eb, wbc, epd);
+		ret = write_one_subpage_eb(eb, wbc, bio_ctrl);
 		free_extent_buffer(eb);
 		if (ret < 0)
 			goto cleanup;
@@ -2845,7 +2731,7 @@ static int submit_eb_subpage(struct page *page,
 
 cleanup:
 	/* We hit error, end bio for the submitted extent buffers */
-	submit_write_bio(epd, ret);
+	submit_write_bio(bio_ctrl, ret);
 	return ret;
 }
 
@@ -2870,7 +2756,7 @@ cleanup:
  * Return <0 for fatal error.
  */
 static int submit_eb_page(struct page *page, struct writeback_control *wbc,
-			  struct extent_page_data *epd,
+			  struct btrfs_bio_ctrl *bio_ctrl,
 			  struct extent_buffer **eb_context)
 {
 	struct address_space *mapping = page->mapping;
@@ -2882,7 +2768,7 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
 		return 0;
 
 	if (btrfs_sb(page->mapping->host->i_sb)->nodesize < PAGE_SIZE)
-		return submit_eb_subpage(page, wbc, epd);
+		return submit_eb_subpage(page, wbc, bio_ctrl);
 
 	spin_lock(&mapping->private_lock);
 	if (!PagePrivate(page)) {
@@ -2925,7 +2811,7 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
 
 	*eb_context = eb;
 
-	ret = lock_extent_buffer_for_io(eb, epd);
+	ret = lock_extent_buffer_for_io(eb, bio_ctrl);
 	if (ret <= 0) {
 		btrfs_revert_meta_write_pointer(cache, eb);
 		if (cache)
@@ -2940,7 +2826,7 @@ static int submit_eb_page(struct page *page, struct writeback_control *wbc,
 		btrfs_schedule_zone_finish_bg(cache, eb);
 		btrfs_put_block_group(cache);
 	}
-	ret = write_one_eb(eb, wbc, epd);
+	ret = write_one_eb(eb, wbc, bio_ctrl);
 	free_extent_buffer(eb);
 	if (ret < 0)
 		return ret;
@@ -2951,10 +2837,9 @@ int btree_write_cache_pages(struct address_space *mapping,
 				   struct writeback_control *wbc)
 {
 	struct extent_buffer *eb_context = NULL;
-	struct extent_page_data epd = {
-		.bio_ctrl = { 0 },
+	struct btrfs_bio_ctrl bio_ctrl = {
 		.extent_locked = 0,
-		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
+		.sync_io = (wbc->sync_mode == WB_SYNC_ALL),
 	};
 	struct btrfs_fs_info *fs_info = BTRFS_I(mapping->host)->root->fs_info;
 	int ret = 0;
@@ -2997,7 +2882,7 @@ retry:
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
 
-			ret = submit_eb_page(page, wbc, &epd, &eb_context);
+			ret = submit_eb_page(page, wbc, &bio_ctrl, &eb_context);
 			if (ret == 0)
 				continue;
 			if (ret < 0) {
@@ -3058,18 +2943,18 @@ retry:
 		ret = 0;
 	if (!ret && BTRFS_FS_ERROR(fs_info))
 		ret = -EROFS;
-	submit_write_bio(&epd, ret);
+	submit_write_bio(&bio_ctrl, ret);
 
 	btrfs_zoned_meta_io_unlock(fs_info);
 	return ret;
 }
 
-/**
+/*
  * Walk the list of dirty pages of the given address space and write all of them.
  *
- * @mapping: address space structure to write
- * @wbc:     subtract the number of written pages from *@wbc->nr_to_write
- * @epd:     holds context for the write, namely the bio
+ * @mapping:  address space structure to write
+ * @wbc:      subtract the number of written pages from *@wbc->nr_to_write
+ * @bio_ctrl: holds context for the write, namely the bio
  *
  * If a page is already under I/O, write_cache_pages() skips it, even
  * if it's dirty. This is desirable behaviour for memory-cleaning writeback,
@@ -3081,7 +2966,7 @@ retry:
  */
 static int extent_write_cache_pages(struct address_space *mapping,
			     struct writeback_control *wbc,
-			     struct extent_page_data *epd)
+			     struct btrfs_bio_ctrl *bio_ctrl)
 {
 	struct inode *inode = mapping->host;
 	int ret = 0;
@@ -3162,7 +3047,7 @@ retry:
			 * tmpfs file mapping
			 */
 			if (!trylock_page(page)) {
-				submit_write_bio(epd, 0);
+				submit_write_bio(bio_ctrl, 0);
 				lock_page(page);
 			}
 
@@ -3173,7 +3058,7 @@ retry:
 
 			if (wbc->sync_mode != WB_SYNC_NONE) {
 				if (PageWriteback(page))
-					submit_write_bio(epd, 0);
+					submit_write_bio(bio_ctrl, 0);
 				wait_on_page_writeback(page);
 			}
 
@@ -3183,7 +3068,7 @@ retry:
 				continue;
 			}
 
-			ret = __extent_writepage(page, wbc, epd);
+			ret = __extent_writepage(page, wbc, bio_ctrl);
 			if (ret < 0) {
 				done = 1;
 				break;
@@ -3213,14 +3098,14 @@ retry:
		 * page in our current bio, and thus deadlock, so flush the
		 * write bio here.
		 */
-		submit_write_bio(epd, 0);
+		submit_write_bio(bio_ctrl, 0);
 		goto retry;
 	}
 
 	if (wbc->range_cyclic || (wbc->nr_to_write > 0 && range_whole))
 		mapping->writeback_index = done_index;
 
-	btrfs_add_delayed_iput(inode);
+	btrfs_add_delayed_iput(BTRFS_I(inode));
 	return ret;
 }
 
@@ -3239,8 +3124,7 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
 	u64 cur = start;
 	unsigned long nr_pages;
 	const u32 sectorsize = btrfs_sb(inode->i_sb)->sectorsize;
-	struct extent_page_data epd = {
-		.bio_ctrl = { 0 },
+	struct btrfs_bio_ctrl bio_ctrl = {
 		.extent_locked = 1,
 		.sync_io = 1,
 	};
@@ -3271,7 +3155,7 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
 		ASSERT(PageLocked(page));
 		ASSERT(PageDirty(page));
 		clear_page_dirty_for_io(page);
-		ret = __extent_writepage(page, &wbc_writepages, &epd);
+		ret = __extent_writepage(page, &wbc_writepages, &bio_ctrl);
 		ASSERT(ret <= 0);
 		if (ret < 0) {
 			found_error = true;
@@ -3281,7 +3165,7 @@ int extent_write_locked_range(struct inode *inode, u64 start, u64 end)
 		cur = cur_end + 1;
 	}
 
-	submit_write_bio(&epd, found_error ? ret : 0);
+	submit_write_bio(&bio_ctrl, found_error ? ret : 0);
 
 	wbc_detach_inode(&wbc_writepages);
 	if (found_error)
@@ -3294,10 +3178,9 @@ int extent_writepages(struct address_space *mapping,
 {
 	struct inode *inode = mapping->host;
 	int ret = 0;
-	struct extent_page_data epd = {
-		.bio_ctrl = { 0 },
+	struct btrfs_bio_ctrl bio_ctrl = {
 		.extent_locked = 0,
-		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
+		.sync_io = (wbc->sync_mode == WB_SYNC_ALL),
 	};
 
 	/*
@@ -3305,8 +3188,8 @@ int extent_writepages(struct address_space *mapping,
	 * protect the write pointer updates.
	 */
 	btrfs_zoned_data_reloc_lock(BTRFS_I(inode));
-	ret = extent_write_cache_pages(mapping, wbc, &epd);
-	submit_write_bio(&epd, ret);
+	ret = extent_write_cache_pages(mapping, wbc, &bio_ctrl);
+	submit_write_bio(&bio_ctrl, ret);
 	btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
 	return ret;
 }
@@ -3705,14 +3588,13 @@ static int fiemap_search_slot(struct btrfs_inode *inode, struct btrfs_path *path
 static int fiemap_process_hole(struct btrfs_inode *inode,
 			       struct fiemap_extent_info *fieinfo,
 			       struct fiemap_cache *cache,
-			       struct btrfs_backref_shared_cache *backref_cache,
+			       struct extent_state **delalloc_cached_state,
+			       struct btrfs_backref_share_check_ctx *backref_ctx,
 			       u64 disk_bytenr, u64 extent_offset,
 			       u64 extent_gen,
-			       struct ulist *roots, struct ulist *tmp_ulist,
 			       u64 start, u64 end)
 {
 	const u64 i_size = i_size_read(&inode->vfs_inode);
-	const u64 ino = btrfs_ino(inode);
 	u64 cur_offset = start;
 	u64 last_delalloc_end = 0;
 	u32 prealloc_flags = FIEMAP_EXTENT_UNWRITTEN;
@@ -3731,6 +3613,7 @@ static int fiemap_process_hole(struct btrfs_inode *inode,
 		bool delalloc;
 
 		delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end,
+							delalloc_cached_state,
 							&delalloc_start,
 							&delalloc_end);
 		if (!delalloc)
@@ -3752,11 +3635,10 @@ static int fiemap_process_hole(struct btrfs_inode *inode,
 
 		if (prealloc_len > 0) {
 			if (!checked_extent_shared && fieinfo->fi_extents_max) {
-				ret = btrfs_is_data_extent_shared(inode->root,
-							  ino, disk_bytenr,
-							  extent_gen, roots,
-							  tmp_ulist,
-							  backref_cache);
+				ret = btrfs_is_data_extent_shared(inode,
								  disk_bytenr,
								  extent_gen,
								  backref_ctx);
 				if (ret < 0)
 					return ret;
 				else if (ret > 0)
@@ -3802,11 +3684,10 @@ static int fiemap_process_hole(struct btrfs_inode *inode,
 		}
 
 		if (!checked_extent_shared && fieinfo->fi_extents_max) {
-			ret = btrfs_is_data_extent_shared(inode->root,
-							  ino, disk_bytenr,
-							  extent_gen, roots,
-							  tmp_ulist,
-							  backref_cache);
+			ret = btrfs_is_data_extent_shared(inode,
							  disk_bytenr,
							  extent_gen,
							  backref_ctx);
 			if (ret < 0)
 				return ret;
 			else if (ret > 0)
@@ -3903,12 +3784,10 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 {
 	const u64 ino = btrfs_ino(inode);
 	struct extent_state *cached_state = NULL;
+	struct extent_state *delalloc_cached_state = NULL;
 	struct btrfs_path *path;
-	struct btrfs_root *root = inode->root;
 	struct fiemap_cache cache = { 0 };
-	struct btrfs_backref_shared_cache *backref_cache;
-	struct ulist *roots;
-	struct ulist *tmp_ulist;
+	struct btrfs_backref_share_check_ctx *backref_ctx;
 	u64 last_extent_end;
 	u64 prev_extent_end;
 	u64 lockstart;
@@ -3916,17 +3795,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 	bool stopped = false;
 	int ret;
 
-	backref_cache = kzalloc(sizeof(*backref_cache), GFP_KERNEL);
+	backref_ctx = btrfs_alloc_backref_share_check_ctx();
 	path = btrfs_alloc_path();
-	roots = ulist_alloc(GFP_KERNEL);
-	tmp_ulist = ulist_alloc(GFP_KERNEL);
-	if (!backref_cache || !path || !roots || !tmp_ulist) {
+	if (!backref_ctx || !path) {
 		ret = -ENOMEM;
 		goto out;
 	}
 
-	lockstart = round_down(start, root->fs_info->sectorsize);
-	lockend = round_up(start + len, root->fs_info->sectorsize);
+	lockstart = round_down(start, inode->root->fs_info->sectorsize);
+	lockend = round_up(start + len, inode->root->fs_info->sectorsize);
 	prev_extent_end = lockstart;
 
 	lock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
@@ -3975,13 +3852,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 		if (extent_end <= lockstart)
 			goto next_item;
 
+		backref_ctx->curr_leaf_bytenr = leaf->start;
+
 		/* We have in implicit hole (NO_HOLES feature enabled). */
 		if (prev_extent_end < key.offset) {
 			const u64 range_end = min(key.offset, lockend) - 1;
 
 			ret = fiemap_process_hole(inode, fieinfo, &cache,
-						  backref_cache, 0, 0, 0,
-						  roots, tmp_ulist,
+						  &delalloc_cached_state,
+						  backref_ctx, 0, 0, 0,
 						  prev_extent_end, range_end);
 			if (ret < 0) {
 				goto out_unlock;
@@ -4021,25 +3900,24 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
 						 extent_len, flags);
 		} else if (extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
 			ret = fiemap_process_hole(inode, fieinfo, &cache,
-						  backref_cache,
+						  &delalloc_cached_state,
+						  backref_ctx,
 						  disk_bytenr, extent_offset,
-						  extent_gen, roots, tmp_ulist,
-						  key.offset, extent_end - 1);
+						  extent_gen, key.offset,
+						  extent_end - 1);
 		} else if (disk_bytenr == 0) {
 			/* We have an explicit hole. */
 			ret = fiemap_process_hole(inode, fieinfo, &cache,
-						  backref_cache, 0, 0, 0,
-						  roots, tmp_ulist,
+						  &delalloc_cached_state,
+						  backref_ctx, 0, 0, 0,
 						  key.offset, extent_end - 1);
 		} else {
 			/* We have a regular extent. */
 			if (fieinfo->fi_extents_max) {
-				ret = btrfs_is_data_extent_shared(root, ino,
+				ret = btrfs_is_data_extent_shared(inode,
								  disk_bytenr,
								  extent_gen,
-								  roots,
-								  tmp_ulist,
-								  backref_cache);
+								  backref_ctx);
 				if (ret < 0)
 					goto out_unlock;
 				else if (ret > 0)
@@ -4088,9 +3966,9 @@ check_eof_delalloc:
 	path = NULL;
 
 	if (!stopped && prev_extent_end < lockend) {
-		ret = fiemap_process_hole(inode, fieinfo, &cache, backref_cache,
-					  0, 0, 0, roots, tmp_ulist,
-					  prev_extent_end, lockend - 1);
+		ret = fiemap_process_hole(inode, fieinfo, &cache,
+					  &delalloc_cached_state, backref_ctx,
+					  0, 0, 0, prev_extent_end, lockend - 1);
 		if (ret < 0)
 			goto out_unlock;
 		prev_extent_end = lockend;
@@ -4107,6 +3985,7 @@ check_eof_delalloc:
 
 		delalloc = btrfs_find_delalloc_in_range(inode, prev_extent_end,
 							i_size - 1,
+							&delalloc_cached_state,
 							&delalloc_start,
 							&delalloc_end);
 		if (!delalloc)
@@ -4121,10 +4000,9 @@ check_eof_delalloc:
 
 out_unlock:
 	unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state);
 out:
-	kfree(backref_cache);
+	free_extent_state(delalloc_cached_state);
+	btrfs_free_backref_share_ctx(backref_ctx);
 	btrfs_free_path(path);
-	ulist_free(roots);
-	ulist_free(tmp_ulist);
 	return ret;
 }
 
@@ -4266,7 +4144,6 @@ __alloc_extent_buffer(struct btrfs_fs_info *fs_info, u64 start,
 	eb->start = start;
 	eb->len = len;
 	eb->fs_info = fs_info;
-	eb->bflags = 0;
 	init_rwsem(&eb->lock);
 
 	btrfs_leak_debug_add_eb(eb);
@@ -4299,7 +4176,6 @@ struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src)
	 */
 	set_bit(EXTENT_BUFFER_UNMAPPED, &new->bflags);
 
-	memset(new->pages, 0, sizeof(*new->pages) * num_pages);
 	ret = btrfs_alloc_page_array(num_pages, new->pages);
 	if (ret) {
 		btrfs_release_extent_buffer(new);
@@ -4944,11 +4820,13 @@ void set_extent_buffer_uptodate(struct extent_buffer *eb)
 }
 
 static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
-				      int mirror_num)
+				      int mirror_num,
+				      struct btrfs_tree_parent_check *check)
 {
 	struct btrfs_fs_info *fs_info = eb->fs_info;
 	struct extent_io_tree *io_tree;
 	struct page *page = eb->pages[0];
+	struct extent_state *cached_state = NULL;
 	struct btrfs_bio_ctrl bio_ctrl = {
 		.mirror_num = mirror_num,
 	};
@@ -4956,13 +4834,16 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
 
 	ASSERT(!test_bit(EXTENT_BUFFER_UNMAPPED, &eb->bflags));
 	ASSERT(PagePrivate(page));
+	ASSERT(check);
 	io_tree = &BTRFS_I(fs_info->btree_inode)->io_tree;
 
 	if (wait == WAIT_NONE) {
-		if (!try_lock_extent(io_tree, eb->start, eb->start + eb->len - 1))
+		if (!try_lock_extent(io_tree, eb->start, eb->start + eb->len - 1,
+				     &cached_state))
 			return -EAGAIN;
 	} else {
-		ret = lock_extent(io_tree, eb->start, eb->start + eb->len - 1, NULL);
+		ret = lock_extent(io_tree, eb->start, eb->start + eb->len - 1,
+				  &cached_state);
 		if (ret < 0)
 			return ret;
 	}
@@ -4972,7 +4853,8 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
 	    PageUptodate(page) ||
 	    btrfs_subpage_test_uptodate(fs_info, page, eb->start, eb->len)) {
 		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-		unlock_extent(io_tree, eb->start, eb->start + eb->len - 1, NULL);
+		unlock_extent(io_tree, eb->start, eb->start + eb->len - 1,
+			      &cached_state);
 		return ret;
 	}
 
@@ -4996,17 +4878,22 @@ static int read_extent_buffer_subpage(struct extent_buffer *eb, int wait,
		 */
 		atomic_dec(&eb->io_pages);
 	}
+	memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check));
 	submit_one_bio(&bio_ctrl);
-	if (ret || wait != WAIT_COMPLETE)
+	if (ret || wait != WAIT_COMPLETE) {
+		free_extent_state(cached_state);
 		return ret;
+	}
 
-	wait_extent_bit(io_tree, eb->start, eb->start + eb->len - 1, EXTENT_LOCKED);
+	wait_extent_bit(io_tree, eb->start, eb->start + eb->len - 1,
+			EXTENT_LOCKED, &cached_state);
 	if (!test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))
 		ret = -EIO;
 	return ret;
 }
 
-int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
+int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
+			     struct btrfs_tree_parent_check *check)
 {
 	int i;
 	struct page *page;
@@ -5032,7 +4919,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
 		return -EIO;
 
 	if (eb->fs_info->nodesize < PAGE_SIZE)
-		return read_extent_buffer_subpage(eb, wait, mirror_num);
+		return read_extent_buffer_subpage(eb, wait, mirror_num, check);
 
 	num_pages = num_extent_pages(eb);
 	for (i = 0; i < num_pages; i++) {
@@ -5109,6 +4996,7 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num)
 		}
 	}
 
+	memcpy(&btrfs_bio(bio_ctrl.bio)->parent_check, check, sizeof(*check));
 	submit_one_bio(&bio_ctrl);
 
 	if (ret || wait != WAIT_COMPLETE)
@@ -5467,11 +5355,12 @@ static inline void eb_bitmap_offset(const struct extent_buffer *eb,
 	*page_offset = offset_in_page(offset);
 }
 
-/**
- * extent_buffer_test_bit - determine whether a bit in a bitmap item is set
- * @eb:  the extent buffer
- * @start: offset of the bitmap item in the extent buffer
- * @nr:  bit number to test
+/*
+ * Determine whether a bit in a bitmap item is set.
+ *
+ * @eb:     the extent buffer
+ * @start:  offset of the bitmap item in the extent buffer
+ * @nr:     bit number to test
  */
 int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
			   unsigned long nr)
@@ -5488,12 +5377,13 @@ int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
 	return 1U & (kaddr[offset] >> (nr & (BITS_PER_BYTE - 1)));
 }
 
-/**
- * extent_buffer_bitmap_set - set an area of a bitmap
- * @eb:  the extent buffer
- * @start: offset of the bitmap item in the extent buffer
- * @pos: bit number of the first bit
- * @len: number of bits to set
+/*
+ * Set an area of a bitmap to 1.
+ *
+ * @eb:     the extent buffer
+ * @start:  offset of the bitmap item in the extent buffer
+ * @pos:    bit number of the first bit
+ * @len:    number of bits to set
  */
 void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
			      unsigned long pos, unsigned long len)
@@ -5530,12 +5420,13 @@ void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long star
 }
 
 
-/**
- * extent_buffer_bitmap_clear - clear an area of a bitmap
- * @eb:  the extent buffer
- * @start: offset of the bitmap item in the extent buffer
- * @pos: bit number of the first bit
- * @len: number of bits to clear
+/*
+ * Clear an area of a bitmap.
+ *
+ * @eb:     the extent buffer
+ * @start:  offset of the bitmap item in the extent buffer
+ * @pos:    bit number of the first bit
+ * @len:    number of bits to clear
  */
 void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
				unsigned long start, unsigned long pos,
@@ -5841,6 +5732,11 @@ int try_release_extent_buffer(struct page *page)
 void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
				u64 bytenr, u64 owner_root, u64 gen, int level)
 {
+	struct btrfs_tree_parent_check check = {
+		.has_first_key = 0,
+		.level = level,
+		.transid = gen
+	};
 	struct extent_buffer *eb;
 	int ret;
 
@@ -5853,7 +5749,7 @@ void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
 		return;
 	}
 
-	ret = read_extent_buffer_pages(eb, WAIT_NONE, 0);
+	ret = read_extent_buffer_pages(eb, WAIT_NONE, 0, &check);
 	if (ret < 0)
 		free_extent_buffer_stale(eb);
 	else