From 74dd17fbe3d65829e75d84f00a9525b2ace93998 Mon Sep 17 00:00:00 2001
From: Chris Mason
Date: Tue, 7 Aug 2012 16:25:13 -0400
Subject: Btrfs: fix btrfs send for inline items and compression

The btrfs send code was assuming that the offset of the file extent item
into the extent translated to bytes on disk.  If the extent is compressed,
this isn't true, and so it was reading into extents owned by other files.

It was also improperly handling inline extents.  This solves a crash where
we may have gone past the end of the file extent item by not testing early
enough for an inline extent.  It also solves problems where we have a hole
between the end of the inline item and the start of the full extent.

Signed-off-by: Chris Mason
---
 fs/btrfs/extent_io.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs/btrfs/extent_io.c')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4c878476bb91..19319f5a91a8 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4332,7 +4332,6 @@ static int release_extent_buffer(struct extent_buffer *eb, gfp_t mask)

 		/* Should be safe to release our pages at this point */
 		btrfs_release_extent_buffer_page(eb, 0);
-		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
 		return 1;
 	}
--
cgit v1.2.3


From 9e8a4a8b0b9484e8d14674fc62c9ad8ac9dbce5b Mon Sep 17 00:00:00 2001
From: Liu Bo
Date: Wed, 5 Sep 2012 19:10:51 -0600
Subject: Btrfs: use flag EXTENT_DEFRAG for snapshot-aware defrag

We're going to use the flag EXTENT_DEFRAG to indicate which ranges belong
to a defragment operation, so that we can implement snapshot-aware defrag:

We set the EXTENT_DEFRAG flag when dirtying the extents that need to be
defragmented, so that later on the writeback thread can differentiate
between normal writeback and writeback started by defragmentation.

Original-Signed-off-by: Li Zefan
Signed-off-by: Liu Bo
---
 fs/btrfs/extent_io.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs/btrfs/extent_io.c')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 19319f5a91a8..4a41b17295dc 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -1144,6 +1144,14 @@ int set_extent_delalloc(struct extent_io_tree *tree, u64 start, u64 end,
 			      NULL, cached_state, mask);
 }

+int set_extent_defrag(struct extent_io_tree *tree, u64 start, u64 end,
+		      struct extent_state **cached_state, gfp_t mask)
+{
+	return set_extent_bit(tree, start, end,
+			      EXTENT_DELALLOC | EXTENT_UPTODATE | EXTENT_DEFRAG,
+			      NULL, cached_state, mask);
+}
+
 int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end,
 		       gfp_t mask)
 {
--
cgit v1.2.3
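For context, this is roughly how a defrag path would use the new helper to
tag a dirty range; the inode/page variables and the surrounding locking
shown here are illustrative, not part of the patch:

	struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
	struct extent_state *cached = NULL;
	u64 start = page_offset(page);
	u64 end = start + PAGE_CACHE_SIZE - 1;

	lock_extent(tree, start, end);
	/* delalloc as usual, plus EXTENT_DEFRAG for writeback to key off */
	set_extent_defrag(tree, start, end, &cached, GFP_NOFS);
	unlock_extent_cached(tree, start, end, &cached, GFP_NOFS);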
From 837e197283199de640857192ca32767cb6e24fe8 Mon Sep 17 00:00:00 2001
From: David Sterba
Date: Fri, 7 Sep 2012 03:00:48 -0600
Subject: btrfs: polish names of kmem caches

Use case:

  watch 'grep btrfs < /proc/slabinfo'

This makes it easy to watch all btrfs caches in one go.

Signed-off-by: David Sterba
---
 fs/btrfs/extent_io.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/btrfs/extent_io.c')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4a41b17295dc..3ad84f500687 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -64,13 +64,13 @@ tree_fs_info(struct extent_io_tree *tree)

 int __init extent_io_init(void)
 {
-	extent_state_cache = kmem_cache_create("extent_state",
+	extent_state_cache = kmem_cache_create("btrfs_extent_state",
 			sizeof(struct extent_state), 0,
 			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
 	if (!extent_state_cache)
 		return -ENOMEM;

-	extent_buffer_cache = kmem_cache_create("extent_buffers",
+	extent_buffer_cache = kmem_cache_create("btrfs_extent_buffer",
 			sizeof(struct extent_buffer), 0,
 			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
 	if (!extent_buffer_cache)
--
cgit v1.2.3


From be3940c0a90265654d778394cafe2e2cec674df8 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Tue, 11 Sep 2012 14:23:05 -0600
Subject: btrfs: Kill some bi_idx references

For immutable bio vecs, I've been auditing and removing bi_idx
references.  These were harmless, but removing them will make the
auditing easier.

scrub_bio_end_io_worker() was open coding a bio_reset(), but this
doesn't appear to have been needed for anything: right afterwards it
does a bio_put(), and perusing the code it doesn't appear anything else
was holding a reference to the bio.

The other use, in end_bio_extent_readpage(), was just for a pr_debug();
changed it to something that might be a bit more useful.

Signed-off-by: Kent Overstreet
CC: Chris Mason
CC: Stefan Behrens
---
 fs/btrfs/extent_io.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/btrfs/extent_io.c')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 3ad84f500687..90bd9f768c0a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2306,8 +2306,8 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 		struct extent_state *cached = NULL;
 		struct extent_state *state;

-		pr_debug("end_bio_extent_readpage: bi_vcnt=%d, idx=%d, err=%d, "
-			 "mirror=%ld\n", bio->bi_vcnt, bio->bi_idx, err,
+		pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
+			 "mirror=%ld\n", (u64)bio->bi_sector, err,
 			 (long int)bio->bi_bdev);
 		tree = &BTRFS_I(page->mapping->host)->io_tree;
--
cgit v1.2.3


From b5bae2612af92fd8e7bcdcf7ce3e0259e8d341c9 Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Fri, 14 Sep 2012 13:43:01 -0400
Subject: Btrfs: fix race when getting the eb out of page->private

We can race when checking whether PagePrivate is set on a page and we
actually have an eb saved in the page's private pointer.  We could have
easily written out this page and released it in the time between the
pagevec lookup and actually getting around to looking at this page.  So
use mapping->private_lock to ensure we get a consistent view of the
page->private pointer.  This is in line with the alloc and releasepage
paths, which use private_lock when manipulating page->private.  Thanks,

Reported-by: David Sterba
Signed-off-by: Josef Bacik
---
 fs/btrfs/extent_io.c | 23 +++++++++++++++++++----
 1 file changed, 19 insertions(+), 4 deletions(-)

(limited to 'fs/btrfs/extent_io.c')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 90bd9f768c0a..a2c21570adf5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3256,19 +3256,34 @@ retry:
 				break;
 			}

+			spin_lock(&mapping->private_lock);
+			if (!PagePrivate(page)) {
+				spin_unlock(&mapping->private_lock);
+				continue;
+			}
+
 			eb = (struct extent_buffer *)page->private;
+
+			/*
+			 * Shouldn't happen and normally this would be a BUG_ON
+			 * but no sense in crashing the users box for something
+			 * we can survive anyway.
+			 */
 			if (!eb) {
+				spin_unlock(&mapping->private_lock);
 				WARN_ON(1);
 				continue;
 			}

-			if (eb == prev_eb)
+			if (eb == prev_eb) {
+				spin_unlock(&mapping->private_lock);
 				continue;
+			}

-			if (!atomic_inc_not_zero(&eb->refs)) {
-				WARN_ON(1);
+			ret = atomic_inc_not_zero(&eb->refs);
+			spin_unlock(&mapping->private_lock);
+			if (!ret)
 				continue;
-			}

 			prev_eb = eb;
 			ret = lock_extent_buffer_for_io(eb, fs_info, &epd);
--
cgit v1.2.3
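Distilled from the hunk above, the pattern is: page->private may only be
trusted while mapping->private_lock is held, and the eb reference has to be
taken under that same lock (variable names here are illustrative):

	struct extent_buffer *eb = NULL;

	spin_lock(&mapping->private_lock);
	if (PagePrivate(page)) {
		eb = (struct extent_buffer *)page->private;
		/*
		 * Take the ref before dropping the lock, otherwise the eb
		 * can be released and freed out from under us.
		 */
		if (!atomic_inc_not_zero(&eb->refs))
			eb = NULL;
	}
	spin_unlock(&mapping->private_lock);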
From de0022b9da616b95ea5b41eab32da825b0b5150f Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Tue, 25 Sep 2012 14:25:58 -0400
Subject: Btrfs: do not async metadata csumming in certain situations

There are a couple of scenarios where farming metadata csumming off to
an async thread doesn't help.  The first is if our processor supports
crc32c, in which case the csumming will be fast and so the overhead of
the async model is not worth the cost.  The other case is for our tree
log.  We will be making that stuff dirty and writing it out and waiting
for it immediately.  Even with software crc32c this gives me a ~15%
increase in speed with O_SYNC workloads.  Thanks,

Signed-off-by: Josef Bacik
---
 fs/btrfs/extent_io.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'fs/btrfs/extent_io.c')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a2c21570adf5..979fa0d6bfee 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -45,6 +45,7 @@ struct extent_page_data {
 	struct bio *bio;
 	struct extent_io_tree *tree;
 	get_extent_t *get_extent;
+	unsigned long bio_flags;

 	/* tells writepage not to lock the state bits for this range
 	 * it still does the unlocking
@@ -3163,12 +3164,16 @@ static int write_one_eb(struct extent_buffer *eb,
 	struct block_device *bdev = fs_info->fs_devices->latest_bdev;
 	u64 offset = eb->start;
 	unsigned long i, num_pages;
+	unsigned long bio_flags = 0;
 	int rw = (epd->sync_io ? WRITE_SYNC : WRITE);
 	int ret = 0;

 	clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
 	num_pages = num_extent_pages(eb->start, eb->len);
 	atomic_set(&eb->io_pages, num_pages);
+	if (btrfs_header_owner(eb) == BTRFS_TREE_LOG_OBJECTID)
+		bio_flags = EXTENT_BIO_TREE_LOG;
+
 	for (i = 0; i < num_pages; i++) {
 		struct page *p = extent_buffer_page(eb, i);

@@ -3177,7 +3182,8 @@ static int write_one_eb(struct extent_buffer *eb,
 		ret = submit_extent_page(rw, eb->tree, p, offset >> 9,
 					 PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
 					 -1, end_bio_extent_buffer_writepage,
-					 0, 0, 0);
+					 0, epd->bio_flags, bio_flags);
+		epd->bio_flags = bio_flags;
 		if (ret) {
 			set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
 			SetPageError(p);
@@ -3212,6 +3218,7 @@ int btree_write_cache_pages(struct address_space *mapping,
 		.tree = tree,
 		.extent_locked = 0,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
+		.bio_flags = 0,
 	};
 	int ret = 0;
 	int done = 0;
@@ -3474,7 +3481,7 @@ static void flush_epd_write_bio(struct extent_page_data *epd)
 		if (epd->sync_io)
 			rw = WRITE_SYNC;

-		ret = submit_one_bio(rw, epd->bio, 0, 0);
+		ret = submit_one_bio(rw, epd->bio, 0, epd->bio_flags);
 		BUG_ON(ret < 0); /* -ENOMEM */
 		epd->bio = NULL;
 	}
@@ -3497,6 +3504,7 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page,
 		.get_extent = get_extent,
 		.extent_locked = 0,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
+		.bio_flags = 0,
 	};

 	ret = __extent_writepage(page, wbc, &epd);
@@ -3521,6 +3529,7 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
 		.get_extent = get_extent,
 		.extent_locked = 1,
 		.sync_io = mode == WB_SYNC_ALL,
+		.bio_flags = 0,
 	};
 	struct writeback_control wbc_writepages = {
 		.sync_mode = mode,
@@ -3560,6 +3569,7 @@ int extent_writepages(struct extent_io_tree *tree,
 		.get_extent = get_extent,
 		.extent_locked = 0,
 		.sync_io = wbc->sync_mode == WB_SYNC_ALL,
+		.bio_flags = 0,
 	};

 	ret = extent_write_cache_pages(tree, mapping, wbc,
--
cgit v1.2.3
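The extent_io.c side above only tags tree-log bios with EXTENT_BIO_TREE_LOG;
the actual sync-vs-async decision lives in the disk-io.c half of this commit,
which is along these lines (cpu_has_xmm4_2 was the era's x86 test for SSE4.2,
i.e. hardware crc32c):

	static int check_async_write(struct inode *inode, unsigned long bio_flags)
	{
		int async = 1;

		/* tree log blocks are written and waited on immediately */
		if (bio_flags & EXTENT_BIO_TREE_LOG)
			return 0;
	#ifdef CONFIG_X86
		/* hardware crc32c makes inline csumming cheap */
		if (cpu_has_xmm4_2)
			return 0;
	#endif
		return async;
	}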
From e6138876ad8327250d77291b3262fee356267211 Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Thu, 27 Sep 2012 17:07:30 -0400
Subject: Btrfs: cache extent state when writing out dirty metadata pages

Every time we write out dirty pages we search for an offset in the tree,
convert the bits in the state, and then when we wait we search for the
offset again and clear the bits.  So for every dirty range in the io tree
we are doing 4 rb searches, which is suboptimal.  With this patch we are
only doing 2 searches for every cycle (modulo weird things happening).
Thanks,

Signed-off-by: Josef Bacik
---
 fs/btrfs/extent_io.c | 43 +++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 41 insertions(+), 2 deletions(-)

(limited to 'fs/btrfs/extent_io.c')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 979fa0d6bfee..e8ee39b73356 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -937,6 +937,7 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
  * @end:	the end offset in bytes (inclusive)
  * @bits:	the bits to set in this range
  * @clear_bits:	the bits to clear in this range
+ * @cached_state:	state that we're going to cache
  * @mask:	the allocation mask
  *
  * This will go through and set bits for the given range.  If any states exist
@@ -946,7 +947,8 @@ int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, int bits,
  * boundary bits like LOCK.
  */
 int convert_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-		       int bits, int clear_bits, gfp_t mask)
+		       int bits, int clear_bits,
+		       struct extent_state **cached_state, gfp_t mask)
 {
 	struct extent_state *state;
 	struct extent_state *prealloc = NULL;
@@ -963,6 +965,15 @@ again:
 	}

 	spin_lock(&tree->lock);
+	if (cached_state && *cached_state) {
+		state = *cached_state;
+		if (state->start <= start && state->end > start &&
+		    state->tree) {
+			node = &state->rb_node;
+			goto hit_next;
+		}
+	}
+
 	/*
 	 * this search will find all the extents that end after
 	 * our range starts.
@@ -993,6 +1004,7 @@ hit_next:
 	 */
 	if (state->start == start && state->end <= end) {
 		set_state_bits(tree, state, &bits);
+		cache_state(state, cached_state);
 		state = clear_state_bit(tree, state, &clear_bits, 0);
 		if (last_end == (u64)-1)
 			goto out;
@@ -1033,6 +1045,7 @@ hit_next:
 			goto out;
 		if (state->end <= end) {
 			set_state_bits(tree, state, &bits);
+			cache_state(state, cached_state);
 			state = clear_state_bit(tree, state, &clear_bits, 0);
 			if (last_end == (u64)-1)
 				goto out;
@@ -1071,6 +1084,7 @@ hit_next:
 				   &bits);
 		if (err)
 			extent_io_tree_panic(tree, err);
+		cache_state(prealloc, cached_state);
 		prealloc = NULL;
 		start = this_end + 1;
 		goto search_again;
@@ -1093,6 +1107,7 @@ hit_next:
 			extent_io_tree_panic(tree, err);

 		set_state_bits(tree, prealloc, &bits);
+		cache_state(prealloc, cached_state);
 		clear_state_bit(tree, prealloc, &clear_bits, 0);
 		prealloc = NULL;
 		goto out;
@@ -1297,18 +1312,42 @@ out:
  * If nothing was found, 1 is returned. If found something, return 0.
  */
 int find_first_extent_bit(struct extent_io_tree *tree, u64 start,
-			  u64 *start_ret, u64 *end_ret, int bits)
+			  u64 *start_ret, u64 *end_ret, int bits,
+			  struct extent_state **cached_state)
 {
 	struct extent_state *state;
+	struct rb_node *n;
 	int ret = 1;

 	spin_lock(&tree->lock);
+	if (cached_state && *cached_state) {
+		state = *cached_state;
+		if (state->end == start - 1 && state->tree) {
+			n = rb_next(&state->rb_node);
+			while (n) {
+				state = rb_entry(n, struct extent_state,
+						 rb_node);
+				if (state->state & bits)
+					goto got_it;
+				n = rb_next(n);
+			}
+			free_extent_state(*cached_state);
+			*cached_state = NULL;
+			goto out;
+		}
+		free_extent_state(*cached_state);
+		*cached_state = NULL;
+	}
+
 	state = find_first_extent_bit_state(tree, start, bits);
+got_it:
 	if (state) {
+		cache_state(state, cached_state);
 		*start_ret = state->start;
 		*end_ret = state->end;
 		ret = 0;
 	}
+out:
 	spin_unlock(&tree->lock);
 	return ret;
 }
--
cgit v1.2.3
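The 4-to-2 saving comes from threading one cached_state through a
find/convert pair.  A write-out loop over the dirty tree can then look
roughly like the following (dirty_pages, mapping and the error handling are
illustrative; EXTENT_NEED_WAIT marks the ranges for the later wait pass):

	struct extent_state *cached_state = NULL;
	u64 start = 0;
	u64 end = 0;

	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
				      EXTENT_DIRTY, &cached_state)) {
		/* re-uses the state cached by find_first_extent_bit() */
		convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT,
				   EXTENT_DIRTY, &cached_state, GFP_NOFS);
		filemap_fdatawrite_range(mapping, start, end);
		start = end + 1;
	}
	free_extent_state(cached_state);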
From 7a2d6a64645b38d7040bbd031c7a7b2655f5d976 Mon Sep 17 00:00:00 2001
From: Tsutomu Itoh
Date: Mon, 1 Oct 2012 03:07:15 -0600
Subject: Btrfs: remove unnecessary IS_ERR in bio_readpage_error()

The extent_map here is either a valid pointer or NULL, never an
ERR_PTR, so the IS_ERR() check is unnecessary.

Signed-off-by: Tsutomu Itoh
---
 fs/btrfs/extent_io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/btrfs/extent_io.c')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index e8ee39b73356..67fe401c3209 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2110,7 +2110,7 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
 	}
 	read_unlock(&em_tree->lock);

-	if (!em || IS_ERR(em)) {
+	if (!em) {
 		kfree(failrec);
 		return -EIO;
 	}
--
cgit v1.2.3


From 479ed9abdbeec5d9ed0005f3bee9c9bc06a102bb Mon Sep 17 00:00:00 2001
From: Robin Dong
Date: Sat, 29 Sep 2012 02:07:47 -0600
Subject: btrfs: move inline function code to header file

When building btrfs from kernel code, it will report:

  fs/btrfs/extent_io.h:281: warning: 'extent_buffer_page' declared inline after being called
  fs/btrfs/extent_io.h:281: warning: previous declaration of 'extent_buffer_page' was here
  fs/btrfs/extent_io.h:280: warning: 'num_extent_pages' declared inline after being called
  fs/btrfs/extent_io.h:280: warning: previous declaration of 'num_extent_pages' was here

because of the wrong declaration of inline functions.

Signed-off-by: Robin Dong
---
 fs/btrfs/extent_io.c | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'fs/btrfs/extent_io.c')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 67fe401c3209..b82d244a2ef5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3986,18 +3986,6 @@ out:
 	return ret;
 }

-inline struct page *extent_buffer_page(struct extent_buffer *eb,
-				       unsigned long i)
-{
-	return eb->pages[i];
-}
-
-inline unsigned long num_extent_pages(u64 start, u64 len)
-{
-	return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
-		(start >> PAGE_CACHE_SHIFT);
-}
-
 static void __free_extent_buffer(struct extent_buffer *eb)
 {
 #if LEAK_DEBUG
--
cgit v1.2.3
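The header half of this change is not shown here (the listing is limited to
extent_io.c), but presumably it turns the bare prototypes into full
definitions, since an inline function's body must be visible at every call
site.  In extent_io.h that would look something like:

	static inline unsigned long num_extent_pages(u64 start, u64 len)
	{
		return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
			(start >> PAGE_CACHE_SHIFT);
	}

	static inline struct page *extent_buffer_page(struct extent_buffer *eb,
						      unsigned long i)
	{
		return eb->pages[i];
	}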
From edd33c99c4ba26ebe17c1a3d65b4aba25482ed32 Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Fri, 5 Oct 2012 16:40:32 -0400
Subject: Btrfs: don't bug on enomem in readpage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Get rid of the BUG_ON(ret == -ENOMEM) in __extent_read_full_page.
Thanks,

Reported-by: Jérôme Poulin
Signed-off-by: Josef Bacik
---
 fs/btrfs/extent_io.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'fs/btrfs/extent_io.c')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index b82d244a2ef5..8c37cb64be7a 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -2751,12 +2751,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
 					 end_bio_extent_readpage, mirror_num,
 					 *bio_flags,
 					 this_bio_flag);
-			BUG_ON(ret == -ENOMEM);
-			nr++;
-			*bio_flags = this_bio_flag;
+			if (!ret) {
+				nr++;
+				*bio_flags = this_bio_flag;
+			}
 		}
-		if (ret)
+		if (ret) {
 			SetPageError(page);
+			unlock_extent(tree, cur, cur + iosize - 1);
+		}
 		cur = cur + iosize;
 		pg_offset += iosize;
 	}
--
cgit v1.2.3


From 4804b38293c020e7a2c841e86402f456c19d934d Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Fri, 5 Oct 2012 16:43:45 -0400
Subject: Btrfs: do not warn_on when we cannot alloc a page for an extent buffer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It's just annoying and the user will have gotten a nice OOM killer
message so they are already fully aware they are screwed :).  Thanks,

Reported-by: Jérôme Poulin
Signed-off-by: Josef Bacik
---
 fs/btrfs/extent_io.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs/btrfs/extent_io.c')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 8c37cb64be7a..7dc69b38548d 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4249,10 +4249,8 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,

 	for (i = 0; i < num_pages; i++, index++) {
 		p = find_or_create_page(mapping, index, GFP_NOFS);
-		if (!p) {
-			WARN_ON(1);
+		if (!p)
 			goto free_eb;
-		}

 		spin_lock(&mapping->private_lock);
 		if (PagePrivate(p)) {
--
cgit v1.2.3
From f60b1b49f6f72abb8bedfd49b758773bbda043c8 Mon Sep 17 00:00:00 2001
From: Josef Bacik
Date: Fri, 5 Oct 2012 16:53:34 -0400
Subject: Btrfs: fix page leakage

alloc_dummy_extent_buffer() will not free the first page in the eb
array if we fail to allocate a page; fix this.  Thanks,

Reported-by: David Sterba
Signed-off-by: Josef Bacik
---
 fs/btrfs/extent_io.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/btrfs/extent_io.c')

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 7dc69b38548d..64dc93f64bc0 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4104,7 +4104,7 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)

 	return eb;
 err:
-	for (i--; i > 0; i--)
+	for (i--; i >= 0; i--)
 		__free_page(eb->pages[i]);
 	__free_extent_buffer(eb);
 	return NULL;
--
cgit v1.2.3
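To spell out the off-by-one: if the allocation loop fails at index i, pages
0 through i-1 are live.  The old cleanup decremented i and then stopped at
i > 0, so eb->pages[0] was never freed; counting down to i >= 0 covers it.
Note that the fixed form relies on i being a signed type -- with an unsigned
counter, i >= 0 is always true and the loop would underflow, so the
equivalent underflow-safe form would be:

	/* free pages [0, i-1]; index i itself allocated nothing */
	for (; i > 0; i--)
		__free_page(eb->pages[i - 1]);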