From 14c7cca780bd210564ae964f57a8bb807d0b3dbf Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Sun, 11 Sep 2011 10:52:24 -0400 Subject: Btrfs: fix an oops when deleting snapshots We can reproduce this oops via the following steps: $ mkfs.btrfs /dev/sdb7 $ mount /dev/sdb7 /mnt/btrfs $ for ((i=0; i<3; i++)); do btrfs sub snap /mnt/btrfs /mnt/btrfs/s_$i; done $ rm -fr /mnt/btrfs/* $ rm -fr /mnt/btrfs/* then we'll get ------------[ cut here ]------------ kernel BUG at fs/btrfs/inode.c:2264! [...] Call Trace: [] btrfs_rmdir+0xf7/0x1b0 [btrfs] [] vfs_rmdir+0xa5/0xf0 [] do_rmdir+0x123/0x140 [] ? fput+0x197/0x260 [] ? audit_syscall_entry+0x1bf/0x1f0 [] sys_unlinkat+0x2d/0x40 [] system_call_fastpath+0x16/0x1b RIP [] btrfs_orphan_add+0x179/0x1a0 [btrfs] When it comes to btrfs_lookup_dentry, we may set a snapshot's inode->i_ino to BTRFS_EMPTY_SUBVOL_DIR_OBJECTID instead of BTRFS_FIRST_FREE_OBJECTID, while the snapshot's location.objectid remains unchanged. However, btrfs_ino() does not take this into account, and returns a wrong ino, and causes the oops. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/btrfs_inode.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 502b9e988679..d9f99a16edd6 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -176,7 +176,11 @@ static inline u64 btrfs_ino(struct inode *inode) { u64 ino = BTRFS_I(inode)->location.objectid; - if (ino <= BTRFS_FIRST_FREE_OBJECTID) + /* + * !ino: btree_inode + * type == BTRFS_ROOT_ITEM_KEY: subvol dir + */ + if (!ino || BTRFS_I(inode)->location.type == BTRFS_ROOT_ITEM_KEY) ino = inode->i_ino; return ino; } -- cgit v1.2.3 From e0b6d65be57fb37ca67b04ce8964546a74d2125c Mon Sep 17 00:00:00 2001 From: Sergei Trofimovich Date: Sun, 11 Sep 2011 10:52:24 -0400 Subject: btrfs: fix warning in iput for bad-inode iput() shouldn't be called for inodes in I_NEW state. We need to mark inode as constructed first. WARNING: at fs/inode.c:1309 iput+0x20b/0x210() Call Trace: [] warn_slowpath_common+0x7a/0xb0 [] warn_slowpath_null+0x15/0x20 [] iput+0x20b/0x210 [] btrfs_iget+0x1eb/0x4a0 [] btrfs_run_defrag_inodes+0x136/0x210 [] cleaner_kthread+0x17f/0x1a0 [] ? sub_preempt_count+0x9d/0xd0 [] ? transaction_kthread+0x280/0x280 [] kthread+0x96/0xa0 [] kernel_thread_helper+0x4/0x10 [] ? kthread_worker_fn+0x190/0x190 [] ? gs_change+0xb/0xb Signed-off-by: Sergei Trofimovich CC: Konstantin Khlebnikov Tested-by: David Sterba CC: Josef Bacik CC: Chris Mason Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 34195f9fc6bb..edd45f709989 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3952,7 +3952,6 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, struct btrfs_root *root, int *new) { struct inode *inode; - int bad_inode = 0; inode = btrfs_iget_locked(s, location->objectid, root); if (!inode) @@ -3968,15 +3967,12 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location, if (new) *new = 1; } else { - bad_inode = 1; + unlock_new_inode(inode); + iput(inode); + inode = ERR_PTR(-ESTALE); } } - if (bad_inode) { - iput(inode); - inode = ERR_PTR(-ESTALE); - } - return inode; } -- cgit v1.2.3 From ddf23b3fc6850bd4654d51ec9457fe7c77cde51e Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Sun, 11 Sep 2011 10:52:24 -0400 Subject: Btrfs: skip locking if searching the commit root in csum lookup It's not enough to just search the commit root, since we could be cow'ing the very block we need to search through, which would mean that its locked and we'll still deadlock. So use path->skip_locking as well. Thanks, Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/file-item.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index b910694f61ed..a1cb7821becd 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -183,8 +183,10 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, * read from the commit root and sidestep a nasty deadlock * between reading the free space cache and updating the csum tree. */ - if (btrfs_is_free_space_inode(root, inode)) + if (btrfs_is_free_space_inode(root, inode)) { path->search_commit_root = 1; + path->skip_locking = 1; + } disk_bytenr = (u64)bio->bi_sector << 9; if (dio) -- cgit v1.2.3 From 65450aa645b1ef7ed74e41c34b28d53333744978 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Sun, 11 Sep 2011 10:52:24 -0400 Subject: Btrfs: reset to appropriate block rsv after orphan operations While truncating free space cache, we forget to change trans->block_rsv back to the original one, but leave it with the orphan_block_rsv, and then with option inode_cache enable, it leads to countless warnings of btrfs_alloc_free_block and btrfs_orphan_commit_root: WARNING: at fs/btrfs/extent-tree.c:5711 btrfs_alloc_free_block+0x180/0x350 [btrfs]() ... WARNING: at fs/btrfs/inode.c:2193 btrfs_orphan_commit_root+0xb0/0xc0 [btrfs]() Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 6a265b9f85f2..41ac927401d0 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -190,9 +190,11 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, struct btrfs_path *path, struct inode *inode) { + struct btrfs_block_rsv *rsv; loff_t oldsize; int ret = 0; + rsv = trans->block_rsv; trans->block_rsv = root->orphan_block_rsv; ret = btrfs_block_rsv_check(trans, root, root->orphan_block_rsv, @@ -210,6 +212,8 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root, */ ret = btrfs_truncate_inode_items(trans, root, inode, 0, BTRFS_EXTENT_DATA_KEY); + + trans->block_rsv = rsv; if (ret) { WARN_ON(1); return ret; -- cgit v1.2.3 From 98c9942aca05fff198cd5ca629599cd193444809 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Sun, 11 Sep 2011 10:52:24 -0400 Subject: Btrfs: fix misuse of trans block rsv At the beginning of create_pending_snapshot, trans->block_rsv is set to pending->block_rsv and is used for snapshot things, however, when it is done, we do not recover it as will. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/transaction.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 7dc36fab4afc..e24b7964a155 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -884,6 +884,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root *tree_root = fs_info->tree_root; struct btrfs_root *root = pending->root; struct btrfs_root *parent_root; + struct btrfs_block_rsv *rsv; struct inode *parent_inode; struct dentry *parent; struct dentry *dentry; @@ -895,6 +896,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, u64 objectid; u64 root_flags; + rsv = trans->block_rsv; + new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); if (!new_root_item) { pending->error = -ENOMEM; @@ -1002,6 +1005,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, btrfs_orphan_post_snapshot(trans, pending); fail: kfree(new_root_item); + trans->block_rsv = rsv; btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); return 0; } -- cgit v1.2.3 From 5b397377e97d436fc2ed872fc53f85395bb984e0 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Sun, 11 Sep 2011 10:52:24 -0400 Subject: Btrfs: fix unclosed transaction handle in btrfs_cont_expand The function - btrfs_cont_expand() forgot to close the transaction handle before it jump out the while loop. Fix it. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index edd45f709989..c257af2ce9cb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3510,15 +3510,19 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size) err = btrfs_drop_extents(trans, inode, cur_offset, cur_offset + hole_size, &hint_byte, 1); - if (err) + if (err) { + btrfs_end_transaction(trans, root); break; + } err = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), cur_offset, 0, 0, hole_size, 0, hole_size, 0, 0, 0); - if (err) + if (err) { + btrfs_end_transaction(trans, root); break; + } btrfs_drop_extent_cache(inode, hole_start, last_byte - 1, 0); -- cgit v1.2.3 From 0c1a98c81413e00a6c379d898e06a09350d31926 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Sun, 11 Sep 2011 10:52:24 -0400 Subject: Btrfs: fix the file extent gap when doing direct IO When we write some data to the place that is beyond the end of the file in direct I/O mode, a data hole will be created. And Btrfs should insert a file extent item that point to this hole into the fs tree. But unfortunately Btrfs forgets doing it. The following is a simple way to reproduce it: # mkfs.btrfs /dev/sdc2 # mount /dev/sdc2 /test4 # touch /test4/a # dd if=/dev/zero of=/test4/a seek=8 count=1 bs=4K oflag=direct conv=nocreat,notrunc # umount /test4 # btrfsck /dev/sdc2 root 5 inode 257 errors 100 Reported-by: Tsutomu Itoh Signed-off-by: Miao Xie Tested-by: Tsutomu Itoh Signed-off-by: Chris Mason --- fs/btrfs/file.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 15e5a1cd8764..98d95bb5f253 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1075,12 +1075,6 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, start_pos = pos & ~((u64)root->sectorsize - 1); last_pos = ((u64)index + num_pages) << PAGE_CACHE_SHIFT; - if (start_pos > inode->i_size) { - err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); - if (err) - return err; - } - again: for (i = 0; i < num_pages; i++) { pages[i] = find_or_create_page(inode->i_mapping, index + i, @@ -1338,6 +1332,7 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, struct inode *inode = fdentry(file)->d_inode; struct btrfs_root *root = BTRFS_I(inode)->root; loff_t *ppos = &iocb->ki_pos; + u64 start_pos; ssize_t num_written = 0; ssize_t err = 0; size_t count, ocount; @@ -1386,6 +1381,15 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, file_update_time(file); BTRFS_I(inode)->sequence++; + start_pos = round_down(pos, root->sectorsize); + if (start_pos > i_size_read(inode)) { + err = btrfs_cont_expand(inode, i_size_read(inode), start_pos); + if (err) { + mutex_unlock(&inode->i_mutex); + goto out; + } + } + if (unlikely(file->f_flags & O_DIRECT)) { num_written = __btrfs_direct_write(iocb, iov, nr_segs, pos, ppos, count, ocount); -- cgit v1.2.3 From a39f75214358d715efa21e2bccf5a709d8649144 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Sun, 11 Sep 2011 10:52:25 -0400 Subject: Btrfs: fix wrong nbytes information of the inode If we write some data into the data hole of the file(no preallocation for this hole), Btrfs will allocate some disk space, and update nbytes of the inode, but the other element--disk_i_size needn't be updated. At this condition, we must update inode metadata though disk_i_size is not changed(btrfs_ordered_update_i_size() return 1). # mkfs.btrfs /dev/sdb1 # mount /dev/sdb1 /mnt # touch /mnt/a # truncate -s 856002 /mnt/a # dd if=/dev/zero of=/mnt/a bs=4K count=1 conv=nocreat,notrunc # umount /mnt # btrfsck /dev/sdb1 root 5 inode 257 errors 400 found 32768 bytes used err is 1 Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c257af2ce9cb..b94c0da3b43f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1786,7 +1786,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) &ordered_extent->list); ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); - if (!ret) { + if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { ret = btrfs_update_inode(trans, root, inode); BUG_ON(ret); } @@ -5788,7 +5788,7 @@ again: add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); ret = btrfs_ordered_update_i_size(inode, 0, ordered); - if (!ret) + if (!ret || !test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) btrfs_update_inode(trans, root, inode); ret = 0; out_unlock: -- cgit v1.2.3 From 4815053aba7f2304055745df820cd74a39fdaab2 Mon Sep 17 00:00:00 2001 From: David Sterba Date: Sun, 11 Sep 2011 10:52:25 -0400 Subject: btrfs: xattr: fix attribute removal An attribute is not removed by 'setfattr -x attr file' and remains visible in attr list. This makes xfstests/062 pass again. Signed-off-by: David Sterba Signed-off-by: Chris Mason --- fs/btrfs/xattr.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index d733b9cfea34..69565e5fc6a0 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -116,6 +116,12 @@ static int do_setxattr(struct btrfs_trans_handle *trans, if (ret) goto out; btrfs_release_path(path); + + /* + * remove the attribute + */ + if (!value) + goto out; } again: @@ -158,6 +164,9 @@ out: return ret; } +/* + * @value: "" makes the attribute to empty, NULL removes it + */ int __btrfs_setxattr(struct btrfs_trans_handle *trans, struct inode *inode, const char *name, const void *value, size_t size, int flags) -- cgit v1.2.3 From d72c0842ff0e71342857723bb65f35b71f57b264 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sun, 11 Sep 2011 10:52:25 -0400 Subject: Btrfs: calc file extent num_bytes correctly in file clone num_bytes should be 4096 not 12288. Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index b3d249d6eba7..028a4b8c12cd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2333,14 +2333,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, if (type == BTRFS_FILE_EXTENT_REG || type == BTRFS_FILE_EXTENT_PREALLOC) { + /* + * a | --- range to clone ---| b + * | ------------- extent ------------- | + */ + + /* substract range b */ + if (key.offset + datal > off + len) + datal = off + len - key.offset; + + /* substract range a */ if (off > key.offset) { datao += off - key.offset; datal -= off - key.offset; } - if (key.offset + datal > off + len) - datal = off + len - key.offset; - ret = btrfs_drop_extents(trans, inode, new_key.offset, new_key.offset + datal, -- cgit v1.2.3 From d525e8ab022cb000e6e31a515ba8c3cf0d9c6130 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Sun, 11 Sep 2011 10:52:25 -0400 Subject: Btrfs: add dummy extent if dst offset excceeds file end in You can see there's no file extent with range [0, 4096]. Check this by btrfsck: # btrfsck /dev/sda7 root 5 inode 258 errors 100 ... Signed-off-by: Li Zefan Signed-off-by: Chris Mason --- fs/btrfs/ioctl.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 028a4b8c12cd..63b4de1626d2 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2228,6 +2228,12 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, !IS_ALIGNED(destoff, bs)) goto out_unlock; + if (destoff > inode->i_size) { + ret = btrfs_cont_expand(inode, inode->i_size, destoff); + if (ret) + goto out_unlock; + } + /* do any pending delalloc/csum calc on src, one way or another, and lock file content */ while (1) { -- cgit v1.2.3