From 2c5816b4beccc8ba709144539f6fdd764f8fa49c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Tue, 10 Nov 2015 10:32:36 +0100 Subject: cuse: fix memory leak The problem is that fuse_dev_alloc() acquires an extra reference to cc.fc, and the original ref count is never dropped. Reported-by: Colin Ian King Signed-off-by: Miklos Szeredi Fixes: cc080e9e9be1 ("fuse: introduce per-instance fuse_dev structure") Cc: # v4.2+ --- fs/fuse/cuse.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index eae2c11268bc..8e3ee1936c7e 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -549,6 +549,8 @@ static int cuse_channel_release(struct inode *inode, struct file *file) unregister_chrdev_region(cc->cdev->dev, 1); cdev_del(cc->cdev); } + /* Base reference is now owned by "fud" */ + fuse_conn_put(&cc->fc); rc = fuse_dev_release(inode, file); /* puts the base reference */ -- cgit v1.2.3 From 3ca8138f014a913f98e6ef40e939868e1e9ea876 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Mon, 12 Oct 2015 16:33:44 +0300 Subject: fuse: break infinite loop in fuse_fill_write_pages() I got a report about unkillable task eating CPU. Further investigation shows, that the problem is in the fuse_fill_write_pages() function. If iov's first segment has zero length, we get an infinite loop, because we never reach iov_iter_advance() call. Fix this by calling iov_iter_advance() before repeating an attempt to copy data from userspace. A similar problem is described in 124d3b7041f ("fix writev regression: pan hanging unkillable and un-straceable"). If zero-length segment is followed by segment with invalid address, iov_iter_fault_in_readable() checks only first segment (zero-length), iov_iter_copy_from_user_atomic() skips it, fails at second and returns zero -> goto again without skipping zero-length segment. 
Patch calls iov_iter_advance() before goto again: we'll skip zero-length segment at second iteration and iov_iter_fault_in_readable() will detect invalid address. Special thanks to Konstantin Khlebnikov, who helped a lot with the commit description. Cc: Andrew Morton Cc: Maxim Patlasov Cc: Konstantin Khlebnikov Signed-off-by: Roman Gushchin Signed-off-by: Miklos Szeredi Fixes: ea9b9907b82a ("fuse: implement perform_write") Cc: --- fs/fuse/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/fuse/file.c b/fs/fuse/file.c index f523f2f04c19..195476a24148 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1049,6 +1049,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); flush_dcache_page(page); + iov_iter_advance(ii, tmp); if (!tmp) { unlock_page(page); page_cache_release(page); @@ -1061,7 +1062,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, req->page_descs[req->num_pages].length = tmp; req->num_pages++; - iov_iter_advance(ii, tmp); count += tmp; pos += tmp; offset += tmp; -- cgit v1.2.3 From a4dad1ae24f850410c4e60f22823cba1289b8d52 Mon Sep 17 00:00:00 2001 From: David Turner Date: Tue, 24 Nov 2015 14:34:37 -0500 Subject: ext4: Fix handling of extended tv_sec In ext4, the bottom two bits of {a,c,m}time_extra are used to extend the {a,c,m}time fields, deferring the year 2038 problem to the year 2446. When decoding these extended fields, for times whose bottom 32 bits would represent a negative number, sign extension causes the 64-bit extended timestamp to be negative as well, which is not what's intended. This patch corrects that issue, so that the only negative {a,c,m}times are those between 1901 and 1970 (as per 32-bit signed timestamps). Some older kernels might have written pre-1970 dates with 1,1 in the extra bits. This patch treats those incorrectly-encoded dates as pre-1970, instead of post-2311, until kernel 4.20 is released. 
Hopefully by then e2fsck will have fixed up the bad data. Also add a comment explaining the encoding of ext4's extra {a,c,m}time bits. Signed-off-by: David Turner Signed-off-by: Theodore Ts'o Reported-by: Mark Harris Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=23732 Cc: stable@vger.kernel.org --- fs/ext4/ext4.h | 51 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 44 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 750063f7a50c..cc7ca4e87144 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -727,19 +728,55 @@ struct move_extent { <= (EXT4_GOOD_OLD_INODE_SIZE + \ (einode)->i_extra_isize)) \ +/* + * We use an encoding that preserves the times for extra epoch "00": + * + * extra msb of adjust for signed + * epoch 32-bit 32-bit tv_sec to + * bits time decoded 64-bit tv_sec 64-bit tv_sec valid time range + * 0 0 1 -0x80000000..-0x00000001 0x000000000 1901-12-13..1969-12-31 + * 0 0 0 0x000000000..0x07fffffff 0x000000000 1970-01-01..2038-01-19 + * 0 1 1 0x080000000..0x0ffffffff 0x100000000 2038-01-19..2106-02-07 + * 0 1 0 0x100000000..0x17fffffff 0x100000000 2106-02-07..2174-02-25 + * 1 0 1 0x180000000..0x1ffffffff 0x200000000 2174-02-25..2242-03-16 + * 1 0 0 0x200000000..0x27fffffff 0x200000000 2242-03-16..2310-04-04 + * 1 1 1 0x280000000..0x2ffffffff 0x300000000 2310-04-04..2378-04-22 + * 1 1 0 0x300000000..0x37fffffff 0x300000000 2378-04-22..2446-05-10 + * + * Note that previous versions of the kernel on 64-bit systems would + * incorrectly use extra epoch bits 1,1 for dates between 1901 and + * 1970. e2fsck will correct this, assuming that it is run on the + * affected filesystem before 2242. + */ + static inline __le32 ext4_encode_extra_time(struct timespec *time) { - return cpu_to_le32((sizeof(time->tv_sec) > 4 ? 
- (time->tv_sec >> 32) & EXT4_EPOCH_MASK : 0) | - ((time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK)); + u32 extra = sizeof(time->tv_sec) > 4 ? + ((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK : 0; + return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS)); } static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) { - if (sizeof(time->tv_sec) > 4) - time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) - << 32; - time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; + if (unlikely(sizeof(time->tv_sec) > 4 && + (extra & cpu_to_le32(EXT4_EPOCH_MASK)))) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,20,0) + /* Handle legacy encoding of pre-1970 dates with epoch + * bits 1,1. We assume that by kernel version 4.20, + * everyone will have run fsck over the affected + * filesystems to correct the problem. (This + * backwards compatibility may be removed before this + * time, at the discretion of the ext4 developers.) + */ + u64 extra_bits = le32_to_cpu(extra) & EXT4_EPOCH_MASK; + if (extra_bits == 3 && ((time->tv_sec) & 0x80000000) != 0) + extra_bits = 0; + time->tv_sec += extra_bits << 32; +#else + time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32; +#endif + } + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; } #define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ -- cgit v1.2.3 From bc23f0c8d7ccd8d924c4e70ce311288cb3e61ea8 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 24 Nov 2015 15:34:35 -0500 Subject: jbd2: Fix unreclaimed pages after truncate in data=journal mode Ted and Namjae have reported that truncated pages don't get timely reclaimed after being truncated in data=journal mode. 
The following test triggers the issue easily: for (i = 0; i < 1000; i++) { pwrite(fd, buf, 1024*1024, 0); fsync(fd); fsync(fd); ftruncate(fd, 0); } The reason is that journal_unmap_buffer() finds that truncated buffers are not journalled (jh->b_transaction == NULL), they are part of checkpoint list of a transaction (jh->b_cp_transaction != NULL) and have been already written out (!buffer_dirty(bh)). We clean such buffers but we leave them in the checkpoint list. Since checkpoint transaction holds a reference to the journal head, these buffers cannot be released until the checkpoint transaction is cleaned up. And at that point we don't call release_buffer_page() anymore so pages detached from mapping are lingering in the system waiting for reclaim to find them and free them. Fix the problem by removing buffers from transaction checkpoint lists when journal_unmap_buffer() finds out they don't have to be there anymore. Reported-and-tested-by: Namjae Jeon Fixes: de1b794130b130e77ffa975bb58cb843744f9ae5 Signed-off-by: Jan Kara Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- fs/jbd2/transaction.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 6b8338ec2464..b99621277c66 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -2152,6 +2152,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, if (!buffer_dirty(bh)) { /* bdflush has written it. We can drop it now */ + __jbd2_journal_remove_checkpoint(jh); goto zap_buffer; } @@ -2181,6 +2182,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, /* The orphan record's transaction has * committed. 
We can cleanse this buffer */ clear_buffer_jbddirty(bh); + __jbd2_journal_remove_checkpoint(jh); goto zap_buffer; } } -- cgit v1.2.3 From e2c9e0b28e146c9a3bce21408f3c02e24ac7ac31 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 26 Nov 2015 15:20:19 -0500 Subject: ext4: fix an endianness bug in ext4_encrypted_zeroout() ex->ee_block is not host-endian (note that accesses of other fields of *ex right next to that line go through the helpers that do proper conversion from little-endian to host-endian; it might make sense to add similar for ->ee_block to avoid reintroducing that kind of bugs...) Cc: stable@vger.kernel.org # v4.1+ Signed-off-by: Al Viro Signed-off-by: Theodore Ts'o --- fs/ext4/crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index af06830bfc00..1a0835073663 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -389,7 +389,7 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) struct ext4_crypto_ctx *ctx; struct page *ciphertext_page = NULL; struct bio *bio; - ext4_lblk_t lblk = ex->ee_block; + ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); ext4_fsblk_t pblk = ext4_ext_pblock(ex); unsigned int len = ext4_ext_get_actual_len(ex); int ret, err = 0; -- cgit v1.2.3 From 5a1c7f47da9b32d0671e776b0f388095b7f91e2e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 26 Nov 2015 15:20:50 -0500 Subject: ext4: fix an endianness bug in ext4_encrypted_follow_link() applying le32_to_cpu() to 16bit value is a bad idea... 
Cc: stable@vger.kernel.org # v4.1+ Signed-off-by: Al Viro Signed-off-by: Theodore Ts'o --- fs/ext4/symlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index abe2401ce405..e8e7af62ac95 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -52,7 +52,7 @@ static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cook /* Symlink is encrypted */ sd = (struct ext4_encrypted_symlink_data *)caddr; cstr.name = sd->encrypted_path; - cstr.len = le32_to_cpu(sd->len); + cstr.len = le16_to_cpu(sd->len); if ((cstr.len + sizeof(struct ext4_encrypted_symlink_data) - 1) > max_size) { -- cgit v1.2.3 From 681c46b164d79aaa5d18f7519b39e29f9b441e40 Mon Sep 17 00:00:00 2001 From: Xu Cang Date: Thu, 26 Nov 2015 15:52:24 -0500 Subject: ext4: add "static" to ext4_seq_##name##_fops struct To fix a sparse warning, add static to ext4_seq_##name##_fops struct. Signed-off-by: Theodore Ts'o --- fs/ext4/sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 1b57c72f4a00..1420a3c614af 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -358,7 +358,7 @@ static int name##_open(struct inode *inode, struct file *file) \ return single_open(file, ext4_seq_##name##_show, PDE_DATA(inode)); \ } \ \ -const struct file_operations ext4_seq_##name##_fops = { \ +static const struct file_operations ext4_seq_##name##_fops = { \ .owner = THIS_MODULE, \ .open = name##_open, \ .read = seq_read, \ -- cgit v1.2.3 From 087ffd4eae9929afd06f6a709861df3c3508492a Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Fri, 4 Dec 2015 12:29:28 -0500 Subject: jbd2: fix null committed data return in undo_access Commit de92c8caf16c ("jbd2: speedup jbd2_journal_get_[write|undo]_access()") introduced jbd2_write_access_granted() to improve write|undo_access speed, but failed to check the status of b_committed_data which caused a kernel panic on ocfs2. 
[ 6538.405938] ------------[ cut here ]------------ [ 6538.406686] kernel BUG at fs/ocfs2/suballoc.c:2400! [ 6538.406686] invalid opcode: 0000 [#1] SMP [ 6538.406686] Modules linked in: ocfs2 nfsd lockd grace nfs_acl auth_rpcgss sunrpc autofs4 ocfs2_dlmfs ocfs2_stack_o2cb ocfs2_dlm ocfs2_nodemanager ocfs2_stackglue configfs sd_mod sg ip6t_REJECT nf_reject_ipv6 nf_conntrack_ipv6 nf_defrag_ipv6 xt_state nf_conntrack ip6table_filter ip6_tables be2iscsi iscsi_boot_sysfs bnx2i cnic uio cxgb4i cxgb4 cxgb3i libcxgbi cxgb3 mdio ib_iser rdma_cm ib_cm iw_cm ib_sa ib_mad ib_core ib_addr ipv6 iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi ppdev xen_kbdfront xen_netfront xen_fbfront parport_pc parport pcspkr i2c_piix4 acpi_cpufreq ext4 jbd2 mbcache xen_blkfront floppy pata_acpi ata_generic ata_piix cirrus ttm drm_kms_helper drm fb_sys_fops sysimgblt sysfillrect i2c_core syscopyarea dm_mirror dm_region_hash dm_log dm_mod [ 6538.406686] CPU: 1 PID: 16265 Comm: mmap_truncate Not tainted 4.3.0 #1 [ 6538.406686] Hardware name: Xen HVM domU, BIOS 4.3.1OVM 05/14/2014 [ 6538.406686] task: ffff88007c2bab00 ti: ffff880075b78000 task.ti: ffff880075b78000 [ 6538.406686] RIP: 0010:[] [] ocfs2_block_group_clear_bits+0x23b/0x250 [ocfs2] [ 6538.406686] RSP: 0018:ffff880075b7b7f8 EFLAGS: 00010246 [ 6538.406686] RAX: ffff8800760c5b40 RBX: ffff88006c06a000 RCX: ffffffffa06e6df0 [ 6538.406686] RDX: 0000000000000000 RSI: ffff88007a6f6ea0 RDI: ffff88007a760430 [ 6538.406686] RBP: ffff880075b7b878 R08: 0000000000000002 R09: 0000000000000001 [ 6538.406686] R10: ffffffffa06769be R11: 0000000000000000 R12: 0000000000000001 [ 6538.406686] R13: ffffffffa06a1750 R14: 0000000000000001 R15: ffff88007a6f6ea0 [ 6538.406686] FS: 00007f17fde30720(0000) GS:ffff88007f040000(0000) knlGS:0000000000000000 [ 6538.406686] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6538.406686] CR2: 0000000000601730 CR3: 000000007aea0000 CR4: 00000000000406e0 [ 6538.406686] Stack: [ 6538.406686] ffff88007c2bb5b0 
ffff880075b7b8e0 ffff88007a7604b0 ffff88006c640800 [ 6538.406686] ffff88007a7604b0 ffff880075d77390 0000000075b7b878 ffffffffa06a309d [ 6538.406686] ffff880075d752d8 ffff880075b7b990 ffff880075b7b898 0000000000000000 [ 6538.406686] Call Trace: [ 6538.406686] [] ? ocfs2_read_group_descriptor+0x6d/0xa0 [ocfs2] [ 6538.406686] [] _ocfs2_free_suballoc_bits+0xe4/0x320 [ocfs2] [ 6538.406686] [] ? ocfs2_put_slot+0xf0/0xf0 [ocfs2] [ 6538.406686] [] _ocfs2_free_clusters+0xee/0x210 [ocfs2] [ 6538.406686] [] ? ocfs2_put_slot+0xf0/0xf0 [ocfs2] [ 6538.406686] [] ? ocfs2_put_slot+0xf0/0xf0 [ocfs2] [ 6538.406686] [] ? ocfs2_extend_trans+0x50/0x1a0 [ocfs2] [ 6538.406686] [] ocfs2_free_clusters+0x15/0x20 [ocfs2] [ 6538.406686] [] ocfs2_replay_truncate_records+0xfc/0x290 [ocfs2] [ 6538.406686] [] ? ocfs2_start_trans+0xec/0x1d0 [ocfs2] [ 6538.406686] [] __ocfs2_flush_truncate_log+0x140/0x2d0 [ocfs2] [ 6538.406686] [] ? ocfs2_reserve_blocks_for_rec_trunc.clone.0+0x44/0x170 [ocfs2] [ 6538.406686] [] ocfs2_remove_btree_range+0x374/0x630 [ocfs2] [ 6538.406686] [] ? jbd2_journal_stop+0x25b/0x470 [jbd2] [ 6538.406686] [] ocfs2_commit_truncate+0x305/0x670 [ocfs2] [ 6538.406686] [] ? ocfs2_journal_access_eb+0x20/0x20 [ocfs2] [ 6538.406686] [] ocfs2_truncate_file+0x297/0x380 [ocfs2] [ 6538.406686] [] ? jbd2_journal_begin_ordered_truncate+0x64/0xc0 [jbd2] [ 6538.406686] [] ocfs2_setattr+0x572/0x860 [ocfs2] [ 6538.406686] [] ? current_fs_time+0x3f/0x50 [ 6538.406686] [] notify_change+0x1d7/0x340 [ 6538.406686] [] ? generic_getxattr+0x79/0x80 [ 6538.406686] [] do_truncate+0x66/0x90 [ 6538.406686] [] ? 
__audit_syscall_entry+0xb0/0x110 [ 6538.406686] [] do_sys_ftruncate.clone.0+0xf3/0x120 [ 6538.406686] [] SyS_ftruncate+0xe/0x10 [ 6538.406686] [] entry_SYSCALL_64_fastpath+0x12/0x71 [ 6538.406686] Code: 28 48 81 ee b0 04 00 00 48 8b 92 50 fb ff ff 48 8b 80 b0 03 00 00 48 39 90 88 00 00 00 0f 84 30 fe ff ff 0f 0b eb fe 0f 0b eb fe <0f> 0b 0f 1f 00 eb fb 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 [ 6538.406686] RIP [] ocfs2_block_group_clear_bits+0x23b/0x250 [ocfs2] [ 6538.406686] RSP [ 6538.691128] ---[ end trace 31cd7011d6770d7e ]--- [ 6538.694492] Kernel panic - not syncing: Fatal exception [ 6538.695484] Kernel Offset: disabled Fixes: de92c8caf16c("jbd2: speedup jbd2_journal_get_[write|undo]_access()") Cc: Signed-off-by: Junxiao Bi Signed-off-by: Theodore Ts'o --- fs/jbd2/transaction.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index b99621277c66..1498ad9f731a 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1009,7 +1009,8 @@ out: } /* Fast check whether buffer is already attached to the required transaction */ -static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh) +static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh, + bool undo) { struct journal_head *jh; bool ret = false; @@ -1036,6 +1037,9 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh) jh = READ_ONCE(bh->b_private); if (!jh) goto out; + /* For undo access buffer must have data copied */ + if (undo && !jh->b_committed_data) + goto out; if (jh->b_transaction != handle->h_transaction && jh->b_next_transaction != handle->h_transaction) goto out; @@ -1073,7 +1077,7 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) struct journal_head *jh; int rc; - if (jbd2_write_access_granted(handle, bh)) + if (jbd2_write_access_granted(handle, bh, false)) return 0; jh = 
jbd2_journal_add_journal_head(bh); @@ -1210,7 +1214,7 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) char *committed_data = NULL; JBUFFER_TRACE(jh, "entry"); - if (jbd2_write_access_granted(handle, bh)) + if (jbd2_write_access_granted(handle, bh, true)) return 0; jh = jbd2_journal_add_journal_head(bh); -- cgit v1.2.3 From 43d1c0eb7e11919f85200d2fce211173526f7304 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 20 Nov 2015 22:22:34 +0100 Subject: block: detach bdev inode from its wb in __blkdev_put() Since 52ebea749aae ("writeback: make backing_dev_info host cgroup-specific bdi_writebacks") inode, at some point in its lifetime, gets attached to a wb (struct bdi_writeback). Detaching happens on evict, in inode_detach_wb() called from __destroy_inode(), and involves updating wb. However, detaching an internal bdev inode from its wb in __destroy_inode() is too late. Its bdi and by extension root wb are embedded into struct request_queue, which has different lifetime rules and can be freed long before the final bdput() is called (can be from __fput() of a corresponding /dev inode, through dput() - evict() - bd_forget()). bdevs hold onto the underlying disk/queue pair only while opened; as soon as bdev is closed all bets are off. In fact, disk/queue can be gone before __blkdev_put() even returns: 1499 static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) 1500 { ... 
1518 if (bdev->bd_contains == bdev) { 1519 if (disk->fops->release) 1520 disk->fops->release(disk, mode); [ Driver puts its references to disk/queue ] 1521 } 1522 if (!bdev->bd_openers) { 1523 struct module *owner = disk->fops->owner; 1524 1525 disk_put_part(bdev->bd_part); 1526 bdev->bd_part = NULL; 1527 bdev->bd_disk = NULL; 1528 if (bdev != bdev->bd_contains) 1529 victim = bdev->bd_contains; 1530 bdev->bd_contains = NULL; 1531 1532 put_disk(disk); [ We put ours, the queue is gone The last bdput() would result in a write to invalid memory ] 1533 module_put(owner); ... 1539 } Since bdev inodes are special anyway, detach them in __blkdev_put() after clearing inode's dirty bits, turning the problematic inode_detach_wb() in __destroy_inode() into a noop. add_disk() grabs its disk->queue since 523e1d399ce0 ("block: make gendisk hold a reference to its queue"), so the old ->release comment is removed in favor of the new inode_detach_wb() comment. Cc: stable@vger.kernel.org # 4.2+, needs backporting Signed-off-by: Ilya Dryomov Acked-by: Tejun Heo Tested-by: Raghavendra K T Signed-off-by: Jens Axboe --- fs/block_dev.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index c25639e907bd..44d4a1e9244e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1523,11 +1523,14 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) WARN_ON_ONCE(bdev->bd_holders); sync_blockdev(bdev); kill_bdev(bdev); + + bdev_write_inode(bdev); /* - * ->release can cause the queue to disappear, so flush all - * dirty data before. + * Detaching bdev inode from its wb in __destroy_inode() + * is too late: the queue which embeds its bdi (along with + * root wb) can be gone as soon as we put_disk() below. 
*/ - bdev_write_inode(bdev); + inode_detach_wb(bdev->bd_inode); } if (bdev->bd_contains == bdev) { if (disk->fops->release) -- cgit v1.2.3 From acff81ec2c79492b180fade3c2894425cd35a545 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Fri, 4 Dec 2015 19:18:48 +0100 Subject: ovl: fix permission checking for setattr [Al Viro] The bug is in being too enthusiastic about optimizing ->setattr() away - instead of "copy verbatim with metadata" + "chmod/chown/utimes" (with the former being always safe and the latter failing in case of insufficient permissions) it tries to combine these two. Note that copyup itself will have to do ->setattr() anyway; _that_ is where the elevated capabilities are right. Having these two ->setattr() (one to set verbatim copy of metadata, another to do what overlayfs ->setattr() had been asked to do in the first place) combined is where it breaks. Signed-off-by: Miklos Szeredi Cc: Signed-off-by: Al Viro --- fs/overlayfs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index ec0c2a050043..961284936917 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -49,13 +49,13 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) if (err) goto out; - upperdentry = ovl_dentry_upper(dentry); - if (upperdentry) { + err = ovl_copy_up(dentry); + if (!err) { + upperdentry = ovl_dentry_upper(dentry); + mutex_lock(&upperdentry->d_inode->i_mutex); err = notify_change(upperdentry, attr, NULL); mutex_unlock(&upperdentry->d_inode->i_mutex); - } else { - err = ovl_copy_up_last(dentry, attr, false); } ovl_drop_write(dentry); out: -- cgit v1.2.3 From 0f7ff2dabbc95ed7a8019d142274f0c7e083577d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Dec 2015 12:31:07 -0500 Subject: ovl: get rid of the dead code left from broken (and disabled) optimizations Signed-off-by: Al Viro --- fs/overlayfs/copy_up.c | 23 ++++++----------------- fs/overlayfs/inode.c | 11 
++++------- fs/overlayfs/overlayfs.h | 3 +-- 3 files changed, 11 insertions(+), 26 deletions(-) (limited to 'fs') diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 871fcb67be97..0a8983492d91 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -195,8 +195,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, struct dentry *dentry, struct path *lowerpath, - struct kstat *stat, struct iattr *attr, - const char *link) + struct kstat *stat, const char *link) { struct inode *wdir = workdir->d_inode; struct inode *udir = upperdir->d_inode; @@ -240,8 +239,6 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, mutex_lock(&newdentry->d_inode->i_mutex); err = ovl_set_attr(newdentry, stat); - if (!err && attr) - err = notify_change(newdentry, attr, NULL); mutex_unlock(&newdentry->d_inode->i_mutex); if (err) goto out_cleanup; @@ -286,8 +283,7 @@ out_cleanup: * that point the file will have already been copied up anyway. */ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, - struct path *lowerpath, struct kstat *stat, - struct iattr *attr) + struct path *lowerpath, struct kstat *stat) { struct dentry *workdir = ovl_workdir(dentry); int err; @@ -345,26 +341,19 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } upperdentry = ovl_dentry_upper(dentry); if (upperdentry) { - unlock_rename(workdir, upperdir); + /* Raced with another copy-up? Nothing to do, then... */ err = 0; - /* Raced with another copy-up? 
Do the setattr here */ - if (attr) { - mutex_lock(&upperdentry->d_inode->i_mutex); - err = notify_change(upperdentry, attr, NULL); - mutex_unlock(&upperdentry->d_inode->i_mutex); - } - goto out_put_cred; + goto out_unlock; } err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, - stat, attr, link); + stat, link); if (!err) { /* Restore timestamps on parent (best effort) */ ovl_set_timestamps(upperdir, &pstat); } out_unlock: unlock_rename(workdir, upperdir); -out_put_cred: revert_creds(old_cred); put_cred(override_cred); @@ -406,7 +395,7 @@ int ovl_copy_up(struct dentry *dentry) ovl_path_lower(next, &lowerpath); err = vfs_getattr(&lowerpath, &stat); if (!err) - err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL); + err = ovl_copy_up_one(parent, next, &lowerpath, &stat); dput(parent); dput(next); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 961284936917..4060ffde8722 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -12,8 +12,7 @@ #include #include "overlayfs.h" -static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr, - bool no_data) +static int ovl_copy_up_truncate(struct dentry *dentry) { int err; struct dentry *parent; @@ -30,10 +29,8 @@ static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr, if (err) goto out_dput_parent; - if (no_data) - stat.size = 0; - - err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr); + stat.size = 0; + err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat); out_dput_parent: dput(parent); @@ -353,7 +350,7 @@ struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags) return ERR_PTR(err); if (file_flags & O_TRUNC) - err = ovl_copy_up_last(dentry, NULL, true); + err = ovl_copy_up_truncate(dentry); else err = ovl_copy_up(dentry); ovl_drop_write(dentry); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index ea5a40b06e3a..e17154aeaae4 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -194,7 +194,6 
@@ void ovl_cleanup(struct inode *dir, struct dentry *dentry); /* copy_up.c */ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, - struct path *lowerpath, struct kstat *stat, - struct iattr *attr); + struct path *lowerpath, struct kstat *stat); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); -- cgit v1.2.3 From 2788cc47f4593cca2c3c73c7bb82cd32b88c8ef7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 6 Dec 2015 12:33:02 -0500 Subject: Don't reset ->total_link_count on nested calls of vfs_path_lookup() we already zero it on outermost set_nameidata(), so initialization in path_init() is pointless and wrong. The same DoS exists on pre-4.2 kernels, but there a slightly different fix will be needed. Cc: stable@vger.kernel.org # v4.2 Signed-off-by: Al Viro --- fs/namei.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs') diff --git a/fs/namei.c b/fs/namei.c index d84d7c7515fc..0c3974cd3ecd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1996,7 +1996,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags) nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; nd->depth = 0; - nd->total_link_count = 0; if (flags & LOOKUP_ROOT) { struct dentry *root = nd->root.dentry; struct inode *inode = root->d_inode; -- cgit v1.2.3 From 756b9b37cfb2e3dc76b2e43a8c097402ac736e07 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 7 Dec 2015 12:52:23 -0800 Subject: SUNRPC: Fix callback channel The NFSv4.1 callback channel is currently broken because the receive message will keep shrinking because the backchannel receive buffer size never gets reset. The easiest solution to this problem is instead of changing the receive buffer, to rather adjust the copied request. 
Fixes: 38b7631fbe42 ("nfs4: limit callback decoding to received bytes") Cc: Benjamin Coddington Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/callback_xdr.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index beac58b0e09c..646cdac73488 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -78,8 +78,7 @@ static __be32 *read_buf(struct xdr_stream *xdr, int nbytes) p = xdr_inline_decode(xdr, nbytes); if (unlikely(p == NULL)) - printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed " - "or truncated request.\n"); + printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n"); return p; } @@ -890,7 +889,6 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r struct cb_compound_hdr_arg hdr_arg = { 0 }; struct cb_compound_hdr_res hdr_res = { NULL }; struct xdr_stream xdr_in, xdr_out; - struct xdr_buf *rq_arg = &rqstp->rq_arg; __be32 *p, status; struct cb_process_state cps = { .drc_status = 0, @@ -902,8 +900,7 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r dprintk("%s: start\n", __func__); - rq_arg->len = rq_arg->head[0].iov_len + rq_arg->page_len; - xdr_init_decode(&xdr_in, rq_arg, rq_arg->head[0].iov_base); + xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); xdr_init_encode(&xdr_out, &rqstp->rq_res, p); -- cgit v1.2.3 From 4ad78628445d26e5e9487b2e8f23274ad7b0f5d3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Dec 2015 03:07:22 -0500 Subject: 9p: ->evict_inode() should kick out ->i_data, not ->i_mapping For block devices the pagecache is associated with the inode on bdevfs, not with the aliasing ones on the mountable filesystems. The latter have its own ->i_data empty and ->i_mapping pointing to the (unique per major/minor) bdevfs inode. 
That guarantees cache coherence between all block device inodes with the same device number. Eviction of an alias inode has no business trying to evict the pages belonging to bdevfs one; moreover, ->i_mapping is only safe to access when the thing is opened. At the time of ->evict_inode() the victim is definitely *not* opened. We are about to kill the address space embedded into struct inode (inode->i_data) and that's what we need to empty of any pages. 9p instance tries to empty inode->i_mapping instead, which is both unsafe and bogus - if we have several device nodes with the same device number in different places, closing one of them should not try to empty the (shared) page cache. Fortunately, other instances in the tree are OK; they are evicting from &inode->i_data instead, as 9p one should. Cc: stable@vger.kernel.org # v2.6.32+, ones prior to 2.6.36 need only half of that Reported-by: "Suzuki K. Poulose" Tested-by: "Suzuki K. Poulose" Signed-off-by: Al Viro --- fs/9p/vfs_inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 699941e90667..511078586fa1 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -451,9 +451,9 @@ void v9fs_evict_inode(struct inode *inode) { struct v9fs_inode *v9inode = V9FS_I(inode); - truncate_inode_pages_final(inode->i_mapping); + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); - filemap_fdatawrite(inode->i_mapping); + filemap_fdatawrite(&inode->i_data); v9fs_cache_inode_put_cookie(inode); /* clunk the fid stashed in writeback_fid */ -- cgit v1.2.3 From 2d4594acbf6d8f75a27f3578476b6a27d8b13ebb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 8 Dec 2015 12:22:47 -0500 Subject: fix the regression from "direct-io: Fix negative return from dio read beyond eof" Sure, it's better to bail out of past-the-eof read and return 0 than return a bogus negative value on such. 
Only we'd better make sure we are bailing out with 0 and not -ENOMEM... Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- fs/direct-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/direct-io.c b/fs/direct-io.c index 1c75a3a07f8f..602e8441bc0f 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1175,6 +1175,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, if (dio->flags & DIO_LOCKING) mutex_unlock(&inode->i_mutex); kmem_cache_free(dio_cache, dio); + retval = 0; goto out; } -- cgit v1.2.3 From 3066a9670ba6f57dd5046640a2494912a2110f1e Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Fri, 11 Dec 2015 13:40:38 -0800 Subject: osd fs: __r4w_get_page rely on PageUptodate for uptodate Commit 42cb14b110a5 ("mm: migrate dirty page without clear_page_dirty_for_io etc") simplified the migration of a PageDirty pagecache page: one stat needs moving from zone to zone and that's about all. It's convenient and safest for it to shift the PageDirty bit from old page to new, just before updating the zone stats: before copying data and marking the new PageUptodate. This is all done while both pages are isolated and locked, just as before; and just as before, there's a moment when the new page is visible in the radix_tree, but not yet PageUptodate. What's new is that it may now be briefly visible as PageDirty before it is PageUptodate. When I scoured the tree to see if this could cause a problem anywhere, the only places I found were in two similar functions __r4w_get_page(): which look up a page with find_get_page() (not using page lock), then claim it's uptodate if it's PageDirty or PageWriteback or PageUptodate. I'm not sure whether that was right before, but now it might be wrong (on rare occasions): only claim the page is uptodate if PageUptodate. Or perhaps the page in question could never be migratable anyway? 
Signed-off-by: Hugh Dickins Tested-by: Boaz Harrosh Cc: Benny Halevy Cc: Trond Myklebust Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exofs/inode.c | 5 +---- fs/nfs/objlayout/objio_osd.c | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 73c64daa0f55..60f03b78914e 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -592,10 +592,7 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) } unlock_page(page); } - if (PageDirty(page) || PageWriteback(page)) - *uptodate = true; - else - *uptodate = PageUptodate(page); + *uptodate = PageUptodate(page); EXOFS_DBGMSG2("index=0x%lx uptodate=%d\n", index, *uptodate); return page; } else { diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 5c0c6b58157f..9aebffb40505 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -476,10 +476,7 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) } unlock_page(page); } - if (PageDirty(page) || PageWriteback(page)) - *uptodate = true; - else - *uptodate = PageUptodate(page); + *uptodate = PageUptodate(page); dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate); return page; } -- cgit v1.2.3 From 854ee2e944b4daf795e32562a7d2f9e90ab5a6a8 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Fri, 11 Dec 2015 13:41:03 -0800 Subject: ocfs2: fix SGID not inherited issue Commit 8f1eb48758aa ("ocfs2: fix umask ignored issue") introduced an issue: the SGID of a sub dir was not inherited from its parent dir. This is because SGID is set in "inode->i_mode" in ocfs2_get_init_inode(), but is later overwritten by "mode", which doesn't have SGID set.
Fixes: 8f1eb48758aa ("ocfs2: fix umask ignored issue") Signed-off-by: Junxiao Bi Cc: Mark Fasheh Cc: Joel Becker Acked-by: Srinivas Eeda Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/namei.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index a03f6f433075..3123408da935 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -367,13 +367,11 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } - status = posix_acl_create(dir, &mode, &default_acl, &acl); + status = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); if (status) { mlog_errno(status); goto leave; } - /* update inode->i_mode after mask with "umask". */ - inode->i_mode = mode; handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb, S_ISDIR(mode), -- cgit v1.2.3 From dfd01f026058a59a513f8a365b439a0681b803af Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sun, 13 Dec 2015 22:11:16 +0100 Subject: sched/wait: Fix the signal handling fix Jan Stancek reported that I wrecked things for him by fixing things for Vladimir :/ His report was due to an UNINTERRUPTIBLE wait getting -EINTR, which should not be possible, however my previous patch made this possible by unconditionally checking signal_pending(). We cannot use current->state as was done previously, because it can be changed as early as the instruction after the store to that variable. We must instead pass the initial state along and use that.
Fixes: 68985633bccb ("sched/wait: Fix signal handling in bit wait helpers") Reported-by: Jan Stancek Reported-by: Chris Mason Tested-by: Jan Stancek Tested-by: Vladimir Murzin Tested-by: Chris Mason Reviewed-by: Paul Turner Cc: Ingo Molnar Cc: tglx@linutronix.de Cc: Oleg Nesterov Cc: hpa@zytor.com Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Linus Torvalds --- fs/cifs/inode.c | 6 +++--- fs/nfs/inode.c | 6 +++--- fs/nfs/internal.h | 2 +- fs/nfs/pagelist.c | 2 +- fs/nfs/pnfs.c | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 6b66dd5d1540..a329f5ba35aa 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1831,11 +1831,11 @@ cifs_invalidate_mapping(struct inode *inode) * @word: long word containing the bit lock */ static int -cifs_wait_bit_killable(struct wait_bit_key *key) +cifs_wait_bit_killable(struct wait_bit_key *key, int mode) { - if (fatal_signal_pending(current)) - return -ERESTARTSYS; freezable_schedule_unsafe(); + if (signal_pending_state(mode, current)) + return -ERESTARTSYS; return 0; } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 31b0a52223a7..c7e8b87da5b2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -75,11 +75,11 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks * @word: long word containing the bit lock */ -int nfs_wait_bit_killable(struct wait_bit_key *key) +int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) { - if (fatal_signal_pending(current)) - return -ERESTARTSYS; freezable_schedule_unsafe(); + if (signal_pending_state(mode, current)) + return -ERESTARTSYS; return 0; } EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 56cfde26fb9c..9dea85f7f918 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -379,7 +379,7 @@ extern int nfs_drop_inode(struct inode *); extern void nfs_clear_inode(struct inode *); extern 
void nfs_evict_inode(struct inode *); void nfs_zap_acl_cache(struct inode *inode); -extern int nfs_wait_bit_killable(struct wait_bit_key *key); +extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); /* super.c */ extern const struct super_operations nfs_sops; diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index fe3ddd20ff89..452a011ba0d8 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -129,7 +129,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c) set_bit(NFS_IO_INPROGRESS, &c->flags); if (atomic_read(&c->io_count) == 0) break; - ret = nfs_wait_bit_killable(&q.key); + ret = nfs_wait_bit_killable(&q.key, TASK_KILLABLE); } while (atomic_read(&c->io_count) != 0 && !ret); finish_wait(wq, &q.wait); return ret; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 5a8ae2125b50..bec0384499f7 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1466,11 +1466,11 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, } /* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */ -static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key) +static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key, int mode) { if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags)) return 1; - return nfs_wait_bit_killable(key); + return nfs_wait_bit_killable(key, mode); } static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) -- cgit v1.2.3