From 12706394bcaa48e3d5e19c97d7b4e5683ebb12fb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Sun, 10 Jul 2011 22:37:50 -0400 Subject: ext4: add tracepoint for ext4_journal_start This will help debug who is responsible for starting a jbd2 transaction. Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 9ea71aa864b3..7910e61809e7 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -269,6 +269,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) journal_t *journal; handle_t *handle; + trace_ext4_journal_start(sb, nblocks, _RET_IP_); if (sb->s_flags & MS_RDONLY) return ERR_PTR(-EROFS); -- cgit v1.2.3 From 3eb08658431abd65c0fe6855d1860859c2d416f7 Mon Sep 17 00:00:00 2001 From: Dan Ehrenberg Date: Sun, 17 Jul 2011 21:18:51 -0400 Subject: ext4: ignore a stripe width of 1 If the stripe width was set to 1, then this patch will ignore that stripe width and ext4 will act as if the stripe width were 0 with respect to optimizing allocations. 
Signed-off-by: Dan Ehrenberg Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 7910e61809e7..143d763729b4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -2384,17 +2384,25 @@ static unsigned long ext4_get_stripe_size(struct ext4_sb_info *sbi) unsigned long stride = le16_to_cpu(sbi->s_es->s_raid_stride); unsigned long stripe_width = le32_to_cpu(sbi->s_es->s_raid_stripe_width); + int ret; if (sbi->s_stripe && sbi->s_stripe <= sbi->s_blocks_per_group) - return sbi->s_stripe; - - if (stripe_width <= sbi->s_blocks_per_group) - return stripe_width; + ret = sbi->s_stripe; + else if (stripe_width <= sbi->s_blocks_per_group) + ret = stripe_width; + else if (stride <= sbi->s_blocks_per_group) + ret = stride; + else + ret = 0; - if (stride <= sbi->s_blocks_per_group) - return stride; + /* + * If the stripe width is 1, this makes no sense and + * we set it to 0 to turn off stripe handling code. + */ + if (ret <= 1) + ret = 0; - return 0; + return ret; } /* sysfs supprt */ -- cgit v1.2.3 From 8f82f840ec6ab873f520364d443ff6fa1b3f8e22 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Tue, 26 Jul 2011 21:35:44 -0400 Subject: ext4: prevent parallel resizers by atomic bit ops Before this patch, parallel resizers are allowed and protected by a mutex lock, actually, there is no need to support parallel resizer, so this patch prevents parallel resizers by atomic bit ops, like lock_page() and unlock_page() do. To do this, the patch removed the mutex lock s_resize_lock from struct ext4_sb_info and added an unsigned long field named s_resize_flags which indicates if there is a resizer. 
Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 143d763729b4..cfe9f39c4ba2 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3500,7 +3500,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ mutex_init(&sbi->s_orphan_lock); - mutex_init(&sbi->s_resize_lock); + sbi->s_resize_flags = 0; sb->s_root = NULL; -- cgit v1.2.3 From 9933fc0ac1ac14b795819cd63d05ea92112f690a Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Aug 2011 08:45:02 -0400 Subject: ext4: introduce ext4_kvmalloc(), ext4_kzalloc(), and ext4_kvfree() Introduce new helper functions which try kmalloc, and then fall back to vmalloc if necessary, and use them for allocating and deallocating s_flex_groups. Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 54 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 18 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index cfe9f39c4ba2..658f5864e9cf 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -110,6 +110,35 @@ static struct file_system_type ext3_fs_type = { #define IS_EXT3_SB(sb) (0) #endif +void *ext4_kvmalloc(size_t size, gfp_t flags) +{ + void *ret; + + ret = kmalloc(size, flags); + if (!ret) + ret = __vmalloc(size, flags, PAGE_KERNEL); + return ret; +} + +void *ext4_kvzalloc(size_t size, gfp_t flags) +{ + void *ret; + + ret = kmalloc(size, flags); + if (!ret) + ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); + return ret; +} + +void ext4_kvfree(void *ptr) +{ + if (is_vmalloc_addr(ptr)) + vfree(ptr); + else + kfree(ptr); + +} + ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, struct ext4_group_desc *bg) { @@ -791,10 +820,7 @@ static void ext4_put_super(struct super_block *sb) for (i = 0; i < 
sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); kfree(sbi->s_group_desc); - if (is_vmalloc_addr(sbi->s_flex_groups)) - vfree(sbi->s_flex_groups); - else - kfree(sbi->s_flex_groups); + ext4_kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); @@ -1977,15 +2003,11 @@ static int ext4_fill_flex_info(struct super_block *sb) ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << EXT4_DESC_PER_BLOCK_BITS(sb))) / groups_per_flex; size = flex_group_count * sizeof(struct flex_groups); - sbi->s_flex_groups = kzalloc(size, GFP_KERNEL); + sbi->s_flex_groups = ext4_kvzalloc(size, GFP_KERNEL); if (sbi->s_flex_groups == NULL) { - sbi->s_flex_groups = vzalloc(size); - if (sbi->s_flex_groups == NULL) { - ext4_msg(sb, KERN_ERR, - "not enough memory for %u flex groups", - flex_group_count); - goto failed; - } + ext4_msg(sb, KERN_ERR, "not enough memory for %u flex groups", + flex_group_count); + goto failed; } for (i = 0; i < sbi->s_groups_count; i++) { @@ -3750,12 +3772,8 @@ failed_mount_wq: } failed_mount3: del_timer(&sbi->s_err_report); - if (sbi->s_flex_groups) { - if (is_vmalloc_addr(sbi->s_flex_groups)) - vfree(sbi->s_flex_groups); - else - kfree(sbi->s_flex_groups); - } + if (sbi->s_flex_groups) + ext4_kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); -- cgit v1.2.3 From f18a5f21c25707b4fe64b326e2b4d150565e7300 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 1 Aug 2011 08:45:38 -0400 Subject: ext4: use ext4_kvzalloc()/ext4_kvmalloc() for s_group_desc and s_group_info Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 658f5864e9cf..e2d88baf91d3 100644 --- a/fs/ext4/super.c 
+++ b/fs/ext4/super.c @@ -819,7 +819,7 @@ static void ext4_put_super(struct super_block *sb) for (i = 0; i < sbi->s_gdb_count; i++) brelse(sbi->s_group_desc[i]); - kfree(sbi->s_group_desc); + ext4_kvfree(sbi->s_group_desc); ext4_kvfree(sbi->s_flex_groups); percpu_counter_destroy(&sbi->s_freeblocks_counter); percpu_counter_destroy(&sbi->s_freeinodes_counter); @@ -3439,8 +3439,9 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) (EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb))); db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); - sbi->s_group_desc = kmalloc(db_count * sizeof(struct buffer_head *), - GFP_KERNEL); + sbi->s_group_desc = ext4_kvmalloc(db_count * + sizeof(struct buffer_head *), + GFP_KERNEL); if (sbi->s_group_desc == NULL) { ext4_msg(sb, KERN_ERR, "not enough memory"); goto failed_mount; @@ -3783,7 +3784,7 @@ failed_mount3: failed_mount2: for (i = 0; i < db_count; i++) brelse(sbi->s_group_desc[i]); - kfree(sbi->s_group_desc); + ext4_kvfree(sbi->s_group_desc); failed_mount: if (sbi->s_proc) { remove_proc_entry(sb->s_id, ext4_proc_root); -- cgit v1.2.3 From db9481c0476c6475d058ac7ecebb5a822b43cc99 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Wed, 3 Aug 2011 14:57:11 -0400 Subject: ext4: use kzalloc in ext4_kzalloc() Commit 9933fc0a (ext4: introduce ext4_kvmalloc(), ext4_kzalloc(), and ext4_kvfree()) introduced wrappers around k*alloc/vmalloc but introduced a typo in ext4_kvzalloc() by not using kzalloc() but kmalloc(). 
Signed-off-by: Mathias Krause Signed-off-by: "Theodore Ts'o" --- fs/ext4/super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index e2d88baf91d3..4687fea0c00f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -124,7 +124,7 @@ void *ext4_kvzalloc(size_t size, gfp_t flags) { void *ret; - ret = kmalloc(size, flags); + ret = kzalloc(size, flags); if (!ret) ret = __vmalloc(size, flags | __GFP_ZERO, PAGE_KERNEL); return ret; -- cgit v1.2.3 From 2581fdc810889fdea97689cb62481201d579c796 Mon Sep 17 00:00:00 2001 From: Jiaying Zhang Date: Sat, 13 Aug 2011 12:17:13 -0400 Subject: ext4: call ext4_ioend_wait and ext4_flush_completed_IO in ext4_evict_inode Flush inode's i_completed_io_list before calling ext4_ioend_wait to prevent the following deadlock scenario: A page fault happens while some process is writing inode A. During page fault, shrink_icache_memory is called that in turn evicts another inode B. Inode B has some pending io_end work so it calls ext4_ioend_wait() that waits for inode B's i_ioend_count to become zero. However, inode B's ioend work was queued behind some of inode A's ioend work on the same cpu's ext4-dio-unwritten workqueue. As the ext4-dio-unwritten thread on that cpu is processing inode A's ioend work, it tries to grab inode A's i_mutex lock. Since the i_mutex lock of inode A is still held from before the page fault happened, we enter a deadlock. Also moves ext4_flush_completed_IO and ext4_ioend_wait from ext4_destroy_inode() to ext4_evict_inode(). During inode deletion, ext4_evict_inode() is called before ext4_destroy_inode() and in ext4_evict_inode(), we may call ext4_truncate() without holding i_mutex lock. As a result, there is a race between flush_completed_IO that is called from ext4_ext_truncate() and ext4_end_io_work, which may cause corruption on an io_end structure. 
This change moves ext4_flush_completed_IO and ext4_ioend_wait from ext4_destroy_inode() to ext4_evict_inode() to resolve the race between ext4_truncate() and ext4_end_io_work during inode deletion. Signed-off-by: Jiaying Zhang Signed-off-by: "Theodore Ts'o" Cc: stable@kernel.org --- fs/ext4/super.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/ext4/super.c') diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 4687fea0c00f..44d0c8db2239 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -919,7 +919,6 @@ static void ext4_i_callback(struct rcu_head *head) static void ext4_destroy_inode(struct inode *inode) { - ext4_ioend_wait(inode); if (!list_empty(&(EXT4_I(inode)->i_orphan))) { ext4_msg(inode->i_sb, KERN_ERR, "Inode %lu (%p): orphan list check failed!", -- cgit v1.2.3