From d863dc3614e489e11808f940a612b520ce1dff91 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 27 May 2013 23:32:35 -0400 Subject: Revert "ext4: remove no longer used functions in inode.c" This reverts commit ccb4d7af914e0fe9b2f1022f8ea6c300463fd5e6. This commit reintroduces functions ext4_block_truncate_page() and ext4_block_zero_page_range() which have been previously removed in favour of ext4_discard_partial_page_buffers(). In future commits we want to reintroduce those functions and remove ext4_discard_partial_page_buffers() since it is duplicating some code and also partially duplicating work of truncate_pagecache_range(), moreover the old implementation was much clearer. Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 5aae3d12d400..9f9719f08490 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2096,6 +2096,10 @@ extern int ext4_alloc_da_blocks(struct inode *inode); extern void ext4_set_aops(struct inode *inode); extern int ext4_writepage_trans_blocks(struct inode *); extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks); +extern int ext4_block_truncate_page(handle_t *handle, + struct address_space *mapping, loff_t from); +extern int ext4_block_zero_page_range(handle_t *handle, + struct address_space *mapping, loff_t from, loff_t length); extern int ext4_discard_partial_page_buffers(handle_t *handle, struct address_space *mapping, loff_t from, loff_t length, int flags); -- cgit v1.2.3 From a87dd18ce24dee5da1e9eb44bf8d8d48e0957efd Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 27 May 2013 23:32:35 -0400 Subject: ext4: use ext4_zero_partial_blocks in punch_hole We're going to get rid of ext4_discard_partial_page_buffers() since it is duplicating some code and also partially duplicating work of truncate_pagecache_range(), moreover the old implementation was much clearer. 
Now that truncate_inode_pages_range() can handle truncating non page aligned regions we can use this to invalidate and zero out block aligned region of the punched out range and then use ext4_block_truncate_page() to zero the unaligned blocks on the start and end of the range. This will greatly simplify the punch hole code. Moreover after this commit we can get rid of the ext4_discard_partial_page_buffers() completely. We also introduce function ext4_prepare_punch_hole() to do some common operations before we attempt to do the actual punch hole on indirect or extent file which saves us some code duplication. This has been tested on ppc64 with 1k block size with fsx and xfstests without any problems. Signed-off-by: Lukas Czerner Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 9f9719f08490..2d4b0aa74859 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2100,6 +2100,8 @@ extern int ext4_block_truncate_page(handle_t *handle, struct address_space *mapping, loff_t from); extern int ext4_block_zero_page_range(handle_t *handle, struct address_space *mapping, loff_t from, loff_t length); +extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, + loff_t lstart, loff_t lend); extern int ext4_discard_partial_page_buffers(handle_t *handle, struct address_space *mapping, loff_t from, loff_t length, int flags); -- cgit v1.2.3 From c121ffd013e5ab7c04414a5f0cb3604731775174 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Mon, 27 May 2013 23:32:35 -0400 Subject: ext4: remove unused discard_partial_page_buffers The discard_partial_page_buffers is no longer used anywhere so we can simply remove it including the *_no_lock variant and EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED define. 
Signed-off-by: Lukas Czerner Reviewed-by: Jan Kara Signed-off-by: Theodore Ts'o --- fs/ext4/ext4.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2d4b0aa74859..019db3c1bc3b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -580,11 +580,6 @@ enum { #define EXT4_FREE_BLOCKS_NOFREE_FIRST_CLUSTER 0x0010 #define EXT4_FREE_BLOCKS_NOFREE_LAST_CLUSTER 0x0020 -/* - * Flags used by ext4_discard_partial_page_buffers - */ -#define EXT4_DISCARD_PARTIAL_PG_ZERO_UNMAPPED 0x0001 - /* * ioctl commands */ @@ -2102,9 +2097,6 @@ extern int ext4_block_zero_page_range(handle_t *handle, struct address_space *mapping, loff_t from, loff_t length); extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode, loff_t lstart, loff_t lend); -extern int ext4_discard_partial_page_buffers(handle_t *handle, - struct address_space *mapping, loff_t from, - loff_t length, int flags); extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); extern qsize_t *ext4_get_reserved_space(struct inode *inode); extern void ext4_da_update_reserve_space(struct inode *inode, -- cgit v1.2.3 From 97a851ed71cd9cc2542955e92a001c6ea3d21d35 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 11:58:58 -0400 Subject: ext4: use io_end for multiple bios Change writeback path to create just one io_end structure for the extent to which we submit IO and share it among bios writing that extent. This prevents needless splitting and joining of unwritten extents when they cannot be submitted as a single bio. Bugs in ENOMEM handling found by Linux File System Verification project (linuxtesting.org) and fixed by Alexey Khoroshilov . 
CC: Alexey Khoroshilov Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 019db3c1bc3b..82d2b6000a61 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -209,6 +209,7 @@ typedef struct ext4_io_end { ssize_t size; /* size of the extent */ struct kiocb *iocb; /* iocb struct for AIO */ int result; /* error value for AIO */ + atomic_t count; /* reference counter */ } ext4_io_end_t; struct ext4_io_submit { @@ -2648,11 +2649,14 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, /* page-io.c */ extern int __init ext4_init_pageio(void); -extern void ext4_add_complete_io(ext4_io_end_t *io_end); extern void ext4_exit_pageio(void); extern void ext4_ioend_shutdown(struct inode *); -extern void ext4_free_io_end(ext4_io_end_t *io); extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); +extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end); +extern int ext4_put_io_end(ext4_io_end_t *io_end); +extern void ext4_put_io_end_defer(ext4_io_end_t *io_end); +extern void ext4_io_submit_init(struct ext4_io_submit *io, + struct writeback_control *wbc); extern void ext4_end_io_work(struct work_struct *work); extern void ext4_io_submit(struct ext4_io_submit *io); extern int ext4_bio_write_page(struct ext4_io_submit *io, -- cgit v1.2.3 From f2d50a65c93cfe718742bc85dff55bf8f11967b6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 12:51:16 -0400 Subject: ext4: deprecate max_writeback_mb_bump sysfs attribute This attribute is now unused so deprecate it. We still show the old default value to keep some compatibility but we don't allow writing to that attribute anymore. 
Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 82d2b6000a61..46674058d251 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1241,7 +1241,6 @@ struct ext4_sb_info { unsigned int s_mb_stats; unsigned int s_mb_order2_reqs; unsigned int s_mb_group_prealloc; - unsigned int s_max_writeback_mb_bump; unsigned int s_max_dir_size_kb; /* where last allocation was done - for stream allocation */ unsigned long s_mb_last_group; -- cgit v1.2.3 From fa55a0ed0386e1fcbb8a229a06a5c70477d0d6e5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 12:56:55 -0400 Subject: ext4: improve writepage credit estimate for files with indirect blocks ext4_ind_trans_blocks() wrongly used 'chunk' argument to decide whether blocks mapped are logically contiguous. That is wrong since the argument informs whether the blocks are physically contiguous. As the blocks mapped are always logically contiguous and that's all ext4_ind_trans_blocks() cares about, just remove the 'chunk' argument. 
Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 46674058d251..25e261da871f 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2109,7 +2109,7 @@ extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs); extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); -extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks, int chunk); +extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); extern void ext4_ind_truncate(handle_t *, struct inode *inode); extern int ext4_free_hole_blocks(handle_t *handle, struct inode *inode, ext4_lblk_t first, ext4_lblk_t stop); -- cgit v1.2.3 From fffb273997cc52f255bde5f18e7f6b4686c914fb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 13:01:11 -0400 Subject: ext4: better estimate credits needed for ext4_da_writepages() We limit the number of blocks written in a single loop of ext4_da_writepages() to 64 when inode uses indirect blocks. That is unnecessary as the credit estimate for mapping a logically contiguous run of blocks is rather low even for inode with indirect blocks. So just lift this limitation and properly calculate the number of necessary credits. This better credit estimate will also later allow us to always write at least a single page in one iteration. 
Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 25e261da871f..2ebfcde5a156 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2596,8 +2596,7 @@ struct ext4_extent; extern int ext4_ext_tree_init(handle_t *handle, struct inode *); extern int ext4_ext_writepage_trans_blocks(struct inode *, int); -extern int ext4_ext_index_trans_blocks(struct inode *inode, int nrblocks, - int chunk); +extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents); extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); extern void ext4_ext_truncate(handle_t *, struct inode *); -- cgit v1.2.3 From 4e7ea81db53465ddd753678bc4cebf95369d0984 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 13:17:40 -0400 Subject: ext4: restructure writeback path There are two issues with current writeback path in ext4. For one we don't necessarily map complete pages when blocksize < pagesize and thus needn't do any writeback in one iteration. We always map some blocks though so we will eventually finish mapping the page. Just if writeback races with other operations on the file, forward progress is not really guaranteed. The second problem is that current code structure makes it hard to associate all the bios to some range of pages with one io_end structure so that unwritten extents can be converted after all the bios are finished. This will be especially difficult later when io_end will be associated with reserved transaction handle. We restructure the writeback path to a relatively simple loop which first prepares extent of pages, then maps one or more extents so that no page is partially mapped, and once page is fully mapped it is submitted for IO. 
We keep all the mapping and IO submission information in mpage_da_data structure to somewhat reduce stack usage. Resulting code is somewhat shorter than the old one and hopefully also easier to read. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2ebfcde5a156..90a164f365c4 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -176,21 +176,6 @@ struct ext4_map_blocks { unsigned int m_flags; }; -/* - * For delayed allocation tracking - */ -struct mpage_da_data { - struct inode *inode; - sector_t b_blocknr; /* start block number of extent */ - size_t b_size; /* size of extent */ - unsigned long b_state; /* state of the extent */ - unsigned long first_page, next_page; /* extent of pages */ - struct writeback_control *wbc; - int io_done; - int pages_written; - int retval; -}; - /* * Flags for ext4_io_end->flags */ -- cgit v1.2.3 From 3613d22807a2616e9346800bacd88aa8bbbefcd7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 13:19:34 -0400 Subject: ext4: remove buffer_uninit handling There isn't any need for setting BH_Uninit on buffers anymore. It was only used to signal we need to mark io_end as needing extent conversion in add_bh_to_extent() but now we can mark the io_end directly when mapping extent. 
Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 90a164f365c4..0a9b729f991b 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2653,20 +2653,17 @@ extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp); extern int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp); -/* BH_Uninit flag: blocks are allocated but uninitialized on disk */ +/* + * Note that these flags will never ever appear in a buffer_head's state flag. + * See EXT4_MAP_... to see where this is used. + */ enum ext4_state_bits { BH_Uninit /* blocks are allocated but uninitialized on disk */ - = BH_JBDPrivateStart, + = BH_JBDPrivateStart, BH_AllocFromCluster, /* allocated blocks were part of already - * allocated cluster. Note that this flag will - * never, ever appear in a buffer_head's state - * flag. See EXT4_MAP_FROM_CLUSTER to see where - * this is used. */ + * allocated cluster. */ }; -BUFFER_FNS(Uninit, uninit) -TAS_BUFFER_FNS(Uninit, uninit) - /* * Add new method to test whether block and inode bitmaps are properly * initialized. With uninit_bg reading the block from disk is not enough -- cgit v1.2.3 From 6b523df4fb5ae281ddbc817f40504b33e6226554 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 13:21:11 -0400 Subject: ext4: use transaction reservation for extent conversion in ext4_end_io Later we would like to clear PageWriteback bit only after extent conversion from unwritten to written extents is performed. However it is not possible to start a transaction after PageWriteback is set because that violates lock ordering (and is easy to deadlock). So we have to reserve a transaction before locking pages and sending them for IO and later we use the transaction for extent conversion from ext4_end_io(). 
Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 0a9b729f991b..8de219b758fb 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -184,10 +184,13 @@ struct ext4_map_blocks { #define EXT4_IO_END_DIRECT 0x0004 /* - * For converting uninitialized extents on a work queue. + * For converting uninitialized extents on a work queue. 'handle' is used for + * buffered writeback. */ typedef struct ext4_io_end { struct list_head list; /* per-file finished IO list */ + handle_t *handle; /* handle reserved for extent + * conversion */ struct inode *inode; /* file being written to */ unsigned int flag; /* unwritten or not */ loff_t offset; /* offset in the file */ @@ -1322,6 +1325,9 @@ static inline void ext4_set_io_unwritten_flag(struct inode *inode, struct ext4_io_end *io_end) { if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { + /* Writeback has to have coversion transaction reserved */ + WARN_ON(EXT4_SB(inode->i_sb)->s_journal && !io_end->handle && + !(io_end->flag & EXT4_IO_END_DIRECT)); io_end->flag |= EXT4_IO_END_UNWRITTEN; atomic_inc(&EXT4_I(inode)->i_unwritten); } @@ -2591,8 +2597,8 @@ extern void ext4_ext_init(struct super_block *); extern void ext4_ext_release(struct super_block *); extern long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len); -extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, - ssize_t len); +extern int ext4_convert_unwritten_extents(handle_t *handle, struct inode *inode, + loff_t offset, ssize_t len); extern int ext4_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); extern int ext4_ext_calc_metadata_amount(struct inode *inode, -- cgit v1.2.3 From 2e8fa54e3b48e4ce8c4e9ca4674ffbc973f58be5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 14:21:02 -0400 Subject: 
ext4: split extent conversion lists to reserved & unreserved parts Now that we have extent conversions with reserved transaction, we have to prevent extent conversions without reserved transaction (from DIO code) to block these (as that would effectively void any transaction reservation we did). So split lists, work items, and work queues to reserved and unreserved parts. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 8de219b758fb..b69a733b5b42 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -887,12 +887,22 @@ struct ext4_inode_info { qsize_t i_reserved_quota; #endif - /* completed IOs that might need unwritten extents handling */ - struct list_head i_completed_io_list; + /* Lock protecting lists below */ spinlock_t i_completed_io_lock; + /* + * Completed IOs that need unwritten extents handling and have + * transaction reserved + */ + struct list_head i_rsv_conversion_list; + /* + * Completed IOs that need unwritten extents handling and don't have + * transaction reserved + */ + struct list_head i_unrsv_conversion_list; atomic_t i_ioend_count; /* Number of outstanding io_end structs */ atomic_t i_unwritten; /* Nr. 
of inflight conversions pending */ - struct work_struct i_unwritten_work; /* deferred extent conversion */ + struct work_struct i_rsv_conversion_work; + struct work_struct i_unrsv_conversion_work; spinlock_t i_block_reservation_lock; @@ -1264,8 +1274,10 @@ struct ext4_sb_info { struct flex_groups *s_flex_groups; ext4_group_t s_flex_groups_allocated; - /* workqueue for dio unwritten */ - struct workqueue_struct *dio_unwritten_wq; + /* workqueue for unreserved extent convertions (dio) */ + struct workqueue_struct *unrsv_conversion_wq; + /* workqueue for reserved extent conversions (buffered io) */ + struct workqueue_struct *rsv_conversion_wq; /* timer for periodic error stats printing */ struct timer_list s_err_report; @@ -2646,7 +2658,8 @@ extern int ext4_put_io_end(ext4_io_end_t *io_end); extern void ext4_put_io_end_defer(ext4_io_end_t *io_end); extern void ext4_io_submit_init(struct ext4_io_submit *io, struct writeback_control *wbc); -extern void ext4_end_io_work(struct work_struct *work); +extern void ext4_end_io_rsv_work(struct work_struct *work); +extern void ext4_end_io_unrsv_work(struct work_struct *work); extern void ext4_io_submit(struct ext4_io_submit *io); extern int ext4_bio_write_page(struct ext4_io_submit *io, struct page *page, -- cgit v1.2.3 From b0857d309faefaf5443752458e8af1a4b22b3e92 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 14:23:41 -0400 Subject: ext4: defer clearing of PageWriteback after extent conversion Currently PageWriteback bit gets cleared from put_io_page() called from ext4_end_bio(). This is somewhat inconvenient as extent tree is not fully updated at that time (unwritten extents are not marked as written) so we cannot read the data back yet. This design was dictated by lock ordering as we cannot start a transaction while PageWriteback bit is set (we could easily deadlock with ext4_da_writepages()). 
But now that we use transaction reservation for extent conversion, locking issues are solved and we can move PageWriteback bit clearing after extent conversion is done. As a result we can remove wait for unwritten extent conversion from ext4_sync_file() because it already implicitly happens through wait_on_page_writeback(). We implement deferring of PageWriteback clearing by queueing completed bios to appropriate io_end and processing all the pages when io_end is going to be freed instead of at the moment ext4_io_end() is called. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index b69a733b5b42..74db579bb482 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -180,8 +180,7 @@ struct ext4_map_blocks { * Flags for ext4_io_end->flags */ #define EXT4_IO_END_UNWRITTEN 0x0001 -#define EXT4_IO_END_ERROR 0x0002 -#define EXT4_IO_END_DIRECT 0x0004 +#define EXT4_IO_END_DIRECT 0x0002 /* * For converting uninitialized extents on a work queue. 'handle' is used for @@ -192,6 +191,8 @@ typedef struct ext4_io_end { handle_t *handle; /* handle reserved for extent * conversion */ struct inode *inode; /* file being written to */ + struct bio *bio; /* Linked list of completed + * bios covering the extent */ unsigned int flag; /* unwritten or not */ loff_t offset; /* offset in the file */ ssize_t size; /* size of the extent */ -- cgit v1.2.3 From c724585b62411f7abdea5b1054b9f1e1e7c964be Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 14:44:36 -0400 Subject: ext4: don't wait for extent conversion in ext4_punch_hole() We don't have to wait for extent conversion in ext4_punch_hole() as buffered IO for the punched range has been flushed and waited upon (thus all extent conversions for that range have completed). 
Also we wait for all DIO to finish using inode_dio_wait() so there cannot be any extent conversions pending due to direct IO. Also remove ext4_flush_unwritten_io() since it's unused now. Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 74db579bb482..be95c83f5875 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1998,7 +1998,6 @@ static inline unsigned char get_dtype(struct super_block *sb, int filetype) /* fsync.c */ extern int ext4_sync_file(struct file *, loff_t, loff_t, int); -extern int ext4_flush_unwritten_io(struct inode *); /* hash.c */ extern int ext4fs_dirhash(const char *name, int len, struct -- cgit v1.2.3 From 5dc23bdd5f846ef868e82f789dfd9b13093f9ba6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 4 Jun 2013 14:46:12 -0400 Subject: ext4: remove ext4_ioend_wait() Now that we clear PageWriteback after extent conversion, there's no need to wait for io_end processing in ext4_evict_inode(). Running AIO/DIO keeps file reference until aio_complete() is called so ext4_evict_inode() cannot be called. For io_end structures resulting from buffered IO waiting is happening because we wait for PageWriteback in truncate_inode_pages(). 
Reviewed-by: Zheng Liu Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index be95c83f5875..bd9890f6d9ce 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2651,7 +2651,6 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, /* page-io.c */ extern int __init ext4_init_pageio(void); extern void ext4_exit_pageio(void); -extern void ext4_ioend_shutdown(struct inode *); extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); extern ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end); extern int ext4_put_io_end(ext4_io_end_t *io_end); -- cgit v1.2.3 From 2f2e09eb15849562aede80ed007658e4504ded26 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Thu, 6 Jun 2013 11:16:43 -0400 Subject: ext4: add sanity check to ext4_get_group_info() The group number passed to ext4_get_group_info() should be valid, but let's add an assert to check this before we start creating a pointer based on that group number and dereferencing it. 
Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index bd9890f6d9ce..f85f1fb49df8 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2313,6 +2313,7 @@ struct ext4_group_info *ext4_get_group_info(struct super_block *sb, { struct ext4_group_info ***grp_info; long indexv, indexh; + BUG_ON(group >= EXT4_SB(sb)->s_groups_count); grp_info = EXT4_SB(sb)->s_group_info; indexv = group >> (EXT4_DESC_PER_BLOCK_BITS(sb)); indexh = group & ((EXT4_DESC_PER_BLOCK(sb)) - 1); -- cgit v1.2.3 From d3922a777f9b4c4df898d326fa940f239af4f9b6 Mon Sep 17 00:00:00 2001 From: Zheng Liu Date: Mon, 1 Jul 2013 08:12:37 -0400 Subject: ext4: improve extent cache shrink mechanism to avoid to burn CPU time Now we maintain a proper in-order LRU list in ext4 to reclaim entries from extent status tree when we are under heavy memory pressure. For keeping this order, a spin lock is used to protect this list. But this lock burns a lot of CPU time. We can use the following steps to trigger it. % cd /dev/shm % dd if=/dev/zero of=ext4-img bs=1M count=2k % mkfs.ext4 ext4-img % mount -t ext4 -o loop ext4-img /mnt % cd /mnt % for ((i=0;i<160;i++)); do truncate -s 64g $i; done % for ((i=0;i<160;i++)); do cp $i /dev/null &; done % perf record -a -g % perf report This commit tries to fix this problem. Now a new member called i_touch_when is added into ext4_inode_info to record the last access time for an inode. Meanwhile we never need to keep a proper in-order LRU list. So this avoids burning some CPU time. When we try to reclaim some entries from extent status tree, we use list_sort() to get a proper in-order list. Then we traverse this list to discard some entries. In ext4_sb_info, we use s_es_last_sorted to record the last time of sorting this list. When we traverse the list, we skip the inode that is newer than this time, and move this inode to the tail of LRU list. 
When the head of the list is newer than s_es_last_sorted, we will sort the LRU list again. In this commit, we break the loop if s_extent_cache_cnt == 0 because that means that all extents in extent status tree have been reclaimed. Meanwhile in this commit, ext4_es_{un}register_shrinker()'s prototype is changed to save a local variable in these functions. Reported-by: Dave Hansen Signed-off-by: Zheng Liu Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f85f1fb49df8..f5f3b6c58240 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -864,6 +864,7 @@ struct ext4_inode_info { rwlock_t i_es_lock; struct list_head i_es_lru; unsigned int i_es_lru_nr; /* protected by i_es_lock */ + unsigned long i_touch_when; /* jiffies of last accessing */ /* ialloc */ ext4_group_t i_last_alloc_group; @@ -1303,6 +1304,7 @@ struct ext4_sb_info { /* Reclaim extents from extent status tree */ struct shrinker s_es_shrinker; struct list_head s_es_lru; + unsigned long s_es_last_sorted; struct percpu_counter s_extent_cache_cnt; spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; }; -- cgit v1.2.3 From e7c96e8e47baf263d93a8dbbebca7216a912ca05 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 1 Jul 2013 08:12:37 -0400 Subject: ext4: reduce object size when !CONFIG_PRINTK Reduce the object size ~10% could be useful for embedded systems. Add #ifdef CONFIG_PRINTK #else #endif blocks to hold formats and arguments, passing " " to functions when !CONFIG_PRINTK and still verifying format and arguments with no_printk. 
$ size fs/ext4/built-in.o* text data bss dec hex filename 239375 610 888 240873 3ace9 fs/ext4/built-in.o.new 264167 738 888 265793 40e41 fs/ext4/built-in.o.old $ grep -E "CONFIG_EXT4|CONFIG_PRINTK" .config # CONFIG_PRINTK is not set CONFIG_EXT4_FS=y CONFIG_EXT4_USE_FOR_EXT23=y CONFIG_EXT4_FS_POSIX_ACL=y # CONFIG_EXT4_FS_SECURITY is not set # CONFIG_EXT4_DEBUG is not set Signed-off-by: Joe Perches Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 80 ++++++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 67 insertions(+), 13 deletions(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index f5f3b6c58240..7cc6b18230ec 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2169,42 +2169,96 @@ extern int ext4_alloc_flex_bg_array(struct super_block *sb, ext4_group_t ngroup); extern const char *ext4_decode_error(struct super_block *sb, int errno, char nbuf[16]); + extern __printf(4, 5) void __ext4_error(struct super_block *, const char *, unsigned int, const char *, ...); -#define ext4_error(sb, message...) __ext4_error(sb, __func__, \ - __LINE__, ## message) extern __printf(5, 6) -void ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t, +void __ext4_error_inode(struct inode *, const char *, unsigned int, ext4_fsblk_t, const char *, ...); extern __printf(5, 6) -void ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t, +void __ext4_error_file(struct file *, const char *, unsigned int, ext4_fsblk_t, const char *, ...); extern void __ext4_std_error(struct super_block *, const char *, unsigned int, int); extern __printf(4, 5) void __ext4_abort(struct super_block *, const char *, unsigned int, const char *, ...); -#define ext4_abort(sb, message...) __ext4_abort(sb, __func__, \ - __LINE__, ## message) extern __printf(4, 5) void __ext4_warning(struct super_block *, const char *, unsigned int, const char *, ...); -#define ext4_warning(sb, message...) 
__ext4_warning(sb, __func__, \ - __LINE__, ## message) extern __printf(3, 4) -void ext4_msg(struct super_block *, const char *, const char *, ...); +void __ext4_msg(struct super_block *, const char *, const char *, ...); extern void __dump_mmp_msg(struct super_block *, struct mmp_struct *mmp, const char *, unsigned int, const char *); -#define dump_mmp_msg(sb, mmp, msg) __dump_mmp_msg(sb, mmp, __func__, \ - __LINE__, msg) extern __printf(7, 8) void __ext4_grp_locked_error(const char *, unsigned int, struct super_block *, ext4_group_t, unsigned long, ext4_fsblk_t, const char *, ...); -#define ext4_grp_locked_error(sb, grp, message...) \ - __ext4_grp_locked_error(__func__, __LINE__, (sb), (grp), ## message) + +#ifdef CONFIG_PRINTK + +#define ext4_error_inode(inode, func, line, block, fmt, ...) \ + __ext4_error_inode(inode, func, line, block, fmt, ##__VA_ARGS__) +#define ext4_error_file(file, func, line, block, fmt, ...) \ + __ext4_error_file(file, func, line, block, fmt, ##__VA_ARGS__) +#define ext4_error(sb, fmt, ...) \ + __ext4_error(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) +#define ext4_abort(sb, fmt, ...) \ + __ext4_abort(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) +#define ext4_warning(sb, fmt, ...) \ + __ext4_warning(sb, __func__, __LINE__, fmt, ##__VA_ARGS__) +#define ext4_msg(sb, level, fmt, ...) \ + __ext4_msg(sb, level, fmt, ##__VA_ARGS__) +#define dump_mmp_msg(sb, mmp, msg) \ + __dump_mmp_msg(sb, mmp, __func__, __LINE__, msg) +#define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...) \ + __ext4_grp_locked_error(__func__, __LINE__, sb, grp, ino, block, \ + fmt, ##__VA_ARGS__) + +#else + +#define ext4_error_inode(inode, func, line, block, fmt, ...) \ +do { \ + no_printk(fmt, ##__VA_ARGS__); \ + __ext4_error_inode(inode, "", 0, block, " "); \ +} while (0) +#define ext4_error_file(file, func, line, block, fmt, ...) \ +do { \ + no_printk(fmt, ##__VA_ARGS__); \ + __ext4_error_file(file, "", 0, block, " "); \ +} while (0) +#define ext4_error(sb, fmt, ...) 
\ +do { \ + no_printk(fmt, ##__VA_ARGS__); \ + __ext4_error(sb, "", 0, " "); \ +} while (0) +#define ext4_abort(sb, fmt, ...) \ +do { \ + no_printk(fmt, ##__VA_ARGS__); \ + __ext4_abort(sb, "", 0, " "); \ +} while (0) +#define ext4_warning(sb, fmt, ...) \ +do { \ + no_printk(fmt, ##__VA_ARGS__); \ + __ext4_warning(sb, "", 0, " "); \ +} while (0) +#define ext4_msg(sb, level, fmt, ...) \ +do { \ + no_printk(fmt, ##__VA_ARGS__); \ + __ext4_msg(sb, "", " "); \ +} while (0) +#define dump_mmp_msg(sb, mmp, msg) \ + __dump_mmp_msg(sb, mmp, "", 0, "") +#define ext4_grp_locked_error(sb, grp, ino, block, fmt, ...) \ +do { \ + no_printk(fmt, ##__VA_ARGS__); \ + __ext4_grp_locked_error("", 0, sb, grp, ino, block, " "); \ +} while (0) + +#endif + extern void ext4_update_dynamic_rev(struct super_block *sb); extern int ext4_update_compat_feature(handle_t *handle, struct super_block *sb, __u32 compat); -- cgit v1.2.3 From aeb2817a4ea99f62532adf3377be3b282d3bda12 Mon Sep 17 00:00:00 2001 From: Ashish Sangwan Date: Mon, 1 Jul 2013 08:12:38 -0400 Subject: ext4: pass inode pointer instead of file pointer to punch hole No need to pass file pointer when we can directly pass inode pointer. 
Signed-off-by: Ashish Sangwan Signed-off-by: Namjae Jeon Signed-off-by: "Theodore Ts'o" --- fs/ext4/ext4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/ext4/ext4.h') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 7cc6b18230ec..6ed348d8d3eb 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2088,7 +2088,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int); extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern int ext4_can_truncate(struct inode *inode); extern void ext4_truncate(struct inode *); -extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length); +extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length); extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks); extern void ext4_set_inode_flags(struct inode *); extern void ext4_get_inode_flags(struct ext4_inode_info *); -- cgit v1.2.3