From a78aaa2c3cf1e60f57ff7b2b3c07f0d469306984 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Tue, 28 Feb 2017 20:32:41 +0800 Subject: f2fs: fix an error return value in truncate_partial_data_page This patch fixes an error return value in truncate_partial_data_page. Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 5f7317875a67..6d120a5b8f4b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -528,7 +528,7 @@ static int truncate_partial_data_page(struct inode *inode, u64 from, page = get_lock_data_page(inode, index, true); if (IS_ERR(page)) - return 0; + return PTR_ERR(page) == -ENOENT ? 0 : PTR_ERR(page); truncate_out: f2fs_wait_on_page_writeback(page, DATA, true); zero_user(page, offset, PAGE_SIZE - offset); -- cgit v1.2.3 From 4f295443bff1701430249beec5eb9dc875decc9c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 4 Mar 2017 13:56:10 -0800 Subject: f2fs: don't need to invalidate wrong node page If f2fs_new_inode() fails, the bad inode will invalidate the 0'th node page during f2fs_evict_inode(), which is unnecessary.
Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 24bb8213d974..ef8610bf950f 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -411,7 +411,10 @@ no_delete: stat_dec_inline_dir(inode); stat_dec_inline_inode(inode); - invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, inode->i_ino); + /* ino == 0, if f2fs_new_inode() was failed t*/ + if (inode->i_ino) + invalidate_mapping_pages(NODE_MAPPING(sbi), inode->i_ino, + inode->i_ino); if (xnid) invalidate_mapping_pages(NODE_MAPPING(sbi), xnid, xnid); if (inode->i_nlink) { -- cgit v1.2.3 From b71deadbc4d0271cd3b298e57ad8be70b0c391c3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 10 Mar 2017 09:36:10 -0800 Subject: f2fs: le16_to_cpu for xattr->e_value_size This patch fixes missing le16 conversion, reported by kbuild test robot. Fixes: 5f35a2cd5 ("f2fs: Don't update the xattr data that same as the exist") Reviewed-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 7298a4488f7f..aff7619e3f96 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -546,7 +546,9 @@ static bool f2fs_xattr_value_same(struct f2fs_xattr_entry *entry, const void *value, size_t size) { void *pval = entry->e_name + entry->e_name_len; - return (entry->e_value_size == size) && !memcmp(pval, value, size); + + return (le16_to_cpu(entry->e_value_size) == size) && + !memcmp(pval, value, size); } static int __f2fs_setxattr(struct inode *inode, int index, -- cgit v1.2.3 From 10047f537c8fb33599bae599ed0e1d9551639d80 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 11 Mar 2017 21:18:01 +0800 Subject: f2fs: le32_to_cpu for ckpt->cp_pack_total_block_count Fixes: 22ad0b6ab4 ("f2fs: add bitmaps for empty or full NAT blocks") Signed-off-by: Kinglong Mee 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 0339daf4ca02..b72a9aad296c 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1024,7 +1024,8 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) spin_lock(&sbi->cp_lock); - if (cpc->reason == CP_UMOUNT && ckpt->cp_pack_total_block_count > + if (cpc->reason == CP_UMOUNT && + le32_to_cpu(ckpt->cp_pack_total_block_count) > sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) disable_nat_bits(sbi, false); -- cgit v1.2.3 From 8c1b3c0fb6d633d09507ab2dbb23d384941a850a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 7 Mar 2017 13:32:20 -0800 Subject: f2fs: fix wrong error injection for evict_inode The previous one was not a proper location to inject an error, since there is no point to get errors. Instead, we can emulate EIO during truncation, and the below logic should handle it correctly. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index ef8610bf950f..2520fa72b23f 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -372,13 +372,6 @@ void f2fs_evict_inode(struct inode *inode) if (inode->i_nlink || is_bad_inode(inode)) goto no_delete; -#ifdef CONFIG_F2FS_FAULT_INJECTION - if (time_to_inject(sbi, FAULT_EVICT_INODE)) { - f2fs_show_injection_info(FAULT_EVICT_INODE); - goto no_delete; - } -#endif - remove_ino_entry(sbi, inode->i_ino, APPEND_INO); remove_ino_entry(sbi, inode->i_ino, UPDATE_INO); @@ -389,6 +382,12 @@ retry: if (F2FS_HAS_BLOCKS(inode)) err = f2fs_truncate(inode); +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(sbi, FAULT_EVICT_INODE)) { + f2fs_show_injection_info(FAULT_EVICT_INODE); + err = -EIO; + } +#endif if (!err) { f2fs_lock_op(sbi); err = remove_inode_page(inode); -- cgit v1.2.3 From 4f1bca9f0db3dc82297092eafdb95f58ecef5d7a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 7 Mar 2017 11:22:45 -0800 Subject: f2fs: don't allow to get pino when filename is encrypted After renaming an encrypted file, we have no way to get its encrypted filename from its dentry. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 6d120a5b8f4b..3212dfa1a50e 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -110,6 +110,9 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) { struct dentry *dentry; + if (file_enc_name(inode)) + return 0; + inode = igrab(inode); dentry = d_find_any_alias(inode); iput(inode); -- cgit v1.2.3 From e2f0e962ac2190ab4e0d1ee4b258273d3a51e689 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 4 Mar 2017 21:48:28 +0800 Subject: f2fs: fix the fault of checking F2FS_LINK_MAX for rename inode The parent directory's nlink will change, not the inode. 
Signed-off-by: Kinglong Mee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 98f00a3a7f50..25c073f6c7d4 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -908,8 +908,8 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, old_nlink = old_dir_entry ? -1 : 1; new_nlink = -old_nlink; err = -EMLINK; - if ((old_nlink > 0 && old_inode->i_nlink >= F2FS_LINK_MAX) || - (new_nlink > 0 && new_inode->i_nlink >= F2FS_LINK_MAX)) + if ((old_nlink > 0 && old_dir->i_nlink >= F2FS_LINK_MAX) || + (new_nlink > 0 && new_dir->i_nlink >= F2FS_LINK_MAX)) goto out_new_dir; } -- cgit v1.2.3 From 10a875f82b50adc83036368086df9c4cfff7888c Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 8 Mar 2017 09:49:53 +0800 Subject: f2fs: fix the fault of calculating blkstart twice When the zone type is BLK_ZONE_TYPE_CONVENTIONAL, the blkstart is calculated twice. 
Signed-off-by: Kinglong Mee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 29ef7088c558..4d7bf84dc393 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -796,6 +796,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t blkstart, block_t blklen) { sector_t sector, nr_sects; + block_t lblkstart = blkstart; int devi = 0; if (sbi->s_ndevs) { @@ -813,7 +814,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, case BLK_ZONE_TYPE_CONVENTIONAL: if (!blk_queue_discard(bdev_get_queue(bdev))) return 0; - return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); + return __f2fs_issue_discard_async(sbi, bdev, lblkstart, blklen); case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: sector = SECTOR_FROM_BLOCK(blkstart); -- cgit v1.2.3 From aa51d08a11784a431266055d58d28b4c4c76a79e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 7 Mar 2017 13:41:22 -0800 Subject: f2fs: build stat_info before orphan inode recovery f2fs_sync_fs() -> write_checkpoint() calls stat_inc_cp_count(sbi->stat_info), which needs stat_info allocation. Otherwise, we can hit: [254042.598623] ? count_shadow_nodes+0xa0/0xa0 [254042.598633] f2fs_sync_fs+0x65/0xd0 [f2fs] [254042.598645] f2fs_balance_fs_bg+0xe4/0x1c0 [f2fs] [254042.598657] f2fs_write_node_pages+0x34/0x1a0 [f2fs] [254042.598664] ? pagevec_lookup_entries+0x1e/0x30 [254042.598673] do_writepages+0x1e/0x30 [254042.598682] __writeback_single_inode+0x45/0x330 [254042.598688] writeback_single_inode+0xd7/0x190 [254042.598694] write_inode_now+0x86/0xa0 [254042.598699] iput+0x122/0x200 [254042.598709] f2fs_fill_super+0xd4a/0x14d0 [f2fs] [254042.598717] mount_bdev+0x184/0x1c0 [254042.598934] ? 
f2fs_commit_super+0x100/0x100 [f2fs] [254042.599142] f2fs_mount+0x15/0x20 [f2fs] [254042.599349] mount_fs+0x39/0x160 [254042.599554] ? __alloc_percpu+0x15/0x20 [254042.599759] vfs_kern_mount+0x67/0x110 [254042.599972] do_mount+0x1bb/0xc80 [254042.600175] ? memdup_user+0x42/0x60 [254042.600380] SyS_mount+0x83/0xd0 [254042.600583] entry_SYSCALL_64_fastpath+0x1e/0xad Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 96fe8ed73100..cfb40d3fd875 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2022,6 +2022,10 @@ try_onemore: f2fs_join_shrinker(sbi); + err = f2fs_build_stats(sbi); + if (err) + goto free_nm; + /* if there are nt orphan nodes free them */ err = recover_orphan_inodes(sbi); if (err) @@ -2046,10 +2050,6 @@ try_onemore: goto free_root_inode; } - err = f2fs_build_stats(sbi); - if (err) - goto free_root_inode; - if (f2fs_proc_root) sbi->s_proc = proc_mkdir(sb->s_id, f2fs_proc_root); @@ -2143,7 +2143,6 @@ free_proc: remove_proc_entry("segment_bits", sbi->s_proc); remove_proc_entry(sb->s_id, f2fs_proc_root); } - f2fs_destroy_stats(sbi); free_root_inode: dput(sb->s_root); sb->s_root = NULL; @@ -2161,6 +2160,7 @@ free_node_inode: truncate_inode_pages_final(META_MAPPING(sbi)); iput(sbi->node_inode); mutex_unlock(&sbi->umount_mutex); + f2fs_destroy_stats(sbi); free_nm: destroy_node_manager(sbi); free_sm: -- cgit v1.2.3 From 8c242db9b8c01b252290e23827163787f07e01d1 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 17 Mar 2017 09:55:52 +0800 Subject: f2fs: fix stale ATOMIC_WRITTEN_PAGE private pointer When I forced to enable atomic operations intentionally, I could hit the below panic, since we didn't clear page->private in f2fs_invalidate_page called by file truncation. The panic occurs due to NULL mapping having page->private. 
BUG: unable to handle kernel paging request at ffffffffffffffff IP: drop_buffers+0x38/0xe0 PGD 5d00c067 PUD 5d00e067 PMD 0 CPU: 3 PID: 1648 Comm: fsstress Tainted: G D OE 4.10.0+ #5 Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 task: ffff9151952863c0 task.stack: ffffaaec40db4000 RIP: 0010:drop_buffers+0x38/0xe0 RSP: 0018:ffffaaec40db74c8 EFLAGS: 00010292 Call Trace: ? page_referenced+0x8b/0x170 try_to_free_buffers+0xc5/0xe0 try_to_release_page+0x49/0x50 shrink_page_list+0x8bc/0x9f0 shrink_inactive_list+0x1dd/0x500 ? shrink_active_list+0x2c0/0x430 shrink_node_memcg+0x5eb/0x7c0 shrink_node+0xe1/0x320 do_try_to_free_pages+0xef/0x2e0 try_to_free_pages+0xe9/0x190 __alloc_pages_slowpath+0x390/0xe70 __alloc_pages_nodemask+0x291/0x2b0 alloc_pages_current+0x95/0x140 __page_cache_alloc+0xc4/0xe0 pagecache_get_page+0xab/0x2a0 grab_cache_page_write_begin+0x20/0x40 get_read_data_page+0x2e6/0x4c0 [f2fs] ? f2fs_mark_inode_dirty_sync+0x16/0x30 [f2fs] ? truncate_data_blocks_range+0x238/0x2b0 [f2fs] get_lock_data_page+0x30/0x190 [f2fs] __exchange_data_block+0xaaf/0xf40 [f2fs] f2fs_fallocate+0x418/0xd00 [f2fs] vfs_fallocate+0x157/0x220 SyS_fallocate+0x48/0x80 Signed-off-by: Yunlei He Signed-off-by: Chao Yu [Chao Yu: use INMEM_INVALIDATE for better tracing] Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/segment.c | 30 ++++++++++++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1602b4bccae6..e341d446205a 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1951,7 +1951,7 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset, /* This is atomic written page, keep Private */ if (IS_ATOMIC_WRITTEN_PAGE(page)) - return; + return drop_inmem_page(inode, page); set_page_private(page, 0); ClearPagePrivate(page); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 0a6e115562f6..264c219f41a5 100644 --- a/fs/f2fs/f2fs.h +++ 
b/fs/f2fs/f2fs.h @@ -722,6 +722,7 @@ enum page_type { META_FLUSH, INMEM, /* the below types are used by tracepoints only. */ INMEM_DROP, + INMEM_INVALIDATE, INMEM_REVOKE, IPU, OPU, @@ -2184,6 +2185,7 @@ void destroy_node_manager_caches(void); */ void register_inmem_page(struct inode *inode, struct page *page); void drop_inmem_pages(struct inode *inode); +void drop_inmem_page(struct inode *inode, struct page *page); int commit_inmem_pages(struct inode *inode); void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need); void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4d7bf84dc393..cb6d9ed634a3 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -250,6 +250,36 @@ void drop_inmem_pages(struct inode *inode) stat_dec_atomic_write(inode); } +void drop_inmem_page(struct inode *inode, struct page *page) +{ + struct f2fs_inode_info *fi = F2FS_I(inode); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct list_head *head = &fi->inmem_pages; + struct inmem_pages *cur = NULL; + + f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page)); + + mutex_lock(&fi->inmem_lock); + list_for_each_entry(cur, head, list) { + if (cur->page == page) + break; + } + + f2fs_bug_on(sbi, !cur || cur->page != page); + list_del(&cur->list); + mutex_unlock(&fi->inmem_lock); + + dec_page_count(sbi, F2FS_INMEM_PAGES); + kmem_cache_free(inmem_entry_slab, cur); + + ClearPageUptodate(page); + set_page_private(page, 0); + ClearPagePrivate(page); + f2fs_put_page(page, 0); + + trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE); +} + static int __commit_inmem_pages(struct inode *inode, struct list_head *revoke_list) { -- cgit v1.2.3 From e811898c97f83aef6c1652fbfbf5294bf542159c Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 17 Mar 2017 10:04:15 +0800 Subject: f2fs: don't allow atomic writes for not regular files The atomic writes only supports regular files for database. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 3212dfa1a50e..2cf16ed5b74c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1518,6 +1518,9 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) if (!inode_owner_or_capable(inode)) return -EACCES; + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + ret = mnt_want_write_file(filp); if (ret) return ret; -- cgit v1.2.3 From 8ff0971f1500953935339c23c91f660793fd4c64 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 17 Mar 2017 15:43:57 +0800 Subject: f2fs: don't allow volatile writes for non-regular file Now f2fs only supports volatile writes for journal db regular file. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 2cf16ed5b74c..427ab1b41de3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1598,6 +1598,9 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) if (!inode_owner_or_capable(inode)) return -EACCES; + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + ret = mnt_want_write_file(filp); if (ret) return ret; -- cgit v1.2.3 From 20fda56b018550cfd3a352b2f027eb42b1991bc4 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 4 Mar 2017 22:13:10 +0800 Subject: f2fs: make sure trace all f2fs_issue_flush The root device's issue flush trace is missing, add it and tracing the result from submit. 
Fixes: d50aaeec90 ("f2fs: show actual device info in tracepoints") Signed-off-by: Kinglong Mee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index cb6d9ed634a3..62099502dc5f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -441,7 +441,8 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) } } -static int __submit_flush_wait(struct block_device *bdev) +static int __submit_flush_wait(struct f2fs_sb_info *sbi, + struct block_device *bdev) { struct bio *bio = f2fs_bio_alloc(0); int ret; @@ -450,23 +451,24 @@ static int __submit_flush_wait(struct block_device *bdev) bio->bi_bdev = bdev; ret = submit_bio_wait(bio); bio_put(bio); + + trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER), + test_opt(sbi, FLUSH_MERGE), ret); return ret; } static int submit_flush_wait(struct f2fs_sb_info *sbi) { - int ret = __submit_flush_wait(sbi->sb->s_bdev); + int ret = __submit_flush_wait(sbi, sbi->sb->s_bdev); int i; - if (sbi->s_ndevs && !ret) { - for (i = 1; i < sbi->s_ndevs; i++) { - trace_f2fs_issue_flush(FDEV(i).bdev, - test_opt(sbi, NOBARRIER), - test_opt(sbi, FLUSH_MERGE)); - ret = __submit_flush_wait(FDEV(i).bdev); - if (ret) - break; - } + if (!sbi->s_ndevs || ret) + return ret; + + for (i = 1; i < sbi->s_ndevs; i++) { + ret = __submit_flush_wait(sbi, FDEV(i).bdev); + if (ret) + break; } return ret; } -- cgit v1.2.3 From 0b28b71e298fb7b74ee8a74e2f9d4d45c44f5483 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 28 Feb 2017 21:34:47 +0800 Subject: f2fs: drop duplicate radix tree lookup of nat_entry_set The nat entry is taken from the set list for freeing, so doing the radix tree lookup again is redundant.
Signed-off-by: Kinglong Mee [Jaegeuk Kim: remove unnecessary f2fs_bug_on] Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 481aa8dc79f4..11df8ab32478 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -177,18 +177,12 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i, } static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i, - struct nat_entry *ne) + struct nat_entry_set *set, struct nat_entry *ne) { - nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid); - struct nat_entry_set *head; - - head = radix_tree_lookup(&nm_i->nat_set_root, set); - if (head) { - list_move_tail(&ne->list, &nm_i->nat_entries); - set_nat_flag(ne, IS_DIRTY, false); - head->entry_cnt--; - nm_i->dirty_nat_cnt--; - } + list_move_tail(&ne->list, &nm_i->nat_entries); + set_nat_flag(ne, IS_DIRTY, false); + set->entry_cnt--; + nm_i->dirty_nat_cnt--; } static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i, @@ -2407,7 +2401,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, } raw_nat_from_node_info(raw_ne, &ne->ni); nat_reset_flag(ne); - __clear_nat_cache_dirty(NM_I(sbi), ne); + __clear_nat_cache_dirty(NM_I(sbi), set, ne); if (nat_get_blkaddr(ne) == NULL_ADDR) { add_free_nid(sbi, nid, false); spin_lock(&NM_I(sbi)->nid_list_lock); -- cgit v1.2.3 From fc7d5bc427219eab268edb75cc9bfc7fdd8174e4 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Tue, 28 Feb 2017 21:34:37 +0800 Subject: f2fs: remove dead macro PGOFS_OF_NEXT_DNODE Fixes: 3cf4574705 ("f2fs: introduce get_next_page_offset to speed up SEEK_DATA") Signed-off-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 264c219f41a5..902094f215d0 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2030,12 +2030,6 @@ static inline void *f2fs_kvzalloc(size_t 
size, gfp_t flags) ((is_inode_flag_set(i, FI_ACL_MODE)) ? \ (F2FS_I(i)->i_acl_mode) : ((i)->i_mode)) -/* get offset of first page in next direct node */ -#define PGOFS_OF_NEXT_DNODE(pgofs, inode) \ - ((pgofs < ADDRS_PER_INODE(inode)) ? ADDRS_PER_INODE(inode) : \ - (pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) / \ - ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode)) - /* * file.c */ -- cgit v1.2.3 From 5ce4738a0250ba9b6fbc89900ee0fc92a5eb60db Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 7 Mar 2017 13:54:56 -0800 Subject: f2fs: show more precise message on orphan recovery failure This case is not caused by fsck.f2fs. User needs to retry mount. Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index b72a9aad296c..3c3d2d01448d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -567,7 +567,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino) if (ni.blk_addr != NULL_ADDR) { set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_msg(sbi->sb, KERN_WARNING, - "%s: orphan failed (ino=%x), run fsck to fix.", + "%s: orphan failed (ino=%x) by kernel, retry mount.", __func__, ino); return -EIO; } -- cgit v1.2.3 From a29d0e0bc0eeefad2a50a8aa7e3f4b98f72c7feb Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Wed, 1 Mar 2017 18:07:10 +0800 Subject: f2fs: skip writeback meta pages if cp_mutex acquire failed Skip writeback meta pages if cp_mutex lock acquire failed, cp will flush dirty pages instead. 
Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 3c3d2d01448d..22348c7df67d 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -275,10 +275,11 @@ static int f2fs_write_meta_pages(struct address_space *mapping, get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META)) goto skip_write; - trace_f2fs_writepages(mapping->host, wbc, META); + /* if locked failed, cp will flush dirty pages instead */ + if (!mutex_trylock(&sbi->cp_mutex)) + goto skip_write; - /* if mounting is failed, skip writing node pages */ - mutex_lock(&sbi->cp_mutex); + trace_f2fs_writepages(mapping->host, wbc, META); diff = nr_pages_to_write(sbi, META, wbc); written = sync_meta_pages(sbi, META, wbc->nr_to_write); mutex_unlock(&sbi->cp_mutex); -- cgit v1.2.3 From c81abe34fe1ec5e60e5cba6adc912d30e44cc40d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 7 Mar 2017 18:02:02 -0800 Subject: f2fs: allocate a bio for discarding when actually issuing it Let's allocate a bio when issuing discard commands later. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 +- fs/f2fs/segment.c | 138 +++++++++++++++++++++++++++++------------------------- 2 files changed, 78 insertions(+), 64 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 902094f215d0..b549b531e586 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -197,10 +197,12 @@ enum { struct discard_cmd { struct list_head list; /* command list */ struct completion wait; /* compleation */ + struct block_device *bdev; /* bdev */ block_t lstart; /* logical start address */ + block_t start; /* actual start address in dev */ block_t len; /* length */ - struct bio *bio; /* bio */ int state; /* state */ + int error; /* bio error */ }; struct discard_cmd_control { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 62099502dc5f..85c34d4fcf3e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -666,7 +666,8 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) } static void __add_discard_cmd(struct f2fs_sb_info *sbi, - struct bio *bio, block_t lstart, block_t len) + struct block_device *bdev, block_t lstart, + block_t start, block_t len) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct list_head *cmd_list = &(dcc->discard_cmd_list); @@ -674,11 +675,12 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi, dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); INIT_LIST_HEAD(&dc->list); - dc->bio = bio; - bio->bi_private = dc; + dc->bdev = bdev; dc->lstart = lstart; + dc->start = start; dc->len = len; dc->state = D_PREP; + dc->error = 0; init_completion(&dc->wait); mutex_lock(&dcc->cmd_lock); @@ -688,22 +690,75 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi, static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) { - int err = dc->bio->bi_error; - if (dc->state == D_DONE) atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard)); - if (err == -EOPNOTSUPP) - err = 0; + if (dc->error == -EOPNOTSUPP) + dc->error = 
0; - if (err) + if (dc->error) f2fs_msg(sbi->sb, KERN_INFO, - "Issue discard failed, ret: %d", err); - bio_put(dc->bio); + "Issue discard failed, ret: %d", dc->error); list_del(&dc->list); kmem_cache_free(discard_cmd_slab, dc); } +static void f2fs_submit_discard_endio(struct bio *bio) +{ + struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; + + complete(&dc->wait); + dc->error = bio->bi_error; + dc->state = D_DONE; + bio_put(bio); +} + +/* this function is copied from blkdev_issue_discard from block/blk-lib.c */ +static void __submit_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_cmd *dc) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct bio *bio = NULL; + + if (dc->state != D_PREP) + return; + + dc->error = __blkdev_issue_discard(dc->bdev, + SECTOR_FROM_BLOCK(dc->start), + SECTOR_FROM_BLOCK(dc->len), + GFP_NOFS, 0, &bio); + if (!dc->error) { + /* should keep before submission to avoid D_DONE right away */ + dc->state = D_SUBMIT; + atomic_inc(&dcc->submit_discard); + if (bio) { + bio->bi_private = dc; + bio->bi_end_io = f2fs_submit_discard_endio; + bio->bi_opf |= REQ_SYNC; + submit_bio(bio); + } + } else { + __remove_discard_cmd(sbi, dc); + } +} + +static int __queue_discard_cmd(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t blkstart, block_t blklen) +{ + block_t lblkstart = blkstart; + + trace_f2fs_issue_discard(bdev, blkstart, blklen); + + if (sbi->s_ndevs) { + int devi = f2fs_target_device_index(sbi, blkstart); + + blkstart -= FDEV(devi).start_blk; + } + __add_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen); + wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); + return 0; +} + /* This should be covered by global mutex, &sit_i->sentry_lock */ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { @@ -719,11 +774,7 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) list_for_each_entry_safe(dc, tmp, wait_list, list) { if (blkaddr == NULL_ADDR) { - if (dc->state == 
D_PREP) { - dc->state = D_SUBMIT; - submit_bio(dc->bio); - atomic_inc(&dcc->submit_discard); - } + __submit_discard_cmd(sbi, dc); continue; } @@ -746,14 +797,6 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) mutex_unlock(&dcc->cmd_lock); } -static void f2fs_submit_discard_endio(struct bio *bio) -{ - struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; - - complete(&dc->wait); - dc->state = D_DONE; -} - static int issue_discard_thread(void *data) { struct f2fs_sb_info *sbi = data; @@ -771,15 +814,14 @@ repeat: mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, cmd_list, list) { - if (dc->state == D_PREP) { - dc->state = D_SUBMIT; - submit_bio(dc->bio); - atomic_inc(&dcc->submit_discard); - if (iter++ > DISCARD_ISSUE_RATE) - break; - } else if (dc->state == D_DONE) { + + if (is_idle(sbi)) + __submit_discard_cmd(sbi, dc); + + if (dc->state == D_PREP && iter++ > DISCARD_ISSUE_RATE) + break; + if (dc->state == D_DONE) __remove_discard_cmd(sbi, dc); - } } mutex_unlock(&dcc->cmd_lock); @@ -793,36 +835,6 @@ repeat: goto repeat; } - -/* this function is copied from blkdev_issue_discard from block/blk-lib.c */ -static int __f2fs_issue_discard_async(struct f2fs_sb_info *sbi, - struct block_device *bdev, block_t blkstart, block_t blklen) -{ - struct bio *bio = NULL; - block_t lblkstart = blkstart; - int err; - - trace_f2fs_issue_discard(bdev, blkstart, blklen); - - if (sbi->s_ndevs) { - int devi = f2fs_target_device_index(sbi, blkstart); - - blkstart -= FDEV(devi).start_blk; - } - err = __blkdev_issue_discard(bdev, - SECTOR_FROM_BLOCK(blkstart), - SECTOR_FROM_BLOCK(blklen), - GFP_NOFS, 0, &bio); - if (!err && bio) { - bio->bi_end_io = f2fs_submit_discard_endio; - bio->bi_opf |= REQ_SYNC; - - __add_discard_cmd(sbi, bio, lblkstart, blklen); - wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); - } - return err; -} - #ifdef CONFIG_BLK_DEV_ZONED static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, struct block_device *bdev, 
block_t blkstart, block_t blklen) @@ -846,7 +858,7 @@ static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi, case BLK_ZONE_TYPE_CONVENTIONAL: if (!blk_queue_discard(bdev_get_queue(bdev))) return 0; - return __f2fs_issue_discard_async(sbi, bdev, lblkstart, blklen); + return __queue_discard_cmd(sbi, bdev, lblkstart, blklen); case BLK_ZONE_TYPE_SEQWRITE_REQ: case BLK_ZONE_TYPE_SEQWRITE_PREF: sector = SECTOR_FROM_BLOCK(blkstart); @@ -878,7 +890,7 @@ static int __issue_discard_async(struct f2fs_sb_info *sbi, bdev_zoned_model(bdev) != BLK_ZONED_NONE) return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen); #endif - return __f2fs_issue_discard_async(sbi, bdev, blkstart, blklen); + return __queue_discard_cmd(sbi, bdev, blkstart, blklen); } static int f2fs_issue_discard(struct f2fs_sb_info *sbi, -- cgit v1.2.3 From 3d6a650febdd762c90fe477aa53b9413fd7d97df Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Thu, 2 Mar 2017 10:36:20 +0800 Subject: f2fs: add a punch discard command function This patch add a function to punch discard command if one segment reuse before discard. Split this segment from multi-segments discard range, and discard the left bigger range. 
Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 85c34d4fcf3e..c5a5258f71c5 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -759,6 +759,25 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, return 0; } +static void __punch_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_cmd *dc, block_t blkaddr) +{ + block_t end_block = START_BLOCK(sbi, GET_SEGNO(sbi, blkaddr) + 1); + + if (dc->state == D_DONE || dc->lstart + dc->len <= end_block) { + __remove_discard_cmd(sbi, dc); + return; + } + + if (blkaddr - dc->lstart < dc->lstart + dc->len - end_block) { + dc->start += (end_block - dc->lstart); + dc->len -= (end_block - dc->lstart); + dc->lstart = end_block; + } else { + dc->len = blkaddr - dc->lstart; + } +} + /* This should be covered by global mutex, &sit_i->sentry_lock */ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { @@ -781,8 +800,7 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) { if (dc->state == D_SUBMIT) wait_for_completion_io(&dc->wait); - else - __remove_discard_cmd(sbi, dc); + __punch_discard_cmd(sbi, dc, blkaddr); } } blk_finish_plug(&plug); -- cgit v1.2.3 From b0beab5016d04009a7c1d4639ccb5b3d46dad56f Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Wed, 8 Mar 2017 10:47:11 +0800 Subject: f2fs: use parameter max_items instead of PIDVEC_SIZE Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index 73b4e1d1912a..c82ab4048127 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -138,7 +138,7 @@ static unsigned int gang_lookup_pids(pid_t *results, unsigned long first_index, 
radix_tree_for_each_slot(slot, &pids, &iter, first_index) { results[ret] = iter.index; - if (++ret == PIDVEC_SIZE) + if (++ret == max_items) break; } return ret; -- cgit v1.2.3 From 1941d7bcb474aa38caca16e6d3416e623d37d800 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Wed, 8 Mar 2017 10:47:12 +0800 Subject: f2fs: check range before defragment This patch checks the range parameter passed by the ioctl to prevent the range from exceeding the max_file_blocks limit. Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 427ab1b41de3..300d62015c24 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2054,6 +2054,12 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) goto out; } + if (unlikely((range.start + range.len) >> PAGE_SHIFT > + sbi->max_file_blocks)) { + err = -EINVAL; + goto out; + } + err = f2fs_defragment_range(sbi, filp, &range); f2fs_update_time(sbi, REQ_TIME); if (err < 0) -- cgit v1.2.3 From 14b44d238ce7402a99db30b806e08b3e4eddcdbe Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 9 Mar 2017 15:24:24 -0800 Subject: f2fs: add fault injection on f2fs_truncate Inject a fault during f2fs_truncate().
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/file.c | 6 ++++++ fs/f2fs/super.c | 1 + 3 files changed, 8 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b549b531e586..7c9685c4c904 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -50,6 +50,7 @@ enum { FAULT_BLOCK, FAULT_DIR_DEPTH, FAULT_EVICT_INODE, + FAULT_TRUNCATE, FAULT_IO, FAULT_CHECKPOINT, FAULT_MAX, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 300d62015c24..af507585c75b 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -620,6 +620,12 @@ int f2fs_truncate(struct inode *inode) trace_f2fs_truncate(inode); +#ifdef CONFIG_F2FS_FAULT_INJECTION + if (time_to_inject(F2FS_I_SB(inode), FAULT_TRUNCATE)) { + f2fs_show_injection_info(FAULT_TRUNCATE); + return -EIO; + } +#endif /* we should check inline_data size */ if (!f2fs_may_inline_data(inode)) { err = f2fs_convert_inline_inode(inode); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cfb40d3fd875..17126a2897ad 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -49,6 +49,7 @@ char *fault_name[FAULT_MAX] = { [FAULT_BLOCK] = "no more block", [FAULT_DIR_DEPTH] = "too big dir depth", [FAULT_EVICT_INODE] = "evict_inode fail", + [FAULT_TRUNCATE] = "truncate fail", [FAULT_IO] = "IO error", [FAULT_CHECKPOINT] = "checkpoint error", }; -- cgit v1.2.3 From 8a6aa32502549b1f15f0a28e3d2fcc5edabc3f19 Mon Sep 17 00:00:00 2001 From: Fan Li Date: Wed, 8 Mar 2017 13:39:16 +0800 Subject: f2fs: adjust the way of calculating nat block use a slightly simpler expression to calculate nat block with nid. 
Signed-off-by: Fan Li Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index 2f9603fa85a5..ebed0240aa53 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -200,13 +200,16 @@ static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) struct f2fs_nm_info *nm_i = NM_I(sbi); pgoff_t block_off; pgoff_t block_addr; - int seg_off; + /* + * block_off = segment_off * 512 + off_in_segment + * OLD = (segment_off * 512) * 2 + off_in_segment + * NEW = 2 * (segment_off * 512 + off_in_segment) - off_in_segment + */ block_off = NAT_BLOCK_OFFSET(start); - seg_off = block_off >> sbi->log_blocks_per_seg; block_addr = (pgoff_t)(nm_i->nat_blkaddr + - (seg_off << sbi->log_blocks_per_seg << 1) + + (block_off << 1) - (block_off & (sbi->blocks_per_seg - 1))); if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) -- cgit v1.2.3 From 087d3d8bae58415d9996ea5936d9738f881cd156 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 10 Mar 2017 17:54:03 +0800 Subject: f2fs: drop duplicate new_size assign in f2fs_zero_range Signed-off-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index af507585c75b..c3c475675d77 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1197,8 +1197,6 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len, if (ret) return ret; - if (offset + len > new_size) - new_size = offset + len; new_size = max_t(loff_t, new_size, offset + len); } else { if (off_start) { -- cgit v1.2.3 From 3cecfa5f6700c07ef943ab3d457f82f9868c4bfa Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 10 Mar 2017 17:54:26 +0800 Subject: f2fs: avoid copy date to user-space if move file range fail If move file range return error, the data copied to user-space is duplicate. 
Signed-off-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index c3c475675d77..cbde4d23e817 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2208,6 +2208,8 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg) range.pos_out, range.len); mnt_drop_write_file(filp); + if (err) + goto err_out; if (copy_to_user((struct f2fs_move_range __user *)arg, &range, sizeof(range))) -- cgit v1.2.3 From 46e82fb1b5349e75d8e64df40760f5f5a46bd80f Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 10 Mar 2017 17:54:52 +0800 Subject: f2fs: check new size by inode_newsize_ok in f2fs_insert_range The inode_newsize_ok is better than only checking the maxbytes, eg. the rlimit etc. Signed-off-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index cbde4d23e817..bc853dd9b7a3 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1264,8 +1264,9 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len) int ret = 0; new_size = i_size_read(inode) + len; - if (new_size > inode->i_sb->s_maxbytes) - return -EFBIG; + ret = inode_newsize_ok(inode, new_size); + if (ret) + return ret; if (offset >= i_size_read(inode)) return -EINVAL; -- cgit v1.2.3 From d7563861722a471cf0ca348b00fa7a1c34507dea Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 10 Mar 2017 17:55:07 +0800 Subject: f2fs: move mnt_want_write_file after arguments checking It's needless of mnt_want_write_file for arguments checking. 
Signed-off-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 44 ++++++++++++++++++-------------------------- 1 file changed, 18 insertions(+), 26 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index bc853dd9b7a3..22c73b205cd7 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -2037,45 +2037,37 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) if (!S_ISREG(inode->i_mode)) return -EINVAL; - err = mnt_want_write_file(filp); - if (err) - return err; - - if (f2fs_readonly(sbi->sb)) { - err = -EROFS; - goto out; - } + if (f2fs_readonly(sbi->sb)) + return -EROFS; if (copy_from_user(&range, (struct f2fs_defragment __user *)arg, - sizeof(range))) { - err = -EFAULT; - goto out; - } + sizeof(range))) + return -EFAULT; /* verify alignment of offset & size */ - if (range.start & (F2FS_BLKSIZE - 1) || - range.len & (F2FS_BLKSIZE - 1)) { - err = -EINVAL; - goto out; - } + if (range.start & (F2FS_BLKSIZE - 1) || range.len & (F2FS_BLKSIZE - 1)) + return -EINVAL; if (unlikely((range.start + range.len) >> PAGE_SHIFT > - sbi->max_file_blocks)) { - err = -EINVAL; - goto out; - } + sbi->max_file_blocks)) + return -EINVAL; + + err = mnt_want_write_file(filp); + if (err) + return err; err = f2fs_defragment_range(sbi, filp, &range); + mnt_drop_write_file(filp); + f2fs_update_time(sbi, REQ_TIME); if (err < 0) - goto out; + return err; if (copy_to_user((struct f2fs_defragment __user *)arg, &range, sizeof(range))) - err = -EFAULT; -out: - mnt_drop_write_file(filp); - return err; + return -EFAULT; + + return 0; } static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, -- cgit v1.2.3 From bd4667cb4b77f02a16dd14443ba66884d755c7e6 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 10 Mar 2017 20:43:20 +0800 Subject: f2fs: clear FI_DATA_EXIST flag in truncate_inline_inode Clear FI_DATA_EXIST flag atomically in truncate_inline_inode, and the return value from truncate_inline_inode isn't used, remove it. 
Signed-off-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 8 +------- fs/f2fs/file.c | 4 +--- fs/f2fs/inline.c | 21 +++++++++++---------- 3 files changed, 13 insertions(+), 20 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7c9685c4c904..148c8b1d506c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1873,12 +1873,6 @@ static inline int f2fs_has_inline_data(struct inode *inode) return is_inode_flag_set(inode, FI_INLINE_DATA); } -static inline void f2fs_clear_inline_inode(struct inode *inode) -{ - clear_inode_flag(inode, FI_INLINE_DATA); - clear_inode_flag(inode, FI_DATA_EXIST); -} - static inline int f2fs_exist_data(struct inode *inode) { return is_inode_flag_set(inode, FI_DATA_EXIST); @@ -2508,7 +2502,7 @@ extern struct kmem_cache *inode_entry_slab; bool f2fs_may_inline_data(struct inode *inode); bool f2fs_may_inline_dentry(struct inode *inode); void read_inline_data(struct page *page, struct page *ipage); -bool truncate_inline_inode(struct page *ipage, u64 from); +void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from); int f2fs_read_inline_data(struct inode *inode, struct page *page); int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page); int f2fs_convert_inline_inode(struct inode *inode); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 22c73b205cd7..77ebd6b9323c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -569,9 +569,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock) } if (f2fs_has_inline_data(inode)) { - truncate_inline_inode(ipage, from); - if (from == 0) - clear_inode_flag(inode, FI_DATA_EXIST); + truncate_inline_inode(inode, ipage, from); f2fs_put_page(ipage, 1); truncate_page = true; goto out; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index e32a9e527968..93c15337bcd9 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -63,19 +63,21 @@ void read_inline_data(struct page *page, struct page *ipage) SetPageUptodate(page); } 
-bool truncate_inline_inode(struct page *ipage, u64 from) +void truncate_inline_inode(struct inode *inode, struct page *ipage, u64 from) { void *addr; if (from >= MAX_INLINE_DATA) - return false; + return; addr = inline_data_addr(ipage); f2fs_wait_on_page_writeback(ipage, NODE, true); memset(addr + from, 0, MAX_INLINE_DATA - from); set_page_dirty(ipage); - return true; + + if (from == 0) + clear_inode_flag(inode, FI_DATA_EXIST); } int f2fs_read_inline_data(struct inode *inode, struct page *page) @@ -146,11 +148,11 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) set_inode_flag(dn->inode, FI_APPEND_WRITE); /* clear inline data and flag after data writeback */ - truncate_inline_inode(dn->inode_page, 0); + truncate_inline_inode(dn->inode, dn->inode_page, 0); clear_inline_node(dn->inode_page); clear_out: stat_dec_inline_inode(dn->inode); - f2fs_clear_inline_inode(dn->inode); + clear_inode_flag(dn->inode, FI_INLINE_DATA); f2fs_put_dnode(dn); return 0; } @@ -267,9 +269,8 @@ process_inline: if (f2fs_has_inline_data(inode)) { ipage = get_node_page(sbi, inode->i_ino); f2fs_bug_on(sbi, IS_ERR(ipage)); - if (!truncate_inline_inode(ipage, 0)) - return false; - f2fs_clear_inline_inode(inode); + truncate_inline_inode(inode, ipage, 0); + clear_inode_flag(inode, FI_INLINE_DATA); f2fs_put_page(ipage, 1); } else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) { if (truncate_blocks(inode, 0, false)) @@ -380,7 +381,7 @@ static int f2fs_move_inline_dirents(struct inode *dir, struct page *ipage, set_page_dirty(page); /* clear inline dir and flag after data writeback */ - truncate_inline_inode(ipage, 0); + truncate_inline_inode(dir, ipage, 0); stat_dec_inline_dir(dir); clear_inode_flag(dir, FI_INLINE_DENTRY); @@ -455,7 +456,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage, } memcpy(backup_dentry, inline_dentry, MAX_INLINE_DATA); - truncate_inline_inode(ipage, 0); + truncate_inline_inode(dir, ipage, 0); unlock_page(ipage); -- cgit 
v1.2.3 From a83d50bc16c4f5d0a359790015b5b32f3f0e52db Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 13 Mar 2017 16:35:13 +0800 Subject: f2fs: fix bad prefetchw of NULL page For f2fs_read_data_pages, the f2fs_mpage_readpages gets "page == NULL", so that, the prefetchw(&page->flags) is operated on NULL. Fixes: f1e8866016 ("f2fs: expose f2fs_mpage_readpages") Signed-off-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e341d446205a..2c8485bb6eb1 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1151,9 +1151,10 @@ static int f2fs_mpage_readpages(struct address_space *mapping, for (page_idx = 0; nr_pages; page_idx++, nr_pages--) { - prefetchw(&page->flags); if (pages) { page = list_last_entry(pages, struct page, lru); + + prefetchw(&page->flags); list_del(&page->lru); if (add_to_page_cache_lru(page, mapping, page->index, -- cgit v1.2.3 From 346fe752c431bcef5d05614263a4b4a0cfe88e10 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 13 Mar 2017 20:10:41 +0800 Subject: f2fs: cover update_free_nid_bitmap with nid_list_lock free_nid_bitmap and free_nid_count in update_free_nid_bitmap should be updated atomically, use nid_list_lock cover them to avoid race in concurrent scenario. 
Signed-off-by: Chao Yu Reviewed-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 - fs/f2fs/node.c | 27 +++++++++++---------------- 2 files changed, 11 insertions(+), 17 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 148c8b1d506c..6c00a787c342 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -565,7 +565,6 @@ struct f2fs_nm_info { unsigned char (*free_nid_bitmap)[NAT_ENTRY_BITMAP_SIZE]; unsigned char *nat_block_bitmap; unsigned short *free_nid_count; /* free nid count of NAT block */ - spinlock_t free_nid_lock; /* protect updating of nid count */ /* for checkpoint */ char *nat_bitmap; /* NAT bitmap pointer */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 11df8ab32478..3bfffd744f87 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1815,7 +1815,7 @@ static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) } static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, - bool set, bool build, bool locked) + bool set, bool build) { struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid); @@ -1829,14 +1829,10 @@ static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid, else __clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]); - if (!locked) - spin_lock(&nm_i->free_nid_lock); if (set) nm_i->free_nid_count[nat_ofs]++; else if (!build) nm_i->free_nid_count[nat_ofs]--; - if (!locked) - spin_unlock(&nm_i->free_nid_lock); } static void scan_nat_page(struct f2fs_sb_info *sbi, @@ -1865,7 +1861,9 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, f2fs_bug_on(sbi, blk_addr == NEW_ADDR); if (blk_addr == NULL_ADDR) freed = add_free_nid(sbi, start_nid, true); - update_free_nid_bitmap(sbi, start_nid, freed, true, false); + spin_lock(&NM_I(sbi)->nid_list_lock); + update_free_nid_bitmap(sbi, start_nid, freed, true); + spin_unlock(&NM_I(sbi)->nid_list_lock); } } @@ -2020,7 +2018,7 @@ retry: __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); 
nm_i->available_nids--; - update_free_nid_bitmap(sbi, *nid, false, false, false); + update_free_nid_bitmap(sbi, *nid, false, false); spin_unlock(&nm_i->nid_list_lock); return true; @@ -2076,7 +2074,7 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) nm_i->available_nids++; - update_free_nid_bitmap(sbi, nid, true, false, false); + update_free_nid_bitmap(sbi, nid, true, false); spin_unlock(&nm_i->nid_list_lock); @@ -2406,11 +2404,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, add_free_nid(sbi, nid, false); spin_lock(&NM_I(sbi)->nid_list_lock); NM_I(sbi)->available_nids++; - update_free_nid_bitmap(sbi, nid, true, false, false); + update_free_nid_bitmap(sbi, nid, true, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } else { spin_lock(&NM_I(sbi)->nid_list_lock); - update_free_nid_bitmap(sbi, nid, false, false, false); + update_free_nid_bitmap(sbi, nid, false, false); spin_unlock(&NM_I(sbi)->nid_list_lock); } } @@ -2535,10 +2533,10 @@ inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi) nid = i * NAT_ENTRY_PER_BLOCK; last_nid = (i + 1) * NAT_ENTRY_PER_BLOCK; - spin_lock(&nm_i->free_nid_lock); + spin_lock(&NM_I(sbi)->nid_list_lock); for (; nid < last_nid; nid++) - update_free_nid_bitmap(sbi, nid, true, true, true); - spin_unlock(&nm_i->free_nid_lock); + update_free_nid_bitmap(sbi, nid, true, true); + spin_unlock(&NM_I(sbi)->nid_list_lock); } for (i = 0; i < nm_i->nat_blocks; i++) { @@ -2629,9 +2627,6 @@ static int init_free_nid_cache(struct f2fs_sb_info *sbi) sizeof(unsigned short), GFP_KERNEL); if (!nm_i->free_nid_count) return -ENOMEM; - - spin_lock_init(&nm_i->free_nid_lock); - return 0; } -- cgit v1.2.3 From d03ba4cc3fa639916a86b61c5a3d817cb6aa91a0 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 10 Mar 2017 16:28:46 +0800 Subject: f2fs: cleanup the disk level filename updating As discuss with Jaegeuk and Chao, "Once checkpoint is done, f2fs doesn't need to update there-in filename at all." 
The disk-level filename is used only one case, 1. create a file A under a dir 2. sync A 3. godown 4. umount 5. mount (roll_forward) Only the rename/cross_rename changes the filename, if it happens, a. between step 1 and 2, the sync A will caused checkpoint, so that, the roll_forward at step 5 never happens. b. after step 2, the roll_forward happens, file A will roll forward to the result as after step 1. So that, any updating the disk filename is useless, just cleanup it. Signed-off-by: Kinglong Mee Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 25 ++++--------------------- fs/f2fs/f2fs.h | 2 -- fs/f2fs/file.c | 8 -------- fs/f2fs/inline.c | 2 -- fs/f2fs/namei.c | 29 ----------------------------- 5 files changed, 4 insertions(+), 62 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 8d5c62b07b28..058c4f3afcef 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -337,24 +337,6 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage) set_page_dirty(ipage); } -int update_dent_inode(struct inode *inode, struct inode *to, - const struct qstr *name) -{ - struct page *page; - - if (file_enc_name(to)) - return 0; - - page = get_node_page(F2FS_I_SB(inode), inode->i_ino); - if (IS_ERR(page)) - return PTR_ERR(page); - - init_dent_inode(name, page); - f2fs_put_page(page, 1); - - return 0; -} - void do_make_empty_dir(struct inode *inode, struct inode *parent, struct f2fs_dentry_ptr *d) { @@ -438,8 +420,11 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir, set_cold_node(inode, page); } - if (new_name) + if (new_name) { init_dent_inode(new_name, page); + if (f2fs_encrypted_inode(dir)) + file_set_enc_name(inode); + } /* * This file should be checkpointed during fsync. 
@@ -599,8 +584,6 @@ add_dentry: err = PTR_ERR(page); goto fail; } - if (f2fs_encrypted_inode(dir)) - file_set_enc_name(inode); } make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6c00a787c342..a481c8e39b7c 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2087,8 +2087,6 @@ ino_t f2fs_inode_by_name(struct inode *dir, const struct qstr *qstr, struct page **page); void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de, struct page *page, struct inode *inode); -int update_dent_inode(struct inode *inode, struct inode *to, - const struct qstr *name); void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d, const struct qstr *name, f2fs_hash_t name_hash, unsigned int bit_pos); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 77ebd6b9323c..78ece6e4779c 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -110,20 +110,12 @@ static int get_parent_ino(struct inode *inode, nid_t *pino) { struct dentry *dentry; - if (file_enc_name(inode)) - return 0; - inode = igrab(inode); dentry = d_find_any_alias(inode); iput(inode); if (!dentry) return 0; - if (update_dent_inode(inode, inode, &dentry->d_name)) { - dput(dentry); - return 0; - } - *pino = parent_ino(dentry); dput(dentry); return 1; diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 93c15337bcd9..701bbd8d10ba 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -528,8 +528,6 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, err = PTR_ERR(page); goto fail; } - if (f2fs_encrypted_inode(dir)) - file_set_enc_name(inode); } f2fs_wait_on_page_writeback(ipage, NODE, true); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 25c073f6c7d4..8906c9f6cce4 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -720,13 +720,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, if (err) goto put_out_dir; - err = update_dent_inode(old_inode, new_inode, - &new_dentry->d_name); - if (err) { - 
release_orphan_inode(sbi); - goto put_out_dir; - } - f2fs_set_link(new_dir, new_entry, new_page, old_inode); new_inode->i_ctime = current_time(new_inode); @@ -779,8 +772,6 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry, down_write(&F2FS_I(old_inode)->i_sem); file_lost_pino(old_inode); - if (new_inode && file_enc_name(new_inode)) - file_set_enc_name(old_inode); up_write(&F2FS_I(old_inode)->i_sem); old_inode->i_ctime = current_time(old_inode); @@ -917,18 +908,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, f2fs_lock_op(sbi); - err = update_dent_inode(old_inode, new_inode, &new_dentry->d_name); - if (err) - goto out_unlock; - if (file_enc_name(new_inode)) - file_set_enc_name(old_inode); - - err = update_dent_inode(new_inode, old_inode, &old_dentry->d_name); - if (err) - goto out_undo; - if (file_enc_name(old_inode)) - file_set_enc_name(new_inode); - /* update ".." directory entry info of old dentry */ if (old_dir_entry) f2fs_set_link(old_inode, old_dir_entry, old_dir_page, new_dir); @@ -972,14 +951,6 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry, if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) f2fs_sync_fs(sbi->sb, 1); return 0; -out_undo: - /* - * Still we may fail to recover name info of f2fs_inode here - * Drop it, once its name is set as encrypted - */ - update_dent_inode(old_inode, old_inode, &old_dentry->d_name); -out_unlock: - f2fs_unlock_op(sbi); out_new_dir: if (new_dir_entry) { f2fs_dentry_kunmap(new_inode, new_dir_page); -- cgit v1.2.3 From c6f89dfd528041de2e71788495579a18c0363869 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Wed, 15 Mar 2017 21:12:50 +0800 Subject: f2fs: sanity check of crc_offset from raw checkpoint The crc_offset towards or beyond the end of block is wrong, sanity check it. 
Signed-off-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 22348c7df67d..7dd7dd5914c5 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -678,7 +678,7 @@ static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr, *cp_block = (struct f2fs_checkpoint *)page_address(*cp_page); crc_offset = le32_to_cpu((*cp_block)->checksum_offset); - if (crc_offset >= blk_size) { + if (crc_offset > (blk_size - sizeof(__le32))) { f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc_offset: %zu", crc_offset); return -EINVAL; -- cgit v1.2.3 From 684ca7e55de1f3defb13c491e0b5eb8f27ffe79c Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 18 Mar 2017 09:20:55 +0800 Subject: f2fs: avoid stat_inc_atomic_write for non-atomic file After filemap_write_and_wait_range fail, the FI_ATOMIC_FILE flags is removed, so that f2fs should not increase the stat of atomic_write. 
Signed-off-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 78ece6e4779c..7704bd99b990 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1533,17 +1533,21 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); if (!get_dirty_pages(inode)) - goto out; + goto inc_stat; f2fs_msg(F2FS_I_SB(inode)->sb, KERN_WARNING, "Unexpected flush for atomic writes: ino=%lu, npages=%u", inode->i_ino, get_dirty_pages(inode)); ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX); - if (ret) + if (ret) { clear_inode_flag(inode, FI_ATOMIC_FILE); -out: + goto out; + } + +inc_stat: stat_inc_atomic_write(inode); stat_update_max_atomic_write(inode); +out: inode_unlock(inode); mnt_drop_write_file(filp); return ret; -- cgit v1.2.3 From 70874fb34fbd8b34b1aa45b30ca7eb216b1376d4 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 18 Mar 2017 09:25:05 +0800 Subject: f2fs: calculate the f2fs_stat_info into base_mem The memory size of f2fs_stat_info also should be calculated. 
Signed-off-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index ee2d0a485fc3..ef1179df05d9 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -156,7 +156,11 @@ static void update_mem_info(struct f2fs_sb_info *sbi) if (si->base_mem) goto get_cache; - si->base_mem = sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; + /* build stat */ + si->base_mem = sizeof(struct f2fs_stat_info); + + /* build superblock */ + si->base_mem += sizeof(struct f2fs_sb_info) + sbi->sb->s_blocksize; si->base_mem += 2 * sizeof(struct f2fs_inode_info); si->base_mem += sizeof(*sbi->ckpt); si->base_mem += sizeof(struct percpu_counter) * NR_COUNT_TYPE; -- cgit v1.2.3 From 8f73cbb7d4f300eda84a00ffe8bf9f7cf2d3ea06 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Sat, 18 Mar 2017 09:26:13 +0800 Subject: f2fs: more reasonable mem_size calculating of ino_entry Signed-off-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 3bfffd744f87..b52c9c0a47ab 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -63,8 +63,9 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) int i; for (i = 0; i <= UPDATE_INO; i++) - mem_size += (sbi->im[i].ino_num * - sizeof(struct ino_entry)) >> PAGE_SHIFT; + mem_size += sbi->im[i].ino_num * + sizeof(struct ino_entry); + mem_size >>= PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1); } else if (type == EXTENT_CACHE) { mem_size = (atomic_read(&sbi->total_ext_tree) * -- cgit v1.2.3 From 9897159a7b1aa98ec0bc8fc053ab822e6634e7fa Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 21 Mar 2017 20:09:45 +0800 Subject: f2fs: fix recording invalid last_victim When doing garbage collection, we try to record segment offset which locates at next one of last victim, 
using it as the start offset for the next search. But in some corner cases, the recorded offset may cross the end of the main segment area, which causes an incorrect search in the dirty_segmap bitmap. This patch adds a modulo operation to avoid this issue. Reported-by: Yunlei He Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 418fd9881646..939be88a8833 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -361,6 +361,7 @@ next: sbi->last_victim[p.gc_mode] = last_victim + 1; else sbi->last_victim[p.gc_mode] = segno + 1; + sbi->last_victim[p.gc_mode] %= MAIN_SEGS(sbi); break; } } -- cgit v1.2.3 From 5f4c3dec225e03273e1f74816e676cccc1a54329 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Wed, 22 Mar 2017 11:59:30 +0800 Subject: f2fs: use set_page_private macro in f2fs_trace_pid Use set_page_private macro instead of operating on the page struct directly Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c index c82ab4048127..bccbbf2616d2 100644 --- a/fs/f2fs/trace.c +++ b/fs/f2fs/trace.c @@ -59,7 +59,7 @@ void f2fs_trace_pid(struct page *page) pid_t pid = task_pid_nr(current); void *p; - page->private = pid; + set_page_private(page, (unsigned long)pid); if (radix_tree_preload(GFP_NOFS)) return; -- cgit v1.2.3 From 30a61ddf8117c26ac5b295e1233eaa9629a94ca3 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 22 Mar 2017 14:45:05 +0800 Subject: f2fs: fix race condition in between free nid allocator/initializer In the concurrent case below, an allocated nid can be loaded into the free nid cache and be allocated again.
Thread A Thread B - f2fs_create - f2fs_new_inode - alloc_nid - __insert_nid_to_list(ALLOC_NID_LIST) - f2fs_balance_fs_bg - build_free_nids - __build_free_nids - scan_nat_page - add_free_nid - __lookup_nat_cache - f2fs_add_link - init_inode_metadata - new_inode_page - new_node_page - set_node_addr - alloc_nid_done - __remove_nid_from_list(ALLOC_NID_LIST) - __insert_nid_to_list(FREE_NID_LIST) This patch makes nat cache lookup and free nid list operation being atomical to avoid this race condition. Signed-off-by: Jaegeuk Kim Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 63 +++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 18 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b52c9c0a47ab..29dc996b573c 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1761,40 +1761,67 @@ static void __remove_nid_from_list(struct f2fs_sb_info *sbi, static bool add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) { struct f2fs_nm_info *nm_i = NM_I(sbi); - struct free_nid *i; + struct free_nid *i, *e; struct nat_entry *ne; - int err; + int err = -EINVAL; + bool ret = false; /* 0 nid should not be used */ if (unlikely(nid == 0)) return false; - if (build) { - /* do not add allocated nids */ - ne = __lookup_nat_cache(nm_i, nid); - if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || - nat_get_blkaddr(ne) != NULL_ADDR)) - return false; - } - i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS); i->nid = nid; i->state = NID_NEW; - if (radix_tree_preload(GFP_NOFS)) { - kmem_cache_free(free_nid_slab, i); - return true; - } + if (radix_tree_preload(GFP_NOFS)) + goto err; spin_lock(&nm_i->nid_list_lock); + + if (build) { + /* + * Thread A Thread B + * - f2fs_create + * - f2fs_new_inode + * - alloc_nid + * - __insert_nid_to_list(ALLOC_NID_LIST) + * - f2fs_balance_fs_bg + * - build_free_nids + * - __build_free_nids + * - scan_nat_page + * - add_free_nid + * - __lookup_nat_cache + * - 
f2fs_add_link + * - init_inode_metadata + * - new_inode_page + * - new_node_page + * - set_node_addr + * - alloc_nid_done + * - __remove_nid_from_list(ALLOC_NID_LIST) + * - __insert_nid_to_list(FREE_NID_LIST) + */ + ne = __lookup_nat_cache(nm_i, nid); + if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) || + nat_get_blkaddr(ne) != NULL_ADDR)) + goto err_out; + + e = __lookup_free_nid_list(nm_i, nid); + if (e) { + if (e->state == NID_NEW) + ret = true; + goto err_out; + } + } + ret = true; err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true); +err_out: spin_unlock(&nm_i->nid_list_lock); radix_tree_preload_end(); - if (err) { +err: + if (err) kmem_cache_free(free_nid_slab, i); - return true; - } - return true; + return ret; } static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) -- cgit v1.2.3 From 648d50ba12c805d3fc75105ede7af254b3349dbd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 22 Mar 2017 17:23:45 +0800 Subject: f2fs: show the max number of volatile operations This patch adds to show the max number of volatile operations which are conducting concurrently. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 10 ++++++++-- fs/f2fs/f2fs.h | 18 +++++++++++++++++- fs/f2fs/file.c | 5 +++++ 3 files changed, 30 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index ef1179df05d9..0baa3ee39392 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -51,7 +51,9 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->ndirty_all = sbi->ndirty_inode[DIRTY_META]; si->inmem_pages = get_pages(sbi, F2FS_INMEM_PAGES); si->aw_cnt = atomic_read(&sbi->aw_cnt); + si->vw_cnt = atomic_read(&sbi->vw_cnt); si->max_aw_cnt = atomic_read(&sbi->max_aw_cnt); + si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); if (SM_I(sbi) && SM_I(sbi)->fcc_info) @@ -337,8 +339,10 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n", si->nr_wb_cp_data, si->nr_wb_data, si->nr_flush, si->nr_discard); - seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d)\n", - si->inmem_pages, si->aw_cnt, si->max_aw_cnt); + seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), " + "volatile IO: %4d (Max. 
%4d)\n", + si->inmem_pages, si->aw_cnt, si->max_aw_cnt, + si->vw_cnt, si->max_vw_cnt); seq_printf(s, " - nodes: %4d in %4d\n", si->ndirty_node, si->node_pages); seq_printf(s, " - dents: %4d in dirs:%4d (%4d)\n", @@ -438,7 +442,9 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi) atomic_set(&sbi->inplace_count, 0); atomic_set(&sbi->aw_cnt, 0); + atomic_set(&sbi->vw_cnt, 0); atomic_set(&sbi->max_aw_cnt, 0); + atomic_set(&sbi->max_vw_cnt, 0); mutex_lock(&f2fs_stat_mutex); list_add_tail(&si->stat_list, &f2fs_stat_list); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index a481c8e39b7c..340d62c29a98 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -915,7 +915,9 @@ struct f2fs_sb_info { atomic_t inline_inode; /* # of inline_data inodes */ atomic_t inline_dir; /* # of inline_dentry inodes */ atomic_t aw_cnt; /* # of atomic writes */ + atomic_t vw_cnt; /* # of volatile writes */ atomic_t max_aw_cnt; /* max # of atomic writes */ + atomic_t max_vw_cnt; /* max # of volatile writes */ int bg_gc; /* background gc calls */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ #endif @@ -2328,7 +2330,7 @@ struct f2fs_stat_info { int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data, nr_flush, nr_discard; int inline_xattr, inline_inode, inline_dir, append, update, orphans; - int aw_cnt, max_aw_cnt; + int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt; unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; unsigned int bimodal, avg_vblocks; int util_free, util_valid, util_invalid; @@ -2411,6 +2413,17 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) if (cur > max) \ atomic_set(&F2FS_I_SB(inode)->max_aw_cnt, cur); \ } while (0) +#define stat_inc_volatile_write(inode) \ + (atomic_inc(&F2FS_I_SB(inode)->vw_cnt)) +#define stat_dec_volatile_write(inode) \ + (atomic_dec(&F2FS_I_SB(inode)->vw_cnt)) +#define stat_update_max_volatile_write(inode) \ + do { \ + int cur = atomic_read(&F2FS_I_SB(inode)->vw_cnt); \ + int max = 
atomic_read(&F2FS_I_SB(inode)->max_vw_cnt); \ + if (cur > max) \ + atomic_set(&F2FS_I_SB(inode)->max_vw_cnt, cur); \ + } while (0) #define stat_inc_seg_count(sbi, type, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ @@ -2467,6 +2480,9 @@ void f2fs_destroy_root_stats(void); #define stat_inc_atomic_write(inode) #define stat_dec_atomic_write(inode) #define stat_update_max_atomic_write(inode) +#define stat_inc_volatile_write(inode) +#define stat_dec_volatile_write(inode) +#define stat_update_max_volatile_write(inode) #define stat_inc_seg_type(sbi, curseg) #define stat_inc_block_count(sbi, curseg) #define stat_inc_inplace_blocks(sbi) diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 7704bd99b990..f3be240ef129 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1426,6 +1426,7 @@ static int f2fs_release_file(struct inode *inode, struct file *filp) drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { clear_inode_flag(inode, FI_VOLATILE_FILE); + stat_dec_volatile_write(inode); set_inode_flag(inode, FI_DROP_CACHE); filemap_fdatawrite(inode->i_mapping); clear_inode_flag(inode, FI_DROP_CACHE); @@ -1613,6 +1614,9 @@ static int f2fs_ioc_start_volatile_write(struct file *filp) if (ret) goto out; + stat_inc_volatile_write(inode); + stat_update_max_volatile_write(inode); + set_inode_flag(inode, FI_VOLATILE_FILE); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); out: @@ -1668,6 +1672,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp) drop_inmem_pages(inode); if (f2fs_is_volatile_file(inode)) { clear_inode_flag(inode, FI_VOLATILE_FILE); + stat_dec_volatile_write(inode); ret = f2fs_do_sync_file(filp, 0, LLONG_MAX, 0, true); } -- cgit v1.2.3 From 99f4b917b0411f660b06ffd30f3dc36cf7b05710 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 22 Mar 2017 17:23:46 +0800 Subject: f2fs: don't track volatile file in dirty inode list Don't track volatile file in dirty inode list, otherwise with data_flush option, background thread will entry into endless loop 
for flushing journal file's pages. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 7dd7dd5914c5..adcc2c370df9 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -817,7 +817,9 @@ static void __add_dirty_inode(struct inode *inode, enum inode_type type) return; set_inode_flag(inode, flag); - list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]); + if (!f2fs_is_volatile_file(inode)) + list_add_tail(&F2FS_I(inode)->dirty_list, + &sbi->inode_list[type]); stat_inc_dirty_inode(sbi, type); } -- cgit v1.2.3 From 89e9eabd7ded4680f3b0f8ddc7398a3909de57c8 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 23 Mar 2017 13:38:25 +0800 Subject: f2fs: clean up xattr operation 1. don't allocate redundant memory in read_all_xattrs. 2. introduce RESERVED_XATTR_SIZE for cleanup. Signed-off-by: Chao Yu Reviewed-by: Kinglong Mee Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 25 +++++++++++-------------- fs/f2fs/xattr.h | 3 ++- 2 files changed, 13 insertions(+), 15 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index aff7619e3f96..528aed399814 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -250,15 +250,13 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, void *cur_addr, *txattr_addr, *last_addr = NULL; nid_t xnid = F2FS_I(inode)->i_xattr_nid; unsigned int size = xnid ? 
VALID_XATTR_BLOCK_SIZE : 0; - unsigned int inline_size = 0; + unsigned int inline_size = inline_xattr_size(inode); int err = 0; - inline_size = inline_xattr_size(inode); - if (!size && !inline_size) return -ENODATA; - txattr_addr = kzalloc(inline_size + size + sizeof(__u32), + txattr_addr = kzalloc(inline_size + size + RESERVED_XATTR_SIZE, GFP_F2FS_ZERO); if (!txattr_addr) return -ENOMEM; @@ -328,13 +326,14 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); struct f2fs_xattr_header *header; - size_t size = PAGE_SIZE, inline_size = 0; + nid_t xnid = F2FS_I(inode)->i_xattr_nid; + unsigned int size = VALID_XATTR_BLOCK_SIZE; + unsigned int inline_size = inline_xattr_size(inode); void *txattr_addr; int err; - inline_size = inline_xattr_size(inode); - - txattr_addr = kzalloc(inline_size + size, GFP_F2FS_ZERO); + txattr_addr = kzalloc(inline_size + size + RESERVED_XATTR_SIZE, + GFP_F2FS_ZERO); if (!txattr_addr) return -ENOMEM; @@ -358,19 +357,19 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, } /* read from xattr node block */ - if (F2FS_I(inode)->i_xattr_nid) { + if (xnid) { struct page *xpage; void *xattr_addr; /* The inode already has an extended attribute block. 
*/ - xpage = get_node_page(sbi, F2FS_I(inode)->i_xattr_nid); + xpage = get_node_page(sbi, xnid); if (IS_ERR(xpage)) { err = PTR_ERR(xpage); goto fail; } xattr_addr = page_address(xpage); - memcpy(txattr_addr + inline_size, xattr_addr, PAGE_SIZE); + memcpy(txattr_addr + inline_size, xattr_addr, size); f2fs_put_page(xpage, 1); } @@ -392,14 +391,12 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, void *txattr_addr, struct page *ipage) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - size_t inline_size = 0; + size_t inline_size = inline_xattr_size(inode); void *xattr_addr; struct page *xpage; nid_t new_nid = 0; int err; - inline_size = inline_xattr_size(inode); - if (hsize > inline_size && !F2FS_I(inode)->i_xattr_nid) if (!alloc_nid(sbi, &new_nid)) return -ENOSPC; diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index d5a94928c116..1e7db8d0806e 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -73,7 +73,8 @@ struct f2fs_xattr_entry { !IS_XATTR_LAST_ENTRY(entry);\ entry = XATTR_NEXT_ENTRY(entry)) #define MAX_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer)) -#define VALID_XATTR_BLOCK_SIZE (MAX_XATTR_BLOCK_SIZE - sizeof(__u32)) +#define RESERVED_XATTR_SIZE (sizeof(__u32)) +#define VALID_XATTR_BLOCK_SIZE (MAX_XATTR_BLOCK_SIZE - RESERVED_XATTR_SIZE) #define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \ VALID_XATTR_BLOCK_SIZE) -- cgit v1.2.3 From 22588f8773f687449ce6e97dce9f102553440d30 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 23 Mar 2017 13:38:26 +0800 Subject: f2fs: don't reserve additional space in xattr block In this patch, we change xattr block disk layout as below: Before: xattr node block layout +---------------------------------------------+---------------+-------------+ | node block xattr entries | reserved | node footer | | 4068 Bytes | 4 Bytes | 24 Bytes | In memory layout +--------------------+---------------------------------+--------------------+ | inline xattr | node block xattr entries | reserved | | 200 Bytes | 
4068 Bytes | 4 Bytes | After: xattr node block layout +-------------------------------------------------------------+-------------+ | node block xattr entries | node footer | | 4072 Bytes | 24 Bytes | In memory layout +--------------------+---------------------------------+--------------------+ | inline xattr | node block xattr entries | reserved | | 200 Bytes | 4072 Bytes | 4 Bytes | With this change, we don't need to reserve additional space in node block, just keep reserved space in logical in-memory layout. So that it would help to enlarge valid free space of xattr node block. As tested, generic/026 shows max stored xattr entires number increases from 531 to 532 when inline_xattr option is enabled. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/xattr.c | 6 +++--- fs/f2fs/xattr.h | 5 ++--- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c index 528aed399814..832c5110abab 100644 --- a/fs/f2fs/xattr.c +++ b/fs/f2fs/xattr.c @@ -256,7 +256,7 @@ static int lookup_all_xattrs(struct inode *inode, struct page *ipage, if (!size && !inline_size) return -ENODATA; - txattr_addr = kzalloc(inline_size + size + RESERVED_XATTR_SIZE, + txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE, GFP_F2FS_ZERO); if (!txattr_addr) return -ENOMEM; @@ -332,7 +332,7 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage, void *txattr_addr; int err; - txattr_addr = kzalloc(inline_size + size + RESERVED_XATTR_SIZE, + txattr_addr = kzalloc(inline_size + size + XATTR_PADDING_SIZE, GFP_F2FS_ZERO); if (!txattr_addr) return -ENOMEM; @@ -451,7 +451,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize, } xattr_addr = page_address(xpage); - memcpy(xattr_addr, txattr_addr + inline_size, MAX_XATTR_BLOCK_SIZE); + memcpy(xattr_addr, txattr_addr + inline_size, VALID_XATTR_BLOCK_SIZE); set_page_dirty(xpage); f2fs_put_page(xpage, 1); diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h 
index 1e7db8d0806e..6afcee35ebeb 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -72,9 +72,8 @@ struct f2fs_xattr_entry { for (entry = XATTR_FIRST_ENTRY(addr);\ !IS_XATTR_LAST_ENTRY(entry);\ entry = XATTR_NEXT_ENTRY(entry)) -#define MAX_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer)) -#define RESERVED_XATTR_SIZE (sizeof(__u32)) -#define VALID_XATTR_BLOCK_SIZE (MAX_XATTR_BLOCK_SIZE - RESERVED_XATTR_SIZE) +#define VALID_XATTR_BLOCK_SIZE (PAGE_SIZE - sizeof(struct node_footer)) +#define XATTR_PADDING_SIZE (sizeof(__u32)) #define MIN_OFFSET(i) XATTR_ALIGN(inline_xattr_size(i) + \ VALID_XATTR_BLOCK_SIZE) -- cgit v1.2.3 From 59c9081bc86ef0b273a41abf2c1f413301429a6d Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Mon, 13 Mar 2017 20:22:18 +0800 Subject: f2fs: allow write page cache when writting cp This patch allow write data to normal file when writting new checkpoint. We relax three limitations for write_begin path: 1. data allocation 2. node allocation 3. variables in checkpoint Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 40 ++++++++++++++++++++++++++++------------ fs/f2fs/data.c | 28 ++++++++++++++++++++++------ fs/f2fs/f2fs.h | 1 + fs/f2fs/node.c | 12 ++++++------ fs/f2fs/super.c | 1 + 5 files changed, 58 insertions(+), 24 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index adcc2c370df9..9aba0bb340a0 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -944,6 +944,19 @@ int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi) return 0; } +static void __prepare_cp_block(struct f2fs_sb_info *sbi) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + struct f2fs_nm_info *nm_i = NM_I(sbi); + nid_t last_nid = nm_i->next_scan_nid; + + next_free_nid(sbi, &last_nid); + ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); + ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); + ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); + 
ckpt->next_free_nid = cpu_to_le32(last_nid); +} + /* * Freeze all the FS-operations for checkpoint. */ @@ -970,7 +983,14 @@ retry_flush_dents: goto retry_flush_dents; } + /* + * POR: we should ensure that there are no dirty node pages + * until finishing nat/sit flush. inode->i_blocks can be updated. + */ + down_write(&sbi->node_change); + if (get_pages(sbi, F2FS_DIRTY_IMETA)) { + up_write(&sbi->node_change); f2fs_unlock_all(sbi); err = f2fs_sync_inode_meta(sbi); if (err) @@ -978,10 +998,6 @@ retry_flush_dents: goto retry_flush_dents; } - /* - * POR: we should ensure that there are no dirty node pages - * until finishing nat/sit flush. - */ retry_flush_nodes: down_write(&sbi->node_write); @@ -989,11 +1005,19 @@ retry_flush_nodes: up_write(&sbi->node_write); err = sync_node_pages(sbi, &wbc); if (err) { + up_write(&sbi->node_change); f2fs_unlock_all(sbi); goto out; } goto retry_flush_nodes; } + + /* + * sbi->node_change is used only for AIO write_begin path which produces + * dirty node blocks and some checkpoint values by block allocation. 
+ */ + __prepare_cp_block(sbi); + up_write(&sbi->node_change); out: blk_finish_plug(&plug); return err; @@ -1061,7 +1085,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); struct f2fs_nm_info *nm_i = NM_I(sbi); unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num; - nid_t last_nid = nm_i->next_scan_nid; block_t start_blk; unsigned int data_sum_blocks, orphan_blocks; __u32 crc32 = 0; @@ -1078,14 +1101,11 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) return -EIO; } - next_free_nid(sbi, &last_nid); - /* * modify checkpoint * version number is already updated */ ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi)); - ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi)); ckpt->free_segment_count = cpu_to_le32(free_segments(sbi)); for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) { ckpt->cur_node_segno[i] = @@ -1104,10 +1124,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) curseg_alloc_type(sbi, i + CURSEG_HOT_DATA); } - ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi)); - ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi)); - ckpt->next_free_nid = cpu_to_le32(last_nid); - /* 2 cp + n data seg summary + orphan inode blocks */ data_sum_blocks = npages_for_summary_flush(sbi, false); spin_lock(&sbi->cp_lock); diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2c8485bb6eb1..090413236b27 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -787,6 +787,21 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from) return err; } +static inline void __do_map_lock(struct f2fs_sb_info *sbi, int flag, bool lock) +{ + if (flag == F2FS_GET_BLOCK_PRE_AIO) { + if (lock) + down_read(&sbi->node_change); + else + up_read(&sbi->node_change); + } else { + if (lock) + f2fs_lock_op(sbi); + else + f2fs_unlock_op(sbi); + } +} + /* * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with * f2fs_map_blocks structure. 
@@ -829,7 +844,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, next_dnode: if (create) - f2fs_lock_op(sbi); + __do_map_lock(sbi, flag, true); /* When reading holes, we need its node page */ set_new_dnode(&dn, inode, NULL, NULL, 0); @@ -939,7 +954,7 @@ skip: f2fs_put_dnode(&dn); if (create) { - f2fs_unlock_op(sbi); + __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } goto next_dnode; @@ -948,7 +963,7 @@ sync_out: f2fs_put_dnode(&dn); unlock_out: if (create) { - f2fs_unlock_op(sbi); + __do_map_lock(sbi, flag, false); f2fs_balance_fs(sbi, dn.node_changed); } out: @@ -1688,7 +1703,7 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi, if (f2fs_has_inline_data(inode) || (pos & PAGE_MASK) >= i_size_read(inode)) { - f2fs_lock_op(sbi); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, true); locked = true; } restart: @@ -1724,7 +1739,8 @@ restart: err = get_dnode_of_data(&dn, index, LOOKUP_NODE); if (err || dn.data_blkaddr == NULL_ADDR) { f2fs_put_dnode(&dn); - f2fs_lock_op(sbi); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, + true); locked = true; goto restart; } @@ -1738,7 +1754,7 @@ out: f2fs_put_dnode(&dn); unlock_out: if (locked) - f2fs_unlock_op(sbi); + __do_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO, false); return err; } diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 340d62c29a98..6fbdcac01d9a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -830,6 +830,7 @@ struct f2fs_sb_info { struct mutex cp_mutex; /* checkpoint procedure lock */ struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ + struct rw_semaphore node_change; /* locking node change */ wait_queue_head_t cp_wait; unsigned long last_time[MAX_TIME]; /* to store time in jiffies */ long interval_time[MAX_TIME]; /* to store thresholds */ diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 29dc996b573c..6e87178d34a2 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -2448,10 +2448,11 @@ static void 
__flush_nat_entry_set(struct f2fs_sb_info *sbi, f2fs_put_page(page, 1); } - f2fs_bug_on(sbi, set->entry_cnt); - - radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); - kmem_cache_free(nat_entry_set_slab, set); + /* Allow dirty nats by node block allocation in write_begin */ + if (!set->entry_cnt) { + radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set); + kmem_cache_free(nat_entry_set_slab, set); + } } /* @@ -2496,8 +2497,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) __flush_nat_entry_set(sbi, set, cpc); up_write(&nm_i->nat_tree_lock); - - f2fs_bug_on(sbi, nm_i->dirty_nat_cnt); + /* Allow dirty nats by node block allocation in write_begin */ } static int __get_nat_bitmaps(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 17126a2897ad..49434f951ace 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1918,6 +1918,7 @@ try_onemore: mutex_init(&sbi->gc_mutex); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); + init_rwsem(&sbi->node_change); /* disallow all the data/node/meta page writes */ set_sbi_flag(sbi, SBI_POR_DOING); -- cgit v1.2.3 From c541a51b8ce81d003b02ed67ad3604a2e6220e3e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Sat, 25 Mar 2017 00:03:02 -0700 Subject: f2fs: fix wrong max cost initialization This patch fixes missing increased max cost caused by a patch that we increased cose of data segments in greedy algorithm. 
Cc: # v4.10+ Fixes: b9cd20619 "f2fs: node segment is prior to data segment selected victim" Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 939be88a8833..704bea678d37 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -182,7 +182,7 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi, if (p->alloc_mode == SSR) return sbi->blocks_per_seg; if (p->gc_mode == GC_GREEDY) - return sbi->blocks_per_seg * p->ofs_unit; + return 2 * sbi->blocks_per_seg * p->ofs_unit; else if (p->gc_mode == GC_CB) return UINT_MAX; else /* No other gc_mode */ -- cgit v1.2.3 From 7a20b8a61eff81bdb7097a578752a74860e9d142 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 24 Mar 2017 20:41:45 -0400 Subject: f2fs: allocate node and hot data in the beginning of partition In order to give more spatial locality, this patch changes the block allocation policy which assigns beginning of partition for small and hot data/node blocks. In order to do this, we set noheap allocation by default and introduce another mount option, heap, to reset it back. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 6 +++++- fs/f2fs/segment.c | 9 +++++++++ fs/f2fs/super.c | 10 +++++++++- 3 files changed, 23 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 704bea678d37..63fefef04184 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -172,7 +172,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, if (gc_type != FG_GC && p->max_search > sbi->max_victim_search) p->max_search = sbi->max_victim_search; - p->offset = sbi->last_victim[p->gc_mode]; + /* let's select beginning hot/small space first */ + if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + p->offset = 0; + else + p->offset = sbi->last_victim[p->gc_mode]; } static unsigned int get_max_cost(struct f2fs_sb_info *sbi, diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c5a5258f71c5..c5f0075764bf 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1519,6 +1519,14 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) __set_sit_entry_type(sbi, type, curseg->segno, modified); } +static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) +{ + if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) + return 0; + + return CURSEG_I(sbi, type)->segno; +} + /* * Allocate a current working segment. * This function always allocates a free segment in LFS manner. 
@@ -1537,6 +1545,7 @@ static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec) if (test_opt(sbi, NOHEAP)) dir = ALLOC_RIGHT; + segno = __get_next_segno(sbi, type); get_new_segment(sbi, &segno, new_sec, dir); curseg->next_segno = segno; reset_curseg(sbi, type, 1); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 49434f951ace..f315b54cd840 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -83,6 +83,7 @@ enum { Opt_discard, Opt_nodiscard, Opt_noheap, + Opt_heap, Opt_user_xattr, Opt_nouser_xattr, Opt_acl, @@ -117,6 +118,7 @@ static match_table_t f2fs_tokens = { {Opt_discard, "discard"}, {Opt_nodiscard, "nodiscard"}, {Opt_noheap, "no_heap"}, + {Opt_heap, "heap"}, {Opt_user_xattr, "user_xattr"}, {Opt_nouser_xattr, "nouser_xattr"}, {Opt_acl, "acl"}, @@ -437,6 +439,9 @@ static int parse_options(struct super_block *sb, char *options) case Opt_noheap: set_opt(sbi, NOHEAP); break; + case Opt_heap: + clear_opt(sbi, NOHEAP); + break; #ifdef CONFIG_F2FS_FS_XATTR case Opt_user_xattr: set_opt(sbi, XATTR_USER); @@ -914,7 +919,9 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root) if (test_opt(sbi, DISCARD)) seq_puts(seq, ",discard"); if (test_opt(sbi, NOHEAP)) - seq_puts(seq, ",no_heap_alloc"); + seq_puts(seq, ",no_heap"); + else + seq_puts(seq, ",heap"); #ifdef CONFIG_F2FS_FS_XATTR if (test_opt(sbi, XATTR_USER)) seq_puts(seq, ",user_xattr"); @@ -1047,6 +1054,7 @@ static void default_options(struct f2fs_sb_info *sbi) set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); set_opt(sbi, EXTENT_CACHE); + set_opt(sbi, NOHEAP); sbi->sb->s_flags |= MS_LAZYTIME; set_opt(sbi, FLUSH_MERGE); if (f2fs_sb_mounted_blkzoned(sbi->sb)) { -- cgit v1.2.3 From 796dbbfe4e52cc3ae860794fc20ea157391c4d93 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 24 Mar 2017 21:08:56 -0400 Subject: f2fs: start SSR much eariler to avoid FG_GC This patch initiates SSR much earlier, resulting in less FG_GC.
Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 5e8ad4280a50..31846b0fcb95 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -495,7 +495,7 @@ static inline bool need_SSR(struct f2fs_sb_info *sbi) return false; return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs + - reserved_sections(sbi) + 1); + 2 * reserved_sections(sbi)); } static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi, -- cgit v1.2.3 From c13ff37e359bb3eacf4e1760dcea8d9760aa7459 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 21 Mar 2017 10:59:50 -0400 Subject: f2fs: relax node version check for victim data in gc - has_not_enough_free_secs node_secs: 0 dent_secs: 0 freed:0 free_segments:103 reserved:104 - f2fs_gc - get_victim_by_default alloc_mode 0, gc_mode 1, max_search 2672, offset 4654, ofs_unit 1 - do_garbage_collect start_segno 3976, end_segno 3977 type 0 - is_alive nid 22797, blkaddr 2131882, ofs_in_node 0, version 0x8/0x0 - gc_data_segment 766, segno 3976, block 512/426 not alive So, this patch fixes subtle corrupted case where node version does not match to summary version which results in infinite loop by gc. 
Reported-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 63fefef04184..c52656ccbde5 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -555,8 +555,10 @@ static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, get_node_info(sbi, nid, dni); if (sum->version != dni->version) { - f2fs_put_page(node_page, 1); - return false; + f2fs_msg(sbi->sb, KERN_WARNING, + "%s: valid data with mismatched node version.", + __func__); + set_sbi_flag(sbi, SBI_NEED_FSCK); } *nofs = ofs_of_node(node_page); -- cgit v1.2.3 From 8b8dd65f72ccbf7111eb97c4c4f5b5df2a412a07 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 25 Mar 2017 17:19:58 +0800 Subject: f2fs: show issued flush/discard count Show historical count of flush command and discard command. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 24 ++++++++++++++++-------- fs/f2fs/f2fs.h | 9 ++++++--- fs/f2fs/segment.c | 37 +++++++++++++++++++++++-------------- 3 files changed, 45 insertions(+), 25 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 0baa3ee39392..f27e66ea7ff3 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -56,12 +56,18 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->max_vw_cnt = atomic_read(&sbi->max_vw_cnt); si->nr_wb_cp_data = get_pages(sbi, F2FS_WB_CP_DATA); si->nr_wb_data = get_pages(sbi, F2FS_WB_DATA); - if (SM_I(sbi) && SM_I(sbi)->fcc_info) - si->nr_flush = - atomic_read(&SM_I(sbi)->fcc_info->submit_flush); - if (SM_I(sbi) && SM_I(sbi)->dcc_info) - si->nr_discard = - atomic_read(&SM_I(sbi)->dcc_info->submit_discard); + if (SM_I(sbi) && SM_I(sbi)->fcc_info) { + si->nr_flushed = + atomic_read(&SM_I(sbi)->fcc_info->issued_flush); + si->nr_flushing = + atomic_read(&SM_I(sbi)->fcc_info->issing_flush); + } + if (SM_I(sbi) && SM_I(sbi)->dcc_info) { + si->nr_discarded = + 
atomic_read(&SM_I(sbi)->dcc_info->issued_discard); + si->nr_discarding = + atomic_read(&SM_I(sbi)->dcc_info->issing_discard); + } si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; si->rsvd_segs = reserved_segments(sbi); si->overp_segs = overprovision_segments(sbi); @@ -336,9 +342,11 @@ static int stat_show(struct seq_file *s, void *v) seq_printf(s, " - Inner Struct Count: tree: %d(%d), node: %d\n", si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); - seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: %4d, Discard: %4d)\n", + seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), " + "Discard: (%4d %4d))\n", si->nr_wb_cp_data, si->nr_wb_data, - si->nr_flush, si->nr_discard); + si->nr_flushing, si->nr_flushed, + si->nr_discarding, si->nr_discarded); seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), " "volatile IO: %4d (Max. %4d)\n", si->inmem_pages, si->aw_cnt, si->max_aw_cnt, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6fbdcac01d9a..dbdce5ffe45a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -214,7 +214,8 @@ struct discard_cmd_control { wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ struct mutex cmd_lock; int max_discards; /* max. 
discards to be issued */ - atomic_t submit_discard; /* # of issued discard */ + atomic_t issued_discard; /* # of issued discard */ + atomic_t issing_discard; /* # of issing discard */ }; /* for the list of fsync inodes, used only during recovery */ @@ -643,7 +644,8 @@ struct flush_cmd { struct flush_cmd_control { struct task_struct *f2fs_issue_flush; /* flush thread */ wait_queue_head_t flush_wait_queue; /* waiting queue for wake-up */ - atomic_t submit_flush; /* # of issued flushes */ + atomic_t issued_flush; /* # of issued flushes */ + atomic_t issing_flush; /* # of issing flushes */ struct llist_head issue_list; /* list for command issue */ struct llist_node *dispatch_list; /* list for command dispatch */ }; @@ -2329,7 +2331,8 @@ struct f2fs_stat_info { unsigned int ndirty_dirs, ndirty_files, ndirty_all; int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids; int total_count, utilization; - int bg_gc, nr_wb_cp_data, nr_wb_data, nr_flush, nr_discard; + int bg_gc, nr_wb_cp_data, nr_wb_data; + int nr_flushing, nr_flushed, nr_discarding, nr_discarded; int inline_xattr, inline_inode, inline_dir, append, update, orphans; int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt; unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c5f0075764bf..e9c55dd8579f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -490,6 +490,8 @@ repeat: fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list); ret = submit_flush_wait(sbi); + atomic_inc(&fcc->issued_flush); + llist_for_each_entry_safe(cmd, next, fcc->dispatch_list, llnode) { cmd->ret = ret; @@ -507,25 +509,29 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) { struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info; struct flush_cmd cmd; + int ret; if (test_opt(sbi, NOBARRIER)) return 0; - if (!test_opt(sbi, FLUSH_MERGE)) - return submit_flush_wait(sbi); - - if (!atomic_read(&fcc->submit_flush)) { - int ret; + if (!test_opt(sbi, FLUSH_MERGE)) { 
+ ret = submit_flush_wait(sbi); + atomic_inc(&fcc->issued_flush); + return ret; + } - atomic_inc(&fcc->submit_flush); + if (!atomic_read(&fcc->issing_flush)) { + atomic_inc(&fcc->issing_flush); ret = submit_flush_wait(sbi); - atomic_dec(&fcc->submit_flush); + atomic_dec(&fcc->issing_flush); + + atomic_inc(&fcc->issued_flush); return ret; } init_completion(&cmd.wait); - atomic_inc(&fcc->submit_flush); + atomic_inc(&fcc->issing_flush); llist_add(&cmd.llnode, &fcc->issue_list); if (!fcc->dispatch_list) @@ -533,10 +539,10 @@ int f2fs_issue_flush(struct f2fs_sb_info *sbi) if (fcc->f2fs_issue_flush) { wait_for_completion(&cmd.wait); - atomic_dec(&fcc->submit_flush); + atomic_dec(&fcc->issing_flush); } else { llist_del_all(&fcc->issue_list); - atomic_set(&fcc->submit_flush, 0); + atomic_set(&fcc->issing_flush, 0); } return cmd.ret; @@ -556,7 +562,8 @@ int create_flush_cmd_control(struct f2fs_sb_info *sbi) fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL); if (!fcc) return -ENOMEM; - atomic_set(&fcc->submit_flush, 0); + atomic_set(&fcc->issued_flush, 0); + atomic_set(&fcc->issing_flush, 0); init_waitqueue_head(&fcc->flush_wait_queue); init_llist_head(&fcc->issue_list); SM_I(sbi)->fcc_info = fcc; @@ -691,7 +698,7 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi, static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) { if (dc->state == D_DONE) - atomic_dec(&(SM_I(sbi)->dcc_info->submit_discard)); + atomic_dec(&(SM_I(sbi)->dcc_info->issing_discard)); if (dc->error == -EOPNOTSUPP) dc->error = 0; @@ -730,7 +737,8 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, if (!dc->error) { /* should keep before submission to avoid D_DONE right away */ dc->state = D_SUBMIT; - atomic_inc(&dcc->submit_discard); + atomic_inc(&dcc->issued_discard); + atomic_inc(&dcc->issing_discard); if (bio) { bio->bi_private = dc; bio->bi_end_io = f2fs_submit_discard_endio; @@ -1135,7 +1143,8 @@ static int create_discard_cmd_control(struct 
f2fs_sb_info *sbi) INIT_LIST_HEAD(&dcc->discard_entry_list); INIT_LIST_HEAD(&dcc->discard_cmd_list); mutex_init(&dcc->cmd_lock); - atomic_set(&dcc->submit_discard, 0); + atomic_set(&dcc->issued_discard, 0); + atomic_set(&dcc->issing_discard, 0); dcc->nr_discards = 0; dcc->max_discards = 0; -- cgit v1.2.3 From 5f32366a29b48bf6878ede83149819e38999c2fd Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 25 Mar 2017 17:19:59 +0800 Subject: f2fs: count discard command entry Count discard command entries and show the number in debugfs; also add the cost of the discard command cache to the total consumed memory footprint. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 12 +++++++++--- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/segment.c | 4 ++++ 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index f27e66ea7ff3..906f627e44fc 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -67,6 +67,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) atomic_read(&SM_I(sbi)->dcc_info->issued_discard); si->nr_discarding = atomic_read(&SM_I(sbi)->dcc_info->issing_discard); + si->nr_discard_cmd = + atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt); } si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; si->rsvd_segs = reserved_segments(sbi); @@ -220,8 +222,11 @@ get_cache: /* build merge flush thread */ if (SM_I(sbi)->fcc_info) si->cache_mem += sizeof(struct flush_cmd_control); - if (SM_I(sbi)->dcc_info) + if (SM_I(sbi)->dcc_info) { si->cache_mem += sizeof(struct discard_cmd_control); + si->cache_mem += sizeof(struct discard_cmd) * + atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt); + } /* free nids */ si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID_LIST] + @@ -343,10 +348,11 @@ static int stat_show(struct seq_file *s, void *v) si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), " -
"Discard: (%4d %4d))\n", + "Discard: (%4d %4d)) cmd: %4d\n", si->nr_wb_cp_data, si->nr_wb_data, si->nr_flushing, si->nr_flushed, - si->nr_discarding, si->nr_discarded); + si->nr_discarding, si->nr_discarded, + si->nr_discard_cmd); seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), " "volatile IO: %4d (Max. %4d)\n", si->inmem_pages, si->aw_cnt, si->max_aw_cnt, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index dbdce5ffe45a..8741d5102f14 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -216,6 +216,7 @@ struct discard_cmd_control { int max_discards; /* max. discards to be issued */ atomic_t issued_discard; /* # of issued discard */ atomic_t issing_discard; /* # of issing discard */ + atomic_t discard_cmd_cnt; /* # of cached cmd count */ }; /* for the list of fsync inodes, used only during recovery */ @@ -2333,6 +2334,7 @@ struct f2fs_stat_info { int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; int nr_flushing, nr_flushed, nr_discarding, nr_discarded; + int nr_discard_cmd; int inline_xattr, inline_inode, inline_dir, append, update, orphans; int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt; unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e9c55dd8579f..af80d8954a01 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -693,6 +693,8 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi, mutex_lock(&dcc->cmd_lock); list_add_tail(&dc->list, cmd_list); mutex_unlock(&dcc->cmd_lock); + + atomic_inc(&dcc->discard_cmd_cnt); } static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) @@ -708,6 +710,7 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *d "Issue discard failed, ret: %d", dc->error); list_del(&dc->list); kmem_cache_free(discard_cmd_slab, dc); + atomic_dec(&SM_I(sbi)->dcc_info->discard_cmd_cnt); } static void f2fs_submit_discard_endio(struct bio *bio) @@ -1145,6 +1148,7 @@ static int 
create_discard_cmd_control(struct f2fs_sb_info *sbi) mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); atomic_set(&dcc->issing_discard, 0); + atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; dcc->max_discards = 0; -- cgit v1.2.3 From f099405fc8d6c837487dcd6ab288073a44e6a91f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 27 Mar 2017 18:14:04 +0800 Subject: f2fs: clean up destroy_discard_cmd_control Remove unneeded parameter and simply change flow in destroy_discard_cmd_control. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index af80d8954a01..d5413afdd3da 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1167,20 +1167,22 @@ init_thread: return err; } -static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi, bool free) +static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - if (dcc && dcc->f2fs_issue_discard) { + if (!dcc) + return; + + if (dcc->f2fs_issue_discard) { struct task_struct *discard_thread = dcc->f2fs_issue_discard; dcc->f2fs_issue_discard = NULL; kthread_stop(discard_thread); } - if (free) { - kfree(dcc); - SM_I(sbi)->dcc_info = NULL; - } + + kfree(dcc); + SM_I(sbi)->dcc_info = NULL; } static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno) @@ -3073,7 +3075,7 @@ void destroy_segment_manager(struct f2fs_sb_info *sbi) if (!sm_info) return; destroy_flush_cmd_control(sbi, true); - destroy_discard_cmd_control(sbi, true); + destroy_discard_cmd_control(sbi); destroy_dirty_segmap(sbi); destroy_curseg(sbi); destroy_free_segmap(sbi); -- cgit v1.2.3 From a7eeb823854c4ab19765a25fabbda07fe27be177 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 28 Mar 2017 18:18:50 +0800 Subject: f2fs: use bitmap in discard_entry This patch changes to use bitmap instead of 
extent in struct discard_entry to indicate discard range in one segment, for fragmented space, this implementation can save memory footprint. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 6 ++--- fs/f2fs/segment.c | 72 ++++++++++++++++++++++++++++++------------------------- 2 files changed, 43 insertions(+), 35 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 8741d5102f14..43b6e1eeceb6 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -182,11 +182,11 @@ struct inode_entry { struct inode *inode; /* vfs inode pointer */ }; -/* for the list of blockaddresses to be discarded */ +/* for the bitmap indicate blocks to be discarded */ struct discard_entry { struct list_head list; /* list head */ - block_t blkaddr; /* block address to be discarded */ - int len; /* # of consecutive blocks of the discard */ + block_t start_blkaddr; /* start blockaddr of current segment */ + unsigned char discard_map[SIT_VBLOCK_MAP_SIZE]; /* segment discard bitmap */ }; enum { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d5413afdd3da..12d2aa7032bb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -962,32 +962,6 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi, return err; } -static void __add_discard_entry(struct f2fs_sb_info *sbi, - struct cp_control *cpc, struct seg_entry *se, - unsigned int start, unsigned int end) -{ - struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list; - struct discard_entry *new, *last; - - if (!list_empty(head)) { - last = list_last_entry(head, struct discard_entry, list); - if (START_BLOCK(sbi, cpc->trim_start) + start == - last->blkaddr + last->len && - last->len < MAX_DISCARD_BLOCKS(sbi)) { - last->len += end - start; - goto done; - } - } - - new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS); - INIT_LIST_HEAD(&new->list); - new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start; - new->len = end - start; - list_add_tail(&new->list, head); -done: - 
SM_I(sbi)->dcc_info->nr_discards += end - start; -} - static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, bool check_only) { @@ -1000,6 +974,8 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, unsigned long *dmap = SIT_I(sbi)->tmp_map; unsigned int start = 0, end = -1; bool force = (cpc->reason == CP_DISCARD); + struct discard_entry *de = NULL; + struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list; int i; if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) @@ -1031,7 +1007,17 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, if (check_only) return true; - __add_discard_entry(sbi, cpc, se, start, end); + if (!de) { + de = f2fs_kmem_cache_alloc(discard_entry_slab, + GFP_F2FS_ZERO); + de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start); + list_add_tail(&de->list, head); + } + + for (i = start; i < end; i++) + __set_bit_le(i, (void *)de->discard_map); + + SM_I(sbi)->dcc_info->nr_discards += end - start; } return false; } @@ -1117,13 +1103,35 @@ next: /* send small discards */ list_for_each_entry_safe(entry, this, head, list) { - if (force && entry->len < cpc->trim_minlen) - goto skip; - f2fs_issue_discard(sbi, entry->blkaddr, entry->len); - cpc->trimmed += entry->len; + unsigned int cur_pos = 0, next_pos, len, total_len = 0; + bool is_valid = test_bit_le(0, entry->discard_map); + +find_next: + if (is_valid) { + next_pos = find_next_zero_bit_le(entry->discard_map, + sbi->blocks_per_seg, cur_pos); + len = next_pos - cur_pos; + + if (force && len < cpc->trim_minlen) + goto skip; + + f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos, + len); + cpc->trimmed += len; + total_len += len; + } else { + next_pos = find_next_bit_le(entry->discard_map, + sbi->blocks_per_seg, cur_pos); + } skip: + cur_pos = next_pos; + is_valid = !is_valid; + + if (cur_pos < sbi->blocks_per_seg) + goto find_next; + list_del(&entry->list); - SM_I(sbi)->dcc_info->nr_discards -= 
entry->len; + SM_I(sbi)->dcc_info->nr_discards -= total_len; kmem_cache_free(discard_entry_slab, entry); } } -- cgit v1.2.3 From ef095d19e82f25bbdead472b8b71f4ef3b7a636d Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 24 Mar 2017 20:05:13 -0400 Subject: f2fs: write small sized IO to hot log It would be better to split small and large IOs separately in order to get more consecutive big writes. The default threshold is set to 64KB, but is configurable via sysfs/min_hot_blocks. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 9 +++++++++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/inline.c | 1 + fs/f2fs/segment.c | 13 ++++++------- fs/f2fs/segment.h | 1 + fs/f2fs/super.c | 2 ++ 6 files changed, 21 insertions(+), 7 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 090413236b27..8f36080b47c4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1432,6 +1432,8 @@ write: need_balance_fs = true; else if (has_not_enough_free_secs(sbi, 0, 0)) goto redirty_out; + else + set_inode_flag(inode, FI_HOT_DATA); err = -EAGAIN; if (f2fs_has_inline_data(inode)) { @@ -1457,6 +1459,7 @@ out: if (wbc->for_reclaim) { f2fs_submit_merged_bio_cond(sbi, inode, 0, page->index, DATA, WRITE); + clear_inode_flag(inode, FI_HOT_DATA); remove_dirty_inode(inode); submitted = NULL; } @@ -1511,6 +1514,12 @@ static int f2fs_write_cache_pages(struct address_space *mapping, pagevec_init(&pvec, 0); + if (get_dirty_pages(mapping->host) <= + SM_I(F2FS_M_SB(mapping))->min_hot_blocks) + set_inode_flag(mapping->host, FI_HOT_DATA); + else + clear_inode_flag(mapping->host, FI_HOT_DATA); + if (wbc->range_cyclic) { writeback_index = mapping->writeback_index; /* prev offset */ index = writeback_index; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 43b6e1eeceb6..fc90e5317159 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -677,6 +677,7 @@ struct f2fs_sm_info { unsigned int ipu_policy; /* in-place-update policy */ unsigned int min_ipu_util; /* in-place-update threshold */ unsigned int
min_fsync_blocks; /* threshold for fsync */ + unsigned int min_hot_blocks; /* threshold for hot block allocation */ /* for flush command control */ struct flush_cmd_control *fcc_info; @@ -1716,6 +1717,7 @@ enum { FI_DO_DEFRAG, /* indicate defragment is running */ FI_DIRTY_FILE, /* indicate regular/symlink has dirty pages */ FI_NO_PREALLOC, /* indicate skipped preallocated blocks */ + FI_HOT_DATA, /* indicate file is hot */ }; static inline void __mark_inode_dirty_flag(struct inode *inode, diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 701bbd8d10ba..031c3d78cbc6 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -137,6 +137,7 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page) /* write data page to try to make data consistent */ set_page_writeback(page); fio.old_blkaddr = dn->data_blkaddr; + set_inode_flag(dn->inode, FI_HOT_DATA); write_data_page(dn, &fio); f2fs_wait_on_page_writeback(page, DATA, true); if (dirty) { diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 12d2aa7032bb..b749cff59420 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1829,18 +1829,16 @@ static int __get_segment_type_6(struct page *page, enum page_type p_type) if (p_type == DATA) { struct inode *inode = page->mapping->host; - if (S_ISDIR(inode->i_mode)) - return CURSEG_HOT_DATA; - else if (is_cold_data(page) || file_is_cold(inode)) + if (is_cold_data(page) || file_is_cold(inode)) return CURSEG_COLD_DATA; - else - return CURSEG_WARM_DATA; + if (is_inode_flag_set(inode, FI_HOT_DATA)) + return CURSEG_HOT_DATA; + return CURSEG_WARM_DATA; } else { if (IS_DNODE(page)) return is_cold_node(page) ? 
CURSEG_WARM_NODE : CURSEG_HOT_NODE; - else - return CURSEG_COLD_NODE; + return CURSEG_COLD_NODE; } } @@ -2947,6 +2945,7 @@ int build_segment_manager(struct f2fs_sb_info *sbi) sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC; sm_info->min_ipu_util = DEF_MIN_IPU_UTIL; sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS; + sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS; sm_info->trim_sections = DEF_BATCHED_TRIM_SECTIONS; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 31846b0fcb95..57e36c1ce7bd 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -540,6 +540,7 @@ static inline int utilization(struct f2fs_sb_info *sbi) */ #define DEF_MIN_IPU_UTIL 70 #define DEF_MIN_FSYNC_BLOCKS 8 +#define DEF_MIN_HOT_BLOCKS 16 enum { F2FS_IPU_FORCE, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index f315b54cd840..2e2e1b438ce1 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -296,6 +296,7 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, batched_trim_sections, trim_sections); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, ipu_policy, ipu_policy); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util); F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks); +F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_hot_blocks, min_hot_blocks); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages); F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio); @@ -321,6 +322,7 @@ static struct attribute *f2fs_attrs[] = { ATTR_LIST(ipu_policy), ATTR_LIST(min_ipu_util), ATTR_LIST(min_fsync_blocks), + ATTR_LIST(min_hot_blocks), ATTR_LIST(max_victim_search), ATTR_LIST(dir_level), ATTR_LIST(ram_thresh), -- cgit v1.2.3 From 687de7f1010cb819d04b768556960d3689abe02b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 28 Mar 2017 18:07:38 -0700 Subject: f2fs: avoid IO split due to mixed WB_SYNC_ALL and WB_SYNC_NONE If two threads try to flush dirty pages in different inodes respectively, f2fs_write_data_pages() will 
produce WRITE and WRITE_SYNC one at a time, resulting in a lot of separate 4KB IOs. So, this patch gives higher priority to WB_SYNC_ALL IOs and gathers write IOs with a big WRITE_SYNC'ed bio. Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 15 +++++++++++++-- fs/f2fs/f2fs.h | 3 +++ fs/f2fs/super.c | 2 ++ 3 files changed, 18 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 8f36080b47c4..b1cac6d85bcb 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1605,8 +1605,10 @@ continue_unlock: last_idx = page->index; } - if (--wbc->nr_to_write <= 0 && - wbc->sync_mode == WB_SYNC_NONE) { + /* give a priority to WB_SYNC threads */ + if ((atomic_read(&F2FS_M_SB(mapping)->wb_sync_req) || + --wbc->nr_to_write <= 0) && + wbc->sync_mode == WB_SYNC_NONE) { done = 1; break; } @@ -1662,9 +1664,18 @@ static int f2fs_write_data_pages(struct address_space *mapping, trace_f2fs_writepages(mapping->host, wbc, DATA); + /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */ + if (wbc->sync_mode == WB_SYNC_ALL) + atomic_inc(&sbi->wb_sync_req); + else if (atomic_read(&sbi->wb_sync_req)) + goto skip_write; + blk_start_plug(&plug); ret = f2fs_write_cache_pages(mapping, wbc); blk_finish_plug(&plug); + + if (wbc->sync_mode == WB_SYNC_ALL) + atomic_dec(&sbi->wb_sync_req); /* * if some pages were truncated, we cannot guarantee its mapping->host * to detect pending bios.
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fc90e5317159..cca46e0ff2e4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -887,6 +887,9 @@ struct f2fs_sb_info { /* # of allocated blocks */ struct percpu_counter alloc_valid_block_count; + /* writeback control */ + atomic_t wb_sync_req; /* count # of WB_SYNC threads */ + /* valid inode count */ struct percpu_counter total_valid_inode_count; diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 2e2e1b438ce1..21d5eaa2793e 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1566,6 +1566,8 @@ static void init_sb_info(struct f2fs_sb_info *sbi) for (i = 0; i < NR_COUNT_TYPE; i++) atomic_set(&sbi->nr_pages[i], 0); + atomic_set(&sbi->wb_sync_req, 0); + INIT_LIST_HEAD(&sbi->s_list); mutex_init(&sbi->umount_mutex); mutex_init(&sbi->wio_mutex[NODE]); -- cgit v1.2.3 From fc2e2875d54c3571fe7c9dbed83814bd26e8a33a Mon Sep 17 00:00:00 2001 From: Kaixu Xia Date: Sun, 2 Apr 2017 02:39:48 +0800 Subject: f2fs: remove the redundant variable definition The variable 'i' has been defined before, so here we can use it directly. Signed-off-by: Kaixu Xia Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 1 - 1 file changed, 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9aba0bb340a0..9db92990f193 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1163,7 +1163,6 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) /* write nat bits */ if (enabled_nat_bits(sbi, cpc)) { __u64 cp_ver = cur_cp_version(ckpt); - unsigned int i; block_t blk; cp_ver |= ((__u64)crc32 << 32); -- cgit v1.2.3 From d1b3e72d549094317c12c79c7817861a97004a56 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 30 Mar 2017 21:02:46 -0700 Subject: f2fs: submit bio of in-place-update pages This patch tries to split in-place-update bios from sequential bios. 
Suggested-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 5 ++++- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 4 ++-- 3 files changed, 7 insertions(+), 4 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b1cac6d85bcb..3d74c0ffa4c7 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -362,6 +362,9 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio) bio_set_op_attrs(bio, fio->op, fio->op_flags); __submit_bio(fio->sbi, bio, fio->type); + + if (!is_read_io(fio->op)) + inc_page_count(fio->sbi, WB_DATA_TYPE(fio->page)); return 0; } @@ -1354,7 +1357,7 @@ retry_encrypt: !is_cold_data(page) && !IS_ATOMIC_WRITTEN_PAGE(page) && need_inplace_update(inode))) { - rewrite_data_page(fio); + err = rewrite_data_page(fio); set_inode_flag(inode, FI_UPDATE_WRITE); trace_f2fs_do_write_data_page(page, IPU); } else { diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index cca46e0ff2e4..32821ff84837 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2206,7 +2206,7 @@ void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr); void write_meta_page(struct f2fs_sb_info *sbi, struct page *page); void write_node_page(unsigned int nid, struct f2fs_io_info *fio); void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio); -void rewrite_data_page(struct f2fs_io_info *fio); +int rewrite_data_page(struct f2fs_io_info *fio); void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, block_t old_blkaddr, block_t new_blkaddr, bool recover_curseg, bool recover_newaddr); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b749cff59420..df5c0b3af266 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1961,11 +1961,11 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio) f2fs_update_data_blkaddr(dn, fio->new_blkaddr); } -void rewrite_data_page(struct f2fs_io_info *fio) +int rewrite_data_page(struct f2fs_io_info *fio) { fio->new_blkaddr = fio->old_blkaddr; 
stat_inc_inplace_blocks(fio->sbi); - f2fs_submit_page_mbio(fio); + return f2fs_submit_page_bio(fio); } void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum, -- cgit v1.2.3 From 64c24ecb3cff412cd848db06d90409e5dd77c2e0 Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Tue, 4 Apr 2017 13:01:22 +0300 Subject: f2fs: split make_dentry_ptr() into block and inline versions Since callers statically know which type to use, make_dentry_ptr() can simply be split into two inline functions. This way, less code is inlined, fewer arguments are passed, and no cast is needed. Signed-off-by: Tomohiro Kusumi Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 8 ++++---- fs/f2fs/f2fs.h | 32 +++++++++++++++----------------- fs/f2fs/inline.c | 10 +++++----- 3 files changed, 24 insertions(+), 26 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index 058c4f3afcef..c143dffcae6e 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -94,7 +94,7 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page, dentry_blk = (struct f2fs_dentry_block *)kmap(dentry_page); - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(NULL, &d, dentry_blk); de = find_target_dentry(fname, namehash, max_slots, &d); if (de) *res_page = dentry_page; @@ -366,7 +366,7 @@ static int make_empty_dir(struct inode *inode, dentry_blk = kmap_atomic(dentry_page); - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(NULL, &d, dentry_blk); do_make_empty_dir(inode, parent, &d); kunmap_atomic(dentry_blk); @@ -586,7 +586,7 @@ add_dentry: } } - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(NULL, &d, dentry_blk); f2fs_update_dentry(ino, mode, &d, new_name, dentry_hash, bit_pos); set_page_dirty(dentry_page); @@ -894,7 +894,7 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx) dentry_blk = kmap(dentry_page); - make_dentry_ptr(inode, &d, (void *)dentry_blk, 1); + make_dentry_ptr_block(inode, &d,
dentry_blk); err = f2fs_fill_dentries(ctx, &d, n * NR_DENTRY_IN_BLOCK, &fstr); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 32821ff84837..7b2b40238911 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -328,26 +328,24 @@ struct f2fs_dentry_ptr { int max; }; -static inline void make_dentry_ptr(struct inode *inode, - struct f2fs_dentry_ptr *d, void *src, int type) +static inline void make_dentry_ptr_block(struct inode *inode, + struct f2fs_dentry_ptr *d, struct f2fs_dentry_block *t) { d->inode = inode; + d->max = NR_DENTRY_IN_BLOCK; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; +} - if (type == 1) { - struct f2fs_dentry_block *t = (struct f2fs_dentry_block *)src; - - d->max = NR_DENTRY_IN_BLOCK; - d->bitmap = &t->dentry_bitmap; - d->dentry = t->dentry; - d->filename = t->filename; - } else { - struct f2fs_inline_dentry *t = (struct f2fs_inline_dentry *)src; - - d->max = NR_INLINE_DENTRY; - d->bitmap = &t->dentry_bitmap; - d->dentry = t->dentry; - d->filename = t->filename; - } +static inline void make_dentry_ptr_inline(struct inode *inode, + struct f2fs_dentry_ptr *d, struct f2fs_inline_dentry *t) +{ + d->inode = inode; + d->max = NR_INLINE_DENTRY; + d->bitmap = &t->dentry_bitmap; + d->dentry = t->dentry; + d->filename = t->filename; } /* diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c index 031c3d78cbc6..0ccdefe9fdba 100644 --- a/fs/f2fs/inline.c +++ b/fs/f2fs/inline.c @@ -302,7 +302,7 @@ struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir, inline_dentry = inline_data_addr(ipage); - make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); + make_dentry_ptr_inline(NULL, &d, inline_dentry); de = find_target_dentry(fname, namehash, NULL, &d); unlock_page(ipage); if (de) @@ -321,7 +321,7 @@ int make_empty_inline_dir(struct inode *inode, struct inode *parent, dentry_blk = inline_data_addr(ipage); - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); + make_dentry_ptr_inline(NULL, &d, dentry_blk); do_make_empty_dir(inode, 
parent, &d); set_page_dirty(ipage); @@ -402,7 +402,7 @@ static int f2fs_add_inline_entries(struct inode *dir, unsigned long bit_pos = 0; int err = 0; - make_dentry_ptr(NULL, &d, (void *)inline_dentry, 2); + make_dentry_ptr_inline(NULL, &d, inline_dentry); while (bit_pos < d.max) { struct f2fs_dir_entry *de; @@ -534,7 +534,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *new_name, f2fs_wait_on_page_writeback(ipage, NODE, true); name_hash = f2fs_dentry_hash(new_name); - make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2); + make_dentry_ptr_inline(NULL, &d, dentry_blk); f2fs_update_dentry(ino, mode, &d, new_name, name_hash, bit_pos); set_page_dirty(ipage); @@ -623,7 +623,7 @@ int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx, inline_dentry = inline_data_addr(ipage); - make_dentry_ptr(inode, &d, (void *)inline_dentry, 2); + make_dentry_ptr_inline(inode, &d, inline_dentry); err = f2fs_fill_dentries(ctx, &d, 0, fstr); if (!err) -- cgit v1.2.3 From c6f82fe90d7458e5fa190a6820bfc24f96b0de4e Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 4 Apr 2017 16:45:30 -0700 Subject: Revert "f2fs: put allocate_segment after refresh_sit_entry" This reverts commit 3436c4bdb30de421d46f58c9174669fbcfd40ce0. The reverted commit causes a leak in registering dirty segments. I reproduced the issue with a modified postmark, which injects a lot of file create/delete/update operations and finally triggers a huge number of SSR allocations.
Cc: # v4.10+ [Jaegeuk Kim: Change missing incorrect comment] Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index df5c0b3af266..2bc9014bc8fb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1881,15 +1881,14 @@ void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, stat_inc_block_count(sbi, curseg); + if (!__has_curseg_space(sbi, type)) + sit_i->s_ops->allocate_segment(sbi, type, false); /* - * SIT information should be updated before segment allocation, - * since SSR needs latest valid block information. + * SIT information should be updated after segment allocation, + * since we need to keep dirty segments precisely under SSR. */ refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr); - if (!__has_curseg_space(sbi, type)) - sit_i->s_ops->allocate_segment(sbi, type, false); - mutex_unlock(&sit_i->sentry_lock); if (page && IS_NODESEG(type)) -- cgit v1.2.3 From 22d375dd9cc6ee9ff6382487fdd6ca4759af82a9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 5 Apr 2017 18:19:48 +0800 Subject: f2fs: split discard_cmd_list Split discard_cmd_list into discard_{pend,wait}_list, so that while sending or waiting on discard commands, we can avoid traversing unneeded entries in the original list.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 3 ++- fs/f2fs/segment.c | 47 ++++++++++++++++++++++++++++++----------------- 2 files changed, 32 insertions(+), 18 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7b2b40238911..55a050668d00 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -210,7 +210,8 @@ struct discard_cmd_control { struct task_struct *f2fs_issue_discard; /* discard thread */ struct list_head discard_entry_list; /* 4KB discard entry list */ int nr_discards; /* # of discards in the list */ - struct list_head discard_cmd_list; /* discard cmd list */ + struct list_head discard_pend_list; /* store pending entries */ + struct list_head discard_wait_list; /* store on-flushing entries */ wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ struct mutex cmd_lock; int max_discards; /* max. discards to be issued */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 2bc9014bc8fb..1a5b3e4770d2 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -677,7 +677,7 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi, block_t start, block_t len) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *cmd_list = &(dcc->discard_cmd_list); + struct list_head *pend_list = &(dcc->discard_pend_list); struct discard_cmd *dc; dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); @@ -691,7 +691,7 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi, init_completion(&dc->wait); mutex_lock(&dcc->cmd_lock); - list_add_tail(&dc->list, cmd_list); + list_add_tail(&dc->list, pend_list); mutex_unlock(&dcc->cmd_lock); atomic_inc(&dcc->discard_cmd_cnt); @@ -747,6 +747,7 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, bio->bi_end_io = f2fs_submit_discard_endio; bio->bi_opf |= REQ_SYNC; submit_bio(bio); + list_move_tail(&dc->list, &dcc->discard_wait_list); } } else { __remove_discard_cmd(sbi, dc); @@ -793,31 +794,37 @@ static void 
__punch_discard_cmd(struct f2fs_sb_info *sbi, void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *wait_list = &(dcc->discard_cmd_list); + struct list_head *pend_list = &(dcc->discard_pend_list); + struct list_head *wait_list = &(dcc->discard_wait_list); struct discard_cmd *dc, *tmp; struct blk_plug plug; mutex_lock(&dcc->cmd_lock); - blk_start_plug(&plug); - - list_for_each_entry_safe(dc, tmp, wait_list, list) { + if (blkaddr == NULL_ADDR) + goto release_discard; - if (blkaddr == NULL_ADDR) { - __submit_discard_cmd(sbi, dc); - continue; - } + list_for_each_entry_safe(dc, tmp, pend_list, list) { + if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) + __punch_discard_cmd(sbi, dc, blkaddr); + } + list_for_each_entry_safe(dc, tmp, wait_list, list) { if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) { if (dc->state == D_SUBMIT) wait_for_completion_io(&dc->wait); __punch_discard_cmd(sbi, dc, blkaddr); } } - blk_finish_plug(&plug); +release_discard: /* this comes from f2fs_put_super */ if (blkaddr == NULL_ADDR) { + blk_start_plug(&plug); + list_for_each_entry_safe(dc, tmp, pend_list, list) + __submit_discard_cmd(sbi, dc); + blk_finish_plug(&plug); + list_for_each_entry_safe(dc, tmp, wait_list, list) { wait_for_completion_io(&dc->wait); __remove_discard_cmd(sbi, dc); @@ -831,7 +838,8 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; - struct list_head *cmd_list = &dcc->discard_cmd_list; + struct list_head *pend_list = &dcc->discard_pend_list; + struct list_head *wait_list = &dcc->discard_wait_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; int iter = 0; @@ -842,13 +850,17 @@ repeat: blk_start_plug(&plug); mutex_lock(&dcc->cmd_lock); - list_for_each_entry_safe(dc, tmp, cmd_list, list) { + list_for_each_entry_safe(dc, 
tmp, pend_list, list) { + f2fs_bug_on(sbi, dc->state != D_PREP); if (is_idle(sbi)) __submit_discard_cmd(sbi, dc); - if (dc->state == D_PREP && iter++ > DISCARD_ISSUE_RATE) + if (iter++ > DISCARD_ISSUE_RATE) break; + } + + list_for_each_entry_safe(dc, tmp, wait_list, list) { if (dc->state == D_DONE) __remove_discard_cmd(sbi, dc); } @@ -859,8 +871,8 @@ repeat: iter = 0; congestion_wait(BLK_RW_SYNC, HZ/50); - wait_event_interruptible(*q, - kthread_should_stop() || !list_empty(&dcc->discard_cmd_list)); + wait_event_interruptible(*q, kthread_should_stop() || + !list_empty(pend_list) || !list_empty(wait_list)); goto repeat; } @@ -1152,7 +1164,8 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) return -ENOMEM; INIT_LIST_HEAD(&dcc->discard_entry_list); - INIT_LIST_HEAD(&dcc->discard_cmd_list); + INIT_LIST_HEAD(&dcc->discard_pend_list); + INIT_LIST_HEAD(&dcc->discard_wait_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); atomic_set(&dcc->issing_discard, 0); -- cgit v1.2.3 From d431413f00319919ed2adac0d58225160bd1d8fb Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 5 Apr 2017 18:19:49 +0800 Subject: f2fs: introduce f2fs_wait_discard_bios Split f2fs_wait_discard_bios from f2fs_wait_discard_bio, just for cleanup, no logic change. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- fs/f2fs/segment.c | 37 ++++++++++++++++++++++--------------- fs/f2fs/super.c | 2 +- 3 files changed, 24 insertions(+), 17 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 55a050668d00..7321f061bb28 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2193,7 +2193,7 @@ void destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free); void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr); bool is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr); void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new); -void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr); +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi); void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc); void release_discard_addrs(struct f2fs_sb_info *sbi); int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1a5b3e4770d2..a27dd1679991 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -797,13 +797,9 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) struct list_head *pend_list = &(dcc->discard_pend_list); struct list_head *wait_list = &(dcc->discard_wait_list); struct discard_cmd *dc, *tmp; - struct blk_plug plug; mutex_lock(&dcc->cmd_lock); - if (blkaddr == NULL_ADDR) - goto release_discard; - list_for_each_entry_safe(dc, tmp, pend_list, list) { if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) __punch_discard_cmd(sbi, dc, blkaddr); @@ -817,19 +813,30 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) } } -release_discard: - /* this comes from f2fs_put_super */ - if (blkaddr == NULL_ADDR) { - blk_start_plug(&plug); - list_for_each_entry_safe(dc, tmp, pend_list, list) - __submit_discard_cmd(sbi, dc); - blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); +} - 
list_for_each_entry_safe(dc, tmp, wait_list, list) { - wait_for_completion_io(&dc->wait); - __remove_discard_cmd(sbi, dc); - } +/* This comes from f2fs_put_super */ +void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *pend_list = &(dcc->discard_pend_list); + struct list_head *wait_list = &(dcc->discard_wait_list); + struct discard_cmd *dc, *tmp; + struct blk_plug plug; + + mutex_lock(&dcc->cmd_lock); + + blk_start_plug(&plug); + list_for_each_entry_safe(dc, tmp, pend_list, list) + __submit_discard_cmd(sbi, dc); + blk_finish_plug(&plug); + + list_for_each_entry_safe(dc, tmp, wait_list, list) { + wait_for_completion_io(&dc->wait); + __remove_discard_cmd(sbi, dc); } + mutex_unlock(&dcc->cmd_lock); } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 21d5eaa2793e..cb65e6d0d275 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -795,7 +795,7 @@ static void f2fs_put_super(struct super_block *sb) } /* be sure to wait for any on-going discard commands */ - f2fs_wait_discard_bio(sbi, NULL_ADDR); + f2fs_wait_discard_bios(sbi); /* write_checkpoint can update stat informaion */ f2fs_destroy_stats(sbi); -- cgit v1.2.3 From fa64a0036cb69c50ac156a841f1b83ef0f84fc16 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 5 Apr 2017 18:26:26 +0800 Subject: f2fs: prevent waiter encountering incorrect discard states In f2fs_submit_discard_endio, we will wake up waiter before setting discard command states, so waiter may use incorrect states. Change the order between complete() and states setting to fix this issue. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a27dd1679991..dca693c16f07 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -717,9 +717,9 @@ static void f2fs_submit_discard_endio(struct bio *bio) { struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private; - complete(&dc->wait); dc->error = bio->bi_error; dc->state = D_DONE; + complete(&dc->wait); bio_put(bio); } @@ -807,8 +807,7 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) list_for_each_entry_safe(dc, tmp, wait_list, list) { if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) { - if (dc->state == D_SUBMIT) - wait_for_completion_io(&dc->wait); + wait_for_completion_io(&dc->wait); __punch_discard_cmd(sbi, dc, blkaddr); } } @@ -868,8 +867,10 @@ repeat: } list_for_each_entry_safe(dc, tmp, wait_list, list) { - if (dc->state == D_DONE) + if (dc->state == D_DONE) { + wait_for_completion_io(&dc->wait); __remove_discard_cmd(sbi, dc); + } } mutex_unlock(&dcc->cmd_lock); -- cgit v1.2.3 From 771a9a71778def4098e8baaa23854d24e33fdb2f Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Wed, 5 Apr 2017 22:49:44 +0300 Subject: f2fs: fix comment on f2fs_flush_merged_bios() after 86531d6b Callers are to unlock the page on failure after 86531d6b. Signed-off-by: Tomohiro Kusumi Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 3d74c0ffa4c7..e984a42eabf4 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -341,7 +341,7 @@ void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi) /* * Fill the locked page with data located in the block address. - * Return unlocked page. + * A caller needs to unlock the page on failure. 
*/ int f2fs_submit_page_bio(struct f2fs_io_info *fio) { -- cgit v1.2.3 From 68afcf2d38cd7544817558757e57e7b9d5c4e72c Mon Sep 17 00:00:00 2001 From: Tomohiro Kusumi Date: Sun, 9 Apr 2017 02:11:36 +0300 Subject: f2fs: guard macro variables with braces Add braces around variables used within macros for those make sense to do it. Many of the macros in f2fs already do this. What this commit doesn't do is anything that changes line# as a result of adding braces, which usually affects the binary via __LINE__. Confirmed no diff in fs/f2fs/f2fs.ko before/after this commit on x86_64, to make sure this has no functional change as well as there's been no unexpected side effect due to callers' arithmetics within the existing code. Signed-off-by: Tomohiro Kusumi Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 36 +++++++++++++------------- fs/f2fs/node.c | 2 +- fs/f2fs/node.h | 22 ++++++++-------- fs/f2fs/segment.h | 76 +++++++++++++++++++++++++++---------------------------- fs/f2fs/xattr.h | 4 +-- 5 files changed, 70 insertions(+), 70 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 7321f061bb28..3e1f8319d557 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -63,7 +63,7 @@ struct f2fs_fault_info { }; extern char *fault_name[FAULT_MAX]; -#define IS_FAULT_SET(fi, type) (fi->inject_type & (1 << (type))) +#define IS_FAULT_SET(fi, type) ((fi)->inject_type & (1 << (type))) #endif /* @@ -89,9 +89,9 @@ extern char *fault_name[FAULT_MAX]; #define F2FS_MOUNT_ADAPTIVE 0x00020000 #define F2FS_MOUNT_LFS 0x00040000 -#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) -#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) -#define test_opt(sbi, option) (sbi->mount_opt.opt & F2FS_MOUNT_##option) +#define clear_opt(sbi, option) ((sbi)->mount_opt.opt &= ~F2FS_MOUNT_##option) +#define set_opt(sbi, option) ((sbi)->mount_opt.opt |= F2FS_MOUNT_##option) +#define test_opt(sbi, option) ((sbi)->mount_opt.opt & 
F2FS_MOUNT_##option) #define ver_after(a, b) (typecheck(unsigned long long, a) && \ typecheck(unsigned long long, b) && \ @@ -228,13 +228,13 @@ struct fsync_inode_entry { block_t last_dentry; /* block address locating the last dentry */ }; -#define nats_in_cursum(jnl) (le16_to_cpu(jnl->n_nats)) -#define sits_in_cursum(jnl) (le16_to_cpu(jnl->n_sits)) +#define nats_in_cursum(jnl) (le16_to_cpu((jnl)->n_nats)) +#define sits_in_cursum(jnl) (le16_to_cpu((jnl)->n_sits)) -#define nat_in_journal(jnl, i) (jnl->nat_j.entries[i].ne) -#define nid_in_journal(jnl, i) (jnl->nat_j.entries[i].nid) -#define sit_in_journal(jnl, i) (jnl->sit_j.entries[i].se) -#define segno_in_journal(jnl, i) (jnl->sit_j.entries[i].segno) +#define nat_in_journal(jnl, i) ((jnl)->nat_j.entries[i].ne) +#define nid_in_journal(jnl, i) ((jnl)->nat_j.entries[i].nid) +#define sit_in_journal(jnl, i) ((jnl)->sit_j.entries[i].se) +#define segno_in_journal(jnl, i) ((jnl)->sit_j.entries[i].segno) #define MAX_NAT_JENTRIES(jnl) (NAT_JOURNAL_ENTRIES - nats_in_cursum(jnl)) #define MAX_SIT_JENTRIES(jnl) (SIT_JOURNAL_ENTRIES - sits_in_cursum(jnl)) @@ -745,7 +745,7 @@ struct f2fs_io_info { bool submitted; /* indicate IO submission */ }; -#define is_read_io(rw) (rw == READ) +#define is_read_io(rw) ((rw) == READ) struct f2fs_bio_info { struct f2fs_sb_info *sbi; /* f2fs superblock */ struct bio *bio; /* bios to merge */ @@ -983,8 +983,8 @@ static inline bool time_to_inject(struct f2fs_sb_info *sbi, int type) * and the return value is in kbytes. s is of struct f2fs_sb_info. 
*/ #define BD_PART_WRITTEN(s) \ -(((u64)part_stat_read(s->sb->s_bdev->bd_part, sectors[1]) - \ - s->sectors_written_start) >> 1) +(((u64)part_stat_read((s)->sb->s_bdev->bd_part, sectors[1]) - \ + (s)->sectors_written_start) >> 1) static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type) { @@ -2437,8 +2437,8 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) #define stat_inc_seg_count(sbi, type, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ - (si)->tot_segs++; \ - if (type == SUM_TYPE_DATA) { \ + si->tot_segs++; \ + if ((type) == SUM_TYPE_DATA) { \ si->data_segs++; \ si->bg_data_segs += (gc_type == BG_GC) ? 1 : 0; \ } else { \ @@ -2448,14 +2448,14 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) } while (0) #define stat_inc_tot_blk_count(si, blks) \ - (si->tot_blks += (blks)) + ((si)->tot_blks += (blks)) #define stat_inc_data_blk_count(sbi, blks, gc_type) \ do { \ struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->data_blks += (blks); \ - si->bg_data_blks += (gc_type == BG_GC) ? (blks) : 0; \ + si->bg_data_blks += ((gc_type) == BG_GC) ? (blks) : 0; \ } while (0) #define stat_inc_node_blk_count(sbi, blks, gc_type) \ @@ -2463,7 +2463,7 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi) struct f2fs_stat_info *si = F2FS_STAT(sbi); \ stat_inc_tot_blk_count(si, blks); \ si->node_blks += (blks); \ - si->bg_node_blks += (gc_type == BG_GC) ? (blks) : 0; \ + si->bg_node_blks += ((gc_type) == BG_GC) ? 
(blks) : 0; \ } while (0) int f2fs_build_stats(struct f2fs_sb_info *sbi); diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 6e87178d34a2..9422dd252813 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -22,7 +22,7 @@ #include "trace.h" #include -#define on_build_free_nids(nmi) mutex_is_locked(&nm_i->build_lock) +#define on_build_free_nids(nmi) mutex_is_locked(&(nm_i)->build_lock) static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h index ebed0240aa53..558048e33cf9 100644 --- a/fs/f2fs/node.h +++ b/fs/f2fs/node.h @@ -9,10 +9,10 @@ * published by the Free Software Foundation. */ /* start node id of a node block dedicated to the given node id */ -#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) +#define START_NID(nid) (((nid) / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) /* node block offset on the NAT area dedicated to the given start node id */ -#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) +#define NAT_BLOCK_OFFSET(start_nid) ((start_nid) / NAT_ENTRY_PER_BLOCK) /* # of pages to perform synchronous readahead before building free nids */ #define FREE_NID_PAGES 8 @@ -62,16 +62,16 @@ struct nat_entry { struct node_info ni; /* in-memory node information */ }; -#define nat_get_nid(nat) (nat->ni.nid) -#define nat_set_nid(nat, n) (nat->ni.nid = n) -#define nat_get_blkaddr(nat) (nat->ni.blk_addr) -#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b) -#define nat_get_ino(nat) (nat->ni.ino) -#define nat_set_ino(nat, i) (nat->ni.ino = i) -#define nat_get_version(nat) (nat->ni.version) -#define nat_set_version(nat, v) (nat->ni.version = v) +#define nat_get_nid(nat) ((nat)->ni.nid) +#define nat_set_nid(nat, n) ((nat)->ni.nid = (n)) +#define nat_get_blkaddr(nat) ((nat)->ni.blk_addr) +#define nat_set_blkaddr(nat, b) ((nat)->ni.blk_addr = (b)) +#define nat_get_ino(nat) ((nat)->ni.ino) +#define nat_set_ino(nat, i) ((nat)->ni.ino = (i)) +#define 
nat_get_version(nat) ((nat)->ni.version) +#define nat_set_version(nat, v) ((nat)->ni.version = (v)) -#define inc_node_version(version) (++version) +#define inc_node_version(version) (++(version)) static inline void copy_node_info(struct node_info *dst, struct node_info *src) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 57e36c1ce7bd..b8a1bac9355d 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -21,78 +21,78 @@ #define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */ /* L: Logical segment # in volume, R: Relative segment # in main area */ -#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) -#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) +#define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno) +#define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno) -#define IS_DATASEG(t) (t <= CURSEG_COLD_DATA) -#define IS_NODESEG(t) (t >= CURSEG_HOT_NODE) +#define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA) +#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE) #define IS_CURSEG(sbi, seg) \ - ((seg == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ - (seg == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) + (((seg) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ + ((seg) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ + ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ + ((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ + ((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ + ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) #define IS_CURSEC(sbi, secno) \ - ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ - 
sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ - sbi->segs_per_sec) || \ - (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ - sbi->segs_per_sec)) \ + (((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ + (sbi)->segs_per_sec) || \ + ((secno) == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ + (sbi)->segs_per_sec) || \ + ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ + (sbi)->segs_per_sec) || \ + ((secno) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ + (sbi)->segs_per_sec) || \ + ((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ + (sbi)->segs_per_sec) || \ + ((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ + (sbi)->segs_per_sec)) \ #define MAIN_BLKADDR(sbi) (SM_I(sbi)->main_blkaddr) #define SEG0_BLKADDR(sbi) (SM_I(sbi)->seg0_blkaddr) #define MAIN_SEGS(sbi) (SM_I(sbi)->main_segments) -#define MAIN_SECS(sbi) (sbi->total_sections) +#define MAIN_SECS(sbi) ((sbi)->total_sections) #define TOTAL_SEGS(sbi) (SM_I(sbi)->segment_count) -#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << sbi->log_blocks_per_seg) +#define TOTAL_BLKS(sbi) (TOTAL_SEGS(sbi) << (sbi)->log_blocks_per_seg) #define MAX_BLKADDR(sbi) (SEG0_BLKADDR(sbi) + TOTAL_BLKS(sbi)) -#define SEGMENT_SIZE(sbi) (1ULL << (sbi->log_blocksize + \ - sbi->log_blocks_per_seg)) +#define SEGMENT_SIZE(sbi) (1ULL << ((sbi)->log_blocksize + \ + (sbi)->log_blocks_per_seg)) #define START_BLOCK(sbi, segno) (SEG0_BLKADDR(sbi) + \ - (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) + (GET_R2L_SEGNO(FREE_I(sbi), segno) << (sbi)->log_blocks_per_seg)) #define NEXT_FREE_BLKADDR(sbi, curseg) \ - (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) + (START_BLOCK(sbi, (curseg)->segno) + (curseg)->next_blkoff) #define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) ((blk_addr) - SEG0_BLKADDR(sbi)) #define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ - (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) + 
(GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> (sbi)->log_blocks_per_seg) #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ - (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1)) + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & ((sbi)->blocks_per_seg - 1)) #define GET_SEGNO(sbi, blk_addr) \ - (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ + ((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? \ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) #define GET_SECNO(sbi, segno) \ - ((segno) / sbi->segs_per_sec) + ((segno) / (sbi)->segs_per_sec) #define GET_ZONENO_FROM_SEGNO(sbi, segno) \ - ((segno / sbi->segs_per_sec) / sbi->secs_per_zone) + (((segno) / (sbi)->segs_per_sec) / (sbi)->secs_per_zone) #define GET_SUM_BLOCK(sbi, segno) \ - ((sbi->sm_info->ssa_blkaddr) + segno) + ((sbi)->sm_info->ssa_blkaddr + (segno)) #define GET_SUM_TYPE(footer) ((footer)->entry_type) -#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type) +#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = (type)) #define SIT_ENTRY_OFFSET(sit_i, segno) \ - (segno % sit_i->sents_per_block) + ((segno) % (sit_i)->sents_per_block) #define SIT_BLOCK_OFFSET(segno) \ - (segno / SIT_ENTRY_PER_BLOCK) + ((segno) / SIT_ENTRY_PER_BLOCK) #define START_SEGNO(segno) \ (SIT_BLOCK_OFFSET(segno) * SIT_ENTRY_PER_BLOCK) #define SIT_BLK_CNT(sbi) \ @@ -103,7 +103,7 @@ #define SECTOR_FROM_BLOCK(blk_addr) \ (((sector_t)blk_addr) << F2FS_LOG_SECTORS_PER_BLOCK) #define SECTOR_TO_BLOCK(sectors) \ - (sectors >> F2FS_LOG_SECTORS_PER_BLOCK) + ((sectors) >> F2FS_LOG_SECTORS_PER_BLOCK) /* * indicate a block allocation direction: RIGHT and LEFT. 
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h index 6afcee35ebeb..dbcd1d16e669 100644 --- a/fs/f2fs/xattr.h +++ b/fs/f2fs/xattr.h @@ -58,10 +58,10 @@ struct f2fs_xattr_entry { #define XATTR_FIRST_ENTRY(ptr) (XATTR_ENTRY(XATTR_HDR(ptr) + 1)) #define XATTR_ROUND (3) -#define XATTR_ALIGN(size) ((size + XATTR_ROUND) & ~XATTR_ROUND) +#define XATTR_ALIGN(size) (((size) + XATTR_ROUND) & ~XATTR_ROUND) #define ENTRY_SIZE(entry) (XATTR_ALIGN(sizeof(struct f2fs_xattr_entry) + \ - entry->e_name_len + le16_to_cpu(entry->e_value_size))) + (entry)->e_name_len + le16_to_cpu((entry)->e_value_size))) #define XATTR_NEXT_ENTRY(entry) ((struct f2fs_xattr_entry *)((char *)(entry) +\ ENTRY_SIZE(entry))) -- cgit v1.2.3 From 63fcf8e8d6bff2e8228f348c78b4a4fcca44401a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 7 Apr 2017 14:27:07 -0700 Subject: f2fs: use segment number for get_valid_blocks This patch fixes no_fggc_candidate() to pass a segment number, instead of a section number, to get_valid_blocks(). Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index b8a1bac9355d..39ef9cc0093b 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -80,6 +80,8 @@ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) #define GET_SECNO(sbi, segno) \ ((segno) / (sbi)->segs_per_sec) +#define GET_SEGNO_FROM_SECNO(sbi, secno) \ + ((secno) * (sbi)->segs_per_sec) #define GET_ZONENO_FROM_SEGNO(sbi, segno) \ (((segno) / (sbi)->segs_per_sec) / (sbi)->secs_per_zone) @@ -720,8 +722,8 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi, unsigned int secno) { - if (get_valid_blocks(sbi, secno, sbi->segs_per_sec) >= - sbi->fggc_threshold) + if (get_valid_blocks(sbi, GET_SEGNO_FROM_SECNO(sbi, secno), + sbi->segs_per_sec) >= sbi->fggc_threshold) return true; return false; } -- cgit v1.2.3 From 302bd34882b1e20797f08cc13ef060ec972d0acb Mon Sep 17
00:00:00 2001 From: Jaegeuk Kim Date: Fri, 7 Apr 2017 14:33:22 -0700 Subject: f2fs: clean up get_valid_blocks with consistent parameter This patch cleans up get_valid_blocks, which has no functional change. Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 2 +- fs/f2fs/gc.c | 8 ++++---- fs/f2fs/segment.c | 8 ++++---- fs/f2fs/segment.h | 8 ++++---- fs/f2fs/super.c | 4 ++-- 5 files changed, 15 insertions(+), 15 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 906f627e44fc..dc16a52db275 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -137,7 +137,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi) blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; hblks_per_sec = blks_per_sec / 2; for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { - vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); + vblocks = get_valid_blocks(sbi, segno, true); dist = abs(vblocks - hblks_per_sec); bimodal += dist * dist; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c52656ccbde5..439887c3aaf4 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -229,7 +229,7 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) for (i = 0; i < sbi->segs_per_sec; i++) mtime += get_seg_entry(sbi, start + i)->mtime; - vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec); + vblocks = get_valid_blocks(sbi, segno, true); mtime = div_u64(mtime, sbi->segs_per_sec); vblocks = div_u64(vblocks, sbi->segs_per_sec); @@ -252,7 +252,7 @@ static unsigned int get_greedy_cost(struct f2fs_sb_info *sbi, unsigned int segno) { unsigned int valid_blocks = - get_valid_blocks(sbi, segno, sbi->segs_per_sec); + get_valid_blocks(sbi, segno, true); return IS_DATASEG(get_seg_entry(sbi, segno)->type) ? 
valid_blocks * 2 : valid_blocks; @@ -897,7 +897,7 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, GET_SUM_BLOCK(sbi, segno)); f2fs_put_page(sum_page, 0); - if (get_valid_blocks(sbi, segno, 1) == 0 || + if (get_valid_blocks(sbi, segno, false) == 0 || !PageUptodate(sum_page) || unlikely(f2fs_cp_error(sbi))) goto next; @@ -931,7 +931,7 @@ next: blk_finish_plug(&plug); if (gc_type == FG_GC && - get_valid_blocks(sbi, start_segno, sbi->segs_per_sec) == 0) + get_valid_blocks(sbi, start_segno, true) == 0) sec_freed = 1; stat_inc_call_count(sbi->stat_info); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index dca693c16f07..ae8331802d05 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -636,7 +636,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t])) dirty_i->nr_dirty[t]--; - if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0) + if (get_valid_blocks(sbi, segno, true) == 0) clear_bit(GET_SECNO(sbi, segno), dirty_i->victim_secmap); } @@ -657,7 +657,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_lock(&dirty_i->seglist_lock); - valid_blocks = get_valid_blocks(sbi, segno, 0); + valid_blocks = get_valid_blocks(sbi, segno, false); if (valid_blocks == 0) { __locate_dirty_segment(sbi, segno, PRE); @@ -1109,7 +1109,7 @@ next: secno = GET_SECNO(sbi, start); start_segno = secno * sbi->segs_per_sec; if (!IS_CURSEC(sbi, secno) && - !get_valid_blocks(sbi, start, sbi->segs_per_sec)) + !get_valid_blocks(sbi, start, true)) f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), sbi->segs_per_sec << sbi->log_blocks_per_seg); @@ -2859,7 +2859,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi) if (segno >= MAIN_SEGS(sbi)) break; offset = segno + 1; - valid_blocks = get_valid_blocks(sbi, segno, 0); + valid_blocks = get_valid_blocks(sbi, segno, false); if (valid_blocks == sbi->blocks_per_seg || !valid_blocks) continue; if 
(valid_blocks > sbi->blocks_per_seg) { diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 39ef9cc0093b..053166038bfe 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -309,13 +309,13 @@ static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi, } static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, - unsigned int segno, int section) + unsigned int segno, bool use_section) { /* * In order to get # of valid blocks in a section instantly from many * segments, f2fs manages two counting structures separately. */ - if (section > 1) + if (use_section && sbi->segs_per_sec > 1) return get_sec_entry(sbi, segno)->valid_blocks; else return get_seg_entry(sbi, segno)->valid_blocks; @@ -722,8 +722,8 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi, unsigned int secno) { - if (get_valid_blocks(sbi, GET_SEGNO_FROM_SECNO(sbi, secno), - sbi->segs_per_sec) >= sbi->fggc_threshold) + if (get_valid_blocks(sbi, GET_SEGNO_FROM_SECNO(sbi, secno), true) >= + sbi->fggc_threshold) return true; return false; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index cb65e6d0d275..97c07a5153e9 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -996,7 +996,7 @@ static int segment_info_seq_show(struct seq_file *seq, void *offset) if ((i % 10) == 0) seq_printf(seq, "%-10d", i); seq_printf(seq, "%d|%-3u", se->type, - get_valid_blocks(sbi, i, 1)); + get_valid_blocks(sbi, i, false)); if ((i % 10) == 9 || i == (total_segs - 1)) seq_putc(seq, '\n'); else @@ -1022,7 +1022,7 @@ static int segment_bits_seq_show(struct seq_file *seq, void *offset) seq_printf(seq, "%-10d", i); seq_printf(seq, "%d|%-3u|", se->type, - get_valid_blocks(sbi, i, 1)); + get_valid_blocks(sbi, i, false)); for (j = 0; j < SIT_VBLOCK_MAP_SIZE; j++) seq_printf(seq, " %.2x", se->cur_valid_map[j]); seq_putc(seq, '\n'); -- cgit v1.2.3 From 4ddb1a4d4dc20642073b7d92400a67b67601fe6f Mon Sep 17 00:00:00 
2001 From: Jaegeuk Kim Date: Fri, 7 Apr 2017 15:08:17 -0700 Subject: f2fs: clean up some macros in terms of GET_SEGNO This patch cleans several macros by introducing: - BLKS_PER_SEC - GET_SEC_FROM_SEG - GET_SEG_FROM_SEC - GET_ZONE_FROM_SEC - GET_ZONE_FROM_SEG Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 6 +++--- fs/f2fs/f2fs.h | 7 +++---- fs/f2fs/file.c | 3 +-- fs/f2fs/gc.c | 17 ++++++++--------- fs/f2fs/segment.c | 20 ++++++++++---------- fs/f2fs/segment.h | 34 ++++++++++++++++++++-------------- 6 files changed, 45 insertions(+), 42 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index dc16a52db275..692beff66bf8 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -109,8 +109,8 @@ static void update_general_status(struct f2fs_sb_info *sbi) for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) { struct curseg_info *curseg = CURSEG_I(sbi, i); si->curseg[i] = curseg->segno; - si->cursec[i] = curseg->segno / sbi->segs_per_sec; - si->curzone[i] = si->cursec[i] / sbi->secs_per_zone; + si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno); + si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]); } for (i = 0; i < 2; i++) { @@ -134,7 +134,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi) bimodal = 0; total_vblocks = 0; - blks_per_sec = sbi->segs_per_sec * sbi->blocks_per_seg; + blks_per_sec = BLKS_PER_SEC(sbi); hblks_per_sec = blks_per_sec / 2; for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) { vblocks = get_valid_blocks(sbi, segno, true); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 3e1f8319d557..4615c96f8e39 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -135,12 +135,11 @@ enum { #define DEF_BATCHED_TRIM_SECTIONS 2048 #define BATCHED_TRIM_SEGMENTS(sbi) \ - (SM_I(sbi)->trim_sections * (sbi)->segs_per_sec) + (GET_SEG_FROM_SEC(sbi, SM_I(sbi)->trim_sections)) #define BATCHED_TRIM_BLOCKS(sbi) \ (BATCHED_TRIM_SEGMENTS(sbi) << (sbi)->log_blocks_per_seg) -#define MAX_DISCARD_BLOCKS(sbi) \ - ((1 << 
(sbi)->log_blocks_per_seg) * (sbi)->segs_per_sec) -#define DISCARD_ISSUE_RATE 8 +#define MAX_DISCARD_BLOCKS(sbi) BLKS_PER_SEC(sbi) +#define DISCARD_ISSUE_RATE 8 #define DEF_CP_INTERVAL 60 /* 60 secs */ #define DEF_IDLE_INTERVAL 5 /* 5 secs */ diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index f3be240ef129..4731eb587e06 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1892,7 +1892,6 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, pgoff_t pg_start, pg_end; unsigned int blk_per_seg = sbi->blocks_per_seg; unsigned int total = 0, sec_num; - unsigned int pages_per_sec = sbi->segs_per_sec * blk_per_seg; block_t blk_end = 0; bool fragmented = false; int err; @@ -1956,7 +1955,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, map.m_lblk = pg_start; map.m_len = pg_end - pg_start; - sec_num = (map.m_len + pages_per_sec - 1) / pages_per_sec; + sec_num = (map.m_len + BLKS_PER_SEC(sbi) - 1) / BLKS_PER_SEC(sbi); /* * make sure there are enough free section for LFS allocation, this can diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 439887c3aaf4..e2f9b2b12b74 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -211,7 +211,7 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) continue; clear_bit(secno, dirty_i->victim_secmap); - return secno * sbi->segs_per_sec; + return GET_SEG_FROM_SEC(sbi, secno); } return NULL_SEGNO; } @@ -219,8 +219,8 @@ static unsigned int check_bg_victims(struct f2fs_sb_info *sbi) static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - unsigned int secno = GET_SECNO(sbi, segno); - unsigned int start = secno * sbi->segs_per_sec; + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start = GET_SEG_FROM_SEC(sbi, secno); unsigned long long mtime = 0; unsigned int vblocks; unsigned char age = 0; @@ -343,7 +343,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, nsearched++; } - secno = GET_SECNO(sbi, segno); + secno = 
GET_SEC_FROM_SEG(sbi, segno); if (sec_usage_check(sbi, secno)) goto next; @@ -372,7 +372,7 @@ next: if (p.min_segno != NULL_SEGNO) { got_it: if (p.alloc_mode == LFS) { - secno = GET_SECNO(sbi, p.min_segno); + secno = GET_SEC_FROM_SEG(sbi, p.min_segno); if (gc_type == FG_GC) sbi->cur_victim_sec = secno; else @@ -1006,7 +1006,7 @@ stop: void build_gc_manager(struct f2fs_sb_info *sbi) { - u64 main_count, resv_count, ovp_count, blocks_per_sec; + u64 main_count, resv_count, ovp_count; DIRTY_I(sbi)->v_ops = &default_v_ops; @@ -1014,8 +1014,7 @@ void build_gc_manager(struct f2fs_sb_info *sbi) main_count = SM_I(sbi)->main_segments << sbi->log_blocks_per_seg; resv_count = SM_I(sbi)->reserved_segments << sbi->log_blocks_per_seg; ovp_count = SM_I(sbi)->ovp_segments << sbi->log_blocks_per_seg; - blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec; - sbi->fggc_threshold = div64_u64((main_count - ovp_count) * blocks_per_sec, - (main_count - resv_count)); + sbi->fggc_threshold = div64_u64((main_count - ovp_count) * + BLKS_PER_SEC(sbi), (main_count - resv_count)); } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index ae8331802d05..471ea9944412 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -637,7 +637,7 @@ static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno, dirty_i->nr_dirty[t]--; if (get_valid_blocks(sbi, segno, true) == 0) - clear_bit(GET_SECNO(sbi, segno), + clear_bit(GET_SEC_FROM_SEG(sbi, segno), dirty_i->victim_secmap); } } @@ -1106,8 +1106,8 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) continue; } next: - secno = GET_SECNO(sbi, start); - start_segno = secno * sbi->segs_per_sec; + secno = GET_SEC_FROM_SEG(sbi, start); + start_segno = GET_SEG_FROM_SEC(sbi, secno); if (!IS_CURSEC(sbi, secno) && !get_valid_blocks(sbi, start, true)) f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno), @@ -1462,8 +1462,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, struct free_segmap_info *free_i = 
FREE_I(sbi); unsigned int segno, secno, zoneno; unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone; - unsigned int hint = *newseg / sbi->segs_per_sec; - unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg); + unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg); + unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg); unsigned int left_start = hint; bool init = true; int go_left = 0; @@ -1473,8 +1473,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi, if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) { segno = find_next_zero_bit(free_i->free_segmap, - (hint + 1) * sbi->segs_per_sec, *newseg + 1); - if (segno < (hint + 1) * sbi->segs_per_sec) + GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1); + if (segno < GET_SEG_FROM_SEC(sbi, hint + 1)) goto got_it; } find_other_zone: @@ -1505,8 +1505,8 @@ find_other_zone: secno = left_start; skip_left: hint = secno; - segno = secno * sbi->segs_per_sec; - zoneno = secno / sbi->secs_per_zone; + segno = GET_SEG_FROM_SEC(sbi, secno); + zoneno = GET_ZONE_FROM_SEC(sbi, secno); /* give up on finding another zone */ if (!init) @@ -1550,7 +1550,7 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) struct summary_footer *sum_footer; curseg->segno = curseg->next_segno; - curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno); + curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno); curseg->next_blkoff = 0; curseg->next_segno = NULL_SEGNO; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 053166038bfe..5f6ef163aa8f 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -78,12 +78,16 @@ ((((blk_addr) == NULL_ADDR) || ((blk_addr) == NEW_ADDR)) ? 
\ NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ GET_SEGNO_FROM_SEG0(sbi, blk_addr))) -#define GET_SECNO(sbi, segno) \ +#define BLKS_PER_SEC(sbi) \ + ((sbi)->segs_per_sec * (sbi)->blocks_per_seg) +#define GET_SEC_FROM_SEG(sbi, segno) \ ((segno) / (sbi)->segs_per_sec) -#define GET_SEGNO_FROM_SECNO(sbi, secno) \ +#define GET_SEG_FROM_SEC(sbi, secno) \ ((secno) * (sbi)->segs_per_sec) -#define GET_ZONENO_FROM_SEGNO(sbi, segno) \ - (((segno) / (sbi)->segs_per_sec) / (sbi)->secs_per_zone) +#define GET_ZONE_FROM_SEC(sbi, secno) \ + ((secno) / (sbi)->secs_per_zone) +#define GET_ZONE_FROM_SEG(sbi, segno) \ + GET_ZONE_FROM_SEC(sbi, GET_SEC_FROM_SEG(sbi, segno)) #define GET_SUM_BLOCK(sbi, segno) \ ((sbi)->sm_info->ssa_blkaddr + (segno)) @@ -305,7 +309,7 @@ static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi, unsigned int segno) { struct sit_info *sit_i = SIT_I(sbi); - return &sit_i->sec_entries[GET_SECNO(sbi, segno)]; + return &sit_i->sec_entries[GET_SEC_FROM_SEG(sbi, segno)]; } static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, @@ -360,8 +364,8 @@ static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) { struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; - unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; spin_lock(&free_i->segmap_lock); @@ -381,7 +385,8 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi, unsigned int segno) { struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + set_bit(segno, free_i->free_segmap); free_i->free_segments--; if (!test_and_set_bit(secno, free_i->free_secmap)) @@ -392,8 +397,8 @@ static inline void __set_test_and_free(struct 
f2fs_sb_info *sbi, unsigned int segno) { struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; - unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno); unsigned int next; spin_lock(&free_i->segmap_lock); @@ -414,7 +419,8 @@ static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, unsigned int segno) { struct free_segmap_info *free_i = FREE_I(sbi); - unsigned int secno = segno / sbi->segs_per_sec; + unsigned int secno = GET_SEC_FROM_SEG(sbi, segno); + spin_lock(&free_i->segmap_lock); if (!test_and_set_bit(segno, free_i->free_segmap)) { free_i->free_segments--; @@ -479,12 +485,12 @@ static inline int overprovision_segments(struct f2fs_sb_info *sbi) static inline int overprovision_sections(struct f2fs_sb_info *sbi) { - return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec; + return GET_SEC_FROM_SEG(sbi, (unsigned int)overprovision_segments(sbi)); } static inline int reserved_sections(struct f2fs_sb_info *sbi) { - return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec; + return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi)); } static inline bool need_SSR(struct f2fs_sb_info *sbi) @@ -722,7 +728,7 @@ static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) static inline bool no_fggc_candidate(struct f2fs_sb_info *sbi, unsigned int secno) { - if (get_valid_blocks(sbi, GET_SEGNO_FROM_SECNO(sbi, secno), true) >= + if (get_valid_blocks(sbi, GET_SEG_FROM_SEC(sbi, secno), true) >= sbi->fggc_threshold) return true; return false; -- cgit v1.2.3 From 8fd5a37efa0b036353df253e20dabe8773c039cd Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 7 Apr 2017 17:25:54 -0700 Subject: f2fs: avoid frequent checkpoint during f2fs_gc Now we're doing SSR aggressively more than ever before, so once we reach to the reserved_segment, f2fs_balance_fs will call 
f2fs_gc, which triggers checkpoint every time. We actually must avoid that. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index e2f9b2b12b74..9172112d6246 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -966,9 +966,11 @@ gc_more: * threshold, we can make them free by checkpoint. Then, we * secure free segments which doesn't need fggc any more. */ - ret = write_checkpoint(sbi, &cpc); - if (ret) - goto stop; + if (prefree_segments(sbi)) { + ret = write_checkpoint(sbi, &cpc); + if (ret) + goto stop; + } if (has_not_enough_free_secs(sbi, 0, 0)) gc_type = FG_GC; } -- cgit v1.2.3 From 54c2258cd63a81283cf88b2bd9457c61ee2cd66f Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 11 Apr 2017 09:25:22 +0800 Subject: f2fs: extract rb-tree operation infrastructure The rb-tree lookup/update functions are deeply coupled into the extent cache code, so it is very hard to reuse these basic functions. This patch extracts the common rb-tree operation infrastructure for later reuse.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 291 +++++++++++++++++++++++++++---------------------- fs/f2fs/f2fs.h | 20 +++- 2 files changed, 179 insertions(+), 132 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index c6934f014e0f..68e649a31c7d 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -18,6 +18,146 @@ #include "node.h" #include +static struct rb_entry *__lookup_rb_tree_fast(struct rb_entry *cached_re, + unsigned int ofs) +{ + if (cached_re) { + if (cached_re->ofs <= ofs && + cached_re->ofs + cached_re->len > ofs) { + return cached_re; + } + } + return NULL; +} + +static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root, + unsigned int ofs) +{ + struct rb_node *node = root->rb_node; + struct rb_entry *re; + + while (node) { + re = rb_entry(node, struct rb_entry, rb_node); + + if (ofs < re->ofs) + node = node->rb_left; + else if (ofs >= re->ofs + re->len) + node = node->rb_right; + else + return re; + } + return NULL; +} + +static struct rb_entry *__lookup_rb_tree(struct rb_root *root, + struct rb_entry *cached_re, unsigned int ofs) +{ + struct rb_entry *re; + + re = __lookup_rb_tree_fast(cached_re, ofs); + if (!re) + return __lookup_rb_tree_slow(root, ofs); + + return re; +} + +static struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, + struct rb_root *root, struct rb_node **parent, + unsigned int ofs) +{ + struct rb_node **p = &root->rb_node; + struct rb_entry *re; + + while (*p) { + *parent = *p; + re = rb_entry(*parent, struct rb_entry, rb_node); + + if (ofs < re->ofs) + p = &(*p)->rb_left; + else if (ofs >= re->ofs + re->len) + p = &(*p)->rb_right; + else + f2fs_bug_on(sbi, 1); + } + + return p; +} + +/* + * lookup rb entry in position of @ofs in rb-tree, + * if hit, return the entry, otherwise, return NULL + * @prev_ex: extent before ofs + * @next_ex: extent after ofs + * @insert_p: insert point for new extent at ofs + * in 
order to simpfy the insertion after. + * tree must stay unchanged between lookup and insertion. + */ +static struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, + struct rb_entry *cached_re, + unsigned int ofs, + struct rb_entry **prev_entry, + struct rb_entry **next_entry, + struct rb_node ***insert_p, + struct rb_node **insert_parent) +{ + struct rb_node **pnode = &root->rb_node; + struct rb_node *parent = NULL, *tmp_node; + struct rb_entry *re = cached_re; + + *insert_p = NULL; + *insert_parent = NULL; + *prev_entry = NULL; + *next_entry = NULL; + + if (RB_EMPTY_ROOT(root)) + return NULL; + + if (re) { + if (re->ofs <= ofs && re->ofs + re->len > ofs) + goto lookup_neighbors; + } + + while (*pnode) { + parent = *pnode; + re = rb_entry(*pnode, struct rb_entry, rb_node); + + if (ofs < re->ofs) + pnode = &(*pnode)->rb_left; + else if (ofs >= re->ofs + re->len) + pnode = &(*pnode)->rb_right; + else + goto lookup_neighbors; + } + + *insert_p = pnode; + *insert_parent = parent; + + re = rb_entry(parent, struct rb_entry, rb_node); + tmp_node = parent; + if (parent && ofs > re->ofs) + tmp_node = rb_next(parent); + *next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + + tmp_node = parent; + if (parent && ofs < re->ofs) + tmp_node = rb_prev(parent); + *prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + return NULL; + +lookup_neighbors: + if (ofs == re->ofs) { + /* lookup prev node for merging backward later */ + tmp_node = rb_prev(&re->rb_node); + *prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + } + if (ofs == re->ofs + re->len - 1) { + /* lookup next node for merging frontward later */ + tmp_node = rb_next(&re->rb_node); + *next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); + } + return re; +} + static struct kmem_cache *extent_tree_slab; static struct kmem_cache *extent_node_slab; @@ -102,36 +242,6 @@ static struct extent_tree *__grab_extent_tree(struct inode *inode) return et; } -static struct 
extent_node *__lookup_extent_tree(struct f2fs_sb_info *sbi, - struct extent_tree *et, unsigned int fofs) -{ - struct rb_node *node = et->root.rb_node; - struct extent_node *en = et->cached_en; - - if (en) { - struct extent_info *cei = &en->ei; - - if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) { - stat_inc_cached_node_hit(sbi); - return en; - } - } - - while (node) { - en = rb_entry(node, struct extent_node, rb_node); - - if (fofs < en->ei.fofs) { - node = node->rb_left; - } else if (fofs >= en->ei.fofs + en->ei.len) { - node = node->rb_right; - } else { - stat_inc_rbtree_node_hit(sbi); - return en; - } - } - return NULL; -} - static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi, struct extent_tree *et, struct extent_info *ei) { @@ -237,17 +347,24 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, goto out; } - en = __lookup_extent_tree(sbi, et, pgofs); - if (en) { - *ei = en->ei; - spin_lock(&sbi->extent_lock); - if (!list_empty(&en->list)) { - list_move_tail(&en->list, &sbi->extent_list); - et->cached_en = en; - } - spin_unlock(&sbi->extent_lock); - ret = true; + en = (struct extent_node *)__lookup_rb_tree(&et->root, + (struct rb_entry *)et->cached_en, pgofs); + if (!en) + goto out; + + if (en == et->cached_en) + stat_inc_cached_node_hit(sbi); + else + stat_inc_rbtree_node_hit(sbi); + + *ei = en->ei; + spin_lock(&sbi->extent_lock); + if (!list_empty(&en->list)) { + list_move_tail(&en->list, &sbi->extent_list); + et->cached_en = en; } + spin_unlock(&sbi->extent_lock); + ret = true; out: stat_inc_total_hit(sbi); read_unlock(&et->lock); @@ -256,83 +373,6 @@ out: return ret; } - -/* - * lookup extent at @fofs, if hit, return the extent - * if not, return NULL and - * @prev_ex: extent before fofs - * @next_ex: extent after fofs - * @insert_p: insert point for new extent at fofs - * in order to simpfy the insertion after. - * tree must stay unchanged between lookup and insertion. 
- */ -static struct extent_node *__lookup_extent_tree_ret(struct extent_tree *et, - unsigned int fofs, - struct extent_node **prev_ex, - struct extent_node **next_ex, - struct rb_node ***insert_p, - struct rb_node **insert_parent) -{ - struct rb_node **pnode = &et->root.rb_node; - struct rb_node *parent = NULL, *tmp_node; - struct extent_node *en = et->cached_en; - - *insert_p = NULL; - *insert_parent = NULL; - *prev_ex = NULL; - *next_ex = NULL; - - if (RB_EMPTY_ROOT(&et->root)) - return NULL; - - if (en) { - struct extent_info *cei = &en->ei; - - if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) - goto lookup_neighbors; - } - - while (*pnode) { - parent = *pnode; - en = rb_entry(*pnode, struct extent_node, rb_node); - - if (fofs < en->ei.fofs) - pnode = &(*pnode)->rb_left; - else if (fofs >= en->ei.fofs + en->ei.len) - pnode = &(*pnode)->rb_right; - else - goto lookup_neighbors; - } - - *insert_p = pnode; - *insert_parent = parent; - - en = rb_entry(parent, struct extent_node, rb_node); - tmp_node = parent; - if (parent && fofs > en->ei.fofs) - tmp_node = rb_next(parent); - *next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node); - - tmp_node = parent; - if (parent && fofs < en->ei.fofs) - tmp_node = rb_prev(parent); - *prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node); - return NULL; - -lookup_neighbors: - if (fofs == en->ei.fofs) { - /* lookup prev node for merging backward later */ - tmp_node = rb_prev(&en->rb_node); - *prev_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node); - } - if (fofs == en->ei.fofs + en->ei.len - 1) { - /* lookup next node for merging frontward later */ - tmp_node = rb_next(&en->rb_node); - *next_ex = rb_entry_safe(tmp_node, struct extent_node, rb_node); - } - return en; -} - static struct extent_node *__try_merge_extent_node(struct inode *inode, struct extent_tree *et, struct extent_info *ei, struct extent_node *prev_ex, @@ -387,17 +427,7 @@ static struct extent_node *__insert_extent_tree(struct inode 
*inode, goto do_insert; } - while (*p) { - parent = *p; - en = rb_entry(parent, struct extent_node, rb_node); - - if (ei->fofs < en->ei.fofs) - p = &(*p)->rb_left; - else if (ei->fofs >= en->ei.fofs + en->ei.len) - p = &(*p)->rb_right; - else - f2fs_bug_on(sbi, 1); - } + p = __lookup_rb_tree_for_insert(sbi, &et->root, &parent, ei->fofs); do_insert: en = __attach_extent_node(sbi, et, ei, parent, p); if (!en) @@ -447,7 +477,10 @@ static void f2fs_update_extent_tree_range(struct inode *inode, __drop_largest_extent(inode, fofs, len); /* 1. lookup first extent node in range [fofs, fofs + len - 1] */ - en = __lookup_extent_tree_ret(et, fofs, &prev_en, &next_en, + en = (struct extent_node *)__lookup_rb_tree_ret(&et->root, + (struct rb_entry *)et->cached_en, fofs, + (struct rb_entry **)&prev_en, + (struct rb_entry **)&next_en, &insert_p, &insert_parent); if (!en) en = next_en; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 4615c96f8e39..562db8989a4e 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -377,16 +377,30 @@ enum { /* number of extent info in extent cache we try to shrink */ #define EXTENT_CACHE_SHRINK_NUMBER 128 +struct rb_entry { + struct rb_node rb_node; /* rb node located in rb-tree */ + unsigned int ofs; /* start offset of the entry */ + unsigned int len; /* length of the entry */ +}; + struct extent_info { unsigned int fofs; /* start offset in a file */ - u32 blk; /* start block address of the extent */ unsigned int len; /* length of the extent */ + u32 blk; /* start block address of the extent */ }; struct extent_node { - struct rb_node rb_node; /* rb node located in rb-tree */ + struct rb_node rb_node; + union { + struct { + unsigned int fofs; + unsigned int len; + u32 blk; + }; + struct extent_info ei; /* extent info */ + + }; struct list_head list; /* node in global extent list of sbi */ - struct extent_info ei; /* extent info */ struct extent_tree *et; /* extent tree pointer */ }; -- cgit v1.2.3 From a54455f5eeb8d65455a27b3ace99f9af295b2897 Mon 
Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 27 Mar 2017 18:14:05 +0800 Subject: f2fs: shrink blk plug region Don't let the blk plug cover regions where no IOs will be issued. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 471ea9944412..58cfbe3d4dc7 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -853,9 +853,8 @@ repeat: if (kthread_should_stop()) return 0; - blk_start_plug(&plug); - mutex_lock(&dcc->cmd_lock); + blk_start_plug(&plug); list_for_each_entry_safe(dc, tmp, pend_list, list) { f2fs_bug_on(sbi, dc->state != D_PREP); @@ -865,6 +864,7 @@ repeat: if (iter++ > DISCARD_ISSUE_RATE) break; } + blk_finish_plug(&plug); list_for_each_entry_safe(dc, tmp, wait_list, list) { if (dc->state == D_DONE) { @@ -874,8 +874,6 @@ repeat: } mutex_unlock(&dcc->cmd_lock); - blk_finish_plug(&plug); - iter = 0; congestion_wait(BLK_RW_SYNC, HZ/50); -- cgit v1.2.3 From 9bb02c3627f46e50246bf7ab957b56ffbef623cb Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 11 Apr 2017 19:01:26 -0700 Subject: f2fs: fix fs corruption due to zero inode page This patch fixes the following scenario. - f2fs_create/f2fs_mkdir - write_checkpoint - f2fs_mark_inode_dirty_sync - block_operations - f2fs_lock_all - f2fs_sync_inode_meta - f2fs_unlock_all - sync_inode_metadata - f2fs_lock_op - f2fs_write_inode - update_inode_page - get_node_page return -ENOENT - new_inode_page - fill_node_footer - f2fs_mark_inode_dirty_sync - ... - f2fs_unlock_op - f2fs_inode_synced - f2fs_lock_all - do_checkpoint In this checkpoint, we can get an inode page which contains only zeros apart from a valid node footer.
Cc: Signed-off-by: Jaegeuk Kim --- fs/f2fs/inode.c | 2 +- fs/f2fs/namei.c | 20 ++++++++++---------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 2520fa72b23f..0900814485c7 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -316,7 +316,6 @@ retry: } else if (err != -ENOENT) { f2fs_stop_checkpoint(sbi, false); } - f2fs_inode_synced(inode); return 0; } ret = update_inode(inode, node_page); @@ -450,6 +449,7 @@ void handle_failed_inode(struct inode *inode) * in a panic when flushing dirty inodes in gdirty_list. */ update_inode_page(inode); + f2fs_inode_synced(inode); /* don't make bad inode, since it becomes a regular file. */ unlock_new_inode(inode); diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 8906c9f6cce4..8b5f596ed738 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -148,8 +148,6 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, inode->i_mapping->a_ops = &f2fs_dblock_aops; ino = inode->i_ino; - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -163,6 +161,8 @@ static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_balance_fs(sbi, true); return 0; out: handle_failed_inode(inode); @@ -423,8 +423,6 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry, inode_nohighmem(inode); inode->i_mapping->a_ops = &f2fs_dblock_aops; - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -487,6 +485,8 @@ err_out: } kfree(sd); + + f2fs_balance_fs(sbi, true); return err; out: handle_failed_inode(inode); @@ -508,8 +508,6 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) inode->i_mapping->a_ops = &f2fs_dblock_aops; mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_HIGH_ZERO); - f2fs_balance_fs(sbi, true); - set_inode_flag(inode, FI_INC_LINK); 
f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); @@ -524,6 +522,8 @@ static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_balance_fs(sbi, true); return 0; out_fail: @@ -554,8 +554,6 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, init_special_inode(inode, inode->i_mode, rdev); inode->i_op = &f2fs_special_inode_operations; - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); err = f2fs_add_link(dentry, inode); if (err) @@ -569,6 +567,8 @@ static int f2fs_mknod(struct inode *dir, struct dentry *dentry, if (IS_DIRSYNC(dir)) f2fs_sync_fs(sbi->sb, 1); + + f2fs_balance_fs(sbi, true); return 0; out: handle_failed_inode(inode); @@ -595,8 +595,6 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, inode->i_mapping->a_ops = &f2fs_dblock_aops; } - f2fs_balance_fs(sbi, true); - f2fs_lock_op(sbi); err = acquire_orphan_inode(sbi); if (err) @@ -622,6 +620,8 @@ static int __f2fs_tmpfile(struct inode *dir, struct dentry *dentry, /* link_count was changed by d_tmpfile as well. */ f2fs_unlock_op(sbi); unlock_new_inode(inode); + + f2fs_balance_fs(sbi, true); return 0; release_out: -- cgit v1.2.3 From 309738835451739dc019dc3d5bb4c93d487ff36b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 11 Apr 2017 19:15:33 -0700 Subject: f2fs: give time to flush dirty pages for checkpoint If all the threads are waiting for checkpoint, we have no chance to flush required dirty pages. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 9db92990f193..800be94f8cb3 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -980,6 +980,7 @@ retry_flush_dents: err = sync_dirty_inodes(sbi, DIR_INODE); if (err) goto out; + cond_resched(); goto retry_flush_dents; } @@ -995,6 +996,7 @@ retry_flush_dents: err = f2fs_sync_inode_meta(sbi); if (err) goto out; + cond_resched(); goto retry_flush_dents; } @@ -1009,6 +1011,7 @@ retry_flush_nodes: f2fs_unlock_all(sbi); goto out; } + cond_resched(); goto retry_flush_nodes; } -- cgit v1.2.3 From 6c3acd97572b2414075503aa79f305c1d6268114 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 12 Apr 2017 10:01:33 -0700 Subject: f2fs: allocate hot_data for atomic writes We'd better allocate atomic writes to hot_data zone. Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 4731eb587e06..0ac833dd2634 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1531,6 +1531,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp) goto out; set_inode_flag(inode, FI_ATOMIC_FILE); + set_inode_flag(inode, FI_HOT_DATA); f2fs_update_time(F2FS_I_SB(inode), REQ_TIME); if (!get_dirty_pages(inode)) -- cgit v1.2.3 From d29fd17218515387c1eb36740f0f6f03d0d264d4 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 12 Apr 2017 12:02:00 -0700 Subject: f2fs: fix not to set fsync/dentry mark Otherwise, we can see stale fsync/dentry mark given by previous calls, resulting in giving up roll-forward recovery due to wrong dentry mark. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 9422dd252813..ad54e907b97b 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1458,6 +1458,9 @@ continue_unlock: f2fs_wait_on_page_writeback(page, NODE, true); BUG_ON(PageWriteback(page)); + set_fsync_mark(page, 0); + set_dentry_mark(page, 0); + if (!atomic || page == last_page) { set_fsync_mark(page, 1); if (IS_INODE(page)) { -- cgit v1.2.3 From d40d30c5aa5227546030d3d7b0a6a38c6c85933a Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 14 Apr 2017 15:46:23 -0700 Subject: f2fs: avoid dirty node pages in check_only recovery In the check_only mode, we should not make any dirty node pages. Otherwise, we can get this panic: F2FS-fs (nvme0n1p1): Need to recover fsync data ------------[ cut here ]------------ kernel BUG at fs/f2fs/node.c:2204! CPU: 7 PID: 19923 Comm: mount Tainted: G OE 4.9.8 #2 RIP: 0010:[] [] flush_nat_entries+0x43b/0x7d0 [f2fs] Call Trace: [] ? __f2fs_submit_merged_bio+0x5a/0xd0 [f2fs] [] ? __f2fs_submit_merged_bio+0x5a/0xd0 [f2fs] [] ? __f2fs_submit_merged_bio+0x8b/0xd0 [f2fs] [] ? up_write+0x1f/0x40 [] ? __f2fs_submit_merged_bio+0x8b/0xd0 [f2fs] [] write_checkpoint+0x2f4/0xf20 [f2fs] [] ? trace_hardirqs_on+0xd/0x10 [] ? f2fs_sync_fs+0x79/0x190 [f2fs] [] ? f2fs_sync_fs+0x79/0x190 [f2fs] [] f2fs_sync_fs+0x85/0x190 [f2fs] [] f2fs_balance_fs_bg+0x7e/0x1c0 [f2fs] [] f2fs_write_node_pages+0x34/0x350 [f2fs] [] ? __lock_is_held+0x52/0x70 [] do_writepages+0x21/0x30 [] __writeback_single_inode+0x61/0x760 [] ? _raw_spin_unlock+0x27/0x40 [] writeback_single_inode+0xd5/0x190 [] write_inode_now+0x99/0xc0 [] iput+0x1f6/0x2c0 [] f2fs_fill_super+0xc32/0x10c0 [f2fs] [] mount_bdev+0x182/0x1b0 [] ? 
f2fs_commit_super+0x100/0x100 [f2fs] [] f2fs_mount+0x15/0x20 [f2fs] [] mount_fs+0x38/0x170 [] vfs_kern_mount+0x6b/0x160 [] do_mount+0x1be/0xd60 Signed-off-by: Jaegeuk Kim --- fs/f2fs/recovery.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c index d025aa83fb5b..907d6b7dde6a 100644 --- a/fs/f2fs/recovery.c +++ b/fs/f2fs/recovery.c @@ -198,7 +198,8 @@ static void recover_inode(struct inode *inode, struct page *page) ino_of_node(page), name); } -static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) +static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head, + bool check_only) { struct curseg_info *curseg; struct page *page = NULL; @@ -225,7 +226,8 @@ static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head) entry = get_fsync_inode(head, ino_of_node(page)); if (!entry) { - if (IS_INODE(page) && is_dent_dnode(page)) { + if (!check_only && + IS_INODE(page) && is_dent_dnode(page)) { err = recover_inode_page(sbi, page); if (err) break; @@ -569,7 +571,7 @@ int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only) mutex_lock(&sbi->cp_mutex); /* step #1: find fsynced inode numbers */ - err = find_fsync_dnodes(sbi, &inode_list); + err = find_fsync_dnodes(sbi, &inode_list, check_only); if (err || list_empty(&inode_list)) goto out; -- cgit v1.2.3 From 004b68621897f06aa2817e7438469d23f4a3a284 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 14 Apr 2017 23:24:55 +0800 Subject: f2fs: use rb-tree to track pending discard commands Introduce rb-tree based discard cache infrastructure to speed up lookup and merge operation of discard entry. 
Signed-off-by: Chao Yu [Jaegeuk Kim: initialize dc to avoid build warning] Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 15 ++-- fs/f2fs/f2fs.h | 48 ++++++++++- fs/f2fs/segment.c | 223 ++++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 236 insertions(+), 50 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 68e649a31c7d..221ad086ee00 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -49,7 +49,7 @@ static struct rb_entry *__lookup_rb_tree_slow(struct rb_root *root, return NULL; } -static struct rb_entry *__lookup_rb_tree(struct rb_root *root, +struct rb_entry *__lookup_rb_tree(struct rb_root *root, struct rb_entry *cached_re, unsigned int ofs) { struct rb_entry *re; @@ -61,7 +61,7 @@ static struct rb_entry *__lookup_rb_tree(struct rb_root *root, return re; } -static struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, +struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, struct rb_root *root, struct rb_node **parent, unsigned int ofs) { @@ -92,13 +92,14 @@ static struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, * in order to simpfy the insertion after. * tree must stay unchanged between lookup and insertion. 
*/ -static struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, +struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, struct rb_entry *cached_re, unsigned int ofs, struct rb_entry **prev_entry, struct rb_entry **next_entry, struct rb_node ***insert_p, - struct rb_node **insert_parent) + struct rb_node **insert_parent, + bool force) { struct rb_node **pnode = &root->rb_node; struct rb_node *parent = NULL, *tmp_node; @@ -145,12 +146,12 @@ static struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, return NULL; lookup_neighbors: - if (ofs == re->ofs) { + if (ofs == re->ofs || force) { /* lookup prev node for merging backward later */ tmp_node = rb_prev(&re->rb_node); *prev_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); } - if (ofs == re->ofs + re->len - 1) { + if (ofs == re->ofs + re->len - 1 || force) { /* lookup next node for merging frontward later */ tmp_node = rb_next(&re->rb_node); *next_entry = rb_entry_safe(tmp_node, struct rb_entry, rb_node); @@ -481,7 +482,7 @@ static void f2fs_update_extent_tree_range(struct inode *inode, (struct rb_entry *)et->cached_en, fofs, (struct rb_entry **)&prev_en, (struct rb_entry **)&next_en, - &insert_p, &insert_parent); + &insert_p, &insert_parent, false); if (!en) en = next_en; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 562db8989a4e..ee7d6105a7a5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -194,13 +194,26 @@ enum { D_DONE, }; +struct discard_info { + block_t lstart; /* logical start address */ + block_t len; /* length */ + block_t start; /* actual start address in dev */ +}; + struct discard_cmd { + struct rb_node rb_node; /* rb node located in rb-tree */ + union { + struct { + block_t lstart; /* logical start address */ + block_t len; /* length */ + block_t start; /* actual start address in dev */ + }; + struct discard_info di; /* discard info */ + + }; struct list_head list; /* command list */ struct completion wait; /* compleation */ struct block_device *bdev; /* bdev */ - 
block_t lstart; /* logical start address */ - block_t start; /* actual start address in dev */ - block_t len; /* length */ int state; /* state */ int error; /* bio error */ }; @@ -217,6 +230,7 @@ struct discard_cmd_control { atomic_t issued_discard; /* # of issued discard */ atomic_t issing_discard; /* # of issing discard */ atomic_t discard_cmd_cnt; /* # of cached cmd count */ + struct rb_root root; /* root of discard rb-tree */ }; /* for the list of fsync inodes, used only during recovery */ @@ -517,6 +531,24 @@ static inline void set_extent_info(struct extent_info *ei, unsigned int fofs, ei->len = len; } +static inline bool __is_discard_mergeable(struct discard_info *back, + struct discard_info *front) +{ + return back->lstart + back->len == front->lstart; +} + +static inline bool __is_discard_back_mergeable(struct discard_info *cur, + struct discard_info *back) +{ + return __is_discard_mergeable(back, cur); +} + +static inline bool __is_discard_front_mergeable(struct discard_info *cur, + struct discard_info *front) +{ + return __is_discard_mergeable(cur, front); +} + static inline bool __is_extent_mergeable(struct extent_info *back, struct extent_info *front) { @@ -2573,6 +2605,16 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi); /* * extent_cache.c */ +struct rb_entry *__lookup_rb_tree(struct rb_root *root, + struct rb_entry *cached_re, unsigned int ofs); +struct rb_node **__lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi, + struct rb_root *root, struct rb_node **parent, + unsigned int ofs); +struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, + struct rb_entry *cached_re, unsigned int ofs, + struct rb_entry **prev_entry, struct rb_entry **next_entry, + struct rb_node ***insert_p, struct rb_node **insert_parent, + bool force); unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext); void f2fs_drop_extent_tree(struct inode *inode); diff --git 
a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 58cfbe3d4dc7..d137a08ec3a0 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -672,7 +672,7 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno) mutex_unlock(&dirty_i->seglist_lock); } -static void __add_discard_cmd(struct f2fs_sb_info *sbi, +static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, struct block_device *bdev, block_t lstart, block_t start, block_t len) { @@ -689,18 +689,46 @@ static void __add_discard_cmd(struct f2fs_sb_info *sbi, dc->state = D_PREP; dc->error = 0; init_completion(&dc->wait); - - mutex_lock(&dcc->cmd_lock); list_add_tail(&dc->list, pend_list); - mutex_unlock(&dcc->cmd_lock); - atomic_inc(&dcc->discard_cmd_cnt); + + return dc; +} + +static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len, + struct rb_node *parent, struct rb_node **p) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *dc; + + dc = __create_discard_cmd(sbi, bdev, lstart, start, len); + + rb_link_node(&dc->rb_node, parent, p); + rb_insert_color(&dc->rb_node, &dcc->root); + + return dc; } -static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc) +static void __detach_discard_cmd(struct discard_cmd_control *dcc, + struct discard_cmd *dc) { if (dc->state == D_DONE) - atomic_dec(&(SM_I(sbi)->dcc_info->issing_discard)); + atomic_dec(&dcc->issing_discard); + + list_del(&dc->list); + rb_erase(&dc->rb_node, &dcc->root); + + kmem_cache_free(discard_cmd_slab, dc); + + atomic_dec(&dcc->discard_cmd_cnt); +} + +static void __remove_discard_cmd(struct f2fs_sb_info *sbi, + struct discard_cmd *dc) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; if (dc->error == -EOPNOTSUPP) dc->error = 0; @@ -708,9 +736,7 @@ static void __remove_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *d if (dc->error) f2fs_msg(sbi->sb, KERN_INFO, 
"Issue discard failed, ret: %d", dc->error); - list_del(&dc->list); - kmem_cache_free(discard_cmd_slab, dc); - atomic_dec(&SM_I(sbi)->dcc_info->discard_cmd_cnt); + __detach_discard_cmd(dcc, dc); } static void f2fs_submit_discard_endio(struct bio *bio) @@ -754,62 +780,178 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, } } -static int __queue_discard_cmd(struct f2fs_sb_info *sbi, - struct block_device *bdev, block_t blkstart, block_t blklen) +static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len, + struct rb_node **insert_p, + struct rb_node *insert_parent) { - block_t lblkstart = blkstart; + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct rb_node **p = &dcc->root.rb_node; + struct rb_node *parent = NULL; + struct discard_cmd *dc = NULL; - trace_f2fs_issue_discard(bdev, blkstart, blklen); + if (insert_p && insert_parent) { + parent = insert_parent; + p = insert_p; + goto do_insert; + } - if (sbi->s_ndevs) { - int devi = f2fs_target_device_index(sbi, blkstart); + p = __lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart); +do_insert: + dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p); + if (!dc) + return NULL; - blkstart -= FDEV(devi).start_blk; - } - __add_discard_cmd(sbi, bdev, lblkstart, blkstart, blklen); - wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); - return 0; + return dc; } static void __punch_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc, block_t blkaddr) { - block_t end_block = START_BLOCK(sbi, GET_SEGNO(sbi, blkaddr) + 1); + struct discard_info di = dc->di; + bool modified = false; - if (dc->state == D_DONE || dc->lstart + dc->len <= end_block) { + if (dc->state == D_DONE || dc->len == 1) { __remove_discard_cmd(sbi, dc); return; } - if (blkaddr - dc->lstart < dc->lstart + dc->len - end_block) { - dc->start += (end_block - dc->lstart); - dc->len -= (end_block - dc->lstart); - dc->lstart = 
end_block; - } else { + if (blkaddr > di.lstart) { dc->len = blkaddr - dc->lstart; + modified = true; + } + + if (blkaddr < di.lstart + di.len - 1) { + if (modified) { + __insert_discard_tree(sbi, dc->bdev, blkaddr + 1, + di.start + blkaddr + 1 - di.lstart, + di.lstart + di.len - 1 - blkaddr, + NULL, NULL); + } else { + dc->lstart++; + dc->len--; + dc->start++; + } } } -/* This should be covered by global mutex, &sit_i->sentry_lock */ -void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) +static void __update_discard_tree_range(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t lstart, + block_t start, block_t len) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *pend_list = &(dcc->discard_pend_list); - struct list_head *wait_list = &(dcc->discard_wait_list); - struct discard_cmd *dc, *tmp; + struct discard_cmd *prev_dc = NULL, *next_dc = NULL; + struct discard_cmd *dc; + struct discard_info di = {0}; + struct rb_node **insert_p = NULL, *insert_parent = NULL; + block_t end = lstart + len; mutex_lock(&dcc->cmd_lock); - list_for_each_entry_safe(dc, tmp, pend_list, list) { - if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) - __punch_discard_cmd(sbi, dc, blkaddr); + dc = (struct discard_cmd *)__lookup_rb_tree_ret(&dcc->root, + NULL, lstart, + (struct rb_entry **)&prev_dc, + (struct rb_entry **)&next_dc, + &insert_p, &insert_parent, true); + if (dc) + prev_dc = dc; + + if (!prev_dc) { + di.lstart = lstart; + di.len = next_dc ? 
next_dc->lstart - lstart : len; + di.len = min(di.len, len); + di.start = start; } - list_for_each_entry_safe(dc, tmp, wait_list, list) { - if (dc->lstart <= blkaddr && blkaddr < dc->lstart + dc->len) { - wait_for_completion_io(&dc->wait); - __punch_discard_cmd(sbi, dc, blkaddr); + while (1) { + struct rb_node *node; + bool merged = false; + struct discard_cmd *tdc = NULL; + + if (prev_dc) { + di.lstart = prev_dc->lstart + prev_dc->len; + if (di.lstart < lstart) + di.lstart = lstart; + if (di.lstart >= end) + break; + + if (!next_dc || next_dc->lstart > end) + di.len = end - di.lstart; + else + di.len = next_dc->lstart - di.lstart; + di.start = start + di.lstart - lstart; + } + + if (!di.len) + goto next; + + if (prev_dc && prev_dc->state == D_PREP && + prev_dc->bdev == bdev && + __is_discard_back_mergeable(&di, &prev_dc->di)) { + prev_dc->di.len += di.len; + di = prev_dc->di; + tdc = prev_dc; + merged = true; + } + + if (next_dc && next_dc->state == D_PREP && + next_dc->bdev == bdev && + __is_discard_front_mergeable(&di, &next_dc->di)) { + next_dc->di.lstart = di.lstart; + next_dc->di.len += di.len; + next_dc->di.start = di.start; + if (tdc) + __remove_discard_cmd(sbi, tdc); + + merged = true; } + + if (!merged) + __insert_discard_tree(sbi, bdev, di.lstart, di.start, + di.len, NULL, NULL); + next: + prev_dc = next_dc; + if (!prev_dc) + break; + + node = rb_next(&prev_dc->rb_node); + next_dc = rb_entry_safe(node, struct discard_cmd, rb_node); + } + + mutex_unlock(&dcc->cmd_lock); +} + +static int __queue_discard_cmd(struct f2fs_sb_info *sbi, + struct block_device *bdev, block_t blkstart, block_t blklen) +{ + block_t lblkstart = blkstart; + + trace_f2fs_issue_discard(bdev, blkstart, blklen); + + if (sbi->s_ndevs) { + int devi = f2fs_target_device_index(sbi, blkstart); + + blkstart -= FDEV(devi).start_blk; + } + __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen); + wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); + return 0; +} + +/* This should be 
covered by global mutex, &sit_i->sentry_lock */ +void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct discard_cmd *dc; + + mutex_lock(&dcc->cmd_lock); + + dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr); + if (dc) { + if (dc->state != D_PREP) + wait_for_completion_io(&dc->wait); + __punch_discard_cmd(sbi, dc, blkaddr); } mutex_unlock(&dcc->cmd_lock); @@ -1178,6 +1320,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; dcc->max_discards = 0; + dcc->root = RB_ROOT; init_waitqueue_head(&dcc->discard_wait_queue); SM_I(sbi)->dcc_info = dcc; -- cgit v1.2.3 From 46f84c2c058784f42f2d021df79384ec66cdb256 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 15 Apr 2017 14:09:36 +0800 Subject: f2fs: clean up discard_cmd_control structure Avoid long variable name in discard_cmd_control structure, no logic change. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 8 ++++---- fs/f2fs/segment.c | 24 ++++++++++++------------ 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index ee7d6105a7a5..c1faf6d35a8d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -220,12 +220,12 @@ struct discard_cmd { struct discard_cmd_control { struct task_struct *f2fs_issue_discard; /* discard thread */ - struct list_head discard_entry_list; /* 4KB discard entry list */ - int nr_discards; /* # of discards in the list */ - struct list_head discard_pend_list; /* store pending entries */ - struct list_head discard_wait_list; /* store on-flushing entries */ + struct list_head entry_list; /* 4KB discard entry list */ + struct list_head pend_list; /* store pending entries */ + struct list_head wait_list; /* store on-flushing entries */ wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ struct mutex cmd_lock; + int 
nr_discards; /* # of discards in the list */ int max_discards; /* max. discards to be issued */ atomic_t issued_discard; /* # of issued discard */ atomic_t issing_discard; /* # of issing discard */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index d137a08ec3a0..c23a52a339de 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -677,7 +677,7 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, block_t start, block_t len) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *pend_list = &(dcc->discard_pend_list); + struct list_head *pend_list = &(dcc->pend_list); struct discard_cmd *dc; dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); @@ -773,7 +773,7 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, bio->bi_end_io = f2fs_submit_discard_endio; bio->bi_opf |= REQ_SYNC; submit_bio(bio); - list_move_tail(&dc->list, &dcc->discard_wait_list); + list_move_tail(&dc->list, &dcc->wait_list); } } else { __remove_discard_cmd(sbi, dc); @@ -961,8 +961,8 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *pend_list = &(dcc->discard_pend_list); - struct list_head *wait_list = &(dcc->discard_wait_list); + struct list_head *pend_list = &(dcc->pend_list); + struct list_head *wait_list = &(dcc->wait_list); struct discard_cmd *dc, *tmp; struct blk_plug plug; @@ -986,8 +986,8 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; - struct list_head *pend_list = &dcc->discard_pend_list; - struct list_head *wait_list = &dcc->discard_wait_list; + struct list_head *pend_list = &dcc->pend_list; + struct list_head *wait_list = &dcc->wait_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; int iter = 0; @@ -1135,7 +1135,7 @@ static bool 
add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, unsigned int start = 0, end = -1; bool force = (cpc->reason == CP_DISCARD); struct discard_entry *de = NULL; - struct list_head *head = &SM_I(sbi)->dcc_info->discard_entry_list; + struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; if (se->valid_blocks == max_blocks || !f2fs_discard_en(sbi)) @@ -1184,7 +1184,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, void release_discard_addrs(struct f2fs_sb_info *sbi) { - struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list); + struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); struct discard_entry *entry, *this; /* drop caches */ @@ -1210,7 +1210,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi) void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) { - struct list_head *head = &(SM_I(sbi)->dcc_info->discard_entry_list); + struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list); struct discard_entry *entry, *this; struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; @@ -1311,9 +1311,9 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) if (!dcc) return -ENOMEM; - INIT_LIST_HEAD(&dcc->discard_entry_list); - INIT_LIST_HEAD(&dcc->discard_pend_list); - INIT_LIST_HEAD(&dcc->discard_wait_list); + INIT_LIST_HEAD(&dcc->entry_list); + INIT_LIST_HEAD(&dcc->pend_list); + INIT_LIST_HEAD(&dcc->wait_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); atomic_set(&dcc->issing_discard, 0); -- cgit v1.2.3 From ba48a33ef6faa573257b2a4181329f2d1eaafed9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Sat, 15 Apr 2017 14:09:37 +0800 Subject: f2fs: in prior to issue big discard Keep issuing big size discard in prior instead of the one with random size, so that we expect that it will help to: - be quick to recycle unused large space in flash storage device. 
- give a chance for a) wait to merge small piece discards into bigger one, or b) avoid issuing discards while they have being reallocated by SSR. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 7 ++++++- fs/f2fs/segment.c | 54 +++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 45 insertions(+), 16 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c1faf6d35a8d..d81a365930b5 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -188,6 +188,11 @@ struct discard_entry { unsigned char discard_map[SIT_VBLOCK_MAP_SIZE]; /* segment discard bitmap */ }; +/* max discard pend list number */ +#define MAX_PLIST_NUM 512 +#define plist_idx(blk_num) ((blk_num) >= MAX_PLIST_NUM ? \ + (MAX_PLIST_NUM - 1) : (blk_num - 1)) + enum { D_PREP, D_SUBMIT, @@ -221,7 +226,7 @@ struct discard_cmd { struct discard_cmd_control { struct task_struct *f2fs_issue_discard; /* discard thread */ struct list_head entry_list; /* 4KB discard entry list */ - struct list_head pend_list; /* store pending entries */ + struct list_head pend_list[MAX_PLIST_NUM];/* store pending entries */ struct list_head wait_list; /* store on-flushing entries */ wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ struct mutex cmd_lock; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index c23a52a339de..f026f70559eb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -677,9 +677,13 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, block_t start, block_t len) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *pend_list = &(dcc->pend_list); + struct list_head *pend_list; struct discard_cmd *dc; + f2fs_bug_on(sbi, !len); + + pend_list = &dcc->pend_list[plist_idx(len)]; + dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS); INIT_LIST_HEAD(&dc->list); dc->bdev = bdev; @@ -806,9 +810,16 @@ do_insert: return dc; } +static void __relocate_discard_cmd(struct 
discard_cmd_control *dcc, + struct discard_cmd *dc) +{ + list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]); +} + static void __punch_discard_cmd(struct f2fs_sb_info *sbi, struct discard_cmd *dc, block_t blkaddr) { + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_info di = dc->di; bool modified = false; @@ -819,6 +830,7 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, if (blkaddr > di.lstart) { dc->len = blkaddr - dc->lstart; + __relocate_discard_cmd(dcc, dc); modified = true; } @@ -832,6 +844,7 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, dc->lstart++; dc->len--; dc->start++; + __relocate_discard_cmd(dcc, dc); } } } @@ -890,6 +903,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, prev_dc->bdev == bdev && __is_discard_back_mergeable(&di, &prev_dc->di)) { prev_dc->di.len += di.len; + __relocate_discard_cmd(dcc, prev_dc); di = prev_dc->di; tdc = prev_dc; merged = true; @@ -901,6 +915,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, next_dc->di.lstart = di.lstart; next_dc->di.len += di.len; next_dc->di.start = di.start; + __relocate_discard_cmd(dcc, next_dc); if (tdc) __remove_discard_cmd(sbi, tdc); @@ -961,16 +976,20 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *pend_list = &(dcc->pend_list); + struct list_head *pend_list; struct list_head *wait_list = &(dcc->wait_list); struct discard_cmd *dc, *tmp; struct blk_plug plug; + int i; mutex_lock(&dcc->cmd_lock); blk_start_plug(&plug); - list_for_each_entry_safe(dc, tmp, pend_list, list) - __submit_discard_cmd(sbi, dc); + for (i = 0; i < MAX_PLIST_NUM; i++) { + pend_list = &dcc->pend_list[i]; + list_for_each_entry_safe(dc, tmp, pend_list, list) + __submit_discard_cmd(sbi, dc); + } blk_finish_plug(&plug); list_for_each_entry_safe(dc, tmp, wait_list, list) { @@ 
-986,26 +1005,30 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; - struct list_head *pend_list = &dcc->pend_list; + struct list_head *pend_list; struct list_head *wait_list = &dcc->wait_list; struct discard_cmd *dc, *tmp; struct blk_plug plug; - int iter = 0; + int iter = 0, i; repeat: if (kthread_should_stop()) return 0; mutex_lock(&dcc->cmd_lock); blk_start_plug(&plug); - list_for_each_entry_safe(dc, tmp, pend_list, list) { - f2fs_bug_on(sbi, dc->state != D_PREP); + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + pend_list = &dcc->pend_list[i]; + list_for_each_entry_safe(dc, tmp, pend_list, list) { + f2fs_bug_on(sbi, dc->state != D_PREP); - if (is_idle(sbi)) - __submit_discard_cmd(sbi, dc); + if (is_idle(sbi)) + __submit_discard_cmd(sbi, dc); - if (iter++ > DISCARD_ISSUE_RATE) - break; + if (iter++ > DISCARD_ISSUE_RATE) + goto next_step; + } } +next_step: blk_finish_plug(&plug); list_for_each_entry_safe(dc, tmp, wait_list, list) { @@ -1020,7 +1043,7 @@ repeat: congestion_wait(BLK_RW_SYNC, HZ/50); wait_event_interruptible(*q, kthread_should_stop() || - !list_empty(pend_list) || !list_empty(wait_list)); + atomic_read(&dcc->discard_cmd_cnt)); goto repeat; } @@ -1300,7 +1323,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) { dev_t dev = sbi->sb->s_bdev->bd_dev; struct discard_cmd_control *dcc; - int err = 0; + int err = 0, i; if (SM_I(sbi)->dcc_info) { dcc = SM_I(sbi)->dcc_info; @@ -1312,7 +1335,8 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) return -ENOMEM; INIT_LIST_HEAD(&dcc->entry_list); - INIT_LIST_HEAD(&dcc->pend_list); + for (i = 0; i < MAX_PLIST_NUM; i++) + INIT_LIST_HEAD(&dcc->pend_list[i]); INIT_LIST_HEAD(&dcc->wait_list); mutex_init(&dcc->cmd_lock); atomic_set(&dcc->issued_discard, 0); -- cgit v1.2.3 From 0243a5f9daf149654dc68c49783b27760f513cc1 Mon Sep 17 00:00:00 2001 From: Chao Yu 
Date: Sat, 15 Apr 2017 14:09:38 +0800 Subject: f2fs: trace __submit_discard_cmd Add an event class f2fs_discard for introducing f2fs_queue_discard, then use f2fs_{queue,issue}_discard to trace __{queue,submit}_discard_cmd. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f026f70559eb..8da49a1750da 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -763,6 +763,8 @@ static void __submit_discard_cmd(struct f2fs_sb_info *sbi, if (dc->state != D_PREP) return; + trace_f2fs_issue_discard(dc->bdev, dc->start, dc->len); + dc->error = __blkdev_issue_discard(dc->bdev, SECTOR_FROM_BLOCK(dc->start), SECTOR_FROM_BLOCK(dc->len), @@ -942,7 +944,7 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, { block_t lblkstart = blkstart; - trace_f2fs_issue_discard(bdev, blkstart, blklen); + trace_f2fs_queue_discard(bdev, blkstart, blklen); if (sbi->s_ndevs) { int devi = f2fs_target_device_index(sbi, blkstart); -- cgit v1.2.3 From df0f6b44dd59cd07aa3ff583dee04b3b563648d0 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Mon, 17 Apr 2017 18:21:43 +0800 Subject: f2fs: introduce __check_rb_tree_consistence Introduce __check_rb_tree_consistence to check consistence of rb-tree based discard cache in runtime.
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/extent_cache.c | 32 ++++++++++++++++++++++++++++++++ fs/f2fs/f2fs.h | 2 ++ fs/f2fs/segment.c | 15 +++++++++++++-- 3 files changed, 47 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c index 221ad086ee00..2f98d7039701 100644 --- a/fs/f2fs/extent_cache.c +++ b/fs/f2fs/extent_cache.c @@ -159,6 +159,38 @@ lookup_neighbors: return re; } +bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi, + struct rb_root *root) +{ +#ifdef CONFIG_F2FS_CHECK_FS + struct rb_node *cur = rb_first(root), *next; + struct rb_entry *cur_re, *next_re; + + if (!cur) + return true; + + while (cur) { + next = rb_next(cur); + if (!next) + return true; + + cur_re = rb_entry(cur, struct rb_entry, rb_node); + next_re = rb_entry(next, struct rb_entry, rb_node); + + if (cur_re->ofs + cur_re->len > next_re->ofs) { + f2fs_msg(sbi->sb, KERN_INFO, "inconsistent rbtree, " + "cur(%u, %u) next(%u, %u)", + cur_re->ofs, cur_re->len, + next_re->ofs, next_re->len); + return false; + } + + cur = next; + } +#endif + return true; +} + static struct kmem_cache *extent_tree_slab; static struct kmem_cache *extent_node_slab; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d81a365930b5..252569141b69 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2620,6 +2620,8 @@ struct rb_entry *__lookup_rb_tree_ret(struct rb_root *root, struct rb_entry **prev_entry, struct rb_entry **next_entry, struct rb_node ***insert_p, struct rb_node **insert_parent, bool force); +bool __check_rb_tree_consistence(struct f2fs_sb_info *sbi, + struct rb_root *root); unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink); bool f2fs_init_extent_tree(struct inode *inode, struct f2fs_extent *i_ext); void f2fs_drop_extent_tree(struct inode *inode); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 8da49a1750da..f88271643ec4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -833,6 
+833,7 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, if (blkaddr > di.lstart) { dc->len = blkaddr - dc->lstart; __relocate_discard_cmd(dcc, dc); + f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); modified = true; } @@ -842,11 +843,15 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, di.start + blkaddr + 1 - di.lstart, di.lstart + di.len - 1 - blkaddr, NULL, NULL); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); } else { dc->lstart++; dc->len--; dc->start++; __relocate_discard_cmd(dcc, dc); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); } } } @@ -906,6 +911,8 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, __is_discard_back_mergeable(&di, &prev_dc->di)) { prev_dc->di.len += di.len; __relocate_discard_cmd(dcc, prev_dc); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); di = prev_dc->di; tdc = prev_dc; merged = true; @@ -920,13 +927,17 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, __relocate_discard_cmd(dcc, next_dc); if (tdc) __remove_discard_cmd(sbi, tdc); - + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); merged = true; } - if (!merged) + if (!merged) { __insert_discard_tree(sbi, bdev, di.lstart, di.start, di.len, NULL, NULL); + f2fs_bug_on(sbi, + !__check_rb_tree_consistence(sbi, &dcc->root)); + } next: prev_dc = next_dc; if (!prev_dc) -- cgit v1.2.3 From 001c584cca6fce8e91f19eca88781b8c16d1ea42 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Apr 2017 19:23:39 +0800 Subject: f2fs: unlock cp_rwsem early for IPU writes For IPU writes, there won't be any updates in dnode page since we will reuse old block address instead of allocating new one, so we don't need to lock cp_rwsem during IPU IO submitting.
Signed-off-by: Chao Yu --- fs/f2fs/data.c | 6 +++++- fs/f2fs/f2fs.h | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index e984a42eabf4..32d5a3b38a3f 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1357,6 +1357,8 @@ retry_encrypt: !is_cold_data(page) && !IS_ATOMIC_WRITTEN_PAGE(page) && need_inplace_update(inode))) { + f2fs_unlock_op(F2FS_I_SB(inode)); + fio->cp_rwsem_locked = false; err = rewrite_data_page(fio); set_inode_flag(inode, FI_UPDATE_WRITE); trace_f2fs_do_write_data_page(page, IPU); @@ -1392,6 +1394,7 @@ static int __write_data_page(struct page *page, bool *submitted, .page = page, .encrypted_page = NULL, .submitted = false, + .cp_rwsem_locked = true, }; trace_f2fs_writepage(page, DATA); @@ -1449,7 +1452,8 @@ write: err = do_write_data_page(&fio); if (F2FS_I(inode)->last_disk_size < psize) F2FS_I(inode)->last_disk_size = psize; - f2fs_unlock_op(sbi); + if (fio.cp_rwsem_locked) + f2fs_unlock_op(sbi); done: if (err && err != -ENOENT) goto redirty_out; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 252569141b69..37911ef7223d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -793,6 +793,7 @@ struct f2fs_io_info { struct page *page; /* page to be written */ struct page *encrypted_page; /* encrypted page */ bool submitted; /* indicate IO submission */ + bool cp_rwsem_locked; /* indicate cp_rwsem is held */ }; #define is_read_io(rw) ((rw) == READ) -- cgit v1.2.3 From d84d1cbdec6b5d9ba391079a111572197db66d54 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 18 Apr 2017 19:27:39 +0800 Subject: f2fs: add undiscard blocks stat This patch adds to account undiscard blocks. 
Signed-off-by: Chao Yu --- fs/f2fs/debug.c | 5 +++-- fs/f2fs/f2fs.h | 2 ++ fs/f2fs/segment.c | 9 +++++++++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 692beff66bf8..6102737473d4 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -69,6 +69,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) atomic_read(&SM_I(sbi)->dcc_info->issing_discard); si->nr_discard_cmd = atomic_read(&SM_I(sbi)->dcc_info->discard_cmd_cnt); + si->undiscard_blks = SM_I(sbi)->dcc_info->undiscard_blks; } si->total_count = (int)sbi->user_block_count / sbi->blocks_per_seg; si->rsvd_segs = reserved_segments(sbi); @@ -348,11 +349,11 @@ static int stat_show(struct seq_file *s, void *v) si->ext_tree, si->zombie_tree, si->ext_node); seq_puts(s, "\nBalancing F2FS Async:\n"); seq_printf(s, " - IO (CP: %4d, Data: %4d, Flush: (%4d %4d), " - "Discard: (%4d %4d)) cmd: %4d\n", + "Discard: (%4d %4d)) cmd: %4d undiscard:%4u\n", si->nr_wb_cp_data, si->nr_wb_data, si->nr_flushing, si->nr_flushed, si->nr_discarding, si->nr_discarded, - si->nr_discard_cmd); + si->nr_discard_cmd, si->undiscard_blks); seq_printf(s, " - inmem: %4d, atomic IO: %4d (Max. %4d), " "volatile IO: %4d (Max. %4d)\n", si->inmem_pages, si->aw_cnt, si->max_aw_cnt, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 37911ef7223d..58d288e596a7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -232,6 +232,7 @@ struct discard_cmd_control { struct mutex cmd_lock; int nr_discards; /* # of discards in the list */ int max_discards; /* max. 
discards to be issued */ + unsigned int undiscard_blks; /* # of undiscard blocks */ atomic_t issued_discard; /* # of issued discard */ atomic_t issing_discard; /* # of issing discard */ atomic_t discard_cmd_cnt; /* # of cached cmd count */ @@ -2390,6 +2391,7 @@ struct f2fs_stat_info { int bg_gc, nr_wb_cp_data, nr_wb_data; int nr_flushing, nr_flushed, nr_discarding, nr_discarded; int nr_discard_cmd; + unsigned int undiscard_blks; int inline_xattr, inline_inode, inline_dir, append, update, orphans; int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt; unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index f88271643ec4..dd07c5c91ac4 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -695,6 +695,7 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, init_completion(&dc->wait); list_add_tail(&dc->list, pend_list); atomic_inc(&dcc->discard_cmd_cnt); + dcc->undiscard_blks += len; return dc; } @@ -723,6 +724,7 @@ static void __detach_discard_cmd(struct discard_cmd_control *dcc, list_del(&dc->list); rb_erase(&dc->rb_node, &dcc->root); + dcc->undiscard_blks -= dc->len; kmem_cache_free(discard_cmd_slab, dc); @@ -830,8 +832,11 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, return; } + dcc->undiscard_blks -= di.len; + if (blkaddr > di.lstart) { dc->len = blkaddr - dc->lstart; + dcc->undiscard_blks += dc->len; __relocate_discard_cmd(dcc, dc); f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); modified = true; @@ -849,6 +854,7 @@ static void __punch_discard_cmd(struct f2fs_sb_info *sbi, dc->lstart++; dc->len--; dc->start++; + dcc->undiscard_blks += dc->len; __relocate_discard_cmd(dcc, dc); f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); @@ -910,6 +916,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, prev_dc->bdev == bdev && __is_discard_back_mergeable(&di, &prev_dc->di)) { prev_dc->di.len += di.len; + 
dcc->undiscard_blks += di.len; __relocate_discard_cmd(dcc, prev_dc); f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, &dcc->root)); @@ -924,6 +931,7 @@ static void __update_discard_tree_range(struct f2fs_sb_info *sbi, next_dc->di.lstart = di.lstart; next_dc->di.len += di.len; next_dc->di.start = di.start; + dcc->undiscard_blks += di.len; __relocate_discard_cmd(dcc, next_dc); if (tdc) __remove_discard_cmd(sbi, tdc); @@ -1357,6 +1365,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; dcc->max_discards = 0; + dcc->undiscard_blks = 0; dcc->root = RB_ROOT; init_waitqueue_head(&dcc->discard_wait_queue); -- cgit v1.2.3 From 04485987f05388ffec04cdab7808ba26db30c9b8 Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Tue, 18 Apr 2017 11:57:16 +0000 Subject: f2fs: introduce async IPU policy This patch introduces an ASYNC IPU policy. Under a scenario of large # of async updating (e.g. log writing in Android), disk would be seriously fragmented, and more frequent gc would be triggered. This patch uses IPU to rewrite the async update writes, since async is NOT sensitive to io latency.
Signed-off-by: Hou Pengyang --- fs/f2fs/data.c | 2 +- fs/f2fs/file.c | 2 +- fs/f2fs/segment.h | 12 +++++++++++- 3 files changed, 13 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 32d5a3b38a3f..7d46a8e6d350 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1356,7 +1356,7 @@ retry_encrypt: if (unlikely(fio->old_blkaddr != NEW_ADDR && !is_cold_data(page) && !IS_ATOMIC_WRITTEN_PAGE(page) && - need_inplace_update(inode))) { + need_inplace_update(inode, fio))) { f2fs_unlock_op(F2FS_I_SB(inode)); fio->cp_rwsem_locked = false; err = rewrite_data_page(fio); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0ac833dd2634..0ccc8cf70e7a 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1898,7 +1898,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, int err; /* if in-place-update policy is enabled, don't waste time here */ - if (need_inplace_update(inode)) + if (need_inplace_update(inode, NULL)) return -EINVAL; pg_start = range->start >> PAGE_SHIFT; diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 5f6ef163aa8f..3cd780a42f51 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -556,9 +556,11 @@ enum { F2FS_IPU_UTIL, F2FS_IPU_SSR_UTIL, F2FS_IPU_FSYNC, + F2FS_IPU_ASYNC, }; -static inline bool need_inplace_update(struct inode *inode) +static inline bool need_inplace_update(struct inode *inode, + struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int policy = SM_I(sbi)->ipu_policy; @@ -581,6 +583,14 @@ static inline bool need_inplace_update(struct inode *inode) utilization(sbi) > SM_I(sbi)->min_ipu_util) return true; + /* + * IPU for rewrite async pages + */ + if (policy & (0x1 << F2FS_IPU_ASYNC) && + fio && fio->op == REQ_OP_WRITE && + !(fio->op_flags & REQ_SYNC)) + return true; + /* this is only set during fdatasync */ if (policy & (0x1 << F2FS_IPU_FSYNC) && is_inode_flag_set(inode, FI_NEED_IPU)) -- cgit v1.2.3 From e066b83c9b40f3a6951fb693ef0943fa1dfc40c2 Mon 
Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 13 Apr 2017 15:17:00 -0700 Subject: f2fs: add ioctl to flush data from faster device to cold area This patch adds an ioctl to flush data in faster device to cold area. User can give device number and number of segments to move. It doesn't move it if there is only one device. The parameter looks like: struct f2fs_flush_device { u32 dev_num; /* device number to flush */ u32 segments; /* # of segments to flush */ }; Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 11 +++++++-- fs/f2fs/file.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- fs/f2fs/gc.c | 42 ++++++++++++++++++++++----------- fs/f2fs/segment.c | 14 +++++++---- fs/f2fs/segment.h | 7 +++++- 5 files changed, 120 insertions(+), 23 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 58d288e596a7..59a08e73f194 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -300,6 +300,8 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8) #define F2FS_IOC_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ struct f2fs_move_range) +#define F2FS_IOC_FLUSH_DEVICE _IOW(F2FS_IOCTL_MAGIC, 10, \ + struct f2fs_flush_device) #define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY #define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY @@ -336,6 +338,11 @@ struct f2fs_move_range { u64 len; /* size to move */ }; +struct f2fs_flush_device { + u32 dev_num; /* device number to flush */ + u32 segments; /* # of segments to flush */ +}; + /* * For INODE and NODE manager */ @@ -980,7 +987,6 @@ struct f2fs_sb_info { int bg_gc; /* background gc calls */ unsigned int ndirty_inode[NR_INODE_TYPE]; /* # of dirty inodes */ #endif - unsigned int last_victim[2]; /* last victim segment # */ spinlock_t stat_lock; /* lock for stat operations */ /* For sysfs suppport */ @@ -2362,7 +2368,8 @@ int f2fs_migrate_page(struct address_space *mapping, struct page *newpage, int 
start_gc_thread(struct f2fs_sb_info *sbi); void stop_gc_thread(struct f2fs_sb_info *sbi); block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode); -int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background); +int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, + unsigned int segno); void build_gc_manager(struct f2fs_sb_info *sbi); /* diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 0ccc8cf70e7a..32050f4c3592 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1855,7 +1855,7 @@ static int f2fs_ioc_gc(struct file *filp, unsigned long arg) mutex_lock(&sbi->gc_mutex); } - ret = f2fs_gc(sbi, sync, true); + ret = f2fs_gc(sbi, sync, true, NULL_SEGNO); out: mnt_drop_write_file(filp); return ret; @@ -2211,6 +2211,69 @@ err_out: return err; } +static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg) +{ + struct inode *inode = file_inode(filp); + struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + struct sit_info *sm = SIT_I(sbi); + unsigned int start_segno = 0, end_segno = 0; + unsigned int dev_start_segno = 0, dev_end_segno = 0; + struct f2fs_flush_device range; + int ret; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (f2fs_readonly(sbi->sb)) + return -EROFS; + + if (copy_from_user(&range, (struct f2fs_flush_device __user *)arg, + sizeof(range))) + return -EFAULT; + + if (sbi->s_ndevs <= 1 || sbi->s_ndevs - 1 <= range.dev_num || + sbi->segs_per_sec != 1) { + f2fs_msg(sbi->sb, KERN_WARNING, + "Can't flush %u in %d for segs_per_sec %u != 1\n", + range.dev_num, sbi->s_ndevs, + sbi->segs_per_sec); + return -EINVAL; + } + + ret = mnt_want_write_file(filp); + if (ret) + return ret; + + if (range.dev_num != 0) + dev_start_segno = GET_SEGNO(sbi, FDEV(range.dev_num).start_blk); + dev_end_segno = GET_SEGNO(sbi, FDEV(range.dev_num).end_blk); + + start_segno = sm->last_victim[FLUSH_DEVICE]; + if (start_segno < dev_start_segno || start_segno >= dev_end_segno) + start_segno = dev_start_segno; + end_segno = min(start_segno + 
range.segments, dev_end_segno); + + while (start_segno < end_segno) { + if (!mutex_trylock(&sbi->gc_mutex)) { + ret = -EBUSY; + goto out; + } + sm->last_victim[GC_CB] = end_segno + 1; + sm->last_victim[GC_GREEDY] = end_segno + 1; + sm->last_victim[ALLOC_NEXT] = end_segno + 1; + ret = f2fs_gc(sbi, true, true, start_segno); + if (ret == -EAGAIN) + ret = 0; + else if (ret < 0) + break; + start_segno++; + } +out: + mnt_drop_write_file(filp); + return ret; +} + + long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) { switch (cmd) { @@ -2248,6 +2311,8 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) return f2fs_ioc_defragment(filp, arg); case F2FS_IOC_MOVE_RANGE: return f2fs_ioc_move_range(filp, arg); + case F2FS_IOC_FLUSH_DEVICE: + return f2fs_ioc_flush_device(filp, arg); default: return -ENOTTY; } @@ -2315,8 +2380,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case F2FS_IOC_GARBAGE_COLLECT: case F2FS_IOC_WRITE_CHECKPOINT: case F2FS_IOC_DEFRAGMENT: - break; case F2FS_IOC_MOVE_RANGE: + case F2FS_IOC_FLUSH_DEVICE: break; default: return -ENOIOCTLCMD; diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 9172112d6246..74a10b7675f5 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -84,7 +84,7 @@ static int gc_thread_func(void *data) stat_inc_bggc_count(sbi); /* if return value is not zero, no victim was selected */ - if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true)) + if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC), true, NULL_SEGNO)) wait_ms = gc_th->no_gc_sleep_time; trace_f2fs_background_gc(sbi->sb, wait_ms, @@ -176,7 +176,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type, if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) p->offset = 0; else - p->offset = sbi->last_victim[p->gc_mode]; + p->offset = SIT_I(sbi)->last_victim[p->gc_mode]; } static unsigned int get_max_cost(struct f2fs_sb_info *sbi, @@ -295,6 +295,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, unsigned 
int *result, int gc_type, int type, char alloc_mode) { struct dirty_seglist_info *dirty_i = DIRTY_I(sbi); + struct sit_info *sm = SIT_I(sbi); struct victim_sel_policy p; unsigned int secno, last_victim; unsigned int last_segment = MAIN_SEGS(sbi); @@ -308,10 +309,18 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, p.min_segno = NULL_SEGNO; p.min_cost = get_max_cost(sbi, &p); + if (*result != NULL_SEGNO) { + if (IS_DATASEG(get_seg_entry(sbi, *result)->type) && + get_valid_blocks(sbi, *result, false) && + !sec_usage_check(sbi, GET_SEC_FROM_SEG(sbi, *result))) + p.min_segno = *result; + goto out; + } + if (p.max_search == 0) goto out; - last_victim = sbi->last_victim[p.gc_mode]; + last_victim = sm->last_victim[p.gc_mode]; if (p.alloc_mode == LFS && gc_type == FG_GC) { p.min_segno = check_bg_victims(sbi); if (p.min_segno != NULL_SEGNO) @@ -324,9 +333,10 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, segno = find_next_bit(p.dirty_segmap, last_segment, p.offset); if (segno >= last_segment) { - if (sbi->last_victim[p.gc_mode]) { - last_segment = sbi->last_victim[p.gc_mode]; - sbi->last_victim[p.gc_mode] = 0; + if (sm->last_victim[p.gc_mode]) { + last_segment = + sm->last_victim[p.gc_mode]; + sm->last_victim[p.gc_mode] = 0; p.offset = 0; continue; } @@ -361,11 +371,11 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi, } next: if (nsearched >= p.max_search) { - if (!sbi->last_victim[p.gc_mode] && segno <= last_victim) - sbi->last_victim[p.gc_mode] = last_victim + 1; + if (!sm->last_victim[p.gc_mode] && segno <= last_victim) + sm->last_victim[p.gc_mode] = last_victim + 1; else - sbi->last_victim[p.gc_mode] = segno + 1; - sbi->last_victim[p.gc_mode] %= MAIN_SEGS(sbi); + sm->last_victim[p.gc_mode] = segno + 1; + sm->last_victim[p.gc_mode] %= MAIN_SEGS(sbi); break; } } @@ -912,7 +922,6 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi, * - mutex_lock(sentry_lock) - change_curseg() * - lock_page(sum_page) */ - if (type == 
SUM_TYPE_NODE) gc_node_segment(sbi, sum->entries, segno, gc_type); else @@ -939,13 +948,14 @@ next: return sec_freed; } -int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background) +int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, + bool background, unsigned int segno) { - unsigned int segno; int gc_type = sync ? FG_GC : BG_GC; int sec_freed = 0; int ret = -EINVAL; struct cp_control cpc; + unsigned int init_segno = segno; struct gc_inode_list gc_list = { .ilist = LIST_HEAD_INIT(gc_list.ilist), .iroot = RADIX_TREE_INIT(GFP_NOFS), @@ -990,13 +1000,17 @@ gc_more: sbi->cur_victim_sec = NULL_SEGNO; if (!sync) { - if (has_not_enough_free_secs(sbi, sec_freed, 0)) + if (has_not_enough_free_secs(sbi, sec_freed, 0)) { + segno = NULL_SEGNO; goto gc_more; + } if (gc_type == FG_GC) ret = write_checkpoint(sbi, &cpc); } stop: + SIT_I(sbi)->last_victim[ALLOC_NEXT] = 0; + SIT_I(sbi)->last_victim[FLUSH_DEVICE] = init_segno; mutex_unlock(&sbi->gc_mutex); put_gc_inode(&gc_list); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index dd07c5c91ac4..1b16770f9d2e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -401,7 +401,7 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) */ if (has_not_enough_free_secs(sbi, 0, 0)) { mutex_lock(&sbi->gc_mutex); - f2fs_gc(sbi, false, false); + f2fs_gc(sbi, false, false, NULL_SEGNO); } } @@ -1755,6 +1755,8 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) return 0; + if (SIT_I(sbi)->last_victim[ALLOC_NEXT]) + return SIT_I(sbi)->last_victim[ALLOC_NEXT]; return CURSEG_I(sbi, type)->segno; } @@ -1852,12 +1854,15 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) { struct curseg_info *curseg = CURSEG_I(sbi, type); const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops; + unsigned segno = NULL_SEGNO; int i, cnt; bool reversed = false; /* need_SSR() already forces to do this */ - if (v_ops->get_victim(sbi, &(curseg)->next_segno, BG_GC, type, 
SSR)) + if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) { + curseg->next_segno = segno; return 1; + } /* For node segments, let's do SSR more intensively */ if (IS_NODESEG(type)) { @@ -1881,9 +1886,10 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) for (; cnt-- > 0; reversed ? i-- : i++) { if (i == type) continue; - if (v_ops->get_victim(sbi, &(curseg)->next_segno, - BG_GC, i, SSR)) + if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) { + curseg->next_segno = segno; return 1; + } } return 0; } diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 3cd780a42f51..93cc4e504aab 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -138,7 +138,10 @@ enum { */ enum { GC_CB = 0, - GC_GREEDY + GC_GREEDY, + ALLOC_NEXT, + FLUSH_DEVICE, + MAX_GC_POLICY, }; /* @@ -233,6 +236,8 @@ struct sit_info { unsigned long long mounted_time; /* mount time */ unsigned long long min_mtime; /* min. modification time */ unsigned long long max_mtime; /* max. modification time */ + + unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */ }; struct free_segmap_info { -- cgit v1.2.3 From d07efb507779171bd670e8a12f283a3e245cde08 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 18 Apr 2017 13:47:25 -0700 Subject: f2fs: fix _IOW usage This patch fixes wrong _IOW usage. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 59a08e73f194..d80be319ddcf 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -295,9 +295,10 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal, #define F2FS_IOC_START_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 3) #define F2FS_IOC_RELEASE_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 4) #define F2FS_IOC_ABORT_VOLATILE_WRITE _IO(F2FS_IOCTL_MAGIC, 5) -#define F2FS_IOC_GARBAGE_COLLECT _IO(F2FS_IOCTL_MAGIC, 6) +#define F2FS_IOC_GARBAGE_COLLECT _IOW(F2FS_IOCTL_MAGIC, 6, __u32) #define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7) -#define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8) +#define F2FS_IOC_DEFRAGMENT _IOWR(F2FS_IOCTL_MAGIC, 8, \ + struct f2fs_defragment) #define F2FS_IOC_MOVE_RANGE _IOWR(F2FS_IOCTL_MAGIC, 9, \ struct f2fs_move_range) #define F2FS_IOC_FLUSH_DEVICE _IOW(F2FS_IOCTL_MAGIC, 10, \ -- cgit v1.2.3 From d579324998a39fa6e13edea2f06506840df9b729 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 18 Apr 2017 15:03:15 -0700 Subject: f2fs: assign allocation hint for warm/cold data This patch gives slower device region to warm/cold data area more eagerly. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/gc.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 74a10b7675f5..84db41ca27c1 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1033,4 +1033,9 @@ void build_gc_manager(struct f2fs_sb_info *sbi) sbi->fggc_threshold = div64_u64((main_count - ovp_count) * BLKS_PER_SEC(sbi), (main_count - resv_count)); + + /* give warm/cold data area from slower device */ + if (sbi->s_ndevs && sbi->segs_per_sec == 1) + SIT_I(sbi)->last_victim[ALLOC_NEXT] = + GET_SEGNO(sbi, FDEV(0).end_blk) + 1; } -- cgit v1.2.3 From d66450e77350d47ac865a56fec4a62dfcc7756ae Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Apr 2017 19:38:33 +0200 Subject: f2fs: improve definition of statistic macros With a recent addition of f2fs_lookup_extent_tree(), we get a warning about the use of empty macros: fs/f2fs/extent_cache.c: In function 'f2fs_lookup_extent_tree': fs/f2fs/extent_cache.c:358:32: error: suggest braces around empty body in an 'else' statement [-Werror=empty-body] stat_inc_rbtree_node_hit(sbi); A good way to avoid the warning and make the code more robust is to define all no-op macros as 'do { } while (0)'. 
Fixes: 54c2258cd63a ("f2fs: extract rb-tree operation infrastructure") Signed-off-by: Arnd Bergmann Reivewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 58 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d80be319ddcf..b590c0a6cfa7 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2532,35 +2532,35 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi); int __init f2fs_create_root_stats(void); void f2fs_destroy_root_stats(void); #else -#define stat_inc_cp_count(si) -#define stat_inc_bg_cp_count(si) -#define stat_inc_call_count(si) -#define stat_inc_bggc_count(si) -#define stat_inc_dirty_inode(sbi, type) -#define stat_dec_dirty_inode(sbi, type) -#define stat_inc_total_hit(sb) -#define stat_inc_rbtree_node_hit(sb) -#define stat_inc_largest_node_hit(sbi) -#define stat_inc_cached_node_hit(sbi) -#define stat_inc_inline_xattr(inode) -#define stat_dec_inline_xattr(inode) -#define stat_inc_inline_inode(inode) -#define stat_dec_inline_inode(inode) -#define stat_inc_inline_dir(inode) -#define stat_dec_inline_dir(inode) -#define stat_inc_atomic_write(inode) -#define stat_dec_atomic_write(inode) -#define stat_update_max_atomic_write(inode) -#define stat_inc_volatile_write(inode) -#define stat_dec_volatile_write(inode) -#define stat_update_max_volatile_write(inode) -#define stat_inc_seg_type(sbi, curseg) -#define stat_inc_block_count(sbi, curseg) -#define stat_inc_inplace_blocks(sbi) -#define stat_inc_seg_count(sbi, type, gc_type) -#define stat_inc_tot_blk_count(si, blks) -#define stat_inc_data_blk_count(sbi, blks, gc_type) -#define stat_inc_node_blk_count(sbi, blks, gc_type) +#define stat_inc_cp_count(si) do { } while (0) +#define stat_inc_bg_cp_count(si) do { } while (0) +#define stat_inc_call_count(si) do { } while (0) +#define stat_inc_bggc_count(si) do { } while (0) +#define stat_inc_dirty_inode(sbi, type) do { } while (0) 
+#define stat_dec_dirty_inode(sbi, type) do { } while (0) +#define stat_inc_total_hit(sb) do { } while (0) +#define stat_inc_rbtree_node_hit(sb) do { } while (0) +#define stat_inc_largest_node_hit(sbi) do { } while (0) +#define stat_inc_cached_node_hit(sbi) do { } while (0) +#define stat_inc_inline_xattr(inode) do { } while (0) +#define stat_dec_inline_xattr(inode) do { } while (0) +#define stat_inc_inline_inode(inode) do { } while (0) +#define stat_dec_inline_inode(inode) do { } while (0) +#define stat_inc_inline_dir(inode) do { } while (0) +#define stat_dec_inline_dir(inode) do { } while (0) +#define stat_inc_atomic_write(inode) do { } while (0) +#define stat_dec_atomic_write(inode) do { } while (0) +#define stat_update_max_atomic_write(inode) do { } while (0) +#define stat_inc_volatile_write(inode) do { } while (0) +#define stat_dec_volatile_write(inode) do { } while (0) +#define stat_update_max_volatile_write(inode) do { } while (0) +#define stat_inc_seg_type(sbi, curseg) do { } while (0) +#define stat_inc_block_count(sbi, curseg) do { } while (0) +#define stat_inc_inplace_blocks(sbi) do { } while (0) +#define stat_inc_seg_count(sbi, type, gc_type) do { } while (0) +#define stat_inc_tot_blk_count(si, blks) do { } while (0) +#define stat_inc_data_blk_count(sbi, blks, gc_type) do { } while (0) +#define stat_inc_node_blk_count(sbi, blks, gc_type) do { } while (0) static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } -- cgit v1.2.3 From a788189305df9fa617e5e26dc0914d80d981cd57 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 20 Apr 2017 13:51:57 -0700 Subject: f2fs: fix out-of free segments This patch also reverts d0db7703ac1 ("f2fs: do SSR in higher priority"). 
This patch fixes out of free segments caused by many small file creation by 1) mkfs -s 1 2G 2) mount 3) untar - produce 60000 small files in a burst 4) sync - flush node pages - flush imeta Here, when we do f2fs_balance_fs, we missed # of imeta blocks, resulting in skipping to check has_not_enough_free_secs. Another test is done by 1) mkfs -s 12 2G 2) mount 3) untar - produce 60000 small files in a burst 4) sync - flush node pages - flush imeta In this case, this patch also fixes wrong block allocation under large section size. Reported-by: William Brana Cc: Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 3 ++- fs/f2fs/inode.c | 3 ++- fs/f2fs/segment.c | 26 +++++++++++++++++++++----- 3 files changed, 25 insertions(+), 7 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 7d46a8e6d350..b8dcd1e224e8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1472,7 +1472,8 @@ out: } unlock_page(page); - f2fs_balance_fs(sbi, need_balance_fs); + if (!S_ISDIR(inode->i_mode)) + f2fs_balance_fs(sbi, need_balance_fs); if (unlikely(f2fs_cp_error(sbi))) { f2fs_submit_merged_bio(sbi, DATA, WRITE); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index 0900814485c7..518f49643092 100644 --- a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -338,7 +338,8 @@ int f2fs_write_inode(struct inode *inode, struct writeback_control *wbc) * We need to balance fs here to prevent from producing dirty node pages * during the urgent cleaning time when runing out of free sections.
*/ - if (update_inode_page(inode) && wbc && wbc->nr_to_write) + update_inode_page(inode); + if (wbc && wbc->nr_to_write) f2fs_balance_fs(sbi, true); return 0; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 1b16770f9d2e..45da59b72035 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -388,11 +388,8 @@ void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need) } #endif - if (!need) - return; - /* balance_fs_bg is able to be pending */ - if (excess_cached_nats(sbi)) + if (need && excess_cached_nats(sbi)) f2fs_balance_fs_bg(sbi); /* @@ -1639,6 +1636,17 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi, f2fs_put_page(page, 1); } +static int is_next_segment_free(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + unsigned int segno = curseg->segno + 1; + struct free_segmap_info *free_i = FREE_I(sbi); + + if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec) + return !test_bit(segno, free_i->free_segmap); + return 0; +} + /* * Find a new segment from the free segments bitmap to right order * This function should be returned with success, otherwise BUG @@ -1752,6 +1760,10 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified) static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type) { + /* if segs_per_sec is large than 1, we need to keep original policy. 
*/ + if (sbi->segs_per_sec != 1) + return CURSEG_I(sbi, type)->segno; + if (type == CURSEG_HOT_DATA || IS_NODESEG(type)) return 0; @@ -1901,17 +1913,21 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type) static void allocate_segment_by_default(struct f2fs_sb_info *sbi, int type, bool force) { + struct curseg_info *curseg = CURSEG_I(sbi, type); + if (force) new_curseg(sbi, type, true); else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) && type == CURSEG_WARM_NODE) new_curseg(sbi, type, false); + else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type)) + new_curseg(sbi, type, false); else if (need_SSR(sbi) && get_ssr_segment(sbi, type)) change_curseg(sbi, type, true); else new_curseg(sbi, type, false); - stat_inc_seg_type(sbi, CURSEG_I(sbi, type)); + stat_inc_seg_type(sbi, curseg); } void allocate_new_segments(struct f2fs_sb_info *sbi) -- cgit v1.2.3 From 4086d3f61b6573f65ddc13fc375c0c7b0ac482a0 Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Fri, 21 Apr 2017 12:41:48 +0000 Subject: f2fs: skip encrypted inode in ASYNC IPU policy Async request may be throttled in block layer, so page for async may keep WRITE_BACK for a long time. For encrypted inode, we need to wait on page writeback no matter if the device supports BDI_CAP_STABLE_WRITES. This may result in a higher waiting page writeback time for async encrypted inode page. This patch skips IPU for encrypted inode's updating write.
Signed-off-by: Hou Pengyang Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 93cc4e504aab..8ad22b8cbba7 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -593,7 +593,8 @@ static inline bool need_inplace_update(struct inode *inode, */ if (policy & (0x1 << F2FS_IPU_ASYNC) && fio && fio->op == REQ_OP_WRITE && - !(fio->op_flags & REQ_SYNC)) + !(fio->op_flags & REQ_SYNC) && + !f2fs_encrypted_inode(inode)) return true; /* this is only set during fdatasync */ -- cgit v1.2.3 From d3bb910c15d75ee3340311c64a1c05985bb663a3 Mon Sep 17 00:00:00 2001 From: Sheng Yong Date: Sat, 22 Apr 2017 10:39:20 +0800 Subject: f2fs: fix multiple f2fs_add_link() having same name for inline dentry Commit 88c5c13a5027 (f2fs: fix multiple f2fs_add_link() calls having same name) does not cover the scenario where inline dentry is enabled. In that case, F2FS_I(dir)->task will be NULL, and __f2fs_add_link will look up dentries one more time. This patch fixes it by moving the assignment of current task to an upper level to cover both normal and inline dentry.
Cc: Fixes: 88c5c13a5027 (f2fs: fix multiple f2fs_add_link() calls having same name) Signed-off-by: Sheng Yong Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/dir.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c index c143dffcae6e..b8c62e9669bc 100644 --- a/fs/f2fs/dir.c +++ b/fs/f2fs/dir.c @@ -207,13 +207,9 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir, f2fs_put_page(dentry_page, 0); } - /* This is to increase the speed of f2fs_create */ - if (!de && room) { - F2FS_I(dir)->task = current; - if (F2FS_I(dir)->chash != namehash) { - F2FS_I(dir)->chash = namehash; - F2FS_I(dir)->clevel = level; - } + if (!de && room && F2FS_I(dir)->chash != namehash) { + F2FS_I(dir)->chash = namehash; + F2FS_I(dir)->clevel = level; } return de; @@ -254,6 +250,9 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir, break; } out: + /* This is to increase the speed of f2fs_create */ + if (!de) + F2FS_I(dir)->task = current; return de; } -- cgit v1.2.3 From 66a82d1fc7e058b28189fe068f2a5efd9abc719a Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Sat, 22 Apr 2017 18:06:26 +0800 Subject: f2fs: seperate read nat page from nat_tree_lock This patch separates nat page read io from nat_tree_lock. -lock_page -get_node_info() -current_nat_addr ...... -> write_checkpoint -get_meta_page Because we lock node page, we can make sure no other threads modify this nid concurrently. So we just obtain current_nat_addr under nat_tree_lock, node info is always same in both nat pack.
Signed-off-by: Yunlei He Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index ad54e907b97b..b99e318bce58 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -376,6 +376,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) struct page *page = NULL; struct f2fs_nat_entry ne; struct nat_entry *e; + pgoff_t index; int i; ni->nid = nid; @@ -401,17 +402,21 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni) node_info_from_raw_nat(ni, &ne); } up_read(&curseg->journal_rwsem); - if (i >= 0) + if (i >= 0) { + up_read(&nm_i->nat_tree_lock); goto cache; + } /* Fill node_info from nat page */ - page = get_current_nat_page(sbi, start_nid); + index = current_nat_addr(sbi, nid); + up_read(&nm_i->nat_tree_lock); + + page = get_meta_page(sbi, index); nat_blk = (struct f2fs_nat_block *)page_address(page); ne = nat_blk->entries[nid - start_nid]; node_info_from_raw_nat(ni, &ne); f2fs_put_page(page, 1); cache: - up_read(&nm_i->nat_tree_lock); /* cache nat entry */ down_write(&nm_i->nat_tree_lock); cache_nat_entry(sbi, nid, &ne); -- cgit v1.2.3 From 34e159da418be46986456daf21a339932f349b63 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Apr 2017 00:21:34 +0800 Subject: f2fs: delay awaking discard thread It's better to delay awaking discard thread while queuing discard commands in checkpoint, it will help to give more chances for merging big and small discard. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 45da59b72035..b27ae5f6a87f 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -968,7 +968,6 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, blkstart -= FDEV(devi).start_blk; } __update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen); - wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); return 0; } @@ -1335,6 +1334,8 @@ skip: SM_I(sbi)->dcc_info->nr_discards -= total_len; kmem_cache_free(discard_entry_slab, entry); } + + wake_up(&SM_I(sbi)->dcc_info->discard_wait_queue); } static int create_discard_cmd_control(struct f2fs_sb_info *sbi) -- cgit v1.2.3 From d618ebaf0aa83d175658aea5291e0c459d471d39 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Apr 2017 00:21:35 +0800 Subject: f2fs: enable small discard by default This patch start to enable 4K granularity small discard by default when realtime discard is on, so, in seriously fragmented space, small size discard can be issued in time to avoid useless storage space occupying of invalid filesystem's data, then performance of flash storage can be recovered. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 4 ++-- fs/f2fs/segment.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index b590c0a6cfa7..9228a36ded41 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -230,8 +230,8 @@ struct discard_cmd_control { struct list_head wait_list; /* store on-flushing entries */ wait_queue_head_t discard_wait_queue; /* waiting queue for wake-up */ struct mutex cmd_lock; - int nr_discards; /* # of discards in the list */ - int max_discards; /* max. discards to be issued */ + unsigned int nr_discards; /* # of discards in the list */ + unsigned int max_discards; /* max. 
discards to be issued */ unsigned int undiscard_blks; /* # of undiscard blocks */ atomic_t issued_discard; /* # of issued discard */ atomic_t issing_discard; /* # of issing discard */ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b27ae5f6a87f..7ded7da5b7cb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1362,7 +1362,7 @@ static int create_discard_cmd_control(struct f2fs_sb_info *sbi) atomic_set(&dcc->issing_discard, 0); atomic_set(&dcc->discard_cmd_cnt, 0); dcc->nr_discards = 0; - dcc->max_discards = 0; + dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg; dcc->undiscard_blks = 0; dcc->root = RB_ROOT; -- cgit v1.2.3 From bd5b07383a9057f9b6aefad6c784af95ffde8a67 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Apr 2017 20:21:37 +0800 Subject: f2fs: introduce __issue_discard_cmd Just cleanup, no logic change. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 63 ++++++++++++++++++++++++++----------------------------- 1 file changed, 30 insertions(+), 33 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 7ded7da5b7cb..13e073a9d859 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -971,6 +971,32 @@ static int __queue_discard_cmd(struct f2fs_sb_info *sbi, return 0; } +static void __issue_discard_cmd(struct f2fs_sb_info *sbi, bool issue_cond) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *pend_list; + struct discard_cmd *dc, *tmp; + struct blk_plug plug; + int i, iter = 0; + + mutex_lock(&dcc->cmd_lock); + blk_start_plug(&plug); + for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { + pend_list = &dcc->pend_list[i]; + list_for_each_entry_safe(dc, tmp, pend_list, list) { + f2fs_bug_on(sbi, dc->state != D_PREP); + + if (!issue_cond || is_idle(sbi)) + __submit_discard_cmd(sbi, dc); + if (issue_cond && iter++ > DISCARD_ISSUE_RATE) + goto out; + } + } +out: + blk_finish_plug(&plug); + mutex_unlock(&dcc->cmd_lock); +} + /* This should be covered 
by global mutex, &sit_i->sentry_lock */ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { @@ -993,27 +1019,16 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *pend_list; struct list_head *wait_list = &(dcc->wait_list); struct discard_cmd *dc, *tmp; - struct blk_plug plug; - int i; - mutex_lock(&dcc->cmd_lock); - - blk_start_plug(&plug); - for (i = 0; i < MAX_PLIST_NUM; i++) { - pend_list = &dcc->pend_list[i]; - list_for_each_entry_safe(dc, tmp, pend_list, list) - __submit_discard_cmd(sbi, dc); - } - blk_finish_plug(&plug); + __issue_discard_cmd(sbi, false); + mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { wait_for_completion_io(&dc->wait); __remove_discard_cmd(sbi, dc); } - mutex_unlock(&dcc->cmd_lock); } @@ -1022,32 +1037,15 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; - struct list_head *pend_list; struct list_head *wait_list = &dcc->wait_list; struct discard_cmd *dc, *tmp; - struct blk_plug plug; - int iter = 0, i; repeat: if (kthread_should_stop()) return 0; - mutex_lock(&dcc->cmd_lock); - blk_start_plug(&plug); - for (i = MAX_PLIST_NUM - 1; i >= 0; i--) { - pend_list = &dcc->pend_list[i]; - list_for_each_entry_safe(dc, tmp, pend_list, list) { - f2fs_bug_on(sbi, dc->state != D_PREP); - - if (is_idle(sbi)) - __submit_discard_cmd(sbi, dc); - - if (iter++ > DISCARD_ISSUE_RATE) - goto next_step; - } - } -next_step: - blk_finish_plug(&plug); + __issue_discard_cmd(sbi, true); + mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { if (dc->state == D_DONE) { wait_for_completion_io(&dc->wait); @@ -1056,7 +1054,6 @@ next_step: } mutex_unlock(&dcc->cmd_lock); - iter = 0; congestion_wait(BLK_RW_SYNC, 
HZ/50); wait_event_interruptible(*q, kthread_should_stop() || -- cgit v1.2.3 From 63a94fa1d79dfbb94a2c923f8810e3b45b27570e Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Tue, 25 Apr 2017 20:21:38 +0800 Subject: f2fs: introduce __wait_discard_cmd Just cleanup, no logic change. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 13e073a9d859..35d5d49e7614 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -997,6 +997,22 @@ out: mutex_unlock(&dcc->cmd_lock); } +static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) +{ + struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; + struct list_head *wait_list = &(dcc->wait_list); + struct discard_cmd *dc, *tmp; + + mutex_lock(&dcc->cmd_lock); + list_for_each_entry_safe(dc, tmp, wait_list, list) { + if (!wait_cond || dc->state == D_DONE) { + wait_for_completion_io(&dc->wait); + __remove_discard_cmd(sbi, dc); + } + } + mutex_unlock(&dcc->cmd_lock); +} + /* This should be covered by global mutex, &sit_i->sentry_lock */ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { @@ -1018,18 +1034,8 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) /* This comes from f2fs_put_super */ void f2fs_wait_discard_bios(struct f2fs_sb_info *sbi) { - struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; - struct list_head *wait_list = &(dcc->wait_list); - struct discard_cmd *dc, *tmp; - __issue_discard_cmd(sbi, false); - - mutex_lock(&dcc->cmd_lock); - list_for_each_entry_safe(dc, tmp, wait_list, list) { - wait_for_completion_io(&dc->wait); - __remove_discard_cmd(sbi, dc); - } - mutex_unlock(&dcc->cmd_lock); + __wait_discard_cmd(sbi, false); } static int issue_discard_thread(void *data) @@ -1037,22 +1043,12 @@ static int issue_discard_thread(void *data) struct f2fs_sb_info *sbi = data; 
struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; wait_queue_head_t *q = &dcc->discard_wait_queue; - struct list_head *wait_list = &dcc->wait_list; - struct discard_cmd *dc, *tmp; repeat: if (kthread_should_stop()) return 0; __issue_discard_cmd(sbi, true); - - mutex_lock(&dcc->cmd_lock); - list_for_each_entry_safe(dc, tmp, wait_list, list) { - if (dc->state == D_DONE) { - wait_for_completion_io(&dc->wait); - __remove_discard_cmd(sbi, dc); - } - } - mutex_unlock(&dcc->cmd_lock); + __wait_discard_cmd(sbi, true); congestion_wait(BLK_RW_SYNC, HZ/50); -- cgit v1.2.3 From 7eab0c0df8d1a8c460f7d660d3ffd06fd448e590 Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Tue, 25 Apr 2017 12:45:12 +0000 Subject: f2fs: reconstruct code to write a data page This patch introduces encrypt_one_page which encrypts one data page before submit_bio, and change the use of need_inplace_update. Signed-off-by: Hou Pengyang Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 81 ++++++++++++++++++++++++++++++++++--------------------- fs/f2fs/file.c | 4 +-- fs/f2fs/segment.h | 6 +---- 3 files changed, 54 insertions(+), 37 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b8dcd1e224e8..dfc974c95dd2 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1302,6 +1302,49 @@ static int f2fs_read_data_pages(struct file *file, return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages); } +static int encrypt_one_page(struct f2fs_io_info *fio) +{ + struct inode *inode = fio->page->mapping->host; + gfp_t gfp_flags = GFP_NOFS; + + if (!f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode)) + return 0; + + /* wait for GCed encrypted page writeback */ + f2fs_wait_on_encrypted_page_writeback(fio->sbi, fio->old_blkaddr); + +retry_encrypt: + fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, + PAGE_SIZE, 0, fio->page->index, gfp_flags); + if (!IS_ERR(fio->encrypted_page)) + return 0; + + /* flush pending IOs and wait for a while in the ENOMEM 
case */ + if (PTR_ERR(fio->encrypted_page) == -ENOMEM) { + f2fs_flush_merged_bios(fio->sbi); + congestion_wait(BLK_RW_ASYNC, HZ/50); + gfp_flags |= __GFP_NOFAIL; + goto retry_encrypt; + } + return PTR_ERR(fio->encrypted_page); +} + +static inline bool need_inplace_update(struct f2fs_io_info *fio) +{ + struct inode *inode = fio->page->mapping->host; + + if (fio->old_blkaddr == NEW_ADDR) + return false; + if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) + return false; + if (is_cold_data(fio->page)) + return false; + if (IS_ATOMIC_WRITTEN_PAGE(fio->page)) + return false; + + return need_inplace_update_policy(inode, fio); +} + int do_write_data_page(struct f2fs_io_info *fio) { struct page *page = fio->page; @@ -1322,30 +1365,9 @@ int do_write_data_page(struct f2fs_io_info *fio) goto out_writepage; } - if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) { - gfp_t gfp_flags = GFP_NOFS; - - /* wait for GCed encrypted page writeback */ - f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode), - fio->old_blkaddr); -retry_encrypt: - fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page, - PAGE_SIZE, 0, - fio->page->index, - gfp_flags); - if (IS_ERR(fio->encrypted_page)) { - err = PTR_ERR(fio->encrypted_page); - if (err == -ENOMEM) { - /* flush pending ios and wait for a while */ - f2fs_flush_merged_bios(F2FS_I_SB(inode)); - congestion_wait(BLK_RW_ASYNC, HZ/50); - gfp_flags |= __GFP_NOFAIL; - err = 0; - goto retry_encrypt; - } - goto out_writepage; - } - } + err = encrypt_one_page(fio); + if (err) + goto out_writepage; set_page_writeback(page); @@ -1353,15 +1375,14 @@ retry_encrypt: * If current allocation needs SSR, * it had better in-place writes for updated data. 
*/ - if (unlikely(fio->old_blkaddr != NEW_ADDR && - !is_cold_data(page) && - !IS_ATOMIC_WRITTEN_PAGE(page) && - need_inplace_update(inode, fio))) { - f2fs_unlock_op(F2FS_I_SB(inode)); + if (need_inplace_update(fio)) { + f2fs_bug_on(fio->sbi, !fio->cp_rwsem_locked); + f2fs_unlock_op(fio->sbi); fio->cp_rwsem_locked = false; + err = rewrite_data_page(fio); + trace_f2fs_do_write_data_page(fio->page, IPU); set_inode_flag(inode, FI_UPDATE_WRITE); - trace_f2fs_do_write_data_page(page, IPU); } else { write_data_page(&dn, fio); trace_f2fs_do_write_data_page(page, OPU); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 32050f4c3592..cdbf1add2a11 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1898,7 +1898,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi, int err; /* if in-place-update policy is enabled, don't waste time here */ - if (need_inplace_update(inode, NULL)) + if (need_inplace_update_policy(inode, NULL)) return -EINVAL; pg_start = range->start >> PAGE_SHIFT; @@ -2033,7 +2033,7 @@ static int f2fs_ioc_defragment(struct file *filp, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!S_ISREG(inode->i_mode)) + if (!S_ISREG(inode->i_mode) || f2fs_is_atomic_file(inode)) return -EINVAL; if (f2fs_readonly(sbi->sb)) diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h index 8ad22b8cbba7..10bf05d4cff4 100644 --- a/fs/f2fs/segment.h +++ b/fs/f2fs/segment.h @@ -564,16 +564,12 @@ enum { F2FS_IPU_ASYNC, }; -static inline bool need_inplace_update(struct inode *inode, +static inline bool need_inplace_update_policy(struct inode *inode, struct f2fs_io_info *fio) { struct f2fs_sb_info *sbi = F2FS_I_SB(inode); unsigned int policy = SM_I(sbi)->ipu_policy; - /* IPU can be done only for the user data */ - if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) - return false; - if (test_opt(sbi, LFS)) return false; -- cgit v1.2.3 From e959c8f543e11dadf7f6923427fb3acb452a0de6 Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Tue, 25 Apr 2017 12:45:13 
+0000 Subject: f2fs: lookup extent cache first under IPU scenario If a page is cold, NOT atomically written and need_ipu now, there is a high probability that IPU should be adopted. For IPU, we try to check extent tree to get the block index first, instead of reading the dnode page, which may lead to a useless dnode IO, since there is no need to update the dnode index for IPU. Signed-off-by: Hou Pengyang Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 16 ++++++++++++++-- fs/f2fs/gc.c | 1 + fs/f2fs/segment.c | 1 + 3 files changed, 16 insertions(+), 2 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index dfc974c95dd2..ca21ecbd6bbd 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1350,9 +1350,20 @@ int do_write_data_page(struct f2fs_io_info *fio) struct page *page = fio->page; struct inode *inode = page->mapping->host; struct dnode_of_data dn; + struct extent_info ei = {0,0,0}; + bool ipu_force = false; int err = 0; set_new_dnode(&dn, inode, NULL, NULL, 0); + if (need_inplace_update(fio) && + f2fs_lookup_extent_cache(inode, page->index, &ei)) { + fio->old_blkaddr = ei.blk + page->index - ei.fofs; + if (fio->old_blkaddr != NULL_ADDR && + fio->old_blkaddr != NEW_ADDR) { + ipu_force = true; + goto got_it; + } + } err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); if (err) return err; @@ -1364,7 +1375,7 @@ int do_write_data_page(struct f2fs_io_info *fio) ClearPageUptodate(page); goto out_writepage; } - +got_it: err = encrypt_one_page(fio); if (err) goto out_writepage; @@ -1375,7 +1386,7 @@ int do_write_data_page(struct f2fs_io_info *fio) * If current allocation needs SSR, * it had better in-place writes for updated data.
*/ - if (need_inplace_update(fio)) { + if (ipu_force || need_inplace_update(fio)) { f2fs_bug_on(fio->sbi, !fio->cp_rwsem_locked); f2fs_unlock_op(fio->sbi); fio->cp_rwsem_locked = false; @@ -1412,6 +1423,7 @@ static int __write_data_page(struct page *page, bool *submitted, .type = DATA, .op = REQ_OP_WRITE, .op_flags = wbc_to_write_flags(wbc), + .old_blkaddr = NULL_ADDR, .page = page, .encrypted_page = NULL, .submitted = false, diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 84db41ca27c1..c2a9ae8397d3 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -714,6 +714,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC, + .old_blkaddr = NULL_ADDR, .page = page, .encrypted_page = NULL, }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 35d5d49e7614..38b97ad6bceb 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -311,6 +311,7 @@ static int __commit_inmem_pages(struct inode *inode, } fio.page = page; + fio.old_blkaddr = NULL_ADDR; err = do_write_data_page(&fio); if (err) { unlock_page(page); -- cgit v1.2.3 From a817737e87d506ea7b3983d287b4578c99922d85 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 24 Apr 2017 15:20:16 -0700 Subject: f2fs: introduce valid_ipu_blkaddr to clean up This patch introduces valid_ipu_blkaddr to clean up checking block address for inplace-update. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index ca21ecbd6bbd..2f2de18d24fe 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1333,8 +1333,6 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio) { struct inode *inode = fio->page->mapping->host; - if (fio->old_blkaddr == NEW_ADDR) - return false; if (S_ISDIR(inode->i_mode) || f2fs_is_atomic_file(inode)) return false; if (is_cold_data(fio->page)) @@ -1345,6 +1343,15 @@ static inline bool need_inplace_update(struct f2fs_io_info *fio) return need_inplace_update_policy(inode, fio); } +static inline bool valid_ipu_blkaddr(struct f2fs_io_info *fio) +{ + if (fio->old_blkaddr == NEW_ADDR) + return false; + if (fio->old_blkaddr == NULL_ADDR) + return false; + return true; +} + int do_write_data_page(struct f2fs_io_info *fio) { struct page *page = fio->page; @@ -1358,8 +1365,8 @@ int do_write_data_page(struct f2fs_io_info *fio) if (need_inplace_update(fio) && f2fs_lookup_extent_cache(inode, page->index, &ei)) { fio->old_blkaddr = ei.blk + page->index - ei.fofs; - if (fio->old_blkaddr != NULL_ADDR && - fio->old_blkaddr != NEW_ADDR) { + + if (valid_ipu_blkaddr(fio)) { ipu_force = true; goto got_it; } @@ -1386,7 +1393,7 @@ got_it: * If current allocation needs SSR, * it had better in-place writes for updated data. */ - if (ipu_force || need_inplace_update(fio)) { + if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) { f2fs_bug_on(fio->sbi, !fio->cp_rwsem_locked); f2fs_unlock_op(fio->sbi); fio->cp_rwsem_locked = false; -- cgit v1.2.3 From b9dd46188edc2f0d1f37328637860bb65a771124 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Tue, 25 Apr 2017 16:28:48 -0700 Subject: f2fs: sanity check segment count F2FS uses 4 bytes to represent block address. As a result, supported size of disk is 16 TB and it equals to 16 * 1024 * 1024 / 2 segments. 
Signed-off-by: Jin Qian Signed-off-by: Jaegeuk Kim --- fs/f2fs/super.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 97c07a5153e9..4cd3bee6775f 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1494,6 +1494,13 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi, return 1; } + if (le32_to_cpu(raw_super->segment_count) > F2FS_MAX_SEGMENT) { + f2fs_msg(sb, KERN_INFO, + "Invalid segment count (%u)", + le32_to_cpu(raw_super->segment_count)); + return 1; + } + /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ if (sanity_check_area_boundary(sbi, bh)) return 1; -- cgit v1.2.3 From 4d9780781304af0a57b6e369ce353f7bd2fb5e52 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 26 Apr 2017 11:11:12 -0700 Subject: f2fs: nullify fio->encrypted_page for each writes This makes sure each write request has nullified encrypted_page pointer. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 38b97ad6bceb..b0babb73a076 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -291,7 +291,6 @@ static int __commit_inmem_pages(struct inode *inode, .type = DATA, .op = REQ_OP_WRITE, .op_flags = REQ_SYNC | REQ_PRIO, - .encrypted_page = NULL, }; pgoff_t last_idx = ULONG_MAX; int err = 0; @@ -312,6 +311,7 @@ static int __commit_inmem_pages(struct inode *inode, fio.page = page; fio.old_blkaddr = NULL_ADDR; + fio.encrypted_page = NULL; err = do_write_data_page(&fio); if (err) { unlock_page(page); -- cgit v1.2.3 From ec9895add2c519681c45a8599262d1d9b4986349 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 26 Apr 2017 17:39:54 +0800 Subject: f2fs: don't hold cmd_lock during waiting discard command Previously, with protection of cmd_lock, we will wait for end io of discard command which potentially may lead long latency, making worse concurrency. 
So, in this patch, we try to add reference into discard entry to prevent the entry being released by other thread, then we can avoid holding global cmd_lock during waiting discard to finish. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 25 ++++++++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 9228a36ded41..5759671fc948 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -219,6 +219,7 @@ struct discard_cmd { struct list_head list; /* command list */ struct completion wait; /* compleation */ struct block_device *bdev; /* bdev */ + unsigned short ref; /* reference count */ int state; /* state */ int error; /* bio error */ }; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index b0babb73a076..656e1515ff56 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -688,6 +688,7 @@ static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi, dc->lstart = lstart; dc->start = start; dc->len = len; + dc->ref = 0; dc->state = D_PREP; dc->error = 0; init_completion(&dc->wait); @@ -1007,6 +1008,8 @@ static void __wait_discard_cmd(struct f2fs_sb_info *sbi, bool wait_cond) mutex_lock(&dcc->cmd_lock); list_for_each_entry_safe(dc, tmp, wait_list, list) { if (!wait_cond || dc->state == D_DONE) { + if (dc->ref) + continue; wait_for_completion_io(&dc->wait); __remove_discard_cmd(sbi, dc); } @@ -1019,17 +1022,29 @@ void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr) { struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info; struct discard_cmd *dc; + bool need_wait = false; mutex_lock(&dcc->cmd_lock); - dc = (struct discard_cmd *)__lookup_rb_tree(&dcc->root, NULL, blkaddr); if (dc) { - if (dc->state != D_PREP) - wait_for_completion_io(&dc->wait); - __punch_discard_cmd(sbi, dc, blkaddr); + if (dc->state == D_PREP) { + __punch_discard_cmd(sbi, dc, blkaddr); + } else { + dc->ref++; + need_wait = true; + } } - 
mutex_unlock(&dcc->cmd_lock); + + if (need_wait) { + wait_for_completion_io(&dc->wait); + mutex_lock(&dcc->cmd_lock); + f2fs_bug_on(sbi, dc->state != D_DONE); + dc->ref--; + if (!dc->ref) + __remove_discard_cmd(sbi, dc); + mutex_unlock(&dcc->cmd_lock); + } } /* This comes from f2fs_put_super */ -- cgit v1.2.3 From 9a744b92da6eacea33dec1e9280fd324736bfe81 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 26 Apr 2017 17:39:55 +0800 Subject: f2fs: shrink size of struct discard_cmd In order to shrink size of struct discard_cmd, change variable type of @state in struct discard_cmd from int to unsigned char. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 5759671fc948..6f671c719570 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -220,7 +220,7 @@ struct discard_cmd { struct completion wait; /* compleation */ struct block_device *bdev; /* bdev */ unsigned short ref; /* reference count */ - int state; /* state */ + unsigned char state; /* state */ int error; /* bio error */ }; -- cgit v1.2.3 From 279d6df20c94079d35e012f1602d40c42632e8f3 Mon Sep 17 00:00:00 2001 From: Hou Pengyang Date: Thu, 27 Apr 2017 00:17:21 +0800 Subject: f2fs: release cp and dnode lock before IPU We don't need to rewrite the page under cp_rwsem and dnode locks. 
Signed-off-by: Hou Pengyang Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 39 ++++++++++++++++++++++++--------------- fs/f2fs/f2fs.h | 2 +- fs/f2fs/gc.c | 1 + fs/f2fs/segment.c | 1 + 4 files changed, 27 insertions(+), 16 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 2f2de18d24fe..1254986dd6eb 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1368,12 +1368,17 @@ int do_write_data_page(struct f2fs_io_info *fio) if (valid_ipu_blkaddr(fio)) { ipu_force = true; + fio->need_lock = false; goto got_it; } } + + if (fio->need_lock) + f2fs_lock_op(fio->sbi); + err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE); if (err) - return err; + goto out; fio->old_blkaddr = dn.data_blkaddr; @@ -1394,22 +1399,26 @@ got_it: * it had better in-place writes for updated data. */ if (ipu_force || (valid_ipu_blkaddr(fio) && need_inplace_update(fio))) { - f2fs_bug_on(fio->sbi, !fio->cp_rwsem_locked); - f2fs_unlock_op(fio->sbi); - fio->cp_rwsem_locked = false; - + f2fs_put_dnode(&dn); + if (fio->need_lock) + f2fs_unlock_op(fio->sbi); err = rewrite_data_page(fio); trace_f2fs_do_write_data_page(fio->page, IPU); set_inode_flag(inode, FI_UPDATE_WRITE); - } else { - write_data_page(&dn, fio); - trace_f2fs_do_write_data_page(page, OPU); - set_inode_flag(inode, FI_APPEND_WRITE); - if (page->index == 0) - set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); + return err; } + + /* LFS mode write path */ + write_data_page(&dn, fio); + trace_f2fs_do_write_data_page(page, OPU); + set_inode_flag(inode, FI_APPEND_WRITE); + if (page->index == 0) + set_inode_flag(inode, FI_FIRST_BLOCK_WRITTEN); out_writepage: f2fs_put_dnode(&dn); +out: + if (fio->need_lock) + f2fs_unlock_op(fio->sbi); return err; } @@ -1434,7 +1443,7 @@ static int __write_data_page(struct page *page, bool *submitted, .page = page, .encrypted_page = NULL, .submitted = false, - .cp_rwsem_locked = true, + .need_lock = true, }; trace_f2fs_writepage(page, DATA); @@ -1470,6 +1479,7 @@ 
write: /* Dentry blocks are controlled by checkpoint */ if (S_ISDIR(inode->i_mode)) { + fio.need_lock = false; err = do_write_data_page(&fio); goto done; } @@ -1487,13 +1497,12 @@ write: if (!err) goto out; } - f2fs_lock_op(sbi); + if (err == -EAGAIN) err = do_write_data_page(&fio); if (F2FS_I(inode)->last_disk_size < psize) F2FS_I(inode)->last_disk_size = psize; - if (fio.cp_rwsem_locked) - f2fs_unlock_op(sbi); + done: if (err && err != -ENOENT) goto redirty_out; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 6f671c719570..713f072e5fbc 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -803,7 +803,7 @@ struct f2fs_io_info { struct page *page; /* page to be written */ struct page *encrypted_page; /* encrypted page */ bool submitted; /* indicate IO submission */ - bool cp_rwsem_locked; /* indicate cp_rwsem is held */ + bool need_lock; /* indicate we need to lock cp_rwsem */ }; #define is_read_io(rw) ((rw) == READ) diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index c2a9ae8397d3..026522107ca3 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -717,6 +717,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type, .old_blkaddr = NULL_ADDR, .page = page, .encrypted_page = NULL, + .need_lock = true, }; bool is_dirty = PageDirty(page); int err; diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 656e1515ff56..e302f30ec7fe 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -312,6 +312,7 @@ static int __commit_inmem_pages(struct inode *inode, fio.page = page; fio.old_blkaddr = NULL_ADDR; fio.encrypted_page = NULL; + fio.need_lock = false, err = do_write_data_page(&fio); if (err) { unlock_page(page); -- cgit v1.2.3 From c473f1a9658b6c23d576136d5a49b1c731ef1767 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Thu, 27 Apr 2017 20:40:39 +0800 Subject: f2fs: allow cpc->reason to indicate more than one reason Change to use different bits of cpc->reason to indicate different status, so cpc->reason can indicate more than one reason. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 14 +++++++------- fs/f2fs/f2fs.h | 16 +++++++--------- fs/f2fs/segment.c | 8 ++++---- 3 files changed, 18 insertions(+), 20 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 800be94f8cb3..27578903eeb6 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1054,17 +1054,17 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) spin_lock(&sbi->cp_lock); - if (cpc->reason == CP_UMOUNT && + if ((cpc->reason & CP_UMOUNT) && le32_to_cpu(ckpt->cp_pack_total_block_count) > sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) disable_nat_bits(sbi, false); - if (cpc->reason == CP_UMOUNT) + if (cpc->reason & CP_UMOUNT) __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); else __clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG); - if (cpc->reason == CP_FASTBOOT) + if (cpc->reason & CP_FASTBOOT) __set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); else __clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG); @@ -1272,8 +1272,8 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) mutex_lock(&sbi->cp_mutex); if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) && - (cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC || - (cpc->reason == CP_DISCARD && !sbi->discard_blks))) + ((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) || + ((cpc->reason & CP_DISCARD) && !sbi->discard_blks))) goto out; if (unlikely(f2fs_cp_error(sbi))) { err = -EIO; @@ -1295,7 +1295,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_flush_merged_bios(sbi); /* this is the case of multiple fstrims without any changes */ - if (cpc->reason == CP_DISCARD) { + if (cpc->reason & CP_DISCARD) { if (!exist_trim_candidates(sbi, cpc)) { unblock_operations(sbi); goto out; @@ -1333,7 +1333,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc) unblock_operations(sbi); stat_inc_cp_count(sbi->stat_info); - if (cpc->reason == CP_RECOVERY) + if 
(cpc->reason & CP_RECOVERY) f2fs_msg(sbi->sb, KERN_NOTICE, "checkpoint: version = %llx", ckpt_ver); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 713f072e5fbc..37360b9ad263 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -125,13 +125,11 @@ enum { SIT_BITMAP }; -enum { - CP_UMOUNT, - CP_FASTBOOT, - CP_SYNC, - CP_RECOVERY, - CP_DISCARD, -}; +#define CP_UMOUNT 0x00000001 +#define CP_FASTBOOT 0x00000002 +#define CP_SYNC 0x00000004 +#define CP_RECOVERY 0x00000008 +#define CP_DISCARD 0x00000010 #define DEF_BATCHED_TRIM_SECTIONS 2048 #define BATCHED_TRIM_SEGMENTS(sbi) \ @@ -1265,7 +1263,7 @@ static inline bool enabled_nat_bits(struct f2fs_sb_info *sbi, { bool set = is_set_ckpt_flags(sbi, CP_NAT_BITS_FLAG); - return (cpc) ? (cpc->reason == CP_UMOUNT) && set : set; + return (cpc) ? (cpc->reason & CP_UMOUNT) && set : set; } static inline void f2fs_lock_op(struct f2fs_sb_info *sbi) @@ -1301,7 +1299,7 @@ static inline int __get_cp_reason(struct f2fs_sb_info *sbi) static inline bool __remain_node_summaries(int reason) { - return (reason == CP_UMOUNT || reason == CP_FASTBOOT); + return (reason & (CP_UMOUNT | CP_FASTBOOT)); } static inline bool __exist_node_summaries(struct f2fs_sb_info *sbi) diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index e302f30ec7fe..69b99a8f9a01 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -1183,7 +1183,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc, unsigned long *discard_map = (unsigned long *)se->discard_map; unsigned long *dmap = SIT_I(sbi)->tmp_map; unsigned int start = 0, end = -1; - bool force = (cpc->reason == CP_DISCARD); + bool force = (cpc->reason & CP_DISCARD); struct discard_entry *de = NULL; struct list_head *head = &SM_I(sbi)->dcc_info->entry_list; int i; @@ -1266,7 +1266,7 @@ void clear_prefree_segments(struct f2fs_sb_info *sbi, struct cp_control *cpc) unsigned long *prefree_map = dirty_i->dirty_segmap[PRE]; unsigned int start = 0, end = -1; unsigned int secno, start_segno; - 
bool force = (cpc->reason == CP_DISCARD); + bool force = (cpc->reason & CP_DISCARD); mutex_lock(&dirty_i->seglist_lock); @@ -2770,7 +2770,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) se = get_seg_entry(sbi, segno); /* add discard candidates */ - if (cpc->reason != CP_DISCARD) { + if (!(cpc->reason & CP_DISCARD)) { cpc->trim_start = segno; add_discard_addrs(sbi, cpc, false); } @@ -2806,7 +2806,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc) f2fs_bug_on(sbi, !list_empty(head)); f2fs_bug_on(sbi, sit_i->dirty_sentries); out: - if (cpc->reason == CP_DISCARD) { + if (cpc->reason & CP_DISCARD) { __u64 trim_start = cpc->trim_start; for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) -- cgit v1.2.3 From 1f43e2ad7bff54f7c82a084a57e5c90da0d3f4d9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 28 Apr 2017 13:56:08 +0800 Subject: f2fs: introduce CP_TRIMMED_FLAG to avoid unneeded discard Introduce CP_TRIMMED_FLAG to indicate that all invalid blocks were trimmed before umount. When an image carrying this flag is mounted, invalid blocks are not recorded as undiscarded ones, so a later fstrim can avoid issuing redundant discard commands. 
Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +++ fs/f2fs/f2fs.h | 1 + fs/f2fs/segment.c | 28 ++++++++++++++++++++-------- fs/f2fs/super.c | 7 +++++++ 4 files changed, 31 insertions(+), 8 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index 27578903eeb6..ea9c317b5916 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -1059,6 +1059,9 @@ static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc) sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks) disable_nat_bits(sbi, false); + if (cpc->reason & CP_TRIMMED) + __set_ckpt_flags(ckpt, CP_TRIMMED_FLAG); + if (cpc->reason & CP_UMOUNT) __set_ckpt_flags(ckpt, CP_UMOUNT_FLAG); else diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 37360b9ad263..f0481fb52142 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -130,6 +130,7 @@ enum { #define CP_SYNC 0x00000004 #define CP_RECOVERY 0x00000008 #define CP_DISCARD 0x00000010 +#define CP_TRIMMED 0x00000020 #define DEF_BATCHED_TRIM_SECTIONS 2048 #define BATCHED_TRIM_SEGMENTS(sbi) \ diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 69b99a8f9a01..a32268eeb472 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3005,10 +3005,17 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) /* build discard map only one time */ if (f2fs_discard_en(sbi)) { - memcpy(se->discard_map, se->cur_valid_map, - SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += sbi->blocks_per_seg - - se->valid_blocks; + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, + SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, + se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += + sbi->blocks_per_seg - + se->valid_blocks; + } } if (sbi->segs_per_sec > 1) @@ -3032,10 +3039,15 @@ static void build_sit_entries(struct f2fs_sb_info *sbi) seg_info_from_raw_sit(se, &sit); if (f2fs_discard_en(sbi)) { - memcpy(se->discard_map, se->cur_valid_map, - 
SIT_VBLOCK_MAP_SIZE); - sbi->discard_blks += old_valid_blocks - - se->valid_blocks; + if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) { + memset(se->discard_map, 0xff, + SIT_VBLOCK_MAP_SIZE); + } else { + memcpy(se->discard_map, se->cur_valid_map, + SIT_VBLOCK_MAP_SIZE); + sbi->discard_blks += old_valid_blocks - + se->valid_blocks; + } } if (sbi->segs_per_sec > 1) diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 4cd3bee6775f..9a14b2590337 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -797,6 +797,13 @@ static void f2fs_put_super(struct super_block *sb) /* be sure to wait for any on-going discard commands */ f2fs_wait_discard_bios(sbi); + if (!sbi->discard_blks) { + struct cp_control cpc = { + .reason = CP_UMOUNT | CP_TRIMMED, + }; + write_checkpoint(sbi, &cpc); + } + /* write_checkpoint can update stat informaion */ f2fs_destroy_stats(sbi); -- cgit v1.2.3 From 1c0f4bf5c3df13da4090a602c377f7d7a3708ce8 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 1 May 2017 18:09:44 -0700 Subject: f2fs: flush dirty nats periodically This patch flushes dirty nats in order to acquire available nids by writing checkpoint. Otherwise, we can have no chance to get freed nids. Signed-off-by: Jaegeuk Kim --- fs/f2fs/segment.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index a32268eeb472..6e1c8cf75749 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -419,7 +419,7 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi) else build_free_nids(sbi, false, false); - if (!is_idle(sbi)) + if (!is_idle(sbi) && !excess_dirty_nats(sbi)) return; /* checkpoint is the only way to shrink partial cached entries */ -- cgit v1.2.3 From 5b0ef73c9d19517467e0cb36158164bf097fe148 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 1 May 2017 18:13:03 -0700 Subject: f2fs: show available_nids in f2fs/status This patch adds an entry in f2fs/status to show # of available nids. 
Signed-off-by: Jaegeuk Kim --- fs/f2fs/debug.c | 5 +++-- fs/f2fs/f2fs.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c index 6102737473d4..87f449845f5f 100644 --- a/fs/f2fs/debug.c +++ b/fs/f2fs/debug.c @@ -97,6 +97,7 @@ static void update_general_status(struct f2fs_sb_info *sbi) si->sits = MAIN_SEGS(sbi); si->dirty_sits = SIT_I(sbi)->dirty_sentries; si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID_LIST]; + si->avail_nids = NM_I(sbi)->available_nids; si->alloc_nids = NM_I(sbi)->nid_cnt[ALLOC_NID_LIST]; si->bg_gc = sbi->bg_gc; si->util_free = (int)(free_user_blocks(sbi) >> sbi->log_blocks_per_seg) @@ -370,8 +371,8 @@ static int stat_show(struct seq_file *s, void *v) si->ndirty_imeta); seq_printf(s, " - NATs: %9d/%9d\n - SITs: %9d/%9d\n", si->dirty_nats, si->nats, si->dirty_sits, si->sits); - seq_printf(s, " - free_nids: %9d, alloc_nids: %9d\n", - si->free_nids, si->alloc_nids); + seq_printf(s, " - free_nids: %9d/%9d\n - alloc_nids: %9d\n", + si->free_nids, si->avail_nids, si->alloc_nids); seq_puts(s, "\nDistribution of User Blocks:"); seq_puts(s, " [ valid | invalid | free ]\n"); seq_puts(s, " ["); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index f0481fb52142..4cb53fb741ad 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2394,7 +2394,8 @@ struct f2fs_stat_info { int ndirty_node, ndirty_dent, ndirty_meta, ndirty_data, ndirty_imeta; int inmem_pages; unsigned int ndirty_dirs, ndirty_files, ndirty_all; - int nats, dirty_nats, sits, dirty_sits, free_nids, alloc_nids; + int nats, dirty_nats, sits, dirty_sits; + int free_nids, avail_nids, alloc_nids; int total_count, utilization; int bg_gc, nr_wb_cp_data, nr_wb_data; int nr_flushing, nr_flushed, nr_discarding, nr_discarded; -- cgit v1.2.3 From 3adc5fcb7edf5f8dfe8d37dcb50ba6b30077c905 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 2 May 2017 17:03:47 +0200 Subject: f2fs: Make flush bios explicitely sync Commit b685d3d65ac7 "block: treat 
REQ_FUA and REQ_PREFLUSH as synchronous" removed REQ_SYNC flag from WRITE_{FUA|PREFLUSH|...} definitions. generic_make_request_checks() however strips REQ_FUA and REQ_PREFLUSH flags from a bio when the storage doesn't report volatile write cache and thus write effectively becomes asynchronous which can lead to performance regressions. Fix the problem by making sure all bios which are synchronous are properly marked with REQ_SYNC. Fixes: b685d3d65ac791406e0dfd8779cc9b3707fea5a3 Cc: stable@vger.kernel.org # 4.9+ CC: Jaegeuk Kim CC: linux-f2fs-devel@lists.sourceforge.net Signed-off-by: Jan Kara Acked-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/data.c | 2 +- fs/f2fs/segment.c | 2 +- fs/f2fs/super.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1254986dd6eb..7c0f6bdf817d 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -309,7 +309,7 @@ static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, if (type >= META_FLUSH) { io->fio.type = META_FLUSH; io->fio.op = REQ_OP_WRITE; - io->fio.op_flags = REQ_META | REQ_PRIO; + io->fio.op_flags = REQ_META | REQ_PRIO | REQ_SYNC; if (!test_opt(sbi, NOBARRIER)) io->fio.op_flags |= REQ_PREFLUSH | REQ_FUA; } diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 6e1c8cf75749..de31030b5041 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -446,7 +446,7 @@ static int __submit_flush_wait(struct f2fs_sb_info *sbi, struct bio *bio = f2fs_bio_alloc(0); int ret; - bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH; + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH; bio->bi_bdev = bdev; ret = submit_bio_wait(bio); bio_put(bio); diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 9a14b2590337..83355ec4a92c 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1325,7 +1325,7 @@ static int __f2fs_commit_super(struct buffer_head *bh, unlock_buffer(bh); /* it's rare case, we can do fua all the time */ - return __sync_dirty_buffer(bh, REQ_PREFLUSH 
| REQ_FUA); + return __sync_dirty_buffer(bh, REQ_SYNC | REQ_PREFLUSH | REQ_FUA); } static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi, -- cgit v1.2.3 From a72d4b97bb83c92a25d3eb21cc36b9bf94077c60 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 3 May 2017 23:59:13 +0800 Subject: f2fs: relocate inode_{,un}lock in F2FS_IOC_SETFLAGS This patch expands the region covered by inode->i_rwsem so that the flags are set atomically. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- fs/f2fs/file.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/f2fs') diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index cdbf1add2a11..abb0403d3414 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -1473,10 +1473,10 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) if (ret) return ret; - flags = f2fs_mask_flags(inode->i_mode, flags); - inode_lock(inode); + flags = f2fs_mask_flags(inode->i_mode, flags); + oldflags = fi->i_flags; if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { @@ -1490,10 +1490,11 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) flags = flags & FS_FL_USER_MODIFIABLE; flags |= oldflags & ~FS_FL_USER_MODIFIABLE; fi->i_flags = flags; - inode_unlock(inode); inode->i_ctime = current_time(inode); f2fs_set_inode_flags(inode); + + inode_unlock(inode); out: mnt_drop_write_file(filp); return ret; -- cgit v1.2.3 From e9cdd307704b5a8f685fa3fff4403691fbf64f97 Mon Sep 17 00:00:00 2001 From: Yunlei He Date: Wed, 26 Apr 2017 15:56:52 +0800 Subject: f2fs: fix a mount fail for wrong next_scan_nid -write_checkpoint -do_checkpoint -next_free_nid <--- something wrong with next free nid -f2fs_fill_super -build_node_manager -build_free_nids -get_current_nat_page -__get_meta_page <--- attempt to access beyond end of device Signed-off-by: Yunlei He Signed-off-by: Jaegeuk Kim --- fs/f2fs/node.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/f2fs') diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index 
b99e318bce58..98351a4a4da3 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1955,6 +1955,9 @@ static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount) int i = 0; nid_t nid = nm_i->next_scan_nid; + if (unlikely(nid >= nm_i->max_nid)) + nid = 0; + /* Enough entries */ if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK) return; -- cgit v1.2.3