diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-14 09:07:36 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2016-12-14 09:07:36 -0800 |
commit | 09cb6464fe5e7fcd5177911429badd139c4481b7 (patch) | |
tree | 5f7af2d0778f699053da6ed2e43662fff2d51e73 /fs/f2fs/node.c | |
parent | 19d37ce2a7159ee30bd59d14fe5fe13c932bd5b7 (diff) | |
parent | c0ed4405a99ec9be2a0f062eaafc002d8d26c99f (diff) | |
download | linux-stable-09cb6464fe5e7fcd5177911429badd139c4481b7.tar.gz linux-stable-09cb6464fe5e7fcd5177911429badd139c4481b7.tar.bz2 linux-stable-09cb6464fe5e7fcd5177911429badd139c4481b7.zip |
Merge tag 'for-f2fs-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"This patch series contains several performance tuning patches
regarding to the IO submission flow, in addition to supporting new
features such as a ZBC-base drive and multiple devices.
It also includes some major bug fixes such as:
- checkpoint version control
- fdatasync-related roll-forward recovery routine
- memory boundary or null-pointer access in corner cases
- missing error cases
It has various minor clean-up patches as well"
* tag 'for-f2fs-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (66 commits)
f2fs: fix a missing size change in f2fs_setattr
f2fs: fix to access nullified flush_cmd_control pointer
f2fs: free meta pages if sanity check for ckpt is failed
f2fs: detect wrong layout
f2fs: call sync_fs when f2fs is idle
Revert "f2fs: use percpu_counter for # of dirty pages in inode"
f2fs: return AOP_WRITEPAGE_ACTIVATE for writepage
f2fs: do not activate auto_recovery for fallocated i_size
f2fs: fix to determine start_cp_addr by sbi->cur_cp_pack
f2fs: fix 32-bit build
f2fs: set ->owner for debugfs status file's file_operations
f2fs: fix incorrect free inode count in ->statfs
f2fs: drop duplicate header timer.h
f2fs: fix wrong AUTO_RECOVER condition
f2fs: do not recover i_size if it's valid
f2fs: fix fdatasync
f2fs: fix to account total free nid correctly
f2fs: fix an infinite loop when flush nodes in cp
f2fs: don't wait writeback for datas during checkpoint
f2fs: fix wrong written_valid_blocks counting
...
Diffstat (limited to 'fs/f2fs/node.c')
-rw-r--r-- | fs/f2fs/node.c | 226 |
1 files changed, 141 insertions, 85 deletions
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index d1e29deb4598..b9078fdb3743 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -45,8 +45,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type) * give 25%, 25%, 50%, 50%, 50% memory for each components respectively */ if (type == FREE_NIDS) { - mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >> - PAGE_SHIFT; + mem_size = (nm_i->nid_cnt[FREE_NID_LIST] * + sizeof(struct free_nid)) >> PAGE_SHIFT; res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2); } else if (type == NAT_ENTRIES) { mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >> @@ -270,8 +270,9 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid, e = grab_nat_entry(nm_i, nid); node_info_from_raw_nat(&e->ni, ne); } else { - f2fs_bug_on(sbi, nat_get_ino(e) != ne->ino || - nat_get_blkaddr(e) != ne->block_addr || + f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) || + nat_get_blkaddr(e) != + le32_to_cpu(ne->block_addr) || nat_get_version(e) != ne->version); } } @@ -1204,6 +1205,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino) ret = f2fs_write_inline_data(inode, page); inode_dec_dirty_pages(inode); + remove_dirty_inode(inode); if (ret) set_page_dirty(page); page_out: @@ -1338,7 +1340,8 @@ retry: if (unlikely(f2fs_cp_error(sbi))) { f2fs_put_page(last_page, 0); pagevec_release(&pvec); - return -EIO; + ret = -EIO; + goto out; } if (!IS_DNODE(page) || !is_cold_node(page)) @@ -1407,11 +1410,12 @@ continue_unlock: "Retry to write fsync mark: ino=%u, idx=%lx", ino, last_page->index); lock_page(last_page); + f2fs_wait_on_page_writeback(last_page, NODE, true); set_page_dirty(last_page); unlock_page(last_page); goto retry; } - +out: if (nwritten) f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE); return ret ? -EIO: 0; @@ -1692,11 +1696,35 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i, return radix_tree_lookup(&nm_i->free_nid_root, n); } -static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i, - struct free_nid *i) +static int __insert_nid_to_list(struct f2fs_sb_info *sbi, + struct free_nid *i, enum nid_list list, bool new) { + struct f2fs_nm_info *nm_i = NM_I(sbi); + + if (new) { + int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i); + if (err) + return err; + } + + f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW : + i->state != NID_ALLOC); + nm_i->nid_cnt[list]++; + list_add_tail(&i->list, &nm_i->nid_list[list]); + return 0; +} + +static void __remove_nid_from_list(struct f2fs_sb_info *sbi, + struct free_nid *i, enum nid_list list, bool reuse) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW : + i->state != NID_ALLOC); + nm_i->nid_cnt[list]--; list_del(&i->list); - radix_tree_delete(&nm_i->free_nid_root, i->nid); + if (!reuse) + radix_tree_delete(&nm_i->free_nid_root, i->nid); } static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) @@ -1704,9 +1732,7 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; struct nat_entry *ne; - - if (!available_free_memory(sbi, FREE_NIDS)) - return -1; + int err; /* 0 nid should not be used */ if (unlikely(nid == 0)) @@ -1729,33 +1755,30 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build) return 0; } - spin_lock(&nm_i->free_nid_list_lock); - if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) { - spin_unlock(&nm_i->free_nid_list_lock); - radix_tree_preload_end(); + spin_lock(&nm_i->nid_list_lock); + err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true); + spin_unlock(&nm_i->nid_list_lock); + radix_tree_preload_end(); + if (err) { kmem_cache_free(free_nid_slab, i); return 0; } - list_add_tail(&i->list, &nm_i->free_nid_list); - nm_i->fcnt++; - spin_unlock(&nm_i->free_nid_list_lock); - radix_tree_preload_end(); return 1; } -static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) +static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid) { + struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; bool need_free = false; - spin_lock(&nm_i->free_nid_list_lock); + spin_lock(&nm_i->nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); if (i && i->state == NID_NEW) { - __del_from_free_nid_list(nm_i, i); - nm_i->fcnt--; + __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); need_free = true; } - spin_unlock(&nm_i->free_nid_list_lock); + spin_unlock(&nm_i->nid_list_lock); if (need_free) kmem_cache_free(free_nid_slab, i); @@ -1778,14 +1801,12 @@ static void scan_nat_page(struct f2fs_sb_info *sbi, blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); f2fs_bug_on(sbi, blk_addr == NEW_ADDR); - if (blk_addr == NULL_ADDR) { - if (add_free_nid(sbi, start_nid, true) < 0) - break; - } + if (blk_addr == NULL_ADDR) + add_free_nid(sbi, start_nid, true); } } -void build_free_nids(struct f2fs_sb_info *sbi) +static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync) { struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); @@ -1794,7 +1815,10 @@ void build_free_nids(struct f2fs_sb_info *sbi) nid_t nid = nm_i->next_scan_nid; /* Enough entries */ - if (nm_i->fcnt >= NAT_ENTRY_PER_BLOCK) + if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK) + return; + + if (!sync && !available_free_memory(sbi, FREE_NIDS)) return; /* readahead nat pages to be scanned */ @@ -1830,7 +1854,7 @@ void build_free_nids(struct f2fs_sb_info *sbi) if (addr == NULL_ADDR) add_free_nid(sbi, nid, true); else - remove_free_nid(nm_i, nid); + remove_free_nid(sbi, nid); } up_read(&curseg->journal_rwsem); up_read(&nm_i->nat_tree_lock); @@ -1839,6 +1863,13 @@ void build_free_nids(struct f2fs_sb_info *sbi) nm_i->ra_nid_pages, META_NAT, false); } +void build_free_nids(struct f2fs_sb_info *sbi, bool sync) +{ + mutex_lock(&NM_I(sbi)->build_lock); + __build_free_nids(sbi, sync); + mutex_unlock(&NM_I(sbi)->build_lock); +} + /* * If this function returns success, caller can obtain a new nid * from second parameter of this function. @@ -1853,31 +1884,31 @@ retry: if (time_to_inject(sbi, FAULT_ALLOC_NID)) return false; #endif - if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids)) - return false; + spin_lock(&nm_i->nid_list_lock); - spin_lock(&nm_i->free_nid_list_lock); + if (unlikely(nm_i->available_nids == 0)) { + spin_unlock(&nm_i->nid_list_lock); + return false; + } /* We should not use stale free nids created by build_free_nids */ - if (nm_i->fcnt && !on_build_free_nids(nm_i)) { - f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list)); - list_for_each_entry(i, &nm_i->free_nid_list, list) - if (i->state == NID_NEW) - break; - - f2fs_bug_on(sbi, i->state != NID_NEW); + if (nm_i->nid_cnt[FREE_NID_LIST] && !on_build_free_nids(nm_i)) { + f2fs_bug_on(sbi, list_empty(&nm_i->nid_list[FREE_NID_LIST])); + i = list_first_entry(&nm_i->nid_list[FREE_NID_LIST], + struct free_nid, list); *nid = i->nid; + + __remove_nid_from_list(sbi, i, FREE_NID_LIST, true); i->state = NID_ALLOC; - nm_i->fcnt--; - spin_unlock(&nm_i->free_nid_list_lock); + __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false); + nm_i->available_nids--; + spin_unlock(&nm_i->nid_list_lock); return true; } - spin_unlock(&nm_i->free_nid_list_lock); + spin_unlock(&nm_i->nid_list_lock); /* Let's scan nat pages and its caches to get free nids */ - mutex_lock(&nm_i->build_lock); - build_free_nids(sbi); - mutex_unlock(&nm_i->build_lock); + build_free_nids(sbi, true); goto retry; } @@ -1889,11 +1920,11 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) struct f2fs_nm_info *nm_i = NM_I(sbi); struct free_nid *i; - spin_lock(&nm_i->free_nid_list_lock); + spin_lock(&nm_i->nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); - f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); - __del_from_free_nid_list(nm_i, i); - spin_unlock(&nm_i->free_nid_list_lock); + f2fs_bug_on(sbi, !i); + __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false); + spin_unlock(&nm_i->nid_list_lock); kmem_cache_free(free_nid_slab, i); } @@ -1910,17 +1941,22 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) if (!nid) return; - spin_lock(&nm_i->free_nid_list_lock); + spin_lock(&nm_i->nid_list_lock); i = __lookup_free_nid_list(nm_i, nid); - f2fs_bug_on(sbi, !i || i->state != NID_ALLOC); + f2fs_bug_on(sbi, !i); + if (!available_free_memory(sbi, FREE_NIDS)) { - __del_from_free_nid_list(nm_i, i); + __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false); need_free = true; } else { + __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, true); i->state = NID_NEW; - nm_i->fcnt++; + __insert_nid_to_list(sbi, i, FREE_NID_LIST, false); } - spin_unlock(&nm_i->free_nid_list_lock); + + nm_i->available_nids++; + + spin_unlock(&nm_i->nid_list_lock); if (need_free) kmem_cache_free(free_nid_slab, i); @@ -1932,24 +1968,24 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink) struct free_nid *i, *next; int nr = nr_shrink; - if (nm_i->fcnt <= MAX_FREE_NIDS) + if (nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS) return 0; if (!mutex_trylock(&nm_i->build_lock)) return 0; - spin_lock(&nm_i->free_nid_list_lock); - list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) { - if (nr_shrink <= 0 || nm_i->fcnt <= MAX_FREE_NIDS) + spin_lock(&nm_i->nid_list_lock); + list_for_each_entry_safe(i, next, &nm_i->nid_list[FREE_NID_LIST], + list) { + if (nr_shrink <= 0 || + nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS) break; - if (i->state == NID_ALLOC) - continue; - __del_from_free_nid_list(nm_i, i); + + __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); kmem_cache_free(free_nid_slab, i); - nm_i->fcnt--; nr_shrink--; } - spin_unlock(&nm_i->free_nid_list_lock); + spin_unlock(&nm_i->nid_list_lock); mutex_unlock(&nm_i->build_lock); return nr - nr_shrink; @@ -2005,7 +2041,7 @@ recover_xnid: if (unlikely(!inc_valid_node_count(sbi, inode))) f2fs_bug_on(sbi, 1); - remove_free_nid(NM_I(sbi), new_xnid); + remove_free_nid(sbi, new_xnid); get_node_info(sbi, new_xnid, &ni); ni.ino = inode->i_ino; set_node_addr(sbi, &ni, NEW_ADDR, false); @@ -2035,7 +2071,7 @@ retry: } /* Should not use this inode from free nid list */ - remove_free_nid(NM_I(sbi), ino); + remove_free_nid(sbi, ino); if (!PageUptodate(ipage)) SetPageUptodate(ipage); @@ -2069,7 +2105,6 @@ int restore_node_summary(struct f2fs_sb_info *sbi, struct f2fs_node *rn; struct f2fs_summary *sum_entry; block_t addr; - int bio_blocks = MAX_BIO_BLOCKS(sbi); int i, idx, last_offset, nrpages; /* scan the node segment */ @@ -2078,7 +2113,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi, sum_entry = &sum->entries[0]; for (i = 0; i < last_offset; i += nrpages, addr += nrpages) { - nrpages = min(last_offset - i, bio_blocks); + nrpages = min(last_offset - i, BIO_MAX_PAGES); /* readahead node pages */ ra_meta_pages(sbi, addr, nrpages, META_POR, true); @@ -2120,6 +2155,19 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi) ne = grab_nat_entry(nm_i, nid); node_info_from_raw_nat(&ne->ni, &raw_ne); } + + /* + * if a free nat in journal has not been used after last + * checkpoint, we should remove it from available nids, + * since later we will add it again. + */ + if (!get_nat_flag(ne, IS_DIRTY) && + le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) { + spin_lock(&nm_i->nid_list_lock); + nm_i->available_nids--; + spin_unlock(&nm_i->nid_list_lock); + } + __set_nat_cache_dirty(nm_i, ne); } update_nats_in_cursum(journal, -i); @@ -2192,8 +2240,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi, raw_nat_from_node_info(raw_ne, &ne->ni); nat_reset_flag(ne); __clear_nat_cache_dirty(NM_I(sbi), ne); - if (nat_get_blkaddr(ne) == NULL_ADDR) + if (nat_get_blkaddr(ne) == NULL_ADDR) { add_free_nid(sbi, nid, false); + spin_lock(&NM_I(sbi)->nid_list_lock); + NM_I(sbi)->available_nids++; + spin_unlock(&NM_I(sbi)->nid_list_lock); + } } if (to_journal) @@ -2268,21 +2320,24 @@ static int init_node_manager(struct f2fs_sb_info *sbi) nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks; /* not used nids: 0, node, meta, (and root counted as valid node) */ - nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM; - nm_i->fcnt = 0; + nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count - + F2FS_RESERVED_NODE_NUM; + nm_i->nid_cnt[FREE_NID_LIST] = 0; + nm_i->nid_cnt[ALLOC_NID_LIST] = 0; nm_i->nat_cnt = 0; nm_i->ram_thresh = DEF_RAM_THRESHOLD; nm_i->ra_nid_pages = DEF_RA_NID_PAGES; nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD; INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC); - INIT_LIST_HEAD(&nm_i->free_nid_list); + INIT_LIST_HEAD(&nm_i->nid_list[FREE_NID_LIST]); + INIT_LIST_HEAD(&nm_i->nid_list[ALLOC_NID_LIST]); INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO); INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO); INIT_LIST_HEAD(&nm_i->nat_entries); mutex_init(&nm_i->build_lock); - spin_lock_init(&nm_i->free_nid_list_lock); + spin_lock_init(&nm_i->nid_list_lock); init_rwsem(&nm_i->nat_tree_lock); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); @@ -2310,7 +2365,7 @@ int build_node_manager(struct f2fs_sb_info *sbi) if (err) return err; - build_free_nids(sbi); + build_free_nids(sbi, true); return 0; } @@ -2327,17 +2382,18 @@ void destroy_node_manager(struct f2fs_sb_info *sbi) return; /* destroy free nid list */ - spin_lock(&nm_i->free_nid_list_lock); - list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) { - f2fs_bug_on(sbi, i->state == NID_ALLOC); - __del_from_free_nid_list(nm_i, i); - nm_i->fcnt--; - spin_unlock(&nm_i->free_nid_list_lock); + spin_lock(&nm_i->nid_list_lock); + list_for_each_entry_safe(i, next_i, &nm_i->nid_list[FREE_NID_LIST], + list) { + __remove_nid_from_list(sbi, i, FREE_NID_LIST, false); + spin_unlock(&nm_i->nid_list_lock); kmem_cache_free(free_nid_slab, i); - spin_lock(&nm_i->free_nid_list_lock); + spin_lock(&nm_i->nid_list_lock); } - f2fs_bug_on(sbi, nm_i->fcnt); - spin_unlock(&nm_i->free_nid_list_lock); + f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID_LIST]); + f2fs_bug_on(sbi, nm_i->nid_cnt[ALLOC_NID_LIST]); + f2fs_bug_on(sbi, !list_empty(&nm_i->nid_list[ALLOC_NID_LIST])); + spin_unlock(&nm_i->nid_list_lock); /* destroy nat cache */ down_write(&nm_i->nat_tree_lock); |