author    Linus Torvalds <torvalds@linux-foundation.org>  2016-12-14 09:07:36 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>  2016-12-14 09:07:36 -0800
commit    09cb6464fe5e7fcd5177911429badd139c4481b7 (patch)
tree      5f7af2d0778f699053da6ed2e43662fff2d51e73 /fs/f2fs/node.c
parent    19d37ce2a7159ee30bd59d14fe5fe13c932bd5b7 (diff)
parent    c0ed4405a99ec9be2a0f062eaafc002d8d26c99f (diff)
Merge tag 'for-f2fs-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
 "This patch series contains several performance tuning patches regarding
  the IO submission flow, in addition to supporting new features such as
  a ZBC-based drive and multiple devices.

  It also includes some major bug fixes such as:

   - checkpoint version control

   - fdatasync-related roll-forward recovery routine

   - memory boundary or null-pointer access in corner cases

   - missing error cases

  It has various minor clean-up patches as well"

* tag 'for-f2fs-4.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (66 commits)
  f2fs: fix a missing size change in f2fs_setattr
  f2fs: fix to access nullified flush_cmd_control pointer
  f2fs: free meta pages if sanity check for ckpt is failed
  f2fs: detect wrong layout
  f2fs: call sync_fs when f2fs is idle
  Revert "f2fs: use percpu_counter for # of dirty pages in inode"
  f2fs: return AOP_WRITEPAGE_ACTIVATE for writepage
  f2fs: do not activate auto_recovery for fallocated i_size
  f2fs: fix to determine start_cp_addr by sbi->cur_cp_pack
  f2fs: fix 32-bit build
  f2fs: set ->owner for debugfs status file's file_operations
  f2fs: fix incorrect free inode count in ->statfs
  f2fs: drop duplicate header timer.h
  f2fs: fix wrong AUTO_RECOVER condition
  f2fs: do not recover i_size if it's valid
  f2fs: fix fdatasync
  f2fs: fix to account total free nid correctly
  f2fs: fix an infinite loop when flush nodes in cp
  f2fs: don't wait writeback for datas during checkpoint
  f2fs: fix wrong written_valid_blocks counting
  ...
Diffstat (limited to 'fs/f2fs/node.c')
-rw-r--r--  fs/f2fs/node.c  226

1 file changed, 141 insertions(+), 85 deletions(-)
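Most of what follows converts the node manager's free-nid tracking from a single list (nm_i->free_nid_list plus an fcnt counter) to a pair of lists indexed by an enum nid_list, with per-list counts in nid_cnt[] and the renamed nid_list_lock guarding both. As a reading aid, here is a minimal sketch of the resulting bookkeeping; the field names follow the hunks below, but the reduced struct is an illustration, not the actual layout in f2fs.h:

    #include <linux/list.h>
    #include <linux/radix-tree.h>
    #include <linux/spinlock.h>

    /* Illustrative sketch only: names match the diff below, but the
     * struct is trimmed to the parts this patch touches. */
    enum nid_list {
            FREE_NID_LIST,  /* free nids ready to hand out (state == NID_NEW) */
            ALLOC_NID_LIST, /* nids handed out, not yet committed (NID_ALLOC) */
            MAX_NID_LIST,
    };

    struct nm_info_sketch {
            struct radix_tree_root free_nid_root;          /* nid -> struct free_nid */
            struct list_head       nid_list[MAX_NID_LIST]; /* both lists */
            unsigned int           nid_cnt[MAX_NID_LIST];  /* per-list entry counts */
            spinlock_t             nid_list_lock;          /* protects lists, counts, tree */
    };

With this split, alloc_nid() moves an entry from FREE_NID_LIST to ALLOC_NID_LIST under a single lock hold (the reuse flag keeps its radix-tree slot), alloc_nid_done() deletes it from ALLOC_NID_LIST, and alloc_nid_failed() moves it back, which is why try_to_free_nids() below can drop its old `state == NID_ALLOC` skip.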
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d1e29deb4598..b9078fdb3743 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -45,8 +45,8 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
* give 25%, 25%, 50%, 50%, 50% memory for each components respectively
*/
if (type == FREE_NIDS) {
- mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
- PAGE_SHIFT;
+ mem_size = (nm_i->nid_cnt[FREE_NID_LIST] *
+ sizeof(struct free_nid)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == NAT_ENTRIES) {
mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
@@ -270,8 +270,9 @@ static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
e = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&e->ni, ne);
} else {
- f2fs_bug_on(sbi, nat_get_ino(e) != ne->ino ||
- nat_get_blkaddr(e) != ne->block_addr ||
+ f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
+ nat_get_blkaddr(e) !=
+ le32_to_cpu(ne->block_addr) ||
nat_get_version(e) != ne->version);
}
}
@@ -1204,6 +1205,7 @@ static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
ret = f2fs_write_inline_data(inode, page);
inode_dec_dirty_pages(inode);
+ remove_dirty_inode(inode);
if (ret)
set_page_dirty(page);
page_out:
@@ -1338,7 +1340,8 @@ retry:
if (unlikely(f2fs_cp_error(sbi))) {
f2fs_put_page(last_page, 0);
pagevec_release(&pvec);
- return -EIO;
+ ret = -EIO;
+ goto out;
}
if (!IS_DNODE(page) || !is_cold_node(page))
@@ -1407,11 +1410,12 @@ continue_unlock:
"Retry to write fsync mark: ino=%u, idx=%lx",
ino, last_page->index);
lock_page(last_page);
+ f2fs_wait_on_page_writeback(last_page, NODE, true);
set_page_dirty(last_page);
unlock_page(last_page);
goto retry;
}
-
+out:
if (nwritten)
f2fs_submit_merged_bio_cond(sbi, NULL, NULL, ino, NODE, WRITE);
return ret ? -EIO: 0;
@@ -1692,11 +1696,35 @@ static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
return radix_tree_lookup(&nm_i->free_nid_root, n);
}
-static void __del_from_free_nid_list(struct f2fs_nm_info *nm_i,
- struct free_nid *i)
+static int __insert_nid_to_list(struct f2fs_sb_info *sbi,
+ struct free_nid *i, enum nid_list list, bool new)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+ if (new) {
+ int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
+ if (err)
+ return err;
+ }
+
+ f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
+ i->state != NID_ALLOC);
+ nm_i->nid_cnt[list]++;
+ list_add_tail(&i->list, &nm_i->nid_list[list]);
+ return 0;
+}
+
+static void __remove_nid_from_list(struct f2fs_sb_info *sbi,
+ struct free_nid *i, enum nid_list list, bool reuse)
+{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
+
+ f2fs_bug_on(sbi, list == FREE_NID_LIST ? i->state != NID_NEW :
+ i->state != NID_ALLOC);
+ nm_i->nid_cnt[list]--;
list_del(&i->list);
- radix_tree_delete(&nm_i->free_nid_root, i->nid);
+ if (!reuse)
+ radix_tree_delete(&nm_i->free_nid_root, i->nid);
}
static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
@@ -1704,9 +1732,7 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
struct nat_entry *ne;
-
- if (!available_free_memory(sbi, FREE_NIDS))
- return -1;
+ int err;
/* 0 nid should not be used */
if (unlikely(nid == 0))
@@ -1729,33 +1755,30 @@ static int add_free_nid(struct f2fs_sb_info *sbi, nid_t nid, bool build)
return 0;
}
- spin_lock(&nm_i->free_nid_list_lock);
- if (radix_tree_insert(&nm_i->free_nid_root, i->nid, i)) {
- spin_unlock(&nm_i->free_nid_list_lock);
- radix_tree_preload_end();
+ spin_lock(&nm_i->nid_list_lock);
+ err = __insert_nid_to_list(sbi, i, FREE_NID_LIST, true);
+ spin_unlock(&nm_i->nid_list_lock);
+ radix_tree_preload_end();
+ if (err) {
kmem_cache_free(free_nid_slab, i);
return 0;
}
- list_add_tail(&i->list, &nm_i->free_nid_list);
- nm_i->fcnt++;
- spin_unlock(&nm_i->free_nid_list_lock);
- radix_tree_preload_end();
return 1;
}
-static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid)
+static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
bool need_free = false;
- spin_lock(&nm_i->free_nid_list_lock);
+ spin_lock(&nm_i->nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
if (i && i->state == NID_NEW) {
- __del_from_free_nid_list(nm_i, i);
- nm_i->fcnt--;
+ __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
need_free = true;
}
- spin_unlock(&nm_i->free_nid_list_lock);
+ spin_unlock(&nm_i->nid_list_lock);
if (need_free)
kmem_cache_free(free_nid_slab, i);
@@ -1778,14 +1801,12 @@ static void scan_nat_page(struct f2fs_sb_info *sbi,
blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
f2fs_bug_on(sbi, blk_addr == NEW_ADDR);
- if (blk_addr == NULL_ADDR) {
- if (add_free_nid(sbi, start_nid, true) < 0)
- break;
- }
+ if (blk_addr == NULL_ADDR)
+ add_free_nid(sbi, start_nid, true);
}
}
-void build_free_nids(struct f2fs_sb_info *sbi)
+static void __build_free_nids(struct f2fs_sb_info *sbi, bool sync)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -1794,7 +1815,10 @@ void build_free_nids(struct f2fs_sb_info *sbi)
nid_t nid = nm_i->next_scan_nid;
/* Enough entries */
- if (nm_i->fcnt >= NAT_ENTRY_PER_BLOCK)
+ if (nm_i->nid_cnt[FREE_NID_LIST] >= NAT_ENTRY_PER_BLOCK)
+ return;
+
+ if (!sync && !available_free_memory(sbi, FREE_NIDS))
return;
/* readahead nat pages to be scanned */
@@ -1830,7 +1854,7 @@ void build_free_nids(struct f2fs_sb_info *sbi)
if (addr == NULL_ADDR)
add_free_nid(sbi, nid, true);
else
- remove_free_nid(nm_i, nid);
+ remove_free_nid(sbi, nid);
}
up_read(&curseg->journal_rwsem);
up_read(&nm_i->nat_tree_lock);
@@ -1839,6 +1863,13 @@ void build_free_nids(struct f2fs_sb_info *sbi)
nm_i->ra_nid_pages, META_NAT, false);
}
+void build_free_nids(struct f2fs_sb_info *sbi, bool sync)
+{
+ mutex_lock(&NM_I(sbi)->build_lock);
+ __build_free_nids(sbi, sync);
+ mutex_unlock(&NM_I(sbi)->build_lock);
+}
+
/*
* If this function returns success, caller can obtain a new nid
* from second parameter of this function.
@@ -1853,31 +1884,31 @@ retry:
if (time_to_inject(sbi, FAULT_ALLOC_NID))
return false;
#endif
- if (unlikely(sbi->total_valid_node_count + 1 > nm_i->available_nids))
- return false;
+ spin_lock(&nm_i->nid_list_lock);
- spin_lock(&nm_i->free_nid_list_lock);
+ if (unlikely(nm_i->available_nids == 0)) {
+ spin_unlock(&nm_i->nid_list_lock);
+ return false;
+ }
/* We should not use stale free nids created by build_free_nids */
- if (nm_i->fcnt && !on_build_free_nids(nm_i)) {
- f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
- list_for_each_entry(i, &nm_i->free_nid_list, list)
- if (i->state == NID_NEW)
- break;
-
- f2fs_bug_on(sbi, i->state != NID_NEW);
+ if (nm_i->nid_cnt[FREE_NID_LIST] && !on_build_free_nids(nm_i)) {
+ f2fs_bug_on(sbi, list_empty(&nm_i->nid_list[FREE_NID_LIST]));
+ i = list_first_entry(&nm_i->nid_list[FREE_NID_LIST],
+ struct free_nid, list);
*nid = i->nid;
+
+ __remove_nid_from_list(sbi, i, FREE_NID_LIST, true);
i->state = NID_ALLOC;
- nm_i->fcnt--;
- spin_unlock(&nm_i->free_nid_list_lock);
+ __insert_nid_to_list(sbi, i, ALLOC_NID_LIST, false);
+ nm_i->available_nids--;
+ spin_unlock(&nm_i->nid_list_lock);
return true;
}
- spin_unlock(&nm_i->free_nid_list_lock);
+ spin_unlock(&nm_i->nid_list_lock);
/* Let's scan nat pages and its caches to get free nids */
- mutex_lock(&nm_i->build_lock);
- build_free_nids(sbi);
- mutex_unlock(&nm_i->build_lock);
+ build_free_nids(sbi, true);
goto retry;
}
@@ -1889,11 +1920,11 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct free_nid *i;
- spin_lock(&nm_i->free_nid_list_lock);
+ spin_lock(&nm_i->nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
- f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
- __del_from_free_nid_list(nm_i, i);
- spin_unlock(&nm_i->free_nid_list_lock);
+ f2fs_bug_on(sbi, !i);
+ __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
+ spin_unlock(&nm_i->nid_list_lock);
kmem_cache_free(free_nid_slab, i);
}
@@ -1910,17 +1941,22 @@ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
if (!nid)
return;
- spin_lock(&nm_i->free_nid_list_lock);
+ spin_lock(&nm_i->nid_list_lock);
i = __lookup_free_nid_list(nm_i, nid);
- f2fs_bug_on(sbi, !i || i->state != NID_ALLOC);
+ f2fs_bug_on(sbi, !i);
+
if (!available_free_memory(sbi, FREE_NIDS)) {
- __del_from_free_nid_list(nm_i, i);
+ __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, false);
need_free = true;
} else {
+ __remove_nid_from_list(sbi, i, ALLOC_NID_LIST, true);
i->state = NID_NEW;
- nm_i->fcnt++;
+ __insert_nid_to_list(sbi, i, FREE_NID_LIST, false);
}
- spin_unlock(&nm_i->free_nid_list_lock);
+
+ nm_i->available_nids++;
+
+ spin_unlock(&nm_i->nid_list_lock);
if (need_free)
kmem_cache_free(free_nid_slab, i);
@@ -1932,24 +1968,24 @@ int try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
struct free_nid *i, *next;
int nr = nr_shrink;
- if (nm_i->fcnt <= MAX_FREE_NIDS)
+ if (nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
return 0;
if (!mutex_trylock(&nm_i->build_lock))
return 0;
- spin_lock(&nm_i->free_nid_list_lock);
- list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
- if (nr_shrink <= 0 || nm_i->fcnt <= MAX_FREE_NIDS)
+ spin_lock(&nm_i->nid_list_lock);
+ list_for_each_entry_safe(i, next, &nm_i->nid_list[FREE_NID_LIST],
+ list) {
+ if (nr_shrink <= 0 ||
+ nm_i->nid_cnt[FREE_NID_LIST] <= MAX_FREE_NIDS)
break;
- if (i->state == NID_ALLOC)
- continue;
- __del_from_free_nid_list(nm_i, i);
+
+ __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
kmem_cache_free(free_nid_slab, i);
- nm_i->fcnt--;
nr_shrink--;
}
- spin_unlock(&nm_i->free_nid_list_lock);
+ spin_unlock(&nm_i->nid_list_lock);
mutex_unlock(&nm_i->build_lock);
return nr - nr_shrink;
@@ -2005,7 +2041,7 @@ recover_xnid:
if (unlikely(!inc_valid_node_count(sbi, inode)))
f2fs_bug_on(sbi, 1);
- remove_free_nid(NM_I(sbi), new_xnid);
+ remove_free_nid(sbi, new_xnid);
get_node_info(sbi, new_xnid, &ni);
ni.ino = inode->i_ino;
set_node_addr(sbi, &ni, NEW_ADDR, false);
@@ -2035,7 +2071,7 @@ retry:
}
/* Should not use this inode from free nid list */
- remove_free_nid(NM_I(sbi), ino);
+ remove_free_nid(sbi, ino);
if (!PageUptodate(ipage))
SetPageUptodate(ipage);
@@ -2069,7 +2105,6 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
struct f2fs_node *rn;
struct f2fs_summary *sum_entry;
block_t addr;
- int bio_blocks = MAX_BIO_BLOCKS(sbi);
int i, idx, last_offset, nrpages;
/* scan the node segment */
@@ -2078,7 +2113,7 @@ int restore_node_summary(struct f2fs_sb_info *sbi,
sum_entry = &sum->entries[0];
for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
- nrpages = min(last_offset - i, bio_blocks);
+ nrpages = min(last_offset - i, BIO_MAX_PAGES);
/* readahead node pages */
ra_meta_pages(sbi, addr, nrpages, META_POR, true);
@@ -2120,6 +2155,19 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
ne = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&ne->ni, &raw_ne);
}
+
+ /*
+ * if a free nat in journal has not been used after last
+ * checkpoint, we should remove it from available nids,
+ * since later we will add it again.
+ */
+ if (!get_nat_flag(ne, IS_DIRTY) &&
+ le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) {
+ spin_lock(&nm_i->nid_list_lock);
+ nm_i->available_nids--;
+ spin_unlock(&nm_i->nid_list_lock);
+ }
+
__set_nat_cache_dirty(nm_i, ne);
}
update_nats_in_cursum(journal, -i);
@@ -2192,8 +2240,12 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
raw_nat_from_node_info(raw_ne, &ne->ni);
nat_reset_flag(ne);
__clear_nat_cache_dirty(NM_I(sbi), ne);
- if (nat_get_blkaddr(ne) == NULL_ADDR)
+ if (nat_get_blkaddr(ne) == NULL_ADDR) {
add_free_nid(sbi, nid, false);
+ spin_lock(&NM_I(sbi)->nid_list_lock);
+ NM_I(sbi)->available_nids++;
+ spin_unlock(&NM_I(sbi)->nid_list_lock);
+ }
}
if (to_journal)
@@ -2268,21 +2320,24 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nat_blocks;
/* not used nids: 0, node, meta, (and root counted as valid node) */
- nm_i->available_nids = nm_i->max_nid - F2FS_RESERVED_NODE_NUM;
- nm_i->fcnt = 0;
+ nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
+ F2FS_RESERVED_NODE_NUM;
+ nm_i->nid_cnt[FREE_NID_LIST] = 0;
+ nm_i->nid_cnt[ALLOC_NID_LIST] = 0;
nm_i->nat_cnt = 0;
nm_i->ram_thresh = DEF_RAM_THRESHOLD;
nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
- INIT_LIST_HEAD(&nm_i->free_nid_list);
+ INIT_LIST_HEAD(&nm_i->nid_list[FREE_NID_LIST]);
+ INIT_LIST_HEAD(&nm_i->nid_list[ALLOC_NID_LIST]);
INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
INIT_LIST_HEAD(&nm_i->nat_entries);
mutex_init(&nm_i->build_lock);
- spin_lock_init(&nm_i->free_nid_list_lock);
+ spin_lock_init(&nm_i->nid_list_lock);
init_rwsem(&nm_i->nat_tree_lock);
nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
@@ -2310,7 +2365,7 @@ int build_node_manager(struct f2fs_sb_info *sbi)
if (err)
return err;
- build_free_nids(sbi);
+ build_free_nids(sbi, true);
return 0;
}
@@ -2327,17 +2382,18 @@ void destroy_node_manager(struct f2fs_sb_info *sbi)
return;
/* destroy free nid list */
- spin_lock(&nm_i->free_nid_list_lock);
- list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
- f2fs_bug_on(sbi, i->state == NID_ALLOC);
- __del_from_free_nid_list(nm_i, i);
- nm_i->fcnt--;
- spin_unlock(&nm_i->free_nid_list_lock);
+ spin_lock(&nm_i->nid_list_lock);
+ list_for_each_entry_safe(i, next_i, &nm_i->nid_list[FREE_NID_LIST],
+ list) {
+ __remove_nid_from_list(sbi, i, FREE_NID_LIST, false);
+ spin_unlock(&nm_i->nid_list_lock);
kmem_cache_free(free_nid_slab, i);
- spin_lock(&nm_i->free_nid_list_lock);
+ spin_lock(&nm_i->nid_list_lock);
}
- f2fs_bug_on(sbi, nm_i->fcnt);
- spin_unlock(&nm_i->free_nid_list_lock);
+ f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID_LIST]);
+ f2fs_bug_on(sbi, nm_i->nid_cnt[ALLOC_NID_LIST]);
+ f2fs_bug_on(sbi, !list_empty(&nm_i->nid_list[ALLOC_NID_LIST]));
+ spin_unlock(&nm_i->nid_list_lock);
/* destroy nat cache */
down_write(&nm_i->nat_tree_lock);
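One pattern in the destroy_node_manager() hunk above deserves a note: the loop unlinks each free nid under nid_list_lock, drops the lock around kmem_cache_free(), then retakes it before list_for_each_entry_safe() advances to the saved next entry. That is only safe because nothing else can touch the list at unmount time. A reduced, generic sketch of the same shape (hypothetical names, not the f2fs code):

    #include <linux/list.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct node { struct list_head list; };

    /* Hypothetical drain helper showing the unlock-around-free shape used
     * above; the saved @next pointer stays valid only because no other
     * list users remain while the lock is dropped. */
    static void drain(struct list_head *head, spinlock_t *lock)
    {
            struct node *i, *next;

            spin_lock(lock);
            list_for_each_entry_safe(i, next, head, list) {
                    list_del(&i->list);
                    spin_unlock(lock);
                    kfree(i);       /* the diff uses kmem_cache_free(free_nid_slab, i) */
                    spin_lock(lock);
            }
            spin_unlock(lock);
    }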