summaryrefslogtreecommitdiffstats
path: root/fs/f2fs
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2020-10-16 15:14:43 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2020-10-16 15:14:43 -0700
commit: 7a3dadedc82e340f8292f64e7bfa964c525009c0 (patch)
tree: 27552fe7b39d671995cdc5e3add4847107b0ecd1 /fs/f2fs
parent: 54a4c789ca8091ab8fcd70285caeee2c5bc62997 (diff)
parent: 788e96d1d39949fc91457a816f4bda0d374c257b (diff)
downloadlinux-stable-7a3dadedc82e340f8292f64e7bfa964c525009c0.tar.gz
linux-stable-7a3dadedc82e340f8292f64e7bfa964c525009c0.tar.bz2
linux-stable-7a3dadedc82e340f8292f64e7bfa964c525009c0.zip
Merge tag 'f2fs-for-5.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "In this round, we've added new features such as zone capacity for ZNS and a new GC policy, ATGC, along with in-memory segment management. In addition, we could improve the decompression speed significantly by changing virtual mapping method. Even though we've fixed lots of small bugs in compression support, I feel that it becomes more stable so that I could give it a try in production. Enhancements: - support zone capacity in NVMe Zoned Namespace devices - introduce in-memory current segment management - add standard casefolding support - support age threshold based garbage collection - improve decompression speed by changing virtual mapping method Bug fixes: - fix condition checks in some ioctl() such as compression, move_range, etc - fix 32/64bits support in data structures - fix memory allocation in zstd decompress - add some boundary checks to avoid kernel panic on corrupted image - fix disallowing compression for non-empty file - fix slab leakage of compressed block writes In addition, it includes code refactoring for better readability and minor bug fixes for compression and zoned device support" * tag 'f2fs-for-5.10-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (51 commits) f2fs: code cleanup by removing unnecessary check f2fs: wait for sysfs kobject removal before freeing f2fs_sb_info f2fs: fix writecount false positive in releasing compress blocks f2fs: introduce check_swap_activate_fast() f2fs: don't issue flush in f2fs_flush_device_cache() for nobarrier case f2fs: handle errors of f2fs_get_meta_page_nofail f2fs: fix to set SBI_NEED_FSCK flag for inconsistent inode f2fs: reject CASEFOLD inode flag without casefold feature f2fs: fix memory alignment to support 32bit f2fs: fix slab leak of rpages pointer f2fs: compress: fix to disallow enabling compress on non-empty file f2fs: compress: introduce cic/dic slab cache f2fs: compress: introduce page array slab cache f2fs: fix to do sanity check on 
segment/section count f2fs: fix to check segment boundary during SIT page readahead f2fs: fix uninit-value in f2fs_lookup f2fs: remove unneeded parameter in find_in_block() f2fs: fix wrong total_sections check and fsmeta check f2fs: remove duplicated code in sanity_check_area_boundary f2fs: remove unused check on version_bitmap ...
Diffstat (limited to 'fs/f2fs')
-rw-r--r-- fs/f2fs/acl.c 6
-rw-r--r-- fs/f2fs/checkpoint.c 17
-rw-r--r-- fs/f2fs/compress.c 242
-rw-r--r-- fs/f2fs/data.c 119
-rw-r--r-- fs/f2fs/debug.c 18
-rw-r--r-- fs/f2fs/dir.c 109
-rw-r--r-- fs/f2fs/extent_cache.c 37
-rw-r--r-- fs/f2fs/f2fs.h 118
-rw-r--r-- fs/f2fs/file.c 88
-rw-r--r-- fs/f2fs/gc.c 413
-rw-r--r-- fs/f2fs/gc.h 69
-rw-r--r-- fs/f2fs/inline.c 4
-rw-r--r-- fs/f2fs/inode.c 21
-rw-r--r-- fs/f2fs/namei.c 2
-rw-r--r-- fs/f2fs/node.c 7
-rw-r--r-- fs/f2fs/segment.c 522
-rw-r--r-- fs/f2fs/segment.h 71
-rw-r--r-- fs/f2fs/super.c 168
-rw-r--r-- fs/f2fs/sysfs.c 22
-rw-r--r-- fs/f2fs/xattr.c 8
20 files changed, 1592 insertions, 469 deletions
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 217b290ae3a5..306413589827 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -160,7 +160,7 @@ static void *f2fs_acl_to_disk(struct f2fs_sb_info *sbi,
return (void *)f2fs_acl;
fail:
- kvfree(f2fs_acl);
+ kfree(f2fs_acl);
return ERR_PTR(-EINVAL);
}
@@ -190,7 +190,7 @@ static struct posix_acl *__f2fs_get_acl(struct inode *inode, int type,
acl = NULL;
else
acl = ERR_PTR(retval);
- kvfree(value);
+ kfree(value);
return acl;
}
@@ -240,7 +240,7 @@ static int __f2fs_set_acl(struct inode *inode, int type,
error = f2fs_setxattr(inode, name_index, "", value, size, ipage, 0);
- kvfree(value);
+ kfree(value);
if (!error)
set_cached_acl(inode, type, acl);
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index ff807e14c891..023462e80e58 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -107,7 +107,7 @@ struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
return __get_meta_page(sbi, index, true);
}
-struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index)
+struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index)
{
struct page *page;
int count = 0;
@@ -243,6 +243,8 @@ int f2fs_ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
blkno * NAT_ENTRY_PER_BLOCK);
break;
case META_SIT:
+ if (unlikely(blkno >= TOTAL_SEGS(sbi)))
+ goto out;
/* get sit block addr */
fio.new_blkaddr = current_sit_addr(sbi,
blkno * SIT_ENTRY_PER_BLOCK);
@@ -1047,8 +1049,12 @@ int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
get_pages(sbi, is_dir ?
F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
retry:
- if (unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi))) {
+ trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
+ get_pages(sbi, is_dir ?
+ F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
return -EIO;
+ }
spin_lock(&sbi->inode_lock[type]);
@@ -1619,11 +1625,16 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
f2fs_flush_sit_entries(sbi, cpc);
+ /* save inmem log status */
+ f2fs_save_inmem_curseg(sbi);
+
err = do_checkpoint(sbi, cpc);
if (err)
f2fs_release_discard_addrs(sbi);
else
f2fs_clear_prefree_segments(sbi, cpc);
+
+ f2fs_restore_inmem_curseg(sbi);
stop:
unblock_operations(sbi);
stat_inc_cp_count(sbi->stat_info);
@@ -1654,7 +1665,7 @@ void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi)
}
sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
- NR_CURSEG_TYPE - __cp_payload(sbi)) *
+ NR_CURSEG_PERSIST_TYPE - __cp_payload(sbi)) *
F2FS_ORPHANS_PER_BLOCK;
}
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 1dfb126a0cb2..14262e0f1cd6 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -17,6 +17,33 @@
#include "node.h"
#include <trace/events/f2fs.h>
+static struct kmem_cache *cic_entry_slab;
+static struct kmem_cache *dic_entry_slab;
+
+static void *page_array_alloc(struct inode *inode, int nr)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ unsigned int size = sizeof(struct page *) * nr;
+
+ if (likely(size <= sbi->page_array_slab_size))
+ return kmem_cache_zalloc(sbi->page_array_slab, GFP_NOFS);
+ return f2fs_kzalloc(sbi, size, GFP_NOFS);
+}
+
+static void page_array_free(struct inode *inode, void *pages, int nr)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ unsigned int size = sizeof(struct page *) * nr;
+
+ if (!pages)
+ return;
+
+ if (likely(size <= sbi->page_array_slab_size))
+ kmem_cache_free(sbi->page_array_slab, pages);
+ else
+ kfree(pages);
+}
+
struct f2fs_compress_ops {
int (*init_compress_ctx)(struct compress_ctx *cc);
void (*destroy_compress_ctx)(struct compress_ctx *cc);
@@ -130,19 +157,16 @@ struct page *f2fs_compress_control_page(struct page *page)
int f2fs_init_compress_ctx(struct compress_ctx *cc)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
-
- if (cc->nr_rpages)
+ if (cc->rpages)
return 0;
- cc->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
- cc->log_cluster_size, GFP_NOFS);
+ cc->rpages = page_array_alloc(cc->inode, cc->cluster_size);
return cc->rpages ? 0 : -ENOMEM;
}
void f2fs_destroy_compress_ctx(struct compress_ctx *cc)
{
- kfree(cc->rpages);
+ page_array_free(cc->inode, cc->rpages, cc->cluster_size);
cc->rpages = NULL;
cc->nr_rpages = 0;
cc->nr_cpages = 0;
@@ -382,16 +406,17 @@ static int zstd_init_decompress_ctx(struct decompress_io_ctx *dic)
ZSTD_DStream *stream;
void *workspace;
unsigned int workspace_size;
+ unsigned int max_window_size =
+ MAX_COMPRESS_WINDOW_SIZE(dic->log_cluster_size);
- workspace_size = ZSTD_DStreamWorkspaceBound(MAX_COMPRESS_WINDOW_SIZE);
+ workspace_size = ZSTD_DStreamWorkspaceBound(max_window_size);
workspace = f2fs_kvmalloc(F2FS_I_SB(dic->inode),
workspace_size, GFP_NOFS);
if (!workspace)
return -ENOMEM;
- stream = ZSTD_initDStream(MAX_COMPRESS_WINDOW_SIZE,
- workspace, workspace_size);
+ stream = ZSTD_initDStream(max_window_size, workspace, workspace_size);
if (!stream) {
printk_ratelimited("%sF2FS-fs (%s): %s ZSTD_initDStream failed\n",
KERN_ERR, F2FS_I_SB(dic->inode)->sb->s_id,
@@ -554,13 +579,29 @@ static void f2fs_compress_free_page(struct page *page)
mempool_free(page, compress_page_pool);
}
+#define MAX_VMAP_RETRIES 3
+
+static void *f2fs_vmap(struct page **pages, unsigned int count)
+{
+ int i;
+ void *buf = NULL;
+
+ for (i = 0; i < MAX_VMAP_RETRIES; i++) {
+ buf = vm_map_ram(pages, count, -1);
+ if (buf)
+ break;
+ vm_unmap_aliases();
+ }
+ return buf;
+}
+
static int f2fs_compress_pages(struct compress_ctx *cc)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
struct f2fs_inode_info *fi = F2FS_I(cc->inode);
const struct f2fs_compress_ops *cops =
f2fs_cops[fi->i_compress_algorithm];
- unsigned int max_len, nr_cpages;
+ unsigned int max_len, new_nr_cpages;
+ struct page **new_cpages;
int i, ret;
trace_f2fs_compress_pages_start(cc->inode, cc->cluster_idx,
@@ -575,8 +616,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
max_len = COMPRESS_HEADER_SIZE + cc->clen;
cc->nr_cpages = DIV_ROUND_UP(max_len, PAGE_SIZE);
- cc->cpages = f2fs_kzalloc(sbi, sizeof(struct page *) *
- cc->nr_cpages, GFP_NOFS);
+ cc->cpages = page_array_alloc(cc->inode, cc->nr_cpages);
if (!cc->cpages) {
ret = -ENOMEM;
goto destroy_compress_ctx;
@@ -590,13 +630,13 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
}
}
- cc->rbuf = vmap(cc->rpages, cc->cluster_size, VM_MAP, PAGE_KERNEL_RO);
+ cc->rbuf = f2fs_vmap(cc->rpages, cc->cluster_size);
if (!cc->rbuf) {
ret = -ENOMEM;
goto out_free_cpages;
}
- cc->cbuf = vmap(cc->cpages, cc->nr_cpages, VM_MAP, PAGE_KERNEL);
+ cc->cbuf = f2fs_vmap(cc->cpages, cc->nr_cpages);
if (!cc->cbuf) {
ret = -ENOMEM;
goto out_vunmap_rbuf;
@@ -618,16 +658,28 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++)
cc->cbuf->reserved[i] = cpu_to_le32(0);
- nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE);
+ new_nr_cpages = DIV_ROUND_UP(cc->clen + COMPRESS_HEADER_SIZE, PAGE_SIZE);
+
+ /* Now we're going to cut unnecessary tail pages */
+ new_cpages = page_array_alloc(cc->inode, new_nr_cpages);
+ if (!new_cpages) {
+ ret = -ENOMEM;
+ goto out_vunmap_cbuf;
+ }
/* zero out any unused part of the last page */
memset(&cc->cbuf->cdata[cc->clen], 0,
- (nr_cpages * PAGE_SIZE) - (cc->clen + COMPRESS_HEADER_SIZE));
+ (new_nr_cpages * PAGE_SIZE) -
+ (cc->clen + COMPRESS_HEADER_SIZE));
- vunmap(cc->cbuf);
- vunmap(cc->rbuf);
+ vm_unmap_ram(cc->cbuf, cc->nr_cpages);
+ vm_unmap_ram(cc->rbuf, cc->cluster_size);
- for (i = nr_cpages; i < cc->nr_cpages; i++) {
+ for (i = 0; i < cc->nr_cpages; i++) {
+ if (i < new_nr_cpages) {
+ new_cpages[i] = cc->cpages[i];
+ continue;
+ }
f2fs_compress_free_page(cc->cpages[i]);
cc->cpages[i] = NULL;
}
@@ -635,22 +687,24 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
if (cops->destroy_compress_ctx)
cops->destroy_compress_ctx(cc);
- cc->nr_cpages = nr_cpages;
+ page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ cc->cpages = new_cpages;
+ cc->nr_cpages = new_nr_cpages;
trace_f2fs_compress_pages_end(cc->inode, cc->cluster_idx,
cc->clen, ret);
return 0;
out_vunmap_cbuf:
- vunmap(cc->cbuf);
+ vm_unmap_ram(cc->cbuf, cc->nr_cpages);
out_vunmap_rbuf:
- vunmap(cc->rbuf);
+ vm_unmap_ram(cc->rbuf, cc->cluster_size);
out_free_cpages:
for (i = 0; i < cc->nr_cpages; i++) {
if (cc->cpages[i])
f2fs_compress_free_page(cc->cpages[i]);
}
- kfree(cc->cpages);
+ page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
cc->cpages = NULL;
destroy_compress_ctx:
if (cops->destroy_compress_ctx)
@@ -677,7 +731,7 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
if (bio->bi_status || PageError(page))
dic->failed = true;
- if (refcount_dec_not_one(&dic->ref))
+ if (atomic_dec_return(&dic->pending_pages))
return;
trace_f2fs_decompress_pages_start(dic->inode, dic->cluster_idx,
@@ -689,8 +743,7 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
goto out_free_dic;
}
- dic->tpages = f2fs_kzalloc(sbi, sizeof(struct page *) *
- dic->cluster_size, GFP_NOFS);
+ dic->tpages = page_array_alloc(dic->inode, dic->cluster_size);
if (!dic->tpages) {
ret = -ENOMEM;
goto out_free_dic;
@@ -715,13 +768,13 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
goto out_free_dic;
}
- dic->rbuf = vmap(dic->tpages, dic->cluster_size, VM_MAP, PAGE_KERNEL);
+ dic->rbuf = f2fs_vmap(dic->tpages, dic->cluster_size);
if (!dic->rbuf) {
ret = -ENOMEM;
goto destroy_decompress_ctx;
}
- dic->cbuf = vmap(dic->cpages, dic->nr_cpages, VM_MAP, PAGE_KERNEL_RO);
+ dic->cbuf = f2fs_vmap(dic->cpages, dic->nr_cpages);
if (!dic->cbuf) {
ret = -ENOMEM;
goto out_vunmap_rbuf;
@@ -738,15 +791,15 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
ret = cops->decompress_pages(dic);
out_vunmap_cbuf:
- vunmap(dic->cbuf);
+ vm_unmap_ram(dic->cbuf, dic->nr_cpages);
out_vunmap_rbuf:
- vunmap(dic->rbuf);
+ vm_unmap_ram(dic->rbuf, dic->cluster_size);
destroy_decompress_ctx:
if (cops->destroy_decompress_ctx)
cops->destroy_decompress_ctx(dic);
out_free_dic:
if (verity)
- refcount_set(&dic->ref, dic->nr_cpages);
+ atomic_set(&dic->pending_pages, dic->nr_cpages);
if (!verity)
f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
ret, false);
@@ -1029,6 +1082,7 @@ bool f2fs_compress_write_end(struct inode *inode, void *fsdata,
{
struct compress_ctx cc = {
+ .inode = inode,
.log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
.cluster_size = F2FS_I(inode)->i_cluster_size,
.rpages = fsdata,
@@ -1132,7 +1186,7 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
*/
down_read(&sbi->node_write);
} else if (!f2fs_trylock_op(sbi)) {
- return -EAGAIN;
+ goto out_free;
}
set_new_dnode(&dn, cc->inode, NULL, NULL, 0);
@@ -1155,15 +1209,14 @@ static int f2fs_write_compressed_pages(struct compress_ctx *cc,
fio.version = ni.version;
- cic = f2fs_kzalloc(sbi, sizeof(struct compress_io_ctx), GFP_NOFS);
+ cic = kmem_cache_zalloc(cic_entry_slab, GFP_NOFS);
if (!cic)
goto out_put_dnode;
cic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
cic->inode = inode;
- refcount_set(&cic->ref, cc->nr_cpages);
- cic->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
- cc->log_cluster_size, GFP_NOFS);
+ atomic_set(&cic->pending_pages, cc->nr_cpages);
+ cic->rpages = page_array_alloc(cc->inode, cc->cluster_size);
if (!cic->rpages)
goto out_put_cic;
@@ -1257,11 +1310,13 @@ unlock_continue:
spin_unlock(&fi->i_size_lock);
f2fs_put_rpages(cc);
+ page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ cc->cpages = NULL;
f2fs_destroy_compress_ctx(cc);
return 0;
out_destroy_crypt:
- kfree(cic->rpages);
+ page_array_free(cc->inode, cic->rpages, cc->cluster_size);
for (--i; i >= 0; i--)
fscrypt_finalize_bounce_page(&cc->cpages[i]);
@@ -1271,7 +1326,7 @@ out_destroy_crypt:
f2fs_put_page(cc->cpages[i], 1);
}
out_put_cic:
- kfree(cic);
+ kmem_cache_free(cic_entry_slab, cic);
out_put_dnode:
f2fs_put_dnode(&dn);
out_unlock_op:
@@ -1279,6 +1334,9 @@ out_unlock_op:
up_read(&sbi->node_write);
else
f2fs_unlock_op(sbi);
+out_free:
+ page_array_free(cc->inode, cc->cpages, cc->nr_cpages);
+ cc->cpages = NULL;
return -EAGAIN;
}
@@ -1296,7 +1354,7 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
dec_page_count(sbi, F2FS_WB_DATA);
- if (refcount_dec_not_one(&cic->ref))
+ if (atomic_dec_return(&cic->pending_pages))
return;
for (i = 0; i < cic->nr_rpages; i++) {
@@ -1305,8 +1363,8 @@ void f2fs_compress_write_end_io(struct bio *bio, struct page *page)
end_page_writeback(cic->rpages[i]);
}
- kfree(cic->rpages);
- kfree(cic);
+ page_array_free(cic->inode, cic->rpages, cic->nr_rpages);
+ kmem_cache_free(cic_entry_slab, cic);
}
static int f2fs_write_raw_pages(struct compress_ctx *cc,
@@ -1388,9 +1446,6 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
struct writeback_control *wbc,
enum iostat_type io_type)
{
- struct f2fs_inode_info *fi = F2FS_I(cc->inode);
- const struct f2fs_compress_ops *cops =
- f2fs_cops[fi->i_compress_algorithm];
int err;
*submitted = 0;
@@ -1405,9 +1460,6 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
err = f2fs_write_compressed_pages(cc, submitted,
wbc, io_type);
- cops->destroy_compress_ctx(cc);
- kfree(cc->cpages);
- cc->cpages = NULL;
if (!err)
return 0;
f2fs_bug_on(F2FS_I_SB(cc->inode), err != -EAGAIN);
@@ -1424,25 +1476,23 @@ destroy_out:
struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
struct decompress_io_ctx *dic;
pgoff_t start_idx = start_idx_of_cluster(cc);
int i;
- dic = f2fs_kzalloc(sbi, sizeof(struct decompress_io_ctx), GFP_NOFS);
+ dic = kmem_cache_zalloc(dic_entry_slab, GFP_NOFS);
if (!dic)
return ERR_PTR(-ENOMEM);
- dic->rpages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
- cc->log_cluster_size, GFP_NOFS);
+ dic->rpages = page_array_alloc(cc->inode, cc->cluster_size);
if (!dic->rpages) {
- kfree(dic);
+ kmem_cache_free(dic_entry_slab, dic);
return ERR_PTR(-ENOMEM);
}
dic->magic = F2FS_COMPRESSED_PAGE_MAGIC;
dic->inode = cc->inode;
- refcount_set(&dic->ref, cc->nr_cpages);
+ atomic_set(&dic->pending_pages, cc->nr_cpages);
dic->cluster_idx = cc->cluster_idx;
dic->cluster_size = cc->cluster_size;
dic->log_cluster_size = cc->log_cluster_size;
@@ -1453,8 +1503,7 @@ struct decompress_io_ctx *f2fs_alloc_dic(struct compress_ctx *cc)
dic->rpages[i] = cc->rpages[i];
dic->nr_rpages = cc->cluster_size;
- dic->cpages = f2fs_kzalloc(sbi, sizeof(struct page *) *
- dic->nr_cpages, GFP_NOFS);
+ dic->cpages = page_array_alloc(dic->inode, dic->nr_cpages);
if (!dic->cpages)
goto out_free;
@@ -1489,7 +1538,7 @@ void f2fs_free_dic(struct decompress_io_ctx *dic)
continue;
f2fs_compress_free_page(dic->tpages[i]);
}
- kfree(dic->tpages);
+ page_array_free(dic->inode, dic->tpages, dic->cluster_size);
}
if (dic->cpages) {
@@ -1498,11 +1547,11 @@ void f2fs_free_dic(struct decompress_io_ctx *dic)
continue;
f2fs_compress_free_page(dic->cpages[i]);
}
- kfree(dic->cpages);
+ page_array_free(dic->inode, dic->cpages, dic->nr_cpages);
}
- kfree(dic->rpages);
- kfree(dic);
+ page_array_free(dic->inode, dic->rpages, dic->nr_rpages);
+ kmem_cache_free(dic_entry_slab, dic);
}
void f2fs_decompress_end_io(struct page **rpages,
@@ -1530,3 +1579,76 @@ unlock:
unlock_page(rpage);
}
}
+
+int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi)
+{
+ dev_t dev = sbi->sb->s_bdev->bd_dev;
+ char slab_name[32];
+
+ sprintf(slab_name, "f2fs_page_array_entry-%u:%u", MAJOR(dev), MINOR(dev));
+
+ sbi->page_array_slab_size = sizeof(struct page *) <<
+ F2FS_OPTION(sbi).compress_log_size;
+
+ sbi->page_array_slab = f2fs_kmem_cache_create(slab_name,
+ sbi->page_array_slab_size);
+ if (!sbi->page_array_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi)
+{
+ kmem_cache_destroy(sbi->page_array_slab);
+}
+
+static int __init f2fs_init_cic_cache(void)
+{
+ cic_entry_slab = f2fs_kmem_cache_create("f2fs_cic_entry",
+ sizeof(struct compress_io_ctx));
+ if (!cic_entry_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+static void f2fs_destroy_cic_cache(void)
+{
+ kmem_cache_destroy(cic_entry_slab);
+}
+
+static int __init f2fs_init_dic_cache(void)
+{
+ dic_entry_slab = f2fs_kmem_cache_create("f2fs_dic_entry",
+ sizeof(struct decompress_io_ctx));
+ if (!dic_entry_slab)
+ return -ENOMEM;
+ return 0;
+}
+
+static void f2fs_destroy_dic_cache(void)
+{
+ kmem_cache_destroy(dic_entry_slab);
+}
+
+int __init f2fs_init_compress_cache(void)
+{
+ int err;
+
+ err = f2fs_init_cic_cache();
+ if (err)
+ goto out;
+ err = f2fs_init_dic_cache();
+ if (err)
+ goto free_cic;
+ return 0;
+free_cic:
+ f2fs_destroy_cic_cache();
+out:
+ return -ENOMEM;
+}
+
+void f2fs_destroy_compress_cache(void)
+{
+ f2fs_destroy_dic_cache();
+ f2fs_destroy_cic_cache();
+}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 73683e58a08d..be4da52604ed 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -202,7 +202,7 @@ static void f2fs_verify_bio(struct bio *bio)
dic = (struct decompress_io_ctx *)page_private(page);
if (dic) {
- if (refcount_dec_not_one(&dic->ref))
+ if (atomic_dec_return(&dic->pending_pages))
continue;
f2fs_verify_pages(dic->rpages,
dic->cluster_size);
@@ -517,7 +517,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
zero_user_segment(page, 0, PAGE_SIZE);
SetPagePrivate(page);
- set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
+ set_page_private(page, DUMMY_WRITTEN_PAGE);
lock_page(page);
if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
f2fs_bug_on(sbi, 1);
@@ -1416,7 +1416,7 @@ alloc:
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
old_blkaddr = dn->data_blkaddr;
f2fs_allocate_data_block(sbi, NULL, old_blkaddr, &dn->data_blkaddr,
- &sum, seg_type, NULL);
+ &sum, seg_type, NULL);
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
invalidate_mapping_pages(META_MAPPING(sbi),
old_blkaddr, old_blkaddr);
@@ -1803,10 +1803,6 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock,
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
- /* Block number less than F2FS MAX BLOCKS */
- if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
- return -EFBIG;
-
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_BMAP, NULL,
NO_CHECK_TYPE, create);
@@ -2272,8 +2268,8 @@ submit_and_realloc:
if (IS_ERR(bio)) {
ret = PTR_ERR(bio);
dic->failed = true;
- if (refcount_sub_and_test(dic->nr_cpages - i,
- &dic->ref)) {
+ if (!atomic_sub_return(dic->nr_cpages - i,
+ &dic->pending_pages)) {
f2fs_decompress_end_io(dic->rpages,
cc->cluster_size, true,
false);
@@ -3133,6 +3129,8 @@ next:
retry = 0;
}
}
+ if (f2fs_compressed_file(inode))
+ f2fs_destroy_compress_ctx(&cc);
#endif
if (retry) {
index = 0;
@@ -3574,7 +3572,7 @@ static void f2fs_dio_end_io(struct bio *bio)
bio->bi_private = dio->orig_private;
bio->bi_end_io = dio->orig_end_io;
- kvfree(dio);
+ kfree(dio);
bio_endio(bio);
}
@@ -3673,12 +3671,18 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
err);
if (!do_opu)
set_inode_flag(inode, FI_UPDATE_WRITE);
+ } else if (err == -EIOCBQUEUED) {
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_IO,
+ count - iov_iter_count(iter));
} else if (err < 0) {
f2fs_write_failed(mapping, offset + count);
}
} else {
if (err > 0)
f2fs_update_iostat(sbi, APP_DIRECT_READ_IO, err);
+ else if (err == -EIOCBQUEUED)
+ f2fs_update_iostat(F2FS_I_SB(inode), APP_DIRECT_READ_IO,
+ count - iov_iter_count(iter));
}
out:
@@ -3807,11 +3811,16 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
filemap_write_and_wait(mapping);
- if (f2fs_compressed_file(inode))
- blknr = f2fs_bmap_compress(inode, block);
+ /* Block number less than F2FS MAX BLOCKS */
+ if (unlikely(block >= F2FS_I_SB(inode)->max_file_blocks))
+ goto out;
- if (!get_data_block_bmap(inode, block, &tmp, 0))
- blknr = tmp.b_blocknr;
+ if (f2fs_compressed_file(inode)) {
+ blknr = f2fs_bmap_compress(inode, block);
+ } else {
+ if (!get_data_block_bmap(inode, block, &tmp, 0))
+ blknr = tmp.b_blocknr;
+ }
out:
trace_f2fs_bmap(inode, block, blknr);
return blknr;
@@ -3874,6 +3883,83 @@ int f2fs_migrate_page(struct address_space *mapping,
#endif
#ifdef CONFIG_SWAP
+static int check_swap_activate_fast(struct swap_info_struct *sis,
+ struct file *swap_file, sector_t *span)
+{
+ struct address_space *mapping = swap_file->f_mapping;
+ struct inode *inode = mapping->host;
+ sector_t cur_lblock;
+ sector_t last_lblock;
+ sector_t pblock;
+ sector_t lowest_pblock = -1;
+ sector_t highest_pblock = 0;
+ int nr_extents = 0;
+ unsigned long nr_pblocks;
+ unsigned long len;
+ int ret;
+
+ /*
+ * Map all the blocks into the extent list. This code doesn't try
+ * to be very smart.
+ */
+ cur_lblock = 0;
+ last_lblock = logical_to_blk(inode, i_size_read(inode));
+ len = i_size_read(inode);
+
+ while (cur_lblock <= last_lblock && cur_lblock < sis->max) {
+ struct buffer_head map_bh;
+ pgoff_t next_pgofs;
+
+ cond_resched();
+
+ memset(&map_bh, 0, sizeof(struct buffer_head));
+ map_bh.b_size = len - cur_lblock;
+
+ ret = get_data_block(inode, cur_lblock, &map_bh, 0,
+ F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
+ if (ret)
+ goto err_out;
+
+ /* hole */
+ if (!buffer_mapped(&map_bh))
+ goto err_out;
+
+ pblock = map_bh.b_blocknr;
+ nr_pblocks = logical_to_blk(inode, map_bh.b_size);
+
+ if (cur_lblock + nr_pblocks >= sis->max)
+ nr_pblocks = sis->max - cur_lblock;
+
+ if (cur_lblock) { /* exclude the header page */
+ if (pblock < lowest_pblock)
+ lowest_pblock = pblock;
+ if (pblock + nr_pblocks - 1 > highest_pblock)
+ highest_pblock = pblock + nr_pblocks - 1;
+ }
+
+ /*
+ * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
+ */
+ ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
+ if (ret < 0)
+ goto out;
+ nr_extents += ret;
+ cur_lblock += nr_pblocks;
+ }
+ ret = nr_extents;
+ *span = 1 + highest_pblock - lowest_pblock;
+ if (cur_lblock == 0)
+ cur_lblock = 1; /* force Empty message */
+ sis->max = cur_lblock;
+ sis->pages = cur_lblock - 1;
+ sis->highest_bit = cur_lblock - 1;
+out:
+ return ret;
+err_out:
+ pr_err("swapon: swapfile has holes\n");
+ return -EINVAL;
+}
+
/* Copied from generic_swapfile_activate() to check any holes */
static int check_swap_activate(struct swap_info_struct *sis,
struct file *swap_file, sector_t *span)
@@ -3890,6 +3976,9 @@ static int check_swap_activate(struct swap_info_struct *sis,
int nr_extents = 0;
int ret;
+ if (PAGE_SIZE == F2FS_BLKSIZE)
+ return check_swap_activate_fast(sis, swap_file, span);
+
blkbits = inode->i_blkbits;
blocks_per_page = PAGE_SIZE >> blkbits;
@@ -3989,7 +4078,7 @@ static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
if (ret)
return ret;
- if (f2fs_disable_compressed_file(inode))
+ if (!f2fs_disable_compressed_file(inode))
return -EINVAL;
ret = check_swap_activate(sis, file, span);
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 4276c0f79beb..a8357fd4f5fa 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -131,7 +131,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
si->inline_inode = atomic_read(&sbi->inline_inode);
si->inline_dir = atomic_read(&sbi->inline_dir);
si->compr_inode = atomic_read(&sbi->compr_inode);
- si->compr_blocks = atomic_read(&sbi->compr_blocks);
+ si->compr_blocks = atomic64_read(&sbi->compr_blocks);
si->append = sbi->im[APPEND_INO].ino_num;
si->update = sbi->im[UPDATE_INO].ino_num;
si->orphans = sbi->im[ORPHAN_INO].ino_num;
@@ -164,7 +164,7 @@ static void update_general_status(struct f2fs_sb_info *sbi)
* 100 / (int)(sbi->user_block_count >> sbi->log_blocks_per_seg)
/ 2;
si->util_invalid = 50 - si->util_free - si->util_valid;
- for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_NODE; i++) {
+ for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
si->curseg[i] = curseg->segno;
si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
@@ -342,7 +342,7 @@ static int stat_show(struct seq_file *s, void *v)
si->inline_inode);
seq_printf(s, " - Inline_dentry Inode: %u\n",
si->inline_dir);
- seq_printf(s, " - Compressed Inode: %u, Blocks: %u\n",
+ seq_printf(s, " - Compressed Inode: %u, Blocks: %llu\n",
si->compr_inode, si->compr_blocks);
seq_printf(s, " - Orphan/Append/Update Inode: %u, %u, %u\n",
si->orphans, si->append, si->update);
@@ -393,6 +393,14 @@ static int stat_show(struct seq_file *s, void *v)
si->dirty_seg[CURSEG_COLD_NODE],
si->full_seg[CURSEG_COLD_NODE],
si->valid_blks[CURSEG_COLD_NODE]);
+ seq_printf(s, " - Pinned file: %8d %8d %8d\n",
+ si->curseg[CURSEG_COLD_DATA_PINNED],
+ si->cursec[CURSEG_COLD_DATA_PINNED],
+ si->curzone[CURSEG_COLD_DATA_PINNED]);
+ seq_printf(s, " - ATGC data: %8d %8d %8d\n",
+ si->curseg[CURSEG_ALL_DATA_ATGC],
+ si->cursec[CURSEG_ALL_DATA_ATGC],
+ si->curzone[CURSEG_ALL_DATA_ATGC]);
seq_printf(s, "\n - Valid: %d\n - Dirty: %d\n",
si->main_area_segs - si->dirty_count -
si->prefree_count - si->free_segs,
@@ -542,7 +550,7 @@ int f2fs_build_stats(struct f2fs_sb_info *sbi)
atomic_set(&sbi->inline_inode, 0);
atomic_set(&sbi->inline_dir, 0);
atomic_set(&sbi->compr_inode, 0);
- atomic_set(&sbi->compr_blocks, 0);
+ atomic64_set(&sbi->compr_blocks, 0);
atomic_set(&sbi->inplace_count, 0);
for (i = META_CP; i < META_MAX; i++)
atomic_set(&sbi->meta_count[i], 0);
@@ -566,7 +574,7 @@ void f2fs_destroy_stats(struct f2fs_sb_info *sbi)
list_del(&si->stat_list);
mutex_unlock(&f2fs_stat_mutex);
- kvfree(si);
+ kfree(si);
}
void __init f2fs_create_root_stats(void)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 53fbc4dd6e48..4b9ef8bbfa4a 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -75,21 +75,22 @@ int f2fs_init_casefolded_name(const struct inode *dir,
struct f2fs_filename *fname)
{
#ifdef CONFIG_UNICODE
- struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
+ struct super_block *sb = dir->i_sb;
+ struct f2fs_sb_info *sbi = F2FS_SB(sb);
if (IS_CASEFOLDED(dir)) {
fname->cf_name.name = f2fs_kmalloc(sbi, F2FS_NAME_LEN,
GFP_NOFS);
if (!fname->cf_name.name)
return -ENOMEM;
- fname->cf_name.len = utf8_casefold(sbi->s_encoding,
+ fname->cf_name.len = utf8_casefold(sb->s_encoding,
fname->usr_fname,
fname->cf_name.name,
F2FS_NAME_LEN);
if ((int)fname->cf_name.len <= 0) {
kfree(fname->cf_name.name);
fname->cf_name.name = NULL;
- if (f2fs_has_strict_mode(sbi))
+ if (sb_has_strict_encoding(sb))
return -EINVAL;
/* fall back to treating name as opaque byte sequence */
}
@@ -190,21 +191,15 @@ static unsigned long dir_block_index(unsigned int level,
static struct f2fs_dir_entry *find_in_block(struct inode *dir,
struct page *dentry_page,
const struct f2fs_filename *fname,
- int *max_slots,
- struct page **res_page)
+ int *max_slots)
{
struct f2fs_dentry_block *dentry_blk;
- struct f2fs_dir_entry *de;
struct f2fs_dentry_ptr d;
dentry_blk = (struct f2fs_dentry_block *)page_address(dentry_page);
make_dentry_ptr_block(dir, &d, dentry_blk);
- de = f2fs_find_target_dentry(&d, fname, max_slots);
- if (de)
- *res_page = dentry_page;
-
- return de;
+ return f2fs_find_target_dentry(&d, fname, max_slots);
}
#ifdef CONFIG_UNICODE
@@ -215,8 +210,8 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
const u8 *de_name, u32 de_name_len)
{
- const struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
- const struct unicode_map *um = sbi->s_encoding;
+ const struct super_block *sb = dir->i_sb;
+ const struct unicode_map *um = sb->s_encoding;
struct qstr entry = QSTR_INIT(de_name, de_name_len);
int res;
@@ -226,7 +221,7 @@ static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
* In strict mode, ignore invalid names. In non-strict mode,
* fall back to treating them as opaque byte sequences.
*/
- if (f2fs_has_strict_mode(sbi) || name->len != entry.len)
+ if (sb_has_strict_encoding(sb) || name->len != entry.len)
return false;
return !memcmp(name->name, entry.name, name->len);
}
@@ -330,10 +325,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
}
}
- de = find_in_block(dir, dentry_page, fname, &max_slots,
- res_page);
- if (de)
+ de = find_in_block(dir, dentry_page, fname, &max_slots);
+ if (de) {
+ *res_page = dentry_page;
break;
+ }
if (max_slots >= s)
room = true;
@@ -357,16 +353,15 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
unsigned int max_depth;
unsigned int level;
+ *res_page = NULL;
+
if (f2fs_has_inline_dentry(dir)) {
- *res_page = NULL;
de = f2fs_find_in_inline_dir(dir, fname, res_page);
goto out;
}
- if (npages == 0) {
- *res_page = NULL;
+ if (npages == 0)
goto out;
- }
max_depth = F2FS_I(dir)->i_current_depth;
if (unlikely(max_depth > MAX_DIR_HASH_DEPTH)) {
@@ -377,7 +372,6 @@ struct f2fs_dir_entry *__f2fs_find_entry(struct inode *dir,
}
for (level = 0; level < max_depth; level++) {
- *res_page = NULL;
de = find_in_level(dir, level, fname, res_page);
if (de || IS_ERR(*res_page))
break;
@@ -1107,75 +1101,8 @@ const struct file_operations f2fs_dir_operations = {
};
#ifdef CONFIG_UNICODE
-static int f2fs_d_compare(const struct dentry *dentry, unsigned int len,
- const char *str, const struct qstr *name)
-{
- const struct dentry *parent = READ_ONCE(dentry->d_parent);
- const struct inode *dir = READ_ONCE(parent->d_inode);
- const struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- struct qstr entry = QSTR_INIT(str, len);
- char strbuf[DNAME_INLINE_LEN];
- int res;
-
- if (!dir || !IS_CASEFOLDED(dir))
- goto fallback;
-
- /*
- * If the dentry name is stored in-line, then it may be concurrently
- * modified by a rename. If this happens, the VFS will eventually retry
- * the lookup, so it doesn't matter what ->d_compare() returns.
- * However, it's unsafe to call utf8_strncasecmp() with an unstable
- * string. Therefore, we have to copy the name into a temporary buffer.
- */
- if (len <= DNAME_INLINE_LEN - 1) {
- memcpy(strbuf, str, len);
- strbuf[len] = 0;
- entry.name = strbuf;
- /* prevent compiler from optimizing out the temporary buffer */
- barrier();
- }
-
- res = utf8_strncasecmp(sbi->s_encoding, name, &entry);
- if (res >= 0)
- return res;
-
- if (f2fs_has_strict_mode(sbi))
- return -EINVAL;
-fallback:
- if (len != name->len)
- return 1;
- return !!memcmp(str, name->name, len);
-}
-
-static int f2fs_d_hash(const struct dentry *dentry, struct qstr *str)
-{
- struct f2fs_sb_info *sbi = F2FS_SB(dentry->d_sb);
- const struct unicode_map *um = sbi->s_encoding;
- const struct inode *inode = READ_ONCE(dentry->d_inode);
- unsigned char *norm;
- int len, ret = 0;
-
- if (!inode || !IS_CASEFOLDED(inode))
- return 0;
-
- norm = f2fs_kmalloc(sbi, PATH_MAX, GFP_ATOMIC);
- if (!norm)
- return -ENOMEM;
-
- len = utf8_casefold(um, str, norm, PATH_MAX);
- if (len < 0) {
- if (f2fs_has_strict_mode(sbi))
- ret = -EINVAL;
- goto out;
- }
- str->hash = full_name_hash(dentry, norm, len);
-out:
- kvfree(norm);
- return ret;
-}
-
const struct dentry_operations f2fs_dentry_ops = {
- .d_hash = f2fs_d_hash,
- .d_compare = f2fs_d_compare,
+ .d_hash = generic_ci_d_hash,
+ .d_compare = generic_ci_d_compare,
};
#endif
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index 686c68b98610..3ebf976a682d 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -58,6 +58,29 @@ struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
return re;
}
+struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
+ struct rb_root_cached *root,
+ struct rb_node **parent,
+ unsigned long long key, bool *leftmost)
+{
+ struct rb_node **p = &root->rb_root.rb_node;
+ struct rb_entry *re;
+ /*
+  * Find the insertion slot for @key in the cached rb-tree @root, ordering
+  * entries by their 64-bit rb_entry key. On return *parent is the last
+  * node visited (left untouched when the tree is empty) and the returned
+  * pointer is the empty child link where a new node can be attached.
+  * *leftmost is only ever cleared here, never set, so the caller must
+  * initialise it to true beforehand. @sbi is not used by this walk.
+  */
+ while (*p) {
+ *parent = *p;
+ re = rb_entry(*parent, struct rb_entry, rb_node);
+
+ if (key < re->key) {
+ p = &(*p)->rb_left;
+ } else { /* equal keys also descend right, so duplicates are accepted */
+ p = &(*p)->rb_right;
+ *leftmost = false; /* took a right turn: cannot be the leftmost node */
+ }
+ }
+
+ return p;
+}
+
struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root_cached *root,
struct rb_node **parent,
@@ -166,7 +189,7 @@ lookup_neighbors:
}
bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root)
+ struct rb_root_cached *root, bool check_key)
{
#ifdef CONFIG_F2FS_CHECK_FS
struct rb_node *cur = rb_first_cached(root), *next;
@@ -183,13 +206,23 @@ bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
cur_re = rb_entry(cur, struct rb_entry, rb_node);
next_re = rb_entry(next, struct rb_entry, rb_node);
+ if (check_key) {
+ if (cur_re->key > next_re->key) {
+ f2fs_info(sbi, "inconsistent rbtree, "
+ "cur(%llu) next(%llu)",
+ cur_re->key, next_re->key);
+ return false;
+ }
+ goto next;
+ }
+
if (cur_re->ofs + cur_re->len > next_re->ofs) {
f2fs_info(sbi, "inconsistent rbtree, cur(%u, %u) next(%u, %u)",
cur_re->ofs, cur_re->len,
next_re->ofs, next_re->len);
return false;
}
-
+next:
cur = next;
}
#endif
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 7c089ff7ff94..cb700d797296 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -98,6 +98,7 @@ extern const char *f2fs_fault_name[FAULT_MAX];
#define F2FS_MOUNT_RESERVE_ROOT 0x01000000
#define F2FS_MOUNT_DISABLE_CHECKPOINT 0x02000000
#define F2FS_MOUNT_NORECOVERY 0x04000000
+#define F2FS_MOUNT_ATGC 0x08000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
@@ -612,8 +613,13 @@ enum {
struct rb_entry {
struct rb_node rb_node; /* rb node located in rb-tree */
- unsigned int ofs; /* start offset of the entry */
- unsigned int len; /* length of the entry */
+ union {
+ struct {
+ unsigned int ofs; /* start offset of the entry */
+ unsigned int len; /* length of the entry */
+ };
+ unsigned long long key; /* 64-bits key */
+ } __packed;
};
struct extent_info {
@@ -801,7 +807,7 @@ struct f2fs_inode_info {
struct timespec64 i_disk_time[4];/* inode disk times */
/* for file compress */
- u64 i_compr_blocks; /* # of compressed blocks */
+ atomic_t i_compr_blocks; /* # of compressed blocks */
unsigned char i_compress_algorithm; /* algorithm type */
unsigned char i_log_cluster_size; /* log of cluster size */
unsigned int i_cluster_size; /* cluster size */
@@ -973,7 +979,9 @@ static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode,
*/
#define NR_CURSEG_DATA_TYPE (3)
#define NR_CURSEG_NODE_TYPE (3)
-#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
+#define NR_CURSEG_INMEM_TYPE (2)
+#define NR_CURSEG_PERSIST_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE)
+#define NR_CURSEG_TYPE (NR_CURSEG_INMEM_TYPE + NR_CURSEG_PERSIST_TYPE)
enum {
CURSEG_HOT_DATA = 0, /* directory entry blocks */
@@ -982,8 +990,11 @@ enum {
CURSEG_HOT_NODE, /* direct node blocks of directory files */
CURSEG_WARM_NODE, /* direct node blocks of normal files */
CURSEG_COLD_NODE, /* indirect node blocks */
- NO_CHECK_TYPE,
- CURSEG_COLD_DATA_PINNED,/* cold data for pinned file */
+ NR_PERSISTENT_LOG, /* number of persistent log */
+ CURSEG_COLD_DATA_PINNED = NR_PERSISTENT_LOG,
+ /* pinned file that needs consecutive block address */
+ CURSEG_ALL_DATA_ATGC, /* SSR allocator in hot/warm/cold data area */
+ NO_CHECK_TYPE, /* number of persistent & inmem log */
};
struct flush_cmd {
@@ -1209,6 +1220,7 @@ struct f2fs_dev_info {
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int nr_blkz; /* Total number of zones */
unsigned long *blkz_seq; /* Bitmap indicating sequential zones */
+ block_t *zone_capacity_blocks; /* Array of zone capacity in blks */
#endif
};
@@ -1228,6 +1240,18 @@ struct inode_management {
unsigned long ino_num; /* number of entries */
};
+/*
+ * for GC_AT: state for tracking ATGC victim candidates; f2fs_sb_info
+ * carries one instance as its 'am' member
+ */
+struct atgc_management {
+ bool atgc_enabled; /* ATGC is enabled or not */
+ struct rb_root_cached root; /* root of victim rb-tree */
+ struct list_head victim_list; /* linked with all victim entries */
+ unsigned int victim_count; /* victim count in rb-tree */
+ unsigned int candidate_ratio; /* candidate ratio */
+ unsigned int max_candidate_count; /* max candidate count */
+ unsigned int age_weight; /* age weight, vblock_weight = 100 - age_weight */
+ unsigned long long age_threshold; /* age threshold */
+};
+
/* For s_flag in struct f2fs_sb_info */
enum {
SBI_IS_DIRTY, /* dirty flag for checkpoint */
@@ -1260,6 +1284,7 @@ enum {
GC_NORMAL,
GC_IDLE_CB,
GC_IDLE_GREEDY,
+ GC_IDLE_AT,
GC_URGENT_HIGH,
GC_URGENT_LOW,
};
@@ -1303,9 +1328,9 @@ enum fsync_mode {
#define DUMMY_WRITTEN_PAGE ((unsigned long)-2)
#define IS_ATOMIC_WRITTEN_PAGE(page) \
- (page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
+ (page_private(page) == ATOMIC_WRITTEN_PAGE)
#define IS_DUMMY_WRITTEN_PAGE(page) \
- (page_private(page) == (unsigned long)DUMMY_WRITTEN_PAGE)
+ (page_private(page) == DUMMY_WRITTEN_PAGE)
#ifdef CONFIG_F2FS_IO_TRACE
#define IS_IO_TRACED_PAGE(page) \
@@ -1359,7 +1384,7 @@ struct compress_io_ctx {
struct inode *inode; /* inode the context belong to */
struct page **rpages; /* pages store raw data in cluster */
unsigned int nr_rpages; /* total page number in rpages */
- refcount_t ref; /* referrence count of raw page */
+ atomic_t pending_pages; /* in-flight compressed page count */
};
/* decompress io context for read IO path */
@@ -1378,7 +1403,7 @@ struct decompress_io_ctx {
struct compress_data *cbuf; /* virtual mapped address on cpages */
size_t rlen; /* valid data length in rbuf */
size_t clen; /* valid data length in cbuf */
- refcount_t ref; /* referrence count of compressed page */
+ atomic_t pending_pages; /* in-flight compressed page count */
bool failed; /* indicate IO error during decompression */
void *private; /* payload buffer for specified decompression algorithm */
void *private2; /* extra payload buffer */
@@ -1387,7 +1412,7 @@ struct decompress_io_ctx {
#define NULL_CLUSTER ((unsigned int)(~0))
#define MIN_COMPRESS_LOG_SIZE 2
#define MAX_COMPRESS_LOG_SIZE 8
-#define MAX_COMPRESS_WINDOW_SIZE ((PAGE_SIZE) << MAX_COMPRESS_LOG_SIZE)
+#define MAX_COMPRESS_WINDOW_SIZE(log_size) ((PAGE_SIZE) << (log_size))
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
@@ -1397,10 +1422,6 @@ struct f2fs_sb_info {
int valid_super_block; /* valid super block no */
unsigned long s_flag; /* flags for sbi */
struct mutex writepages; /* mutex for writepages() */
-#ifdef CONFIG_UNICODE
- struct unicode_map *s_encoding;
- __u16 s_encoding_flags;
-#endif
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int blocks_per_blkz; /* F2FS blocks per zone */
@@ -1508,6 +1529,7 @@ struct f2fs_sb_info {
* race between GC and GC or CP
*/
struct f2fs_gc_kthread *gc_thread; /* GC thread */
+ struct atgc_management am; /* atgc management */
unsigned int cur_victim_sec; /* current victim section num */
unsigned int gc_mode; /* current GC state */
unsigned int next_victim_seg[2]; /* next segment in victim section */
@@ -1544,7 +1566,7 @@ struct f2fs_sb_info {
atomic_t inline_inode; /* # of inline_data inodes */
atomic_t inline_dir; /* # of inline_dentry inodes */
atomic_t compr_inode; /* # of compressed inodes */
- atomic_t compr_blocks; /* # of compressed blocks */
+ atomic64_t compr_blocks; /* # of compressed blocks */
atomic_t vw_cnt; /* # of volatile writes */
atomic_t max_aw_cnt; /* max # of atomic writes */
atomic_t max_vw_cnt; /* max # of volatile writes */
@@ -1593,6 +1615,11 @@ struct f2fs_sb_info {
struct kmem_cache *inline_xattr_slab; /* inline xattr entry */
unsigned int inline_xattr_slab_size; /* default inline xattr slab size */
+
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ struct kmem_cache *page_array_slab; /* page array entry */
+ unsigned int page_array_slab_size; /* default page array slab size */
+#endif
};
struct f2fs_private_dio {
@@ -3325,6 +3352,11 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi);
int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
+void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi);
+void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi);
+void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi);
+void f2fs_get_new_segment(struct f2fs_sb_info *sbi,
+ unsigned int *newseg, bool new_sec, int dir);
void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end);
void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
@@ -3343,7 +3375,8 @@ void f2fs_outplace_write_data(struct dnode_of_data *dn,
int f2fs_inplace_write_data(struct f2fs_io_info *fio);
void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
- bool recover_curseg, bool recover_newaddr);
+ bool recover_curseg, bool recover_newaddr,
+ bool from_gc);
void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
block_t old_addr, block_t new_addr,
unsigned char version, bool recover_curseg,
@@ -3371,6 +3404,10 @@ void f2fs_destroy_segment_manager_caches(void);
int f2fs_rw_hint_to_seg_type(enum rw_hint hint);
enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
enum page_type type, enum temp_type temp);
+unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
+ unsigned int segno);
+unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
+ unsigned int segno);
/*
* checkpoint.c
@@ -3378,7 +3415,7 @@ enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io);
struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index);
-struct page *f2fs_get_meta_page_nofail(struct f2fs_sb_info *sbi, pgoff_t index);
+struct page *f2fs_get_meta_page_retry(struct f2fs_sb_info *sbi, pgoff_t index);
struct page *f2fs_get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index);
bool f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi,
block_t blkaddr, int type);
@@ -3486,6 +3523,8 @@ int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
unsigned int segno);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
+int __init f2fs_create_garbage_collection_cache(void);
+void f2fs_destroy_garbage_collection_cache(void);
/*
* recovery.c
@@ -3521,7 +3560,8 @@ struct f2fs_stat_info {
int nr_discard_cmd;
unsigned int undiscard_blks;
int inline_xattr, inline_inode, inline_dir, append, update, orphans;
- int compr_inode, compr_blocks;
+ int compr_inode;
+ unsigned long long compr_blocks;
int aw_cnt, max_aw_cnt, vw_cnt, max_vw_cnt;
unsigned int valid_count, valid_node_count, valid_inode_count, discard_blks;
unsigned int bimodal, avg_vblocks;
@@ -3606,9 +3646,9 @@ static inline struct f2fs_stat_info *F2FS_STAT(struct f2fs_sb_info *sbi)
(atomic_dec(&F2FS_I_SB(inode)->compr_inode)); \
} while (0)
#define stat_add_compr_blocks(inode, blocks) \
- (atomic_add(blocks, &F2FS_I_SB(inode)->compr_blocks))
+ (atomic64_add(blocks, &F2FS_I_SB(inode)->compr_blocks))
#define stat_sub_compr_blocks(inode, blocks) \
- (atomic_sub(blocks, &F2FS_I_SB(inode)->compr_blocks))
+ (atomic64_sub(blocks, &F2FS_I_SB(inode)->compr_blocks))
#define stat_inc_meta_count(sbi, blkaddr) \
do { \
if (blkaddr < SIT_I(sbi)->sit_base_addr) \
@@ -3787,6 +3827,10 @@ void f2fs_leave_shrinker(struct f2fs_sb_info *sbi);
*/
struct rb_entry *f2fs_lookup_rb_tree(struct rb_root_cached *root,
struct rb_entry *cached_re, unsigned int ofs);
+struct rb_node **f2fs_lookup_rb_tree_ext(struct f2fs_sb_info *sbi,
+ struct rb_root_cached *root,
+ struct rb_node **parent,
+ unsigned long long key, bool *left_most);
struct rb_node **f2fs_lookup_rb_tree_for_insert(struct f2fs_sb_info *sbi,
struct rb_root_cached *root,
struct rb_node **parent,
@@ -3797,7 +3841,7 @@ struct rb_entry *f2fs_lookup_rb_tree_ret(struct rb_root_cached *root,
struct rb_node ***insert_p, struct rb_node **insert_parent,
bool force, bool *leftmost);
bool f2fs_check_rb_tree_consistence(struct f2fs_sb_info *sbi,
- struct rb_root_cached *root);
+ struct rb_root_cached *root, bool check_key);
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink);
void f2fs_init_extent_tree(struct inode *inode, struct page *ipage);
void f2fs_drop_extent_tree(struct inode *inode);
@@ -3883,6 +3927,10 @@ void f2fs_decompress_end_io(struct page **rpages,
int f2fs_init_compress_ctx(struct compress_ctx *cc);
void f2fs_destroy_compress_ctx(struct compress_ctx *cc);
void f2fs_init_compress_info(struct f2fs_sb_info *sbi);
+int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi);
+void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi);
+int __init f2fs_init_compress_cache(void);
+void f2fs_destroy_compress_cache(void);
#else
static inline bool f2fs_is_compressed_page(struct page *page) { return false; }
static inline bool f2fs_is_compress_backend_ready(struct inode *inode)
@@ -3899,6 +3947,10 @@ static inline struct page *f2fs_compress_control_page(struct page *page)
}
static inline int f2fs_init_compress_mempool(void) { return 0; }
static inline void f2fs_destroy_compress_mempool(void) { }
+static inline int f2fs_init_page_array_cache(struct f2fs_sb_info *sbi) { return 0; }
+static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { }
+static inline int __init f2fs_init_compress_cache(void) { return 0; }
+static inline void f2fs_destroy_compress_cache(void) { }
#endif
static inline void set_compress_context(struct inode *inode)
@@ -3917,24 +3969,21 @@ static inline void set_compress_context(struct inode *inode)
f2fs_mark_inode_dirty_sync(inode, true);
}
-static inline u64 f2fs_disable_compressed_file(struct inode *inode)
+static inline bool f2fs_disable_compressed_file(struct inode *inode)
{
struct f2fs_inode_info *fi = F2FS_I(inode);
if (!f2fs_compressed_file(inode))
- return 0;
- if (S_ISREG(inode->i_mode)) {
- if (get_dirty_pages(inode))
- return 1;
- if (fi->i_compr_blocks)
- return fi->i_compr_blocks;
- }
+ return true;
+ if (S_ISREG(inode->i_mode) &&
+ (get_dirty_pages(inode) || atomic_read(&fi->i_compr_blocks)))
+ return false;
fi->i_flags &= ~F2FS_COMPR_FL;
stat_dec_compr_inode(inode);
clear_inode_flag(inode, FI_COMPRESSED_FILE);
f2fs_mark_inode_dirty_sync(inode, true);
- return 0;
+ return true;
}
#define F2FS_FEATURE_FUNCS(name, flagname) \
@@ -4028,16 +4077,17 @@ static inline void f2fs_i_compr_blocks_update(struct inode *inode,
u64 blocks, bool add)
{
int diff = F2FS_I(inode)->i_cluster_size - blocks;
+ struct f2fs_inode_info *fi = F2FS_I(inode);
/* don't update i_compr_blocks if saved blocks were released */
- if (!add && !F2FS_I(inode)->i_compr_blocks)
+ if (!add && !atomic_read(&fi->i_compr_blocks))
return;
if (add) {
- F2FS_I(inode)->i_compr_blocks += diff;
+ atomic_add(diff, &fi->i_compr_blocks);
stat_add_compr_blocks(inode, diff);
} else {
- F2FS_I(inode)->i_compr_blocks -= diff;
+ atomic_sub(diff, &fi->i_compr_blocks);
stat_sub_compr_blocks(inode, diff);
}
f2fs_mark_inode_dirty_sync(inode, true);
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 8a422400e824..ee861c6d9ff0 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -376,32 +376,15 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
return f2fs_do_sync_file(file, start, end, datasync, false);
}
-static pgoff_t __get_first_dirty_index(struct address_space *mapping,
- pgoff_t pgofs, int whence)
-{
- struct page *page;
- int nr_pages;
-
- if (whence != SEEK_DATA)
- return 0;
-
- /* find first dirty page index */
- nr_pages = find_get_pages_tag(mapping, &pgofs, PAGECACHE_TAG_DIRTY,
- 1, &page);
- if (!nr_pages)
- return ULONG_MAX;
- pgofs = page->index;
- put_page(page);
- return pgofs;
-}
-
-static bool __found_offset(struct f2fs_sb_info *sbi, block_t blkaddr,
- pgoff_t dirty, pgoff_t pgofs, int whence)
+static bool __found_offset(struct address_space *mapping, block_t blkaddr,
+ pgoff_t index, int whence)
{
switch (whence) {
case SEEK_DATA:
- if ((blkaddr == NEW_ADDR && dirty == pgofs) ||
- __is_valid_data_blkaddr(blkaddr))
+ if (__is_valid_data_blkaddr(blkaddr))
+ return true;
+ if (blkaddr == NEW_ADDR &&
+ xa_get_mark(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY))
return true;
break;
case SEEK_HOLE:
@@ -417,7 +400,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
struct inode *inode = file->f_mapping->host;
loff_t maxbytes = inode->i_sb->s_maxbytes;
struct dnode_of_data dn;
- pgoff_t pgofs, end_offset, dirty;
+ pgoff_t pgofs, end_offset;
loff_t data_ofs = offset;
loff_t isize;
int err = 0;
@@ -429,16 +412,13 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
goto fail;
/* handle inline data case */
- if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
- if (whence == SEEK_HOLE)
- data_ofs = isize;
+ if (f2fs_has_inline_data(inode) && whence == SEEK_HOLE) {
+ data_ofs = isize;
goto found;
}
pgofs = (pgoff_t)(offset >> PAGE_SHIFT);
- dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);
-
for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = f2fs_get_dnode_of_data(&dn, pgofs, LOOKUP_NODE);
@@ -471,7 +451,7 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
goto fail;
}
- if (__found_offset(F2FS_I_SB(inode), blkaddr, dirty,
+ if (__found_offset(file->f_mapping, blkaddr,
pgofs, whence)) {
f2fs_put_dnode(&dn);
goto found;
@@ -564,7 +544,7 @@ void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count)
bool compressed_cluster = false;
int cluster_index = 0, valid_blocks = 0;
int cluster_size = F2FS_I(dn->inode)->i_cluster_size;
- bool released = !F2FS_I(dn->inode)->i_compr_blocks;
+ bool released = !atomic_read(&F2FS_I(dn->inode)->i_compr_blocks);
if (IS_INODE(dn->node_page) && f2fs_has_extra_attr(dn->inode))
base = get_extra_isize(dn->inode);
@@ -753,11 +733,14 @@ int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock)
return err;
#ifdef CONFIG_F2FS_FS_COMPRESSION
- if (from != free_from)
+ if (from != free_from) {
err = f2fs_truncate_partial_cluster(inode, from, lock);
+ if (err)
+ return err;
+ }
#endif
- return err;
+ return 0;
}
int f2fs_truncate(struct inode *inode)
@@ -1656,13 +1639,14 @@ next_alloc:
}
down_write(&sbi->pin_sem);
- map.m_seg_type = CURSEG_COLD_DATA_PINNED;
f2fs_lock_op(sbi);
- f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA);
+ f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA_PINNED);
f2fs_unlock_op(sbi);
+ map.m_seg_type = CURSEG_COLD_DATA_PINNED;
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+
up_write(&sbi->pin_sem);
done += map.m_len;
@@ -1828,7 +1812,7 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
if ((iflags ^ masked_flags) & F2FS_COMPR_FL) {
if (masked_flags & F2FS_COMPR_FL) {
- if (f2fs_disable_compressed_file(inode))
+ if (!f2fs_disable_compressed_file(inode))
return -EINVAL;
}
if (iflags & F2FS_NOCOMP_FL)
@@ -1836,6 +1820,8 @@ static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
if (iflags & F2FS_COMPR_FL) {
if (!f2fs_may_compress(inode))
return -EINVAL;
+ if (S_ISREG(inode->i_mode) && inode->i_size)
+ return -EINVAL;
set_compress_context(inode);
}
@@ -2783,6 +2769,9 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in,
if (IS_ENCRYPTED(src) || IS_ENCRYPTED(dst))
return -EOPNOTSUPP;
+ if (pos_out < 0 || pos_in < 0)
+ return -EINVAL;
+
if (src == dst) {
if (pos_in == pos_out)
return 0;
@@ -3258,7 +3247,7 @@ static int f2fs_ioc_set_pin_file(struct file *filp, unsigned long arg)
if (ret)
goto out;
- if (f2fs_disable_compressed_file(inode)) {
+ if (!f2fs_disable_compressed_file(inode)) {
ret = -EOPNOTSUPP;
goto out;
}
@@ -3385,7 +3374,7 @@ static int f2fs_ioc_getfslabel(struct file *filp, unsigned long arg)
min(FSLABEL_MAX, count)))
err = -EFAULT;
- kvfree(vbuf);
+ kfree(vbuf);
return err;
}
@@ -3436,7 +3425,7 @@ static int f2fs_get_compress_blocks(struct file *filp, unsigned long arg)
if (!f2fs_compressed_file(inode))
return -EINVAL;
- blocks = F2FS_I(inode)->i_compr_blocks;
+ blocks = atomic_read(&F2FS_I(inode)->i_compr_blocks);
return put_user(blocks, (u64 __user *)arg);
}
@@ -3521,7 +3510,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
inode_lock(inode);
writecount = atomic_read(&inode->i_writecount);
- if ((filp->f_mode & FMODE_WRITE && writecount != 1) || writecount) {
+ if ((filp->f_mode & FMODE_WRITE && writecount != 1) ||
+ (!(filp->f_mode & FMODE_WRITE) && writecount)) {
ret = -EBUSY;
goto out;
}
@@ -3540,7 +3530,7 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg)
inode->i_ctime = current_time(inode);
f2fs_mark_inode_dirty_sync(inode, true);
- if (!F2FS_I(inode)->i_compr_blocks)
+ if (!atomic_read(&F2FS_I(inode)->i_compr_blocks))
goto out;
down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
@@ -3588,14 +3578,15 @@ out:
if (ret >= 0) {
ret = put_user(released_blocks, (u64 __user *)arg);
- } else if (released_blocks && F2FS_I(inode)->i_compr_blocks) {
+ } else if (released_blocks &&
+ atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
- "iblocks=%llu, released=%u, compr_blocks=%llu, "
+ "iblocks=%llu, released=%u, compr_blocks=%u, "
"run fsck to fix.",
__func__, inode->i_ino, inode->i_blocks,
released_blocks,
- F2FS_I(inode)->i_compr_blocks);
+ atomic_read(&F2FS_I(inode)->i_compr_blocks));
}
return ret;
@@ -3683,7 +3674,7 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg)
if (ret)
return ret;
- if (F2FS_I(inode)->i_compr_blocks)
+ if (atomic_read(&F2FS_I(inode)->i_compr_blocks))
goto out;
f2fs_balance_fs(F2FS_I_SB(inode), true);
@@ -3747,14 +3738,15 @@ out:
if (ret >= 0) {
ret = put_user(reserved_blocks, (u64 __user *)arg);
- } else if (reserved_blocks && F2FS_I(inode)->i_compr_blocks) {
+ } else if (reserved_blocks &&
+ atomic_read(&F2FS_I(inode)->i_compr_blocks)) {
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: partial blocks were released i_ino=%lx "
- "iblocks=%llu, reserved=%u, compr_blocks=%llu, "
+ "iblocks=%llu, reserved=%u, compr_blocks=%u, "
"run fsck to fix.",
__func__, inode->i_ino, inode->i_blocks,
reserved_blocks,
- F2FS_I(inode)->i_compr_blocks);
+ atomic_read(&F2FS_I(inode)->i_compr_blocks));
}
return ret;
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 11b4adde9baf..05641a1e36cc 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -21,6 +21,8 @@
#include "gc.h"
#include <trace/events/f2fs.h>
+static struct kmem_cache *victim_entry_slab;
+
static unsigned int count_bits(const unsigned long *addr,
unsigned int offset, unsigned int len);
@@ -150,7 +152,7 @@ int f2fs_start_gc_thread(struct f2fs_sb_info *sbi)
"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(gc_th->f2fs_gc_task)) {
err = PTR_ERR(gc_th->f2fs_gc_task);
- kvfree(gc_th);
+ kfree(gc_th);
sbi->gc_thread = NULL;
}
out:
@@ -163,13 +165,22 @@ void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
if (!gc_th)
return;
kthread_stop(gc_th->f2fs_gc_task);
- kvfree(gc_th);
+ kfree(gc_th);
sbi->gc_thread = NULL;
}
static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
{
- int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;
+ int gc_mode;
+
+ if (gc_type == BG_GC) {
+ if (sbi->am.atgc_enabled)
+ gc_mode = GC_AT;
+ else
+ gc_mode = GC_CB;
+ } else {
+ gc_mode = GC_GREEDY;
+ }
switch (sbi->gc_mode) {
case GC_IDLE_CB:
@@ -179,7 +190,11 @@ static int select_gc_type(struct f2fs_sb_info *sbi, int gc_type)
case GC_URGENT_HIGH:
gc_mode = GC_GREEDY;
break;
+ case GC_IDLE_AT:
+ gc_mode = GC_AT;
+ break;
}
+
return gc_mode;
}
@@ -193,6 +208,11 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
p->dirty_bitmap = dirty_i->dirty_segmap[type];
p->max_search = dirty_i->nr_dirty[type];
p->ofs_unit = 1;
+ } else if (p->alloc_mode == AT_SSR) {
+ p->gc_mode = GC_GREEDY;
+ p->dirty_bitmap = dirty_i->dirty_segmap[type];
+ p->max_search = dirty_i->nr_dirty[type];
+ p->ofs_unit = 1;
} else {
p->gc_mode = select_gc_type(sbi, gc_type);
p->ofs_unit = sbi->segs_per_sec;
@@ -212,6 +232,7 @@ static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
*/
if (gc_type != FG_GC &&
(sbi->gc_mode != GC_URGENT_HIGH) &&
+ (p->gc_mode != GC_AT && p->alloc_mode != AT_SSR) &&
p->max_search > sbi->max_victim_search)
p->max_search = sbi->max_victim_search;
@@ -229,10 +250,16 @@ static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
/* SSR allocates in a segment unit */
if (p->alloc_mode == SSR)
return sbi->blocks_per_seg;
+ else if (p->alloc_mode == AT_SSR)
+ return UINT_MAX;
+
+ /* LFS */
if (p->gc_mode == GC_GREEDY)
return 2 * sbi->blocks_per_seg * p->ofs_unit;
else if (p->gc_mode == GC_CB)
return UINT_MAX;
+ else if (p->gc_mode == GC_AT)
+ return UINT_MAX;
else /* No other gc_mode */
return 0;
}
@@ -266,13 +293,14 @@ static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
unsigned char age = 0;
unsigned char u;
unsigned int i;
+ unsigned int usable_segs_per_sec = f2fs_usable_segs_in_sec(sbi, segno);
- for (i = 0; i < sbi->segs_per_sec; i++)
+ for (i = 0; i < usable_segs_per_sec; i++)
mtime += get_seg_entry(sbi, start + i)->mtime;
vblocks = get_valid_blocks(sbi, segno, true);
- mtime = div_u64(mtime, sbi->segs_per_sec);
- vblocks = div_u64(vblocks, sbi->segs_per_sec);
+ mtime = div_u64(mtime, usable_segs_per_sec);
+ vblocks = div_u64(vblocks, usable_segs_per_sec);
u = (vblocks * 100) >> sbi->log_blocks_per_seg;
@@ -297,8 +325,11 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
/* alloc_mode == LFS */
if (p->gc_mode == GC_GREEDY)
return get_valid_blocks(sbi, segno, true);
- else
+ else if (p->gc_mode == GC_CB)
return get_cb_cost(sbi, segno);
+
+ f2fs_bug_on(sbi, 1);
+ return 0;
}
static unsigned int count_bits(const unsigned long *addr,
@@ -313,6 +344,273 @@ static unsigned int count_bits(const unsigned long *addr,
return sum;
}
+/*
+ * Allocate a victim entry describing @segno with modification time @mtime,
+ * link it into the ATGC rb-tree at the slot given by @parent/@p and append
+ * it to the victim list. Returns the newly attached entry.
+ */
+static struct victim_entry *attach_victim_entry(struct f2fs_sb_info *sbi,
+		unsigned long long mtime, unsigned int segno,
+		struct rb_node *parent, struct rb_node **p,
+		bool left_most)
+{
+	struct victim_entry *entry =
+		f2fs_kmem_cache_alloc(victim_entry_slab, GFP_NOFS);
+
+	entry->segno = segno;
+	entry->mtime = mtime;
+
+	/* index the entry in the rb-tree, then track it on the list */
+	rb_link_node(&entry->rb_node, parent, p);
+	rb_insert_color_cached(&entry->rb_node, &sbi->am.root, left_most);
+	list_add_tail(&entry->list, &sbi->am.victim_list);
+	sbi->am.victim_count++;
+
+	return entry;
+}
+
+/*
+ * Look up the rb-tree slot for @mtime and attach a new victim entry for
+ * @segno at that position.
+ */
+static void insert_victim_entry(struct f2fs_sb_info *sbi,
+		unsigned long long mtime, unsigned int segno)
+{
+	struct rb_node *parent = NULL;
+	struct rb_node **link;
+	bool leftmost = true;
+
+	link = f2fs_lookup_rb_tree_ext(sbi, &sbi->am.root, &parent,
+					mtime, &leftmost);
+	attach_victim_entry(sbi, mtime, segno, parent, link, leftmost);
+}
+
+/*
+ * Consider @segno as a GC_AT/AT_SSR candidate.
+ *
+ * The mtime of the section containing @segno is taken as the average of the
+ * mtimes of its segments. The global and dirty min/max mtime bounds in the
+ * SIT info are widened to include it, and the segment is inserted into the
+ * victim tree unless it is younger than p->age_threshold relative to the
+ * newest dirty mtime seen so far.
+ */
+static void add_victim_entry(struct f2fs_sb_info *sbi,
+		struct victim_sel_policy *p, unsigned int segno)
+{
+	struct sit_info *sit_i = SIT_I(sbi);
+	unsigned int first_seg =
+		GET_SEG_FROM_SEC(sbi, GET_SEC_FROM_SEG(sbi, segno));
+	unsigned long long sum = 0;
+	unsigned long long mtime;
+	unsigned int off;
+
+	/* with checkpointing disabled, drop segments that hold nothing */
+	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+		if ((p->gc_mode == GC_AT &&
+			get_valid_blocks(sbi, segno, true) == 0) ||
+		    (p->alloc_mode == AT_SSR &&
+			get_seg_entry(sbi, segno)->ckpt_valid_blocks == 0))
+			return;
+	}
+
+	for (off = 0; off < sbi->segs_per_sec; off++)
+		sum += get_seg_entry(sbi, first_seg + off)->mtime;
+	mtime = div_u64(sum, sbi->segs_per_sec);
+
+	/* Handle if the system time has changed by the user */
+	if (mtime < sit_i->min_mtime)
+		sit_i->min_mtime = mtime;
+	if (mtime > sit_i->max_mtime)
+		sit_i->max_mtime = mtime;
+	if (mtime < sit_i->dirty_min_mtime)
+		sit_i->dirty_min_mtime = mtime;
+	if (mtime > sit_i->dirty_max_mtime)
+		sit_i->dirty_max_mtime = mtime;
+
+	/* only sections at least age_threshold old become candidates */
+	if (sit_i->dirty_max_mtime - mtime >= p->age_threshold)
+		insert_victim_entry(sbi, mtime, segno);
+}
+
+/*
+ * Search the ATGC rb-tree for p->age and return the parent node reported by
+ * the lookup (NULL if the lookup leaves it untouched). The slot pointer and
+ * the left-most flag produced by the lookup are not used.
+ */
+static struct rb_node *lookup_central_victim(struct f2fs_sb_info *sbi,
+		struct victim_sel_policy *p)
+{
+	struct rb_node *center = NULL;
+	bool leftmost_unused;
+
+	f2fs_lookup_rb_tree_ext(sbi, &sbi->am.root, &center, p->age,
+				&leftmost_unused);
+	return center;
+}
+
+/*
+ * Pick the GC_AT victim from the victim rb-tree.
+ *
+ * Walks the tree from its first node via rb_next(), scores each entry whose
+ * mtime lies in [dirty_min_mtime, dirty_max_mtime] and records the entry
+ * with the lowest cost in @p (min_cost, oldest_age, min_segno). The score
+ * combines an age term weighted by am->age_weight and a free-space term
+ * weighted by (100 - am->age_weight); cost is UINT_MAX minus that score, so
+ * older and emptier sections win. Ties on cost go to the larger age term.
+ * At most dirty_threshold entries are scored.
+ */
+static void atgc_lookup_victim(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ struct atgc_management *am = &sbi->am;
+ struct rb_root_cached *root = &am->root;
+ struct rb_node *node;
+ struct rb_entry *re;
+ struct victim_entry *ve;
+ unsigned long long total_time;
+ unsigned long long age, u, accu;
+ unsigned long long max_mtime = sit_i->dirty_max_mtime;
+ unsigned long long min_mtime = sit_i->dirty_min_mtime;
+ unsigned int sec_blocks = BLKS_PER_SEC(sbi);
+ unsigned int vblocks;
+ /* scoring budget: the larger of the fixed count and the ratio of victims */
+ unsigned int dirty_threshold = max(am->max_candidate_count,
+ am->candidate_ratio *
+ am->victim_count / 100);
+ unsigned int age_weight = am->age_weight;
+ unsigned int cost;
+ unsigned int iter = 0;
+
+ /* NOTE(review): presumably means no candidate updated the dirty bounds */
+ if (max_mtime < min_mtime)
+ return;
+
+ /* make the upper bound exclusive so total_time is at least 1 */
+ max_mtime += 1;
+ total_time = max_mtime - min_mtime;
+
+ /*
+ * Scale factor for both terms, capped at DEFAULT_ACCURACY_CLASS and at
+ * ULLONG_MAX / total_time / 100 (presumably to keep the products
+ * below from overflowing -- confirm).
+ */
+ accu = div64_u64(ULLONG_MAX, total_time);
+ accu = min_t(unsigned long long, div_u64(accu, 100),
+ DEFAULT_ACCURACY_CLASS);
+
+ node = rb_first_cached(root);
+next:
+ re = rb_entry_safe(node, struct rb_entry, rb_node);
+ if (!re)
+ return;
+
+ ve = (struct victim_entry *)re;
+
+ /* entries outside the dirty mtime window are not scored or counted */
+ if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
+ goto skip;
+
+ /* age = 10000 * x% * 60 */
+ age = div64_u64(accu * (max_mtime - ve->mtime), total_time) *
+ age_weight;
+
+ vblocks = get_valid_blocks(sbi, ve->segno, true);
+ f2fs_bug_on(sbi, !vblocks || vblocks == sec_blocks);
+
+ /* u = 10000 * x% * 40 */
+ u = div64_u64(accu * (sec_blocks - vblocks), sec_blocks) *
+ (100 - age_weight);
+
+ f2fs_bug_on(sbi, age + u >= UINT_MAX);
+
+ /* higher combined score gives lower cost */
+ cost = UINT_MAX - (age + u);
+ iter++;
+
+ if (cost < p->min_cost ||
+ (cost == p->min_cost && age > p->oldest_age)) {
+ p->min_cost = cost;
+ p->oldest_age = age;
+ p->min_segno = ve->segno;
+ }
+skip:
+ /* keep walking until dirty_threshold entries have been scored */
+ if (iter < dirty_threshold) {
+ node = rb_next(node);
+ goto next;
+ }
+}
+
+/*
+ * select candidates around source section in range of
+ * [target - dirty_threshold, target + dirty_threshold]
+ *
+ * The walk starts at the node returned by lookup_central_victim() for
+ * p->age and runs in two stages: stage 0 moves backwards with rb_prev(),
+ * stage 1 restarts from the same central node and moves forwards with
+ * rb_next(). Each stage scores at most dirty_threshold entries. The cost is
+ * UINT_MAX minus the segment's ckpt_valid_blocks, so segments with more
+ * checkpointed valid blocks are preferred; ties go to the entry whose age is
+ * closest to p->age. The winner is recorded in @p.
+ */
+static void atssr_lookup_victim(struct f2fs_sb_info *sbi,
+ struct victim_sel_policy *p)
+{
+ struct sit_info *sit_i = SIT_I(sbi);
+ struct atgc_management *am = &sbi->am;
+ struct rb_node *node;
+ struct rb_entry *re;
+ struct victim_entry *ve;
+ unsigned long long age;
+ unsigned long long max_mtime = sit_i->dirty_max_mtime;
+ unsigned long long min_mtime = sit_i->dirty_min_mtime;
+ unsigned int seg_blocks = sbi->blocks_per_seg;
+ unsigned int vblocks;
+ /* per-stage scoring budget */
+ unsigned int dirty_threshold = max(am->max_candidate_count,
+ am->candidate_ratio *
+ am->victim_count / 100);
+ unsigned int cost;
+ unsigned int iter = 0;
+ int stage = 0;
+
+ /* NOTE(review): presumably means no candidate updated the dirty bounds */
+ if (max_mtime < min_mtime)
+ return;
+ max_mtime += 1;
+next_stage:
+ node = lookup_central_victim(sbi, p);
+next_node:
+ re = rb_entry_safe(node, struct rb_entry, rb_node);
+ if (!re) {
+ /* ran off the tree: switch direction once, then give up */
+ if (stage == 0)
+ goto skip_stage;
+ return;
+ }
+
+ ve = (struct victim_entry *)re;
+
+ /* entries outside the dirty mtime window are not scored or counted */
+ if (ve->mtime >= max_mtime || ve->mtime < min_mtime)
+ goto skip_node;
+
+ age = max_mtime - ve->mtime;
+
+ vblocks = get_seg_entry(sbi, ve->segno)->ckpt_valid_blocks;
+ f2fs_bug_on(sbi, !vblocks);
+
+ /* rare case */
+ if (vblocks == seg_blocks)
+ goto skip_node;
+
+ iter++;
+
+ /* larger value means this entry's age is closer to p->age */
+ age = max_mtime - abs(p->age - age);
+ cost = UINT_MAX - vblocks;
+
+ if (cost < p->min_cost ||
+ (cost == p->min_cost && age > p->oldest_age)) {
+ p->min_cost = cost;
+ p->oldest_age = age;
+ p->min_segno = ve->segno;
+ }
+skip_node:
+ if (iter < dirty_threshold) {
+ if (stage == 0)
+ node = rb_prev(node);
+ else if (stage == 1)
+ node = rb_next(node);
+ goto next_node;
+ }
+skip_stage:
+ /* reset the budget and rescan from the central node going forwards */
+ if (stage < 1) {
+ stage++;
+ iter = 0;
+ goto next_stage;
+ }
+}
+/*
+ * Check the victim rb-tree for consistency, then run the age-based victim
+ * search that matches the policy: GC_AT uses atgc_lookup_victim(), AT_SSR
+ * uses atssr_lookup_victim(). Any other policy is flagged as a bug.
+ */
+static void lookup_victim_by_age(struct f2fs_sb_info *sbi,
+		struct victim_sel_policy *p)
+{
+	bool tree_ok = f2fs_check_rb_tree_consistence(sbi,
+						&sbi->am.root, true);
+
+	f2fs_bug_on(sbi, !tree_ok);
+
+	if (p->gc_mode == GC_AT) {
+		atgc_lookup_victim(sbi, p);
+		return;
+	}
+	if (p->alloc_mode == AT_SSR) {
+		atssr_lookup_victim(sbi, p);
+		return;
+	}
+
+	/* caller passed a policy that is neither GC_AT nor AT_SSR */
+	f2fs_bug_on(sbi, 1);
+}
+
+/*
+ * Free every victim entry on the ATGC victim list and reset the rb-tree
+ * root to empty. The victim count must be zero and the list empty
+ * afterwards.
+ */
+static void release_victim_entry(struct f2fs_sb_info *sbi)
+{
+	struct atgc_management *am = &sbi->am;
+	struct victim_entry *cur, *next;
+
+	list_for_each_entry_safe(cur, next, &am->victim_list, list) {
+		list_del(&cur->list);
+		am->victim_count--;
+		kmem_cache_free(victim_entry_slab, cur);
+	}
+
+	/* all nodes are gone, so the tree is reset wholesale */
+	am->root = RB_ROOT_CACHED;
+
+	f2fs_bug_on(sbi, am->victim_count);
+	f2fs_bug_on(sbi, !list_empty(&am->victim_list));
+}
+
/*
* This function is called from two paths.
* One is garbage collection and the other is SSR segment selection.
@@ -322,25 +620,37 @@ static unsigned int count_bits(const unsigned long *addr,
* which has minimum valid blocks and removes it from dirty seglist.
*/
static int get_victim_by_default(struct f2fs_sb_info *sbi,
- unsigned int *result, int gc_type, int type, char alloc_mode)
+ unsigned int *result, int gc_type, int type,
+ char alloc_mode, unsigned long long age)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct sit_info *sm = SIT_I(sbi);
struct victim_sel_policy p;
unsigned int secno, last_victim;
unsigned int last_segment;
- unsigned int nsearched = 0;
+ unsigned int nsearched;
+ bool is_atgc;
int ret = 0;
mutex_lock(&dirty_i->seglist_lock);
last_segment = MAIN_SECS(sbi) * sbi->segs_per_sec;
p.alloc_mode = alloc_mode;
- select_policy(sbi, gc_type, type, &p);
+ p.age = age;
+ p.age_threshold = sbi->am.age_threshold;
+retry:
+ select_policy(sbi, gc_type, type, &p);
p.min_segno = NULL_SEGNO;
+ p.oldest_age = 0;
p.min_cost = get_max_cost(sbi, &p);
+ is_atgc = (p.gc_mode == GC_AT || p.alloc_mode == AT_SSR);
+ nsearched = 0;
+
+ if (is_atgc)
+ SIT_I(sbi)->dirty_min_mtime = ULLONG_MAX;
+
if (*result != NULL_SEGNO) {
if (!get_valid_blocks(sbi, *result, false)) {
ret = -ENODATA;
@@ -421,11 +731,16 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
/* Don't touch checkpointed data */
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
get_ckpt_valid_blocks(sbi, segno) &&
- p.alloc_mode != SSR))
+ p.alloc_mode == LFS))
goto next;
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
+ if (is_atgc) {
+ add_victim_entry(sbi, &p, segno);
+ goto next;
+ }
+
cost = get_gc_cost(sbi, segno, &p);
if (p.min_cost > cost) {
@@ -444,6 +759,19 @@ next:
break;
}
}
+
+ /* get victim for GC_AT/AT_SSR */
+ if (is_atgc) {
+ lookup_victim_by_age(sbi, &p);
+ release_victim_entry(sbi);
+ }
+
+ if (is_atgc && p.min_segno == NULL_SEGNO &&
+ sm->elapsed_time < p.age_threshold) {
+ p.age_threshold = 0;
+ goto retry;
+ }
+
if (p.min_segno != NULL_SEGNO) {
got_it:
*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
@@ -536,6 +864,7 @@ static int gc_node_segment(struct f2fs_sb_info *sbi,
int phase = 0;
bool fggc = (gc_type == FG_GC);
int submitted = 0;
+ unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
start_addr = START_BLOCK(sbi, segno);
@@ -545,7 +874,7 @@ next_step:
if (fggc && phase == 2)
atomic_inc(&sbi->wb_sync_req[NODE]);
- for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
+ for (off = 0; off < usable_blks_in_seg; off++, entry++) {
nid_t nid = le32_to_cpu(entry->nid);
struct page *node_page;
struct node_info ni;
@@ -791,6 +1120,8 @@ static int move_data_block(struct inode *inode, block_t bidx,
block_t newaddr;
int err = 0;
bool lfs_mode = f2fs_lfs_mode(fio.sbi);
+ int type = fio.sbi->am.atgc_enabled ?
+ CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA;
/* do not read out */
page = f2fs_grab_cache_page(inode->i_mapping, bidx, false);
@@ -877,7 +1208,7 @@ static int move_data_block(struct inode *inode, block_t bidx,
}
f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
- &sum, CURSEG_COLD_DATA, NULL);
+ &sum, type, NULL);
fio.encrypted_page = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
@@ -927,7 +1258,7 @@ put_page_out:
recover_block:
if (err)
f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
- true, true);
+ true, true, true);
up_out:
if (lfs_mode)
up_write(&fio.sbi->io_order_lock);
@@ -1033,13 +1364,14 @@ static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
int off;
int phase = 0;
int submitted = 0;
+ unsigned int usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
start_addr = START_BLOCK(sbi, segno);
next_step:
entry = sum;
- for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
+ for (off = 0; off < usable_blks_in_seg; off++, entry++) {
struct page *data_page;
struct inode *inode;
struct node_info dni; /* dnode info for the data */
@@ -1182,7 +1514,7 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
down_write(&sit_i->sentry_lock);
ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
- NO_CHECK_TYPE, LFS);
+ NO_CHECK_TYPE, LFS, 0);
up_write(&sit_i->sentry_lock);
return ret;
}
@@ -1204,6 +1536,17 @@ static int do_garbage_collect(struct f2fs_sb_info *sbi,
if (__is_large_section(sbi))
end_segno = rounddown(end_segno, sbi->segs_per_sec);
+ /*
+ * zone-capacity can be less than zone-size in zoned devices,
+ * resulting in less than expected usable segments in the zone,
+ * calculate the end segno in the zone which can be garbage collected
+ */
+ if (f2fs_sb_has_blkzoned(sbi))
+ end_segno -= sbi->segs_per_sec -
+ f2fs_usable_segs_in_sec(sbi, segno);
+
+ sanity_check_seg_type(sbi, get_seg_entry(sbi, segno)->type);
+
/* readahead multi ssa blocks those have contiguous address */
if (__is_large_section(sbi))
f2fs_ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
@@ -1356,7 +1699,8 @@ gc_more:
goto stop;
seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
- if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
+ if (gc_type == FG_GC &&
+ seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
sec_freed++;
total_freed += seg_freed;
@@ -1413,6 +1757,37 @@ stop:
return ret;
}
+/*
+ * Create the slab cache used for struct victim_entry.
+ * Returns 0 on success or -ENOMEM if the cache could not be created.
+ */
+int __init f2fs_create_garbage_collection_cache(void)
+{
+	victim_entry_slab = f2fs_kmem_cache_create("f2fs_victim_entry",
+					sizeof(struct victim_entry));
+
+	return victim_entry_slab ? 0 : -ENOMEM;
+}
+
+/* Destroy the victim_entry slab cache (victim_entry_slab). */
+void f2fs_destroy_garbage_collection_cache(void)
+{
+ kmem_cache_destroy(victim_entry_slab);
+}
+
+/*
+ * Initialise the age-threshold GC (ATGC) state embedded in @sbi.
+ *
+ * ATGC is enabled only when the ATGC mount option is set and the elapsed
+ * time recorded in the SIT info has reached DEF_GC_THREAD_AGE_THRESHOLD.
+ * The victim rb-tree and list start empty, and every tunable, including
+ * the age threshold, is set to its default.
+ */
+static void init_atgc_management(struct f2fs_sb_info *sbi)
+{
+	struct atgc_management *am = &sbi->am;
+
+	if (test_opt(sbi, ATGC) &&
+		SIT_I(sbi)->elapsed_time >= DEF_GC_THREAD_AGE_THRESHOLD)
+		am->atgc_enabled = true;
+
+	am->root = RB_ROOT_CACHED;
+	INIT_LIST_HEAD(&am->victim_list);
+	am->victim_count = 0;
+
+	am->candidate_ratio = DEF_GC_THREAD_CANDIDATE_RATIO;
+	am->max_candidate_count = DEF_GC_THREAD_MAX_CANDIDATE_COUNT;
+	am->age_weight = DEF_GC_THREAD_AGE_WEIGHT;
+	/*
+	 * get_victim_by_default() copies this value into the selection
+	 * policy and add_victim_entry() uses it to reject young sections.
+	 * Without a default here the filter would depend on whatever the
+	 * field happened to contain.
+	 */
+	am->age_threshold = DEF_GC_THREAD_AGE_THRESHOLD;
+}
+
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
{
DIRTY_I(sbi)->v_ops = &default_v_ops;
@@ -1423,6 +1798,8 @@ void f2fs_build_gc_manager(struct f2fs_sb_info *sbi)
if (f2fs_is_multi_device(sbi) && !__is_large_section(sbi))
SIT_I(sbi)->last_victim[ALLOC_NEXT] =
GET_SEGNO(sbi, FDEV(0).end_blk) + 1;
+
+ init_atgc_management(sbi);
}
static int free_segment_range(struct f2fs_sb_info *sbi,
@@ -1450,7 +1827,7 @@ static int free_segment_range(struct f2fs_sb_info *sbi,
mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
/* Move out cursegs from the target range */
- for (type = CURSEG_HOT_DATA; type < NR_CURSEG_TYPE; type++)
+ for (type = CURSEG_HOT_DATA; type < NR_CURSEG_PERSIST_TYPE; type++)
f2fs_allocate_segment_for_resize(sbi, type, start, end);
/* do GC to move out valid blocks in the range */
diff --git a/fs/f2fs/gc.h b/fs/f2fs/gc.h
index db3c61046aa4..0c8dae12dc51 100644
--- a/fs/f2fs/gc.h
+++ b/fs/f2fs/gc.h
@@ -14,6 +14,14 @@
#define DEF_GC_THREAD_MIN_SLEEP_TIME 30000 /* milliseconds */
#define DEF_GC_THREAD_MAX_SLEEP_TIME 60000
#define DEF_GC_THREAD_NOGC_SLEEP_TIME 300000 /* wait 5 min */
+
+/* choose candidates from sections that are more than 7 days old */
+#define DEF_GC_THREAD_AGE_THRESHOLD (60 * 60 * 24 * 7)
+#define DEF_GC_THREAD_CANDIDATE_RATIO 20 /* select 20% oldest sections as candidates */
+#define DEF_GC_THREAD_MAX_CANDIDATE_COUNT 10 /* select at most 10 sections as candidates */
+#define DEF_GC_THREAD_AGE_WEIGHT 60 /* age weight */
+#define DEFAULT_ACCURACY_CLASS 10000 /* accuracy class */
+
#define LIMIT_INVALID_BLOCK 40 /* percentage over total user space */
#define LIMIT_FREE_BLOCK 40 /* percentage over invalid + free space */
@@ -41,16 +49,69 @@ struct gc_inode_list {
struct radix_tree_root iroot;
};
+/*
+ * Key/value pair describing one GC victim candidate. It overlays the
+ * anonymous { mtime, segno } struct inside struct victim_entry.
+ */
+struct victim_info {
+ unsigned long long mtime; /* mtime of section */
+ unsigned int segno; /* segment No. (same storage as victim_entry.segno) */
+};
+
+/*
+ * One candidate in the ATGC victim rb-tree. Entries are also chained on
+ * atgc_management.victim_list.
+ * NOTE(review): lookup code casts struct rb_entry * to this type, so the
+ * leading members presumably must stay layout-compatible with struct
+ * rb_entry -- confirm before reordering fields.
+ */
+struct victim_entry {
+ struct rb_node rb_node; /* rb node located in rb-tree */
+ union {
+ struct {
+ unsigned long long mtime; /* mtime of section */
+ unsigned int segno; /* segment No. */
+ };
+ struct victim_info vi; /* victim info */
+ };
+ struct list_head list; /* link in the victim list */
+};
+
/*
* inline functions
*/
+
+/*
+ * On a zoned device the zone capacity can be smaller than the zone size.
+ * If the capacity is not aligned to the f2fs segment size (2MB), the segment
+ * that straddles the capacity boundary contains blocks that cannot be used.
+ * Such segments may be on the free list, so the free block count is the sum
+ * of the usable blocks of every currently free segment, whether normal or
+ * straddling.
+ */
+static inline block_t free_segs_blk_count_zoned(struct f2fs_sb_info *sbi)
+{
+	struct free_segmap_info *free_i = FREE_I(sbi);
+	block_t total = 0;
+	int seg;
+
+	spin_lock(&free_i->segmap_lock);
+	for (seg = 0; seg < MAIN_SEGS(sbi); seg++) {
+		/* only segments whose bit is clear are counted */
+		if (test_bit(seg, free_i->free_segmap))
+			continue;
+		total += f2fs_usable_blks_in_seg(sbi, seg);
+	}
+	spin_unlock(&free_i->segmap_lock);
+
+	return total;
+}
+
+/*
+ * Number of blocks in free segments. Zoned devices need a per-segment sum
+ * of usable blocks; otherwise every free segment contributes a full
+ * segment's worth of blocks.
+ */
+static inline block_t free_segs_blk_count(struct f2fs_sb_info *sbi)
+{
+	if (!f2fs_sb_has_blkzoned(sbi))
+		return free_segments(sbi) << sbi->log_blocks_per_seg;
+
+	return free_segs_blk_count_zoned(sbi);
+}
+
static inline block_t free_user_blocks(struct f2fs_sb_info *sbi)
{
- if (free_segments(sbi) < overprovision_segments(sbi))
+ block_t free_blks, ovp_blks;
+
+ free_blks = free_segs_blk_count(sbi);
+ ovp_blks = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
+
+ if (free_blks < ovp_blks)
return 0;
- else
- return (free_segments(sbi) - overprovision_segments(sbi))
- << sbi->log_blocks_per_seg;
+
+ return free_blks - ovp_blks;
}
static inline block_t limit_invalid_user_blocks(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 102df444f623..70384e31788d 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -524,7 +524,7 @@ static int f2fs_move_rehashed_dirents(struct inode *dir, struct page *ipage,
!f2fs_has_inline_xattr(dir))
F2FS_I(dir)->i_inline_xattr_size = 0;
- kvfree(backup_dentry);
+ kfree(backup_dentry);
return 0;
recover:
lock_page(ipage);
@@ -535,7 +535,7 @@ recover:
set_page_dirty(ipage);
f2fs_put_page(ipage, 1);
- kvfree(backup_dentry);
+ kfree(backup_dentry);
return err;
}
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 66969ae852b9..657db2fb6739 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -287,11 +287,19 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
return false;
}
+ if ((fi->i_flags & F2FS_CASEFOLD_FL) && !f2fs_sb_has_casefold(sbi)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
+ f2fs_warn(sbi, "%s: inode (ino=%lx) has casefold flag, but casefold feature is off",
+ __func__, inode->i_ino);
+ return false;
+ }
+
if (f2fs_has_extra_attr(inode) && f2fs_sb_has_compression(sbi) &&
fi->i_flags & F2FS_COMPR_FL &&
F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
i_log_cluster_size)) {
if (ri->i_compress_algorithm >= COMPRESS_MAX) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported "
"compress algorithm: %u, run fsck to fix",
__func__, inode->i_ino,
@@ -300,6 +308,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
}
if (le64_to_cpu(ri->i_compr_blocks) >
SECTOR_TO_BLOCK(inode->i_blocks)) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has inconsistent "
"i_compr_blocks:%llu, i_blocks:%llu, run fsck to fix",
__func__, inode->i_ino,
@@ -309,6 +318,7 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page)
}
if (ri->i_log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
ri->i_log_cluster_size > MAX_COMPRESS_LOG_SIZE) {
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: inode (ino=%lx) has unsupported "
"log cluster size: %u, run fsck to fix",
__func__, inode->i_ino,
@@ -442,7 +452,8 @@ static int do_read_inode(struct inode *inode)
(fi->i_flags & F2FS_COMPR_FL)) {
if (F2FS_FITS_IN_INODE(ri, fi->i_extra_isize,
i_log_cluster_size)) {
- fi->i_compr_blocks = le64_to_cpu(ri->i_compr_blocks);
+ atomic_set(&fi->i_compr_blocks,
+ le64_to_cpu(ri->i_compr_blocks));
fi->i_compress_algorithm = ri->i_compress_algorithm;
fi->i_log_cluster_size = ri->i_log_cluster_size;
fi->i_cluster_size = 1 << fi->i_log_cluster_size;
@@ -460,7 +471,7 @@ static int do_read_inode(struct inode *inode)
stat_inc_inline_inode(inode);
stat_inc_inline_dir(inode);
stat_inc_compr_inode(inode);
- stat_add_compr_blocks(inode, F2FS_I(inode)->i_compr_blocks);
+ stat_add_compr_blocks(inode, atomic_read(&fi->i_compr_blocks));
return 0;
}
@@ -619,7 +630,8 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
F2FS_FITS_IN_INODE(ri, F2FS_I(inode)->i_extra_isize,
i_log_cluster_size)) {
ri->i_compr_blocks =
- cpu_to_le64(F2FS_I(inode)->i_compr_blocks);
+ cpu_to_le64(atomic_read(
+ &F2FS_I(inode)->i_compr_blocks));
ri->i_compress_algorithm =
F2FS_I(inode)->i_compress_algorithm;
ri->i_log_cluster_size =
@@ -768,7 +780,8 @@ no_delete:
stat_dec_inline_dir(inode);
stat_dec_inline_inode(inode);
stat_dec_compr_inode(inode);
- stat_sub_compr_blocks(inode, F2FS_I(inode)->i_compr_blocks);
+ stat_sub_compr_blocks(inode,
+ atomic_read(&F2FS_I(inode)->i_compr_blocks));
if (likely(!f2fs_cp_error(sbi) &&
!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 45f324511a19..8fa37d1434de 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -712,7 +712,7 @@ out_f2fs_handle_failed_inode:
f2fs_handle_failed_inode(inode);
out_free_encrypted_link:
if (disk_link.name != (unsigned char *)symname)
- kvfree(disk_link.name);
+ kfree(disk_link.name);
return err;
}
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index cb1b5b61a1da..d5d8ce077f29 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -109,7 +109,7 @@ static void clear_node_page_dirty(struct page *page)
static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
- return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid));
+ return f2fs_get_meta_page(sbi, current_nat_addr(sbi, nid));
}
static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
@@ -3105,9 +3105,6 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
- if (!version_bitmap)
- return -EFAULT;
-
nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
GFP_KERNEL);
if (!nm_i->nat_bitmap)
@@ -3257,7 +3254,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
kvfree(nm_i->nat_bitmap_mir);
#endif
sbi->nm_info = NULL;
- kvfree(nm_i);
+ kfree(nm_i);
}
int __init f2fs_create_node_manager_caches(void)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index e247a5ef3713..1596502f7375 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -189,7 +189,7 @@ void f2fs_register_inmem_page(struct inode *inode, struct page *page)
f2fs_trace_pid(page);
- f2fs_set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
+ f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE);
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
@@ -728,7 +728,7 @@ init_thread:
"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(fcc->f2fs_issue_flush)) {
err = PTR_ERR(fcc->f2fs_issue_flush);
- kvfree(fcc);
+ kfree(fcc);
SM_I(sbi)->fcc_info = NULL;
return err;
}
@@ -747,7 +747,7 @@ void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
kthread_stop(flush_thread);
}
if (free) {
- kvfree(fcc);
+ kfree(fcc);
SM_I(sbi)->fcc_info = NULL;
}
}
@@ -759,6 +759,9 @@ int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
if (!f2fs_is_multi_device(sbi))
return 0;
+ if (test_opt(sbi, NOBARRIER))
+ return 0;
+
for (i = 1; i < sbi->s_ndevs; i++) {
if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
continue;
@@ -859,20 +862,22 @@ static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
unsigned short valid_blocks, ckpt_valid_blocks;
+ unsigned int usable_blocks;
if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
return;
+ usable_blocks = f2fs_usable_blks_in_seg(sbi, segno);
mutex_lock(&dirty_i->seglist_lock);
valid_blocks = get_valid_blocks(sbi, segno, false);
ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
- ckpt_valid_blocks == sbi->blocks_per_seg)) {
+ ckpt_valid_blocks == usable_blocks)) {
__locate_dirty_segment(sbi, segno, PRE);
__remove_dirty_segment(sbi, segno, DIRTY);
- } else if (valid_blocks < sbi->blocks_per_seg) {
+ } else if (valid_blocks < usable_blocks) {
__locate_dirty_segment(sbi, segno, DIRTY);
} else {
/* Recovery routine with SSR needs this */
@@ -915,9 +920,11 @@ block_t f2fs_get_unusable_blocks(struct f2fs_sb_info *sbi)
for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
se = get_seg_entry(sbi, segno);
if (IS_NODESEG(se->type))
- holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
+ holes[NODE] += f2fs_usable_blks_in_seg(sbi, segno) -
+ se->valid_blocks;
else
- holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
+ holes[DATA] += f2fs_usable_blks_in_seg(sbi, segno) -
+ se->valid_blocks;
}
mutex_unlock(&dirty_i->seglist_lock);
@@ -1521,7 +1528,7 @@ retry:
goto next;
if (unlikely(dcc->rbtree_check))
f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
- &dcc->root));
+ &dcc->root, false));
blk_start_plug(&plug);
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
@@ -1958,7 +1965,7 @@ static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
mutex_lock(&dirty_i->seglist_lock);
for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
- __set_test_and_free(sbi, segno);
+ __set_test_and_free(sbi, segno, false);
mutex_unlock(&dirty_i->seglist_lock);
}
@@ -2101,7 +2108,7 @@ init_thread:
"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(dcc->f2fs_issue_discard)) {
err = PTR_ERR(dcc->f2fs_issue_discard);
- kvfree(dcc);
+ kfree(dcc);
SM_I(sbi)->dcc_info = NULL;
return err;
}
@@ -2125,7 +2132,7 @@ static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
if (unlikely(atomic_read(&dcc->discard_cmd_cnt)))
f2fs_issue_discard_timeout(sbi);
- kvfree(dcc);
+ kfree(dcc);
SM_I(sbi)->dcc_info = NULL;
}
@@ -2150,6 +2157,39 @@ static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
__mark_sit_entry_dirty(sbi, segno);
}
+/*
+ * Return the mtime of the segment containing @blkaddr, or 0 when the
+ * address maps to NULL_SEGNO.
+ */
+static inline unsigned long long get_segment_mtime(struct f2fs_sb_info *sbi,
+						block_t blkaddr)
+{
+	unsigned int segno = GET_SEGNO(sbi, blkaddr);
+
+	return segno != NULL_SEGNO ? get_seg_entry(sbi, segno)->mtime : 0;
+}
+
+/*
+ * Fold one block's modification time into the mtime of the segment that
+ * contains @blkaddr.
+ *
+ * The time used is @old_mtime when it is non-zero, otherwise the current
+ * time from get_mtime(). A segment with no mtime yet takes that value
+ * directly; otherwise the new mtime is the average of the existing mtime
+ * weighted by valid_blocks and the new sample. The SIT-wide max_mtime is
+ * raised to the current time if needed. Does nothing for NULL_SEGNO.
+ */
+static void update_segment_mtime(struct f2fs_sb_info *sbi, block_t blkaddr,
+						unsigned long long old_mtime)
+{
+	unsigned int segno = GET_SEGNO(sbi, blkaddr);
+	unsigned long long now = get_mtime(sbi, false);
+	unsigned long long sample;
+	struct seg_entry *se;
+
+	if (segno == NULL_SEGNO)
+		return;
+
+	if (old_mtime)
+		sample = old_mtime;
+	else
+		sample = now;
+
+	se = get_seg_entry(sbi, segno);
+
+	if (se->mtime)
+		se->mtime = div_u64(se->mtime * se->valid_blocks + sample,
+						se->valid_blocks + 1);
+	else
+		se->mtime = sample;
+
+	if (now > SIT_I(sbi)->max_mtime)
+		SIT_I(sbi)->max_mtime = now;
+}
+
static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
struct seg_entry *se;
@@ -2167,12 +2207,9 @@ static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
f2fs_bug_on(sbi, (new_vblocks < 0 ||
- (new_vblocks > sbi->blocks_per_seg)));
+ (new_vblocks > f2fs_usable_blks_in_seg(sbi, segno))));
se->valid_blocks = new_vblocks;
- se->mtime = get_mtime(sbi, false);
- if (se->mtime > SIT_I(sbi)->max_mtime)
- SIT_I(sbi)->max_mtime = se->mtime;
/* Update valid block bitmap */
if (del > 0) {
@@ -2265,6 +2302,7 @@ void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
/* add it into sit main buffer */
down_write(&sit_i->sentry_lock);
+ update_segment_mtime(sbi, addr, 0);
update_sit_entry(sbi, addr, -1);
/* add it into dirty seglist */
@@ -2344,7 +2382,9 @@ int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
*/
struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
- return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
+ if (unlikely(f2fs_cp_error(sbi)))
+ return ERR_PTR(-EIO);
+ return f2fs_get_meta_page_retry(sbi, GET_SUM_BLOCK(sbi, segno));
}
void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
@@ -2389,9 +2429,9 @@ static void write_current_sum_page(struct f2fs_sb_info *sbi,
f2fs_put_page(page, 1);
}
-static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
+static int is_next_segment_free(struct f2fs_sb_info *sbi,
+ struct curseg_info *curseg, int type)
{
- struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int segno = curseg->segno + 1;
struct free_segmap_info *free_i = FREE_I(sbi);
@@ -2495,7 +2535,9 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
struct summary_footer *sum_footer;
+ unsigned short seg_type = curseg->seg_type;
+ curseg->inited = true;
curseg->segno = curseg->next_segno;
curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
curseg->next_blkoff = 0;
@@ -2503,24 +2545,36 @@ static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
sum_footer = &(curseg->sum_blk->footer);
memset(sum_footer, 0, sizeof(struct summary_footer));
- if (IS_DATASEG(type))
+
+ sanity_check_seg_type(sbi, seg_type);
+
+ if (IS_DATASEG(seg_type))
SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
- if (IS_NODESEG(type))
+ if (IS_NODESEG(seg_type))
SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
- __set_sit_entry_type(sbi, type, curseg->segno, modified);
+ __set_sit_entry_type(sbi, seg_type, curseg->segno, modified);
}
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned short seg_type = curseg->seg_type;
+
+ sanity_check_seg_type(sbi, seg_type);
+
/* if segs_per_sec is large than 1, we need to keep original policy. */
if (__is_large_section(sbi))
- return CURSEG_I(sbi, type)->segno;
+ return curseg->segno;
+
+ /* inmem log may not locate on any segment after mount */
+ if (!curseg->inited)
+ return 0;
if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
return 0;
if (test_opt(sbi, NOHEAP) &&
- (type == CURSEG_HOT_DATA || IS_NODESEG(type)))
+ (seg_type == CURSEG_HOT_DATA || IS_NODESEG(seg_type)))
return 0;
if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
@@ -2530,7 +2584,7 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
return 0;
- return CURSEG_I(sbi, type)->segno;
+ return curseg->segno;
}
/*
@@ -2540,12 +2594,14 @@ static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
+ unsigned short seg_type = curseg->seg_type;
unsigned int segno = curseg->segno;
int dir = ALLOC_LEFT;
- write_sum_page(sbi, curseg->sum_blk,
+ if (curseg->inited)
+ write_sum_page(sbi, curseg->sum_blk,
GET_SUM_BLOCK(sbi, segno));
- if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
+ if (seg_type == CURSEG_WARM_DATA || seg_type == CURSEG_COLD_DATA)
dir = ALLOC_RIGHT;
if (test_opt(sbi, NOHEAP))
@@ -2594,7 +2650,7 @@ static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
* This function always allocates a used segment(from dirty seglist) by SSR
* manner, so it should recover the existing segment information of valid blocks
*/
-static void change_curseg(struct f2fs_sb_info *sbi, int type)
+static void change_curseg(struct f2fs_sb_info *sbi, int type, bool flush)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -2602,8 +2658,10 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
struct f2fs_summary_block *sum_node;
struct page *sum_page;
- write_sum_page(sbi, curseg->sum_blk,
- GET_SUM_BLOCK(sbi, curseg->segno));
+ if (flush)
+ write_sum_page(sbi, curseg->sum_blk,
+ GET_SUM_BLOCK(sbi, curseg->segno));
+
__set_test_and_inuse(sbi, new_segno);
mutex_lock(&dirty_i->seglist_lock);
@@ -2616,29 +2674,139 @@ static void change_curseg(struct f2fs_sb_info *sbi, int type)
__next_free_blkoff(sbi, curseg, 0);
sum_page = f2fs_get_sum_page(sbi, new_segno);
- f2fs_bug_on(sbi, IS_ERR(sum_page));
+ if (IS_ERR(sum_page)) {
+ /* GC won't be able to use stale summary pages by cp_error */
+ memset(curseg->sum_blk, 0, SUM_ENTRY_SIZE);
+ return;
+ }
sum_node = (struct f2fs_summary_block *)page_address(sum_page);
memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
f2fs_put_page(sum_page, 1);
}
-static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
+static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
+ int alloc_mode, unsigned long long age);
+
+static void get_atssr_segment(struct f2fs_sb_info *sbi, int type,
+ int target_type, int alloc_mode,
+ unsigned long long age)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ curseg->seg_type = target_type;
+
+ if (get_ssr_segment(sbi, type, alloc_mode, age)) {
+ struct seg_entry *se = get_seg_entry(sbi, curseg->next_segno);
+
+ curseg->seg_type = se->type;
+ change_curseg(sbi, type, true);
+ } else {
+ /* allocate cold segment by default */
+ curseg->seg_type = CURSEG_COLD_DATA;
+ new_curseg(sbi, type, true);
+ }
+ stat_inc_seg_type(sbi, curseg);
+}
+
+static void __f2fs_init_atgc_curseg(struct f2fs_sb_info *sbi)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC);
+
+ if (!sbi->am.atgc_enabled)
+ return;
+
+ down_read(&SM_I(sbi)->curseg_lock);
+
+ mutex_lock(&curseg->curseg_mutex);
+ down_write(&SIT_I(sbi)->sentry_lock);
+
+ get_atssr_segment(sbi, CURSEG_ALL_DATA_ATGC, CURSEG_COLD_DATA, SSR, 0);
+
+ up_write(&SIT_I(sbi)->sentry_lock);
+ mutex_unlock(&curseg->curseg_mutex);
+
+ up_read(&SM_I(sbi)->curseg_lock);
+
+}
+void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ __f2fs_init_atgc_curseg(sbi);
+}
+
+static void __f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi, int type)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ mutex_lock(&curseg->curseg_mutex);
+ if (!curseg->inited)
+ goto out;
+
+ if (get_valid_blocks(sbi, curseg->segno, false)) {
+ write_sum_page(sbi, curseg->sum_blk,
+ GET_SUM_BLOCK(sbi, curseg->segno));
+ } else {
+ mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+ __set_test_and_free(sbi, curseg->segno, true);
+ mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+ }
+out:
+ mutex_unlock(&curseg->curseg_mutex);
+}
+
+void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ __f2fs_save_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+
+ if (sbi->am.atgc_enabled)
+ __f2fs_save_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
+}
+
+static void __f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi, int type)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+
+ mutex_lock(&curseg->curseg_mutex);
+ if (!curseg->inited)
+ goto out;
+ if (get_valid_blocks(sbi, curseg->segno, false))
+ goto out;
+
+ mutex_lock(&DIRTY_I(sbi)->seglist_lock);
+ __set_test_and_inuse(sbi, curseg->segno);
+ mutex_unlock(&DIRTY_I(sbi)->seglist_lock);
+out:
+ mutex_unlock(&curseg->curseg_mutex);
+}
+
+void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi)
+{
+ __f2fs_restore_inmem_curseg(sbi, CURSEG_COLD_DATA_PINNED);
+
+ if (sbi->am.atgc_enabled)
+ __f2fs_restore_inmem_curseg(sbi, CURSEG_ALL_DATA_ATGC);
+}
+
+static int get_ssr_segment(struct f2fs_sb_info *sbi, int type,
+ int alloc_mode, unsigned long long age)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
unsigned segno = NULL_SEGNO;
+ unsigned short seg_type = curseg->seg_type;
int i, cnt;
bool reversed = false;
+ sanity_check_seg_type(sbi, seg_type);
+
/* f2fs_need_SSR() already forces to do this */
- if (!v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
+ if (!v_ops->get_victim(sbi, &segno, BG_GC, seg_type, alloc_mode, age)) {
curseg->next_segno = segno;
return 1;
}
/* For node segments, let's do SSR more intensively */
- if (IS_NODESEG(type)) {
- if (type >= CURSEG_WARM_NODE) {
+ if (IS_NODESEG(seg_type)) {
+ if (seg_type >= CURSEG_WARM_NODE) {
reversed = true;
i = CURSEG_COLD_NODE;
} else {
@@ -2646,7 +2814,7 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
}
cnt = NR_CURSEG_NODE_TYPE;
} else {
- if (type >= CURSEG_WARM_DATA) {
+ if (seg_type >= CURSEG_WARM_DATA) {
reversed = true;
i = CURSEG_COLD_DATA;
} else {
@@ -2656,9 +2824,9 @@ static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
}
for (; cnt-- > 0; reversed ? i-- : i++) {
- if (i == type)
+ if (i == seg_type)
continue;
- if (!v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
+ if (!v_ops->get_victim(sbi, &segno, BG_GC, i, alloc_mode, age)) {
curseg->next_segno = segno;
return 1;
}
@@ -2687,13 +2855,15 @@ static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
if (force)
new_curseg(sbi, type, true);
else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
- type == CURSEG_WARM_NODE)
+ curseg->seg_type == CURSEG_WARM_NODE)
new_curseg(sbi, type, false);
- else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
+ else if (curseg->alloc_type == LFS &&
+ is_next_segment_free(sbi, curseg, type) &&
likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
new_curseg(sbi, type, false);
- else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
- change_curseg(sbi, type);
+ else if (f2fs_need_SSR(sbi) &&
+ get_ssr_segment(sbi, type, SSR, 0))
+ change_curseg(sbi, type, true);
else
new_curseg(sbi, type, false);
@@ -2714,8 +2884,8 @@ void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
if (segno < start || segno > end)
goto unlock;
- if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
- change_curseg(sbi, type);
+ if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type, SSR, 0))
+ change_curseg(sbi, type, true);
else
new_curseg(sbi, type, true);
@@ -2738,11 +2908,15 @@ static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int old_segno;
+ if (!curseg->inited)
+ goto alloc;
+
if (!curseg->next_blkoff &&
!get_valid_blocks(sbi, curseg->segno, false) &&
!get_ckpt_valid_blocks(sbi, curseg->segno))
return;
+alloc:
old_segno = curseg->segno;
SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
locate_dirty_segment(sbi, old_segno);
@@ -2806,7 +2980,7 @@ next:
mutex_lock(&dcc->cmd_lock);
if (unlikely(dcc->rbtree_check))
f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
- &dcc->root));
+ &dcc->root, false));
dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
NULL, start,
@@ -2930,12 +3104,11 @@ out:
return err;
}
-static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
+static bool __has_curseg_space(struct f2fs_sb_info *sbi,
+ struct curseg_info *curseg)
{
- struct curseg_info *curseg = CURSEG_I(sbi, type);
- if (curseg->next_blkoff < sbi->blocks_per_seg)
- return true;
- return false;
+ return curseg->next_blkoff < f2fs_usable_blks_in_seg(sbi,
+ curseg->segno);
}
int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
@@ -3075,8 +3248,13 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
if (fio->type == DATA) {
struct inode *inode = fio->page->mapping->host;
- if (is_cold_data(fio->page) || file_is_cold(inode) ||
- f2fs_compressed_file(inode))
+ if (is_cold_data(fio->page)) {
+ if (fio->sbi->am.atgc_enabled)
+ return CURSEG_ALL_DATA_ATGC;
+ else
+ return CURSEG_COLD_DATA;
+ }
+ if (file_is_cold(inode) || f2fs_compressed_file(inode))
return CURSEG_COLD_DATA;
if (file_is_hot(inode) ||
is_inode_flag_set(inode, FI_HOT_DATA) ||
@@ -3126,27 +3304,25 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, type);
- bool put_pin_sem = false;
-
- if (type == CURSEG_COLD_DATA) {
- /* GC during CURSEG_COLD_DATA_PINNED allocation */
- if (down_read_trylock(&sbi->pin_sem)) {
- put_pin_sem = true;
- } else {
- type = CURSEG_WARM_DATA;
- curseg = CURSEG_I(sbi, type);
- }
- } else if (type == CURSEG_COLD_DATA_PINNED) {
- type = CURSEG_COLD_DATA;
- }
+ unsigned long long old_mtime;
+ bool from_gc = (type == CURSEG_ALL_DATA_ATGC);
+ struct seg_entry *se = NULL;
down_read(&SM_I(sbi)->curseg_lock);
mutex_lock(&curseg->curseg_mutex);
down_write(&sit_i->sentry_lock);
+ if (from_gc) {
+ f2fs_bug_on(sbi, GET_SEGNO(sbi, old_blkaddr) == NULL_SEGNO);
+ se = get_seg_entry(sbi, GET_SEGNO(sbi, old_blkaddr));
+ sanity_check_seg_type(sbi, se->type);
+ f2fs_bug_on(sbi, IS_NODESEG(se->type));
+ }
*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
+ f2fs_bug_on(sbi, curseg->next_blkoff >= sbi->blocks_per_seg);
+
f2fs_wait_discard_bio(sbi, *new_blkaddr);
/*
@@ -3160,6 +3336,14 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
stat_inc_block_count(sbi, curseg);
+ if (from_gc) {
+ old_mtime = get_segment_mtime(sbi, old_blkaddr);
+ } else {
+ update_segment_mtime(sbi, old_blkaddr, 0);
+ old_mtime = 0;
+ }
+ update_segment_mtime(sbi, *new_blkaddr, old_mtime);
+
/*
* SIT information should be updated before segment allocation,
* since SSR needs latest valid block information.
@@ -3168,9 +3352,13 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
update_sit_entry(sbi, old_blkaddr, -1);
- if (!__has_curseg_space(sbi, type))
- sit_i->s_ops->allocate_segment(sbi, type, false);
-
+ if (!__has_curseg_space(sbi, curseg)) {
+ if (from_gc)
+ get_atssr_segment(sbi, type, se->type,
+ AT_SSR, se->mtime);
+ else
+ sit_i->s_ops->allocate_segment(sbi, type, false);
+ }
/*
* segment dirty status should be updated after segment allocation,
* so we just need to update status only one time after previous
@@ -3204,9 +3392,6 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
mutex_unlock(&curseg->curseg_mutex);
up_read(&SM_I(sbi)->curseg_lock);
-
- if (put_pin_sem)
- up_read(&sbi->pin_sem);
}
static void update_device_state(struct f2fs_io_info *fio)
@@ -3355,7 +3540,8 @@ static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
- bool recover_curseg, bool recover_newaddr)
+ bool recover_curseg, bool recover_newaddr,
+ bool from_gc)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg;
@@ -3400,17 +3586,22 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
/* change the current segment */
if (segno != curseg->segno) {
curseg->next_segno = segno;
- change_curseg(sbi, type);
+ change_curseg(sbi, type, true);
}
curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
__add_sum_entry(sbi, type, sum);
- if (!recover_curseg || recover_newaddr)
+ if (!recover_curseg || recover_newaddr) {
+ if (!from_gc)
+ update_segment_mtime(sbi, new_blkaddr, 0);
update_sit_entry(sbi, new_blkaddr, 1);
+ }
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
invalidate_mapping_pages(META_MAPPING(sbi),
old_blkaddr, old_blkaddr);
+ if (!from_gc)
+ update_segment_mtime(sbi, old_blkaddr, 0);
update_sit_entry(sbi, old_blkaddr, -1);
}
@@ -3422,7 +3613,7 @@ void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
if (recover_curseg) {
if (old_cursegno != curseg->segno) {
curseg->next_segno = old_cursegno;
- change_curseg(sbi, type);
+ change_curseg(sbi, type, true);
}
curseg->next_blkoff = old_blkoff;
}
@@ -3442,7 +3633,7 @@ void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
set_summary(&sum, dn->nid, dn->ofs_in_node, version);
f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
- recover_curseg, recover_newaddr);
+ recover_curseg, recover_newaddr, false);
f2fs_update_data_blkaddr(dn, new_addr);
}
@@ -3574,7 +3765,7 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
CURSEG_HOT_DATA]);
if (__exist_node_summaries(sbi))
- blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
+ blk_addr = sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type);
else
blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
} else {
@@ -3652,8 +3843,9 @@ static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
}
if (__exist_node_summaries(sbi))
- f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
- NR_CURSEG_TYPE - type, META_CP, true);
+ f2fs_ra_meta_pages(sbi,
+ sum_blk_addr(sbi, NR_CURSEG_PERSIST_TYPE, type),
+ NR_CURSEG_PERSIST_TYPE - type, META_CP, true);
for (; type <= CURSEG_COLD_NODE; type++) {
err = read_normal_summaries(sbi, type);
@@ -3781,7 +3973,7 @@ int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
unsigned int segno)
{
- return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
+ return f2fs_get_meta_page(sbi, current_sit_addr(sbi, segno));
}
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
@@ -4155,14 +4347,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
struct curseg_info *array;
int i;
- array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
- GFP_KERNEL);
+ array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE,
+ sizeof(*array)), GFP_KERNEL);
if (!array)
return -ENOMEM;
SM_I(sbi)->curseg_array = array;
- for (i = 0; i < NR_CURSEG_TYPE; i++) {
+ for (i = 0; i < NO_CHECK_TYPE; i++) {
mutex_init(&array[i].curseg_mutex);
array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
if (!array[i].sum_blk)
@@ -4172,8 +4364,15 @@ static int build_curseg(struct f2fs_sb_info *sbi)
sizeof(struct f2fs_journal), GFP_KERNEL);
if (!array[i].journal)
return -ENOMEM;
+ if (i < NR_PERSISTENT_LOG)
+ array[i].seg_type = CURSEG_HOT_DATA + i;
+ else if (i == CURSEG_COLD_DATA_PINNED)
+ array[i].seg_type = CURSEG_COLD_DATA;
+ else if (i == CURSEG_ALL_DATA_ATGC)
+ array[i].seg_type = CURSEG_COLD_DATA;
array[i].segno = NULL_SEGNO;
array[i].next_blkoff = 0;
+ array[i].inited = false;
}
return restore_curseg_summaries(sbi);
}
@@ -4294,9 +4493,12 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
{
unsigned int start;
int type;
+ struct seg_entry *sentry;
for (start = 0; start < MAIN_SEGS(sbi); start++) {
- struct seg_entry *sentry = get_seg_entry(sbi, start);
+ if (f2fs_usable_blks_in_seg(sbi, start) == 0)
+ continue;
+ sentry = get_seg_entry(sbi, start);
if (!sentry->valid_blocks)
__set_free(sbi, start);
else
@@ -4316,7 +4518,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int segno = 0, offset = 0, secno;
- block_t valid_blocks;
+ block_t valid_blocks, usable_blks_in_seg;
block_t blks_per_sec = BLKS_PER_SEC(sbi);
while (1) {
@@ -4326,9 +4528,10 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
break;
offset = segno + 1;
valid_blocks = get_valid_blocks(sbi, segno, false);
- if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
+ usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
+ if (valid_blocks == usable_blks_in_seg || !valid_blocks)
continue;
- if (valid_blocks > sbi->blocks_per_seg) {
+ if (valid_blocks > usable_blks_in_seg) {
f2fs_bug_on(sbi, 1);
continue;
}
@@ -4408,11 +4611,13 @@ static int sanity_check_curseg(struct f2fs_sb_info *sbi)
* In LFS/SSR curseg, .next_blkoff should point to an unused blkaddr;
* In LFS curseg, all blkaddr after .next_blkoff should be unused.
*/
- for (i = 0; i < NO_CHECK_TYPE; i++) {
+ for (i = 0; i < NR_PERSISTENT_LOG; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
struct seg_entry *se = get_seg_entry(sbi, curseg->segno);
unsigned int blkofs = curseg->next_blkoff;
+ sanity_check_seg_type(sbi, curseg->seg_type);
+
if (f2fs_test_bit(blkofs, se->cur_valid_map))
goto out;
@@ -4637,7 +4842,7 @@ int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
{
int i, ret;
- for (i = 0; i < NO_CHECK_TYPE; i++) {
+ for (i = 0; i < NR_PERSISTENT_LOG; i++) {
ret = fix_curseg_write_pointer(sbi, i);
if (ret)
return ret;
@@ -4678,6 +4883,101 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
return 0;
}
+
+static bool is_conv_zone(struct f2fs_sb_info *sbi, unsigned int zone_idx,
+ unsigned int dev_idx)
+{
+ if (!bdev_is_zoned(FDEV(dev_idx).bdev))
+ return true;
+ return !test_bit(zone_idx, FDEV(dev_idx).blkz_seq);
+}
+
+/* Return the zone index in the given device */
+static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int secno,
+ int dev_idx)
+{
+ block_t sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
+
+ return (sec_start_blkaddr - FDEV(dev_idx).start_blk) >>
+ sbi->log_blocks_per_blkz;
+}
+
+/*
+ * Return the usable segments in a section based on the zone's
+ * corresponding zone capacity. Zone is equal to a section.
+ */
+static inline unsigned int f2fs_usable_zone_segs_in_sec(
+ struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ unsigned int dev_idx, zone_idx, unusable_segs_in_sec;
+
+ dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno));
+ zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx);
+
+ /* Conventional zone's capacity is always equal to zone size */
+ if (is_conv_zone(sbi, zone_idx, dev_idx))
+ return sbi->segs_per_sec;
+
+ /*
+ * If the zone_capacity_blocks array is NULL, then zone capacity
+ * is equal to the zone size for all zones
+ */
+ if (!FDEV(dev_idx).zone_capacity_blocks)
+ return sbi->segs_per_sec;
+
+ /* Get the segment count beyond zone capacity block */
+ unusable_segs_in_sec = (sbi->blocks_per_blkz -
+ FDEV(dev_idx).zone_capacity_blocks[zone_idx]) >>
+ sbi->log_blocks_per_seg;
+ return sbi->segs_per_sec - unusable_segs_in_sec;
+}
+
+/*
+ * Return the number of usable blocks in a segment. The number of blocks
+ * returned is always equal to the number of blocks in a segment for
+ * segments fully contained within a sequential zone capacity or a
+ * conventional zone. For segments partially contained in a sequential
+ * zone capacity, the number of usable blocks up to the zone capacity
+ * is returned. 0 is returned in all other cases.
+ */
+static inline unsigned int f2fs_usable_zone_blks_in_seg(
+ struct f2fs_sb_info *sbi, unsigned int segno)
+{
+ block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
+ unsigned int zone_idx, dev_idx, secno;
+
+ secno = GET_SEC_FROM_SEG(sbi, segno);
+ seg_start = START_BLOCK(sbi, segno);
+ dev_idx = f2fs_target_device_index(sbi, seg_start);
+ zone_idx = get_zone_idx(sbi, secno, dev_idx);
+
+ /*
+ * Conventional zone's capacity is always equal to zone size,
+ * so, blocks per segment is unchanged.
+ */
+ if (is_conv_zone(sbi, zone_idx, dev_idx))
+ return sbi->blocks_per_seg;
+
+ if (!FDEV(dev_idx).zone_capacity_blocks)
+ return sbi->blocks_per_seg;
+
+ sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
+ sec_cap_blkaddr = sec_start_blkaddr +
+ FDEV(dev_idx).zone_capacity_blocks[zone_idx];
+
+ /*
+ * If segment starts before zone capacity and spans beyond
+ * zone capacity, then usable blocks are from seg start to
+ * zone capacity. If the segment starts after the zone capacity,
+ * then there are no usable blocks.
+ */
+ if (seg_start >= sec_cap_blkaddr)
+ return 0;
+ if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
+ return sec_cap_blkaddr - seg_start;
+
+ return sbi->blocks_per_seg;
+}
#else
int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
{
@@ -4688,7 +4988,36 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
{
return 0;
}
+
+static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ return 0;
+}
+
+static inline unsigned int f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ return 0;
+}
#endif
+unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ if (f2fs_sb_has_blkzoned(sbi))
+ return f2fs_usable_zone_blks_in_seg(sbi, segno);
+
+ return sbi->blocks_per_seg;
+}
+
+unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
+ unsigned int segno)
+{
+ if (f2fs_sb_has_blkzoned(sbi))
+ return f2fs_usable_zone_segs_in_sec(sbi, segno);
+
+ return sbi->segs_per_sec;
+}
/*
* Update min, max modified time for cost-benefit GC algorithm
@@ -4715,6 +5044,7 @@ static void init_min_max_mtime(struct f2fs_sb_info *sbi)
sit_i->min_mtime = mtime;
}
sit_i->max_mtime = get_mtime(sbi, false);
+ sit_i->dirty_max_mtime = 0;
up_write(&sit_i->sentry_lock);
}
@@ -4830,7 +5160,7 @@ static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
destroy_victim_secmap(sbi);
SM_I(sbi)->dirty_info = NULL;
- kvfree(dirty_i);
+ kfree(dirty_i);
}
static void destroy_curseg(struct f2fs_sb_info *sbi)
@@ -4842,10 +5172,10 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
return;
SM_I(sbi)->curseg_array = NULL;
for (i = 0; i < NR_CURSEG_TYPE; i++) {
- kvfree(array[i].sum_blk);
- kvfree(array[i].journal);
+ kfree(array[i].sum_blk);
+ kfree(array[i].journal);
}
- kvfree(array);
+ kfree(array);
}
static void destroy_free_segmap(struct f2fs_sb_info *sbi)
@@ -4856,7 +5186,7 @@ static void destroy_free_segmap(struct f2fs_sb_info *sbi)
SM_I(sbi)->free_info = NULL;
kvfree(free_i->free_segmap);
kvfree(free_i->free_secmap);
- kvfree(free_i);
+ kfree(free_i);
}
static void destroy_sit_info(struct f2fs_sb_info *sbi)
@@ -4868,7 +5198,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
if (sit_i->sentries)
kvfree(sit_i->bitmap);
- kvfree(sit_i->tmp_map);
+ kfree(sit_i->tmp_map);
kvfree(sit_i->sentries);
kvfree(sit_i->sec_entries);
@@ -4880,7 +5210,7 @@ static void destroy_sit_info(struct f2fs_sb_info *sbi)
kvfree(sit_i->sit_bitmap_mir);
kvfree(sit_i->invalid_segmap);
#endif
- kvfree(sit_i);
+ kfree(sit_i);
}
void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
@@ -4896,7 +5226,7 @@ void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
destroy_free_segmap(sbi);
destroy_sit_info(sbi);
sbi->sm_info = NULL;
- kvfree(sm_info);
+ kfree(sm_info);
}
int __init f2fs_create_segment_manager_caches(void)
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 752b177073b2..e81eb0748e2a 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -16,13 +16,20 @@
#define DEF_MAX_RECLAIM_PREFREE_SEGMENTS 4096 /* 8GB in maximum */
#define F2FS_MIN_SEGMENTS 9 /* SB + 2 (CP + SIT + NAT) + SSA + MAIN */
+#define F2FS_MIN_META_SEGMENTS 8 /* SB + 2 (CP + SIT + NAT) + SSA */
/* L: Logical segment # in volume, R: Relative segment # in main area */
#define GET_L2R_SEGNO(free_i, segno) ((segno) - (free_i)->start_segno)
#define GET_R2L_SEGNO(free_i, segno) ((segno) + (free_i)->start_segno)
#define IS_DATASEG(t) ((t) <= CURSEG_COLD_DATA)
-#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE)
+#define IS_NODESEG(t) ((t) >= CURSEG_HOT_NODE && (t) <= CURSEG_COLD_NODE)
+
+static inline void sanity_check_seg_type(struct f2fs_sb_info *sbi,
+ unsigned short seg_type)
+{
+ f2fs_bug_on(sbi, seg_type >= NR_PERSISTENT_LOG);
+}
#define IS_HOT(t) ((t) == CURSEG_HOT_NODE || (t) == CURSEG_HOT_DATA)
#define IS_WARM(t) ((t) == CURSEG_WARM_NODE || (t) == CURSEG_WARM_DATA)
@@ -34,7 +41,9 @@
((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \
((seg) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \
- ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno))
+ ((seg) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno) || \
+ ((seg) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno) || \
+ ((seg) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno))
#define IS_CURSEC(sbi, secno) \
(((secno) == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \
@@ -48,7 +57,11 @@
((secno) == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \
(sbi)->segs_per_sec) || \
((secno) == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \
- (sbi)->segs_per_sec)) \
+ (sbi)->segs_per_sec) || \
+ ((secno) == CURSEG_I(sbi, CURSEG_COLD_DATA_PINNED)->segno / \
+ (sbi)->segs_per_sec) || \
+ ((secno) == CURSEG_I(sbi, CURSEG_ALL_DATA_ATGC)->segno / \
+ (sbi)->segs_per_sec))
#define MAIN_BLKADDR(sbi) \
(SM_I(sbi) ? SM_I(sbi)->main_blkaddr : \
@@ -132,20 +145,25 @@ enum {
* In the victim_sel_policy->alloc_mode, there are two block allocation modes.
* LFS writes data sequentially with cleaning operations.
* SSR (Slack Space Recycle) reuses obsolete space without cleaning operations.
+ * AT_SSR (Age Threshold based Slack Space Recycle) merges fragments into
+ * fragmented segment which has similar aging degree.
*/
enum {
LFS = 0,
- SSR
+ SSR,
+ AT_SSR,
};
/*
* In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes.
* GC_CB is based on cost-benefit algorithm.
* GC_GREEDY is based on greedy algorithm.
+ * GC_AT is based on age-threshold algorithm.
*/
enum {
GC_CB = 0,
GC_GREEDY,
+ GC_AT,
ALLOC_NEXT,
FLUSH_DEVICE,
MAX_GC_POLICY,
@@ -174,7 +192,10 @@ struct victim_sel_policy {
unsigned int offset; /* last scanned bitmap offset */
unsigned int ofs_unit; /* bitmap search unit */
unsigned int min_cost; /* minimum cost */
+ unsigned long long oldest_age; /* oldest age of segments having the same min cost */
unsigned int min_segno; /* segment # having min. cost */
+ unsigned long long age; /* mtime of GCed section*/
+ unsigned long long age_threshold;/* age threshold */
};
struct seg_entry {
@@ -240,6 +261,8 @@ struct sit_info {
unsigned long long mounted_time; /* mount time */
unsigned long long min_mtime; /* min. modification time */
unsigned long long max_mtime; /* max. modification time */
+ unsigned long long dirty_min_mtime; /* rerange candidates in GC_AT */
+ unsigned long long dirty_max_mtime; /* rerange candidates in GC_AT */
unsigned int last_victim[MAX_GC_POLICY]; /* last victim segment # */
};
@@ -278,7 +301,7 @@ struct dirty_seglist_info {
/* victim selection function for cleaning and SSR */
struct victim_selection {
int (*get_victim)(struct f2fs_sb_info *, unsigned int *,
- int, int, char);
+ int, int, char, unsigned long long);
};
/* for active log information */
@@ -288,10 +311,12 @@ struct curseg_info {
struct rw_semaphore journal_rwsem; /* protect journal area */
struct f2fs_journal *journal; /* cached journal info */
unsigned char alloc_type; /* current allocation type */
+ unsigned short seg_type; /* segment type like CURSEG_XXX_TYPE */
unsigned int segno; /* current segment number */
unsigned short next_blkoff; /* next block offset to write */
unsigned int zone; /* current zone number */
unsigned int next_segno; /* preallocated segment */
+ bool inited; /* indicate inmem log is inited */
};
struct sit_entry_set {
@@ -305,8 +330,6 @@ struct sit_entry_set {
*/
static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type)
{
- if (type == CURSEG_COLD_DATA_PINNED)
- type = CURSEG_COLD_DATA;
return (struct curseg_info *)(SM_I(sbi)->curseg_array + type);
}
@@ -411,6 +434,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
+ unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
spin_lock(&free_i->segmap_lock);
clear_bit(segno, free_i->free_segmap);
@@ -418,7 +442,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno)
next = find_next_bit(free_i->free_segmap,
start_segno + sbi->segs_per_sec, start_segno);
- if (next >= start_segno + sbi->segs_per_sec) {
+ if (next >= start_segno + usable_segs) {
clear_bit(secno, free_i->free_secmap);
free_i->free_sections++;
}
@@ -438,22 +462,23 @@ static inline void __set_inuse(struct f2fs_sb_info *sbi,
}
static inline void __set_test_and_free(struct f2fs_sb_info *sbi,
- unsigned int segno)
+ unsigned int segno, bool inmem)
{
struct free_segmap_info *free_i = FREE_I(sbi);
unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
unsigned int next;
+ unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
spin_lock(&free_i->segmap_lock);
if (test_and_clear_bit(segno, free_i->free_segmap)) {
free_i->free_segments++;
- if (IS_CURSEC(sbi, secno))
+ if (!inmem && IS_CURSEC(sbi, secno))
goto skip_free;
next = find_next_bit(free_i->free_segmap,
start_segno + sbi->segs_per_sec, start_segno);
- if (next >= start_segno + sbi->segs_per_sec) {
+ if (next >= start_segno + usable_segs) {
if (test_and_clear_bit(secno, free_i->free_secmap))
free_i->free_sections++;
}
@@ -500,7 +525,7 @@ static inline unsigned int free_segments(struct f2fs_sb_info *sbi)
return FREE_I(sbi)->free_segments;
}
-static inline int reserved_segments(struct f2fs_sb_info *sbi)
+static inline unsigned int reserved_segments(struct f2fs_sb_info *sbi)
{
return SM_I(sbi)->reserved_segments;
}
@@ -532,7 +557,7 @@ static inline int overprovision_segments(struct f2fs_sb_info *sbi)
static inline int reserved_sections(struct f2fs_sb_info *sbi)
{
- return GET_SEC_FROM_SEG(sbi, (unsigned int)reserved_segments(sbi));
+ return GET_SEC_FROM_SEG(sbi, reserved_segments(sbi));
}
static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
@@ -546,8 +571,8 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
/* check current node segment */
for (i = CURSEG_HOT_NODE; i <= CURSEG_COLD_NODE; i++) {
segno = CURSEG_I(sbi, i)->segno;
- left_blocks = sbi->blocks_per_seg -
- get_seg_entry(sbi, segno)->ckpt_valid_blocks;
+ left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
+ get_seg_entry(sbi, segno)->ckpt_valid_blocks;
if (node_blocks > left_blocks)
return false;
@@ -555,7 +580,7 @@ static inline bool has_curseg_enough_space(struct f2fs_sb_info *sbi)
/* check current data segment */
segno = CURSEG_I(sbi, CURSEG_HOT_DATA)->segno;
- left_blocks = sbi->blocks_per_seg -
+ left_blocks = f2fs_usable_blks_in_seg(sbi, segno) -
get_seg_entry(sbi, segno)->ckpt_valid_blocks;
if (dent_blocks > left_blocks)
return false;
@@ -677,21 +702,22 @@ static inline int check_block_count(struct f2fs_sb_info *sbi,
bool is_valid = test_bit_le(0, raw_sit->valid_map) ? true : false;
int valid_blocks = 0;
int cur_pos = 0, next_pos;
+ unsigned int usable_blks_per_seg = f2fs_usable_blks_in_seg(sbi, segno);
/* check bitmap with valid block count */
do {
if (is_valid) {
next_pos = find_next_zero_bit_le(&raw_sit->valid_map,
- sbi->blocks_per_seg,
+ usable_blks_per_seg,
cur_pos);
valid_blocks += next_pos - cur_pos;
} else
next_pos = find_next_bit_le(&raw_sit->valid_map,
- sbi->blocks_per_seg,
+ usable_blks_per_seg,
cur_pos);
cur_pos = next_pos;
is_valid = !is_valid;
- } while (cur_pos < sbi->blocks_per_seg);
+ } while (cur_pos < usable_blks_per_seg);
if (unlikely(GET_SIT_VBLOCKS(raw_sit) != valid_blocks)) {
f2fs_err(sbi, "Mismatch valid blocks %d vs. %d",
@@ -700,8 +726,13 @@ static inline int check_block_count(struct f2fs_sb_info *sbi,
return -EFSCORRUPTED;
}
+ if (usable_blks_per_seg < sbi->blocks_per_seg)
+ f2fs_bug_on(sbi, find_next_bit_le(&raw_sit->valid_map,
+ sbi->blocks_per_seg,
+ usable_blks_per_seg) != sbi->blocks_per_seg);
+
/* check segment usage, and check boundary of a given segment number */
- if (unlikely(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg
+ if (unlikely(GET_SIT_VBLOCKS(raw_sit) > usable_blks_per_seg
|| segno > TOTAL_SEGS(sbi) - 1)) {
f2fs_err(sbi, "Wrong valid blocks %d or segno %u",
GET_SIT_VBLOCKS(raw_sit), segno);
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index bef2be3fa3d0..0c958fed3392 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -146,6 +146,7 @@ enum {
Opt_compress_algorithm,
Opt_compress_log_size,
Opt_compress_extension,
+ Opt_atgc,
Opt_err,
};
@@ -213,6 +214,7 @@ static match_table_t f2fs_tokens = {
{Opt_compress_algorithm, "compress_algorithm=%s"},
{Opt_compress_log_size, "compress_log_size=%u"},
{Opt_compress_extension, "compress_extension=%s"},
+ {Opt_atgc, "atgc"},
{Opt_err, NULL},
};
@@ -580,7 +582,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
case Opt_active_logs:
if (args->from && match_int(args, &arg))
return -EINVAL;
- if (arg != 2 && arg != 4 && arg != NR_CURSEG_TYPE)
+ if (arg != 2 && arg != 4 &&
+ arg != NR_CURSEG_PERSIST_TYPE)
return -EINVAL;
F2FS_OPTION(sbi).active_logs = arg;
break;
@@ -868,8 +871,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
#ifdef CONFIG_F2FS_FS_COMPRESSION
case Opt_compress_algorithm:
if (!f2fs_sb_has_compression(sbi)) {
- f2fs_err(sbi, "Compression feature if off");
- return -EINVAL;
+ f2fs_info(sbi, "Image doesn't support compression");
+ break;
}
name = match_strdup(&args[0]);
if (!name)
@@ -894,8 +897,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
break;
case Opt_compress_log_size:
if (!f2fs_sb_has_compression(sbi)) {
- f2fs_err(sbi, "Compression feature is off");
- return -EINVAL;
+ f2fs_info(sbi, "Image doesn't support compression");
+ break;
}
if (args->from && match_int(args, &arg))
return -EINVAL;
@@ -909,8 +912,8 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
break;
case Opt_compress_extension:
if (!f2fs_sb_has_compression(sbi)) {
- f2fs_err(sbi, "Compression feature is off");
- return -EINVAL;
+ f2fs_info(sbi, "Image doesn't support compression");
+ break;
}
name = match_strdup(&args[0]);
if (!name)
@@ -938,6 +941,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
f2fs_info(sbi, "compression options not supported");
break;
#endif
+ case Opt_atgc:
+ set_opt(sbi, ATGC);
+ break;
default:
f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
p);
@@ -964,6 +970,17 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
return -EINVAL;
}
#endif
+ /*
+ * The BLKZONED feature indicates that the drive was formatted with
+ * zone alignment optimization. This is optional for host-aware
+ * devices, but mandatory for host-managed zoned block devices.
+ */
+#ifndef CONFIG_BLK_DEV_ZONED
+ if (f2fs_sb_has_blkzoned(sbi)) {
+ f2fs_err(sbi, "Zoned block device support is not enabled");
+ return -EINVAL;
+ }
+#endif
if (F2FS_IO_SIZE_BITS(sbi) && !f2fs_lfs_mode(sbi)) {
f2fs_err(sbi, "Should set mode=lfs with %uKB-sized IO",
@@ -1001,7 +1018,7 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
}
/* Not pass down write hints if the number of active logs is lesser
- * than NR_CURSEG_TYPE.
+ * than NR_CURSEG_PERSIST_TYPE.
*/
if (F2FS_OPTION(sbi).active_logs != NR_CURSEG_TYPE)
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
@@ -1020,6 +1037,7 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
/* Initialize f2fs-specific inode info */
atomic_set(&fi->dirty_pages, 0);
+ atomic_set(&fi->i_compr_blocks, 0);
init_rwsem(&fi->i_sem);
spin_lock_init(&fi->i_size_lock);
INIT_LIST_HEAD(&fi->dirty_list);
@@ -1184,6 +1202,7 @@ static void destroy_device_list(struct f2fs_sb_info *sbi)
blkdev_put(FDEV(i).bdev, FMODE_EXCL);
#ifdef CONFIG_BLK_DEV_ZONED
kvfree(FDEV(i).blkz_seq);
+ kfree(FDEV(i).zone_capacity_blocks);
#endif
}
kvfree(sbi->devs);
@@ -1269,6 +1288,7 @@ static void f2fs_put_super(struct super_block *sb)
kfree(sbi->raw_super);
destroy_device_list(sbi);
+ f2fs_destroy_page_array_cache(sbi);
f2fs_destroy_xattr_caches(sbi);
mempool_destroy(sbi->write_io_dummy);
#ifdef CONFIG_QUOTA
@@ -1280,7 +1300,7 @@ static void f2fs_put_super(struct super_block *sb)
for (i = 0; i < NR_PAGE_TYPE; i++)
kvfree(sbi->write_io[i]);
#ifdef CONFIG_UNICODE
- utf8_unload(sbi->s_encoding);
+ utf8_unload(sb->s_encoding);
#endif
kfree(sbi);
}
@@ -1634,13 +1654,16 @@ static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
#ifdef CONFIG_F2FS_FS_COMPRESSION
f2fs_show_compress_options(seq, sbi->sb);
#endif
+
+ if (test_opt(sbi, ATGC))
+ seq_puts(seq, ",atgc");
return 0;
}
static void default_options(struct f2fs_sb_info *sbi)
{
/* init some FS parameters */
- F2FS_OPTION(sbi).active_logs = NR_CURSEG_TYPE;
+ F2FS_OPTION(sbi).active_logs = NR_CURSEG_PERSIST_TYPE;
F2FS_OPTION(sbi).inline_xattr_size = DEFAULT_INLINE_XATTR_ADDRS;
F2FS_OPTION(sbi).whint_mode = WHINT_MODE_OFF;
F2FS_OPTION(sbi).alloc_mode = ALLOC_MODE_DEFAULT;
@@ -1763,6 +1786,7 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
bool no_io_align = !F2FS_IO_ALIGNED(sbi);
+ bool no_atgc = !test_opt(sbi, ATGC);
bool checkpoint_changed;
#ifdef CONFIG_QUOTA
int i, j;
@@ -1835,6 +1859,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
}
}
#endif
+ /* disallow enable atgc dynamically */
+ if (no_atgc == !!test_opt(sbi, ATGC)) {
+ err = -EINVAL;
+ f2fs_warn(sbi, "switch atgc option is not allowed");
+ goto restore_opts;
+ }
+
/* disallow enable/disable extent_cache dynamically */
if (no_extent_cache == !!test_opt(sbi, EXTENT_CACHE)) {
err = -EINVAL;
@@ -2679,10 +2710,8 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
}
if (main_end_blkaddr > seg_end_blkaddr) {
- f2fs_info(sbi, "Wrong MAIN_AREA boundary, start(%u) end(%u) block(%u)",
- main_blkaddr,
- segment0_blkaddr +
- (segment_count << log_blocks_per_seg),
+ f2fs_info(sbi, "Wrong MAIN_AREA boundary, start(%u) end(%llu) block(%u)",
+ main_blkaddr, seg_end_blkaddr,
segment_count_main << log_blocks_per_seg);
return true;
} else if (main_end_blkaddr < seg_end_blkaddr) {
@@ -2700,10 +2729,8 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
err = __f2fs_commit_super(bh, NULL);
res = err ? "failed" : "done";
}
- f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%u) block(%u)",
- res, main_blkaddr,
- segment0_blkaddr +
- (segment_count << log_blocks_per_seg),
+ f2fs_info(sbi, "Fix alignment : %s, start(%u) end(%llu) block(%u)",
+ res, main_blkaddr, seg_end_blkaddr,
segment_count_main << log_blocks_per_seg);
if (err)
return true;
@@ -2714,7 +2741,7 @@ static inline bool sanity_check_area_boundary(struct f2fs_sb_info *sbi,
static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
struct buffer_head *bh)
{
- block_t segment_count, segs_per_sec, secs_per_zone;
+ block_t segment_count, segs_per_sec, secs_per_zone, segment_count_main;
block_t total_sections, blocks_per_seg;
struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
(bh->b_data + F2FS_SUPER_OFFSET);
@@ -2785,6 +2812,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
}
segment_count = le32_to_cpu(raw_super->segment_count);
+ segment_count_main = le32_to_cpu(raw_super->segment_count_main);
segs_per_sec = le32_to_cpu(raw_super->segs_per_sec);
secs_per_zone = le32_to_cpu(raw_super->secs_per_zone);
total_sections = le32_to_cpu(raw_super->section_count);
@@ -2798,14 +2826,19 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
return -EFSCORRUPTED;
}
- if (total_sections > segment_count ||
- total_sections < F2FS_MIN_SEGMENTS ||
+ if (total_sections > segment_count_main || total_sections < 1 ||
segs_per_sec > segment_count || !segs_per_sec) {
f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)",
segment_count, total_sections, segs_per_sec);
return -EFSCORRUPTED;
}
+ if (segment_count_main != total_sections * segs_per_sec) {
+ f2fs_info(sbi, "Invalid segment/section count (%u != %u * %u)",
+ segment_count_main, total_sections, segs_per_sec);
+ return -EFSCORRUPTED;
+ }
+
if ((segment_count / segs_per_sec) < total_sections) {
f2fs_info(sbi, "Small segment_count (%u < %u * %u)",
segment_count, segs_per_sec, total_sections);
@@ -2831,6 +2864,12 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
segment_count, dev_seg_count);
return -EFSCORRUPTED;
}
+ } else {
+ if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_BLKZONED) &&
+ !bdev_is_zoned(sbi->sb->s_bdev)) {
+ f2fs_info(sbi, "Zoned block device path is missing");
+ return -EFSCORRUPTED;
+ }
}
if (secs_per_zone > total_sections || !secs_per_zone) {
@@ -2906,7 +2945,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
- if (unlikely(fsmeta < F2FS_MIN_SEGMENTS ||
+ if (unlikely(fsmeta < F2FS_MIN_META_SEGMENTS ||
ovp_segments == 0 || reserved_segments == 0)) {
f2fs_err(sbi, "Wrong layout: check mkfs.f2fs version");
return 1;
@@ -2994,7 +3033,7 @@ int f2fs_sanity_check_ckpt(struct f2fs_sb_info *sbi)
cp_payload = __cp_payload(sbi);
if (cp_pack_start_sum < cp_payload + 1 ||
cp_pack_start_sum > blocks_per_seg - 1 -
- NR_CURSEG_TYPE) {
+ NR_CURSEG_PERSIST_TYPE) {
f2fs_err(sbi, "Wrong cp_pack_start_sum: %u",
cp_pack_start_sum);
return 1;
@@ -3087,13 +3126,26 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
}
#ifdef CONFIG_BLK_DEV_ZONED
+
+struct f2fs_report_zones_args {
+ struct f2fs_dev_info *dev;
+ bool zone_cap_mismatch;
+};
+
static int f2fs_report_zone_cb(struct blk_zone *zone, unsigned int idx,
- void *data)
+ void *data)
{
- struct f2fs_dev_info *dev = data;
+ struct f2fs_report_zones_args *rz_args = data;
+
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ return 0;
+
+ set_bit(idx, rz_args->dev->blkz_seq);
+ rz_args->dev->zone_capacity_blocks[idx] = zone->capacity >>
+ F2FS_LOG_SECTORS_PER_BLOCK;
+ if (zone->len != zone->capacity && !rz_args->zone_cap_mismatch)
+ rz_args->zone_cap_mismatch = true;
- if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
- set_bit(idx, dev->blkz_seq);
return 0;
}
@@ -3101,6 +3153,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
{
struct block_device *bdev = FDEV(devi).bdev;
sector_t nr_sectors = bdev->bd_part->nr_sects;
+ struct f2fs_report_zones_args rep_zone_arg;
int ret;
if (!f2fs_sb_has_blkzoned(sbi))
@@ -3126,12 +3179,26 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int devi)
if (!FDEV(devi).blkz_seq)
return -ENOMEM;
- /* Get block zones type */
+ /* Get block zones type and zone-capacity */
+ FDEV(devi).zone_capacity_blocks = f2fs_kzalloc(sbi,
+ FDEV(devi).nr_blkz * sizeof(block_t),
+ GFP_KERNEL);
+ if (!FDEV(devi).zone_capacity_blocks)
+ return -ENOMEM;
+
+ rep_zone_arg.dev = &FDEV(devi);
+ rep_zone_arg.zone_cap_mismatch = false;
+
ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, f2fs_report_zone_cb,
- &FDEV(devi));
+ &rep_zone_arg);
if (ret < 0)
return ret;
+ if (!rep_zone_arg.zone_cap_mismatch) {
+ kfree(FDEV(devi).zone_capacity_blocks);
+ FDEV(devi).zone_capacity_blocks = NULL;
+ }
+
return 0;
}
#endif
@@ -3328,7 +3395,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
{
#ifdef CONFIG_UNICODE
- if (f2fs_sb_has_casefold(sbi) && !sbi->s_encoding) {
+ if (f2fs_sb_has_casefold(sbi) && !sbi->sb->s_encoding) {
const struct f2fs_sb_encodings *encoding_info;
struct unicode_map *encoding;
__u16 encoding_flags;
@@ -3359,8 +3426,8 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
"%s-%s with flags 0x%hx", encoding_info->name,
encoding_info->version?:"\b", encoding_flags);
- sbi->s_encoding = encoding;
- sbi->s_encoding_flags = encoding_flags;
+ sbi->sb->s_encoding = encoding;
+ sbi->sb->s_encoding_flags = encoding_flags;
sbi->sb->s_d_op = &f2fs_dentry_ops;
}
#else
@@ -3439,18 +3506,6 @@ try_onemore:
sbi->s_chksum_seed = f2fs_chksum(sbi, ~0, raw_super->uuid,
sizeof(raw_super->uuid));
- /*
- * The BLKZONED feature indicates that the drive was formatted with
- * zone alignment optimization. This is optional for host-aware
- * devices, but mandatory for host-managed zoned block devices.
- */
-#ifndef CONFIG_BLK_DEV_ZONED
- if (f2fs_sb_has_blkzoned(sbi)) {
- f2fs_err(sbi, "Zoned block device support is not enabled");
- err = -EOPNOTSUPP;
- goto free_sb_buf;
- }
-#endif
default_options(sbi);
/* parse mount options */
options = kstrdup((const char *)data, GFP_KERNEL);
@@ -3565,13 +3620,16 @@ try_onemore:
err = f2fs_init_xattr_caches(sbi);
if (err)
goto free_io_dummy;
+ err = f2fs_init_page_array_cache(sbi);
+ if (err)
+ goto free_xattr_cache;
/* get an inode for meta space */
sbi->meta_inode = f2fs_iget(sb, F2FS_META_INO(sbi));
if (IS_ERR(sbi->meta_inode)) {
f2fs_err(sbi, "Failed to read F2FS meta data inode");
err = PTR_ERR(sbi->meta_inode);
- goto free_xattr_cache;
+ goto free_page_array_cache;
}
err = f2fs_get_valid_checkpoint(sbi);
@@ -3761,6 +3819,8 @@ try_onemore:
}
reset_checkpoint:
+ f2fs_init_inmem_curseg(sbi);
+
/* f2fs_recover_fsync_data() cleared this already */
clear_sbi_flag(sbi, SBI_POR_DOING);
@@ -3845,6 +3905,8 @@ free_meta_inode:
make_bad_inode(sbi->meta_inode);
iput(sbi->meta_inode);
sbi->meta_inode = NULL;
+free_page_array_cache:
+ f2fs_destroy_page_array_cache(sbi);
free_xattr_cache:
f2fs_destroy_xattr_caches(sbi);
free_io_dummy:
@@ -3856,7 +3918,7 @@ free_bio_info:
kvfree(sbi->write_io[i]);
#ifdef CONFIG_UNICODE
- utf8_unload(sbi->s_encoding);
+ utf8_unload(sb->s_encoding);
#endif
free_options:
#ifdef CONFIG_QUOTA
@@ -3966,9 +4028,12 @@ static int __init init_f2fs_fs(void)
err = f2fs_create_extent_cache();
if (err)
goto free_checkpoint_caches;
- err = f2fs_init_sysfs();
+ err = f2fs_create_garbage_collection_cache();
if (err)
goto free_extent_cache;
+ err = f2fs_init_sysfs();
+ if (err)
+ goto free_garbage_collection_cache;
err = register_shrinker(&f2fs_shrinker_info);
if (err)
goto free_sysfs;
@@ -3988,7 +4053,12 @@ static int __init init_f2fs_fs(void)
err = f2fs_init_compress_mempool();
if (err)
goto free_bioset;
+ err = f2fs_init_compress_cache();
+ if (err)
+ goto free_compress_mempool;
return 0;
+free_compress_mempool:
+ f2fs_destroy_compress_mempool();
free_bioset:
f2fs_destroy_bioset();
free_bio_enrty_cache:
@@ -4002,6 +4072,8 @@ free_shrinker:
unregister_shrinker(&f2fs_shrinker_info);
free_sysfs:
f2fs_exit_sysfs();
+free_garbage_collection_cache:
+ f2fs_destroy_garbage_collection_cache();
free_extent_cache:
f2fs_destroy_extent_cache();
free_checkpoint_caches:
@@ -4018,6 +4090,7 @@ fail:
static void __exit exit_f2fs_fs(void)
{
+ f2fs_destroy_compress_cache();
f2fs_destroy_compress_mempool();
f2fs_destroy_bioset();
f2fs_destroy_bio_entry_cache();
@@ -4026,6 +4099,7 @@ static void __exit exit_f2fs_fs(void)
unregister_filesystem(&f2fs_fs_type);
unregister_shrinker(&f2fs_shrinker_info);
f2fs_exit_sysfs();
+ f2fs_destroy_garbage_collection_cache();
f2fs_destroy_extent_cache();
f2fs_destroy_checkpoint_caches();
f2fs_destroy_segment_manager_caches();
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 88ed9969cc86..ec77ccfea923 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -176,12 +176,14 @@ static ssize_t encoding_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
#ifdef CONFIG_UNICODE
+ struct super_block *sb = sbi->sb;
+
if (f2fs_sb_has_casefold(sbi))
return snprintf(buf, PAGE_SIZE, "%s (%d.%d.%d)\n",
- sbi->s_encoding->charset,
- (sbi->s_encoding->version >> 16) & 0xff,
- (sbi->s_encoding->version >> 8) & 0xff,
- sbi->s_encoding->version & 0xff);
+ sb->s_encoding->charset,
+ (sb->s_encoding->version >> 16) & 0xff,
+ (sb->s_encoding->version >> 8) & 0xff,
+ sb->s_encoding->version & 0xff);
#endif
return sprintf(buf, "(none)");
}
@@ -375,12 +377,17 @@ out:
return count;
}
if (!strcmp(a->attr.name, "gc_idle")) {
- if (t == GC_IDLE_CB)
+ if (t == GC_IDLE_CB) {
sbi->gc_mode = GC_IDLE_CB;
- else if (t == GC_IDLE_GREEDY)
+ } else if (t == GC_IDLE_GREEDY) {
sbi->gc_mode = GC_IDLE_GREEDY;
- else
+ } else if (t == GC_IDLE_AT) {
+ if (!sbi->am.atgc_enabled)
+ return -EINVAL;
+ sbi->gc_mode = GC_AT;
+ } else {
sbi->gc_mode = GC_NORMAL;
+ }
return count;
}
@@ -968,4 +975,5 @@ void f2fs_unregister_sysfs(struct f2fs_sb_info *sbi)
}
kobject_del(&sbi->s_kobj);
kobject_put(&sbi->s_kobj);
+ wait_for_completion(&sbi->s_kobj_unregister);
}
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 1b0736ce0918..65afcc3cc68a 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -39,7 +39,7 @@ static void xattr_free(struct f2fs_sb_info *sbi, void *xattr_addr,
if (is_inline)
kmem_cache_free(sbi->inline_xattr_slab, xattr_addr);
else
- kvfree(xattr_addr);
+ kfree(xattr_addr);
}
static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
@@ -425,7 +425,7 @@ static int read_all_xattrs(struct inode *inode, struct page *ipage,
*base_addr = txattr_addr;
return 0;
fail:
- kvfree(txattr_addr);
+ kfree(txattr_addr);
return err;
}
@@ -610,7 +610,7 @@ ssize_t f2fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
}
error = buffer_size - rest;
cleanup:
- kvfree(base_addr);
+ kfree(base_addr);
return error;
}
@@ -750,7 +750,7 @@ static int __f2fs_setxattr(struct inode *inode, int index,
if (!error && S_ISDIR(inode->i_mode))
set_sbi_flag(F2FS_I_SB(inode), SBI_NEED_CP);
exit:
- kvfree(base_addr);
+ kfree(base_addr);
return error;
}