From 933439c8f3474e329709b715b43b0b8168bbecf8 Mon Sep 17 00:00:00 2001
From: Chao Yu
Date: Tue, 11 Oct 2016 22:57:01 +0800
Subject: f2fs: give a chance to detach from dirty list

If there are no dirty pages in the inode, we should give it a chance to
detach from the global dirty list; otherwise another unnecessary
.writepages call has to be made just to detach it.

Signed-off-by: Chao Yu
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/data.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'fs/f2fs/data.c')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 9ae194fd2fdb..b2289dfab9a2 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1785,12 +1785,14 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
 		return;
 
 	if (PageDirty(page)) {
-		if (inode->i_ino == F2FS_META_INO(sbi))
+		if (inode->i_ino == F2FS_META_INO(sbi)) {
 			dec_page_count(sbi, F2FS_DIRTY_META);
-		else if (inode->i_ino == F2FS_NODE_INO(sbi))
+		} else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
 			dec_page_count(sbi, F2FS_DIRTY_NODES);
-		else
+		} else {
 			inode_dec_dirty_pages(inode);
+			remove_dirty_inode(inode);
+		}
 	}
 
 	/* This is atomic written page, keep Private */
-- 
cgit v1.2.3


From 6f2d8ed654bfa391854df4de854953f772a16a9d Mon Sep 17 00:00:00 2001
From: Chao Yu
Date: Tue, 11 Oct 2016 22:57:03 +0800
Subject: f2fs: don't miss any f2fs_balance_fs cases

In f2fs_map_blocks, let f2fs_balance_fs detect node page modification
via dn.node_changed, so that we don't miss some corner cases.

Signed-off-by: Chao Yu
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/data.c | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

(limited to 'fs/f2fs/data.c')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b2289dfab9a2..7fb081516f5f 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -676,7 +676,6 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
 	unsigned int ofs_in_node, last_ofs_in_node;
 	blkcnt_t prealloc;
 	struct extent_info ei;
-	bool allocated = false;
 	block_t blkaddr;
 
 	if (!maxblocks)
@@ -735,10 +734,8 @@ next_block:
 			}
 		} else {
 			err = __allocate_data_block(&dn);
-			if (!err) {
+			if (!err)
 				set_inode_flag(inode, FI_APPEND_WRITE);
-				allocated = true;
-			}
 		}
 		if (err)
 			goto sync_out;
@@ -793,7 +790,6 @@ skip:
 		err = reserve_new_blocks(&dn, prealloc);
 		if (err)
 			goto sync_out;
-		allocated = dn.node_changed;
 
 		map->m_len += dn.ofs_in_node - ofs_in_node;
 		if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
@@ -812,9 +808,8 @@ skip:
 
 	if (create) {
 		f2fs_unlock_op(sbi);
-		f2fs_balance_fs(sbi, allocated);
+		f2fs_balance_fs(sbi, dn.node_changed);
 	}
-	allocated = false;
 	goto next_dnode;
 
 sync_out:
@@ -822,7 +817,7 @@ sync_out:
 unlock_out:
 	if (create) {
 		f2fs_unlock_op(sbi);
-		f2fs_balance_fs(sbi, allocated);
+		f2fs_balance_fs(sbi, dn.node_changed);
 	}
 out:
 	trace_f2fs_map_blocks(inode, map, err);
-- 
cgit v1.2.3


From 58736fa60f6ae659ac72da8b1580c308b47e8edd Mon Sep 17 00:00:00 2001
From: Chao Yu
Date: Tue, 11 Oct 2016 22:57:04 +0800
Subject: f2fs: be aware of extent beyond EOF in fiemap

f2fs can fallocate blocks beyond the file size without changing the
size, but f2fs's ->fiemap was restricted and could not detect extents
fallocated past EOF. Relieve that restriction.

Signed-off-by: Chao Yu
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/data.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

(limited to 'fs/f2fs/data.c')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 7fb081516f5f..b544e0b2e7e5 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -886,7 +886,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 	struct buffer_head map_bh;
 	sector_t start_blk, last_blk;
 	pgoff_t next_pgofs;
-	loff_t isize;
 	u64 logical = 0, phys = 0, size = 0;
 	u32 flags = 0;
 	int ret = 0;
@@ -903,13 +902,6 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 
 	inode_lock(inode);
 
-	isize = i_size_read(inode);
-	if (start >= isize)
-		goto out;
-
-	if (start + len > isize)
-		len = isize - start;
-
 	if (logical_to_blk(inode, len) == 0)
 		len = blk_to_logical(inode, 1);
 
@@ -928,13 +920,11 @@ next:
 	/* HOLE */
 	if (!buffer_mapped(&map_bh)) {
 		start_blk = next_pgofs;
-		/* Go through holes util pass the EOF */
-		if (blk_to_logical(inode, start_blk) < isize)
+
+		if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
+					F2FS_I_SB(inode)->max_file_blocks))
 			goto prep_next;
-		/* Found a hole beyond isize means no more extents.
-		 * Note that the premise is that filesystems don't
-		 * punch holes beyond isize and keep size unchanged.
-		 */
+
 		flags |= FIEMAP_EXTENT_LAST;
 	}
 
-- 
cgit v1.2.3
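As an illustration of the behaviour enabled by the fiemap patch above (a user-space
sketch added for this write-up, not part of the original series), the effect can be
observed by preallocating space past EOF with FALLOC_FL_KEEP_SIZE and then querying
the mapping with the FIEMAP ioctl. The mount point, file name and sizes below are
arbitrary assumptions.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fs.h>
#include <linux/fiemap.h>

int main(void)
{
	/* hypothetical f2fs mount point and file */
	int fd = open("/mnt/f2fs/testfile", O_RDWR | O_CREAT, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* 4 KiB of file size, plus 1 MiB preallocated beyond EOF (size kept) */
	if (ftruncate(fd, 4096) < 0 ||
	    fallocate(fd, FALLOC_FL_KEEP_SIZE, 4096, 1 << 20) < 0) {
		perror("preallocate");
		return 1;
	}

	/* header plus room for up to 16 extents */
	struct {
		struct fiemap fm;
		struct fiemap_extent extents[16];
	} query;
	memset(&query, 0, sizeof(query));
	query.fm.fm_start = 0;
	query.fm.fm_length = FIEMAP_MAX_OFFSET;	/* query beyond i_size as well */
	query.fm.fm_flags = FIEMAP_FLAG_SYNC;
	query.fm.fm_extent_count = 16;

	if (ioctl(fd, FS_IOC_FIEMAP, &query.fm) < 0) {
		perror("FS_IOC_FIEMAP");
		return 1;
	}

	/* with the patch, the extent fallocated past EOF should be listed too */
	for (unsigned int i = 0; i < query.fm.fm_mapped_extents; i++)
		printf("extent %u: logical=%llu len=%llu flags=0x%x\n", i,
		       (unsigned long long)query.fm.fm_extents[i].fe_logical,
		       (unsigned long long)query.fm.fm_extents[i].fe_length,
		       query.fm.fm_extents[i].fe_flags);

	close(fd);
	return 0;
}
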
From 664ba972df9b96942191db3068274cc1db899774 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim
Date: Tue, 18 Oct 2016 11:07:45 -0700
Subject: f2fs: use BIO_MAX_PAGES for bio allocation

We don't need to allocate a bio partially in order to maximize
sequential writes.

Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/data.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs/f2fs/data.c')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b544e0b2e7e5..68edb47f5f71 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -277,10 +277,8 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
 		__submit_merged_bio(io);
 alloc_new:
 	if (io->bio == NULL) {
-		int bio_blocks = MAX_BIO_BLOCKS(sbi);
-
 		io->bio = __bio_alloc(sbi, fio->new_blkaddr,
-						bio_blocks, is_read);
+						BIO_MAX_PAGES, is_read);
 		io->fio = *fio;
 	}
 
-- 
cgit v1.2.3


From 230436b3ef3fd7d4a1da19edf5e87bb2d74e0fc2 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Wed, 2 Nov 2016 14:52:15 +0100
Subject: f2fs: hide a maybe-uninitialized warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gcc is unsure about the use of last_ofs_in_node, which might happen
without a prior initialization:

fs/f2fs/data.c: In function ‘f2fs_map_blocks’:
fs/f2fs/data.c:799:54: warning: ‘last_ofs_in_node’ may be used uninitialized in this function [-Wmaybe-uninitialized]
   if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {

As pointed out by Chao Yu, the code is actually correct, as 'prealloc'
is only set if last_ofs_in_node has been set; the two always get
updated together.

This initializes last_ofs_in_node to dn.ofs_in_node for each new dnode
at the start of the 'next_block' loop, which at that point is a correct
initialization as well. I assume that compilers that correctly track
the contents of the variables and do not warn about the condition also
figure out that they can eliminate the extra assignment here.
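
To see the class of false positive the patch above silences, consider a small
sketch of the same pattern (illustrative only, not f2fs code): a value is read
only under a flag, and the flag is set only together with the value. Whether a
given gcc version actually warns on this exact shape varies, but initializing
the variable up front, as the patch does for last_ofs_in_node, silences the
warning without changing behaviour.

/* Illustrative only: mirrors the prealloc/last_ofs_in_node correlation. */
static int sum_flagged(const int *data, int n)
{
	int val;		/* gcc may warn: 'val' may be used uninitialized */
	int have_val = 0;
	int sum = 0;

	for (int i = 0; i < n; i++) {
		if (data[i] > 0) {
			val = data[i];	/* the only assignment ... */
			have_val = 1;	/* ... always paired with setting the flag */
		}
		if (have_val)
			sum += val;	/* safe: the flag implies val was set */
	}
	return sum;
}
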
Fixes: 46008c6d4232 ("f2fs: support in batch multi blocks preallocation")
Signed-off-by: Arnd Bergmann
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/data.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/f2fs/data.c')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 68edb47f5f71..cf5ec39f907d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -713,7 +713,7 @@ next_dnode:
 	}
 
 	prealloc = 0;
-	ofs_in_node = dn.ofs_in_node;
+	last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
 	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
 
 next_block:
-- 
cgit v1.2.3


From 0bfd7a091c19132489a0f977b8dbf9f6b5ae0a1c Mon Sep 17 00:00:00 2001
From: Damien Le Moal
Date: Fri, 28 Oct 2016 17:45:00 +0900
Subject: f2fs: Use generic zoned block device terminology

SMR stands for "Shingled Magnetic Recording", which makes sense only
for hard disk drives (spinning rust). The ZBC/ZAC standards enable
management of SMR disks, but solid state drives may also support those
standards. So rename the HMSMR feature to BLKZONED to avoid HDD-centric
terminology. For the same reason, rename f2fs_sb_mounted_hmsmr to
f2fs_sb_mounted_blkzoned.

Signed-off-by: Damien Le Moal
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/data.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/f2fs/data.c')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index cf5ec39f907d..47ded0c34df1 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -110,7 +110,7 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
 {
 	if (!is_read_io(bio_op(bio))) {
 		atomic_inc(&sbi->nr_wb_bios);
-		if (f2fs_sb_mounted_hmsmr(sbi->sb) &&
+		if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
 			current->plug && (type == DATA || type == NODE))
 			blk_finish_plug(current->plug);
 	}
-- 
cgit v1.2.3


From 6ae1be13e85f4c42c8ca371fda50ae39eebbfd96 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim
Date: Fri, 11 Nov 2016 12:31:40 -0800
Subject: f2fs: revert segment allocation for direct IO

Now we don't need to be overly careful about storage alignment for dio,
since its speed has become quite fast, and we'd better avoid any
misalignment first.

Revert: 38aa0889b250 (f2fs: align direct_io'ed data to section)

Reviewed-by: Chao Yu
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/data.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'fs/f2fs/data.c')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 47ded0c34df1..ca53da568633 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -588,7 +588,6 @@ static int __allocate_data_block(struct dnode_of_data *dn)
 	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
 	struct f2fs_summary sum;
 	struct node_info ni;
-	int seg = CURSEG_WARM_DATA;
 	pgoff_t fofs;
 	blkcnt_t count = 1;
 
@@ -606,11 +605,8 @@ alloc:
 	get_node_info(sbi, dn->nid, &ni);
 	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
 
-	if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
-		seg = CURSEG_DIRECT_IO;
-
 	allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
-						&sum, seg);
+						&sum, CURSEG_WARM_DATA);
 	set_data_blkaddr(dn);
 
 	/* update i_size */
-- 
cgit v1.2.3


From e57e9ae5b179a6b243c42bf6d9549d1595c27089 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim
Date: Fri, 11 Nov 2016 12:08:22 -0800
Subject: f2fs: allow dio read for LFS mode

We can allow dio reads for LFS mode, while doing buffered writes for
dio writes.

Reviewed-by: Chao Yu
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/data.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/f2fs/data.c')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index ca53da568633..996b9aef94ce 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1732,7 +1732,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
 		return 0;
 
-	if (test_opt(F2FS_I_SB(inode), LFS))
+	if (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS))
 		return 0;
 
 	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
-- 
cgit v1.2.3
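
The direct-I/O policy after the patch above can be summarised in a short
kernel-style sketch (an editor's simplification, not the literal code; a later
patch in this series folds these checks, plus a multi-device check, into a
__force_buffered_io() helper):

/*
 * Simplified sketch of the f2fs direct-I/O gating after this patch:
 * fall back to buffered I/O for encrypted regular files, and for
 * writes in LFS mode; direct reads are allowed through.
 */
static bool force_buffered_dio(struct inode *inode, int rw)
{
	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
		return true;			/* no direct I/O on encrypted files */
	if (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS))
		return true;			/* keep LFS writes log-structured */
	return false;
}
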
From 3c62be17d4f562f43fe1d03b48194399caa35aa5 Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim
Date: Thu, 6 Oct 2016 19:02:05 -0700
Subject: f2fs: support multiple devices

This patch implements multiple-device support for f2fs.
Given multiple devices by mkfs.f2fs, f2fs shows them entirely as one big
volume under one f2fs instance. Internal block management is very
simple, but we will modify the block allocation and background GC
policies to boost IO speed by exploiting the devices according to each
device's speed.

Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/data.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 48 insertions(+), 7 deletions(-)

(limited to 'fs/f2fs/data.c')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 996b9aef94ce..861173f00459 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -87,6 +87,46 @@ static void f2fs_write_end_io(struct bio *bio)
 	bio_put(bio);
 }
 
+/*
+ * Return true, if pre_bio's bdev is same as its target device.
+ */
+struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
+				block_t blk_addr, struct bio *bio)
+{
+	struct block_device *bdev = sbi->sb->s_bdev;
+	int i;
+
+	for (i = 0; i < sbi->s_ndevs; i++) {
+		if (FDEV(i).start_blk <= blk_addr &&
+					FDEV(i).end_blk >= blk_addr) {
+			blk_addr -= FDEV(i).start_blk;
+			bdev = FDEV(i).bdev;
+			break;
+		}
+	}
+	if (bio) {
+		bio->bi_bdev = bdev;
+		bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
+	}
+	return bdev;
+}
+
+int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
+{
+	int i;
+
+	for (i = 0; i < sbi->s_ndevs; i++)
+		if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
+			return i;
+	return 0;
+}
+
+static bool __same_bdev(struct f2fs_sb_info *sbi,
+				block_t blk_addr, struct bio *bio)
+{
+	return f2fs_target_device(sbi, blk_addr, NULL) == bio->bi_bdev;
+}
+
 /*
  * Low-level block read/write IO operations.
  */
@@ -97,8 +137,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
 
 	bio = f2fs_bio_alloc(npages);
 
-	bio->bi_bdev = sbi->sb->s_bdev;
-	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
+	f2fs_target_device(sbi, blk_addr, bio);
 	bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
 	bio->bi_private = is_read ? NULL : sbi;
 
@@ -273,7 +312,8 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
 	down_write(&io->io_rwsem);
 
 	if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
-	    (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags)))
+	    (io->fio.op != fio->op || io->fio.op_flags != fio->op_flags) ||
+			!__same_bdev(sbi, fio->new_blkaddr, io->bio)))
 		__submit_merged_bio(io);
 alloc_new:
 	if (io->bio == NULL) {
@@ -961,7 +1001,6 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct fscrypt_ctx *ctx = NULL;
-	struct block_device *bdev = sbi->sb->s_bdev;
 	struct bio *bio;
 
 	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
@@ -979,8 +1018,7 @@ static struct bio *f2fs_grab_bio(struct inode *inode, block_t blkaddr,
 		fscrypt_release_ctx(ctx);
 		return ERR_PTR(-ENOMEM);
 	}
-	bio->bi_bdev = bdev;
-	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blkaddr);
+	f2fs_target_device(sbi, blkaddr, bio);
 	bio->bi_end_io = f2fs_read_end_io;
 	bio->bi_private = ctx;
 
@@ -1075,7 +1113,8 @@ got_it:
 		 * This page will go to BIO.  Do we need to send this
 		 * BIO off first?
 		 */
-		if (bio && (last_block_in_bio != block_nr - 1)) {
+		if (bio && (last_block_in_bio != block_nr - 1 ||
+			!__same_bdev(F2FS_I_SB(inode), block_nr, bio))) {
 submit_and_realloc:
 			__submit_bio(F2FS_I_SB(inode), bio, DATA);
 			bio = NULL;
@@ -1734,6 +1773,8 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 		return 0;
 
 	if (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS))
 		return 0;
+	if (F2FS_I_SB(inode)->s_ndevs)
+		return 0;
 
 	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
-- 
cgit v1.2.3
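
To make the start_blk/end_blk lookup introduced above concrete, here is a
self-contained user-space toy with made-up device names and block ranges (an
illustration for this write-up, not the kernel structures): a global block
address resolves to one device plus a device-relative block, much as
f2fs_target_device() rewrites blk_addr before turning it into a sector number.

#include <stdio.h>

struct dev_range {
	const char *name;
	unsigned long long start_blk;
	unsigned long long end_blk;	/* inclusive, like FDEV(i).end_blk */
};

/* hypothetical layout: three devices concatenated into one volume */
static const struct dev_range devs[] = {
	{ "/dev/sda", 0,       262143  },	/* 1 GiB of 4 KiB blocks */
	{ "/dev/sdb", 262144,  786431  },	/* 2 GiB */
	{ "/dev/sdc", 786432,  1048575 },	/* 1 GiB */
};

static void resolve(unsigned long long blkaddr)
{
	for (unsigned int i = 0; i < sizeof(devs) / sizeof(devs[0]); i++) {
		if (devs[i].start_blk <= blkaddr && blkaddr <= devs[i].end_blk) {
			/* subtract the range start to get the in-device block */
			printf("block %llu -> %s, relative block %llu\n",
			       blkaddr, devs[i].name,
			       blkaddr - devs[i].start_blk);
			return;
		}
	}
	printf("block %llu -> out of range\n", blkaddr);
}

int main(void)
{
	resolve(100);
	resolve(300000);
	resolve(1000000);
	return 0;
}
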
From c040ff9d69fd1d782fe577ba9e35c1f5798158ae Mon Sep 17 00:00:00 2001
From: Jaegeuk Kim
Date: Fri, 11 Nov 2016 16:46:40 -0800
Subject: f2fs: fix redundant block allocation

In the direct_IO path of f2fs_file_write_iter():

 1. f2fs_preallocate_blocks(F2FS_GET_BLOCK_PRE_DIO) -> allocates LBA X
 2. f2fs_direct_IO() -> returns 0

Then, f2fs_write_data_page() will allocate another LBA X+1. This
triggers EIO on HM-SMR.

Reviewed-by: Chao Yu
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/data.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

(limited to 'fs/f2fs/data.c')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 2938311c97bf..59203a3c143e 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -658,6 +658,13 @@ alloc:
 	return 0;
 }
 
+static inline bool __force_buffered_io(struct inode *inode, int rw)
+{
+	return ((f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) ||
+			(rw == WRITE && test_opt(F2FS_I_SB(inode), LFS)) ||
+			F2FS_I_SB(inode)->s_ndevs);
+}
+
 int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
 {
 	struct inode *inode = file_inode(iocb->ki_filp);
@@ -677,7 +684,10 @@ int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
 		err = f2fs_convert_inline_inode(inode);
 		if (err)
 			return err;
-		return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
+		return f2fs_map_blocks(inode, &map, 1,
+			__force_buffered_io(inode, WRITE) ?
+			F2FS_GET_BLOCK_PRE_AIO :
+			F2FS_GET_BLOCK_PRE_DIO);
 	}
 	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) {
 		err = f2fs_convert_inline_inode(inode);
@@ -1769,11 +1779,7 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	if (err)
 		return err;
 
-	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
-		return 0;
-	if (rw == WRITE && test_opt(F2FS_I_SB(inode), LFS))
-		return 0;
-	if (F2FS_I_SB(inode)->s_ndevs)
+	if (__force_buffered_io(inode, rw))
 		return 0;
 
 	trace_f2fs_direct_IO_enter(inode, offset, count, rw);
-- 
cgit v1.2.3
From 36951b38d13ac7cce9fcf89e0e01c22ed0d05688 Mon Sep 17 00:00:00 2001
From: Chao Yu
Date: Wed, 16 Nov 2016 10:41:20 +0800
Subject: f2fs: don't wait writeback for datas during checkpoint

Normally, while committing a checkpoint, we wait for all pages to be
written back no matter whether a page holds data or metadata, so in a
scenario where lots of data IO is being submitted along with metadata,
we may suffer long latency waiting for writeback during checkpoint.

Indeed, we only care about persistence of pages holding metadata, not
pages holding data, since filesystem consistency depends only on
metadata. So, to avoid the long latency in the above scenario, let's
recognize and reference metadata in submitted IOs, and wait for
writeback only on metadata.

Signed-off-by: Chao Yu
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/data.c | 35 +++++++++++++++++++++++++++++------
 1 file changed, 29 insertions(+), 6 deletions(-)

(limited to 'fs/f2fs/data.c')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 59203a3c143e..8c5b63bda68b 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -29,6 +29,26 @@
 #include "trace.h"
 #include <trace/events/f2fs.h>
 
+static bool __is_cp_guaranteed(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct inode *inode;
+	struct f2fs_sb_info *sbi;
+
+	if (!mapping)
+		return false;
+
+	inode = mapping->host;
+	sbi = F2FS_I_SB(inode);
+
+	if (inode->i_ino == F2FS_META_INO(sbi) ||
+			inode->i_ino == F2FS_NODE_INO(sbi) ||
+			S_ISDIR(inode->i_mode) ||
+			is_cold_data(page))
+		return true;
+	return false;
+}
+
 static void f2fs_read_end_io(struct bio *bio)
 {
 	struct bio_vec *bvec;
@@ -71,6 +91,7 @@ static void f2fs_write_end_io(struct bio *bio)
 
 	bio_for_each_segment_all(bvec, bio, i) {
 		struct page *page = bvec->bv_page;
+		enum count_type type = WB_DATA_TYPE(page);
 
 		fscrypt_pullback_bio_page(&page, true);
 
@@ -78,9 +99,11 @@ static void f2fs_write_end_io(struct bio *bio)
 			mapping_set_error(page->mapping, -EIO);
 			f2fs_stop_checkpoint(sbi, true);
 		}
+		dec_page_count(sbi, type);
+		clear_cold_data(page);
 		end_page_writeback(page);
 	}
-	if (atomic_dec_and_test(&sbi->nr_wb_bios) &&
+	if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
 				wq_has_sleeper(&sbi->cp_wait))
 		wake_up(&sbi->cp_wait);
 
@@ -148,7 +171,6 @@ static inline void __submit_bio(struct f2fs_sb_info *sbi,
 				struct bio *bio, enum page_type type)
 {
 	if (!is_read_io(bio_op(bio))) {
-		atomic_inc(&sbi->nr_wb_bios);
 		if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
 			current->plug && (type == DATA || type == NODE))
 			blk_finish_plug(current->plug);
 	}
@@ -309,6 +331,11 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
 		verify_block_addr(sbi, fio->old_blkaddr);
 	verify_block_addr(sbi, fio->new_blkaddr);
 
+	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
+
+	if (!is_read)
+		inc_page_count(sbi, WB_DATA_TYPE(bio_page));
+
 	down_write(&io->io_rwsem);
 
 	if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
@@ -322,8 +349,6 @@ alloc_new:
 		io->fio = *fio;
 	}
 
-	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
-
 	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
 		__submit_merged_bio(io);
@@ -1339,7 +1364,6 @@ done:
 	if (err && err != -ENOENT)
 		goto redirty_out;
 
-	clear_cold_data(page);
 out:
 	inode_dec_dirty_pages(inode);
 	if (err)
@@ -1742,7 +1766,6 @@ static int f2fs_write_end(struct file *file,
 		goto unlock_out;
 
 	set_page_dirty(page);
-	clear_cold_data(page);
 
 	if (pos + copied > i_size_read(inode))
 		f2fs_i_size_write(inode, pos + copied);
-- 
cgit v1.2.3
From 0002b61bdaac732bcff364a18f5bd57c95def0a5 Mon Sep 17 00:00:00 2001
From: Chao Yu
Date: Mon, 28 Nov 2016 19:13:43 -0800
Subject: f2fs: return AOP_WRITEPAGE_ACTIVATE for writepage

We should use AOP_WRITEPAGE_ACTIVATE when we bypass writing pages.

Signed-off-by: Chao Yu
Signed-off-by: Miao Xie
Signed-off-by: Jaegeuk Kim
---
 fs/f2fs/data.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'fs/f2fs/data.c')

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 8c5b63bda68b..b90fb010a991 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1384,6 +1384,8 @@ out:
 
 redirty_out:
 	redirty_page_for_writepage(wbc, page);
+	if (!err)
+		return AOP_WRITEPAGE_ACTIVATE;
 	unlock_page(page);
 	return err;
 }
@@ -1479,6 +1481,15 @@ continue_unlock:
 
 			ret = mapping->a_ops->writepage(page, wbc);
 			if (unlikely(ret)) {
+				/*
+				 * keep nr_to_write, since vfs uses this to
+				 * get # of written pages.
+				 */
+				if (ret == AOP_WRITEPAGE_ACTIVATE) {
+					unlock_page(page);
+					ret = 0;
+					continue;
+				}
 				done_index = page->index + 1;
 				done = 1;
 				break;
-- 
cgit v1.2.3
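
As a closing note on the convention adopted by the last patch (a kernel-style
fragment sketched for illustration, not f2fs code): a ->writepage implementation
that decides to skip a page redirties it and returns AOP_WRITEPAGE_ACTIVATE
while leaving the page locked; the caller, whether the generic
write_cache_pages() or the f2fs loop patched above, then unlocks the page,
keeps nr_to_write intact, and does not treat the return value as an error.
can_write_now() below is a hypothetical stand-in for whatever condition causes
the bypass.

/* Illustrative fragment: skipping a page in ->writepage. */
static int example_writepage(struct page *page, struct writeback_control *wbc)
{
	if (!can_write_now(page)) {
		redirty_page_for_writepage(wbc, page);
		return AOP_WRITEPAGE_ACTIVATE;	/* page left locked; caller unlocks */
	}

	set_page_writeback(page);
	/* ... queue the actual I/O here ... */
	unlock_page(page);
	return 0;
}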