From f8cb556fdbc36855ef884061a1beec6124314c89 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 21 Feb 2014 11:58:54 +0300
Subject: ext3: remove unneeded check in ext3_ordered_writepage()

We already know "ret" is zero so there is no need to do:

		if (!ret)
			ret = err;

We can just assign ret directly instead.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'fs/ext3/inode.c')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 384b6ebb655f..491f022c476a 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1673,12 +1673,9 @@ static int ext3_ordered_writepage(struct page *page,
 	 * block_write_full_page() succeeded.  Otherwise they are unmapped,
 	 * and generally junk.
 	 */
-	if (ret == 0) {
-		err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
+	if (ret == 0)
+		ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
 					NULL, journal_dirty_data_fn);
-		if (!ret)
-			ret = err;
-	}
 	walk_page_buffers(handle, page_bufs, 0,
 			PAGE_CACHE_SIZE, NULL, bput_one);
 	err = ext3_journal_stop(handle);
-- 
cgit v1.2.3


From 99128addc964d4429d1bb9be5fa9e03ce85b1e68 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 28 Feb 2014 09:09:24 +0100
Subject: ext3: Update PF_MEMALLOC handling in ext3_write_inode()

The special handling of PF_MEMALLOC callers in ext3_write_inode()
shouldn't be necessary as there shouldn't be any. Warn about it. Also
update comment before the function as it seems somewhat outdated.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

(limited to 'fs/ext3/inode.c')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 491f022c476a..2fef98abb207 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3209,21 +3209,20 @@ out_brelse:
  *
  * We are called from a few places:
  *
- * - Within generic_file_write() for O_SYNC files.
+ * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
  *   Here, there will be no transaction running. We wait for any running
  *   transaction to commit.
  *
- * - Within sys_sync(), kupdate and such.
- *   We wait on commit, if tol to.
+ * - Within flush work (for sys_sync(), kupdate and such).
+ *   We wait on commit, if told to.
  *
- * - Within prune_icache() (PF_MEMALLOC == true)
- *   Here we simply return.  We can't afford to block kswapd on the
- *   journal commit.
+ * - Within iput_final() -> write_inode_now()
+ *   We wait on commit, if told to.
  *
  * In all cases it is actually safe for us to return without doing anything,
  * because the inode has been copied into a raw inode buffer in
- * ext3_mark_inode_dirty().  This is a correctness thing for O_SYNC and for
- * knfsd.
+ * ext3_mark_inode_dirty().  This is a correctness thing for WB_SYNC_ALL
+ * writeback.
  *
  * Note that we are absolutely dependent upon all inode dirtiers doing the
  * right thing: they *must* call mark_inode_dirty() after dirtying info in
@@ -3235,13 +3234,13 @@ out_brelse:
  *	stuff();
  *	inode->i_size = expr;
  *
- * is in error because a kswapd-driven write_inode() could occur while
- * `stuff()' is running, and the new i_size will be lost.  Plus the inode
- * will no longer be on the superblock's dirty inode list.
+ * is in error because write_inode() could occur while `stuff()' is running,
+ * and the new i_size will be lost.  Plus the inode will no longer be on the
+ * superblock's dirty inode list.
  */
 int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
 {
-	if (current->flags & PF_MEMALLOC)
+	if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
 		return 0;
 
 	if (ext3_journal_current_handle()) {
-- 
cgit v1.2.3


From d680104f3d488ff028f7dd03b0bc055aa5e8ad8d Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 28 Feb 2014 09:31:10 +0100
Subject: ext3: Update outdated comment before ext3_ordered_writepage()

The comment is heavily outdated. The recursion into the filesystem isn't
possible because we use GFP_NOFS for our allocations, the issue about
block_write_full_page() dirtying tail page is long resolved as well
(that function doesn't dirty buffers at all), and finally we don't start
a transaction if all blocks are already allocated and mapped.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c | 47 ++++-------------------------------------------
 1 file changed, 4 insertions(+), 43 deletions(-)

(limited to 'fs/ext3/inode.c')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 2fef98abb207..4ecf88fb69a8 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1559,56 +1559,17 @@ static int buffer_unmapped(handle_t *handle, struct buffer_head *bh)
 }
 
 /*
- * Note that we always start a transaction even if we're not journalling
- * data.  This is to preserve ordering: any hole instantiation within
- * __block_write_full_page -> ext3_get_block() should be journalled
- * along with the data so we don't crash and then get metadata which
+ * Note that whenever we need to map blocks we start a transaction even if
+ * we're not journalling data.  This is to preserve ordering: any hole
+ * instantiation within __block_write_full_page -> ext3_get_block() should be
+ * journalled along with the data so we don't crash and then get metadata which
  * refers to old data.
  *
  * In all journalling modes block_write_full_page() will start the I/O.
  *
- * Problem:
- *
- *	ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
- *		ext3_writepage()
- *
- * Similar for:
- *
- *	ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
- *
- * Same applies to ext3_get_block().  We will deadlock on various things like
- * lock_journal and i_truncate_mutex.
- *
- * Setting PF_MEMALLOC here doesn't work - too many internal memory
- * allocations fail.
- *
- * 16May01: If we're reentered then journal_current_handle() will be
- *	    non-zero. We simply *return*.
- *
- * 1 July 2001: @@@ FIXME:
- *   In journalled data mode, a data buffer may be metadata against the
- *   current transaction.  But the same file is part of a shared mapping
- *   and someone does a writepage() on it.
- *
- *   We will move the buffer onto the async_data list, but *after* it has
- *   been dirtied. So there's a small window where we have dirty data on
- *   BJ_Metadata.
- *
- *   Note that this only applies to the last partial page in the file.  The
- *   bit which block_write_full_page() uses prepare/commit for.  (That's
- *   broken code anyway: it's wrong for msync()).
- *
- *   It's a rare case: affects the final partial page, for journalled data
- *   where the file is subject to bith write() and writepage() in the same
- *   transction.  To fix it we'll need a custom block_write_full_page().
- *   We'll probably need that anyway for journalling writepage() output.
- *
  * We don't honour synchronous mounts for writepage().  That would be
  * disastrous.  Any write() or metadata operation will sync the fs for
  * us.
- *
- * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
- * we don't need to open a transaction here.
  */
 static int ext3_ordered_writepage(struct page *page,
 				struct writeback_control *wbc)
-- 
cgit v1.2.3


From 2299432e1950c9ac0aa649d4617374ea24b6f131 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 4 Mar 2014 11:52:16 +0100
Subject: ext3: Speedup WB_SYNC_ALL pass

When doing filesystem wide sync, there's no need to force transaction
commit separately for each inode because ext3_sync_fs() takes care of
forcing commit at the end. Most of the time this slowness doesn't
manifest because previous WB_SYNC_NONE writeback doesn't leave much to
write but when there are processes aggressively creating new files and
several filesystems to sync, the sync slowness can be noticeable. In the
following test script sync(1) takes around 6 minutes when there are two
ext3 filesystems mounted on a standard SATA drive. After this patch sync
is about twice as fast in the default data=ordered mode. For
data=writeback mode we have even bigger speedup.

function run_writers
{
  for (( i = 0; i < 10; i++ )); do
    mkdir $1/dir$i
    for (( j = 0; j < 40000; j++ )); do
      dd if=/dev/zero of=$1/dir$i/$j bs=4k count=4 &>/dev/null
    done &
  done
}

for dir in "$@"; do
  run_writers $dir
done

sleep 40
time sync

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'fs/ext3/inode.c')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 4ecf88fb69a8..ddf5c21cffbc 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3210,7 +3210,12 @@ int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
 		return -EIO;
 	}
 
-	if (wbc->sync_mode != WB_SYNC_ALL)
+	/*
+	 * No need to force transaction in WB_SYNC_NONE mode. Also
+	 * ext3_sync_fs() will force the commit after everything is
+	 * written.
+	 */
+	if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
 		return 0;
 
 	return ext3_force_commit(inode->i_sb);
-- 
cgit v1.2.3


From f294d3e7be28c43205f41eb0fff97393105d6d2c Mon Sep 17 00:00:00 2001
From: Eric Sandeen <sandeen@redhat.com>
Date: Mon, 17 Mar 2014 14:13:00 -0500
Subject: ext3: explicitly remove inode from orphan list after failed direct io

Otherwise non-empty orphan list will be triggered on umount.

This is just an application of commit da1daf by Dmitry Monakhov
to the same code in ext3.

Signed-off-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext3/inode.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/ext3/inode.c')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ddf5c21cffbc..77042a2e017c 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1883,6 +1883,8 @@ retry:
 			 * and pretend the write failed... */
 			ext3_truncate_failed_direct_write(inode);
 			ret = PTR_ERR(handle);
+			if (inode->i_nlink)
+				ext3_orphan_del(NULL, inode);
 			goto out;
 		}
 		if (inode->i_nlink)
-- 
cgit v1.2.3