From 0c2022eccb01630c037f2024531e9ff1afbe1564 Mon Sep 17 00:00:00 2001
From: Yongqiang Yang <xiaoqiangnk@gmail.com>
Date: Mon, 20 Feb 2012 17:53:02 -0500
Subject: jbd2: allocate transaction from separate slab cache

There is normally only a handful of these active at any one time, but
putting them in a separate slab cache makes debugging memory
corruption problems easier.  Manish Katiyar also wanted this make it
easier to test memory failure scenarios in the jbd2 layer.

Cc: Manish Katiyar <mkatiyar@gmail.com>
Signed-off-by: Yongqiang Yang <xiaoqiangnk@gmail.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/commit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/jbd2/commit.c')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 5069b8475150..8adc5d460f56 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -1048,7 +1048,7 @@ restart_loop:
 	jbd_debug(1, "JBD2: commit %d complete, head %d\n",
 		  journal->j_commit_sequence, journal->j_tail_sequence);
 	if (to_free)
-		kfree(commit_transaction);
+		jbd2_journal_free_transaction(commit_transaction);
 
 	wake_up(&journal->j_wait_done_commit);
 }
-- 
cgit v1.2.3


From 24bcc89c7e7c64982e6192b4952a0a92379fc341 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 13 Mar 2012 15:41:04 -0400
Subject: jbd2: split updating of journal superblock and marking journal empty

There are three case of updating journal superblock. In the first case, we want
to mark journal as empty (setting s_sequence to 0), in the second case we want
to update log tail, in the third case we want to update s_errno. Split these
cases into separate functions. It makes the code slightly more straightforward
and later patches will make the distinction even more important.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/commit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/jbd2/commit.c')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 8adc5d460f56..19371a8a9015 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -340,7 +340,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	/* Do we need to erase the effects of a prior jbd2_journal_flush? */
 	if (journal->j_flags & JBD2_FLUSHED) {
 		jbd_debug(3, "super block updated\n");
-		jbd2_journal_update_superblock(journal, 1);
+		jbd2_journal_update_sb_log_tail(journal);
 	} else {
 		jbd_debug(3, "superblock not updated\n");
 	}
-- 
cgit v1.2.3


From a78bb11d7acd525623c6a0c2ff4e213d527573fa Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 13 Mar 2012 15:43:04 -0400
Subject: jbd2: protect all log tail updates with j_checkpoint_mutex

There are some log tail updates that are not protected by j_checkpoint_mutex.
Some of these are harmless because they happen during startup or shutdown but
updates in jbd2_journal_commit_transaction() and jbd2_journal_flush() can
really race with other log tail updates (e.g. someone doing
jbd2_journal_flush() with someone running jbd2_cleanup_journal_tail()). So
protect all log tail updates with j_checkpoint_mutex.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/commit.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/jbd2/commit.c')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 19371a8a9015..6705717d9b7f 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -340,7 +340,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	/* Do we need to erase the effects of a prior jbd2_journal_flush? */
 	if (journal->j_flags & JBD2_FLUSHED) {
 		jbd_debug(3, "super block updated\n");
+		mutex_lock(&journal->j_checkpoint_mutex);
 		jbd2_journal_update_sb_log_tail(journal);
+		mutex_unlock(&journal->j_checkpoint_mutex);
 	} else {
 		jbd_debug(3, "superblock not updated\n");
 	}
-- 
cgit v1.2.3


From 79feb521a44705262d15cc819a4117a447b11ea7 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 13 Mar 2012 22:22:54 -0400
Subject: jbd2: issue cache flush after checkpointing even with internal
 journal

When we reach jbd2_cleanup_journal_tail(), there is no guarantee that
checkpointed buffers are on a stable storage - especially if buffers were
written out by jbd2_log_do_checkpoint(), they are likely to be only in disk's
caches. Thus when we update journal superblock effectively removing old
transaction from journal, this write of superblock can get to stable storage
before those checkpointed buffers which can result in filesystem corruption
after a crash. Thus we must unconditionally issue a cache flush before we
update journal superblock in these cases.

A similar problem can also occur if journal superblock is written only in
disk's caches, other transaction starts reusing space of the transaction
cleaned from the log and power failure happens. Subsequent journal replay would
still try to replay the old transaction but some of it's blocks may be already
overwritten by the new transaction. For this reason we must use WRITE_FUA when
updating log tail and we must first write new log tail to disk and update
in-memory information only after that.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/commit.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'fs/jbd2/commit.c')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6705717d9b7f..b89ef84786a7 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -341,7 +341,16 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	if (journal->j_flags & JBD2_FLUSHED) {
 		jbd_debug(3, "super block updated\n");
 		mutex_lock(&journal->j_checkpoint_mutex);
-		jbd2_journal_update_sb_log_tail(journal);
+		/*
+		 * We hold j_checkpoint_mutex so tail cannot change under us.
+		 * We don't need any special data guarantees for writing sb
+		 * since journal is empty and it is ok for write to be
+		 * flushed only with transaction commit.
+		 */
+		jbd2_journal_update_sb_log_tail(journal,
+						journal->j_tail_sequence,
+						journal->j_tail,
+						WRITE_SYNC);
 		mutex_unlock(&journal->j_checkpoint_mutex);
 	} else {
 		jbd_debug(3, "superblock not updated\n");
-- 
cgit v1.2.3


From 3339578f05787259917788f461f4196b7349c2a4 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Tue, 13 Mar 2012 22:45:38 -0400
Subject: jbd2: cleanup journal tail after transaction commit

Normally, we have to issue a cache flush before we can update journal tail in
journal superblock, effectively wiping out old transactions from the journal.
So use the fact that during transaction commit we issue cache flush anyway and
opportunistically push journal tail as far as we can. Since update of journal
superblock is still costly (we have to use WRITE_FUA), we update log tail only
if we can free significant amount of space.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/commit.c | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

(limited to 'fs/jbd2/commit.c')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index b89ef84786a7..1dfcb207ea69 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -331,6 +331,10 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 	struct buffer_head *cbh = NULL; /* For transactional checksums */
 	__u32 crc32_sum = ~0;
 	struct blk_plug plug;
+	/* Tail of the journal */
+	unsigned long first_block;
+	tid_t first_tid;
+	int update_tail;
 
 	/*
 	 * First job: lock down the current transaction and wait for
@@ -688,10 +692,30 @@ start_journal_io:
 		err = 0;
 	}
 
+	/*
+	 * Get current oldest transaction in the log before we issue flush
+	 * to the filesystem device. After the flush we can be sure that
+	 * blocks of all older transactions are checkpointed to persistent
+	 * storage and we will be safe to update journal start in the
+	 * superblock with the numbers we get here.
+	 */
+	update_tail =
+		jbd2_journal_get_log_tail(journal, &first_tid, &first_block);
+
 	write_lock(&journal->j_state_lock);
+	if (update_tail) {
+		long freed = first_block - journal->j_tail;
+
+		if (first_block < journal->j_tail)
+			freed += journal->j_last - journal->j_first;
+		/* Update tail only if we free significant amount of space */
+		if (freed < journal->j_maxlen / 4)
+			update_tail = 0;
+	}
 	J_ASSERT(commit_transaction->t_state == T_COMMIT);
 	commit_transaction->t_state = T_COMMIT_DFLUSH;
 	write_unlock(&journal->j_state_lock);
+
 	/* 
 	 * If the journal is not located on the file system device,
 	 * then we must flush the file system device before we issue
@@ -842,6 +866,14 @@ wait_for_iobuf:
 	if (err)
 		jbd2_journal_abort(journal, err);
 
+	/*
+	 * Now disk caches for filesystem device are flushed so we are safe to
+	 * erase checkpointed transactions from the log by updating journal
+	 * superblock.
+	 */
+	if (update_tail)
+		jbd2_update_log_tail(journal, first_tid, first_block);
+
 	/* End of a transaction!  Finally, we can do checkpoint
            processing: any buffers committed as a result of this
            transaction can be removed from any checkpoint list it was on
-- 
cgit v1.2.3