From 0c2022eccb01630c037f2024531e9ff1afbe1564 Mon Sep 17 00:00:00 2001 From: Yongqiang Yang Date: Mon, 20 Feb 2012 17:53:02 -0500 Subject: jbd2: allocate transaction from separate slab cache There is normally only a handful of these active at any one time, but putting them in a separate slab cache makes debugging memory corruption problems easier. Manish Katiyar also wanted this make it easier to test memory failure scenarios in the jbd2 layer. Cc: Manish Katiyar Signed-off-by: Yongqiang Yang Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 5069b8475150..8adc5d460f56 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -1048,7 +1048,7 @@ restart_loop: jbd_debug(1, "JBD2: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); if (to_free) - kfree(commit_transaction); + jbd2_journal_free_transaction(commit_transaction); wake_up(&journal->j_wait_done_commit); } -- cgit v1.2.3 From 24bcc89c7e7c64982e6192b4952a0a92379fc341 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 Mar 2012 15:41:04 -0400 Subject: jbd2: split updating of journal superblock and marking journal empty There are three case of updating journal superblock. In the first case, we want to mark journal as empty (setting s_sequence to 0), in the second case we want to update log tail, in the third case we want to update s_errno. Split these cases into separate functions. It makes the code slightly more straightforward and later patches will make the distinction even more important. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 8adc5d460f56..19371a8a9015 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -340,7 +340,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) /* Do we need to erase the effects of a prior jbd2_journal_flush? */ if (journal->j_flags & JBD2_FLUSHED) { jbd_debug(3, "super block updated\n"); - jbd2_journal_update_superblock(journal, 1); + jbd2_journal_update_sb_log_tail(journal); } else { jbd_debug(3, "superblock not updated\n"); } -- cgit v1.2.3 From a78bb11d7acd525623c6a0c2ff4e213d527573fa Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 Mar 2012 15:43:04 -0400 Subject: jbd2: protect all log tail updates with j_checkpoint_mutex There are some log tail updates that are not protected by j_checkpoint_mutex. Some of these are harmless because they happen during startup or shutdown but updates in jbd2_journal_commit_transaction() and jbd2_journal_flush() can really race with other log tail updates (e.g. someone doing jbd2_journal_flush() with someone running jbd2_cleanup_journal_tail()). So protect all log tail updates with j_checkpoint_mutex. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 19371a8a9015..6705717d9b7f 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -340,7 +340,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) /* Do we need to erase the effects of a prior jbd2_journal_flush? */ if (journal->j_flags & JBD2_FLUSHED) { jbd_debug(3, "super block updated\n"); + mutex_lock(&journal->j_checkpoint_mutex); jbd2_journal_update_sb_log_tail(journal); + mutex_unlock(&journal->j_checkpoint_mutex); } else { jbd_debug(3, "superblock not updated\n"); } -- cgit v1.2.3 From 79feb521a44705262d15cc819a4117a447b11ea7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 Mar 2012 22:22:54 -0400 Subject: jbd2: issue cache flush after checkpointing even with internal journal When we reach jbd2_cleanup_journal_tail(), there is no guarantee that checkpointed buffers are on a stable storage - especially if buffers were written out by jbd2_log_do_checkpoint(), they are likely to be only in disk's caches. Thus when we update journal superblock effectively removing old transaction from journal, this write of superblock can get to stable storage before those checkpointed buffers which can result in filesystem corruption after a crash. Thus we must unconditionally issue a cache flush before we update journal superblock in these cases. A similar problem can also occur if journal superblock is written only in disk's caches, other transaction starts reusing space of the transaction cleaned from the log and power failure happens. Subsequent journal replay would still try to replay the old transaction but some of it's blocks may be already overwritten by the new transaction. For this reason we must use WRITE_FUA when updating log tail and we must first write new log tail to disk and update in-memory information only after that. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6705717d9b7f..b89ef84786a7 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -341,7 +341,16 @@ void jbd2_journal_commit_transaction(journal_t *journal) if (journal->j_flags & JBD2_FLUSHED) { jbd_debug(3, "super block updated\n"); mutex_lock(&journal->j_checkpoint_mutex); - jbd2_journal_update_sb_log_tail(journal); + /* + * We hold j_checkpoint_mutex so tail cannot change under us. + * We don't need any special data guarantees for writing sb + * since journal is empty and it is ok for write to be + * flushed only with transaction commit. + */ + jbd2_journal_update_sb_log_tail(journal, + journal->j_tail_sequence, + journal->j_tail, + WRITE_SYNC); mutex_unlock(&journal->j_checkpoint_mutex); } else { jbd_debug(3, "superblock not updated\n"); -- cgit v1.2.3 From 3339578f05787259917788f461f4196b7349c2a4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 13 Mar 2012 22:45:38 -0400 Subject: jbd2: cleanup journal tail after transaction commit Normally, we have to issue a cache flush before we can update journal tail in journal superblock, effectively wiping out old transactions from the journal. So use the fact that during transaction commit we issue cache flush anyway and opportunistically push journal tail as far as we can. Since update of journal superblock is still costly (we have to use WRITE_FUA), we update log tail only if we can free significant amount of space. Signed-off-by: Jan Kara Signed-off-by: "Theodore Ts'o" --- fs/jbd2/commit.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'fs/jbd2/commit.c') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index b89ef84786a7..1dfcb207ea69 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -331,6 +331,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) struct buffer_head *cbh = NULL; /* For transactional checksums */ __u32 crc32_sum = ~0; struct blk_plug plug; + /* Tail of the journal */ + unsigned long first_block; + tid_t first_tid; + int update_tail; /* * First job: lock down the current transaction and wait for @@ -688,10 +692,30 @@ start_journal_io: err = 0; } + /* + * Get current oldest transaction in the log before we issue flush + * to the filesystem device. After the flush we can be sure that + * blocks of all older transactions are checkpointed to persistent + * storage and we will be safe to update journal start in the + * superblock with the numbers we get here. + */ + update_tail = + jbd2_journal_get_log_tail(journal, &first_tid, &first_block); + write_lock(&journal->j_state_lock); + if (update_tail) { + long freed = first_block - journal->j_tail; + + if (first_block < journal->j_tail) + freed += journal->j_last - journal->j_first; + /* Update tail only if we free significant amount of space */ + if (freed < journal->j_maxlen / 4) + update_tail = 0; + } J_ASSERT(commit_transaction->t_state == T_COMMIT); commit_transaction->t_state = T_COMMIT_DFLUSH; write_unlock(&journal->j_state_lock); + /* * If the journal is not located on the file system device, * then we must flush the file system device before we issue @@ -842,6 +866,14 @@ wait_for_iobuf: if (err) jbd2_journal_abort(journal, err); + /* + * Now disk caches for filesystem device are flushed so we are safe to + * erase checkpointed transactions from the log by updating journal + * superblock. + */ + if (update_tail) + jbd2_update_log_tail(journal, first_tid, first_block); + /* End of a transaction! Finally, we can do checkpoint processing: any buffers committed as a result of this transaction can be removed from any checkpoint list it was on -- cgit v1.2.3