summaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_log_cil.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_log_cil.c')
-rw-r--r--fs/xfs/xfs_log_cil.c138
1 files changed, 104 insertions, 34 deletions
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index b0ef071b3cb5..b128aaa9b870 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -656,6 +656,8 @@ xlog_cil_push_work(
struct xfs_log_vec lvhdr = { NULL };
xfs_lsn_t commit_lsn;
xfs_lsn_t push_seq;
+ struct bio bio;
+ DECLARE_COMPLETION_ONSTACK(bdev_flush);
new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_NOFS);
new_ctx->ticket = xlog_cil_ticket_alloc(log);
@@ -668,9 +670,14 @@ xlog_cil_push_work(
ASSERT(push_seq <= ctx->sequence);
/*
- * Wake up any background push waiters now this context is being pushed.
+ * As we are about to switch to a new, empty CIL context, we no longer
+ * need to throttle tasks on CIL space overruns. Wake any waiters that
+ * the hard push throttle may have caught so they can start committing
+ * to the new context. The ctx->xc_push_lock provides the serialisation
+ * necessary for safely using the lockless waitqueue_active() check in
+ * this context.
*/
- if (ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log))
+ if (waitqueue_active(&cil->xc_push_wait))
wake_up_all(&cil->xc_push_wait);
/*
@@ -719,10 +726,19 @@ xlog_cil_push_work(
spin_unlock(&cil->xc_push_lock);
/*
- * pull all the log vectors off the items in the CIL, and
- * remove the items from the CIL. We don't need the CIL lock
- * here because it's only needed on the transaction commit
- * side which is currently locked out by the flush lock.
+ * The CIL is stable at this point - nothing new will be added to it
+ * because we hold the flush lock exclusively. Hence we can now issue
+ * a cache flush to ensure all the completed metadata in the journal we
+ * are about to overwrite is on stable storage.
+ */
+ xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev,
+ &bdev_flush);
+
+ /*
+ * Pull all the log vectors off the items in the CIL, and remove the
+ * items from the CIL. We don't need the CIL lock here because it's only
+ * needed on the transaction commit side which is currently locked out
+ * by the flush lock.
*/
lv = NULL;
num_iovecs = 0;
@@ -772,7 +788,7 @@ xlog_cil_push_work(
* that higher sequences will wait for us to write out a commit record
* before they do.
*
- * xfs_log_force_lsn requires us to mirror the new sequence into the cil
+ * xfs_log_force_seq requires us to mirror the new sequence into the cil
* structure atomically with the addition of this sequence to the
* committing list. This also ensures that we can do unlocked checks
* against the current sequence in log forces without risking
@@ -806,7 +822,14 @@ xlog_cil_push_work(
lvhdr.lv_iovecp = &lhdr;
lvhdr.lv_next = ctx->lv_chain;
- error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0, true);
+ /*
+ * Before we format and submit the first iclog, we have to ensure that
+ * the metadata writeback ordering cache flush is complete.
+ */
+ wait_for_completion(&bdev_flush);
+
+ error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL,
+ XLOG_START_TRANS);
if (error)
goto out_abort_free_ticket;
@@ -850,15 +873,21 @@ restart:
xfs_log_ticket_ungrant(log, tic);
- spin_lock(&commit_iclog->ic_callback_lock);
+ /*
+ * Once we attach the ctx to the iclog, a shutdown can process the
+ * iclog, run the callbacks and free the ctx. The only thing preventing
+ * this potential UAF situation here is that we are holding the
+ * icloglock. Hence we cannot access the ctx once we have attached the
+ * callbacks and dropped the icloglock.
+ */
+ spin_lock(&log->l_icloglock);
if (commit_iclog->ic_state == XLOG_STATE_IOERROR) {
- spin_unlock(&commit_iclog->ic_callback_lock);
+ spin_unlock(&log->l_icloglock);
goto out_abort;
}
ASSERT_ALWAYS(commit_iclog->ic_state == XLOG_STATE_ACTIVE ||
commit_iclog->ic_state == XLOG_STATE_WANT_SYNC);
list_add_tail(&ctx->iclog_entry, &commit_iclog->ic_callbacks);
- spin_unlock(&commit_iclog->ic_callback_lock);
/*
* now the checkpoint commit is complete and we've attached the
@@ -870,8 +899,50 @@ restart:
wake_up_all(&cil->xc_commit_wait);
spin_unlock(&cil->xc_push_lock);
- /* release the hounds! */
- xfs_log_release_iclog(commit_iclog);
+ /*
+ * If the checkpoint spans multiple iclogs, wait for all previous iclogs
+ * to complete before we submit the commit_iclog. We can't use state
+ * checks for this - ACTIVE can be either a past completed iclog or a
+ * future iclog being filled, while WANT_SYNC through SYNC_DONE can be a
+ * past or future iclog awaiting IO or ordered IO completion to be run.
+ * In the latter case, if it's a future iclog and we wait on it, the we
+ * will hang because it won't get processed through to ic_force_wait
+ * wakeup until this commit_iclog is written to disk. Hence we use the
+ * iclog header lsn and compare it to the commit lsn to determine if we
+ * need to wait on iclogs or not.
+ *
+ * NOTE: It is not safe to reference the ctx after this check as we drop
+ * the icloglock if we have to wait for completion of other iclogs.
+ */
+ if (ctx->start_lsn != commit_lsn) {
+ xfs_lsn_t plsn;
+
+ plsn = be64_to_cpu(commit_iclog->ic_prev->ic_header.h_lsn);
+ if (plsn && XFS_LSN_CMP(plsn, commit_lsn) < 0) {
+ /*
+ * Waiting on ic_force_wait orders the completion of
+ * iclogs older than ic_prev. Hence we only need to wait
+ * on the most recent older iclog here.
+ */
+ xlog_wait_on_iclog(commit_iclog->ic_prev);
+ spin_lock(&log->l_icloglock);
+ }
+
+ /*
+ * We need to issue a pre-flush so that the ordering for this
+ * checkpoint is correctly preserved down to stable storage.
+ */
+ commit_iclog->ic_flags |= XLOG_ICL_NEED_FLUSH;
+ }
+
+ /*
+ * The commit iclog must be written to stable storage to guarantee
+ * journal IO vs metadata writeback IO is correctly ordered on stable
+ * storage.
+ */
+ commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA;
+ xlog_state_release_iclog(log, commit_iclog);
+ spin_unlock(&log->l_icloglock);
return;
out_skip:
@@ -907,7 +978,7 @@ xlog_cil_push_background(
ASSERT(!list_empty(&cil->xc_cil));
/*
- * don't do a background push if we haven't used up all the
+ * Don't do a background push if we haven't used up all the
* space available yet.
*/
if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) {
@@ -931,9 +1002,16 @@ xlog_cil_push_background(
/*
* If we are well over the space limit, throttle the work that is being
- * done until the push work on this context has begun.
+ * done until the push work on this context has begun. Enforce the hard
+ * throttle on all transaction commits once it has been activated, even
+ * if the committing transactions have resulted in the space usage
+ * dipping back down under the hard limit.
+ *
+ * The ctx->xc_push_lock provides the serialisation necessary for safely
+ * using the lockless waitqueue_active() check in this context.
*/
- if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) {
+ if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log) ||
+ waitqueue_active(&cil->xc_push_wait)) {
trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket);
ASSERT(cil->xc_ctx->space_used < log->l_logsize);
xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock);
@@ -1008,16 +1086,14 @@ xlog_cil_empty(
* allowed again.
*/
void
-xfs_log_commit_cil(
- struct xfs_mount *mp,
+xlog_cil_commit(
+ struct xlog *log,
struct xfs_trans *tp,
- xfs_lsn_t *commit_lsn,
+ xfs_csn_t *commit_seq,
bool regrant)
{
- struct xlog *log = mp->m_log;
struct xfs_cil *cil = log->l_cilp;
struct xfs_log_item *lip, *next;
- xfs_lsn_t xc_commit_lsn;
/*
* Do all necessary memory allocation before we lock the CIL.
@@ -1031,10 +1107,6 @@ xfs_log_commit_cil(
xlog_cil_insert_items(log, tp);
- xc_commit_lsn = cil->xc_ctx->sequence;
- if (commit_lsn)
- *commit_lsn = xc_commit_lsn;
-
if (regrant && !XLOG_FORCED_SHUTDOWN(log))
xfs_log_ticket_regrant(log, tp->t_ticket);
else
@@ -1057,8 +1129,10 @@ xfs_log_commit_cil(
list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
xfs_trans_del_item(lip);
if (lip->li_ops->iop_committing)
- lip->li_ops->iop_committing(lip, xc_commit_lsn);
+ lip->li_ops->iop_committing(lip, cil->xc_ctx->sequence);
}
+ if (commit_seq)
+ *commit_seq = cil->xc_ctx->sequence;
/* xlog_cil_push_background() releases cil->xc_ctx_lock */
xlog_cil_push_background(log);
@@ -1075,9 +1149,9 @@ xfs_log_commit_cil(
* iclog flush is necessary following this call.
*/
xfs_lsn_t
-xlog_cil_force_lsn(
+xlog_cil_force_seq(
struct xlog *log,
- xfs_lsn_t sequence)
+ xfs_csn_t sequence)
{
struct xfs_cil *cil = log->l_cilp;
struct xfs_cil_ctx *ctx;
@@ -1173,21 +1247,17 @@ bool
xfs_log_item_in_current_chkpt(
struct xfs_log_item *lip)
{
- struct xfs_cil_ctx *ctx;
+ struct xfs_cil_ctx *ctx = lip->li_mountp->m_log->l_cilp->xc_ctx;
if (list_empty(&lip->li_cil))
return false;
- ctx = lip->li_mountp->m_log->l_cilp->xc_ctx;
-
/*
* li_seq is written on the first commit of a log item to record the
* first checkpoint it is written to. Hence if it is different to the
* current sequence, we're in a new checkpoint.
*/
- if (XFS_LSN_CMP(lip->li_seq, ctx->sequence) != 0)
- return false;
- return true;
+ return lip->li_seq == ctx->sequence;
}
/*