summaryrefslogtreecommitdiffstats
path: root/fs/xfs/xfs_log_recover.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-08-06 09:50:36 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-08-06 09:50:36 -0400
commit0cbbc422d56668528f6efd1234fe908010284082 (patch)
treed4bebf90c29044b4a6180053fc18f9e927361012 /fs/xfs/xfs_log_recover.c
parent835c92d43b29eb354abdbd5475308a474d7efdfa (diff)
parent3481b68285238054be519ad0c8cad5cc2425e26c (diff)
downloadlinux-0cbbc422d56668528f6efd1234fe908010284082.tar.gz
linux-0cbbc422d56668528f6efd1234fe908010284082.tar.bz2
linux-0cbbc422d56668528f6efd1234fe908010284082.zip
Merge tag 'xfs-rmap-for-linus-4.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs
Pull more xfs updates from Dave Chinner: "This is the second part of the XFS updates for this merge cycle, and contains the new reverse block mapping feature for XFS. Reverse mapping allows us to track the owner of a specific block on disk precisely. It is implemented as a set of btrees (one per allocation group) that track the owners of allocated extents. Effectively it is a "used space tree" that is updated when we allocate or free extents. i.e. it is coherent with the free space btrees we already maintain and never overlaps with them. This reverse mapping infrastructure is the building block of several upcoming features - reflink, copy-on-write data, dedupe, online metadata and data scrubbing, highly accurate bad sector/data loss reporting to users, and significantly improved reconstruction of damaged and corrupted filesystems. There's a lot of new stuff coming along in the next couple of cycles,a nd it all builds in the rmap infrastructure. As such, it's a huge chunk of new code with new on-disk format features and internal infrastructure. It warns at mount time as an experimental feature and that it may eat data (as we do with all new on-disk features until they stabilise). We have not released userspace suport for it yet - userspace support currently requires download from Darrick's xfsprogs repo and build from source, so the access to this feature is really developer/tester only at this point. Initial userspace support will be released at the same time kernel with this code in it is released. The new rmap enabled code regresses 3 xfstests - all are ENOSPC related corner cases, one of which Darrick posted a fix for a few hours ago. The other two are fixed by infrastructure that is part of the upcoming reflink patchset. This new ENOSPC infrastructure requires a on-disk format tweak required to keep mount times in check - we need to keep an on-disk count of allocated rmapbt blocks so we don't have to scan the entire btrees at mount time to count them. This is currently being tested and will be part of the fixes sent in the next week or two so users will not be exposed to this change" * tag 'xfs-rmap-for-linus-4.8-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/dgc/linux-xfs: (52 commits) xfs: move (and rename) the deferred bmap-free tracepoints xfs: collapse single use static functions xfs: remove unnecessary parentheses from log redo item recovery functions xfs: remove the extents array from the rmap update done log item xfs: in btree_lshift, only allocate temporary cursor when needed xfs: remove unnecesary lshift/rshift key initialization xfs: remove the get*keys and update_keys btree ops pointers xfs: enable the rmap btree functionality xfs: don't update rmapbt when fixing agfl xfs: disable XFS_IOC_SWAPEXT when rmap btree is enabled xfs: add rmap btree block detection to log recovery xfs: add rmap btree geometry feature flag xfs: propagate bmap updates to rmapbt xfs: enable the xfs_defer mechanism to process rmaps to update xfs: log rmap intent items xfs: create rmap update intent log items xfs: add rmap btree insert and delete helpers xfs: convert unwritten status of reverse mappings xfs: remove an extent from the rmap btree xfs: add an extent to the rmap btree ...
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r--fs/xfs/xfs_log_recover.c336
1 files changed, 239 insertions, 97 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 835997843846..e8638fd2c0c3 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -43,6 +43,7 @@
#include "xfs_bmap_btree.h"
#include "xfs_error.h"
#include "xfs_dir2.h"
+#include "xfs_rmap_item.h"
#define BLK_AVG(blk1, blk2) ((blk1+blk2) >> 1)
@@ -1911,6 +1912,8 @@ xlog_recover_reorder_trans(
case XFS_LI_QUOTAOFF:
case XFS_LI_EFD:
case XFS_LI_EFI:
+ case XFS_LI_RUI:
+ case XFS_LI_RUD:
trace_xfs_log_recover_item_reorder_tail(log,
trans, item, pass);
list_move_tail(&item->ri_list, &inode_list);
@@ -2228,6 +2231,7 @@ xlog_recover_get_buf_lsn(
case XFS_ABTC_CRC_MAGIC:
case XFS_ABTB_MAGIC:
case XFS_ABTC_MAGIC:
+ case XFS_RMAP_CRC_MAGIC:
case XFS_IBT_CRC_MAGIC:
case XFS_IBT_MAGIC: {
struct xfs_btree_block *btb = blk;
@@ -2396,6 +2400,9 @@ xlog_recover_validate_buf_type(
case XFS_BMAP_MAGIC:
bp->b_ops = &xfs_bmbt_buf_ops;
break;
+ case XFS_RMAP_CRC_MAGIC:
+ bp->b_ops = &xfs_rmapbt_buf_ops;
+ break;
default:
xfs_warn(mp, "Bad btree block magic!");
ASSERT(0);
@@ -3415,6 +3422,99 @@ xlog_recover_efd_pass2(
}
/*
+ * This routine is called to create an in-core extent rmap update
+ * item from the rui format structure which was logged on disk.
+ * It allocates an in-core rui, copies the extents from the format
+ * structure into it, and adds the rui to the AIL with the given
+ * LSN.
+ */
+STATIC int
+xlog_recover_rui_pass2(
+ struct xlog *log,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ int error;
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_rui_log_item *ruip;
+ struct xfs_rui_log_format *rui_formatp;
+
+ rui_formatp = item->ri_buf[0].i_addr;
+
+ ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
+ error = xfs_rui_copy_format(&item->ri_buf[0], &ruip->rui_format);
+ if (error) {
+ xfs_rui_item_free(ruip);
+ return error;
+ }
+ atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
+
+ spin_lock(&log->l_ailp->xa_lock);
+ /*
+ * The RUI has two references. One for the RUD and one for RUI to ensure
+ * it makes it into the AIL. Insert the RUI into the AIL directly and
+ * drop the RUI reference. Note that xfs_trans_ail_update() drops the
+ * AIL lock.
+ */
+ xfs_trans_ail_update(log->l_ailp, &ruip->rui_item, lsn);
+ xfs_rui_release(ruip);
+ return 0;
+}
+
+
+/*
+ * This routine is called when an RUD format structure is found in a committed
+ * transaction in the log. Its purpose is to cancel the corresponding RUI if it
+ * was still in the log. To do this it searches the AIL for the RUI with an id
+ * equal to that in the RUD format structure. If we find it we drop the RUD
+ * reference, which removes the RUI from the AIL and frees it.
+ */
+STATIC int
+xlog_recover_rud_pass2(
+ struct xlog *log,
+ struct xlog_recover_item *item)
+{
+ struct xfs_rud_log_format *rud_formatp;
+ struct xfs_rui_log_item *ruip = NULL;
+ struct xfs_log_item *lip;
+ __uint64_t rui_id;
+ struct xfs_ail_cursor cur;
+ struct xfs_ail *ailp = log->l_ailp;
+
+ rud_formatp = item->ri_buf[0].i_addr;
+ ASSERT(item->ri_buf[0].i_len == sizeof(struct xfs_rud_log_format));
+ rui_id = rud_formatp->rud_rui_id;
+
+ /*
+ * Search for the RUI with the id in the RUD format structure in the
+ * AIL.
+ */
+ spin_lock(&ailp->xa_lock);
+ lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+ while (lip != NULL) {
+ if (lip->li_type == XFS_LI_RUI) {
+ ruip = (struct xfs_rui_log_item *)lip;
+ if (ruip->rui_format.rui_id == rui_id) {
+ /*
+ * Drop the RUD reference to the RUI. This
+ * removes the RUI from the AIL and frees it.
+ */
+ spin_unlock(&ailp->xa_lock);
+ xfs_rui_release(ruip);
+ spin_lock(&ailp->xa_lock);
+ break;
+ }
+ }
+ lip = xfs_trans_ail_cursor_next(ailp, &cur);
+ }
+
+ xfs_trans_ail_cursor_done(&cur);
+ spin_unlock(&ailp->xa_lock);
+
+ return 0;
+}
+
+/*
* This routine is called when an inode create format structure is found in a
* committed transaction in the log. It's purpose is to initialise the inodes
* being allocated on disk. This requires us to get inode cluster buffers that
@@ -3639,6 +3739,8 @@ xlog_recover_ra_pass2(
case XFS_LI_EFI:
case XFS_LI_EFD:
case XFS_LI_QUOTAOFF:
+ case XFS_LI_RUI:
+ case XFS_LI_RUD:
default:
break;
}
@@ -3662,6 +3764,8 @@ xlog_recover_commit_pass1(
case XFS_LI_EFD:
case XFS_LI_DQUOT:
case XFS_LI_ICREATE:
+ case XFS_LI_RUI:
+ case XFS_LI_RUD:
/* nothing to do in pass 1 */
return 0;
default:
@@ -3692,6 +3796,10 @@ xlog_recover_commit_pass2(
return xlog_recover_efi_pass2(log, item, trans->r_lsn);
case XFS_LI_EFD:
return xlog_recover_efd_pass2(log, item);
+ case XFS_LI_RUI:
+ return xlog_recover_rui_pass2(log, item, trans->r_lsn);
+ case XFS_LI_RUD:
+ return xlog_recover_rud_pass2(log, item);
case XFS_LI_DQUOT:
return xlog_recover_dquot_pass2(log, buffer_list, item,
trans->r_lsn);
@@ -4164,126 +4272,156 @@ xlog_recover_process_data(
return 0;
}
-/*
- * Process an extent free intent item that was recovered from
- * the log. We need to free the extents that it describes.
- */
+/* Recover the EFI if necessary. */
STATIC int
xlog_recover_process_efi(
- xfs_mount_t *mp,
- xfs_efi_log_item_t *efip)
+ struct xfs_mount *mp,
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
{
- xfs_efd_log_item_t *efdp;
- xfs_trans_t *tp;
- int i;
- int error = 0;
- xfs_extent_t *extp;
- xfs_fsblock_t startblock_fsb;
-
- ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags));
+ struct xfs_efi_log_item *efip;
+ int error;
/*
- * First check the validity of the extents described by the
- * EFI. If any are bad, then assume that all are bad and
- * just toss the EFI.
+ * Skip EFIs that we've already processed.
*/
- for (i = 0; i < efip->efi_format.efi_nextents; i++) {
- extp = &(efip->efi_format.efi_extents[i]);
- startblock_fsb = XFS_BB_TO_FSB(mp,
- XFS_FSB_TO_DADDR(mp, extp->ext_start));
- if ((startblock_fsb == 0) ||
- (extp->ext_len == 0) ||
- (startblock_fsb >= mp->m_sb.sb_dblocks) ||
- (extp->ext_len >= mp->m_sb.sb_agblocks)) {
- /*
- * This will pull the EFI from the AIL and
- * free the memory associated with it.
- */
- set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
- xfs_efi_release(efip);
- return -EIO;
- }
- }
+ efip = container_of(lip, struct xfs_efi_log_item, efi_item);
+ if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags))
+ return 0;
- error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
- if (error)
- return error;
- efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
+ spin_unlock(&ailp->xa_lock);
+ error = xfs_efi_recover(mp, efip);
+ spin_lock(&ailp->xa_lock);
- for (i = 0; i < efip->efi_format.efi_nextents; i++) {
- extp = &(efip->efi_format.efi_extents[i]);
- error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
- extp->ext_len);
- if (error)
- goto abort_error;
+ return error;
+}
- }
+/* Release the EFI since we're cancelling everything. */
+STATIC void
+xlog_recover_cancel_efi(
+ struct xfs_mount *mp,
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_efi_log_item *efip;
- set_bit(XFS_EFI_RECOVERED, &efip->efi_flags);
- error = xfs_trans_commit(tp);
- return error;
+ efip = container_of(lip, struct xfs_efi_log_item, efi_item);
+
+ spin_unlock(&ailp->xa_lock);
+ xfs_efi_release(efip);
+ spin_lock(&ailp->xa_lock);
+}
+
+/* Recover the RUI if necessary. */
+STATIC int
+xlog_recover_process_rui(
+ struct xfs_mount *mp,
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_rui_log_item *ruip;
+ int error;
+
+ /*
+ * Skip RUIs that we've already processed.
+ */
+ ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
+ if (test_bit(XFS_RUI_RECOVERED, &ruip->rui_flags))
+ return 0;
+
+ spin_unlock(&ailp->xa_lock);
+ error = xfs_rui_recover(mp, ruip);
+ spin_lock(&ailp->xa_lock);
-abort_error:
- xfs_trans_cancel(tp);
return error;
}
+/* Release the RUI since we're cancelling everything. */
+STATIC void
+xlog_recover_cancel_rui(
+ struct xfs_mount *mp,
+ struct xfs_ail *ailp,
+ struct xfs_log_item *lip)
+{
+ struct xfs_rui_log_item *ruip;
+
+ ruip = container_of(lip, struct xfs_rui_log_item, rui_item);
+
+ spin_unlock(&ailp->xa_lock);
+ xfs_rui_release(ruip);
+ spin_lock(&ailp->xa_lock);
+}
+
+/* Is this log item a deferred action intent? */
+static inline bool xlog_item_is_intent(struct xfs_log_item *lip)
+{
+ switch (lip->li_type) {
+ case XFS_LI_EFI:
+ case XFS_LI_RUI:
+ return true;
+ default:
+ return false;
+ }
+}
+
/*
- * When this is called, all of the EFIs which did not have
- * corresponding EFDs should be in the AIL. What we do now
- * is free the extents associated with each one.
+ * When this is called, all of the log intent items which did not have
+ * corresponding log done items should be in the AIL. What we do now
+ * is update the data structures associated with each one.
*
- * Since we process the EFIs in normal transactions, they
- * will be removed at some point after the commit. This prevents
- * us from just walking down the list processing each one.
- * We'll use a flag in the EFI to skip those that we've already
- * processed and use the AIL iteration mechanism's generation
- * count to try to speed this up at least a bit.
+ * Since we process the log intent items in normal transactions, they
+ * will be removed at some point after the commit. This prevents us
+ * from just walking down the list processing each one. We'll use a
+ * flag in the intent item to skip those that we've already processed
+ * and use the AIL iteration mechanism's generation count to try to
+ * speed this up at least a bit.
*
- * When we start, we know that the EFIs are the only things in
- * the AIL. As we process them, however, other items are added
- * to the AIL. Since everything added to the AIL must come after
- * everything already in the AIL, we stop processing as soon as
- * we see something other than an EFI in the AIL.
+ * When we start, we know that the intents are the only things in the
+ * AIL. As we process them, however, other items are added to the
+ * AIL.
*/
STATIC int
-xlog_recover_process_efis(
+xlog_recover_process_intents(
struct xlog *log)
{
struct xfs_log_item *lip;
- struct xfs_efi_log_item *efip;
int error = 0;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp;
+ xfs_lsn_t last_lsn;
ailp = log->l_ailp;
spin_lock(&ailp->xa_lock);
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+ last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
while (lip != NULL) {
/*
- * We're done when we see something other than an EFI.
- * There should be no EFIs left in the AIL now.
+ * We're done when we see something other than an intent.
+ * There should be no intents left in the AIL now.
*/
- if (lip->li_type != XFS_LI_EFI) {
+ if (!xlog_item_is_intent(lip)) {
#ifdef DEBUG
for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
- ASSERT(lip->li_type != XFS_LI_EFI);
+ ASSERT(!xlog_item_is_intent(lip));
#endif
break;
}
/*
- * Skip EFIs that we've already processed.
+ * We should never see a redo item with a LSN higher than
+ * the last transaction we found in the log at the start
+ * of recovery.
*/
- efip = container_of(lip, struct xfs_efi_log_item, efi_item);
- if (test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)) {
- lip = xfs_trans_ail_cursor_next(ailp, &cur);
- continue;
- }
+ ASSERT(XFS_LSN_CMP(last_lsn, lip->li_lsn) >= 0);
- spin_unlock(&ailp->xa_lock);
- error = xlog_recover_process_efi(log->l_mp, efip);
- spin_lock(&ailp->xa_lock);
+ switch (lip->li_type) {
+ case XFS_LI_EFI:
+ error = xlog_recover_process_efi(log->l_mp, ailp, lip);
+ break;
+ case XFS_LI_RUI:
+ error = xlog_recover_process_rui(log->l_mp, ailp, lip);
+ break;
+ }
if (error)
goto out;
lip = xfs_trans_ail_cursor_next(ailp, &cur);
@@ -4295,15 +4433,14 @@ out:
}
/*
- * A cancel occurs when the mount has failed and we're bailing out. Release all
- * pending EFIs so they don't pin the AIL.
+ * A cancel occurs when the mount has failed and we're bailing out.
+ * Release all pending log intent items so they don't pin the AIL.
*/
STATIC int
-xlog_recover_cancel_efis(
+xlog_recover_cancel_intents(
struct xlog *log)
{
struct xfs_log_item *lip;
- struct xfs_efi_log_item *efip;
int error = 0;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp;
@@ -4313,22 +4450,25 @@ xlog_recover_cancel_efis(
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
while (lip != NULL) {
/*
- * We're done when we see something other than an EFI.
- * There should be no EFIs left in the AIL now.
+ * We're done when we see something other than an intent.
+ * There should be no intents left in the AIL now.
*/
- if (lip->li_type != XFS_LI_EFI) {
+ if (!xlog_item_is_intent(lip)) {
#ifdef DEBUG
for (; lip; lip = xfs_trans_ail_cursor_next(ailp, &cur))
- ASSERT(lip->li_type != XFS_LI_EFI);
+ ASSERT(!xlog_item_is_intent(lip));
#endif
break;
}
- efip = container_of(lip, struct xfs_efi_log_item, efi_item);
-
- spin_unlock(&ailp->xa_lock);
- xfs_efi_release(efip);
- spin_lock(&ailp->xa_lock);
+ switch (lip->li_type) {
+ case XFS_LI_EFI:
+ xlog_recover_cancel_efi(log->l_mp, ailp, lip);
+ break;
+ case XFS_LI_RUI:
+ xlog_recover_cancel_rui(log->l_mp, ailp, lip);
+ break;
+ }
lip = xfs_trans_ail_cursor_next(ailp, &cur);
}
@@ -5023,6 +5163,7 @@ xlog_do_recover(
xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
return error;
}
+ mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
xlog_recover_check_summary(log);
@@ -5139,16 +5280,17 @@ xlog_recover_finish(
*/
if (log->l_flags & XLOG_RECOVERY_NEEDED) {
int error;
- error = xlog_recover_process_efis(log);
+ error = xlog_recover_process_intents(log);
if (error) {
- xfs_alert(log->l_mp, "Failed to recover EFIs");
+ xfs_alert(log->l_mp, "Failed to recover intents");
return error;
}
+
/*
- * Sync the log to get all the EFIs out of the AIL.
+ * Sync the log to get all the intents out of the AIL.
* This isn't absolutely necessary, but it helps in
* case the unlink transactions would have problems
- * pushing the EFIs out of the way.
+ * pushing the intents out of the way.
*/
xfs_log_force(log->l_mp, XFS_LOG_SYNC);
@@ -5173,7 +5315,7 @@ xlog_recover_cancel(
int error = 0;
if (log->l_flags & XLOG_RECOVERY_NEEDED)
- error = xlog_recover_cancel_efis(log);
+ error = xlog_recover_cancel_intents(log);
return error;
}