summaryrefslogtreecommitdiffstats
path: root/fs/xfs/scrub
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2024-02-22 12:30:46 -0800
committerDarrick J. Wong <djwong@kernel.org>2024-02-22 12:30:46 -0800
commitc473a3320be32b2273042bfdf0fe8db5da7ae5d0 (patch)
treeb54e7ff49047e3948050c56b21ac1edca1fa098b /fs/xfs/scrub
parent4e98cc905c0fec337416e9fd7ca4f75607a6de99 (diff)
downloadlinux-stable-c473a3320be32b2273042bfdf0fe8db5da7ae5d0.tar.gz
linux-stable-c473a3320be32b2273042bfdf0fe8db5da7ae5d0.tar.bz2
linux-stable-c473a3320be32b2273042bfdf0fe8db5da7ae5d0.zip
xfs: stagger the starting AG of scrub iscans to reduce contention
Online directory and parent repairs on parent-pointer-equipped filesystems have shown that starting a large number of parallel iscans causes a lot of AGI buffer contention. Try to reduce this by making iscans wrap around the end of the filesystem, and by using a rotor to stagger where each scanner begins. Surprisingly, this boosts CPU utilization (on the author's test machines) from effectively single-threaded to 160%. Not great, but see the next patch. Signed-off-by: Darrick J. Wong <djwong@kernel.org> Reviewed-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs/xfs/scrub')
-rw-r--r--fs/xfs/scrub/iscan.c87
-rw-r--r--fs/xfs/scrub/iscan.h7
-rw-r--r--fs/xfs/scrub/trace.h7
3 files changed, 89 insertions, 12 deletions
diff --git a/fs/xfs/scrub/iscan.c b/fs/xfs/scrub/iscan.c
index d13fc3b60f2e..3179c299c77f 100644
--- a/fs/xfs/scrub/iscan.c
+++ b/fs/xfs/scrub/iscan.c
@@ -170,10 +170,24 @@ xchk_iscan_move_cursor(
{
struct xfs_scrub *sc = iscan->sc;
struct xfs_mount *mp = sc->mp;
+ xfs_ino_t cursor, visited;
+
+ BUILD_BUG_ON(XFS_MAXINUMBER == NULLFSINO);
+
+ /*
+ * Special-case ino == 0 here so that we never set visited_ino to
+ * NULLFSINO when wrapping around EOFS, for that will let through all
+ * live updates.
+ */
+ cursor = XFS_AGINO_TO_INO(mp, agno, agino);
+ if (cursor == 0)
+ visited = XFS_MAXINUMBER;
+ else
+ visited = cursor - 1;
mutex_lock(&iscan->lock);
- iscan->cursor_ino = XFS_AGINO_TO_INO(mp, agno, agino);
- iscan->__visited_ino = iscan->cursor_ino - 1;
+ iscan->cursor_ino = cursor;
+ iscan->__visited_ino = visited;
trace_xchk_iscan_move_cursor(iscan);
mutex_unlock(&iscan->lock);
}
@@ -257,12 +271,13 @@ xchk_iscan_advance(
* Did not find any more inodes in this AG, move on to the next
* AG.
*/
- xchk_iscan_move_cursor(iscan, ++agno, 0);
+ agno = (agno + 1) % mp->m_sb.sb_agcount;
+ xchk_iscan_move_cursor(iscan, agno, 0);
xfs_trans_brelse(sc->tp, agi_bp);
xfs_perag_put(pag);
trace_xchk_iscan_advance_ag(iscan);
- } while (agno < mp->m_sb.sb_agcount);
+ } while (iscan->cursor_ino != iscan->scan_start_ino);
xchk_iscan_finish(iscan);
return 0;
@@ -420,6 +435,23 @@ xchk_iscan_teardown(
mutex_destroy(&iscan->lock);
}
+/* Pick a starting AG for a scan; returns the inumber for agino 0 of that AG. */
+static inline xfs_ino_t
+xchk_iscan_rotor(
+ struct xfs_mount *mp)
+{
+ static atomic_t agi_rotor;
+ unsigned int r = atomic_inc_return(&agi_rotor) - 1;
+
+ /*
+ * Rotor *backwards* through the AGs: map the counter to 1..agcount
+ * so that agcount - r below lands on an AG from agcount - 1 down to 0.
+ */
+ r = (r % mp->m_sb.sb_agcount) + 1;
+
+ return XFS_AGINO_TO_INO(mp, mp->m_sb.sb_agcount - r, 0);
+}
+
/*
* Set ourselves up to start an inode scan. If the @iget_timeout and
* @iget_retry_delay parameters are set, the scan will try to iget each inode
@@ -434,15 +466,20 @@ xchk_iscan_start(
unsigned int iget_retry_delay,
struct xchk_iscan *iscan)
{
+ xfs_ino_t start_ino;
+
+ start_ino = xchk_iscan_rotor(sc->mp);
+
iscan->sc = sc;
clear_bit(XCHK_ISCAN_OPSTATE_ABORTED, &iscan->__opstate);
iscan->iget_timeout = iget_timeout;
iscan->iget_retry_delay = iget_retry_delay;
- iscan->__visited_ino = 0;
- iscan->cursor_ino = 0;
+ iscan->__visited_ino = start_ino;
+ iscan->cursor_ino = start_ino;
+ iscan->scan_start_ino = start_ino;
mutex_init(&iscan->lock);
- trace_xchk_iscan_start(iscan);
+ trace_xchk_iscan_start(iscan, start_ino);
}
/*
@@ -471,15 +508,45 @@ xchk_iscan_want_live_update(
struct xchk_iscan *iscan,
xfs_ino_t ino)
{
- bool ret;
+ bool ret = false;
if (xchk_iscan_aborted(iscan))
return false;
mutex_lock(&iscan->lock);
+
trace_xchk_iscan_want_live_update(iscan, ino);
- ret = iscan->__visited_ino >= ino;
- mutex_unlock(&iscan->lock);
+ /* Scan is finished, caller should receive all updates. */
+ if (iscan->__visited_ino == NULLFSINO) {
+ ret = true;
+ goto unlock;
+ }
+
+ /*
+ * The visited cursor hasn't yet wrapped around the end of the FS. If
+ * @ino is inside the starred range, the caller should receive updates:
+ *
+ * 0 ------------ S ************ V ------------ EOFS
+ */
+ if (iscan->scan_start_ino <= iscan->__visited_ino) {
+ if (ino >= iscan->scan_start_ino &&
+ ino <= iscan->__visited_ino)
+ ret = true;
+
+ goto unlock;
+ }
+
+ /*
+ * The visited cursor wrapped around the end of the FS. If @ino is
+ * inside the starred range, the caller should receive updates:
+ *
+ * 0 ************ V ------------ S ************ EOFS
+ */
+ if (ino >= iscan->scan_start_ino || ino <= iscan->__visited_ino)
+ ret = true;
+
+unlock:
+ mutex_unlock(&iscan->lock);
return ret;
}
diff --git a/fs/xfs/scrub/iscan.h b/fs/xfs/scrub/iscan.h
index c25f121859ce..0db97d98ee8d 100644
--- a/fs/xfs/scrub/iscan.h
+++ b/fs/xfs/scrub/iscan.h
@@ -12,6 +12,13 @@ struct xchk_iscan {
/* Lock to protect the scan cursor. */
struct mutex lock;
+ /*
+ * This is the first inode in the inumber address space that we
+ * examined. When the scan wraps around back to here, the scan is
+ * finished.
+ */
+ xfs_ino_t scan_start_ino;
+
/* This is the inode that will be examined next. */
xfs_ino_t cursor_ino;
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 29026d1d9293..5a70968bc3e2 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -1173,25 +1173,27 @@ DEFINE_EVENT(xchk_iscan_class, name, \
DEFINE_ISCAN_EVENT(xchk_iscan_move_cursor);
DEFINE_ISCAN_EVENT(xchk_iscan_visit);
DEFINE_ISCAN_EVENT(xchk_iscan_advance_ag);
-DEFINE_ISCAN_EVENT(xchk_iscan_start);
DECLARE_EVENT_CLASS(xchk_iscan_ino_class,
TP_PROTO(struct xchk_iscan *iscan, xfs_ino_t ino),
TP_ARGS(iscan, ino),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(xfs_ino_t, startino)
__field(xfs_ino_t, cursor)
__field(xfs_ino_t, visited)
__field(xfs_ino_t, ino)
),
TP_fast_assign(
__entry->dev = iscan->sc->mp->m_super->s_dev;
+ __entry->startino = iscan->scan_start_ino;
__entry->cursor = iscan->cursor_ino;
__entry->visited = iscan->__visited_ino;
__entry->ino = ino;
),
- TP_printk("dev %d:%d iscan cursor 0x%llx visited 0x%llx ino 0x%llx",
+ TP_printk("dev %d:%d iscan start 0x%llx cursor 0x%llx visited 0x%llx ino 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->startino,
__entry->cursor,
__entry->visited,
__entry->ino)
@@ -1201,6 +1203,7 @@ DEFINE_EVENT(xchk_iscan_ino_class, name, \
TP_PROTO(struct xchk_iscan *iscan, xfs_ino_t ino), \
TP_ARGS(iscan, ino))
DEFINE_ISCAN_INO_EVENT(xchk_iscan_want_live_update);
+DEFINE_ISCAN_INO_EVENT(xchk_iscan_start);
TRACE_EVENT(xchk_iscan_iget,
TP_PROTO(struct xchk_iscan *iscan, int error),