summaryrefslogtreecommitdiffstats
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Makefile8
-rw-r--r--fs/xfs/libxfs/xfs_ag.c256
-rw-r--r--fs/xfs/libxfs/xfs_ag.h205
-rw-r--r--fs/xfs/libxfs/xfs_ag_resv.c22
-rw-r--r--fs/xfs/libxfs/xfs_alloc.c119
-rw-r--r--fs/xfs/libxfs/xfs_alloc.h19
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c30
-rw-r--r--fs/xfs/libxfs/xfs_attr.c5
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c137
-rw-r--r--fs/xfs/libxfs/xfs_bmap.h2
-rw-r--r--fs/xfs/libxfs/xfs_btree.c38
-rw-r--r--fs/xfs/libxfs/xfs_btree.h3
-rw-r--r--fs/xfs/libxfs/xfs_btree_mem.c6
-rw-r--r--fs/xfs/libxfs/xfs_defer.c6
-rw-r--r--fs/xfs/libxfs/xfs_defer.h1
-rw-r--r--fs/xfs/libxfs/xfs_dquot_buf.c190
-rw-r--r--fs/xfs/libxfs/xfs_format.h199
-rw-r--r--fs/xfs/libxfs/xfs_fs.h53
-rw-r--r--fs/xfs/libxfs/xfs_group.c225
-rw-r--r--fs/xfs/libxfs/xfs_group.h164
-rw-r--r--fs/xfs/libxfs/xfs_health.h89
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c175
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c31
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c90
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h3
-rw-r--r--fs/xfs/libxfs/xfs_inode_util.c6
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h8
-rw-r--r--fs/xfs/libxfs/xfs_log_recover.h2
-rw-r--r--fs/xfs/libxfs/xfs_metadir.c481
-rw-r--r--fs/xfs/libxfs/xfs_metadir.h47
-rw-r--r--fs/xfs/libxfs/xfs_metafile.c52
-rw-r--r--fs/xfs/libxfs/xfs_metafile.h31
-rw-r--r--fs/xfs/libxfs/xfs_ondisk.h186
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h43
-rw-r--r--fs/xfs/libxfs/xfs_refcount.c33
-rw-r--r--fs/xfs/libxfs/xfs_refcount.h2
-rw-r--r--fs/xfs/libxfs/xfs_refcount_btree.c17
-rw-r--r--fs/xfs/libxfs/xfs_rmap.c42
-rw-r--r--fs/xfs/libxfs/xfs_rmap.h6
-rw-r--r--fs/xfs/libxfs/xfs_rmap_btree.c28
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c388
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.h247
-rw-r--r--fs/xfs/libxfs/xfs_rtgroup.c697
-rw-r--r--fs/xfs/libxfs/xfs_rtgroup.h284
-rw-r--r--fs/xfs/libxfs/xfs_sb.c276
-rw-r--r--fs/xfs/libxfs/xfs_sb.h6
-rw-r--r--fs/xfs/libxfs/xfs_shared.h4
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c6
-rw-r--r--fs/xfs/libxfs/xfs_trans_resv.c2
-rw-r--r--fs/xfs/libxfs/xfs_types.c44
-rw-r--r--fs/xfs/libxfs/xfs_types.h16
-rw-r--r--fs/xfs/scrub/agheader.c52
-rw-r--r--fs/xfs/scrub/agheader_repair.c42
-rw-r--r--fs/xfs/scrub/alloc.c2
-rw-r--r--fs/xfs/scrub/alloc_repair.c22
-rw-r--r--fs/xfs/scrub/bmap.c38
-rw-r--r--fs/xfs/scrub/bmap_repair.c11
-rw-r--r--fs/xfs/scrub/common.c149
-rw-r--r--fs/xfs/scrub/common.h40
-rw-r--r--fs/xfs/scrub/cow_repair.c21
-rw-r--r--fs/xfs/scrub/dir.c10
-rw-r--r--fs/xfs/scrub/dir_repair.c20
-rw-r--r--fs/xfs/scrub/dirtree.c32
-rw-r--r--fs/xfs/scrub/dirtree.h12
-rw-r--r--fs/xfs/scrub/findparent.c28
-rw-r--r--fs/xfs/scrub/fscounters.c35
-rw-r--r--fs/xfs/scrub/fscounters_repair.c9
-rw-r--r--fs/xfs/scrub/health.c54
-rw-r--r--fs/xfs/scrub/ialloc.c16
-rw-r--r--fs/xfs/scrub/ialloc_repair.c27
-rw-r--r--fs/xfs/scrub/inode.c35
-rw-r--r--fs/xfs/scrub/inode_repair.c39
-rw-r--r--fs/xfs/scrub/iscan.c4
-rw-r--r--fs/xfs/scrub/metapath.c689
-rw-r--r--fs/xfs/scrub/newbt.c52
-rw-r--r--fs/xfs/scrub/nlinks.c4
-rw-r--r--fs/xfs/scrub/nlinks_repair.c4
-rw-r--r--fs/xfs/scrub/orphanage.c4
-rw-r--r--fs/xfs/scrub/parent.c39
-rw-r--r--fs/xfs/scrub/parent_repair.c37
-rw-r--r--fs/xfs/scrub/quotacheck.c7
-rw-r--r--fs/xfs/scrub/reap.c10
-rw-r--r--fs/xfs/scrub/refcount.c3
-rw-r--r--fs/xfs/scrub/refcount_repair.c7
-rw-r--r--fs/xfs/scrub/repair.c61
-rw-r--r--fs/xfs/scrub/repair.h13
-rw-r--r--fs/xfs/scrub/rgsuper.c84
-rw-r--r--fs/xfs/scrub/rmap.c4
-rw-r--r--fs/xfs/scrub/rmap_repair.c25
-rw-r--r--fs/xfs/scrub/rtbitmap.c54
-rw-r--r--fs/xfs/scrub/rtsummary.c116
-rw-r--r--fs/xfs/scrub/rtsummary_repair.c22
-rw-r--r--fs/xfs/scrub/scrub.c52
-rw-r--r--fs/xfs/scrub/scrub.h17
-rw-r--r--fs/xfs/scrub/stats.c2
-rw-r--r--fs/xfs/scrub/tempfile.c105
-rw-r--r--fs/xfs/scrub/tempfile.h3
-rw-r--r--fs/xfs/scrub/trace.c1
-rw-r--r--fs/xfs/scrub/trace.h247
-rw-r--r--fs/xfs/xfs_bmap_item.c26
-rw-r--r--fs/xfs/xfs_bmap_util.c46
-rw-r--r--fs/xfs/xfs_buf.c7
-rw-r--r--fs/xfs/xfs_buf.h4
-rw-r--r--fs/xfs/xfs_buf_item_recover.c67
-rw-r--r--fs/xfs/xfs_discard.c308
-rw-r--r--fs/xfs/xfs_dquot.c38
-rw-r--r--fs/xfs/xfs_dquot.h18
-rw-r--r--fs/xfs/xfs_drain.c78
-rw-r--r--fs/xfs/xfs_drain.h22
-rw-r--r--fs/xfs/xfs_exchrange.c20
-rw-r--r--fs/xfs/xfs_extent_busy.c214
-rw-r--r--fs/xfs/xfs_extent_busy.h65
-rw-r--r--fs/xfs/xfs_extfree_item.c282
-rw-r--r--fs/xfs/xfs_file.c82
-rw-r--r--fs/xfs/xfs_filestream.c13
-rw-r--r--fs/xfs/xfs_fsmap.c363
-rw-r--r--fs/xfs/xfs_fsmap.h15
-rw-r--r--fs/xfs/xfs_fsops.c14
-rw-r--r--fs/xfs/xfs_handle.c16
-rw-r--r--fs/xfs/xfs_health.c278
-rw-r--r--fs/xfs/xfs_icache.c134
-rw-r--r--fs/xfs/xfs_inode.c33
-rw-r--r--fs/xfs/xfs_inode.h64
-rw-r--r--fs/xfs/xfs_inode_item.c7
-rw-r--r--fs/xfs/xfs_inode_item_recover.c2
-rw-r--r--fs/xfs/xfs_ioctl.c115
-rw-r--r--fs/xfs/xfs_iomap.c71
-rw-r--r--fs/xfs/xfs_iomap.h1
-rw-r--r--fs/xfs/xfs_iops.c47
-rw-r--r--fs/xfs/xfs_itable.c33
-rw-r--r--fs/xfs/xfs_itable.h3
-rw-r--r--fs/xfs/xfs_iunlink_item.c13
-rw-r--r--fs/xfs/xfs_iwalk.c116
-rw-r--r--fs/xfs/xfs_iwalk.h7
-rw-r--r--fs/xfs/xfs_log_cil.c3
-rw-r--r--fs/xfs/xfs_log_recover.c18
-rw-r--r--fs/xfs/xfs_message.c51
-rw-r--r--fs/xfs/xfs_message.h20
-rw-r--r--fs/xfs/xfs_mount.c61
-rw-r--r--fs/xfs/xfs_mount.h113
-rw-r--r--fs/xfs/xfs_pnfs.c3
-rw-r--r--fs/xfs/xfs_qm.c381
-rw-r--r--fs/xfs/xfs_qm_bhv.c36
-rw-r--r--fs/xfs/xfs_quota.h19
-rw-r--r--fs/xfs/xfs_refcount_item.c9
-rw-r--r--fs/xfs/xfs_reflink.c7
-rw-r--r--fs/xfs/xfs_rmap_item.c9
-rw-r--r--fs/xfs/xfs_rtalloc.c1025
-rw-r--r--fs/xfs/xfs_rtalloc.h6
-rw-r--r--fs/xfs/xfs_stats.c7
-rw-r--r--fs/xfs/xfs_super.c77
-rw-r--r--fs/xfs/xfs_trace.c5
-rw-r--r--fs/xfs/xfs_trace.h687
-rw-r--r--fs/xfs/xfs_trans.c97
-rw-r--r--fs/xfs/xfs_trans.h2
-rw-r--r--fs/xfs/xfs_trans_buf.c25
-rw-r--r--fs/xfs/xfs_trans_dquot.c17
-rw-r--r--fs/xfs/xfs_xattr.c3
158 files changed, 9666 insertions, 3027 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index dd692619bed5..ed9b0dabc1f1 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -14,7 +14,9 @@ xfs-y += xfs_trace.o
# build the libxfs code first
xfs-y += $(addprefix libxfs/, \
+ xfs_group.o \
xfs_ag.o \
+ xfs_ag_resv.o \
xfs_alloc.o \
xfs_alloc_btree.o \
xfs_attr.o \
@@ -42,7 +44,8 @@ xfs-y += $(addprefix libxfs/, \
xfs_inode_buf.o \
xfs_inode_util.o \
xfs_log_rlimit.o \
- xfs_ag_resv.o \
+ xfs_metadir.o \
+ xfs_metafile.o \
xfs_parent.o \
xfs_rmap.o \
xfs_rmap_btree.o \
@@ -58,6 +61,7 @@ xfs-y += $(addprefix libxfs/, \
# xfs_rtbitmap is shared with libxfs
xfs-$(CONFIG_XFS_RT) += $(addprefix libxfs/, \
xfs_rtbitmap.o \
+ xfs_rtgroup.o \
)
# highlevel code
@@ -171,6 +175,7 @@ xfs-y += $(addprefix scrub/, \
inode.o \
iscan.o \
listxattr.o \
+ metapath.o \
nlinks.o \
parent.o \
readdir.o \
@@ -186,6 +191,7 @@ xfs-y += $(addprefix scrub/, \
xfs-$(CONFIG_XFS_ONLINE_SCRUB_STATS) += scrub/stats.o
xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
+ rgsuper.o \
rtbitmap.o \
rtsummary.o \
)
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 5ca8d0106827..b59cb461e096 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -30,86 +30,7 @@
#include "xfs_trace.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
-
-
-/*
- * Passive reference counting access wrappers to the perag structures. If the
- * per-ag structure is to be freed, the freeing code is responsible for cleaning
- * up objects with passive references before freeing the structure. This is
- * things like cached buffers.
- */
-struct xfs_perag *
-xfs_perag_get(
- struct xfs_mount *mp,
- xfs_agnumber_t agno)
-{
- struct xfs_perag *pag;
-
- rcu_read_lock();
- pag = xa_load(&mp->m_perags, agno);
- if (pag) {
- trace_xfs_perag_get(pag, _RET_IP_);
- ASSERT(atomic_read(&pag->pag_ref) >= 0);
- atomic_inc(&pag->pag_ref);
- }
- rcu_read_unlock();
- return pag;
-}
-
-/* Get a passive reference to the given perag. */
-struct xfs_perag *
-xfs_perag_hold(
- struct xfs_perag *pag)
-{
- ASSERT(atomic_read(&pag->pag_ref) > 0 ||
- atomic_read(&pag->pag_active_ref) > 0);
-
- trace_xfs_perag_hold(pag, _RET_IP_);
- atomic_inc(&pag->pag_ref);
- return pag;
-}
-
-void
-xfs_perag_put(
- struct xfs_perag *pag)
-{
- trace_xfs_perag_put(pag, _RET_IP_);
- ASSERT(atomic_read(&pag->pag_ref) > 0);
- atomic_dec(&pag->pag_ref);
-}
-
-/*
- * Active references for perag structures. This is for short term access to the
- * per ag structures for walking trees or accessing state. If an AG is being
- * shrunk or is offline, then this will fail to find that AG and return NULL
- * instead.
- */
-struct xfs_perag *
-xfs_perag_grab(
- struct xfs_mount *mp,
- xfs_agnumber_t agno)
-{
- struct xfs_perag *pag;
-
- rcu_read_lock();
- pag = xa_load(&mp->m_perags, agno);
- if (pag) {
- trace_xfs_perag_grab(pag, _RET_IP_);
- if (!atomic_inc_not_zero(&pag->pag_active_ref))
- pag = NULL;
- }
- rcu_read_unlock();
- return pag;
-}
-
-void
-xfs_perag_rele(
- struct xfs_perag *pag)
-{
- trace_xfs_perag_rele(pag, _RET_IP_);
- if (atomic_dec_and_test(&pag->pag_active_ref))
- wake_up(&pag->pag_active_wq);
-}
+#include "xfs_group.h"
/*
* xfs_initialize_perag_data
@@ -184,6 +105,18 @@ out:
return error;
}
+static void
+xfs_perag_uninit(
+ struct xfs_group *xg)
+{
+#ifdef __KERNEL__
+ struct xfs_perag *pag = to_perag(xg);
+
+ cancel_delayed_work_sync(&pag->pag_blockgc_work);
+ xfs_buf_cache_destroy(&pag->pag_bcache);
+#endif
+}
+
/*
* Free up the per-ag resources within the specified AG range.
*/
@@ -196,22 +129,8 @@ xfs_free_perag_range(
{
xfs_agnumber_t agno;
- for (agno = first_agno; agno < end_agno; agno++) {
- struct xfs_perag *pag = xa_erase(&mp->m_perags, agno);
-
- ASSERT(pag);
- XFS_IS_CORRUPT(pag->pag_mount, atomic_read(&pag->pag_ref) != 0);
- xfs_defer_drain_free(&pag->pag_intents_drain);
-
- cancel_delayed_work_sync(&pag->pag_blockgc_work);
- xfs_buf_cache_destroy(&pag->pag_bcache);
-
- /* drop the mount's active reference */
- xfs_perag_rele(pag);
- XFS_IS_CORRUPT(pag->pag_mount,
- atomic_read(&pag->pag_active_ref) != 0);
- kfree_rcu_mightsleep(pag);
- }
+ for (agno = first_agno; agno < end_agno; agno++)
+ xfs_group_free(mp, agno, XG_TYPE_AG, xfs_perag_uninit);
}
/* Find the size of the AG, in blocks. */
@@ -273,6 +192,10 @@ xfs_agino_range(
return __xfs_agino_range(mp, xfs_ag_block_count(mp, agno), first, last);
}
+/*
+ * Update the perag of the previous tail AG if it has been changed during
+ * recovery (i.e. recovery of a growfs).
+ */
int
xfs_update_last_ag_size(
struct xfs_mount *mp,
@@ -282,88 +205,88 @@ xfs_update_last_ag_size(
if (!pag)
return -EFSCORRUPTED;
- pag->block_count = __xfs_ag_block_count(mp, prev_agcount - 1,
- mp->m_sb.sb_agcount, mp->m_sb.sb_dblocks);
- __xfs_agino_range(mp, pag->block_count, &pag->agino_min,
+ pag_group(pag)->xg_block_count = __xfs_ag_block_count(mp,
+ prev_agcount - 1, mp->m_sb.sb_agcount,
+ mp->m_sb.sb_dblocks);
+ __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
&pag->agino_max);
xfs_perag_rele(pag);
return 0;
}
-int
-xfs_initialize_perag(
+static int
+xfs_perag_alloc(
struct xfs_mount *mp,
- xfs_agnumber_t old_agcount,
- xfs_agnumber_t new_agcount,
- xfs_rfsblock_t dblocks,
- xfs_agnumber_t *maxagi)
+ xfs_agnumber_t index,
+ xfs_agnumber_t agcount,
+ xfs_rfsblock_t dblocks)
{
struct xfs_perag *pag;
- xfs_agnumber_t index;
int error;
- for (index = old_agcount; index < new_agcount; index++) {
- pag = kzalloc(sizeof(*pag), GFP_KERNEL);
- if (!pag) {
- error = -ENOMEM;
- goto out_unwind_new_pags;
- }
- pag->pag_agno = index;
- pag->pag_mount = mp;
-
- error = xa_insert(&mp->m_perags, index, pag, GFP_KERNEL);
- if (error) {
- WARN_ON_ONCE(error == -EBUSY);
- goto out_free_pag;
- }
+ pag = kzalloc(sizeof(*pag), GFP_KERNEL);
+ if (!pag)
+ return -ENOMEM;
#ifdef __KERNEL__
- /* Place kernel structure only init below this point. */
- spin_lock_init(&pag->pag_ici_lock);
- spin_lock_init(&pag->pagb_lock);
- spin_lock_init(&pag->pag_state_lock);
- INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
- INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
- xfs_defer_drain_init(&pag->pag_intents_drain);
- init_waitqueue_head(&pag->pagb_wait);
- init_waitqueue_head(&pag->pag_active_wq);
- pag->pagb_count = 0;
- pag->pagb_tree = RB_ROOT;
- xfs_hooks_init(&pag->pag_rmap_update_hooks);
+ /* Place kernel structure only init below this point. */
+ spin_lock_init(&pag->pag_ici_lock);
+ INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
+ INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
#endif /* __KERNEL__ */
- error = xfs_buf_cache_init(&pag->pag_bcache);
- if (error)
- goto out_remove_pag;
-
- /* Active ref owned by mount indicates AG is online. */
- atomic_set(&pag->pag_active_ref, 1);
+ error = xfs_buf_cache_init(&pag->pag_bcache);
+ if (error)
+ goto out_free_perag;
- /*
- * Pre-calculated geometry
- */
- pag->block_count = __xfs_ag_block_count(mp, index, new_agcount,
+ /*
+ * Pre-calculated geometry
+ */
+ pag_group(pag)->xg_block_count = __xfs_ag_block_count(mp, index, agcount,
dblocks);
- pag->min_block = XFS_AGFL_BLOCK(mp);
- __xfs_agino_range(mp, pag->block_count, &pag->agino_min,
- &pag->agino_max);
- }
+ pag_group(pag)->xg_min_gbno = XFS_AGFL_BLOCK(mp) + 1;
+ __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
+ &pag->agino_max);
- index = xfs_set_inode_alloc(mp, new_agcount);
+ error = xfs_group_insert(mp, pag_group(pag), index, XG_TYPE_AG);
+ if (error)
+ goto out_buf_cache_destroy;
- if (maxagi)
- *maxagi = index;
+ return 0;
+
+out_buf_cache_destroy:
+ xfs_buf_cache_destroy(&pag->pag_bcache);
+out_free_perag:
+ kfree(pag);
+ return error;
+}
+int
+xfs_initialize_perag(
+ struct xfs_mount *mp,
+ xfs_agnumber_t orig_agcount,
+ xfs_agnumber_t new_agcount,
+ xfs_rfsblock_t dblocks,
+ xfs_agnumber_t *maxagi)
+{
+ xfs_agnumber_t index;
+ int error;
+
+ if (orig_agcount >= new_agcount)
+ return 0;
+
+ for (index = orig_agcount; index < new_agcount; index++) {
+ error = xfs_perag_alloc(mp, index, new_agcount, dblocks);
+ if (error)
+ goto out_unwind_new_pags;
+ }
+
+ *maxagi = xfs_set_inode_alloc(mp, new_agcount);
mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp);
return 0;
-out_remove_pag:
- xfs_defer_drain_free(&pag->pag_intents_drain);
- pag = xa_erase(&mp->m_perags, index);
-out_free_pag:
- kfree(pag);
out_unwind_new_pags:
- xfs_free_perag_range(mp, old_agcount, index);
+ xfs_free_perag_range(mp, orig_agcount, index);
return error;
}
@@ -818,7 +741,7 @@ xfs_ag_shrink_space(
struct xfs_trans **tpp,
xfs_extlen_t delta)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_alloc_arg args = {
.tp = *tpp,
.mp = mp,
@@ -835,7 +758,7 @@ xfs_ag_shrink_space(
xfs_agblock_t aglen;
int error, err2;
- ASSERT(pag->pag_agno == mp->m_sb.sb_agcount - 1);
+ ASSERT(pag_agno(pag) == mp->m_sb.sb_agcount - 1);
error = xfs_ialloc_read_agi(pag, *tpp, 0, &agibp);
if (error)
return error;
@@ -872,7 +795,7 @@ xfs_ag_shrink_space(
/* internal log shouldn't also show up in the free space btrees */
error = xfs_alloc_vextent_exact_bno(&args,
- XFS_AGB_TO_FSB(mp, pag->pag_agno, aglen - delta));
+ xfs_agbno_to_fsb(pag, aglen - delta));
if (!error && args.agbno == NULLAGBLOCK)
error = -ENOSPC;
@@ -931,9 +854,9 @@ xfs_ag_shrink_space(
}
/* Update perag geometry */
- pag->block_count -= delta;
- __xfs_agino_range(pag->pag_mount, pag->block_count, &pag->agino_min,
- &pag->agino_max);
+ pag_group(pag)->xg_block_count -= delta;
+ __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
+ &pag->agino_max);
xfs_ialloc_log_agi(*tpp, agibp, XFS_AGI_LENGTH);
xfs_alloc_log_agf(*tpp, agfbp, XFS_AGF_LENGTH);
@@ -958,12 +881,13 @@ xfs_ag_extend_space(
struct xfs_trans *tp,
xfs_extlen_t len)
{
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_buf *bp;
struct xfs_agi *agi;
struct xfs_agf *agf;
int error;
- ASSERT(pag->pag_agno == pag->pag_mount->m_sb.sb_agcount - 1);
+ ASSERT(pag_agno(pag) == mp->m_sb.sb_agcount - 1);
error = xfs_ialloc_read_agi(pag, tp, 0, &bp);
if (error)
@@ -1002,9 +926,9 @@ xfs_ag_extend_space(
return error;
/* Update perag geometry */
- pag->block_count = be32_to_cpu(agf->agf_length);
- __xfs_agino_range(pag->pag_mount, pag->block_count, &pag->agino_min,
- &pag->agino_max);
+ pag_group(pag)->xg_block_count = be32_to_cpu(agf->agf_length);
+ __xfs_agino_range(mp, pag_group(pag)->xg_block_count, &pag->agino_min,
+ &pag->agino_max);
return 0;
}
@@ -1031,7 +955,7 @@ xfs_ag_get_geometry(
/* Fill out form. */
memset(ageo, 0, sizeof(*ageo));
- ageo->ag_number = pag->pag_agno;
+ ageo->ag_number = pag_agno(pag);
agi = agi_bp->b_addr;
ageo->ag_icount = be32_to_cpu(agi->agi_count);
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 9edfe0e96439..1f24cfa27321 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -7,6 +7,8 @@
#ifndef __LIBXFS_AG_H
#define __LIBXFS_AG_H 1
+#include "xfs_group.h"
+
struct xfs_mount;
struct xfs_trans;
struct xfs_perag;
@@ -30,11 +32,7 @@ struct xfs_ag_resv {
* performance of allocation group selection.
*/
struct xfs_perag {
- struct xfs_mount *pag_mount; /* owner filesystem */
- xfs_agnumber_t pag_agno; /* AG this structure belongs to */
- atomic_t pag_ref; /* passive reference count */
- atomic_t pag_active_ref; /* active reference count */
- wait_queue_head_t pag_active_wq;/* woken active_ref falls to zero */
+ struct xfs_group pag_group;
unsigned long pag_opstate;
uint8_t pagf_bno_level; /* # of levels in bno btree */
uint8_t pagf_cnt_level; /* # of levels in cnt btree */
@@ -55,7 +53,6 @@ struct xfs_perag {
xfs_agino_t pagl_leftrec;
xfs_agino_t pagl_rightrec;
- int pagb_count; /* pagb slots in use */
uint8_t pagf_refcount_level; /* recount btree height */
/* Blocks reserved for all kinds of metadata. */
@@ -64,21 +61,12 @@ struct xfs_perag {
struct xfs_ag_resv pag_rmapbt_resv;
/* Precalculated geometry info */
- xfs_agblock_t block_count;
- xfs_agblock_t min_block;
xfs_agino_t agino_min;
xfs_agino_t agino_max;
#ifdef __KERNEL__
/* -- kernel only structures below this line -- */
- /*
- * Bitsets of per-ag metadata that have been checked and/or are sick.
- * Callers should hold pag_state_lock before accessing this field.
- */
- uint16_t pag_checked;
- uint16_t pag_sick;
-
#ifdef CONFIG_XFS_ONLINE_REPAIR
/*
* Alternate btree heights so that online repair won't trip the write
@@ -90,13 +78,6 @@ struct xfs_perag {
uint8_t pagf_repair_rmap_level;
#endif
- spinlock_t pag_state_lock;
-
- spinlock_t pagb_lock; /* lock for pagb_tree */
- struct rb_root pagb_tree; /* ordered tree of busy extents */
- unsigned int pagb_gen; /* generation count for pagb_tree */
- wait_queue_head_t pagb_wait; /* woken when pagb_gen changes */
-
atomic_t pagf_fstrms; /* # of filestreams active in this AG */
spinlock_t pag_ici_lock; /* incore inode cache lock */
@@ -108,21 +89,29 @@ struct xfs_perag {
/* background prealloc block trimming */
struct delayed_work pag_blockgc_work;
-
- /*
- * We use xfs_drain to track the number of deferred log intent items
- * that have been queued (but not yet processed) so that waiters (e.g.
- * scrub) will not lock resources when other threads are in the middle
- * of processing a chain of intent items only to find momentary
- * inconsistencies.
- */
- struct xfs_defer_drain pag_intents_drain;
-
- /* Hook to feed rmapbt updates to an active online repair. */
- struct xfs_hooks pag_rmap_update_hooks;
#endif /* __KERNEL__ */
};
+static inline struct xfs_perag *to_perag(struct xfs_group *xg)
+{
+ return container_of(xg, struct xfs_perag, pag_group);
+}
+
+static inline struct xfs_group *pag_group(struct xfs_perag *pag)
+{
+ return &pag->pag_group;
+}
+
+static inline struct xfs_mount *pag_mount(const struct xfs_perag *pag)
+{
+ return pag->pag_group.xg_mount;
+}
+
+static inline xfs_agnumber_t pag_agno(const struct xfs_perag *pag)
+{
+ return pag->pag_group.xg_gno;
+}
+
/*
* Per-AG operational state. These are atomic flag bits.
*/
@@ -144,8 +133,8 @@ __XFS_AG_OPSTATE(prefers_metadata, PREFERS_METADATA)
__XFS_AG_OPSTATE(allows_inodes, ALLOWS_INODES)
__XFS_AG_OPSTATE(agfl_needs_reset, AGFL_NEEDS_RESET)
-int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t old_agcount,
- xfs_agnumber_t agcount, xfs_rfsblock_t dcount,
+int xfs_initialize_perag(struct xfs_mount *mp, xfs_agnumber_t orig_agcount,
+ xfs_agnumber_t new_agcount, xfs_rfsblock_t dcount,
xfs_agnumber_t *maxagi);
void xfs_free_perag_range(struct xfs_mount *mp, xfs_agnumber_t first_agno,
xfs_agnumber_t end_agno);
@@ -153,13 +142,71 @@ int xfs_initialize_perag_data(struct xfs_mount *mp, xfs_agnumber_t agno);
int xfs_update_last_ag_size(struct xfs_mount *mp, xfs_agnumber_t prev_agcount);
/* Passive AG references */
-struct xfs_perag *xfs_perag_get(struct xfs_mount *mp, xfs_agnumber_t agno);
-struct xfs_perag *xfs_perag_hold(struct xfs_perag *pag);
-void xfs_perag_put(struct xfs_perag *pag);
+static inline struct xfs_perag *
+xfs_perag_get(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ return to_perag(xfs_group_get(mp, agno, XG_TYPE_AG));
+}
+
+static inline struct xfs_perag *
+xfs_perag_hold(
+ struct xfs_perag *pag)
+{
+ return to_perag(xfs_group_hold(pag_group(pag)));
+}
+
+static inline void
+xfs_perag_put(
+ struct xfs_perag *pag)
+{
+ xfs_group_put(pag_group(pag));
+}
/* Active AG references */
-struct xfs_perag *xfs_perag_grab(struct xfs_mount *, xfs_agnumber_t);
-void xfs_perag_rele(struct xfs_perag *pag);
+static inline struct xfs_perag *
+xfs_perag_grab(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno)
+{
+ return to_perag(xfs_group_grab(mp, agno, XG_TYPE_AG));
+}
+
+static inline void
+xfs_perag_rele(
+ struct xfs_perag *pag)
+{
+ xfs_group_rele(pag_group(pag));
+}
+
+static inline struct xfs_perag *
+xfs_perag_next_range(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag,
+ xfs_agnumber_t start_agno,
+ xfs_agnumber_t end_agno)
+{
+ return to_perag(xfs_group_next_range(mp, pag ? pag_group(pag) : NULL,
+ start_agno, end_agno, XG_TYPE_AG));
+}
+
+static inline struct xfs_perag *
+xfs_perag_next_from(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag,
+ xfs_agnumber_t start_agno)
+{
+ return xfs_perag_next_range(mp, pag, start_agno, mp->m_sb.sb_agcount - 1);
+}
+
+static inline struct xfs_perag *
+xfs_perag_next(
+ struct xfs_mount *mp,
+ struct xfs_perag *pag)
+{
+ return xfs_perag_next_from(mp, pag, 0);
+}
/*
* Per-ag geometry infomation and validation
@@ -171,11 +218,7 @@ void xfs_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno,
static inline bool
xfs_verify_agbno(struct xfs_perag *pag, xfs_agblock_t agbno)
{
- if (agbno >= pag->block_count)
- return false;
- if (agbno <= pag->min_block)
- return false;
- return true;
+ return xfs_verify_gbno(pag_group(pag), agbno);
}
static inline bool
@@ -184,13 +227,7 @@ xfs_verify_agbext(
xfs_agblock_t agbno,
xfs_agblock_t len)
{
- if (agbno + len <= agbno)
- return false;
-
- if (!xfs_verify_agbno(pag, agbno))
- return false;
-
- return xfs_verify_agbno(pag, agbno + len - 1);
+ return xfs_verify_gbext(pag_group(pag), agbno, len);
}
/*
@@ -226,40 +263,6 @@ xfs_ag_contains_log(struct xfs_mount *mp, xfs_agnumber_t agno)
agno == XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart);
}
-/*
- * Perag iteration APIs
- */
-static inline struct xfs_perag *
-xfs_perag_next(
- struct xfs_perag *pag,
- xfs_agnumber_t *agno,
- xfs_agnumber_t end_agno)
-{
- struct xfs_mount *mp = pag->pag_mount;
-
- *agno = pag->pag_agno + 1;
- xfs_perag_rele(pag);
- while (*agno <= end_agno) {
- pag = xfs_perag_grab(mp, *agno);
- if (pag)
- return pag;
- (*agno)++;
- }
- return NULL;
-}
-
-#define for_each_perag_range(mp, agno, end_agno, pag) \
- for ((pag) = xfs_perag_grab((mp), (agno)); \
- (pag) != NULL; \
- (pag) = xfs_perag_next((pag), &(agno), (end_agno)))
-
-#define for_each_perag_from(mp, agno, pag) \
- for_each_perag_range((mp), (agno), (mp)->m_sb.sb_agcount - 1, (pag))
-
-#define for_each_perag(mp, agno, pag) \
- (agno) = 0; \
- for_each_perag_from((mp), (agno), (pag))
-
static inline struct xfs_perag *
xfs_perag_next_wrap(
struct xfs_perag *pag,
@@ -268,9 +271,9 @@ xfs_perag_next_wrap(
xfs_agnumber_t restart_agno,
xfs_agnumber_t wrap_agno)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
- *agno = pag->pag_agno + 1;
+ *agno = pag_agno(pag) + 1;
xfs_perag_rele(pag);
while (*agno != stop_agno) {
if (*agno >= wrap_agno) {
@@ -332,4 +335,28 @@ int xfs_ag_extend_space(struct xfs_perag *pag, struct xfs_trans *tp,
xfs_extlen_t len);
int xfs_ag_get_geometry(struct xfs_perag *pag, struct xfs_ag_geometry *ageo);
+static inline xfs_fsblock_t
+xfs_agbno_to_fsb(
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno)
+{
+ return XFS_AGB_TO_FSB(pag_mount(pag), pag_agno(pag), agbno);
+}
+
+static inline xfs_daddr_t
+xfs_agbno_to_daddr(
+ struct xfs_perag *pag,
+ xfs_agblock_t agbno)
+{
+ return XFS_AGB_TO_DADDR(pag_mount(pag), pag_agno(pag), agbno);
+}
+
+static inline xfs_ino_t
+xfs_agino_to_ino(
+ struct xfs_perag *pag,
+ xfs_agino_t agino)
+{
+ return XFS_AGINO_TO_INO(pag_mount(pag), pag_agno(pag), agino);
+}
+
#endif /* __LIBXFS_AG_H */
diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c
index 216423df939e..f5d853089019 100644
--- a/fs/xfs/libxfs/xfs_ag_resv.c
+++ b/fs/xfs/libxfs/xfs_ag_resv.c
@@ -70,6 +70,7 @@ xfs_ag_resv_critical(
struct xfs_perag *pag,
enum xfs_ag_resv_type type)
{
+ struct xfs_mount *mp = pag_mount(pag);
xfs_extlen_t avail;
xfs_extlen_t orig;
@@ -92,8 +93,8 @@ xfs_ag_resv_critical(
/* Critically low if less than 10% or max btree height remains. */
return XFS_TEST_ERROR(avail < orig / 10 ||
- avail < pag->pag_mount->m_agbtree_maxlevels,
- pag->pag_mount, XFS_ERRTAG_AG_RESV_CRITICAL);
+ avail < mp->m_agbtree_maxlevels,
+ mp, XFS_ERRTAG_AG_RESV_CRITICAL);
}
/*
@@ -137,8 +138,8 @@ __xfs_ag_resv_free(
trace_xfs_ag_resv_free(pag, type, 0);
resv = xfs_perag_resv(pag, type);
- if (pag->pag_agno == 0)
- pag->pag_mount->m_ag_max_usable += resv->ar_asked;
+ if (pag_agno(pag) == 0)
+ pag_mount(pag)->m_ag_max_usable += resv->ar_asked;
/*
* RMAPBT blocks come from the AGFL and AGFL blocks are always
* considered "free", so whatever was reserved at mount time must be
@@ -148,7 +149,7 @@ __xfs_ag_resv_free(
oldresv = resv->ar_orig_reserved;
else
oldresv = resv->ar_reserved;
- xfs_add_fdblocks(pag->pag_mount, oldresv);
+ xfs_add_fdblocks(pag_mount(pag), oldresv);
resv->ar_reserved = 0;
resv->ar_asked = 0;
resv->ar_orig_reserved = 0;
@@ -170,7 +171,7 @@ __xfs_ag_resv_init(
xfs_extlen_t ask,
xfs_extlen_t used)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_ag_resv *resv;
int error;
xfs_extlen_t hidden_space;
@@ -206,11 +207,10 @@ __xfs_ag_resv_init(
else
error = xfs_dec_fdblocks(mp, hidden_space, true);
if (error) {
- trace_xfs_ag_resv_init_error(pag->pag_mount, pag->pag_agno,
- error, _RET_IP_);
+ trace_xfs_ag_resv_init_error(pag, error, _RET_IP_);
xfs_warn(mp,
"Per-AG reservation for AG %u failed. Filesystem may run out of space.",
- pag->pag_agno);
+ pag_agno(pag));
return error;
}
@@ -220,7 +220,7 @@ __xfs_ag_resv_init(
* counter, we only make the adjustment for AG 0. This assumes that
* there aren't any AGs hungrier for per-AG reservation than AG 0.
*/
- if (pag->pag_agno == 0)
+ if (pag_agno(pag) == 0)
mp->m_ag_max_usable -= ask;
resv = xfs_perag_resv(pag, type);
@@ -238,7 +238,7 @@ xfs_ag_resv_init(
struct xfs_perag *pag,
struct xfs_trans *tp)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
xfs_extlen_t ask;
xfs_extlen_t used;
int error = 0, error2;
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 22bdbb3e9980..3d33e17f2e5c 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -275,7 +275,7 @@ xfs_alloc_complain_bad_rec(
xfs_warn(mp,
"%sbt record corruption in AG %d detected at %pS!",
- cur->bc_ops->name, cur->bc_ag.pag->pag_agno, fa);
+ cur->bc_ops->name, cur->bc_group->xg_gno, fa);
xfs_warn(mp,
"start block 0x%x block count 0x%x", irec->ar_startblock,
irec->ar_blockcount);
@@ -303,7 +303,7 @@ xfs_alloc_get_rec(
return error;
xfs_alloc_btrec_to_irec(rec, &irec);
- fa = xfs_alloc_check_irec(cur->bc_ag.pag, &irec);
+ fa = xfs_alloc_check_irec(to_perag(cur->bc_group), &irec);
if (fa)
return xfs_alloc_complain_bad_rec(cur, fa, &irec);
@@ -331,7 +331,8 @@ xfs_alloc_compute_aligned(
bool busy;
/* Trim busy sections out of found extent */
- busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
+ busy = xfs_extent_busy_trim(pag_group(args->pag), args->minlen,
+ args->maxlen, &bno, &len, busy_gen);
/*
* If we have a largish extent that happens to start before min_agbno,
@@ -539,7 +540,7 @@ static int
xfs_alloc_fixup_longest(
struct xfs_btree_cur *cnt_cur)
{
- struct xfs_perag *pag = cnt_cur->bc_ag.pag;
+ struct xfs_perag *pag = to_perag(cnt_cur->bc_group);
struct xfs_buf *bp = cnt_cur->bc_ag.agbp;
struct xfs_agf *agf = bp->b_addr;
xfs_extlen_t longest = 0;
@@ -799,7 +800,7 @@ xfs_agfl_verify(
* use it by using uncached buffers that don't have the perag attached
* so we can detect and avoid this problem.
*/
- if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno)
+ if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != pag_agno((bp->b_pag)))
return __this_address;
for (i = 0; i < xfs_agfl_size(mp); i++) {
@@ -879,13 +880,12 @@ xfs_alloc_read_agfl(
struct xfs_trans *tp,
struct xfs_buf **bpp)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_buf *bp;
int error;
- error = xfs_trans_read_buf(
- mp, tp, mp->m_ddev_targp,
- XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGFL_DADDR(mp)),
+ error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+ XFS_AG_DADDR(mp, pag_agno(pag), XFS_AGFL_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);
if (xfs_metadata_is_sick(error))
xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
@@ -1252,14 +1252,14 @@ xfs_alloc_ag_vextent_small(
if (fbno == NULLAGBLOCK)
goto out;
- xfs_extent_busy_reuse(args->mp, args->pag, fbno, 1,
+ xfs_extent_busy_reuse(pag_group(args->pag), fbno, 1,
(args->datatype & XFS_ALLOC_NOBUSY));
if (args->datatype & XFS_ALLOC_USERDATA) {
struct xfs_buf *bp;
error = xfs_trans_get_buf(args->tp, args->mp->m_ddev_targp,
- XFS_AGB_TO_DADDR(args->mp, args->agno, fbno),
+ xfs_agbno_to_daddr(args->pag, fbno),
args->mp->m_bsize, 0, &bp);
if (error)
goto error;
@@ -1365,7 +1365,8 @@ xfs_alloc_ag_vextent_exact(
*/
tbno = fbno;
tlen = flen;
- xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
+ xfs_extent_busy_trim(pag_group(args->pag), args->minlen, args->maxlen,
+ &tbno, &tlen, &busy_gen);
/*
* Give up if the start of the extent is busy, or the freespace isn't
@@ -1758,8 +1759,9 @@ restart:
* the allocation can be retried.
*/
trace_xfs_alloc_near_busy(args);
- error = xfs_extent_busy_flush(args->tp, args->pag,
- acur.busy_gen, alloc_flags);
+ error = xfs_extent_busy_flush(args->tp,
+ pag_group(args->pag), acur.busy_gen,
+ alloc_flags);
if (error)
goto out;
@@ -1874,8 +1876,9 @@ restart:
* the allocation can be retried.
*/
trace_xfs_alloc_size_busy(args);
- error = xfs_extent_busy_flush(args->tp, args->pag,
- busy_gen, alloc_flags);
+ error = xfs_extent_busy_flush(args->tp,
+ pag_group(args->pag), busy_gen,
+ alloc_flags);
if (error)
goto error0;
@@ -1973,8 +1976,9 @@ restart:
* the allocation can be retried.
*/
trace_xfs_alloc_size_busy(args);
- error = xfs_extent_busy_flush(args->tp, args->pag,
- busy_gen, alloc_flags);
+ error = xfs_extent_busy_flush(args->tp,
+ pag_group(args->pag), busy_gen,
+ alloc_flags);
if (error)
goto error0;
@@ -2037,7 +2041,6 @@ int
xfs_free_ag_extent(
struct xfs_trans *tp,
struct xfs_buf *agbp,
- xfs_agnumber_t agno,
xfs_agblock_t bno,
xfs_extlen_t len,
const struct xfs_owner_info *oinfo,
@@ -2358,19 +2361,19 @@ xfs_free_ag_extent(
* Update the freespace totals in the ag and superblock.
*/
error = xfs_alloc_update_counters(tp, agbp, len);
- xfs_ag_resv_free_extent(agbp->b_pag, type, tp, len);
+ xfs_ag_resv_free_extent(pag, type, tp, len);
if (error)
goto error0;
XFS_STATS_INC(mp, xs_freex);
XFS_STATS_ADD(mp, xs_freeb, len);
- trace_xfs_free_extent(mp, agno, bno, len, type, haveleft, haveright);
+ trace_xfs_free_extent(pag, bno, len, type, haveleft, haveright);
return 0;
error0:
- trace_xfs_free_extent(mp, agno, bno, len, type, -1, -1);
+ trace_xfs_free_extent(pag, bno, len, type, -1, -1);
if (bno_cur)
xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR);
if (cnt_cur)
@@ -2429,7 +2432,7 @@ xfs_alloc_longest_free_extent(
* reservations and AGFL rules in place, we can return this extent.
*/
if (pag->pagf_longest > delta)
- return min_t(xfs_extlen_t, pag->pag_mount->m_ag_max_usable,
+ return min_t(xfs_extlen_t, pag_mount(pag)->m_ag_max_usable,
pag->pagf_longest - delta);
/* Otherwise, let the caller try for 1 block if there's space. */
@@ -2612,7 +2615,7 @@ xfs_agfl_reset(
xfs_warn(mp,
"WARNING: Reset corrupted AGFL on AG %u. %d blocks leaked. "
"Please unmount and run xfs_repair.",
- pag->pag_agno, pag->pagf_flcount);
+ pag_agno(pag), pag->pagf_flcount);
agf->agf_flfirst = 0;
agf->agf_fllast = cpu_to_be32(xfs_agfl_size(mp) - 1);
@@ -2645,8 +2648,17 @@ xfs_defer_extent_free(
ASSERT(!isnullstartblock(bno));
ASSERT(!(free_flags & ~XFS_FREE_EXTENT_ALL_FLAGS));
- if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
- return -EFSCORRUPTED;
+ if (free_flags & XFS_FREE_EXTENT_REALTIME) {
+ if (type != XFS_AG_RESV_NONE) {
+ ASSERT(type == XFS_AG_RESV_NONE);
+ return -EFSCORRUPTED;
+ }
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_rtbext(mp, bno, len)))
+ return -EFSCORRUPTED;
+ } else {
+ if (XFS_IS_CORRUPT(mp, !xfs_verify_fsbext(mp, bno, len)))
+ return -EFSCORRUPTED;
+ }
xefi = kmem_cache_zalloc(xfs_extfree_item_cache,
GFP_KERNEL | __GFP_NOFAIL);
@@ -2655,6 +2667,8 @@ xfs_defer_extent_free(
xefi->xefi_agresv = type;
if (free_flags & XFS_FREE_EXTENT_SKIP_DISCARD)
xefi->xefi_flags |= XFS_EFI_SKIP_DISCARD;
+ if (free_flags & XFS_FREE_EXTENT_REALTIME)
+ xefi->xefi_flags |= XFS_EFI_REALTIME;
if (oinfo) {
ASSERT(oinfo->oi_offset == 0);
@@ -2934,9 +2948,8 @@ xfs_alloc_fix_freelist(
* Deferring the free disconnects freeing up the AGFL slot from
* freeing the block.
*/
- error = xfs_free_extent_later(tp,
- XFS_AGB_TO_FSB(mp, args->agno, bno), 1,
- &targs.oinfo, XFS_AG_RESV_AGFL, 0);
+ error = xfs_free_extent_later(tp, xfs_agbno_to_fsb(pag, bno),
+ 1, &targs.oinfo, XFS_AG_RESV_AGFL, 0);
if (error)
goto out_agbp_relse;
}
@@ -3156,8 +3169,6 @@ xfs_alloc_put_freelist(
logflags |= XFS_AGF_BTREEBLKS;
}
- xfs_alloc_log_agf(tp, agbp, logflags);
-
ASSERT(be32_to_cpu(agf->agf_flcount) <= xfs_agfl_size(mp));
agfl_bno = xfs_buf_to_agfl_bno(agflbp);
@@ -3190,7 +3201,7 @@ xfs_validate_ag_length(
* use it by using uncached buffers that don't have the perag attached
* so we can detect and avoid this problem.
*/
- if (bp->b_pag && seqno != bp->b_pag->pag_agno)
+ if (bp->b_pag && seqno != pag_agno(bp->b_pag))
return __this_address;
/*
@@ -3359,13 +3370,13 @@ xfs_read_agf(
int flags,
struct xfs_buf **agfbpp)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
int error;
- trace_xfs_read_agf(pag->pag_mount, pag->pag_agno);
+ trace_xfs_read_agf(pag);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
- XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGF_DADDR(mp)),
+ XFS_AG_DADDR(mp, pag_agno(pag), XFS_AGF_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), flags, agfbpp, &xfs_agf_buf_ops);
if (xfs_metadata_is_sick(error))
xfs_ag_mark_sick(pag, XFS_SICK_AG_AGF);
@@ -3388,12 +3399,13 @@ xfs_alloc_read_agf(
int flags,
struct xfs_buf **agfbpp)
{
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_buf *agfbp;
struct xfs_agf *agf;
int error;
int allocbt_blks;
- trace_xfs_alloc_read_agf(pag->pag_mount, pag->pag_agno);
+ trace_xfs_alloc_read_agf(pag);
/* We don't support trylock when freeing. */
ASSERT((flags & (XFS_ALLOC_FLAG_FREEING | XFS_ALLOC_FLAG_TRYLOCK)) !=
@@ -3414,7 +3426,7 @@ xfs_alloc_read_agf(
pag->pagf_cnt_level = be32_to_cpu(agf->agf_cnt_level);
pag->pagf_rmap_level = be32_to_cpu(agf->agf_rmap_level);
pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
- if (xfs_agfl_needs_reset(pag->pag_mount, agf))
+ if (xfs_agfl_needs_reset(mp, agf))
set_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
else
clear_bit(XFS_AGSTATE_AGFL_NEEDS_RESET, &pag->pag_opstate);
@@ -3427,16 +3439,15 @@ xfs_alloc_read_agf(
* counter only tracks non-root blocks.
*/
allocbt_blks = pag->pagf_btreeblks;
- if (xfs_has_rmapbt(pag->pag_mount))
+ if (xfs_has_rmapbt(mp))
allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
if (allocbt_blks > 0)
- atomic64_add(allocbt_blks,
- &pag->pag_mount->m_allocbt_blks);
+ atomic64_add(allocbt_blks, &mp->m_allocbt_blks);
set_bit(XFS_AGSTATE_AGF_INIT, &pag->pag_opstate);
}
#ifdef DEBUG
- else if (!xfs_is_shutdown(pag->pag_mount)) {
+ else if (!xfs_is_shutdown(mp)) {
ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks));
ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks));
ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount));
@@ -3597,7 +3608,7 @@ xfs_alloc_vextent_finish(
goto out_drop_perag;
}
- args->fsbno = XFS_AGB_TO_FSB(mp, args->agno, args->agbno);
+ args->fsbno = xfs_agbno_to_fsb(args->pag, args->agbno);
ASSERT(args->len >= args->minlen);
ASSERT(args->len <= args->maxlen);
@@ -3618,8 +3629,8 @@ xfs_alloc_vextent_finish(
if (error)
goto out_drop_perag;
- ASSERT(!xfs_extent_busy_search(mp, args->pag, args->agbno,
- args->len));
+ ASSERT(!xfs_extent_busy_search(pag_group(args->pag),
+ args->agbno, args->len));
}
xfs_ag_resv_alloc_extent(args->pag, args->resv, args);
@@ -3649,21 +3660,20 @@ xfs_alloc_vextent_this_ag(
struct xfs_alloc_arg *args,
xfs_agnumber_t agno)
{
- struct xfs_mount *mp = args->mp;
xfs_agnumber_t minimum_agno;
uint32_t alloc_flags = 0;
int error;
ASSERT(args->pag != NULL);
- ASSERT(args->pag->pag_agno == agno);
+ ASSERT(pag_agno(args->pag) == agno);
args->agno = agno;
args->agbno = 0;
trace_xfs_alloc_vextent_this_ag(args);
- error = xfs_alloc_vextent_check_args(args, XFS_AGB_TO_FSB(mp, agno, 0),
- &minimum_agno);
+ error = xfs_alloc_vextent_check_args(args,
+ xfs_agbno_to_fsb(args->pag, 0), &minimum_agno);
if (error) {
if (error == -ENOSPC)
return 0;
@@ -3868,7 +3878,7 @@ xfs_alloc_vextent_exact_bno(
int error;
ASSERT(args->pag != NULL);
- ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
+ ASSERT(pag_agno(args->pag) == XFS_FSB_TO_AGNO(mp, target));
args->agno = XFS_FSB_TO_AGNO(mp, target);
args->agbno = XFS_FSB_TO_AGBNO(mp, target);
@@ -3907,7 +3917,7 @@ xfs_alloc_vextent_near_bno(
int error;
if (!needs_perag)
- ASSERT(args->pag->pag_agno == XFS_FSB_TO_AGNO(mp, target));
+ ASSERT(pag_agno(args->pag) == XFS_FSB_TO_AGNO(mp, target));
args->agno = XFS_FSB_TO_AGNO(mp, target);
args->agbno = XFS_FSB_TO_AGBNO(mp, target);
@@ -3944,7 +3954,7 @@ xfs_free_extent_fix_freelist(
memset(&args, 0, sizeof(struct xfs_alloc_arg));
args.tp = tp;
args.mp = tp->t_mountp;
- args.agno = pag->pag_agno;
+ args.agno = pag_agno(pag);
args.pag = pag;
/*
@@ -4012,14 +4022,13 @@ __xfs_free_extent(
goto err_release;
}
- error = xfs_free_ag_extent(tp, agbp, pag->pag_agno, agbno, len, oinfo,
- type);
+ error = xfs_free_ag_extent(tp, agbp, agbno, len, oinfo, type);
if (error)
goto err_release;
if (skip_discard)
busy_flags |= XFS_EXTENT_BUSY_SKIP_DISCARD;
- xfs_extent_busy_insert(tp, pag, agbno, len, busy_flags);
+ xfs_extent_busy_insert(tp, pag_group(pag), agbno, len, busy_flags);
return 0;
err_release:
@@ -4044,7 +4053,7 @@ xfs_alloc_query_range_helper(
xfs_failaddr_t fa;
xfs_alloc_btrec_to_irec(rec, &irec);
- fa = xfs_alloc_check_irec(cur->bc_ag.pag, &irec);
+ fa = xfs_alloc_check_irec(to_perag(cur->bc_group), &irec);
if (fa)
return xfs_alloc_complain_bad_rec(cur, fa, &irec);
diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h
index 0165452e7cd0..50ef79a1ed41 100644
--- a/fs/xfs/libxfs/xfs_alloc.h
+++ b/fs/xfs/libxfs/xfs_alloc.h
@@ -79,9 +79,8 @@ int xfs_alloc_put_freelist(struct xfs_perag *pag, struct xfs_trans *tp,
struct xfs_buf *agfbp, struct xfs_buf *agflbp,
xfs_agblock_t bno, int btreeblk);
int xfs_free_ag_extent(struct xfs_trans *tp, struct xfs_buf *agbp,
- xfs_agnumber_t agno, xfs_agblock_t bno,
- xfs_extlen_t len, const struct xfs_owner_info *oinfo,
- enum xfs_ag_resv_type type);
+ xfs_agblock_t bno, xfs_extlen_t len,
+ const struct xfs_owner_info *oinfo, enum xfs_ag_resv_type type);
/*
* Compute and fill in value of m_alloc_maxlevels.
@@ -238,7 +237,11 @@ int xfs_free_extent_later(struct xfs_trans *tp, xfs_fsblock_t bno,
/* Don't issue a discard for the blocks freed. */
#define XFS_FREE_EXTENT_SKIP_DISCARD (1U << 0)
-#define XFS_FREE_EXTENT_ALL_FLAGS (XFS_FREE_EXTENT_SKIP_DISCARD)
+/* Free blocks on the realtime device. */
+#define XFS_FREE_EXTENT_REALTIME (1U << 1)
+
+#define XFS_FREE_EXTENT_ALL_FLAGS (XFS_FREE_EXTENT_SKIP_DISCARD | \
+ XFS_FREE_EXTENT_REALTIME)
/*
* List of extents to be free "later".
@@ -249,7 +252,7 @@ struct xfs_extent_free_item {
uint64_t xefi_owner;
xfs_fsblock_t xefi_startblock;/* starting fs block number */
xfs_extlen_t xefi_blockcount;/* number of blocks in extent */
- struct xfs_perag *xefi_pag;
+ struct xfs_group *xefi_group;
unsigned int xefi_flags;
enum xfs_ag_resv_type xefi_agresv;
};
@@ -258,6 +261,12 @@ struct xfs_extent_free_item {
#define XFS_EFI_ATTR_FORK (1U << 1) /* freeing attr fork block */
#define XFS_EFI_BMBT_BLOCK (1U << 2) /* freeing bmap btree block */
#define XFS_EFI_CANCELLED (1U << 3) /* dont actually free the space */
+#define XFS_EFI_REALTIME (1U << 4) /* freeing realtime extent */
+/*
+ * Return true when XFS_EFI_REALTIME is set in @xefi->xefi_flags, i.e. the
+ * deferred free item is flagged as freeing a realtime extent.
+ */
+static inline bool xfs_efi_is_realtime(const struct xfs_extent_free_item *xefi)
+{
+ return xefi->xefi_flags & XFS_EFI_REALTIME;
+}
struct xfs_alloc_autoreap {
struct xfs_defer_pending *dfp;
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index aada676eee51..a4ac37ba5d51 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -28,7 +28,7 @@ xfs_bnobt_dup_cursor(
struct xfs_btree_cur *cur)
{
return xfs_bnobt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp,
- cur->bc_ag.pag);
+ to_perag(cur->bc_group));
}
STATIC struct xfs_btree_cur *
@@ -36,29 +36,29 @@ xfs_cntbt_dup_cursor(
struct xfs_btree_cur *cur)
{
return xfs_cntbt_init_cursor(cur->bc_mp, cur->bc_tp, cur->bc_ag.agbp,
- cur->bc_ag.pag);
+ to_perag(cur->bc_group));
}
-
STATIC void
xfs_allocbt_set_root(
struct xfs_btree_cur *cur,
const union xfs_btree_ptr *ptr,
int inc)
{
- struct xfs_buf *agbp = cur->bc_ag.agbp;
- struct xfs_agf *agf = agbp->b_addr;
+ struct xfs_perag *pag = to_perag(cur->bc_group);
+ struct xfs_buf *agbp = cur->bc_ag.agbp;
+ struct xfs_agf *agf = agbp->b_addr;
ASSERT(ptr->s != 0);
if (xfs_btree_is_bno(cur->bc_ops)) {
agf->agf_bno_root = ptr->s;
be32_add_cpu(&agf->agf_bno_level, inc);
- cur->bc_ag.pag->pagf_bno_level += inc;
+ pag->pagf_bno_level += inc;
} else {
agf->agf_cnt_root = ptr->s;
be32_add_cpu(&agf->agf_cnt_level, inc);
- cur->bc_ag.pag->pagf_cnt_level += inc;
+ pag->pagf_cnt_level += inc;
}
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
@@ -75,7 +75,7 @@ xfs_allocbt_alloc_block(
xfs_agblock_t bno;
/* Allocate the new block from the freelist. If we can't, give up. */
- error = xfs_alloc_get_freelist(cur->bc_ag.pag, cur->bc_tp,
+ error = xfs_alloc_get_freelist(to_perag(cur->bc_group), cur->bc_tp,
cur->bc_ag.agbp, &bno, 1);
if (error)
return error;
@@ -86,7 +86,7 @@ xfs_allocbt_alloc_block(
}
atomic64_inc(&cur->bc_mp->m_allocbt_blks);
- xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.pag, bno, 1, false);
+ xfs_extent_busy_reuse(cur->bc_group, bno, 1, false);
new->s = cpu_to_be32(bno);
@@ -104,13 +104,13 @@ xfs_allocbt_free_block(
int error;
bno = xfs_daddr_to_agbno(cur->bc_mp, xfs_buf_daddr(bp));
- error = xfs_alloc_put_freelist(cur->bc_ag.pag, cur->bc_tp, agbp, NULL,
- bno, 1);
+ error = xfs_alloc_put_freelist(to_perag(cur->bc_group), cur->bc_tp,
+ agbp, NULL, bno, 1);
if (error)
return error;
atomic64_dec(&cur->bc_mp->m_allocbt_blks);
- xfs_extent_busy_insert(cur->bc_tp, agbp->b_pag, bno, 1,
+ xfs_extent_busy_insert(cur->bc_tp, pag_group(agbp->b_pag), bno, 1,
XFS_EXTENT_BUSY_SKIP_DISCARD);
return 0;
}
@@ -178,7 +178,7 @@ xfs_allocbt_init_ptr_from_cur(
{
struct xfs_agf *agf = cur->bc_ag.agbp->b_addr;
- ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));
+ ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agf->agf_seqno));
if (xfs_btree_is_bno(cur->bc_ops))
ptr->s = agf->agf_bno_root;
@@ -492,7 +492,7 @@ xfs_bnobt_init_cursor(
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_bnobt_ops,
mp->m_alloc_maxlevels, xfs_allocbt_cur_cache);
- cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_group = xfs_group_hold(pag_group(pag));
cur->bc_ag.agbp = agbp;
if (agbp) {
struct xfs_agf *agf = agbp->b_addr;
@@ -518,7 +518,7 @@ xfs_cntbt_init_cursor(
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_cntbt_ops,
mp->m_alloc_maxlevels, xfs_allocbt_cur_cache);
- cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_group = xfs_group_hold(pag_group(pag));
cur->bc_ag.agbp = agbp;
if (agbp) {
struct xfs_agf *agf = agbp->b_addr;
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index c63da14eee04..17875ad865f5 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -1004,7 +1004,10 @@ xfs_attr_add_fork(
unsigned int blks; /* space reservation */
int error; /* error return value */
- ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
+ if (xfs_is_metadir_inode(ip))
+ ASSERT(XFS_IS_DQDETACHED(ip));
+ else
+ ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
blks = XFS_ADDAFORK_SPACE_RES(mp);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 36dd08d13293..9052839305e2 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -40,6 +40,7 @@
#include "xfs_bmap_item.h"
#include "xfs_symlink_remote.h"
#include "xfs_inode_util.h"
+#include "xfs_rtgroup.h"
struct kmem_cache *xfs_bmap_intent_cache;
@@ -1042,7 +1043,10 @@ xfs_bmap_add_attrfork(
int error; /* error return value */
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
- ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
+ if (xfs_is_metadir_inode(ip))
+ ASSERT(XFS_IS_DQDETACHED(ip));
+ else
+ ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
ASSERT(!xfs_inode_has_attr_fork(ip));
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
@@ -1423,6 +1427,24 @@ xfs_bmap_last_offset(
* Extent tree manipulation functions used during allocation.
*/
+static inline bool
+xfs_bmap_same_rtgroup(
+ struct xfs_inode *ip,
+ int whichfork,
+ struct xfs_bmbt_irec *left,
+ struct xfs_bmbt_irec *right)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ /*
+ * Return false only when @whichfork of @ip is a realtime fork on a
+ * filesystem with rtgroups and the start blocks of @left and @right
+ * convert to different rtgroup numbers; return true in every other
+ * case.  The callers in this file AND the result into their
+ * left/right contiguity and merge tests, so mappings that start in
+ * different rtgroups are never treated as mergeable.
+ */
+ if (xfs_ifork_is_realtime(ip, whichfork) && xfs_has_rtgroups(mp)) {
+ if (xfs_rtb_to_rgno(mp, left->br_startblock) !=
+ xfs_rtb_to_rgno(mp, right->br_startblock))
+ return false;
+ }
+
+ return true;
+}
+
/*
* Convert a delayed allocation to a real allocation.
*/
@@ -1492,7 +1514,8 @@ xfs_bmap_add_extent_delay_real(
LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
LEFT.br_state == new->br_state &&
- LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
+ LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
+ xfs_bmap_same_rtgroup(bma->ip, whichfork, &LEFT, new))
state |= BMAP_LEFT_CONTIG;
/*
@@ -1516,7 +1539,8 @@ xfs_bmap_add_extent_delay_real(
(BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
BMAP_RIGHT_FILLING) ||
LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
- <= XFS_MAX_BMBT_EXTLEN))
+ <= XFS_MAX_BMBT_EXTLEN) &&
+ xfs_bmap_same_rtgroup(bma->ip, whichfork, new, &RIGHT))
state |= BMAP_RIGHT_CONTIG;
error = 0;
@@ -2061,7 +2085,8 @@ xfs_bmap_add_extent_unwritten_real(
LEFT.br_startoff + LEFT.br_blockcount == new->br_startoff &&
LEFT.br_startblock + LEFT.br_blockcount == new->br_startblock &&
LEFT.br_state == new->br_state &&
- LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
+ LEFT.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
+ xfs_bmap_same_rtgroup(ip, whichfork, &LEFT, new))
state |= BMAP_LEFT_CONTIG;
/*
@@ -2085,7 +2110,8 @@ xfs_bmap_add_extent_unwritten_real(
(BMAP_LEFT_CONTIG | BMAP_LEFT_FILLING |
BMAP_RIGHT_FILLING) ||
LEFT.br_blockcount + new->br_blockcount + RIGHT.br_blockcount
- <= XFS_MAX_BMBT_EXTLEN))
+ <= XFS_MAX_BMBT_EXTLEN) &&
+ xfs_bmap_same_rtgroup(ip, whichfork, new, &RIGHT))
state |= BMAP_RIGHT_CONTIG;
/*
@@ -2594,7 +2620,8 @@ xfs_bmap_add_extent_hole_delay(
*/
if ((state & BMAP_LEFT_VALID) && (state & BMAP_LEFT_DELAY) &&
left.br_startoff + left.br_blockcount == new->br_startoff &&
- left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
+ left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
+ xfs_bmap_same_rtgroup(ip, whichfork, &left, new))
state |= BMAP_LEFT_CONTIG;
if ((state & BMAP_RIGHT_VALID) && (state & BMAP_RIGHT_DELAY) &&
@@ -2602,7 +2629,8 @@ xfs_bmap_add_extent_hole_delay(
new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
(!(state & BMAP_LEFT_CONTIG) ||
(left.br_blockcount + new->br_blockcount +
- right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)))
+ right.br_blockcount <= XFS_MAX_BMBT_EXTLEN)) &&
+ xfs_bmap_same_rtgroup(ip, whichfork, new, &right))
state |= BMAP_RIGHT_CONTIG;
/*
@@ -2745,7 +2773,8 @@ xfs_bmap_add_extent_hole_real(
left.br_startoff + left.br_blockcount == new->br_startoff &&
left.br_startblock + left.br_blockcount == new->br_startblock &&
left.br_state == new->br_state &&
- left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN)
+ left.br_blockcount + new->br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
+ xfs_bmap_same_rtgroup(ip, whichfork, &left, new))
state |= BMAP_LEFT_CONTIG;
if ((state & BMAP_RIGHT_VALID) && !(state & BMAP_RIGHT_DELAY) &&
@@ -2755,7 +2784,8 @@ xfs_bmap_add_extent_hole_real(
new->br_blockcount + right.br_blockcount <= XFS_MAX_BMBT_EXTLEN &&
(!(state & BMAP_LEFT_CONTIG) ||
left.br_blockcount + new->br_blockcount +
- right.br_blockcount <= XFS_MAX_BMBT_EXTLEN))
+ right.br_blockcount <= XFS_MAX_BMBT_EXTLEN) &&
+ xfs_bmap_same_rtgroup(ip, whichfork, new, &right))
state |= BMAP_RIGHT_CONTIG;
error = 0;
@@ -3121,8 +3151,15 @@ xfs_bmap_adjacent_valid(
struct xfs_mount *mp = ap->ip->i_mount;
if (XFS_IS_REALTIME_INODE(ap->ip) &&
- (ap->datatype & XFS_ALLOC_USERDATA))
- return x < mp->m_sb.sb_rblocks;
+ (ap->datatype & XFS_ALLOC_USERDATA)) {
+ if (!xfs_has_rtgroups(mp))
+ return x < mp->m_sb.sb_rblocks;
+
+ return xfs_rtb_to_rgno(mp, x) == xfs_rtb_to_rgno(mp, y) &&
+ xfs_rtb_to_rgno(mp, x) < mp->m_sb.sb_rgcount &&
+ xfs_rtb_to_rtx(mp, x) < mp->m_sb.sb_rgextents;
+
+ }
return XFS_FSB_TO_AGNO(mp, x) == XFS_FSB_TO_AGNO(mp, y) &&
XFS_FSB_TO_AGNO(mp, x) < mp->m_sb.sb_agcount &&
@@ -3280,7 +3317,7 @@ xfs_bmap_longest_free_extent(
}
longest = xfs_alloc_longest_free_extent(pag,
- xfs_alloc_min_freelist(pag->pag_mount, pag),
+ xfs_alloc_min_freelist(pag_mount(pag), pag),
xfs_ag_resv_needed(pag, XFS_AG_RESV_NONE));
if (*blen < longest)
*blen = longest;
@@ -4091,7 +4128,7 @@ retry:
fdblocks = indlen;
if (XFS_IS_REALTIME_INODE(ip)) {
- error = xfs_dec_frextents(mp, xfs_rtb_to_rtx(mp, alen));
+ error = xfs_dec_frextents(mp, xfs_blen_to_rtbxlen(mp, alen));
if (error)
goto out_unreserve_quota;
} else {
@@ -4126,7 +4163,7 @@ retry:
out_unreserve_frextents:
if (XFS_IS_REALTIME_INODE(ip))
- xfs_add_frextents(mp, xfs_rtb_to_rtx(mp, alen));
+ xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, alen));
out_unreserve_quota:
if (XFS_IS_QUOTA_ON(mp))
xfs_quota_unreserve_blkres(ip, alen);
@@ -5034,7 +5071,7 @@ xfs_bmap_del_extent_delay(
fdblocks = da_diff;
if (isrt)
- xfs_add_frextents(mp, xfs_rtb_to_rtx(mp, del->br_blockcount));
+ xfs_add_frextents(mp, xfs_blen_to_rtbxlen(mp, del->br_blockcount));
else
fdblocks += del->br_blockcount;
@@ -5113,6 +5150,34 @@ xfs_bmap_del_extent_cow(
ip->i_delayed_blks -= del->br_blockcount;
}
+static int
+xfs_bmap_free_rtblocks(
+ struct xfs_trans *tp,
+ struct xfs_bmbt_irec *del)
+{
+ struct xfs_rtgroup *rtg;
+ int error;
+ /*
+ * Free the realtime blocks mapped by @del right away by calling
+ * xfs_rtfree_blocks() within @tp.  A reference to rtgroup 0 is grabbed
+ * for the duration of the call; -EIO is returned if the grab fails.
+ * Otherwise the result of xfs_rtfree_blocks() is returned.
+ *
+ * NOTE(review): the caller in xfs_bmap_del_extent_real() only takes
+ * this path when rtgroups is not enabled, which is presumably why
+ * group number 0 is hard-coded here -- confirm.
+ */
+ rtg = xfs_rtgroup_grab(tp->t_mountp, 0);
+ if (!rtg)
+ return -EIO;
+
+ /*
+ * Ensure the bitmap and summary inodes are locked and joined to the
+ * transaction before modifying them.  XFS_TRANS_RTBITMAP_LOCKED is set
+ * on @tp when that is done, so the lock and join are skipped whenever
+ * the flag is already present.
+ */
+ if (!(tp->t_flags & XFS_TRANS_RTBITMAP_LOCKED)) {
+ tp->t_flags |= XFS_TRANS_RTBITMAP_LOCKED;
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP);
+ xfs_rtgroup_trans_join(tp, rtg, XFS_RTGLOCK_BITMAP);
+ }
+
+ error = xfs_rtfree_blocks(tp, rtg, del->br_startblock,
+ del->br_blockcount);
+ xfs_rtgroup_rele(rtg); /* released whether or not the free succeeded */
+ return error;
+}
+
/*
* Called by xfs_bmapi to update file extent records and the btree
* after removing space.
@@ -5325,20 +5390,12 @@ xfs_bmap_del_extent_real(
* If we need to, add to list of extents to delete.
*/
if (!(bflags & XFS_BMAPI_REMAP)) {
+ bool isrt = xfs_ifork_is_realtime(ip, whichfork);
+
if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) {
xfs_refcount_decrease_extent(tp, del);
- } else if (xfs_ifork_is_realtime(ip, whichfork)) {
- /*
- * Ensure the bitmap and summary inodes are locked
- * and joined to the transaction before modifying them.
- */
- if (!(tp->t_flags & XFS_TRANS_RTBITMAP_LOCKED)) {
- tp->t_flags |= XFS_TRANS_RTBITMAP_LOCKED;
- xfs_rtbitmap_lock(mp);
- xfs_rtbitmap_trans_join(tp);
- }
- error = xfs_rtfree_blocks(tp, del->br_startblock,
- del->br_blockcount);
+ } else if (isrt && !xfs_has_rtgroups(mp)) {
+ error = xfs_bmap_free_rtblocks(tp, del);
} else {
unsigned int efi_flags = 0;
@@ -5346,6 +5403,19 @@ xfs_bmap_del_extent_real(
del->br_state == XFS_EXT_UNWRITTEN)
efi_flags |= XFS_FREE_EXTENT_SKIP_DISCARD;
+ /*
+ * Historically, we did not use EFIs to free realtime
+ * extents. However, when reverse mapping is enabled,
+ * we must maintain the same order of operations as the
+ * data device, which is: Remove the file mapping,
+ * remove the reverse mapping, and then free the
+ * blocks. Reflink for realtime volumes requires the
+ * same sort of ordering. Both features rely on
+ * rtgroups, so let's gate rt EFI usage on rtgroups.
+ */
+ if (isrt)
+ efi_flags |= XFS_FREE_EXTENT_REALTIME;
+
error = xfs_free_extent_later(tp, del->br_startblock,
del->br_blockcount, NULL,
XFS_AG_RESV_NONE, efi_flags);
@@ -5694,6 +5764,8 @@ xfs_bunmapi(
*/
STATIC bool
xfs_bmse_can_merge(
+ struct xfs_inode *ip,
+ int whichfork,
struct xfs_bmbt_irec *left, /* preceding extent */
struct xfs_bmbt_irec *got, /* current extent to shift */
xfs_fileoff_t shift) /* shift fsb */
@@ -5709,7 +5781,8 @@ xfs_bmse_can_merge(
if ((left->br_startoff + left->br_blockcount != startoff) ||
(left->br_startblock + left->br_blockcount != got->br_startblock) ||
(left->br_state != got->br_state) ||
- (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN))
+ (left->br_blockcount + got->br_blockcount > XFS_MAX_BMBT_EXTLEN) ||
+ !xfs_bmap_same_rtgroup(ip, whichfork, left, got))
return false;
return true;
@@ -5745,7 +5818,7 @@ xfs_bmse_merge(
blockcount = left->br_blockcount + got->br_blockcount;
xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_ILOCK_EXCL);
- ASSERT(xfs_bmse_can_merge(left, got, shift));
+ ASSERT(xfs_bmse_can_merge(ip, whichfork, left, got, shift));
new = *left;
new.br_blockcount = blockcount;
@@ -5907,7 +5980,8 @@ xfs_bmap_collapse_extents(
goto del_cursor;
}
- if (xfs_bmse_can_merge(&prev, &got, offset_shift_fsb)) {
+ if (xfs_bmse_can_merge(ip, whichfork, &prev, &got,
+ offset_shift_fsb)) {
error = xfs_bmse_merge(tp, ip, whichfork,
offset_shift_fsb, &icur, &got, &prev,
cur, &logflags);
@@ -6043,7 +6117,8 @@ xfs_bmap_insert_extents(
* never find mergeable extents in this scenario. Check anyways
* and warn if we encounter two extents that could be one.
*/
- if (xfs_bmse_can_merge(&got, &next, offset_shift_fsb))
+ if (xfs_bmse_can_merge(ip, whichfork, &got, &next,
+ offset_shift_fsb))
WARN_ON_ONCE(1);
}
diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h
index 7592d46e97c6..4b721d935994 100644
--- a/fs/xfs/libxfs/xfs_bmap.h
+++ b/fs/xfs/libxfs/xfs_bmap.h
@@ -248,7 +248,7 @@ struct xfs_bmap_intent {
enum xfs_bmap_intent_type bi_type;
int bi_whichfork;
struct xfs_inode *bi_owner;
- struct xfs_perag *bi_pag;
+ struct xfs_group *bi_group;
struct xfs_bmbt_irec bi_bmap;
};
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index a5c4af148853..2b5fc5fd1643 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -225,7 +225,7 @@ __xfs_btree_check_agblock(
struct xfs_buf *bp)
{
struct xfs_mount *mp = cur->bc_mp;
- struct xfs_perag *pag = cur->bc_ag.pag;
+ struct xfs_perag *pag = to_perag(cur->bc_group);
xfs_failaddr_t fa;
xfs_agblock_t agbno;
@@ -331,7 +331,7 @@ __xfs_btree_check_ptr(
return -EFSCORRUPTED;
break;
case XFS_BTREE_TYPE_AG:
- if (!xfs_verify_agbno(cur->bc_ag.pag,
+ if (!xfs_verify_agbno(to_perag(cur->bc_group),
be32_to_cpu((&ptr->s)[index])))
return -EFSCORRUPTED;
break;
@@ -372,7 +372,7 @@ xfs_btree_check_ptr(
case XFS_BTREE_TYPE_AG:
xfs_err(cur->bc_mp,
"AG %u: Corrupt %sbt pointer at level %d index %d.",
- cur->bc_ag.pag->pag_agno, cur->bc_ops->name,
+ cur->bc_group->xg_gno, cur->bc_ops->name,
level, index);
break;
}
@@ -523,20 +523,8 @@ xfs_btree_del_cursor(
ASSERT(!xfs_btree_is_bmap(cur->bc_ops) || cur->bc_bmap.allocated == 0 ||
xfs_is_shutdown(cur->bc_mp) || error != 0);
- switch (cur->bc_ops->type) {
- case XFS_BTREE_TYPE_AG:
- if (cur->bc_ag.pag)
- xfs_perag_put(cur->bc_ag.pag);
- break;
- case XFS_BTREE_TYPE_INODE:
- /* nothing to do */
- break;
- case XFS_BTREE_TYPE_MEM:
- if (cur->bc_mem.pag)
- xfs_perag_put(cur->bc_mem.pag);
- break;
- }
-
+ if (cur->bc_group)
+ xfs_group_put(cur->bc_group);
kmem_cache_free(cur->bc_cache, cur);
}
@@ -1017,22 +1005,22 @@ xfs_btree_readahead_agblock(
struct xfs_btree_block *block)
{
struct xfs_mount *mp = cur->bc_mp;
- xfs_agnumber_t agno = cur->bc_ag.pag->pag_agno;
+ struct xfs_perag *pag = to_perag(cur->bc_group);
xfs_agblock_t left = be32_to_cpu(block->bb_u.s.bb_leftsib);
xfs_agblock_t right = be32_to_cpu(block->bb_u.s.bb_rightsib);
int rval = 0;
if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
xfs_buf_readahead(mp->m_ddev_targp,
- XFS_AGB_TO_DADDR(mp, agno, left),
- mp->m_bsize, cur->bc_ops->buf_ops);
+ xfs_agbno_to_daddr(pag, left), mp->m_bsize,
+ cur->bc_ops->buf_ops);
rval++;
}
if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
xfs_buf_readahead(mp->m_ddev_targp,
- XFS_AGB_TO_DADDR(mp, agno, right),
- mp->m_bsize, cur->bc_ops->buf_ops);
+ xfs_agbno_to_daddr(pag, right), mp->m_bsize,
+ cur->bc_ops->buf_ops);
rval++;
}
@@ -1091,7 +1079,7 @@ xfs_btree_ptr_to_daddr(
switch (cur->bc_ops->type) {
case XFS_BTREE_TYPE_AG:
- *daddr = XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_ag.pag->pag_agno,
+ *daddr = xfs_agbno_to_daddr(to_perag(cur->bc_group),
be32_to_cpu(ptr->s));
break;
case XFS_BTREE_TYPE_INODE:
@@ -1313,7 +1301,7 @@ xfs_btree_owner(
case XFS_BTREE_TYPE_INODE:
return cur->bc_ino.ip->i_ino;
case XFS_BTREE_TYPE_AG:
- return cur->bc_ag.pag->pag_agno;
+ return cur->bc_group->xg_gno;
default:
ASSERT(0);
return 0;
@@ -4745,7 +4733,7 @@ xfs_btree_agblock_v5hdr_verify(
return __this_address;
if (block->bb_u.s.bb_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
return __this_address;
- if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag->pag_agno)
+ if (pag && be32_to_cpu(block->bb_u.s.bb_owner) != pag_agno(pag))
return __this_address;
return NULL;
}
diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h
index 10b7ddc3b2b3..3b739459ebb0 100644
--- a/fs/xfs/libxfs/xfs_btree.h
+++ b/fs/xfs/libxfs/xfs_btree.h
@@ -254,6 +254,7 @@ struct xfs_btree_cur
union xfs_btree_irec bc_rec; /* current insert/search record value */
uint8_t bc_nlevels; /* number of levels in the tree */
uint8_t bc_maxlevels; /* maximum levels for this btree type */
+ struct xfs_group *bc_group;
/* per-type information */
union {
@@ -264,13 +265,11 @@ struct xfs_btree_cur
struct xbtree_ifakeroot *ifake; /* for staging cursor */
} bc_ino;
struct {
- struct xfs_perag *pag;
struct xfs_buf *agbp;
struct xbtree_afakeroot *afake; /* for staging cursor */
} bc_ag;
struct {
struct xfbtree *xfbtree;
- struct xfs_perag *pag;
} bc_mem;
};
diff --git a/fs/xfs/libxfs/xfs_btree_mem.c b/fs/xfs/libxfs/xfs_btree_mem.c
index 036061fe32cc..df3d613675a1 100644
--- a/fs/xfs/libxfs/xfs_btree_mem.c
+++ b/fs/xfs/libxfs/xfs_btree_mem.c
@@ -57,10 +57,8 @@ xfbtree_dup_cursor(
ncur->bc_flags = cur->bc_flags;
ncur->bc_nlevels = cur->bc_nlevels;
ncur->bc_mem.xfbtree = cur->bc_mem.xfbtree;
-
- if (cur->bc_mem.pag)
- ncur->bc_mem.pag = xfs_perag_hold(cur->bc_mem.pag);
-
+ if (cur->bc_group)
+ ncur->bc_group = xfs_group_hold(cur->bc_group);
return ncur;
}
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 2cd212ad2c1d..5b377cbbb1f7 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -846,6 +846,12 @@ xfs_defer_add(
ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+ if (!ops->finish_item) {
+ ASSERT(ops->finish_item != NULL);
+ xfs_force_shutdown(tp->t_mountp, SHUTDOWN_CORRUPT_INCORE);
+ return NULL;
+ }
+
dfp = xfs_defer_find_last(tp, ops);
if (!dfp || !xfs_defer_can_append(dfp, ops))
dfp = xfs_defer_alloc(&tp->t_dfops, ops);
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index 8b338031e487..ec51b8465e61 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -71,6 +71,7 @@ extern const struct xfs_defer_op_type xfs_refcount_update_defer_type;
extern const struct xfs_defer_op_type xfs_rmap_update_defer_type;
extern const struct xfs_defer_op_type xfs_extent_free_defer_type;
extern const struct xfs_defer_op_type xfs_agfl_free_defer_type;
+extern const struct xfs_defer_op_type xfs_rtextent_free_defer_type;
extern const struct xfs_defer_op_type xfs_attr_defer_type;
extern const struct xfs_defer_op_type xfs_exchmaps_defer_type;
diff --git a/fs/xfs/libxfs/xfs_dquot_buf.c b/fs/xfs/libxfs/xfs_dquot_buf.c
index 15a362e2f5ea..dceef2abd4e2 100644
--- a/fs/xfs/libxfs/xfs_dquot_buf.c
+++ b/fs/xfs/libxfs/xfs_dquot_buf.c
@@ -16,6 +16,9 @@
#include "xfs_trans.h"
#include "xfs_qm.h"
#include "xfs_error.h"
+#include "xfs_health.h"
+#include "xfs_metadir.h"
+#include "xfs_metafile.h"
int
xfs_calc_dquots_per_chunk(
@@ -323,3 +326,190 @@ xfs_dquot_to_disk_ts(
return cpu_to_be32(t);
}
+/*
+ * Map a quota type to the XFS_SICK_FS_* mask that xfs_dqinode_load() passes
+ * to xfs_fs_mark_sick(): user, group and project quota map to
+ * XFS_SICK_FS_UQUOTA, XFS_SICK_FS_GQUOTA and XFS_SICK_FS_PQUOTA respectively.
+ * Any other @type falls out of the switch, trips ASSERT(0) and yields 0.
+ */
+inline unsigned int
+xfs_dqinode_sick_mask(xfs_dqtype_t type)
+{
+ switch (type) {
+ case XFS_DQTYPE_USER:
+ return XFS_SICK_FS_UQUOTA;
+ case XFS_DQTYPE_GROUP:
+ return XFS_SICK_FS_GQUOTA;
+ case XFS_DQTYPE_PROJ:
+ return XFS_SICK_FS_PQUOTA;
+ }
+
+ ASSERT(0);
+ return 0;
+}
+
+/*
+ * Load the inode for a given type of quota, assuming that the sb fields have
+ * been sorted out.  This is not true when switching quota types on a V4
+ * filesystem, so do not use this function for that.  If metadir is enabled,
+ * @dp must be the /quota metadir.
+ *
+ * @tp:   transaction; the mount is taken from tp->t_mountp and @tp is handed
+ *        to the inode lookup
+ * @dp:   directory searched when metadir is enabled; not referenced on the
+ *        non-metadir path
+ * @type: XFS_DQTYPE_USER, XFS_DQTYPE_GROUP or XFS_DQTYPE_PROJ
+ * @ipp:  receives the loaded inode; written only on success
+ *
+ * Without metadir, the inode number is read from sb_uquotino, sb_gquotino or
+ * sb_pquotino and loaded with xfs_trans_metafile_iget(); any other @type
+ * asserts and returns -EFSCORRUPTED.  With metadir, the file named by
+ * xfs_dqinode_path(@type) is loaded from @dp with xfs_metadir_load().
+ *
+ * The quota health mask for @type is marked sick when the lookup fails with
+ * an error accepted by xfs_metadata_is_sick(), when the data fork format is
+ * neither extents nor btree, or when the project id is nonzero.  In the
+ * latter two cases the inode is released with xfs_irele() and -EFSCORRUPTED
+ * is returned.
+ *
+ * Returns -ENOENT if the quota inode field is NULLFSINO or the metadir lookup
+ * reports -ENOENT; 0 and an inode on success; or a negative errno.
+ */
+int
+xfs_dqinode_load(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ xfs_dqtype_t type,
+ struct xfs_inode **ipp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_inode *ip;
+ enum xfs_metafile_type metafile_type = xfs_dqinode_metafile_type(type);
+ int error;
+
+ if (!xfs_has_metadir(mp)) {
+ xfs_ino_t ino;
+
+ switch (type) {
+ case XFS_DQTYPE_USER:
+ ino = mp->m_sb.sb_uquotino;
+ break;
+ case XFS_DQTYPE_GROUP:
+ ino = mp->m_sb.sb_gquotino;
+ break;
+ case XFS_DQTYPE_PROJ:
+ ino = mp->m_sb.sb_pquotino;
+ break;
+ default:
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ /* Should have set 0 to NULLFSINO when loading superblock */
+ if (ino == NULLFSINO)
+ return -ENOENT;
+
+ error = xfs_trans_metafile_iget(tp, ino, metafile_type, &ip);
+ } else {
+ error = xfs_metadir_load(tp, dp, xfs_dqinode_path(type),
+ metafile_type, &ip);
+ if (error == -ENOENT)
+ return error; /* returned before the sick-marking check below */
+ }
+ if (error) {
+ if (xfs_metadata_is_sick(error))
+ xfs_fs_mark_sick(mp, xfs_dqinode_sick_mask(type));
+ return error;
+ }
+
+ if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
+ ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) {
+ xfs_irele(ip);
+ xfs_fs_mark_sick(mp, xfs_dqinode_sick_mask(type));
+ return -EFSCORRUPTED;
+ }
+
+ if (XFS_IS_CORRUPT(mp, ip->i_projid != 0)) {
+ xfs_irele(ip);
+ xfs_fs_mark_sick(mp, xfs_dqinode_sick_mask(type));
+ return -EFSCORRUPTED;
+ }
+
+ *ipp = ip;
+ return 0;
+}
+
+/*
+ * Create a metadata directory quota inode.
+ *
+ * @dp:   directory that will contain the new quota file
+ * @type: quota type; selects the metafile type and the path name via
+ *        xfs_dqinode_metafile_type() and xfs_dqinode_path()
+ * @ipp:  receives the new inode; written only on success
+ *
+ * Runs the xfs_metadir_update sequence start_create -> create (as S_IFREG) ->
+ * commit, logging the new inode core before the commit.  After a successful
+ * commit the inode setup is finished and the inode is handed back in @ipp.
+ * The first error from any step is returned unchanged.
+ *
+ * NOTE(review): when xfs_metadir_create() fails this returns without any
+ * visible cancel or cleanup of @upd (transaction, possibly an inode) --
+ * confirm that the callee tears the update down itself.
+ */
+int
+xfs_dqinode_metadir_create(
+ struct xfs_inode *dp,
+ xfs_dqtype_t type,
+ struct xfs_inode **ipp)
+{
+ struct xfs_metadir_update upd = {
+ .dp = dp,
+ .metafile_type = xfs_dqinode_metafile_type(type),
+ .path = xfs_dqinode_path(type),
+ };
+ int error;
+
+ error = xfs_metadir_start_create(&upd);
+ if (error)
+ return error;
+
+ error = xfs_metadir_create(&upd, S_IFREG);
+ if (error)
+ return error;
+
+ xfs_trans_log_inode(upd.tp, upd.ip, XFS_ILOG_CORE);
+
+ error = xfs_metadir_commit(&upd);
+ if (error)
+ return error;
+
+ xfs_finish_inode_setup(upd.ip);
+ *ipp = upd.ip;
+ return 0;
+}
+
+#ifndef __KERNEL__
+/*
+ * Link a metadata directory quota inode.
+ *
+ * @dp:   directory that will contain the link
+ * @type: quota type; selects the metafile type and the path name via
+ *        xfs_dqinode_metafile_type() and xfs_dqinode_path()
+ * @ip:   existing inode to link into @dp
+ *
+ * Runs the xfs_metadir_update sequence start_link -> link -> commit, logging
+ * the inode core before the commit, and returns the first error or the
+ * result of the commit.  This function is only compiled when __KERNEL__ is
+ * not defined.
+ *
+ * NOTE(review): when xfs_metadir_link() fails this returns without any
+ * visible cancel of @upd -- confirm that the callee cleans up itself.
+ */
+int
+xfs_dqinode_metadir_link(
+ struct xfs_inode *dp,
+ xfs_dqtype_t type,
+ struct xfs_inode *ip)
+{
+ struct xfs_metadir_update upd = {
+ .dp = dp,
+ .metafile_type = xfs_dqinode_metafile_type(type),
+ .path = xfs_dqinode_path(type),
+ .ip = ip,
+ };
+ int error;
+
+ error = xfs_metadir_start_link(&upd);
+ if (error)
+ return error;
+
+ error = xfs_metadir_link(&upd);
+ if (error)
+ return error;
+
+ xfs_trans_log_inode(upd.tp, upd.ip, XFS_ILOG_CORE);
+
+ return xfs_metadir_commit(&upd);
+}
+#endif /* __KERNEL__ */
+
+/*
+ * Create the parent directory for all quota inodes and load it.
+ *
+ * The directory is created as "quota" under mp->m_metadirip by
+ * xfs_metadir_mkdir(), whose return value is passed back and which fills in
+ * @dpp.  If the mount has no m_metadirip, XFS_SICK_FS_METADIR is marked sick
+ * and -EFSCORRUPTED is returned without touching @dpp.
+ */
+int
+xfs_dqinode_mkdir_parent(
+ struct xfs_mount *mp,
+ struct xfs_inode **dpp)
+{
+ if (!mp->m_metadirip) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+
+ return xfs_metadir_mkdir(mp->m_metadirip, "quota", dpp);
+}
+
+/*
+ * Load the parent directory of all quota inodes.  Pass the inode to the caller
+ * because quota functions (e.g. QUOTARM) can be called on the quota files even
+ * if quotas are not enabled.
+ *
+ * The directory is looked up as "quota" (type XFS_METAFILE_DIR) under
+ * mp->m_metadirip by xfs_metadir_load(), whose return value is passed back
+ * and which fills in @dpp.  The mount is taken from @tp.  If the mount has no
+ * m_metadirip, XFS_SICK_FS_METADIR is marked sick and -EFSCORRUPTED is
+ * returned without touching @dpp.
+ */
+int
+xfs_dqinode_load_parent(
+ struct xfs_trans *tp,
+ struct xfs_inode **dpp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+
+ if (!mp->m_metadirip) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+
+ return xfs_metadir_load(tp, mp->m_metadirip, "quota", XFS_METAFILE_DIR,
+ dpp);
+}
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index e1bfee0c3b1a..4d47a3e723aa 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -174,6 +174,14 @@ typedef struct xfs_sb {
xfs_lsn_t sb_lsn; /* last write sequence */
uuid_t sb_meta_uuid; /* metadata file system unique id */
+ xfs_ino_t sb_metadirino; /* metadata directory tree root */
+
+ xfs_rgnumber_t sb_rgcount; /* number of realtime groups */
+ xfs_rtxlen_t sb_rgextents; /* size of a realtime group in rtx */
+
+ uint8_t sb_rgblklog; /* rt group number shift */
+ uint8_t sb_pad[7]; /* zeroes */
+
/* must be padded to 64 bit alignment */
} xfs_sb_t;
@@ -259,7 +267,19 @@ struct xfs_dsb {
__be64 sb_lsn; /* last write sequence */
uuid_t sb_meta_uuid; /* metadata file system unique id */
- /* must be padded to 64 bit alignment */
+ __be64 sb_metadirino; /* metadata directory tree root */
+ __be32 sb_rgcount; /* # of realtime groups */
+ __be32 sb_rgextents; /* size of rtgroup in rtx */
+
+ __u8 sb_rgblklog; /* rt group number shift */
+ __u8 sb_pad[7]; /* zeroes */
+
+ /*
+ * The size of this structure must be padded to 64 bit alignment.
+ *
+ * NOTE: Don't forget to update secondary_sb_whack in xfs_repair when
+ * adding new fields here.
+ */
};
#define XFS_SB_CRC_OFF offsetof(struct xfs_dsb, sb_crc)
@@ -278,7 +298,7 @@ struct xfs_dsb {
#define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
-static inline bool xfs_sb_is_v5(struct xfs_sb *sbp)
+static inline bool xfs_sb_is_v5(const struct xfs_sb *sbp)
{
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
}
@@ -287,12 +307,12 @@ static inline bool xfs_sb_is_v5(struct xfs_sb *sbp)
* Detect a mismatched features2 field. Older kernels read/wrote
* this into the wrong slot, so to be safe we keep them in sync.
*/
-static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp)
+static inline bool xfs_sb_has_mismatched_features2(const struct xfs_sb *sbp)
{
return sbp->sb_bad_features2 != sbp->sb_features2;
}
-static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp)
+static inline bool xfs_sb_version_hasmorebits(const struct xfs_sb *sbp)
{
return xfs_sb_is_v5(sbp) ||
(sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT);
@@ -342,8 +362,8 @@ static inline void xfs_sb_version_addprojid32(struct xfs_sb *sbp)
#define XFS_SB_FEAT_COMPAT_UNKNOWN ~XFS_SB_FEAT_COMPAT_ALL
static inline bool
xfs_sb_has_compat_feature(
- struct xfs_sb *sbp,
- uint32_t feature)
+ const struct xfs_sb *sbp,
+ uint32_t feature)
{
return (sbp->sb_features_compat & feature) != 0;
}
@@ -360,8 +380,8 @@ xfs_sb_has_compat_feature(
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
static inline bool
xfs_sb_has_ro_compat_feature(
- struct xfs_sb *sbp,
- uint32_t feature)
+ const struct xfs_sb *sbp,
+ uint32_t feature)
{
return (sbp->sb_features_ro_compat & feature) != 0;
}
@@ -374,6 +394,7 @@ xfs_sb_has_ro_compat_feature(
#define XFS_SB_FEAT_INCOMPAT_NREXT64 (1 << 5) /* large extent counters */
#define XFS_SB_FEAT_INCOMPAT_EXCHRANGE (1 << 6) /* exchangerange supported */
#define XFS_SB_FEAT_INCOMPAT_PARENT (1 << 7) /* parent pointers */
+#define XFS_SB_FEAT_INCOMPAT_METADIR (1 << 8) /* metadata dir tree */
#define XFS_SB_FEAT_INCOMPAT_ALL \
(XFS_SB_FEAT_INCOMPAT_FTYPE | \
XFS_SB_FEAT_INCOMPAT_SPINODES | \
@@ -382,13 +403,14 @@ xfs_sb_has_ro_compat_feature(
XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR | \
XFS_SB_FEAT_INCOMPAT_NREXT64 | \
XFS_SB_FEAT_INCOMPAT_EXCHRANGE | \
- XFS_SB_FEAT_INCOMPAT_PARENT)
+ XFS_SB_FEAT_INCOMPAT_PARENT | \
+ XFS_SB_FEAT_INCOMPAT_METADIR)
#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
static inline bool
xfs_sb_has_incompat_feature(
- struct xfs_sb *sbp,
- uint32_t feature)
+ const struct xfs_sb *sbp,
+ uint32_t feature)
{
return (sbp->sb_features_incompat & feature) != 0;
}
@@ -399,8 +421,8 @@ xfs_sb_has_incompat_feature(
#define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_LOG_ALL
static inline bool
xfs_sb_has_incompat_log_feature(
- struct xfs_sb *sbp,
- uint32_t feature)
+ const struct xfs_sb *sbp,
+ uint32_t feature)
{
return (sbp->sb_features_log_incompat & feature) != 0;
}
@@ -420,7 +442,7 @@ xfs_sb_add_incompat_log_features(
sbp->sb_features_log_incompat |= features;
}
-static inline bool xfs_sb_version_haslogxattrs(struct xfs_sb *sbp)
+static inline bool xfs_sb_version_haslogxattrs(const struct xfs_sb *sbp)
{
return xfs_sb_is_v5(sbp) && (sbp->sb_features_log_incompat &
XFS_SB_FEAT_INCOMPAT_LOG_XATTRS);
@@ -694,21 +716,58 @@ struct xfs_agfl {
/*
* Realtime bitmap information is accessed by the word, which is currently
- * stored in host-endian format.
+ * stored in host-endian format. Starting with the realtime groups feature,
+ * the words are stored in be32 ondisk.
*/
union xfs_rtword_raw {
__u32 old;
+ __be32 rtg;
};
/*
* Realtime summary counts are accessed by the word, which is currently
- * stored in host-endian format.
+ * stored in host-endian format. Starting with the realtime groups feature,
+ * the words are stored in be32 ondisk.
*/
union xfs_suminfo_raw {
__u32 old;
+ __be32 rtg;
};
/*
+ * Realtime allocation groups break the rt section into multiple pieces that
+ * could be locked independently. Realtime block group numbers are 32-bit
+ * quantities. Block numbers within a group are also 32-bit quantities, but
+ * the upper bit must never be set. rtgroup 0 might have a superblock in it,
+ * so the minimum size of an rtgroup is 2 rtx.
+ */
+#define XFS_MAX_RGBLOCKS ((xfs_rgblock_t)(1U << 31) - 1)
+#define XFS_MIN_RGEXTENTS ((xfs_rtxlen_t)2)
+#define XFS_MAX_RGNUMBER ((xfs_rgnumber_t)(-1U))
+
+#define XFS_RTSB_MAGIC 0x46726F67 /* 'Frog' */
+
+/*
+ * Realtime superblock - on disk version. Must be padded to 64 bit alignment.
+ * The first block of the realtime volume contains this superblock.
+ *
+ * Note that rsb_crc is declared little-endian, unlike the big-endian magic
+ * number and pad fields next to it.
+ */
+struct xfs_rtsb {
+ __be32 rsb_magicnum; /* magic number == XFS_RTSB_MAGIC */
+ __le32 rsb_crc; /* superblock crc */
+
+ __be32 rsb_pad; /* zero */
+ unsigned char rsb_fname[XFSLABEL_MAX]; /* file system name */
+
+ uuid_t rsb_uuid; /* user-visible file system unique id */
+ uuid_t rsb_meta_uuid; /* metadata file system unique id */
+
+ /* must be padded to 64 bit alignment */
+};
+
+#define XFS_RTSB_CRC_OFF offsetof(struct xfs_rtsb, rsb_crc)
+#define XFS_RTSB_DADDR ((xfs_daddr_t)0) /* daddr in rt section */
+
+/*
* XFS Timestamps
* ==============
*
@@ -790,6 +849,27 @@ static inline time64_t xfs_bigtime_to_unix(uint64_t ondisk_seconds)
return (time64_t)ondisk_seconds - XFS_BIGTIME_EPOCH_OFFSET;
}
+/*
+ * Types of metadata files. The ondisk inode field di_metatype is documented
+ * as holding one of these XFS_METAFILE_* values. XFS_METAFILE_MAX is the list
+ * terminator, one past the last defined type.
+ */
+enum xfs_metafile_type {
+ XFS_METAFILE_UNKNOWN, /* unknown */
+ XFS_METAFILE_DIR, /* metadir directory */
+ XFS_METAFILE_USRQUOTA, /* user quota */
+ XFS_METAFILE_GRPQUOTA, /* group quota */
+ XFS_METAFILE_PRJQUOTA, /* project quota */
+ XFS_METAFILE_RTBITMAP, /* rt bitmap */
+ XFS_METAFILE_RTSUMMARY, /* rt summary */
+
+ XFS_METAFILE_MAX
+} __packed;
+
+/*
+ * Value/name pairs for every metafile type, written as a brace-initializer
+ * list. Presumably consumed by symbolic-name printing in tracepoints --
+ * TODO confirm against the users of this macro.
+ */
+#define XFS_METAFILE_TYPE_STR \
+ { XFS_METAFILE_UNKNOWN, "unknown" }, \
+ { XFS_METAFILE_DIR, "dir" }, \
+ { XFS_METAFILE_USRQUOTA, "usrquota" }, \
+ { XFS_METAFILE_GRPQUOTA, "grpquota" }, \
+ { XFS_METAFILE_PRJQUOTA, "prjquota" }, \
+ { XFS_METAFILE_RTBITMAP, "rtbitmap" }, \
+ { XFS_METAFILE_RTSUMMARY, "rtsummary" }
+
/*
* On-disk inode structure.
*
@@ -812,7 +892,7 @@ struct xfs_dinode {
__be16 di_mode; /* mode and type of file */
__u8 di_version; /* inode version */
__u8 di_format; /* format of di_c data */
- __be16 di_onlink; /* old number of links to file */
+ __be16 di_metatype; /* XFS_METAFILE_*; was di_onlink */
__be32 di_uid; /* owner's user id */
__be32 di_gid; /* owner's group id */
__be32 di_nlink; /* number of links to file */
@@ -1088,21 +1168,60 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
* Values for di_flags2 These start by being exposed to userspace in the upper
* 16 bits of the XFS_XFLAG_s range.
*/
-#define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */
-#define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */
-#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */
-#define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */
-#define XFS_DIFLAG2_NREXT64_BIT 4 /* large extent counters */
+/* use DAX for this inode */
+#define XFS_DIFLAG2_DAX_BIT 0
+
+/* file's blocks may be shared */
+#define XFS_DIFLAG2_REFLINK_BIT 1
-#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT)
-#define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT)
-#define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
-#define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT)
-#define XFS_DIFLAG2_NREXT64 (1 << XFS_DIFLAG2_NREXT64_BIT)
+/* copy on write extent size hint */
+#define XFS_DIFLAG2_COWEXTSIZE_BIT 2
+
+/* big timestamps */
+#define XFS_DIFLAG2_BIGTIME_BIT 3
+
+/* large extent counters */
+#define XFS_DIFLAG2_NREXT64_BIT 4
+
+/*
+ * The inode contains filesystem metadata and can be found through the metadata
+ * directory tree. Metadata inodes must satisfy the following constraints:
+ *
+ * - V5 filesystem (and ftype) are enabled;
+ * - The only valid modes are regular files and directories;
+ * - The access bits must be zero;
+ * - DMAPI event and state masks are zero;
+ * - The user and group IDs must be zero;
+ * - The project ID can be used as a u32 annotation;
+ * - The immutable, sync, noatime, nodump, nodefrag flags must be set.
+ * - The dax flag must not be set.
+ * - Directories must have nosymlinks set.
+ *
+ * These requirements are chosen defensively to minimize the ability of
+ * userspace to read or modify the contents, should a metadata file ever
+ * escape to userspace.
+ *
+ * There are further constraints on the directory tree itself:
+ *
+ * - Metadata inodes must never be resolvable through the root directory;
+ * - They must never be accessed by userspace;
+ * - Metadata directory entries must have correct ftype.
+ *
+ * Superblock-rooted metadata files must have the METADATA iflag set even
+ * though they do not have a parent directory.
+ */
+#define XFS_DIFLAG2_METADATA_BIT 5
+
+#define XFS_DIFLAG2_DAX (1ULL << XFS_DIFLAG2_DAX_BIT)
+#define XFS_DIFLAG2_REFLINK (1ULL << XFS_DIFLAG2_REFLINK_BIT)
+#define XFS_DIFLAG2_COWEXTSIZE (1ULL << XFS_DIFLAG2_COWEXTSIZE_BIT)
+#define XFS_DIFLAG2_BIGTIME (1ULL << XFS_DIFLAG2_BIGTIME_BIT)
+#define XFS_DIFLAG2_NREXT64 (1ULL << XFS_DIFLAG2_NREXT64_BIT)
+#define XFS_DIFLAG2_METADATA (1ULL << XFS_DIFLAG2_METADATA_BIT)
#define XFS_DIFLAG2_ANY \
(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
- XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64)
+ XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADATA)
static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip)
{
@@ -1117,6 +1236,12 @@ static inline bool xfs_dinode_has_large_extent_counts(
(dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_NREXT64));
}
+/*
+ * Does this ondisk inode carry the METADATA flag? Only inodes of version 3
+ * or later are considered; di_flags2 is tested in its big-endian ondisk form.
+ */
+static inline bool xfs_dinode_is_metadir(const struct xfs_dinode *dip)
+{
+ return dip->di_version >= 3 &&
+ (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADATA));
+}
+
/*
* Inode number format:
* low inopblog bits - offset in block
@@ -1165,6 +1290,24 @@ static inline bool xfs_dinode_has_large_extent_counts(
#define XFS_MIN_RTEXTSIZE (4 * 1024) /* 4kB */
/*
+ * RT bit manipulation macros.
+ */
+#define XFS_RTBITMAP_MAGIC 0x424D505A /* BMPZ */
+#define XFS_RTSUMMARY_MAGIC 0x53554D59 /* SUMY */
+
+/*
+ * Ondisk header describing a realtime metadata block: magic, CRC, owning
+ * inode, block number, last-write LSN and filesystem UUID. Presumably
+ * rt_magic holds XFS_RTBITMAP_MAGIC or XFS_RTSUMMARY_MAGIC -- TODO confirm
+ * against the buffer verifiers.
+ */
+struct xfs_rtbuf_blkinfo {
+ __be32 rt_magic; /* validity check on block */
+ __be32 rt_crc; /* CRC of block */
+ __be64 rt_owner; /* inode that owns the block */
+ __be64 rt_blkno; /* first block of the buffer */
+ __be64 rt_lsn; /* sequence number of last write */
+ uuid_t rt_uuid; /* filesystem we belong to */
+};
+
+/* Byte offset of the CRC field within struct xfs_rtbuf_blkinfo. */
+#define XFS_RTBUF_CRC_OFF \
+ offsetof(struct xfs_rtbuf_blkinfo, rt_crc)
+
+/*
* Dquot and dquot block format definitions
*/
#define XFS_DQUOT_MAGIC 0x4451 /* 'DQ' */
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 860284064c5a..41ce4d3d650e 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -187,7 +187,9 @@ struct xfs_fsop_geom {
__u32 logsunit; /* log stripe unit, bytes */
uint32_t sick; /* o: unhealthy fs & rt metadata */
uint32_t checked; /* o: checked fs & rt metadata */
- __u64 reserved[17]; /* reserved space */
+ __u32 rgextents; /* rt extents in a realtime group */
+ __u32 rgcount; /* number of realtime groups */
+ __u64 reserved[16]; /* reserved space */
};
#define XFS_FSOP_GEOM_SICK_COUNTERS (1 << 0) /* summary counters */
@@ -198,6 +200,8 @@ struct xfs_fsop_geom {
#define XFS_FSOP_GEOM_SICK_RT_SUMMARY (1 << 5) /* realtime summary */
#define XFS_FSOP_GEOM_SICK_QUOTACHECK (1 << 6) /* quota counts */
#define XFS_FSOP_GEOM_SICK_NLINKS (1 << 7) /* inode link counts */
+#define XFS_FSOP_GEOM_SICK_METADIR (1 << 8) /* metadata directory */
+#define XFS_FSOP_GEOM_SICK_METAPATH (1 << 9) /* metadir tree path */
/* Output for XFS_FS_COUNTS */
typedef struct xfs_fsop_counts {
@@ -242,6 +246,7 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_NREXT64 (1 << 23) /* large extent counters */
#define XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE (1 << 24) /* exchange range */
#define XFS_FSOP_GEOM_FLAGS_PARENT (1 << 25) /* linux parent pointers */
+#define XFS_FSOP_GEOM_FLAGS_METADIR (1 << 26) /* metadata directories */
/*
* Minimum and maximum sizes need for growth checks.
@@ -489,9 +494,17 @@ struct xfs_bulk_ireq {
*/
#define XFS_BULK_IREQ_NREXT64 (1U << 2)
+/*
+ * Allow bulkstat to return information about metadata directories. This
+ * enables xfs_scrub to find them for scanning, as they are otherwise ordinary
+ * directories.
+ */
+#define XFS_BULK_IREQ_METADIR (1U << 3)
+
#define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \
XFS_BULK_IREQ_SPECIAL | \
- XFS_BULK_IREQ_NREXT64)
+ XFS_BULK_IREQ_NREXT64 | \
+ XFS_BULK_IREQ_METADIR)
/* Operate on the root directory inode. */
#define XFS_BULK_IREQ_SPECIAL_ROOT (1)
@@ -722,9 +735,11 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_NLINKS 26 /* inode link counts */
#define XFS_SCRUB_TYPE_HEALTHY 27 /* everything checked out ok */
#define XFS_SCRUB_TYPE_DIRTREE 28 /* directory tree structure */
+#define XFS_SCRUB_TYPE_METAPATH 29 /* metadata directory tree paths */
+#define XFS_SCRUB_TYPE_RGSUPER 30 /* realtime superblock */
/* Number of scrub subcommands. */
-#define XFS_SCRUB_TYPE_NR 29
+#define XFS_SCRUB_TYPE_NR 31
/*
* This special type code only applies to the vectored scrub implementation.
@@ -803,6 +818,22 @@ struct xfs_scrub_vec_head {
#define XFS_SCRUB_VEC_FLAGS_ALL (0)
/*
+ * i: sm_ino values for XFS_SCRUB_TYPE_METAPATH to select a metadata file for
+ * path checking.
+ */
+#define XFS_SCRUB_METAPATH_PROBE (0) /* do we have a metapath scrubber? */
+#define XFS_SCRUB_METAPATH_RTDIR (1) /* rtgroups metadir */
+#define XFS_SCRUB_METAPATH_RTBITMAP (2) /* per-rtg bitmap */
+#define XFS_SCRUB_METAPATH_RTSUMMARY (3) /* per-rtg summary */
+#define XFS_SCRUB_METAPATH_QUOTADIR (4) /* quota metadir */
+#define XFS_SCRUB_METAPATH_USRQUOTA (5) /* user quota */
+#define XFS_SCRUB_METAPATH_GRPQUOTA (6) /* group quota */
+#define XFS_SCRUB_METAPATH_PRJQUOTA (7) /* project quota */
+
+/* Number of metapath sm_ino values */
+#define XFS_SCRUB_METAPATH_NR (8)
+
+/*
* ioctl limits
*/
#ifdef XATTR_LIST_MAX
@@ -949,6 +980,21 @@ struct xfs_getparents_by_handle {
};
/*
+ * Output for XFS_IOC_RTGROUP_GEOMETRY
+ */
+struct xfs_rtgroup_geometry {
+ __u32 rg_number; /* i/o: rtgroup number */
+ __u32 rg_length; /* o: length in blocks */
+ __u32 rg_sick; /* o: sick things in rtgroup */
+ __u32 rg_checked; /* o: checked metadata in rtgroup */
+ __u32 rg_flags; /* i/o: flags for this rtgroup */
+ __u32 rg_reserved[27]; /* o: zero */
+};
+#define XFS_RTGROUP_GEOM_SICK_SUPER (1U << 0) /* superblock */
+#define XFS_RTGROUP_GEOM_SICK_BITMAP (1U << 1) /* rtbitmap */
+#define XFS_RTGROUP_GEOM_SICK_SUMMARY (1U << 2) /* rtsummary */
+
+/*
* ioctl commands that are used by Linux filesystems
*/
#define XFS_IOC_GETXFLAGS FS_IOC_GETFLAGS
@@ -986,6 +1032,7 @@ struct xfs_getparents_by_handle {
#define XFS_IOC_GETPARENTS _IOWR('X', 62, struct xfs_getparents)
#define XFS_IOC_GETPARENTS_BY_HANDLE _IOWR('X', 63, struct xfs_getparents_by_handle)
#define XFS_IOC_SCRUBV_METADATA _IOWR('X', 64, struct xfs_scrub_vec_head)
+#define XFS_IOC_RTGROUP_GEOMETRY _IOWR('X', 65, struct xfs_rtgroup_geometry)
/*
* ioctl commands that replace IRIX syssgi()'s
diff --git a/fs/xfs/libxfs/xfs_group.c b/fs/xfs/libxfs/xfs_group.c
new file mode 100644
index 000000000000..e9d76bcdc820
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_group.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018 Red Hat, Inc.
+ */
+
+#include "xfs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
+#include "xfs_extent_busy.h"
+#include "xfs_group.h"
+
+/*
+ * Groups can have passive and active references.
+ *
+ * For passive references the code freeing a group is responsible for cleaning
+ * up objects that hold the passive references (e.g. cached buffers).
+ * Routines manipulating passive references are xfs_group_get, xfs_group_hold
+ * and xfs_group_put.
+ *
+ * Active references are for short term access to the group for walking trees or
+ * accessing state. If a group is being shrunk or offlined, the lookup will fail
+ * to find that group and return NULL instead.
+ * Routines manipulating active references are xfs_group_grab and
+ * xfs_group_rele.
+ */
+
+/*
+ * Look up group @index of the given @type in the mount's per-type xarray and
+ * take a passive reference (xg_ref) on it. The lookup and the increment are
+ * done under rcu_read_lock(). Returns NULL if nothing is stored at @index.
+ */
+struct xfs_group *
+xfs_group_get(
+ struct xfs_mount *mp,
+ uint32_t index,
+ enum xfs_group_type type)
+{
+ struct xfs_group *xg;
+
+ rcu_read_lock();
+ xg = xa_load(&mp->m_groups[type].xa, index);
+ if (xg) {
+ trace_xfs_group_get(xg, _RET_IP_);
+ ASSERT(atomic_read(&xg->xg_ref) >= 0);
+ atomic_inc(&xg->xg_ref);
+ }
+ rcu_read_unlock();
+ return xg;
+}
+
+/*
+ * Take an additional passive reference on @xg and return it. The caller is
+ * expected to already hold a passive or an active reference; this is asserted.
+ */
+struct xfs_group *
+xfs_group_hold(
+ struct xfs_group *xg)
+{
+ ASSERT(atomic_read(&xg->xg_ref) > 0 ||
+ atomic_read(&xg->xg_active_ref) > 0);
+
+ trace_xfs_group_hold(xg, _RET_IP_);
+ atomic_inc(&xg->xg_ref);
+ return xg;
+}
+
+/*
+ * Drop a passive reference on @xg. The count is asserted to be positive
+ * before it is decremented; nothing is freed here.
+ */
+void
+xfs_group_put(
+ struct xfs_group *xg)
+{
+ trace_xfs_group_put(xg, _RET_IP_);
+
+ ASSERT(atomic_read(&xg->xg_ref) > 0);
+ atomic_dec(&xg->xg_ref);
+}
+
+/*
+ * Look up group @index of the given @type and try to take an active
+ * reference on it. Returns NULL if nothing is stored at @index, or if the
+ * group's active reference count has already dropped to zero.
+ */
+struct xfs_group *
+xfs_group_grab(
+ struct xfs_mount *mp,
+ uint32_t index,
+ enum xfs_group_type type)
+{
+ struct xfs_group *xg;
+
+ rcu_read_lock();
+ xg = xa_load(&mp->m_groups[type].xa, index);
+ if (xg) {
+ trace_xfs_group_grab(xg, _RET_IP_);
+ if (!atomic_inc_not_zero(&xg->xg_active_ref))
+ xg = NULL;
+ }
+ rcu_read_unlock();
+ return xg;
+}
+
+/*
+ * Iterate to the next group. To start the iteration at @start_index, a %NULL
+ * @xg is passed, else the previous group returned from this function. The
+ * caller should break out of the loop when this returns %NULL. If the caller
+ * wants to break out of a loop that did not finish it needs to release the
+ * active reference to @xg using xfs_group_rele() itself.
+ *
+ * @end_index is inclusive: NULL is returned once the next index would exceed
+ * it. The active reference on a non-NULL @xg is always released here, and
+ * the returned group (if any) carries a fresh active reference.
+ */
+struct xfs_group *
+xfs_group_next_range(
+ struct xfs_mount *mp,
+ struct xfs_group *xg,
+ uint32_t start_index,
+ uint32_t end_index,
+ enum xfs_group_type type)
+{
+ uint32_t index = start_index;
+
+ if (xg) {
+ index = xg->xg_gno + 1;
+ xfs_group_rele(xg);
+ }
+ if (index > end_index)
+ return NULL;
+ return xfs_group_grab(mp, index, type);
+}
+
+/*
+ * Find the next group after @xg that has @mark set in the xarray, or the
+ * first such group if @xg is NULL.
+ *
+ * The active reference on a non-NULL @xg is released. The group that is
+ * found is returned with an active reference; NULL is returned if no marked
+ * group remains, or if the group that was found already has an active
+ * reference count of zero (the search does not continue past it).
+ */
+struct xfs_group *
+xfs_group_grab_next_mark(
+ struct xfs_mount *mp,
+ struct xfs_group *xg,
+ xa_mark_t mark,
+ enum xfs_group_type type)
+{
+ unsigned long index = 0;
+
+ if (xg) {
+ index = xg->xg_gno + 1;
+ xfs_group_rele(xg);
+ }
+
+ rcu_read_lock();
+ xg = xa_find(&mp->m_groups[type].xa, &index, ULONG_MAX, mark);
+ if (xg) {
+ trace_xfs_group_grab_next_tag(xg, _RET_IP_);
+ if (!atomic_inc_not_zero(&xg->xg_active_ref))
+ xg = NULL;
+ }
+ rcu_read_unlock();
+ return xg;
+}
+
+/*
+ * Drop an active reference on @xg, such as one taken by xfs_group_grab() or
+ * by the iteration helpers in this file.
+ */
+void
+xfs_group_rele(
+ struct xfs_group *xg)
+{
+ trace_xfs_group_rele(xg, _RET_IP_);
+ atomic_dec(&xg->xg_active_ref);
+}
+
+/*
+ * Remove group @index of @type from the mount's xarray and free it.
+ *
+ * A nonzero passive reference count is reported through XFS_IS_CORRUPT. The
+ * intent drain and (kernel only) the busy extent tree are torn down, then the
+ * optional @uninit callback runs before the mount's active reference is
+ * dropped and the structure is released with kfree_rcu_mightsleep().
+ *
+ * NOTE(review): the xa_erase() result is dereferenced without a NULL check,
+ * so callers must only pass an index that is present in the xarray.
+ */
+void
+xfs_group_free(
+ struct xfs_mount *mp,
+ uint32_t index,
+ enum xfs_group_type type,
+ void (*uninit)(struct xfs_group *xg))
+{
+ struct xfs_group *xg = xa_erase(&mp->m_groups[type].xa, index);
+
+ XFS_IS_CORRUPT(mp, atomic_read(&xg->xg_ref) != 0);
+
+ xfs_defer_drain_free(&xg->xg_intents_drain);
+#ifdef __KERNEL__
+ kfree(xg->xg_busy_extents);
+#endif
+
+ if (uninit)
+ uninit(xg);
+
+ /* drop the mount's active reference */
+ xfs_group_rele(xg);
+ XFS_IS_CORRUPT(mp, atomic_read(&xg->xg_active_ref) != 0);
+ kfree_rcu_mightsleep(xg);
+}
+
+/*
+ * Initialise the common fields of @xg and insert it into the mount's xarray
+ * for @type at @index.
+ *
+ * Returns -ENOMEM if the (kernel only) busy extent tree cannot be allocated,
+ * or the error from xa_insert(), in which case the drain and busy extent tree
+ * set up here are freed again. An -EBUSY from xa_insert() additionally
+ * triggers a one-time warning. @xg itself is not freed on failure.
+ *
+ * NOTE(review): xg_ref is not initialised here; presumably the caller hands
+ * in zeroed memory -- confirm.
+ */
+int
+xfs_group_insert(
+ struct xfs_mount *mp,
+ struct xfs_group *xg,
+ uint32_t index,
+ enum xfs_group_type type)
+{
+ int error;
+
+ xg->xg_mount = mp;
+ xg->xg_gno = index;
+ xg->xg_type = type;
+
+#ifdef __KERNEL__
+ xg->xg_busy_extents = xfs_extent_busy_alloc();
+ if (!xg->xg_busy_extents)
+ return -ENOMEM;
+ spin_lock_init(&xg->xg_state_lock);
+ xfs_hooks_init(&xg->xg_rmap_update_hooks);
+#endif
+ xfs_defer_drain_init(&xg->xg_intents_drain);
+
+ /* Active ref owned by mount indicates group is online. */
+ atomic_set(&xg->xg_active_ref, 1);
+
+ error = xa_insert(&mp->m_groups[type].xa, index, xg, GFP_KERNEL);
+ if (error) {
+ WARN_ON_ONCE(error == -EBUSY);
+ goto out_drain;
+ }
+
+ return 0;
+out_drain:
+ xfs_defer_drain_free(&xg->xg_intents_drain);
+#ifdef __KERNEL__
+ kfree(xg->xg_busy_extents);
+#endif
+ return error;
+}
+
+/*
+ * Take a passive reference on the group of @type that contains filesystem
+ * block @fsbno. Returns NULL if no such group is present.
+ */
+struct xfs_group *
+xfs_group_get_by_fsb(
+ struct xfs_mount *mp,
+ xfs_fsblock_t fsbno,
+ enum xfs_group_type type)
+{
+ return xfs_group_get(mp, xfs_fsb_to_gno(mp, fsbno, type), type);
+}
diff --git a/fs/xfs/libxfs/xfs_group.h b/fs/xfs/libxfs/xfs_group.h
new file mode 100644
index 000000000000..242b05627c7a
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_group.h
@@ -0,0 +1,164 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018 Red Hat, Inc.
+ */
+#ifndef __LIBXFS_GROUP_H
+#define __LIBXFS_GROUP_H 1
+
+/*
+ * Common state for one group of a given type. A group is identified by its
+ * number (xg_gno) within the mount's per-type table, m_groups[xg_type].
+ */
+struct xfs_group {
+ struct xfs_mount *xg_mount;
+ uint32_t xg_gno;
+ enum xfs_group_type xg_type;
+ atomic_t xg_ref; /* passive reference count */
+ atomic_t xg_active_ref; /* active reference count */
+
+ /* Precalculated geometry info */
+ uint32_t xg_block_count; /* usable gbnos are below this */
+ uint32_t xg_min_gbno; /* min usable gbno */
+
+#ifdef __KERNEL__
+ /* -- kernel only structures below this line -- */
+
+ /*
+ * Track freed but not yet committed extents.
+ */
+ struct xfs_extent_busy_tree *xg_busy_extents;
+
+ /*
+ * Bitsets of per-group metadata that have been checked and/or are sick.
+ * Callers should hold xg_state_lock before accessing these fields.
+ */
+ uint16_t xg_checked;
+ uint16_t xg_sick;
+ spinlock_t xg_state_lock;
+
+ /*
+ * We use xfs_drain to track the number of deferred log intent items
+ * that have been queued (but not yet processed) so that waiters (e.g.
+ * scrub) will not lock resources when other threads are in the middle
+ * of processing a chain of intent items only to find momentary
+ * inconsistencies.
+ */
+ struct xfs_defer_drain xg_intents_drain;
+
+ /*
+ * Hook to feed rmapbt updates to an active online repair.
+ */
+ struct xfs_hooks xg_rmap_update_hooks;
+#endif /* __KERNEL__ */
+};
+
+struct xfs_group *xfs_group_get(struct xfs_mount *mp, uint32_t index,
+ enum xfs_group_type type);
+struct xfs_group *xfs_group_get_by_fsb(struct xfs_mount *mp,
+ xfs_fsblock_t fsbno, enum xfs_group_type type);
+struct xfs_group *xfs_group_hold(struct xfs_group *xg);
+void xfs_group_put(struct xfs_group *xg);
+
+struct xfs_group *xfs_group_grab(struct xfs_mount *mp, uint32_t index,
+ enum xfs_group_type type);
+struct xfs_group *xfs_group_next_range(struct xfs_mount *mp,
+ struct xfs_group *xg, uint32_t start_index, uint32_t end_index,
+ enum xfs_group_type type);
+struct xfs_group *xfs_group_grab_next_mark(struct xfs_mount *mp,
+ struct xfs_group *xg, xa_mark_t mark, enum xfs_group_type type);
+void xfs_group_rele(struct xfs_group *xg);
+
+void xfs_group_free(struct xfs_mount *mp, uint32_t index,
+ enum xfs_group_type type, void (*uninit)(struct xfs_group *xg));
+int xfs_group_insert(struct xfs_mount *mp, struct xfs_group *xg,
+ uint32_t index, enum xfs_group_type);
+
+/*
+ * Set or clear @_mark on this group's slot in the per-type xarray, or ask
+ * (via xa_marked()) whether the xarray for @_type has @_mark set on any entry.
+ */
+#define xfs_group_set_mark(_xg, _mark) \
+ xa_set_mark(&(_xg)->xg_mount->m_groups[(_xg)->xg_type].xa, \
+ (_xg)->xg_gno, (_mark))
+#define xfs_group_clear_mark(_xg, _mark) \
+ xa_clear_mark(&(_xg)->xg_mount->m_groups[(_xg)->xg_type].xa, \
+ (_xg)->xg_gno, (_mark))
+#define xfs_group_marked(_mp, _type, _mark) \
+ xa_marked(&(_mp)->m_groups[(_type)].xa, (_mark))
+
+/* Return the per-type "blocks" value the mount records for @xg's type. */
+static inline xfs_agblock_t
+xfs_group_max_blocks(
+ struct xfs_group *xg)
+{
+ return xg->xg_mount->m_groups[xg->xg_type].blocks;
+}
+
+/*
+ * Return the filesystem block number at which @xg starts: the group number
+ * shifted left by the per-type blklog.
+ */
+static inline xfs_fsblock_t
+xfs_group_start_fsb(
+ struct xfs_group *xg)
+{
+ return ((xfs_fsblock_t)xg->xg_gno) <<
+ xg->xg_mount->m_groups[xg->xg_type].blklog;
+}
+
+/*
+ * Convert a group-relative block number into a filesystem block number by
+ * OR-ing it into the group's start address. Assumes @gbno fits in the low
+ * blklog bits; no range check is done here.
+ */
+static inline xfs_fsblock_t
+xfs_gbno_to_fsb(
+ struct xfs_group *xg,
+ xfs_agblock_t gbno)
+{
+ return xfs_group_start_fsb(xg) | gbno;
+}
+
+/*
+ * Convert a group-relative block number into a disk address. Unlike the fsb
+ * conversion, the linear block offset is computed as gno * blocks + gbno
+ * (using the per-type "blocks" value) before being passed to XFS_FSB_TO_BB().
+ */
+static inline xfs_daddr_t
+xfs_gbno_to_daddr(
+ struct xfs_group *xg,
+ xfs_agblock_t gbno)
+{
+ struct xfs_mount *mp = xg->xg_mount;
+ uint32_t blocks = mp->m_groups[xg->xg_type].blocks;
+
+ return XFS_FSB_TO_BB(mp, (xfs_fsblock_t)xg->xg_gno * blocks + gbno);
+}
+
+/*
+ * Return the number of the group of @type that contains @fsbno. When the
+ * per-type blklog is zero, every block is reported as belonging to group 0.
+ */
+static inline uint32_t
+xfs_fsb_to_gno(
+ struct xfs_mount *mp,
+ xfs_fsblock_t fsbno,
+ enum xfs_group_type type)
+{
+ if (!mp->m_groups[type].blklog)
+ return 0;
+ return fsbno >> mp->m_groups[type].blklog;
+}
+
+/*
+ * Return the group-relative block number of @fsbno by masking it with the
+ * per-type blkmask.
+ */
+static inline xfs_agblock_t
+xfs_fsb_to_gbno(
+ struct xfs_mount *mp,
+ xfs_fsblock_t fsbno,
+ enum xfs_group_type type)
+{
+ return fsbno & mp->m_groups[type].blkmask;
+}
+
+/*
+ * Is @gbno a usable block within @xg? True only when
+ * xg_min_gbno <= @gbno < xg_block_count.
+ */
+static inline bool
+xfs_verify_gbno(
+ struct xfs_group *xg,
+ uint32_t gbno)
+{
+ if (gbno >= xg->xg_block_count)
+ return false;
+ if (gbno < xg->xg_min_gbno)
+ return false;
+ return true;
+}
+
+/*
+ * Is the extent of @glen blocks starting at @gbno entirely usable within
+ * @xg? The extent must be non-empty, its last block (@gbno + @glen - 1) must
+ * not overflow 32 bits, and both its first and last block must pass
+ * xfs_verify_gbno().
+ */
+static inline bool
+xfs_verify_gbext(
+ struct xfs_group *xg,
+ uint32_t gbno,
+ uint32_t glen)
+{
+ uint32_t end;
+
+ if (!xfs_verify_gbno(xg, gbno))
+ return false;
+ if (glen == 0 || check_add_overflow(gbno, glen - 1, &end))
+ return false;
+ if (!xfs_verify_gbno(xg, end))
+ return false;
+ return true;
+}
+
+#endif /* __LIBXFS_GROUP_H */
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
index b0edb4288e59..d34986ac18c3 100644
--- a/fs/xfs/libxfs/xfs_health.h
+++ b/fs/xfs/libxfs/xfs_health.h
@@ -6,6 +6,8 @@
#ifndef __XFS_HEALTH_H__
#define __XFS_HEALTH_H__
+struct xfs_group;
+
/*
* In-Core Filesystem Health Assessments
* =====================================
@@ -52,6 +54,7 @@ struct xfs_inode;
struct xfs_fsop_geom;
struct xfs_btree_cur;
struct xfs_da_args;
+struct xfs_rtgroup;
/* Observable health issues for metadata spanning the entire filesystem. */
#define XFS_SICK_FS_COUNTERS (1 << 0) /* summary counters */
@@ -60,10 +63,13 @@ struct xfs_da_args;
#define XFS_SICK_FS_PQUOTA (1 << 3) /* project quota */
#define XFS_SICK_FS_QUOTACHECK (1 << 4) /* quota counts */
#define XFS_SICK_FS_NLINKS (1 << 5) /* inode link counts */
+#define XFS_SICK_FS_METADIR (1 << 6) /* metadata directory tree */
+#define XFS_SICK_FS_METAPATH (1 << 7) /* metadata directory tree path */
-/* Observable health issues for realtime volume metadata. */
-#define XFS_SICK_RT_BITMAP (1 << 0) /* realtime bitmap */
-#define XFS_SICK_RT_SUMMARY (1 << 1) /* realtime summary */
+/* Observable health issues for realtime group metadata. */
+#define XFS_SICK_RG_SUPER (1 << 0) /* rt group superblock */
+#define XFS_SICK_RG_BITMAP (1 << 1) /* rt group bitmap */
+#define XFS_SICK_RG_SUMMARY (1 << 2) /* rt groups summary */
/* Observable health issues for AG metadata. */
#define XFS_SICK_AG_SB (1 << 0) /* superblock */
@@ -103,10 +109,13 @@ struct xfs_da_args;
XFS_SICK_FS_GQUOTA | \
XFS_SICK_FS_PQUOTA | \
XFS_SICK_FS_QUOTACHECK | \
- XFS_SICK_FS_NLINKS)
+ XFS_SICK_FS_NLINKS | \
+ XFS_SICK_FS_METADIR | \
+ XFS_SICK_FS_METAPATH)
-#define XFS_SICK_RT_PRIMARY (XFS_SICK_RT_BITMAP | \
- XFS_SICK_RT_SUMMARY)
+#define XFS_SICK_RG_PRIMARY (XFS_SICK_RG_SUPER | \
+ XFS_SICK_RG_BITMAP | \
+ XFS_SICK_RG_SUMMARY)
#define XFS_SICK_AG_PRIMARY (XFS_SICK_AG_SB | \
XFS_SICK_AG_AGF | \
@@ -136,26 +145,26 @@ struct xfs_da_args;
/* Secondary state related to (but not primary evidence of) health problems. */
#define XFS_SICK_FS_SECONDARY (0)
-#define XFS_SICK_RT_SECONDARY (0)
+#define XFS_SICK_RG_SECONDARY (0)
#define XFS_SICK_AG_SECONDARY (0)
#define XFS_SICK_INO_SECONDARY (XFS_SICK_INO_FORGET)
/* Evidence of health problems elsewhere. */
#define XFS_SICK_FS_INDIRECT (0)
-#define XFS_SICK_RT_INDIRECT (0)
+#define XFS_SICK_RG_INDIRECT (0)
#define XFS_SICK_AG_INDIRECT (XFS_SICK_AG_INODES)
#define XFS_SICK_INO_INDIRECT (0)
/* All health masks. */
-#define XFS_SICK_FS_ALL (XFS_SICK_FS_PRIMARY | \
+#define XFS_SICK_FS_ALL (XFS_SICK_FS_PRIMARY | \
XFS_SICK_FS_SECONDARY | \
XFS_SICK_FS_INDIRECT)
-#define XFS_SICK_RT_ALL (XFS_SICK_RT_PRIMARY | \
- XFS_SICK_RT_SECONDARY | \
- XFS_SICK_RT_INDIRECT)
+#define XFS_SICK_RG_ALL (XFS_SICK_RG_PRIMARY | \
+ XFS_SICK_RG_SECONDARY | \
+ XFS_SICK_RG_INDIRECT)
-#define XFS_SICK_AG_ALL (XFS_SICK_AG_PRIMARY | \
+#define XFS_SICK_AG_ALL (XFS_SICK_AG_PRIMARY | \
XFS_SICK_AG_SECONDARY | \
XFS_SICK_AG_INDIRECT)
@@ -189,18 +198,17 @@ void xfs_fs_mark_healthy(struct xfs_mount *mp, unsigned int mask);
void xfs_fs_measure_sickness(struct xfs_mount *mp, unsigned int *sick,
unsigned int *checked);
-void xfs_rt_mark_sick(struct xfs_mount *mp, unsigned int mask);
-void xfs_rt_mark_corrupt(struct xfs_mount *mp, unsigned int mask);
-void xfs_rt_mark_healthy(struct xfs_mount *mp, unsigned int mask);
-void xfs_rt_measure_sickness(struct xfs_mount *mp, unsigned int *sick,
- unsigned int *checked);
+void xfs_rgno_mark_sick(struct xfs_mount *mp, xfs_rgnumber_t rgno,
+ unsigned int mask);
void xfs_agno_mark_sick(struct xfs_mount *mp, xfs_agnumber_t agno,
unsigned int mask);
-void xfs_ag_mark_sick(struct xfs_perag *pag, unsigned int mask);
-void xfs_ag_mark_corrupt(struct xfs_perag *pag, unsigned int mask);
-void xfs_ag_mark_healthy(struct xfs_perag *pag, unsigned int mask);
-void xfs_ag_measure_sickness(struct xfs_perag *pag, unsigned int *sick,
+void xfs_group_mark_sick(struct xfs_group *xg, unsigned int mask);
+#define xfs_ag_mark_sick(pag, mask) \
+ xfs_group_mark_sick(pag_group(pag), (mask))
+void xfs_group_mark_corrupt(struct xfs_group *xg, unsigned int mask);
+void xfs_group_mark_healthy(struct xfs_group *xg, unsigned int mask);
+void xfs_group_measure_sickness(struct xfs_group *xg, unsigned int *sick,
unsigned int *checked);
void xfs_inode_mark_sick(struct xfs_inode *ip, unsigned int mask);
@@ -227,22 +235,25 @@ xfs_fs_has_sickness(struct xfs_mount *mp, unsigned int mask)
}
static inline bool
-xfs_rt_has_sickness(struct xfs_mount *mp, unsigned int mask)
+xfs_group_has_sickness(
+ struct xfs_group *xg,
+ unsigned int mask)
{
- unsigned int sick, checked;
+ unsigned int sick, checked;
- xfs_rt_measure_sickness(mp, &sick, &checked);
+ xfs_group_measure_sickness(xg, &sick, &checked);
return sick & mask;
}
-static inline bool
-xfs_ag_has_sickness(struct xfs_perag *pag, unsigned int mask)
-{
- unsigned int sick, checked;
+#define xfs_ag_has_sickness(pag, mask) \
+ xfs_group_has_sickness(pag_group(pag), (mask))
+#define xfs_ag_is_healthy(pag) \
+ (!xfs_ag_has_sickness((pag), UINT_MAX))
- xfs_ag_measure_sickness(pag, &sick, &checked);
- return sick & mask;
-}
+#define xfs_rtgroup_has_sickness(rtg, mask) \
+ xfs_group_has_sickness(rtg_group(rtg), (mask))
+#define xfs_rtgroup_is_healthy(rtg) \
+ (!xfs_rtgroup_has_sickness((rtg), UINT_MAX))
static inline bool
xfs_inode_has_sickness(struct xfs_inode *ip, unsigned int mask)
@@ -260,18 +271,6 @@ xfs_fs_is_healthy(struct xfs_mount *mp)
}
static inline bool
-xfs_rt_is_healthy(struct xfs_mount *mp)
-{
- return !xfs_rt_has_sickness(mp, -1U);
-}
-
-static inline bool
-xfs_ag_is_healthy(struct xfs_perag *pag)
-{
- return !xfs_ag_has_sickness(pag, -1U);
-}
-
-static inline bool
xfs_inode_is_healthy(struct xfs_inode *ip)
{
return !xfs_inode_has_sickness(ip, -1U);
@@ -279,6 +278,8 @@ xfs_inode_is_healthy(struct xfs_inode *ip)
void xfs_fsop_geom_health(struct xfs_mount *mp, struct xfs_fsop_geom *geo);
void xfs_ag_geom_health(struct xfs_perag *pag, struct xfs_ag_geometry *ageo);
+void xfs_rtgroup_geom_health(struct xfs_rtgroup *rtg,
+ struct xfs_rtgroup_geometry *rgeo);
void xfs_bulkstat_health(struct xfs_inode *ip, struct xfs_bulkstat *bs);
#define xfs_metadata_is_sick(error) \
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 271855227514..8b84e2cf711b 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -142,7 +142,7 @@ xfs_inobt_complain_bad_rec(
xfs_warn(mp,
"%sbt record corruption in AG %d detected at %pS!",
- cur->bc_ops->name, cur->bc_ag.pag->pag_agno, fa);
+ cur->bc_ops->name, cur->bc_group->xg_gno, fa);
xfs_warn(mp,
"start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x",
irec->ir_startino, irec->ir_count, irec->ir_freecount,
@@ -170,7 +170,7 @@ xfs_inobt_get_rec(
return error;
xfs_inobt_btrec_to_irec(mp, rec, irec);
- fa = xfs_inobt_check_irec(cur->bc_ag.pag, irec);
+ fa = xfs_inobt_check_irec(to_perag(cur->bc_group), irec);
if (fa)
return xfs_inobt_complain_bad_rec(cur, fa, irec);
@@ -275,8 +275,10 @@ xfs_check_agi_freecount(
}
} while (i == 1);
- if (!xfs_is_shutdown(cur->bc_mp))
- ASSERT(freecount == cur->bc_ag.pag->pagi_freecount);
+ if (!xfs_is_shutdown(cur->bc_mp)) {
+ ASSERT(freecount ==
+ to_perag(cur->bc_group)->pagi_freecount);
+ }
}
return 0;
}
@@ -551,7 +553,7 @@ xfs_inobt_insert_sprec(
struct xfs_buf *agbp,
struct xfs_inobt_rec_incore *nrec) /* in/out: new/merged rec. */
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_btree_cur *cur;
int error;
int i;
@@ -606,15 +608,12 @@ xfs_inobt_insert_sprec(
goto error;
}
- trace_xfs_irec_merge_pre(mp, pag->pag_agno, rec.ir_startino,
- rec.ir_holemask, nrec->ir_startino,
- nrec->ir_holemask);
+ trace_xfs_irec_merge_pre(pag, &rec, nrec);
/* merge to nrec to output the updated record */
__xfs_inobt_rec_merge(nrec, &rec);
- trace_xfs_irec_merge_post(mp, pag->pag_agno, nrec->ir_startino,
- nrec->ir_holemask);
+ trace_xfs_irec_merge_post(pag, nrec);
error = xfs_inobt_rec_check_count(mp, nrec);
if (error)
@@ -648,7 +647,7 @@ xfs_finobt_insert_sprec(
struct xfs_buf *agbp,
struct xfs_inobt_rec_incore *nrec) /* in/out: new rec. */
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_btree_cur *cur;
int error;
int i;
@@ -768,8 +767,7 @@ xfs_ialloc_ag_alloc(
/* Allow space for the inode btree to split. */
args.minleft = igeo->inobt_maxlevels;
error = xfs_alloc_vextent_exact_bno(&args,
- XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
- args.agbno));
+ xfs_agbno_to_fsb(pag, args.agbno));
if (error)
return error;
@@ -811,8 +809,8 @@ xfs_ialloc_ag_alloc(
*/
args.minleft = igeo->inobt_maxlevels;
error = xfs_alloc_vextent_near_bno(&args,
- XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
- be32_to_cpu(agi->agi_root)));
+ xfs_agbno_to_fsb(pag,
+ be32_to_cpu(agi->agi_root)));
if (error)
return error;
}
@@ -824,8 +822,8 @@ xfs_ialloc_ag_alloc(
if (isaligned && args.fsbno == NULLFSBLOCK) {
args.alignment = igeo->cluster_align;
error = xfs_alloc_vextent_near_bno(&args,
- XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
- be32_to_cpu(agi->agi_root)));
+ xfs_agbno_to_fsb(pag,
+ be32_to_cpu(agi->agi_root)));
if (error)
return error;
}
@@ -860,8 +858,8 @@ sparse_alloc:
igeo->ialloc_blks;
error = xfs_alloc_vextent_near_bno(&args,
- XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
- be32_to_cpu(agi->agi_root)));
+ xfs_agbno_to_fsb(pag,
+ be32_to_cpu(agi->agi_root)));
if (error)
return error;
@@ -884,7 +882,7 @@ sparse_alloc:
* rather than a linear progression to prevent the next generation
* number from being easily guessable.
*/
- error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag->pag_agno,
+ error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag_agno(pag),
args.agbno, args.len, get_random_u32());
if (error)
@@ -915,8 +913,7 @@ sparse_alloc:
if (error == -EFSCORRUPTED) {
xfs_alert(args.mp,
"invalid sparse inode record: ino 0x%llx holemask 0x%x count %u",
- XFS_AGINO_TO_INO(args.mp, pag->pag_agno,
- rec.ir_startino),
+ xfs_agino_to_ino(pag, rec.ir_startino),
rec.ir_holemask, rec.ir_count);
xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE);
}
@@ -1076,7 +1073,7 @@ xfs_dialloc_check_ino(
if (error)
return -EAGAIN;
- error = xfs_imap_to_bp(pag->pag_mount, tp, &imap, &bp);
+ error = xfs_imap_to_bp(pag_mount(pag), tp, &imap, &bp);
if (error)
return -EAGAIN;
@@ -1127,7 +1124,7 @@ xfs_dialloc_ag_inobt(
/*
* If in the same AG as the parent, try to get near the parent.
*/
- if (pagno == pag->pag_agno) {
+ if (pagno == pag_agno(pag)) {
int doneleft; /* done, to the left */
int doneright; /* done, to the right */
@@ -1335,7 +1332,7 @@ alloc_inode:
ASSERT(offset < XFS_INODES_PER_CHUNK);
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
XFS_INODES_PER_CHUNK) == 0);
- ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
+ ino = xfs_agino_to_ino(pag, rec.ir_startino + offset);
if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
error = xfs_dialloc_check_ino(pag, tp, ino);
@@ -1604,7 +1601,7 @@ xfs_dialloc_ag(
* parent. If so, find the closest available inode to the parent. If
* not, consider the agi hint or find the first free inode in the AG.
*/
- if (pag->pag_agno == pagno)
+ if (pag_agno(pag) == pagno)
error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
else
error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
@@ -1616,7 +1613,7 @@ xfs_dialloc_ag(
ASSERT(offset < XFS_INODES_PER_CHUNK);
ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
XFS_INODES_PER_CHUNK) == 0);
- ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
+ ino = xfs_agino_to_ino(pag, rec.ir_startino + offset);
if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
error = xfs_dialloc_check_ino(pag, tp, ino);
@@ -1845,6 +1842,40 @@ out_release:
}
/*
+ * Pick an AG for the new inode.
+ *
+ * Directories, symlinks, and regular files frequently allocate at least one
+ * block, so factor in that potential expansion when we examine whether an AG
+ * has enough space for file creation. Try to keep metadata files all in the
+ * same AG.
+ */
+static inline xfs_agnumber_t
+xfs_dialloc_pick_ag(
+ struct xfs_mount *mp,
+ struct xfs_inode *dp,
+ umode_t mode)
+{
+ xfs_agnumber_t start_agno;
+
+ if (!dp)
+ return 0;
+ if (xfs_is_metadir_inode(dp)) {
+ if (mp->m_sb.sb_logstart)
+ return XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart);
+ return 0;
+ }
+
+ if (S_ISDIR(mode))
+ return (atomic_inc_return(&mp->m_agirotor) - 1) % mp->m_maxagi;
+
+ start_agno = XFS_INO_TO_AGNO(mp, dp->i_ino);
+ if (start_agno >= mp->m_maxagi)
+ start_agno = 0;
+
+ return start_agno;
+}
+
+/*
* Allocate an on-disk inode.
*
* Mode is used to tell whether the new inode is a directory and hence where to
@@ -1859,31 +1890,19 @@ xfs_dialloc(
xfs_ino_t *new_ino)
{
struct xfs_mount *mp = (*tpp)->t_mountp;
+ struct xfs_perag *pag;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ xfs_ino_t ino = NULLFSINO;
xfs_ino_t parent = args->pip ? args->pip->i_ino : 0;
- umode_t mode = args->mode & S_IFMT;
xfs_agnumber_t agno;
- int error = 0;
xfs_agnumber_t start_agno;
- struct xfs_perag *pag;
- struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ umode_t mode = args->mode & S_IFMT;
bool ok_alloc = true;
bool low_space = false;
int flags;
- xfs_ino_t ino = NULLFSINO;
+ int error = 0;
- /*
- * Directories, symlinks, and regular files frequently allocate at least
- * one block, so factor that potential expansion when we examine whether
- * an AG has enough space for file creation.
- */
- if (S_ISDIR(mode))
- start_agno = (atomic_inc_return(&mp->m_agirotor) - 1) %
- mp->m_maxagi;
- else {
- start_agno = XFS_INO_TO_AGNO(mp, parent);
- if (start_agno >= mp->m_maxagi)
- start_agno = 0;
- }
+ start_agno = xfs_dialloc_pick_ag(mp, args->pip, mode);
/*
* If we have already hit the ceiling of inode blocks then clear
@@ -1974,7 +1993,7 @@ retry:
static int
xfs_difree_inode_chunk(
struct xfs_trans *tp,
- xfs_agnumber_t agno,
+ struct xfs_perag *pag,
struct xfs_inobt_rec_incore *rec)
{
struct xfs_mount *mp = tp->t_mountp;
@@ -1988,8 +2007,7 @@ xfs_difree_inode_chunk(
if (!xfs_inobt_issparse(rec->ir_holemask)) {
/* not sparse, calculate extent info directly */
- return xfs_free_extent_later(tp,
- XFS_AGB_TO_FSB(mp, agno, sagbno),
+ return xfs_free_extent_later(tp, xfs_agbno_to_fsb(pag, sagbno),
M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
XFS_AG_RESV_NONE, 0);
}
@@ -2035,9 +2053,9 @@ xfs_difree_inode_chunk(
ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
- error = xfs_free_extent_later(tp,
- XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
- &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE, 0);
+ error = xfs_free_extent_later(tp, xfs_agbno_to_fsb(pag, agbno),
+ contigblk, &XFS_RMAP_OINFO_INODES,
+ XFS_AG_RESV_NONE, 0);
if (error)
return error;
@@ -2059,7 +2077,7 @@ xfs_difree_inobt(
struct xfs_icluster *xic,
struct xfs_inobt_rec_incore *orec)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_agi *agi = agbp->b_addr;
struct xfs_btree_cur *cur;
struct xfs_inobt_rec_incore rec;
@@ -2124,8 +2142,7 @@ xfs_difree_inobt(
if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
xic->deleted = true;
- xic->first_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno,
- rec.ir_startino);
+ xic->first_ino = xfs_agino_to_ino(pag, rec.ir_startino);
xic->alloc = xfs_inobt_irec_to_allocmask(&rec);
/*
@@ -2148,7 +2165,7 @@ xfs_difree_inobt(
goto error0;
}
- error = xfs_difree_inode_chunk(tp, pag->pag_agno, &rec);
+ error = xfs_difree_inode_chunk(tp, pag, &rec);
if (error)
goto error0;
} else {
@@ -2194,7 +2211,7 @@ xfs_difree_finobt(
xfs_agino_t agino,
struct xfs_inobt_rec_incore *ibtrec) /* inobt record */
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_btree_cur *cur;
struct xfs_inobt_rec_incore rec;
int offset = agino - ibtrec->ir_startino;
@@ -2317,17 +2334,17 @@ xfs_difree(
/*
* Break up inode number into its components.
*/
- if (pag->pag_agno != XFS_INO_TO_AGNO(mp, inode)) {
- xfs_warn(mp, "%s: agno != pag->pag_agno (%d != %d).",
- __func__, XFS_INO_TO_AGNO(mp, inode), pag->pag_agno);
+ if (pag_agno(pag) != XFS_INO_TO_AGNO(mp, inode)) {
+ xfs_warn(mp, "%s: agno != pag_agno(pag) (%d != %d).",
+ __func__, XFS_INO_TO_AGNO(mp, inode), pag_agno(pag));
ASSERT(0);
return -EINVAL;
}
agino = XFS_INO_TO_AGINO(mp, inode);
- if (inode != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
- xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
+ if (inode != xfs_agino_to_ino(pag, agino)) {
+ xfs_warn(mp, "%s: inode != xfs_agino_to_ino() (%llu != %llu).",
__func__, (unsigned long long)inode,
- (unsigned long long)XFS_AGINO_TO_INO(mp, pag->pag_agno, agino));
+ (unsigned long long)xfs_agino_to_ino(pag, agino));
ASSERT(0);
return -EINVAL;
}
@@ -2380,7 +2397,7 @@ xfs_imap_lookup(
xfs_agblock_t *offset_agbno,
int flags)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_inobt_rec_incore rec;
struct xfs_btree_cur *cur;
struct xfs_buf *agbp;
@@ -2391,7 +2408,7 @@ xfs_imap_lookup(
if (error) {
xfs_alert(mp,
"%s: xfs_ialloc_read_agi() returned error %d, agno %d",
- __func__, error, pag->pag_agno);
+ __func__, error, pag_agno(pag));
return error;
}
@@ -2441,7 +2458,7 @@ xfs_imap(
struct xfs_imap *imap, /* location map structure */
uint flags) /* flags for inode btree lookup */
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
xfs_agblock_t agbno; /* block number of inode in the alloc group */
xfs_agino_t agino; /* inode number within alloc group */
xfs_agblock_t chunk_agbno; /* first block in inode chunk */
@@ -2458,7 +2475,7 @@ xfs_imap(
agino = XFS_INO_TO_AGINO(mp, ino);
agbno = XFS_AGINO_TO_AGBNO(mp, agino);
if (agbno >= mp->m_sb.sb_agblocks ||
- ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
+ ino != xfs_agino_to_ino(pag, agino)) {
error = -EINVAL;
#ifdef DEBUG
/*
@@ -2473,11 +2490,11 @@ xfs_imap(
__func__, (unsigned long long)agbno,
(unsigned long)mp->m_sb.sb_agblocks);
}
- if (ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
+ if (ino != xfs_agino_to_ino(pag, agino)) {
xfs_alert(mp,
- "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
+ "%s: ino (0x%llx) != xfs_agino_to_ino() (0x%llx)",
__func__, ino,
- XFS_AGINO_TO_INO(mp, pag->pag_agno, agino));
+ xfs_agino_to_ino(pag, agino));
}
xfs_stack_trace();
#endif /* DEBUG */
@@ -2507,7 +2524,7 @@ xfs_imap(
offset = XFS_INO_TO_OFFSET(mp, ino);
ASSERT(offset < mp->m_sb.sb_inopblock);
- imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, agbno);
+ imap->im_blkno = xfs_agbno_to_daddr(pag, agbno);
imap->im_len = XFS_FSB_TO_BB(mp, 1);
imap->im_boffset = (unsigned short)(offset <<
mp->m_sb.sb_inodelog);
@@ -2537,7 +2554,7 @@ out_map:
offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
XFS_INO_TO_OFFSET(mp, ino);
- imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, cluster_agbno);
+ imap->im_blkno = xfs_agbno_to_daddr(pag, cluster_agbno);
imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog);
@@ -2733,13 +2750,13 @@ xfs_read_agi(
xfs_buf_flags_t flags,
struct xfs_buf **agibpp)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
int error;
- trace_xfs_read_agi(pag->pag_mount, pag->pag_agno);
+ trace_xfs_read_agi(pag);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
- XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGI_DADDR(mp)),
+ XFS_AG_DADDR(mp, pag_agno(pag), XFS_AGI_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), flags, agibpp, &xfs_agi_buf_ops);
if (xfs_metadata_is_sick(error))
xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
@@ -2767,7 +2784,7 @@ xfs_ialloc_read_agi(
struct xfs_agi *agi;
int error;
- trace_xfs_ialloc_read_agi(pag->pag_mount, pag->pag_agno);
+ trace_xfs_ialloc_read_agi(pag);
error = xfs_read_agi(pag, tp,
(flags & XFS_IALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,
@@ -2787,7 +2804,7 @@ xfs_ialloc_read_agi(
* we are in the middle of a forced shutdown.
*/
ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
- xfs_is_shutdown(pag->pag_mount));
+ xfs_is_shutdown(pag_mount(pag)));
if (agibpp)
*agibpp = agibp;
else
@@ -2887,7 +2904,7 @@ xfs_ialloc_count_inodes_rec(
xfs_failaddr_t fa;
xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec);
- fa = xfs_inobt_check_irec(cur->bc_ag.pag, &irec);
+ fa = xfs_inobt_check_irec(to_perag(cur->bc_group), &irec);
if (fa)
return xfs_inobt_complain_bad_rec(cur, fa, &irec);
@@ -3126,13 +3143,13 @@ xfs_ialloc_check_shrink(
int has;
int error;
- if (!xfs_has_sparseinodes(pag->pag_mount))
+ if (!xfs_has_sparseinodes(pag_mount(pag)))
return 0;
cur = xfs_inobt_init_cursor(pag, tp, agibp);
/* Look up the inobt record that would correspond to the new EOFS. */
- agino = XFS_AGB_TO_AGINO(pag->pag_mount, new_length);
+ agino = XFS_AGB_TO_AGINO(pag_mount(pag), new_length);
error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has);
if (error || !has)
goto out;
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index 401b42d52af6..9b34896dd1a3 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -37,7 +37,7 @@ STATIC struct xfs_btree_cur *
xfs_inobt_dup_cursor(
struct xfs_btree_cur *cur)
{
- return xfs_inobt_init_cursor(cur->bc_ag.pag, cur->bc_tp,
+ return xfs_inobt_init_cursor(to_perag(cur->bc_group), cur->bc_tp,
cur->bc_ag.agbp);
}
@@ -45,7 +45,7 @@ STATIC struct xfs_btree_cur *
xfs_finobt_dup_cursor(
struct xfs_btree_cur *cur)
{
- return xfs_finobt_init_cursor(cur->bc_ag.pag, cur->bc_tp,
+ return xfs_finobt_init_cursor(to_perag(cur->bc_group), cur->bc_tp,
cur->bc_ag.agbp);
}
@@ -112,7 +112,7 @@ __xfs_inobt_alloc_block(
memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
- args.pag = cur->bc_ag.pag;
+ args.pag = to_perag(cur->bc_group);
args.oinfo = XFS_RMAP_OINFO_INOBT;
args.minlen = 1;
args.maxlen = 1;
@@ -120,7 +120,7 @@ __xfs_inobt_alloc_block(
args.resv = resv;
error = xfs_alloc_vextent_near_bno(&args,
- XFS_AGB_TO_FSB(args.mp, args.pag->pag_agno, sbno));
+ xfs_agbno_to_fsb(args.pag, sbno));
if (error)
return error;
@@ -248,7 +248,7 @@ xfs_inobt_init_ptr_from_cur(
{
struct xfs_agi *agi = cur->bc_ag.agbp->b_addr;
- ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agi->agi_seqno));
+ ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agi->agi_seqno));
ptr->s = agi->agi_root;
}
@@ -260,7 +260,8 @@ xfs_finobt_init_ptr_from_cur(
{
struct xfs_agi *agi = cur->bc_ag.agbp->b_addr;
- ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agi->agi_seqno));
+ ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agi->agi_seqno));
+
ptr->s = agi->agi_free_root;
}
@@ -478,12 +479,12 @@ xfs_inobt_init_cursor(
struct xfs_trans *tp,
struct xfs_buf *agbp)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_btree_cur *cur;
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_inobt_ops,
M_IGEO(mp)->inobt_maxlevels, xfs_inobt_cur_cache);
- cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_group = xfs_group_hold(pag_group(pag));
cur->bc_ag.agbp = agbp;
if (agbp) {
struct xfs_agi *agi = agbp->b_addr;
@@ -504,12 +505,12 @@ xfs_finobt_init_cursor(
struct xfs_trans *tp,
struct xfs_buf *agbp)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_btree_cur *cur;
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_finobt_ops,
M_IGEO(mp)->inobt_maxlevels, xfs_inobt_cur_cache);
- cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_group = xfs_group_hold(pag_group(pag));
cur->bc_ag.agbp = agbp;
if (agbp) {
struct xfs_agi *agi = agbp->b_addr;
@@ -715,8 +716,8 @@ static xfs_extlen_t
xfs_inobt_max_size(
struct xfs_perag *pag)
{
- struct xfs_mount *mp = pag->pag_mount;
- xfs_agblock_t agblocks = pag->block_count;
+ struct xfs_mount *mp = pag_mount(pag);
+ xfs_agblock_t agblocks = pag_group(pag)->xg_block_count;
/* Bail out if we're uninitialized, which can happen in mkfs. */
if (M_IGEO(mp)->inobt_mxr[0] == 0)
@@ -727,7 +728,7 @@ xfs_inobt_max_size(
* never be available for the kinds of things that would require btree
* expansion. We therefore can pretend the space isn't there.
*/
- if (xfs_ag_contains_log(mp, pag->pag_agno))
+ if (xfs_ag_contains_log(mp, pag_agno(pag)))
agblocks -= mp->m_sb.sb_logblocks;
return xfs_btree_calc_size(M_IGEO(mp)->inobt_mnr,
@@ -791,10 +792,10 @@ xfs_finobt_calc_reserves(
xfs_extlen_t tree_len = 0;
int error;
- if (!xfs_has_finobt(pag->pag_mount))
+ if (!xfs_has_finobt(pag_mount(pag)))
return 0;
- if (xfs_has_inobtcounts(pag->pag_mount))
+ if (xfs_has_inobtcounts(pag_mount(pag)))
error = xfs_finobt_read_blocks(pag, tp, &tree_len);
else
error = xfs_finobt_count_blocks(pag, tp, &tree_len);
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 79babeac9d75..424861fbf1bd 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -19,6 +19,7 @@
#include "xfs_ialloc.h"
#include "xfs_dir2.h"
#include "xfs_health.h"
+#include "xfs_metafile.h"
#include <linux/iversion.h>
@@ -209,12 +210,15 @@ xfs_inode_from_disk(
* They will also be unconditionally written back to disk as v2 inodes.
*/
if (unlikely(from->di_version == 1)) {
- set_nlink(inode, be16_to_cpu(from->di_onlink));
+ /* di_metatype used to be di_onlink */
+ set_nlink(inode, be16_to_cpu(from->di_metatype));
ip->i_projid = 0;
} else {
set_nlink(inode, be32_to_cpu(from->di_nlink));
ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 |
be16_to_cpu(from->di_projid_lo);
+ if (xfs_dinode_is_metadir(from))
+ ip->i_metatype = be16_to_cpu(from->di_metatype);
}
i_uid_write(inode, be32_to_cpu(from->di_uid));
@@ -315,7 +319,10 @@ xfs_inode_to_disk(
struct inode *inode = VFS_I(ip);
to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
- to->di_onlink = 0;
+ if (xfs_is_metadir_inode(ip))
+ to->di_metatype = cpu_to_be16(ip->i_metatype);
+ else
+ to->di_metatype = 0;
to->di_format = xfs_ifork_format(&ip->i_df);
to->di_uid = cpu_to_be32(i_uid_read(inode));
@@ -483,6 +490,69 @@ xfs_dinode_verify_nrext64(
return NULL;
}
+/*
+ * Validate all the picky requirements we have for a file that claims to be
+ * filesystem metadata.
+ */
+xfs_failaddr_t
+xfs_dinode_verify_metadir(
+ struct xfs_mount *mp,
+ struct xfs_dinode *dip,
+ uint16_t mode,
+ uint16_t flags,
+ uint64_t flags2)
+{
+ if (!xfs_has_metadir(mp))
+ return __this_address;
+
+ /* V5 filesystem only */
+ if (dip->di_version < 3)
+ return __this_address;
+
+ if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
+ return __this_address;
+
+ /* V3 inode fields that are always zero */
+ if ((flags2 & XFS_DIFLAG2_NREXT64) && dip->di_nrext64_pad)
+ return __this_address;
+ if (!(flags2 & XFS_DIFLAG2_NREXT64) && dip->di_flushiter)
+ return __this_address;
+
+ /* Metadata files can only be directories or regular files */
+ if (!S_ISDIR(mode) && !S_ISREG(mode))
+ return __this_address;
+
+ /* They must have zero access permissions */
+ if (mode & 0777)
+ return __this_address;
+
+ /* DMAPI event and state masks are zero */
+ if (dip->di_dmevmask || dip->di_dmstate)
+ return __this_address;
+
+ /*
+ * User and group IDs must be zero. The project ID is used for
+ * grouping inodes. Metadata inodes are never accounted to quotas.
+ */
+ if (dip->di_uid || dip->di_gid)
+ return __this_address;
+
+ /* Mandatory inode flags must be set */
+ if (S_ISDIR(mode)) {
+ if ((flags & XFS_METADIR_DIFLAGS) != XFS_METADIR_DIFLAGS)
+ return __this_address;
+ } else {
+ if ((flags & XFS_METAFILE_DIFLAGS) != XFS_METAFILE_DIFLAGS)
+ return __this_address;
+ }
+
+ /* dax flags2 must not be set */
+ if (flags2 & XFS_DIFLAG2_DAX)
+ return __this_address;
+
+ return NULL;
+}
+
xfs_failaddr_t
xfs_dinode_verify(
struct xfs_mount *mp,
@@ -523,8 +593,11 @@ xfs_dinode_verify(
* di_nlink==0 on a V1 inode. V2/3 inodes would get written out with
* di_onlink==0, so we can check that.
*/
- if (dip->di_version >= 2) {
- if (dip->di_onlink)
+ if (dip->di_version == 2) {
+ if (dip->di_metatype)
+ return __this_address;
+ } else if (dip->di_version >= 3) {
+ if (!xfs_dinode_is_metadir(dip) && dip->di_metatype)
return __this_address;
}
@@ -546,7 +619,8 @@ xfs_dinode_verify(
if (dip->di_nlink)
return __this_address;
} else {
- if (dip->di_onlink)
+ /* di_metatype used to be di_onlink */
+ if (dip->di_metatype)
return __this_address;
}
}
@@ -663,6 +737,12 @@ xfs_dinode_verify(
!xfs_has_bigtime(mp))
return __this_address;
+ if (flags2 & XFS_DIFLAG2_METADATA) {
+ fa = xfs_dinode_verify_metadir(mp, dip, mode, flags, flags2);
+ if (fa)
+ return fa;
+ }
+
return NULL;
}
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 585ed5a110af..8d43d2641c73 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -28,6 +28,9 @@ int xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
struct xfs_dinode *dip);
+xfs_failaddr_t xfs_dinode_verify_metadir(struct xfs_mount *mp,
+ struct xfs_dinode *dip, uint16_t mode, uint16_t flags,
+ uint64_t flags2);
xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp,
uint32_t extsize, uint16_t mode, uint16_t flags);
xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp,
diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c
index cc38e1c3c3e1..deb0b7c00a1f 100644
--- a/fs/xfs/libxfs/xfs_inode_util.c
+++ b/fs/xfs/libxfs/xfs_inode_util.c
@@ -224,6 +224,8 @@ xfs_inode_inherit_flags2(
}
if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
ip->i_diflags2 |= XFS_DIFLAG2_DAX;
+ if (xfs_is_metadir_inode(pip))
+ ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
/* Don't let invalid cowextsize hints propagate. */
failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
@@ -442,8 +444,8 @@ xfs_iunlink_update_bucket(
ASSERT(xfs_verify_agino_or_null(pag, new_agino));
old_value = be32_to_cpu(agi->agi_unlinked[bucket_index]);
- trace_xfs_iunlink_update_bucket(tp->t_mountp, pag->pag_agno, bucket_index,
- old_value, new_agino);
+ trace_xfs_iunlink_update_bucket(pag, bucket_index, old_value,
+ new_agino);
/*
* We should never find the head of the list already set to the value
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 3e6682ed656b..15dec19b6c32 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -248,6 +248,8 @@ typedef struct xfs_trans_header {
#define XFS_LI_ATTRD 0x1247 /* attr set/remove done */
#define XFS_LI_XMI 0x1248 /* mapping exchange intent */
#define XFS_LI_XMD 0x1249 /* mapping exchange done */
+#define XFS_LI_EFI_RT 0x124a /* realtime extent free intent */
+#define XFS_LI_EFD_RT 0x124b /* realtime extent free done */
#define XFS_LI_TYPE_DESC \
{ XFS_LI_EFI, "XFS_LI_EFI" }, \
@@ -267,7 +269,9 @@ typedef struct xfs_trans_header {
{ XFS_LI_ATTRI, "XFS_LI_ATTRI" }, \
{ XFS_LI_ATTRD, "XFS_LI_ATTRD" }, \
{ XFS_LI_XMI, "XFS_LI_XMI" }, \
- { XFS_LI_XMD, "XFS_LI_XMD" }
+ { XFS_LI_XMD, "XFS_LI_XMD" }, \
+ { XFS_LI_EFI_RT, "XFS_LI_EFI_RT" }, \
+ { XFS_LI_EFD_RT, "XFS_LI_EFD_RT" }
/*
* Inode Log Item Format definitions.
@@ -404,7 +408,7 @@ struct xfs_log_dinode {
uint16_t di_mode; /* mode and type of file */
int8_t di_version; /* inode version */
int8_t di_format; /* format of di_c data */
- uint8_t di_pad3[2]; /* unused in v2/3 inodes */
+ uint16_t di_metatype; /* metadata type, if DIFLAG2_METADATA */
uint32_t di_uid; /* owner's user id */
uint32_t di_gid; /* owner's group id */
uint32_t di_nlink; /* number of links to file */
diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
index 521d327e4c89..5397a8ff004d 100644
--- a/fs/xfs/libxfs/xfs_log_recover.h
+++ b/fs/xfs/libxfs/xfs_log_recover.h
@@ -77,6 +77,8 @@ extern const struct xlog_recover_item_ops xlog_attri_item_ops;
extern const struct xlog_recover_item_ops xlog_attrd_item_ops;
extern const struct xlog_recover_item_ops xlog_xmi_item_ops;
extern const struct xlog_recover_item_ops xlog_xmd_item_ops;
+extern const struct xlog_recover_item_ops xlog_rtefi_item_ops;
+extern const struct xlog_recover_item_ops xlog_rtefd_item_ops;
/*
* Macros, structures, prototypes for internal log manager use.
diff --git a/fs/xfs/libxfs/xfs_metadir.c b/fs/xfs/libxfs/xfs_metadir.c
new file mode 100644
index 000000000000..bae7377c0f22
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_metadir.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_trans.h"
+#include "xfs_metafile.h"
+#include "xfs_metadir.h"
+#include "xfs_trace.h"
+#include "xfs_inode.h"
+#include "xfs_quota.h"
+#include "xfs_ialloc.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_parent.h"
+#include "xfs_health.h"
+
+/*
+ * Metadata Directory Tree
+ * =======================
+ *
+ * These functions provide an abstraction layer for looking up, creating, and
+ * deleting metadata inodes that live within a special metadata directory tree.
+ *
+ * This code does not manage the five existing metadata inodes: real time
+ * bitmap & summary; and the user, group, and project quotas. All other
+ * metadata inodes must use only the xfs_meta{dir,file}_* functions.
+ *
+ * Callers wishing to create or hardlink a metadata inode must create an
+ * xfs_metadir_update structure, call the appropriate xfs_metadir* function,
+ * and then call xfs_metadir_commit or xfs_metadir_cancel to commit or cancel
+ * the update. Files in the metadata directory tree currently cannot be
+ * unlinked.
+ *
+ * When the metadir feature is enabled, all metadata inodes must have the
+ * "metadata" inode flag set to prevent them from being exposed to the outside
+ * world.
+ *
+ * Callers must take the ILOCK of any inode in the metadata directory tree to
+ * synchronize access to that inode. It is never necessary to take the IOLOCK
+ * or the MMAPLOCK since metadata inodes must not be exposed to user space.
+ */
+
+static inline void
+xfs_metadir_set_xname(
+ struct xfs_name *xname,
+ const char *path,
+ unsigned char ftype)
+{
+ xname->name = (const unsigned char *)path;
+ xname->len = strlen(path);
+ xname->type = ftype;
+}
+
+/*
+ * Given a parent directory @dp and a metadata inode path component @xname,
+ * look up the inode number in the directory, returning it in @ino.
+ * @xname.type must match the directory entry's ftype.
+ *
+ * Caller must hold ILOCK_EXCL.
+ */
+static inline int
+xfs_metadir_lookup(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ struct xfs_name *xname,
+ xfs_ino_t *ino)
+{
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_da_args args = {
+ .trans = tp,
+ .dp = dp,
+ .geo = mp->m_dir_geo,
+ .name = xname->name,
+ .namelen = xname->len,
+ .hashval = xfs_dir2_hashname(mp, xname),
+ .whichfork = XFS_DATA_FORK,
+ .op_flags = XFS_DA_OP_OKNOENT,
+ .owner = dp->i_ino,
+ };
+ int error;
+
+ if (!S_ISDIR(VFS_I(dp)->i_mode)) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+ if (xfs_is_shutdown(mp))
+ return -EIO;
+
+ error = xfs_dir_lookup_args(&args);
+ if (error)
+ return error;
+
+ if (!xfs_verify_ino(mp, args.inumber)) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+ if (xname->type != XFS_DIR3_FT_UNKNOWN && xname->type != args.filetype) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+
+ trace_xfs_metadir_lookup(dp, xname, args.inumber);
+ *ino = args.inumber;
+ return 0;
+}
+
+/*
+ * Look up and read a metadata inode from the metadata directory. If the path
+ * component doesn't exist, return -ENOENT.
+ */
+int
+xfs_metadir_load(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ const char *path,
+ enum xfs_metafile_type metafile_type,
+ struct xfs_inode **ipp)
+{
+ struct xfs_name xname;
+ xfs_ino_t ino;
+ int error;
+
+ xfs_metadir_set_xname(&xname, path, XFS_DIR3_FT_UNKNOWN);
+
+ xfs_ilock(dp, XFS_ILOCK_EXCL);
+ error = xfs_metadir_lookup(tp, dp, &xname, &ino);
+ xfs_iunlock(dp, XFS_ILOCK_EXCL);
+ if (error)
+ return error;
+ return xfs_trans_metafile_iget(tp, ino, metafile_type, ipp);
+}
+
+/*
+ * Unlock and release resources after committing (or cancelling) a metadata
+ * directory tree operation. The caller retains its reference to @upd->ip
+ * and must release it explicitly.
+ */
+static inline void
+xfs_metadir_teardown(
+ struct xfs_metadir_update *upd,
+ int error)
+{
+ trace_xfs_metadir_teardown(upd, error);
+
+ if (upd->ppargs) {
+ xfs_parent_finish(upd->dp->i_mount, upd->ppargs);
+ upd->ppargs = NULL;
+ }
+
+ if (upd->ip) {
+ if (upd->ip_locked)
+ xfs_iunlock(upd->ip, XFS_ILOCK_EXCL);
+ upd->ip_locked = false;
+ }
+
+ if (upd->dp_locked)
+ xfs_iunlock(upd->dp, XFS_ILOCK_EXCL);
+ upd->dp_locked = false;
+}
+
+/*
+ * Begin the process of creating a metadata file by allocating a transaction
+ * and taking whatever resources we're going to need.
+ */
+int
+xfs_metadir_start_create(
+ struct xfs_metadir_update *upd)
+{
+ struct xfs_mount *mp = upd->dp->i_mount;
+ int error;
+
+ ASSERT(upd->dp != NULL);
+ ASSERT(upd->ip == NULL);
+ ASSERT(xfs_has_metadir(mp));
+ ASSERT(upd->metafile_type != XFS_METAFILE_UNKNOWN);
+
+ error = xfs_parent_start(mp, &upd->ppargs);
+ if (error)
+ return error;
+
+ /*
+ * If we ever need the ability to create rt metadata files on a
+ * pre-metadir filesystem, we'll need to dqattach the parent here.
+ * Currently we assume that mkfs will create the files and quotacheck
+ * will account for them.
+ */
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create,
+ xfs_create_space_res(mp, MAXNAMELEN), 0, 0, &upd->tp);
+ if (error)
+ goto out_teardown;
+
+ /*
+ * Lock the parent directory. We can't ijoin it to the transaction
+ * until after the child file has been created.
+ */
+ xfs_ilock(upd->dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
+ upd->dp_locked = true;
+
+ trace_xfs_metadir_start_create(upd);
+ return 0;
+out_teardown:
+ xfs_metadir_teardown(upd, error);
+ return error;
+}
+
+/*
+ * Create a metadata inode with the given @mode, and insert it into the
+ * metadata directory tree at the given @upd->path. The path up to the final
+ * component must already exist. The final path component must not exist.
+ *
+ * The new metadata inode will be attached to the update structure @upd->ip,
+ * with the ILOCK held until the caller releases it.
+ *
+ * NOTE: This function may return a new inode to the caller even if it returns
+ * a negative error code. If an inode is passed back, the caller must finish
+ * setting up the inode before releasing it.
+ */
+int
+xfs_metadir_create(
+ struct xfs_metadir_update *upd,
+ umode_t mode)
+{
+ struct xfs_icreate_args args = {
+ .pip = upd->dp,
+ .mode = mode,
+ };
+ struct xfs_name xname;
+ struct xfs_dir_update du = {
+ .dp = upd->dp,
+ .name = &xname,
+ .ppargs = upd->ppargs,
+ };
+ struct xfs_mount *mp = upd->dp->i_mount;
+ xfs_ino_t ino;
+ unsigned int resblks;
+ int error;
+
+ xfs_assert_ilocked(upd->dp, XFS_ILOCK_EXCL);
+
+ /* Check that the name does not already exist in the directory. */
+ xfs_metadir_set_xname(&xname, upd->path, XFS_DIR3_FT_UNKNOWN);
+ error = xfs_metadir_lookup(upd->tp, upd->dp, &xname, &ino);
+ switch (error) {
+ case -ENOENT:
+ break;
+ case 0:
+ error = -EEXIST;
+ fallthrough;
+ default:
+ return error;
+ }
+
+ /*
+ * A newly created regular or special file has just one directory
+ * entry pointing to it, but a directory also has the "." entry
+ * pointing to itself.
+ */
+ error = xfs_dialloc(&upd->tp, &args, &ino);
+ if (error)
+ return error;
+ error = xfs_icreate(upd->tp, ino, &args, &upd->ip);
+ if (error)
+ return error;
+ du.ip = upd->ip;
+ xfs_metafile_set_iflag(upd->tp, upd->ip, upd->metafile_type);
+ upd->ip_locked = true;
+
+ /*
+ * Join the directory inode to the transaction. We do not do it
+ * earlier because xfs_dialloc rolls the transaction.
+ */
+ xfs_trans_ijoin(upd->tp, upd->dp, 0);
+
+ /* Create the entry. */
+ if (S_ISDIR(args.mode))
+ resblks = xfs_mkdir_space_res(mp, xname.len);
+ else
+ resblks = xfs_create_space_res(mp, xname.len);
+ xname.type = xfs_mode_to_ftype(args.mode);
+
+ trace_xfs_metadir_try_create(upd);
+
+ error = xfs_dir_create_child(upd->tp, resblks, &du);
+ if (error)
+ return error;
+
+ /* Metadir files are not accounted to quota. */
+
+ trace_xfs_metadir_create(upd);
+
+ return 0;
+}
+
+#ifndef __KERNEL__
+/*
+ * Begin the process of linking a metadata file by allocating a transaction
+ * and locking whatever resources we're going to need.
+ */
+int
+xfs_metadir_start_link(
+ struct xfs_metadir_update *upd)
+{
+ struct xfs_mount *mp = upd->dp->i_mount;
+ unsigned int resblks;
+ int nospace_error = 0;
+ int error;
+
+ ASSERT(upd->dp != NULL);
+ ASSERT(upd->ip != NULL);
+ ASSERT(xfs_has_metadir(mp));
+
+ error = xfs_parent_start(mp, &upd->ppargs);
+ if (error)
+ return error;
+
+ resblks = xfs_link_space_res(mp, MAXNAMELEN);
+ error = xfs_trans_alloc_dir(upd->dp, &M_RES(mp)->tr_link, upd->ip,
+ &resblks, &upd->tp, &nospace_error);
+ if (error)
+ goto out_teardown;
+ if (!resblks) {
+ /* No reservationless updates. Unlock by hand: *_locked isn't set yet. */
+ xfs_trans_cancel(upd->tp);
+ upd->tp = NULL;
+ xfs_iunlock(upd->dp, XFS_ILOCK_EXCL);
+ xfs_iunlock(upd->ip, XFS_ILOCK_EXCL);
+ error = nospace_error;
+ goto out_teardown;
+ }
+
+ upd->dp_locked = true;
+ upd->ip_locked = true;
+
+ trace_xfs_metadir_start_link(upd);
+ return 0;
+out_teardown:
+ xfs_metadir_teardown(upd, error);
+ return error;
+}
+
+/*
+ * Link the metadata directory entry named by @upd->path to the inode
+ * @upd->ip. The path (up to the final component) must already exist, but
+ * the final component must not already exist.
+ */
+int
+xfs_metadir_link(
+ struct xfs_metadir_update *upd)
+{
+ struct xfs_name xname;
+ struct xfs_dir_update du = {
+ .dp = upd->dp,
+ .name = &xname,
+ .ip = upd->ip,
+ .ppargs = upd->ppargs,
+ };
+ struct xfs_mount *mp = upd->dp->i_mount;
+ xfs_ino_t ino;
+ unsigned int resblks;
+ int error;
+
+ xfs_assert_ilocked(upd->dp, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(upd->ip, XFS_ILOCK_EXCL);
+
+ /* Look up the name in the current directory. */
+ xfs_metadir_set_xname(&xname, upd->path,
+ xfs_mode_to_ftype(VFS_I(upd->ip)->i_mode));
+ error = xfs_metadir_lookup(upd->tp, upd->dp, &xname, &ino);
+ switch (error) {
+ case -ENOENT:
+ break;
+ case 0:
+ error = -EEXIST;
+ fallthrough;
+ default:
+ return error;
+ }
+
+ resblks = xfs_link_space_res(mp, xname.len);
+ error = xfs_dir_add_child(upd->tp, resblks, &du);
+ if (error)
+ return error;
+
+ trace_xfs_metadir_link(upd);
+
+ return 0;
+}
+#endif /* ! __KERNEL__ */
+
+/* Commit a metadir update and unlock/drop all resources, even on failure. */
+int
+xfs_metadir_commit(
+ struct xfs_metadir_update *upd)
+{
+ int error;
+
+ trace_xfs_metadir_commit(upd);
+
+ error = xfs_trans_commit(upd->tp);
+ upd->tp = NULL;
+
+ xfs_metadir_teardown(upd, error);
+ return error;
+}
+
+/* Cancel a metadir update and unlock/drop all resources; @error is traced. */
+void
+xfs_metadir_cancel(
+ struct xfs_metadir_update *upd,
+ int error)
+{
+ trace_xfs_metadir_cancel(upd);
+
+ xfs_trans_cancel(upd->tp);
+ upd->tp = NULL;
+
+ xfs_metadir_teardown(upd, error);
+}
+
+/* Create a metadata directory for the last component of the path. */
+int
+xfs_metadir_mkdir(
+ struct xfs_inode *dp,
+ const char *path,
+ struct xfs_inode **ipp)
+{
+ struct xfs_metadir_update upd = {
+ .dp = dp,
+ .path = path,
+ .metafile_type = XFS_METAFILE_DIR,
+ };
+ int error;
+
+ if (xfs_is_shutdown(dp->i_mount))
+ return -EIO;
+
+ /* Allocate a transaction to create the last directory. */
+ error = xfs_metadir_start_create(&upd);
+ if (error)
+ return error;
+
+ /* Create the subdirectory and take our reference. */
+ error = xfs_metadir_create(&upd, S_IFDIR);
+ if (error)
+ goto out_cancel;
+
+ error = xfs_metadir_commit(&upd);
+ if (error)
+ goto out_irele;
+
+ xfs_finish_inode_setup(upd.ip);
+ *ipp = upd.ip;
+ return 0;
+
+out_cancel:
+ xfs_metadir_cancel(&upd, error);
+out_irele:
+ /* Have to finish setting up the inode to ensure it's deleted. */
+ if (upd.ip) {
+ xfs_finish_inode_setup(upd.ip);
+ xfs_irele(upd.ip);
+ }
+ return error;
+}
diff --git a/fs/xfs/libxfs/xfs_metadir.h b/fs/xfs/libxfs/xfs_metadir.h
new file mode 100644
index 000000000000..bfecac7d3d14
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_metadir.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_METADIR_H__
+#define __XFS_METADIR_H__
+
+/* Cleanup widget for metadata inode creation and deletion. */
+struct xfs_metadir_update {
+ /* Parent directory */
+ struct xfs_inode *dp;
+
+ /* Path to metadata file */
+ const char *path;
+
+ /* Parent pointer update context */
+ struct xfs_parent_args *ppargs;
+
+ /* Child metadata file */
+ struct xfs_inode *ip;
+
+ struct xfs_trans *tp; /* transaction carrying this update */
+
+ enum xfs_metafile_type metafile_type; /* type stamped on a created file */
+
+ unsigned int dp_locked:1; /* ILOCK on @dp is held by this update */
+ unsigned int ip_locked:1; /* ILOCK on @ip is held by this update */
+};
+
+int xfs_metadir_load(struct xfs_trans *tp, struct xfs_inode *dp,
+ const char *path, enum xfs_metafile_type metafile_type,
+ struct xfs_inode **ipp);
+
+int xfs_metadir_start_create(struct xfs_metadir_update *upd);
+int xfs_metadir_create(struct xfs_metadir_update *upd, umode_t mode);
+
+int xfs_metadir_start_link(struct xfs_metadir_update *upd);
+int xfs_metadir_link(struct xfs_metadir_update *upd);
+
+int xfs_metadir_commit(struct xfs_metadir_update *upd);
+void xfs_metadir_cancel(struct xfs_metadir_update *upd, int error);
+
+int xfs_metadir_mkdir(struct xfs_inode *dp, const char *path,
+ struct xfs_inode **ipp);
+
+#endif /* __XFS_METADIR_H__ */
diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c
new file mode 100644
index 000000000000..adeb25d1a444
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_metafile.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_trans.h"
+#include "xfs_metafile.h"
+#include "xfs_trace.h"
+#include "xfs_inode.h"
+
+/* Flag an inode as metadata of type @metafile_type and log the inode core. */
+void
+xfs_metafile_set_iflag(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ enum xfs_metafile_type metafile_type)
+{
+ VFS_I(ip)->i_mode &= ~0777; /* clear the rwx permission bits */
+ VFS_I(ip)->i_uid = GLOBAL_ROOT_UID;
+ VFS_I(ip)->i_gid = GLOBAL_ROOT_GID;
+ if (S_ISDIR(VFS_I(ip)->i_mode))
+ ip->i_diflags |= XFS_METADIR_DIFLAGS;
+ else
+ ip->i_diflags |= XFS_METAFILE_DIFLAGS;
+ ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
+ ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
+ ip->i_metatype = metafile_type;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
+
+/* Clear XFS_DIFLAG2_METADATA; the inode must already have no links. */
+void
+xfs_metafile_clear_iflag(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
+{
+ ASSERT(xfs_is_metadir_inode(ip));
+ ASSERT(VFS_I(ip)->i_nlink == 0);
+
+ ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
diff --git a/fs/xfs/libxfs/xfs_metafile.h b/fs/xfs/libxfs/xfs_metafile.h
new file mode 100644
index 000000000000..acec400123db
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_metafile.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_METAFILE_H__
+#define __XFS_METAFILE_H__
+
+/* All metadata files must have these flags set. */
+#define XFS_METAFILE_DIFLAGS (XFS_DIFLAG_IMMUTABLE | \
+ XFS_DIFLAG_SYNC | \
+ XFS_DIFLAG_NOATIME | \
+ XFS_DIFLAG_NODUMP | \
+ XFS_DIFLAG_NODEFRAG)
+
+/* All metadata directories must have these flags set. */
+#define XFS_METADIR_DIFLAGS (XFS_METAFILE_DIFLAGS | \
+ XFS_DIFLAG_NOSYMLINKS)
+
+void xfs_metafile_set_iflag(struct xfs_trans *tp, struct xfs_inode *ip,
+ enum xfs_metafile_type metafile_type);
+void xfs_metafile_clear_iflag(struct xfs_trans *tp, struct xfs_inode *ip);
+
+/* Code specific to kernel/userspace; must be provided externally. */
+
+int xfs_trans_metafile_iget(struct xfs_trans *tp, xfs_ino_t ino,
+ enum xfs_metafile_type metafile_type, struct xfs_inode **ipp);
+int xfs_metafile_iget(struct xfs_mount *mp, xfs_ino_t ino,
+ enum xfs_metafile_type metafile_type, struct xfs_inode **ipp);
+
+#endif /* __XFS_METAFILE_H__ */
diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h
index 23c133fd36f5..ad0dedf00f18 100644
--- a/fs/xfs/libxfs/xfs_ondisk.h
+++ b/fs/xfs/libxfs/xfs_ondisk.h
@@ -19,40 +19,46 @@
static_assert((value) == (expected), \
"XFS: value of " #value " is wrong, expected " #expected)
+#define XFS_CHECK_SB_OFFSET(field, offset) \
+ XFS_CHECK_OFFSET(struct xfs_dsb, field, offset); \
+ XFS_CHECK_OFFSET(struct xfs_sb, field, offset);
+
static inline void __init
xfs_check_ondisk_structs(void)
{
- /* ag/file structures */
+ /* file structures */
XFS_CHECK_STRUCT_SIZE(struct xfs_acl, 4);
XFS_CHECK_STRUCT_SIZE(struct xfs_acl_entry, 12);
- XFS_CHECK_STRUCT_SIZE(struct xfs_agf, 224);
- XFS_CHECK_STRUCT_SIZE(struct xfs_agfl, 36);
- XFS_CHECK_STRUCT_SIZE(struct xfs_agi, 344);
XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_key, 8);
XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_rec, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_bmdr_block, 4);
- XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block_shdr, 48);
- XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block_lhdr, 64);
- XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block, 72);
XFS_CHECK_STRUCT_SIZE(struct xfs_dinode, 176);
XFS_CHECK_STRUCT_SIZE(struct xfs_disk_dquot, 104);
XFS_CHECK_STRUCT_SIZE(struct xfs_dqblk, 136);
- XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 264);
XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56);
+ XFS_CHECK_STRUCT_SIZE(xfs_timestamp_t, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_legacy_timestamp, 8);
+
+ /* space btrees */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_agf, 224);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_agfl, 36);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_agi, 344);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_alloc_rec, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block, 72);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block_lhdr, 64);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block_shdr, 48);
XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4);
XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_key, 4);
XFS_CHECK_STRUCT_SIZE(struct xfs_refcount_rec, 12);
XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_key, 20);
XFS_CHECK_STRUCT_SIZE(struct xfs_rmap_rec, 24);
- XFS_CHECK_STRUCT_SIZE(xfs_timestamp_t, 8);
- XFS_CHECK_STRUCT_SIZE(struct xfs_legacy_timestamp, 8);
XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t, 8);
XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t, 4);
- XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t, 8);
XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t, 4);
XFS_CHECK_STRUCT_SIZE(xfs_refcount_ptr_t, 4);
XFS_CHECK_STRUCT_SIZE(xfs_rmap_ptr_t, 4);
+ XFS_CHECK_STRUCT_SIZE(xfs_bmdr_key_t, 8);
/* dir/attr trees */
XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr, 80);
@@ -67,33 +73,34 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_free_hdr, 64);
XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf, 64);
XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf_hdr, 64);
- XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_entry_t, 8);
- XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_hdr_t, 32);
- XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_map_t, 4);
- XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_local_t, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_entry, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_hdr, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_map, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_name_local, 4);
/* realtime structures */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rtsb, 56);
XFS_CHECK_STRUCT_SIZE(union xfs_rtword_raw, 4);
XFS_CHECK_STRUCT_SIZE(union xfs_suminfo_raw, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_rtbuf_blkinfo, 48);
/*
- * m68k has problems with xfs_attr_leaf_name_remote_t, but we pad it to
- * 4 bytes anyway so it's not obviously a problem. Hence for the moment
- * we don't check this structure. This can be re-instated when the attr
- * definitions are updated to use c99 VLA definitions.
+ * m68k has problems with struct xfs_attr_leaf_name_remote, but we pad
+ * it to 4 bytes anyway so it's not obviously a problem. Hence for the
+ * moment we don't check this structure. This can be re-instated when
+ * the attr definitions are updated to use c99 VLA definitions.
*
- XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_remote_t, 12);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leaf_name_remote, 12);
*/
- XFS_CHECK_OFFSET(struct xfs_dsb, sb_crc, 224);
- XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, valuelen, 0);
- XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, namelen, 2);
- XFS_CHECK_OFFSET(xfs_attr_leaf_name_local_t, nameval, 3);
- XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, valueblk, 0);
- XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, valuelen, 4);
- XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, namelen, 8);
- XFS_CHECK_OFFSET(xfs_attr_leaf_name_remote_t, name, 9);
- XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t, 32);
+ XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_local, valuelen, 0);
+ XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_local, namelen, 2);
+ XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_local, nameval, 3);
+ XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_remote, valueblk, 0);
+ XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_remote, valuelen, 4);
+ XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_remote, namelen, 8);
+ XFS_CHECK_OFFSET(struct xfs_attr_leaf_name_remote, name, 9);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_leafblock, 32);
XFS_CHECK_STRUCT_SIZE(struct xfs_attr_sf_hdr, 4);
XFS_CHECK_OFFSET(struct xfs_attr_sf_hdr, totsize, 0);
XFS_CHECK_OFFSET(struct xfs_attr_sf_hdr, count, 2);
@@ -101,27 +108,41 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, valuelen, 1);
XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, flags, 2);
XFS_CHECK_OFFSET(struct xfs_attr_sf_entry, nameval, 3);
- XFS_CHECK_STRUCT_SIZE(xfs_da_blkinfo_t, 12);
- XFS_CHECK_STRUCT_SIZE(xfs_da_intnode_t, 16);
- XFS_CHECK_STRUCT_SIZE(xfs_da_node_entry_t, 8);
- XFS_CHECK_STRUCT_SIZE(xfs_da_node_hdr_t, 16);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_free_t, 4);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_hdr_t, 16);
- XFS_CHECK_OFFSET(xfs_dir2_data_unused_t, freetag, 0);
- XFS_CHECK_OFFSET(xfs_dir2_data_unused_t, length, 2);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_free_hdr_t, 16);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_free_t, 16);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_entry_t, 8);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_hdr_t, 16);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_t, 16);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_tail_t, 4);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_entry_t, 3);
- XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, namelen, 0);
- XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, offset, 1);
- XFS_CHECK_OFFSET(xfs_dir2_sf_entry_t, name, 3);
- XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_hdr_t, 10);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_da_blkinfo, 12);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_da_intnode, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_da_node_entry, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_da_node_hdr, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_free, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_hdr, 16);
+ XFS_CHECK_OFFSET(struct xfs_dir2_data_unused, freetag, 0);
+ XFS_CHECK_OFFSET(struct xfs_dir2_data_unused, length, 2);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free_hdr, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_entry, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_hdr, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_tail, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_entry, 3);
+ XFS_CHECK_OFFSET(struct xfs_dir2_sf_entry, namelen, 0);
+ XFS_CHECK_OFFSET(struct xfs_dir2_sf_entry, offset, 1);
+ XFS_CHECK_OFFSET(struct xfs_dir2_sf_entry, name, 3);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_hdr, 10);
XFS_CHECK_STRUCT_SIZE(struct xfs_parent_rec, 12);
+ /* ondisk dir/attr structures from xfs/122 */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr_sf_entry, 3);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_free, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_hdr, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_data_unused, 6);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_free_hdr, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_entry, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_hdr, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_leaf_tail, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_entry, 3);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir2_sf_hdr, 10);
+
/* log structures */
XFS_CHECK_STRUCT_SIZE(struct xfs_buf_log_format, 88);
XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat, 24);
@@ -157,6 +178,11 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_OFFSET(struct xfs_efi_log_format_32, efi_extents, 16);
XFS_CHECK_OFFSET(struct xfs_efi_log_format_64, efi_extents, 16);
+ /* ondisk log structures from xfs/122 */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_unmount_log_format, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_xmd_log_format, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_xmi_log_format, 88);
+
/* parent pointer ioctls */
XFS_CHECK_STRUCT_SIZE(struct xfs_getparents_rec, 32);
XFS_CHECK_STRUCT_SIZE(struct xfs_getparents, 40);
@@ -201,6 +227,70 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MIN << XFS_DQ_BIGTIME_SHIFT, 4);
XFS_CHECK_VALUE(XFS_DQ_BIGTIME_EXPIRY_MAX << XFS_DQ_BIGTIME_SHIFT,
16299260424LL);
+
+ /* superblock field checks we got from xfs/122 */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 288);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_sb, 288);
+ XFS_CHECK_SB_OFFSET(sb_magicnum, 0);
+ XFS_CHECK_SB_OFFSET(sb_blocksize, 4);
+ XFS_CHECK_SB_OFFSET(sb_dblocks, 8);
+ XFS_CHECK_SB_OFFSET(sb_rblocks, 16);
+ XFS_CHECK_SB_OFFSET(sb_rextents, 24);
+ XFS_CHECK_SB_OFFSET(sb_uuid, 32);
+ XFS_CHECK_SB_OFFSET(sb_logstart, 48);
+ XFS_CHECK_SB_OFFSET(sb_rootino, 56);
+ XFS_CHECK_SB_OFFSET(sb_rbmino, 64);
+ XFS_CHECK_SB_OFFSET(sb_rsumino, 72);
+ XFS_CHECK_SB_OFFSET(sb_rextsize, 80);
+ XFS_CHECK_SB_OFFSET(sb_agblocks, 84);
+ XFS_CHECK_SB_OFFSET(sb_agcount, 88);
+ XFS_CHECK_SB_OFFSET(sb_rbmblocks, 92);
+ XFS_CHECK_SB_OFFSET(sb_logblocks, 96);
+ XFS_CHECK_SB_OFFSET(sb_versionnum, 100);
+ XFS_CHECK_SB_OFFSET(sb_sectsize, 102);
+ XFS_CHECK_SB_OFFSET(sb_inodesize, 104);
+ XFS_CHECK_SB_OFFSET(sb_inopblock, 106);
+ XFS_CHECK_SB_OFFSET(sb_blocklog, 120);
+ XFS_CHECK_SB_OFFSET(sb_fname[12], 120);
+ XFS_CHECK_SB_OFFSET(sb_sectlog, 121);
+ XFS_CHECK_SB_OFFSET(sb_inodelog, 122);
+ XFS_CHECK_SB_OFFSET(sb_inopblog, 123);
+ XFS_CHECK_SB_OFFSET(sb_agblklog, 124);
+ XFS_CHECK_SB_OFFSET(sb_rextslog, 125);
+ XFS_CHECK_SB_OFFSET(sb_inprogress, 126);
+ XFS_CHECK_SB_OFFSET(sb_imax_pct, 127);
+ XFS_CHECK_SB_OFFSET(sb_icount, 128);
+ XFS_CHECK_SB_OFFSET(sb_ifree, 136);
+ XFS_CHECK_SB_OFFSET(sb_fdblocks, 144);
+ XFS_CHECK_SB_OFFSET(sb_frextents, 152);
+ XFS_CHECK_SB_OFFSET(sb_uquotino, 160);
+ XFS_CHECK_SB_OFFSET(sb_gquotino, 168);
+ XFS_CHECK_SB_OFFSET(sb_qflags, 176);
+ XFS_CHECK_SB_OFFSET(sb_flags, 178);
+ XFS_CHECK_SB_OFFSET(sb_shared_vn, 179);
+ XFS_CHECK_SB_OFFSET(sb_inoalignmt, 180);
+ XFS_CHECK_SB_OFFSET(sb_unit, 184);
+ XFS_CHECK_SB_OFFSET(sb_width, 188);
+ XFS_CHECK_SB_OFFSET(sb_dirblklog, 192);
+ XFS_CHECK_SB_OFFSET(sb_logsectlog, 193);
+ XFS_CHECK_SB_OFFSET(sb_logsectsize, 194);
+ XFS_CHECK_SB_OFFSET(sb_logsunit, 196);
+ XFS_CHECK_SB_OFFSET(sb_features2, 200);
+ XFS_CHECK_SB_OFFSET(sb_bad_features2, 204);
+ XFS_CHECK_SB_OFFSET(sb_features_compat, 208);
+ XFS_CHECK_SB_OFFSET(sb_features_ro_compat, 212);
+ XFS_CHECK_SB_OFFSET(sb_features_incompat, 216);
+ XFS_CHECK_SB_OFFSET(sb_features_log_incompat, 220);
+ XFS_CHECK_SB_OFFSET(sb_crc, 224);
+ XFS_CHECK_SB_OFFSET(sb_spino_align, 228);
+ XFS_CHECK_SB_OFFSET(sb_pquotino, 232);
+ XFS_CHECK_SB_OFFSET(sb_lsn, 240);
+ XFS_CHECK_SB_OFFSET(sb_meta_uuid, 248);
+ XFS_CHECK_SB_OFFSET(sb_metadirino, 264);
+ XFS_CHECK_SB_OFFSET(sb_rgcount, 272);
+ XFS_CHECK_SB_OFFSET(sb_rgextents, 276);
+ XFS_CHECK_SB_OFFSET(sb_rgblklog, 280);
+ XFS_CHECK_SB_OFFSET(sb_pad, 281);
}
#endif /* __XFS_ONDISK_H */
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index fb05f44f6c75..763d941a8420 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -143,4 +143,47 @@ time64_t xfs_dquot_from_disk_ts(struct xfs_disk_dquot *ddq,
__be32 dtimer);
__be32 xfs_dquot_to_disk_ts(struct xfs_dquot *ddq, time64_t timer);
+static inline const char *
+xfs_dqinode_path(xfs_dqtype_t type)
+{
+ switch (type) {
+ case XFS_DQTYPE_USER:
+ return "user";
+ case XFS_DQTYPE_GROUP:
+ return "group";
+ case XFS_DQTYPE_PROJ:
+ return "project";
+ }
+
+ ASSERT(0); /* @type is not exactly one of user/group/project */
+ return NULL;
+}
+
+static inline enum xfs_metafile_type
+xfs_dqinode_metafile_type(xfs_dqtype_t type)
+{
+ switch (type) {
+ case XFS_DQTYPE_USER:
+ return XFS_METAFILE_USRQUOTA;
+ case XFS_DQTYPE_GROUP:
+ return XFS_METAFILE_GRPQUOTA;
+ case XFS_DQTYPE_PROJ:
+ return XFS_METAFILE_PRJQUOTA;
+ }
+
+ ASSERT(0); /* @type is not exactly one of user/group/project */
+ return XFS_METAFILE_UNKNOWN;
+}
+
+unsigned int xfs_dqinode_sick_mask(xfs_dqtype_t type);
+
+int xfs_dqinode_load(struct xfs_trans *tp, struct xfs_inode *dp,
+ xfs_dqtype_t type, struct xfs_inode **ipp);
+int xfs_dqinode_metadir_create(struct xfs_inode *dp, xfs_dqtype_t type,
+ struct xfs_inode **ipp);
+int xfs_dqinode_metadir_link(struct xfs_inode *dp, xfs_dqtype_t type,
+ struct xfs_inode *ip);
+int xfs_dqinode_mkdir_parent(struct xfs_mount *mp, struct xfs_inode **dpp);
+int xfs_dqinode_load_parent(struct xfs_trans *tp, struct xfs_inode **dpp);
+
#endif /* __XFS_QUOTA_H__ */
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 198b84117df1..2dbab68b4fe6 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -154,7 +154,7 @@ xfs_refcount_complain_bad_rec(
xfs_warn(mp,
"Refcount BTree record corruption in AG %d detected at %pS!",
- cur->bc_ag.pag->pag_agno, fa);
+ cur->bc_group->xg_gno, fa);
xfs_warn(mp,
"Start block 0x%x, block count 0x%x, references 0x%x",
irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
@@ -180,7 +180,7 @@ xfs_refcount_get_rec(
return error;
xfs_refcount_btrec_to_irec(rec, irec);
- fa = xfs_refcount_check_irec(cur->bc_ag.pag, irec);
+ fa = xfs_refcount_check_irec(to_perag(cur->bc_group), irec);
if (fa)
return xfs_refcount_complain_bad_rec(cur, fa, irec);
@@ -1154,8 +1154,7 @@ xfs_refcount_adjust_extents(
goto out_error;
}
} else {
- fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
- cur->bc_ag.pag->pag_agno,
+ fsbno = xfs_agbno_to_fsb(to_perag(cur->bc_group),
tmp.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
tmp.rc_blockcount, NULL,
@@ -1217,8 +1216,7 @@ xfs_refcount_adjust_extents(
}
goto advloop;
} else {
- fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
- cur->bc_ag.pag->pag_agno,
+ fsbno = xfs_agbno_to_fsb(to_perag(cur->bc_group),
ext.rc_startblock);
error = xfs_free_extent_later(cur->bc_tp, fsbno,
ext.rc_blockcount, NULL,
@@ -1312,7 +1310,7 @@ xfs_refcount_continue_op(
xfs_agblock_t new_agbno)
{
struct xfs_mount *mp = cur->bc_mp;
- struct xfs_perag *pag = cur->bc_ag.pag;
+ struct xfs_perag *pag = to_perag(cur->bc_group);
if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno,
ri->ri_blockcount))) {
@@ -1320,10 +1318,10 @@ xfs_refcount_continue_op(
return -EFSCORRUPTED;
}
- ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
+ ri->ri_startblock = xfs_agbno_to_fsb(pag, new_agbno);
ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount));
- ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, ri->ri_startblock));
+ ASSERT(pag_agno(pag) == XFS_FSB_TO_AGNO(mp, ri->ri_startblock));
return 0;
}
@@ -1360,7 +1358,7 @@ xfs_refcount_finish_one(
* If we haven't gotten a cursor or the cursor AG doesn't match
* the startblock, get one now.
*/
- if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) {
+ if (rcur != NULL && rcur->bc_group != ri->ri_group) {
nr_ops = rcur->bc_refc.nr_ops;
shape_changes = rcur->bc_refc.shape_changes;
xfs_btree_del_cursor(rcur, 0);
@@ -1368,13 +1366,14 @@ xfs_refcount_finish_one(
*pcur = NULL;
}
if (rcur == NULL) {
- error = xfs_alloc_read_agf(ri->ri_pag, tp,
+ struct xfs_perag *pag = to_perag(ri->ri_group);
+
+ error = xfs_alloc_read_agf(pag, tp,
XFS_ALLOC_FLAG_FREEING, &agbp);
if (error)
return error;
- *pcur = rcur = xfs_refcountbt_init_cursor(mp, tp, agbp,
- ri->ri_pag);
+ *pcur = rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
rcur->bc_refc.nr_ops = nr_ops;
rcur->bc_refc.shape_changes = shape_changes;
}
@@ -1880,7 +1879,8 @@ xfs_refcount_recover_extent(
INIT_LIST_HEAD(&rr->rr_list);
xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
- if (xfs_refcount_check_irec(cur->bc_ag.pag, &rr->rr_rrec) != NULL ||
+ if (xfs_refcount_check_irec(to_perag(cur->bc_group), &rr->rr_rrec) !=
+ NULL ||
XFS_IS_CORRUPT(cur->bc_mp,
rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) {
xfs_btree_mark_sick(cur);
@@ -1956,8 +1956,7 @@ xfs_refcount_recover_cow_leftovers(
goto out_free;
/* Free the orphan record */
- fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno,
- rr->rr_rrec.rc_startblock);
+ fsb = xfs_agbno_to_fsb(pag, rr->rr_rrec.rc_startblock);
xfs_refcount_free_cow_extent(tp, fsb,
rr->rr_rrec.rc_blockcount);
@@ -2029,7 +2028,7 @@ xfs_refcount_query_range_helper(
xfs_failaddr_t fa;
xfs_refcount_btrec_to_irec(rec, &irec);
- fa = xfs_refcount_check_irec(cur->bc_ag.pag, &irec);
+ fa = xfs_refcount_check_irec(to_perag(cur->bc_group), &irec);
if (fa)
return xfs_refcount_complain_bad_rec(cur, fa, &irec);
diff --git a/fs/xfs/libxfs/xfs_refcount.h b/fs/xfs/libxfs/xfs_refcount.h
index 68acb0b1b4a8..62d78afcf1f3 100644
--- a/fs/xfs/libxfs/xfs_refcount.h
+++ b/fs/xfs/libxfs/xfs_refcount.h
@@ -56,7 +56,7 @@ enum xfs_refcount_intent_type {
struct xfs_refcount_intent {
struct list_head ri_list;
- struct xfs_perag *ri_pag;
+ struct xfs_group *ri_group;
enum xfs_refcount_intent_type ri_type;
xfs_extlen_t ri_blockcount;
xfs_fsblock_t ri_startblock;
diff --git a/fs/xfs/libxfs/xfs_refcount_btree.c b/fs/xfs/libxfs/xfs_refcount_btree.c
index 795928d1a66d..54505fee1852 100644
--- a/fs/xfs/libxfs/xfs_refcount_btree.c
+++ b/fs/xfs/libxfs/xfs_refcount_btree.c
@@ -30,7 +30,7 @@ xfs_refcountbt_dup_cursor(
struct xfs_btree_cur *cur)
{
return xfs_refcountbt_init_cursor(cur->bc_mp, cur->bc_tp,
- cur->bc_ag.agbp, cur->bc_ag.pag);
+ cur->bc_ag.agbp, to_perag(cur->bc_group));
}
STATIC void
@@ -68,21 +68,20 @@ xfs_refcountbt_alloc_block(
memset(&args, 0, sizeof(args));
args.tp = cur->bc_tp;
args.mp = cur->bc_mp;
- args.pag = cur->bc_ag.pag;
+ args.pag = to_perag(cur->bc_group);
args.oinfo = XFS_RMAP_OINFO_REFC;
args.minlen = args.maxlen = args.prod = 1;
args.resv = XFS_AG_RESV_METADATA;
error = xfs_alloc_vextent_near_bno(&args,
- XFS_AGB_TO_FSB(args.mp, args.pag->pag_agno,
- xfs_refc_block(args.mp)));
+ xfs_agbno_to_fsb(args.pag, xfs_refc_block(args.mp)));
if (error)
goto out_error;
if (args.fsbno == NULLFSBLOCK) {
*stat = 0;
return 0;
}
- ASSERT(args.agno == cur->bc_ag.pag->pag_agno);
+ ASSERT(args.agno == cur->bc_group->xg_gno);
ASSERT(args.len == 1);
new->s = cpu_to_be32(args.agbno);
@@ -170,7 +169,7 @@ xfs_refcountbt_init_ptr_from_cur(
{
struct xfs_agf *agf = cur->bc_ag.agbp->b_addr;
- ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));
+ ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agf->agf_seqno));
ptr->s = agf->agf_refcount_root;
}
@@ -362,11 +361,11 @@ xfs_refcountbt_init_cursor(
{
struct xfs_btree_cur *cur;
- ASSERT(pag->pag_agno < mp->m_sb.sb_agcount);
+ ASSERT(pag_agno(pag) < mp->m_sb.sb_agcount);
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_refcountbt_ops,
mp->m_refc_maxlevels, xfs_refcountbt_cur_cache);
- cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_group = xfs_group_hold(pag_group(pag));
cur->bc_refc.nr_ops = 0;
cur->bc_refc.shape_changes = 0;
cur->bc_ag.agbp = agbp;
@@ -515,7 +514,7 @@ xfs_refcountbt_calc_reserves(
* never be available for the kinds of things that would require btree
* expansion. We therefore can pretend the space isn't there.
*/
- if (xfs_ag_contains_log(mp, pag->pag_agno))
+ if (xfs_ag_contains_log(mp, pag_agno(pag)))
agblocks -= mp->m_sb.sb_logblocks;
*ask += xfs_refcountbt_max_size(mp, agblocks);
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 6ef4687b3aba..d0df68dc3131 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -213,7 +213,7 @@ xfs_rmap_check_irec(
struct xfs_perag *pag,
const struct xfs_rmap_irec *irec)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
bool is_inode;
bool is_unwritten;
bool is_bmbt;
@@ -269,9 +269,7 @@ xfs_rmap_check_btrec(
struct xfs_btree_cur *cur,
const struct xfs_rmap_irec *irec)
{
- if (xfs_btree_is_mem_rmap(cur->bc_ops))
- return xfs_rmap_check_irec(cur->bc_mem.pag, irec);
- return xfs_rmap_check_irec(cur->bc_ag.pag, irec);
+ return xfs_rmap_check_irec(to_perag(cur->bc_group), irec);
}
static inline int
@@ -288,7 +286,7 @@ xfs_rmap_complain_bad_rec(
else
xfs_warn(mp,
"Reverse Mapping BTree record corruption in AG %d detected at %pS!",
- cur->bc_ag.pag->pag_agno, fa);
+ cur->bc_group->xg_gno, fa);
xfs_warn(mp,
"Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x",
irec->rm_owner, irec->rm_flags, irec->rm_startblock,
@@ -835,7 +833,7 @@ xfs_rmap_hook_enable(void)
static inline void
xfs_rmap_update_hook(
struct xfs_trans *tp,
- struct xfs_perag *pag,
+ struct xfs_group *xg,
enum xfs_rmap_intent_type op,
xfs_agblock_t startblock,
xfs_extlen_t blockcount,
@@ -850,27 +848,27 @@ xfs_rmap_update_hook(
.oinfo = *oinfo, /* struct copy */
};
- if (pag)
- xfs_hooks_call(&pag->pag_rmap_update_hooks, op, &p);
+ if (xg)
+ xfs_hooks_call(&xg->xg_rmap_update_hooks, op, &p);
}
}
/* Call the specified function during a reverse mapping update. */
int
xfs_rmap_hook_add(
- struct xfs_perag *pag,
+ struct xfs_group *xg,
struct xfs_rmap_hook *hook)
{
- return xfs_hooks_add(&pag->pag_rmap_update_hooks, &hook->rmap_hook);
+ return xfs_hooks_add(&xg->xg_rmap_update_hooks, &hook->rmap_hook);
}
/* Stop calling the specified function during a reverse mapping update. */
void
xfs_rmap_hook_del(
- struct xfs_perag *pag,
+ struct xfs_group *xg,
struct xfs_rmap_hook *hook)
{
- xfs_hooks_del(&pag->pag_rmap_update_hooks, &hook->rmap_hook);
+ xfs_hooks_del(&xg->xg_rmap_update_hooks, &hook->rmap_hook);
}
/* Configure rmap update hook functions. */
@@ -905,7 +903,8 @@ xfs_rmap_free(
return 0;
cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
- xfs_rmap_update_hook(tp, pag, XFS_RMAP_UNMAP, bno, len, false, oinfo);
+ xfs_rmap_update_hook(tp, pag_group(pag), XFS_RMAP_UNMAP, bno, len,
+ false, oinfo);
error = xfs_rmap_unmap(cur, bno, len, false, oinfo);
xfs_btree_del_cursor(cur, error);
@@ -1149,7 +1148,8 @@ xfs_rmap_alloc(
return 0;
cur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
- xfs_rmap_update_hook(tp, pag, XFS_RMAP_MAP, bno, len, false, oinfo);
+ xfs_rmap_update_hook(tp, pag_group(pag), XFS_RMAP_MAP, bno, len, false,
+ oinfo);
error = xfs_rmap_map(cur, bno, len, false, oinfo);
xfs_btree_del_cursor(cur, error);
@@ -2586,28 +2586,30 @@ xfs_rmap_finish_one(
* If we haven't gotten a cursor or the cursor AG doesn't match
* the startblock, get one now.
*/
- if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) {
+ if (rcur != NULL && rcur->bc_group != ri->ri_group) {
xfs_btree_del_cursor(rcur, 0);
rcur = NULL;
*pcur = NULL;
}
if (rcur == NULL) {
+ struct xfs_perag *pag = to_perag(ri->ri_group);
+
/*
* Refresh the freelist before we start changing the
* rmapbt, because a shape change could cause us to
* allocate blocks.
*/
- error = xfs_free_extent_fix_freelist(tp, ri->ri_pag, &agbp);
+ error = xfs_free_extent_fix_freelist(tp, pag, &agbp);
if (error) {
- xfs_ag_mark_sick(ri->ri_pag, XFS_SICK_AG_AGFL);
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
return error;
}
if (XFS_IS_CORRUPT(tp->t_mountp, !agbp)) {
- xfs_ag_mark_sick(ri->ri_pag, XFS_SICK_AG_AGFL);
+ xfs_ag_mark_sick(pag, XFS_SICK_AG_AGFL);
return -EFSCORRUPTED;
}
- *pcur = rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, ri->ri_pag);
+ *pcur = rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, pag);
}
xfs_rmap_ino_owner(&oinfo, ri->ri_owner, ri->ri_whichfork,
@@ -2620,7 +2622,7 @@ xfs_rmap_finish_one(
if (error)
return error;
- xfs_rmap_update_hook(tp, ri->ri_pag, ri->ri_type, bno,
+ xfs_rmap_update_hook(tp, ri->ri_group, ri->ri_type, bno,
ri->ri_bmap.br_blockcount, unwritten, &oinfo);
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index b783dd4dd95d..96b4321d8310 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -173,7 +173,7 @@ struct xfs_rmap_intent {
int ri_whichfork;
uint64_t ri_owner;
struct xfs_bmbt_irec ri_bmap;
- struct xfs_perag *ri_pag;
+ struct xfs_group *ri_group;
};
/* functions for updating the rmapbt based on bmbt map/unmap operations */
@@ -264,8 +264,8 @@ struct xfs_rmap_hook {
void xfs_rmap_hook_disable(void);
void xfs_rmap_hook_enable(void);
-int xfs_rmap_hook_add(struct xfs_perag *pag, struct xfs_rmap_hook *hook);
-void xfs_rmap_hook_del(struct xfs_perag *pag, struct xfs_rmap_hook *hook);
+int xfs_rmap_hook_add(struct xfs_group *xg, struct xfs_rmap_hook *hook);
+void xfs_rmap_hook_del(struct xfs_group *xg, struct xfs_rmap_hook *hook);
void xfs_rmap_hook_setup(struct xfs_rmap_hook *hook, notifier_fn_t mod_fn);
#endif
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index ac2f1f499b76..2cab694ac58a 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -57,7 +57,7 @@ xfs_rmapbt_dup_cursor(
struct xfs_btree_cur *cur)
{
return xfs_rmapbt_init_cursor(cur->bc_mp, cur->bc_tp,
- cur->bc_ag.agbp, cur->bc_ag.pag);
+ cur->bc_ag.agbp, to_perag(cur->bc_group));
}
STATIC void
@@ -66,14 +66,15 @@ xfs_rmapbt_set_root(
const union xfs_btree_ptr *ptr,
int inc)
{
- struct xfs_buf *agbp = cur->bc_ag.agbp;
- struct xfs_agf *agf = agbp->b_addr;
+ struct xfs_buf *agbp = cur->bc_ag.agbp;
+ struct xfs_agf *agf = agbp->b_addr;
+ struct xfs_perag *pag = to_perag(cur->bc_group);
ASSERT(ptr->s != 0);
agf->agf_rmap_root = ptr->s;
be32_add_cpu(&agf->agf_rmap_level, inc);
- cur->bc_ag.pag->pagf_rmap_level += inc;
+ pag->pagf_rmap_level += inc;
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_ROOTS | XFS_AGF_LEVELS);
}
@@ -87,7 +88,7 @@ xfs_rmapbt_alloc_block(
{
struct xfs_buf *agbp = cur->bc_ag.agbp;
struct xfs_agf *agf = agbp->b_addr;
- struct xfs_perag *pag = cur->bc_ag.pag;
+ struct xfs_perag *pag = to_perag(cur->bc_group);
struct xfs_alloc_arg args = { .len = 1 };
int error;
xfs_agblock_t bno;
@@ -102,7 +103,7 @@ xfs_rmapbt_alloc_block(
return 0;
}
- xfs_extent_busy_reuse(cur->bc_mp, pag, bno, 1, false);
+ xfs_extent_busy_reuse(pag_group(pag), bno, 1, false);
new->s = cpu_to_be32(bno);
be32_add_cpu(&agf->agf_rmap_blocks, 1);
@@ -125,7 +126,7 @@ xfs_rmapbt_free_block(
{
struct xfs_buf *agbp = cur->bc_ag.agbp;
struct xfs_agf *agf = agbp->b_addr;
- struct xfs_perag *pag = cur->bc_ag.pag;
+ struct xfs_perag *pag = to_perag(cur->bc_group);
xfs_agblock_t bno;
int error;
@@ -136,7 +137,7 @@ xfs_rmapbt_free_block(
if (error)
return error;
- xfs_extent_busy_insert(cur->bc_tp, pag, bno, 1,
+ xfs_extent_busy_insert(cur->bc_tp, pag_group(pag), bno, 1,
XFS_EXTENT_BUSY_SKIP_DISCARD);
xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
@@ -227,7 +228,7 @@ xfs_rmapbt_init_ptr_from_cur(
{
struct xfs_agf *agf = cur->bc_ag.agbp->b_addr;
- ASSERT(cur->bc_ag.pag->pag_agno == be32_to_cpu(agf->agf_seqno));
+ ASSERT(cur->bc_group->xg_gno == be32_to_cpu(agf->agf_seqno));
ptr->s = agf->agf_rmap_root;
}
@@ -538,7 +539,7 @@ xfs_rmapbt_init_cursor(
cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rmapbt_ops,
mp->m_rmap_maxlevels, xfs_rmapbt_cur_cache);
- cur->bc_ag.pag = xfs_perag_hold(pag);
+ cur->bc_group = xfs_group_hold(pag_group(pag));
cur->bc_ag.agbp = agbp;
if (agbp) {
struct xfs_agf *agf = agbp->b_addr;
@@ -647,14 +648,13 @@ xfs_rmapbt_mem_cursor(
struct xfbtree *xfbt)
{
struct xfs_btree_cur *cur;
- struct xfs_mount *mp = pag->pag_mount;
- cur = xfs_btree_alloc_cursor(mp, tp, &xfs_rmapbt_mem_ops,
+ cur = xfs_btree_alloc_cursor(pag_mount(pag), tp, &xfs_rmapbt_mem_ops,
xfs_rmapbt_maxlevels_ondisk(), xfs_rmapbt_cur_cache);
cur->bc_mem.xfbtree = xfbt;
cur->bc_nlevels = xfbt->nlevels;
- cur->bc_mem.pag = xfs_perag_hold(pag);
+ cur->bc_group = xfs_group_hold(pag_group(pag));
return cur;
}
@@ -863,7 +863,7 @@ xfs_rmapbt_calc_reserves(
* never be available for the kinds of things that would require btree
* expansion. We therefore can pretend the space isn't there.
*/
- if (xfs_ag_contains_log(mp, pag->pag_agno))
+ if (xfs_ag_contains_log(mp, pag_agno(pag)))
agblocks -= mp->m_sb.sb_logblocks;
/* Reserve 1% of the AG or enough for 1 block per record. */
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 27a4472402ba..4ddfb7e395b3 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -20,28 +20,87 @@
#include "xfs_error.h"
#include "xfs_rtbitmap.h"
#include "xfs_health.h"
+#include "xfs_sb.h"
+#include "xfs_errortag.h"
+#include "xfs_log.h"
+#include "xfs_buf_item.h"
+#include "xfs_extent_busy.h"
/*
* Realtime allocator bitmap functions shared with userspace.
*/
-/*
- * Real time buffers need verifiers to avoid runtime warnings during IO.
- * We don't have anything to verify, however, so these are just dummy
- * operations.
- */
+static xfs_failaddr_t
+xfs_rtbuf_verify(
+ struct xfs_buf *bp)
+{
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+
+ if (!xfs_verify_magic(bp, hdr->rt_magic))
+ return __this_address;
+ if (!xfs_has_rtgroups(mp))
+ return __this_address;
+ if (!xfs_has_crc(mp))
+ return __this_address;
+ if (!uuid_equal(&hdr->rt_uuid, &mp->m_sb.sb_meta_uuid))
+ return __this_address;
+ if (hdr->rt_blkno != cpu_to_be64(xfs_buf_daddr(bp)))
+ return __this_address;
+ return NULL;
+}
+
static void
xfs_rtbuf_verify_read(
- struct xfs_buf *bp)
+ struct xfs_buf *bp)
{
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+ xfs_failaddr_t fa;
+
+ if (!xfs_has_rtgroups(mp))
+ return;
+
+ if (!xfs_log_check_lsn(mp, be64_to_cpu(hdr->rt_lsn))) {
+ fa = __this_address;
+ goto fail;
+ }
+
+ if (!xfs_buf_verify_cksum(bp, XFS_RTBUF_CRC_OFF)) {
+ fa = __this_address;
+ goto fail;
+ }
+
+ fa = xfs_rtbuf_verify(bp);
+ if (fa)
+ goto fail;
+
return;
+fail:
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
}
static void
xfs_rtbuf_verify_write(
struct xfs_buf *bp)
{
- return;
+ struct xfs_mount *mp = bp->b_mount;
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+ struct xfs_buf_log_item *bip = bp->b_log_item;
+ xfs_failaddr_t fa;
+
+ if (!xfs_has_rtgroups(mp))
+ return;
+
+ fa = xfs_rtbuf_verify(bp);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ return;
+ }
+
+ if (bip)
+ hdr->rt_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+ xfs_buf_update_cksum(bp, XFS_RTBUF_CRC_OFF);
}
const struct xfs_buf_ops xfs_rtbuf_ops = {
@@ -50,6 +109,22 @@ const struct xfs_buf_ops xfs_rtbuf_ops = {
.verify_write = xfs_rtbuf_verify_write,
};
+const struct xfs_buf_ops xfs_rtbitmap_buf_ops = {
+ .name = "xfs_rtbitmap",
+ .magic = { 0, cpu_to_be32(XFS_RTBITMAP_MAGIC) },
+ .verify_read = xfs_rtbuf_verify_read,
+ .verify_write = xfs_rtbuf_verify_write,
+ .verify_struct = xfs_rtbuf_verify,
+};
+
+const struct xfs_buf_ops xfs_rtsummary_buf_ops = {
+ .name = "xfs_rtsummary",
+ .magic = { 0, cpu_to_be32(XFS_RTSUMMARY_MAGIC) },
+ .verify_read = xfs_rtbuf_verify_read,
+ .verify_write = xfs_rtbuf_verify_write,
+ .verify_struct = xfs_rtbuf_verify,
+};
+
/* Release cached rt bitmap and summary buffers. */
void
xfs_rtbuf_cache_relse(
@@ -75,28 +150,31 @@ static int
xfs_rtbuf_get(
struct xfs_rtalloc_args *args,
xfs_fileoff_t block, /* block number in bitmap or summary */
- int issum) /* is summary not bitmap */
+ enum xfs_rtg_inodes type)
{
+ struct xfs_inode *ip = args->rtg->rtg_inodes[type];
struct xfs_mount *mp = args->mp;
struct xfs_buf **cbpp; /* cached block buffer */
xfs_fileoff_t *coffp; /* cached block number */
struct xfs_buf *bp; /* block buffer, result */
- struct xfs_inode *ip; /* bitmap or summary inode */
struct xfs_bmbt_irec map;
- enum xfs_blft type;
+ enum xfs_blft buf_type;
int nmap = 1;
int error;
- if (issum) {
+ switch (type) {
+ case XFS_RTGI_SUMMARY:
cbpp = &args->sumbp;
coffp = &args->sumoff;
- ip = mp->m_rsumip;
- type = XFS_BLFT_RTSUMMARY_BUF;
- } else {
+ buf_type = XFS_BLFT_RTSUMMARY_BUF;
+ break;
+ case XFS_RTGI_BITMAP:
cbpp = &args->rbmbp;
coffp = &args->rbmoff;
- ip = mp->m_rbmip;
- type = XFS_BLFT_RTBITMAP_BUF;
+ buf_type = XFS_BLFT_RTBITMAP_BUF;
+ break;
+ default:
+ return -EINVAL;
}
/*
@@ -119,22 +197,32 @@ xfs_rtbuf_get(
return error;
if (XFS_IS_CORRUPT(mp, nmap == 0 || !xfs_bmap_is_written_extent(&map))) {
- xfs_rt_mark_sick(mp, issum ? XFS_SICK_RT_SUMMARY :
- XFS_SICK_RT_BITMAP);
+ xfs_rtginode_mark_sick(args->rtg, type);
return -EFSCORRUPTED;
}
ASSERT(map.br_startblock != NULLFSBLOCK);
error = xfs_trans_read_buf(mp, args->tp, mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, map.br_startblock),
- mp->m_bsize, 0, &bp, &xfs_rtbuf_ops);
+ mp->m_bsize, 0, &bp,
+ xfs_rtblock_ops(mp, type));
if (xfs_metadata_is_sick(error))
- xfs_rt_mark_sick(mp, issum ? XFS_SICK_RT_SUMMARY :
- XFS_SICK_RT_BITMAP);
+ xfs_rtginode_mark_sick(args->rtg, type);
if (error)
return error;
- xfs_trans_buf_set_type(args->tp, bp, type);
+ if (xfs_has_rtgroups(mp)) {
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+
+ if (hdr->rt_owner != cpu_to_be64(ip->i_ino)) {
+ xfs_buf_mark_corrupt(bp);
+ xfs_trans_brelse(args->tp, bp);
+ xfs_rtginode_mark_sick(args->rtg, type);
+ return -EFSCORRUPTED;
+ }
+ }
+
+ xfs_trans_buf_set_type(args->tp, bp, buf_type);
*cbpp = bp;
*coffp = block;
return 0;
@@ -148,11 +236,11 @@ xfs_rtbitmap_read_buf(
struct xfs_mount *mp = args->mp;
if (XFS_IS_CORRUPT(mp, block >= mp->m_sb.sb_rbmblocks)) {
- xfs_rt_mark_sick(mp, XFS_SICK_RT_BITMAP);
+ xfs_rtginode_mark_sick(args->rtg, XFS_RTGI_BITMAP);
return -EFSCORRUPTED;
}
- return xfs_rtbuf_get(args, block, 0);
+ return xfs_rtbuf_get(args, block, XFS_RTGI_BITMAP);
}
int
@@ -163,10 +251,10 @@ xfs_rtsummary_read_buf(
struct xfs_mount *mp = args->mp;
if (XFS_IS_CORRUPT(mp, block >= mp->m_rsumblocks)) {
- xfs_rt_mark_sick(args->mp, XFS_SICK_RT_SUMMARY);
+ xfs_rtginode_mark_sick(args->rtg, XFS_RTGI_SUMMARY);
return -EFSCORRUPTED;
}
- return xfs_rtbuf_get(args, block, 1);
+ return xfs_rtbuf_get(args, block, XFS_RTGI_SUMMARY);
}
/*
@@ -503,6 +591,7 @@ xfs_rtmodify_summary(
{
struct xfs_mount *mp = args->mp;
xfs_rtsumoff_t so = xfs_rtsumoffs(mp, log, bbno);
+ uint8_t *rsum_cache = args->rtg->rtg_rsum_cache;
unsigned int infoword;
xfs_suminfo_t val;
int error;
@@ -514,11 +603,11 @@ xfs_rtmodify_summary(
infoword = xfs_rtsumoffs_to_infoword(mp, so);
val = xfs_suminfo_add(args, infoword, delta);
- if (mp->m_rsum_cache) {
- if (val == 0 && log + 1 == mp->m_rsum_cache[bbno])
- mp->m_rsum_cache[bbno] = log;
- if (val != 0 && log >= mp->m_rsum_cache[bbno])
- mp->m_rsum_cache[bbno] = log + 1;
+ if (rsum_cache) {
+ if (val == 0 && log + 1 == rsum_cache[bbno])
+ rsum_cache[bbno] = log;
+ if (val != 0 && log >= rsum_cache[bbno])
+ rsum_cache[bbno] = log + 1;
}
xfs_trans_log_rtsummary(args, infoword);
@@ -737,7 +826,7 @@ xfs_rtfree_range(
/*
* Find the next allocated block (end of allocated extent).
*/
- error = xfs_rtfind_forw(args, end, mp->m_sb.sb_rextents - 1,
+ error = xfs_rtfind_forw(args, end, args->rtg->rtg_extents - 1,
&postblock);
if (error)
return error;
@@ -961,19 +1050,25 @@ xfs_rtcheck_alloc_range(
int
xfs_rtfree_extent(
struct xfs_trans *tp, /* transaction pointer */
+ struct xfs_rtgroup *rtg,
xfs_rtxnum_t start, /* starting rtext number to free */
xfs_rtxlen_t len) /* length of extent freed */
{
struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
struct xfs_rtalloc_args args = {
.mp = mp,
.tp = tp,
+ .rtg = rtg,
};
int error;
struct timespec64 atime;
- ASSERT(mp->m_rbmip->i_itemp != NULL);
- xfs_assert_ilocked(mp->m_rbmip, XFS_ILOCK_EXCL);
+ ASSERT(rbmip->i_itemp != NULL);
+ xfs_assert_ilocked(rbmip, XFS_ILOCK_EXCL);
+
+ if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FREE_EXTENT))
+ return -EIO;
error = xfs_rtcheck_alloc_range(&args, start, len);
if (error)
@@ -990,19 +1085,21 @@ xfs_rtfree_extent(
* Mark more blocks free in the superblock.
*/
xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len);
+
/*
* If we've now freed all the blocks, reset the file sequence
- * number to 0.
+ * number to 0 for pre-RTG file systems.
*/
- if (tp->t_frextents_delta + mp->m_sb.sb_frextents ==
+ if (!xfs_has_rtgroups(mp) &&
+ tp->t_frextents_delta + mp->m_sb.sb_frextents ==
mp->m_sb.sb_rextents) {
- if (!(mp->m_rbmip->i_diflags & XFS_DIFLAG_NEWRTBM))
- mp->m_rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM;
+ if (!(rbmip->i_diflags & XFS_DIFLAG_NEWRTBM))
+ rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM;
- atime = inode_get_atime(VFS_I(mp->m_rbmip));
+ atime = inode_get_atime(VFS_I(rbmip));
atime.tv_sec = 0;
- inode_set_atime_to_ts(VFS_I(mp->m_rbmip), atime);
- xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
+ inode_set_atime_to_ts(VFS_I(rbmip), atime);
+ xfs_trans_log_inode(tp, rbmip, XFS_ILOG_CORE);
}
error = 0;
out:
@@ -1018,15 +1115,17 @@ out:
int
xfs_rtfree_blocks(
struct xfs_trans *tp,
+ struct xfs_rtgroup *rtg,
xfs_fsblock_t rtbno,
xfs_filblks_t rtlen)
{
struct xfs_mount *mp = tp->t_mountp;
xfs_extlen_t mod;
+ int error;
ASSERT(rtlen <= XFS_MAX_BMBT_EXTLEN);
- mod = xfs_rtb_to_rtxoff(mp, rtlen);
+ mod = xfs_blen_to_rtxoff(mp, rtlen);
if (mod) {
ASSERT(mod == 0);
return -EIO;
@@ -1038,21 +1137,31 @@ xfs_rtfree_blocks(
return -EIO;
}
- return xfs_rtfree_extent(tp, xfs_rtb_to_rtx(mp, rtbno),
- xfs_rtb_to_rtx(mp, rtlen));
+ error = xfs_rtfree_extent(tp, rtg, xfs_rtb_to_rtx(mp, rtbno),
+ xfs_extlen_to_rtxlen(mp, rtlen));
+ if (error)
+ return error;
+
+ if (xfs_has_rtgroups(mp))
+ xfs_extent_busy_insert(tp, rtg_group(rtg),
+ xfs_rtb_to_rgbno(mp, rtbno), rtlen, 0);
+
+ return 0;
}
/* Find all the free records within a given range. */
int
xfs_rtalloc_query_range(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
struct xfs_trans *tp,
xfs_rtxnum_t start,
xfs_rtxnum_t end,
xfs_rtalloc_query_range_fn fn,
void *priv)
{
+ struct xfs_mount *mp = rtg_mount(rtg);
struct xfs_rtalloc_args args = {
+ .rtg = rtg,
.mp = mp,
.tp = tp,
};
@@ -1060,10 +1169,10 @@ xfs_rtalloc_query_range(
if (start > end)
return -EINVAL;
- if (start == end || start >= mp->m_sb.sb_rextents)
+ if (start == end || start >= rtg->rtg_extents)
return 0;
- end = min(end, mp->m_sb.sb_rextents - 1);
+ end = min(end, rtg->rtg_extents - 1);
/* Iterate the bitmap, looking for discrepancies. */
while (start <= end) {
@@ -1086,7 +1195,7 @@ xfs_rtalloc_query_range(
rec.ar_startext = start;
rec.ar_extcount = rtend - start + 1;
- error = fn(mp, tp, &rec, priv);
+ error = fn(rtg, tp, &rec, priv);
if (error)
break;
}
@@ -1101,26 +1210,27 @@ xfs_rtalloc_query_range(
/* Find all the free records. */
int
xfs_rtalloc_query_all(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
struct xfs_trans *tp,
xfs_rtalloc_query_range_fn fn,
void *priv)
{
- return xfs_rtalloc_query_range(mp, tp, 0, mp->m_sb.sb_rextents - 1, fn,
+ return xfs_rtalloc_query_range(rtg, tp, 0, rtg->rtg_extents - 1, fn,
priv);
}
/* Is the given extent all free? */
int
xfs_rtalloc_extent_is_free(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
struct xfs_trans *tp,
xfs_rtxnum_t start,
xfs_rtxlen_t len,
bool *is_free)
{
struct xfs_rtalloc_args args = {
- .mp = mp,
+ .mp = rtg_mount(rtg),
+ .rtg = rtg,
.tp = tp,
};
xfs_rtxnum_t end;
@@ -1136,88 +1246,71 @@ xfs_rtalloc_extent_is_free(
return 0;
}
+/* Compute the number of rt extents tracked by a single bitmap block. */
+xfs_rtxnum_t
+xfs_rtbitmap_rtx_per_rbmblock(
+ struct xfs_mount *mp)
+{
+ unsigned int rbmblock_bytes = mp->m_sb.sb_blocksize;
+
+ if (xfs_has_rtgroups(mp))
+ rbmblock_bytes -= sizeof(struct xfs_rtbuf_blkinfo);
+
+ return rbmblock_bytes * NBBY;
+}
+
/*
* Compute the number of rtbitmap blocks needed to track the given number of rt
* extents.
*/
xfs_filblks_t
-xfs_rtbitmap_blockcount(
+xfs_rtbitmap_blockcount_len(
struct xfs_mount *mp,
xfs_rtbxlen_t rtextents)
{
- return howmany_64(rtextents, NBBY * mp->m_sb.sb_blocksize);
+ return howmany_64(rtextents, xfs_rtbitmap_rtx_per_rbmblock(mp));
}
-/* Compute the number of rtsummary blocks needed to track the given rt space. */
-xfs_filblks_t
-xfs_rtsummary_blockcount(
- struct xfs_mount *mp,
- unsigned int rsumlevels,
- xfs_extlen_t rbmblocks)
+/* How many rt extents does each rtbitmap file track? */
+static inline xfs_rtbxlen_t
+xfs_rtbitmap_bitcount(
+ struct xfs_mount *mp)
{
- unsigned long long rsumwords;
+ if (!mp->m_sb.sb_rextents)
+ return 0;
- rsumwords = (unsigned long long)rsumlevels * rbmblocks;
- return XFS_B_TO_FSB(mp, rsumwords << XFS_WORDLOG);
-}
+ /* rtgroup size can be nonzero even if rextents is zero */
+ if (xfs_has_rtgroups(mp))
+ return mp->m_sb.sb_rgextents;
-/* Lock both realtime free space metadata inodes for a freespace update. */
-void
-xfs_rtbitmap_lock(
- struct xfs_mount *mp)
-{
- xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP);
- xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM);
+ return mp->m_sb.sb_rextents;
}
/*
- * Join both realtime free space metadata inodes to the transaction. The
- * ILOCKs will be released on transaction commit.
+ * Compute the number of rtbitmap blocks used for a given file system.
*/
-void
-xfs_rtbitmap_trans_join(
- struct xfs_trans *tp)
-{
- xfs_trans_ijoin(tp, tp->t_mountp->m_rbmip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, tp->t_mountp->m_rsumip, XFS_ILOCK_EXCL);
-}
-
-/* Unlock both realtime free space metadata inodes after a freespace update. */
-void
-xfs_rtbitmap_unlock(
+xfs_filblks_t
+xfs_rtbitmap_blockcount(
struct xfs_mount *mp)
{
- xfs_iunlock(mp->m_rsumip, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM);
- xfs_iunlock(mp->m_rbmip, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP);
+ return xfs_rtbitmap_blockcount_len(mp, xfs_rtbitmap_bitcount(mp));
}
/*
- * Lock the realtime free space metadata inodes for a freespace scan. Callers
- * must walk metadata blocks in order of increasing file offset.
+ * Compute the geometry of the rtsummary file needed to track the given rt
+ * space.
*/
-void
-xfs_rtbitmap_lock_shared(
- struct xfs_mount *mp,
- unsigned int rbmlock_flags)
-{
- if (rbmlock_flags & XFS_RBMLOCK_BITMAP)
- xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
-
- if (rbmlock_flags & XFS_RBMLOCK_SUMMARY)
- xfs_ilock(mp->m_rsumip, XFS_ILOCK_SHARED | XFS_ILOCK_RTSUM);
-}
-
-/* Unlock the realtime free space metadata inodes after a freespace scan. */
-void
-xfs_rtbitmap_unlock_shared(
+xfs_filblks_t
+xfs_rtsummary_blockcount(
struct xfs_mount *mp,
- unsigned int rbmlock_flags)
+ unsigned int *rsumlevels)
{
- if (rbmlock_flags & XFS_RBMLOCK_SUMMARY)
- xfs_iunlock(mp->m_rsumip, XFS_ILOCK_SHARED | XFS_ILOCK_RTSUM);
+ xfs_rtbxlen_t rextents = xfs_rtbitmap_bitcount(mp);
+ unsigned long long rsumwords;
- if (rbmlock_flags & XFS_RBMLOCK_BITMAP)
- xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+ *rsumlevels = xfs_compute_rextslog(rextents) + 1;
+ rsumwords = xfs_rtbitmap_blockcount_len(mp, rextents) * (*rsumlevels);
+ return howmany_64(rsumwords, mp->m_blockwsize);
}
static int
@@ -1260,21 +1353,26 @@ out_trans_cancel:
/* Get a buffer for the block. */
static int
xfs_rtfile_initialize_block(
- struct xfs_inode *ip,
+ struct xfs_rtgroup *rtg,
+ enum xfs_rtg_inodes type,
xfs_fsblock_t fsbno,
void *data)
{
- struct xfs_mount *mp = ip->i_mount;
+ struct xfs_mount *mp = rtg_mount(rtg);
+ struct xfs_inode *ip = rtg->rtg_inodes[type];
struct xfs_trans *tp;
struct xfs_buf *bp;
+ void *bufdata;
const size_t copylen = mp->m_blockwsize << XFS_WORDLOG;
enum xfs_blft buf_type;
int error;
- if (ip == mp->m_rsumip)
+ if (type == XFS_RTGI_BITMAP)
+ buf_type = XFS_BLFT_RTBITMAP_BUF;
+ else if (type == XFS_RTGI_SUMMARY)
buf_type = XFS_BLFT_RTSUMMARY_BUF;
else
- buf_type = XFS_BLFT_RTBITMAP_BUF;
+ return -EINVAL;
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growrtzero, 0, 0, 0, &tp);
if (error)
@@ -1288,13 +1386,30 @@ xfs_rtfile_initialize_block(
xfs_trans_cancel(tp);
return error;
}
+ bufdata = bp->b_addr;
xfs_trans_buf_set_type(tp, bp, buf_type);
- bp->b_ops = &xfs_rtbuf_ops;
+ bp->b_ops = xfs_rtblock_ops(mp, type);
+
+ if (xfs_has_rtgroups(mp)) {
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+
+ if (type == XFS_RTGI_BITMAP)
+ hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC);
+ else
+ hdr->rt_magic = cpu_to_be32(XFS_RTSUMMARY_MAGIC);
+ hdr->rt_owner = cpu_to_be64(ip->i_ino);
+ hdr->rt_blkno = cpu_to_be64(XFS_FSB_TO_DADDR(mp, fsbno));
+ hdr->rt_lsn = 0;
+ uuid_copy(&hdr->rt_uuid, &mp->m_sb.sb_meta_uuid);
+
+ bufdata += sizeof(*hdr);
+ }
+
if (data)
- memcpy(bp->b_addr, data, copylen);
+ memcpy(bufdata, data, copylen);
else
- memset(bp->b_addr, 0, copylen);
+ memset(bufdata, 0, copylen);
xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);
return xfs_trans_commit(tp);
}
@@ -1306,12 +1421,13 @@ xfs_rtfile_initialize_block(
*/
int
xfs_rtfile_initialize_blocks(
- struct xfs_inode *ip, /* inode (bitmap/summary) */
+ struct xfs_rtgroup *rtg,
+ enum xfs_rtg_inodes type,
xfs_fileoff_t offset_fsb, /* offset to start from */
xfs_fileoff_t end_fsb, /* offset to allocate to */
void *data) /* data to fill the blocks */
{
- struct xfs_mount *mp = ip->i_mount;
+ struct xfs_mount *mp = rtg_mount(rtg);
const size_t copylen = mp->m_blockwsize << XFS_WORDLOG;
while (offset_fsb < end_fsb) {
@@ -1319,8 +1435,8 @@ xfs_rtfile_initialize_blocks(
xfs_filblks_t i;
int error;
- error = xfs_rtfile_alloc_blocks(ip, offset_fsb,
- end_fsb - offset_fsb, &map);
+ error = xfs_rtfile_alloc_blocks(rtg->rtg_inodes[type],
+ offset_fsb, end_fsb - offset_fsb, &map);
if (error)
return error;
@@ -1330,7 +1446,7 @@ xfs_rtfile_initialize_blocks(
* Do this one block per transaction, to keep it simple.
*/
for (i = 0; i < map.br_blockcount; i++) {
- error = xfs_rtfile_initialize_block(ip,
+ error = xfs_rtfile_initialize_block(rtg, type,
map.br_startblock + i, data);
if (error)
return error;
@@ -1343,3 +1459,35 @@ xfs_rtfile_initialize_blocks(
return 0;
}
+
+int
+xfs_rtbitmap_create(
+ struct xfs_rtgroup *rtg,
+ struct xfs_inode *ip,
+ struct xfs_trans *tp,
+ bool init)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+
+ ip->i_disk_size = mp->m_sb.sb_rbmblocks * mp->m_sb.sb_blocksize;
+ if (init && !xfs_has_rtgroups(mp)) {
+ ip->i_diflags |= XFS_DIFLAG_NEWRTBM;
+ inode_set_atime(VFS_I(ip), 0, 0);
+ }
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ return 0;
+}
+
+int
+xfs_rtsummary_create(
+ struct xfs_rtgroup *rtg,
+ struct xfs_inode *ip,
+ struct xfs_trans *tp,
+ bool init)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+
+ ip->i_disk_size = mp->m_rsumblocks * mp->m_sb.sb_blocksize;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+ return 0;
+}
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.h b/fs/xfs/libxfs/xfs_rtbitmap.h
index 140513d1d6bc..16563a44bd13 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.h
+++ b/fs/xfs/libxfs/xfs_rtbitmap.h
@@ -6,7 +6,10 @@
#ifndef __XFS_RTBITMAP_H__
#define __XFS_RTBITMAP_H__
+#include "xfs_rtgroup.h"
+
struct xfs_rtalloc_args {
+ struct xfs_rtgroup *rtg;
struct xfs_mount *mp;
struct xfs_trans *tp;
@@ -19,13 +22,37 @@ struct xfs_rtalloc_args {
static inline xfs_rtblock_t
xfs_rtx_to_rtb(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
xfs_rtxnum_t rtx)
{
+ struct xfs_mount *mp = rtg_mount(rtg);
+ xfs_rtblock_t start = xfs_group_start_fsb(rtg_group(rtg));
+
+ if (mp->m_rtxblklog >= 0)
+ return start + (rtx << mp->m_rtxblklog);
+ return start + (rtx * mp->m_sb.sb_rextsize);
+}
+
+/* Convert an rgbno into an rt extent number. */
+static inline xfs_rtxnum_t
+xfs_rgbno_to_rtx(
+ struct xfs_mount *mp,
+ xfs_rgblock_t rgbno)
+{
+ if (likely(mp->m_rtxblklog >= 0))
+ return rgbno >> mp->m_rtxblklog;
+ return rgbno / mp->m_sb.sb_rextsize;
+}
+
+static inline uint64_t
+xfs_rtbxlen_to_blen(
+ struct xfs_mount *mp,
+ xfs_rtbxlen_t rtbxlen)
+{
if (mp->m_rtxblklog >= 0)
- return rtx << mp->m_rtxblklog;
+ return rtbxlen << mp->m_rtxblklog;
- return rtx * mp->m_sb.sb_rextsize;
+ return rtbxlen * mp->m_sb.sb_rextsize;
}
static inline xfs_extlen_t
@@ -62,15 +89,49 @@ xfs_extlen_to_rtxlen(
return len / mp->m_sb.sb_rextsize;
}
+/* Convert an rt block count into an rt extent count. */
+static inline xfs_rtbxlen_t
+xfs_blen_to_rtbxlen(
+ struct xfs_mount *mp,
+ uint64_t blen)
+{
+ if (likely(mp->m_rtxblklog >= 0))
+ return blen >> mp->m_rtxblklog;
+
+ return div_u64(blen, mp->m_sb.sb_rextsize);
+}
+
+/* Return the offset of a file block length within an rt extent. */
+static inline xfs_extlen_t
+xfs_blen_to_rtxoff(
+ struct xfs_mount *mp,
+ xfs_filblks_t blen)
+{
+ if (likely(mp->m_rtxblklog >= 0))
+ return blen & mp->m_rtxblkmask;
+
+ return do_div(blen, mp->m_sb.sb_rextsize);
+}
+
+/* Round this block count up to the nearest rt extent size. */
+static inline xfs_filblks_t
+xfs_blen_roundup_rtx(
+ struct xfs_mount *mp,
+ xfs_filblks_t blen)
+{
+ return roundup_64(blen, mp->m_sb.sb_rextsize);
+}
+
/* Convert an rt block number into an rt extent number. */
static inline xfs_rtxnum_t
xfs_rtb_to_rtx(
struct xfs_mount *mp,
xfs_rtblock_t rtbno)
{
+ /* open-coded 64-bit masking operation */
+ rtbno &= mp->m_groups[XG_TYPE_RTG].blkmask;
if (likely(mp->m_rtxblklog >= 0))
return rtbno >> mp->m_rtxblklog;
-
return div_u64(rtbno, mp->m_sb.sb_rextsize);
}
@@ -80,48 +141,29 @@ xfs_rtb_to_rtxoff(
struct xfs_mount *mp,
xfs_rtblock_t rtbno)
{
+ /* open-coded 64-bit masking operation */
+ rtbno &= mp->m_groups[XG_TYPE_RTG].blkmask;
if (likely(mp->m_rtxblklog >= 0))
return rtbno & mp->m_rtxblkmask;
-
return do_div(rtbno, mp->m_sb.sb_rextsize);
}
-/*
- * Convert an rt block number into an rt extent number, rounding up to the next
- * rt extent if the rt block is not aligned to an rt extent boundary.
- */
-static inline xfs_rtxnum_t
-xfs_rtb_to_rtxup(
- struct xfs_mount *mp,
- xfs_rtblock_t rtbno)
-{
- if (likely(mp->m_rtxblklog >= 0)) {
- if (rtbno & mp->m_rtxblkmask)
- return (rtbno >> mp->m_rtxblklog) + 1;
- return rtbno >> mp->m_rtxblklog;
- }
-
- if (do_div(rtbno, mp->m_sb.sb_rextsize))
- rtbno++;
- return rtbno;
-}
-
-/* Round this rtblock up to the nearest rt extent size. */
+/* Round this file block offset up to the nearest rt extent size. */
static inline xfs_rtblock_t
-xfs_rtb_roundup_rtx(
+xfs_fileoff_roundup_rtx(
struct xfs_mount *mp,
- xfs_rtblock_t rtbno)
+ xfs_fileoff_t off)
{
- return roundup_64(rtbno, mp->m_sb.sb_rextsize);
+ return roundup_64(off, mp->m_sb.sb_rextsize);
}
-/* Round this rtblock down to the nearest rt extent size. */
+/* Round this file block offset down to the nearest rt extent size. */
static inline xfs_rtblock_t
-xfs_rtb_rounddown_rtx(
+xfs_fileoff_rounddown_rtx(
struct xfs_mount *mp,
- xfs_rtblock_t rtbno)
+ xfs_fileoff_t off)
{
- return rounddown_64(rtbno, mp->m_sb.sb_rextsize);
+ return rounddown_64(off, mp->m_sb.sb_rextsize);
}
/* Convert an rt extent number to a file block offset in the rt bitmap file. */
@@ -130,6 +172,9 @@ xfs_rtx_to_rbmblock(
struct xfs_mount *mp,
xfs_rtxnum_t rtx)
{
+ if (xfs_has_rtgroups(mp))
+ return div_u64(rtx, mp->m_rtx_per_rbmblock);
+
return rtx >> mp->m_blkbit_log;
}
@@ -139,6 +184,13 @@ xfs_rtx_to_rbmword(
struct xfs_mount *mp,
xfs_rtxnum_t rtx)
{
+ if (xfs_has_rtgroups(mp)) {
+ unsigned int mod;
+
+ div_u64_rem(rtx >> XFS_NBWORDLOG, mp->m_blockwsize, &mod);
+ return mod;
+ }
+
return (rtx >> XFS_NBWORDLOG) & (mp->m_blockwsize - 1);
}
@@ -148,6 +200,9 @@ xfs_rbmblock_to_rtx(
struct xfs_mount *mp,
xfs_fileoff_t rbmoff)
{
+ if (xfs_has_rtgroups(mp))
+ return rbmoff * mp->m_rtx_per_rbmblock;
+
return rbmoff << mp->m_blkbit_log;
}
@@ -157,7 +212,14 @@ xfs_rbmblock_wordptr(
struct xfs_rtalloc_args *args,
unsigned int index)
{
- union xfs_rtword_raw *words = args->rbmbp->b_addr;
+ struct xfs_mount *mp = args->mp;
+ union xfs_rtword_raw *words;
+ struct xfs_rtbuf_blkinfo *hdr = args->rbmbp->b_addr;
+
+ if (xfs_has_rtgroups(mp))
+ words = (union xfs_rtword_raw *)(hdr + 1);
+ else
+ words = args->rbmbp->b_addr;
return words + index;
}
@@ -170,6 +232,8 @@ xfs_rtbitmap_getword(
{
union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index);
+ if (xfs_has_rtgroups(args->mp))
+ return be32_to_cpu(word->rtg);
return word->old;
}
@@ -182,7 +246,10 @@ xfs_rtbitmap_setword(
{
union xfs_rtword_raw *word = xfs_rbmblock_wordptr(args, index);
- word->old = value;
+ if (xfs_has_rtgroups(args->mp))
+ word->rtg = cpu_to_be32(value);
+ else
+ word->old = value;
}
/*
@@ -207,6 +274,9 @@ xfs_rtsumoffs_to_block(
struct xfs_mount *mp,
xfs_rtsumoff_t rsumoff)
{
+ if (xfs_has_rtgroups(mp))
+ return rsumoff / mp->m_blockwsize;
+
return XFS_B_TO_FSBT(mp, rsumoff * sizeof(xfs_suminfo_t));
}
@@ -221,6 +291,9 @@ xfs_rtsumoffs_to_infoword(
{
unsigned int mask = mp->m_blockmask >> XFS_SUMINFOLOG;
+ if (xfs_has_rtgroups(mp))
+ return rsumoff % mp->m_blockwsize;
+
return rsumoff & mask;
}
@@ -230,7 +303,13 @@ xfs_rsumblock_infoptr(
struct xfs_rtalloc_args *args,
unsigned int index)
{
- union xfs_suminfo_raw *info = args->sumbp->b_addr;
+ union xfs_suminfo_raw *info;
+ struct xfs_rtbuf_blkinfo *hdr = args->sumbp->b_addr;
+
+ if (xfs_has_rtgroups(args->mp))
+ info = (union xfs_suminfo_raw *)(hdr + 1);
+ else
+ info = args->sumbp->b_addr;
return info + index;
}
@@ -243,6 +322,8 @@ xfs_suminfo_get(
{
union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index);
+ if (xfs_has_rtgroups(args->mp))
+ return be32_to_cpu(info->rtg);
return info->old;
}
@@ -255,10 +336,28 @@ xfs_suminfo_add(
{
union xfs_suminfo_raw *info = xfs_rsumblock_infoptr(args, index);
+ if (xfs_has_rtgroups(args->mp)) {
+ be32_add_cpu(&info->rtg, delta);
+ return be32_to_cpu(info->rtg);
+ }
+
info->old += delta;
return info->old;
}
+static inline const struct xfs_buf_ops *
+xfs_rtblock_ops(
+ struct xfs_mount *mp,
+ enum xfs_rtg_inodes type)
+{
+ if (xfs_has_rtgroups(mp)) {
+ if (type == XFS_RTGI_SUMMARY)
+ return &xfs_rtsummary_buf_ops;
+ return &xfs_rtbitmap_buf_ops;
+ }
+ return &xfs_rtbuf_ops;
+}
+
/*
* Functions for walking free space rtextents in the realtime bitmap.
*/
@@ -268,7 +367,7 @@ struct xfs_rtalloc_rec {
};
typedef int (*xfs_rtalloc_query_range_fn)(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
struct xfs_trans *tp,
const struct xfs_rtalloc_rec *rec,
void *priv);
@@ -291,53 +390,43 @@ int xfs_rtmodify_summary(struct xfs_rtalloc_args *args, int log,
xfs_fileoff_t bbno, int delta);
int xfs_rtfree_range(struct xfs_rtalloc_args *args, xfs_rtxnum_t start,
xfs_rtxlen_t len);
-int xfs_rtalloc_query_range(struct xfs_mount *mp, struct xfs_trans *tp,
+int xfs_rtalloc_query_range(struct xfs_rtgroup *rtg, struct xfs_trans *tp,
xfs_rtxnum_t start, xfs_rtxnum_t end,
xfs_rtalloc_query_range_fn fn, void *priv);
-int xfs_rtalloc_query_all(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtalloc_query_range_fn fn,
- void *priv);
-int xfs_rtalloc_extent_is_free(struct xfs_mount *mp, struct xfs_trans *tp,
- xfs_rtxnum_t start, xfs_rtxlen_t len,
- bool *is_free);
-/*
- * Free an extent in the realtime subvolume. Length is expressed in
- * realtime extents, as is the block number.
- */
-int /* error */
-xfs_rtfree_extent(
- struct xfs_trans *tp, /* transaction pointer */
- xfs_rtxnum_t start, /* starting rtext number to free */
- xfs_rtxlen_t len); /* length of extent freed */
-
+int xfs_rtalloc_query_all(struct xfs_rtgroup *rtg, struct xfs_trans *tp,
+ xfs_rtalloc_query_range_fn fn, void *priv);
+int xfs_rtalloc_extent_is_free(struct xfs_rtgroup *rtg, struct xfs_trans *tp,
+ xfs_rtxnum_t start, xfs_rtxlen_t len, bool *is_free);
+int xfs_rtfree_extent(struct xfs_trans *tp, struct xfs_rtgroup *rtg,
+ xfs_rtxnum_t start, xfs_rtxlen_t len);
/* Same as above, but in units of rt blocks. */
-int xfs_rtfree_blocks(struct xfs_trans *tp, xfs_fsblock_t rtbno,
- xfs_filblks_t rtlen);
+int xfs_rtfree_blocks(struct xfs_trans *tp, struct xfs_rtgroup *rtg,
+ xfs_fsblock_t rtbno, xfs_filblks_t rtlen);
-xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t
- rtextents);
+xfs_rtxnum_t xfs_rtbitmap_rtx_per_rbmblock(struct xfs_mount *mp);
+xfs_filblks_t xfs_rtbitmap_blockcount(struct xfs_mount *mp);
+xfs_filblks_t xfs_rtbitmap_blockcount_len(struct xfs_mount *mp,
+ xfs_rtbxlen_t rtextents);
xfs_filblks_t xfs_rtsummary_blockcount(struct xfs_mount *mp,
- unsigned int rsumlevels, xfs_extlen_t rbmblocks);
-
-int xfs_rtfile_initialize_blocks(struct xfs_inode *ip,
- xfs_fileoff_t offset_fsb, xfs_fileoff_t end_fsb, void *data);
+ unsigned int *rsumlevels);
-void xfs_rtbitmap_lock(struct xfs_mount *mp);
-void xfs_rtbitmap_unlock(struct xfs_mount *mp);
-void xfs_rtbitmap_trans_join(struct xfs_trans *tp);
+int xfs_rtfile_initialize_blocks(struct xfs_rtgroup *rtg,
+ enum xfs_rtg_inodes type, xfs_fileoff_t offset_fsb,
+ xfs_fileoff_t end_fsb, void *data);
+int xfs_rtbitmap_create(struct xfs_rtgroup *rtg, struct xfs_inode *ip,
+ struct xfs_trans *tp, bool init);
+int xfs_rtsummary_create(struct xfs_rtgroup *rtg, struct xfs_inode *ip,
+ struct xfs_trans *tp, bool init);
-/* Lock the rt bitmap inode in shared mode */
-#define XFS_RBMLOCK_BITMAP (1U << 0)
-/* Lock the rt summary inode in shared mode */
-#define XFS_RBMLOCK_SUMMARY (1U << 1)
-
-void xfs_rtbitmap_lock_shared(struct xfs_mount *mp,
- unsigned int rbmlock_flags);
-void xfs_rtbitmap_unlock_shared(struct xfs_mount *mp,
- unsigned int rbmlock_flags);
#else /* CONFIG_XFS_RT */
# define xfs_rtfree_extent(t,b,l) (-ENOSYS)
-# define xfs_rtfree_blocks(t,rb,rl) (-ENOSYS)
+
+static inline int xfs_rtfree_blocks(struct xfs_trans *tp,
+ struct xfs_rtgroup *rtg, xfs_fsblock_t rtbno,
+ xfs_filblks_t rtlen)
+{
+ return -ENOSYS;
+}
# define xfs_rtalloc_query_range(m,t,l,h,f,p) (-ENOSYS)
# define xfs_rtalloc_query_all(m,t,f,p) (-ENOSYS)
# define xfs_rtbitmap_read_buf(a,b) (-ENOSYS)
@@ -345,17 +434,11 @@ void xfs_rtbitmap_unlock_shared(struct xfs_mount *mp,
# define xfs_rtbuf_cache_relse(a) (0)
# define xfs_rtalloc_extent_is_free(m,t,s,l,i) (-ENOSYS)
static inline xfs_filblks_t
-xfs_rtbitmap_blockcount(struct xfs_mount *mp, xfs_rtbxlen_t rtextents)
+xfs_rtbitmap_blockcount_len(struct xfs_mount *mp, xfs_rtbxlen_t rtextents)
{
/* shut up gcc */
return 0;
}
-# define xfs_rtsummary_blockcount(mp, l, b) (0)
-# define xfs_rtbitmap_lock(mp) do { } while (0)
-# define xfs_rtbitmap_trans_join(tp) do { } while (0)
-# define xfs_rtbitmap_unlock(mp) do { } while (0)
-# define xfs_rtbitmap_lock_shared(mp, lf) do { } while (0)
-# define xfs_rtbitmap_unlock_shared(mp, lf) do { } while (0)
#endif /* CONFIG_XFS_RT */
#endif /* __XFS_RTBITMAP_H__ */
diff --git a/fs/xfs/libxfs/xfs_rtgroup.c b/fs/xfs/libxfs/xfs_rtgroup.c
new file mode 100644
index 000000000000..e74bb059f24f
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rtgroup.c
@@ -0,0 +1,697 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2022-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_btree.h"
+#include "xfs_alloc_btree.h"
+#include "xfs_rmap_btree.h"
+#include "xfs_alloc.h"
+#include "xfs_ialloc.h"
+#include "xfs_rmap.h"
+#include "xfs_ag.h"
+#include "xfs_ag_resv.h"
+#include "xfs_health.h"
+#include "xfs_error.h"
+#include "xfs_bmap.h"
+#include "xfs_defer.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_trace.h"
+#include "xfs_inode.h"
+#include "xfs_icache.h"
+#include "xfs_buf_item.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_metafile.h"
+#include "xfs_metadir.h"
+
+/*
+ * Return the lowest group block number that may be used in rtgroup @rgno.
+ * Only group 0 of a filesystem with an rt superblock has a nonzero minimum:
+ * its first sb_rextsize blocks (one rt extent) are not usable.
+ */
+static inline uint32_t
+xfs_rtgroup_min_block(
+	struct xfs_mount	*mp,
+	xfs_rgnumber_t		rgno)
+{
+	if (rgno != 0 || !xfs_has_rtsb(mp))
+		return 0;
+
+	return mp->m_sb.sb_rextsize;
+}
+
+/* Precompute this group's geometry */
+void
+xfs_rtgroup_calc_geometry(
+ struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
+ xfs_rgnumber_t rgno,
+ xfs_rgnumber_t rgcount,
+ xfs_rtbxlen_t rextents)
+{
+ rtg->rtg_extents = __xfs_rtgroup_extents(mp, rgno, rgcount, rextents);
+ rtg_group(rtg)->xg_block_count = rtg->rtg_extents * mp->m_sb.sb_rextsize;
+ rtg_group(rtg)->xg_min_gbno = xfs_rtgroup_min_block(mp, rgno);
+}
+
+/*
+ * Allocate a zeroed incore rtgroup for @rgno, precompute its geometry from
+ * @rgcount and @rextents, and register it with xfs_group_insert().
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails, or the error from
+ * xfs_group_insert(), in which case the new rtgroup is freed again.
+ */
+int
+xfs_rtgroup_alloc(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno,
+ xfs_rgnumber_t rgcount,
+ xfs_rtbxlen_t rextents)
+{
+ struct xfs_rtgroup *rtg;
+ int error;
+
+ rtg = kzalloc(sizeof(struct xfs_rtgroup), GFP_KERNEL);
+ if (!rtg)
+ return -ENOMEM;
+
+ xfs_rtgroup_calc_geometry(mp, rtg, rgno, rgcount, rextents);
+
+ error = xfs_group_insert(mp, rtg_group(rtg), rgno, XG_TYPE_RTG);
+ if (error)
+ goto out_free_rtg;
+ return 0;
+
+out_free_rtg:
+ kfree(rtg);
+ return error;
+}
+
+void
+xfs_rtgroup_free(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno)
+{
+ xfs_group_free(mp, rgno, XG_TYPE_RTG, NULL);
+}
+
+/*
+ * Tear down the incore rtgroup objects numbered first_rgno up to, but not
+ * including, end_rgno.
+ */
+void
+xfs_free_rtgroups(
+	struct xfs_mount	*mp,
+	xfs_rgnumber_t		first_rgno,
+	xfs_rgnumber_t		end_rgno)
+{
+	xfs_rgnumber_t		cur = first_rgno;
+
+	while (cur < end_rgno) {
+		xfs_rtgroup_free(mp, cur);
+		cur++;
+	}
+}
+
+/*
+ * Initialize some range of incore rtgroup objects.
+ *
+ * Allocates rtgroups first_rgno up to (but not including) end_rgno; end_rgno
+ * is also passed to xfs_rtgroup_alloc() as the total group count used for
+ * geometry computation.  An empty range succeeds without doing anything.  If
+ * any allocation fails, the groups already created by this call are freed
+ * and the error is returned.
+ */
+int
+xfs_initialize_rtgroups(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t first_rgno,
+ xfs_rgnumber_t end_rgno,
+ xfs_rtbxlen_t rextents)
+{
+ xfs_rgnumber_t index;
+ int error;
+
+ if (first_rgno >= end_rgno)
+ return 0;
+
+ for (index = first_rgno; index < end_rgno; index++) {
+ error = xfs_rtgroup_alloc(mp, index, end_rgno, rextents);
+ if (error)
+ goto out_unwind_new_rtgs;
+ }
+
+ return 0;
+
+out_unwind_new_rtgs:
+ /* index is the group that failed, so only [first_rgno, index) exist. */
+ xfs_free_rtgroups(mp, first_rgno, index);
+ return error;
+}
+
+/*
+ * Compute the number of rt extents in this realtime group.
+ *
+ * The last group (rgno == rgcount - 1) gets whatever remains of @rextents
+ * after the preceding groups, each of which holds sb_rgextents extents; every
+ * other group holds exactly sb_rgextents.
+ */
+xfs_rtxnum_t
+__xfs_rtgroup_extents(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno,
+ xfs_rgnumber_t rgcount,
+ xfs_rtbxlen_t rextents)
+{
+ ASSERT(rgno < rgcount);
+ if (rgno == rgcount - 1)
+ return rextents - ((xfs_rtxnum_t)rgno * mp->m_sb.sb_rgextents);
+
+ ASSERT(xfs_has_rtgroups(mp));
+ return mp->m_sb.sb_rgextents;
+}
+
+xfs_rtxnum_t
+xfs_rtgroup_extents(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno)
+{
+ return __xfs_rtgroup_extents(mp, rgno, mp->m_sb.sb_rgcount,
+ mp->m_sb.sb_rextents);
+}
+
+/*
+ * Update the rt extent count of the previous tail rtgroup if it changed during
+ * recovery (i.e. recovery of a growfs).
+ */
+int
+xfs_update_last_rtgroup_size(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t prev_rgcount)
+{
+ struct xfs_rtgroup *rtg;
+
+ ASSERT(prev_rgcount > 0);
+
+ rtg = xfs_rtgroup_grab(mp, prev_rgcount - 1);
+ if (!rtg)
+ return -EFSCORRUPTED;
+ rtg->rtg_extents = __xfs_rtgroup_extents(mp, prev_rgcount - 1,
+ mp->m_sb.sb_rgcount, mp->m_sb.sb_rextents);
+ rtg_group(rtg)->xg_block_count = rtg->rtg_extents * mp->m_sb.sb_rextsize;
+ xfs_rtgroup_rele(rtg);
+ return 0;
+}
+
+/*
+ * Lock metadata inodes associated with this rt group.
+ *
+ * XFS_RTGLOCK_BITMAP takes the ILOCK of both the bitmap and the summary inode
+ * exclusively, bitmap first.  XFS_RTGLOCK_BITMAP_SHARED takes only the bitmap
+ * inode's ILOCK in shared mode.  The two flags must not be combined.
+ */
+void
+xfs_rtgroup_lock(
+ struct xfs_rtgroup *rtg,
+ unsigned int rtglock_flags)
+{
+ ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
+ ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) ||
+ !(rtglock_flags & XFS_RTGLOCK_BITMAP));
+
+ if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
+ /*
+ * Lock both realtime free space metadata inodes for a freespace
+ * update.
+ */
+ xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL);
+ xfs_ilock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL);
+ } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
+ xfs_ilock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED);
+ }
+}
+
+/*
+ * Unlock metadata inodes associated with this rt group.
+ *
+ * Takes the same flags as xfs_rtgroup_lock().  For XFS_RTGLOCK_BITMAP the
+ * summary inode is unlocked before the bitmap inode, i.e. in the reverse of
+ * the order in which xfs_rtgroup_lock() acquires them.
+ */
+void
+xfs_rtgroup_unlock(
+ struct xfs_rtgroup *rtg,
+ unsigned int rtglock_flags)
+{
+ ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
+ ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) ||
+ !(rtglock_flags & XFS_RTGLOCK_BITMAP));
+
+ if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
+ xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_SUMMARY], XFS_ILOCK_EXCL);
+ xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_EXCL);
+ } else if (rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED) {
+ xfs_iunlock(rtg->rtg_inodes[XFS_RTGI_BITMAP], XFS_ILOCK_SHARED);
+ }
+}
+
+/*
+ * Join realtime group metadata inodes to the transaction. The ILOCKs will be
+ * released on transaction commit.
+ */
+void
+xfs_rtgroup_trans_join(
+ struct xfs_trans *tp,
+ struct xfs_rtgroup *rtg,
+ unsigned int rtglock_flags)
+{
+ ASSERT(!(rtglock_flags & ~XFS_RTGLOCK_ALL_FLAGS));
+ ASSERT(!(rtglock_flags & XFS_RTGLOCK_BITMAP_SHARED));
+
+ if (rtglock_flags & XFS_RTGLOCK_BITMAP) {
+ xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_BITMAP],
+ XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, rtg->rtg_inodes[XFS_RTGI_SUMMARY],
+ XFS_ILOCK_EXCL);
+ }
+}
+
+/* Retrieve rt group geometry. */
+int
+xfs_rtgroup_get_geometry(
+ struct xfs_rtgroup *rtg,
+ struct xfs_rtgroup_geometry *rgeo)
+{
+ /* Fill out form. */
+ memset(rgeo, 0, sizeof(*rgeo));
+ rgeo->rg_number = rtg_rgno(rtg);
+ rgeo->rg_length = rtg_group(rtg)->xg_block_count;
+ xfs_rtgroup_geom_health(rtg, rgeo);
+ return 0;
+}
+
+#ifdef CONFIG_PROVE_LOCKING
+static struct lock_class_key xfs_rtginode_lock_class;
+
+static int
+xfs_rtginode_ilock_cmp_fn(
+ const struct lockdep_map *m1,
+ const struct lockdep_map *m2)
+{
+ const struct xfs_inode *ip1 =
+ container_of(m1, struct xfs_inode, i_lock.dep_map);
+ const struct xfs_inode *ip2 =
+ container_of(m2, struct xfs_inode, i_lock.dep_map);
+
+ if (ip1->i_projid < ip2->i_projid)
+ return -1;
+ if (ip1->i_projid > ip2->i_projid)
+ return 1;
+ return 0;
+}
+
+static inline void
+xfs_rtginode_ilock_print_fn(
+ const struct lockdep_map *m)
+{
+ const struct xfs_inode *ip =
+ container_of(m, struct xfs_inode, i_lock.dep_map);
+
+ printk(KERN_CONT " rgno=%u", ip->i_projid);
+}
+
+/*
+ * Most of the time each of the RTG inode locks are only taken one at a time.
+ * But when committing deferred ops, more than one of a kind can be taken.
+ * However, deferred rt ops will be committed in rgno order so there is no
+ * potential for deadlocks. The code here is needed to tell lockdep about this
+ * order.
+ */
+static inline void
+xfs_rtginode_lockdep_setup(
+ struct xfs_inode *ip,
+ xfs_rgnumber_t rgno,
+ enum xfs_rtg_inodes type)
+{
+ lockdep_set_class_and_subclass(&ip->i_lock, &xfs_rtginode_lock_class,
+ type);
+ lock_set_cmp_fn(&ip->i_lock, xfs_rtginode_ilock_cmp_fn,
+ xfs_rtginode_ilock_print_fn);
+}
+#else
+#define xfs_rtginode_lockdep_setup(ip, rgno, type) do { } while (0)
+#endif /* CONFIG_PROVE_LOCKING */
+
+struct xfs_rtginode_ops {
+ const char *name; /* short name */
+
+ enum xfs_metafile_type metafile_type;
+
+ unsigned int sick; /* rtgroup sickness flag */
+
+ /* Does the fs have this feature? */
+ bool (*enabled)(struct xfs_mount *mp);
+
+ /* Create this rtgroup metadata inode and initialize it. */
+ int (*create)(struct xfs_rtgroup *rtg,
+ struct xfs_inode *ip,
+ struct xfs_trans *tp,
+ bool init);
+};
+
+static const struct xfs_rtginode_ops xfs_rtginode_ops[XFS_RTGI_MAX] = {
+ [XFS_RTGI_BITMAP] = {
+ .name = "bitmap",
+ .metafile_type = XFS_METAFILE_RTBITMAP,
+ .sick = XFS_SICK_RG_BITMAP,
+ .create = xfs_rtbitmap_create,
+ },
+ [XFS_RTGI_SUMMARY] = {
+ .name = "summary",
+ .metafile_type = XFS_METAFILE_RTSUMMARY,
+ .sick = XFS_SICK_RG_SUMMARY,
+ .create = xfs_rtsummary_create,
+ },
+};
+
+/* Return the shortname of this rtgroup inode. */
+const char *
+xfs_rtginode_name(
+ enum xfs_rtg_inodes type)
+{
+ return xfs_rtginode_ops[type].name;
+}
+
+/* Return the metafile type of this rtgroup inode. */
+enum xfs_metafile_type
+xfs_rtginode_metafile_type(
+ enum xfs_rtg_inodes type)
+{
+ return xfs_rtginode_ops[type].metafile_type;
+}
+
+/* Should this rtgroup inode be present? */
+bool
+xfs_rtginode_enabled(
+ struct xfs_rtgroup *rtg,
+ enum xfs_rtg_inodes type)
+{
+ const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
+
+ if (!ops->enabled)
+ return true;
+ return ops->enabled(rtg_mount(rtg));
+}
+
+/* Mark an rtgroup inode sick */
+void
+xfs_rtginode_mark_sick(
+ struct xfs_rtgroup *rtg,
+ enum xfs_rtg_inodes type)
+{
+ const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
+
+ xfs_group_mark_sick(rtg_group(rtg), ops->sick);
+}
+
+/*
+ * Load an existing rtgroup inode into the rtgroup structure.
+ *
+ * Without rtgroups, the bitmap and summary inodes are looked up by the inode
+ * numbers recorded in the superblock; with rtgroups, the inode is looked up by
+ * path under mp->m_rtdirip.  On success the inode is stored in
+ * rtg->rtg_inodes[type].
+ *
+ * Returns 0 on success or if this inode type is not enabled (or does not
+ * exist on a !rtgroups filesystem), -ENOMEM if the path cannot be allocated,
+ * -EFSCORRUPTED if the rtgroup directory is missing or the loaded inode fails
+ * the format/projid checks, or the error from the lookup itself.
+ */
+int
+xfs_rtginode_load(
+ struct xfs_rtgroup *rtg,
+ enum xfs_rtg_inodes type,
+ struct xfs_trans *tp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_inode *ip;
+ const struct xfs_rtginode_ops *ops = &xfs_rtginode_ops[type];
+ int error;
+
+ if (!xfs_rtginode_enabled(rtg, type))
+ return 0;
+
+ if (!xfs_has_rtgroups(mp)) {
+ xfs_ino_t ino;
+
+ switch (type) {
+ case XFS_RTGI_BITMAP:
+ ino = mp->m_sb.sb_rbmino;
+ break;
+ case XFS_RTGI_SUMMARY:
+ ino = mp->m_sb.sb_rsumino;
+ break;
+ default:
+ /* None of the other types exist on !rtgroups */
+ return 0;
+ }
+
+ error = xfs_trans_metafile_iget(tp, ino, ops->metafile_type,
+ &ip);
+ } else {
+ const char *path;
+
+ if (!mp->m_rtdirip) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+
+ path = xfs_rtginode_path(rtg_rgno(rtg), type);
+ if (!path)
+ return -ENOMEM;
+ error = xfs_metadir_load(tp, mp->m_rtdirip, path,
+ ops->metafile_type, &ip);
+ kfree(path);
+ }
+
+ if (error) {
+ if (xfs_metadata_is_sick(error))
+ xfs_rtginode_mark_sick(rtg, type);
+ return error;
+ }
+
+ /* Only extents- and btree-format data forks are accepted. */
+ if (XFS_IS_CORRUPT(mp, ip->i_df.if_format != XFS_DINODE_FMT_EXTENTS &&
+ ip->i_df.if_format != XFS_DINODE_FMT_BTREE)) {
+ xfs_irele(ip);
+ xfs_rtginode_mark_sick(rtg, type);
+ return -EFSCORRUPTED;
+ }
+
+ /* The inode's project id must match this group's number. */
+ if (XFS_IS_CORRUPT(mp, ip->i_projid != rtg_rgno(rtg))) {
+ xfs_irele(ip);
+ xfs_rtginode_mark_sick(rtg, type);
+ return -EFSCORRUPTED;
+ }
+
+ xfs_rtginode_lockdep_setup(ip, rtg_rgno(rtg), type);
+ rtg->rtg_inodes[type] = ip;
+ return 0;
+}
+
+/*
+ * Drop the reference held through *ipp, if there is one, and clear the
+ * caller's pointer.
+ */
+void
+xfs_rtginode_irele(
+	struct xfs_inode	**ipp)
+{
+	struct xfs_inode	*held = *ipp;
+
+	if (held != NULL)
+		xfs_irele(held);
+	*ipp = NULL;
+}
+
+/*
+ * Create a new metadata inode of the given type for this rtgroup and attach it
+ * to the rtgroup structure.
+ *
+ * The inode is created as a regular file under mp->m_rtdirip at the path
+ * built by xfs_rtginode_path(), its project id is set to the group number, and
+ * the per-type ->create hook initializes it (@init is passed through to that
+ * hook).
+ *
+ * Returns 0 if the inode type is not enabled, or once the inode has been
+ * committed and stored in rtg->rtg_inodes[type].  Returns -EFSCORRUPTED if the
+ * rtgroup parent directory has not been loaded, -ENOMEM if the path cannot be
+ * allocated, or the error from the metadir/->create calls.
+ */
+int
+xfs_rtginode_create(
+	struct xfs_rtgroup		*rtg,
+	enum xfs_rtg_inodes		type,
+	bool				init)
+{
+	const struct xfs_rtginode_ops	*ops = &xfs_rtginode_ops[type];
+	struct xfs_mount		*mp = rtg_mount(rtg);
+	struct xfs_metadir_update	upd = {
+		.dp			= mp->m_rtdirip,
+		.metafile_type		= ops->metafile_type,
+	};
+	int				error;
+
+	if (!xfs_rtginode_enabled(rtg, type))
+		return 0;
+
+	if (!mp->m_rtdirip) {
+		xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+		return -EFSCORRUPTED;
+	}
+
+	upd.path = xfs_rtginode_path(rtg_rgno(rtg), type);
+	if (!upd.path)
+		return -ENOMEM;
+
+	error = xfs_metadir_start_create(&upd);
+	if (error)
+		goto out_path;
+
+	/*
+	 * A failed create must still unwind the update begun above and free
+	 * the path; returning directly would leak both.  out_cancel copes
+	 * with upd.ip being NULL.
+	 */
+	error = xfs_metadir_create(&upd, S_IFREG);
+	if (error)
+		goto out_cancel;
+
+	xfs_rtginode_lockdep_setup(upd.ip, rtg_rgno(rtg), type);
+
+	upd.ip->i_projid = rtg_rgno(rtg);
+	error = ops->create(rtg, upd.ip, upd.tp, init);
+	if (error)
+		goto out_cancel;
+
+	error = xfs_metadir_commit(&upd);
+	if (error)
+		goto out_path;
+
+	kfree(upd.path);
+	xfs_finish_inode_setup(upd.ip);
+	rtg->rtg_inodes[type] = upd.ip;
+	return 0;
+
+out_cancel:
+	xfs_metadir_cancel(&upd, error);
+	/* Have to finish setting up the inode to ensure it's deleted. */
+	if (upd.ip) {
+		xfs_finish_inode_setup(upd.ip);
+		xfs_irele(upd.ip);
+	}
+out_path:
+	kfree(upd.path);
+	return error;
+}
+
+/* Create the parent directory for all rtgroup inodes and load it. */
+int
+xfs_rtginode_mkdir_parent(
+ struct xfs_mount *mp)
+{
+ if (!mp->m_metadirip) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+
+ return xfs_metadir_mkdir(mp->m_metadirip, "rtgroups", &mp->m_rtdirip);
+}
+
+/* Load the parent directory of all rtgroup inodes. */
+int
+xfs_rtginode_load_parent(
+ struct xfs_trans *tp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+
+ if (!mp->m_metadirip) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+
+ return xfs_metadir_load(tp, mp->m_metadirip, "rtgroups",
+ XFS_METAFILE_DIR, &mp->m_rtdirip);
+}
+
+/*
+ * Check superblock fields for a read or a write.
+ *
+ * Verifies the magic number, that rsb_pad is zero, and that every byte of the
+ * buffer following the xfs_rtsb structure is zero.  Returns NULL if the
+ * buffer passes, or the address of the failing check otherwise.
+ */
+static xfs_failaddr_t
+xfs_rtsb_verify_common(
+ struct xfs_buf *bp)
+{
+ struct xfs_rtsb *rsb = bp->b_addr;
+
+ if (!xfs_verify_magic(bp, rsb->rsb_magicnum))
+ return __this_address;
+ if (rsb->rsb_pad)
+ return __this_address;
+
+ /* Everything to the end of the fs block must be zero */
+ if (memchr_inv(rsb + 1, 0, BBTOB(bp->b_length) - sizeof(*rsb)))
+ return __this_address;
+
+ return NULL;
+}
+
+/* Check superblock fields for a read or revalidation. */
+static inline xfs_failaddr_t
+xfs_rtsb_verify_all(
+ struct xfs_buf *bp)
+{
+ struct xfs_rtsb *rsb = bp->b_addr;
+ struct xfs_mount *mp = bp->b_mount;
+ xfs_failaddr_t fa;
+
+ fa = xfs_rtsb_verify_common(bp);
+ if (fa)
+ return fa;
+
+ if (memcmp(&rsb->rsb_fname, &mp->m_sb.sb_fname, XFSLABEL_MAX))
+ return __this_address;
+ if (!uuid_equal(&rsb->rsb_uuid, &mp->m_sb.sb_uuid))
+ return __this_address;
+ if (!uuid_equal(&rsb->rsb_meta_uuid, &mp->m_sb.sb_meta_uuid))
+ return __this_address;
+
+ return NULL;
+}
+
+static void
+xfs_rtsb_read_verify(
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+
+ if (!xfs_buf_verify_cksum(bp, XFS_RTSB_CRC_OFF)) {
+ xfs_verifier_error(bp, -EFSBADCRC, __this_address);
+ return;
+ }
+
+ fa = xfs_rtsb_verify_all(bp);
+ if (fa)
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+}
+
+static void
+xfs_rtsb_write_verify(
+ struct xfs_buf *bp)
+{
+ xfs_failaddr_t fa;
+
+ fa = xfs_rtsb_verify_common(bp);
+ if (fa) {
+ xfs_verifier_error(bp, -EFSCORRUPTED, fa);
+ return;
+ }
+
+ xfs_buf_update_cksum(bp, XFS_RTSB_CRC_OFF);
+}
+
+const struct xfs_buf_ops xfs_rtsb_buf_ops = {
+ .name = "xfs_rtsb",
+ .magic = { 0, cpu_to_be32(XFS_RTSB_MAGIC) },
+ .verify_read = xfs_rtsb_read_verify,
+ .verify_write = xfs_rtsb_write_verify,
+ .verify_struct = xfs_rtsb_verify_all,
+};
+
+/*
+ * Update a realtime superblock from the primary fs super
+ *
+ * Sets the rt superblock magic, clears the pad field, and copies the label,
+ * uuid, and metadata uuid from the ondisk primary superblock held in @sb_bp
+ * into the rt superblock held in @rtsb_bp.  No other field is written here.
+ */
+void
+xfs_update_rtsb(
+ struct xfs_buf *rtsb_bp,
+ const struct xfs_buf *sb_bp)
+{
+ const struct xfs_dsb *dsb = sb_bp->b_addr;
+ struct xfs_rtsb *rsb = rtsb_bp->b_addr;
+ const uuid_t *meta_uuid;
+
+ rsb->rsb_magicnum = cpu_to_be32(XFS_RTSB_MAGIC);
+
+ rsb->rsb_pad = 0;
+ memcpy(&rsb->rsb_fname, &dsb->sb_fname, XFSLABEL_MAX);
+
+ memcpy(&rsb->rsb_uuid, &dsb->sb_uuid, sizeof(rsb->rsb_uuid));
+
+ /*
+ * The metadata uuid is the fs uuid if the metauuid feature is not
+ * enabled.
+ */
+ if (dsb->sb_features_incompat &
+ cpu_to_be32(XFS_SB_FEAT_INCOMPAT_META_UUID))
+ meta_uuid = &dsb->sb_meta_uuid;
+ else
+ meta_uuid = &dsb->sb_uuid;
+ memcpy(&rsb->rsb_meta_uuid, meta_uuid, sizeof(rsb->rsb_meta_uuid));
+}
+
+/*
+ * Update the realtime superblock from a filesystem superblock and log it to
+ * the given transaction.
+ *
+ * Returns the rt superblock buffer obtained from xfs_trans_getrtsb(), or NULL
+ * if the filesystem has no rt superblock feature or no rt superblock buffer
+ * is available.  The buffer is attached to the transaction as an ordered
+ * buffer.
+ */
+struct xfs_buf *
+xfs_log_rtsb(
+ struct xfs_trans *tp,
+ const struct xfs_buf *sb_bp)
+{
+ struct xfs_buf *rtsb_bp;
+
+ if (!xfs_has_rtsb(tp->t_mountp))
+ return NULL;
+
+ rtsb_bp = xfs_trans_getrtsb(tp);
+ if (!rtsb_bp) {
+ /*
+ * It's possible for the rtgroups feature to be enabled but
+ * there is no incore rt superblock buffer if the rt geometry
+ * was specified at mkfs time but the rt section has not yet
+ * been attached. In this case, rblocks must be zero.
+ */
+ ASSERT(tp->t_mountp->m_sb.sb_rblocks == 0);
+ return NULL;
+ }
+
+ xfs_update_rtsb(rtsb_bp, sb_bp);
+ xfs_trans_ordered_buf(tp, rtsb_bp);
+ return rtsb_bp;
+}
diff --git a/fs/xfs/libxfs/xfs_rtgroup.h b/fs/xfs/libxfs/xfs_rtgroup.h
new file mode 100644
index 000000000000..7e7e491ff06f
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_rtgroup.h
@@ -0,0 +1,284 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2022-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __LIBXFS_RTGROUP_H
+#define __LIBXFS_RTGROUP_H 1
+
+#include "xfs_group.h"
+
+struct xfs_mount;
+struct xfs_trans;
+
+enum xfs_rtg_inodes {
+ XFS_RTGI_BITMAP, /* allocation bitmap */
+ XFS_RTGI_SUMMARY, /* allocation summary */
+
+ XFS_RTGI_MAX,
+};
+
+#ifdef MAX_LOCKDEP_SUBCLASSES
+static_assert(XFS_RTGI_MAX <= MAX_LOCKDEP_SUBCLASSES);
+#endif
+
+/*
+ * Realtime group incore structure, similar to the per-AG structure.
+ */
+struct xfs_rtgroup {
+ struct xfs_group rtg_group;
+
+ /* per-rtgroup metadata inodes */
+ struct xfs_inode *rtg_inodes[XFS_RTGI_MAX];
+
+ /* Number of rt extents in this group */
+ xfs_rtxnum_t rtg_extents;
+
+ /*
+ * Cache of rt summary level per bitmap block with the invariant that
+ * rtg_rsum_cache[bbno] > the maximum i for which rsum[i][bbno] != 0,
+ * or 0 if rsum[i][bbno] == 0 for all i.
+ *
+ * Reads and writes are serialized by the rsumip inode lock.
+ */
+ uint8_t *rtg_rsum_cache;
+};
+
+static inline struct xfs_rtgroup *to_rtg(struct xfs_group *xg)
+{
+ return container_of(xg, struct xfs_rtgroup, rtg_group);
+}
+
+static inline struct xfs_group *rtg_group(struct xfs_rtgroup *rtg)
+{
+ return &rtg->rtg_group;
+}
+
+static inline struct xfs_mount *rtg_mount(const struct xfs_rtgroup *rtg)
+{
+ return rtg->rtg_group.xg_mount;
+}
+
+static inline xfs_rgnumber_t rtg_rgno(const struct xfs_rtgroup *rtg)
+{
+ return rtg->rtg_group.xg_gno;
+}
+
+/* Passive rtgroup references */
+static inline struct xfs_rtgroup *
+xfs_rtgroup_get(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno)
+{
+ return to_rtg(xfs_group_get(mp, rgno, XG_TYPE_RTG));
+}
+
+static inline struct xfs_rtgroup *
+xfs_rtgroup_hold(
+ struct xfs_rtgroup *rtg)
+{
+ return to_rtg(xfs_group_hold(rtg_group(rtg)));
+}
+
+static inline void
+xfs_rtgroup_put(
+ struct xfs_rtgroup *rtg)
+{
+ xfs_group_put(rtg_group(rtg));
+}
+
+/* Active rtgroup references */
+static inline struct xfs_rtgroup *
+xfs_rtgroup_grab(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno)
+{
+ return to_rtg(xfs_group_grab(mp, rgno, XG_TYPE_RTG));
+}
+
+static inline void
+xfs_rtgroup_rele(
+ struct xfs_rtgroup *rtg)
+{
+ xfs_group_rele(rtg_group(rtg));
+}
+
+static inline struct xfs_rtgroup *
+xfs_rtgroup_next_range(
+ struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
+ xfs_rgnumber_t start_rgno,
+ xfs_rgnumber_t end_rgno)
+{
+ return to_rtg(xfs_group_next_range(mp, rtg ? rtg_group(rtg) : NULL,
+ start_rgno, end_rgno, XG_TYPE_RTG));
+}
+
+static inline struct xfs_rtgroup *
+xfs_rtgroup_next(
+ struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg)
+{
+ return xfs_rtgroup_next_range(mp, rtg, 0, mp->m_sb.sb_rgcount - 1);
+}
+
+static inline xfs_rtblock_t
+xfs_rgbno_to_rtb(
+ struct xfs_rtgroup *rtg,
+ xfs_rgblock_t rgbno)
+{
+ return xfs_gbno_to_fsb(rtg_group(rtg), rgbno);
+}
+
+static inline xfs_rgnumber_t
+xfs_rtb_to_rgno(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtbno)
+{
+ return xfs_fsb_to_gno(mp, rtbno, XG_TYPE_RTG);
+}
+
+static inline xfs_rgblock_t
+xfs_rtb_to_rgbno(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtbno)
+{
+ return xfs_fsb_to_gbno(mp, rtbno, XG_TYPE_RTG);
+}
+
+/* Is rtbno the start of a RT group? */
+static inline bool
+xfs_rtbno_is_group_start(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtbno)
+{
+ return (rtbno & mp->m_groups[XG_TYPE_RTG].blkmask) == 0;
+}
+
+/* Convert an rtgroups rt extent number into an rgbno. */
+static inline xfs_rgblock_t
+xfs_rtx_to_rgbno(
+ struct xfs_rtgroup *rtg,
+ xfs_rtxnum_t rtx)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+
+ if (likely(mp->m_rtxblklog >= 0))
+ return rtx << mp->m_rtxblklog;
+ return rtx * mp->m_sb.sb_rextsize;
+}
+
+/*
+ * Convert an rt block number to a disk address.
+ *
+ * The group number is extracted from @rtbno and multiplied by the number of
+ * blocks per group to find the group's starting block; the offset within the
+ * group (rtbno masked with the group block mask) is added to that and the sum
+ * is converted to basic blocks with XFS_FSB_TO_BB().
+ */
+static inline xfs_daddr_t
+xfs_rtb_to_daddr(
+ struct xfs_mount *mp,
+ xfs_rtblock_t rtbno)
+{
+ struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG];
+ xfs_rgnumber_t rgno = xfs_rtb_to_rgno(mp, rtbno);
+ uint64_t start_bno = (xfs_rtblock_t)rgno * g->blocks;
+
+ return XFS_FSB_TO_BB(mp, start_bno + (rtbno & g->blkmask));
+}
+
+/*
+ * Convert a disk address to an rt block number.
+ *
+ * Without rtgroups the result is simply the disk address converted to
+ * filesystem blocks.  With rtgroups, that linear block number is split into a
+ * group number and an offset within the group, which are recombined as
+ * (rgno << blklog) + rgbno.
+ */
+static inline xfs_rtblock_t
+xfs_daddr_to_rtb(
+ struct xfs_mount *mp,
+ xfs_daddr_t daddr)
+{
+ xfs_rfsblock_t bno = XFS_BB_TO_FSBT(mp, daddr);
+
+ if (xfs_has_rtgroups(mp)) {
+ struct xfs_groups *g = &mp->m_groups[XG_TYPE_RTG];
+ xfs_rgnumber_t rgno;
+ uint32_t rgbno;
+
+ rgno = div_u64_rem(bno, g->blocks, &rgbno);
+ return ((xfs_rtblock_t)rgno << g->blklog) + rgbno;
+ }
+
+ return bno;
+}
+
+#ifdef CONFIG_XFS_RT
+int xfs_rtgroup_alloc(struct xfs_mount *mp, xfs_rgnumber_t rgno,
+ xfs_rgnumber_t rgcount, xfs_rtbxlen_t rextents);
+void xfs_rtgroup_free(struct xfs_mount *mp, xfs_rgnumber_t rgno);
+
+void xfs_free_rtgroups(struct xfs_mount *mp, xfs_rgnumber_t first_rgno,
+ xfs_rgnumber_t end_rgno);
+int xfs_initialize_rtgroups(struct xfs_mount *mp, xfs_rgnumber_t first_rgno,
+ xfs_rgnumber_t end_rgno, xfs_rtbxlen_t rextents);
+
+xfs_rtxnum_t __xfs_rtgroup_extents(struct xfs_mount *mp, xfs_rgnumber_t rgno,
+ xfs_rgnumber_t rgcount, xfs_rtbxlen_t rextents);
+xfs_rtxnum_t xfs_rtgroup_extents(struct xfs_mount *mp, xfs_rgnumber_t rgno);
+void xfs_rtgroup_calc_geometry(struct xfs_mount *mp, struct xfs_rtgroup *rtg,
+ xfs_rgnumber_t rgno, xfs_rgnumber_t rgcount,
+ xfs_rtbxlen_t rextents);
+
+int xfs_update_last_rtgroup_size(struct xfs_mount *mp,
+ xfs_rgnumber_t prev_rgcount);
+
+/* Lock the rt bitmap inode in exclusive mode */
+#define XFS_RTGLOCK_BITMAP (1U << 0)
+/* Lock the rt bitmap inode in shared mode */
+#define XFS_RTGLOCK_BITMAP_SHARED (1U << 1)
+
+#define XFS_RTGLOCK_ALL_FLAGS (XFS_RTGLOCK_BITMAP | \
+ XFS_RTGLOCK_BITMAP_SHARED)
+
+void xfs_rtgroup_lock(struct xfs_rtgroup *rtg, unsigned int rtglock_flags);
+void xfs_rtgroup_unlock(struct xfs_rtgroup *rtg, unsigned int rtglock_flags);
+void xfs_rtgroup_trans_join(struct xfs_trans *tp, struct xfs_rtgroup *rtg,
+ unsigned int rtglock_flags);
+
+int xfs_rtgroup_get_geometry(struct xfs_rtgroup *rtg,
+ struct xfs_rtgroup_geometry *rgeo);
+
+int xfs_rtginode_mkdir_parent(struct xfs_mount *mp);
+int xfs_rtginode_load_parent(struct xfs_trans *tp);
+
+const char *xfs_rtginode_name(enum xfs_rtg_inodes type);
+enum xfs_metafile_type xfs_rtginode_metafile_type(enum xfs_rtg_inodes type);
+bool xfs_rtginode_enabled(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type);
+void xfs_rtginode_mark_sick(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type);
+int xfs_rtginode_load(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type,
+ struct xfs_trans *tp);
+int xfs_rtginode_create(struct xfs_rtgroup *rtg, enum xfs_rtg_inodes type,
+ bool init);
+void xfs_rtginode_irele(struct xfs_inode **ipp);
+
+static inline const char *xfs_rtginode_path(xfs_rgnumber_t rgno,
+ enum xfs_rtg_inodes type)
+{
+ return kasprintf(GFP_KERNEL, "%u.%s", rgno, xfs_rtginode_name(type));
+}
+
+void xfs_update_rtsb(struct xfs_buf *rtsb_bp,
+ const struct xfs_buf *sb_bp);
+struct xfs_buf *xfs_log_rtsb(struct xfs_trans *tp,
+ const struct xfs_buf *sb_bp);
+#else
+static inline void xfs_free_rtgroups(struct xfs_mount *mp,
+ xfs_rgnumber_t first_rgno, xfs_rgnumber_t end_rgno)
+{
+}
+
+static inline int xfs_initialize_rtgroups(struct xfs_mount *mp,
+ xfs_rgnumber_t first_rgno, xfs_rgnumber_t end_rgno,
+ xfs_rtbxlen_t rextents)
+{
+ return 0;
+}
+
+# define xfs_rtgroup_extents(mp, rgno) (0)
+# define xfs_update_last_rtgroup_size(mp, rgno) (-EOPNOTSUPP)
+# define xfs_rtgroup_lock(rtg, gf) ((void)0)
+# define xfs_rtgroup_unlock(rtg, gf) ((void)0)
+# define xfs_rtgroup_trans_join(tp, rtg, gf) ((void)0)
+# define xfs_update_rtsb(bp, sb_bp) ((void)0)
+# define xfs_log_rtsb(tp, sb_bp) (NULL)
+# define xfs_rtgroup_get_geometry(rtg, rgeo) (-EOPNOTSUPP)
+#endif /* CONFIG_XFS_RT */
+
+#endif /* __LIBXFS_RTGROUP_H */
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index d95409f3cba6..e81b240b7158 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -27,6 +27,7 @@
#include "xfs_ag.h"
#include "xfs_rtbitmap.h"
#include "xfs_exchrange.h"
+#include "xfs_rtgroup.h"
/*
* Physical superblock buffer manipulations. Shared with libxfs in userspace.
@@ -180,6 +181,8 @@ xfs_sb_version_to_features(
features |= XFS_FEAT_EXCHANGE_RANGE;
if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_PARENT)
features |= XFS_FEAT_PARENT;
+ if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)
+ features |= XFS_FEAT_METADIR;
return features;
}
@@ -232,11 +235,37 @@ xfs_validate_sb_read(
return 0;
}
+/* Return the number of extents covered by a single rt bitmap file */
+static xfs_rtbxlen_t
+xfs_extents_per_rbm(
+ struct xfs_sb *sbp)
+{
+ if (xfs_sb_is_v5(sbp) &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR))
+ return sbp->sb_rgextents;
+ return sbp->sb_rextents;
+}
+
+/*
+ * Return the payload size of a single rt bitmap block (without the metadata
+ * header if any).
+ */
+static inline unsigned int
+xfs_rtbmblock_size(
+ struct xfs_sb *sbp)
+{
+ if (xfs_sb_is_v5(sbp) &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR))
+ return sbp->sb_blocksize - sizeof(struct xfs_rtbuf_blkinfo);
+ return sbp->sb_blocksize;
+}
+
static uint64_t
-xfs_sb_calc_rbmblocks(
+xfs_expected_rbmblocks(
struct xfs_sb *sbp)
{
- return howmany_64(sbp->sb_rextents, NBBY * sbp->sb_blocksize);
+ return howmany_64(xfs_extents_per_rbm(sbp),
+ NBBY * xfs_rtbmblock_size(sbp));
}
/* Validate the realtime geometry */
@@ -258,7 +287,7 @@ xfs_validate_rt_geometry(
if (sbp->sb_rextents == 0 ||
sbp->sb_rextents != div_u64(sbp->sb_rblocks, sbp->sb_rextsize) ||
sbp->sb_rextslog != xfs_compute_rextslog(sbp->sb_rextents) ||
- sbp->sb_rbmblocks != xfs_sb_calc_rbmblocks(sbp))
+ sbp->sb_rbmblocks != xfs_expected_rbmblocks(sbp))
return false;
return true;
@@ -339,6 +368,78 @@ xfs_validate_sb_write(
return 0;
}
+/*
+ * Compute the block-number log expected for a realtime group made of
+ * @rgextents rt extents of @rextsize blocks each.  xfs_validate_sb_rtgroups()
+ * compares the result against sb_rgblklog.
+ *
+ * NOTE(review): highbit(rgblocks - 1) + 1 looks like ceil(log2(rgblocks)),
+ * i.e. the bits needed to index any block in the group -- confirm against
+ * xfs_highbit64(), especially for rgblocks <= 1.
+ */
+int
+xfs_compute_rgblklog(
+ xfs_rtxlen_t rgextents,
+ xfs_rgblock_t rextsize)
+{
+ /* Both inputs are 32-bit types; widen before multiplying. */
+ uint64_t rgblocks = (uint64_t)rgextents * rextsize;
+
+ return xfs_highbit64(rgblocks - 1) + 1;
+}
+
+/*
+ * Validate the realtime group geometry recorded in the superblock.
+ *
+ * Checks, in order: sb_rextsize is nonzero; sb_rgextents does not exceed
+ * XFS_MAX_RGBLOCKS / sb_rextsize; sb_rgextents is at least
+ * XFS_MIN_RGEXTENTS; sb_rgcount does not exceed XFS_MAX_RGNUMBER; sb_rgcount
+ * equals howmany_64(sb_rextents, sb_rgextents); the EXCHRANGE incompat
+ * feature is set; and sb_rgblklog matches xfs_compute_rgblklog().
+ *
+ * The first failing check logs a warning against @mp and returns -EINVAL.
+ * Returns 0 if every check passes.
+ */
+static int
+xfs_validate_sb_rtgroups(
+ struct xfs_mount *mp,
+ struct xfs_sb *sbp)
+{
+ uint64_t groups;
+ int rgblklog;
+
+ /* Must come first: sb_rextsize is used as a divisor below. */
+ if (sbp->sb_rextsize == 0) {
+ xfs_warn(mp,
+"Realtime extent size must not be zero.");
+ return -EINVAL;
+ }
+
+ if (sbp->sb_rgextents > XFS_MAX_RGBLOCKS / sbp->sb_rextsize) {
+ xfs_warn(mp,
+"Realtime group size (%u) must be less than %u rt extents.",
+ sbp->sb_rgextents,
+ XFS_MAX_RGBLOCKS / sbp->sb_rextsize);
+ return -EINVAL;
+ }
+
+ if (sbp->sb_rgextents < XFS_MIN_RGEXTENTS) {
+ xfs_warn(mp,
+"Realtime group size (%u) must be at least %u rt extents.",
+ sbp->sb_rgextents, XFS_MIN_RGEXTENTS);
+ return -EINVAL;
+ }
+
+ if (sbp->sb_rgcount > XFS_MAX_RGNUMBER) {
+ xfs_warn(mp,
+"Realtime groups (%u) must be less than %u.",
+ sbp->sb_rgcount, XFS_MAX_RGNUMBER);
+ return -EINVAL;
+ }
+
+ /* The recorded group count must match what the rt extent count needs. */
+ groups = howmany_64(sbp->sb_rextents, sbp->sb_rgextents);
+ if (groups != sbp->sb_rgcount) {
+ xfs_warn(mp,
+"Realtime groups (%u) do not cover the entire rt section; need (%llu) groups.",
+ sbp->sb_rgcount, groups);
+ return -EINVAL;
+ }
+
+ /* Exchange-range is required for fsr to work on realtime files */
+ if (!(sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_EXCHRANGE)) {
+ xfs_warn(mp,
+"Realtime groups feature requires exchange-range support.");
+ return -EINVAL;
+ }
+
+ /* sb_rgblklog is derived data; recompute it and compare. */
+ rgblklog = xfs_compute_rgblklog(sbp->sb_rgextents, sbp->sb_rextsize);
+ if (sbp->sb_rgblklog != rgblklog) {
+ xfs_warn(mp,
+"Realtime group log (%d) does not match expected value (%d).",
+ sbp->sb_rgblklog, rgblklog);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
/* Check the validity of the SB. */
STATIC int
xfs_validate_sb_common(
@@ -350,6 +451,7 @@ xfs_validate_sb_common(
uint32_t agcount = 0;
uint32_t rem;
bool has_dalign;
+ int error;
if (!xfs_verify_magic(bp, dsb->sb_magicnum)) {
xfs_warn(mp,
@@ -398,6 +500,32 @@ xfs_validate_sb_common(
sbp->sb_inoalignmt, align);
return -EINVAL;
}
+
+ if (!sbp->sb_spino_align ||
+ sbp->sb_spino_align > sbp->sb_inoalignmt ||
+ (sbp->sb_inoalignmt % sbp->sb_spino_align) != 0) {
+ xfs_warn(mp,
+ "Sparse inode alignment (%u) is invalid.",
+ sbp->sb_spino_align);
+ return -EINVAL;
+ }
+ } else if (sbp->sb_spino_align) {
+ xfs_warn(mp,
+ "Sparse inode alignment (%u) should be zero.",
+ sbp->sb_spino_align);
+ return -EINVAL;
+ }
+
+ if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) {
+ if (memchr_inv(sbp->sb_pad, 0, sizeof(sbp->sb_pad))) {
+ xfs_warn(mp,
+"Metadir superblock padding fields must be zero.");
+ return -EINVAL;
+ }
+
+ error = xfs_validate_sb_rtgroups(mp, sbp);
+ if (error)
+ return error;
}
} else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |
XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {
@@ -566,6 +694,14 @@ xfs_validate_sb_common(
void
xfs_sb_quota_from_disk(struct xfs_sb *sbp)
{
+ if (xfs_sb_is_v5(sbp) &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) {
+ sbp->sb_uquotino = NULLFSINO;
+ sbp->sb_gquotino = NULLFSINO;
+ sbp->sb_pquotino = NULLFSINO;
+ return;
+ }
+
/*
* older mkfs doesn't initialize quota inodes to NULLFSINO. This
* leads to in-core values having two different values for a quota
@@ -689,6 +825,20 @@ __xfs_sb_from_disk(
/* Convert on-disk flags to in-memory flags? */
if (convert_xquota)
xfs_sb_quota_from_disk(to);
+
+ if (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) {
+ to->sb_metadirino = be64_to_cpu(from->sb_metadirino);
+ to->sb_rgblklog = from->sb_rgblklog;
+ memcpy(to->sb_pad, from->sb_pad, sizeof(to->sb_pad));
+ to->sb_rgcount = be32_to_cpu(from->sb_rgcount);
+ to->sb_rgextents = be32_to_cpu(from->sb_rgextents);
+ to->sb_rbmino = NULLFSINO;
+ to->sb_rsumino = NULLFSINO;
+ } else {
+ to->sb_metadirino = NULLFSINO;
+ to->sb_rgcount = 1;
+ to->sb_rgextents = 0;
+ }
}
void
@@ -706,6 +856,15 @@ xfs_sb_quota_to_disk(
{
uint16_t qflags = from->sb_qflags;
+ if (xfs_sb_is_v5(from) &&
+ (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) {
+ to->sb_qflags = cpu_to_be16(from->sb_qflags);
+ to->sb_uquotino = cpu_to_be64(0);
+ to->sb_gquotino = cpu_to_be64(0);
+ to->sb_pquotino = cpu_to_be64(0);
+ return;
+ }
+
to->sb_uquotino = cpu_to_be64(from->sb_uquotino);
/*
@@ -836,6 +995,16 @@ xfs_sb_to_disk(
to->sb_lsn = cpu_to_be64(from->sb_lsn);
if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID)
uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid);
+
+ if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) {
+ to->sb_metadirino = cpu_to_be64(from->sb_metadirino);
+ to->sb_rgblklog = from->sb_rgblklog;
+ memset(to->sb_pad, 0, sizeof(to->sb_pad));
+ to->sb_rgcount = cpu_to_be32(from->sb_rgcount);
+ to->sb_rgextents = cpu_to_be32(from->sb_rgextents);
+ to->sb_rbmino = cpu_to_be64(0);
+ to->sb_rsumino = cpu_to_be64(0);
+ }
}
/*
@@ -965,13 +1134,43 @@ const struct xfs_buf_ops xfs_sb_quiet_buf_ops = {
.verify_write = xfs_sb_write_verify,
};
+/*
+ * Compute cached rt geometry from the incore sb.
+ *
+ * Sets m_rtxblklog and m_rtxblkmask from sb_rextsize, then fills the
+ * XG_TYPE_RTG entry of mp->m_groups.  On a V5 superblock with the METADIR
+ * incompat feature that entry gets the group size in blocks plus the block
+ * log and mask; otherwise it gets zero blocks, a zero log and an all-ones
+ * mask.
+ *
+ * NOTE(review): blocks is derived from @sbp but blklog and blkmask are read
+ * from mp->m_sb -- presumably callers pass a superblock that agrees with
+ * mp->m_sb; confirm.
+ */
void
-xfs_mount_sb_set_rextsize(
+xfs_sb_mount_rextsize(
struct xfs_mount *mp,
struct xfs_sb *sbp)
{
+ struct xfs_groups *rgs = &mp->m_groups[XG_TYPE_RTG];
+
mp->m_rtxblklog = log2_if_power2(sbp->sb_rextsize);
mp->m_rtxblkmask = mask64_if_power2(sbp->sb_rextsize);
+
+ if (xfs_sb_is_v5(sbp) &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)) {
+ rgs->blocks = sbp->sb_rgextents * sbp->sb_rextsize;
+ rgs->blklog = mp->m_sb.sb_rgblklog;
+ rgs->blkmask = xfs_mask32lo(mp->m_sb.sb_rgblklog);
+ } else {
+ rgs->blocks = 0;
+ rgs->blklog = 0;
+ rgs->blkmask = (uint64_t)-1;
+ }
+}
+
+/*
+ * Update incore sb rt extent size, then recompute the cached rt geometry.
+ *
+ * @rextsize is stored in sbp->sb_rextsize.  On a V5 superblock with the
+ * METADIR incompat feature, sb_rgblklog is also recomputed from sb_rgextents
+ * and the new extent size.  xfs_sb_mount_rextsize() then refreshes the
+ * values cached in @mp.
+ */
+void
+xfs_mount_sb_set_rextsize(
+ struct xfs_mount *mp,
+ struct xfs_sb *sbp,
+ xfs_agblock_t rextsize)
+{
+ sbp->sb_rextsize = rextsize;
+ if (xfs_sb_is_v5(sbp) &&
+ (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR))
+ sbp->sb_rgblklog = xfs_compute_rgblklog(sbp->sb_rgextents,
+ rextsize);
+
+ xfs_sb_mount_rextsize(mp, sbp);
}
/*
@@ -988,6 +1187,8 @@ xfs_sb_mount_common(
struct xfs_mount *mp,
struct xfs_sb *sbp)
{
+ struct xfs_groups *ags = &mp->m_groups[XG_TYPE_AG];
+
mp->m_agfrotor = 0;
atomic_set(&mp->m_agirotor, 0);
mp->m_maxagi = mp->m_sb.sb_agcount;
@@ -996,9 +1197,14 @@ xfs_sb_mount_common(
mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT;
mp->m_agno_log = xfs_highbit32(sbp->sb_agcount - 1) + 1;
mp->m_blockmask = sbp->sb_blocksize - 1;
- mp->m_blockwsize = sbp->sb_blocksize >> XFS_WORDLOG;
- mp->m_blockwmask = mp->m_blockwsize - 1;
- xfs_mount_sb_set_rextsize(mp, sbp);
+ mp->m_blockwsize = xfs_rtbmblock_size(sbp) >> XFS_WORDLOG;
+ mp->m_rtx_per_rbmblock = mp->m_blockwsize << XFS_NBWORDLOG;
+
+ ags->blocks = mp->m_sb.sb_agblocks;
+ ags->blklog = mp->m_sb.sb_agblklog;
+ ags->blkmask = xfs_mask32lo(mp->m_sb.sb_agblklog);
+
+ xfs_sb_mount_rextsize(mp, sbp);
mp->m_alloc_mxr[0] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, true);
mp->m_alloc_mxr[1] = xfs_allocbt_maxrecs(mp, sbp->sb_blocksize, false);
@@ -1045,11 +1251,6 @@ xfs_log_sb(
* reservations that have been taken out percpu counters. If we have an
* unclean shutdown, this will be corrected by log recovery rebuilding
* the counters from the AGF block counts.
- *
- * Do not update sb_frextents here because it is not part of the lazy
- * sb counters, despite having a percpu counter. It is always kept
- * consistent with the ondisk rtbitmap by xfs_trans_apply_sb_deltas()
- * and hence we don't need have to update it here.
*/
if (xfs_has_lazysbcount(mp)) {
mp->m_sb.sb_icount = percpu_counter_sum_positive(&mp->m_icount);
@@ -1060,6 +1261,16 @@ xfs_log_sb(
percpu_counter_sum_positive(&mp->m_fdblocks);
}
+ /*
+ * sb_frextents was added to the lazy sb counters when the rt groups
+ * feature was introduced. This counter can go negative due to the way
+ * we handle nearly-lockless reservations, so we must use the _positive
+ * variant here to avoid writing out nonsense frextents.
+ */
+ if (xfs_has_rtgroups(mp))
+ mp->m_sb.sb_frextents =
+ percpu_counter_sum_positive(&mp->m_frextents);
+
xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1);
@@ -1109,18 +1320,17 @@ int
xfs_update_secondary_sbs(
struct xfs_mount *mp)
{
- struct xfs_perag *pag;
- xfs_agnumber_t agno = 1;
+ struct xfs_perag *pag = NULL;
int saved_error = 0;
int error = 0;
LIST_HEAD (buffer_list);
/* update secondary superblocks. */
- for_each_perag_from(mp, agno, pag) {
+ while ((pag = xfs_perag_next_from(mp, pag, 1))) {
struct xfs_buf *bp;
error = xfs_buf_get(mp->m_ddev_targp,
- XFS_AG_DADDR(mp, pag->pag_agno, XFS_SB_DADDR),
+ XFS_AG_DADDR(mp, pag_agno(pag), XFS_SB_DADDR),
XFS_FSS_TO_BB(mp, 1), &bp);
/*
* If we get an error reading or writing alternate superblocks,
@@ -1132,7 +1342,7 @@ xfs_update_secondary_sbs(
if (error) {
xfs_warn(mp,
"error allocating secondary superblock for ag %d",
- pag->pag_agno);
+ pag_agno(pag));
if (!saved_error)
saved_error = error;
continue;
@@ -1146,26 +1356,22 @@ xfs_update_secondary_sbs(
xfs_buf_relse(bp);
/* don't hold too many buffers at once */
- if (agno % 16)
+ if (pag_agno(pag) % 16)
continue;
error = xfs_buf_delwri_submit(&buffer_list);
if (error) {
xfs_warn(mp,
"write error %d updating a secondary superblock near ag %d",
- error, pag->pag_agno);
+ error, pag_agno(pag));
if (!saved_error)
saved_error = error;
continue;
}
}
error = xfs_buf_delwri_submit(&buffer_list);
- if (error) {
- xfs_warn(mp,
- "write error %d updating a secondary superblock near ag %d",
- error, agno);
- }
-
+ if (error)
+ xfs_warn(mp, "error %d writing secondary superblocks", error);
return saved_error ? saved_error : error;
}
@@ -1175,10 +1381,12 @@ xfs_update_secondary_sbs(
*/
int
xfs_sync_sb_buf(
- struct xfs_mount *mp)
+ struct xfs_mount *mp,
+ bool update_rtsb)
{
struct xfs_trans *tp;
struct xfs_buf *bp;
+ struct xfs_buf *rtsb_bp = NULL;
int error;
error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0, 0, &tp);
@@ -1188,6 +1396,11 @@ xfs_sync_sb_buf(
bp = xfs_trans_getsb(tp);
xfs_log_sb(tp);
xfs_trans_bhold(tp, bp);
+ if (update_rtsb) {
+ rtsb_bp = xfs_log_rtsb(tp, bp);
+ if (rtsb_bp)
+ xfs_trans_bhold(tp, rtsb_bp);
+ }
xfs_trans_set_sync(tp);
error = xfs_trans_commit(tp);
if (error)
@@ -1196,7 +1409,11 @@ xfs_sync_sb_buf(
* write out the sb buffer to get the changes to disk
*/
error = xfs_bwrite(bp);
+ if (!error && rtsb_bp)
+ error = xfs_bwrite(rtsb_bp);
out:
+ if (rtsb_bp)
+ xfs_buf_relse(rtsb_bp);
xfs_buf_relse(bp);
return error;
}
@@ -1283,6 +1500,8 @@ xfs_fs_geometry(
geo->flags |= XFS_FSOP_GEOM_FLAGS_NREXT64;
if (xfs_has_exchange_range(mp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE;
+ if (xfs_has_metadir(mp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_METADIR;
geo->rtsectsize = sbp->sb_blocksize;
geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp);
@@ -1298,6 +1517,11 @@ xfs_fs_geometry(
return;
geo->version = XFS_FSOP_GEOM_VERSION_V5;
+
+ if (xfs_has_rtgroups(mp)) {
+ geo->rgcount = sbp->sb_rgcount;
+ geo->rgextents = sbp->sb_rgextents;
+ }
}
/* Read a secondary superblock. */
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 885c83755991..34d0dd374e9b 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -15,10 +15,11 @@ struct xfs_perag;
extern void xfs_log_sb(struct xfs_trans *tp);
extern int xfs_sync_sb(struct xfs_mount *mp, bool wait);
-extern int xfs_sync_sb_buf(struct xfs_mount *mp);
+extern int xfs_sync_sb_buf(struct xfs_mount *mp, bool update_rtsb);
extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp);
+void xfs_sb_mount_rextsize(struct xfs_mount *mp, struct xfs_sb *sbp);
void xfs_mount_sb_set_rextsize(struct xfs_mount *mp,
- struct xfs_sb *sbp);
+ struct xfs_sb *sbp, xfs_agblock_t rextsize);
extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from);
extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from);
extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp);
@@ -43,5 +44,6 @@ bool xfs_validate_stripe_geometry(struct xfs_mount *mp,
bool xfs_validate_rt_geometry(struct xfs_sb *sbp);
uint8_t xfs_compute_rextslog(xfs_rtbxlen_t rtextents);
+int xfs_compute_rgblklog(xfs_rtxlen_t rgextents, xfs_rgblock_t rextsize);
#endif /* __XFS_SB_H__ */
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 33b84a3a83ff..e7efdb9ceaf3 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -38,7 +38,10 @@ extern const struct xfs_buf_ops xfs_inode_buf_ops;
extern const struct xfs_buf_ops xfs_inode_buf_ra_ops;
extern const struct xfs_buf_ops xfs_refcountbt_buf_ops;
extern const struct xfs_buf_ops xfs_rmapbt_buf_ops;
+extern const struct xfs_buf_ops xfs_rtbitmap_buf_ops;
+extern const struct xfs_buf_ops xfs_rtsummary_buf_ops;
extern const struct xfs_buf_ops xfs_rtbuf_ops;
+extern const struct xfs_buf_ops xfs_rtsb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
extern const struct xfs_buf_ops xfs_symlink_buf_ops;
@@ -157,6 +160,7 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp,
#define XFS_TRANS_SB_RBLOCKS 0x00000800
#define XFS_TRANS_SB_REXTENTS 0x00001000
#define XFS_TRANS_SB_REXTSLOG 0x00002000
+#define XFS_TRANS_SB_RGCOUNT 0x00004000
/*
* Here we centralize the specification of XFS meta-data buffer reference count
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 3c40f37e82c7..c962ad64b0c1 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -62,12 +62,12 @@ xfs_trans_ichgtime(
ASSERT(tp);
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
- tv = current_time(inode);
+ /* If the mtime changes, then ctime must also change */
+ ASSERT(flags & XFS_ICHGTIME_CHG);
+ tv = inode_set_ctime_current(inode);
if (flags & XFS_ICHGTIME_MOD)
inode_set_mtime_to_ts(inode, tv);
- if (flags & XFS_ICHGTIME_CHG)
- inode_set_ctime_to_ts(inode, tv);
if (flags & XFS_ICHGTIME_ACCESS)
inode_set_atime_to_ts(inode, tv);
if (flags & XFS_ICHGTIME_CREATE)
diff --git a/fs/xfs/libxfs/xfs_trans_resv.c b/fs/xfs/libxfs/xfs_trans_resv.c
index 1a7f95bcf069..bab402340b5d 100644
--- a/fs/xfs/libxfs/xfs_trans_resv.c
+++ b/fs/xfs/libxfs/xfs_trans_resv.c
@@ -224,7 +224,7 @@ xfs_rtalloc_block_count(
xfs_rtxlen_t rtxlen;
rtxlen = xfs_extlen_to_rtxlen(mp, XFS_MAX_BMBT_EXTLEN);
- rtbmp_blocks = xfs_rtbitmap_blockcount(mp, rtxlen);
+ rtbmp_blocks = xfs_rtbitmap_blockcount_len(mp, rtxlen);
return (rtbmp_blocks + 1) * num_ops;
}
diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c
index c299b16c9365..1faf04204c5d 100644
--- a/fs/xfs/libxfs/xfs_types.c
+++ b/fs/xfs/libxfs/xfs_types.c
@@ -12,6 +12,8 @@
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_ag.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
/*
@@ -111,7 +113,7 @@ xfs_verify_ino(
/* Is this an internal inode number? */
inline bool
-xfs_internal_inum(
+xfs_is_sb_inum(
struct xfs_mount *mp,
xfs_ino_t ino)
{
@@ -129,24 +131,42 @@ xfs_verify_dir_ino(
struct xfs_mount *mp,
xfs_ino_t ino)
{
- if (xfs_internal_inum(mp, ino))
+ if (xfs_is_sb_inum(mp, ino))
return false;
return xfs_verify_ino(mp, ino);
}
/*
- * Verify that an realtime block number pointer doesn't point off the
- * end of the realtime device.
+ * Verify that a realtime block number pointer neither points outside the
+ * allocatable areas of the rtgroup nor off the end of the realtime
+ * device.
+ *
+ * With rtgroups, @rtbno is split into a group number and an rt extent number:
+ * the group must be below sb_rgcount, the extent below
+ * xfs_rtgroup_extents() for that group, and extent 0 of group 0 is rejected
+ * when xfs_has_rtsb() is true (presumably because the rt superblock lives
+ * there -- confirm).  Without rtgroups, only rtbno < sb_rblocks is checked.
*/
inline bool
xfs_verify_rtbno(
struct xfs_mount *mp,
xfs_rtblock_t rtbno)
{
+ if (xfs_has_rtgroups(mp)) {
+ xfs_rgnumber_t rgno = xfs_rtb_to_rgno(mp, rtbno);
+ xfs_rtxnum_t rtx = xfs_rtb_to_rtx(mp, rtbno);
+
+ if (rgno >= mp->m_sb.sb_rgcount)
+ return false;
+ if (rtx >= xfs_rtgroup_extents(mp, rgno))
+ return false;
+ if (xfs_has_rtsb(mp) && rgno == 0 && rtx == 0)
+ return false;
+ return true;
+ }
+
return rtbno < mp->m_sb.sb_rblocks;
}
-/* Verify that a realtime device extent is fully contained inside the volume. */
+/*
+ * Verify that an allocated realtime device extent neither points outside
+ * allocatable areas of the rtgroup, across an rtgroup boundary, nor off the
+ * end of the realtime device.
+ */
bool
xfs_verify_rtbext(
struct xfs_mount *mp,
@@ -159,7 +179,14 @@ xfs_verify_rtbext(
if (!xfs_verify_rtbno(mp, rtbno))
return false;
- return xfs_verify_rtbno(mp, rtbno + len - 1);
+ if (!xfs_verify_rtbno(mp, rtbno + len - 1))
+ return false;
+
+ if (xfs_has_rtgroups(mp) &&
+ xfs_rtb_to_rgno(mp, rtbno) != xfs_rtb_to_rgno(mp, rtbno + len - 1))
+ return false;
+
+ return true;
}
/* Calculate the range of valid icount values. */
@@ -170,13 +197,12 @@ xfs_icount_range(
unsigned long long *max)
{
unsigned long long nr_inos = 0;
- struct xfs_perag *pag;
- xfs_agnumber_t agno;
+ struct xfs_perag *pag = NULL;
/* root, rtbitmap, rtsum all live in the first chunk */
*min = XFS_INODES_PER_CHUNK;
- for_each_perag(mp, agno, pag)
+ while ((pag = xfs_perag_next(mp, pag)))
nr_inos += pag->agino_max - pag->agino_min + 1;
*max = nr_inos;
}
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index a8cd44d03ef6..bf33c2b1e43e 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -9,10 +9,12 @@
typedef uint32_t prid_t; /* project ID */
typedef uint32_t xfs_agblock_t; /* blockno in alloc. group */
+typedef uint32_t xfs_rgblock_t; /* blockno in realtime group */
typedef uint32_t xfs_agino_t; /* inode # within allocation grp */
typedef uint32_t xfs_extlen_t; /* extent length in blocks */
typedef uint32_t xfs_rtxlen_t; /* file extent length in rtextents */
typedef uint32_t xfs_agnumber_t; /* allocation group number */
+typedef uint32_t xfs_rgnumber_t; /* realtime group number */
typedef uint64_t xfs_extnum_t; /* # of extents in a file */
typedef uint32_t xfs_aextnum_t; /* # extents in an attribute fork */
typedef int64_t xfs_fsize_t; /* bytes in a file */
@@ -53,7 +55,9 @@ typedef void * xfs_failaddr_t;
#define NULLFILEOFF ((xfs_fileoff_t)-1)
#define NULLAGBLOCK ((xfs_agblock_t)-1)
+#define NULLRGBLOCK ((xfs_rgblock_t)-1)
#define NULLAGNUMBER ((xfs_agnumber_t)-1)
+#define NULLRGNUMBER ((xfs_rgnumber_t)-1)
#define NULLCOMMITLSN ((xfs_lsn_t)-1)
@@ -212,6 +216,16 @@ enum xbtree_recpacking {
XBTREE_RECPACKING_FULL,
};
+/*
+ * Kinds of groups tracked per mount.  The values are used as indexes into
+ * mp->m_groups[].
+ */
+enum xfs_group_type {
+ XG_TYPE_AG, /* allocation group */
+ XG_TYPE_RTG, /* realtime group */
+ XG_TYPE_MAX, /* presumably the number of group types -- confirm */
+} __packed;
+
+/*
+ * Value/name pairs for the group types above; presumably consumed by
+ * tracepoint symbolic printing -- confirm.  XG_TYPE_MAX has no name.
+ */
+#define XG_TYPE_STRINGS \
+ { XG_TYPE_AG, "ag" }, \
+ { XG_TYPE_RTG, "rtg" }
+
/*
* Type verifier functions
*/
@@ -222,7 +236,7 @@ bool xfs_verify_fsbext(struct xfs_mount *mp, xfs_fsblock_t fsbno,
xfs_fsblock_t len);
bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino);
-bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
+bool xfs_is_sb_inum(struct xfs_mount *mp, xfs_ino_t ino);
bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino);
bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
bool xfs_verify_rtbext(struct xfs_mount *mp, xfs_rtblock_t rtbno,
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index f8e5b67128d2..61f80a6410c7 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -144,11 +144,16 @@ xchk_superblock(
if (sb->sb_rootino != cpu_to_be64(mp->m_sb.sb_rootino))
xchk_block_set_preen(sc, bp);
- if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino))
- xchk_block_set_preen(sc, bp);
+ if (xfs_has_metadir(sc->mp)) {
+ if (sb->sb_metadirino != cpu_to_be64(mp->m_sb.sb_metadirino))
+ xchk_block_set_preen(sc, bp);
+ } else {
+ if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino))
+ xchk_block_set_preen(sc, bp);
- if (sb->sb_rsumino != cpu_to_be64(mp->m_sb.sb_rsumino))
- xchk_block_set_preen(sc, bp);
+ if (sb->sb_rsumino != cpu_to_be64(mp->m_sb.sb_rsumino))
+ xchk_block_set_preen(sc, bp);
+ }
if (sb->sb_rextsize != cpu_to_be32(mp->m_sb.sb_rextsize))
xchk_block_set_corrupt(sc, bp);
@@ -224,11 +229,13 @@ xchk_superblock(
* sb_icount, sb_ifree, sb_fdblocks, sb_frexents
*/
- if (sb->sb_uquotino != cpu_to_be64(mp->m_sb.sb_uquotino))
- xchk_block_set_preen(sc, bp);
+ if (!xfs_has_metadir(mp)) {
+ if (sb->sb_uquotino != cpu_to_be64(mp->m_sb.sb_uquotino))
+ xchk_block_set_preen(sc, bp);
- if (sb->sb_gquotino != cpu_to_be64(mp->m_sb.sb_gquotino))
- xchk_block_set_preen(sc, bp);
+ if (sb->sb_gquotino != cpu_to_be64(mp->m_sb.sb_gquotino))
+ xchk_block_set_preen(sc, bp);
+ }
/*
* Skip the quota flags since repair will force quotacheck.
@@ -274,8 +281,15 @@ xchk_superblock(
if (!!(sb->sb_features2 & cpu_to_be32(~v2_ok)))
xchk_block_set_corrupt(sc, bp);
- if (sb->sb_features2 != sb->sb_bad_features2)
- xchk_block_set_preen(sc, bp);
+ if (xfs_has_metadir(mp)) {
+ if (sb->sb_rgblklog != mp->m_sb.sb_rgblklog)
+ xchk_block_set_corrupt(sc, bp);
+ if (memchr_inv(sb->sb_pad, 0, sizeof(sb->sb_pad)))
+ xchk_block_set_preen(sc, bp);
+ } else {
+ if (sb->sb_features2 != sb->sb_bad_features2)
+ xchk_block_set_preen(sc, bp);
+ }
}
/* Check sb_features2 flags that are set at mkfs time. */
@@ -337,8 +351,10 @@ xchk_superblock(
if (sb->sb_spino_align != cpu_to_be32(mp->m_sb.sb_spino_align))
xchk_block_set_corrupt(sc, bp);
- if (sb->sb_pquotino != cpu_to_be64(mp->m_sb.sb_pquotino))
- xchk_block_set_preen(sc, bp);
+ if (!xfs_has_metadir(mp)) {
+ if (sb->sb_pquotino != cpu_to_be64(mp->m_sb.sb_pquotino))
+ xchk_block_set_preen(sc, bp);
+ }
/* Don't care about sb_lsn */
}
@@ -349,6 +365,14 @@ xchk_superblock(
xchk_block_set_corrupt(sc, bp);
}
+ if (xfs_has_metadir(mp)) {
+ if (sb->sb_rgcount != cpu_to_be32(mp->m_sb.sb_rgcount))
+ xchk_block_set_corrupt(sc, bp);
+
+ if (sb->sb_rgextents != cpu_to_be32(mp->m_sb.sb_rgextents))
+ xchk_block_set_corrupt(sc, bp);
+ }
+
/* Everything else must be zero. */
if (memchr_inv(sb + 1, 0,
BBTOB(bp->b_length) - sizeof(struct xfs_dsb)))
@@ -552,7 +576,7 @@ xchk_agf(
/* Check the AG length */
eoag = be32_to_cpu(agf->agf_length);
- if (eoag != pag->block_count)
+ if (eoag != pag_group(pag)->xg_block_count)
xchk_block_set_corrupt(sc, sc->sa.agf_bp);
/* Check the AGF btree roots and levels */
@@ -932,7 +956,7 @@ xchk_agi(
/* Check the AG length */
eoag = be32_to_cpu(agi->agi_length);
- if (eoag != pag->block_count)
+ if (eoag != pag_group(pag)->xg_block_count)
xchk_block_set_corrupt(sc, sc->sa.agi_bp);
/* Check btree roots and levels */
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 2f98d90d7fd6..0fad0baaba2f 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -208,8 +208,8 @@ xrep_agf_init_header(
memset(agf, 0, BBTOB(agf_bp->b_length));
agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
- agf->agf_seqno = cpu_to_be32(pag->pag_agno);
- agf->agf_length = cpu_to_be32(pag->block_count);
+ agf->agf_seqno = cpu_to_be32(pag_agno(pag));
+ agf->agf_length = cpu_to_be32(pag_group(pag)->xg_block_count);
agf->agf_flfirst = old_agf->agf_flfirst;
agf->agf_fllast = old_agf->agf_fllast;
agf->agf_flcount = old_agf->agf_flcount;
@@ -384,7 +384,7 @@ xrep_agf(
* was corrupt after xfs_alloc_read_agf failed with -EFSCORRUPTED.
*/
error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
- XFS_AG_DADDR(mp, sc->sa.pag->pag_agno,
+ XFS_AG_DADDR(mp, pag_agno(sc->sa.pag),
XFS_AGF_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), 0, &agf_bp, NULL);
if (error)
@@ -687,7 +687,7 @@ xrep_agfl_init_header(
agfl = XFS_BUF_TO_AGFL(agfl_bp);
memset(agfl, 0xFF, BBTOB(agfl_bp->b_length));
agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
- agfl->agfl_seqno = cpu_to_be32(sc->sa.pag->pag_agno);
+ agfl->agfl_seqno = cpu_to_be32(pag_agno(sc->sa.pag));
uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
/*
@@ -741,7 +741,7 @@ xrep_agfl(
* was corrupt after xfs_alloc_read_agfl failed with -EFSCORRUPTED.
*/
error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
- XFS_AG_DADDR(mp, sc->sa.pag->pag_agno,
+ XFS_AG_DADDR(mp, pag_agno(sc->sa.pag),
XFS_AGFL_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), 0, &agfl_bp, NULL);
if (error)
@@ -897,8 +897,8 @@ xrep_agi_init_header(
memset(agi, 0, BBTOB(agi_bp->b_length));
agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
- agi->agi_seqno = cpu_to_be32(pag->pag_agno);
- agi->agi_length = cpu_to_be32(pag->block_count);
+ agi->agi_seqno = cpu_to_be32(pag_agno(pag));
+ agi->agi_length = cpu_to_be32(pag_group(pag)->xg_block_count);
agi->agi_newino = cpu_to_be32(NULLAGINO);
agi->agi_dirino = cpu_to_be32(NULLAGINO);
if (xfs_has_crc(mp))
@@ -1038,12 +1038,10 @@ xrep_iunlink_reload_next(
{
struct xfs_scrub *sc = ragi->sc;
struct xfs_inode *ip;
- xfs_ino_t ino;
xfs_agino_t ret = NULLAGINO;
int error;
- ino = XFS_AGINO_TO_INO(sc->mp, sc->sa.pag->pag_agno, agino);
- error = xchk_iget(ragi->sc, ino, &ip);
+ error = xchk_iget(ragi->sc, xfs_agino_to_ino(sc->sa.pag, agino), &ip);
if (error)
return ret;
@@ -1114,9 +1112,9 @@ xrep_iunlink_igrab(
struct xfs_perag *pag,
struct xfs_inode *ip)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
- if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
+ if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag_agno(pag))
return false;
if (!xfs_inode_on_unlinked_list(ip))
@@ -1140,7 +1138,7 @@ xrep_iunlink_visit(
unsigned int bucket;
int error;
- ASSERT(XFS_INO_TO_AGNO(mp, ip->i_ino) == ragi->sc->sa.pag->pag_agno);
+ ASSERT(XFS_INO_TO_AGNO(mp, ip->i_ino) == pag_agno(ragi->sc->sa.pag));
ASSERT(xfs_inode_on_unlinked_list(ip));
agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
@@ -1171,7 +1169,7 @@ xrep_iunlink_mark_incore(
struct xrep_agi *ragi)
{
struct xfs_perag *pag = ragi->sc->sa.pag;
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
uint32_t first_index = 0;
bool done = false;
unsigned int nr_found = 0;
@@ -1211,7 +1209,7 @@ xrep_iunlink_mark_incore(
* us to see this inode, so another lookup from the
* same index will not find it again.
*/
- if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
+ if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag_agno(pag))
continue;
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
@@ -1278,9 +1276,7 @@ xrep_iunlink_mark_ondisk_rec(
* on because we haven't actually scrubbed the inobt or the
* inodes yet.
*/
- error = xchk_iget(ragi->sc,
- XFS_AGINO_TO_INO(mp, sc->sa.pag->pag_agno,
- agino),
+ error = xchk_iget(ragi->sc, xfs_agino_to_ino(sc->sa.pag, agino),
&ip);
if (error)
continue;
@@ -1539,15 +1535,13 @@ xrep_iunlink_relink_next(
ip = xfs_iunlink_lookup(pag, agino);
if (!ip) {
- xfs_ino_t ino;
xfs_agino_t prev_agino;
/*
* No inode exists in cache. Load it off the disk so that we
* can reinsert it into the incore unlinked list.
*/
- ino = XFS_AGINO_TO_INO(sc->mp, pag->pag_agno, agino);
- error = xchk_iget(sc, ino, &ip);
+ error = xchk_iget(sc, xfs_agino_to_ino(pag, agino), &ip);
if (error)
return -EFSCORRUPTED;
@@ -1601,15 +1595,13 @@ xrep_iunlink_relink_prev(
ip = xfs_iunlink_lookup(pag, agino);
if (!ip) {
- xfs_ino_t ino;
xfs_agino_t next_agino;
/*
* No inode exists in cache. Load it off the disk so that we
* can reinsert it into the incore unlinked list.
*/
- ino = XFS_AGINO_TO_INO(sc->mp, pag->pag_agno, agino);
- error = xchk_iget(sc, ino, &ip);
+ error = xchk_iget(sc, xfs_agino_to_ino(pag, agino), &ip);
if (error)
return -EFSCORRUPTED;
@@ -1769,7 +1761,7 @@ xrep_agi(
* was corrupt after xfs_ialloc_read_agi failed with -EFSCORRUPTED.
*/
error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp,
- XFS_AG_DADDR(mp, sc->sa.pag->pag_agno,
+ XFS_AG_DADDR(mp, pag_agno(sc->sa.pag),
XFS_AGI_DADDR(mp)),
XFS_FSS_TO_BB(mp, 1), 0, &ragi->agi_bp, NULL);
if (error)
diff --git a/fs/xfs/scrub/alloc.c b/fs/xfs/scrub/alloc.c
index d1b8a4997dd2..8b282138097f 100644
--- a/fs/xfs/scrub/alloc.c
+++ b/fs/xfs/scrub/alloc.c
@@ -139,7 +139,7 @@ xchk_allocbt_rec(
struct xchk_alloc *ca = bs->private;
xfs_alloc_btrec_to_irec(rec, &irec);
- if (xfs_alloc_check_irec(bs->cur->bc_ag.pag, &irec) != NULL) {
+ if (xfs_alloc_check_irec(to_perag(bs->cur->bc_group), &irec) != NULL) {
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
return 0;
}
diff --git a/fs/xfs/scrub/alloc_repair.c b/fs/xfs/scrub/alloc_repair.c
index 30295898cc8a..0433363a90b6 100644
--- a/fs/xfs/scrub/alloc_repair.c
+++ b/fs/xfs/scrub/alloc_repair.c
@@ -132,17 +132,16 @@ int
xrep_setup_ag_allocbt(
struct xfs_scrub *sc)
{
+ struct xfs_group *xg = pag_group(sc->sa.pag);
unsigned int busy_gen;
/*
* Make sure the busy extent list is clear because we can't put extents
* on there twice.
*/
- busy_gen = READ_ONCE(sc->sa.pag->pagb_gen);
- if (xfs_extent_busy_list_empty(sc->sa.pag))
+ if (xfs_extent_busy_list_empty(xg, &busy_gen))
return 0;
-
- return xfs_extent_busy_flush(sc->tp, sc->sa.pag, busy_gen, 0);
+ return xfs_extent_busy_flush(sc->tp, xg, busy_gen, 0);
}
/* Check for any obvious conflicts in the free extent. */
@@ -210,7 +209,7 @@ xrep_abt_stash(
if (error)
return error;
- trace_xrep_abt_found(sc->mp, sc->sa.pag->pag_agno, &arec);
+ trace_xrep_abt_found(sc->sa.pag, &arec);
error = xfarray_append(ra->free_records, &arec);
if (error)
@@ -484,8 +483,8 @@ xrep_abt_reserve_space(
ASSERT(arec.ar_blockcount <= UINT_MAX);
len = min_t(unsigned int, arec.ar_blockcount, desired);
- trace_xrep_newbt_alloc_ag_blocks(sc->mp, sc->sa.pag->pag_agno,
- arec.ar_startblock, len, XFS_RMAP_OWN_AG);
+ trace_xrep_newbt_alloc_ag_blocks(sc->sa.pag, arec.ar_startblock,
+ len, XFS_RMAP_OWN_AG);
error = xrep_newbt_add_extent(&ra->new_bnobt, sc->sa.pag,
arec.ar_startblock, len);
@@ -543,7 +542,7 @@ xrep_abt_dispose_one(
/* Add a deferred rmap for each extent we used. */
if (resv->used > 0)
- xfs_rmap_alloc_extent(sc->tp, pag->pag_agno, resv->agbno,
+ xfs_rmap_alloc_extent(sc->tp, pag_agno(pag), resv->agbno,
resv->used, XFS_RMAP_OWN_AG);
/*
@@ -554,8 +553,8 @@ xrep_abt_dispose_one(
if (free_aglen == 0)
return 0;
- trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
- free_aglen, ra->new_bnobt.oinfo.oi_owner);
+ trace_xrep_newbt_free_blocks(resv->pag, free_agbno, free_aglen,
+ ra->new_bnobt.oinfo.oi_owner);
error = __xfs_free_extent(sc->tp, resv->pag, free_agbno, free_aglen,
&ra->new_bnobt.oinfo, XFS_AG_RESV_IGNORE, true);
@@ -849,6 +848,7 @@ xrep_allocbt(
{
struct xrep_abt *ra;
struct xfs_mount *mp = sc->mp;
+ unsigned int busy_gen;
char *descr;
int error;
@@ -869,7 +869,7 @@ xrep_allocbt(
* on there twice. In theory we cleared this before we started, but
* let's not risk the filesystem.
*/
- if (!xfs_extent_busy_list_empty(sc->sa.pag)) {
+ if (!xfs_extent_busy_list_empty(pag_group(sc->sa.pag), &busy_gen)) {
error = -EDEADLOCK;
goto out_ra;
}
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index 5ab2ac53c920..7e00312225ed 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -19,6 +19,7 @@
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
+#include "xfs_rtgroup.h"
#include "xfs_health.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -314,8 +315,20 @@ xchk_bmap_rt_iextent_xref(
struct xchk_bmap_info *info,
struct xfs_bmbt_irec *irec)
{
+ int error;
+
+ error = xchk_rtgroup_init_existing(info->sc,
+ xfs_rtb_to_rgno(ip->i_mount, irec->br_startblock),
+ &info->sc->sr);
+ if (!xchk_fblock_process_error(info->sc, info->whichfork,
+ irec->br_startoff, &error))
+ return;
+
+ xchk_rtgroup_lock(&info->sc->sr, XCHK_RTGLOCK_ALL);
xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
irec->br_blockcount);
+
+ xchk_rtgroup_free(info->sc, &info->sc->sr);
}
/* Cross-reference a single datadev extent record. */
@@ -600,8 +613,8 @@ xchk_bmap_check_rmap(
if (irec.br_startoff != check_rec.rm_offset)
xchk_fblock_set_corrupt(sc, sbcri->whichfork,
check_rec.rm_offset);
- if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp,
- cur->bc_ag.pag->pag_agno,
+ if (irec.br_startblock !=
+ xfs_agbno_to_fsb(to_perag(cur->bc_group),
check_rec.rm_startblock))
xchk_fblock_set_corrupt(sc, sbcri->whichfork,
check_rec.rm_offset);
@@ -761,11 +774,10 @@ xchk_bmap_check_rmaps(
struct xfs_scrub *sc,
int whichfork)
{
- struct xfs_perag *pag;
- xfs_agnumber_t agno;
+ struct xfs_perag *pag = NULL;
int error;
- for_each_perag(sc->mp, agno, pag) {
+ while ((pag = xfs_perag_next(sc->mp, pag))) {
error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
if (error ||
(sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
@@ -822,9 +834,12 @@ xchk_bmap_iext_mapping(
/* Are these two mappings contiguous with each other? */
static inline bool
xchk_are_bmaps_contiguous(
+ const struct xchk_bmap_info *info,
const struct xfs_bmbt_irec *b1,
const struct xfs_bmbt_irec *b2)
{
+ struct xfs_mount *mp = info->sc->mp;
+
/* Don't try to combine unallocated mappings. */
if (!xfs_bmap_is_real_extent(b1))
return false;
@@ -838,6 +853,17 @@ xchk_are_bmaps_contiguous(
return false;
if (b1->br_state != b2->br_state)
return false;
+
+ /*
+ * Don't combine bmaps that would cross rtgroup boundaries. This is a
+ * valid state, but if combined they will fail rtb extent checks.
+ */
+ if (info->is_rt && xfs_has_rtgroups(mp)) {
+ if (xfs_rtb_to_rgno(mp, b1->br_startblock) !=
+ xfs_rtb_to_rgno(mp, b2->br_startblock))
+ return false;
+ }
+
return true;
}
@@ -875,7 +901,7 @@ xchk_bmap_iext_iter(
* that we just read, if possible.
*/
while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) {
- if (!xchk_are_bmaps_contiguous(irec, &got))
+ if (!xchk_are_bmaps_contiguous(info, irec, &got))
break;
if (!xchk_bmap_iext_mapping(info, &got)) {
diff --git a/fs/xfs/scrub/bmap_repair.c b/fs/xfs/scrub/bmap_repair.c
index 4505f4829d53..7c4955482641 100644
--- a/fs/xfs/scrub/bmap_repair.c
+++ b/fs/xfs/scrub/bmap_repair.c
@@ -196,7 +196,7 @@ xrep_bmap_check_fork_rmap(
return -EFSCORRUPTED;
/* Check that this is within the AG. */
- if (!xfs_verify_agbext(cur->bc_ag.pag, rec->rm_startblock,
+ if (!xfs_verify_agbext(to_perag(cur->bc_group), rec->rm_startblock,
rec->rm_blockcount))
return -EFSCORRUPTED;
@@ -237,7 +237,6 @@ xrep_bmap_walk_rmap(
void *priv)
{
struct xrep_bmap *rb = priv;
- struct xfs_mount *mp = cur->bc_mp;
xfs_fsblock_t fsbno;
int error = 0;
@@ -269,8 +268,7 @@ xrep_bmap_walk_rmap(
if ((rec->rm_flags & XFS_RMAP_UNWRITTEN) && !rb->allow_unwritten)
return -EFSCORRUPTED;
- fsbno = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno,
- rec->rm_startblock);
+ fsbno = xfs_agbno_to_fsb(to_perag(cur->bc_group), rec->rm_startblock);
if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
rb->old_bmbt_block_count += rec->rm_blockcount;
@@ -409,12 +407,11 @@ xrep_bmap_find_mappings(
struct xrep_bmap *rb)
{
struct xfs_scrub *sc = rb->sc;
- struct xfs_perag *pag;
- xfs_agnumber_t agno;
+ struct xfs_perag *pag = NULL;
int error = 0;
/* Iterate the rmaps for extents. */
- for_each_perag(sc->mp, agno, pag) {
+ while ((pag = xfs_perag_next(sc->mp, pag))) {
error = xrep_bmap_scan_ag(rb, pag);
if (error) {
xfs_perag_rele(pag);
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 22f5f1a9d3f0..5cbd94b56582 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -34,11 +34,13 @@
#include "xfs_quota.h"
#include "xfs_exchmaps.h"
#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/health.h"
+#include "scrub/tempfile.h"
/* Common code for the metadata scrubbers. */
@@ -121,6 +123,17 @@ xchk_process_error(
}
bool
+xchk_process_rt_error(
+ struct xfs_scrub *sc,
+ xfs_rgnumber_t rgno,
+ xfs_rgblock_t rgbno,
+ int *error)
+{
+ return __xchk_process_error(sc, rgno, rgbno, error,
+ XFS_SCRUB_OFLAG_CORRUPT, __return_address);
+}
+
+bool
xchk_xref_process_error(
struct xfs_scrub *sc,
xfs_agnumber_t agno,
@@ -513,7 +526,7 @@ xchk_perag_drain_and_lock(
* Obviously, this should be slanted against scrub and in favor
* of runtime threads.
*/
- if (!xfs_perag_intent_busy(sa->pag))
+ if (!xfs_group_intent_busy(pag_group(sa->pag)))
return 0;
if (sa->agf_bp) {
@@ -528,7 +541,7 @@ xchk_perag_drain_and_lock(
if (!(sc->flags & XCHK_FSGATES_DRAIN))
return -ECHRNG;
- error = xfs_perag_intent_drain(sa->pag);
+ error = xfs_group_intent_drain(pag_group(sa->pag));
if (error == -ERESTARTSYS)
error = -EINTR;
} while (!error);
@@ -683,6 +696,72 @@ xchk_ag_init(
return 0;
}
+#ifdef CONFIG_XFS_RT
+/*
+ * For scrubbing a realtime group, grab the in-core rtgroup that we'll need to
+ * check the metadata. The ILOCKs of the realtime group's metadata inodes are
+ * taken separately by xchk_rtgroup_lock, whose @rtglock_flags argument takes
+ * XFS_RTGLOCK_* flags. Callers must not join these inodes to the transaction
+ * with non-zero lockflags or concurrency problems will result.
+ */
+int
+xchk_rtgroup_init(
+ struct xfs_scrub *sc,
+ xfs_rgnumber_t rgno,
+ struct xchk_rt *sr)
+{
+ ASSERT(sr->rtg == NULL);
+ ASSERT(sr->rtlock_flags == 0);
+
+ sr->rtg = xfs_rtgroup_get(sc->mp, rgno);
+ if (!sr->rtg)
+ return -ENOENT;
+ return 0;
+}
+
+void
+xchk_rtgroup_lock(
+ struct xchk_rt *sr,
+ unsigned int rtglock_flags)
+{
+ xfs_rtgroup_lock(sr->rtg, rtglock_flags);
+ sr->rtlock_flags = rtglock_flags;
+}
+
+/*
+ * Unlock the realtime group. This must be done /after/ committing (or
+ * cancelling) the scrub transaction.
+ */
+static void
+xchk_rtgroup_unlock(
+ struct xchk_rt *sr)
+{
+ ASSERT(sr->rtg != NULL);
+
+ if (sr->rtlock_flags) {
+ xfs_rtgroup_unlock(sr->rtg, sr->rtlock_flags);
+ sr->rtlock_flags = 0;
+ }
+}
+
+/*
+ * Unlock the realtime group and release its resources. This must be done
+ * /after/ committing (or cancelling) the scrub transaction.
+ */
+void
+xchk_rtgroup_free(
+ struct xfs_scrub *sc,
+ struct xchk_rt *sr)
+{
+ ASSERT(sr->rtg != NULL);
+
+ xchk_rtgroup_unlock(sr);
+
+ xfs_rtgroup_put(sr->rtg);
+ sr->rtg = NULL;
+}
+#endif /* CONFIG_XFS_RT */
+
/* Per-scrubber setup functions */
void
@@ -947,9 +1026,15 @@ xchk_iget_for_scrubbing(
if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino)
return xchk_install_live_inode(sc, ip_in);
- /* Reject internal metadata files and obviously bad inode numbers. */
- if (xfs_internal_inum(mp, sc->sm->sm_ino))
+ /*
+ * On pre-metadir filesystems, reject internal metadata files. For
+ * metadir filesystems, limited scrubbing of any file in the metadata
+ * directory tree by handle is allowed, because that is the only way to
+ * validate the lack of parent pointers in the sb-root metadata inodes.
+ */
+ if (!xfs_has_metadir(mp) && xfs_is_sb_inum(mp, sc->sm->sm_ino))
return -ENOENT;
+ /* Reject obviously bad inode numbers. */
if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
return -ENOENT;
@@ -1084,6 +1169,10 @@ xchk_setup_inode_contents(
if (error)
return error;
+ error = xrep_tempfile_adjust_directory_tree(sc);
+ if (error)
+ return error;
+
/* Lock the inode so the VFS cannot touch this file. */
xchk_ilock(sc, XFS_IOLOCK_EXCL);
@@ -1239,12 +1328,6 @@ xchk_metadata_inode_forks(
return 0;
}
- /* They also should never have extended attributes. */
- if (xfs_inode_hasattr(sc->ip)) {
- xchk_ino_set_corrupt(sc, sc->ip->i_ino);
- return 0;
- }
-
/* Invoke the data fork scrubber. */
error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
@@ -1261,6 +1344,21 @@ xchk_metadata_inode_forks(
xchk_ino_set_corrupt(sc, sc->ip->i_ino);
}
+ /*
+ * Metadata files can only have extended attributes on metadir
+ * filesystems, either for parent pointers or for actual xattr data.
+ */
+ if (xfs_inode_hasattr(sc->ip)) {
+ if (!xfs_has_metadir(sc->mp)) {
+ xchk_ino_set_corrupt(sc, sc->ip->i_ino);
+ return 0;
+ }
+
+ error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA);
+ if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+ return error;
+ }
+
return 0;
}
@@ -1336,7 +1434,7 @@ xchk_inode_is_allocated(
}
/* reject inode numbers outside existing AGs */
- ino = XFS_AGINO_TO_INO(sc->mp, pag->pag_agno, agino);
+ ino = xfs_agino_to_ino(pag, agino);
if (!xfs_verify_ino(mp, ino))
return -EINVAL;
@@ -1446,3 +1544,32 @@ out_rcu:
rcu_read_unlock();
return error;
}
+
+/* Is this inode a root directory for either tree? */
+bool
+xchk_inode_is_dirtree_root(const struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+
+ return ip == mp->m_rootip ||
+ (xfs_has_metadir(mp) && ip == mp->m_metadirip);
+}
+
+/* Does the superblock point down to this inode? */
+bool
+xchk_inode_is_sb_rooted(const struct xfs_inode *ip)
+{
+ return xchk_inode_is_dirtree_root(ip) ||
+ xfs_is_sb_inum(ip->i_mount, ip->i_ino);
+}
+
+/* What is the root directory inumber for this inode? */
+xfs_ino_t
+xchk_inode_rootdir_inum(const struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+
+ if (xfs_is_metadir_inode(ip))
+ return mp->m_metadirip->i_ino;
+ return mp->m_rootip->i_ino;
+}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 47148cc4a833..9ff3cafd8679 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -12,6 +12,8 @@ void xchk_trans_cancel(struct xfs_scrub *sc);
bool xchk_process_error(struct xfs_scrub *sc, xfs_agnumber_t agno,
xfs_agblock_t bno, int *error);
+bool xchk_process_rt_error(struct xfs_scrub *sc, xfs_rgnumber_t rgno,
+ xfs_rgblock_t rgbno, int *error);
bool xchk_fblock_process_error(struct xfs_scrub *sc, int whichfork,
xfs_fileoff_t offset, int *error);
@@ -73,12 +75,15 @@ int xchk_setup_xattr(struct xfs_scrub *sc);
int xchk_setup_symlink(struct xfs_scrub *sc);
int xchk_setup_parent(struct xfs_scrub *sc);
int xchk_setup_dirtree(struct xfs_scrub *sc);
+int xchk_setup_metapath(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xchk_setup_rtbitmap(struct xfs_scrub *sc);
int xchk_setup_rtsummary(struct xfs_scrub *sc);
+int xchk_setup_rgsuperblock(struct xfs_scrub *sc);
#else
# define xchk_setup_rtbitmap xchk_setup_nothing
# define xchk_setup_rtsummary xchk_setup_nothing
+# define xchk_setup_rgsuperblock xchk_setup_nothing
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_ino_dqattach(struct xfs_scrub *sc);
@@ -117,6 +122,34 @@ xchk_ag_init_existing(
return error == -ENOENT ? -EFSCORRUPTED : error;
}
+#ifdef CONFIG_XFS_RT
+
+/* All the locks we need to check an rtgroup. */
+#define XCHK_RTGLOCK_ALL (XFS_RTGLOCK_BITMAP)
+
+int xchk_rtgroup_init(struct xfs_scrub *sc, xfs_rgnumber_t rgno,
+ struct xchk_rt *sr);
+
+static inline int
+xchk_rtgroup_init_existing(
+ struct xfs_scrub *sc,
+ xfs_rgnumber_t rgno,
+ struct xchk_rt *sr)
+{
+ int error = xchk_rtgroup_init(sc, rgno, sr);
+
+ return error == -ENOENT ? -EFSCORRUPTED : error;
+}
+
+void xchk_rtgroup_lock(struct xchk_rt *sr, unsigned int rtglock_flags);
+void xchk_rtgroup_free(struct xfs_scrub *sc, struct xchk_rt *sr);
+#else
+# define xchk_rtgroup_init(sc, rgno, sr) (-EFSCORRUPTED)
+# define xchk_rtgroup_init_existing(sc, rgno, sr) (-EFSCORRUPTED)
+# define xchk_rtgroup_lock(sc, lockflags) do { } while (0)
+# define xchk_rtgroup_free(sc, sr) do { } while (0)
+#endif /* CONFIG_XFS_RT */
+
int xchk_ag_read_headers(struct xfs_scrub *sc, xfs_agnumber_t agno,
struct xchk_ag *sa);
void xchk_ag_btcur_free(struct xchk_ag *sa);
@@ -216,7 +249,8 @@ int xchk_metadata_inode_forks(struct xfs_scrub *sc);
#define xchk_xfile_ag_descr(sc, fmt, ...) \
kasprintf(XCHK_GFP_FLAGS, "XFS (%s): AG 0x%x " fmt, \
(sc)->mp->m_super->s_id, \
- (sc)->sa.pag ? (sc)->sa.pag->pag_agno : (sc)->sm->sm_agno, \
+ (sc)->sa.pag ? \
+ pag_agno((sc)->sa.pag) : (sc)->sm->sm_agno, \
##__VA_ARGS__)
#define xchk_xfile_ino_descr(sc, fmt, ...) \
kasprintf(XCHK_GFP_FLAGS, "XFS (%s): inode 0x%llx " fmt, \
@@ -241,4 +275,8 @@ void xchk_fsgates_enable(struct xfs_scrub *sc, unsigned int scrub_fshooks);
int xchk_inode_is_allocated(struct xfs_scrub *sc, xfs_agino_t agino,
bool *inuse);
+bool xchk_inode_is_dirtree_root(const struct xfs_inode *ip);
+bool xchk_inode_is_sb_rooted(const struct xfs_inode *ip);
+xfs_ino_t xchk_inode_rootdir_inum(const struct xfs_inode *ip);
+
#endif /* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/cow_repair.c b/fs/xfs/scrub/cow_repair.c
index 4de3f0f40f48..5b6194cef3e5 100644
--- a/fs/xfs/scrub/cow_repair.c
+++ b/fs/xfs/scrub/cow_repair.c
@@ -137,7 +137,6 @@ xrep_cow_mark_shared_staging(
{
struct xrep_cow *xc = priv;
struct xfs_refcount_irec rrec;
- xfs_fsblock_t fsbno;
if (!xfs_refcount_check_domain(rec) ||
rec->rc_domain != XFS_REFC_DOMAIN_SHARED)
@@ -145,9 +144,10 @@ xrep_cow_mark_shared_staging(
xrep_cow_trim_refcount(xc, &rrec, rec);
- fsbno = XFS_AGB_TO_FSB(xc->sc->mp, cur->bc_ag.pag->pag_agno,
- rrec.rc_startblock);
- return xrep_cow_mark_file_range(xc, fsbno, rrec.rc_blockcount);
+ return xrep_cow_mark_file_range(xc,
+ xfs_agbno_to_fsb(to_perag(cur->bc_group),
+ rrec.rc_startblock),
+ rrec.rc_blockcount);
}
/*
@@ -177,9 +177,9 @@ xrep_cow_mark_missing_staging(
if (xc->next_bno >= rrec.rc_startblock)
goto next;
+
error = xrep_cow_mark_file_range(xc,
- XFS_AGB_TO_FSB(xc->sc->mp, cur->bc_ag.pag->pag_agno,
- xc->next_bno),
+ xfs_agbno_to_fsb(to_perag(cur->bc_group), xc->next_bno),
rrec.rc_startblock - xc->next_bno);
if (error)
return error;
@@ -200,7 +200,6 @@ xrep_cow_mark_missing_staging_rmap(
void *priv)
{
struct xrep_cow *xc = priv;
- xfs_fsblock_t fsbno;
xfs_agblock_t rec_bno;
xfs_extlen_t rec_len;
unsigned int adj;
@@ -222,8 +221,9 @@ xrep_cow_mark_missing_staging_rmap(
rec_len -= adj;
}
- fsbno = XFS_AGB_TO_FSB(xc->sc->mp, cur->bc_ag.pag->pag_agno, rec_bno);
- return xrep_cow_mark_file_range(xc, fsbno, rec_len);
+ return xrep_cow_mark_file_range(xc,
+ xfs_agbno_to_fsb(to_perag(cur->bc_group), rec_bno),
+ rec_len);
}
/*
@@ -275,8 +275,7 @@ xrep_cow_find_bad(
if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) {
error = xrep_cow_mark_file_range(xc,
- XFS_AGB_TO_FSB(sc->mp, pag->pag_agno,
- xc->next_bno),
+ xfs_agbno_to_fsb(pag, xc->next_bno),
xc->irec_startbno + xc->irec.br_blockcount -
xc->next_bno);
if (error)
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index bf9199e8df63..c877bde71e62 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -100,6 +100,14 @@ xchk_dir_check_ftype(
if (xfs_mode_to_ftype(VFS_I(ip)->i_mode) != ftype)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+
+ /*
+ * Metadata and regular inodes cannot cross trees. This property
+ * cannot change without a full inode free and realloc cycle, so it's
+ * safe to check this without holding locks.
+ */
+ if (xfs_is_metadir_inode(ip) != xfs_is_metadir_inode(sc->ip))
+ xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
}
/*
@@ -253,7 +261,7 @@ xchk_dir_actor(
* If this is ".." in the root inode, check that the inum
* matches this dir.
*/
- if (dp->i_ino == mp->m_sb.sb_rootino && ino != dp->i_ino)
+ if (xchk_inode_is_dirtree_root(dp) && ino != dp->i_ino)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
}
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c
index 64679fe08446..249313882108 100644
--- a/fs/xfs/scrub/dir_repair.c
+++ b/fs/xfs/scrub/dir_repair.c
@@ -415,6 +415,12 @@ xrep_dir_salvage_entry(
if (error)
return 0;
+ /* Don't mix metadata and regular directory trees. */
+ if (xfs_is_metadir_inode(ip) != xfs_is_metadir_inode(rd->sc->ip)) {
+ xchk_irele(sc, ip);
+ return 0;
+ }
+
xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
xchk_irele(sc, ip);
@@ -1270,7 +1276,7 @@ xrep_dir_scan_dirtree(
int error;
/* Roots of directory trees are their own parents. */
- if (sc->ip == sc->mp->m_rootip)
+ if (xchk_inode_is_dirtree_root(sc->ip))
xrep_findparent_scan_found(&rd->pscan, sc->ip->i_ino);
/*
@@ -1632,6 +1638,7 @@ xrep_dir_swap(
struct xrep_dir *rd)
{
struct xfs_scrub *sc = rd->sc;
+ xfs_ino_t ino;
bool ip_local, temp_local;
int error = 0;
@@ -1649,14 +1656,17 @@ xrep_dir_swap(
/*
* Reset the temporary directory's '..' entry to point to the parent
- * that we found. The temporary directory was created with the root
- * directory as the parent, so we can skip this if repairing a
- * subdirectory of the root.
+ * that we found. The dirent replace code asserts if the dirent
+ * already points at the new inumber, so we look it up here.
*
* It's also possible that this replacement could also expand a sf
* tempdir into block format.
*/
- if (rd->pscan.parent_ino != sc->mp->m_rootip->i_ino) {
+ error = xchk_dir_lookup(sc, rd->sc->tempip, &xfs_name_dotdot, &ino);
+ if (error)
+ return error;
+
+ if (rd->pscan.parent_ino != ino) {
error = xrep_dir_replace(rd, rd->sc->tempip, &xfs_name_dotdot,
rd->pscan.parent_ino, rd->tx.req.resblks);
if (error)
diff --git a/fs/xfs/scrub/dirtree.c b/fs/xfs/scrub/dirtree.c
index bde58fb561ea..3a9cdf8738b6 100644
--- a/fs/xfs/scrub/dirtree.c
+++ b/fs/xfs/scrub/dirtree.c
@@ -362,7 +362,8 @@ xchk_dirpath_set_outcome(
STATIC int
xchk_dirpath_step_up(
struct xchk_dirtree *dl,
- struct xchk_dirpath *path)
+ struct xchk_dirpath *path,
+ bool is_metadir)
{
struct xfs_scrub *sc = dl->sc;
struct xfs_inode *dp;
@@ -435,6 +436,14 @@ xchk_dirpath_step_up(
goto out_scanlock;
}
+ /* Parent must be in the same directory tree. */
+ if (is_metadir != xfs_is_metadir_inode(dp)) {
+ trace_xchk_dirpath_crosses_tree(dl->sc, dp, path->path_nr,
+ path->nr_steps, &dl->xname, &dl->pptr_rec);
+ error = -EFSCORRUPTED;
+ goto out_scanlock;
+ }
+
/*
* If the extended attributes look as though they have been zapped by
* the inode record repair code, we cannot scan for parent pointers.
@@ -508,6 +517,7 @@ xchk_dirpath_walk_upwards(
struct xchk_dirpath *path)
{
struct xfs_scrub *sc = dl->sc;
+ bool is_metadir;
int error;
ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
@@ -538,6 +548,7 @@ xchk_dirpath_walk_upwards(
* ILOCK state is no longer tracked in the scrub context. Hence we
* must drop @sc->ip's ILOCK during the walk.
*/
+ is_metadir = xfs_is_metadir_inode(sc->ip);
mutex_unlock(&dl->lock);
xchk_iunlock(sc, XFS_ILOCK_EXCL);
@@ -547,7 +558,7 @@ xchk_dirpath_walk_upwards(
* If we see any kind of error here (including corruptions), the parent
* pointer of @sc->ip is corrupt. Stop the whole scan.
*/
- error = xchk_dirpath_step_up(dl, path);
+ error = xchk_dirpath_step_up(dl, path, is_metadir);
if (error) {
xchk_ilock(sc, XFS_ILOCK_EXCL);
mutex_lock(&dl->lock);
@@ -560,7 +571,7 @@ xchk_dirpath_walk_upwards(
* *somewhere* in the path, but we don't need to stop scanning.
*/
while (!error && path->outcome == XCHK_DIRPATH_SCANNING)
- error = xchk_dirpath_step_up(dl, path);
+ error = xchk_dirpath_step_up(dl, path, is_metadir);
/* Retake the locks we had, mark paths, etc. */
xchk_ilock(sc, XFS_ILOCK_EXCL);
@@ -917,7 +928,7 @@ xchk_dirtree(
* scan, because the hook doesn't detach until after sc->ip gets
* released during teardown.
*/
- dl->root_ino = sc->mp->m_rootip->i_ino;
+ dl->root_ino = xchk_inode_rootdir_inum(sc->ip);
dl->scan_ino = sc->ip->i_ino;
trace_xchk_dirtree_start(sc->ip, sc->sm, 0);
@@ -983,3 +994,16 @@ out:
trace_xchk_dirtree_done(sc->ip, sc->sm, error);
return error;
}
+
+/* Does the directory targeted by this scrub have no parents? */
+bool
+xchk_dirtree_parentless(const struct xchk_dirtree *dl)
+{
+ struct xfs_scrub *sc = dl->sc;
+
+ if (xchk_inode_is_dirtree_root(sc->ip))
+ return true;
+ if (VFS_I(sc->ip)->i_nlink == 0)
+ return true;
+ return false;
+}
diff --git a/fs/xfs/scrub/dirtree.h b/fs/xfs/scrub/dirtree.h
index 1e1686365c61..9e5d95492717 100644
--- a/fs/xfs/scrub/dirtree.h
+++ b/fs/xfs/scrub/dirtree.h
@@ -156,17 +156,7 @@ struct xchk_dirtree {
#define xchk_dirtree_for_each_path(dl, path) \
list_for_each_entry((path), &(dl)->path_list, list)
-static inline bool
-xchk_dirtree_parentless(const struct xchk_dirtree *dl)
-{
- struct xfs_scrub *sc = dl->sc;
-
- if (sc->ip == sc->mp->m_rootip)
- return true;
- if (VFS_I(sc->ip)->i_nlink == 0)
- return true;
- return false;
-}
+bool xchk_dirtree_parentless(const struct xchk_dirtree *dl);
int xchk_dirtree_find_paths_to_root(struct xchk_dirtree *dl);
int xchk_dirpath_append(struct xchk_dirtree *dl, struct xfs_inode *ip,
diff --git a/fs/xfs/scrub/findparent.c b/fs/xfs/scrub/findparent.c
index 01766041ba2c..84487072b6dd 100644
--- a/fs/xfs/scrub/findparent.c
+++ b/fs/xfs/scrub/findparent.c
@@ -172,6 +172,10 @@ xrep_findparent_walk_directory(
*/
lock_mode = xfs_ilock_data_map_shared(dp);
+ /* Don't mix metadata and regular directory trees. */
+ if (xfs_is_metadir_inode(dp) != xfs_is_metadir_inode(sc->ip))
+ goto out_unlock;
+
/*
* If this directory is known to be sick, we cannot scan it reliably
* and must abort.
@@ -362,15 +366,24 @@ xrep_findparent_confirm(
};
int error;
- /*
- * The root directory always points to itself. Unlinked dirs can point
- * anywhere, so we point them at the root dir too.
- */
- if (sc->ip == sc->mp->m_rootip || VFS_I(sc->ip)->i_nlink == 0) {
+ /* The root directory always points to itself. */
+ if (sc->ip == sc->mp->m_rootip) {
*parent_ino = sc->mp->m_sb.sb_rootino;
return 0;
}
+ /* The metadata root directory always points to itself. */
+ if (sc->ip == sc->mp->m_metadirip) {
+ *parent_ino = sc->mp->m_sb.sb_metadirino;
+ return 0;
+ }
+
+ /* Unlinked dirs can point anywhere; point them up to the root dir. */
+ if (VFS_I(sc->ip)->i_nlink == 0) {
+ *parent_ino = xchk_inode_rootdir_inum(sc->ip);
+ return 0;
+ }
+
/* Reject garbage parent inode numbers and self-referential parents. */
if (*parent_ino == NULLFSINO)
return 0;
@@ -412,8 +425,11 @@ xrep_findparent_self_reference(
if (sc->ip->i_ino == sc->mp->m_sb.sb_rootino)
return sc->mp->m_sb.sb_rootino;
+ if (sc->ip->i_ino == sc->mp->m_sb.sb_metadirino)
+ return sc->mp->m_sb.sb_metadirino;
+
if (VFS_I(sc->ip)->i_nlink == 0)
- return sc->mp->m_sb.sb_rootino;
+ return xchk_inode_rootdir_inum(sc->ip);
return NULLFSINO;
}
diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c
index 1d3e98346933..4a50f8e00040 100644
--- a/fs/xfs/scrub/fscounters.c
+++ b/fs/xfs/scrub/fscounters.c
@@ -19,6 +19,7 @@
#include "xfs_rtbitmap.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
+#include "xfs_rtgroup.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -74,10 +75,9 @@ xchk_fscount_warmup(
struct xfs_buf *agi_bp = NULL;
struct xfs_buf *agf_bp = NULL;
struct xfs_perag *pag = NULL;
- xfs_agnumber_t agno;
int error = 0;
- for_each_perag(mp, agno, pag) {
+ while ((pag = xfs_perag_next(mp, pag))) {
if (xchk_should_terminate(sc, &error))
break;
if (xfs_perag_initialised_agi(pag) &&
@@ -295,9 +295,8 @@ xchk_fscount_aggregate_agcounts(
struct xchk_fscounters *fsc)
{
struct xfs_mount *mp = sc->mp;
- struct xfs_perag *pag;
+ struct xfs_perag *pag = NULL;
uint64_t delayed;
- xfs_agnumber_t agno;
int tries = 8;
int error = 0;
@@ -306,7 +305,7 @@ retry:
fsc->ifree = 0;
fsc->fdblocks = 0;
- for_each_perag(mp, agno, pag) {
+ while ((pag = xfs_perag_next(mp, pag))) {
if (xchk_should_terminate(sc, &error))
break;
@@ -327,7 +326,7 @@ retry:
if (xfs_has_lazysbcount(sc->mp)) {
fsc->fdblocks += pag->pagf_btreeblks;
} else {
- error = xchk_fscount_btreeblks(sc, fsc, agno);
+ error = xchk_fscount_btreeblks(sc, fsc, pag_agno(pag));
if (error)
break;
}
@@ -388,7 +387,7 @@ retry:
#ifdef CONFIG_XFS_RT
STATIC int
xchk_fscount_add_frextent(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
struct xfs_trans *tp,
const struct xfs_rtalloc_rec *rec,
void *priv)
@@ -409,6 +408,7 @@ xchk_fscount_count_frextents(
struct xchk_fscounters *fsc)
{
struct xfs_mount *mp = sc->mp;
+ struct xfs_rtgroup *rtg = NULL;
int error;
fsc->frextents = 0;
@@ -416,19 +416,20 @@ xchk_fscount_count_frextents(
if (!xfs_has_realtime(mp))
return 0;
- xfs_rtbitmap_lock_shared(sc->mp, XFS_RBMLOCK_BITMAP);
- error = xfs_rtalloc_query_all(sc->mp, sc->tp,
- xchk_fscount_add_frextent, fsc);
- if (error) {
- xchk_set_incomplete(sc);
- goto out_unlock;
+ while ((rtg = xfs_rtgroup_next(mp, rtg))) {
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+ error = xfs_rtalloc_query_all(rtg, sc->tp,
+ xchk_fscount_add_frextent, fsc);
+ xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+ if (error) {
+ xchk_set_incomplete(sc);
+ xfs_rtgroup_rele(rtg);
+ return error;
+ }
}
fsc->frextents_delayed = percpu_counter_sum(&mp->m_delalloc_rtextents);
-
-out_unlock:
- xfs_rtbitmap_unlock_shared(sc->mp, XFS_RBMLOCK_BITMAP);
- return error;
+ return 0;
}
#else
STATIC int
diff --git a/fs/xfs/scrub/fscounters_repair.c b/fs/xfs/scrub/fscounters_repair.c
index 469bf645dbea..cda13447a373 100644
--- a/fs/xfs/scrub/fscounters_repair.c
+++ b/fs/xfs/scrub/fscounters_repair.c
@@ -68,15 +68,16 @@ xrep_fscounters(
/*
* Online repair is only supported on v5 file systems, which require
- * lazy sb counters and thus no update of sb_fdblocks here. But as of
- * now we don't support lazy counting sb_frextents yet, and thus need
- * to also update it directly here. And for that we need to keep
+ * lazy sb counters and thus no update of sb_fdblocks here. But
+ * sb_frextents only uses a lazy counter with rtgroups, and thus needs
+ * to be updated directly here otherwise. And for that we need to keep
* track of the delalloc reservations separately, as they are
* subtracted from m_frextents, but not included in sb_frextents.
*/
percpu_counter_set(&mp->m_frextents,
fsc->frextents - fsc->frextents_delayed);
- mp->m_sb.sb_frextents = fsc->frextents;
+ if (!xfs_has_rtgroups(mp))
+ mp->m_sb.sb_frextents = fsc->frextents;
return 0;
}
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
index b712a8bd34f5..ce86bdad37fa 100644
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -12,6 +12,7 @@
#include "xfs_btree.h"
#include "xfs_ag.h"
#include "xfs_health.h"
+#include "xfs_rtgroup.h"
#include "scrub/scrub.h"
#include "scrub/health.h"
#include "scrub/common.h"
@@ -71,9 +72,9 @@
enum xchk_health_group {
XHG_FS = 1,
- XHG_RT,
XHG_AG,
XHG_INO,
+ XHG_RTGROUP,
};
struct xchk_health_map {
@@ -100,8 +101,8 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_XATTR] = { XHG_INO, XFS_SICK_INO_XATTR },
[XFS_SCRUB_TYPE_SYMLINK] = { XHG_INO, XFS_SICK_INO_SYMLINK },
[XFS_SCRUB_TYPE_PARENT] = { XHG_INO, XFS_SICK_INO_PARENT },
- [XFS_SCRUB_TYPE_RTBITMAP] = { XHG_RT, XFS_SICK_RT_BITMAP },
- [XFS_SCRUB_TYPE_RTSUM] = { XHG_RT, XFS_SICK_RT_SUMMARY },
+ [XFS_SCRUB_TYPE_RTBITMAP] = { XHG_RTGROUP, XFS_SICK_RG_BITMAP },
+ [XFS_SCRUB_TYPE_RTSUM] = { XHG_RTGROUP, XFS_SICK_RG_SUMMARY },
[XFS_SCRUB_TYPE_UQUOTA] = { XHG_FS, XFS_SICK_FS_UQUOTA },
[XFS_SCRUB_TYPE_GQUOTA] = { XHG_FS, XFS_SICK_FS_GQUOTA },
[XFS_SCRUB_TYPE_PQUOTA] = { XHG_FS, XFS_SICK_FS_PQUOTA },
@@ -109,6 +110,8 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_QUOTACHECK] = { XHG_FS, XFS_SICK_FS_QUOTACHECK },
[XFS_SCRUB_TYPE_NLINKS] = { XHG_FS, XFS_SICK_FS_NLINKS },
[XFS_SCRUB_TYPE_DIRTREE] = { XHG_INO, XFS_SICK_INO_DIRTREE },
+ [XFS_SCRUB_TYPE_METAPATH] = { XHG_FS, XFS_SICK_FS_METAPATH },
+ [XFS_SCRUB_TYPE_RGSUPER] = { XHG_RTGROUP, XFS_SICK_RG_SUPER },
};
/* Return the health status mask for this scrub type. */
@@ -160,13 +163,14 @@ STATIC void
xchk_mark_all_healthy(
struct xfs_mount *mp)
{
- struct xfs_perag *pag;
- xfs_agnumber_t agno;
+ struct xfs_perag *pag = NULL;
+ struct xfs_rtgroup *rtg = NULL;
xfs_fs_mark_healthy(mp, XFS_SICK_FS_INDIRECT);
- xfs_rt_mark_healthy(mp, XFS_SICK_RT_INDIRECT);
- for_each_perag(mp, agno, pag)
- xfs_ag_mark_healthy(pag, XFS_SICK_AG_INDIRECT);
+ while ((pag = xfs_perag_next(mp, pag)))
+ xfs_group_mark_healthy(pag_group(pag), XFS_SICK_AG_INDIRECT);
+ while ((rtg = xfs_rtgroup_next(mp, rtg)))
+ xfs_group_mark_healthy(rtg_group(rtg), XFS_SICK_RG_INDIRECT);
}
/*
@@ -184,6 +188,7 @@ xchk_update_health(
struct xfs_scrub *sc)
{
struct xfs_perag *pag;
+ struct xfs_rtgroup *rtg;
bool bad;
/*
@@ -207,9 +212,9 @@ xchk_update_health(
case XHG_AG:
pag = xfs_perag_get(sc->mp, sc->sm->sm_agno);
if (bad)
- xfs_ag_mark_corrupt(pag, sc->sick_mask);
+ xfs_group_mark_corrupt(pag_group(pag), sc->sick_mask);
else
- xfs_ag_mark_healthy(pag, sc->sick_mask);
+ xfs_group_mark_healthy(pag_group(pag), sc->sick_mask);
xfs_perag_put(pag);
break;
case XHG_INO:
@@ -236,11 +241,13 @@ xchk_update_health(
else
xfs_fs_mark_healthy(sc->mp, sc->sick_mask);
break;
- case XHG_RT:
+ case XHG_RTGROUP:
+ rtg = xfs_rtgroup_get(sc->mp, sc->sm->sm_agno);
if (bad)
- xfs_rt_mark_corrupt(sc->mp, sc->sick_mask);
+ xfs_group_mark_corrupt(rtg_group(rtg), sc->sick_mask);
else
- xfs_rt_mark_healthy(sc->mp, sc->sick_mask);
+ xfs_group_mark_healthy(rtg_group(rtg), sc->sick_mask);
+ xfs_rtgroup_put(rtg);
break;
default:
ASSERT(0);
@@ -277,7 +284,7 @@ xchk_ag_btree_del_cursor_if_sick(
type_to_health_flag[sc->sm->sm_type].group == XHG_AG)
mask &= ~sc->sick_mask;
- if (xfs_ag_has_sickness((*curp)->bc_ag.pag, mask)) {
+ if (xfs_group_has_sickness((*curp)->bc_group, mask)) {
sc->sm->sm_flags |= XFS_SCRUB_OFLAG_XFAIL;
xfs_btree_del_cursor(*curp, XFS_BTREE_NOERROR);
*curp = NULL;
@@ -294,9 +301,8 @@ xchk_health_record(
struct xfs_scrub *sc)
{
struct xfs_mount *mp = sc->mp;
- struct xfs_perag *pag;
- xfs_agnumber_t agno;
-
+ struct xfs_perag *pag = NULL;
+ struct xfs_rtgroup *rtg = NULL;
unsigned int sick;
unsigned int checked;
@@ -304,15 +310,17 @@ xchk_health_record(
if (sick & XFS_SICK_FS_PRIMARY)
xchk_set_corrupt(sc);
- xfs_rt_measure_sickness(mp, &sick, &checked);
- if (sick & XFS_SICK_RT_PRIMARY)
- xchk_set_corrupt(sc);
-
- for_each_perag(mp, agno, pag) {
- xfs_ag_measure_sickness(pag, &sick, &checked);
+ while ((pag = xfs_perag_next(mp, pag))) {
+ xfs_group_measure_sickness(pag_group(pag), &sick, &checked);
if (sick & XFS_SICK_AG_PRIMARY)
xchk_set_corrupt(sc);
}
+ while ((rtg = xfs_rtgroup_next(mp, rtg))) {
+ xfs_group_measure_sickness(rtg_group(rtg), &sick, &checked);
+ if (sick & XFS_SICK_RG_PRIMARY)
+ xchk_set_corrupt(sc);
+ }
+
return 0;
}
diff --git a/fs/xfs/scrub/ialloc.c b/fs/xfs/scrub/ialloc.c
index 750d7b0cd25a..abad54c3621d 100644
--- a/fs/xfs/scrub/ialloc.c
+++ b/fs/xfs/scrub/ialloc.c
@@ -258,7 +258,7 @@ xchk_iallocbt_chunk(
{
struct xfs_scrub *sc = bs->sc;
struct xfs_mount *mp = bs->cur->bc_mp;
- struct xfs_perag *pag = bs->cur->bc_ag.pag;
+ struct xfs_perag *pag = to_perag(bs->cur->bc_group);
xfs_agblock_t agbno;
xfs_extlen_t len;
@@ -303,7 +303,6 @@ xchk_iallocbt_check_cluster_ifree(
unsigned int irec_ino,
struct xfs_dinode *dip)
{
- struct xfs_mount *mp = bs->cur->bc_mp;
xfs_ino_t fsino;
xfs_agino_t agino;
bool irec_free;
@@ -319,7 +318,7 @@ xchk_iallocbt_check_cluster_ifree(
* the record, compute which fs inode we're talking about.
*/
agino = irec->ir_startino + irec_ino;
- fsino = XFS_AGINO_TO_INO(mp, bs->cur->bc_ag.pag->pag_agno, agino);
+ fsino = xfs_agino_to_ino(to_perag(bs->cur->bc_group), agino);
irec_free = (irec->ir_free & XFS_INOBT_MASK(irec_ino));
if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC ||
@@ -368,7 +367,6 @@ xchk_iallocbt_check_cluster(
struct xfs_mount *mp = bs->cur->bc_mp;
struct xfs_buf *cluster_bp;
unsigned int nr_inodes;
- xfs_agnumber_t agno = bs->cur->bc_ag.pag->pag_agno;
xfs_agblock_t agbno;
unsigned int cluster_index;
uint16_t cluster_mask = 0;
@@ -396,7 +394,7 @@ xchk_iallocbt_check_cluster(
* ir_startino can be large enough to make im_boffset nonzero.
*/
ir_holemask = (irec->ir_holemask & cluster_mask);
- imap.im_blkno = XFS_AGB_TO_DADDR(mp, agno, agbno);
+ imap.im_blkno = xfs_agbno_to_daddr(to_perag(bs->cur->bc_group), agbno);
imap.im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
imap.im_boffset = XFS_INO_TO_OFFSET(mp, irec->ir_startino) <<
mp->m_sb.sb_inodelog;
@@ -407,9 +405,9 @@ xchk_iallocbt_check_cluster(
return 0;
}
- trace_xchk_iallocbt_check_cluster(mp, agno, irec->ir_startino,
- imap.im_blkno, imap.im_len, cluster_base, nr_inodes,
- cluster_mask, ir_holemask,
+ trace_xchk_iallocbt_check_cluster(to_perag(bs->cur->bc_group),
+ irec->ir_startino, imap.im_blkno, imap.im_len,
+ cluster_base, nr_inodes, cluster_mask, ir_holemask,
XFS_INO_TO_OFFSET(mp, irec->ir_startino +
cluster_base));
@@ -585,7 +583,7 @@ xchk_iallocbt_rec(
uint16_t holemask;
xfs_inobt_btrec_to_irec(mp, rec, &irec);
- if (xfs_inobt_check_irec(bs->cur->bc_ag.pag, &irec) != NULL) {
+ if (xfs_inobt_check_irec(to_perag(bs->cur->bc_group), &irec) != NULL) {
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
return 0;
}
diff --git a/fs/xfs/scrub/ialloc_repair.c b/fs/xfs/scrub/ialloc_repair.c
index c8d2196a04e1..14e48d3f1912 100644
--- a/fs/xfs/scrub/ialloc_repair.c
+++ b/fs/xfs/scrub/ialloc_repair.c
@@ -146,15 +146,12 @@ xrep_ibt_check_ifree(
struct xfs_scrub *sc = ri->sc;
struct xfs_mount *mp = sc->mp;
struct xfs_dinode *dip;
- xfs_ino_t fsino;
xfs_agino_t agino;
- xfs_agnumber_t agno = ri->sc->sa.pag->pag_agno;
unsigned int cluster_buf_base;
unsigned int offset;
int error;
agino = cluster_ag_base + cluster_index;
- fsino = XFS_AGINO_TO_INO(mp, agno, agino);
/* Inode uncached or half assembled, read disk buffer */
cluster_buf_base = XFS_INO_TO_OFFSET(mp, cluster_ag_base);
@@ -165,7 +162,8 @@ xrep_ibt_check_ifree(
if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)
return -EFSCORRUPTED;
- if (dip->di_version >= 3 && be64_to_cpu(dip->di_ino) != fsino)
+ if (dip->di_version >= 3 &&
+ be64_to_cpu(dip->di_ino) != xfs_agino_to_ino(ri->sc->sa.pag, agino))
return -EFSCORRUPTED;
/* Will the in-core inode tell us if it's in use? */
@@ -194,7 +192,7 @@ xrep_ibt_stash(
if (ri->rie.ir_freecount > 0)
ri->finobt_recs++;
- trace_xrep_ibt_found(ri->sc->mp, ri->sc->sa.pag->pag_agno, &ri->rie);
+ trace_xrep_ibt_found(ri->sc->sa.pag, &ri->rie);
error = xfarray_append(ri->inode_records, &ri->rie);
if (error)
@@ -307,7 +305,7 @@ xrep_ibt_process_cluster(
* inobt because imap_to_bp directly maps the buffer without touching
* either inode btree.
*/
- imap.im_blkno = XFS_AGB_TO_DADDR(mp, sc->sa.pag->pag_agno, cluster_bno);
+ imap.im_blkno = xfs_agbno_to_daddr(sc->sa.pag, cluster_bno);
imap.im_len = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
imap.im_boffset = 0;
error = xfs_imap_to_bp(mp, sc->tp, &imap, &cluster_bp);
@@ -423,9 +421,7 @@ xrep_ibt_record_inode_blocks(
if (error)
return error;
- trace_xrep_ibt_walk_rmap(mp, ri->sc->sa.pag->pag_agno,
- rec->rm_startblock, rec->rm_blockcount, rec->rm_owner,
- rec->rm_offset, rec->rm_flags);
+ trace_xrep_ibt_walk_rmap(ri->sc->sa.pag, rec);
/*
* Record the free/hole masks for each inode cluster that could be
@@ -634,7 +630,6 @@ xrep_ibt_build_new_trees(
struct xfs_scrub *sc = ri->sc;
struct xfs_btree_cur *ino_cur;
struct xfs_btree_cur *fino_cur = NULL;
- xfs_fsblock_t fsbno;
bool need_finobt;
int error;
@@ -656,9 +651,8 @@ xrep_ibt_build_new_trees(
*
* Start by setting up the inobt staging cursor.
*/
- fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
- XFS_IBT_BLOCK(sc->mp));
- xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT, fsbno,
+ xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT,
+ xfs_agbno_to_fsb(sc->sa.pag, XFS_IBT_BLOCK(sc->mp)),
XFS_AG_RESV_NONE);
ri->new_inobt.bload.claim_block = xrep_ibt_claim_block;
ri->new_inobt.bload.get_records = xrep_ibt_get_records;
@@ -677,10 +671,9 @@ xrep_ibt_build_new_trees(
if (sc->mp->m_finobt_nores)
resv = XFS_AG_RESV_NONE;
- fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
- XFS_FIBT_BLOCK(sc->mp));
xrep_newbt_init_ag(&ri->new_finobt, sc, &XFS_RMAP_OINFO_INOBT,
- fsbno, resv);
+ xfs_agbno_to_fsb(sc->sa.pag, XFS_FIBT_BLOCK(sc->mp)),
+ resv);
ri->new_finobt.bload.claim_block = xrep_fibt_claim_block;
ri->new_finobt.bload.get_records = xrep_fibt_get_records;
@@ -821,7 +814,7 @@ xrep_iallocbt(
sc->sick_mask = XFS_SICK_AG_INOBT | XFS_SICK_AG_FINOBT;
/* Set up enough storage to handle an AG with nothing but inodes. */
- xfs_agino_range(mp, sc->sa.pag->pag_agno, &first_agino, &last_agino);
+ xfs_agino_range(mp, pag_agno(sc->sa.pag), &first_agino, &last_agino);
last_agino /= XFS_INODES_PER_CHUNK;
descr = xchk_xfile_ag_descr(sc, "inode index records");
error = xfarray_create(descr, last_agino,
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index d32716fb2fec..25ee66e7649d 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -60,6 +60,22 @@ xchk_install_handle_iscrub(
if (error)
return error;
+ /*
+ * Don't allow scrubbing by handle of any non-directory inode records
+ * in the metadata directory tree. We don't know if any of the scans
+ * launched by this scrubber will end up indirectly trying to lock this
+ * file.
+ *
+ * Scrubbers of inode-rooted metadata files (e.g. quota files) will
+ * attach all the resources needed to scrub the inode and call
+ * xchk_inode directly. Userspace cannot call this directly.
+ */
+ if (xfs_is_metadir_inode(ip) && !S_ISDIR(VFS_I(ip)->i_mode)) {
+ xchk_irele(sc, ip);
+ sc->ip = NULL;
+ return -ENOENT;
+ }
+
return xchk_prepare_iscrub(sc);
}
@@ -94,9 +110,15 @@ xchk_setup_inode(
return xchk_prepare_iscrub(sc);
}
- /* Reject internal metadata files and obviously bad inode numbers. */
- if (xfs_internal_inum(mp, sc->sm->sm_ino))
+ /*
+ * On pre-metadir filesystems, reject internal metadata files. For
+ * metadir filesystems, limited scrubbing of any file in the metadata
+ * directory tree by handle is allowed, because that is the only way to
+ * validate the lack of parent pointers in the sb-root metadata inodes.
+ */
+ if (!xfs_has_metadir(mp) && xfs_is_sb_inum(mp, sc->sm->sm_ino))
return -ENOENT;
+ /* Reject obviously bad inode numbers. */
if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
return -ENOENT;
@@ -421,8 +443,13 @@ xchk_dinode(
break;
case 2:
case 3:
- if (dip->di_onlink != 0)
- xchk_ino_set_corrupt(sc, ino);
+ if (xfs_dinode_is_metadir(dip)) {
+ if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
+ xchk_ino_set_corrupt(sc, ino);
+ } else {
+ if (dip->di_metatype != 0)
+ xchk_ino_set_corrupt(sc, ino);
+ }
if (dip->di_mode == 0 && sc->ip)
xchk_ino_set_corrupt(sc, ino);
diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c
index 3e45b9b72312..5a58ddd27bd2 100644
--- a/fs/xfs/scrub/inode_repair.c
+++ b/fs/xfs/scrub/inode_repair.c
@@ -521,10 +521,17 @@ STATIC void
xrep_dinode_nlinks(
struct xfs_dinode *dip)
{
- if (dip->di_version > 1)
- dip->di_onlink = 0;
- else
+ if (dip->di_version < 2) {
dip->di_nlink = 0;
+ return;
+ }
+
+ if (xfs_dinode_is_metadir(dip)) {
+ if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
+ dip->di_metatype = cpu_to_be16(XFS_METAFILE_UNKNOWN);
+ } else {
+ dip->di_metatype = 0;
+ }
}
/* Fix any conflicting flags that the verifiers complain about. */
@@ -565,6 +572,16 @@ xrep_dinode_flags(
dip->di_nrext64_pad = 0;
else if (dip->di_version >= 3)
dip->di_v3_pad = 0;
+
+ if (flags2 & XFS_DIFLAG2_METADATA) {
+ xfs_failaddr_t fa;
+
+ fa = xfs_dinode_verify_metadir(sc->mp, dip, mode, flags,
+ flags2);
+ if (fa)
+ flags2 &= ~XFS_DIFLAG2_METADATA;
+ }
+
dip->di_flags = cpu_to_be16(flags);
dip->di_flags2 = cpu_to_be64(flags2);
}
@@ -761,14 +778,13 @@ STATIC int
xrep_dinode_count_rmaps(
struct xrep_inode *ri)
{
- struct xfs_perag *pag;
- xfs_agnumber_t agno;
+ struct xfs_perag *pag = NULL;
int error;
if (!xfs_has_rmapbt(ri->sc->mp) || xfs_has_realtime(ri->sc->mp))
return -EOPNOTSUPP;
- for_each_perag(ri->sc->mp, agno, pag) {
+ while ((pag = xfs_perag_next(ri->sc->mp, pag))) {
error = xrep_dinode_count_ag_rmaps(ri, pag);
if (error) {
xfs_perag_rele(pag);
@@ -1755,15 +1771,8 @@ xrep_inode_pptr(
if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
return 0;
- /* The root directory doesn't have a parent pointer. */
- if (ip == mp->m_rootip)
- return 0;
-
- /*
- * Metadata inodes are rooted in the superblock and do not have any
- * parents.
- */
- if (xfs_is_metadata_inode(ip))
+ /* Children of the superblock do not have parent pointers. */
+ if (xchk_inode_is_sb_rooted(ip))
return 0;
/* Inode already has an attr fork; no further work possible here. */
diff --git a/fs/xfs/scrub/iscan.c b/fs/xfs/scrub/iscan.c
index cf9d983667ce..84f117667ca2 100644
--- a/fs/xfs/scrub/iscan.c
+++ b/fs/xfs/scrub/iscan.c
@@ -67,7 +67,7 @@ xchk_iscan_mask_skipino(
xfs_agnumber_t skip_agno = XFS_INO_TO_AGNO(mp, iscan->skip_ino);
xfs_agnumber_t skip_agino = XFS_INO_TO_AGINO(mp, iscan->skip_ino);
- if (pag->pag_agno != skip_agno)
+ if (pag_agno(pag) != skip_agno)
return;
if (skip_agino < rec->ir_startino)
return;
@@ -95,7 +95,7 @@ xchk_iscan_find_next(
struct xfs_btree_cur *cur;
struct xfs_mount *mp = sc->mp;
struct xfs_trans *tp = sc->tp;
- xfs_agnumber_t agno = pag->pag_agno;
+ xfs_agnumber_t agno = pag_agno(pag);
xfs_agino_t lastino = NULLAGINO;
xfs_agino_t first, last;
xfs_agino_t agino = *cursor;
diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c
new file mode 100644
index 000000000000..b78db6513465
--- /dev/null
+++ b/fs/xfs/scrub/metapath.c
@@ -0,0 +1,689 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2023-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_inode.h"
+#include "xfs_metafile.h"
+#include "xfs_quota.h"
+#include "xfs_qm.h"
+#include "xfs_dir2.h"
+#include "xfs_parent.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_attr.h"
+#include "xfs_rtgroup.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/readdir.h"
+#include "scrub/repair.h"
+
+/*
+ * Metadata Directory Tree Paths
+ * =============================
+ *
+ * A filesystem with metadir enabled expects to find metadata structures
+ * attached to files that are accessible by walking a path down the metadata
+ * directory tree. Given the metadir path and the incore inode storing the
+ * metadata, this scrubber ensures that the ondisk metadir path points to the
+ * ondisk inode represented by the incore inode.
+ */
+
+/*
+ * Scrub context for checking one metadata directory path.  The setup code
+ * allocates this, stores it in sc->buf, and registers
+ * xchk_metapath_cleanup() to release what it tracks.
+ */
+struct xchk_metapath {
+ struct xfs_scrub *sc;
+
+ /* Name for lookup; points at @path, so it lives exactly as long */
+ struct xfs_name xname;
+
+ /* Directory update for repairs */
+ struct xfs_dir_update du;
+
+ /*
+ * Path down to this metadata file from the parent directory.
+ * Dynamically allocated; freed by the cleanup function.
+ */
+ const char *path;
+
+ /* Directory parent of the metadata file; may be NULL. */
+ struct xfs_inode *dp;
+
+ /* Locks held on dp; nonzero means cleanup must unlock dp */
+ unsigned int dp_ilock_flags;
+
+ /* Transaction block reservations, computed at the start of repair */
+ unsigned int link_resblks;
+ unsigned int unlink_resblks;
+
+ /* Parent pointer updates */
+ struct xfs_parent_args link_ppargs;
+ struct xfs_parent_args unlink_ppargs;
+
+ /* Scratchpads for removing links */
+ struct xfs_da_args pptr_args;
+};
+
+/*
+ * Scrub buffer destructor: drop any ILOCK still recorded as held on the
+ * parent directory, then free the path string.
+ */
+static inline void
+xchk_metapath_cleanup(
+	void			*buf)
+{
+	struct xchk_metapath	*state = buf;
+	unsigned int		held = state->dp_ilock_flags;
+
+	if (held != 0)
+		xfs_iunlock(state->dp, held);
+	kfree(state->path);
+}
+
+/*
+ * Set up a metadir path scan of @ip, which is expected to be reachable under
+ * the name @path in directory @dp.  @path must be dynamically allocated; it
+ * is freed here on failure and handed over to the scrub buffer on success.
+ * A NULL @path is reported as -ENOMEM.
+ */
+static inline int
+xchk_setup_metapath_scan(
+	struct xfs_scrub	*sc,
+	struct xfs_inode	*dp,
+	const char		*path,
+	struct xfs_inode	*ip)
+{
+	struct xchk_metapath	*mpath;
+	int			error;
+
+	if (path == NULL)
+		return -ENOMEM;
+
+	error = xchk_install_live_inode(sc, ip);
+	if (error)
+		goto out_free_path;
+
+	error = -ENOMEM;
+	mpath = kzalloc(sizeof(*mpath), XCHK_GFP_FLAGS);
+	if (mpath == NULL)
+		goto out_free_path;
+
+	/* From here on the path string belongs to mpath. */
+	mpath->sc = sc;
+	mpath->dp = dp;
+	mpath->path = path;
+
+	/* The lookup name is the path itself, typed after the child inode. */
+	mpath->xname.name = path;
+	mpath->xname.len = strlen(path);
+	mpath->xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
+
+	sc->buf = mpath;
+	sc->buf_cleanup = xchk_metapath_cleanup;
+	return 0;
+
+out_free_path:
+	kfree(path);
+	return error;
+}
+
+#ifdef CONFIG_XFS_RT
+/*
+ * Scan the /rtgroups directory itself: the child is the mount's rtgroups
+ * directory inode and the parent is the metadata directory root.
+ */
+static int
+xchk_setup_metapath_rtdir(
+	struct xfs_scrub	*sc)
+{
+	struct xfs_mount	*mp = sc->mp;
+
+	if (mp->m_rtdirip == NULL)
+		return -ENOENT;
+
+	return xchk_setup_metapath_scan(sc, mp->m_metadirip,
+			kasprintf(GFP_KERNEL, "rtgroups"), mp->m_rtdirip);
+}
+
+/*
+ * Scan a rtgroup inode under the /rtgroups directory.  The group is selected
+ * by sm_agno and the inode within the group by @type; -ENOENT is returned if
+ * either does not exist.
+ */
+static int
+xchk_setup_metapath_rtginode(
+	struct xfs_scrub	*sc,
+	enum xfs_rtg_inodes	type)
+{
+	struct xfs_rtgroup	*rtg;
+	struct xfs_inode	*ip;
+	int			error = -ENOENT;
+
+	rtg = xfs_rtgroup_get(sc->mp, sc->sm->sm_agno);
+	if (!rtg)
+		return -ENOENT;
+
+	ip = rtg->rtg_inodes[type];
+	if (ip != NULL)
+		error = xchk_setup_metapath_scan(sc, sc->mp->m_rtdirip,
+				xfs_rtginode_path(rtg_rgno(rtg), type), ip);
+
+	/* The group reference is only needed while setting up. */
+	xfs_rtgroup_put(rtg);
+	return error;
+}
+#else
+# define xchk_setup_metapath_rtdir(...) (-ENOENT)
+# define xchk_setup_metapath_rtginode(...) (-ENOENT)
+#endif /* CONFIG_XFS_RT */
+
+#ifdef CONFIG_XFS_QUOTA
+/*
+ * Scan the /quota directory itself: load the quota directory inode under an
+ * empty transaction and check it as a child of the metadata directory root.
+ */
+static int
+xchk_setup_metapath_quotadir(
+	struct xfs_scrub	*sc)
+{
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_trans	*tp;
+	struct xfs_inode	*quotadir = NULL;
+	char			*name;
+	int			error;
+
+	error = xfs_trans_alloc_empty(mp, &tp);
+	if (error)
+		return error;
+
+	/* The transaction is only needed for the load itself. */
+	error = xfs_dqinode_load_parent(tp, &quotadir);
+	xfs_trans_cancel(tp);
+	if (error)
+		return error;
+
+	name = kasprintf(GFP_KERNEL, "quota");
+	error = xchk_setup_metapath_scan(sc, mp->m_metadirip, name, quotadir);
+
+	/* Drop the reference taken by the load above. */
+	xfs_irele(quotadir);
+	return error;
+}
+
+/*
+ * Scan a quota inode under the /quota directory.
+ *
+ * Loads the quota directory and the quota file of the given @type under an
+ * empty transaction, then hands both to xchk_setup_metapath_scan() along with
+ * a copy of the string returned by xfs_dqinode_path().  Returns 0 or a
+ * negative errno.
+ */
+static int
+xchk_setup_metapath_dqinode(
+ struct xfs_scrub *sc,
+ xfs_dqtype_t type)
+{
+ struct xfs_trans *tp = NULL;
+ struct xfs_inode *dp = NULL;
+ struct xfs_inode *ip = NULL;
+ const char *path;
+ int error;
+
+ error = xfs_trans_alloc_empty(sc->mp, &tp);
+ if (error)
+ return error;
+
+ error = xfs_dqinode_load_parent(tp, &dp);
+ if (error)
+ goto out_cancel;
+
+ error = xfs_dqinode_load(tp, dp, type, &ip);
+ if (error)
+ goto out_dp;
+
+ /* Lookups are done; clear tp so out_cancel does not cancel it again. */
+ xfs_trans_cancel(tp);
+ tp = NULL;
+
+ /* A failed allocation yields NULL, which the scan setup rejects. */
+ path = kasprintf(GFP_KERNEL, "%s", xfs_dqinode_path(type));
+ error = xchk_setup_metapath_scan(sc, dp, path, ip);
+
+ /*
+ * Drop the references taken by the loads above.
+ * NOTE(review): xchk_setup_metapath_scan() keeps @dp in mpath->dp, but
+ * the reference obtained here is released below -- confirm that
+ * something else keeps the quota directory inode alive for the scan.
+ */
+ xfs_irele(ip);
+out_dp:
+ xfs_irele(dp);
+out_cancel:
+ if (tp)
+ xfs_trans_cancel(tp);
+ return error;
+}
+#else
+# define xchk_setup_metapath_quotadir(...) (-ENOENT)
+# define xchk_setup_metapath_dqinode(...) (-ENOENT)
+#endif /* CONFIG_XFS_QUOTA */
+
+/*
+ * Dispatch metapath scrub setup according to the path selector in sm_ino.
+ * Returns -ENOENT if the filesystem lacks metadir or the selector is not
+ * recognized, and -EINVAL if sm_gen is set or a probe specifies sm_agno.
+ */
+int
+xchk_setup_metapath(
+	struct xfs_scrub	*sc)
+{
+	int			error = -ENOENT;
+
+	if (!xfs_has_metadir(sc->mp))
+		return -ENOENT;
+	if (sc->sm->sm_gen != 0)
+		return -EINVAL;
+
+	switch (sc->sm->sm_ino) {
+	case XFS_SCRUB_METAPATH_PROBE:
+		/* Just probing, nothing else to do. */
+		error = sc->sm->sm_agno ? -EINVAL : 0;
+		break;
+	case XFS_SCRUB_METAPATH_RTDIR:
+		error = xchk_setup_metapath_rtdir(sc);
+		break;
+	case XFS_SCRUB_METAPATH_RTBITMAP:
+		error = xchk_setup_metapath_rtginode(sc, XFS_RTGI_BITMAP);
+		break;
+	case XFS_SCRUB_METAPATH_RTSUMMARY:
+		error = xchk_setup_metapath_rtginode(sc, XFS_RTGI_SUMMARY);
+		break;
+	case XFS_SCRUB_METAPATH_QUOTADIR:
+		error = xchk_setup_metapath_quotadir(sc);
+		break;
+	case XFS_SCRUB_METAPATH_USRQUOTA:
+		error = xchk_setup_metapath_dqinode(sc, XFS_DQTYPE_USER);
+		break;
+	case XFS_SCRUB_METAPATH_GRPQUOTA:
+		error = xchk_setup_metapath_dqinode(sc, XFS_DQTYPE_GROUP);
+		break;
+	case XFS_SCRUB_METAPATH_PRJQUOTA:
+		error = xchk_setup_metapath_dqinode(sc, XFS_DQTYPE_PROJ);
+		break;
+	default:
+		/* Unknown selector; error is already -ENOENT. */
+		break;
+	}
+
+	return error;
+}
+
+/*
+ * Take the ILOCK on the metadata directory parent and child. We do not know
+ * that the metadata directory is not corrupt, so we lock the parent and try
+ * to lock the child, backing off and retrying if the child lock is busy.
+ * Returns 0 if successful, or the error set by xchk_should_terminate()
+ * (documented here as -EINTR) to abort the scrub. On success the parent lock
+ * is recorded in @mpath->dp_ilock_flags.
+ */
+STATIC int
+xchk_metapath_ilock_both(
+ struct xchk_metapath *mpath)
+{
+ struct xfs_scrub *sc = mpath->sc;
+ int error = 0;
+
+ while (true) {
+ xfs_ilock(mpath->dp, XFS_ILOCK_EXCL);
+ if (xchk_ilock_nowait(sc, XFS_ILOCK_EXCL)) {
+ /* Remember the parent lock so cleanup can drop it. */
+ mpath->dp_ilock_flags |= XFS_ILOCK_EXCL;
+ return 0;
+ }
+ /* Child is busy: release the parent before waiting. */
+ xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL);
+
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ delay(1);
+ }
+
+ /* Not reached: the loop above only exits by returning. */
+ ASSERT(0);
+ return -EINTR;
+}
+
+/*
+ * Release the ILOCKs on the child and then the parent, and forget the
+ * recorded parent lock so that cleanup does not unlock it a second time.
+ */
+static inline void
+xchk_metapath_iunlock(
+	struct xchk_metapath	*mpath)
+{
+	xchk_iunlock(mpath->sc, XFS_ILOCK_EXCL);
+
+	mpath->dp_ilock_flags &= ~XFS_ILOCK_EXCL;
+	xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL);
+}
+
+/*
+ * Check that the directory entry named by the metadir path exists in the
+ * parent directory and points to the inode being scrubbed (sc->ip). A
+ * missing parent, a missing dirent, or a dirent pointing at another inode is
+ * flagged as corruption on sc->ip. Returns 0 or a negative errno.
+ */
+int
+xchk_metapath(
+ struct xfs_scrub *sc)
+{
+ struct xchk_metapath *mpath = sc->buf;
+ xfs_ino_t ino = NULLFSINO;
+ int error;
+
+ /* Just probing, nothing else to do. */
+ if (sc->sm->sm_ino == XFS_SCRUB_METAPATH_PROBE)
+ return 0;
+
+ /* Parent required to do anything else. */
+ if (mpath->dp == NULL) {
+ xchk_ino_set_corrupt(sc, sc->ip->i_ino);
+ return 0;
+ }
+
+ error = xchk_trans_alloc_empty(sc);
+ if (error)
+ return error;
+
+ error = xchk_metapath_ilock_both(mpath);
+ if (error)
+ goto out_cancel;
+
+ /* Make sure the parent dir has a dirent pointing to this file. */
+ error = xchk_dir_lookup(sc, mpath->dp, &mpath->xname, &ino);
+ trace_xchk_metapath_lookup(sc, mpath->path, mpath->dp, ino);
+ if (error == -ENOENT) {
+ /* No directory entry at all */
+ xchk_ino_set_corrupt(sc, sc->ip->i_ino);
+ error = 0;
+ goto out_ilock;
+ }
+ /*
+ * NOTE(review): presumably this records any other lookup error against
+ * the scrub state and returns false when checking cannot continue --
+ * confirm against the helper's definition.
+ */
+ if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
+ goto out_ilock;
+ if (ino != sc->ip->i_ino) {
+ /* Pointing to wrong inode */
+ xchk_ino_set_corrupt(sc, sc->ip->i_ino);
+ }
+
+out_ilock:
+ xchk_metapath_iunlock(mpath);
+out_cancel:
+ xchk_trans_cancel(sc);
+ return error;
+}
+
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+/*
+ * Create the dirent represented by the final component of the path, pointing
+ * at sc->ip.  A parent pointer update is requested only when the filesystem
+ * has parent pointers.
+ */
+STATIC int
+xrep_metapath_link(
+	struct xchk_metapath	*mpath)
+{
+	struct xfs_scrub	*sc = mpath->sc;
+	struct xfs_dir_update	*du = &mpath->du;
+
+	du->dp = mpath->dp;
+	du->name = &mpath->xname;
+	du->ip = sc->ip;
+	du->ppargs = xfs_has_parent(sc->mp) ? &mpath->link_ppargs : NULL;
+
+	trace_xrep_metapath_link(sc, mpath->path, mpath->dp, sc->ip->i_ino);
+
+	return xfs_dir_add_child(sc->tp, mpath->link_resblks, du);
+}
+
+/*
+ * Remove the dirent at the final component of the path.  @ino is the inode
+ * number stored in the dirent and @ip the corresponding incore inode, or NULL
+ * if that inode could not be loaded, in which case only the dirent is removed.
+ */
+STATIC int
+xrep_metapath_unlink(
+	struct xchk_metapath	*mpath,
+	xfs_ino_t		ino,
+	struct xfs_inode	*ip)
+{
+	struct xfs_parent_rec	rec;
+	struct xfs_scrub	*sc = mpath->sc;
+	struct xfs_dir_update	*du = &mpath->du;
+	int			error;
+
+	trace_xrep_metapath_unlink(sc, mpath->path, mpath->dp, ino);
+
+	if (ip == NULL) {
+		/* The child inode isn't allocated.  Junk the dirent. */
+		xfs_trans_log_inode(sc->tp, mpath->dp, XFS_ILOG_CORE);
+		return xfs_dir_removename(sc->tp, mpath->dp, &mpath->xname,
+				ino, mpath->unlink_resblks);
+	}
+
+	du->dp = mpath->dp;
+	du->name = &mpath->xname;
+	du->ip = ip;
+	du->ppargs = NULL;
+
+	/*
+	 * Figure out if we're removing a parent pointer too: only when the
+	 * child actually has one for this name.  A missing attr is fine.
+	 */
+	if (xfs_has_parent(sc->mp)) {
+		xfs_inode_to_parent_rec(&rec, ip);
+		error = xfs_parent_lookup(sc->tp, ip, &mpath->xname, &rec,
+				&mpath->pptr_args);
+		if (error == 0)
+			du->ppargs = &mpath->unlink_ppargs;
+		else if (error != -ENOATTR)
+			return error;
+	}
+
+	return xfs_dir_remove_child(sc->tp, mpath->unlink_resblks, du);
+}
+
+/*
+ * Try to create a dirent in @mpath->dp with the name @mpath->xname that points
+ * to @sc->ip.  Returns:
+ *
+ * -EEXIST and an @alleged_child if the dirent points to the wrong inode;
+ * 0 if there is now a dirent pointing to @sc->ip; or
+ * A negative errno on error.
+ *
+ * The transaction is committed or cancelled and both ILOCKs are dropped
+ * before returning, whatever the outcome.
+ */
+STATIC int
+xrep_metapath_try_link(
+	struct xchk_metapath	*mpath,
+	xfs_ino_t		*alleged_child)
+{
+	struct xfs_scrub	*sc = mpath->sc;
+	/*
+	 * Start from a known value: the lookup result is traced even when the
+	 * lookup fails and may not have filled in @ino.
+	 */
+	xfs_ino_t		ino = NULLFSINO;
+	int			error;
+
+	/* Allocate transaction, lock inodes, join to transaction. */
+	error = xchk_trans_alloc(sc, mpath->link_resblks);
+	if (error)
+		return error;
+
+	error = xchk_metapath_ilock_both(mpath);
+	if (error) {
+		xchk_trans_cancel(sc);
+		return error;
+	}
+	xfs_trans_ijoin(sc->tp, mpath->dp, 0);
+	xfs_trans_ijoin(sc->tp, sc->ip, 0);
+
+	error = xchk_dir_lookup(sc, mpath->dp, &mpath->xname, &ino);
+	trace_xrep_metapath_lookup(sc, mpath->path, mpath->dp, ino);
+	if (error == -ENOENT) {
+		/*
+		 * There is no dirent in the directory.  Create an entry
+		 * pointing to @sc->ip.
+		 */
+		error = xrep_metapath_link(mpath);
+		if (error)
+			goto out_cancel;
+
+		error = xrep_trans_commit(sc);
+		xchk_metapath_iunlock(mpath);
+		return error;
+	}
+	if (error)
+		goto out_cancel;
+
+	if (ino == sc->ip->i_ino) {
+		/* The dirent already points to @sc->ip; we're done. */
+		error = 0;
+		goto out_cancel;
+	}
+
+	/*
+	 * The dirent points elsewhere; pass that back so that the caller
+	 * can try to remove the dirent.
+	 */
+	*alleged_child = ino;
+	error = -EEXIST;
+
+out_cancel:
+	xchk_trans_cancel(sc);
+	xchk_metapath_iunlock(mpath);
+	return error;
+}
+
+/*
+ * Take the ILOCK on the metadata directory parent and a bad child, if one is
+ * supplied. We do not know that the metadata directory is not corrupt, so we
+ * lock the parent and try to lock the child, backing off and retrying if the
+ * child lock is busy. Returns 0 if successful, or the error set by
+ * xchk_should_terminate() (documented here as -EINTR) to abort the repair.
+ * The lock state of @dp is not recorded in @mpath, so the caller must unlock
+ * both inodes itself.
+ */
+STATIC int
+xchk_metapath_ilock_parent_and_child(
+ struct xchk_metapath *mpath,
+ struct xfs_inode *ip)
+{
+ struct xfs_scrub *sc = mpath->sc;
+ int error = 0;
+
+ while (true) {
+ xfs_ilock(mpath->dp, XFS_ILOCK_EXCL);
+ /* With no child to lock, holding the parent is sufficient. */
+ if (!ip || xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+ return 0;
+ /* Child is busy: release the parent before waiting. */
+ xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL);
+
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ delay(1);
+ }
+
+ /* Not reached: the loop above only exits by returning. */
+ ASSERT(0);
+ return -EINTR;
+}
+
+/*
+ * Try to remove a dirent in @mpath->dp with the name @mpath->xname that points
+ * to @alleged_child.  Returns:
+ *
+ * 0 if there is no longer a dirent;
+ * -EEXIST if the dirent points to @sc->ip;
+ * -EAGAIN and an updated @alleged_child if the dirent points elsewhere; or
+ * A negative errno for any other error.
+ *
+ * The transaction is committed or cancelled, both ILOCKs are dropped, and the
+ * reference on the alleged child is released before returning.
+ */
+STATIC int
+xrep_metapath_try_unlink(
+	struct xchk_metapath	*mpath,
+	xfs_ino_t		*alleged_child)
+{
+	struct xfs_scrub	*sc = mpath->sc;
+	struct xfs_inode	*ip = NULL;
+	/*
+	 * Start from a known value: the lookup result is traced even when the
+	 * lookup fails and may not have filled in @ino.
+	 */
+	xfs_ino_t		ino = NULLFSINO;
+	int			error;
+
+	ASSERT(*alleged_child != sc->ip->i_ino);
+
+	trace_xrep_metapath_try_unlink(sc, mpath->path, mpath->dp,
+			*alleged_child);
+
+	/*
+	 * Allocate transaction, grab the alleged child inode, lock inodes,
+	 * join to transaction.
+	 */
+	error = xchk_trans_alloc(sc, mpath->unlink_resblks);
+	if (error)
+		return error;
+
+	error = xchk_iget(sc, *alleged_child, &ip);
+	if (error == -EINVAL || error == -ENOENT) {
+		/* inode number is bogus, junk the dirent */
+		error = 0;
+	}
+	if (error) {
+		xchk_trans_cancel(sc);
+		return error;
+	}
+
+	error = xchk_metapath_ilock_parent_and_child(mpath, ip);
+	if (error) {
+		/*
+		 * No locks are held, but we still own the reference taken by
+		 * xchk_iget() above; drop it instead of leaking the inode.
+		 */
+		xchk_trans_cancel(sc);
+		goto out_rele;
+	}
+	xfs_trans_ijoin(sc->tp, mpath->dp, 0);
+	if (ip)
+		xfs_trans_ijoin(sc->tp, ip, 0);
+
+	error = xchk_dir_lookup(sc, mpath->dp, &mpath->xname, &ino);
+	trace_xrep_metapath_lookup(sc, mpath->path, mpath->dp, ino);
+	if (error == -ENOENT) {
+		/*
+		 * There is no dirent in the directory anymore.  We're ready to
+		 * try the link operation again.
+		 */
+		error = 0;
+		goto out_cancel;
+	}
+	if (error)
+		goto out_cancel;
+
+	if (ino == sc->ip->i_ino) {
+		/* The dirent already points to @sc->ip; we're done. */
+		error = -EEXIST;
+		goto out_cancel;
+	}
+
+	/*
+	 * The dirent does not point to the alleged child.  Update the caller
+	 * and signal that we want to be called again.
+	 */
+	if (ino != *alleged_child) {
+		*alleged_child = ino;
+		error = -EAGAIN;
+		goto out_cancel;
+	}
+
+	/* Remove the link to the child. */
+	error = xrep_metapath_unlink(mpath, ino, ip);
+	if (error)
+		goto out_cancel;
+
+	error = xrep_trans_commit(sc);
+	goto out_unlock;
+
+out_cancel:
+	xchk_trans_cancel(sc);
+out_unlock:
+	xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL);
+	if (ip)
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+out_rele:
+	if (ip)
+		xchk_irele(sc, ip);
+	return error;
+}
+
+/*
+ * Make sure the metadata directory path points to the child being examined.
+ *
+ * Repair needs to be able to create a directory structure, create its own
+ * transactions, and take ILOCKs. This function /must/ be called after all
+ * other repairs have completed.
+ *
+ * Alternates between trying to link @sc->ip into the parent and removing
+ * whatever other dirent occupies the name, until the dirent points to
+ * @sc->ip or an error occurs. Returns 0 on success or a negative errno.
+ */
+int
+xrep_metapath(
+ struct xfs_scrub *sc)
+{
+ struct xchk_metapath *mpath = sc->buf;
+ struct xfs_mount *mp = sc->mp;
+ int error = 0;
+
+ /* Just probing, nothing to repair. */
+ if (sc->sm->sm_ino == XFS_SCRUB_METAPATH_PROBE)
+ return 0;
+
+ /* Parent required to do anything else. */
+ if (mpath->dp == NULL)
+ return -EFSCORRUPTED;
+
+ /*
+ * Make sure the child file actually has an attr fork to receive a new
+ * parent pointer if the fs has parent pointers.
+ */
+ if (xfs_has_parent(mp)) {
+ error = xfs_attr_add_fork(sc->ip,
+ sizeof(struct xfs_attr_sf_hdr), 1);
+ if (error)
+ return error;
+ }
+
+ /* Compute block reservation required to unlink and link a file. */
+ mpath->unlink_resblks = xfs_remove_space_res(mp, MAXNAMELEN);
+ mpath->link_resblks = xfs_link_space_res(mp, MAXNAMELEN);
+
+ do {
+ /* Only meaningful after try_link returns -EEXIST. */
+ xfs_ino_t alleged_child;
+
+ /* Re-establish the link, or tell us which inode to remove. */
+ error = xrep_metapath_try_link(mpath, &alleged_child);
+ if (!error)
+ return 0;
+ if (error != -EEXIST)
+ return error;
+
+ /*
+ * Remove an incorrect link to an alleged child, or tell us
+ * which inode to remove. -EAGAIN means the dirent changed
+ * underneath us and @alleged_child was updated; retry.
+ */
+ do {
+ error = xrep_metapath_try_unlink(mpath, &alleged_child);
+ } while (error == -EAGAIN);
+ if (error == -EEXIST) {
+ /* Link established; we're done. */
+ error = 0;
+ break;
+ }
+ /* On 0 the name is free again, so loop back and retry the link. */
+ } while (!error);
+
+ return error;
+}
+#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/newbt.c b/fs/xfs/scrub/newbt.c
index 2aa14b7ab630..70af27d98734 100644
--- a/fs/xfs/scrub/newbt.c
+++ b/fs/xfs/scrub/newbt.c
@@ -58,7 +58,7 @@ xrep_newbt_estimate_slack(
if (sc->ops->type == ST_PERAG) {
free = sc->sa.pag->pagf_freeblks;
- sz = xfs_ag_block_count(sc->mp, sc->sa.pag->pag_agno);
+ sz = xfs_ag_block_count(sc->mp, pag_agno(sc->sa.pag));
} else {
free = percpu_counter_sum(&sc->mp->m_fdblocks);
sz = sc->mp->m_sb.sb_dblocks;
@@ -186,11 +186,10 @@ xrep_newbt_add_extent(
xfs_agblock_t agbno,
xfs_extlen_t len)
{
- struct xfs_mount *mp = xnr->sc->mp;
struct xfs_alloc_arg args = {
.tp = NULL, /* no autoreap */
.oinfo = xnr->oinfo,
- .fsbno = XFS_AGB_TO_FSB(mp, pag->pag_agno, agbno),
+ .fsbno = xfs_agbno_to_fsb(pag, agbno),
.len = len,
.resv = xnr->resv,
};
@@ -206,12 +205,12 @@ xrep_newbt_validate_ag_alloc_hint(
struct xfs_scrub *sc = xnr->sc;
xfs_agnumber_t agno = XFS_FSB_TO_AGNO(sc->mp, xnr->alloc_hint);
- if (agno == sc->sa.pag->pag_agno &&
+ if (agno == pag_agno(sc->sa.pag) &&
xfs_verify_fsbno(sc->mp, xnr->alloc_hint))
return;
- xnr->alloc_hint = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno,
- XFS_AGFL_BLOCK(sc->mp) + 1);
+ xnr->alloc_hint =
+ xfs_agbno_to_fsb(sc->sa.pag, XFS_AGFL_BLOCK(sc->mp) + 1);
}
/* Allocate disk space for a new per-AG btree. */
@@ -251,16 +250,15 @@ xrep_newbt_alloc_ag_blocks(
return -ENOSPC;
agno = XFS_FSB_TO_AGNO(mp, args.fsbno);
+ if (agno != pag_agno(sc->sa.pag)) {
+ ASSERT(agno == pag_agno(sc->sa.pag));
+ return -EFSCORRUPTED;
+ }
- trace_xrep_newbt_alloc_ag_blocks(mp, agno,
+ trace_xrep_newbt_alloc_ag_blocks(sc->sa.pag,
XFS_FSB_TO_AGBNO(mp, args.fsbno), args.len,
xnr->oinfo.oi_owner);
- if (agno != sc->sa.pag->pag_agno) {
- ASSERT(agno == sc->sa.pag->pag_agno);
- return -EFSCORRUPTED;
- }
-
error = xrep_newbt_add_blocks(xnr, sc->sa.pag, &args);
if (error)
return error;
@@ -326,16 +324,16 @@ xrep_newbt_alloc_file_blocks(
agno = XFS_FSB_TO_AGNO(mp, args.fsbno);
- trace_xrep_newbt_alloc_file_blocks(mp, agno,
- XFS_FSB_TO_AGBNO(mp, args.fsbno), args.len,
- xnr->oinfo.oi_owner);
-
pag = xfs_perag_get(mp, agno);
if (!pag) {
ASSERT(0);
return -EFSCORRUPTED;
}
+ trace_xrep_newbt_alloc_file_blocks(pag,
+ XFS_FSB_TO_AGBNO(mp, args.fsbno), args.len,
+ xnr->oinfo.oi_owner);
+
error = xrep_newbt_add_blocks(xnr, pag, &args);
xfs_perag_put(pag);
if (error)
@@ -376,7 +374,6 @@ xrep_newbt_free_extent(
struct xfs_scrub *sc = xnr->sc;
xfs_agblock_t free_agbno = resv->agbno;
xfs_extlen_t free_aglen = resv->len;
- xfs_fsblock_t fsbno;
int error;
if (!btree_committed || resv->used == 0) {
@@ -385,8 +382,8 @@ xrep_newbt_free_extent(
* space reservation, let the existing EFI free the entire
* space extent.
*/
- trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno,
- free_agbno, free_aglen, xnr->oinfo.oi_owner);
+ trace_xrep_newbt_free_blocks(resv->pag, free_agbno, free_aglen,
+ xnr->oinfo.oi_owner);
xfs_alloc_commit_autoreap(sc->tp, &resv->autoreap);
return 1;
}
@@ -403,8 +400,8 @@ xrep_newbt_free_extent(
if (free_aglen == 0)
return 0;
- trace_xrep_newbt_free_blocks(sc->mp, resv->pag->pag_agno, free_agbno,
- free_aglen, xnr->oinfo.oi_owner);
+ trace_xrep_newbt_free_blocks(resv->pag, free_agbno, free_aglen,
+ xnr->oinfo.oi_owner);
ASSERT(xnr->resv != XFS_AG_RESV_AGFL);
ASSERT(xnr->resv != XFS_AG_RESV_IGNORE);
@@ -413,9 +410,9 @@ xrep_newbt_free_extent(
* Use EFIs to free the reservations. This reduces the chance
* that we leak blocks if the system goes down.
*/
- fsbno = XFS_AGB_TO_FSB(sc->mp, resv->pag->pag_agno, free_agbno);
- error = xfs_free_extent_later(sc->tp, fsbno, free_aglen, &xnr->oinfo,
- xnr->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
+ error = xfs_free_extent_later(sc->tp,
+ xfs_agbno_to_fsb(resv->pag, free_agbno), free_aglen,
+ &xnr->oinfo, xnr->resv, XFS_FREE_EXTENT_SKIP_DISCARD);
if (error)
return error;
@@ -516,7 +513,6 @@ xrep_newbt_claim_block(
union xfs_btree_ptr *ptr)
{
struct xrep_newbt_resv *resv;
- struct xfs_mount *mp = cur->bc_mp;
xfs_agblock_t agbno;
/*
@@ -541,12 +537,10 @@ xrep_newbt_claim_block(
if (resv->used == resv->len)
list_move_tail(&resv->list, &xnr->resv_list);
- trace_xrep_newbt_claim_block(mp, resv->pag->pag_agno, agbno, 1,
- xnr->oinfo.oi_owner);
+ trace_xrep_newbt_claim_block(resv->pag, agbno, 1, xnr->oinfo.oi_owner);
if (cur->bc_ops->ptr_len == XFS_BTREE_LONG_PTR_LEN)
- ptr->l = cpu_to_be64(XFS_AGB_TO_FSB(mp, resv->pag->pag_agno,
- agbno));
+ ptr->l = cpu_to_be64(xfs_agbno_to_fsb(resv->pag, agbno));
else
ptr->s = cpu_to_be32(agbno);
diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c
index 80aee30886c4..4a47d0aabf73 100644
--- a/fs/xfs/scrub/nlinks.c
+++ b/fs/xfs/scrub/nlinks.c
@@ -279,7 +279,7 @@ xchk_nlinks_collect_dirent(
* determine the backref count.
*/
if (dotdot) {
- if (dp == sc->mp->m_rootip)
+ if (xchk_inode_is_dirtree_root(dp))
error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
else if (!xfs_has_parent(sc->mp))
error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0);
@@ -735,7 +735,7 @@ xchk_nlinks_compare_inode(
}
}
- if (ip == sc->mp->m_rootip) {
+ if (xchk_inode_is_dirtree_root(ip)) {
/*
* For the root of a directory tree, both the '.' and '..'
* entries should point to the root directory. The dotdot
diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c
index b3e707f47b7b..4ebdee095428 100644
--- a/fs/xfs/scrub/nlinks_repair.c
+++ b/fs/xfs/scrub/nlinks_repair.c
@@ -60,11 +60,9 @@ xrep_nlinks_is_orphaned(
unsigned int actual_nlink,
const struct xchk_nlink *obs)
{
- struct xfs_mount *mp = ip->i_mount;
-
if (obs->parents != 0)
return false;
- if (ip == mp->m_rootip || ip == sc->orphanage)
+ if (xchk_inode_is_dirtree_root(ip) || ip == sc->orphanage)
return false;
return actual_nlink != 0;
}
diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c
index 7148d8362db8..c287c755f2c5 100644
--- a/fs/xfs/scrub/orphanage.c
+++ b/fs/xfs/scrub/orphanage.c
@@ -295,7 +295,9 @@ xrep_orphanage_can_adopt(
return false;
if (sc->ip == sc->orphanage)
return false;
- if (xfs_internal_inum(sc->mp, sc->ip->i_ino))
+ if (xchk_inode_is_sb_rooted(sc->ip))
+ return false;
+ if (xfs_is_internal_inode(sc->ip))
return false;
return true;
}
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 91e7b51ce068..3b692c4acc1e 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -132,6 +132,14 @@ xchk_parent_validate(
return 0;
}
+ /* Is this the metadata root dir? Then '..' must point to itself. */
+ if (sc->ip == mp->m_metadirip) {
+ if (sc->ip->i_ino != mp->m_sb.sb_metadirino ||
+ sc->ip->i_ino != parent_ino)
+ xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+ return 0;
+ }
+
/* '..' must not point to ourselves. */
if (sc->ip->i_ino == parent_ino) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
@@ -185,6 +193,12 @@ xchk_parent_validate(
goto out_unlock;
}
+ /* Metadata and regular inodes cannot cross trees. */
+ if (xfs_is_metadir_inode(dp) != xfs_is_metadir_inode(sc->ip)) {
+ xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+ goto out_unlock;
+ }
+
/* Look for a directory entry in the parent pointing to the child. */
error = xchk_dir_walk(sc, dp, xchk_parent_actor, &spc);
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
@@ -300,7 +314,7 @@ xchk_parent_pptr_and_dotdot(
}
/* Is this the root dir? Then '..' must point to itself. */
- if (sc->ip == sc->mp->m_rootip) {
+ if (xchk_inode_is_dirtree_root(sc->ip)) {
if (sc->ip->i_ino != pp->parent_ino)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
return 0;
@@ -711,7 +725,7 @@ xchk_parent_count_pptrs(
}
if (S_ISDIR(VFS_I(sc->ip)->i_mode)) {
- if (sc->ip == sc->mp->m_rootip)
+ if (xchk_inode_is_dirtree_root(sc->ip))
pp->pptrs_found++;
if (VFS_I(sc->ip)->i_nlink == 0 && pp->pptrs_found > 0)
@@ -720,6 +734,14 @@ xchk_parent_count_pptrs(
pp->pptrs_found == 0)
xchk_ino_set_corrupt(sc, sc->ip->i_ino);
} else {
+ /*
+ * Starting with metadir, we allow checking of parent pointers
+ * of non-directory files that are children of the superblock.
+ * Pretend that we found a parent pointer attr.
+ */
+ if (xfs_has_metadir(sc->mp) && xchk_inode_is_sb_rooted(sc->ip))
+ pp->pptrs_found++;
+
if (VFS_I(sc->ip)->i_nlink != pp->pptrs_found)
xchk_ino_set_corrupt(sc, sc->ip->i_ino);
}
@@ -885,10 +907,9 @@ bool
xchk_pptr_looks_zapped(
struct xfs_inode *ip)
{
- struct xfs_mount *mp = ip->i_mount;
struct inode *inode = VFS_I(ip);
- ASSERT(xfs_has_parent(mp));
+ ASSERT(xfs_has_parent(ip->i_mount));
/*
* Temporary files that cannot be linked into the directory tree do not
@@ -902,15 +923,15 @@ xchk_pptr_looks_zapped(
* of a parent pointer scan is always the empty set. It's safe to scan
* them even if the attr fork was zapped.
*/
- if (ip == mp->m_rootip)
+ if (xchk_inode_is_dirtree_root(ip))
return false;
/*
- * Metadata inodes are all rooted in the superblock and do not have
- * any parents. Hence the attr fork will not be initialized, but
- * there are no parent pointers that might have been zapped.
+ * Metadata inodes that are rooted in the superblock do not have any
+ * parents. Hence the attr fork will not be initialized, but there are
+ * no parent pointers that might have been zapped.
*/
- if (xfs_is_metadata_inode(ip))
+ if (xchk_inode_is_sb_rooted(ip))
return false;
/*
diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c
index 7b42b7f65a0b..31bfe10be22a 100644
--- a/fs/xfs/scrub/parent_repair.c
+++ b/fs/xfs/scrub/parent_repair.c
@@ -1334,7 +1334,7 @@ xrep_parent_rebuild_pptrs(
* so that we can decide if we're moving this file to the orphanage.
* For this purpose, root directories are their own parents.
*/
- if (sc->ip == sc->mp->m_rootip) {
+ if (xchk_inode_is_dirtree_root(sc->ip)) {
xrep_findparent_scan_found(&rp->pscan, sc->ip->i_ino);
} else {
error = xrep_parent_lookup_pptrs(sc, &parent_ino);
@@ -1354,21 +1354,40 @@ STATIC int
xrep_parent_rebuild_tree(
struct xrep_parent *rp)
{
+ struct xfs_scrub *sc = rp->sc;
+ bool try_adoption;
int error;
- if (xfs_has_parent(rp->sc->mp)) {
+ if (xfs_has_parent(sc->mp)) {
error = xrep_parent_rebuild_pptrs(rp);
if (error)
return error;
}
- if (rp->pscan.parent_ino == NULLFSINO) {
- if (xrep_orphanage_can_adopt(rp->sc))
+ /*
+ * Any file with no parent could be adopted. This check happens after
+ * rebuilding the parent pointer structure because we might have cycled
+ * the ILOCK during that process.
+ */
+ try_adoption = rp->pscan.parent_ino == NULLFSINO;
+
+ /*
+ * Starting with metadir, we allow checking of parent pointers
+ * of non-directory files that are children of the superblock.
+ * Lack of parent is ok here.
+ */
+ if (try_adoption && xfs_has_metadir(sc->mp) &&
+ xchk_inode_is_sb_rooted(sc->ip))
+ try_adoption = false;
+
+ if (try_adoption) {
+ if (xrep_orphanage_can_adopt(sc))
return xrep_parent_move_to_orphanage(rp);
return -EFSCORRUPTED;
+
}
- if (S_ISDIR(VFS_I(rp->sc->ip)->i_mode))
+ if (S_ISDIR(VFS_I(sc->ip)->i_mode))
return xrep_parent_reset_dotdot(rp);
return 0;
@@ -1422,6 +1441,14 @@ xrep_parent_set_nondir_nlink(
if (error)
return error;
+ /*
+ * Starting with metadir, we allow checking of parent pointers of
+ * non-directory files that are children of the superblock. Pretend
+ * that we found a parent pointer attr.
+ */
+ if (xfs_has_metadir(sc->mp) && xchk_inode_is_sb_rooted(sc->ip))
+ rp->parents++;
+
if (rp->parents > 0 && xfs_inode_on_unlinked_list(ip)) {
xfs_trans_ijoin(sc->tp, sc->ip, 0);
joined = true;
diff --git a/fs/xfs/scrub/quotacheck.c b/fs/xfs/scrub/quotacheck.c
index c77eb2de8df7..dc4033b91e44 100644
--- a/fs/xfs/scrub/quotacheck.c
+++ b/fs/xfs/scrub/quotacheck.c
@@ -398,10 +398,13 @@ xqcheck_collect_inode(
bool isreg = S_ISREG(VFS_I(ip)->i_mode);
int error = 0;
- if (xfs_is_quota_inode(&tp->t_mountp->m_sb, ip->i_ino)) {
+ if (xfs_is_metadir_inode(ip) ||
+ xfs_is_quota_inode(&tp->t_mountp->m_sb, ip->i_ino)) {
/*
* Quota files are never counted towards quota, so we do not
- * need to take the lock.
+ * need to take the lock. Files do not switch between the
+ * metadata and regular directory trees without a reallocation,
+ * so we do not need to ILOCK them either.
*/
xchk_iscan_mark_visited(&xqc->iscan, ip);
return 0;
diff --git a/fs/xfs/scrub/reap.c b/fs/xfs/scrub/reap.c
index 53697f3c5e1b..08230952053b 100644
--- a/fs/xfs/scrub/reap.c
+++ b/fs/xfs/scrub/reap.c
@@ -137,7 +137,7 @@ xreap_put_freelist(
agfl_bp, agbno, 0);
if (error)
return error;
- xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1,
+ xfs_extent_busy_insert(sc->tp, pag_group(sc->sa.pag), agbno, 1,
XFS_EXTENT_BUSY_SKIP_DISCARD);
return 0;
@@ -263,7 +263,6 @@ xreap_agextent_binval(
struct xfs_scrub *sc = rs->sc;
struct xfs_perag *pag = sc->sa.pag;
struct xfs_mount *mp = sc->mp;
- xfs_agnumber_t agno = sc->sa.pag->pag_agno;
xfs_agblock_t agbno_next = agbno + *aglenp;
xfs_agblock_t bno = agbno;
@@ -284,7 +283,7 @@ xreap_agextent_binval(
*/
while (bno < agbno_next) {
struct xrep_bufscan scan = {
- .daddr = XFS_AGB_TO_DADDR(mp, agno, bno),
+ .daddr = xfs_agbno_to_daddr(pag, bno),
.max_sectors = xrep_bufscan_max_sectors(mp,
agbno_next - bno),
.daddr_step = XFS_FSB_TO_BB(mp, 1),
@@ -391,7 +390,7 @@ xreap_agextent_iter(
xfs_fsblock_t fsbno;
int error = 0;
- fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, agbno);
+ fsbno = xfs_agbno_to_fsb(sc->sa.pag, agbno);
/*
* If there are other rmappings, this block is cross linked and must
@@ -780,7 +779,6 @@ xreap_bmapi_binval(
xfs_fileoff_t off;
xfs_fileoff_t max_off;
xfs_extlen_t scan_blocks;
- xfs_agnumber_t agno = sc->sa.pag->pag_agno;
xfs_agblock_t bno;
xfs_agblock_t agbno;
xfs_agblock_t agbno_next;
@@ -837,7 +835,7 @@ xreap_bmapi_binval(
*/
while (bno < agbno_next) {
struct xrep_bufscan scan = {
- .daddr = XFS_AGB_TO_DADDR(mp, agno, bno),
+ .daddr = xfs_agbno_to_daddr(pag, bno),
.max_sectors = xrep_bufscan_max_sectors(mp,
scan_blocks),
.daddr_step = XFS_FSB_TO_BB(mp, 1),
diff --git a/fs/xfs/scrub/refcount.c b/fs/xfs/scrub/refcount.c
index d0c7d4a29c0f..2b6be75e9424 100644
--- a/fs/xfs/scrub/refcount.c
+++ b/fs/xfs/scrub/refcount.c
@@ -453,7 +453,8 @@ xchk_refcountbt_rec(
struct xchk_refcbt_records *rrc = bs->private;
xfs_refcount_btrec_to_irec(rec, &irec);
- if (xfs_refcount_check_irec(bs->cur->bc_ag.pag, &irec) != NULL) {
+ if (xfs_refcount_check_irec(to_perag(bs->cur->bc_group), &irec) !=
+ NULL) {
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
return 0;
}
diff --git a/fs/xfs/scrub/refcount_repair.c b/fs/xfs/scrub/refcount_repair.c
index a00d7ce7ae5b..4e572b81c986 100644
--- a/fs/xfs/scrub/refcount_repair.c
+++ b/fs/xfs/scrub/refcount_repair.c
@@ -215,7 +215,7 @@ xrep_refc_rmap_shareable(
return false;
/* Metadata in files are never shareable */
- if (xfs_internal_inum(mp, rmap->rm_owner))
+ if (xfs_is_sb_inum(mp, rmap->rm_owner))
return false;
/* Metadata and unwritten file blocks are not shareable. */
@@ -590,7 +590,6 @@ xrep_refc_build_new_tree(
struct xfs_scrub *sc = rr->sc;
struct xfs_btree_cur *refc_cur;
struct xfs_perag *pag = sc->sa.pag;
- xfs_fsblock_t fsbno;
int error;
error = xrep_refc_sort_records(rr);
@@ -603,8 +602,8 @@ xrep_refc_build_new_tree(
* to root the new btree while it's under construction and before we
* attach it to the AG header.
*/
- fsbno = XFS_AGB_TO_FSB(sc->mp, pag->pag_agno, xfs_refc_block(sc->mp));
- xrep_newbt_init_ag(&rr->new_btree, sc, &XFS_RMAP_OINFO_REFC, fsbno,
+ xrep_newbt_init_ag(&rr->new_btree, sc, &XFS_RMAP_OINFO_REFC,
+ xfs_agbno_to_fsb(pag, xfs_refc_block(sc->mp)),
XFS_AG_RESV_METADATA);
rr->new_btree.bload.get_records = xrep_refc_get_records;
rr->new_btree.bload.claim_block = xrep_refc_claim_block;
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 155bbaaa496e..91c8bc055a4f 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -21,6 +21,7 @@
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount_btree.h"
+#include "xfs_rtbitmap.h"
#include "xfs_extent_busy.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
@@ -305,7 +306,7 @@ xrep_calc_ag_resblks(
/* Now grab the block counters from the AGF. */
error = xfs_alloc_read_agf(pag, NULL, 0, &bp);
if (error) {
- aglen = pag->block_count;
+ aglen = pag_group(pag)->xg_block_count;
freelen = aglen;
usedlen = aglen;
} else {
@@ -325,16 +326,14 @@ xrep_calc_ag_resblks(
/* If the block counts are impossible, make worst-case assumptions. */
if (aglen == NULLAGBLOCK ||
- aglen != pag->block_count ||
+ aglen != pag_group(pag)->xg_block_count ||
freelen >= aglen) {
- aglen = pag->block_count;
+ aglen = pag_group(pag)->xg_block_count;
freelen = aglen;
usedlen = aglen;
}
- xfs_perag_put(pag);
- trace_xrep_calc_ag_resblks(mp, sm->sm_agno, icount, aglen,
- freelen, usedlen);
+ trace_xrep_calc_ag_resblks(pag, icount, aglen, freelen, usedlen);
/*
* Figure out how many blocks we'd need worst case to rebuild
@@ -372,8 +371,9 @@ xrep_calc_ag_resblks(
rmapbt_sz = 0;
}
- trace_xrep_calc_ag_resblks_btsize(mp, sm->sm_agno, bnobt_sz,
- inobt_sz, rmapbt_sz, refcbt_sz);
+ trace_xrep_calc_ag_resblks_btsize(pag, bnobt_sz, inobt_sz, rmapbt_sz,
+ refcbt_sz);
+ xfs_perag_put(pag);
return max(max(bnobt_sz, inobt_sz), max(rmapbt_sz, refcbt_sz));
}
@@ -414,7 +414,7 @@ xrep_fix_freelist(
args.mp = sc->mp;
args.tp = sc->tp;
- args.agno = sc->sa.pag->pag_agno;
+ args.agno = pag_agno(sc->sa.pag);
args.alignment = 1;
args.pag = sc->sa.pag;
@@ -483,7 +483,7 @@ xrep_findroot_block(
int block_level;
int error = 0;
- daddr = XFS_AGB_TO_DADDR(mp, ri->sc->sa.pag->pag_agno, agbno);
+ daddr = xfs_agbno_to_daddr(ri->sc->sa.pag, agbno);
/*
* Blocks in the AGFL have stale contents that might just happen to
@@ -612,7 +612,7 @@ xrep_findroot_block(
else
fab->root = NULLAGBLOCK;
- trace_xrep_findroot_block(mp, ri->sc->sa.pag->pag_agno, agbno,
+ trace_xrep_findroot_block(ri->sc->sa.pag, agbno,
be32_to_cpu(btblock->bb_magic), fab->height - 1);
out:
xfs_trans_brelse(ri->sc->tp, bp);
@@ -953,6 +953,29 @@ xrep_ag_init(
return 0;
}
+#ifdef CONFIG_XFS_RT
+/*
+ * Given a reference to a rtgroup structure, lock the group with @rtglock_flags
+ * and hold it in @sr. Must only be called to repair a regular rt file.
+ */
+int
+xrep_rtgroup_init(
+ struct xfs_scrub *sc,
+ struct xfs_rtgroup *rtg,
+ struct xchk_rt *sr,
+ unsigned int rtglock_flags)
+{
+ ASSERT(sr->rtg == NULL);
+
+ xfs_rtgroup_lock(rtg, rtglock_flags);
+ sr->rtlock_flags = rtglock_flags;
+
+ /* Grab our own passive reference from the caller's ref. */
+ sr->rtg = xfs_rtgroup_hold(rtg);
+ return 0;
+}
+#endif /* CONFIG_XFS_RT */
+
/* Reinitialize the per-AG block reservation for the AG we just fixed. */
int
xrep_reset_perag_resv(
@@ -973,7 +996,7 @@ xrep_reset_perag_resv(
if (error == -ENOSPC) {
xfs_err(sc->mp,
"Insufficient free space to reset per-AG reservation for AG %u after repair.",
- sc->sa.pag->pag_agno);
+ pag_agno(sc->sa.pag));
error = 0;
}
@@ -1083,7 +1106,12 @@ xrep_metadata_inode_forks(
if (error)
return error;
- /* Make sure the attr fork looks ok before we delete it. */
+ /*
+ * Metadata files can only have extended attributes on metadir
+ * filesystems, either for parent pointers or for actual xattr data.
+ * For a non-metadir filesystem, make sure the attr fork looks ok
+ * before we delete it.
+ */
if (xfs_inode_hasattr(sc->ip)) {
error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA);
if (error)
@@ -1099,8 +1127,11 @@ xrep_metadata_inode_forks(
return error;
}
- /* Clear the attr forks since metadata shouldn't have that. */
- if (xfs_inode_hasattr(sc->ip)) {
+ /*
+ * Metadata files on non-metadir filesystems cannot have attr forks,
+ * so clear them now.
+ */
+ if (xfs_inode_hasattr(sc->ip) && !xfs_has_metadir(sc->mp)) {
if (!dirty) {
dirty = true;
xfs_trans_ijoin(sc->tp, sc->ip, 0);
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 0e0dc2bf985c..b649da1a93eb 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -8,6 +8,7 @@
#include "xfs_quota_defs.h"
+struct xfs_rtgroup;
struct xchk_stats_run;
static inline int xrep_notsupported(struct xfs_scrub *sc)
@@ -106,6 +107,12 @@ int xrep_setup_inode(struct xfs_scrub *sc, const struct xfs_imap *imap);
void xrep_ag_btcur_init(struct xfs_scrub *sc, struct xchk_ag *sa);
int xrep_ag_init(struct xfs_scrub *sc, struct xfs_perag *pag,
struct xchk_ag *sa);
+#ifdef CONFIG_XFS_RT
+int xrep_rtgroup_init(struct xfs_scrub *sc, struct xfs_rtgroup *rtg,
+ struct xchk_rt *sr, unsigned int rtglock_flags);
+#else
+# define xrep_rtgroup_init(sc, rtg, sr, lockflags) (-ENOSYS)
+#endif /* CONFIG_XFS_RT */
/* Metadata revalidators */
@@ -134,13 +141,16 @@ int xrep_directory(struct xfs_scrub *sc);
int xrep_parent(struct xfs_scrub *sc);
int xrep_symlink(struct xfs_scrub *sc);
int xrep_dirtree(struct xfs_scrub *sc);
+int xrep_metapath(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xrep_rtbitmap(struct xfs_scrub *sc);
int xrep_rtsummary(struct xfs_scrub *sc);
+int xrep_rgsuperblock(struct xfs_scrub *sc);
#else
# define xrep_rtbitmap xrep_notsupported
# define xrep_rtsummary xrep_notsupported
+# define xrep_rgsuperblock xrep_notsupported
#endif /* CONFIG_XFS_RT */
#ifdef CONFIG_XFS_QUOTA
@@ -208,6 +218,7 @@ xrep_setup_nothing(
#define xrep_setup_parent xrep_setup_nothing
#define xrep_setup_nlinks xrep_setup_nothing
#define xrep_setup_dirtree xrep_setup_nothing
+#define xrep_setup_metapath xrep_setup_nothing
#define xrep_setup_inode(sc, imap) ((void)0)
@@ -243,6 +254,8 @@ static inline int xrep_setup_symlink(struct xfs_scrub *sc, unsigned int *x)
#define xrep_parent xrep_notsupported
#define xrep_symlink xrep_notsupported
#define xrep_dirtree xrep_notsupported
+#define xrep_metapath xrep_notsupported
+#define xrep_rgsuperblock xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/rgsuper.c b/fs/xfs/scrub/rgsuper.c
new file mode 100644
index 000000000000..463b3573bb76
--- /dev/null
+++ b/fs/xfs/scrub/rgsuper.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2022-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_rtgroup.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/repair.h"
+
+/* Set us up with a transaction and an empty context. */
+int
+xchk_setup_rgsuperblock(
+ struct xfs_scrub *sc)
+{
+ return xchk_trans_alloc(sc, 0);
+}
+
+/* Cross-reference with the other rt metadata. */
+STATIC void
+xchk_rgsuperblock_xref(
+ struct xfs_scrub *sc)
+{
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return;
+
+ xchk_xref_is_used_rt_space(sc, xfs_rgbno_to_rtb(sc->sr.rtg, 0), 1);
+}
+
+int
+xchk_rgsuperblock(
+ struct xfs_scrub *sc)
+{
+ xfs_rgnumber_t rgno = sc->sm->sm_agno;
+ int error;
+
+ /*
+ * Only rtgroup 0 has a superblock. We may someday want to use higher
+ * rgno for other functions, similar to what we do with the primary
+ * super scrub function.
+ */
+ if (rgno != 0)
+ return -ENOENT;
+
+ /*
+ * Grab an active reference to the rtgroup structure. If we can't get
+ * it, we're racing with something that's tearing down the group, so
+ * signal that the group no longer exists. Take the rtbitmap in shared
+ * mode so that the group can't change while we're doing things.
+ */
+ error = xchk_rtgroup_init_existing(sc, rgno, &sc->sr);
+ if (!xchk_xref_process_error(sc, 0, 0, &error))
+ return error;
+
+ xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP_SHARED);
+
+ /*
+ * Since we already validated the rt superblock at mount time, we don't
+ * need to check its contents again. All we need is to cross-reference.
+ */
+ xchk_rgsuperblock_xref(sc);
+ return 0;
+}
+
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+int
+xrep_rgsuperblock(
+ struct xfs_scrub *sc)
+{
+ ASSERT(rtg_rgno(sc->sr.rtg) == 0);
+
+ xfs_log_sb(sc->tp);
+ return 0;
+}
+#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/rmap.c b/fs/xfs/scrub/rmap.c
index ba5bbc3fb754..39e9ad7cd8ae 100644
--- a/fs/xfs/scrub/rmap.c
+++ b/fs/xfs/scrub/rmap.c
@@ -358,7 +358,7 @@ xchk_rmapbt_rec(
struct xfs_rmap_irec irec;
if (xfs_rmap_btrec_to_irec(rec, &irec) != NULL ||
- xfs_rmap_check_irec(bs->cur->bc_ag.pag, &irec) != NULL) {
+ xfs_rmap_check_irec(to_perag(bs->cur->bc_group), &irec) != NULL) {
xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
return 0;
}
@@ -410,7 +410,7 @@ xchk_rmapbt_walk_ag_metadata(
goto out;
/* OWN_LOG: Internal log */
- if (xfs_ag_contains_log(mp, sc->sa.pag->pag_agno)) {
+ if (xfs_ag_contains_log(mp, pag_agno(sc->sa.pag))) {
error = xagb_bitmap_set(&cr->log_owned,
XFS_FSB_TO_AGBNO(mp, mp->m_sb.sb_logstart),
mp->m_sb.sb_logblocks);
diff --git a/fs/xfs/scrub/rmap_repair.c b/fs/xfs/scrub/rmap_repair.c
index e8080eba37d2..a0a227d183d2 100644
--- a/fs/xfs/scrub/rmap_repair.c
+++ b/fs/xfs/scrub/rmap_repair.c
@@ -231,7 +231,7 @@ xrep_rmap_stash(
if (xchk_iscan_aborted(&rr->iscan))
return -EFSCORRUPTED;
- trace_xrep_rmap_found(sc->mp, sc->sa.pag->pag_agno, &rmap);
+ trace_xrep_rmap_found(sc->sa.pag, &rmap);
mutex_lock(&rr->lock);
mcur = xfs_rmapbt_mem_cursor(sc->sa.pag, sc->tp, &rr->rmap_btree);
@@ -344,7 +344,7 @@ xrep_rmap_visit_bmbt(
int error;
if (XFS_FSB_TO_AGNO(mp, rec->br_startblock) !=
- rf->rr->sc->sa.pag->pag_agno)
+ pag_agno(rf->rr->sc->sa.pag))
return 0;
agbno = XFS_FSB_TO_AGBNO(mp, rec->br_startblock);
@@ -391,7 +391,7 @@ xrep_rmap_visit_iroot_btree_block(
return 0;
fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, xfs_buf_daddr(bp));
- if (XFS_FSB_TO_AGNO(cur->bc_mp, fsbno) != rf->rr->sc->sa.pag->pag_agno)
+ if (XFS_FSB_TO_AGNO(cur->bc_mp, fsbno) != pag_agno(rf->rr->sc->sa.pag))
return 0;
agbno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsbno);
@@ -622,7 +622,7 @@ xrep_rmap_walk_inobt(
return error;
xfs_inobt_btrec_to_irec(mp, rec, &irec);
- if (xfs_inobt_check_irec(cur->bc_ag.pag, &irec) != NULL)
+ if (xfs_inobt_check_irec(to_perag(cur->bc_group), &irec) != NULL)
return -EFSCORRUPTED;
agino = irec.ir_startino;
@@ -801,7 +801,7 @@ xrep_rmap_find_log_rmaps(
{
struct xfs_scrub *sc = rr->sc;
- if (!xfs_ag_contains_log(sc->mp, sc->sa.pag->pag_agno))
+ if (!xfs_ag_contains_log(sc->mp, pag_agno(sc->sa.pag)))
return 0;
return xrep_rmap_stash(rr,
@@ -976,7 +976,7 @@ xrep_rmap_try_reserve(
{
struct xrep_rmap_agfl ra = {
.bitmap = freesp_blocks,
- .agno = rr->sc->sa.pag->pag_agno,
+ .agno = pag_agno(rr->sc->sa.pag),
};
struct xfs_scrub *sc = rr->sc;
struct xrep_newbt_resv *resv, *n;
@@ -1272,7 +1272,6 @@ xrep_rmap_build_new_tree(
struct xfs_perag *pag = sc->sa.pag;
struct xfs_agf *agf = sc->sa.agf_bp->b_addr;
struct xfs_btree_cur *rmap_cur;
- xfs_fsblock_t fsbno;
int error;
/*
@@ -1290,9 +1289,9 @@ xrep_rmap_build_new_tree(
* rmapbt per-AG reservation, which we will adjust further after
* committing the new btree.
*/
- fsbno = XFS_AGB_TO_FSB(sc->mp, pag->pag_agno, XFS_RMAP_BLOCK(sc->mp));
xrep_newbt_init_ag(&rr->new_btree, sc, &XFS_RMAP_OINFO_SKIP_UPDATE,
- fsbno, XFS_AG_RESV_RMAPBT);
+ xfs_agbno_to_fsb(pag, XFS_RMAP_BLOCK(sc->mp)),
+ XFS_AG_RESV_RMAPBT);
rr->new_btree.bload.get_records = xrep_rmap_get_records;
rr->new_btree.bload.claim_block = xrep_rmap_claim_block;
rr->new_btree.alloc_vextent = xrep_rmap_alloc_vextent;
@@ -1553,7 +1552,7 @@ xrep_rmapbt_live_update(
if (!xrep_rmapbt_want_live_update(&rr->iscan, &p->oinfo))
goto out_unlock;
- trace_xrep_rmap_live_update(mp, rr->sc->sa.pag->pag_agno, action, p);
+ trace_xrep_rmap_live_update(rr->sc->sa.pag, action, p);
error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp);
if (error)
@@ -1597,7 +1596,7 @@ xrep_rmap_setup_scan(
/* Set up in-memory rmap btree */
error = xfs_rmapbt_mem_init(sc->mp, &rr->rmap_btree, sc->xmbtp,
- sc->sa.pag->pag_agno);
+ pag_agno(sc->sa.pag));
if (error)
goto out_mutex;
@@ -1612,7 +1611,7 @@ xrep_rmap_setup_scan(
*/
ASSERT(sc->flags & XCHK_FSGATES_RMAP);
xfs_rmap_hook_setup(&rr->rhook, xrep_rmapbt_live_update);
- error = xfs_rmap_hook_add(sc->sa.pag, &rr->rhook);
+ error = xfs_rmap_hook_add(pag_group(sc->sa.pag), &rr->rhook);
if (error)
goto out_iscan;
return 0;
@@ -1633,7 +1632,7 @@ xrep_rmap_teardown(
struct xfs_scrub *sc = rr->sc;
xchk_iscan_abort(&rr->iscan);
- xfs_rmap_hook_del(sc->sa.pag, &rr->rhook);
+ xfs_rmap_hook_del(pag_group(sc->sa.pag), &rr->rhook);
xchk_iscan_teardown(&rr->iscan);
xfbtree_destroy(&rr->rmap_btree);
mutex_destroy(&rr->lock);
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 46583517377f..376a36fd9a9c 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -35,6 +35,10 @@ xchk_setup_rtbitmap(
return -ENOMEM;
sc->buf = rtb;
+ error = xchk_rtgroup_init(sc, sc->sm->sm_agno, &sc->sr);
+ if (error)
+ return error;
+
if (xchk_could_repair(sc)) {
error = xrep_setup_rtbitmap(sc, rtb);
if (error)
@@ -45,7 +49,8 @@ xchk_setup_rtbitmap(
if (error)
return error;
- error = xchk_install_live_inode(sc, sc->mp->m_rbmip);
+ error = xchk_install_live_inode(sc,
+ sc->sr.rtg->rtg_inodes[XFS_RTGI_BITMAP]);
if (error)
return error;
@@ -53,18 +58,18 @@ xchk_setup_rtbitmap(
if (error)
return error;
- xchk_ilock(sc, XFS_ILOCK_EXCL | XFS_ILOCK_RTBITMAP);
-
/*
* Now that we've locked the rtbitmap, we can't race with growfsrt
* trying to expand the bitmap or change the size of the rt volume.
* Hence it is safe to compute and check the geometry values.
*/
+ xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP);
if (mp->m_sb.sb_rblocks) {
- rtb->rextents = xfs_rtb_to_rtx(mp, mp->m_sb.sb_rblocks);
+ rtb->rextents = xfs_blen_to_rtbxlen(mp, mp->m_sb.sb_rblocks);
rtb->rextslog = xfs_compute_rextslog(rtb->rextents);
- rtb->rbmblocks = xfs_rtbitmap_blockcount(mp, rtb->rextents);
+ rtb->rbmblocks = xfs_rtbitmap_blockcount(mp);
}
+
return 0;
}
@@ -73,7 +78,7 @@ xchk_setup_rtbitmap(
/* Scrub a free extent record from the realtime bitmap. */
STATIC int
xchk_rtbitmap_rec(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
struct xfs_trans *tp,
const struct xfs_rtalloc_rec *rec,
void *priv)
@@ -82,10 +87,10 @@ xchk_rtbitmap_rec(
xfs_rtblock_t startblock;
xfs_filblks_t blockcount;
- startblock = xfs_rtx_to_rtb(mp, rec->ar_startext);
- blockcount = xfs_rtx_to_rtb(mp, rec->ar_extcount);
+ startblock = xfs_rtx_to_rtb(rtg, rec->ar_startext);
+ blockcount = xfs_rtxlen_to_extlen(rtg_mount(rtg), rec->ar_extcount);
- if (!xfs_verify_rtbext(mp, startblock, blockcount))
+ if (!xfs_verify_rtbext(rtg_mount(rtg), startblock, blockcount))
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
return 0;
}
@@ -140,18 +145,20 @@ xchk_rtbitmap(
struct xfs_scrub *sc)
{
struct xfs_mount *mp = sc->mp;
+ struct xfs_rtgroup *rtg = sc->sr.rtg;
+ struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
struct xchk_rtbitmap *rtb = sc->buf;
int error;
/* Is sb_rextents correct? */
if (mp->m_sb.sb_rextents != rtb->rextents) {
- xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
+ xchk_ino_set_corrupt(sc, rbmip->i_ino);
return 0;
}
/* Is sb_rextslog correct? */
if (mp->m_sb.sb_rextslog != rtb->rextslog) {
- xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
+ xchk_ino_set_corrupt(sc, rbmip->i_ino);
return 0;
}
@@ -160,17 +167,17 @@ xchk_rtbitmap(
* case can we exceed 4bn bitmap blocks since the super field is a u32.
*/
if (rtb->rbmblocks > U32_MAX) {
- xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
+ xchk_ino_set_corrupt(sc, rbmip->i_ino);
return 0;
}
if (mp->m_sb.sb_rbmblocks != rtb->rbmblocks) {
- xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
+ xchk_ino_set_corrupt(sc, rbmip->i_ino);
return 0;
}
/* The bitmap file length must be aligned to an fsblock. */
- if (mp->m_rbmip->i_disk_size & mp->m_blockmask) {
- xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
+ if (rbmip->i_disk_size & mp->m_blockmask) {
+ xchk_ino_set_corrupt(sc, rbmip->i_ino);
return 0;
}
@@ -179,8 +186,8 @@ xchk_rtbitmap(
* growfsrt expands the bitmap file before updating sb_rextents, so the
* file can be larger than sb_rbmblocks.
*/
- if (mp->m_rbmip->i_disk_size < XFS_FSB_TO_B(mp, rtb->rbmblocks)) {
- xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
+ if (rbmip->i_disk_size < XFS_FSB_TO_B(mp, rtb->rbmblocks)) {
+ xchk_ino_set_corrupt(sc, rbmip->i_ino);
return 0;
}
@@ -193,7 +200,7 @@ xchk_rtbitmap(
if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
return error;
- error = xfs_rtalloc_query_all(mp, sc->tp, xchk_rtbitmap_rec, sc);
+ error = xfs_rtalloc_query_all(rtg, sc->tp, xchk_rtbitmap_rec, sc);
if (!xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error))
return error;
@@ -207,6 +214,8 @@ xchk_xref_is_used_rt_space(
xfs_rtblock_t rtbno,
xfs_extlen_t len)
{
+ struct xfs_rtgroup *rtg = sc->sr.rtg;
+ struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
xfs_rtxnum_t startext;
xfs_rtxnum_t endext;
bool is_free;
@@ -217,13 +226,10 @@ xchk_xref_is_used_rt_space(
startext = xfs_rtb_to_rtx(sc->mp, rtbno);
endext = xfs_rtb_to_rtx(sc->mp, rtbno + len - 1);
- xfs_ilock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
- error = xfs_rtalloc_extent_is_free(sc->mp, sc->tp, startext,
+ error = xfs_rtalloc_extent_is_free(rtg, sc->tp, startext,
endext - startext + 1, &is_free);
if (!xchk_should_check_xref(sc, &error, NULL))
- goto out_unlock;
+ return;
if (is_free)
- xchk_ino_xref_set_corrupt(sc, sc->mp->m_rbmip->i_ino);
-out_unlock:
- xfs_iunlock(sc->mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
+ xchk_ino_xref_set_corrupt(sc, rbmip->i_ino);
}
diff --git a/fs/xfs/scrub/rtsummary.c b/fs/xfs/scrub/rtsummary.c
index 7c7366c98338..49fc6250bafc 100644
--- a/fs/xfs/scrub/rtsummary.c
+++ b/fs/xfs/scrub/rtsummary.c
@@ -18,6 +18,7 @@
#include "xfs_bmap.h"
#include "xfs_sb.h"
#include "xfs_exchmaps.h"
+#include "xfs_rtgroup.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -46,12 +47,19 @@ xchk_setup_rtsummary(
struct xchk_rtsummary *rts;
int error;
+ if (xchk_need_intent_drain(sc))
+ xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);
+
rts = kvzalloc(struct_size(rts, words, mp->m_blockwsize),
XCHK_GFP_FLAGS);
if (!rts)
return -ENOMEM;
sc->buf = rts;
+ error = xchk_rtgroup_init(sc, sc->sm->sm_agno, &sc->sr);
+ if (error)
+ return error;
+
if (xchk_could_repair(sc)) {
error = xrep_setup_rtsummary(sc, rts);
if (error)
@@ -73,7 +81,8 @@ xchk_setup_rtsummary(
if (error)
return error;
- error = xchk_install_live_inode(sc, mp->m_rsumip);
+ error = xchk_install_live_inode(sc,
+ sc->sr.rtg->rtg_inodes[XFS_RTGI_SUMMARY]);
if (error)
return error;
@@ -82,29 +91,23 @@ xchk_setup_rtsummary(
return error;
/*
- * Locking order requires us to take the rtbitmap first. We must be
- * careful to unlock it ourselves when we are done with the rtbitmap
- * file since the scrub infrastructure won't do that for us. Only
- * then we can lock the rtsummary inode.
- */
- xfs_ilock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
- xchk_ilock(sc, XFS_ILOCK_EXCL | XFS_ILOCK_RTSUM);
-
- /*
* Now that we've locked the rtbitmap and rtsummary, we can't race with
* growfsrt trying to expand the summary or change the size of the rt
* volume. Hence it is safe to compute and check the geometry values.
+ *
+ * Note that there is no strict requirement for an exclusive lock on the
+ * summary here, but to keep the locking APIs simple we lock both inodes
+ * exclusively. If we ever start caring about running concurrent
+ * fsmap with scrub this could be changed.
*/
+ xchk_rtgroup_lock(&sc->sr, XFS_RTGLOCK_BITMAP);
if (mp->m_sb.sb_rblocks) {
- int rextslog;
-
- rts->rextents = xfs_rtb_to_rtx(mp, mp->m_sb.sb_rblocks);
- rextslog = xfs_compute_rextslog(rts->rextents);
- rts->rsumlevels = rextslog + 1;
- rts->rbmblocks = xfs_rtbitmap_blockcount(mp, rts->rextents);
- rts->rsumblocks = xfs_rtsummary_blockcount(mp, rts->rsumlevels,
- rts->rbmblocks);
+ rts->rextents = xfs_blen_to_rtbxlen(mp, mp->m_sb.sb_rblocks);
+ rts->rbmblocks = xfs_rtbitmap_blockcount(mp);
+ rts->rsumblocks =
+ xfs_rtsummary_blockcount(mp, &rts->rsumlevels);
}
+
return 0;
}
@@ -148,6 +151,11 @@ xchk_rtsum_inc(
struct xfs_mount *mp,
union xfs_suminfo_raw *v)
{
+ if (xfs_has_rtgroups(mp)) {
+ be32_add_cpu(&v->rtg, 1);
+ return be32_to_cpu(v->rtg);
+ }
+
v->old += 1;
return v->old;
}
@@ -155,11 +163,12 @@ xchk_rtsum_inc(
/* Update the summary file to reflect the free extent that we've accumulated. */
STATIC int
xchk_rtsum_record_free(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
struct xfs_trans *tp,
const struct xfs_rtalloc_rec *rec,
void *priv)
{
+ struct xfs_mount *mp = rtg_mount(rtg);
struct xfs_scrub *sc = priv;
xfs_fileoff_t rbmoff;
xfs_rtblock_t rtbno;
@@ -178,11 +187,12 @@ xchk_rtsum_record_free(
lenlog = xfs_highbit64(rec->ar_extcount);
offs = xfs_rtsumoffs(mp, lenlog, rbmoff);
- rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext);
- rtlen = xfs_rtx_to_rtb(mp, rec->ar_extcount);
+ rtbno = xfs_rtx_to_rtb(rtg, rec->ar_startext);
+ rtlen = xfs_rtxlen_to_extlen(mp, rec->ar_extcount);
if (!xfs_verify_rtbext(mp, rtbno, rtlen)) {
- xchk_ino_xref_set_corrupt(sc, mp->m_rbmip->i_ino);
+ xchk_ino_xref_set_corrupt(sc,
+ rtg->rtg_inodes[XFS_RTGI_BITMAP]->i_ino);
return -EFSCORRUPTED;
}
@@ -204,15 +214,14 @@ xchk_rtsum_compute(
struct xfs_scrub *sc)
{
struct xfs_mount *mp = sc->mp;
- unsigned long long rtbmp_blocks;
+ struct xfs_rtgroup *rtg = sc->sr.rtg;
/* If the bitmap size doesn't match the computed size, bail. */
- rtbmp_blocks = xfs_rtbitmap_blockcount(mp, mp->m_sb.sb_rextents);
- if (XFS_FSB_TO_B(mp, rtbmp_blocks) != mp->m_rbmip->i_disk_size)
+ if (XFS_FSB_TO_B(mp, xfs_rtbitmap_blockcount(mp)) !=
+ rtg->rtg_inodes[XFS_RTGI_BITMAP]->i_disk_size)
return -EFSCORRUPTED;
- return xfs_rtalloc_query_all(sc->mp, sc->tp, xchk_rtsum_record_free,
- sc);
+ return xfs_rtalloc_query_all(rtg, sc->tp, xchk_rtsum_record_free, sc);
}
/* Compare the rtsummary file against the one we computed. */
@@ -231,8 +240,9 @@ xchk_rtsum_compare(
xfs_rtsumoff_t sumoff = 0;
int error = 0;
- rts->args.mp = sc->mp;
+ rts->args.mp = mp;
rts->args.tp = sc->tp;
+ rts->args.rtg = sc->sr.rtg;
/* Mappings may not cross or lie beyond EOF. */
endoff = XFS_B_TO_FSB(mp, ip->i_disk_size);
@@ -299,31 +309,34 @@ xchk_rtsummary(
struct xfs_scrub *sc)
{
struct xfs_mount *mp = sc->mp;
+ struct xfs_rtgroup *rtg = sc->sr.rtg;
+ struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
+ struct xfs_inode *rsumip = rtg->rtg_inodes[XFS_RTGI_SUMMARY];
struct xchk_rtsummary *rts = sc->buf;
- int error = 0;
+ int error;
/* Is sb_rextents correct? */
if (mp->m_sb.sb_rextents != rts->rextents) {
- xchk_ino_set_corrupt(sc, mp->m_rbmip->i_ino);
- goto out_rbm;
+ xchk_ino_set_corrupt(sc, rbmip->i_ino);
+ return 0;
}
/* Is m_rsumlevels correct? */
if (mp->m_rsumlevels != rts->rsumlevels) {
- xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino);
- goto out_rbm;
+ xchk_ino_set_corrupt(sc, rsumip->i_ino);
+ return 0;
}
/* Is m_rsumsize correct? */
if (mp->m_rsumblocks != rts->rsumblocks) {
- xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino);
- goto out_rbm;
+ xchk_ino_set_corrupt(sc, rsumip->i_ino);
+ return 0;
}
/* The summary file length must be aligned to an fsblock. */
- if (mp->m_rsumip->i_disk_size & mp->m_blockmask) {
- xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino);
- goto out_rbm;
+ if (rsumip->i_disk_size & mp->m_blockmask) {
+ xchk_ino_set_corrupt(sc, rsumip->i_ino);
+ return 0;
}
/*
@@ -331,15 +344,15 @@ xchk_rtsummary(
* growfsrt expands the summary file before updating sb_rextents, so
* the file can be larger than rsumsize.
*/
- if (mp->m_rsumip->i_disk_size < XFS_FSB_TO_B(mp, rts->rsumblocks)) {
- xchk_ino_set_corrupt(sc, mp->m_rsumip->i_ino);
- goto out_rbm;
+ if (rsumip->i_disk_size < XFS_FSB_TO_B(mp, rts->rsumblocks)) {
+ xchk_ino_set_corrupt(sc, rsumip->i_ino);
+ return 0;
}
/* Invoke the fork scrubber. */
error = xchk_metadata_inode_forks(sc);
if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
- goto out_rbm;
+ return error;
/* Construct the new summary file from the rtbitmap. */
error = xchk_rtsum_compute(sc);
@@ -348,23 +361,12 @@ xchk_rtsummary(
* EFSCORRUPTED means the rtbitmap is corrupt, which is an xref
* error since we're checking the summary file.
*/
- xchk_ino_xref_set_corrupt(sc, mp->m_rbmip->i_ino);
- error = 0;
- goto out_rbm;
+ xchk_ino_set_corrupt(sc, rbmip->i_ino);
+ return 0;
}
if (error)
- goto out_rbm;
+ return error;
/* Does the computed summary file match the actual rtsummary file? */
- error = xchk_rtsum_compare(sc);
-
-out_rbm:
- /*
- * Unlock the rtbitmap since we're done with it. All other writers of
- * the rt free space metadata grab the bitmap and summary ILOCKs in
- * that order, so we're still protected against allocation activities
- * even if we continue on to the repair function.
- */
- xfs_iunlock(mp->m_rbmip, XFS_ILOCK_SHARED | XFS_ILOCK_RTBITMAP);
- return error;
+ return xchk_rtsum_compare(sc);
}
diff --git a/fs/xfs/scrub/rtsummary_repair.c b/fs/xfs/scrub/rtsummary_repair.c
index 7deeb948cb70..8198ea84ad70 100644
--- a/fs/xfs/scrub/rtsummary_repair.c
+++ b/fs/xfs/scrub/rtsummary_repair.c
@@ -76,18 +76,30 @@ xrep_rtsummary_prep_buf(
union xfs_suminfo_raw *ondisk;
int error;
- rts->args.mp = sc->mp;
+ rts->args.mp = mp;
rts->args.tp = sc->tp;
+ rts->args.rtg = sc->sr.rtg;
rts->args.sumbp = bp;
ondisk = xfs_rsumblock_infoptr(&rts->args, 0);
rts->args.sumbp = NULL;
- bp->b_ops = &xfs_rtbuf_ops;
-
error = xfsum_copyout(sc, rts->prep_wordoff, ondisk, mp->m_blockwsize);
if (error)
return error;
+ if (xfs_has_rtgroups(sc->mp)) {
+ struct xfs_rtbuf_blkinfo *hdr = bp->b_addr;
+
+ hdr->rt_magic = cpu_to_be32(XFS_RTSUMMARY_MAGIC);
+ hdr->rt_owner = cpu_to_be64(sc->ip->i_ino);
+ hdr->rt_blkno = cpu_to_be64(xfs_buf_daddr(bp));
+ hdr->rt_lsn = 0;
+ uuid_copy(&hdr->rt_uuid, &sc->mp->m_sb.sb_meta_uuid);
+ bp->b_ops = &xfs_rtsummary_buf_ops;
+ } else {
+ bp->b_ops = &xfs_rtbuf_ops;
+ }
+
rts->prep_wordoff += mp->m_blockwsize;
xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTSUMMARY_BUF);
return 0;
@@ -162,8 +174,8 @@ xrep_rtsummary(
return error;
/* Reset incore state and blow out the summary cache. */
- if (mp->m_rsum_cache)
- memset(mp->m_rsum_cache, 0xFF, mp->m_sb.sb_rbmblocks);
+ if (sc->sr.rtg->rtg_rsum_cache)
+ memset(sc->sr.rtg->rtg_rsum_cache, 0xFF, mp->m_sb.sb_rbmblocks);
mp->m_rsumlevels = rts->rsumlevels;
mp->m_rsumblocks = rts->rsumblocks;
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 4cbcf7a86dbe..950f5a58dcd9 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -225,6 +225,8 @@ xchk_teardown(
xfs_trans_cancel(sc->tp);
sc->tp = NULL;
}
+ if (sc->sr.rtg)
+ xchk_rtgroup_free(sc, &sc->sr);
if (sc->ip) {
if (sc->ilock_flags)
xchk_iunlock(sc, sc->ilock_flags);
@@ -382,13 +384,13 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.repair = xrep_parent,
},
[XFS_SCRUB_TYPE_RTBITMAP] = { /* realtime bitmap */
- .type = ST_FS,
+ .type = ST_RTGROUP,
.setup = xchk_setup_rtbitmap,
.scrub = xchk_rtbitmap,
.repair = xrep_rtbitmap,
},
[XFS_SCRUB_TYPE_RTSUM] = { /* realtime summary */
- .type = ST_FS,
+ .type = ST_RTGROUP,
.setup = xchk_setup_rtsummary,
.scrub = xchk_rtsummary,
.repair = xrep_rtsummary,
@@ -442,6 +444,20 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.has = xfs_has_parent,
.repair = xrep_dirtree,
},
+ [XFS_SCRUB_TYPE_METAPATH] = { /* metadata directory tree path */
+ .type = ST_GENERIC,
+ .setup = xchk_setup_metapath,
+ .scrub = xchk_metapath,
+ .has = xfs_has_metadir,
+ .repair = xrep_metapath,
+ },
+ [XFS_SCRUB_TYPE_RGSUPER] = { /* realtime group superblock */
+ .type = ST_RTGROUP,
+ .setup = xchk_setup_rgsuperblock,
+ .scrub = xchk_rgsuperblock,
+ .has = xfs_has_rtsb,
+ .repair = xrep_rgsuperblock,
+ },
};
static int
@@ -489,6 +505,35 @@ xchk_validate_inputs(
if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino))
goto out;
break;
+ case ST_GENERIC:
+ break;
+ case ST_RTGROUP:
+ if (sm->sm_ino || sm->sm_gen)
+ goto out;
+ if (xfs_has_rtgroups(mp)) {
+ /*
+ * On a rtgroups filesystem, there won't be an rtbitmap
+ * or rtsummary file for group 0 unless there's
+ * actually a realtime volume attached. However, older
+ * xfs_scrub always calls the rtbitmap/rtsummary
+ * scrubbers with sm_agno==0 so transform the error
+ * code to ENOENT.
+ */
+ if (sm->sm_agno >= mp->m_sb.sb_rgcount) {
+ if (sm->sm_agno == 0)
+ error = -ENOENT;
+ goto out;
+ }
+ } else {
+ /*
+ * Prior to rtgroups, the rtbitmap/rtsummary scrubbers
+ * accepted sm_agno==0, so we still accept that for
+ * scrubbing pre-rtgroups filesystems.
+ */
+ if (sm->sm_agno != 0)
+ goto out;
+ }
+ break;
default:
goto out;
}
@@ -605,8 +650,7 @@ xfs_scrub_metadata(
if (error)
goto out;
- xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB,
- "EXPERIMENTAL online scrub feature in use. Use at your own risk!");
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_SCRUB);
sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS);
if (!sc) {
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 5993fcaffb2c..a7fda3e2b013 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -73,6 +73,8 @@ enum xchk_type {
ST_PERAG, /* per-AG metadata */
ST_FS, /* per-FS metadata */
ST_INODE, /* per-inode metadata */
+ ST_GENERIC, /* determined by the scrubber */
+ ST_RTGROUP, /* rtgroup metadata */
};
struct xchk_meta_ops {
@@ -117,6 +119,15 @@ struct xchk_ag {
struct xfs_btree_cur *refc_cur;
};
+/*
+ * Realtime group state for a scrub operation: the incore rtgroup that is
+ * being examined (if any) and a record of which XFS_RTGLOCK_* locks are
+ * currently held on it.
+ */
+struct xchk_rt {
+ /* incore rtgroup, if applicable */
+ struct xfs_rtgroup *rtg;
+
+ /* XFS_RTGLOCK_* lock state if locked */
+ unsigned int rtlock_flags;
+};
+
struct xfs_scrub {
/* General scrub state. */
struct xfs_mount *mp;
@@ -178,6 +189,9 @@ struct xfs_scrub {
/* State tracking for single-AG operations. */
struct xchk_ag sa;
+
+ /* State tracking for realtime operations. */
+ struct xchk_rt sr;
};
/* XCHK state flags grow up from zero, XREP state flags grow down from 2^31 */
@@ -255,12 +269,15 @@ int xchk_xattr(struct xfs_scrub *sc);
int xchk_symlink(struct xfs_scrub *sc);
int xchk_parent(struct xfs_scrub *sc);
int xchk_dirtree(struct xfs_scrub *sc);
+int xchk_metapath(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xchk_rtbitmap(struct xfs_scrub *sc);
int xchk_rtsummary(struct xfs_scrub *sc);
+int xchk_rgsuperblock(struct xfs_scrub *sc);
#else
# define xchk_rtbitmap xchk_nothing
# define xchk_rtsummary xchk_nothing
+# define xchk_rgsuperblock xchk_nothing
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_quota(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c
index 7996c2335476..a476c7b2ab75 100644
--- a/fs/xfs/scrub/stats.c
+++ b/fs/xfs/scrub/stats.c
@@ -80,6 +80,8 @@ static const char *name_map[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_QUOTACHECK] = "quotacheck",
[XFS_SCRUB_TYPE_NLINKS] = "nlinks",
[XFS_SCRUB_TYPE_DIRTREE] = "dirtree",
+ [XFS_SCRUB_TYPE_METAPATH] = "metapath",
+ [XFS_SCRUB_TYPE_RGSUPER] = "rgsuper",
};
/* Format the scrub stats into a text buffer, similar to pcp style. */
diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
index 177f922acfaf..4b7f7860e37e 100644
--- a/fs/xfs/scrub/tempfile.c
+++ b/fs/xfs/scrub/tempfile.c
@@ -22,6 +22,7 @@
#include "xfs_exchmaps.h"
#include "xfs_defer.h"
#include "xfs_symlink_remote.h"
+#include "xfs_metafile.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
@@ -182,6 +183,101 @@ out_release_dquots:
return error;
}
+/*
+ * Temporary files have to be created before we even know which inode we're
+ * going to scrub, so we assume that they will be part of the regular directory
+ * tree. If it turns out that we're actually scrubbing a file from the
+ * metadata directory tree, we have to subtract the temp file from the root
+ * dquots and detach the dquots.
+ *
+ * This is a no-op (returns 0) when there is no temporary file, or when the
+ * inode being scrubbed (sc->ip) is absent or is not a metadir inode.
+ * Otherwise the tempfile IOLOCK is taken, a transaction is allocated, the
+ * tempfile ILOCK is taken, and within that transaction the inode count is
+ * dropped by one in the dquots and the tempfile is flagged as a metadata file
+ * of type XFS_METAFILE_UNKNOWN. The dquots are detached only if the commit
+ * succeeds.
+ *
+ * The caller must not hold a transaction, and the tempfile must not already
+ * be a metadir inode (both are asserted). The unlock helpers for the
+ * tempfile ILOCK and IOLOCK are called on the way out, on success and on
+ * failure alike. Returns 0, or the error from transaction allocation or
+ * commit.
+ */
+int
+xrep_tempfile_adjust_directory_tree(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ if (!sc->tempip)
+ return 0;
+
+ ASSERT(sc->tp == NULL);
+ ASSERT(!xfs_is_metadir_inode(sc->tempip));
+
+ if (!sc->ip || !xfs_is_metadir_inode(sc->ip))
+ return 0;
+
+ xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL);
+ sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
+
+ error = xchk_trans_alloc(sc, 0);
+ if (error)
+ goto out_iolock;
+
+ xrep_tempfile_ilock(sc);
+ xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+
+ /* Metadir files are not accounted in quota, so drop icount */
+ xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, -1L);
+ xfs_metafile_set_iflag(sc->tp, sc->tempip, XFS_METAFILE_UNKNOWN);
+
+ error = xrep_trans_commit(sc);
+ if (error)
+ goto out_ilock;
+
+ xfs_qm_dqdetach(sc->tempip);
+out_ilock:
+ xrep_tempfile_iunlock(sc);
+out_iolock:
+ xrep_tempfile_iounlock(sc);
+ return error;
+}
+
+/*
+ * Remove this temporary file from the metadata directory tree so that it can
+ * be inactivated the normal way.
+ *
+ * Does nothing (returns 0) unless a temporary file exists and is currently a
+ * metadir inode. Otherwise the tempfile IOLOCK is taken, a transaction is
+ * allocated, the tempfile ILOCK is taken, and within that transaction the
+ * metafile flag is cleared, dquots are attached, and the inode plus its
+ * i_nblocks blocks are added back to the dquot counts before committing.
+ * If attaching the dquots fails, the transaction is cancelled instead.
+ *
+ * The caller must not hold a transaction (asserted). The unlock helpers for
+ * the tempfile ILOCK and IOLOCK are called on every exit path past the
+ * transaction allocation. Returns 0, or the error from transaction
+ * allocation, dquot attachment, or commit.
+ */
+STATIC int
+xrep_tempfile_remove_metadir(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ if (!sc->tempip || !xfs_is_metadir_inode(sc->tempip))
+ return 0;
+
+ ASSERT(sc->tp == NULL);
+
+ xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL);
+ sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
+
+ error = xchk_trans_alloc(sc, 0);
+ if (error)
+ goto out_iolock;
+
+ xrep_tempfile_ilock(sc);
+ xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+
+ xfs_metafile_clear_iflag(sc->tp, sc->tempip);
+
+ /* Non-metadir files are accounted in quota, so bump bcount/icount */
+ error = xfs_qm_dqattach_locked(sc->tempip, false);
+ if (error)
+ goto out_cancel;
+
+ xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, 1L);
+ xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_BCOUNT,
+ sc->tempip->i_nblocks);
+ error = xrep_trans_commit(sc);
+ goto out_ilock;
+
+out_cancel:
+ xchk_trans_cancel(sc);
+out_ilock:
+ xrep_tempfile_iunlock(sc);
+out_iolock:
+ xrep_tempfile_iounlock(sc);
+ return error;
+}
+
/* Take IOLOCK_EXCL on the temporary file, maybe. */
bool
xrep_tempfile_iolock_nowait(
@@ -290,6 +386,7 @@ xrep_tempfile_rele(
sc->temp_ilock_flags = 0;
}
+ xrep_tempfile_remove_metadir(sc);
xchk_irele(sc, sc->tempip);
sc->tempip = NULL;
}
@@ -844,6 +941,14 @@ xrep_is_tempfile(
const struct xfs_inode *ip)
{
const struct inode *inode = &ip->i_vnode;
+ struct xfs_mount *mp = ip->i_mount;
+
+ /*
+ * Files in the metadata directory tree also have S_PRIVATE set and
+ * IOP_XATTR unset, so we must distinguish them separately.
+ */
+ if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADATA))
+ return false;
if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
return true;
diff --git a/fs/xfs/scrub/tempfile.h b/fs/xfs/scrub/tempfile.h
index e51399f595fe..71c1b54599c3 100644
--- a/fs/xfs/scrub/tempfile.h
+++ b/fs/xfs/scrub/tempfile.h
@@ -10,6 +10,8 @@
int xrep_tempfile_create(struct xfs_scrub *sc, uint16_t mode);
void xrep_tempfile_rele(struct xfs_scrub *sc);
+int xrep_tempfile_adjust_directory_tree(struct xfs_scrub *sc);
+
bool xrep_tempfile_iolock_nowait(struct xfs_scrub *sc);
int xrep_tempfile_iolock_polled(struct xfs_scrub *sc);
void xrep_tempfile_iounlock(struct xfs_scrub *sc);
@@ -42,6 +44,7 @@ static inline void xrep_tempfile_iolock_both(struct xfs_scrub *sc)
xchk_ilock(sc, XFS_IOLOCK_EXCL);
}
# define xrep_is_tempfile(ip) (false)
+# define xrep_tempfile_adjust_directory_tree(sc) (0)
# define xrep_tempfile_rele(sc)
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 4470ad0533b8..98f923ae664d 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -20,6 +20,7 @@
#include "xfs_dir2.h"
#include "xfs_rmap.h"
#include "xfs_parent.h"
+#include "xfs_metafile.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index c886d5d0eb02..9b38f5ad1eaf 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -70,6 +70,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_NLINKS);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_HEALTHY);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_DIRTREE);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BARRIER);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_METAPATH);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_RGSUPER);
#define XFS_SCRUB_TYPE_STRINGS \
{ XFS_SCRUB_TYPE_PROBE, "probe" }, \
@@ -101,7 +103,9 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BARRIER);
{ XFS_SCRUB_TYPE_NLINKS, "nlinks" }, \
{ XFS_SCRUB_TYPE_HEALTHY, "healthy" }, \
{ XFS_SCRUB_TYPE_DIRTREE, "dirtree" }, \
- { XFS_SCRUB_TYPE_BARRIER, "barrier" }
+ { XFS_SCRUB_TYPE_BARRIER, "barrier" }, \
+ { XFS_SCRUB_TYPE_METAPATH, "metapath" }, \
+ { XFS_SCRUB_TYPE_RGSUPER, "rgsuper" }
#define XFS_SCRUB_FLAG_STRINGS \
{ XFS_SCRUB_IFLAG_REPAIR, "repair" }, \
@@ -772,12 +776,12 @@ TRACE_EVENT(xchk_xref_error,
);
TRACE_EVENT(xchk_iallocbt_check_cluster,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agino_t startino, xfs_daddr_t map_daddr,
- unsigned short map_len, unsigned int chunk_ino,
- unsigned int nr_inodes, uint16_t cluster_mask,
- uint16_t holemask, unsigned int cluster_ino),
- TP_ARGS(mp, agno, startino, map_daddr, map_len, chunk_ino, nr_inodes,
+ TP_PROTO(const struct xfs_perag *pag, xfs_agino_t startino,
+ xfs_daddr_t map_daddr, unsigned short map_len,
+ unsigned int chunk_ino, unsigned int nr_inodes,
+ uint16_t cluster_mask, uint16_t holemask,
+ unsigned int cluster_ino),
+ TP_ARGS(pag, startino, map_daddr, map_len, chunk_ino, nr_inodes,
cluster_mask, holemask, cluster_ino),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -792,8 +796,8 @@ TRACE_EVENT(xchk_iallocbt_check_cluster,
__field(uint16_t, holemask)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->startino = startino;
__entry->map_daddr = map_daddr;
__entry->map_len = map_len;
@@ -922,7 +926,8 @@ DEFINE_XCHK_FSFREEZE_EVENT(xchk_fsfreeze);
DEFINE_XCHK_FSFREEZE_EVENT(xchk_fsthaw);
TRACE_EVENT(xchk_refcount_incorrect,
- TP_PROTO(struct xfs_perag *pag, const struct xfs_refcount_irec *irec,
+ TP_PROTO(const struct xfs_perag *pag,
+ const struct xfs_refcount_irec *irec,
xfs_nlink_t seen),
TP_ARGS(pag, irec, seen),
TP_STRUCT__entry(
@@ -935,8 +940,8 @@ TRACE_EVENT(xchk_refcount_incorrect,
__field(xfs_nlink_t, seen)
),
TP_fast_assign(
- __entry->dev = pag->pag_mount->m_super->s_dev;
- __entry->agno = pag->pag_agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->domain = irec->rc_domain;
__entry->startblock = irec->rc_startblock;
__entry->blockcount = irec->rc_blockcount;
@@ -1752,6 +1757,7 @@ DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_badgen);
DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_nondir_parent);
DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_unlinked_parent);
DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_found_next_step);
+DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_crosses_tree);
TRACE_DEFINE_ENUM(XCHK_DIRPATH_SCANNING);
TRACE_DEFINE_ENUM(XCHK_DIRPATH_DELETE);
@@ -1914,11 +1920,44 @@ TRACE_EVENT(xchk_dirtree_live_update,
__get_str(name))
);
+DECLARE_EVENT_CLASS(xchk_metapath_class, /* shared layout for the metapath tracepoints */
+ TP_PROTO(struct xfs_scrub *sc, const char *path,
+ struct xfs_inode *dp, xfs_ino_t ino),
+ TP_ARGS(sc, path, dp, ino),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, scrub_ino) /* sc->ip's inode number, or NULLFSINO when sc->ip is NULL */
+ __field(xfs_ino_t, parent_ino) /* dp's inode number, or NULLFSINO when dp is NULL */
+ __field(xfs_ino_t, ino) /* the ino argument, recorded as passed */
+ __string(name, path) /* copy of the path argument */
+ ),
+ TP_fast_assign(
+ __entry->dev = sc->mp->m_super->s_dev;
+ __entry->scrub_ino = sc->ip ? sc->ip->i_ino : NULLFSINO;
+ __entry->parent_ino = dp ? dp->i_ino : NULLFSINO;
+ __entry->ino = ino;
+ __assign_str(name);
+ ),
+ TP_printk("dev %d:%d ino 0x%llx parent_ino 0x%llx name '%s' ino 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->scrub_ino, /* NOTE(review): printed under the first "ino" label */
+ __entry->parent_ino,
+ __get_str(name),
+ __entry->ino) /* NOTE(review): printed under the trailing "ino" label; both share the same key */
+);
+#define DEFINE_XCHK_METAPATH_EVENT(name) \
+DEFINE_EVENT(xchk_metapath_class, name, \
+ TP_PROTO(struct xfs_scrub *sc, const char *path, \
+ struct xfs_inode *dp, xfs_ino_t ino), \
+ TP_ARGS(sc, path, dp, ino))
+DEFINE_XCHK_METAPATH_EVENT(xchk_metapath_lookup);
+
/* repair tracepoints */
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
DECLARE_EVENT_CLASS(xrep_extent_class,
- TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len),
+ TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno,
+ xfs_extlen_t len),
TP_ARGS(pag, agbno, len),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -1927,8 +1966,8 @@ DECLARE_EVENT_CLASS(xrep_extent_class,
__field(xfs_extlen_t, len)
),
TP_fast_assign(
- __entry->dev = pag->pag_mount->m_super->s_dev;
- __entry->agno = pag->pag_agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->agbno = agbno;
__entry->len = len;
),
@@ -1940,7 +1979,8 @@ DECLARE_EVENT_CLASS(xrep_extent_class,
);
#define DEFINE_REPAIR_EXTENT_EVENT(name) \
DEFINE_EVENT(xrep_extent_class, name, \
- TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len), \
+ TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno, \
+ xfs_extlen_t len), \
TP_ARGS(pag, agbno, len))
DEFINE_REPAIR_EXTENT_EVENT(xreap_dispose_unmap_extent);
DEFINE_REPAIR_EXTENT_EVENT(xreap_dispose_free_extent);
@@ -1949,8 +1989,8 @@ DEFINE_REPAIR_EXTENT_EVENT(xreap_bmapi_binval);
DEFINE_REPAIR_EXTENT_EVENT(xrep_agfl_insert);
DECLARE_EVENT_CLASS(xrep_reap_find_class,
- TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len,
- bool crosslinked),
+ TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno,
+ xfs_extlen_t len, bool crosslinked),
TP_ARGS(pag, agbno, len, crosslinked),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -1960,8 +2000,8 @@ DECLARE_EVENT_CLASS(xrep_reap_find_class,
__field(bool, crosslinked)
),
TP_fast_assign(
- __entry->dev = pag->pag_mount->m_super->s_dev;
- __entry->agno = pag->pag_agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->agbno = agbno;
__entry->len = len;
__entry->crosslinked = crosslinked;
@@ -1975,17 +2015,15 @@ DECLARE_EVENT_CLASS(xrep_reap_find_class,
);
#define DEFINE_REPAIR_REAP_FIND_EVENT(name) \
DEFINE_EVENT(xrep_reap_find_class, name, \
- TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno, xfs_extlen_t len, \
- bool crosslinked), \
+ TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno, \
+ xfs_extlen_t len, bool crosslinked), \
TP_ARGS(pag, agbno, len, crosslinked))
DEFINE_REPAIR_REAP_FIND_EVENT(xreap_agextent_select);
DEFINE_REPAIR_REAP_FIND_EVENT(xreap_bmapi_select);
-DECLARE_EVENT_CLASS(xrep_rmap_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t agbno, xfs_extlen_t len,
- uint64_t owner, uint64_t offset, unsigned int flags),
- TP_ARGS(mp, agno, agbno, len, owner, offset, flags),
+TRACE_EVENT(xrep_ibt_walk_rmap,
+ TP_PROTO(const struct xfs_perag *pag, const struct xfs_rmap_irec *rec),
+ TP_ARGS(pag, rec),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -1996,13 +2034,13 @@ DECLARE_EVENT_CLASS(xrep_rmap_class,
__field(unsigned int, flags)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- __entry->agbno = agbno;
- __entry->len = len;
- __entry->owner = owner;
- __entry->offset = offset;
- __entry->flags = flags;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
+ __entry->agbno = rec->rm_startblock;
+ __entry->len = rec->rm_blockcount;
+ __entry->owner = rec->rm_owner;
+ __entry->offset = rec->rm_offset;
+ __entry->flags = rec->rm_flags;
),
TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -2013,19 +2051,11 @@ DECLARE_EVENT_CLASS(xrep_rmap_class,
__entry->offset,
__entry->flags)
);
-#define DEFINE_REPAIR_RMAP_EVENT(name) \
-DEFINE_EVENT(xrep_rmap_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- xfs_agblock_t agbno, xfs_extlen_t len, \
- uint64_t owner, uint64_t offset, unsigned int flags), \
- TP_ARGS(mp, agno, agbno, len, owner, offset, flags))
-DEFINE_REPAIR_RMAP_EVENT(xrep_ibt_walk_rmap);
-DEFINE_REPAIR_RMAP_EVENT(xrep_bmap_walk_rmap);
TRACE_EVENT(xrep_abt_found,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ TP_PROTO(const struct xfs_perag *pag,
const struct xfs_alloc_rec_incore *rec),
- TP_ARGS(mp, agno, rec),
+ TP_ARGS(pag, rec),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -2033,8 +2063,8 @@ TRACE_EVENT(xrep_abt_found,
__field(xfs_extlen_t, blockcount)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->startblock = rec->ar_startblock;
__entry->blockcount = rec->ar_blockcount;
),
@@ -2046,9 +2076,9 @@ TRACE_EVENT(xrep_abt_found,
)
TRACE_EVENT(xrep_ibt_found,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ TP_PROTO(const struct xfs_perag *pag,
const struct xfs_inobt_rec_incore *rec),
- TP_ARGS(mp, agno, rec),
+ TP_ARGS(pag, rec),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -2059,8 +2089,8 @@ TRACE_EVENT(xrep_ibt_found,
__field(uint64_t, freemask)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->startino = rec->ir_startino;
__entry->holemask = rec->ir_holemask;
__entry->count = rec->ir_count;
@@ -2078,7 +2108,8 @@ TRACE_EVENT(xrep_ibt_found,
)
TRACE_EVENT(xrep_refc_found,
- TP_PROTO(struct xfs_perag *pag, const struct xfs_refcount_irec *rec),
+ TP_PROTO(const struct xfs_perag *pag,
+ const struct xfs_refcount_irec *rec),
TP_ARGS(pag, rec),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -2089,8 +2120,8 @@ TRACE_EVENT(xrep_refc_found,
__field(xfs_nlink_t, refcount)
),
TP_fast_assign(
- __entry->dev = pag->pag_mount->m_super->s_dev;
- __entry->agno = pag->pag_agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->domain = rec->rc_domain;
__entry->startblock = rec->rc_startblock;
__entry->blockcount = rec->rc_blockcount;
@@ -2138,9 +2169,8 @@ TRACE_EVENT(xrep_bmap_found,
);
TRACE_EVENT(xrep_rmap_found,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- const struct xfs_rmap_irec *rec),
- TP_ARGS(mp, agno, rec),
+ TP_PROTO(const struct xfs_perag *pag, const struct xfs_rmap_irec *rec),
+ TP_ARGS(pag, rec),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -2151,8 +2181,8 @@ TRACE_EVENT(xrep_rmap_found,
__field(unsigned int, flags)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->agbno = rec->rm_startblock;
__entry->len = rec->rm_blockcount;
__entry->owner = rec->rm_owner;
@@ -2170,9 +2200,9 @@ TRACE_EVENT(xrep_rmap_found,
);
TRACE_EVENT(xrep_findroot_block,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
+ TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno,
uint32_t magic, uint16_t level),
- TP_ARGS(mp, agno, agbno, magic, level),
+ TP_ARGS(pag, agbno, magic, level),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -2181,8 +2211,8 @@ TRACE_EVENT(xrep_findroot_block,
__field(uint16_t, level)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->agbno = agbno;
__entry->magic = magic;
__entry->level = level;
@@ -2195,10 +2225,10 @@ TRACE_EVENT(xrep_findroot_block,
__entry->level)
)
TRACE_EVENT(xrep_calc_ag_resblks,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agino_t icount, xfs_agblock_t aglen, xfs_agblock_t freelen,
+ TP_PROTO(const struct xfs_perag *pag, xfs_agino_t icount,
+ xfs_agblock_t aglen, xfs_agblock_t freelen,
xfs_agblock_t usedlen),
- TP_ARGS(mp, agno, icount, aglen, freelen, usedlen),
+ TP_ARGS(pag, icount, aglen, freelen, usedlen),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -2208,8 +2238,8 @@ TRACE_EVENT(xrep_calc_ag_resblks,
__field(xfs_agblock_t, usedlen)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->icount = icount;
__entry->aglen = aglen;
__entry->freelen = freelen;
@@ -2224,10 +2254,10 @@ TRACE_EVENT(xrep_calc_ag_resblks,
__entry->usedlen)
)
TRACE_EVENT(xrep_calc_ag_resblks_btsize,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t bnobt_sz, xfs_agblock_t inobt_sz,
- xfs_agblock_t rmapbt_sz, xfs_agblock_t refcbt_sz),
- TP_ARGS(mp, agno, bnobt_sz, inobt_sz, rmapbt_sz, refcbt_sz),
+ TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t bnobt_sz,
+ xfs_agblock_t inobt_sz, xfs_agblock_t rmapbt_sz,
+ xfs_agblock_t refcbt_sz),
+ TP_ARGS(pag, bnobt_sz, inobt_sz, rmapbt_sz, refcbt_sz),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -2237,8 +2267,8 @@ TRACE_EVENT(xrep_calc_ag_resblks_btsize,
__field(xfs_agblock_t, refcbt_sz)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->bnobt_sz = bnobt_sz;
__entry->inobt_sz = inobt_sz;
__entry->rmapbt_sz = rmapbt_sz;
@@ -2278,10 +2308,9 @@ TRACE_EVENT(xrep_reset_counters,
)
DECLARE_EVENT_CLASS(xrep_newbt_extent_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t agbno, xfs_extlen_t len,
- int64_t owner),
- TP_ARGS(mp, agno, agbno, len, owner),
+ TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno,
+ xfs_extlen_t len, int64_t owner),
+ TP_ARGS(pag, agbno, len, owner),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -2290,8 +2319,8 @@ DECLARE_EVENT_CLASS(xrep_newbt_extent_class,
__field(int64_t, owner)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->agbno = agbno;
__entry->len = len;
__entry->owner = owner;
@@ -2305,10 +2334,9 @@ DECLARE_EVENT_CLASS(xrep_newbt_extent_class,
);
#define DEFINE_NEWBT_EXTENT_EVENT(name) \
DEFINE_EVENT(xrep_newbt_extent_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- xfs_agblock_t agbno, xfs_extlen_t len, \
- int64_t owner), \
- TP_ARGS(mp, agno, agbno, len, owner))
+ TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno, \
+ xfs_extlen_t len, int64_t owner), \
+ TP_ARGS(pag, agbno, len, owner))
DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_alloc_ag_blocks);
DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_alloc_file_blocks);
DEFINE_NEWBT_EXTENT_EVENT(xrep_newbt_free_blocks);
@@ -2596,7 +2624,7 @@ TRACE_EVENT(xrep_cow_replace_mapping,
);
TRACE_EVENT(xrep_cow_free_staging,
- TP_PROTO(struct xfs_perag *pag, xfs_agblock_t agbno,
+ TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno,
xfs_extlen_t blockcount),
TP_ARGS(pag, agbno, blockcount),
TP_STRUCT__entry(
@@ -2606,8 +2634,8 @@ TRACE_EVENT(xrep_cow_free_staging,
__field(xfs_extlen_t, blockcount)
),
TP_fast_assign(
- __entry->dev = pag->pag_mount->m_super->s_dev;
- __entry->agno = pag->pag_agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->agbno = agbno;
__entry->blockcount = blockcount;
),
@@ -2652,9 +2680,9 @@ DEFINE_SCRUB_NLINKS_DIFF_EVENT(xrep_nlinks_update_inode);
DEFINE_SCRUB_NLINKS_DIFF_EVENT(xrep_nlinks_unfixable_inode);
TRACE_EVENT(xrep_rmap_live_update,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int op,
+ TP_PROTO(const struct xfs_perag *pag, unsigned int op,
const struct xfs_rmap_update_params *p),
- TP_ARGS(mp, agno, op, p),
+ TP_ARGS(pag, op, p),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -2666,8 +2694,8 @@ TRACE_EVENT(xrep_rmap_live_update,
__field(unsigned int, flags)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->op = op;
__entry->agbno = p->startblock;
__entry->len = p->blockcount;
@@ -3313,7 +3341,7 @@ DEFINE_XREP_SYMLINK_EVENT(xrep_symlink_rebuild);
DEFINE_XREP_SYMLINK_EVENT(xrep_symlink_reset_fork);
TRACE_EVENT(xrep_iunlink_visit,
- TP_PROTO(struct xfs_perag *pag, unsigned int bucket,
+ TP_PROTO(const struct xfs_perag *pag, unsigned int bucket,
xfs_agino_t bucket_agino, struct xfs_inode *ip),
TP_ARGS(pag, bucket, bucket_agino, ip),
TP_STRUCT__entry(
@@ -3326,9 +3354,9 @@ TRACE_EVENT(xrep_iunlink_visit,
__field(xfs_agino_t, next_agino)
),
TP_fast_assign(
- __entry->dev = pag->pag_mount->m_super->s_dev;
- __entry->agno = pag->pag_agno;
- __entry->agino = XFS_INO_TO_AGINO(pag->pag_mount, ip->i_ino);
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
+ __entry->agino = XFS_INO_TO_AGINO(pag_mount(pag), ip->i_ino);
__entry->bucket = bucket;
__entry->bucket_agino = bucket_agino;
__entry->prev_agino = ip->i_prev_unlinked;
@@ -3403,7 +3431,7 @@ TRACE_EVENT(xrep_iunlink_reload_ondisk,
);
TRACE_EVENT(xrep_iunlink_walk_ondisk_bucket,
- TP_PROTO(struct xfs_perag *pag, unsigned int bucket,
+ TP_PROTO(const struct xfs_perag *pag, unsigned int bucket,
xfs_agino_t prev_agino, xfs_agino_t next_agino),
TP_ARGS(pag, bucket, prev_agino, next_agino),
TP_STRUCT__entry(
@@ -3414,8 +3442,8 @@ TRACE_EVENT(xrep_iunlink_walk_ondisk_bucket,
__field(xfs_agino_t, next_agino)
),
TP_fast_assign(
- __entry->dev = pag->pag_mount->m_super->s_dev;
- __entry->agno = pag->pag_agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->bucket = bucket;
__entry->prev_agino = prev_agino;
__entry->next_agino = next_agino;
@@ -3429,7 +3457,7 @@ TRACE_EVENT(xrep_iunlink_walk_ondisk_bucket,
);
DECLARE_EVENT_CLASS(xrep_iunlink_resolve_class,
- TP_PROTO(struct xfs_perag *pag, unsigned int bucket,
+ TP_PROTO(const struct xfs_perag *pag, unsigned int bucket,
xfs_agino_t prev_agino, xfs_agino_t next_agino),
TP_ARGS(pag, bucket, prev_agino, next_agino),
TP_STRUCT__entry(
@@ -3440,8 +3468,8 @@ DECLARE_EVENT_CLASS(xrep_iunlink_resolve_class,
__field(xfs_agino_t, next_agino)
),
TP_fast_assign(
- __entry->dev = pag->pag_mount->m_super->s_dev;
- __entry->agno = pag->pag_agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->bucket = bucket;
__entry->prev_agino = prev_agino;
__entry->next_agino = next_agino;
@@ -3455,7 +3483,7 @@ DECLARE_EVENT_CLASS(xrep_iunlink_resolve_class,
);
#define DEFINE_REPAIR_IUNLINK_RESOLVE_EVENT(name) \
DEFINE_EVENT(xrep_iunlink_resolve_class, name, \
- TP_PROTO(struct xfs_perag *pag, unsigned int bucket, \
+ TP_PROTO(const struct xfs_perag *pag, unsigned int bucket, \
xfs_agino_t prev_agino, xfs_agino_t next_agino), \
TP_ARGS(pag, bucket, prev_agino, next_agino))
DEFINE_REPAIR_IUNLINK_RESOLVE_EVENT(xrep_iunlink_resolve_uncached);
@@ -3516,7 +3544,7 @@ TRACE_EVENT(xrep_iunlink_relink_prev,
);
TRACE_EVENT(xrep_iunlink_add_to_bucket,
- TP_PROTO(struct xfs_perag *pag, unsigned int bucket,
+ TP_PROTO(const struct xfs_perag *pag, unsigned int bucket,
xfs_agino_t agino, xfs_agino_t curr_head),
TP_ARGS(pag, bucket, agino, curr_head),
TP_STRUCT__entry(
@@ -3527,8 +3555,8 @@ TRACE_EVENT(xrep_iunlink_add_to_bucket,
__field(xfs_agino_t, next_agino)
),
TP_fast_assign(
- __entry->dev = pag->pag_mount->m_super->s_dev;
- __entry->agno = pag->pag_agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->bucket = bucket;
__entry->agino = agino;
__entry->next_agino = curr_head;
@@ -3542,7 +3570,7 @@ TRACE_EVENT(xrep_iunlink_add_to_bucket,
);
TRACE_EVENT(xrep_iunlink_commit_bucket,
- TP_PROTO(struct xfs_perag *pag, unsigned int bucket,
+ TP_PROTO(const struct xfs_perag *pag, unsigned int bucket,
xfs_agino_t old_agino, xfs_agino_t agino),
TP_ARGS(pag, bucket, old_agino, agino),
TP_STRUCT__entry(
@@ -3553,8 +3581,8 @@ TRACE_EVENT(xrep_iunlink_commit_bucket,
__field(xfs_agino_t, agino)
),
TP_fast_assign(
- __entry->dev = pag->pag_mount->m_super->s_dev;
- __entry->agno = pag->pag_agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->bucket = bucket;
__entry->old_agino = old_agino;
__entry->agino = agino;
@@ -3572,6 +3600,11 @@ DEFINE_XCHK_DIRTREE_EVENT(xrep_dirtree_delete_path);
DEFINE_XCHK_DIRTREE_EVENT(xrep_dirtree_create_adoption);
DEFINE_XCHK_DIRTREE_EVALUATE_EVENT(xrep_dirtree_decided_fate);
+DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_lookup);
+DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_try_unlink);
+DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_unlink);
+DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_link);
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 35a8c1b8b3cb..3d52e9d7ad57 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -318,14 +318,16 @@ xfs_bmap_update_create_done(
return &budp->bud_item;
}
-/* Take a passive ref to the AG containing the space we're mapping. */
+/* Take a passive ref to the group containing the space we're mapping. */
static inline void
xfs_bmap_update_get_group(
struct xfs_mount *mp,
struct xfs_bmap_intent *bi)
{
+ enum xfs_group_type type = XG_TYPE_AG;
+
if (xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork))
- return;
+ type = XG_TYPE_RTG;
/*
* Bump the intent count on behalf of the deferred rmap and refcount
@@ -334,7 +336,8 @@ xfs_bmap_update_get_group(
* intent drops the intent count, ensuring that the intent count
* remains nonzero across the transaction roll.
*/
- bi->bi_pag = xfs_perag_intent_get(mp, bi->bi_bmap.br_startblock);
+ bi->bi_group = xfs_group_intent_get(mp, bi->bi_bmap.br_startblock,
+ type);
}
/* Add this deferred BUI to the transaction. */
@@ -343,8 +346,6 @@ xfs_bmap_defer_add(
struct xfs_trans *tp,
struct xfs_bmap_intent *bi)
{
- trace_xfs_bmap_defer(bi);
-
xfs_bmap_update_get_group(tp->t_mountp, bi);
/*
@@ -357,18 +358,9 @@ xfs_bmap_defer_add(
*/
if (bi->bi_type == XFS_BMAP_MAP)
bi->bi_owner->i_delayed_blks += bi->bi_bmap.br_blockcount;
- xfs_defer_add(tp, &bi->bi_list, &xfs_bmap_update_defer_type);
-}
-
-/* Release a passive AG ref after finishing mapping work. */
-static inline void
-xfs_bmap_update_put_group(
- struct xfs_bmap_intent *bi)
-{
- if (xfs_ifork_is_realtime(bi->bi_owner, bi->bi_whichfork))
- return;
- xfs_perag_intent_put(bi->bi_pag);
+ trace_xfs_bmap_defer(bi);
+ xfs_defer_add(tp, &bi->bi_list, &xfs_bmap_update_defer_type);
}
/* Cancel a deferred bmap update. */
@@ -381,7 +373,7 @@ xfs_bmap_update_cancel_item(
if (bi->bi_type == XFS_BMAP_MAP)
bi->bi_owner->i_delayed_blks -= bi->bi_bmap.br_blockcount;
- xfs_bmap_update_put_group(bi);
+ xfs_group_intent_put(bi->bi_group);
kmem_cache_free(xfs_bmap_intent_cache, bi);
}
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 4719ec90029c..a59bbe767a7d 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -29,6 +29,7 @@
#include "xfs_iomap.h"
#include "xfs_reflink.h"
#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
/* Kernel only BMAP related definitions and functions */
@@ -41,16 +42,12 @@ xfs_daddr_t
xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
{
if (XFS_IS_REALTIME_INODE(ip))
- return XFS_FSB_TO_BB(ip->i_mount, fsb);
+ return xfs_rtb_to_daddr(ip->i_mount, fsb);
return XFS_FSB_TO_DADDR(ip->i_mount, fsb);
}
/*
* Routine to zero an extent on disk allocated to the specific inode.
- *
- * The VFS functions take a linearised filesystem block offset, so we have to
- * convert the sparse xfs fsb to the right format first.
- * VFS types are real funky, too.
*/
int
xfs_zero_extent(
@@ -58,15 +55,10 @@ xfs_zero_extent(
xfs_fsblock_t start_fsb,
xfs_off_t count_fsb)
{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_buftarg *target = xfs_inode_buftarg(ip);
- xfs_daddr_t sector = xfs_fsb_to_db(ip, start_fsb);
- sector_t block = XFS_BB_TO_FSBT(mp, sector);
-
- return blkdev_issue_zeroout(target->bt_bdev,
- block << (mp->m_super->s_blocksize_bits - 9),
- count_fsb << (mp->m_super->s_blocksize_bits - 9),
- GFP_KERNEL, 0);
+ return blkdev_issue_zeroout(xfs_inode_buftarg(ip)->bt_bdev,
+ xfs_fsb_to_db(ip, start_fsb),
+ XFS_FSB_TO_BB(ip->i_mount, count_fsb),
+ GFP_KERNEL, 0);
}
/*
@@ -540,16 +532,20 @@ xfs_can_free_eofblocks(
*/
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
if (xfs_inode_has_bigrtalloc(ip))
- end_fsb = xfs_rtb_roundup_rtx(mp, end_fsb);
+ end_fsb = xfs_fileoff_roundup_rtx(mp, end_fsb);
last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
if (last_fsb <= end_fsb)
return false;
/*
- * Check if there is an post-EOF extent to free.
+ * Check if there is a post-EOF extent to free. If there are any
+ * delalloc blocks attached to the inode (data fork delalloc
+ * reservations or CoW extents of any kind), we need to free them so
+ * that inactivation doesn't fail to erase them.
*/
xfs_ilock(ip, XFS_ILOCK_SHARED);
- if (xfs_iext_lookup_extent(ip, &ip->i_df, end_fsb, &icur, &imap))
+ if (ip->i_delayed_blks ||
+ xfs_iext_lookup_extent(ip, &ip->i_df, end_fsb, &icur, &imap))
found_blocks = true;
xfs_iunlock(ip, XFS_ILOCK_SHARED);
return found_blocks;
@@ -858,8 +854,8 @@ xfs_free_file_space(
/* We can only free complete realtime extents. */
if (xfs_inode_has_bigrtalloc(ip)) {
- startoffset_fsb = xfs_rtb_roundup_rtx(mp, startoffset_fsb);
- endoffset_fsb = xfs_rtb_rounddown_rtx(mp, endoffset_fsb);
+ startoffset_fsb = xfs_fileoff_roundup_rtx(mp, startoffset_fsb);
+ endoffset_fsb = xfs_fileoff_rounddown_rtx(mp, endoffset_fsb);
}
/*
@@ -1527,6 +1523,18 @@ xfs_swap_extents(
goto out_unlock;
}
+ /*
+ * The rmapbt implementation is unable to resume a swapext operation
+ * after a crash if the allocation unit size is larger than a block.
+ * This (deprecated) interface will not be upgraded to handle this
+ * situation. Defragmentation must be performed with the commit range
+ * ioctl.
+ */
+ if (XFS_IS_REALTIME_INODE(ip) && xfs_has_rtgroups(ip->i_mount)) {
+ error = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
error = xfs_qm_dqattach(ip);
if (error)
goto out_unlock;
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index aa4dbda7b536..e8196f5778e2 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2115,6 +2115,13 @@ xfs_alloc_buftarg(
btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off,
mp, ops);
+ if (bdev_can_atomic_write(btp->bt_bdev)) {
+ btp->bt_bdev_awu_min = bdev_atomic_write_unit_min_bytes(
+ btp->bt_bdev);
+ btp->bt_bdev_awu_max = bdev_atomic_write_unit_max_bytes(
+ btp->bt_bdev);
+ }
+
/*
* When allocating the buftargs we have not yet read the super block and
* thus don't know the file system sector size yet.
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 209a389f2abc..3d56bc7a35cc 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -124,6 +124,10 @@ struct xfs_buftarg {
struct percpu_counter bt_io_count;
struct ratelimit_state bt_ioerror_rl;
+ /* Atomic write unit values */
+ unsigned int bt_bdev_awu_min;
+ unsigned int bt_bdev_awu_max;
+
/* built-in cache, if we're not using the perag one */
struct xfs_buf_cache bt_cache[];
};
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 5180cbf5a90b..3d0c6402cb36 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -25,6 +25,8 @@
#include "xfs_alloc.h"
#include "xfs_ag.h"
#include "xfs_sb.h"
+#include "xfs_rtgroup.h"
+#include "xfs_rtbitmap.h"
/*
* This is the number of entries in the l_buf_cancel_table used during
@@ -393,9 +395,18 @@ xlog_recover_validate_buf_type(
break;
#ifdef CONFIG_XFS_RT
case XFS_BLFT_RTBITMAP_BUF:
+ if (xfs_has_rtgroups(mp) && magic32 != XFS_RTBITMAP_MAGIC) {
+ warnmsg = "Bad rtbitmap magic!";
+ break;
+ }
+ bp->b_ops = xfs_rtblock_ops(mp, XFS_RTGI_BITMAP);
+ break;
case XFS_BLFT_RTSUMMARY_BUF:
- /* no magic numbers for verification of RT buffers */
- bp->b_ops = &xfs_rtbuf_ops;
+ if (xfs_has_rtgroups(mp) && magic32 != XFS_RTSUMMARY_MAGIC) {
+ warnmsg = "Bad rtsummary magic!";
+ break;
+ }
+ bp->b_ops = xfs_rtblock_ops(mp, XFS_RTGI_SUMMARY);
break;
#endif /* CONFIG_XFS_RT */
default:
@@ -704,6 +715,7 @@ xlog_recover_do_primary_sb_buffer(
{
struct xfs_dsb *dsb = bp->b_addr;
xfs_agnumber_t orig_agcount = mp->m_sb.sb_agcount;
+ xfs_rgnumber_t orig_rgcount = mp->m_sb.sb_rgcount;
int error;
xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
@@ -722,17 +734,32 @@ xlog_recover_do_primary_sb_buffer(
xfs_alert(mp, "Shrinking AG count in log recovery not supported");
return -EFSCORRUPTED;
}
+ if (mp->m_sb.sb_rgcount < orig_rgcount) {
+ xfs_warn(mp,
+ "Shrinking rtgroup count in log recovery not supported");
+ return -EFSCORRUPTED;
+ }
/*
- * Growfs can also grow the last existing AG. In this case we also need
- * to update the length in the in-core perag structure and values
- * depending on it.
+ * If the last AG was grown or shrunk, we also need to update the
+ * length in the in-core perag structure and values depending on it.
*/
error = xfs_update_last_ag_size(mp, orig_agcount);
if (error)
return error;
/*
+ * If the last rtgroup was grown or shrunk, we also need to update the
+ * length in the in-core rtgroup structure and values depending on it.
+ * Ignore this on any filesystem with zero rtgroups.
+ */
+ if (orig_rgcount > 0) {
+ error = xfs_update_last_rtgroup_size(mp, orig_rgcount);
+ if (error)
+ return error;
+ }
+
+ /*
* Initialize the new perags, and also update various block and inode
* allocator setting based off the number of AGs or total blocks.
* Because of the latter this also needs to happen if the agcount did
@@ -745,6 +772,13 @@ xlog_recover_do_primary_sb_buffer(
return error;
}
mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
+
+ error = xfs_initialize_rtgroups(mp, orig_rgcount, mp->m_sb.sb_rgcount,
+ mp->m_sb.sb_rextents);
+ if (error) {
+ xfs_warn(mp, "Failed recovery rtgroup init: %d", error);
+ return error;
+ }
return 0;
}
@@ -791,11 +825,20 @@ xlog_recover_get_buf_lsn(
* UUIDs, so we must recover them immediately.
*/
blft = xfs_blft_from_flags(buf_f);
- if (blft == XFS_BLFT_RTBITMAP_BUF || blft == XFS_BLFT_RTSUMMARY_BUF)
+ if (!xfs_has_rtgroups(mp) && (blft == XFS_BLFT_RTBITMAP_BUF ||
+ blft == XFS_BLFT_RTSUMMARY_BUF))
goto recover_immediately;
magic32 = be32_to_cpu(*(__be32 *)blk);
switch (magic32) {
+ case XFS_RTSUMMARY_MAGIC:
+ case XFS_RTBITMAP_MAGIC: {
+ struct xfs_rtbuf_blkinfo *hdr = blk;
+
+ lsn = be64_to_cpu(hdr->rt_lsn);
+ uuid = &hdr->rt_uuid;
+ break;
+ }
case XFS_ABTB_CRC_MAGIC:
case XFS_ABTC_CRC_MAGIC:
case XFS_ABTB_MAGIC:
@@ -1037,6 +1080,18 @@ xlog_recover_buf_commit_pass2(
current_lsn);
if (error)
goto out_release;
+
+ /* Update the rt superblock if we have one. */
+ if (xfs_has_rtsb(mp) && mp->m_rtsb_bp) {
+ struct xfs_buf *rtsb_bp = mp->m_rtsb_bp;
+
+ xfs_buf_lock(rtsb_bp);
+ xfs_buf_hold(rtsb_bp);
+ xfs_update_rtsb(rtsb_bp, bp);
+ rtsb_bp->b_flags |= _XBF_LOGRECOVERY;
+ xfs_buf_delwri_queue(rtsb_bp, buffer_list);
+ xfs_buf_relse(rtsb_bp);
+ }
} else {
xlog_recover_do_reg_buffer(mp, item, bp, buf_f, current_lsn);
}
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index d8c4a5dcca7a..c4bd145f5ec1 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -21,6 +21,7 @@
#include "xfs_ag.h"
#include "xfs_health.h"
#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
/*
* Notes on an efficient, low latency fstrim algorithm
@@ -72,6 +73,8 @@
* extent search so that it overlaps in flight discard IO.
*/
+#define XFS_DISCARD_MAX_EXAMINE (100)
+
struct workqueue_struct *xfs_discard_wq;
static void
@@ -81,7 +84,7 @@ xfs_discard_endio_work(
struct xfs_busy_extents *extents =
container_of(work, struct xfs_busy_extents, endio_work);
- xfs_extent_busy_clear(extents->mount, &extents->extent_list, false);
+ xfs_extent_busy_clear(&extents->extent_list, false);
kfree(extents->owner);
}
@@ -100,6 +103,24 @@ xfs_discard_endio(
bio_put(bio);
}
+static inline struct block_device *
+xfs_group_bdev(
+ const struct xfs_group *xg)
+{
+ struct xfs_mount *mp = xg->xg_mount;
+
+ switch (xg->xg_type) {
+ case XG_TYPE_AG:
+ return mp->m_ddev_targp->bt_bdev;
+ case XG_TYPE_RTG:
+ return mp->m_rtdev_targp->bt_bdev;
+ default:
+ ASSERT(0);
+ break;
+ }
+ return NULL;
+}
+
/*
* Walk the discard list and issue discards on all the busy extents in the
* list. We plug and chain the bios so that we only need a single completion
@@ -117,11 +138,11 @@ xfs_discard_extents(
blk_start_plug(&plug);
list_for_each_entry(busyp, &extents->extent_list, list) {
- trace_xfs_discard_extent(mp, busyp->agno, busyp->bno,
- busyp->length);
+ trace_xfs_discard_extent(busyp->group, busyp->bno,
+ busyp->length);
- error = __blkdev_issue_discard(mp->m_ddev_targp->bt_bdev,
- XFS_AGB_TO_DADDR(mp, busyp->agno, busyp->bno),
+ error = __blkdev_issue_discard(xfs_group_bdev(busyp->group),
+ xfs_gbno_to_daddr(busyp->group, busyp->bno),
XFS_FSB_TO_BB(mp, busyp->length),
GFP_KERNEL, &bio);
if (error && error != -EOPNOTSUPP) {
@@ -160,13 +181,13 @@ xfs_trim_gather_extents(
struct xfs_trim_cur *tcur,
struct xfs_busy_extents *extents)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_trans *tp;
struct xfs_btree_cur *cur;
struct xfs_buf *agbp;
int error;
int i;
- int batch = 100;
+ int batch = XFS_DISCARD_MAX_EXAMINE;
/*
* Force out the log. This means any transactions that might have freed
@@ -239,11 +260,11 @@ xfs_trim_gather_extents(
* overlapping ranges for now.
*/
if (fbno + flen < tcur->start) {
- trace_xfs_discard_exclude(mp, pag->pag_agno, fbno, flen);
+ trace_xfs_discard_exclude(pag_group(pag), fbno, flen);
goto next_extent;
}
if (fbno > tcur->end) {
- trace_xfs_discard_exclude(mp, pag->pag_agno, fbno, flen);
+ trace_xfs_discard_exclude(pag_group(pag), fbno, flen);
if (tcur->by_bno) {
tcur->count = 0;
break;
@@ -261,7 +282,7 @@ xfs_trim_gather_extents(
/* Too small? Give up. */
if (flen < tcur->minlen) {
- trace_xfs_discard_toosmall(mp, pag->pag_agno, fbno, flen);
+ trace_xfs_discard_toosmall(pag_group(pag), fbno, flen);
if (tcur->by_bno)
goto next_extent;
tcur->count = 0;
@@ -272,12 +293,12 @@ xfs_trim_gather_extents(
* If any blocks in the range are still busy, skip the
* discard and try again the next time.
*/
- if (xfs_extent_busy_search(mp, pag, fbno, flen)) {
- trace_xfs_discard_busy(mp, pag->pag_agno, fbno, flen);
+ if (xfs_extent_busy_search(pag_group(pag), fbno, flen)) {
+ trace_xfs_discard_busy(pag_group(pag), fbno, flen);
goto next_extent;
}
- xfs_extent_busy_insert_discard(pag, fbno, flen,
+ xfs_extent_busy_insert_discard(pag_group(pag), fbno, flen,
&extents->extent_list);
next_extent:
if (tcur->by_bno)
@@ -301,7 +322,7 @@ next_extent:
* we aren't going to issue a discard on them any more.
*/
if (error)
- xfs_extent_busy_clear(mp, &extents->extent_list, false);
+ xfs_extent_busy_clear(&extents->extent_list, false);
out_del_cursor:
xfs_btree_del_cursor(cur, error);
out_trans_cancel:
@@ -335,7 +356,7 @@ xfs_trim_perag_extents(
};
int error = 0;
- if (start != 0 || end != pag->block_count)
+ if (start != 0 || end != pag_group(pag)->xg_block_count)
tcur.by_bno = true;
do {
@@ -347,7 +368,6 @@ xfs_trim_perag_extents(
break;
}
- extents->mount = pag->pag_mount;
extents->owner = extents;
INIT_LIST_HEAD(&extents->extent_list);
@@ -367,7 +387,7 @@ xfs_trim_perag_extents(
* list after this function call, as it may have been freed by
* the time control returns to us.
*/
- error = xfs_discard_extents(pag->pag_mount, extents);
+ error = xfs_discard_extents(pag_mount(pag), extents);
if (error)
break;
@@ -389,8 +409,8 @@ xfs_trim_datadev_extents(
{
xfs_agnumber_t start_agno, end_agno;
xfs_agblock_t start_agbno, end_agbno;
+ struct xfs_perag *pag = NULL;
xfs_daddr_t ddev_end;
- struct xfs_perag *pag;
int last_error = 0, error;
ddev_end = min_t(xfs_daddr_t, end,
@@ -401,10 +421,10 @@ xfs_trim_datadev_extents(
end_agno = xfs_daddr_to_agno(mp, ddev_end);
end_agbno = xfs_daddr_to_agbno(mp, ddev_end);
- for_each_perag_range(mp, start_agno, end_agno, pag) {
- xfs_agblock_t agend = pag->block_count;
+ while ((pag = xfs_perag_next_range(mp, pag, start_agno, end_agno))) {
+ xfs_agblock_t agend = pag_group(pag)->xg_block_count;
- if (start_agno == end_agno)
+ if (pag_agno(pag) == end_agno)
agend = end_agbno;
error = xfs_trim_perag_extents(pag, start_agbno, agend, minlen);
if (error)
@@ -479,7 +499,7 @@ xfs_discard_rtdev_extents(
trace_xfs_discard_rtextent(mp, busyp->bno, busyp->length);
error = __blkdev_issue_discard(bdev,
- XFS_FSB_TO_BB(mp, busyp->bno),
+ xfs_rtb_to_daddr(mp, busyp->bno),
XFS_FSB_TO_BB(mp, busyp->length),
GFP_NOFS, &bio);
if (error)
@@ -506,7 +526,7 @@ xfs_discard_rtdev_extents(
static int
xfs_trim_gather_rtextent(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
struct xfs_trans *tp,
const struct xfs_rtalloc_rec *rec,
void *priv)
@@ -525,12 +545,12 @@ xfs_trim_gather_rtextent(
return -ECANCELED;
}
- rbno = xfs_rtx_to_rtb(mp, rec->ar_startext);
- rlen = xfs_rtx_to_rtb(mp, rec->ar_extcount);
+ rbno = xfs_rtx_to_rtb(rtg, rec->ar_startext);
+ rlen = xfs_rtbxlen_to_blen(rtg_mount(rtg), rec->ar_extcount);
/* Ignore too small. */
if (rlen < tr->minlen_fsb) {
- trace_xfs_discard_rttoosmall(mp, rbno, rlen);
+ trace_xfs_discard_rttoosmall(rtg_mount(rtg), rbno, rlen);
return 0;
}
@@ -547,70 +567,185 @@ xfs_trim_gather_rtextent(
return 0;
}
+/* Trim extents on an !rtgroups realtime device */
static int
-xfs_trim_rtdev_extents(
- struct xfs_mount *mp,
- xfs_daddr_t start,
- xfs_daddr_t end,
+xfs_trim_rtextents(
+ struct xfs_rtgroup *rtg,
+ xfs_rtxnum_t low,
+ xfs_rtxnum_t high,
xfs_daddr_t minlen)
{
+ struct xfs_mount *mp = rtg_mount(rtg);
struct xfs_trim_rtdev tr = {
.minlen_fsb = XFS_BB_TO_FSB(mp, minlen),
+ .extent_list = LIST_HEAD_INIT(tr.extent_list),
};
- xfs_rtxnum_t low, high;
struct xfs_trans *tp;
- xfs_daddr_t rtdev_daddr;
int error;
- INIT_LIST_HEAD(&tr.extent_list);
-
- /* Shift the start and end downwards to match the rt device. */
- rtdev_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
- if (start > rtdev_daddr)
- start -= rtdev_daddr;
- else
- start = 0;
-
- if (end <= rtdev_daddr)
- return 0;
- end -= rtdev_daddr;
-
error = xfs_trans_alloc_empty(mp, &tp);
if (error)
return error;
- end = min_t(xfs_daddr_t, end,
- XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks) - 1);
-
- /* Convert the rt blocks to rt extents */
- low = xfs_rtb_to_rtxup(mp, XFS_BB_TO_FSB(mp, start));
- high = xfs_rtb_to_rtx(mp, XFS_BB_TO_FSBT(mp, end));
-
/*
* Walk the free ranges between low and high. The query_range function
* trims the extents returned.
*/
do {
- tr.stop_rtx = low + (mp->m_sb.sb_blocksize * NBBY);
- xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP);
- error = xfs_rtalloc_query_range(mp, tp, low, high,
+ tr.stop_rtx = low + xfs_rtbitmap_rtx_per_rbmblock(mp);
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+ error = xfs_rtalloc_query_range(rtg, tp, low, high,
xfs_trim_gather_rtextent, &tr);
if (error == -ECANCELED)
error = 0;
if (error) {
- xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
+ xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
xfs_discard_free_rtdev_extents(&tr);
break;
}
if (list_empty(&tr.extent_list)) {
- xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
+ xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
break;
}
error = xfs_discard_rtdev_extents(mp, &tr);
- xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
+ xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+ if (error)
+ break;
+
+ low = tr.restart_rtx;
+ } while (!xfs_trim_should_stop() && low <= high);
+
+ xfs_trans_cancel(tp);
+ return error;
+}
+
+struct xfs_trim_rtgroup {
+ /* list of rtgroup extents to free */
+ struct xfs_busy_extents *extents;
+
+ /* minimum length that caller allows us to trim */
+ xfs_rtblock_t minlen_fsb;
+
+ /* restart point for the rtbitmap walk */
+ xfs_rtxnum_t restart_rtx;
+
+ /* number of extents to examine before stopping to issue discard ios */
+ int batch;
+
+ /* number of extents queued for discard */
+ int queued;
+};
+
+static int
+xfs_trim_gather_rtgroup_extent(
+ struct xfs_rtgroup *rtg,
+ struct xfs_trans *tp,
+ const struct xfs_rtalloc_rec *rec,
+ void *priv)
+{
+ struct xfs_trim_rtgroup *tr = priv;
+ xfs_rgblock_t rgbno;
+ xfs_extlen_t len;
+
+ if (--tr->batch <= 0) {
+ /*
+ * If we've checked a large number of extents, update the
+ * cursor to point at this extent so we restart the next batch
+ * from this extent.
+ */
+ tr->restart_rtx = rec->ar_startext;
+ return -ECANCELED;
+ }
+
+ rgbno = xfs_rtx_to_rgbno(rtg, rec->ar_startext);
+ len = xfs_rtxlen_to_extlen(rtg_mount(rtg), rec->ar_extcount);
+
+ /* Ignore too small. */
+ if (len < tr->minlen_fsb) {
+ trace_xfs_discard_toosmall(rtg_group(rtg), rgbno, len);
+ return 0;
+ }
+
+ /*
+ * If any blocks in the range are still busy, skip the discard and try
+ * again the next time.
+ */
+ if (xfs_extent_busy_search(rtg_group(rtg), rgbno, len)) {
+ trace_xfs_discard_busy(rtg_group(rtg), rgbno, len);
+ return 0;
+ }
+
+ xfs_extent_busy_insert_discard(rtg_group(rtg), rgbno, len,
+ &tr->extents->extent_list);
+
+ tr->queued++;
+ tr->restart_rtx = rec->ar_startext + rec->ar_extcount;
+ return 0;
+}
+
+/* Trim extents in this rtgroup using the busy extent machinery. */
+static int
+xfs_trim_rtgroup_extents(
+ struct xfs_rtgroup *rtg,
+ xfs_rtxnum_t low,
+ xfs_rtxnum_t high,
+ xfs_daddr_t minlen)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+ struct xfs_trim_rtgroup tr = {
+ .minlen_fsb = XFS_BB_TO_FSB(mp, minlen),
+ };
+ struct xfs_trans *tp;
+ int error;
+
+ error = xfs_trans_alloc_empty(mp, &tp);
+ if (error)
+ return error;
+
+ /*
+ * Walk the free ranges between low and high. The query_range function
+ * trims the extents returned.
+ */
+ do {
+ tr.extents = kzalloc(sizeof(*tr.extents), GFP_KERNEL);
+ if (!tr.extents) {
+ error = -ENOMEM;
+ break;
+ }
+
+ tr.queued = 0;
+ tr.batch = XFS_DISCARD_MAX_EXAMINE;
+ tr.extents->owner = tr.extents;
+ INIT_LIST_HEAD(&tr.extents->extent_list);
+
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+ error = xfs_rtalloc_query_range(rtg, tp, low, high,
+ xfs_trim_gather_rtgroup_extent, &tr);
+ xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+ if (error == -ECANCELED)
+ error = 0;
+ if (error) {
+ kfree(tr.extents);
+ break;
+ }
+
+ if (!tr.queued)
+ break;
+
+ /*
+ * We hand the extent list to the discard function here so the
+ * discarded extents can be removed from the busy extent list.
+ * This allows the discards to run asynchronously with
+ * gathering the next round of extents to discard.
+ *
+ * However, we must ensure that we do not reference the extent
+ * list after this function call, as it may have been freed by
+ * the time control returns to us.
+ */
+ error = xfs_discard_extents(rtg_mount(rtg), tr.extents);
if (error)
break;
@@ -620,6 +755,63 @@ xfs_trim_rtdev_extents(
xfs_trans_cancel(tp);
return error;
}
+
+static int
+xfs_trim_rtdev_extents(
+ struct xfs_mount *mp,
+ xfs_daddr_t start,
+ xfs_daddr_t end,
+ xfs_daddr_t minlen)
+{
+ xfs_rtblock_t start_rtbno, end_rtbno;
+ xfs_rtxnum_t start_rtx, end_rtx;
+ xfs_rgnumber_t start_rgno, end_rgno;
+ xfs_daddr_t daddr_offset;
+ int last_error = 0, error;
+ struct xfs_rtgroup *rtg = NULL;
+
+ /* Shift the start and end downwards to match the rt device. */
+ daddr_offset = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
+ if (start > daddr_offset)
+ start -= daddr_offset;
+ else
+ start = 0;
+ start_rtbno = xfs_daddr_to_rtb(mp, start);
+ start_rtx = xfs_rtb_to_rtx(mp, start_rtbno);
+ start_rgno = xfs_rtb_to_rgno(mp, start_rtbno);
+
+ if (end <= daddr_offset)
+ return 0;
+ else
+ end -= daddr_offset;
+ end_rtbno = xfs_daddr_to_rtb(mp, end);
+ end_rtx = xfs_rtb_to_rtx(mp, end_rtbno + mp->m_sb.sb_rextsize - 1);
+ end_rgno = xfs_rtb_to_rgno(mp, end_rtbno);
+
+ while ((rtg = xfs_rtgroup_next_range(mp, rtg, start_rgno, end_rgno))) {
+ xfs_rtxnum_t rtg_end = rtg->rtg_extents;
+
+ if (rtg_rgno(rtg) == end_rgno)
+ rtg_end = min(rtg_end, end_rtx);
+
+ if (xfs_has_rtgroups(mp))
+ error = xfs_trim_rtgroup_extents(rtg, start_rtx,
+ rtg_end, minlen);
+ else
+ error = xfs_trim_rtextents(rtg, start_rtx, rtg_end,
+ minlen);
+ if (error)
+ last_error = error;
+
+ if (xfs_trim_should_stop()) {
+ xfs_rtgroup_rele(rtg);
+ break;
+ }
+ start_rtx = 0;
+ }
+
+ return last_error;
+}
#else
# define xfs_trim_rtdev_extents(...) (-EOPNOTSUPP)
#endif /* CONFIG_XFS_RT */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index c1b211c260a9..ff982d983989 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -277,6 +277,25 @@ xfs_qm_init_dquot_blk(
xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1);
}
+static void
+xfs_dquot_set_prealloc(
+ struct xfs_dquot_pre *pre,
+ const struct xfs_dquot_res *res)
+{
+ xfs_qcnt_t space;
+
+ pre->q_prealloc_hi_wmark = res->hardlimit;
+ pre->q_prealloc_lo_wmark = res->softlimit;
+
+ space = div_u64(pre->q_prealloc_hi_wmark, 100);
+ if (!pre->q_prealloc_lo_wmark)
+ pre->q_prealloc_lo_wmark = space * 95;
+
+ pre->q_low_space[XFS_QLOWSP_1_PCNT] = space;
+ pre->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
+ pre->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
+}
+
/*
* Initialize the dynamic speculative preallocation thresholds. The lo/hi
* watermarks correspond to the soft and hard limits by default. If a soft limit
@@ -285,22 +304,8 @@ xfs_qm_init_dquot_blk(
void
xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)
{
- uint64_t space;
-
- dqp->q_prealloc_hi_wmark = dqp->q_blk.hardlimit;
- dqp->q_prealloc_lo_wmark = dqp->q_blk.softlimit;
- if (!dqp->q_prealloc_lo_wmark) {
- dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark;
- do_div(dqp->q_prealloc_lo_wmark, 100);
- dqp->q_prealloc_lo_wmark *= 95;
- }
-
- space = dqp->q_prealloc_hi_wmark;
-
- do_div(space, 100);
- dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space;
- dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3;
- dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;
+ xfs_dquot_set_prealloc(&dqp->q_blk_prealloc, &dqp->q_blk);
+ xfs_dquot_set_prealloc(&dqp->q_rtb_prealloc, &dqp->q_rtb);
}
/*
@@ -983,6 +988,7 @@ xfs_qm_dqget_inode(
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
ASSERT(xfs_inode_dquot(ip, type) == NULL);
+ ASSERT(!xfs_is_metadir_inode(ip));
id = xfs_qm_id_for_quotatype(ip, type);
diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h
index 677bb2dc9ac9..d73d179df009 100644
--- a/fs/xfs/xfs_dquot.h
+++ b/fs/xfs/xfs_dquot.h
@@ -56,6 +56,12 @@ xfs_dquot_res_over_limits(
return false;
}
+struct xfs_dquot_pre {
+ xfs_qcnt_t q_prealloc_lo_wmark;
+ xfs_qcnt_t q_prealloc_hi_wmark;
+ int64_t q_low_space[XFS_QLOWSP_MAX];
+};
+
/*
* The incore dquot structure
*/
@@ -76,9 +82,9 @@ struct xfs_dquot {
struct xfs_dq_logitem q_logitem;
- xfs_qcnt_t q_prealloc_lo_wmark;
- xfs_qcnt_t q_prealloc_hi_wmark;
- int64_t q_low_space[XFS_QLOWSP_MAX];
+ struct xfs_dquot_pre q_blk_prealloc;
+ struct xfs_dquot_pre q_rtb_prealloc;
+
struct mutex q_qlock;
struct completion q_flush;
atomic_t q_pincount;
@@ -192,7 +198,11 @@ static inline bool xfs_dquot_lowsp(struct xfs_dquot *dqp)
int64_t freesp;
freesp = dqp->q_blk.hardlimit - dqp->q_blk.reserved;
- if (freesp < dqp->q_low_space[XFS_QLOWSP_1_PCNT])
+ if (freesp < dqp->q_blk_prealloc.q_low_space[XFS_QLOWSP_1_PCNT])
+ return true;
+
+ freesp = dqp->q_rtb.hardlimit - dqp->q_rtb.reserved;
+ if (freesp < dqp->q_rtb_prealloc.q_low_space[XFS_QLOWSP_1_PCNT])
return true;
return false;
diff --git a/fs/xfs/xfs_drain.c b/fs/xfs/xfs_drain.c
index 7bdb9688c0f5..5ede81fadbd8 100644
--- a/fs/xfs/xfs_drain.c
+++ b/fs/xfs/xfs_drain.c
@@ -94,55 +94,39 @@ static inline int xfs_defer_drain_wait(struct xfs_defer_drain *dr)
}
/*
- * Get a passive reference to the AG that contains a fsbno and declare an intent
- * to update its metadata.
+ * Get a passive reference to the group that contains a fsbno and declare an
+ * intent to update its metadata.
+ *
+ * Other threads that need exclusive access can decide to back off if they see
+ * declared intentions.
*/
-struct xfs_perag *
-xfs_perag_intent_get(
+struct xfs_group *
+xfs_group_intent_get(
struct xfs_mount *mp,
- xfs_fsblock_t fsbno)
+ xfs_fsblock_t fsbno,
+ enum xfs_group_type type)
{
- struct xfs_perag *pag;
+ struct xfs_group *xg;
- pag = xfs_perag_get(mp, XFS_FSB_TO_AGNO(mp, fsbno));
- if (!pag)
+ xg = xfs_group_get_by_fsb(mp, fsbno, type);
+ if (!xg)
return NULL;
-
- xfs_perag_intent_hold(pag);
- return pag;
-}
-
-/*
- * Release our intent to update this AG's metadata, and then release our
- * passive ref to the AG.
- */
-void
-xfs_perag_intent_put(
- struct xfs_perag *pag)
-{
- xfs_perag_intent_rele(pag);
- xfs_perag_put(pag);
+ trace_xfs_group_intent_hold(xg, __return_address);
+ xfs_defer_drain_grab(&xg->xg_intents_drain);
+ return xg;
}
/*
- * Declare an intent to update AG metadata. Other threads that need exclusive
- * access can decide to back off if they see declared intentions.
+ * Release our intent to update this group's metadata, and then release our
+ * passive ref to it.
*/
void
-xfs_perag_intent_hold(
- struct xfs_perag *pag)
+xfs_group_intent_put(
+ struct xfs_group *xg)
{
- trace_xfs_perag_intent_hold(pag, __return_address);
- xfs_defer_drain_grab(&pag->pag_intents_drain);
-}
-
-/* Release our intent to update this AG's metadata. */
-void
-xfs_perag_intent_rele(
- struct xfs_perag *pag)
-{
- trace_xfs_perag_intent_rele(pag, __return_address);
- xfs_defer_drain_rele(&pag->pag_intents_drain);
+ trace_xfs_group_intent_rele(xg, __return_address);
+ xfs_defer_drain_rele(&xg->xg_intents_drain);
+ xfs_group_put(xg);
}
/*
@@ -150,17 +134,19 @@ xfs_perag_intent_rele(
* Callers must not hold any AG header buffers.
*/
int
-xfs_perag_intent_drain(
- struct xfs_perag *pag)
+xfs_group_intent_drain(
+ struct xfs_group *xg)
{
- trace_xfs_perag_wait_intents(pag, __return_address);
- return xfs_defer_drain_wait(&pag->pag_intents_drain);
+ trace_xfs_group_wait_intents(xg, __return_address);
+ return xfs_defer_drain_wait(&xg->xg_intents_drain);
}
-/* Has anyone declared an intent to update this AG? */
+/*
+ * Has anyone declared an intent to update this group?
+ */
bool
-xfs_perag_intent_busy(
- struct xfs_perag *pag)
+xfs_group_intent_busy(
+ struct xfs_group *xg)
{
- return xfs_defer_drain_busy(&pag->pag_intents_drain);
+ return xfs_defer_drain_busy(&xg->xg_intents_drain);
}
diff --git a/fs/xfs/xfs_drain.h b/fs/xfs/xfs_drain.h
index 775164f54ea6..efcf88df9a5e 100644
--- a/fs/xfs/xfs_drain.h
+++ b/fs/xfs/xfs_drain.h
@@ -6,6 +6,7 @@
#ifndef XFS_DRAIN_H_
#define XFS_DRAIN_H_
+struct xfs_group;
struct xfs_perag;
#ifdef CONFIG_XFS_DRAIN_INTENTS
@@ -61,27 +62,22 @@ void xfs_drain_wait_enable(void);
* soon as the item is added to the transaction and cannot drop the counter
* until the item is finished or cancelled.
*/
-struct xfs_perag *xfs_perag_intent_get(struct xfs_mount *mp,
- xfs_fsblock_t fsbno);
-void xfs_perag_intent_put(struct xfs_perag *pag);
+struct xfs_group *xfs_group_intent_get(struct xfs_mount *mp,
+ xfs_fsblock_t fsbno, enum xfs_group_type type);
+void xfs_group_intent_put(struct xfs_group *rtg);
-void xfs_perag_intent_hold(struct xfs_perag *pag);
-void xfs_perag_intent_rele(struct xfs_perag *pag);
+int xfs_group_intent_drain(struct xfs_group *xg);
+bool xfs_group_intent_busy(struct xfs_group *xg);
-int xfs_perag_intent_drain(struct xfs_perag *pag);
-bool xfs_perag_intent_busy(struct xfs_perag *pag);
#else
struct xfs_defer_drain { /* empty */ };
#define xfs_defer_drain_free(dr) ((void)0)
#define xfs_defer_drain_init(dr) ((void)0)
-#define xfs_perag_intent_get(mp, fsbno) \
- xfs_perag_get((mp), XFS_FSB_TO_AGNO(mp, fsbno))
-#define xfs_perag_intent_put(pag) xfs_perag_put(pag)
-
-static inline void xfs_perag_intent_hold(struct xfs_perag *pag) { }
-static inline void xfs_perag_intent_rele(struct xfs_perag *pag) { }
+#define xfs_group_intent_get(_mp, _fsbno, _type) \
+ xfs_group_get_by_fsb((_mp), (_fsbno), (_type))
+#define xfs_group_intent_put(xg) xfs_group_put(xg)
#endif /* CONFIG_XFS_DRAIN_INTENTS */
diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c
index 75cb53f090d1..9ab05ad224d1 100644
--- a/fs/xfs/xfs_exchrange.c
+++ b/fs/xfs/xfs_exchrange.c
@@ -217,7 +217,7 @@ xfs_exchrange_mappings(
* length in @fxr are safe to round up.
*/
if (xfs_inode_has_bigrtalloc(ip2))
- req.blockcount = xfs_rtb_roundup_rtx(mp, req.blockcount);
+ req.blockcount = xfs_blen_roundup_rtx(mp, req.blockcount);
error = xfs_exchrange_estimate(&req);
if (error)
@@ -813,8 +813,6 @@ xfs_ioc_exchange_range(
.file2 = file,
};
struct xfs_exchange_range args;
- struct fd file1;
- int error;
if (copy_from_user(&args, argp, sizeof(args)))
return -EFAULT;
@@ -828,14 +826,12 @@ xfs_ioc_exchange_range(
fxr.length = args.length;
fxr.flags = args.flags;
- file1 = fdget(args.file1_fd);
- if (!fd_file(file1))
+ CLASS(fd, file1)(args.file1_fd);
+ if (fd_empty(file1))
return -EBADF;
fxr.file1 = fd_file(file1);
- error = xfs_exchange_range(&fxr);
- fdput(file1);
- return error;
+ return xfs_exchange_range(&fxr);
}
/* Opaque freshness blob for XFS_IOC_COMMIT_RANGE */
@@ -909,8 +905,6 @@ xfs_ioc_commit_range(
struct xfs_commit_range_fresh *kern_f;
struct xfs_inode *ip2 = XFS_I(file_inode(file));
struct xfs_mount *mp = ip2->i_mount;
- struct fd file1;
- int error;
kern_f = (struct xfs_commit_range_fresh *)&args.file2_freshness;
@@ -934,12 +928,10 @@ xfs_ioc_commit_range(
fxr.file2_ctime.tv_sec = kern_f->file2_ctime;
fxr.file2_ctime.tv_nsec = kern_f->file2_ctime_nsec;
- file1 = fdget(args.file1_fd);
+ CLASS(fd, file1)(args.file1_fd);
if (fd_empty(file1))
return -EBADF;
fxr.file1 = fd_file(file1);
- error = xfs_exchange_range(&fxr);
- fdput(file1);
- return error;
+ return xfs_exchange_range(&fxr);
}
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index a73e7c73b664..ea43c9a6e54c 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -18,15 +18,24 @@
#include "xfs_trans.h"
#include "xfs_log.h"
#include "xfs_ag.h"
+#include "xfs_rtgroup.h"
+
+struct xfs_extent_busy_tree {
+ spinlock_t eb_lock;
+ struct rb_root eb_tree;
+ unsigned int eb_gen;
+ wait_queue_head_t eb_wait;
+};
static void
xfs_extent_busy_insert_list(
- struct xfs_perag *pag,
+ struct xfs_group *xg,
xfs_agblock_t bno,
xfs_extlen_t len,
unsigned int flags,
struct list_head *busy_list)
{
+ struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
struct xfs_extent_busy *new;
struct xfs_extent_busy *busyp;
struct rb_node **rbp;
@@ -34,17 +43,17 @@ xfs_extent_busy_insert_list(
new = kzalloc(sizeof(struct xfs_extent_busy),
GFP_KERNEL | __GFP_NOFAIL);
- new->agno = pag->pag_agno;
+ new->group = xfs_group_hold(xg);
new->bno = bno;
new->length = len;
INIT_LIST_HEAD(&new->list);
new->flags = flags;
/* trace before insert to be able to see failed inserts */
- trace_xfs_extent_busy(pag->pag_mount, pag->pag_agno, bno, len);
+ trace_xfs_extent_busy(xg, bno, len);
- spin_lock(&pag->pagb_lock);
- rbp = &pag->pagb_tree.rb_node;
+ spin_lock(&eb->eb_lock);
+ rbp = &eb->eb_tree.rb_node;
while (*rbp) {
parent = *rbp;
busyp = rb_entry(parent, struct xfs_extent_busy, rb_node);
@@ -61,32 +70,32 @@ xfs_extent_busy_insert_list(
}
rb_link_node(&new->rb_node, parent, rbp);
- rb_insert_color(&new->rb_node, &pag->pagb_tree);
+ rb_insert_color(&new->rb_node, &eb->eb_tree);
/* always process discard lists in fifo order */
list_add_tail(&new->list, busy_list);
- spin_unlock(&pag->pagb_lock);
+ spin_unlock(&eb->eb_lock);
}
void
xfs_extent_busy_insert(
struct xfs_trans *tp,
- struct xfs_perag *pag,
+ struct xfs_group *xg,
xfs_agblock_t bno,
xfs_extlen_t len,
unsigned int flags)
{
- xfs_extent_busy_insert_list(pag, bno, len, flags, &tp->t_busy);
+ xfs_extent_busy_insert_list(xg, bno, len, flags, &tp->t_busy);
}
void
xfs_extent_busy_insert_discard(
- struct xfs_perag *pag,
+ struct xfs_group *xg,
xfs_agblock_t bno,
xfs_extlen_t len,
struct list_head *busy_list)
{
- xfs_extent_busy_insert_list(pag, bno, len, XFS_EXTENT_BUSY_DISCARDED,
+ xfs_extent_busy_insert_list(xg, bno, len, XFS_EXTENT_BUSY_DISCARDED,
busy_list);
}
@@ -101,18 +110,18 @@ xfs_extent_busy_insert_discard(
*/
int
xfs_extent_busy_search(
- struct xfs_mount *mp,
- struct xfs_perag *pag,
+ struct xfs_group *xg,
xfs_agblock_t bno,
xfs_extlen_t len)
{
+ struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
struct rb_node *rbp;
struct xfs_extent_busy *busyp;
int match = 0;
/* find closest start bno overlap */
- spin_lock(&pag->pagb_lock);
- rbp = pag->pagb_tree.rb_node;
+ spin_lock(&eb->eb_lock);
+ rbp = eb->eb_tree.rb_node;
while (rbp) {
busyp = rb_entry(rbp, struct xfs_extent_busy, rb_node);
if (bno < busyp->bno) {
@@ -131,7 +140,7 @@ xfs_extent_busy_search(
break;
}
}
- spin_unlock(&pag->pagb_lock);
+ spin_unlock(&eb->eb_lock);
return match;
}
@@ -148,14 +157,15 @@ xfs_extent_busy_search(
*/
STATIC bool
xfs_extent_busy_update_extent(
- struct xfs_mount *mp,
- struct xfs_perag *pag,
+ struct xfs_group *xg,
struct xfs_extent_busy *busyp,
xfs_agblock_t fbno,
xfs_extlen_t flen,
- bool userdata) __releases(&pag->pagb_lock)
- __acquires(&pag->pagb_lock)
+ bool userdata)
+ __releases(&eb->eb_lock)
+ __acquires(&eb->eb_lock)
{
+ struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
xfs_agblock_t fend = fbno + flen;
xfs_agblock_t bbno = busyp->bno;
xfs_agblock_t bend = bbno + busyp->length;
@@ -166,9 +176,9 @@ xfs_extent_busy_update_extent(
* and retry.
*/
if (busyp->flags & XFS_EXTENT_BUSY_DISCARDED) {
- spin_unlock(&pag->pagb_lock);
+ spin_unlock(&eb->eb_lock);
delay(1);
- spin_lock(&pag->pagb_lock);
+ spin_lock(&eb->eb_lock);
return false;
}
@@ -241,7 +251,7 @@ xfs_extent_busy_update_extent(
* tree root, because erasing the node can rearrange the
* tree topology.
*/
- rb_erase(&busyp->rb_node, &pag->pagb_tree);
+ rb_erase(&busyp->rb_node, &eb->eb_tree);
busyp->length = 0;
return false;
} else if (fend < bend) {
@@ -280,35 +290,34 @@ xfs_extent_busy_update_extent(
ASSERT(0);
}
- trace_xfs_extent_busy_reuse(mp, pag->pag_agno, fbno, flen);
+ trace_xfs_extent_busy_reuse(xg, fbno, flen);
return true;
out_force_log:
- spin_unlock(&pag->pagb_lock);
- xfs_log_force(mp, XFS_LOG_SYNC);
- trace_xfs_extent_busy_force(mp, pag->pag_agno, fbno, flen);
- spin_lock(&pag->pagb_lock);
+ spin_unlock(&eb->eb_lock);
+ xfs_log_force(xg->xg_mount, XFS_LOG_SYNC);
+ trace_xfs_extent_busy_force(xg, fbno, flen);
+ spin_lock(&eb->eb_lock);
return false;
}
-
/*
* For a given extent [fbno, flen], make sure we can reuse it safely.
*/
void
xfs_extent_busy_reuse(
- struct xfs_mount *mp,
- struct xfs_perag *pag,
+ struct xfs_group *xg,
xfs_agblock_t fbno,
xfs_extlen_t flen,
bool userdata)
{
+ struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
struct rb_node *rbp;
ASSERT(flen > 0);
- spin_lock(&pag->pagb_lock);
+ spin_lock(&eb->eb_lock);
restart:
- rbp = pag->pagb_tree.rb_node;
+ rbp = eb->eb_tree.rb_node;
while (rbp) {
struct xfs_extent_busy *busyp =
rb_entry(rbp, struct xfs_extent_busy, rb_node);
@@ -323,11 +332,11 @@ restart:
continue;
}
- if (!xfs_extent_busy_update_extent(mp, pag, busyp, fbno, flen,
+ if (!xfs_extent_busy_update_extent(xg, busyp, fbno, flen,
userdata))
goto restart;
}
- spin_unlock(&pag->pagb_lock);
+ spin_unlock(&eb->eb_lock);
}
/*
@@ -336,7 +345,7 @@ restart:
* args->minlen no suitable extent could be found, and the higher level
* code needs to force out the log and retry the allocation.
*
- * Return the current busy generation for the AG if the extent is busy. This
+ * Return the current busy generation for the group if the extent is busy. This
* value can be used to wait for at least one of the currently busy extents
* to be cleared. Note that the busy list is not guaranteed to be empty after
* the gen is woken. The state of a specific extent must always be confirmed
@@ -344,11 +353,14 @@ restart:
*/
bool
xfs_extent_busy_trim(
- struct xfs_alloc_arg *args,
+ struct xfs_group *xg,
+ xfs_extlen_t minlen,
+ xfs_extlen_t maxlen,
xfs_agblock_t *bno,
xfs_extlen_t *len,
unsigned *busy_gen)
{
+ struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
xfs_agblock_t fbno;
xfs_extlen_t flen;
struct rb_node *rbp;
@@ -356,11 +368,11 @@ xfs_extent_busy_trim(
ASSERT(*len > 0);
- spin_lock(&args->pag->pagb_lock);
+ spin_lock(&eb->eb_lock);
fbno = *bno;
flen = *len;
- rbp = args->pag->pagb_tree.rb_node;
- while (rbp && flen >= args->minlen) {
+ rbp = eb->eb_tree.rb_node;
+ while (rbp && flen >= minlen) {
struct xfs_extent_busy *busyp =
rb_entry(rbp, struct xfs_extent_busy, rb_node);
xfs_agblock_t fend = fbno + flen;
@@ -481,13 +493,13 @@ xfs_extent_busy_trim(
* good chance subsequent allocations will be
* contiguous.
*/
- if (bbno - fbno >= args->maxlen) {
+ if (bbno - fbno >= maxlen) {
/* left candidate fits perfect */
fend = bbno;
- } else if (fend - bend >= args->maxlen * 4) {
+ } else if (fend - bend >= maxlen * 4) {
/* right candidate has enough free space */
fbno = bend;
- } else if (bbno - fbno >= args->minlen) {
+ } else if (bbno - fbno >= minlen) {
/* left candidate fits minimum requirement */
fend = bbno;
} else {
@@ -500,14 +512,13 @@ xfs_extent_busy_trim(
out:
if (fbno != *bno || flen != *len) {
- trace_xfs_extent_busy_trim(args->mp, args->agno, *bno, *len,
- fbno, flen);
+ trace_xfs_extent_busy_trim(xg, *bno, *len, fbno, flen);
*bno = fbno;
*len = flen;
- *busy_gen = args->pag->pagb_gen;
+ *busy_gen = eb->eb_gen;
ret = true;
}
- spin_unlock(&args->pag->pagb_lock);
+ spin_unlock(&eb->eb_lock);
return ret;
fail:
/*
@@ -520,22 +531,24 @@ fail:
static bool
xfs_extent_busy_clear_one(
- struct xfs_perag *pag,
struct xfs_extent_busy *busyp,
bool do_discard)
{
+ struct xfs_extent_busy_tree *eb = busyp->group->xg_busy_extents;
+
if (busyp->length) {
if (do_discard &&
!(busyp->flags & XFS_EXTENT_BUSY_SKIP_DISCARD)) {
busyp->flags = XFS_EXTENT_BUSY_DISCARDED;
return false;
}
- trace_xfs_extent_busy_clear(pag->pag_mount, busyp->agno,
- busyp->bno, busyp->length);
- rb_erase(&busyp->rb_node, &pag->pagb_tree);
+ trace_xfs_extent_busy_clear(busyp->group, busyp->bno,
+ busyp->length);
+ rb_erase(&busyp->rb_node, &eb->eb_tree);
}
list_del_init(&busyp->list);
+ xfs_group_put(busyp->group);
kfree(busyp);
return true;
}
@@ -547,7 +560,6 @@ xfs_extent_busy_clear_one(
*/
void
xfs_extent_busy_clear(
- struct xfs_mount *mp,
struct list_head *list,
bool do_discard)
{
@@ -558,30 +570,30 @@ xfs_extent_busy_clear(
return;
do {
+ struct xfs_group *xg = xfs_group_hold(busyp->group);
+ struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
bool wakeup = false;
- struct xfs_perag *pag;
- pag = xfs_perag_get(mp, busyp->agno);
- spin_lock(&pag->pagb_lock);
+ spin_lock(&eb->eb_lock);
do {
next = list_next_entry(busyp, list);
- if (xfs_extent_busy_clear_one(pag, busyp, do_discard))
+ if (xfs_extent_busy_clear_one(busyp, do_discard))
wakeup = true;
busyp = next;
} while (!list_entry_is_head(busyp, list, list) &&
- busyp->agno == pag->pag_agno);
+ busyp->group == xg);
if (wakeup) {
- pag->pagb_gen++;
- wake_up_all(&pag->pagb_wait);
+ eb->eb_gen++;
+ wake_up_all(&eb->eb_wait);
}
- spin_unlock(&pag->pagb_lock);
- xfs_perag_put(pag);
+ spin_unlock(&eb->eb_lock);
+ xfs_group_put(xg);
} while (!list_entry_is_head(busyp, list, list));
}
/*
- * Flush out all busy extents for this AG.
+ * Flush out all busy extents for this group.
*
* If the current transaction is holding busy extents, the caller may not want
* to wait for committed busy extents to resolve. If we are being told just to
@@ -597,10 +609,11 @@ xfs_extent_busy_clear(
int
xfs_extent_busy_flush(
struct xfs_trans *tp,
- struct xfs_perag *pag,
+ struct xfs_group *xg,
unsigned busy_gen,
uint32_t alloc_flags)
{
+ struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
DEFINE_WAIT (wait);
int error;
@@ -613,7 +626,7 @@ xfs_extent_busy_flush(
if (alloc_flags & XFS_ALLOC_FLAG_TRYFLUSH)
return 0;
- if (busy_gen != READ_ONCE(pag->pagb_gen))
+ if (busy_gen != READ_ONCE(eb->eb_gen))
return 0;
if (alloc_flags & XFS_ALLOC_FLAG_FREEING)
@@ -622,37 +635,49 @@ xfs_extent_busy_flush(
/* Wait for committed busy extents to resolve. */
do {
- prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
- if (busy_gen != READ_ONCE(pag->pagb_gen))
+ prepare_to_wait(&eb->eb_wait, &wait, TASK_KILLABLE);
+ if (busy_gen != READ_ONCE(eb->eb_gen))
break;
schedule();
} while (1);
- finish_wait(&pag->pagb_wait, &wait);
+ finish_wait(&eb->eb_wait, &wait);
return 0;
}
+static void
+xfs_extent_busy_wait_group(
+ struct xfs_group *xg)
+{
+ DEFINE_WAIT (wait);
+ struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
+
+ do {
+ prepare_to_wait(&eb->eb_wait, &wait, TASK_KILLABLE);
+ if (RB_EMPTY_ROOT(&eb->eb_tree))
+ break;
+ schedule();
+ } while (1);
+ finish_wait(&eb->eb_wait, &wait);
+}
+
void
xfs_extent_busy_wait_all(
struct xfs_mount *mp)
{
- struct xfs_perag *pag;
- DEFINE_WAIT (wait);
- xfs_agnumber_t agno;
+ struct xfs_perag *pag = NULL;
+ struct xfs_rtgroup *rtg = NULL;
- for_each_perag(mp, agno, pag) {
- do {
- prepare_to_wait(&pag->pagb_wait, &wait, TASK_KILLABLE);
- if (RB_EMPTY_ROOT(&pag->pagb_tree))
- break;
- schedule();
- } while (1);
- finish_wait(&pag->pagb_wait, &wait);
- }
+ while ((pag = xfs_perag_next(mp, pag)))
+ xfs_extent_busy_wait_group(pag_group(pag));
+
+ if (xfs_has_rtgroups(mp))
+ while ((rtg = xfs_rtgroup_next(mp, rtg)))
+ xfs_extent_busy_wait_group(rtg_group(rtg));
}
/*
- * Callback for list_sort to sort busy extents by the AG they reside in.
+ * Callback for list_sort to sort busy extents by the group they reside in.
*/
int
xfs_extent_busy_ag_cmp(
@@ -666,21 +691,38 @@ xfs_extent_busy_ag_cmp(
container_of(l2, struct xfs_extent_busy, list);
s32 diff;
- diff = b1->agno - b2->agno;
+ diff = b1->group->xg_gno - b2->group->xg_gno;
if (!diff)
diff = b1->bno - b2->bno;
return diff;
}
-/* Are there any busy extents in this AG? */
+/* Are there any busy extents in this group? */
bool
xfs_extent_busy_list_empty(
- struct xfs_perag *pag)
+ struct xfs_group *xg,
+ unsigned *busy_gen)
{
+ struct xfs_extent_busy_tree *eb = xg->xg_busy_extents;
bool res;
- spin_lock(&pag->pagb_lock);
- res = RB_EMPTY_ROOT(&pag->pagb_tree);
- spin_unlock(&pag->pagb_lock);
+ spin_lock(&eb->eb_lock);
+ res = RB_EMPTY_ROOT(&eb->eb_tree);
+ *busy_gen = READ_ONCE(eb->eb_gen);
+ spin_unlock(&eb->eb_lock);
return res;
}
+
+struct xfs_extent_busy_tree *
+xfs_extent_busy_alloc(void)
+{
+ struct xfs_extent_busy_tree *eb;
+
+ eb = kzalloc(sizeof(*eb), GFP_KERNEL);
+ if (!eb)
+ return NULL;
+ spin_lock_init(&eb->eb_lock);
+ init_waitqueue_head(&eb->eb_wait);
+ eb->eb_tree = RB_ROOT;
+ return eb;
+}
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h
index 470032de3139..f069b04e8ea1 100644
--- a/fs/xfs/xfs_extent_busy.h
+++ b/fs/xfs/xfs_extent_busy.h
@@ -8,19 +8,18 @@
#ifndef __XFS_EXTENT_BUSY_H__
#define __XFS_EXTENT_BUSY_H__
+struct xfs_group;
struct xfs_mount;
-struct xfs_perag;
struct xfs_trans;
-struct xfs_alloc_arg;
/*
- * Busy block/extent entry. Indexed by a rbtree in perag to mark blocks that
- * have been freed but whose transactions aren't committed to disk yet.
+ * Busy block/extent entry. Indexed by a rbtree in the group to mark blocks
+ * that have been freed but whose transactions aren't committed to disk yet.
*/
struct xfs_extent_busy {
- struct rb_node rb_node; /* ag by-bno indexed search tree */
+ struct rb_node rb_node; /* group by-bno indexed search tree */
struct list_head list; /* transaction busy extent list */
- xfs_agnumber_t agno;
+ struct xfs_group *group;
xfs_agblock_t bno;
xfs_extlen_t length;
unsigned int flags;
@@ -33,7 +32,6 @@ struct xfs_extent_busy {
* to discard completion.
*/
struct xfs_busy_extents {
- struct xfs_mount *mount;
struct list_head extent_list;
struct work_struct endio_work;
@@ -45,46 +43,29 @@ struct xfs_busy_extents {
void *owner;
};
-void
-xfs_extent_busy_insert(struct xfs_trans *tp, struct xfs_perag *pag,
- xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags);
-
-void
-xfs_extent_busy_insert_discard(struct xfs_perag *pag, xfs_agblock_t bno,
- xfs_extlen_t len, struct list_head *busy_list);
-
-void
-xfs_extent_busy_clear(struct xfs_mount *mp, struct list_head *list,
- bool do_discard);
-
-int
-xfs_extent_busy_search(struct xfs_mount *mp, struct xfs_perag *pag,
- xfs_agblock_t bno, xfs_extlen_t len);
-
-void
-xfs_extent_busy_reuse(struct xfs_mount *mp, struct xfs_perag *pag,
- xfs_agblock_t fbno, xfs_extlen_t flen, bool userdata);
-
-bool
-xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno,
- xfs_extlen_t *len, unsigned *busy_gen);
-
-int
-xfs_extent_busy_flush(struct xfs_trans *tp, struct xfs_perag *pag,
+void xfs_extent_busy_insert(struct xfs_trans *tp, struct xfs_group *xg,
+ xfs_agblock_t bno, xfs_extlen_t len, unsigned int flags);
+void xfs_extent_busy_insert_discard(struct xfs_group *xg, xfs_agblock_t bno,
+ xfs_extlen_t len, struct list_head *busy_list);
+void xfs_extent_busy_clear(struct list_head *list, bool do_discard);
+int xfs_extent_busy_search(struct xfs_group *xg, xfs_agblock_t bno,
+ xfs_extlen_t len);
+void xfs_extent_busy_reuse(struct xfs_group *xg, xfs_agblock_t fbno,
+ xfs_extlen_t flen, bool userdata);
+bool xfs_extent_busy_trim(struct xfs_group *xg, xfs_extlen_t minlen,
+ xfs_extlen_t maxlen, xfs_agblock_t *bno, xfs_extlen_t *len,
+ unsigned *busy_gen);
+int xfs_extent_busy_flush(struct xfs_trans *tp, struct xfs_group *xg,
unsigned busy_gen, uint32_t alloc_flags);
+void xfs_extent_busy_wait_all(struct xfs_mount *mp);
+bool xfs_extent_busy_list_empty(struct xfs_group *xg, unsigned int *busy_gen);
+struct xfs_extent_busy_tree *xfs_extent_busy_alloc(void);
-void
-xfs_extent_busy_wait_all(struct xfs_mount *mp);
-
-int
-xfs_extent_busy_ag_cmp(void *priv, const struct list_head *a,
- const struct list_head *b);
-
+int xfs_extent_busy_ag_cmp(void *priv, const struct list_head *a,
+ const struct list_head *b);
static inline void xfs_extent_busy_sort(struct list_head *list)
{
list_sort(NULL, list, xfs_extent_busy_ag_cmp);
}
-bool xfs_extent_busy_list_empty(struct xfs_perag *pag);
-
#endif /* __XFS_EXTENT_BUSY_H__ */
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index abffc74a924f..a25c713ff888 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -25,6 +25,10 @@
#include "xfs_error.h"
#include "xfs_log_priv.h"
#include "xfs_log_recover.h"
+#include "xfs_rtalloc.h"
+#include "xfs_inode.h"
+#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
struct kmem_cache *xfs_efi_cache;
struct kmem_cache *xfs_efd_cache;
@@ -95,16 +99,15 @@ xfs_efi_item_format(
ASSERT(atomic_read(&efip->efi_next_extent) ==
efip->efi_format.efi_nextents);
+ ASSERT(lip->li_type == XFS_LI_EFI || lip->li_type == XFS_LI_EFI_RT);
- efip->efi_format.efi_type = XFS_LI_EFI;
+ efip->efi_format.efi_type = lip->li_type;
efip->efi_format.efi_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT,
- &efip->efi_format,
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT, &efip->efi_format,
xfs_efi_log_format_sizeof(efip->efi_format.efi_nextents));
}
-
/*
* The unpin operation is the last place an EFI is manipulated in the log. It is
* either inserted in the AIL or aborted in the event of a log I/O error. In
@@ -140,12 +143,14 @@ xfs_efi_item_release(
STATIC struct xfs_efi_log_item *
xfs_efi_init(
struct xfs_mount *mp,
+ unsigned short item_type,
uint nextents)
-
{
struct xfs_efi_log_item *efip;
+ ASSERT(item_type == XFS_LI_EFI || item_type == XFS_LI_EFI_RT);
ASSERT(nextents > 0);
+
if (nextents > XFS_EFI_MAX_FAST_EXTENTS) {
efip = kzalloc(xfs_efi_log_item_sizeof(nextents),
GFP_KERNEL | __GFP_NOFAIL);
@@ -154,7 +159,7 @@ xfs_efi_init(
GFP_KERNEL | __GFP_NOFAIL);
}
- xfs_log_item_init(mp, &efip->efi_item, XFS_LI_EFI, &xfs_efi_item_ops);
+ xfs_log_item_init(mp, &efip->efi_item, item_type, &xfs_efi_item_ops);
efip->efi_format.efi_nextents = nextents;
efip->efi_format.efi_id = (uintptr_t)(void *)efip;
atomic_set(&efip->efi_next_extent, 0);
@@ -264,12 +269,12 @@ xfs_efd_item_format(
struct xfs_log_iovec *vecp = NULL;
ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents);
+ ASSERT(lip->li_type == XFS_LI_EFD || lip->li_type == XFS_LI_EFD_RT);
- efdp->efd_format.efd_type = XFS_LI_EFD;
+ efdp->efd_format.efd_type = lip->li_type;
efdp->efd_format.efd_size = 1;
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT,
- &efdp->efd_format,
+ xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT, &efdp->efd_format,
xfs_efd_log_format_sizeof(efdp->efd_format.efd_nextents));
}
@@ -308,6 +313,14 @@ static inline struct xfs_extent_free_item *xefi_entry(const struct list_head *e)
return list_entry(e, struct xfs_extent_free_item, xefi_list);
}
+static inline bool
+xfs_efi_item_isrt(const struct xfs_log_item *lip)
+{
+ ASSERT(lip->li_type == XFS_LI_EFI || lip->li_type == XFS_LI_EFI_RT);
+
+ return lip->li_type == XFS_LI_EFI_RT;
+}
+
/*
* Fill the EFD with all extents from the EFI when we need to roll the
* transaction and continue with a new EFI.
@@ -362,7 +375,7 @@ xfs_extent_free_diff_items(
struct xfs_extent_free_item *ra = xefi_entry(a);
struct xfs_extent_free_item *rb = xefi_entry(b);
- return ra->xefi_pag->pag_agno - rb->xefi_pag->pag_agno;
+ return ra->xefi_group->xg_gno - rb->xefi_group->xg_gno;
}
/* Log a free extent to the intent item. */
@@ -388,18 +401,20 @@ xfs_extent_free_log_item(
}
static struct xfs_log_item *
-xfs_extent_free_create_intent(
+__xfs_extent_free_create_intent(
struct xfs_trans *tp,
struct list_head *items,
unsigned int count,
- bool sort)
+ bool sort,
+ unsigned short item_type)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_efi_log_item *efip = xfs_efi_init(mp, count);
+ struct xfs_efi_log_item *efip;
struct xfs_extent_free_item *xefi;
ASSERT(count > 0);
+ efip = xfs_efi_init(mp, item_type, count);
if (sort)
list_sort(mp, items, xfs_extent_free_diff_items);
list_for_each_entry(xefi, items, xefi_list)
@@ -407,6 +422,23 @@ xfs_extent_free_create_intent(
return &efip->efi_item;
}
+static struct xfs_log_item *
+xfs_extent_free_create_intent(
+ struct xfs_trans *tp,
+ struct list_head *items,
+ unsigned int count,
+ bool sort)
+{
+ return __xfs_extent_free_create_intent(tp, items, count, sort,
+ XFS_LI_EFI);
+}
+
+static inline unsigned short
+xfs_efd_type_from_efi(const struct xfs_efi_log_item *efip)
+{
+ return xfs_efi_item_isrt(&efip->efi_item) ? XFS_LI_EFD_RT : XFS_LI_EFD;
+}
+
/* Get an EFD so we can process all the free extents. */
static struct xfs_log_item *
xfs_extent_free_create_done(
@@ -427,8 +459,8 @@ xfs_extent_free_create_done(
GFP_KERNEL | __GFP_NOFAIL);
}
- xfs_log_item_init(tp->t_mountp, &efdp->efd_item, XFS_LI_EFD,
- &xfs_efd_item_ops);
+ xfs_log_item_init(tp->t_mountp, &efdp->efd_item,
+ xfs_efd_type_from_efi(efip), &xfs_efd_item_ops);
efdp->efd_efip = efip;
efdp->efd_format.efd_nextents = count;
efdp->efd_format.efd_efi_id = efip->efi_format.efi_id;
@@ -436,6 +468,17 @@ xfs_extent_free_create_done(
return &efdp->efd_item;
}
+static inline const struct xfs_defer_op_type *
+xefi_ops(
+ struct xfs_extent_free_item *xefi)
+{
+ if (xfs_efi_is_realtime(xefi))
+ return &xfs_rtextent_free_defer_type;
+ if (xefi->xefi_agresv == XFS_AG_RESV_AGFL)
+ return &xfs_agfl_free_defer_type;
+ return &xfs_extent_free_defer_type;
+}
+
/* Add this deferred EFI to the transaction. */
void
xfs_extent_free_defer_add(
@@ -445,15 +488,11 @@ xfs_extent_free_defer_add(
{
struct xfs_mount *mp = tp->t_mountp;
- trace_xfs_extent_free_defer(mp, xefi);
+ xefi->xefi_group = xfs_group_intent_get(mp, xefi->xefi_startblock,
+ xfs_efi_is_realtime(xefi) ? XG_TYPE_RTG : XG_TYPE_AG);
- xefi->xefi_pag = xfs_perag_intent_get(mp, xefi->xefi_startblock);
- if (xefi->xefi_agresv == XFS_AG_RESV_AGFL)
- *dfpp = xfs_defer_add(tp, &xefi->xefi_list,
- &xfs_agfl_free_defer_type);
- else
- *dfpp = xfs_defer_add(tp, &xefi->xefi_list,
- &xfs_extent_free_defer_type);
+ trace_xfs_extent_free_defer(mp, xefi);
+ *dfpp = xfs_defer_add(tp, &xefi->xefi_list, xefi_ops(xefi));
}
/* Cancel a free extent. */
@@ -463,7 +502,7 @@ xfs_extent_free_cancel_item(
{
struct xfs_extent_free_item *xefi = xefi_entry(item);
- xfs_perag_intent_put(xefi->xefi_pag);
+ xfs_group_intent_put(xefi->xefi_group);
kmem_cache_free(xfs_extfree_item_cache, xefi);
}
@@ -499,7 +538,7 @@ xfs_extent_free_finish_item(
* in this EFI to the EFD so this works correctly.
*/
if (!(xefi->xefi_flags & XFS_EFI_CANCELLED))
- error = __xfs_free_extent(tp, xefi->xefi_pag, agbno,
+ error = __xfs_free_extent(tp, to_perag(xefi->xefi_group), agbno,
xefi->xefi_blockcount, &oinfo, xefi->xefi_agresv,
xefi->xefi_flags & XFS_EFI_SKIP_DISCARD);
if (error == -EAGAIN) {
@@ -545,10 +584,10 @@ xfs_agfl_free_finish_item(
trace_xfs_agfl_free_deferred(mp, xefi);
- error = xfs_alloc_read_agf(xefi->xefi_pag, tp, 0, &agbp);
+ error = xfs_alloc_read_agf(to_perag(xefi->xefi_group), tp, 0, &agbp);
if (!error)
- error = xfs_free_ag_extent(tp, agbp, xefi->xefi_pag->pag_agno,
- agbno, 1, &oinfo, XFS_AG_RESV_AGFL);
+ error = xfs_free_ag_extent(tp, agbp, agbno, 1, &oinfo,
+ XFS_AG_RESV_AGFL);
xfs_efd_add_extent(efdp, xefi);
xfs_extent_free_cancel_item(&xefi->xefi_list);
@@ -559,8 +598,12 @@ xfs_agfl_free_finish_item(
static inline bool
xfs_efi_validate_ext(
struct xfs_mount *mp,
+ bool isrt,
struct xfs_extent *extp)
{
+ if (isrt)
+ return xfs_verify_rtbext(mp, extp->ext_start, extp->ext_len);
+
return xfs_verify_fsbext(mp, extp->ext_start, extp->ext_len);
}
@@ -568,6 +611,7 @@ static inline void
xfs_efi_recover_work(
struct xfs_mount *mp,
struct xfs_defer_pending *dfp,
+ bool isrt,
struct xfs_extent *extp)
{
struct xfs_extent_free_item *xefi;
@@ -578,7 +622,10 @@ xfs_efi_recover_work(
xefi->xefi_blockcount = extp->ext_len;
xefi->xefi_agresv = XFS_AG_RESV_NONE;
xefi->xefi_owner = XFS_RMAP_OWN_UNKNOWN;
- xefi->xefi_pag = xfs_perag_intent_get(mp, extp->ext_start);
+ xefi->xefi_group = xfs_group_intent_get(mp, extp->ext_start,
+ isrt ? XG_TYPE_RTG : XG_TYPE_AG);
+ if (isrt)
+ xefi->xefi_flags |= XFS_EFI_REALTIME;
xfs_defer_add_item(dfp, &xefi->xefi_list);
}
@@ -599,14 +646,15 @@ xfs_extent_free_recover_work(
struct xfs_trans *tp;
int i;
int error = 0;
+ bool isrt = xfs_efi_item_isrt(lip);
/*
- * First check the validity of the extents described by the
- * EFI. If any are bad, then assume that all are bad and
- * just toss the EFI.
+ * First check the validity of the extents described by the EFI. If
+ * any are bad, then assume that all are bad and just toss the EFI.
+ * Mixing RT and non-RT extents in the same EFI item is not allowed.
*/
for (i = 0; i < efip->efi_format.efi_nextents; i++) {
- if (!xfs_efi_validate_ext(mp,
+ if (!xfs_efi_validate_ext(mp, isrt,
&efip->efi_format.efi_extents[i])) {
XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
&efip->efi_format,
@@ -614,7 +662,8 @@ xfs_extent_free_recover_work(
return -EFSCORRUPTED;
}
- xfs_efi_recover_work(mp, dfp, &efip->efi_format.efi_extents[i]);
+ xfs_efi_recover_work(mp, dfp, isrt,
+ &efip->efi_format.efi_extents[i]);
}
resv = xlog_recover_resv(&M_RES(mp)->tr_itruncate);
@@ -652,10 +701,12 @@ xfs_extent_free_relog_intent(
count = EFI_ITEM(intent)->efi_format.efi_nextents;
extp = EFI_ITEM(intent)->efi_format.efi_extents;
+ ASSERT(intent->li_type == XFS_LI_EFI || intent->li_type == XFS_LI_EFI_RT);
+
efdp->efd_next_extent = count;
memcpy(efdp->efd_format.efd_extents, extp, count * sizeof(*extp));
- efip = xfs_efi_init(tp->t_mountp, count);
+ efip = xfs_efi_init(tp->t_mountp, intent->li_type, count);
memcpy(efip->efi_format.efi_extents, extp, count * sizeof(*extp));
atomic_set(&efip->efi_next_extent, count);
@@ -687,6 +738,72 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
.relog_intent = xfs_extent_free_relog_intent,
};
+#ifdef CONFIG_XFS_RT
+/* Create a realtime extent freeing intent. */
+static struct xfs_log_item *
+xfs_rtextent_free_create_intent(
+ struct xfs_trans *tp,
+ struct list_head *items,
+ unsigned int count,
+ bool sort)
+{
+ return __xfs_extent_free_create_intent(tp, items, count, sort,
+ XFS_LI_EFI_RT);
+}
+
+/* Process a free realtime extent. */
+STATIC int
+xfs_rtextent_free_finish_item(
+ struct xfs_trans *tp,
+ struct xfs_log_item *done,
+ struct list_head *item,
+ struct xfs_btree_cur **state)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_extent_free_item *xefi = xefi_entry(item);
+ struct xfs_efd_log_item *efdp = EFD_ITEM(done);
+ struct xfs_rtgroup **rtgp = (struct xfs_rtgroup **)state;
+ int error = 0;
+
+ trace_xfs_extent_free_deferred(mp, xefi);
+
+ if (!(xefi->xefi_flags & XFS_EFI_CANCELLED)) {
+ if (*rtgp != to_rtg(xefi->xefi_group)) {
+ *rtgp = to_rtg(xefi->xefi_group);
+ xfs_rtgroup_lock(*rtgp, XFS_RTGLOCK_BITMAP);
+ xfs_rtgroup_trans_join(tp, *rtgp,
+ XFS_RTGLOCK_BITMAP);
+ }
+ error = xfs_rtfree_blocks(tp, *rtgp,
+ xefi->xefi_startblock, xefi->xefi_blockcount);
+ }
+ if (error == -EAGAIN) {
+ xfs_efd_from_efi(efdp);
+ return error;
+ }
+
+ xfs_efd_add_extent(efdp, xefi);
+ xfs_extent_free_cancel_item(item);
+ return error;
+}
+
+const struct xfs_defer_op_type xfs_rtextent_free_defer_type = {
+ .name = "rtextent_free",
+ .max_items = XFS_EFI_MAX_FAST_EXTENTS,
+ .create_intent = xfs_rtextent_free_create_intent,
+ .abort_intent = xfs_extent_free_abort_intent,
+ .create_done = xfs_extent_free_create_done,
+ .finish_item = xfs_rtextent_free_finish_item,
+ .cancel_item = xfs_extent_free_cancel_item,
+ .recover_work = xfs_extent_free_recover_work,
+ .relog_intent = xfs_extent_free_relog_intent,
+};
+#else
+const struct xfs_defer_op_type xfs_rtextent_free_defer_type = {
+ .name = "rtextent_free",
+};
+#endif /* CONFIG_XFS_RT */
+
STATIC bool
xfs_efi_item_match(
struct xfs_log_item *lip,
@@ -731,7 +848,7 @@ xlog_recover_efi_commit_pass2(
return -EFSCORRUPTED;
}
- efip = xfs_efi_init(mp, efi_formatp->efi_nextents);
+ efip = xfs_efi_init(mp, ITEM_TYPE(item), efi_formatp->efi_nextents);
error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
if (error) {
xfs_efi_item_free(efip);
@@ -749,6 +866,58 @@ const struct xlog_recover_item_ops xlog_efi_item_ops = {
.commit_pass2 = xlog_recover_efi_commit_pass2,
};
+#ifdef CONFIG_XFS_RT
+STATIC int
+xlog_recover_rtefi_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_mount *mp = log->l_mp;
+ struct xfs_efi_log_item *efip;
+ struct xfs_efi_log_format *efi_formatp;
+ int error;
+
+ efi_formatp = item->ri_buf[0].i_addr;
+
+ if (item->ri_buf[0].i_len < xfs_efi_log_format_sizeof(0)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+ }
+
+ efip = xfs_efi_init(mp, ITEM_TYPE(item), efi_formatp->efi_nextents);
+ error = xfs_efi_copy_format(&item->ri_buf[0], &efip->efi_format);
+ if (error) {
+ xfs_efi_item_free(efip);
+ return error;
+ }
+ atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
+
+ xlog_recover_intent_item(log, &efip->efi_item, lsn,
+ &xfs_rtextent_free_defer_type);
+ return 0;
+}
+#else
+STATIC int
+xlog_recover_rtefi_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ item->ri_buf[0].i_addr, item->ri_buf[0].i_len);
+ return -EFSCORRUPTED;
+}
+#endif
+
+const struct xlog_recover_item_ops xlog_rtefi_item_ops = {
+ .item_type = XFS_LI_EFI_RT,
+ .commit_pass2 = xlog_recover_rtefi_commit_pass2,
+};
+
/*
* This routine is called when an EFD format structure is found in a committed
* transaction in the log. Its purpose is to cancel the corresponding EFI if it
@@ -791,3 +960,44 @@ const struct xlog_recover_item_ops xlog_efd_item_ops = {
.item_type = XFS_LI_EFD,
.commit_pass2 = xlog_recover_efd_commit_pass2,
};
+
+#ifdef CONFIG_XFS_RT
+STATIC int
+xlog_recover_rtefd_commit_pass2(
+ struct xlog *log,
+ struct list_head *buffer_list,
+ struct xlog_recover_item *item,
+ xfs_lsn_t lsn)
+{
+ struct xfs_efd_log_format *efd_formatp;
+ int buflen = item->ri_buf[0].i_len;
+
+ efd_formatp = item->ri_buf[0].i_addr;
+
+ if (buflen < sizeof(struct xfs_efd_log_format)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ efd_formatp, buflen);
+ return -EFSCORRUPTED;
+ }
+
+ if (item->ri_buf[0].i_len != xfs_efd_log_format32_sizeof(
+ efd_formatp->efd_nextents) &&
+ item->ri_buf[0].i_len != xfs_efd_log_format64_sizeof(
+ efd_formatp->efd_nextents)) {
+ XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, log->l_mp,
+ efd_formatp, buflen);
+ return -EFSCORRUPTED;
+ }
+
+ xlog_recover_release_intent(log, XFS_LI_EFI_RT,
+ efd_formatp->efd_efi_id);
+ return 0;
+}
+#else
+# define xlog_recover_rtefd_commit_pass2 xlog_recover_rtefi_commit_pass2
+#endif
+
+const struct xlog_recover_item_ops xlog_rtefd_item_ops = {
+ .item_type = XFS_LI_EFD_RT,
+ .commit_pass2 = xlog_recover_rtefd_commit_pass2,
+};
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index b19916b11fd5..4a0b7de4f7ae 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -852,6 +852,20 @@ xfs_file_write_iter(
if (IS_DAX(inode))
return xfs_file_dax_write(iocb, from);
+ if (iocb->ki_flags & IOCB_ATOMIC) {
+ /*
+ * Currently only atomic writing of a single FS block is
+ * supported. It would be possible to atomic write smaller than
+ * a FS block, but there is no requirement to support this.
+ * Note that iomap also does not support this yet.
+ */
+ if (ocount != ip->i_mount->m_sb.sb_blocksize)
+ return -EINVAL;
+ ret = generic_atomic_write_valid(iocb, from);
+ if (ret)
+ return ret;
+ }
+
if (iocb->ki_flags & IOCB_DIRECT) {
/*
* Allow a directio write to fall back to a buffered
@@ -1239,6 +1253,8 @@ xfs_file_open(
if (xfs_is_shutdown(XFS_M(inode->i_sb)))
return -EIO;
file->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
+ if (xfs_inode_can_atomicwrite(XFS_I(inode)))
+ file->f_mode |= FMODE_CAN_ATOMIC_WRITE;
return generic_file_open(inode, file);
}
@@ -1425,6 +1441,8 @@ xfs_dax_read_fault(
struct xfs_inode *ip = XFS_I(file_inode(vmf->vma->vm_file));
vm_fault_t ret;
+ trace_xfs_read_fault(ip, order);
+
xfs_ilock(ip, XFS_MMAPLOCK_SHARED);
ret = xfs_dax_fault_locked(vmf, order, false);
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
@@ -1432,6 +1450,16 @@ xfs_dax_read_fault(
return ret;
}
+/*
+ * Locking for serialisation of IO during page faults. This results in a lock
+ * ordering of:
+ *
+ * mmap_lock (MM)
+ * sb_start_pagefault(vfs, freeze)
+ * invalidate_lock (vfs/XFS_MMAPLOCK - truncate serialisation)
+ * page_lock (MM)
+ * i_lock (XFS - extent map serialisation)
+ */
static vm_fault_t
xfs_write_fault(
struct vm_fault *vmf,
@@ -1442,6 +1470,8 @@ xfs_write_fault(
unsigned int lock_mode = XFS_MMAPLOCK_SHARED;
vm_fault_t ret;
+ trace_xfs_write_fault(ip, order);
+
sb_start_pagefault(inode->i_sb);
file_update_time(vmf->vma->vm_file);
@@ -1460,40 +1490,13 @@ xfs_write_fault(
if (IS_DAX(inode))
ret = xfs_dax_fault_locked(vmf, order, true);
else
- ret = iomap_page_mkwrite(vmf, &xfs_page_mkwrite_iomap_ops);
+ ret = iomap_page_mkwrite(vmf, &xfs_buffered_write_iomap_ops);
xfs_iunlock(ip, lock_mode);
sb_end_pagefault(inode->i_sb);
return ret;
}
-/*
- * Locking for serialisation of IO during page faults. This results in a lock
- * ordering of:
- *
- * mmap_lock (MM)
- * sb_start_pagefault(vfs, freeze)
- * invalidate_lock (vfs/XFS_MMAPLOCK - truncate serialisation)
- * page_lock (MM)
- * i_lock (XFS - extent map serialisation)
- */
-static vm_fault_t
-__xfs_filemap_fault(
- struct vm_fault *vmf,
- unsigned int order,
- bool write_fault)
-{
- struct inode *inode = file_inode(vmf->vma->vm_file);
-
- trace_xfs_filemap_fault(XFS_I(inode), order, write_fault);
-
- if (write_fault)
- return xfs_write_fault(vmf, order);
- if (IS_DAX(inode))
- return xfs_dax_read_fault(vmf, order);
- return filemap_fault(vmf);
-}
-
static inline bool
xfs_is_write_fault(
struct vm_fault *vmf)
@@ -1506,10 +1509,17 @@ static vm_fault_t
xfs_filemap_fault(
struct vm_fault *vmf)
{
+ struct inode *inode = file_inode(vmf->vma->vm_file);
+
/* DAX can shortcut the normal fault path on write faults! */
- return __xfs_filemap_fault(vmf, 0,
- IS_DAX(file_inode(vmf->vma->vm_file)) &&
- xfs_is_write_fault(vmf));
+ if (IS_DAX(inode)) {
+ if (xfs_is_write_fault(vmf))
+ return xfs_write_fault(vmf, 0);
+ return xfs_dax_read_fault(vmf, 0);
+ }
+
+ trace_xfs_read_fault(XFS_I(inode), 0);
+ return filemap_fault(vmf);
}
static vm_fault_t
@@ -1521,15 +1531,16 @@ xfs_filemap_huge_fault(
return VM_FAULT_FALLBACK;
/* DAX can shortcut the normal fault path on write faults! */
- return __xfs_filemap_fault(vmf, order,
- xfs_is_write_fault(vmf));
+ if (xfs_is_write_fault(vmf))
+ return xfs_write_fault(vmf, order);
+ return xfs_dax_read_fault(vmf, order);
}
static vm_fault_t
xfs_filemap_page_mkwrite(
struct vm_fault *vmf)
{
- return __xfs_filemap_fault(vmf, 0, true);
+ return xfs_write_fault(vmf, 0);
}
/*
@@ -1541,8 +1552,7 @@ static vm_fault_t
xfs_filemap_pfn_mkwrite(
struct vm_fault *vmf)
{
-
- return __xfs_filemap_fault(vmf, 0, true);
+ return xfs_write_fault(vmf, 0);
}
static const struct vm_operations_struct xfs_file_vm_ops = {
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 290ba8887d29..a961aa420c48 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -96,7 +96,7 @@ restart:
maxfree = pag->pagf_freeblks;
if (max_pag)
xfs_perag_rele(max_pag);
- atomic_inc(&pag->pag_active_ref);
+ atomic_inc(&pag_group(pag)->xg_active_ref);
max_pag = pag;
}
@@ -222,12 +222,12 @@ xfs_filestream_lookup_association(
* down immediately after we mark the lookup as done.
*/
pag = container_of(mru, struct xfs_fstrm_item, mru)->pag;
- atomic_inc(&pag->pag_active_ref);
+ atomic_inc(&pag_group(pag)->xg_active_ref);
xfs_mru_cache_done(mp->m_filestream);
trace_xfs_filestream_lookup(pag, ap->ip->i_ino);
- ap->blkno = XFS_AGB_TO_FSB(args->mp, pag->pag_agno, 0);
+ ap->blkno = xfs_agbno_to_fsb(pag, 0);
xfs_bmap_adjacent(ap);
/*
@@ -275,7 +275,7 @@ xfs_filestream_create_association(
struct xfs_fstrm_item *item =
container_of(mru, struct xfs_fstrm_item, mru);
- agno = (item->pag->pag_agno + 1) % mp->m_sb.sb_agcount;
+ agno = (pag_agno(item->pag) + 1) % mp->m_sb.sb_agcount;
xfs_fstrm_free_func(mp, mru);
} else if (xfs_is_inode32(mp)) {
xfs_agnumber_t rotorstep = xfs_rotorstep;
@@ -314,7 +314,7 @@ xfs_filestream_create_association(
if (!item)
goto out_put_fstrms;
- atomic_inc(&args->pag->pag_active_ref);
+ atomic_inc(&pag_group(args->pag)->xg_active_ref);
item->pag = args->pag;
error = xfs_mru_cache_insert(mp->m_filestream, pino, &item->mru);
if (error)
@@ -344,7 +344,6 @@ xfs_filestream_select_ag(
struct xfs_alloc_arg *args,
xfs_extlen_t *longest)
{
- struct xfs_mount *mp = args->mp;
struct xfs_inode *pip;
xfs_ino_t ino = 0;
int error = 0;
@@ -370,7 +369,7 @@ xfs_filestream_select_ag(
return error;
out_select:
- ap->blkno = XFS_AGB_TO_FSB(mp, args->pag->pag_agno, 0);
+ ap->blkno = xfs_agbno_to_fsb(args->pag, 0);
return 0;
}
diff --git a/fs/xfs/xfs_fsmap.c b/fs/xfs/xfs_fsmap.c
index ae18ab86e608..82f2e0dd2249 100644
--- a/fs/xfs/xfs_fsmap.c
+++ b/fs/xfs/xfs_fsmap.c
@@ -25,6 +25,7 @@
#include "xfs_alloc_btree.h"
#include "xfs_rtbitmap.h"
#include "xfs_ag.h"
+#include "xfs_rtgroup.h"
/* Convert an xfs_fsmap to an fsmap. */
static void
@@ -110,18 +111,18 @@ xfs_fsmap_owner_to_rmap(
/* Convert an rmapbt owner into an fsmap owner. */
static int
-xfs_fsmap_owner_from_rmap(
+xfs_fsmap_owner_from_frec(
struct xfs_fsmap *dest,
- const struct xfs_rmap_irec *src)
+ const struct xfs_fsmap_irec *frec)
{
dest->fmr_flags = 0;
- if (!XFS_RMAP_NON_INODE_OWNER(src->rm_owner)) {
- dest->fmr_owner = src->rm_owner;
+ if (!XFS_RMAP_NON_INODE_OWNER(frec->owner)) {
+ dest->fmr_owner = frec->owner;
return 0;
}
dest->fmr_flags |= FMR_OF_SPECIAL_OWNER;
- switch (src->rm_owner) {
+ switch (frec->owner) {
case XFS_RMAP_OWN_FS:
dest->fmr_owner = XFS_FMR_OWN_FS;
break;
@@ -158,7 +159,7 @@ struct xfs_getfsmap_info {
struct xfs_fsmap_head *head;
struct fsmap *fsmap_recs; /* mapping records */
struct xfs_buf *agf_bp; /* AGF, for refcount queries */
- struct xfs_perag *pag; /* AG info, if applicable */
+ struct xfs_group *group; /* group info, if applicable */
xfs_daddr_t next_daddr; /* next daddr we expect */
/* daddr of low fsmap key when we're using the rtbitmap */
xfs_daddr_t low_daddr;
@@ -203,7 +204,7 @@ STATIC int
xfs_getfsmap_is_shared(
struct xfs_trans *tp,
struct xfs_getfsmap_info *info,
- const struct xfs_rmap_irec *rec,
+ const struct xfs_fsmap_irec *frec,
bool *stat)
{
struct xfs_mount *mp = tp->t_mountp;
@@ -216,15 +217,17 @@ xfs_getfsmap_is_shared(
if (!xfs_has_reflink(mp))
return 0;
/* rt files will have no perag structure */
- if (!info->pag)
+ if (!info->group)
return 0;
/* Are there any shared blocks here? */
flen = 0;
- cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp, info->pag);
+ cur = xfs_refcountbt_init_cursor(mp, tp, info->agf_bp,
+ to_perag(info->group));
- error = xfs_refcount_find_shared(cur, rec->rm_startblock,
- rec->rm_blockcount, &fbno, &flen, false);
+ error = xfs_refcount_find_shared(cur, frec->rec_key,
+ XFS_BB_TO_FSBT(mp, frec->len_daddr), &fbno, &flen,
+ false);
xfs_btree_del_cursor(cur, error);
if (error)
@@ -249,15 +252,22 @@ xfs_getfsmap_format(
}
static inline bool
-xfs_getfsmap_rec_before_start(
+xfs_getfsmap_frec_before_start(
struct xfs_getfsmap_info *info,
- const struct xfs_rmap_irec *rec,
- xfs_daddr_t rec_daddr)
+ const struct xfs_fsmap_irec *frec)
{
if (info->low_daddr != XFS_BUF_DADDR_NULL)
- return rec_daddr < info->low_daddr;
- if (info->low.rm_blockcount)
- return xfs_rmap_compare(rec, &info->low) < 0;
+ return frec->start_daddr < info->low_daddr;
+ if (info->low.rm_blockcount) {
+ struct xfs_rmap_irec rec = {
+ .rm_startblock = frec->rec_key,
+ .rm_owner = frec->owner,
+ .rm_flags = frec->rm_flags,
+ };
+
+ return xfs_rmap_compare(&rec, &info->low) < 0;
+ }
+
return false;
}
@@ -270,61 +280,36 @@ STATIC int
xfs_getfsmap_helper(
struct xfs_trans *tp,
struct xfs_getfsmap_info *info,
- const struct xfs_rmap_irec *rec,
- xfs_daddr_t rec_daddr,
- xfs_daddr_t len_daddr)
+ const struct xfs_fsmap_irec *frec)
{
struct xfs_fsmap fmr;
struct xfs_mount *mp = tp->t_mountp;
bool shared;
- int error;
+ int error = 0;
if (fatal_signal_pending(current))
return -EINTR;
- if (len_daddr == 0)
- len_daddr = XFS_FSB_TO_BB(mp, rec->rm_blockcount);
-
/*
* Filter out records that start before our startpoint, if the
* caller requested that.
*/
- if (xfs_getfsmap_rec_before_start(info, rec, rec_daddr)) {
- rec_daddr += len_daddr;
- if (info->next_daddr < rec_daddr)
- info->next_daddr = rec_daddr;
- return 0;
- }
-
- /*
- * For an info->last query, we're looking for a gap between the last
- * mapping emitted and the high key specified by userspace. If the
- * user's query spans less than 1 fsblock, then info->high and
- * info->low will have the same rm_startblock, which causes rec_daddr
- * and next_daddr to be the same. Therefore, use the end_daddr that
- * we calculated from userspace's high key to synthesize the record.
- * Note that if the btree query found a mapping, there won't be a gap.
- */
- if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL)
- rec_daddr = info->end_daddr;
+ if (xfs_getfsmap_frec_before_start(info, frec))
+ goto out;
/* Are we just counting mappings? */
if (info->head->fmh_count == 0) {
if (info->head->fmh_entries == UINT_MAX)
return -ECANCELED;
- if (rec_daddr > info->next_daddr)
+ if (frec->start_daddr > info->next_daddr)
info->head->fmh_entries++;
if (info->last)
return 0;
info->head->fmh_entries++;
-
- rec_daddr += len_daddr;
- if (info->next_daddr < rec_daddr)
- info->next_daddr = rec_daddr;
- return 0;
+ goto out;
}
/*
@@ -332,7 +317,7 @@ xfs_getfsmap_helper(
* then we've found a gap. Report the gap as being owned by
* whatever the caller specified is the missing owner.
*/
- if (rec_daddr > info->next_daddr) {
+ if (frec->start_daddr > info->next_daddr) {
if (info->head->fmh_entries >= info->head->fmh_count)
return -ECANCELED;
@@ -340,7 +325,7 @@ xfs_getfsmap_helper(
fmr.fmr_physical = info->next_daddr;
fmr.fmr_owner = info->missing_owner;
fmr.fmr_offset = 0;
- fmr.fmr_length = rec_daddr - info->next_daddr;
+ fmr.fmr_length = frec->start_daddr - info->next_daddr;
fmr.fmr_flags = FMR_OF_SPECIAL_OWNER;
xfs_getfsmap_format(mp, &fmr, info);
}
@@ -353,23 +338,24 @@ xfs_getfsmap_helper(
return -ECANCELED;
trace_xfs_fsmap_mapping(mp, info->dev,
- info->pag ? info->pag->pag_agno : NULLAGNUMBER, rec);
+ info->group ? info->group->xg_gno : NULLAGNUMBER,
+ frec);
fmr.fmr_device = info->dev;
- fmr.fmr_physical = rec_daddr;
- error = xfs_fsmap_owner_from_rmap(&fmr, rec);
+ fmr.fmr_physical = frec->start_daddr;
+ error = xfs_fsmap_owner_from_frec(&fmr, frec);
if (error)
return error;
- fmr.fmr_offset = XFS_FSB_TO_BB(mp, rec->rm_offset);
- fmr.fmr_length = len_daddr;
- if (rec->rm_flags & XFS_RMAP_UNWRITTEN)
+ fmr.fmr_offset = XFS_FSB_TO_BB(mp, frec->offset);
+ fmr.fmr_length = frec->len_daddr;
+ if (frec->rm_flags & XFS_RMAP_UNWRITTEN)
fmr.fmr_flags |= FMR_OF_PREALLOC;
- if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
+ if (frec->rm_flags & XFS_RMAP_ATTR_FORK)
fmr.fmr_flags |= FMR_OF_ATTR_FORK;
- if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
+ if (frec->rm_flags & XFS_RMAP_BMBT_BLOCK)
fmr.fmr_flags |= FMR_OF_EXTENT_MAP;
if (fmr.fmr_flags == 0) {
- error = xfs_getfsmap_is_shared(tp, info, rec, &shared);
+ error = xfs_getfsmap_is_shared(tp, info, frec, &shared);
if (error)
return error;
if (shared)
@@ -378,28 +364,55 @@ xfs_getfsmap_helper(
xfs_getfsmap_format(mp, &fmr, info);
out:
- rec_daddr += len_daddr;
- if (info->next_daddr < rec_daddr)
- info->next_daddr = rec_daddr;
+ info->next_daddr = max(info->next_daddr,
+ frec->start_daddr + frec->len_daddr);
return 0;
}
+/*
+ * Fill in the daddr fields of @frec for a record located in group @xg and
+ * hand it to xfs_getfsmap_helper().
+ *
+ * @startblock is converted with xfs_gbno_to_daddr() and @blockcount with
+ * XFS_FSB_TO_BB(); the other @frec fields are left as the caller set them.
+ * Returns whatever xfs_getfsmap_helper() returns.
+ */
+static inline int
+xfs_getfsmap_group_helper(
+ struct xfs_getfsmap_info *info,
+ struct xfs_trans *tp,
+ struct xfs_group *xg,
+ xfs_agblock_t startblock,
+ xfs_extlen_t blockcount,
+ struct xfs_fsmap_irec *frec)
+{
+ /*
+ * For an info->last query, we're looking for a gap between the last
+ * mapping emitted and the high key specified by userspace. If the
+ * user's query spans less than 1 fsblock, then info->high and
+ * info->low will have the same rm_startblock, which causes start_daddr
+ * and next_daddr to be the same. Therefore, use the end_daddr that
+ * we calculated from userspace's high key to synthesize the record.
+ * Note that if the btree query found a mapping, there won't be a gap.
+ */
+ if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL)
+ frec->start_daddr = info->end_daddr;
+ else
+ frec->start_daddr = xfs_gbno_to_daddr(xg, startblock);
+
+ frec->len_daddr = XFS_FSB_TO_BB(xg->xg_mount, blockcount);
+ return xfs_getfsmap_helper(tp, info, frec);
+}
+
/* Transform a rmapbt irec into a fsmap */
STATIC int
-xfs_getfsmap_datadev_helper(
+xfs_getfsmap_rmapbt_helper(
struct xfs_btree_cur *cur,
const struct xfs_rmap_irec *rec,
void *priv)
{
- struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_fsmap_irec frec = {
+ .owner = rec->rm_owner,
+ .offset = rec->rm_offset,
+ .rm_flags = rec->rm_flags,
+ .rec_key = rec->rm_startblock,
+ };
struct xfs_getfsmap_info *info = priv;
- xfs_fsblock_t fsb;
- xfs_daddr_t rec_daddr;
- fsb = XFS_AGB_TO_FSB(mp, cur->bc_ag.pag->pag_agno, rec->rm_startblock);
- rec_daddr = XFS_FSB_TO_DADDR(mp, fsb);
-
- return xfs_getfsmap_helper(cur->bc_tp, info, rec, rec_daddr, 0);
+ return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group,
+ rec->rm_startblock, rec->rm_blockcount, &frec);
}
/* Transform a bnobt irec into a fsmap */
@@ -409,21 +422,14 @@ xfs_getfsmap_datadev_bnobt_helper(
const struct xfs_alloc_rec_incore *rec,
void *priv)
{
- struct xfs_mount *mp = cur->bc_mp;
+ struct xfs_fsmap_irec frec = {
+ .owner = XFS_RMAP_OWN_NULL, /* "free" */
+ .rec_key = rec->ar_startblock,
+ };
struct xfs_getfsmap_info *info = priv;
- struct xfs_rmap_irec irec;
- xfs_daddr_t rec_daddr;
-
- rec_daddr = XFS_AGB_TO_DADDR(mp, cur->bc_ag.pag->pag_agno,
- rec->ar_startblock);
-
- irec.rm_startblock = rec->ar_startblock;
- irec.rm_blockcount = rec->ar_blockcount;
- irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
- irec.rm_offset = 0;
- irec.rm_flags = 0;
- return xfs_getfsmap_helper(cur->bc_tp, info, &irec, rec_daddr, 0);
+ return xfs_getfsmap_group_helper(info, cur->bc_tp, cur->bc_group,
+ rec->ar_startblock, rec->ar_blockcount, &frec);
}
/* Set rmap flags based on the getfsmap flags */
@@ -467,12 +473,11 @@ __xfs_getfsmap_datadev(
void *priv)
{
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_perag *pag;
+ struct xfs_perag *pag = NULL;
struct xfs_btree_cur *bt_cur = NULL;
xfs_fsblock_t start_fsb;
xfs_fsblock_t end_fsb;
- xfs_agnumber_t start_ag;
- xfs_agnumber_t end_ag;
+ xfs_agnumber_t start_ag, end_ag;
uint64_t eofs;
int error = 0;
@@ -520,13 +525,13 @@ __xfs_getfsmap_datadev(
start_ag = XFS_FSB_TO_AGNO(mp, start_fsb);
end_ag = XFS_FSB_TO_AGNO(mp, end_fsb);
- for_each_perag_range(mp, start_ag, end_ag, pag) {
+ while ((pag = xfs_perag_next_range(mp, pag, start_ag, end_ag))) {
/*
* Set the AG high key from the fsmap high key if this
* is the last AG that we're querying.
*/
- info->pag = pag;
- if (pag->pag_agno == end_ag) {
+ info->group = pag_group(pag);
+ if (pag_agno(pag) == end_ag) {
info->high.rm_startblock = XFS_FSB_TO_AGBNO(mp,
end_fsb);
info->high.rm_offset = XFS_BB_TO_FSBT(mp,
@@ -548,9 +553,9 @@ __xfs_getfsmap_datadev(
if (error)
break;
- trace_xfs_fsmap_low_key(mp, info->dev, pag->pag_agno,
+ trace_xfs_fsmap_low_group_key(mp, info->dev, pag_agno(pag),
&info->low);
- trace_xfs_fsmap_high_key(mp, info->dev, pag->pag_agno,
+ trace_xfs_fsmap_high_group_key(mp, info->dev, pag_agno(pag),
&info->high);
error = query_fn(tp, info, &bt_cur, priv);
@@ -561,7 +566,7 @@ __xfs_getfsmap_datadev(
* Set the AG low key to the start of the AG prior to
* moving on to the next AG.
*/
- if (pag->pag_agno == start_ag)
+ if (pag_agno(pag) == start_ag)
memset(&info->low, 0, sizeof(info->low));
/*
@@ -569,13 +574,13 @@ __xfs_getfsmap_datadev(
* before we drop the reference to the perag when the loop
* terminates.
*/
- if (pag->pag_agno == end_ag) {
+ if (pag_agno(pag) == end_ag) {
info->last = true;
error = query_fn(tp, info, &bt_cur, priv);
if (error)
break;
}
- info->pag = NULL;
+ info->group = NULL;
}
if (bt_cur)
@@ -585,9 +590,9 @@ __xfs_getfsmap_datadev(
xfs_trans_brelse(tp, info->agf_bp);
info->agf_bp = NULL;
}
- if (info->pag) {
- xfs_perag_rele(info->pag);
- info->pag = NULL;
+ if (info->group) {
+ xfs_perag_rele(pag);
+ info->group = NULL;
} else if (pag) {
/* loop termination case */
xfs_perag_rele(pag);
@@ -606,13 +611,13 @@ xfs_getfsmap_datadev_rmapbt_query(
{
/* Report any gap at the end of the last AG. */
if (info->last)
- return xfs_getfsmap_datadev_helper(*curpp, &info->high, info);
+ return xfs_getfsmap_rmapbt_helper(*curpp, &info->high, info);
/* Allocate cursor for this AG and query_range it. */
*curpp = xfs_rmapbt_init_cursor(tp->t_mountp, tp, info->agf_bp,
- info->pag);
+ to_perag(info->group));
return xfs_rmap_query_range(*curpp, &info->low, &info->high,
- xfs_getfsmap_datadev_helper, info);
+ xfs_getfsmap_rmapbt_helper, info);
}
/* Execute a getfsmap query against the regular data device rmapbt. */
@@ -643,7 +648,7 @@ xfs_getfsmap_datadev_bnobt_query(
/* Allocate cursor for this AG and query_range it. */
*curpp = xfs_bnobt_init_cursor(tp->t_mountp, tp, info->agf_bp,
- info->pag);
+ to_perag(info->group));
key->ar_startblock = info->low.rm_startblock;
key[1].ar_startblock = info->high.rm_startblock;
return xfs_alloc_query_range(*curpp, key, &key[1],
@@ -672,9 +677,12 @@ xfs_getfsmap_logdev(
const struct xfs_fsmap *keys,
struct xfs_getfsmap_info *info)
{
+ struct xfs_fsmap_irec frec = {
+ .start_daddr = 0,
+ .rec_key = 0,
+ .owner = XFS_RMAP_OWN_LOG,
+ };
struct xfs_mount *mp = tp->t_mountp;
- struct xfs_rmap_irec rmap;
- xfs_daddr_t rec_daddr, len_daddr;
xfs_fsblock_t start_fsb, end_fsb;
uint64_t eofs;
@@ -689,51 +697,53 @@ xfs_getfsmap_logdev(
if (keys[0].fmr_length > 0)
info->low_daddr = XFS_FSB_TO_BB(mp, start_fsb);
- trace_xfs_fsmap_low_key_linear(mp, info->dev, start_fsb);
- trace_xfs_fsmap_high_key_linear(mp, info->dev, end_fsb);
+ trace_xfs_fsmap_low_linear_key(mp, info->dev, start_fsb);
+ trace_xfs_fsmap_high_linear_key(mp, info->dev, end_fsb);
if (start_fsb > 0)
return 0;
/* Fabricate an rmap entry for the external log device. */
- rmap.rm_startblock = 0;
- rmap.rm_blockcount = mp->m_sb.sb_logblocks;
- rmap.rm_owner = XFS_RMAP_OWN_LOG;
- rmap.rm_offset = 0;
- rmap.rm_flags = 0;
-
- rec_daddr = XFS_FSB_TO_BB(mp, rmap.rm_startblock);
- len_daddr = XFS_FSB_TO_BB(mp, rmap.rm_blockcount);
- return xfs_getfsmap_helper(tp, info, &rmap, rec_daddr, len_daddr);
+ frec.len_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_logblocks);
+ return xfs_getfsmap_helper(tp, info, &frec);
}
#ifdef CONFIG_XFS_RT
/* Transform a rtbitmap "record" into a fsmap */
STATIC int
xfs_getfsmap_rtdev_rtbitmap_helper(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
struct xfs_trans *tp,
const struct xfs_rtalloc_rec *rec,
void *priv)
{
+ struct xfs_fsmap_irec frec = {
+ .owner = XFS_RMAP_OWN_NULL, /* "free" */
+ };
+ struct xfs_mount *mp = rtg_mount(rtg);
struct xfs_getfsmap_info *info = priv;
- struct xfs_rmap_irec irec;
- xfs_rtblock_t rtbno;
- xfs_daddr_t rec_daddr, len_daddr;
-
- rtbno = xfs_rtx_to_rtb(mp, rec->ar_startext);
- rec_daddr = XFS_FSB_TO_BB(mp, rtbno);
- irec.rm_startblock = rtbno;
+ xfs_rtblock_t start_rtb =
+ xfs_rtx_to_rtb(rtg, rec->ar_startext);
+ uint64_t rtbcount =
+ xfs_rtbxlen_to_blen(mp, rec->ar_extcount);
- rtbno = xfs_rtx_to_rtb(mp, rec->ar_extcount);
- len_daddr = XFS_FSB_TO_BB(mp, rtbno);
- irec.rm_blockcount = rtbno;
-
- irec.rm_owner = XFS_RMAP_OWN_NULL; /* "free" */
- irec.rm_offset = 0;
- irec.rm_flags = 0;
+ /*
+ * For an info->last query, we're looking for a gap between the last
+ * mapping emitted and the high key specified by userspace. If the
+ * user's query spans less than 1 fsblock, then info->high and
+ * info->low will have the same rm_startblock, which causes start_daddr
+ * and next_daddr to be the same. Therefore, use the end_daddr that
+ * we calculated from userspace's high key to synthesize the record.
+ * Note that if the btree query found a mapping, there won't be a gap.
+ */
+ if (info->last && info->end_daddr != XFS_BUF_DADDR_NULL) {
+ frec.start_daddr = info->end_daddr;
+ } else {
+ frec.start_daddr = xfs_rtb_to_daddr(mp, start_rtb);
+ }
- return xfs_getfsmap_helper(tp, info, &irec, rec_daddr, len_daddr);
+ frec.len_daddr = XFS_FSB_TO_BB(mp, rtbcount);
+ return xfs_getfsmap_helper(tp, info, &frec);
}
/* Execute a getfsmap query against the realtime device rtbitmap. */
@@ -743,58 +753,83 @@ xfs_getfsmap_rtdev_rtbitmap(
const struct xfs_fsmap *keys,
struct xfs_getfsmap_info *info)
{
-
- struct xfs_rtalloc_rec ahigh = { 0 };
struct xfs_mount *mp = tp->t_mountp;
- xfs_rtblock_t start_rtb;
- xfs_rtblock_t end_rtb;
- xfs_rtxnum_t high;
+ xfs_rtblock_t start_rtbno, end_rtbno;
+ xfs_rtxnum_t start_rtx, end_rtx;
+ xfs_rgnumber_t start_rgno, end_rgno;
+ struct xfs_rtgroup *rtg = NULL;
uint64_t eofs;
int error;
- eofs = XFS_FSB_TO_BB(mp, xfs_rtx_to_rtb(mp, mp->m_sb.sb_rextents));
+ eofs = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks);
if (keys[0].fmr_physical >= eofs)
return 0;
- start_rtb = XFS_BB_TO_FSBT(mp,
- keys[0].fmr_physical + keys[0].fmr_length);
- end_rtb = XFS_BB_TO_FSB(mp, min(eofs - 1, keys[1].fmr_physical));
info->missing_owner = XFS_FMR_OWN_UNKNOWN;
/* Adjust the low key if we are continuing from where we left off. */
+ start_rtbno = xfs_daddr_to_rtb(mp,
+ keys[0].fmr_physical + keys[0].fmr_length);
if (keys[0].fmr_length > 0) {
- info->low_daddr = XFS_FSB_TO_BB(mp, start_rtb);
+ info->low_daddr = xfs_rtb_to_daddr(mp, start_rtbno);
if (info->low_daddr >= eofs)
return 0;
}
+ start_rtx = xfs_rtb_to_rtx(mp, start_rtbno);
+ start_rgno = xfs_rtb_to_rgno(mp, start_rtbno);
- trace_xfs_fsmap_low_key_linear(mp, info->dev, start_rtb);
- trace_xfs_fsmap_high_key_linear(mp, info->dev, end_rtb);
+ end_rtbno = xfs_daddr_to_rtb(mp, min(eofs - 1, keys[1].fmr_physical));
+ end_rgno = xfs_rtb_to_rgno(mp, end_rtbno);
- xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP);
+ trace_xfs_fsmap_low_linear_key(mp, info->dev, start_rtbno);
+ trace_xfs_fsmap_high_linear_key(mp, info->dev, end_rtbno);
- /*
- * Set up query parameters to return free rtextents covering the range
- * we want.
- */
- high = xfs_rtb_to_rtxup(mp, end_rtb);
- error = xfs_rtalloc_query_range(mp, tp, xfs_rtb_to_rtx(mp, start_rtb),
- high, xfs_getfsmap_rtdev_rtbitmap_helper, info);
- if (error)
- goto err;
+ end_rtx = -1ULL;
- /*
- * Report any gaps at the end of the rtbitmap by simulating a null
- * rmap starting at the block after the end of the query range.
- */
- info->last = true;
- ahigh.ar_startext = min(mp->m_sb.sb_rextents, high);
+ while ((rtg = xfs_rtgroup_next_range(mp, rtg, start_rgno, end_rgno))) {
+ if (rtg_rgno(rtg) == end_rgno)
+ end_rtx = xfs_rtb_to_rtx(mp,
+ end_rtbno + mp->m_sb.sb_rextsize - 1);
+
+ info->group = rtg_group(rtg);
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+ error = xfs_rtalloc_query_range(rtg, tp, start_rtx, end_rtx,
+ xfs_getfsmap_rtdev_rtbitmap_helper, info);
+ if (error)
+ break;
+
+ /*
+ * Report any gaps at the end of the rtbitmap by simulating a
+ * zero-length free extent starting at the rtx after the end
+ * of the query range.
+ */
+ if (rtg_rgno(rtg) == end_rgno) {
+ struct xfs_rtalloc_rec ahigh = {
+ .ar_startext = min(end_rtx + 1,
+ rtg->rtg_extents),
+ };
+
+ info->last = true;
+ error = xfs_getfsmap_rtdev_rtbitmap_helper(rtg, tp,
+ &ahigh, info);
+ if (error)
+ break;
+ }
+
+ xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+ info->group = NULL;
+ start_rtx = 0;
+ }
+
+ /* loop termination case */
+ if (rtg) {
+ if (info->group) {
+ xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+ info->group = NULL;
+ }
+ xfs_rtgroup_rele(rtg);
+ }
- error = xfs_getfsmap_rtdev_rtbitmap_helper(mp, tp, &ahigh, info);
- if (error)
- goto err;
-err:
- xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
return error;
}
#endif /* CONFIG_XFS_RT */
@@ -1003,7 +1038,7 @@ xfs_getfsmap(
info.dev = handlers[i].dev;
info.last = false;
- info.pag = NULL;
+ info.group = NULL;
info.low_daddr = XFS_BUF_DADDR_NULL;
info.low.rm_blockcount = 0;
error = handlers[i].fn(tp, dkeys, &info);
diff --git a/fs/xfs/xfs_fsmap.h b/fs/xfs/xfs_fsmap.h
index a0bcc38486a5..06e492fd479d 100644
--- a/fs/xfs/xfs_fsmap.h
+++ b/fs/xfs/xfs_fsmap.h
@@ -28,6 +28,21 @@ struct xfs_fsmap_head {
struct xfs_fsmap fmh_keys[2]; /* low and high keys */
};
+/*
+ * Internal fsmap record format: the record handed to xfs_getfsmap_helper()
+ * by the per-device getfsmap backends.
+ */
+struct xfs_fsmap_irec {
+ xfs_daddr_t start_daddr; /* start of mapping; reported as fmr_physical */
+ xfs_daddr_t len_daddr; /* length of mapping; reported as fmr_length */
+ uint64_t owner; /* extent owner */
+ uint64_t offset; /* offset within the owner */
+ unsigned int rm_flags; /* rmap state flags */
+
+ /*
+ * rmapbt startblock corresponding to start_daddr, if the record came
+ * from an rmap btree.
+ */
+ xfs_agblock_t rec_key;
+};
+
int xfs_ioc_getfsmap(struct xfs_inode *ip, struct fsmap_head __user *arg);
#endif /* __XFS_FSMAP_H__ */
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index b247d895c276..28dde215c899 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -162,9 +162,7 @@ xfs_growfs_data_private(
error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount,
delta, last_pag, &lastag_extended);
} else {
- xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SHRINK,
- "EXPERIMENTAL online shrink feature in use. Use at your own risk!");
-
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_SHRINK);
error = xfs_ag_shrink_space(last_pag, &tp, -delta);
}
xfs_perag_put(last_pag);
@@ -528,13 +526,12 @@ int
xfs_fs_reserve_ag_blocks(
struct xfs_mount *mp)
{
- xfs_agnumber_t agno;
- struct xfs_perag *pag;
+ struct xfs_perag *pag = NULL;
int error = 0;
int err2;
mp->m_finobt_nores = false;
- for_each_perag(mp, agno, pag) {
+ while ((pag = xfs_perag_next(mp, pag))) {
err2 = xfs_ag_resv_init(pag, NULL);
if (err2 && !error)
error = err2;
@@ -556,9 +553,8 @@ void
xfs_fs_unreserve_ag_blocks(
struct xfs_mount *mp)
{
- xfs_agnumber_t agno;
- struct xfs_perag *pag;
+ struct xfs_perag *pag = NULL;
- for_each_perag(mp, agno, pag)
+ while ((pag = xfs_perag_next(mp, pag)))
xfs_ag_resv_free(pag);
}
diff --git a/fs/xfs/xfs_handle.c b/fs/xfs/xfs_handle.c
index 49e5e5f04e60..f19fce557354 100644
--- a/fs/xfs/xfs_handle.c
+++ b/fs/xfs/xfs_handle.c
@@ -85,22 +85,23 @@ xfs_find_handle(
int hsize;
xfs_handle_t handle;
struct inode *inode;
- struct fd f = EMPTY_FD;
struct path path;
int error;
struct xfs_inode *ip;
if (cmd == XFS_IOC_FD_TO_HANDLE) {
- f = fdget(hreq->fd);
- if (!fd_file(f))
+ CLASS(fd, f)(hreq->fd);
+
+ if (fd_empty(f))
return -EBADF;
- inode = file_inode(fd_file(f));
+ path = fd_file(f)->f_path;
+ path_get(&path);
} else {
error = user_path_at(AT_FDCWD, hreq->path, 0, &path);
if (error)
return error;
- inode = d_inode(path.dentry);
}
+ inode = d_inode(path.dentry);
ip = XFS_I(inode);
/*
@@ -134,10 +135,7 @@ xfs_find_handle(
error = 0;
out_put:
- if (cmd == XFS_IOC_FD_TO_HANDLE)
- fdput(f);
- else
- path_put(&path);
+ path_put(&path);
return error;
}
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
index 10f116d093a2..c7c2e6561998 100644
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -18,6 +18,22 @@
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_quota_defs.h"
+#include "xfs_rtgroup.h"
+
+/*
+ * Sample the health state of one group via xfs_group_measure_sickness().
+ * If any sick bits are set, emit the unfixed-corruption tracepoint and set
+ * *@warn to true. *@warn is never cleared here, so it accumulates across
+ * calls.
+ */
+static void
+xfs_health_unmount_group(
+ struct xfs_group *xg,
+ bool *warn)
+{
+ unsigned int sick = 0;
+ unsigned int checked = 0;
+
+ xfs_group_measure_sickness(xg, &sick, &checked);
+ if (sick) {
+ trace_xfs_group_unfixed_corruption(xg, sick);
+ *warn = true;
+ }
+}
/*
* Warn about metadata corruption that we detected but haven't fixed, and
@@ -28,8 +44,8 @@ void
xfs_health_unmount(
struct xfs_mount *mp)
{
- struct xfs_perag *pag;
- xfs_agnumber_t agno;
+ struct xfs_perag *pag = NULL;
+ struct xfs_rtgroup *rtg = NULL;
unsigned int sick = 0;
unsigned int checked = 0;
bool warn = false;
@@ -38,20 +54,12 @@ xfs_health_unmount(
return;
/* Measure AG corruption levels. */
- for_each_perag(mp, agno, pag) {
- xfs_ag_measure_sickness(pag, &sick, &checked);
- if (sick) {
- trace_xfs_ag_unfixed_corruption(mp, agno, sick);
- warn = true;
- }
- }
+ while ((pag = xfs_perag_next(mp, pag)))
+ xfs_health_unmount_group(pag_group(pag), &warn);
- /* Measure realtime volume corruption levels. */
- xfs_rt_measure_sickness(mp, &sick, &checked);
- if (sick) {
- trace_xfs_rt_unfixed_corruption(mp, sick);
- warn = true;
- }
+ /* Measure realtime group corruption levels. */
+ while ((rtg = xfs_rtgroup_next(mp, rtg)))
+ xfs_health_unmount_group(rtg_group(rtg), &warn);
/*
* Measure fs corruption and keep the sample around for the warning.
@@ -150,65 +158,6 @@ xfs_fs_measure_sickness(
spin_unlock(&mp->m_sb_lock);
}
-/* Mark unhealthy realtime metadata. */
-void
-xfs_rt_mark_sick(
- struct xfs_mount *mp,
- unsigned int mask)
-{
- ASSERT(!(mask & ~XFS_SICK_RT_ALL));
- trace_xfs_rt_mark_sick(mp, mask);
-
- spin_lock(&mp->m_sb_lock);
- mp->m_rt_sick |= mask;
- spin_unlock(&mp->m_sb_lock);
-}
-
-/* Mark realtime metadata as having been checked and found unhealthy by fsck. */
-void
-xfs_rt_mark_corrupt(
- struct xfs_mount *mp,
- unsigned int mask)
-{
- ASSERT(!(mask & ~XFS_SICK_RT_ALL));
- trace_xfs_rt_mark_corrupt(mp, mask);
-
- spin_lock(&mp->m_sb_lock);
- mp->m_rt_sick |= mask;
- mp->m_rt_checked |= mask;
- spin_unlock(&mp->m_sb_lock);
-}
-
-/* Mark a realtime metadata healed. */
-void
-xfs_rt_mark_healthy(
- struct xfs_mount *mp,
- unsigned int mask)
-{
- ASSERT(!(mask & ~XFS_SICK_RT_ALL));
- trace_xfs_rt_mark_healthy(mp, mask);
-
- spin_lock(&mp->m_sb_lock);
- mp->m_rt_sick &= ~mask;
- if (!(mp->m_rt_sick & XFS_SICK_RT_PRIMARY))
- mp->m_rt_sick &= ~XFS_SICK_RT_SECONDARY;
- mp->m_rt_checked |= mask;
- spin_unlock(&mp->m_sb_lock);
-}
-
-/* Sample which realtime metadata are unhealthy. */
-void
-xfs_rt_measure_sickness(
- struct xfs_mount *mp,
- unsigned int *sick,
- unsigned int *checked)
-{
- spin_lock(&mp->m_sb_lock);
- *sick = mp->m_rt_sick;
- *checked = mp->m_rt_checked;
- spin_unlock(&mp->m_sb_lock);
-}
-
/* Mark unhealthy per-ag metadata given a raw AG number. */
void
xfs_agno_mark_sick(
@@ -226,63 +175,95 @@ xfs_agno_mark_sick(
xfs_perag_put(pag);
}
+/*
+ * ASSERT that @mask only contains health bits valid for the type of @xg:
+ * XFS_SICK_AG_ALL for XG_TYPE_AG groups, XFS_SICK_RG_ALL for any other type.
+ */
+static inline void
+xfs_group_check_mask(
+ struct xfs_group *xg,
+ unsigned int mask)
+{
+ if (xg->xg_type == XG_TYPE_AG)
+ ASSERT(!(mask & ~XFS_SICK_AG_ALL));
+ else
+ ASSERT(!(mask & ~XFS_SICK_RG_ALL));
+}
+
/* Mark unhealthy per-ag metadata. */
void
-xfs_ag_mark_sick(
- struct xfs_perag *pag,
+xfs_group_mark_sick(
+ struct xfs_group *xg,
unsigned int mask)
{
- ASSERT(!(mask & ~XFS_SICK_AG_ALL));
- trace_xfs_ag_mark_sick(pag->pag_mount, pag->pag_agno, mask);
+ xfs_group_check_mask(xg, mask);
+ trace_xfs_group_mark_sick(xg, mask);
- spin_lock(&pag->pag_state_lock);
- pag->pag_sick |= mask;
- spin_unlock(&pag->pag_state_lock);
+ spin_lock(&xg->xg_state_lock);
+ xg->xg_sick |= mask;
+ spin_unlock(&xg->xg_state_lock);
}
-/* Mark per-ag metadata as having been checked and found unhealthy by fsck. */
+/*
+ * Mark per-group metadata as having been checked and found unhealthy by fsck.
+ */
void
-xfs_ag_mark_corrupt(
- struct xfs_perag *pag,
+xfs_group_mark_corrupt(
+ struct xfs_group *xg,
unsigned int mask)
{
- ASSERT(!(mask & ~XFS_SICK_AG_ALL));
- trace_xfs_ag_mark_corrupt(pag->pag_mount, pag->pag_agno, mask);
+ xfs_group_check_mask(xg, mask);
+ trace_xfs_group_mark_corrupt(xg, mask);
- spin_lock(&pag->pag_state_lock);
- pag->pag_sick |= mask;
- pag->pag_checked |= mask;
- spin_unlock(&pag->pag_state_lock);
+ spin_lock(&xg->xg_state_lock);
+ xg->xg_sick |= mask;
+ xg->xg_checked |= mask;
+ spin_unlock(&xg->xg_state_lock);
}
-/* Mark per-ag metadata ok. */
+/*
+ * Mark per-group metadata ok.
+ */
void
-xfs_ag_mark_healthy(
- struct xfs_perag *pag,
+xfs_group_mark_healthy(
+ struct xfs_group *xg,
unsigned int mask)
{
- ASSERT(!(mask & ~XFS_SICK_AG_ALL));
- trace_xfs_ag_mark_healthy(pag->pag_mount, pag->pag_agno, mask);
-
- spin_lock(&pag->pag_state_lock);
- pag->pag_sick &= ~mask;
- if (!(pag->pag_sick & XFS_SICK_AG_PRIMARY))
- pag->pag_sick &= ~XFS_SICK_AG_SECONDARY;
- pag->pag_checked |= mask;
- spin_unlock(&pag->pag_state_lock);
+ xfs_group_check_mask(xg, mask);
+ trace_xfs_group_mark_healthy(xg, mask);
+
+ spin_lock(&xg->xg_state_lock);
+ xg->xg_sick &= ~mask;
+ if (!(xg->xg_sick & XFS_SICK_AG_PRIMARY))
+ xg->xg_sick &= ~XFS_SICK_AG_SECONDARY;
+ xg->xg_checked |= mask;
+ spin_unlock(&xg->xg_state_lock);
}
/* Sample which per-ag metadata are unhealthy. */
void
-xfs_ag_measure_sickness(
- struct xfs_perag *pag,
+xfs_group_measure_sickness(
+ struct xfs_group *xg,
unsigned int *sick,
unsigned int *checked)
{
- spin_lock(&pag->pag_state_lock);
- *sick = pag->pag_sick;
- *checked = pag->pag_checked;
- spin_unlock(&pag->pag_state_lock);
+ spin_lock(&xg->xg_state_lock);
+ *sick = xg->xg_sick;
+ *checked = xg->xg_checked;
+ spin_unlock(&xg->xg_state_lock);
+}
+
+/* Mark unhealthy per-rtgroup metadata given a raw rt group number. */
+void
+xfs_rgno_mark_sick(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno,
+ unsigned int mask)
+{
+ struct xfs_rtgroup *rtg = xfs_rtgroup_get(mp, rgno);
+
+ /* per-rtgroup structure not set up yet? */
+ if (!rtg)
+ return;
+
+ xfs_group_mark_sick(rtg_group(rtg), mask);
+ xfs_rtgroup_put(rtg);
}
/* Mark the unhealthy parts of an inode. */
@@ -369,6 +350,9 @@ struct ioctl_sick_map {
unsigned int ioctl_mask;
};
+#define for_each_sick_map(map, m) \
+ for ((m) = (map); (m) < (map) + ARRAY_SIZE(map); (m)++)
+
static const struct ioctl_sick_map fs_map[] = {
{ XFS_SICK_FS_COUNTERS, XFS_FSOP_GEOM_SICK_COUNTERS},
{ XFS_SICK_FS_UQUOTA, XFS_FSOP_GEOM_SICK_UQUOTA },
@@ -376,13 +360,13 @@ static const struct ioctl_sick_map fs_map[] = {
{ XFS_SICK_FS_PQUOTA, XFS_FSOP_GEOM_SICK_PQUOTA },
{ XFS_SICK_FS_QUOTACHECK, XFS_FSOP_GEOM_SICK_QUOTACHECK },
{ XFS_SICK_FS_NLINKS, XFS_FSOP_GEOM_SICK_NLINKS },
- { 0, 0 },
+ { XFS_SICK_FS_METADIR, XFS_FSOP_GEOM_SICK_METADIR },
+ { XFS_SICK_FS_METAPATH, XFS_FSOP_GEOM_SICK_METAPATH },
};
static const struct ioctl_sick_map rt_map[] = {
- { XFS_SICK_RT_BITMAP, XFS_FSOP_GEOM_SICK_RT_BITMAP },
- { XFS_SICK_RT_SUMMARY, XFS_FSOP_GEOM_SICK_RT_SUMMARY },
- { 0, 0 },
+ { XFS_SICK_RG_BITMAP, XFS_FSOP_GEOM_SICK_RT_BITMAP },
+ { XFS_SICK_RG_SUMMARY, XFS_FSOP_GEOM_SICK_RT_SUMMARY },
};
static inline void
@@ -404,6 +388,7 @@ xfs_fsop_geom_health(
struct xfs_mount *mp,
struct xfs_fsop_geom *geo)
{
+ struct xfs_rtgroup *rtg = NULL;
const struct ioctl_sick_map *m;
unsigned int sick;
unsigned int checked;
@@ -412,12 +397,14 @@ xfs_fsop_geom_health(
geo->checked = 0;
xfs_fs_measure_sickness(mp, &sick, &checked);
- for (m = fs_map; m->sick_mask; m++)
+ for_each_sick_map(fs_map, m)
xfgeo_health_tick(geo, sick, checked, m);
- xfs_rt_measure_sickness(mp, &sick, &checked);
- for (m = rt_map; m->sick_mask; m++)
- xfgeo_health_tick(geo, sick, checked, m);
+ while ((rtg = xfs_rtgroup_next(mp, rtg))) {
+ xfs_group_measure_sickness(rtg_group(rtg), &sick, &checked);
+ for_each_sick_map(rt_map, m)
+ xfgeo_health_tick(geo, sick, checked, m);
+ }
}
static const struct ioctl_sick_map ag_map[] = {
@@ -432,7 +419,6 @@ static const struct ioctl_sick_map ag_map[] = {
{ XFS_SICK_AG_RMAPBT, XFS_AG_GEOM_SICK_RMAPBT },
{ XFS_SICK_AG_REFCNTBT, XFS_AG_GEOM_SICK_REFCNTBT },
{ XFS_SICK_AG_INODES, XFS_AG_GEOM_SICK_INODES },
- { 0, 0 },
};
/* Fill out ag geometry health info. */
@@ -448,8 +434,8 @@ xfs_ag_geom_health(
ageo->ag_sick = 0;
ageo->ag_checked = 0;
- xfs_ag_measure_sickness(pag, &sick, &checked);
- for (m = ag_map; m->sick_mask; m++) {
+ xfs_group_measure_sickness(pag_group(pag), &sick, &checked);
+ for_each_sick_map(ag_map, m) {
if (checked & m->sick_mask)
ageo->ag_checked |= m->ioctl_mask;
if (sick & m->sick_mask)
@@ -457,6 +443,34 @@ xfs_ag_geom_health(
}
}
+static const struct ioctl_sick_map rtgroup_map[] = {
+ { XFS_SICK_RG_SUPER, XFS_RTGROUP_GEOM_SICK_SUPER },
+ { XFS_SICK_RG_BITMAP, XFS_RTGROUP_GEOM_SICK_BITMAP },
+ { XFS_SICK_RG_SUMMARY, XFS_RTGROUP_GEOM_SICK_SUMMARY },
+};
+
+/* Fill out rtgroup geometry health info. */
+void
+xfs_rtgroup_geom_health(
+ struct xfs_rtgroup *rtg,
+ struct xfs_rtgroup_geometry *rgeo)
+{
+ const struct ioctl_sick_map *m;
+ unsigned int sick;
+ unsigned int checked;
+
+ rgeo->rg_sick = 0;
+ rgeo->rg_checked = 0;
+
+ xfs_group_measure_sickness(rtg_group(rtg), &sick, &checked);
+ for_each_sick_map(rtgroup_map, m) {
+ if (checked & m->sick_mask)
+ rgeo->rg_checked |= m->ioctl_mask;
+ if (sick & m->sick_mask)
+ rgeo->rg_sick |= m->ioctl_mask;
+ }
+}
+
static const struct ioctl_sick_map ino_map[] = {
{ XFS_SICK_INO_CORE, XFS_BS_SICK_INODE },
{ XFS_SICK_INO_BMBTD, XFS_BS_SICK_BMBTD },
@@ -471,7 +485,6 @@ static const struct ioctl_sick_map ino_map[] = {
{ XFS_SICK_INO_DIR_ZAPPED, XFS_BS_SICK_DIR },
{ XFS_SICK_INO_SYMLINK_ZAPPED, XFS_BS_SICK_SYMLINK },
{ XFS_SICK_INO_DIRTREE, XFS_BS_SICK_DIRTREE },
- { 0, 0 },
};
/* Fill out bulkstat health info. */
@@ -488,7 +501,7 @@ xfs_bulkstat_health(
bs->bs_checked = 0;
xfs_inode_measure_sickness(ip, &sick, &checked);
- for (m = ino_map; m->sick_mask; m++) {
+ for_each_sick_map(ino_map, m) {
if (checked & m->sick_mask)
bs->bs_checked |= m->ioctl_mask;
if (sick & m->sick_mask)
@@ -527,24 +540,13 @@ void
xfs_btree_mark_sick(
struct xfs_btree_cur *cur)
{
- switch (cur->bc_ops->type) {
- case XFS_BTREE_TYPE_MEM:
- /* no health state tracking for ephemeral btrees */
- return;
- case XFS_BTREE_TYPE_AG:
+ if (xfs_btree_is_bmap(cur->bc_ops)) {
+ xfs_bmap_mark_sick(cur->bc_ino.ip, cur->bc_ino.whichfork);
+ /* no health state tracking for ephemeral btrees */
+ } else if (cur->bc_ops->type != XFS_BTREE_TYPE_MEM) {
+ ASSERT(cur->bc_group);
ASSERT(cur->bc_ops->sick_mask);
- xfs_ag_mark_sick(cur->bc_ag.pag, cur->bc_ops->sick_mask);
- return;
- case XFS_BTREE_TYPE_INODE:
- if (xfs_btree_is_bmap(cur->bc_ops)) {
- xfs_bmap_mark_sick(cur->bc_ino.ip,
- cur->bc_ino.whichfork);
- return;
- }
- fallthrough;
- default:
- ASSERT(0);
- return;
+ xfs_group_mark_sick(cur->bc_group, cur->bc_ops->sick_mask);
}
}
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 6b119a7a324f..7b6c026d01a1 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -25,6 +25,9 @@
#include "xfs_ag.h"
#include "xfs_log_priv.h"
#include "xfs_health.h"
+#include "xfs_da_format.h"
+#include "xfs_dir2.h"
+#include "xfs_metafile.h"
#include <linux/iversion.h>
@@ -204,7 +207,7 @@ xfs_reclaim_work_queue(
{
rcu_read_lock();
- if (xa_marked(&mp->m_perags, XFS_PERAG_RECLAIM_MARK)) {
+ if (xfs_group_marked(mp, XG_TYPE_AG, XFS_PERAG_RECLAIM_MARK)) {
queue_delayed_work(mp->m_reclaim_workqueue, &mp->m_reclaim_work,
msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10));
}
@@ -219,15 +222,14 @@ static inline void
xfs_blockgc_queue(
struct xfs_perag *pag)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
if (!xfs_is_blockgc_enabled(mp))
return;
rcu_read_lock();
if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG))
- queue_delayed_work(pag->pag_mount->m_blockgc_wq,
- &pag->pag_blockgc_work,
+ queue_delayed_work(mp->m_blockgc_wq, &pag->pag_blockgc_work,
msecs_to_jiffies(xfs_blockgc_secs * 1000));
rcu_read_unlock();
}
@@ -239,7 +241,6 @@ xfs_perag_set_inode_tag(
xfs_agino_t agino,
unsigned int tag)
{
- struct xfs_mount *mp = pag->pag_mount;
bool was_tagged;
lockdep_assert_held(&pag->pag_ici_lock);
@@ -253,13 +254,13 @@ xfs_perag_set_inode_tag(
if (was_tagged)
return;
- /* propagate the tag up into the perag radix tree */
- xa_set_mark(&mp->m_perags, pag->pag_agno, ici_tag_to_mark(tag));
+ /* propagate the tag up into the pag xarray */
+ xfs_group_set_mark(pag_group(pag), ici_tag_to_mark(tag));
/* start background work */
switch (tag) {
case XFS_ICI_RECLAIM_TAG:
- xfs_reclaim_work_queue(mp);
+ xfs_reclaim_work_queue(pag_mount(pag));
break;
case XFS_ICI_BLOCKGC_TAG:
xfs_blockgc_queue(pag);
@@ -276,8 +277,6 @@ xfs_perag_clear_inode_tag(
xfs_agino_t agino,
unsigned int tag)
{
- struct xfs_mount *mp = pag->pag_mount;
-
lockdep_assert_held(&pag->pag_ici_lock);
/*
@@ -295,9 +294,8 @@ xfs_perag_clear_inode_tag(
if (radix_tree_tagged(&pag->pag_ici_root, tag))
return;
- /* clear the tag from the perag radix tree */
- xa_clear_mark(&mp->m_perags, pag->pag_agno, ici_tag_to_mark(tag));
-
+ /* clear the tag from the pag xarray */
+ xfs_group_clear_mark(pag_group(pag), ici_tag_to_mark(tag));
trace_xfs_perag_clear_inode_tag(pag, _RET_IP_);
}
@@ -310,22 +308,9 @@ xfs_perag_grab_next_tag(
struct xfs_perag *pag,
int tag)
{
- unsigned long index = 0;
-
- if (pag) {
- index = pag->pag_agno + 1;
- xfs_perag_rele(pag);
- }
-
- rcu_read_lock();
- pag = xa_find(&mp->m_perags, &index, ULONG_MAX, ici_tag_to_mark(tag));
- if (pag) {
- trace_xfs_perag_grab_next_tag(pag, _RET_IP_);
- if (!atomic_inc_not_zero(&pag->pag_active_ref))
- pag = NULL;
- }
- rcu_read_unlock();
- return pag;
+ return to_perag(xfs_group_grab_next_mark(mp,
+ pag ? pag_group(pag) : NULL,
+ ici_tag_to_mark(tag), XG_TYPE_AG));
}
/*
@@ -847,6 +832,77 @@ out_error_or_again:
}
/*
+ * Get a metadata inode.
+ *
+ * The metafile type must match the file mode exactly, and for files in the
+ * metadata directory tree, it must match the inode's metatype exactly.
+ */
+int
+xfs_trans_metafile_iget(
+ struct xfs_trans *tp,
+ xfs_ino_t ino,
+ enum xfs_metafile_type metafile_type,
+ struct xfs_inode **ipp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_inode *ip;
+ umode_t mode;
+ int error;
+
+ error = xfs_iget(mp, tp, ino, 0, 0, &ip);
+ if (error == -EFSCORRUPTED || error == -EINVAL)
+ goto whine;
+ if (error)
+ return error;
+
+ if (VFS_I(ip)->i_nlink == 0)
+ goto bad_rele;
+
+ if (metafile_type == XFS_METAFILE_DIR)
+ mode = S_IFDIR;
+ else
+ mode = S_IFREG;
+ if (inode_wrong_type(VFS_I(ip), mode))
+ goto bad_rele;
+ if (xfs_has_metadir(mp)) {
+ if (!xfs_is_metadir_inode(ip))
+ goto bad_rele;
+ if (metafile_type != ip->i_metatype)
+ goto bad_rele;
+ }
+
+ *ipp = ip;
+ return 0;
+bad_rele:
+ xfs_irele(ip);
+whine:
+ xfs_err(mp, "metadata inode 0x%llx type %u is corrupt", ino,
+ metafile_type);
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+}
+
+/* Grab a metadata file if the caller doesn't already have a transaction. */
+int
+xfs_metafile_iget(
+ struct xfs_mount *mp,
+ xfs_ino_t ino,
+ enum xfs_metafile_type metafile_type,
+ struct xfs_inode **ipp)
+{
+ struct xfs_trans *tp;
+ int error;
+
+ error = xfs_trans_alloc_empty(mp, &tp);
+ if (error)
+ return error;
+
+ error = xfs_trans_metafile_iget(tp, ino, metafile_type, ipp);
+ xfs_trans_cancel(tp);
+ return error;
+}
+
+/*
* Grab the inode for reclaim exclusively.
*
* We have found this inode via a lookup under RCU, so the inode may have
@@ -1014,7 +1070,7 @@ xfs_reclaim_inodes(
if (xfs_want_reclaim_sick(mp))
icw.icw_flags |= XFS_ICWALK_FLAG_RECLAIM_SICK;
- while (xa_marked(&mp->m_perags, XFS_PERAG_RECLAIM_MARK)) {
+ while (xfs_group_marked(mp, XG_TYPE_AG, XFS_PERAG_RECLAIM_MARK)) {
xfs_ail_push_all_sync(mp->m_ail);
xfs_icwalk(mp, XFS_ICWALK_RECLAIM, &icw);
}
@@ -1056,7 +1112,7 @@ long
xfs_reclaim_inodes_count(
struct xfs_mount *mp)
{
- XA_STATE (xas, &mp->m_perags, 0);
+ XA_STATE (xas, &mp->m_groups[XG_TYPE_AG].xa, 0);
long reclaimable = 0;
struct xfs_perag *pag;
@@ -1401,13 +1457,12 @@ void
xfs_blockgc_stop(
struct xfs_mount *mp)
{
- struct xfs_perag *pag;
- xfs_agnumber_t agno;
+ struct xfs_perag *pag = NULL;
if (!xfs_clear_blockgc_enabled(mp))
return;
- for_each_perag(mp, agno, pag)
+ while ((pag = xfs_perag_next(mp, pag)))
cancel_delayed_work_sync(&pag->pag_blockgc_work);
trace_xfs_blockgc_stop(mp, __return_address);
}
@@ -1499,7 +1554,7 @@ xfs_blockgc_worker(
{
struct xfs_perag *pag = container_of(to_delayed_work(work),
struct xfs_perag, pag_blockgc_work);
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
int error;
trace_xfs_blockgc_worker(mp, __return_address);
@@ -1507,7 +1562,7 @@ xfs_blockgc_worker(
error = xfs_icwalk_ag(pag, XFS_ICWALK_BLOCKGC, NULL);
if (error)
xfs_info(mp, "AG %u preallocation gc worker failed, err=%d",
- pag->pag_agno, error);
+ pag_agno(pag), error);
xfs_blockgc_queue(pag);
}
@@ -1548,8 +1603,7 @@ xfs_blockgc_flush_all(
* queued, it will not be requeued. Then flush whatever is left.
*/
while ((pag = xfs_perag_grab_next_tag(mp, pag, XFS_ICI_BLOCKGC_TAG)))
- mod_delayed_work(pag->pag_mount->m_blockgc_wq,
- &pag->pag_blockgc_work, 0);
+ mod_delayed_work(mp->m_blockgc_wq, &pag->pag_blockgc_work, 0);
while ((pag = xfs_perag_grab_next_tag(mp, pag, XFS_ICI_BLOCKGC_TAG)))
flush_delayed_work(&pag->pag_blockgc_work);
@@ -1688,7 +1742,7 @@ xfs_icwalk_ag(
enum xfs_icwalk_goal goal,
struct xfs_icwalk *icw)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
uint32_t first_index;
int last_error = 0;
int skipped;
@@ -1741,7 +1795,7 @@ restart:
* us to see this inode, so another lookup from the
* same index will not find it again.
*/
- if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag->pag_agno)
+ if (XFS_INO_TO_AGNO(mp, ip->i_ino) != pag_agno(pag))
continue;
first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
if (first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 19dcb569a3e7..c8ad2606f928 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -43,6 +43,7 @@
#include "xfs_parent.h"
#include "xfs_xattr.h"
#include "xfs_inode_util.h"
+#include "xfs_metafile.h"
struct kmem_cache *xfs_inode_cache;
@@ -341,8 +342,7 @@ xfs_lock_inumorder(
{
uint class = 0;
- ASSERT(!(lock_mode & (XFS_ILOCK_PARENT | XFS_ILOCK_RTBITMAP |
- XFS_ILOCK_RTSUM)));
+ ASSERT(!(lock_mode & XFS_ILOCK_PARENT));
ASSERT(xfs_lockdep_subclass_ok(subclass));
if (lock_mode & (XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)) {
@@ -554,8 +554,20 @@ xfs_lookup(
if (error)
goto out_free_name;
+ /*
+ * Fail if a directory entry in the regular directory tree points to
+ * a metadata file.
+ */
+ if (XFS_IS_CORRUPT(dp->i_mount, xfs_is_metadir_inode(*ipp))) {
+ xfs_fs_mark_sick(dp->i_mount, XFS_SICK_FS_METADIR);
+ error = -EFSCORRUPTED;
+ goto out_irele;
+ }
+
return 0;
+out_irele:
+ xfs_irele(*ipp);
out_free_name:
if (ci_name)
kfree(ci_name->name);
@@ -1295,7 +1307,7 @@ xfs_inode_needs_inactive(
return false;
/* Metadata inodes require explicit resource cleanup. */
- if (xfs_is_metadata_inode(ip))
+ if (xfs_is_internal_inode(ip))
return false;
/* Want to clean out the cow blocks if there are any. */
@@ -1388,7 +1400,7 @@ xfs_inactive(
goto out;
/* Metadata inodes require explicit resource cleanup. */
- if (xfs_is_metadata_inode(ip))
+ if (xfs_is_internal_inode(ip))
goto out;
/* Try to clean out the cow blocks if there are any. */
@@ -1514,9 +1526,8 @@ xfs_iunlink_reload_next(
xfs_agino_t next_agino)
{
struct xfs_perag *pag = agibp->b_pag;
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_inode *next_ip = NULL;
- xfs_ino_t ino;
int error;
ASSERT(next_agino != NULLAGINO);
@@ -1530,7 +1541,7 @@ xfs_iunlink_reload_next(
xfs_info_ratelimited(mp,
"Found unrecovered unlinked inode 0x%x in AG 0x%x. Initiating recovery.",
- next_agino, pag->pag_agno);
+ next_agino, pag_agno(pag));
/*
* Use an untrusted lookup just to be cautious in case the AGI has been
@@ -1538,8 +1549,8 @@ xfs_iunlink_reload_next(
* but we'd rather shut down now since we're already running in a weird
* situation.
*/
- ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, next_agino);
- error = xfs_iget(mp, tp, ino, XFS_IGET_UNTRUSTED, 0, &next_ip);
+ error = xfs_iget(mp, tp, xfs_agino_to_ino(pag, next_agino),
+ XFS_IGET_UNTRUSTED, 0, &next_ip);
if (error) {
xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
return error;
@@ -1573,7 +1584,7 @@ xfs_ifree_mark_inode_stale(
struct xfs_inode *free_ip,
xfs_ino_t inum)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_inode_log_item *iip;
struct xfs_inode *ip;
@@ -3041,7 +3052,7 @@ xfs_inode_alloc_unitsize(
/* Should we always be using copy on write for file writes? */
bool
xfs_is_always_cow_inode(
- struct xfs_inode *ip)
+ const struct xfs_inode *ip)
{
return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount);
}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 03944b6c5fba..b0de3d924d4c 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -65,6 +65,7 @@ typedef struct xfs_inode {
uint16_t i_flushiter; /* incremented on flush */
};
uint8_t i_forkoff; /* attr fork offset >> 3 */
+ enum xfs_metafile_type i_metatype; /* XFS_METAFILE_* */
uint16_t i_diflags; /* XFS_DIFLAG_... */
uint64_t i_diflags2; /* XFS_DIFLAG2_... */
struct timespec64 i_crtime; /* time created */
@@ -100,7 +101,7 @@ static inline bool xfs_inode_on_unlinked_list(const struct xfs_inode *ip)
return ip->i_prev_unlinked != 0;
}
-static inline bool xfs_inode_has_attr_fork(struct xfs_inode *ip)
+static inline bool xfs_inode_has_attr_fork(const struct xfs_inode *ip)
{
return ip->i_forkoff > 0;
}
@@ -271,23 +272,36 @@ xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned long flags)
return ret;
}
-static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
+static inline bool xfs_is_reflink_inode(const struct xfs_inode *ip)
{
return ip->i_diflags2 & XFS_DIFLAG2_REFLINK;
}
-static inline bool xfs_is_metadata_inode(const struct xfs_inode *ip)
+static inline bool xfs_is_metadir_inode(const struct xfs_inode *ip)
+{
+ return ip->i_diflags2 & XFS_DIFLAG2_METADATA;
+}
+
+static inline bool xfs_is_internal_inode(const struct xfs_inode *ip)
{
struct xfs_mount *mp = ip->i_mount;
+ /* Any file in the metadata directory tree is a metadata inode. */
+ if (xfs_has_metadir(mp))
+ return xfs_is_metadir_inode(ip);
+
+ /*
+ * Before metadata directories, the only metadata inodes were the
+ * three quota files, the realtime bitmap, and the realtime summary.
+ */
return ip->i_ino == mp->m_sb.sb_rbmino ||
ip->i_ino == mp->m_sb.sb_rsumino ||
xfs_is_quota_inode(&mp->m_sb, ip->i_ino);
}
-bool xfs_is_always_cow_inode(struct xfs_inode *ip);
+bool xfs_is_always_cow_inode(const struct xfs_inode *ip);
-static inline bool xfs_is_cow_inode(struct xfs_inode *ip)
+static inline bool xfs_is_cow_inode(const struct xfs_inode *ip)
{
return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip);
}
@@ -301,17 +315,17 @@ static inline bool xfs_inode_has_filedata(const struct xfs_inode *ip)
* Check if an inode has any data in the COW fork. This might be often false
* even for inodes with the reflink flag when there is no pending COW operation.
*/
-static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
+static inline bool xfs_inode_has_cow_data(const struct xfs_inode *ip)
{
return ip->i_cowfp && ip->i_cowfp->if_bytes;
}
-static inline bool xfs_inode_has_bigtime(struct xfs_inode *ip)
+static inline bool xfs_inode_has_bigtime(const struct xfs_inode *ip)
{
return ip->i_diflags2 & XFS_DIFLAG2_BIGTIME;
}
-static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
+static inline bool xfs_inode_has_large_extent_counts(const struct xfs_inode *ip)
{
return ip->i_diflags2 & XFS_DIFLAG2_NREXT64;
}
@@ -320,7 +334,7 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
* Decide if this file is a realtime file whose data allocation unit is larger
* than a single filesystem block.
*/
-static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
+static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip)
{
return XFS_IS_REALTIME_INODE(ip) && ip->i_mount->m_sb.sb_rextsize > 1;
}
@@ -332,6 +346,21 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
(XFS_IS_REALTIME_INODE(ip) ? \
(ip)->i_mount->m_rtdev_targp : (ip)->i_mount->m_ddev_targp)
+static inline bool
+xfs_inode_can_atomicwrite(
+ struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_buftarg *target = xfs_inode_buftarg(ip);
+
+ if (mp->m_sb.sb_blocksize < target->bt_bdev_awu_min)
+ return false;
+ if (mp->m_sb.sb_blocksize > target->bt_bdev_awu_max)
+ return false;
+
+ return true;
+}
+
/*
* In-core inode flags.
*/
@@ -434,9 +463,8 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
* However, MAX_LOCKDEP_SUBCLASSES == 8, which means we are greatly
* limited to the subclasses we can represent via nesting. We need at least
* 5 inodes nest depth for the ILOCK through rename, and we also have to support
- * XFS_ILOCK_PARENT, which gives 6 subclasses. Then we have XFS_ILOCK_RTBITMAP
- * and XFS_ILOCK_RTSUM, which are another 2 unique subclasses, so that's all
- * 8 subclasses supported by lockdep.
+ * XFS_ILOCK_PARENT, which gives 6 subclasses. That's 6 of the 8 subclasses
+ * supported by lockdep.
*
* This also means we have to number the sub-classes in the lowest bits of
* the mask we keep, and we have to ensure we never exceed 3 bits of lockdep
@@ -462,8 +490,8 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
* ILOCK values
* 0-4 subclass values
* 5 PARENT subclass (not nestable)
- * 6 RTBITMAP subclass (not nestable)
- * 7 RTSUM subclass (not nestable)
+ * 6 unused
+ * 7 unused
*
*/
#define XFS_IOLOCK_SHIFT 16
@@ -478,12 +506,8 @@ static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
#define XFS_ILOCK_SHIFT 24
#define XFS_ILOCK_PARENT_VAL 5u
#define XFS_ILOCK_MAX_SUBCLASS (XFS_ILOCK_PARENT_VAL - 1)
-#define XFS_ILOCK_RTBITMAP_VAL 6u
-#define XFS_ILOCK_RTSUM_VAL 7u
#define XFS_ILOCK_DEP_MASK 0xff000000u
#define XFS_ILOCK_PARENT (XFS_ILOCK_PARENT_VAL << XFS_ILOCK_SHIFT)
-#define XFS_ILOCK_RTBITMAP (XFS_ILOCK_RTBITMAP_VAL << XFS_ILOCK_SHIFT)
-#define XFS_ILOCK_RTSUM (XFS_ILOCK_RTSUM_VAL << XFS_ILOCK_SHIFT)
#define XFS_LOCK_SUBCLASS_MASK (XFS_IOLOCK_DEP_MASK | \
XFS_MMAPLOCK_DEP_MASK | \
@@ -625,9 +649,9 @@ void xfs_sort_inodes(struct xfs_inode **i_tab, unsigned int num_inodes);
static inline bool
xfs_inode_unlinked_incomplete(
- struct xfs_inode *ip)
+ const struct xfs_inode *ip)
{
- return VFS_I(ip)->i_nlink == 0 && !xfs_inode_on_unlinked_list(ip);
+ return VFS_IC(ip)->i_nlink == 0 && !xfs_inode_on_unlinked_list(ip);
}
int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip);
int xfs_inode_reload_unlinked(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index b509cbd191f4..912f0b1bc3cb 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -556,7 +556,6 @@ xfs_inode_to_log_dinode(
to->di_projid_lo = ip->i_projid & 0xffff;
to->di_projid_hi = ip->i_projid >> 16;
- memset(to->di_pad3, 0, sizeof(to->di_pad3));
to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode_get_atime(inode));
to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode_get_mtime(inode));
to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode_get_ctime(inode));
@@ -590,10 +589,16 @@ xfs_inode_to_log_dinode(
/* dummy value for initialisation */
to->di_crc = 0;
+
+ if (xfs_is_metadir_inode(ip))
+ to->di_metatype = ip->i_metatype;
+ else
+ to->di_metatype = 0;
} else {
to->di_version = 2;
to->di_flushiter = ip->i_flushiter;
memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
+ to->di_metatype = 0;
}
xfs_inode_to_log_dinode_iext_counters(ip, to);
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index dbdab4ce7c44..e70d2611456b 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -175,7 +175,7 @@ xfs_log_dinode_to_disk(
to->di_mode = cpu_to_be16(from->di_mode);
to->di_version = from->di_version;
to->di_format = from->di_format;
- to->di_onlink = 0;
+ to->di_metatype = cpu_to_be16(from->di_metatype);
to->di_uid = cpu_to_be32(from->di_uid);
to->di_gid = cpu_to_be32(from->di_gid);
to->di_nlink = cpu_to_be32(from->di_nlink);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 2567fd2a0994..0789c18aaa18 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -40,6 +40,7 @@
#include "xfs_file.h"
#include "xfs_exchrange.h"
#include "xfs_handle.h"
+#include "xfs_rtgroup.h"
#include <linux/mount.h>
#include <linux/fileattr.h>
@@ -233,6 +234,10 @@ xfs_bulk_ireq_setup(
if (hdr->flags & XFS_BULK_IREQ_NREXT64)
breq->flags |= XFS_IBULK_NREXT64;
+ /* Caller wants to see metadata directories in bulkstat output. */
+ if (hdr->flags & XFS_BULK_IREQ_METADIR)
+ breq->flags |= XFS_IBULK_METADIR;
+
return 0;
}
@@ -323,6 +328,9 @@ xfs_ioc_inumbers(
if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr)))
return -EFAULT;
+ if (hdr.flags & XFS_BULK_IREQ_METADIR)
+ return -EINVAL;
+
error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers);
if (error == -ECANCELED)
goto out_teardown;
@@ -396,6 +404,38 @@ xfs_ioc_ag_geometry(
return 0;
}
+STATIC int
+xfs_ioc_rtgroup_geometry(
+ struct xfs_mount *mp,
+ void __user *arg)
+{
+ struct xfs_rtgroup *rtg;
+ struct xfs_rtgroup_geometry rgeo;
+ int error;
+
+ if (copy_from_user(&rgeo, arg, sizeof(rgeo)))
+ return -EFAULT;
+ if (rgeo.rg_flags)
+ return -EINVAL;
+ if (memchr_inv(&rgeo.rg_reserved, 0, sizeof(rgeo.rg_reserved)))
+ return -EINVAL;
+ if (!xfs_has_rtgroups(mp))
+ return -EINVAL;
+
+ rtg = xfs_rtgroup_get(mp, rgeo.rg_number);
+ if (!rtg)
+ return -EINVAL;
+
+ error = xfs_rtgroup_get_geometry(rtg, &rgeo);
+ xfs_rtgroup_put(rtg);
+ if (error)
+ return error;
+
+ if (copy_to_user(arg, &rgeo, sizeof(rgeo)))
+ return -EFAULT;
+ return 0;
+}
+
/*
* Linux extended inode flags interface.
*/
@@ -881,41 +921,29 @@ xfs_ioc_swapext(
xfs_swapext_t *sxp)
{
xfs_inode_t *ip, *tip;
- struct fd f, tmp;
- int error = 0;
/* Pull information for the target fd */
- f = fdget((int)sxp->sx_fdtarget);
- if (!fd_file(f)) {
- error = -EINVAL;
- goto out;
- }
+ CLASS(fd, f)((int)sxp->sx_fdtarget);
+ if (fd_empty(f))
+ return -EINVAL;
if (!(fd_file(f)->f_mode & FMODE_WRITE) ||
!(fd_file(f)->f_mode & FMODE_READ) ||
- (fd_file(f)->f_flags & O_APPEND)) {
- error = -EBADF;
- goto out_put_file;
- }
+ (fd_file(f)->f_flags & O_APPEND))
+ return -EBADF;
- tmp = fdget((int)sxp->sx_fdtmp);
- if (!fd_file(tmp)) {
- error = -EINVAL;
- goto out_put_file;
- }
+ CLASS(fd, tmp)((int)sxp->sx_fdtmp);
+ if (fd_empty(tmp))
+ return -EINVAL;
if (!(fd_file(tmp)->f_mode & FMODE_WRITE) ||
!(fd_file(tmp)->f_mode & FMODE_READ) ||
- (fd_file(tmp)->f_flags & O_APPEND)) {
- error = -EBADF;
- goto out_put_tmp_file;
- }
+ (fd_file(tmp)->f_flags & O_APPEND))
+ return -EBADF;
if (IS_SWAPFILE(file_inode(fd_file(f))) ||
- IS_SWAPFILE(file_inode(fd_file(tmp)))) {
- error = -EINVAL;
- goto out_put_tmp_file;
- }
+ IS_SWAPFILE(file_inode(fd_file(tmp))))
+ return -EINVAL;
/*
* We need to ensure that the fds passed in point to XFS inodes
@@ -923,37 +951,22 @@ xfs_ioc_swapext(
* control over what the user passes us here.
*/
if (fd_file(f)->f_op != &xfs_file_operations ||
- fd_file(tmp)->f_op != &xfs_file_operations) {
- error = -EINVAL;
- goto out_put_tmp_file;
- }
+ fd_file(tmp)->f_op != &xfs_file_operations)
+ return -EINVAL;
ip = XFS_I(file_inode(fd_file(f)));
tip = XFS_I(file_inode(fd_file(tmp)));
- if (ip->i_mount != tip->i_mount) {
- error = -EINVAL;
- goto out_put_tmp_file;
- }
-
- if (ip->i_ino == tip->i_ino) {
- error = -EINVAL;
- goto out_put_tmp_file;
- }
+ if (ip->i_mount != tip->i_mount)
+ return -EINVAL;
- if (xfs_is_shutdown(ip->i_mount)) {
- error = -EIO;
- goto out_put_tmp_file;
- }
+ if (ip->i_ino == tip->i_ino)
+ return -EINVAL;
- error = xfs_swap_extents(ip, tip, sxp);
+ if (xfs_is_shutdown(ip->i_mount))
+ return -EIO;
- out_put_tmp_file:
- fdput(tmp);
- out_put_file:
- fdput(f);
- out:
- return error;
+ return xfs_swap_extents(ip, tip, sxp);
}
static int
@@ -1021,7 +1034,7 @@ xfs_ioc_setlabel(
* buffered reads from userspace (i.e. from blkid) are invalidated,
* and userspace will see the newly-written label.
*/
- error = xfs_sync_sb_buf(mp);
+ error = xfs_sync_sb_buf(mp, true);
if (error)
goto out;
/*
@@ -1032,6 +1045,8 @@ xfs_ioc_setlabel(
mutex_unlock(&mp->m_growlock);
invalidate_bdev(mp->m_ddev_targp->bt_bdev);
+ if (xfs_has_rtsb(mp) && mp->m_rtdev_targp)
+ invalidate_bdev(mp->m_rtdev_targp->bt_bdev);
out:
mnt_drop_write_file(filp);
@@ -1216,6 +1231,8 @@ xfs_file_ioctl(
case XFS_IOC_AG_GEOMETRY:
return xfs_ioc_ag_geometry(mp, arg);
+ case XFS_IOC_RTGROUP_GEOMETRY:
+ return xfs_ioc_rtgroup_geometry(mp, arg);
case XFS_IOC_GETVERSION:
return put_user(inode->i_generation, (int __user *)arg);
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 86da16f54be9..50fa3ef89f6c 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -24,6 +24,7 @@
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_quota.h"
+#include "xfs_rtgroup.h"
#include "xfs_dquot_item.h"
#include "xfs_dquot.h"
#include "xfs_reflink.h"
@@ -115,7 +116,9 @@ xfs_bmbt_to_iomap(
iomap->addr = IOMAP_NULL_ADDR;
iomap->type = IOMAP_DELALLOC;
} else {
- iomap->addr = BBTOB(xfs_fsb_to_db(ip, imap->br_startblock));
+ xfs_daddr_t daddr = xfs_fsb_to_db(ip, imap->br_startblock);
+
+ iomap->addr = BBTOB(daddr);
if (mapping_flags & IOMAP_DAX)
iomap->addr += target->bt_dax_part_off;
@@ -124,6 +127,14 @@ xfs_bmbt_to_iomap(
else
iomap->type = IOMAP_MAPPED;
+ /*
+ * Mark iomaps starting at the first sector of a RTG as a merge
+ * boundary so that each I/O completion is contained within a
+ * single RTG.
+ */
+ if (XFS_IS_REALTIME_INODE(ip) && xfs_has_rtgroups(mp) &&
+ xfs_rtbno_is_group_start(mp, imap->br_startblock))
+ iomap->flags |= IOMAP_F_BOUNDARY;
}
iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
@@ -342,16 +353,26 @@ xfs_quota_need_throttle(
xfs_fsblock_t alloc_blocks)
{
struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
+ struct xfs_dquot_res *res;
+ struct xfs_dquot_pre *pre;
if (!dq || !xfs_this_quota_on(ip->i_mount, type))
return false;
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ res = &dq->q_rtb;
+ pre = &dq->q_rtb_prealloc;
+ } else {
+ res = &dq->q_blk;
+ pre = &dq->q_blk_prealloc;
+ }
+
/* no hi watermark, no throttle */
- if (!dq->q_prealloc_hi_wmark)
+ if (!pre->q_prealloc_hi_wmark)
return false;
/* under the lo watermark, no throttle */
- if (dq->q_blk.reserved + alloc_blocks < dq->q_prealloc_lo_wmark)
+ if (res->reserved + alloc_blocks < pre->q_prealloc_lo_wmark)
return false;
return true;
@@ -366,22 +387,35 @@ xfs_quota_calc_throttle(
int64_t *qfreesp)
{
struct xfs_dquot *dq = xfs_inode_dquot(ip, type);
+ struct xfs_dquot_res *res;
+ struct xfs_dquot_pre *pre;
int64_t freesp;
int shift = 0;
+ if (!dq) {
+ res = NULL;
+ pre = NULL;
+ } else if (XFS_IS_REALTIME_INODE(ip)) {
+ res = &dq->q_rtb;
+ pre = &dq->q_rtb_prealloc;
+ } else {
+ res = &dq->q_blk;
+ pre = &dq->q_blk_prealloc;
+ }
+
/* no dq, or over hi wmark, squash the prealloc completely */
- if (!dq || dq->q_blk.reserved >= dq->q_prealloc_hi_wmark) {
+ if (!res || res->reserved >= pre->q_prealloc_hi_wmark) {
*qblocks = 0;
*qfreesp = 0;
return;
}
- freesp = dq->q_prealloc_hi_wmark - dq->q_blk.reserved;
- if (freesp < dq->q_low_space[XFS_QLOWSP_5_PCNT]) {
+ freesp = pre->q_prealloc_hi_wmark - res->reserved;
+ if (freesp < pre->q_low_space[XFS_QLOWSP_5_PCNT]) {
shift = 2;
- if (freesp < dq->q_low_space[XFS_QLOWSP_3_PCNT])
+ if (freesp < pre->q_low_space[XFS_QLOWSP_3_PCNT])
shift += 2;
- if (freesp < dq->q_low_space[XFS_QLOWSP_1_PCNT])
+ if (freesp < pre->q_low_space[XFS_QLOWSP_1_PCNT])
shift += 2;
}
@@ -501,8 +535,8 @@ xfs_iomap_prealloc_size(
alloc_blocks);
if (unlikely(XFS_IS_REALTIME_INODE(ip)))
- freesp = xfs_rtx_to_rtb(mp,
- xfs_iomap_freesp(&mp->m_frextents,
+ freesp = xfs_rtbxlen_to_blen(mp,
+ xfs_iomap_freesp(&mp->m_frextents,
mp->m_low_rtexts, &shift));
else
freesp = xfs_iomap_freesp(&mp->m_fdblocks, mp->m_low_space,
@@ -1234,6 +1268,14 @@ xfs_buffered_write_iomap_end(
if (iomap->type != IOMAP_DELALLOC || !(iomap->flags & IOMAP_F_NEW))
return 0;
+ /*
+ * iomap_page_mkwrite() will never fail in a way that requires delalloc
+ * extents that it allocated to be revoked. Hence never try to release
+ * them here.
+ */
+ if (flags & IOMAP_FAULT)
+ return 0;
+
/* Nothing to do if we've written the entire delalloc extent */
start_byte = iomap_last_written_block(inode, offset, written);
end_byte = round_up(offset + length, i_blocksize(inode));
@@ -1260,15 +1302,6 @@ const struct iomap_ops xfs_buffered_write_iomap_ops = {
.iomap_end = xfs_buffered_write_iomap_end,
};
-/*
- * iomap_page_mkwrite() will never fail in a way that requires delalloc extents
- * that it allocated to be revoked. Hence we do not need an .iomap_end method
- * for this operation.
- */
-const struct iomap_ops xfs_page_mkwrite_iomap_ops = {
- .iomap_begin = xfs_buffered_write_iomap_begin,
-};
-
static int
xfs_read_iomap_begin(
struct inode *inode,
diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h
index 4da13440bae9..8347268af727 100644
--- a/fs/xfs/xfs_iomap.h
+++ b/fs/xfs/xfs_iomap.h
@@ -48,7 +48,6 @@ xfs_aligned_fsb_count(
}
extern const struct iomap_ops xfs_buffered_write_iomap_ops;
-extern const struct iomap_ops xfs_page_mkwrite_iomap_ops;
extern const struct iomap_ops xfs_direct_write_iomap_ops;
extern const struct iomap_ops xfs_read_iomap_ops;
extern const struct iomap_ops xfs_seek_iomap_ops;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ee79cf161312..207e0dadffc3 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -42,7 +42,9 @@
* held. For regular files, the lock order is the other way around - the
* mmap_lock is taken during the page fault, and then we lock the ilock to do
* block mapping. Hence we need a different class for the directory ilock so
- * that lockdep can tell them apart.
+ * that lockdep can tell them apart. Directories in the metadata directory
+ * tree get a separate class so that lockdep reports will warn us if someone
+ * ever tries to lock regular directories after locking metadata directories.
*/
static struct lock_class_key xfs_nondir_ilock_class;
static struct lock_class_key xfs_dir_ilock_class;
@@ -570,6 +572,20 @@ xfs_stat_blksize(
return max_t(uint32_t, PAGE_SIZE, mp->m_sb.sb_blocksize);
}
+/*
+ * Report the atomic write unit limits for @ip through @unit_min and @unit_max.
+ *
+ * Both values are set to zero when xfs_inode_can_atomicwrite() rejects the
+ * inode.  Otherwise both are set to the filesystem block size
+ * (sb_blocksize), so the minimum and maximum are always equal here.
+ */
+static void
+xfs_get_atomic_write_attr(
+ struct xfs_inode *ip,
+ unsigned int *unit_min,
+ unsigned int *unit_max)
+{
+ if (!xfs_inode_can_atomicwrite(ip)) {
+ *unit_min = *unit_max = 0;
+ return;
+ }
+
+ *unit_min = *unit_max = ip->i_mount->m_sb.sb_blocksize;
+}
+
STATIC int
xfs_vn_getattr(
struct mnt_idmap *idmap,
@@ -597,8 +613,9 @@ xfs_vn_getattr(
stat->gid = vfsgid_into_kgid(vfsgid);
stat->ino = ip->i_ino;
stat->atime = inode_get_atime(inode);
- stat->mtime = inode_get_mtime(inode);
- stat->ctime = inode_get_ctime(inode);
+
+ fill_mg_cmtime(stat, request_mask, inode);
+
stat->blocks = XFS_FSB_TO_BB(mp, ip->i_nblocks + ip->i_delayed_blks);
if (xfs_has_v3inodes(mp)) {
@@ -608,11 +625,6 @@ xfs_vn_getattr(
}
}
- if ((request_mask & STATX_CHANGE_COOKIE) && IS_I_VERSION(inode)) {
- stat->change_cookie = inode_query_iversion(inode);
- stat->result_mask |= STATX_CHANGE_COOKIE;
- }
-
/*
* Note: If you add another clause to set an attribute flag, please
* update attributes_mask below.
@@ -643,6 +655,14 @@ xfs_vn_getattr(
stat->dio_mem_align = bdev_dma_alignment(bdev) + 1;
stat->dio_offset_align = bdev_logical_block_size(bdev);
}
+ if (request_mask & STATX_WRITE_ATOMIC) {
+ unsigned int unit_min, unit_max;
+
+ xfs_get_atomic_write_attr(ip, &unit_min,
+ &unit_max);
+ generic_fill_statx_atomic_writes(stat,
+ unit_min, unit_max);
+ }
fallthrough;
default:
stat->blksize = xfs_stat_blksize(ip);
@@ -1289,6 +1309,7 @@ xfs_setup_inode(
{
struct inode *inode = &ip->i_vnode;
gfp_t gfp_mask;
+ bool is_meta = xfs_is_internal_inode(ip);
inode->i_ino = ip->i_ino;
inode->i_state |= I_NEW;
@@ -1300,6 +1321,16 @@ xfs_setup_inode(
i_size_write(inode, ip->i_disk_size);
xfs_diflags_to_iflags(ip, true);
+ /*
+ * Mark our metadata files as private so that LSMs and the ACL code
+ * don't try to add their own metadata or reason about these files,
+ * and users cannot ever obtain file handles to them.
+ */
+ if (is_meta) {
+ inode->i_flags |= S_PRIVATE;
+ inode->i_opflags &= ~IOP_XATTR;
+ }
+
if (S_ISDIR(inode->i_mode)) {
/*
* We set the i_rwsem class here to avoid potential races with
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index c0757ab99495..1fa1c0564b0c 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -36,6 +36,14 @@ struct xfs_bstat_chunk {
struct xfs_bulkstat *buf;
};
+/*
+ * Return true only if xfs_is_metadir_inode() accepts @ip and the bulkstat
+ * request @breq was issued with XFS_IBULK_METADIR set in its flags.
+ */
+static inline bool
+want_metadir_file(
+ struct xfs_inode *ip,
+ struct xfs_ibulk *breq)
+{
+ return xfs_is_metadir_inode(ip) && (breq->flags & XFS_IBULK_METADIR);
+}
+
/*
* Fill out the bulkstat info for a single inode and report it somewhere.
*
@@ -69,9 +77,6 @@ xfs_bulkstat_one_int(
vfsuid_t vfsuid;
vfsgid_t vfsgid;
- if (xfs_internal_inum(mp, ino))
- goto out_advance;
-
error = xfs_iget(mp, tp, ino,
(XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
XFS_ILOCK_SHARED, &ip);
@@ -97,8 +102,28 @@ xfs_bulkstat_one_int(
vfsuid = i_uid_into_vfsuid(idmap, inode);
vfsgid = i_gid_into_vfsgid(idmap, inode);
+ /*
+ * If caller wants files from the metadata directories, push out the
+ * bare minimum information for enabling scrub.
+ */
+ if (want_metadir_file(ip, bc->breq)) {
+ memset(buf, 0, sizeof(*buf));
+ buf->bs_ino = ino;
+ buf->bs_gen = inode->i_generation;
+ buf->bs_mode = inode->i_mode & S_IFMT;
+ xfs_bulkstat_health(ip, buf);
+ buf->bs_version = XFS_BULKSTAT_VERSION_V5;
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ xfs_irele(ip);
+
+ error = bc->formatter(bc->breq, buf);
+ if (!error || error == -ECANCELED)
+ goto out_advance;
+ goto out;
+ }
+
/* If this is a private inode, don't leak its details to userspace. */
- if (IS_PRIVATE(inode)) {
+ if (IS_PRIVATE(inode) || xfs_is_sb_inum(mp, ino)) {
xfs_iunlock(ip, XFS_ILOCK_SHARED);
xfs_irele(ip);
error = -EINVAL;
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 1659f13f17a8..f10e8f8f2335 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -22,6 +22,9 @@ struct xfs_ibulk {
/* Fill out the bs_extents64 field if set. */
#define XFS_IBULK_NREXT64 (1U << 1)
+/* Signal that we can return metadata directories. */
+#define XFS_IBULK_METADIR (1U << 2)
+
/*
* Advance the user buffer pointer by one record of the given size. If the
* buffer is now full, return the appropriate error code.
diff --git a/fs/xfs/xfs_iunlink_item.c b/fs/xfs/xfs_iunlink_item.c
index 2ddccb172fa0..1fd70a7aed63 100644
--- a/fs/xfs/xfs_iunlink_item.c
+++ b/fs/xfs/xfs_iunlink_item.c
@@ -52,14 +52,14 @@ xfs_iunlink_log_dinode(
struct xfs_trans *tp,
struct xfs_iunlink_item *iup)
{
- struct xfs_mount *mp = tp->t_mountp;
struct xfs_inode *ip = iup->ip;
struct xfs_dinode *dip;
struct xfs_buf *ibp;
+ xfs_agino_t old_ptr;
int offset;
int error;
- error = xfs_imap_to_bp(mp, tp, &ip->i_imap, &ibp);
+ error = xfs_imap_to_bp(tp->t_mountp, tp, &ip->i_imap, &ibp);
if (error)
return error;
/*
@@ -73,22 +73,21 @@ xfs_iunlink_log_dinode(
dip = xfs_buf_offset(ibp, ip->i_imap.im_boffset);
/* Make sure the old pointer isn't garbage. */
- if (be32_to_cpu(dip->di_next_unlinked) != iup->old_agino) {
+ old_ptr = be32_to_cpu(dip->di_next_unlinked);
+ if (old_ptr != iup->old_agino) {
xfs_inode_verifier_error(ip, -EFSCORRUPTED, __func__, dip,
sizeof(*dip), __this_address);
error = -EFSCORRUPTED;
goto out;
}
- trace_xfs_iunlink_update_dinode(mp, iup->pag->pag_agno,
- XFS_INO_TO_AGINO(mp, ip->i_ino),
- be32_to_cpu(dip->di_next_unlinked), iup->next_agino);
+ trace_xfs_iunlink_update_dinode(iup, old_ptr);
dip->di_next_unlinked = cpu_to_be32(iup->next_agino);
offset = ip->i_imap.im_boffset +
offsetof(struct xfs_dinode, di_next_unlinked);
- xfs_dinode_calc_crc(mp, dip);
+ xfs_dinode_calc_crc(tp->t_mountp, dip);
xfs_trans_inode_buf(tp, ibp);
xfs_trans_log_buf(tp, ibp, offset, offset + sizeof(xfs_agino_t) - 1);
return 0;
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index 86f14ec7c31f..7db3ece370b1 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -100,7 +100,6 @@ xfs_iwalk_ichunk_ra(
struct xfs_inobt_rec_incore *irec)
{
struct xfs_ino_geometry *igeo = M_IGEO(mp);
- xfs_agnumber_t agno = pag->pag_agno;
xfs_agblock_t agbno;
struct blk_plug plug;
int i; /* inode chunk index */
@@ -114,7 +113,7 @@ xfs_iwalk_ichunk_ra(
imask = xfs_inobt_maskn(i, igeo->inodes_per_cluster);
if (imask & ~irec->ir_free) {
xfs_buf_readahead(mp->m_ddev_targp,
- XFS_AGB_TO_DADDR(mp, agno, agbno),
+ xfs_agbno_to_daddr(pag, agbno),
igeo->blocks_per_cluster * mp->m_bsize,
&xfs_inode_buf_ops);
}
@@ -177,20 +176,19 @@ xfs_iwalk_ag_recs(
struct xfs_mount *mp = iwag->mp;
struct xfs_trans *tp = iwag->tp;
struct xfs_perag *pag = iwag->pag;
- xfs_ino_t ino;
unsigned int i, j;
int error;
for (i = 0; i < iwag->nr_recs; i++) {
struct xfs_inobt_rec_incore *irec = &iwag->recs[i];
- trace_xfs_iwalk_ag_rec(mp, pag->pag_agno, irec);
+ trace_xfs_iwalk_ag_rec(pag, irec);
if (xfs_pwork_want_abort(&iwag->pwork))
return 0;
if (iwag->inobt_walk_fn) {
- error = iwag->inobt_walk_fn(mp, tp, pag->pag_agno, irec,
+ error = iwag->inobt_walk_fn(mp, tp, pag_agno(pag), irec,
iwag->data);
if (error)
return error;
@@ -208,9 +206,10 @@ xfs_iwalk_ag_recs(
continue;
/* Otherwise call our function. */
- ino = XFS_AGINO_TO_INO(mp, pag->pag_agno,
- irec->ir_startino + j);
- error = iwag->iwalk_fn(mp, tp, ino, iwag->data);
+ error = iwag->iwalk_fn(mp, tp,
+ xfs_agino_to_ino(pag,
+ irec->ir_startino + j),
+ iwag->data);
if (error)
return error;
}
@@ -305,7 +304,7 @@ xfs_iwalk_ag_start(
return -EFSCORRUPTED;
}
- iwag->lastino = XFS_AGINO_TO_INO(mp, pag->pag_agno,
+ iwag->lastino = xfs_agino_to_ino(pag,
irec->ir_startino + XFS_INODES_PER_CHUNK - 1);
/*
@@ -406,7 +405,7 @@ xfs_iwalk_ag(
int error = 0;
/* Set up our cursor at the right place in the inode btree. */
- ASSERT(pag->pag_agno == XFS_INO_TO_AGNO(mp, iwag->startino));
+ ASSERT(pag_agno(pag) == XFS_INO_TO_AGNO(mp, iwag->startino));
agino = XFS_INO_TO_AGINO(mp, iwag->startino);
error = xfs_iwalk_ag_start(iwag, agino, &cur, &agi_bp, &has_more);
@@ -425,7 +424,7 @@ xfs_iwalk_ag(
break;
/* Make sure that we always move forward. */
- rec_fsino = XFS_AGINO_TO_INO(mp, pag->pag_agno, irec->ir_startino);
+ rec_fsino = xfs_agino_to_ino(pag, irec->ir_startino);
if (iwag->lastino != NULLFSINO &&
XFS_IS_CORRUPT(mp, iwag->lastino >= rec_fsino)) {
xfs_btree_mark_sick(cur);
@@ -535,6 +534,37 @@ xfs_iwalk_prefetch(
return max(inobt_records, 2U);
}
+/*
+ * Common AG iteration loop used by both xfs_iwalk() and xfs_inobt_walk().
+ *
+ * Sets the walk up with xfs_iwalk_alloc(), then runs xfs_iwalk_ag() for each
+ * perag handed back by xfs_perag_next_from(), starting with the AG that
+ * contains iwag->startino.  After an AG completes without error,
+ * iwag->startino is moved to inode 0 of the following AG.  The walk state is
+ * torn down with xfs_iwalk_free() before returning.
+ *
+ * The loop ends early on the first error from xfs_iwalk_ag(), or after a
+ * single AG when XFS_IWALK_SAME_AG is set in @flags.
+ *
+ * Returns the error from xfs_iwalk_alloc() or xfs_iwalk_ag(), or 0 if none
+ * failed.
+ *
+ * NOTE(review): iwag->pag is passed to xfs_perag_next_from() as the previous
+ * perag on the first pass, so callers presumably must leave it NULL -- confirm
+ * against xfs_perag_next_from().
+ */
+static int
+xfs_iwalk_args(
+ struct xfs_iwalk_ag *iwag,
+ unsigned int flags)
+{
+ struct xfs_mount *mp = iwag->mp;
+ xfs_agnumber_t start_agno;
+ int error;
+
+ start_agno = XFS_INO_TO_AGNO(iwag->mp, iwag->startino);
+ ASSERT(start_agno < iwag->mp->m_sb.sb_agcount);
+ ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
+
+ error = xfs_iwalk_alloc(iwag);
+ if (error)
+ return error;
+
+ while ((iwag->pag = xfs_perag_next_from(mp, iwag->pag, start_agno))) {
+ error = xfs_iwalk_ag(iwag);
+ if (error || (flags & XFS_IWALK_SAME_AG)) {
+ /*
+ * Leaving the loop early: drop iwag->pag here since
+ * xfs_perag_next_from() will not be called again.
+ */
+ xfs_perag_rele(iwag->pag);
+ break;
+ }
+ iwag->startino =
+ XFS_AGINO_TO_INO(mp, pag_agno(iwag->pag) + 1, 0);
+ }
+
+ xfs_iwalk_free(iwag);
+ return error;
+}
+
/*
* Walk all inodes in the filesystem starting from @startino. The @iwalk_fn
* will be called for each allocated inode, being passed the inode's number and
@@ -563,32 +593,8 @@ xfs_iwalk(
.pwork = XFS_PWORK_SINGLE_THREADED,
.lastino = NULLFSINO,
};
- struct xfs_perag *pag;
- xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
- int error;
-
- ASSERT(agno < mp->m_sb.sb_agcount);
- ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
-
- error = xfs_iwalk_alloc(&iwag);
- if (error)
- return error;
-
- for_each_perag_from(mp, agno, pag) {
- iwag.pag = pag;
- error = xfs_iwalk_ag(&iwag);
- if (error)
- break;
- iwag.startino = XFS_AGINO_TO_INO(mp, agno + 1, 0);
- if (flags & XFS_INOBT_WALK_SAME_AG)
- break;
- iwag.pag = NULL;
- }
- if (iwag.pag)
- xfs_perag_rele(pag);
- xfs_iwalk_free(&iwag);
- return error;
+ return xfs_iwalk_args(&iwag, flags);
}
/* Run per-thread iwalk work. */
@@ -640,19 +646,19 @@ xfs_iwalk_threaded(
bool polled,
void *data)
{
+ xfs_agnumber_t start_agno = XFS_INO_TO_AGNO(mp, startino);
struct xfs_pwork_ctl pctl;
- struct xfs_perag *pag;
- xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
+ struct xfs_perag *pag = NULL;
int error;
- ASSERT(agno < mp->m_sb.sb_agcount);
+ ASSERT(start_agno < mp->m_sb.sb_agcount);
ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL));
error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk");
if (error)
return error;
- for_each_perag_from(mp, agno, pag) {
+ while ((pag = xfs_perag_next_from(mp, pag, start_agno))) {
struct xfs_iwalk_ag *iwag;
if (xfs_pwork_ctl_want_abort(&pctl))
@@ -673,8 +679,8 @@ xfs_iwalk_threaded(
iwag->sz_recs = xfs_iwalk_prefetch(inode_records);
iwag->lastino = NULLFSINO;
xfs_pwork_queue(&pctl, &iwag->pwork);
- startino = XFS_AGINO_TO_INO(mp, pag->pag_agno + 1, 0);
- if (flags & XFS_INOBT_WALK_SAME_AG)
+ startino = XFS_AGINO_TO_INO(mp, pag_agno(pag) + 1, 0);
+ if (flags & XFS_IWALK_SAME_AG)
break;
}
if (pag)
@@ -748,30 +754,6 @@ xfs_inobt_walk(
.pwork = XFS_PWORK_SINGLE_THREADED,
.lastino = NULLFSINO,
};
- struct xfs_perag *pag;
- xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, startino);
- int error;
- ASSERT(agno < mp->m_sb.sb_agcount);
- ASSERT(!(flags & ~XFS_INOBT_WALK_FLAGS_ALL));
-
- error = xfs_iwalk_alloc(&iwag);
- if (error)
- return error;
-
- for_each_perag_from(mp, agno, pag) {
- iwag.pag = pag;
- error = xfs_iwalk_ag(&iwag);
- if (error)
- break;
- iwag.startino = XFS_AGINO_TO_INO(mp, pag->pag_agno + 1, 0);
- if (flags & XFS_INOBT_WALK_SAME_AG)
- break;
- iwag.pag = NULL;
- }
-
- if (iwag.pag)
- xfs_perag_rele(pag);
- xfs_iwalk_free(&iwag);
- return error;
+ return xfs_iwalk_args(&iwag, flags);
}
diff --git a/fs/xfs/xfs_iwalk.h b/fs/xfs/xfs_iwalk.h
index 83699089755e..17a5a2c6debb 100644
--- a/fs/xfs/xfs_iwalk.h
+++ b/fs/xfs/xfs_iwalk.h
@@ -25,7 +25,7 @@ int xfs_iwalk_threaded(struct xfs_mount *mp, xfs_ino_t startino,
unsigned int flags, xfs_iwalk_fn iwalk_fn,
unsigned int inode_records, bool poll, void *data);
-/* Only iterate inodes within the same AG as @startino. */
+/* Only iterate within the same AG as @startino. */
#define XFS_IWALK_SAME_AG (1U << 0)
#define XFS_IWALK_FLAGS_ALL (XFS_IWALK_SAME_AG)
@@ -41,9 +41,4 @@ int xfs_inobt_walk(struct xfs_mount *mp, struct xfs_trans *tp,
xfs_inobt_walk_fn inobt_walk_fn, unsigned int inobt_records,
void *data);
-/* Only iterate inobt records within the same AG as @startino. */
-#define XFS_INOBT_WALK_SAME_AG (XFS_IWALK_SAME_AG)
-
-#define XFS_INOBT_WALK_FLAGS_ALL (XFS_INOBT_WALK_SAME_AG)
-
#endif /* __XFS_IWALK_H__ */
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 80da0cf87d7a..2e9157b650e6 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -907,7 +907,7 @@ xlog_cil_committed(
xlog_cil_ail_insert(ctx, abort);
xfs_extent_busy_sort(&ctx->busy_extents.extent_list);
- xfs_extent_busy_clear(mp, &ctx->busy_extents.extent_list,
+ xfs_extent_busy_clear(&ctx->busy_extents.extent_list,
xfs_has_discard(mp) && !abort);
spin_lock(&ctx->cil->xc_push_lock);
@@ -917,7 +917,6 @@ xlog_cil_committed(
xlog_cil_free_logvec(&ctx->lv_chain);
if (!list_empty(&ctx->busy_extents.extent_list)) {
- ctx->busy_extents.mount = mp;
ctx->busy_extents.owner = ctx;
xfs_discard_extents(mp, &ctx->busy_extents);
return;
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 704aaadb61cf..0af3d477197b 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -1818,6 +1818,8 @@ static const struct xlog_recover_item_ops *xlog_recover_item_ops[] = {
&xlog_attrd_item_ops,
&xlog_xmi_item_ops,
&xlog_xmd_item_ops,
+ &xlog_rtefi_item_ops,
+ &xlog_rtefd_item_ops,
};
static const struct xlog_recover_item_ops *
@@ -2677,7 +2679,7 @@ xlog_recover_clear_agi_bucket(
struct xfs_perag *pag,
int bucket)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_trans *tp;
struct xfs_agi *agi;
struct xfs_buf *agibp;
@@ -2708,7 +2710,7 @@ out_abort:
xfs_trans_cancel(tp);
out_error:
xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__,
- pag->pag_agno);
+ pag_agno(pag));
return;
}
@@ -2718,7 +2720,7 @@ xlog_recover_iunlink_bucket(
struct xfs_agi *agi,
int bucket)
{
- struct xfs_mount *mp = pag->pag_mount;
+ struct xfs_mount *mp = pag_mount(pag);
struct xfs_inode *prev_ip = NULL;
struct xfs_inode *ip;
xfs_agino_t prev_agino, agino;
@@ -2726,9 +2728,8 @@ xlog_recover_iunlink_bucket(
agino = be32_to_cpu(agi->agi_unlinked[bucket]);
while (agino != NULLAGINO) {
- error = xfs_iget(mp, NULL,
- XFS_AGINO_TO_INO(mp, pag->pag_agno, agino),
- 0, 0, &ip);
+ error = xfs_iget(mp, NULL, xfs_agino_to_ino(pag, agino), 0, 0,
+ &ip);
if (error)
break;
@@ -2846,10 +2847,9 @@ static void
xlog_recover_process_iunlinks(
struct xlog *log)
{
- struct xfs_perag *pag;
- xfs_agnumber_t agno;
+ struct xfs_perag *pag = NULL;
- for_each_perag(log->l_mp, agno, pag)
+ while ((pag = xfs_perag_next(log->l_mp, pag)))
xlog_recover_iunlink_ag(pag);
}
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 8f495cc23903..6ed485ff2756 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -131,3 +131,54 @@ xfs_buf_alert_ratelimited(
__xfs_printk(KERN_ALERT, mp, &vaf);
va_end(args);
}
+
+/*
+ * Log the "EXPERIMENTAL ... feature enabled" warning for @feat on @mp.
+ *
+ * The table below gives each enum xfs_experimental_feat value a display name
+ * and an XFS_OPSTATE_WARNED_* bit.  The message is only emitted when
+ * xfs_should_warn() returns true for that bit (presumably the first time it
+ * is asked per mount -- xfs_should_warn() is not visible here, confirm).
+ *
+ * The BUILD_BUG_ON fails the build if the size of the table does not equal
+ * XFS_EXPERIMENTAL_MAX.
+ */
+void
+xfs_warn_experimental(
+ struct xfs_mount *mp,
+ enum xfs_experimental_feat feat)
+{
+ static const struct {
+ const char *name;
+ long opstate;
+ } features[] = {
+ [XFS_EXPERIMENTAL_PNFS] = {
+ .opstate = XFS_OPSTATE_WARNED_PNFS,
+ .name = "pNFS",
+ },
+ [XFS_EXPERIMENTAL_SCRUB] = {
+ .opstate = XFS_OPSTATE_WARNED_SCRUB,
+ .name = "online scrub",
+ },
+ [XFS_EXPERIMENTAL_SHRINK] = {
+ .opstate = XFS_OPSTATE_WARNED_SHRINK,
+ .name = "online shrink",
+ },
+ [XFS_EXPERIMENTAL_LARP] = {
+ .opstate = XFS_OPSTATE_WARNED_LARP,
+ .name = "logged extended attributes",
+ },
+ [XFS_EXPERIMENTAL_LBS] = {
+ .opstate = XFS_OPSTATE_WARNED_LBS,
+ .name = "large block size",
+ },
+ [XFS_EXPERIMENTAL_EXCHRANGE] = {
+ .opstate = XFS_OPSTATE_WARNED_EXCHRANGE,
+ .name = "exchange range",
+ },
+ [XFS_EXPERIMENTAL_PPTR] = {
+ .opstate = XFS_OPSTATE_WARNED_PPTR,
+ .name = "parent pointer",
+ },
+ [XFS_EXPERIMENTAL_METADIR] = {
+ .opstate = XFS_OPSTATE_WARNED_METADIR,
+ .name = "metadata directory tree",
+ },
+ };
+ ASSERT(feat >= 0 && feat < XFS_EXPERIMENTAL_MAX);
+ BUILD_BUG_ON(ARRAY_SIZE(features) != XFS_EXPERIMENTAL_MAX);
+
+ if (xfs_should_warn(mp, features[feat].opstate))
+ xfs_warn(mp,
+ "EXPERIMENTAL %s feature enabled. Use at your own risk!",
+ features[feat].name);
+}
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
index cc323775a12c..7fb36ced9df7 100644
--- a/fs/xfs/xfs_message.h
+++ b/fs/xfs/xfs_message.h
@@ -75,12 +75,6 @@ do { \
#define xfs_debug_ratelimited(dev, fmt, ...) \
xfs_printk_ratelimited(xfs_debug, dev, fmt, ##__VA_ARGS__)
-#define xfs_warn_mount(mp, warntag, fmt, ...) \
-do { \
- if (xfs_should_warn((mp), (warntag))) \
- xfs_warn((mp), (fmt), ##__VA_ARGS__); \
-} while (0)
-
#define xfs_warn_once(dev, fmt, ...) \
xfs_printk_once(xfs_warn, dev, fmt, ##__VA_ARGS__)
#define xfs_notice_once(dev, fmt, ...) \
@@ -96,4 +90,18 @@ extern void xfs_hex_dump(const void *p, int length);
void xfs_buf_alert_ratelimited(struct xfs_buf *bp, const char *rlmsg,
const char *fmt, ...);
+/*
+ * Identifiers for the experimental features that xfs_warn_experimental() can
+ * warn about.  XFS_EXPERIMENTAL_MAX is not a feature; it is the number of
+ * entries above it and must stay last.
+ */
+enum xfs_experimental_feat {
+ XFS_EXPERIMENTAL_PNFS,
+ XFS_EXPERIMENTAL_SCRUB,
+ XFS_EXPERIMENTAL_SHRINK,
+ XFS_EXPERIMENTAL_LARP,
+ XFS_EXPERIMENTAL_LBS,
+ XFS_EXPERIMENTAL_EXCHRANGE,
+ XFS_EXPERIMENTAL_PPTR,
+ XFS_EXPERIMENTAL_METADIR,
+
+ XFS_EXPERIMENTAL_MAX,
+};
+/* Emit the experimental-feature warning for feature @f on mount @mp. */
+void xfs_warn_experimental(struct xfs_mount *mp, enum xfs_experimental_feat f);
+
#endif /* __XFS_MESSAGE_H */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 25bbcc3f4ee0..5918f433dba7 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -35,6 +35,8 @@
#include "xfs_trace.h"
#include "xfs_ag.h"
#include "xfs_rtbitmap.h"
+#include "xfs_metafile.h"
+#include "xfs_rtgroup.h"
#include "scrub/stats.h"
static DEFINE_MUTEX(xfs_uuid_table_mutex);
@@ -620,6 +622,22 @@ xfs_mount_setup_inode_geom(
xfs_ialloc_setup_geometry(mp);
}
+/*
+ * Mount the metadata directory tree root.
+ *
+ * Looks up the inode numbered sb_metadirino with xfs_metafile_iget(), asking
+ * for an XFS_METAFILE_DIR, and stores the result in mp->m_metadirip.  If the
+ * lookup fails a warning is logged.  Returns the xfs_metafile_iget() result:
+ * 0 on success or its error code.
+ */
+STATIC int
+xfs_mount_setup_metadir(
+ struct xfs_mount *mp)
+{
+ int error;
+
+ /* Load the metadata directory root inode into memory. */
+ error = xfs_metafile_iget(mp, mp->m_sb.sb_metadirino, XFS_METAFILE_DIR,
+ &mp->m_metadirip);
+ if (error)
+ xfs_warn(mp, "Failed to load metadir root directory, error %d",
+ error);
+ return error;
+}
+
/* Compute maximum possible height for per-AG btree types for this fs. */
static inline void
xfs_agbtree_compute_maxlevels(
@@ -817,10 +835,17 @@ xfs_mountfs(
goto out_free_dir;
}
+ error = xfs_initialize_rtgroups(mp, 0, sbp->sb_rgcount,
+ mp->m_sb.sb_rextents);
+ if (error) {
+ xfs_warn(mp, "Failed rtgroup init: %d", error);
+ goto out_free_perag;
+ }
+
if (XFS_IS_CORRUPT(mp, !sbp->sb_logblocks)) {
xfs_warn(mp, "no log defined");
error = -EFSCORRUPTED;
- goto out_free_perag;
+ goto out_free_rtgroup;
}
error = xfs_inodegc_register_shrinker(mp);
@@ -828,6 +853,13 @@ xfs_mountfs(
goto out_fail_wait;
/*
+ * If we're resuming quota status, pick up the preliminary qflags from
+ * the ondisk superblock so that we know if we should recover dquots.
+ */
+ if (xfs_is_resuming_quotaon(mp))
+ xfs_qm_resume_quotaon(mp);
+
+ /*
* Log's mount-time initialization. The first part of recovery can place
* some items on the AIL, to be handled when recovery is finished or
* cancelled.
@@ -841,6 +873,14 @@ xfs_mountfs(
}
/*
+ * If we're resuming quota status and recovered the log, re-sample the
+ * qflags from the ondisk superblock now that we've recovered it, just
+ * in case someone shut down enforcement just before a crash.
+ */
+ if (xfs_clear_resuming_quotaon(mp) && xlog_recovery_needed(mp->m_log))
+ xfs_qm_resume_quotaon(mp);
+
+ /*
* If logged xattrs are still enabled after log recovery finishes, then
* they'll be available until unmount. Otherwise, turn them off.
*/
@@ -866,6 +906,12 @@ xfs_mountfs(
mp->m_features |= XFS_FEAT_ATTR2;
}
+ if (xfs_has_metadir(mp)) {
+ error = xfs_mount_setup_metadir(mp);
+ if (error)
+ goto out_free_metadir;
+ }
+
/*
* Get and sanity-check the root inode.
* Save the pointer to it in the mount structure.
@@ -876,7 +922,7 @@ xfs_mountfs(
xfs_warn(mp,
"Failed to read root inode 0x%llx, error %d",
sbp->sb_rootino, -error);
- goto out_log_dealloc;
+ goto out_free_metadir;
}
ASSERT(rip != NULL);
@@ -1018,6 +1064,9 @@ xfs_mountfs(
xfs_irele(rip);
/* Clean out dquots that might be in memory after quotacheck. */
xfs_qm_unmount(mp);
+ out_free_metadir:
+ if (mp->m_metadirip)
+ xfs_irele(mp->m_metadirip);
/*
* Inactivate all inodes that might still be in memory after a log
@@ -1039,7 +1088,6 @@ xfs_mountfs(
* quota inodes.
*/
xfs_unmount_flush_inodes(mp);
- out_log_dealloc:
xfs_log_mount_cancel(mp);
out_inodegc_shrinker:
shrinker_free(mp->m_inodegc_shrinker);
@@ -1047,6 +1095,8 @@ xfs_mountfs(
if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
xfs_buftarg_drain(mp->m_logdev_targp);
xfs_buftarg_drain(mp->m_ddev_targp);
+ out_free_rtgroup:
+ xfs_free_rtgroups(mp, 0, mp->m_sb.sb_rgcount);
out_free_perag:
xfs_free_perag_range(mp, 0, mp->m_sb.sb_agcount);
out_free_dir:
@@ -1091,6 +1141,8 @@ xfs_unmountfs(
xfs_qm_unmount_quotas(mp);
xfs_rtunmount_inodes(mp);
xfs_irele(mp->m_rootip);
+ if (mp->m_metadirip)
+ xfs_irele(mp->m_metadirip);
xfs_unmount_flush_inodes(mp);
@@ -1129,6 +1181,7 @@ xfs_unmountfs(
xfs_errortag_clearall(mp);
#endif
shrinker_free(mp->m_inodegc_shrinker);
+ xfs_free_rtgroups(mp, 0, mp->m_sb.sb_rgcount);
xfs_free_perag_range(mp, 0, mp->m_sb.sb_agcount);
xfs_errortag_del(mp);
xfs_error_sysfs_del(mp);
@@ -1436,7 +1489,7 @@ xfs_mod_delalloc(
if (XFS_IS_REALTIME_INODE(ip)) {
percpu_counter_add_batch(&mp->m_delalloc_rtextents,
- xfs_rtb_to_rtx(mp, data_delta),
+ xfs_blen_to_rtbxlen(mp, data_delta),
XFS_DELALLOC_BATCH);
if (!ind_delta)
return;
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 96496f39f551..db9dade7d22a 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -72,6 +72,40 @@ struct xfs_inodegc {
};
/*
+ * Container for each type of group, used to look up individual groups and
+ * to describe their geometry.
+ */
+struct xfs_groups {
+ struct xarray xa;
+
+ /*
+ * Maximum capacity of the group in FSBs.
+ *
+ * Each group is laid out densely in the daddr space. For the
+ * degenerate case of a pre-rtgroups filesystem, the incore rtgroup
+ * pretends to have a zero-block and zero-blklog rtgroup.
+ */
+ uint32_t blocks;
+
+ /*
+ * Log(2) of the logical size of each group.
+ *
+ * Compared to the blocks field above this is rounded up to the next
+ * power of two, and thus lays out the xfs_fsblock_t/xfs_rtblock_t
+ * space sparsely with a hole from blocks to (1 << blklog) at the end
+ * of each group.
+ */
+ uint8_t blklog;
+
+ /*
+ * Mask to extract the group-relative block number from a FSB.
+ * For a pre-rtgroups filesystem we pretend to have one very large
+ * rtgroup, so this mask must be 64-bit.
+ */
+ uint64_t blkmask;
+};
+
+/*
* The struct xfsmount layout is optimised to separate read-mostly variables
* from variables that are frequently modified. We put the read-mostly variables
* first, then place all the other variables at the end.
@@ -85,27 +119,20 @@ typedef struct xfs_mount {
struct super_block *m_super;
struct xfs_ail *m_ail; /* fs active log item list */
struct xfs_buf *m_sb_bp; /* buffer for superblock */
+ struct xfs_buf *m_rtsb_bp; /* realtime superblock */
char *m_rtname; /* realtime device name */
char *m_logname; /* external log device name */
struct xfs_da_geometry *m_dir_geo; /* directory block geometry */
struct xfs_da_geometry *m_attr_geo; /* attribute block geometry */
struct xlog *m_log; /* log specific stuff */
- struct xfs_inode *m_rbmip; /* pointer to bitmap inode */
- struct xfs_inode *m_rsumip; /* pointer to summary inode */
struct xfs_inode *m_rootip; /* pointer to root directory */
+ struct xfs_inode *m_metadirip; /* ptr to metadata directory */
+ struct xfs_inode *m_rtdirip; /* ptr to realtime metadir */
struct xfs_quotainfo *m_quotainfo; /* disk quota information */
struct xfs_buftarg *m_ddev_targp; /* data device */
struct xfs_buftarg *m_logdev_targp;/* log device */
struct xfs_buftarg *m_rtdev_targp; /* rt device */
void __percpu *m_inodegc; /* percpu inodegc structures */
-
- /*
- * Optional cache of rt summary level per bitmap block with the
- * invariant that m_rsum_cache[bbno] > the maximum i for which
- * rsum[i][bbno] != 0, or 0 if rsum[i][bbno] == 0 for all i.
- * Reads and writes are serialized by the rsumip inode lock.
- */
- uint8_t *m_rsum_cache;
struct xfs_mru_cache *m_filestream; /* per-mount filestream data */
struct workqueue_struct *m_buf_workqueue;
struct workqueue_struct *m_unwritten_workqueue;
@@ -120,9 +147,11 @@ typedef struct xfs_mount {
uint8_t m_agno_log; /* log #ag's */
uint8_t m_sectbb_log; /* sectlog - BBSHIFT */
int8_t m_rtxblklog; /* log2 of rextsize, if possible */
+
uint m_blockmask; /* sb_blocksize-1 */
uint m_blockwsize; /* sb_blocksize in words */
- uint m_blockwmask; /* blockwsize-1 */
+ /* number of rt extents per rt bitmap block if rtgroups enabled */
+ unsigned int m_rtx_per_rbmblock;
uint m_alloc_mxr[2]; /* max alloc btree records */
uint m_alloc_mnr[2]; /* min alloc btree records */
uint m_bmap_dmxr[2]; /* max bmap btree records */
@@ -146,7 +175,7 @@ typedef struct xfs_mount {
uint m_allocsize_blocks; /* min write size blocks */
int m_logbufs; /* number of log buffers */
int m_logbsize; /* size of each log buffer */
- uint m_rsumlevels; /* rt summary levels */
+ unsigned int m_rsumlevels; /* rt summary levels */
xfs_filblks_t m_rsumblocks; /* size of rt summary, FSBs */
int m_fixedfsid[2]; /* unchanged for life of FS */
uint m_qflags; /* quota status flags */
@@ -208,7 +237,7 @@ typedef struct xfs_mount {
*/
atomic64_t m_allocbt_blks;
- struct xarray m_perags; /* per-ag accounting info */
+ struct xfs_groups m_groups[XG_TYPE_MAX];
uint64_t m_resblks; /* total reserved blocks */
uint64_t m_resblks_avail;/* available reserved blocks */
uint64_t m_resblks_save; /* reserved blks @ remount,ro */
@@ -224,6 +253,7 @@ typedef struct xfs_mount {
#endif
xfs_agnumber_t m_agfrotor; /* last ag where space found */
atomic_t m_agirotor; /* last ag dir inode alloced */
+ atomic_t m_rtgrotor; /* last rtgroup rtpicked */
/* Memory shrinker to throttle and reprioritize inodegc */
struct shrinker *m_inodegc_shrinker;
@@ -298,6 +328,7 @@ typedef struct xfs_mount {
#define XFS_FEAT_NEEDSREPAIR (1ULL << 25) /* needs xfs_repair */
#define XFS_FEAT_NREXT64 (1ULL << 26) /* large extent counters */
#define XFS_FEAT_EXCHANGE_RANGE (1ULL << 27) /* exchange range */
+#define XFS_FEAT_METADIR (1ULL << 28) /* metadata directory tree */
/* Mount features */
#define XFS_FEAT_NOATTR2 (1ULL << 48) /* disable attr2 creation */
@@ -353,6 +384,19 @@ __XFS_HAS_FEAT(bigtime, BIGTIME)
__XFS_HAS_FEAT(needsrepair, NEEDSREPAIR)
__XFS_HAS_FEAT(large_extent_counts, NREXT64)
__XFS_HAS_FEAT(exchange_range, EXCHANGE_RANGE)
+__XFS_HAS_FEAT(metadir, METADIR)
+
+static inline bool xfs_has_rtgroups(struct xfs_mount *mp)
+{
+ /* all metadir file systems also allow rtgroups */
+ return xfs_has_metadir(mp);
+}
+
+static inline bool xfs_has_rtsb(struct xfs_mount *mp)
+{
+ /* all rtgroups filesystems with an rt section have an rtsb */
+ return xfs_has_rtgroups(mp) && xfs_has_realtime(mp);
+}
/*
* Some features are always on for v5 file systems, allow the compiler to
@@ -433,18 +477,30 @@ __XFS_HAS_FEAT(nouuid, NOUUID)
*/
#define XFS_OPSTATE_BLOCKGC_ENABLED 6
+/* Kernel has logged a warning about pNFS being used on this fs. */
+#define XFS_OPSTATE_WARNED_PNFS 7
/* Kernel has logged a warning about online fsck being used on this fs. */
-#define XFS_OPSTATE_WARNED_SCRUB 7
+#define XFS_OPSTATE_WARNED_SCRUB 8
/* Kernel has logged a warning about shrink being used on this fs. */
-#define XFS_OPSTATE_WARNED_SHRINK 8
+#define XFS_OPSTATE_WARNED_SHRINK 9
/* Kernel has logged a warning about logged xattr updates being used. */
-#define XFS_OPSTATE_WARNED_LARP 9
+#define XFS_OPSTATE_WARNED_LARP 10
/* Mount time quotacheck is running */
-#define XFS_OPSTATE_QUOTACHECK_RUNNING 10
+#define XFS_OPSTATE_QUOTACHECK_RUNNING 11
/* Do we want to clear log incompat flags? */
-#define XFS_OPSTATE_UNSET_LOG_INCOMPAT 11
+#define XFS_OPSTATE_UNSET_LOG_INCOMPAT 12
/* Filesystem can use logged extended attributes */
-#define XFS_OPSTATE_USE_LARP 12
+#define XFS_OPSTATE_USE_LARP 13
+/* Kernel has logged a warning about blocksize > pagesize on this fs. */
+#define XFS_OPSTATE_WARNED_LBS 14
+/* Kernel has logged a warning about exchange-range being used on this fs. */
+#define XFS_OPSTATE_WARNED_EXCHRANGE 15
+/* Kernel has logged a warning about parent pointers being used on this fs. */
+#define XFS_OPSTATE_WARNED_PPTR 16
+/* Kernel has logged a warning about metadata dirs being used on this fs. */
+#define XFS_OPSTATE_WARNED_METADIR 17
+/* Filesystem should use qflags to determine quotaon status */
+#define XFS_OPSTATE_RESUMING_QUOTAON 18
#define __XFS_IS_OPSTATE(name, NAME) \
static inline bool xfs_is_ ## name (struct xfs_mount *mp) \
@@ -469,9 +525,24 @@ __XFS_IS_OPSTATE(inodegc_enabled, INODEGC_ENABLED)
__XFS_IS_OPSTATE(blockgc_enabled, BLOCKGC_ENABLED)
#ifdef CONFIG_XFS_QUOTA
__XFS_IS_OPSTATE(quotacheck_running, QUOTACHECK_RUNNING)
+__XFS_IS_OPSTATE(resuming_quotaon, RESUMING_QUOTAON)
#else
-# define xfs_is_quotacheck_running(mp) (false)
-#endif
+static inline bool xfs_is_quotacheck_running(struct xfs_mount *mp)
+{
+ return false;
+}
+static inline bool xfs_is_resuming_quotaon(struct xfs_mount *mp)
+{
+ return false;
+}
+static inline void xfs_set_resuming_quotaon(struct xfs_mount *m)
+{
+}
+static inline bool xfs_clear_resuming_quotaon(struct xfs_mount *mp)
+{
+ return false;
+}
+#endif /* CONFIG_XFS_QUOTA */
__XFS_IS_OPSTATE(done_with_log_incompat, UNSET_LOG_INCOMPAT)
__XFS_IS_OPSTATE(using_logged_xattrs, USE_LARP)
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 23d16186e1a3..6f4479deac6d 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -58,8 +58,7 @@ xfs_fs_get_uuid(
{
struct xfs_mount *mp = XFS_M(sb);
- xfs_notice_once(mp,
-"Using experimental pNFS feature, use at your own risk!");
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_PNFS);
if (*len < sizeof(uuid_t))
return -EINVAL;
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 7e2307921deb..b928b036990b 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -27,6 +27,9 @@
#include "xfs_ialloc.h"
#include "xfs_log_priv.h"
#include "xfs_health.h"
+#include "xfs_da_format.h"
+#include "xfs_metafile.h"
+#include "xfs_rtgroup.h"
/*
* The global quota manager. There is only one of these for the entire
@@ -37,7 +40,6 @@
STATIC int xfs_qm_init_quotainos(struct xfs_mount *mp);
STATIC int xfs_qm_init_quotainfo(struct xfs_mount *mp);
-STATIC void xfs_qm_destroy_quotainos(struct xfs_quotainfo *qi);
STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp);
/*
* We use the batch lookup interface to iterate over the dquots as it
@@ -208,6 +210,39 @@ xfs_qm_unmount(
}
}
+/*
+ * Call xfs_qm_dqdetach() on the realtime bitmap and summary inodes of
+ * rtgroup 0, if they are present. Does nothing when that rtgroup cannot be
+ * grabbed.
+ */
+static void
+xfs_qm_unmount_rt(
+ struct xfs_mount *mp)
+{
+ struct xfs_rtgroup *rtg;
+ struct xfs_inode *ip;
+
+ rtg = xfs_rtgroup_grab(mp, 0);
+ if (rtg == NULL)
+ return;
+
+ ip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
+ if (ip != NULL)
+ xfs_qm_dqdetach(ip);
+
+ ip = rtg->rtg_inodes[XFS_RTGI_SUMMARY];
+ if (ip != NULL)
+ xfs_qm_dqdetach(ip);
+
+ xfs_rtgroup_rele(rtg);
+}
+
+/*
+ * Release whichever of the user, group and project quota inodes are still
+ * held in @qi, clearing each pointer once its inode has been released.
+ */
+STATIC void
+xfs_qm_destroy_quotainos(
+ struct xfs_quotainfo *qi)
+{
+ struct xfs_inode **qinos[] = {
+ &qi->qi_uquotaip,
+ &qi->qi_gquotaip,
+ &qi->qi_pquotaip,
+ };
+ unsigned int i;
+
+ for (i = 0; i < sizeof(qinos) / sizeof(qinos[0]); i++) {
+ if (*qinos[i] == NULL)
+ continue;
+ xfs_irele(*qinos[i]);
+ *qinos[i] = NULL; /* paranoia */
+ }
+}
+
/*
* Called from the vfsops layer.
*/
@@ -221,28 +256,19 @@ xfs_qm_unmount_quotas(
*/
ASSERT(mp->m_rootip);
xfs_qm_dqdetach(mp->m_rootip);
- if (mp->m_rbmip)
- xfs_qm_dqdetach(mp->m_rbmip);
- if (mp->m_rsumip)
- xfs_qm_dqdetach(mp->m_rsumip);
+
+ /*
+ * For pre-RTG file systems, the RT inodes have quotas attached,
+ * detach them now.
+ */
+ if (!xfs_has_rtgroups(mp))
+ xfs_qm_unmount_rt(mp);
/*
* Release the quota inodes.
*/
- if (mp->m_quotainfo) {
- if (mp->m_quotainfo->qi_uquotaip) {
- xfs_irele(mp->m_quotainfo->qi_uquotaip);
- mp->m_quotainfo->qi_uquotaip = NULL;
- }
- if (mp->m_quotainfo->qi_gquotaip) {
- xfs_irele(mp->m_quotainfo->qi_gquotaip);
- mp->m_quotainfo->qi_gquotaip = NULL;
- }
- if (mp->m_quotainfo->qi_pquotaip) {
- xfs_irele(mp->m_quotainfo->qi_pquotaip);
- mp->m_quotainfo->qi_pquotaip = NULL;
- }
- }
+ if (mp->m_quotainfo)
+ xfs_qm_destroy_quotainos(mp->m_quotainfo);
}
STATIC int
@@ -302,6 +328,8 @@ xfs_qm_need_dqattach(
return false;
if (xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
return false;
+ if (xfs_is_metadir_inode(ip))
+ return false;
return true;
}
@@ -324,6 +352,7 @@ xfs_qm_dqattach_locked(
return 0;
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
+ ASSERT(!xfs_is_metadir_inode(ip));
if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) {
error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_USER,
@@ -616,6 +645,157 @@ xfs_qm_init_timelimits(
xfs_qm_dqdestroy(dqp);
}
+/*
+ * Look up the quota metadata directory and whichever quota inodes already
+ * exist for the enabled quota types. A missing directory or a missing quota
+ * inode (-ENOENT) is not treated as a failure; any other error is returned.
+ * @dpp is passed to xfs_dqinode_load_parent() to receive the directory inode.
+ */
+static int
+xfs_qm_load_metadir_qinos(
+ struct xfs_mount *mp,
+ struct xfs_quotainfo *qi,
+ struct xfs_inode **dpp)
+{
+ struct xfs_trans *tp;
+ int error;
+
+ error = xfs_trans_alloc_empty(mp, &tp);
+ if (error)
+ return error;
+
+ error = xfs_dqinode_load_parent(tp, dpp);
+ if (error) {
+ /* No quota directory yet is fine; one gets created later. */
+ if (error == -ENOENT)
+ error = 0;
+ goto out_trans;
+ }
+
+ if (XFS_IS_UQUOTA_ON(mp)) {
+ error = xfs_dqinode_load(tp, *dpp, XFS_DQTYPE_USER,
+ &qi->qi_uquotaip);
+ if (error != 0 && error != -ENOENT)
+ goto out_trans;
+ }
+
+ if (XFS_IS_GQUOTA_ON(mp)) {
+ error = xfs_dqinode_load(tp, *dpp, XFS_DQTYPE_GROUP,
+ &qi->qi_gquotaip);
+ if (error != 0 && error != -ENOENT)
+ goto out_trans;
+ }
+
+ if (XFS_IS_PQUOTA_ON(mp)) {
+ error = xfs_dqinode_load(tp, *dpp, XFS_DQTYPE_PROJ,
+ &qi->qi_pquotaip);
+ if (error != 0 && error != -ENOENT)
+ goto out_trans;
+ }
+
+ /* An -ENOENT left over from the last lookup is not a failure. */
+ error = 0;
+out_trans:
+ xfs_trans_cancel(tp);
+ return error;
+}
+
+/*
+ * Create quota inodes in the metadata directory tree: first the quota
+ * directory itself when *@dpp is still NULL, then every enabled quota inode
+ * that has not been loaded into @qi yet. Stops at the first failure.
+ */
+STATIC int
+xfs_qm_create_metadir_qinos(
+ struct xfs_mount *mp,
+ struct xfs_quotainfo *qi,
+ struct xfs_inode **dpp)
+{
+ int error = 0;
+
+ if (*dpp == NULL) {
+ error = xfs_dqinode_mkdir_parent(mp, dpp);
+ /* -EEXIST from the mkdir is tolerated. */
+ if (error == -EEXIST)
+ error = 0;
+ if (error)
+ return error;
+ }
+
+ if (XFS_IS_UQUOTA_ON(mp) && qi->qi_uquotaip == NULL)
+ error = xfs_dqinode_metadir_create(*dpp, XFS_DQTYPE_USER,
+ &qi->qi_uquotaip);
+
+ if (!error && XFS_IS_GQUOTA_ON(mp) && qi->qi_gquotaip == NULL)
+ error = xfs_dqinode_metadir_create(*dpp, XFS_DQTYPE_GROUP,
+ &qi->qi_gquotaip);
+
+ if (!error && XFS_IS_PQUOTA_ON(mp) && qi->qi_pquotaip == NULL)
+ error = xfs_dqinode_metadir_create(*dpp, XFS_DQTYPE_PROJ,
+ &qi->qi_pquotaip);
+
+ return error;
+}
+
+/*
+ * Add QUOTABIT to sb_versionnum and initialize qflags in preparation for
+ * creating quota files on a metadir filesystem.
+ *
+ * The in-core superblock is updated under m_sb_lock and then logged in a
+ * tr_sb transaction. Returns 0, or the error from allocating or committing
+ * that transaction.
+ */
+STATIC int
+xfs_qm_prep_metadir_sb(
+ struct xfs_mount *mp)
+{
+ struct xfs_trans *tp;
+ int error;
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_sb, 0, 0, 0, &tp);
+ if (error)
+ return error;
+
+ spin_lock(&mp->m_sb_lock);
+
+ xfs_add_quota(mp);
+
+ /* qflags will get updated fully _after_ quotacheck */
+ mp->m_sb.sb_qflags = mp->m_qflags & XFS_ALL_QUOTA_ACCT;
+
+ spin_unlock(&mp->m_sb_lock);
+ xfs_log_sb(tp);
+
+ return xfs_trans_commit(tp);
+}
+
+/*
+ * Set up the quota inodes of a metadir filesystem: load the ones that
+ * already exist and create the rest. Since this is a V5 filesystem, the
+ * grp/prjquota switcheroo thing from V4 does not have to be handled here.
+ */
+STATIC int
+xfs_qm_init_metadir_qinos(
+ struct xfs_mount *mp)
+{
+ struct xfs_quotainfo *qi = mp->m_quotainfo;
+ struct xfs_inode *dp = NULL;
+ int error;
+
+ /* Prepare the superblock first if the quota feature is not set yet. */
+ if (!xfs_has_quota(mp)) {
+ error = xfs_qm_prep_metadir_sb(mp);
+ if (error)
+ return error;
+ }
+
+ error = xfs_qm_load_metadir_qinos(mp, qi, &dp);
+ if (!error)
+ error = xfs_qm_create_metadir_qinos(mp, qi, &dp);
+ if (error) {
+ /* Drop whatever was loaded or created before failing. */
+ xfs_qm_destroy_quotainos(mp->m_quotainfo);
+ if (dp)
+ xfs_irele(dp);
+ return error;
+ }
+
+ xfs_irele(dp);
+ return 0;
+}
+
/*
* This initializes all the quota information that's kept in the
* mount structure
@@ -640,7 +820,10 @@ xfs_qm_init_quotainfo(
* See if quotainodes are setup, and if not, allocate them,
* and change the superblock accordingly.
*/
- error = xfs_qm_init_quotainos(mp);
+ if (xfs_has_metadir(mp))
+ error = xfs_qm_init_metadir_qinos(mp);
+ else
+ error = xfs_qm_init_quotainos(mp);
if (error)
goto out_free_lru;
@@ -733,6 +916,17 @@ xfs_qm_destroy_quotainfo(
mp->m_quotainfo = NULL;
}
+/*
+ * Map the XFS_QMOPT_* quota type bit in @flags to a metafile type. User is
+ * checked first, then group; anything else maps to the project quota type.
+ */
+static inline enum xfs_metafile_type
+xfs_qm_metafile_type(
+ unsigned int flags)
+{
+ enum xfs_metafile_type type = XFS_METAFILE_PRJQUOTA;
+
+ if (flags & XFS_QMOPT_UQUOTA)
+ type = XFS_METAFILE_USRQUOTA;
+ else if (flags & XFS_QMOPT_GQUOTA)
+ type = XFS_METAFILE_GRPQUOTA;
+
+ return type;
+}
+
/*
* Create an inode and return with a reference already taken, but unlocked
* This is how we create quota inodes
@@ -744,6 +938,7 @@ xfs_qm_qino_alloc(
unsigned int flags)
{
struct xfs_trans *tp;
+ enum xfs_metafile_type metafile_type = xfs_qm_metafile_type(flags);
int error;
bool need_alloc = true;
@@ -777,9 +972,10 @@ xfs_qm_qino_alloc(
}
}
if (ino != NULLFSINO) {
- error = xfs_iget(mp, NULL, ino, 0, 0, ipp);
+ error = xfs_metafile_iget(mp, ino, metafile_type, ipp);
if (error)
return error;
+
mp->m_sb.sb_gquotino = NULLFSINO;
mp->m_sb.sb_pquotino = NULLFSINO;
need_alloc = false;
@@ -806,6 +1002,8 @@ xfs_qm_qino_alloc(
xfs_trans_cancel(tp);
return error;
}
+ if (xfs_has_metadir(mp))
+ xfs_metafile_set_iflag(tp, *ipp, metafile_type);
}
/*
@@ -1153,8 +1351,8 @@ xfs_qm_dqusage_adjust(
void *data)
{
struct xfs_inode *ip;
- xfs_qcnt_t nblks;
- xfs_filblks_t rtblks = 0; /* total rt blks */
+ xfs_filblks_t nblks, rtblks;
+ unsigned int lock_mode;
int error;
ASSERT(XFS_IS_QUOTA_ON(mp));
@@ -1189,20 +1387,23 @@ xfs_qm_dqusage_adjust(
}
}
+ /* Metadata directory files are not accounted to user-visible quotas. */
+ if (xfs_is_metadir_inode(ip))
+ goto error0;
+
ASSERT(ip->i_delayed_blks == 0);
+ lock_mode = xfs_ilock_data_map_shared(ip);
if (XFS_IS_REALTIME_INODE(ip)) {
- struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
-
error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
- if (error)
+ if (error) {
+ xfs_iunlock(ip, lock_mode);
goto error0;
-
- xfs_bmap_count_leaves(ifp, &rtblks);
+ }
}
-
- nblks = (xfs_qcnt_t)ip->i_nblocks - rtblks;
+ xfs_inode_count_blocks(tp, ip, &nblks, &rtblks);
xfs_iflags_clear(ip, XFS_IQUOTAUNCHECKED);
+ xfs_iunlock(ip, lock_mode);
/*
* Add the (disk blocks and inode) resources occupied by this
@@ -1462,10 +1663,11 @@ xfs_qm_mount_quotas(
uint sbf;
/*
- * If quotas on realtime volumes is not supported, we disable
- * quotas immediately.
+ * If quotas on realtime volumes is not supported, disable quotas
+ * immediately. We only support rtquota if rtgroups are enabled to
+ * avoid problems with older kernels.
*/
- if (mp->m_sb.sb_rextents) {
+ if (mp->m_sb.sb_rextents && !xfs_has_rtgroups(mp)) {
xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
mp->m_qflags = 0;
goto write_changes;
@@ -1533,7 +1735,7 @@ xfs_qm_mount_quotas(
}
if (error) {
- xfs_warn(mp, "Failed to initialize disk quotas.");
+ xfs_warn(mp, "Failed to initialize disk quotas, err %d.", error);
return;
}
}
@@ -1552,27 +1754,26 @@ xfs_qm_qino_load(
xfs_dqtype_t type,
struct xfs_inode **ipp)
{
- xfs_ino_t ino = NULLFSINO;
-
- switch (type) {
- case XFS_DQTYPE_USER:
- ino = mp->m_sb.sb_uquotino;
- break;
- case XFS_DQTYPE_GROUP:
- ino = mp->m_sb.sb_gquotino;
- break;
- case XFS_DQTYPE_PROJ:
- ino = mp->m_sb.sb_pquotino;
- break;
- default:
- ASSERT(0);
- return -EFSCORRUPTED;
- }
-
- if (ino == NULLFSINO)
- return -ENOENT;
-
- return xfs_iget(mp, NULL, ino, 0, 0, ipp);
+ struct xfs_trans *tp;
+ struct xfs_inode *dp = NULL;
+ int error;
+
+ error = xfs_trans_alloc_empty(mp, &tp);
+ if (error)
+ return error;
+
+ if (xfs_has_metadir(mp)) {
+ error = xfs_dqinode_load_parent(tp, &dp);
+ if (error)
+ goto out_cancel;
+ }
+
+ error = xfs_dqinode_load(tp, dp, type, ipp);
+ if (dp)
+ xfs_irele(dp);
+out_cancel:
+ xfs_trans_cancel(tp);
+ return error;
}
/*
@@ -1666,24 +1867,6 @@ error_rele:
}
STATIC void
-xfs_qm_destroy_quotainos(
- struct xfs_quotainfo *qi)
-{
- if (qi->qi_uquotaip) {
- xfs_irele(qi->qi_uquotaip);
- qi->qi_uquotaip = NULL; /* paranoia */
- }
- if (qi->qi_gquotaip) {
- xfs_irele(qi->qi_gquotaip);
- qi->qi_gquotaip = NULL;
- }
- if (qi->qi_pquotaip) {
- xfs_irele(qi->qi_pquotaip);
- qi->qi_pquotaip = NULL;
- }
-}
-
-STATIC void
xfs_qm_dqfree_one(
struct xfs_dquot *dqp)
{
@@ -1735,6 +1918,8 @@ xfs_qm_vop_dqalloc(
if (!XFS_IS_QUOTA_ON(mp))
return 0;
+ ASSERT(!xfs_is_metadir_inode(ip));
+
lockflags = XFS_ILOCK_EXCL;
xfs_ilock(ip, lockflags);
@@ -1858,23 +2043,29 @@ xfs_qm_vop_chown(
struct xfs_dquot *newdq)
{
struct xfs_dquot *prevdq;
- uint bfield = XFS_IS_REALTIME_INODE(ip) ?
- XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
-
+ xfs_filblks_t dblocks, rblocks;
+ bool isrt = XFS_IS_REALTIME_INODE(ip);
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
ASSERT(XFS_IS_QUOTA_ON(ip->i_mount));
+ ASSERT(!xfs_is_metadir_inode(ip));
/* old dquot */
prevdq = *IO_olddq;
ASSERT(prevdq);
ASSERT(prevdq != newdq);
- xfs_trans_mod_ino_dquot(tp, ip, prevdq, bfield, -(ip->i_nblocks));
+ xfs_inode_count_blocks(tp, ip, &dblocks, &rblocks);
+
+ xfs_trans_mod_ino_dquot(tp, ip, prevdq, XFS_TRANS_DQ_BCOUNT,
+ -(xfs_qcnt_t)dblocks);
+ xfs_trans_mod_ino_dquot(tp, ip, prevdq, XFS_TRANS_DQ_RTBCOUNT,
+ -(xfs_qcnt_t)rblocks);
xfs_trans_mod_ino_dquot(tp, ip, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
/* the sparkling new dquot */
- xfs_trans_mod_ino_dquot(tp, ip, newdq, bfield, ip->i_nblocks);
+ xfs_trans_mod_ino_dquot(tp, ip, newdq, XFS_TRANS_DQ_BCOUNT, dblocks);
+ xfs_trans_mod_ino_dquot(tp, ip, newdq, XFS_TRANS_DQ_RTBCOUNT, rblocks);
xfs_trans_mod_ino_dquot(tp, ip, newdq, XFS_TRANS_DQ_ICOUNT, 1);
/*
@@ -1884,7 +2075,8 @@ xfs_qm_vop_chown(
* (having already bumped up the real counter) so that we don't have
* any reservation to give back when we commit.
*/
- xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_RES_BLKS,
+ xfs_trans_mod_dquot(tp, newdq,
+ isrt ? XFS_TRANS_DQ_RES_RTBLKS : XFS_TRANS_DQ_RES_BLKS,
-ip->i_delayed_blks);
/*
@@ -1896,8 +2088,13 @@ xfs_qm_vop_chown(
*/
tp->t_flags |= XFS_TRANS_DIRTY;
xfs_dqlock(prevdq);
- ASSERT(prevdq->q_blk.reserved >= ip->i_delayed_blks);
- prevdq->q_blk.reserved -= ip->i_delayed_blks;
+ if (isrt) {
+ ASSERT(prevdq->q_rtb.reserved >= ip->i_delayed_blks);
+ prevdq->q_rtb.reserved -= ip->i_delayed_blks;
+ } else {
+ ASSERT(prevdq->q_blk.reserved >= ip->i_delayed_blks);
+ prevdq->q_blk.reserved -= ip->i_delayed_blks;
+ }
xfs_dqunlock(prevdq);
/*
@@ -1951,6 +2148,7 @@ xfs_qm_vop_create_dqattach(
return;
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
+ ASSERT(!xfs_is_metadir_inode(ip));
if (udqp && XFS_IS_UQUOTA_ON(mp)) {
ASSERT(ip->i_udquot == NULL);
@@ -1981,6 +2179,8 @@ xfs_inode_near_dquot_enforcement(
xfs_dqtype_t type)
{
struct xfs_dquot *dqp;
+ struct xfs_dquot_res *res;
+ struct xfs_dquot_pre *pre;
int64_t freesp;
/* We only care for quotas that are enabled and enforced. */
@@ -1989,21 +2189,30 @@ xfs_inode_near_dquot_enforcement(
return false;
if (xfs_dquot_res_over_limits(&dqp->q_ino) ||
+ xfs_dquot_res_over_limits(&dqp->q_blk) ||
xfs_dquot_res_over_limits(&dqp->q_rtb))
return true;
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ res = &dqp->q_rtb;
+ pre = &dqp->q_rtb_prealloc;
+ } else {
+ res = &dqp->q_blk;
+ pre = &dqp->q_blk_prealloc;
+ }
+
/* For space on the data device, check the various thresholds. */
- if (!dqp->q_prealloc_hi_wmark)
+ if (!pre->q_prealloc_hi_wmark)
return false;
- if (dqp->q_blk.reserved < dqp->q_prealloc_lo_wmark)
+ if (res->reserved < pre->q_prealloc_lo_wmark)
return false;
- if (dqp->q_blk.reserved >= dqp->q_prealloc_hi_wmark)
+ if (res->reserved >= pre->q_prealloc_hi_wmark)
return true;
- freesp = dqp->q_prealloc_hi_wmark - dqp->q_blk.reserved;
- if (freesp < dqp->q_low_space[XFS_QLOWSP_5_PCNT])
+ freesp = pre->q_prealloc_hi_wmark - res->reserved;
+ if (freesp < pre->q_low_space[XFS_QLOWSP_5_PCNT])
return true;
return false;
diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c
index a11436579877..847ba29630e9 100644
--- a/fs/xfs/xfs_qm_bhv.c
+++ b/fs/xfs/xfs_qm_bhv.c
@@ -19,18 +19,24 @@
STATIC void
xfs_fill_statvfs_from_dquot(
struct kstatfs *statp,
+ struct xfs_inode *ip,
struct xfs_dquot *dqp)
{
+ struct xfs_dquot_res *blkres = &dqp->q_blk;
uint64_t limit;
- limit = dqp->q_blk.softlimit ?
- dqp->q_blk.softlimit :
- dqp->q_blk.hardlimit;
+ if (XFS_IS_REALTIME_MOUNT(ip->i_mount) &&
+ (ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME)))
+ blkres = &dqp->q_rtb;
+
+ limit = blkres->softlimit ?
+ blkres->softlimit :
+ blkres->hardlimit;
if (limit && statp->f_blocks > limit) {
statp->f_blocks = limit;
statp->f_bfree = statp->f_bavail =
- (statp->f_blocks > dqp->q_blk.reserved) ?
- (statp->f_blocks - dqp->q_blk.reserved) : 0;
+ (statp->f_blocks > blkres->reserved) ?
+ (statp->f_blocks - blkres->reserved) : 0;
}
limit = dqp->q_ino.softlimit ?
@@ -61,7 +67,7 @@ xfs_qm_statvfs(
struct xfs_dquot *dqp;
if (!xfs_qm_dqget(mp, ip->i_projid, XFS_DQTYPE_PROJ, false, &dqp)) {
- xfs_fill_statvfs_from_dquot(statp, dqp);
+ xfs_fill_statvfs_from_dquot(statp, ip, dqp);
xfs_qm_dqput(dqp);
}
}
@@ -135,3 +141,21 @@ xfs_qm_newmount(
return 0;
}
+
+/*
+ * Restore the quota accounting and enforcement state from the ondisk
+ * superblock, for use when the sysadmin didn't provide any quota mount
+ * options. This is a behavior change, so it is only done for metadir
+ * filesystems, and it is skipped entirely on norecovery mounts.
+ */
+void
+xfs_qm_resume_quotaon(
+ struct xfs_mount *mp)
+{
+ if (!xfs_has_metadir(mp) || xfs_has_norecovery(mp))
+ return;
+
+ mp->m_qflags = mp->m_sb.sb_qflags &
+ (XFS_ALL_QUOTA_ACCT | XFS_ALL_QUOTA_ENFD);
+}
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 23d71a55bbc0..fa1317cc396c 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -29,6 +29,11 @@ struct xfs_buf;
(XFS_IS_GQUOTA_ON(mp) && (ip)->i_gdquot == NULL) || \
(XFS_IS_PQUOTA_ON(mp) && (ip)->i_pdquot == NULL))
+#define XFS_IS_DQDETACHED(ip) \
+ ((ip)->i_udquot == NULL && \
+ (ip)->i_gdquot == NULL && \
+ (ip)->i_pdquot == NULL)
+
#define XFS_QM_NEED_QUOTACHECK(mp) \
((XFS_IS_UQUOTA_ON(mp) && \
(mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \
@@ -120,10 +125,12 @@ extern void xfs_qm_dqdetach(struct xfs_inode *);
extern void xfs_qm_dqrele(struct xfs_dquot *);
extern void xfs_qm_statvfs(struct xfs_inode *, struct kstatfs *);
extern int xfs_qm_newmount(struct xfs_mount *, uint *, uint *);
+void xfs_qm_resume_quotaon(struct xfs_mount *mp);
extern void xfs_qm_mount_quotas(struct xfs_mount *);
extern void xfs_qm_unmount(struct xfs_mount *);
extern void xfs_qm_unmount_quotas(struct xfs_mount *);
bool xfs_inode_near_dquot_enforcement(struct xfs_inode *ip, xfs_dqtype_t type);
+int xfs_quota_reserve_blkres(struct xfs_inode *ip, int64_t blocks);
# ifdef CONFIG_XFS_LIVE_HOOKS
void xfs_trans_mod_ino_dquot(struct xfs_trans *tp, struct xfs_inode *ip,
@@ -197,11 +204,17 @@ xfs_trans_reserve_quota_icreate(struct xfs_trans *tp, struct xfs_dquot *udqp,
#define xfs_qm_dqrele(d) do { (d) = (d); } while(0)
#define xfs_qm_statvfs(ip, s) do { } while(0)
#define xfs_qm_newmount(mp, a, b) (0)
+#define xfs_qm_resume_quotaon(mp) ((void)0)
#define xfs_qm_mount_quotas(mp)
#define xfs_qm_unmount(mp)
#define xfs_qm_unmount_quotas(mp)
#define xfs_inode_near_dquot_enforcement(ip, type) (false)
+/* Stub: no quota reservation is made here; always reports success (0). */
+static inline int xfs_quota_reserve_blkres(struct xfs_inode *ip, int64_t blocks)
+{
+ return 0;
+}
+
# ifdef CONFIG_XFS_LIVE_HOOKS
# define xfs_dqtrx_hook_enable() ((void)0)
# define xfs_dqtrx_hook_disable() ((void)0)
@@ -209,12 +222,6 @@ xfs_trans_reserve_quota_icreate(struct xfs_trans *tp, struct xfs_dquot *udqp,
#endif /* CONFIG_XFS_QUOTA */
-static inline int
-xfs_quota_reserve_blkres(struct xfs_inode *ip, int64_t blocks)
-{
- return xfs_trans_reserve_quota_nblks(NULL, ip, blocks, 0, false);
-}
-
static inline void
xfs_quota_unreserve_blkres(struct xfs_inode *ip, uint64_t blocks)
{
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 27398512b179..bede1c96c330 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -244,7 +244,7 @@ xfs_refcount_update_diff_items(
struct xfs_refcount_intent *ra = ci_entry(a);
struct xfs_refcount_intent *rb = ci_entry(b);
- return ra->ri_pag->pag_agno - rb->ri_pag->pag_agno;
+ return ra->ri_group->xg_gno - rb->ri_group->xg_gno;
}
/* Log refcount updates in the intent item. */
@@ -330,7 +330,7 @@ xfs_refcount_defer_add(
trace_xfs_refcount_defer(mp, ri);
- ri->ri_pag = xfs_perag_intent_get(mp, ri->ri_startblock);
+ ri->ri_group = xfs_group_intent_get(mp, ri->ri_startblock, XG_TYPE_AG);
xfs_defer_add(tp, &ri->ri_list, &xfs_refcount_update_defer_type);
}
@@ -341,7 +341,7 @@ xfs_refcount_update_cancel_item(
{
struct xfs_refcount_intent *ri = ci_entry(item);
- xfs_perag_intent_put(ri->ri_pag);
+ xfs_group_intent_put(ri->ri_group);
kmem_cache_free(xfs_refcount_intent_cache, ri);
}
@@ -431,7 +431,8 @@ xfs_cui_recover_work(
ri->ri_type = pmap->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK;
ri->ri_startblock = pmap->pe_startblock;
ri->ri_blockcount = pmap->pe_len;
- ri->ri_pag = xfs_perag_intent_get(mp, pmap->pe_startblock);
+ ri->ri_group = xfs_group_intent_get(mp, pmap->pe_startblock,
+ XG_TYPE_AG);
xfs_defer_add_item(dfp, &ri->ri_list);
}
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 5bf6682e701b..b11769c009ef 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -144,7 +144,7 @@ xfs_reflink_find_shared(
if (error)
return error;
- cur = xfs_refcountbt_init_cursor(pag->pag_mount, tp, agbp, pag);
+ cur = xfs_refcountbt_init_cursor(pag_mount(pag), tp, agbp, pag);
error = xfs_refcount_find_shared(cur, agbno, aglen, fbno, flen,
find_end_of_shared);
@@ -894,14 +894,13 @@ int
xfs_reflink_recover_cow(
struct xfs_mount *mp)
{
- struct xfs_perag *pag;
- xfs_agnumber_t agno;
+ struct xfs_perag *pag = NULL;
int error = 0;
if (!xfs_has_reflink(mp))
return 0;
- for_each_perag(mp, agno, pag) {
+ while ((pag = xfs_perag_next(mp, pag))) {
error = xfs_refcount_recover_cow_leftovers(mp, pag);
if (error) {
xfs_perag_rele(pag);
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 88b5580e1e19..76b3c0ed3b4f 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -243,7 +243,7 @@ xfs_rmap_update_diff_items(
struct xfs_rmap_intent *ra = ri_entry(a);
struct xfs_rmap_intent *rb = ri_entry(b);
- return ra->ri_pag->pag_agno - rb->ri_pag->pag_agno;
+ return ra->ri_group->xg_gno - rb->ri_group->xg_gno;
}
/* Log rmap updates in the intent item. */
@@ -353,7 +353,8 @@ xfs_rmap_defer_add(
trace_xfs_rmap_defer(mp, ri);
- ri->ri_pag = xfs_perag_intent_get(mp, ri->ri_bmap.br_startblock);
+ ri->ri_group = xfs_group_intent_get(mp, ri->ri_bmap.br_startblock,
+ XG_TYPE_AG);
xfs_defer_add(tp, &ri->ri_list, &xfs_rmap_update_defer_type);
}
@@ -364,7 +365,7 @@ xfs_rmap_update_cancel_item(
{
struct xfs_rmap_intent *ri = ri_entry(item);
- xfs_perag_intent_put(ri->ri_pag);
+ xfs_group_intent_put(ri->ri_group);
kmem_cache_free(xfs_rmap_intent_cache, ri);
}
@@ -494,7 +495,7 @@ xfs_rui_recover_work(
ri->ri_bmap.br_blockcount = map->me_len;
ri->ri_bmap.br_state = (map->me_flags & XFS_RMAP_EXTENT_UNWRITTEN) ?
XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
- ri->ri_pag = xfs_perag_intent_get(mp, map->me_startblock);
+ ri->ri_group = xfs_group_intent_get(mp, map->me_startblock, XG_TYPE_AG);
xfs_defer_add_item(dfp, &ri->ri_list);
}
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 3a2005a1e673..0cb534d71119 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -25,6 +25,11 @@
#include "xfs_quota.h"
#include "xfs_log_priv.h"
#include "xfs_health.h"
+#include "xfs_da_format.h"
+#include "xfs_metafile.h"
+#include "xfs_rtgroup.h"
+#include "xfs_error.h"
+#include "xfs_trace.h"
/*
* Return whether there are any free extents in the size range given
@@ -38,14 +43,14 @@ xfs_rtany_summary(
xfs_fileoff_t bbno, /* bitmap block number */
int *maxlog) /* out: max log2 extent size free */
{
- struct xfs_mount *mp = args->mp;
+ uint8_t *rsum_cache = args->rtg->rtg_rsum_cache;
int error;
int log; /* loop counter, log2 of ext. size */
xfs_suminfo_t sum; /* summary data */
- /* There are no extents at levels >= m_rsum_cache[bbno]. */
- if (mp->m_rsum_cache) {
- high = min(high, mp->m_rsum_cache[bbno] - 1);
+ /* There are no extents at levels >= rsum_cache[bbno]. */
+ if (rsum_cache) {
+ high = min(high, rsum_cache[bbno] - 1);
if (low > high) {
*maxlog = -1;
return 0;
@@ -77,12 +82,11 @@ xfs_rtany_summary(
*maxlog = -1;
out:
/* There were no extents at levels > log. */
- if (mp->m_rsum_cache && log + 1 < mp->m_rsum_cache[bbno])
- mp->m_rsum_cache[bbno] = log + 1;
+ if (rsum_cache && log + 1 < rsum_cache[bbno])
+ rsum_cache[bbno] = log + 1;
return 0;
}
-
/*
* Copy and transform the summary file, given the old and new
* parameters in the mount structures.
@@ -149,7 +153,7 @@ xfs_rtallocate_range(
/*
* Find the next allocated block (end of free extent).
*/
- error = xfs_rtfind_forw(args, end, mp->m_sb.sb_rextents - 1,
+ error = xfs_rtfind_forw(args, end, args->rtg->rtg_extents - 1,
&postblock);
if (error)
return error;
@@ -211,14 +215,14 @@ xfs_rtalloc_align_len(
*/
static inline xfs_rtxlen_t
xfs_rtallocate_clamp_len(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
xfs_rtxnum_t startrtx,
xfs_rtxlen_t rtxlen,
xfs_rtxlen_t prod)
{
xfs_rtxlen_t ret;
- ret = min(mp->m_sb.sb_rextents, startrtx + rtxlen) - startrtx;
+ ret = min(rtg->rtg_extents, startrtx + rtxlen) - startrtx;
return xfs_rtalloc_align_len(ret, prod);
}
@@ -253,10 +257,11 @@ xfs_rtallocate_extent_block(
* Loop over all the extents starting in this bitmap block up to the
* end of the rt volume, looking for one that's long enough.
*/
- end = min(mp->m_sb.sb_rextents, xfs_rbmblock_to_rtx(mp, bbno + 1)) - 1;
+ end = min(args->rtg->rtg_extents, xfs_rbmblock_to_rtx(mp, bbno + 1)) -
+ 1;
for (i = xfs_rbmblock_to_rtx(mp, bbno); i <= end; i++) {
/* Make sure we don't scan off the end of the rt volume. */
- scanlen = xfs_rtallocate_clamp_len(mp, i, maxlen, prod);
+ scanlen = xfs_rtallocate_clamp_len(args->rtg, i, maxlen, prod);
if (scanlen < minlen)
break;
@@ -341,7 +346,6 @@ xfs_rtallocate_extent_exact(
xfs_rtxlen_t prod, /* extent product factor */
xfs_rtxnum_t *rtx) /* out: start rtext allocated */
{
- struct xfs_mount *mp = args->mp;
xfs_rtxnum_t next; /* next rtext to try (dummy) */
xfs_rtxlen_t alloclen; /* candidate length */
xfs_rtxlen_t scanlen; /* number of free rtx to look for */
@@ -352,7 +356,7 @@ xfs_rtallocate_extent_exact(
ASSERT(maxlen % prod == 0);
/* Make sure we don't run off the end of the rt volume. */
- scanlen = xfs_rtallocate_clamp_len(mp, start, maxlen, prod);
+ scanlen = xfs_rtallocate_clamp_len(args->rtg, start, maxlen, prod);
if (scanlen < minlen)
return -ENOSPC;
@@ -413,11 +417,10 @@ xfs_rtallocate_extent_near(
ASSERT(maxlen % prod == 0);
/*
- * If the block number given is off the end, silently set it to
- * the last block.
+ * If the block number given is off the end, silently set it to the last
+ * block.
*/
- if (start >= mp->m_sb.sb_rextents)
- start = mp->m_sb.sb_rextents - 1;
+ start = min(start, args->rtg->rtg_extents - 1);
/*
* Try the exact allocation first.
@@ -649,19 +652,30 @@ xfs_rtallocate_extent_size(
return -ENOSPC;
}
+/*
+ * Pass every metadata inode slot of @rtg to xfs_rtginode_irele() and free
+ * the group's summary cache.
+ */
+static void
+xfs_rtunmount_rtg(
+ struct xfs_rtgroup *rtg)
+{
+ int type = 0;
+
+ while (type < XFS_RTGI_MAX) {
+ xfs_rtginode_irele(&rtg->rtg_inodes[type]);
+ type++;
+ }
+
+ kvfree(rtg->rtg_rsum_cache);
+}
+
static int
xfs_alloc_rsum_cache(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
xfs_extlen_t rbmblocks)
{
/*
* The rsum cache is initialized to the maximum value, which is
* trivially an upper bound on the maximum level with any free extents.
*/
- mp->m_rsum_cache = kvmalloc(rbmblocks, GFP_KERNEL);
- if (!mp->m_rsum_cache)
+ rtg->rtg_rsum_cache = kvmalloc(rbmblocks, GFP_KERNEL);
+ if (!rtg->rtg_rsum_cache)
return -ENOMEM;
- memset(mp->m_rsum_cache, -1, rbmblocks);
+ memset(rtg->rtg_rsum_cache, -1, rbmblocks);
return 0;
}
@@ -698,44 +712,175 @@ out_iolock:
return error;
}
+/*
+ * Ensure that the rtgroup metadata inode is loaded, creating it if needed.
+ *
+ * Returns 0 if the inode was already loaded, was loaded now, or was created;
+ * otherwise a negative errno from the transaction allocation, the load, or
+ * the create.
+ */
+static int
+xfs_rtginode_ensure(
+ struct xfs_rtgroup *rtg,
+ enum xfs_rtg_inodes type)
+{
+ struct xfs_trans *tp;
+ int error;
+
+ if (rtg->rtg_inodes[type])
+ return 0;
+
+ error = xfs_trans_alloc_empty(rtg_mount(rtg), &tp);
+ if (error)
+ return error;
+ error = xfs_rtginode_load(rtg, type, tp);
+ xfs_trans_cancel(tp);
+
+ /*
+ * Only a missing inode (-ENOENT) means we should create one. Success
+ * and every other load failure are passed straight to the caller, so
+ * that a failed load is not mistaken for a loaded inode.
+ */
+ if (error != -ENOENT)
+ return error;
+ return xfs_rtginode_create(rtg, type, true);
+}
+
+/*
+ * Duplicate @mp with kmemdup() and recompute the realtime geometry of the
+ * copy (rextsize, sb_rblocks, sb_rextents, sb_rbmblocks, sb_rextslog,
+ * sb_rgcount, m_rsumblocks and m_rsumlevels) for a realtime volume of
+ * @rblocks blocks with an extent size of @rextsize. XFS_FEAT_REALTIME is set
+ * on the copy when @rblocks is nonzero, and its transaction reservations are
+ * recalculated.
+ *
+ * Returns the new mount, or NULL if the allocation failed.
+ */
+static struct xfs_mount *
+xfs_growfs_rt_alloc_fake_mount(
+ const struct xfs_mount *mp,
+ xfs_rfsblock_t rblocks,
+ xfs_agblock_t rextsize)
+{
+ struct xfs_mount *nmp;
+
+ nmp = kmemdup(mp, sizeof(*mp), GFP_KERNEL);
+ if (!nmp)
+ return NULL;
+ xfs_mount_sb_set_rextsize(nmp, &nmp->m_sb, rextsize);
+ nmp->m_sb.sb_rblocks = rblocks;
+ nmp->m_sb.sb_rextents = xfs_blen_to_rtbxlen(nmp, nmp->m_sb.sb_rblocks);
+ nmp->m_sb.sb_rbmblocks = xfs_rtbitmap_blockcount(nmp);
+ nmp->m_sb.sb_rextslog = xfs_compute_rextslog(nmp->m_sb.sb_rextents);
+ if (xfs_has_rtgroups(nmp))
+ nmp->m_sb.sb_rgcount = howmany_64(nmp->m_sb.sb_rextents,
+ nmp->m_sb.sb_rgextents);
+ else
+ nmp->m_sb.sb_rgcount = 1;
+ nmp->m_rsumblocks = xfs_rtsummary_blockcount(nmp, &nmp->m_rsumlevels);
+
+ if (rblocks > 0)
+ nmp->m_features |= XFS_FEAT_REALTIME;
+
+ /* recompute growfsrt reservation from new rsumsize */
+ xfs_trans_resv_calc(nmp, &nmp->m_resv);
+ return nmp;
+}
+
+/*
+ * Free all the new space in this rtgroup and report the number of extents
+ * actually freed through @freed_rtx.
+ */
+static int
+xfs_growfs_rt_free_new(
+ struct xfs_rtgroup *rtg,
+ struct xfs_rtalloc_args *nargs,
+ xfs_rtbxlen_t *freed_rtx)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+ xfs_rgnumber_t rgno = rtg_rgno(rtg);
+ xfs_rtxnum_t first_rtx, limit_rtx;
+
+ /* Groups at or past the current sb_rgcount start from extent 0. */
+ if (rgno < mp->m_sb.sb_rgcount)
+ first_rtx = xfs_rtgroup_extents(mp, rgno);
+ else
+ first_rtx = 0;
+ limit_rtx = xfs_rtgroup_extents(nargs->mp, rgno);
+
+ /*
+ * Work out the first new extent to free, being careful to skip past a
+ * realtime superblock at the start of the realtime volume.
+ */
+ if (xfs_has_rtsb(nargs->mp) && rgno == 0 && first_rtx == 0)
+ first_rtx = 1;
+
+ *freed_rtx = limit_rtx - first_rtx;
+ return xfs_rtfree_range(nargs, first_rtx, *freed_rtx);
+}
+
+/*
+ * Compute the realtime block count for this growfs step: the blocks covered
+ * by (@bmbno + 1) bitmap blocks at @rextsize blocks per extent. With
+ * rtgroups this is capped at one group's worth of blocks and offset by the
+ * groups before @rtg. The result never exceeds @nrblocks.
+ */
+static xfs_rfsblock_t
+xfs_growfs_rt_nrblocks(
+ struct xfs_rtgroup *rtg,
+ xfs_rfsblock_t nrblocks,
+ xfs_agblock_t rextsize,
+ xfs_fileoff_t bmbno)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+ xfs_rfsblock_t limit;
+
+ limit = (bmbno + 1) * mp->m_rtx_per_rbmblock * rextsize;
+ if (xfs_has_rtgroups(mp)) {
+ xfs_rfsblock_t per_group = mp->m_sb.sb_rgextents * rextsize;
+
+ limit = min(per_group, limit);
+ limit += per_group * rtg_rgno(rtg);
+ }
+
+ return min(nrblocks, limit);
+}
+
+/*
+ * If the post-grow filesystem will have an rtsb; we're initializing the first
+ * rtgroup; and the filesystem didn't have a realtime section, write the rtsb
+ * now, and attach the rtsb buffer to the real mount.
+ *
+ * Returns 0 when no rtsb needs to be written or the write succeeded,
+ * otherwise the error from getting or writing the buffer.
+ * NOTE(review): mp->m_rtsb_bp is assigned before xfs_bwrite(), so it stays
+ * set even if the write fails -- confirm that callers expect this.
+ */
+static int
+xfs_growfs_rt_init_rtsb(
+ const struct xfs_rtalloc_args *nargs,
+ const struct xfs_rtgroup *rtg,
+ const struct xfs_rtalloc_args *args)
+{
+ struct xfs_mount *mp = args->mp;
+ struct xfs_buf *rtsb_bp;
+ int error;
+
+ if (!xfs_has_rtsb(nargs->mp))
+ return 0;
+ if (rtg_rgno(rtg) > 0)
+ return 0;
+ if (mp->m_sb.sb_rblocks)
+ return 0;
+
+ error = xfs_buf_get_uncached(mp->m_rtdev_targp, XFS_FSB_TO_BB(mp, 1),
+ 0, &rtsb_bp);
+ if (error)
+ return error;
+
+ rtsb_bp->b_maps[0].bm_bn = XFS_RTSB_DADDR;
+ rtsb_bp->b_ops = &xfs_rtsb_buf_ops;
+
+ xfs_update_rtsb(rtsb_bp, mp->m_sb_bp);
+ mp->m_rtsb_bp = rtsb_bp;
+ error = xfs_bwrite(rtsb_bp);
+ xfs_buf_unlock(rtsb_bp);
+ return error;
+}
+
static int
xfs_growfs_rt_bmblock(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
xfs_rfsblock_t nrblocks,
xfs_agblock_t rextsize,
xfs_fileoff_t bmbno)
{
- struct xfs_inode *rbmip = mp->m_rbmip;
- struct xfs_inode *rsumip = mp->m_rsumip;
+ struct xfs_mount *mp = rtg_mount(rtg);
+ struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
+ struct xfs_inode *rsumip = rtg->rtg_inodes[XFS_RTGI_SUMMARY];
struct xfs_rtalloc_args args = {
.mp = mp,
+ .rtg = rtg,
};
struct xfs_rtalloc_args nargs = {
+ .rtg = rtg,
};
struct xfs_mount *nmp;
- xfs_rfsblock_t nrblocks_step;
xfs_rtbxlen_t freed_rtx;
int error;
-
- nrblocks_step = (bmbno + 1) * NBBY * mp->m_sb.sb_blocksize * rextsize;
-
- nmp = nargs.mp = kmemdup(mp, sizeof(*mp), GFP_KERNEL);
+ /*
+ * Calculate new sb and mount fields for this round. Also ensure the
+ * rtg_extents value is uptodate as the rtbitmap code relies on it.
+ */
+ nmp = nargs.mp = xfs_growfs_rt_alloc_fake_mount(mp,
+ xfs_growfs_rt_nrblocks(rtg, nrblocks, rextsize, bmbno),
+ rextsize);
if (!nmp)
return -ENOMEM;
- /*
- * Calculate new sb and mount fields for this round.
- */
- nmp->m_sb.sb_rextsize = rextsize;
- xfs_mount_sb_set_rextsize(nmp, &nmp->m_sb);
- nmp->m_sb.sb_rbmblocks = bmbno + 1;
- nmp->m_sb.sb_rblocks = min(nrblocks, nrblocks_step);
- nmp->m_sb.sb_rextents = xfs_rtb_to_rtx(nmp, nmp->m_sb.sb_rblocks);
- nmp->m_sb.sb_rextslog = xfs_compute_rextslog(nmp->m_sb.sb_rextents);
- nmp->m_rsumlevels = nmp->m_sb.sb_rextslog + 1;
- nmp->m_rsumblocks = xfs_rtsummary_blockcount(mp, nmp->m_rsumlevels,
- nmp->m_sb.sb_rbmblocks);
+ xfs_rtgroup_calc_geometry(nmp, rtg, rtg_rgno(rtg),
+ nmp->m_sb.sb_rgcount, nmp->m_sb.sb_rextents);
/*
* Recompute the growfsrt reservation from the new rsumsize, so that the
@@ -748,8 +893,8 @@ xfs_growfs_rt_bmblock(
goto out_free;
nargs.tp = args.tp;
- xfs_rtbitmap_lock(mp);
- xfs_rtbitmap_trans_join(args.tp);
+ xfs_rtgroup_lock(args.rtg, XFS_RTGLOCK_BITMAP);
+ xfs_rtgroup_trans_join(args.tp, args.rtg, XFS_RTGLOCK_BITMAP);
/*
* Update the bitmap inode's size ondisk and incore. We need to update
@@ -780,6 +925,10 @@ xfs_growfs_rt_bmblock(
goto out_cancel;
}
+ error = xfs_growfs_rt_init_rtsb(&nargs, rtg, &args);
+ if (error)
+ goto out_cancel;
+
/*
* Update superblock fields.
*/
@@ -798,12 +947,14 @@ xfs_growfs_rt_bmblock(
if (nmp->m_sb.sb_rextslog != mp->m_sb.sb_rextslog)
xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_REXTSLOG,
nmp->m_sb.sb_rextslog - mp->m_sb.sb_rextslog);
+ if (nmp->m_sb.sb_rgcount != mp->m_sb.sb_rgcount)
+ xfs_trans_mod_sb(args.tp, XFS_TRANS_SB_RGCOUNT,
+ nmp->m_sb.sb_rgcount - mp->m_sb.sb_rgcount);
/*
* Free the new extent.
*/
- freed_rtx = nmp->m_sb.sb_rextents - mp->m_sb.sb_rextents;
- error = xfs_rtfree_range(&nargs, mp->m_sb.sb_rextents, freed_rtx);
+ error = xfs_growfs_rt_free_new(rtg, &nargs, &freed_rtx);
xfs_rtbuf_cache_relse(&nargs);
if (error)
goto out_cancel;
@@ -818,7 +969,6 @@ xfs_growfs_rt_bmblock(
*/
mp->m_rsumlevels = nmp->m_rsumlevels;
mp->m_rsumblocks = nmp->m_rsumblocks;
- xfs_mount_sb_set_rextsize(mp, &mp->m_sb);
/*
* Recompute the growfsrt reservation from the new rsumsize.
@@ -844,6 +994,15 @@ out_free:
return error;
}
+static xfs_rtxnum_t
+xfs_last_rtgroup_extents(
+ struct xfs_mount *mp)
+{
+ return mp->m_sb.sb_rextents -
+ ((xfs_rtxnum_t)(mp->m_sb.sb_rgcount - 1) *
+ mp->m_sb.sb_rgextents);
+}
+
/*
* Calculate the last rbmblock currently used.
*
@@ -851,34 +1010,235 @@ out_free:
*/
static xfs_fileoff_t
xfs_last_rt_bmblock(
- struct xfs_mount *mp)
+ struct xfs_rtgroup *rtg)
{
- xfs_fileoff_t bmbno = mp->m_sb.sb_rbmblocks;
+ struct xfs_mount *mp = rtg_mount(rtg);
+ xfs_rgnumber_t rgno = rtg_rgno(rtg);
+ xfs_fileoff_t bmbno = 0;
+
+ ASSERT(!mp->m_sb.sb_rgcount || rgno >= mp->m_sb.sb_rgcount - 1);
+
+ if (mp->m_sb.sb_rgcount && rgno == mp->m_sb.sb_rgcount - 1) {
+ xfs_rtxnum_t nrext = xfs_last_rtgroup_extents(mp);
+
+ /* Also fill up the previous block if not entirely full. */
+ bmbno = xfs_rtbitmap_blockcount_len(mp, nrext);
+ if (xfs_rtx_to_rbmword(mp, nrext) != 0)
+ bmbno--;
+ }
- /* Skip the current block if it is exactly full. */
- if (xfs_rtx_to_rbmword(mp, mp->m_sb.sb_rextents) != 0)
- bmbno--;
return bmbno;
}
/*
+ * Allocate space to the bitmap and summary files, as necessary.
+ */
+static int
+xfs_growfs_rt_alloc_blocks(
+ struct xfs_rtgroup *rtg,
+ xfs_rfsblock_t nrblocks,
+ xfs_agblock_t rextsize,
+ xfs_extlen_t *nrbmblocks)
+{
+ struct xfs_mount *mp = rtg_mount(rtg);
+ struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
+ struct xfs_inode *rsumip = rtg->rtg_inodes[XFS_RTGI_SUMMARY];
+ xfs_extlen_t orbmblocks = 0;
+ xfs_extlen_t orsumblocks = 0;
+ struct xfs_mount *nmp;
+ int error = 0;
+
+ nmp = xfs_growfs_rt_alloc_fake_mount(mp, nrblocks, rextsize);
+ if (!nmp)
+ return -ENOMEM;
+ *nrbmblocks = nmp->m_sb.sb_rbmblocks;
+
+ if (xfs_has_rtgroups(mp)) {
+ /*
+ * For file systems with the rtgroups feature, the RT bitmap and
+ * summary are always fully allocated, which means that we never
+ * need to grow the existing files.
+ *
+ * But we have to be careful to only fill the bitmap until the
+ * end of the actually used range.
+ */
+ if (rtg_rgno(rtg) == nmp->m_sb.sb_rgcount - 1)
+ *nrbmblocks = xfs_rtbitmap_blockcount_len(nmp,
+ xfs_last_rtgroup_extents(nmp));
+
+ if (mp->m_sb.sb_rgcount &&
+ rtg_rgno(rtg) == mp->m_sb.sb_rgcount - 1)
+ goto out_free;
+ } else {
+ /*
+ * Get the old block counts for bitmap and summary inodes.
+ * These can't change since other growfs callers are locked out.
+ */
+ orbmblocks = XFS_B_TO_FSB(mp, rbmip->i_disk_size);
+ orsumblocks = XFS_B_TO_FSB(mp, rsumip->i_disk_size);
+ }
+
+ error = xfs_rtfile_initialize_blocks(rtg, XFS_RTGI_BITMAP, orbmblocks,
+ nmp->m_sb.sb_rbmblocks, NULL);
+ if (error)
+ goto out_free;
+ error = xfs_rtfile_initialize_blocks(rtg, XFS_RTGI_SUMMARY, orsumblocks,
+ nmp->m_rsumblocks, NULL);
+out_free:
+ kfree(nmp);
+ return error;
+}
+
+static int
+xfs_growfs_rtg(
+ struct xfs_mount *mp,
+ xfs_rgnumber_t rgno,
+ xfs_rfsblock_t nrblocks,
+ xfs_agblock_t rextsize)
+{
+ uint8_t *old_rsum_cache = NULL;
+ xfs_extlen_t bmblocks;
+ xfs_fileoff_t bmbno;
+ struct xfs_rtgroup *rtg;
+ unsigned int i;
+ int error;
+
+ rtg = xfs_rtgroup_grab(mp, rgno);
+ if (!rtg)
+ return -EINVAL;
+
+ for (i = 0; i < XFS_RTGI_MAX; i++) {
+ error = xfs_rtginode_ensure(rtg, i);
+ if (error)
+ goto out_rele;
+ }
+
+ error = xfs_growfs_rt_alloc_blocks(rtg, nrblocks, rextsize, &bmblocks);
+ if (error)
+ goto out_rele;
+
+ if (bmblocks != rtg_mount(rtg)->m_sb.sb_rbmblocks) {
+ old_rsum_cache = rtg->rtg_rsum_cache;
+ error = xfs_alloc_rsum_cache(rtg, bmblocks);
+ if (error)
+ goto out_rele;
+ }
+
+ for (bmbno = xfs_last_rt_bmblock(rtg); bmbno < bmblocks; bmbno++) {
+ error = xfs_growfs_rt_bmblock(rtg, nrblocks, rextsize, bmbno);
+ if (error)
+ goto out_error;
+ }
+
+ if (old_rsum_cache)
+ kvfree(old_rsum_cache);
+ xfs_rtgroup_rele(rtg);
+ return 0;
+
+out_error:
+ /*
+ * Reset rtg_extents to the old value if adding more blocks failed.
+ */
+ xfs_rtgroup_calc_geometry(mp, rtg, rtg_rgno(rtg), mp->m_sb.sb_rgcount,
+ mp->m_sb.sb_rextents);
+ if (old_rsum_cache) {
+ kvfree(rtg->rtg_rsum_cache);
+ rtg->rtg_rsum_cache = old_rsum_cache;
+ }
+out_rele:
+ xfs_rtgroup_rele(rtg);
+ return error;
+}
+
+static int
+xfs_growfs_check_rtgeom(
+ const struct xfs_mount *mp,
+ xfs_rfsblock_t rblocks,
+ xfs_extlen_t rextsize)
+{
+ struct xfs_mount *nmp;
+ int error = 0;
+
+ nmp = xfs_growfs_rt_alloc_fake_mount(mp, rblocks, rextsize);
+ if (!nmp)
+ return -ENOMEM;
+
+ /*
+ * New summary size can't be more than half the size of the log. This
+ * prevents us from getting a log overflow, since we'll log basically
+ * the whole summary file at once.
+ */
+ if (nmp->m_rsumblocks > (mp->m_sb.sb_logblocks >> 1))
+ error = -EINVAL;
+
+ kfree(nmp);
+ return error;
+}
+
+/*
+ * Compute the new number of rt groups and ensure that /rtgroups exists.
+ *
+ * Changing the rtgroup size is not allowed (even if the rt volume hasn't yet
+ * been initialized) because the userspace ABI doesn't support it.
+ */
+static int
+xfs_growfs_rt_prep_groups(
+ struct xfs_mount *mp,
+ xfs_rfsblock_t rblocks,
+ xfs_extlen_t rextsize,
+ xfs_rgnumber_t *new_rgcount)
+{
+ int error;
+
+ *new_rgcount = howmany_64(rblocks, mp->m_sb.sb_rgextents * rextsize);
+ if (*new_rgcount > XFS_MAX_RGNUMBER)
+ return -EINVAL;
+
+ /* Make sure the /rtgroups dir has been created */
+ if (!mp->m_rtdirip) {
+ struct xfs_trans *tp;
+
+ error = xfs_trans_alloc_empty(mp, &tp);
+ if (error)
+ return error;
+ error = xfs_rtginode_load_parent(tp);
+ xfs_trans_cancel(tp);
+
+ if (error == -ENOENT)
+ error = xfs_rtginode_mkdir_parent(mp);
+ if (error)
+ return error;
+ }
+
+ return 0;
+}
+
+static bool
+xfs_grow_last_rtg(
+ struct xfs_mount *mp)
+{
+ if (!xfs_has_rtgroups(mp))
+ return true;
+ if (mp->m_sb.sb_rgcount == 0)
+ return false;
+ return xfs_rtgroup_extents(mp, mp->m_sb.sb_rgcount - 1) <=
+ mp->m_sb.sb_rgextents;
+}
+
+/*
* Grow the realtime area of the filesystem.
*/
int
xfs_growfs_rt(
- xfs_mount_t *mp, /* mount point for filesystem */
- xfs_growfs_rt_t *in) /* growfs rt input struct */
+ struct xfs_mount *mp,
+ struct xfs_growfs_rt *in)
{
- xfs_fileoff_t bmbno; /* bitmap block number */
- struct xfs_buf *bp; /* temporary buffer */
- int error; /* error return value */
- xfs_extlen_t nrbmblocks; /* new number of rt bitmap blocks */
- xfs_rtxnum_t nrextents; /* new number of realtime extents */
- xfs_extlen_t nrsumblocks; /* new number of summary blocks */
- xfs_extlen_t rbmblocks; /* current number of rt bitmap blocks */
- xfs_extlen_t rsumblocks; /* current number of rt summary blks */
- uint8_t *rsum_cache; /* old summary cache */
- xfs_agblock_t old_rextsize = mp->m_sb.sb_rextsize;
+ xfs_rgnumber_t old_rgcount = mp->m_sb.sb_rgcount;
+ xfs_rgnumber_t new_rgcount = 1;
+ xfs_rgnumber_t rgno;
+ struct xfs_buf *bp;
+ xfs_agblock_t old_rextsize = mp->m_sb.sb_rextsize;
+ int error;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -889,15 +1249,9 @@ xfs_growfs_rt(
if (!mutex_trylock(&mp->m_growlock))
return -EWOULDBLOCK;
- /*
- * Mount should fail if the rt bitmap/summary files don't load, but
- * we'll check anyway.
- */
- error = -EINVAL;
- if (!mp->m_rbmip || !mp->m_rsumip)
- goto out_unlock;
/* Shrink not supported. */
+ error = -EINVAL;
if (in->newblocks <= mp->m_sb.sb_rblocks)
goto out_unlock;
/* Can only change rt extent size when adding rt volume. */
@@ -911,7 +1265,9 @@ xfs_growfs_rt(
/* Unsupported realtime features. */
error = -EOPNOTSUPP;
- if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp) || xfs_has_quota(mp))
+ if (xfs_has_quota(mp) && !xfs_has_rtgroups(mp))
+ goto out_unlock;
+ if (xfs_has_rmapbt(mp) || xfs_has_reflink(mp))
goto out_unlock;
error = xfs_sb_validate_fsb_count(&mp->m_sb, in->newblocks);
@@ -930,80 +1286,64 @@ xfs_growfs_rt(
/*
* Calculate new parameters. These are the final values to be reached.
*/
- nrextents = div_u64(in->newblocks, in->extsize);
- if (nrextents == 0) {
- error = -EINVAL;
- goto out_unlock;
- }
- nrbmblocks = xfs_rtbitmap_blockcount(mp, nrextents);
- nrsumblocks = xfs_rtsummary_blockcount(mp,
- xfs_compute_rextslog(nrextents) + 1, nrbmblocks);
-
- /*
- * New summary size can't be more than half the size of
- * the log. This prevents us from getting a log overflow,
- * since we'll log basically the whole summary file at once.
- */
- if (nrsumblocks > (mp->m_sb.sb_logblocks >> 1)) {
- error = -EINVAL;
+ error = -EINVAL;
+ if (in->newblocks < in->extsize)
goto out_unlock;
- }
- /*
- * Get the old block counts for bitmap and summary inodes.
- * These can't change since other growfs callers are locked out.
- */
- rbmblocks = XFS_B_TO_FSB(mp, mp->m_rbmip->i_disk_size);
- rsumblocks = XFS_B_TO_FSB(mp, mp->m_rsumip->i_disk_size);
- /*
- * Allocate space to the bitmap and summary files, as necessary.
- */
- error = xfs_rtfile_initialize_blocks(mp->m_rbmip, rbmblocks,
- nrbmblocks, NULL);
- if (error)
- goto out_unlock;
- error = xfs_rtfile_initialize_blocks(mp->m_rsumip, rsumblocks,
- nrsumblocks, NULL);
+ /* Make sure the new fs size won't cause problems with the log. */
+ error = xfs_growfs_check_rtgeom(mp, in->newblocks, in->extsize);
if (error)
goto out_unlock;
- rsum_cache = mp->m_rsum_cache;
- if (nrbmblocks != mp->m_sb.sb_rbmblocks) {
- error = xfs_alloc_rsum_cache(mp, nrbmblocks);
+ if (xfs_has_rtgroups(mp)) {
+ error = xfs_growfs_rt_prep_groups(mp, in->newblocks,
+ in->extsize, &new_rgcount);
if (error)
goto out_unlock;
}
- /* Initialize the free space bitmap one bitmap block at a time. */
- for (bmbno = xfs_last_rt_bmblock(mp); bmbno < nrbmblocks; bmbno++) {
- error = xfs_growfs_rt_bmblock(mp, in->newblocks, in->extsize,
- bmbno);
+ if (xfs_grow_last_rtg(mp)) {
+ error = xfs_growfs_rtg(mp, old_rgcount - 1, in->newblocks,
+ in->extsize);
if (error)
- goto out_free;
+ goto out_unlock;
}
- if (old_rextsize != in->extsize) {
- error = xfs_growfs_rt_fixup_extsize(mp);
+ for (rgno = old_rgcount; rgno < new_rgcount; rgno++) {
+ xfs_rtbxlen_t rextents = div_u64(in->newblocks, in->extsize);
+
+ error = xfs_rtgroup_alloc(mp, rgno, new_rgcount, rextents);
if (error)
- goto out_free;
+ goto out_unlock;
+
+ error = xfs_growfs_rtg(mp, rgno, in->newblocks, in->extsize);
+ if (error) {
+ struct xfs_rtgroup *rtg;
+
+ rtg = xfs_rtgroup_grab(mp, rgno);
+ if (!WARN_ON_ONCE(!rtg)) {
+ xfs_rtunmount_rtg(rtg);
+ xfs_rtgroup_rele(rtg);
+ xfs_rtgroup_free(mp, rgno);
+ }
+ break;
+ }
}
- /* Update secondary superblocks now the physical grow has completed */
- error = xfs_update_secondary_sbs(mp);
+ if (!error && old_rextsize != in->extsize)
+ error = xfs_growfs_rt_fixup_extsize(mp);
-out_free:
/*
- * If we had to allocate a new rsum_cache, we either need to free the
- * old one (if we succeeded) or free the new one and restore the old one
- * (if there was an error).
+ * Update secondary superblocks now the physical grow has completed.
+ *
+ * Also do this in case of an error as we might have already
+ * successfully updated one or more RTGs and incremented sb_rgcount.
*/
- if (rsum_cache != mp->m_rsum_cache) {
- if (error) {
- kvfree(mp->m_rsum_cache);
- mp->m_rsum_cache = rsum_cache;
- } else {
- kvfree(rsum_cache);
- }
+ if (!xfs_is_shutdown(mp)) {
+ int error2 = xfs_update_secondary_sbs(mp);
+
+ if (!error)
+ error = error2;
}
out_unlock:
@@ -1011,6 +1351,56 @@ out_unlock:
return error;
}
+/* Read the realtime superblock and attach it to the mount. */
+int
+xfs_rtmount_readsb(
+ struct xfs_mount *mp)
+{
+ struct xfs_buf *bp;
+ int error;
+
+ if (!xfs_has_rtsb(mp))
+ return 0;
+ if (mp->m_sb.sb_rblocks == 0)
+ return 0;
+ if (mp->m_rtdev_targp == NULL) {
+ xfs_warn(mp,
+ "Filesystem has a realtime volume, use rtdev=device option");
+ return -ENODEV;
+ }
+
+ /* m_blkbb_log is not set up yet */
+ error = xfs_buf_read_uncached(mp->m_rtdev_targp, XFS_RTSB_DADDR,
+ mp->m_sb.sb_blocksize >> BBSHIFT, XBF_NO_IOACCT, &bp,
+ &xfs_rtsb_buf_ops);
+ if (error) {
+ xfs_warn(mp, "rt sb validate failed with error %d.", error);
+ /* bad CRC means corrupted metadata */
+ if (error == -EFSBADCRC)
+ error = -EFSCORRUPTED;
+ return error;
+ }
+
+ mp->m_rtsb_bp = bp;
+ xfs_buf_unlock(bp);
+ return 0;
+}
+
+/* Detach the realtime superblock from the mount and free it. */
+void
+xfs_rtmount_freesb(
+ struct xfs_mount *mp)
+{
+ struct xfs_buf *bp = mp->m_rtsb_bp;
+
+ if (!bp)
+ return;
+
+ xfs_buf_lock(bp);
+ mp->m_rtsb_bp = NULL;
+ xfs_buf_relse(bp);
+}
+
/*
* Initialize realtime fields in the mount structure.
*/
@@ -1019,22 +1409,19 @@ xfs_rtmount_init(
struct xfs_mount *mp) /* file system mount structure */
{
struct xfs_buf *bp; /* buffer for last block of subvolume */
- struct xfs_sb *sbp; /* filesystem superblock copy in mount */
xfs_daddr_t d; /* address of last block of subvolume */
int error;
- sbp = &mp->m_sb;
- if (sbp->sb_rblocks == 0)
+ if (mp->m_sb.sb_rblocks == 0)
return 0;
if (mp->m_rtdev_targp == NULL) {
xfs_warn(mp,
"Filesystem has a realtime volume, use rtdev=device option");
return -ENODEV;
}
- mp->m_rsumlevels = sbp->sb_rextslog + 1;
- mp->m_rsumblocks = xfs_rtsummary_blockcount(mp, mp->m_rsumlevels,
- mp->m_sb.sb_rbmblocks);
- mp->m_rbmip = mp->m_rsumip = NULL;
+
+ mp->m_rsumblocks = xfs_rtsummary_blockcount(mp, &mp->m_rsumlevels);
+
/*
* Check that the realtime section is an ok size.
*/
@@ -1058,7 +1445,7 @@ xfs_rtmount_init(
static int
xfs_rtalloc_count_frextent(
- struct xfs_mount *mp,
+ struct xfs_rtgroup *rtg,
struct xfs_trans *tp,
const struct xfs_rtalloc_rec *rec,
void *priv)
@@ -1080,12 +1467,18 @@ xfs_rtalloc_reinit_frextents(
uint64_t val = 0;
int error;
- xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP);
- error = xfs_rtalloc_query_all(mp, NULL, xfs_rtalloc_count_frextent,
- &val);
- xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
- if (error)
- return error;
+ struct xfs_rtgroup *rtg = NULL;
+
+ while ((rtg = xfs_rtgroup_next(mp, rtg))) {
+ xfs_rtgroup_lock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+ error = xfs_rtalloc_query_all(rtg, NULL,
+ xfs_rtalloc_count_frextent, &val);
+ xfs_rtgroup_unlock(rtg, XFS_RTGLOCK_BITMAP_SHARED);
+ if (error) {
+ xfs_rtgroup_rele(rtg);
+ return error;
+ }
+ }
spin_lock(&mp->m_sb_lock);
mp->m_sb.sb_frextents = val;
@@ -1101,17 +1494,12 @@ xfs_rtalloc_reinit_frextents(
*/
static inline int
xfs_rtmount_iread_extents(
- struct xfs_inode *ip,
- unsigned int lock_class)
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
{
- struct xfs_trans *tp;
int error;
- error = xfs_trans_alloc_empty(ip->i_mount, &tp);
- if (error)
- return error;
-
- xfs_ilock(ip, XFS_ILOCK_EXCL | lock_class);
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
if (error)
@@ -1124,54 +1512,67 @@ xfs_rtmount_iread_extents(
}
out_unlock:
- xfs_iunlock(ip, XFS_ILOCK_EXCL | lock_class);
- xfs_trans_cancel(tp);
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
return error;
}
+static int
+xfs_rtmount_rtg(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_rtgroup *rtg)
+{
+ int error, i;
+
+ for (i = 0; i < XFS_RTGI_MAX; i++) {
+ error = xfs_rtginode_load(rtg, i, tp);
+ if (error)
+ return error;
+
+ if (rtg->rtg_inodes[i]) {
+ error = xfs_rtmount_iread_extents(tp,
+ rtg->rtg_inodes[i]);
+ if (error)
+ return error;
+ }
+ }
+
+ return xfs_alloc_rsum_cache(rtg, mp->m_sb.sb_rbmblocks);
+}
+
/*
* Get the bitmap and summary inodes and the summary cache into the mount
* structure at mount time.
*/
-int /* error */
+int
xfs_rtmount_inodes(
- xfs_mount_t *mp) /* file system mount structure */
+ struct xfs_mount *mp)
{
- int error; /* error return value */
- xfs_sb_t *sbp;
+ struct xfs_trans *tp;
+ struct xfs_rtgroup *rtg = NULL;
+ int error;
- sbp = &mp->m_sb;
- error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip);
- if (xfs_metadata_is_sick(error))
- xfs_rt_mark_sick(mp, XFS_SICK_RT_BITMAP);
+ error = xfs_trans_alloc_empty(mp, &tp);
if (error)
return error;
- ASSERT(mp->m_rbmip != NULL);
- error = xfs_rtmount_iread_extents(mp->m_rbmip, XFS_ILOCK_RTBITMAP);
- if (error)
- goto out_rele_bitmap;
-
- error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
- if (xfs_metadata_is_sick(error))
- xfs_rt_mark_sick(mp, XFS_SICK_RT_SUMMARY);
- if (error)
- goto out_rele_bitmap;
- ASSERT(mp->m_rsumip != NULL);
-
- error = xfs_rtmount_iread_extents(mp->m_rsumip, XFS_ILOCK_RTSUM);
- if (error)
- goto out_rele_summary;
+ if (xfs_has_rtgroups(mp) && mp->m_sb.sb_rgcount > 0) {
+ error = xfs_rtginode_load_parent(tp);
+ if (error)
+ goto out_cancel;
+ }
- error = xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks);
- if (error)
- goto out_rele_summary;
- return 0;
+ while ((rtg = xfs_rtgroup_next(mp, rtg))) {
+ error = xfs_rtmount_rtg(mp, tp, rtg);
+ if (error) {
+ xfs_rtgroup_rele(rtg);
+ xfs_rtunmount_inodes(mp);
+ break;
+ }
+ }
-out_rele_summary:
- xfs_irele(mp->m_rsumip);
-out_rele_bitmap:
- xfs_irele(mp->m_rbmip);
+out_cancel:
+ xfs_trans_cancel(tp);
return error;
}
@@ -1179,11 +1580,11 @@ void
xfs_rtunmount_inodes(
struct xfs_mount *mp)
{
- kvfree(mp->m_rsum_cache);
- if (mp->m_rbmip)
- xfs_irele(mp->m_rbmip);
- if (mp->m_rsumip)
- xfs_irele(mp->m_rsumip);
+ struct xfs_rtgroup *rtg = NULL;
+
+ while ((rtg = xfs_rtgroup_next(mp, rtg)))
+ xfs_rtunmount_rtg(rtg);
+ xfs_rtginode_irele(&mp->m_rtdirip);
}
/*
@@ -1195,28 +1596,29 @@ xfs_rtunmount_inodes(
*/
static xfs_rtxnum_t
xfs_rtpick_extent(
- xfs_mount_t *mp, /* file system mount point */
- xfs_trans_t *tp, /* transaction pointer */
+ struct xfs_rtgroup *rtg,
+ struct xfs_trans *tp,
xfs_rtxlen_t len) /* allocation length (rtextents) */
{
- xfs_rtxnum_t b; /* result rtext */
+ struct xfs_mount *mp = rtg_mount(rtg);
+ struct xfs_inode *rbmip = rtg->rtg_inodes[XFS_RTGI_BITMAP];
+ xfs_rtxnum_t b = 0; /* result rtext */
int log2; /* log of sequence number */
uint64_t resid; /* residual after log removed */
uint64_t seq; /* sequence number of file creation */
struct timespec64 ts; /* timespec in inode */
- xfs_assert_ilocked(mp->m_rbmip, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(rbmip, XFS_ILOCK_EXCL);
- ts = inode_get_atime(VFS_I(mp->m_rbmip));
- if (!(mp->m_rbmip->i_diflags & XFS_DIFLAG_NEWRTBM)) {
- mp->m_rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM;
+ ts = inode_get_atime(VFS_I(rbmip));
+ if (!(rbmip->i_diflags & XFS_DIFLAG_NEWRTBM)) {
+ rbmip->i_diflags |= XFS_DIFLAG_NEWRTBM;
seq = 0;
} else {
seq = ts.tv_sec;
}
- if ((log2 = xfs_highbit64(seq)) == -1)
- b = 0;
- else {
+ log2 = xfs_highbit64(seq);
+ if (log2 != -1) {
resid = seq - (1ULL << log2);
b = (mp->m_sb.sb_rextents * ((resid << 1) + 1ULL)) >>
(log2 + 1);
@@ -1226,8 +1628,8 @@ xfs_rtpick_extent(
b = mp->m_sb.sb_rextents - len;
}
ts.tv_sec = seq + 1;
- inode_set_atime_to_ts(VFS_I(mp->m_rbmip), ts);
- xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
+ inode_set_atime_to_ts(VFS_I(rbmip), ts);
+ xfs_trans_log_inode(tp, rbmip, XFS_ILOG_CORE);
return b;
}
@@ -1260,9 +1662,118 @@ xfs_rtalloc_align_minmax(
*raminlen = newminlen;
}
+/* Given a free extent, find any part of it that isn't busy, if possible. */
+STATIC bool
+xfs_rtalloc_check_busy(
+ struct xfs_rtalloc_args *args,
+ xfs_rtxnum_t start,
+ xfs_rtxlen_t minlen_rtx,
+ xfs_rtxlen_t maxlen_rtx,
+ xfs_rtxlen_t len_rtx,
+ xfs_rtxlen_t prod,
+ xfs_rtxnum_t rtx,
+ xfs_rtxlen_t *reslen,
+ xfs_rtxnum_t *resrtx,
+ unsigned *busy_gen)
+{
+ struct xfs_rtgroup *rtg = args->rtg;
+ struct xfs_mount *mp = rtg_mount(rtg);
+ xfs_agblock_t rgbno = xfs_rtx_to_rgbno(rtg, rtx);
+ xfs_rgblock_t min_rgbno = xfs_rtx_to_rgbno(rtg, start);
+ xfs_extlen_t minlen = xfs_rtxlen_to_extlen(mp, minlen_rtx);
+ xfs_extlen_t len = xfs_rtxlen_to_extlen(mp, len_rtx);
+ xfs_extlen_t diff;
+ bool busy;
+
+ busy = xfs_extent_busy_trim(rtg_group(rtg), minlen,
+ xfs_rtxlen_to_extlen(mp, maxlen_rtx), &rgbno, &len,
+ busy_gen);
+
+ /*
+ * If we have a largish extent that happens to start before min_rgbno,
+ * see if we can shift it into range...
+ */
+ if (rgbno < min_rgbno && rgbno + len > min_rgbno) {
+ diff = min_rgbno - rgbno;
+ if (len > diff) {
+ rgbno += diff;
+ len -= diff;
+ }
+ }
+
+ if (prod > 1 && len >= minlen) {
+ xfs_rgblock_t aligned_rgbno = roundup(rgbno, prod);
+
+ diff = aligned_rgbno - rgbno;
+
+ *resrtx = xfs_rgbno_to_rtx(mp, aligned_rgbno);
+ *reslen = xfs_extlen_to_rtxlen(mp,
+ diff >= len ? 0 : len - diff);
+ } else {
+ *resrtx = xfs_rgbno_to_rtx(mp, rgbno);
+ *reslen = xfs_extlen_to_rtxlen(mp, len);
+ }
+
+ return busy;
+}
+
+/*
+ * Adjust the given free extent so that it isn't busy, or flush the log and
+ * wait for the space to become unbusy. Only needed for rtgroups.
+ */
+STATIC int
+xfs_rtallocate_adjust_for_busy(
+ struct xfs_rtalloc_args *args,
+ xfs_rtxnum_t start,
+ xfs_rtxlen_t minlen,
+ xfs_rtxlen_t maxlen,
+ xfs_rtxlen_t *len,
+ xfs_rtxlen_t prod,
+ xfs_rtxnum_t *rtx)
+{
+ xfs_rtxnum_t resrtx;
+ xfs_rtxlen_t reslen;
+ unsigned busy_gen;
+ bool busy;
+ int error;
+
+again:
+ busy = xfs_rtalloc_check_busy(args, start, minlen, maxlen, *len, prod,
+ *rtx, &reslen, &resrtx, &busy_gen);
+ if (!busy)
+ return 0;
+
+ if (reslen < minlen || (start != 0 && resrtx != *rtx)) {
+ /*
+ * Enough of the extent was busy that we cannot satisfy the
+ * allocation, or this is a near allocation and the start of
+ * the extent is busy. Flush the log and wait for the busy
+ * situation to resolve.
+ */
+ trace_xfs_rtalloc_extent_busy(args->rtg, start, minlen, maxlen,
+ *len, prod, *rtx, busy_gen);
+
+ error = xfs_extent_busy_flush(args->tp, rtg_group(args->rtg),
+ busy_gen, 0);
+ if (error)
+ return error;
+
+ goto again;
+ }
+
+ /* Some of the free space wasn't busy, hand that back to the caller. */
+ trace_xfs_rtalloc_extent_busy_trim(args->rtg, *rtx, *len, resrtx,
+ reslen);
+ *len = reslen;
+ *rtx = resrtx;
+
+ return 0;
+}
+
static int
-xfs_rtallocate(
+xfs_rtallocate_rtg(
struct xfs_trans *tp,
+ xfs_rgnumber_t rgno,
xfs_rtblock_t bno_hint,
xfs_rtxlen_t minlen,
xfs_rtxlen_t maxlen,
@@ -1282,12 +1793,33 @@ xfs_rtallocate(
xfs_rtxlen_t len = 0;
int error = 0;
+ args.rtg = xfs_rtgroup_grab(args.mp, rgno);
+ if (!args.rtg)
+ return -ENOSPC;
+
/*
- * Lock out modifications to both the RT bitmap and summary inodes.
+ * We need to lock out modifications to both the RT bitmap and summary
+ * inodes for finding free space in xfs_rtallocate_extent_{near,size}
+ * and join the bitmap and summary inodes for the actual allocation
+ * down in xfs_rtallocate_range.
+ *
+ * For RTG-enabled file systems we don't want to join the inodes to the
+ * transaction until we are committed to allocate from this
+ * RTG so that only one inode of each type is locked at a time.
+ *
+ * But for pre-RTG file systems we already need to join the bitmap
+ * inode to the transaction for xfs_rtpick_extent, which bumps the
+ * sequence number in it, so we'll have to join the inode to the
+ * transaction early here.
+ *
+ * This is all a bit messy, but at least the mess is contained in
+ * this function.
*/
if (!*rtlocked) {
- xfs_rtbitmap_lock(args.mp);
- xfs_rtbitmap_trans_join(tp);
+ xfs_rtgroup_lock(args.rtg, XFS_RTGLOCK_BITMAP);
+ if (!xfs_has_rtgroups(args.mp))
+ xfs_rtgroup_trans_join(tp, args.rtg,
+ XFS_RTGLOCK_BITMAP);
*rtlocked = true;
}
@@ -1297,8 +1829,8 @@ xfs_rtallocate(
*/
if (bno_hint)
start = xfs_rtb_to_rtx(args.mp, bno_hint);
- else if (initial_user_data)
- start = xfs_rtpick_extent(args.mp, tp, maxlen);
+ else if (!xfs_has_rtgroups(args.mp) && initial_user_data)
+ start = xfs_rtpick_extent(args.rtg, tp, maxlen);
if (start) {
error = xfs_rtallocate_extent_near(&args, start, minlen, maxlen,
@@ -1318,8 +1850,20 @@ xfs_rtallocate(
prod, &rtx);
}
- if (error)
+ if (error) {
+ if (xfs_has_rtgroups(args.mp))
+ goto out_unlock;
goto out_release;
+ }
+
+ if (xfs_has_rtgroups(args.mp)) {
+ error = xfs_rtallocate_adjust_for_busy(&args, start, minlen,
+ maxlen, &len, prod, &rtx);
+ if (error)
+ goto out_unlock;
+
+ xfs_rtgroup_trans_join(tp, args.rtg, XFS_RTGLOCK_BITMAP);
+ }
error = xfs_rtallocate_range(&args, rtx, len);
if (error)
@@ -1328,12 +1872,64 @@ xfs_rtallocate(
xfs_trans_mod_sb(tp, wasdel ?
XFS_TRANS_SB_RES_FREXTENTS : XFS_TRANS_SB_FREXTENTS,
-(long)len);
- *bno = xfs_rtx_to_rtb(args.mp, rtx);
+ *bno = xfs_rtx_to_rtb(args.rtg, rtx);
*blen = xfs_rtxlen_to_extlen(args.mp, len);
out_release:
+ xfs_rtgroup_rele(args.rtg);
xfs_rtbuf_cache_relse(&args);
return error;
+out_unlock:
+ xfs_rtgroup_unlock(args.rtg, XFS_RTGLOCK_BITMAP);
+ *rtlocked = false;
+ goto out_release;
+}
+
+static int
+xfs_rtallocate_rtgs(
+ struct xfs_trans *tp,
+ xfs_fsblock_t bno_hint,
+ xfs_rtxlen_t minlen,
+ xfs_rtxlen_t maxlen,
+ xfs_rtxlen_t prod,
+ bool wasdel,
+ bool initial_user_data,
+ xfs_rtblock_t *bno,
+ xfs_extlen_t *blen)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ xfs_rgnumber_t start_rgno, rgno;
+ int error;
+
+ /*
+ * For now this just blindly iterates over the RTGs for an initial
+ * allocation. We could try to keep an in-memory rtg_longest member
+ * to avoid the locking when just looking for big enough free space,
+ * but for now this keeps things simple.
+ */
+ if (bno_hint != NULLFSBLOCK)
+ start_rgno = xfs_rtb_to_rgno(mp, bno_hint);
+ else
+ start_rgno = (atomic_inc_return(&mp->m_rtgrotor) - 1) %
+ mp->m_sb.sb_rgcount;
+
+ rgno = start_rgno;
+ do {
+ bool rtlocked = false;
+
+ error = xfs_rtallocate_rtg(tp, rgno, bno_hint, minlen, maxlen,
+ prod, wasdel, initial_user_data, &rtlocked,
+ bno, blen);
+ if (error != -ENOSPC)
+ return error;
+ ASSERT(!rtlocked);
+
+ if (++rgno == mp->m_sb.sb_rgcount)
+ rgno = 0;
+ bno_hint = NULLFSBLOCK;
+ } while (rgno != start_rgno);
+
+ return -ENOSPC;
}
static int
@@ -1430,9 +2026,16 @@ retry:
if (xfs_bmap_adjacent(ap))
bno_hint = ap->blkno;
- error = xfs_rtallocate(ap->tp, bno_hint, raminlen, ralen, prod,
- ap->wasdel, initial_user_data, &rtlocked,
- &ap->blkno, &ap->length);
+ if (xfs_has_rtgroups(ap->ip->i_mount)) {
+ error = xfs_rtallocate_rtgs(ap->tp, bno_hint, raminlen, ralen,
+ prod, ap->wasdel, initial_user_data,
+ &ap->blkno, &ap->length);
+ } else {
+ error = xfs_rtallocate_rtg(ap->tp, 0, bno_hint, raminlen, ralen,
+ prod, ap->wasdel, initial_user_data,
+ &rtlocked, &ap->blkno, &ap->length);
+ }
+
if (error == -ENOSPC) {
if (!noalign) {
/*
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index a6836da9bebe..8e2a07b8174b 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -12,6 +12,10 @@ struct xfs_mount;
struct xfs_trans;
#ifdef CONFIG_XFS_RT
+/* rtgroup superblock initialization */
+int xfs_rtmount_readsb(struct xfs_mount *mp);
+void xfs_rtmount_freesb(struct xfs_mount *mp);
+
/*
* Initialize realtime fields in the mount structure.
*/
@@ -42,6 +46,8 @@ int xfs_rtalloc_reinit_frextents(struct xfs_mount *mp);
#else
# define xfs_growfs_rt(mp,in) (-ENOSYS)
# define xfs_rtalloc_reinit_frextents(m) (0)
+# define xfs_rtmount_readsb(mp) (0)
+# define xfs_rtmount_freesb(mp) ((void)0)
static inline int /* error */
xfs_rtmount_init(
xfs_mount_t *mp) /* file system mount structure */
diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c
index ed97d72caa66..ffb52725c2a8 100644
--- a/fs/xfs/xfs_stats.c
+++ b/fs/xfs/xfs_stats.c
@@ -115,10 +115,11 @@ void xfs_stats_clearall(struct xfsstats __percpu *stats)
static int xqm_proc_show(struct seq_file *m, void *v)
{
- /* maximum; incore; ratio free to inuse; freelist */
- seq_printf(m, "%d\t%d\t%d\t%u\n",
+ /* maximum; incore; ratio free to inuse; freelist; rtquota */
+ seq_printf(m, "%d\t%d\t%d\t%u\t%s\n",
0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT),
- 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT + 1));
+ 0, counter_val(xfsstats.xs_stats, XFSSTAT_END_XQMSTAT + 1),
+ IS_ENABLED(CONFIG_XFS_RT) ? "rtquota" : "quota");
return 0;
}
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index fbb3a1594c0d..394fdf3bb535 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -45,6 +45,7 @@
#include "xfs_rtbitmap.h"
#include "xfs_exchmaps_item.h"
#include "xfs_parent.h"
+#include "xfs_rtalloc.h"
#include "scrub/stats.h"
#include "scrub/rcbag_btree.h"
@@ -66,6 +67,9 @@ enum xfs_dax_mode {
XFS_DAX_NEVER = 2,
};
+/* Were quota mount options provided? Must use the upper 16 bits of qflags. */
+#define XFS_QFLAGS_MNTOPTS (1U << 31)
+
static void
xfs_mount_set_dax_mode(
struct xfs_mount *mp,
@@ -238,7 +242,7 @@ xfs_set_inode_alloc_perag(
xfs_ino_t ino,
xfs_agnumber_t max_metadata)
{
- if (!xfs_is_inode32(pag->pag_mount)) {
+ if (!xfs_is_inode32(pag_mount(pag))) {
set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
return false;
@@ -251,7 +255,7 @@ xfs_set_inode_alloc_perag(
}
set_bit(XFS_AGSTATE_ALLOWS_INODES, &pag->pag_opstate);
- if (pag->pag_agno < max_metadata)
+ if (pag_agno(pag) < max_metadata)
set_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
else
clear_bit(XFS_AGSTATE_PREFERS_METADATA, &pag->pag_opstate);
@@ -873,21 +877,21 @@ xfs_fs_statfs(
ffree = statp->f_files - (icount - ifree);
statp->f_ffree = max_t(int64_t, ffree, 0);
-
- if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
- ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
- (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
- xfs_qm_statvfs(ip, statp);
-
if (XFS_IS_REALTIME_MOUNT(mp) &&
(ip->i_diflags & (XFS_DIFLAG_RTINHERIT | XFS_DIFLAG_REALTIME))) {
s64 freertx;
statp->f_blocks = sbp->sb_rblocks;
freertx = percpu_counter_sum_positive(&mp->m_frextents);
- statp->f_bavail = statp->f_bfree = xfs_rtx_to_rtb(mp, freertx);
+ statp->f_bavail = statp->f_bfree =
+ xfs_rtbxlen_to_blen(mp, freertx);
}
+ if ((ip->i_diflags & XFS_DIFLAG_PROJINHERIT) &&
+ ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))) ==
+ (XFS_PQUOTA_ACCT|XFS_PQUOTA_ENFD))
+ xfs_qm_statvfs(ip, statp);
+
return 0;
}
@@ -1144,6 +1148,7 @@ xfs_fs_put_super(
xfs_filestream_unmount(mp);
xfs_unmountfs(mp);
+ xfs_rtmount_freesb(mp);
xfs_freesb(mp);
xchk_mount_stats_free(mp);
free_percpu(mp->m_stats.xs_stats);
@@ -1261,6 +1266,8 @@ xfs_fs_parse_param(
int size = 0;
int opt;
+ BUILD_BUG_ON(XFS_QFLAGS_MNTOPTS & XFS_MOUNT_QUOTA_ALL);
+
opt = fs_parse(fc, xfs_fs_parameters, param, &result);
if (opt < 0)
return opt;
@@ -1338,32 +1345,39 @@ xfs_fs_parse_param(
case Opt_noquota:
parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
parsing_mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
return 0;
case Opt_quota:
case Opt_uquota:
case Opt_usrquota:
parsing_mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ENFD);
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
return 0;
case Opt_qnoenforce:
case Opt_uqnoenforce:
parsing_mp->m_qflags |= XFS_UQUOTA_ACCT;
parsing_mp->m_qflags &= ~XFS_UQUOTA_ENFD;
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
return 0;
case Opt_pquota:
case Opt_prjquota:
parsing_mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ENFD);
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
return 0;
case Opt_pqnoenforce:
parsing_mp->m_qflags |= XFS_PQUOTA_ACCT;
parsing_mp->m_qflags &= ~XFS_PQUOTA_ENFD;
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
return 0;
case Opt_gquota:
case Opt_grpquota:
parsing_mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ENFD);
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
return 0;
case Opt_gqnoenforce:
parsing_mp->m_qflags |= XFS_GQUOTA_ACCT;
parsing_mp->m_qflags &= ~XFS_GQUOTA_ENFD;
+ parsing_mp->m_qflags |= XFS_QFLAGS_MNTOPTS;
return 0;
case Opt_discard:
parsing_mp->m_features |= XFS_FEAT_DISCARD;
@@ -1430,7 +1444,8 @@ xfs_fs_validate_params(
return -EINVAL;
}
- if (!IS_ENABLED(CONFIG_XFS_QUOTA) && mp->m_qflags != 0) {
+ if (!IS_ENABLED(CONFIG_XFS_QUOTA) &&
+ (mp->m_qflags & ~XFS_QFLAGS_MNTOPTS)) {
xfs_warn(mp, "quota support not available in this kernel.");
return -EINVAL;
}
@@ -1657,9 +1672,7 @@ xfs_fs_fill_super(
goto out_free_sb;
}
- xfs_warn(mp,
-"EXPERIMENTAL: V5 Filesystem with Large Block Size (%d bytes) enabled.",
- mp->m_sb.sb_blocksize);
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LBS);
}
/* Ensure this filesystem fits in the page cache limits */
@@ -1691,10 +1704,14 @@ xfs_fs_fill_super(
goto out_free_sb;
}
- error = xfs_filestream_mount(mp);
+ error = xfs_rtmount_readsb(mp);
if (error)
goto out_free_sb;
+ error = xfs_filestream_mount(mp);
+ if (error)
+ goto out_free_rtsb;
+
/*
* we must configure the block size in the superblock before we run the
* full mount process as the mount process can lookup and cache inodes.
@@ -1733,6 +1750,9 @@ xfs_fs_fill_super(
mp->m_features &= ~XFS_FEAT_DISCARD;
}
+ if (xfs_has_metadir(mp))
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_METADIR);
+
if (xfs_has_reflink(mp)) {
if (mp->m_sb.sb_rblocks) {
xfs_alert(mp,
@@ -1755,12 +1775,18 @@ xfs_fs_fill_super(
}
if (xfs_has_exchange_range(mp))
- xfs_warn(mp,
- "EXPERIMENTAL exchange-range feature enabled. Use at your own risk!");
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_EXCHRANGE);
if (xfs_has_parent(mp))
- xfs_warn(mp,
- "EXPERIMENTAL parent pointer feature enabled. Use at your own risk!");
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_PPTR);
+
+ /*
+ * If no quota mount options were provided, maybe we'll try to pick
+ * up the quota accounting and enforcement flags from the ondisk sb.
+ */
+ if (!(mp->m_qflags & XFS_QFLAGS_MNTOPTS))
+ xfs_set_resuming_quotaon(mp);
+ mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS;
error = xfs_mountfs(mp);
if (error)
@@ -1781,6 +1807,8 @@ xfs_fs_fill_super(
out_filestream_unmount:
xfs_filestream_unmount(mp);
+ out_free_rtsb:
+ xfs_rtmount_freesb(mp);
out_free_sb:
xfs_freesb(mp);
out_free_scrub_stats:
@@ -1800,7 +1828,7 @@ xfs_fs_fill_super(
out_unmount:
xfs_filestream_unmount(mp);
xfs_unmountfs(mp);
- goto out_free_sb;
+ goto out_free_rtsb;
}
static int
@@ -1946,6 +1974,8 @@ xfs_fs_reconfigure(
int flags = fc->sb_flags;
int error;
+ new_mp->m_qflags &= ~XFS_QFLAGS_MNTOPTS;
+
/* version 5 superblocks always support version counters. */
if (xfs_has_crc(mp))
fc->sb_flags |= SB_I_VERSION;
@@ -2011,17 +2041,20 @@ static const struct fs_context_operations xfs_context_ops = {
* mount option parsing having already been performed as this can be called from
* fsopen() before any parameters have been set.
*/
-static int xfs_init_fs_context(
+static int
+xfs_init_fs_context(
struct fs_context *fc)
{
struct xfs_mount *mp;
+ int i;
mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL | __GFP_NOFAIL);
if (!mp)
return -ENOMEM;
spin_lock_init(&mp->m_sb_lock);
- xa_init(&mp->m_perags);
+ for (i = 0; i < XG_TYPE_MAX; i++)
+ xa_init(&mp->m_groups[i].xa);
mutex_init(&mp->m_growlock);
INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
@@ -2063,7 +2096,7 @@ static struct file_system_type xfs_fs_type = {
.init_fs_context = xfs_init_fs_context,
.parameters = xfs_fs_parameters,
.kill_sb = xfs_kill_sb,
- .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_MGTIME,
};
MODULE_ALIAS_FS("xfs");
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 2af9f274e872..8f530e69c18a 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -11,6 +11,7 @@
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
+#include "xfs_group.h"
#include "xfs_defer.h"
#include "xfs_da_format.h"
#include "xfs_inode.h"
@@ -32,6 +33,7 @@
#include "xfs_fsmap.h"
#include "xfs_btree_staging.h"
#include "xfs_icache.h"
+#include "xfs_iunlink_item.h"
#include "xfs_ag.h"
#include "xfs_ag_resv.h"
#include "xfs_error.h"
@@ -44,6 +46,9 @@
#include "xfs_parent.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
+#include "xfs_metafile.h"
+#include "xfs_metadir.h"
+#include "xfs_rtgroup.h"
/*
* We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index fcb2bad4f76e..7b16cdd72e9d 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -72,8 +72,11 @@ struct xfs_btree_cur;
struct xfs_defer_op_type;
struct xfs_refcount_irec;
struct xfs_fsmap;
+struct xfs_fsmap_irec;
+struct xfs_group;
struct xfs_rmap_irec;
struct xfs_icreate_log;
+struct xfs_iunlink_item;
struct xfs_owner_info;
struct xfs_trans_res;
struct xfs_inobt_rec_incore;
@@ -93,6 +96,8 @@ struct xfs_attrlist_cursor_kern;
struct xfs_extent_free_item;
struct xfs_rmap_intent;
struct xfs_refcount_intent;
+struct xfs_metadir_update;
+struct xfs_rtgroup;
#define XFS_ATTR_FILTER_FLAGS \
{ XFS_ATTR_ROOT, "ROOT" }, \
@@ -181,7 +186,7 @@ TRACE_EVENT(xlog_intent_recovery_failed,
);
DECLARE_EVENT_CLASS(xfs_perag_class,
- TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip),
+ TP_PROTO(const struct xfs_perag *pag, unsigned long caller_ip),
TP_ARGS(pag, caller_ip),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -191,10 +196,11 @@ DECLARE_EVENT_CLASS(xfs_perag_class,
__field(unsigned long, caller_ip)
),
TP_fast_assign(
- __entry->dev = pag->pag_mount->m_super->s_dev;
- __entry->agno = pag->pag_agno;
- __entry->refcount = atomic_read(&pag->pag_ref);
- __entry->active_refcount = atomic_read(&pag->pag_active_ref);
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
+ __entry->refcount = atomic_read(&pag->pag_group.xg_ref);
+ __entry->active_refcount =
+ atomic_read(&pag->pag_group.xg_active_ref);
__entry->caller_ip = caller_ip;
),
TP_printk("dev %d:%d agno 0x%x passive refs %d active refs %d caller %pS",
@@ -207,18 +213,54 @@ DECLARE_EVENT_CLASS(xfs_perag_class,
#define DEFINE_PERAG_REF_EVENT(name) \
DEFINE_EVENT(xfs_perag_class, name, \
- TP_PROTO(struct xfs_perag *pag, unsigned long caller_ip), \
+ TP_PROTO(const struct xfs_perag *pag, unsigned long caller_ip), \
TP_ARGS(pag, caller_ip))
-DEFINE_PERAG_REF_EVENT(xfs_perag_get);
-DEFINE_PERAG_REF_EVENT(xfs_perag_hold);
-DEFINE_PERAG_REF_EVENT(xfs_perag_put);
-DEFINE_PERAG_REF_EVENT(xfs_perag_grab);
-DEFINE_PERAG_REF_EVENT(xfs_perag_grab_next_tag);
-DEFINE_PERAG_REF_EVENT(xfs_perag_rele);
DEFINE_PERAG_REF_EVENT(xfs_perag_set_inode_tag);
DEFINE_PERAG_REF_EVENT(xfs_perag_clear_inode_tag);
DEFINE_PERAG_REF_EVENT(xfs_reclaim_inodes_count);
+TRACE_DEFINE_ENUM(XG_TYPE_AG);
+TRACE_DEFINE_ENUM(XG_TYPE_RTG);
+
+DECLARE_EVENT_CLASS(xfs_group_class, /* event class keyed on a generic struct xfs_group */
+ TP_PROTO(struct xfs_group *xg, unsigned long caller_ip),
+ TP_ARGS(xg, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(enum xfs_group_type, type)
+ __field(xfs_agnumber_t, agno) /* holds xg_gno, whatever the group type is */
+ __field(int, refcount) /* snapshot of xg_ref, printed as "passive refs" */
+ __field(int, active_refcount) /* snapshot of xg_active_ref */
+ __field(unsigned long, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = xg->xg_mount->m_super->s_dev;
+ __entry->type = xg->xg_type;
+ __entry->agno = xg->xg_gno;
+ __entry->refcount = atomic_read(&xg->xg_ref);
+ __entry->active_refcount = atomic_read(&xg->xg_active_ref);
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d %sno 0x%x passive refs %d active refs %d caller %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS), /* type string is printed as the prefix of "no" */
+ __entry->agno,
+ __entry->refcount,
+ __entry->active_refcount,
+ (char *)__entry->caller_ip)
+);
+
+#define DEFINE_GROUP_REF_EVENT(name) /* instantiate one xfs_group_class tracepoint called 'name' */ \
+DEFINE_EVENT(xfs_group_class, name, \
+ TP_PROTO(struct xfs_group *xg, unsigned long caller_ip), \
+ TP_ARGS(xg, caller_ip))
+DEFINE_GROUP_REF_EVENT(xfs_group_get); /* the events below all share the xfs_group_class record and format */
+DEFINE_GROUP_REF_EVENT(xfs_group_hold);
+DEFINE_GROUP_REF_EVENT(xfs_group_put);
+DEFINE_GROUP_REF_EVENT(xfs_group_grab);
+DEFINE_GROUP_REF_EVENT(xfs_group_grab_next_tag);
+DEFINE_GROUP_REF_EVENT(xfs_group_rele);
+
TRACE_EVENT(xfs_inodegc_worker,
TP_PROTO(struct xfs_mount *mp, unsigned int shrinker_hits),
TP_ARGS(mp, shrinker_hits),
@@ -299,15 +341,15 @@ TRACE_EVENT(xfs_inodegc_shrinker_scan,
);
DECLARE_EVENT_CLASS(xfs_ag_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno),
- TP_ARGS(mp, agno),
+ TP_PROTO(const struct xfs_perag *pag),
+ TP_ARGS(pag),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
),
TP_printk("dev %d:%d agno 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -315,8 +357,8 @@ DECLARE_EVENT_CLASS(xfs_ag_class,
);
#define DEFINE_AG_EVENT(name) \
DEFINE_EVENT(xfs_ag_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno), \
- TP_ARGS(mp, agno))
+ TP_PROTO(const struct xfs_perag *pag), \
+ TP_ARGS(pag))
DEFINE_AG_EVENT(xfs_read_agf);
DEFINE_AG_EVENT(xfs_alloc_read_agf);
@@ -662,7 +704,7 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
DECLARE_EVENT_CLASS(xfs_filestream_class,
- TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino),
+ TP_PROTO(const struct xfs_perag *pag, xfs_ino_t ino),
TP_ARGS(pag, ino),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -671,9 +713,9 @@ DECLARE_EVENT_CLASS(xfs_filestream_class,
__field(int, streams)
),
TP_fast_assign(
- __entry->dev = pag->pag_mount->m_super->s_dev;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
__entry->ino = ino;
- __entry->agno = pag->pag_agno;
+ __entry->agno = pag_agno(pag);
__entry->streams = atomic_read(&pag->pagf_fstrms);
),
TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d",
@@ -684,14 +726,14 @@ DECLARE_EVENT_CLASS(xfs_filestream_class,
)
#define DEFINE_FILESTREAM_EVENT(name) \
DEFINE_EVENT(xfs_filestream_class, name, \
- TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino), \
+ TP_PROTO(const struct xfs_perag *pag, xfs_ino_t ino), \
TP_ARGS(pag, ino))
DEFINE_FILESTREAM_EVENT(xfs_filestream_free);
DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup);
DEFINE_FILESTREAM_EVENT(xfs_filestream_scan);
TRACE_EVENT(xfs_filestream_pick,
- TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino),
+ TP_PROTO(const struct xfs_perag *pag, xfs_ino_t ino),
TP_ARGS(pag, ino),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -701,9 +743,9 @@ TRACE_EVENT(xfs_filestream_pick,
__field(xfs_extlen_t, free)
),
TP_fast_assign(
- __entry->dev = pag->pag_mount->m_super->s_dev;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
__entry->ino = ino;
- __entry->agno = pag->pag_agno;
+ __entry->agno = pag_agno(pag);
__entry->streams = atomic_read(&pag->pagf_fstrms);
__entry->free = pag->pagf_freeblks;
),
@@ -822,28 +864,32 @@ DEFINE_INODE_EVENT(xfs_inode_inactivating);
TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_SHARED);
TRACE_DEFINE_ENUM(XFS_REFC_DOMAIN_COW);
-TRACE_EVENT(xfs_filemap_fault,
- TP_PROTO(struct xfs_inode *ip, unsigned int order, bool write_fault),
- TP_ARGS(ip, order, write_fault),
+DECLARE_EVENT_CLASS(xfs_fault_class,
+ TP_PROTO(struct xfs_inode *ip, unsigned int order),
+ TP_ARGS(ip, order),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
__field(unsigned int, order)
- __field(bool, write_fault)
),
TP_fast_assign(
__entry->dev = VFS_I(ip)->i_sb->s_dev;
__entry->ino = ip->i_ino;
__entry->order = order;
- __entry->write_fault = write_fault;
),
- TP_printk("dev %d:%d ino 0x%llx order %u write_fault %d",
+ TP_printk("dev %d:%d ino 0x%llx order %u",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
- __entry->order,
- __entry->write_fault)
+ __entry->order)
)
+#define DEFINE_FAULT_EVENT(name) /* instantiate one xfs_fault_class tracepoint called 'name' */ \
+DEFINE_EVENT(xfs_fault_class, name, \
+ TP_PROTO(struct xfs_inode *ip, unsigned int order), \
+ TP_ARGS(ip, order))
+DEFINE_FAULT_EVENT(xfs_read_fault); /* the class records only dev, ino and order; read vs. write is conveyed by the event name */
+DEFINE_FAULT_EVENT(xfs_write_fault);
+
DECLARE_EVENT_CLASS(xfs_iref_class,
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
TP_ARGS(ip, caller_ip),
@@ -894,9 +940,10 @@ TRACE_EVENT(xfs_iomap_prealloc_size,
)
TRACE_EVENT(xfs_irec_merge_pre,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
- uint16_t holemask, xfs_agino_t nagino, uint16_t nholemask),
- TP_ARGS(mp, agno, agino, holemask, nagino, nholemask),
+ TP_PROTO(const struct xfs_perag *pag,
+ const struct xfs_inobt_rec_incore *rec,
+ const struct xfs_inobt_rec_incore *nrec),
+ TP_ARGS(pag, rec, nrec),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -906,12 +953,12 @@ TRACE_EVENT(xfs_irec_merge_pre,
__field(uint16_t, nholemask)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- __entry->agino = agino;
- __entry->holemask = holemask;
- __entry->nagino = nagino;
- __entry->nholemask = holemask;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
+ __entry->agino = rec->ir_startino;
+ __entry->holemask = rec->ir_holemask;
+ __entry->nagino = nrec->ir_startino;
+ __entry->nholemask = nrec->ir_holemask;
),
TP_printk("dev %d:%d agno 0x%x agino 0x%x holemask 0x%x new_agino 0x%x new_holemask 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -923,9 +970,9 @@ TRACE_EVENT(xfs_irec_merge_pre,
)
TRACE_EVENT(xfs_irec_merge_post,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
- uint16_t holemask),
- TP_ARGS(mp, agno, agino, holemask),
+ TP_PROTO(const struct xfs_perag *pag,
+ const struct xfs_inobt_rec_incore *nrec),
+ TP_ARGS(pag, nrec),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -933,10 +980,10 @@ TRACE_EVENT(xfs_irec_merge_post,
__field(uint16_t, holemask)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- __entry->agino = agino;
- __entry->holemask = holemask;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
+ __entry->agino = nrec->ir_startino;
+ __entry->holemask = nrec->ir_holemask;
),
TP_printk("dev %d:%d agno 0x%x agino 0x%x holemask 0x%x",
MAJOR(__entry->dev),
@@ -1634,44 +1681,48 @@ TRACE_EVENT(xfs_bunmap,
);
DECLARE_EVENT_CLASS(xfs_extent_busy_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t agbno, xfs_extlen_t len),
- TP_ARGS(mp, agno, agbno, len),
+ TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno,
+ xfs_extlen_t len),
+ TP_ARGS(xg, agbno, len),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = xg->xg_mount->m_super->s_dev;
+ __entry->type = xg->xg_type;
+ __entry->agno = xg->xg_gno;
__entry->agbno = agbno;
__entry->len = len;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x",
+ TP_printk("dev %d:%d %sno 0x%x %sbno 0x%x fsbcount 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agbno,
__entry->len)
);
#define DEFINE_BUSY_EVENT(name) \
DEFINE_EVENT(xfs_extent_busy_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- xfs_agblock_t agbno, xfs_extlen_t len), \
- TP_ARGS(mp, agno, agbno, len))
+ TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno, \
+ xfs_extlen_t len), \
+ TP_ARGS(xg, agbno, len))
DEFINE_BUSY_EVENT(xfs_extent_busy);
DEFINE_BUSY_EVENT(xfs_extent_busy_force);
DEFINE_BUSY_EVENT(xfs_extent_busy_reuse);
DEFINE_BUSY_EVENT(xfs_extent_busy_clear);
TRACE_EVENT(xfs_extent_busy_trim,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t agbno, xfs_extlen_t len,
- xfs_agblock_t tbno, xfs_extlen_t tlen),
- TP_ARGS(mp, agno, agbno, len, tbno, tlen),
+ TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno,
+ xfs_extlen_t len, xfs_agblock_t tbno, xfs_extlen_t tlen),
+ TP_ARGS(xg, agbno, len, tbno, tlen),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
@@ -1679,22 +1730,99 @@ TRACE_EVENT(xfs_extent_busy_trim,
__field(xfs_extlen_t, tlen)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = xg->xg_mount->m_super->s_dev;
+ __entry->type = xg->xg_type;
+ __entry->agno = xg->xg_gno;
__entry->agbno = agbno;
__entry->len = len;
__entry->tbno = tbno;
__entry->tlen = tlen;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x found_agbno 0x%x found_fsbcount 0x%x",
+ TP_printk("dev %d:%d %sno 0x%x %sbno 0x%x fsbcount 0x%x found_agbno 0x%x found_fsbcount 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agbno,
__entry->len,
__entry->tbno,
__entry->tlen)
);
+#ifdef CONFIG_XFS_RT
+TRACE_EVENT(xfs_rtalloc_extent_busy, /* records rtgroup number, requested extent bounds, candidate rtx and busy generation */
+ TP_PROTO(struct xfs_rtgroup *rtg, xfs_rtxnum_t start,
+ xfs_rtxlen_t minlen, xfs_rtxlen_t maxlen,
+ xfs_rtxlen_t len, xfs_rtxlen_t prod, xfs_rtxnum_t rtx,
+ unsigned busy_gen),
+ TP_ARGS(rtg, start, minlen, maxlen, len, prod, rtx, busy_gen),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_rgnumber_t, rgno)
+ __field(xfs_rtxnum_t, start)
+ __field(xfs_rtxlen_t, minlen)
+ __field(xfs_rtxlen_t, maxlen)
+ __field(xfs_rtxlen_t, mod) /* NOTE(review): never assigned in TP_fast_assign nor printed; candidate for removal */
+ __field(xfs_rtxlen_t, prod)
+ __field(xfs_rtxlen_t, len)
+ __field(xfs_rtxnum_t, rtx)
+ __field(unsigned, busy_gen)
+ ),
+ TP_fast_assign(
+ __entry->dev = rtg_mount(rtg)->m_super->s_dev;
+ __entry->rgno = rtg_rgno(rtg);
+ __entry->start = start;
+ __entry->minlen = minlen;
+ __entry->maxlen = maxlen;
+ __entry->prod = prod;
+ __entry->len = len;
+ __entry->rtx = rtx;
+ __entry->busy_gen = busy_gen;
+ ),
+ TP_printk("dev %d:%d rgno 0x%x startrtx 0x%llx minlen %u maxlen %u "
+ "prod %u len %u rtx 0x%llx busy_gen 0x%x", /* rtx is hex like startrtx, so it needs the full 0x prefix */
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->rgno,
+ __entry->start,
+ __entry->minlen,
+ __entry->maxlen,
+ __entry->prod,
+ __entry->len,
+ __entry->rtx,
+ __entry->busy_gen)
+)
+
+TRACE_EVENT(xfs_rtalloc_extent_busy_trim, /* records an rt extent (start, length) before and after a change */
+ TP_PROTO(struct xfs_rtgroup *rtg, xfs_rtxnum_t old_rtx,
+ xfs_rtxlen_t old_len, xfs_rtxnum_t new_rtx,
+ xfs_rtxlen_t new_len),
+ TP_ARGS(rtg, old_rtx, old_len, new_rtx, new_len),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_rgnumber_t, rgno)
+ __field(xfs_rtxnum_t, old_rtx) /* the two start fields are declared first, then the two lengths */
+ __field(xfs_rtxnum_t, new_rtx)
+ __field(xfs_rtxlen_t, old_len)
+ __field(xfs_rtxlen_t, new_len)
+ ),
+ TP_fast_assign(
+ __entry->dev = rtg_mount(rtg)->m_super->s_dev;
+ __entry->rgno = rtg_rgno(rtg);
+ __entry->old_rtx = old_rtx;
+ __entry->old_len = old_len;
+ __entry->new_rtx = new_rtx;
+ __entry->new_len = new_len;
+ ),
+ TP_printk("dev %d:%d rgno 0x%x rtx 0x%llx rtxcount 0x%x -> rtx 0x%llx rtxcount 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->rgno,
+ __entry->old_rtx, /* printed as old (rtx, rtxcount) -> new (rtx, rtxcount) */
+ __entry->old_len,
+ __entry->new_rtx,
+ __entry->new_len)
+);
+#endif /* CONFIG_XFS_RT */
+
DECLARE_EVENT_CLASS(xfs_agf_class,
TP_PROTO(struct xfs_mount *mp, struct xfs_agf *agf, int flags,
unsigned long caller_ip),
@@ -1758,10 +1886,10 @@ DEFINE_AGF_EVENT(xfs_agf);
DEFINE_AGF_EVENT(xfs_agfl_reset);
TRACE_EVENT(xfs_free_extent,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
+ TP_PROTO(const struct xfs_perag *pag, xfs_agblock_t agbno,
xfs_extlen_t len, enum xfs_ag_resv_type resv, int haveleft,
int haveright),
- TP_ARGS(mp, agno, agbno, len, resv, haveleft, haveright),
+ TP_ARGS(pag, agbno, len, resv, haveleft, haveright),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -1772,8 +1900,8 @@ TRACE_EVENT(xfs_free_extent,
__field(int, haveright)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->agbno = agbno;
__entry->len = len;
__entry->resv = resv;
@@ -2426,23 +2554,26 @@ DEFINE_LOG_RECOVER_ICREATE_ITEM(xfs_log_recover_icreate_cancel);
DEFINE_LOG_RECOVER_ICREATE_ITEM(xfs_log_recover_icreate_recover);
DECLARE_EVENT_CLASS(xfs_discard_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agblock_t agbno, xfs_extlen_t len),
- TP_ARGS(mp, agno, agbno, len),
+ TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno,
+ xfs_extlen_t len),
+ TP_ARGS(xg, agbno, len),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = xg->xg_mount->m_super->s_dev;
+ __entry->type = xg->xg_type;
+ __entry->agno = xg->xg_gno;
__entry->agbno = agbno;
__entry->len = len;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x",
+ TP_printk("dev %d:%d %sno 0x%x gbno 0x%x fsbcount 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
__entry->agbno,
__entry->len)
@@ -2450,9 +2581,9 @@ DECLARE_EVENT_CLASS(xfs_discard_class,
#define DEFINE_DISCARD_EVENT(name) \
DEFINE_EVENT(xfs_discard_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- xfs_agblock_t agbno, xfs_extlen_t len), \
- TP_ARGS(mp, agno, agbno, len))
+ TP_PROTO(const struct xfs_group *xg, xfs_agblock_t agbno, \
+ xfs_extlen_t len), \
+ TP_ARGS(xg, agbno, len))
DEFINE_DISCARD_EVENT(xfs_discard_extent);
DEFINE_DISCARD_EVENT(xfs_discard_toosmall);
DEFINE_DISCARD_EVENT(xfs_discard_exclude);
@@ -2542,7 +2673,7 @@ TRACE_EVENT(xfs_btree_alloc_block,
__entry->ino = cur->bc_ino.ip->i_ino;
break;
case XFS_BTREE_TYPE_AG:
- __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->agno = cur->bc_group->xg_gno;
__entry->ino = 0;
break;
case XFS_BTREE_TYPE_MEM:
@@ -2712,6 +2843,7 @@ DECLARE_EVENT_CLASS(xfs_free_extent_deferred_class,
TP_ARGS(mp, free),
TP_STRUCT__entry(
__field(dev_t, dev)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
@@ -2719,13 +2851,16 @@ DECLARE_EVENT_CLASS(xfs_free_extent_deferred_class,
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
- __entry->agno = XFS_FSB_TO_AGNO(mp, free->xefi_startblock);
- __entry->agbno = XFS_FSB_TO_AGBNO(mp, free->xefi_startblock);
+ __entry->type = free->xefi_group->xg_type;
+ __entry->agno = free->xefi_group->xg_gno;
+ __entry->agbno = xfs_fsb_to_gbno(mp, free->xefi_startblock,
+ free->xefi_group->xg_type);
__entry->len = free->xefi_blockcount;
__entry->flags = free->xefi_flags;
),
- TP_printk("dev %d:%d agno 0x%x agbno 0x%x fsbcount 0x%x flags 0x%x",
+ TP_printk("dev %d:%d %sno 0x%x gbno 0x%x fsbcount 0x%x flags 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
__entry->agbno,
__entry->len,
@@ -2735,7 +2870,6 @@ DECLARE_EVENT_CLASS(xfs_free_extent_deferred_class,
DEFINE_EVENT(xfs_free_extent_deferred_class, name, \
TP_PROTO(struct xfs_mount *mp, struct xfs_extent_free_item *free), \
TP_ARGS(mp, free))
-DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_agfl_free_defer);
DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_agfl_free_deferred);
DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_defer);
DEFINE_FREE_EXTENT_DEFERRED_EVENT(xfs_extent_free_deferred);
@@ -2798,7 +2932,7 @@ DECLARE_EVENT_CLASS(xfs_rmap_class,
),
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
- __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->agno = cur->bc_group->xg_gno;
__entry->agbno = agbno;
__entry->len = len;
__entry->owner = oinfo->oi_owner;
@@ -2843,7 +2977,7 @@ DECLARE_EVENT_CLASS(xfs_btree_error_class,
__entry->ino = cur->bc_ino.ip->i_ino;
break;
case XFS_BTREE_TYPE_AG:
- __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->agno = cur->bc_group->xg_gno;
__entry->ino = 0;
break;
case XFS_BTREE_TYPE_MEM:
@@ -2897,7 +3031,7 @@ TRACE_EVENT(xfs_rmap_convert_state,
__entry->ino = cur->bc_ino.ip->i_ino;
break;
case XFS_BTREE_TYPE_AG:
- __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->agno = cur->bc_group->xg_gno;
__entry->ino = 0;
break;
case XFS_BTREE_TYPE_MEM:
@@ -2932,7 +3066,7 @@ DECLARE_EVENT_CLASS(xfs_rmapbt_class,
),
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
- __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->agno = cur->bc_group->xg_gno;
__entry->agbno = agbno;
__entry->len = len;
__entry->owner = owner;
@@ -3032,11 +3166,10 @@ DECLARE_EVENT_CLASS(xfs_bmap_deferred_class,
TP_ARGS(bi),
TP_STRUCT__entry(
__field(dev_t, dev)
- __field(dev_t, opdev)
+ __field(enum xfs_group_type, type)
__field(xfs_agnumber_t, agno)
__field(xfs_ino_t, ino)
- __field(xfs_agblock_t, agbno)
- __field(xfs_fsblock_t, rtbno)
+ __field(unsigned long long, gbno)
__field(int, whichfork)
__field(xfs_fileoff_t, l_loff)
__field(xfs_filblks_t, l_len)
@@ -3045,20 +3178,25 @@ DECLARE_EVENT_CLASS(xfs_bmap_deferred_class,
),
TP_fast_assign(
struct xfs_inode *ip = bi->bi_owner;
+ struct xfs_mount *mp = ip->i_mount;
- __entry->dev = ip->i_mount->m_super->s_dev;
- if (xfs_ifork_is_realtime(ip, bi->bi_whichfork)) {
- __entry->agno = 0;
- __entry->agbno = 0;
- __entry->rtbno = bi->bi_bmap.br_startblock;
- __entry->opdev = ip->i_mount->m_rtdev_targp->bt_dev;
+ __entry->dev = mp->m_super->s_dev;
+ __entry->type = bi->bi_group->xg_type;
+ __entry->agno = bi->bi_group->xg_gno;
+ if (bi->bi_group->xg_type == XG_TYPE_RTG &&
+ !xfs_has_rtgroups(mp)) {
+ /*
+ * Legacy rt filesystems do not have allocation groups
+ * ondisk. We emulate this incore with one gigantic
+ * rtgroup whose size can exceed a 32-bit block number.
+ * For this tracepoint, we report group 0 and a 64-bit
+ * group block number.
+ */
+ __entry->gbno = bi->bi_bmap.br_startblock;
} else {
- __entry->agno = XFS_FSB_TO_AGNO(ip->i_mount,
- bi->bi_bmap.br_startblock);
- __entry->agbno = XFS_FSB_TO_AGBNO(ip->i_mount,
- bi->bi_bmap.br_startblock);
- __entry->rtbno = 0;
- __entry->opdev = __entry->dev;
+ __entry->gbno = xfs_fsb_to_gbno(mp,
+ bi->bi_bmap.br_startblock,
+ bi->bi_group->xg_type);
}
__entry->ino = ip->i_ino;
__entry->whichfork = bi->bi_whichfork;
@@ -3067,14 +3205,13 @@ DECLARE_EVENT_CLASS(xfs_bmap_deferred_class,
__entry->l_state = bi->bi_bmap.br_state;
__entry->op = bi->bi_type;
),
- TP_printk("dev %d:%d op %s opdev %d:%d ino 0x%llx agno 0x%x agbno 0x%x rtbno 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d",
+ TP_printk("dev %d:%d op %s ino 0x%llx %sno 0x%x gbno 0x%llx %s fileoff 0x%llx fsbcount 0x%llx state %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__print_symbolic(__entry->op, XFS_BMAP_INTENT_STRINGS),
- MAJOR(__entry->opdev), MINOR(__entry->opdev),
__entry->ino,
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
__entry->agno,
- __entry->agbno,
- __entry->rtbno,
+ __entry->gbno,
__print_symbolic(__entry->whichfork, XFS_WHICHFORK_STRINGS),
__entry->l_loff,
__entry->l_len,
@@ -3105,8 +3242,8 @@ DECLARE_EVENT_CLASS(xfs_ag_resv_class,
TP_fast_assign(
struct xfs_ag_resv *r = xfs_perag_resv(pag, resv);
- __entry->dev = pag->pag_mount->m_super->s_dev;
- __entry->agno = pag->pag_agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->resv = resv;
__entry->freeblks = pag->pagf_freeblks;
__entry->flcount = pag->pagf_flcount;
@@ -3139,11 +3276,10 @@ DEFINE_AG_RESV_EVENT(xfs_ag_resv_free_extent);
DEFINE_AG_RESV_EVENT(xfs_ag_resv_critical);
DEFINE_AG_RESV_EVENT(xfs_ag_resv_needed);
-/* simple AG-based error/%ip tracepoint class */
-DECLARE_EVENT_CLASS(xfs_ag_error_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error,
+TRACE_EVENT(xfs_ag_resv_init_error,
+ TP_PROTO(const struct xfs_perag *pag, int error,
unsigned long caller_ip),
- TP_ARGS(mp, agno, error, caller_ip),
+ TP_ARGS(pag, error, caller_ip),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -3151,8 +3287,8 @@ DECLARE_EVENT_CLASS(xfs_ag_error_class,
__field(unsigned long, caller_ip)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->error = error;
__entry->caller_ip = caller_ip;
),
@@ -3163,13 +3299,6 @@ DECLARE_EVENT_CLASS(xfs_ag_error_class,
(char *)__entry->caller_ip)
);
-#define DEFINE_AG_ERROR_EVENT(name) \
-DEFINE_EVENT(xfs_ag_error_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, \
- unsigned long caller_ip), \
- TP_ARGS(mp, agno, error, caller_ip))
-DEFINE_AG_ERROR_EVENT(xfs_ag_resv_init_error);
-
/* refcount tracepoint classes */
DECLARE_EVENT_CLASS(xfs_refcount_class,
@@ -3184,7 +3313,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_class,
),
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
- __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->agno = cur->bc_group->xg_gno;
__entry->agbno = agbno;
__entry->len = len;
),
@@ -3215,7 +3344,7 @@ TRACE_EVENT(xfs_refcount_lookup,
),
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
- __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->agno = cur->bc_group->xg_gno;
__entry->agbno = agbno;
__entry->dir = dir;
),
@@ -3241,7 +3370,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_class,
),
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
- __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->agno = cur->bc_group->xg_gno;
__entry->domain = irec->rc_domain;
__entry->startblock = irec->rc_startblock;
__entry->blockcount = irec->rc_blockcount;
@@ -3277,7 +3406,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_extent_at_class,
),
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
- __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->agno = cur->bc_group->xg_gno;
__entry->domain = irec->rc_domain;
__entry->startblock = irec->rc_startblock;
__entry->blockcount = irec->rc_blockcount;
@@ -3319,7 +3448,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_class,
),
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
- __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->agno = cur->bc_group->xg_gno;
__entry->i1_domain = i1->rc_domain;
__entry->i1_startblock = i1->rc_startblock;
__entry->i1_blockcount = i1->rc_blockcount;
@@ -3369,7 +3498,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_double_extent_at_class,
),
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
- __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->agno = cur->bc_group->xg_gno;
__entry->i1_domain = i1->rc_domain;
__entry->i1_startblock = i1->rc_startblock;
__entry->i1_blockcount = i1->rc_blockcount;
@@ -3424,7 +3553,7 @@ DECLARE_EVENT_CLASS(xfs_refcount_triple_extent_class,
),
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
- __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->agno = cur->bc_group->xg_gno;
__entry->i1_domain = i1->rc_domain;
__entry->i1_startblock = i1->rc_startblock;
__entry->i1_blockcount = i1->rc_blockcount;
@@ -3838,7 +3967,45 @@ DEFINE_INODE_IREC_EVENT(xfs_swap_extent_rmap_remap_piece);
DEFINE_INODE_ERROR_EVENT(xfs_swap_extent_rmap_error);
/* fsmap traces */
-DECLARE_EVENT_CLASS(xfs_fsmap_class,
+TRACE_EVENT(xfs_fsmap_mapping, /* standalone event built from a struct xfs_fsmap_irec */
+ TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno,
+ const struct xfs_fsmap_irec *frec),
+ TP_ARGS(mp, keydev, agno, frec),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(dev_t, keydev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agblock_t, agbno) /* filled from frec->rec_key, printed as "rmapbno" */
+ __field(xfs_daddr_t, start_daddr)
+ __field(xfs_daddr_t, len_daddr)
+ __field(uint64_t, owner)
+ __field(uint64_t, offset)
+ __field(unsigned int, flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = mp->m_super->s_dev;
+ __entry->keydev = new_decode_dev(keydev); /* the u32 key device is decoded into a dev_t for MAJOR()/MINOR() */
+ __entry->agno = agno;
+ __entry->agbno = frec->rec_key;
+ __entry->start_daddr = frec->start_daddr;
+ __entry->len_daddr = frec->len_daddr;
+ __entry->owner = frec->owner;
+ __entry->offset = frec->offset;
+ __entry->flags = frec->rm_flags;
+ ),
+ TP_printk("dev %d:%d keydev %d:%d agno 0x%x rmapbno 0x%x start_daddr 0x%llx len_daddr 0x%llx owner 0x%llx fileoff 0x%llx flags 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ MAJOR(__entry->keydev), MINOR(__entry->keydev),
+ __entry->agno,
+ __entry->agbno,
+ __entry->start_daddr,
+ __entry->len_daddr,
+ __entry->owner,
+ __entry->offset,
+ __entry->flags)
+);
+
+DECLARE_EVENT_CLASS(xfs_fsmap_group_key_class,
TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno,
const struct xfs_rmap_irec *rmap),
TP_ARGS(mp, keydev, agno, rmap),
@@ -3846,8 +4013,7 @@ DECLARE_EVENT_CLASS(xfs_fsmap_class,
__field(dev_t, dev)
__field(dev_t, keydev)
__field(xfs_agnumber_t, agno)
- __field(xfs_fsblock_t, bno)
- __field(xfs_filblks_t, len)
+ __field(xfs_agblock_t, agbno)
__field(uint64_t, owner)
__field(uint64_t, offset)
__field(unsigned int, flags)
@@ -3856,33 +4022,30 @@ DECLARE_EVENT_CLASS(xfs_fsmap_class,
__entry->dev = mp->m_super->s_dev;
__entry->keydev = new_decode_dev(keydev);
__entry->agno = agno;
- __entry->bno = rmap->rm_startblock;
- __entry->len = rmap->rm_blockcount;
+ __entry->agbno = rmap->rm_startblock;
__entry->owner = rmap->rm_owner;
__entry->offset = rmap->rm_offset;
__entry->flags = rmap->rm_flags;
),
- TP_printk("dev %d:%d keydev %d:%d agno 0x%x startblock 0x%llx fsbcount 0x%llx owner 0x%llx fileoff 0x%llx flags 0x%x",
+ TP_printk("dev %d:%d keydev %d:%d agno 0x%x startblock 0x%x owner 0x%llx fileoff 0x%llx flags 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
MAJOR(__entry->keydev), MINOR(__entry->keydev),
__entry->agno,
- __entry->bno,
- __entry->len,
+ __entry->agbno,
__entry->owner,
__entry->offset,
__entry->flags)
)
-#define DEFINE_FSMAP_EVENT(name) \
-DEFINE_EVENT(xfs_fsmap_class, name, \
+#define DEFINE_FSMAP_GROUP_KEY_EVENT(name) \
+DEFINE_EVENT(xfs_fsmap_group_key_class, name, \
TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_agnumber_t agno, \
const struct xfs_rmap_irec *rmap), \
TP_ARGS(mp, keydev, agno, rmap))
-DEFINE_FSMAP_EVENT(xfs_fsmap_low_key);
-DEFINE_FSMAP_EVENT(xfs_fsmap_high_key);
-DEFINE_FSMAP_EVENT(xfs_fsmap_mapping);
+DEFINE_FSMAP_GROUP_KEY_EVENT(xfs_fsmap_low_group_key);
+DEFINE_FSMAP_GROUP_KEY_EVENT(xfs_fsmap_high_group_key);
-DECLARE_EVENT_CLASS(xfs_fsmap_linear_class,
- TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno),
+DECLARE_EVENT_CLASS(xfs_fsmap_linear_key_class,
+ TP_PROTO(struct xfs_mount *mp, u32 keydev, xfs_fsblock_t bno),
TP_ARGS(mp, keydev, bno),
TP_STRUCT__entry(
__field(dev_t, dev)
@@ -3899,12 +4062,12 @@ DECLARE_EVENT_CLASS(xfs_fsmap_linear_class,
MAJOR(__entry->keydev), MINOR(__entry->keydev),
__entry->bno)
)
-#define DEFINE_FSMAP_LINEAR_EVENT(name) \
-DEFINE_EVENT(xfs_fsmap_linear_class, name, \
+#define DEFINE_FSMAP_LINEAR_KEY_EVENT(name) \
+DEFINE_EVENT(xfs_fsmap_linear_key_class, name, \
TP_PROTO(struct xfs_mount *mp, u32 keydev, uint64_t bno), \
TP_ARGS(mp, keydev, bno))
-DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_low_key_linear);
-DEFINE_FSMAP_LINEAR_EVENT(xfs_fsmap_high_key_linear);
+DEFINE_FSMAP_LINEAR_KEY_EVENT(xfs_fsmap_low_linear_key);
+DEFINE_FSMAP_LINEAR_KEY_EVENT(xfs_fsmap_high_linear_key);
DECLARE_EVENT_CLASS(xfs_getfsmap_class,
TP_PROTO(struct xfs_mount *mp, struct xfs_fsmap *fsmap),
@@ -4036,9 +4199,9 @@ DEFINE_TRANS_EVENT(xfs_trans_commit_items);
DEFINE_TRANS_EVENT(xfs_trans_free_items);
TRACE_EVENT(xfs_iunlink_update_bucket,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int bucket,
+ TP_PROTO(const struct xfs_perag *pag, unsigned int bucket,
xfs_agino_t old_ptr, xfs_agino_t new_ptr),
- TP_ARGS(mp, agno, bucket, old_ptr, new_ptr),
+ TP_ARGS(pag, bucket, old_ptr, new_ptr),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -4047,8 +4210,8 @@ TRACE_EVENT(xfs_iunlink_update_bucket,
__field(xfs_agino_t, new_ptr)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->bucket = bucket;
__entry->old_ptr = old_ptr;
__entry->new_ptr = new_ptr;
@@ -4062,9 +4225,8 @@ TRACE_EVENT(xfs_iunlink_update_bucket,
);
TRACE_EVENT(xfs_iunlink_update_dinode,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t agino,
- xfs_agino_t old_ptr, xfs_agino_t new_ptr),
- TP_ARGS(mp, agno, agino, old_ptr, new_ptr),
+ TP_PROTO(const struct xfs_iunlink_item *iup, xfs_agino_t old_ptr),
+ TP_ARGS(iup, old_ptr),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -4073,11 +4235,12 @@ TRACE_EVENT(xfs_iunlink_update_dinode,
__field(xfs_agino_t, new_ptr)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- __entry->agino = agino;
+ __entry->dev = pag_mount(iup->pag)->m_super->s_dev;
+ __entry->agno = pag_agno(iup->pag);
+ __entry->agino =
+ XFS_INO_TO_AGINO(iup->ip->i_mount, iup->ip->i_ino);
__entry->old_ptr = old_ptr;
- __entry->new_ptr = new_ptr;
+ __entry->new_ptr = iup->next_agino;
),
TP_printk("dev %d:%d agno 0x%x agino 0x%x old 0x%x new 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
@@ -4180,37 +4343,35 @@ DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_sick);
DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_corrupt);
DEFINE_FS_CORRUPT_EVENT(xfs_fs_mark_healthy);
DEFINE_FS_CORRUPT_EVENT(xfs_fs_unfixed_corruption);
-DEFINE_FS_CORRUPT_EVENT(xfs_rt_mark_sick);
-DEFINE_FS_CORRUPT_EVENT(xfs_rt_mark_corrupt);
-DEFINE_FS_CORRUPT_EVENT(xfs_rt_mark_healthy);
-DEFINE_FS_CORRUPT_EVENT(xfs_rt_unfixed_corruption);
-DECLARE_EVENT_CLASS(xfs_ag_corrupt_class,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, unsigned int flags),
- TP_ARGS(mp, agno, flags),
+DECLARE_EVENT_CLASS(xfs_group_corrupt_class,
+ TP_PROTO(const struct xfs_group *xg, unsigned int flags),
+ TP_ARGS(xg, flags),
TP_STRUCT__entry(
__field(dev_t, dev)
- __field(xfs_agnumber_t, agno)
+ __field(enum xfs_group_type, type)
+ __field(uint32_t, index)
__field(unsigned int, flags)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = xg->xg_mount->m_super->s_dev;
+ __entry->type = xg->xg_type;
+ __entry->index = xg->xg_gno;
__entry->flags = flags;
),
- TP_printk("dev %d:%d agno 0x%x flags 0x%x",
+ TP_printk("dev %d:%d %sno 0x%x flags 0x%x",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->agno, __entry->flags)
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
+ __entry->index, __entry->flags)
);
-#define DEFINE_AG_CORRUPT_EVENT(name) \
-DEFINE_EVENT(xfs_ag_corrupt_class, name, \
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \
- unsigned int flags), \
- TP_ARGS(mp, agno, flags))
-DEFINE_AG_CORRUPT_EVENT(xfs_ag_mark_sick);
-DEFINE_AG_CORRUPT_EVENT(xfs_ag_mark_corrupt);
-DEFINE_AG_CORRUPT_EVENT(xfs_ag_mark_healthy);
-DEFINE_AG_CORRUPT_EVENT(xfs_ag_unfixed_corruption);
+#define DEFINE_GROUP_CORRUPT_EVENT(name) \
+DEFINE_EVENT(xfs_group_corrupt_class, name, \
+ TP_PROTO(const struct xfs_group *xg, unsigned int flags), \
+ TP_ARGS(xg, flags))
+DEFINE_GROUP_CORRUPT_EVENT(xfs_group_mark_sick);
+DEFINE_GROUP_CORRUPT_EVENT(xfs_group_mark_corrupt);
+DEFINE_GROUP_CORRUPT_EVENT(xfs_group_mark_healthy);
+DEFINE_GROUP_CORRUPT_EVENT(xfs_group_unfixed_corruption);
DECLARE_EVENT_CLASS(xfs_inode_corrupt_class,
TP_PROTO(struct xfs_inode *ip, unsigned int flags),
@@ -4238,29 +4399,10 @@ DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_corrupt);
DEFINE_INODE_CORRUPT_EVENT(xfs_inode_mark_healthy);
DEFINE_INODE_CORRUPT_EVENT(xfs_inode_unfixed_corruption);
-TRACE_EVENT(xfs_iwalk_ag,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
- xfs_agino_t startino),
- TP_ARGS(mp, agno, startino),
- TP_STRUCT__entry(
- __field(dev_t, dev)
- __field(xfs_agnumber_t, agno)
- __field(xfs_agino_t, startino)
- ),
- TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
- __entry->startino = startino;
- ),
- TP_printk("dev %d:%d agno 0x%x startino 0x%x",
- MAJOR(__entry->dev), MINOR(__entry->dev), __entry->agno,
- __entry->startino)
-)
-
TRACE_EVENT(xfs_iwalk_ag_rec,
- TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno,
+ TP_PROTO(const struct xfs_perag *pag, \
struct xfs_inobt_rec_incore *irec),
- TP_ARGS(mp, agno, irec),
+ TP_ARGS(pag, irec),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
@@ -4268,8 +4410,8 @@ TRACE_EVENT(xfs_iwalk_ag_rec,
__field(uint64_t, freemask)
),
TP_fast_assign(
- __entry->dev = mp->m_super->s_dev;
- __entry->agno = agno;
+ __entry->dev = pag_mount(pag)->m_super->s_dev;
+ __entry->agno = pag_agno(pag);
__entry->startino = irec->ir_startino;
__entry->freemask = irec->ir_free;
),
@@ -4331,7 +4473,7 @@ TRACE_EVENT(xfs_btree_commit_afakeroot,
TP_fast_assign(
__entry->dev = cur->bc_mp->m_super->s_dev;
__assign_str(name);
- __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->agno = cur->bc_group->xg_gno;
__entry->agbno = cur->bc_ag.afake->af_root;
__entry->levels = cur->bc_ag.afake->af_levels;
__entry->blocks = cur->bc_ag.afake->af_blocks;
@@ -4446,7 +4588,7 @@ TRACE_EVENT(xfs_btree_bload_block,
__entry->agno = XFS_FSB_TO_AGNO(cur->bc_mp, fsb);
__entry->agbno = XFS_FSB_TO_AGBNO(cur->bc_mp, fsb);
} else {
- __entry->agno = cur->bc_ag.pag->pag_agno;
+ __entry->agno = cur->bc_group->xg_gno;
__entry->agbno = be32_to_cpu(ptr->s);
}
__entry->nr_records = nr_records;
@@ -4671,35 +4813,39 @@ TRACE_EVENT(xfs_force_shutdown,
);
#ifdef CONFIG_XFS_DRAIN_INTENTS
-DECLARE_EVENT_CLASS(xfs_perag_intents_class,
- TP_PROTO(struct xfs_perag *pag, void *caller_ip),
- TP_ARGS(pag, caller_ip),
+DECLARE_EVENT_CLASS(xfs_group_intents_class,
+ TP_PROTO(const struct xfs_group *xg, void *caller_ip),
+ TP_ARGS(xg, caller_ip),
TP_STRUCT__entry(
__field(dev_t, dev)
- __field(xfs_agnumber_t, agno)
+ __field(enum xfs_group_type, type)
+ __field(uint32_t, index)
__field(long, nr_intents)
__field(void *, caller_ip)
),
TP_fast_assign(
- __entry->dev = pag->pag_mount->m_super->s_dev;
- __entry->agno = pag->pag_agno;
- __entry->nr_intents = atomic_read(&pag->pag_intents_drain.dr_count);
+ __entry->dev = xg->xg_mount->m_super->s_dev;
+ __entry->type = xg->xg_type;
+ __entry->index = xg->xg_gno;
+ __entry->nr_intents =
+ atomic_read(&xg->xg_intents_drain.dr_count);
__entry->caller_ip = caller_ip;
),
- TP_printk("dev %d:%d agno 0x%x intents %ld caller %pS",
+ TP_printk("dev %d:%d %sno 0x%x intents %ld caller %pS",
MAJOR(__entry->dev), MINOR(__entry->dev),
- __entry->agno,
+ __print_symbolic(__entry->type, XG_TYPE_STRINGS),
+ __entry->index,
__entry->nr_intents,
__entry->caller_ip)
);
-#define DEFINE_PERAG_INTENTS_EVENT(name) \
-DEFINE_EVENT(xfs_perag_intents_class, name, \
- TP_PROTO(struct xfs_perag *pag, void *caller_ip), \
- TP_ARGS(pag, caller_ip))
-DEFINE_PERAG_INTENTS_EVENT(xfs_perag_intent_hold);
-DEFINE_PERAG_INTENTS_EVENT(xfs_perag_intent_rele);
-DEFINE_PERAG_INTENTS_EVENT(xfs_perag_wait_intents);
+#define DEFINE_GROUP_INTENTS_EVENT(name) \
+DEFINE_EVENT(xfs_group_intents_class, name, \
+ TP_PROTO(const struct xfs_group *xg, void *caller_ip), \
+ TP_ARGS(xg, caller_ip))
+DEFINE_GROUP_INTENTS_EVENT(xfs_group_intent_hold);
+DEFINE_GROUP_INTENTS_EVENT(xfs_group_intent_rele);
+DEFINE_GROUP_INTENTS_EVENT(xfs_group_wait_intents);
#endif /* CONFIG_XFS_DRAIN_INTENTS */
@@ -5327,6 +5473,107 @@ DEFINE_EVENT(xfs_getparents_class, name, \
DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_begin);
DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_end);
+DECLARE_EVENT_CLASS(xfs_metadir_update_class, /* shared layout for events that take a struct xfs_metadir_update */
+ TP_PROTO(const struct xfs_metadir_update *upd),
+ TP_ARGS(upd),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, dp_ino)
+ __field(xfs_ino_t, ino)
+ __string(fname, upd->path) /* upd->path is captured under the field name fname */
+ ),
+ TP_fast_assign(
+ __entry->dev = upd->dp->i_mount->m_super->s_dev; /* upd->dp is dereferenced unconditionally */
+ __entry->dp_ino = upd->dp->i_ino;
+ __entry->ino = upd->ip ? upd->ip->i_ino : NULLFSINO; /* NULLFSINO when upd->ip is NULL */
+ __assign_str(fname);
+ ),
+ TP_printk("dev %d:%d dp 0x%llx fname '%s' ino 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->dp_ino,
+ __get_str(fname),
+ __entry->ino)
+)
+
+#define DEFINE_METADIR_UPDATE_EVENT(name) \
+DEFINE_EVENT(xfs_metadir_update_class, name, \
+ TP_PROTO(const struct xfs_metadir_update *upd), \
+ TP_ARGS(upd)) /* declares event "name" as an instance of xfs_metadir_update_class */
+DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_start_create);
+DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_start_link);
+DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_commit);
+DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_cancel);
+DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_try_create);
+DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_create);
+DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_link);
+
+DECLARE_EVENT_CLASS(xfs_metadir_update_error_class, /* same fields as xfs_metadir_update_class plus an int error */
+ TP_PROTO(const struct xfs_metadir_update *upd, int error),
+ TP_ARGS(upd, error),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, dp_ino)
+ __field(xfs_ino_t, ino)
+ __field(int, error)
+ __string(fname, upd->path) /* upd->path is captured under the field name fname */
+ ),
+ TP_fast_assign(
+ __entry->dev = upd->dp->i_mount->m_super->s_dev; /* upd->dp is dereferenced unconditionally */
+ __entry->dp_ino = upd->dp->i_ino;
+ __entry->ino = upd->ip ? upd->ip->i_ino : NULLFSINO; /* NULLFSINO when upd->ip is NULL */
+ __entry->error = error; /* printed with %d, after the inode number */
+ __assign_str(fname);
+ ),
+ TP_printk("dev %d:%d dp 0x%llx fname '%s' ino 0x%llx error %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->dp_ino,
+ __get_str(fname),
+ __entry->ino,
+ __entry->error)
+)
+
+#define DEFINE_METADIR_UPDATE_ERROR_EVENT(name) \
+DEFINE_EVENT(xfs_metadir_update_error_class, name, \
+ TP_PROTO(const struct xfs_metadir_update *upd, int error), \
+ TP_ARGS(upd, error)) /* declares event "name" as an instance of xfs_metadir_update_error_class */
+DEFINE_METADIR_UPDATE_ERROR_EVENT(xfs_metadir_teardown);
+
+DECLARE_EVENT_CLASS(xfs_metadir_class, /* events carrying a directory inode, an xfs_name and an inode number */
+ TP_PROTO(struct xfs_inode *dp, struct xfs_name *name,
+ xfs_ino_t ino),
+ TP_ARGS(dp, name, ino),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, dp_ino)
+ __field(xfs_ino_t, ino)
+ __field(int, ftype)
+ __field(int, namelen)
+ __dynamic_array(char, name, name->len) /* exactly name->len bytes; no terminator is stored */
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(dp)->i_sb->s_dev;
+ __entry->dp_ino = dp->i_ino;
+ __entry->ino = ino;
+ __entry->ftype = name->type; /* rendered through XFS_DIR3_FTYPE_STR in TP_printk */
+ __entry->namelen = name->len; /* length bound for the '%.*s' conversion below */
+ memcpy(__get_str(name), name->name, name->len); /* raw byte copy of the length-counted name */
+ ),
+ TP_printk("dev %d:%d dir 0x%llx type %s name '%.*s' ino 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->dp_ino,
+ __print_symbolic(__entry->ftype, XFS_DIR3_FTYPE_STR),
+ __entry->namelen,
+ __get_str(name),
+ __entry->ino)
+)
+
+#define DEFINE_METADIR_EVENT(name) \
+DEFINE_EVENT(xfs_metadir_class, name, \
+ TP_PROTO(struct xfs_inode *dp, struct xfs_name *name, \
+ xfs_ino_t ino), \
+ TP_ARGS(dp, name, ino)) /* declares event "name" as an instance of xfs_metadir_class */
+DEFINE_METADIR_EVENT(xfs_metadir_lookup);
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index bdf3704dc301..30fbed27cf05 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -25,6 +25,8 @@
#include "xfs_dquot.h"
#include "xfs_icache.h"
#include "xfs_rtbitmap.h"
+#include "xfs_rtgroup.h"
+#include "xfs_sb.h"
struct kmem_cache *xfs_trans_cache;
@@ -67,7 +69,7 @@ xfs_trans_free(
struct xfs_trans *tp)
{
xfs_extent_busy_sort(&tp->t_busy);
- xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
+ xfs_extent_busy_clear(&tp->t_busy, false);
trace_xfs_trans_free(tp, _RET_IP_);
xfs_trans_clear_context(tp);
@@ -420,6 +422,8 @@ xfs_trans_mod_sb(
ASSERT(tp->t_rtx_res_used <= tp->t_rtx_res);
}
tp->t_frextents_delta += delta;
+ if (xfs_has_rtgroups(mp))
+ flags &= ~XFS_TRANS_SB_DIRTY;
break;
case XFS_TRANS_SB_RES_FREXTENTS:
/*
@@ -429,6 +433,8 @@ xfs_trans_mod_sb(
*/
ASSERT(delta < 0);
tp->t_res_frextents_delta += delta;
+ if (xfs_has_rtgroups(mp))
+ flags &= ~XFS_TRANS_SB_DIRTY;
break;
case XFS_TRANS_SB_DBLOCKS:
tp->t_dblocks_delta += delta;
@@ -455,6 +461,10 @@ xfs_trans_mod_sb(
case XFS_TRANS_SB_REXTSLOG:
tp->t_rextslog_delta += delta;
break;
+ case XFS_TRANS_SB_RGCOUNT:
+ ASSERT(delta > 0);
+ tp->t_rgcount_delta += delta;
+ break;
default:
ASSERT(0);
return;
@@ -497,20 +507,22 @@ xfs_trans_apply_sb_deltas(
}
/*
- * Updating frextents requires careful handling because it does not
- * behave like the lazysb counters because we cannot rely on log
- * recovery in older kenels to recompute the value from the rtbitmap.
- * This means that the ondisk frextents must be consistent with the
- * rtbitmap.
+ * sb_frextents was added to the lazy sb counters when the rt groups
+ * feature was introduced. This is possible because we know that all
+ * kernels supporting rtgroups will also recompute frextents from the
+ * realtime bitmap.
+ *
+ * For older file systems, updating frextents requires careful handling
+ * because we cannot rely on log recovery in older kernels to recompute
+ * the value from the rtbitmap. This means that the ondisk frextents
+ * must be consistent with the rtbitmap.
*
* Therefore, log the frextents change to the ondisk superblock and
* update the incore superblock so that future calls to xfs_log_sb
* write the correct value ondisk.
- *
- * Don't touch m_frextents because it includes incore reservations,
- * and those are handled by the unreserve function.
*/
- if (tp->t_frextents_delta || tp->t_res_frextents_delta) {
+ if ((tp->t_frextents_delta || tp->t_res_frextents_delta) &&
+ !xfs_has_rtgroups(tp->t_mountp)) {
struct xfs_mount *mp = tp->t_mountp;
int64_t rtxdelta;
@@ -536,6 +548,18 @@ xfs_trans_apply_sb_deltas(
}
if (tp->t_rextsize_delta) {
be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta);
+
+ /*
+ * Because the ondisk sb records rtgroup size in units of rt
+ * extents, any time we update the rt extent size we have to
+ * recompute the ondisk rtgroup block log. The incore values
+ * will be recomputed in xfs_trans_unreserve_and_mod_sb.
+ */
+ if (xfs_has_rtgroups(tp->t_mountp)) {
+ sbp->sb_rgblklog = xfs_compute_rgblklog(
+ be32_to_cpu(sbp->sb_rgextents),
+ be32_to_cpu(sbp->sb_rextsize));
+ }
whole = 1;
}
if (tp->t_rbmblocks_delta) {
@@ -554,6 +578,10 @@ xfs_trans_apply_sb_deltas(
sbp->sb_rextslog += tp->t_rextslog_delta;
whole = 1;
}
+ if (tp->t_rgcount_delta) {
+ be32_add_cpu(&sbp->sb_rgcount, tp->t_rgcount_delta);
+ whole = 1;
+ }
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
if (whole)
@@ -618,7 +646,7 @@ xfs_trans_unreserve_and_mod_sb(
}
ASSERT(tp->t_rtx_res || tp->t_frextents_delta >= 0);
- if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
+ if (xfs_has_rtgroups(mp) || (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
rtxdelta += tp->t_frextents_delta;
ASSERT(rtxdelta >= 0);
}
@@ -651,23 +679,21 @@ xfs_trans_unreserve_and_mod_sb(
mp->m_sb.sb_icount += idelta;
mp->m_sb.sb_ifree += ifreedelta;
/*
- * Do not touch sb_frextents here because we are dealing with incore
- * reservation. sb_frextents is not part of the lazy sb counters so it
- * must be consistent with the ondisk rtbitmap and must never include
- * incore reservations.
+ * Do not touch sb_frextents here because it is handled in
+ * xfs_trans_apply_sb_deltas for file systems where it isn't a lazy
+ * counter anyway.
*/
mp->m_sb.sb_dblocks += tp->t_dblocks_delta;
mp->m_sb.sb_agcount += tp->t_agcount_delta;
mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta;
- mp->m_sb.sb_rextsize += tp->t_rextsize_delta;
- if (tp->t_rextsize_delta) {
- mp->m_rtxblklog = log2_if_power2(mp->m_sb.sb_rextsize);
- mp->m_rtxblkmask = mask64_if_power2(mp->m_sb.sb_rextsize);
- }
+ if (tp->t_rextsize_delta)
+ xfs_mount_sb_set_rextsize(mp, &mp->m_sb,
+ mp->m_sb.sb_rextsize + tp->t_rextsize_delta);
mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta;
mp->m_sb.sb_rblocks += tp->t_rblocks_delta;
mp->m_sb.sb_rextents += tp->t_rextents_delta;
mp->m_sb.sb_rextslog += tp->t_rextslog_delta;
+ mp->m_sb.sb_rgcount += tp->t_rgcount_delta;
spin_unlock(&mp->m_sb_lock);
/*
@@ -1262,11 +1288,26 @@ retry:
gdqp = (new_gdqp != ip->i_gdquot) ? new_gdqp : NULL;
pdqp = (new_pdqp != ip->i_pdquot) ? new_pdqp : NULL;
if (udqp || gdqp || pdqp) {
+ xfs_filblks_t dblocks, rblocks;
unsigned int qflags = XFS_QMOPT_RES_REGBLKS;
+ bool isrt = XFS_IS_REALTIME_INODE(ip);
if (force)
qflags |= XFS_QMOPT_FORCE_RES;
+ if (isrt) {
+ error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
+ if (error)
+ goto out_cancel;
+ }
+
+ xfs_inode_count_blocks(tp, ip, &dblocks, &rblocks);
+
+ if (isrt)
+ rblocks += ip->i_delayed_blks;
+ else
+ dblocks += ip->i_delayed_blks;
+
/*
* Reserve enough quota to handle blocks on disk and reserved
* for a delayed allocation. We'll actually transfer the
@@ -1274,8 +1315,20 @@ retry:
* though that part is only semi-transactional.
*/
error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp,
- pdqp, ip->i_nblocks + ip->i_delayed_blks,
- 1, qflags);
+ pdqp, dblocks, 1, qflags);
+ if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
+ xfs_trans_cancel(tp);
+ xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
+ retried = true;
+ goto retry;
+ }
+ if (error)
+ goto out_cancel;
+
+ /* Do the same for realtime. */
+ qflags = XFS_QMOPT_RES_RTBLKS | (qflags & XFS_QMOPT_FORCE_RES);
+ error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp,
+ pdqp, rblocks, 0, qflags);
if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
xfs_trans_cancel(tp);
xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index f06cc0f41665..71c2e82e4dad 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -148,6 +148,7 @@ typedef struct xfs_trans {
int64_t t_rblocks_delta;/* superblock rblocks change */
int64_t t_rextents_delta;/* superblocks rextents chg */
int64_t t_rextslog_delta;/* superblocks rextslog chg */
+ int64_t t_rgcount_delta; /* superblock rgcount change */
struct list_head t_items; /* log item descriptors */
struct list_head t_busy; /* list of busy extents */
struct list_head t_dfops; /* deferred operations */
@@ -214,6 +215,7 @@ xfs_trans_read_buf(
}
struct xfs_buf *xfs_trans_getsb(struct xfs_trans *);
+struct xfs_buf *xfs_trans_getrtsb(struct xfs_trans *tp);
void xfs_trans_brelse(xfs_trans_t *, struct xfs_buf *);
void xfs_trans_bjoin(xfs_trans_t *, struct xfs_buf *);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index e28ab74af4f0..8e886ecfd69a 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -168,12 +168,11 @@ xfs_trans_get_buf_map(
/*
* Get and lock the superblock buffer for the given transaction.
*/
-struct xfs_buf *
-xfs_trans_getsb(
- struct xfs_trans *tp)
+static struct xfs_buf *
+__xfs_trans_getsb(
+ struct xfs_trans *tp,
+ struct xfs_buf *bp)
{
- struct xfs_buf *bp = tp->t_mountp->m_sb_bp;
-
/*
* Just increment the lock recursion count if the buffer is already
* attached to this transaction.
@@ -197,6 +196,22 @@ xfs_trans_getsb(
return bp;
}
+struct xfs_buf * /* returns whatever __xfs_trans_getsb() returns for the mount's m_sb_bp */
+xfs_trans_getsb(
+ struct xfs_trans *tp)
+{
+ return __xfs_trans_getsb(tp, tp->t_mountp->m_sb_bp); /* buffer comes from tp's mount; the helper does the rest */
+}
+
+struct xfs_buf * /* NULL when the mount has no m_rtsb_bp, else the result of __xfs_trans_getsb() */
+xfs_trans_getrtsb(
+ struct xfs_trans *tp)
+{
+ struct xfs_buf *rtsb_bp = tp->t_mountp->m_rtsb_bp; /* may be NULL */
+
+ return rtsb_bp ? __xfs_trans_getsb(tp, rtsb_bp) : NULL;
+}
+
/*
* Get and lock the buffer for the caller if it is not already
* locked within the given transaction. If it has not yet been
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index b368e13424c4..481ba3dc9f19 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -156,6 +156,8 @@ xfs_trans_mod_ino_dquot(
unsigned int field,
int64_t delta)
{
+ ASSERT(!xfs_is_metadir_inode(ip) || XFS_IS_DQDETACHED(ip));
+
xfs_trans_mod_dquot(tp, dqp, field, delta);
if (xfs_hooks_switched_on(&xfs_dqtrx_hooks_switch)) {
@@ -247,6 +249,8 @@ xfs_trans_mod_dquot_byino(
xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
return;
+ ASSERT(!xfs_is_metadir_inode(ip) || XFS_IS_DQDETACHED(ip));
+
if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
xfs_trans_mod_ino_dquot(tp, ip, ip->i_udquot, field, delta);
if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot)
@@ -962,6 +966,8 @@ xfs_trans_reserve_quota_nblks(
if (!XFS_IS_QUOTA_ON(mp))
return 0;
+ if (xfs_is_metadir_inode(ip))
+ return 0;
ASSERT(!xfs_is_quota_inode(&mp->m_sb, ip->i_ino));
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
@@ -1025,3 +1031,14 @@ xfs_trans_free_dqinfo(
kmem_cache_free(xfs_dqtrx_cache, tp->t_dqinfo);
tp->t_dqinfo = NULL;
}
+
+int /* forwards the return value of xfs_trans_reserve_quota_nblks() */
+xfs_quota_reserve_blkres(
+ struct xfs_inode *ip,
+ int64_t blocks)
+{
+ bool isrt = XFS_IS_REALTIME_INODE(ip);
+
+ /* No transaction (NULL); realtime inodes pass blocks as the 4th argument, others as the 3rd. */
+ return xfs_trans_reserve_quota_nblks(NULL, ip, isrt ? 0 : blocks,
+ isrt ? blocks : 0, false);
+}
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index eaf849260bd6..0f641a9091ec 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -51,8 +51,7 @@ xfs_attr_grab_log_assist(
return error;
xfs_set_using_logged_xattrs(mp);
- xfs_warn_mount(mp, XFS_OPSTATE_WARNED_LARP,
- "EXPERIMENTAL logged extended attributes feature in use. Use at your own risk!");
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LARP);
return 0;
}