diff options
author | Dave Chinner <david@fromorbit.com> | 2010-01-11 11:47:44 +0000 |
---|---|---|
committer | Alex Elder <aelder@sgi.com> | 2010-01-15 15:33:52 -0600 |
commit | 1c1c6ebcf5284aee4910f3b906ac90c20e510c82 (patch) | |
tree | bbcf74752bf7bc058a5c5bdd6bd03090c845b041 /fs/xfs/xfs_ialloc.c | |
parent | 44b56e0a1aed522a10051645e85d300e10926fd3 (diff) | |
download | linux-1c1c6ebcf5284aee4910f3b906ac90c20e510c82.tar.gz linux-1c1c6ebcf5284aee4910f3b906ac90c20e510c82.tar.bz2 linux-1c1c6ebcf5284aee4910f3b906ac90c20e510c82.zip |
xfs: Replace per-ag array with a radix tree
The use of an array for the per-ag structures requires reallocation
of the array when growing the filesystem. This requires locking
access to the array to avoid use after free situations, and the
locking is difficult to get right. To avoid needing to reallocate an
array, change the per-ag structures to an allocated object per ag
and index them using a tree structure.
The AGs are always densely indexed (hence the use of an array), but
the number supported is 2^32 and lookups tend to be random and hence
indexing needs to scale. A simple choice is a radix tree - it works
well with this sort of index. This change also removes another
large contiguous allocation from the mount/growfs path in XFS.
The growing process now needs to change to only initialise the new
AGs required for the extra space, and as such only needs to
exclusively lock the tree for inserts. The rest of the code only
needs to lock the tree while doing lookups, and hence this will
remove all the deadlocks that currently occur on the m_perag_lock as
it is now an innermost lock. The lock is also changed to a spinlock
from a read/write lock as the hold time is now extremely short.
To complete the picture, the per-ag structures will need to be
reference counted to ensure that we don't free/modify them while
they are still in use. This will be done in subsequent patch.
Signed-off-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Alex Elder <aelder@sgi.com>
Diffstat (limited to 'fs/xfs/xfs_ialloc.c')
-rw-r--r-- | fs/xfs/xfs_ialloc.c | 25 |
1 files changed, 2 insertions, 23 deletions
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index 884ee1367f46..52c9d006c0e6 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -383,11 +383,9 @@ xfs_ialloc_ag_alloc( newino = XFS_OFFBNO_TO_AGINO(args.mp, args.agbno, 0); be32_add_cpu(&agi->agi_count, newlen); be32_add_cpu(&agi->agi_freecount, newlen); - down_read(&args.mp->m_peraglock); pag = xfs_perag_get(args.mp, agno); pag->pagi_freecount += newlen; xfs_perag_put(pag); - up_read(&args.mp->m_peraglock); agi->agi_newino = cpu_to_be32(newino); /* @@ -489,7 +487,6 @@ xfs_ialloc_ag_select( */ agno = pagno; flags = XFS_ALLOC_FLAG_TRYLOCK; - down_read(&mp->m_peraglock); for (;;) { pag = xfs_perag_get(mp, agno); if (!pag->pagi_init) { @@ -531,7 +528,6 @@ xfs_ialloc_ag_select( goto nextag; } xfs_perag_put(pag); - up_read(&mp->m_peraglock); return agbp; } } @@ -544,18 +540,14 @@ nextag: * No point in iterating over the rest, if we're shutting * down. */ - if (XFS_FORCED_SHUTDOWN(mp)) { - up_read(&mp->m_peraglock); + if (XFS_FORCED_SHUTDOWN(mp)) return NULL; - } agno++; if (agno >= agcount) agno = 0; if (agno == pagno) { - if (flags == 0) { - up_read(&mp->m_peraglock); + if (flags == 0) return NULL; - } flags = 0; } } @@ -777,16 +769,13 @@ nextag: *inop = NULLFSINO; return noroom ? ENOSPC : 0; } - down_read(&mp->m_peraglock); pag = xfs_perag_get(mp, tagno); if (pag->pagi_inodeok == 0) { xfs_perag_put(pag); - up_read(&mp->m_peraglock); goto nextag; } error = xfs_ialloc_read_agi(mp, tp, tagno, &agbp); xfs_perag_put(pag); - up_read(&mp->m_peraglock); if (error) goto nextag; agi = XFS_BUF_TO_AGI(agbp); @@ -1015,9 +1004,7 @@ alloc_inode: goto error0; be32_add_cpu(&agi->agi_freecount, -1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - down_read(&mp->m_peraglock); pag->pagi_freecount--; - up_read(&mp->m_peraglock); error = xfs_check_agi_freecount(cur, agi); if (error) @@ -1100,9 +1087,7 @@ xfs_difree( /* * Get the allocation group header. */ - down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - up_read(&mp->m_peraglock); if (error) { cmn_err(CE_WARN, "xfs_difree: xfs_ialloc_read_agi() returned an error %d on %s. Returning error.", @@ -1169,11 +1154,9 @@ xfs_difree( be32_add_cpu(&agi->agi_count, -ilen); be32_add_cpu(&agi->agi_freecount, -(ilen - 1)); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); - down_read(&mp->m_peraglock); pag = xfs_perag_get(mp, agno); pag->pagi_freecount -= ilen - 1; xfs_perag_put(pag); - up_read(&mp->m_peraglock); xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1)); @@ -1202,11 +1185,9 @@ xfs_difree( */ be32_add_cpu(&agi->agi_freecount, 1); xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); - down_read(&mp->m_peraglock); pag = xfs_perag_get(mp, agno); pag->pagi_freecount++; xfs_perag_put(pag); - up_read(&mp->m_peraglock); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1); } @@ -1328,9 +1309,7 @@ xfs_imap( xfs_buf_t *agbp; /* agi buffer */ int i; /* temp state */ - down_read(&mp->m_peraglock); error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); - up_read(&mp->m_peraglock); if (error) { xfs_fs_cmn_err(CE_ALERT, mp, "xfs_imap: " "xfs_ialloc_read_agi() returned " |