summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander Viro <aviro@redhat.com>2005-06-23 00:09:01 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-06-23 09:45:17 -0700
commit991114c6fa6a21d1fa4d544abe78592352860c82 (patch)
treecc81f871756a70a3312552409ee5cacd6625aa1d
parentf972be33ce6a08b5f096ba013c7459a3a82f5f39 (diff)
downloadlinux-991114c6fa6a21d1fa4d544abe78592352860c82.tar.gz
linux-991114c6fa6a21d1fa4d544abe78592352860c82.tar.bz2
linux-991114c6fa6a21d1fa4d544abe78592352860c82.zip
[PATCH] fix for prune_icache()/forced final iput() races
Based on analysis and a patch from Russ Weight <rweight@us.ibm.com> There is a race condition that can occur if an inode is allocated and then released (using iput) during the ->fill_super functions. The race condition is between kswapd and mount. For most filesystems this can only happen in an error path when kswapd is running concurrently. For isofs, however, the error can occur in a more common code path (which is how the bug was found). The logic here is "we want final iput() to free inode *now* instead of letting it sit in cache if fs is going down or had not quite come up". The problem is with kswapd seeing such inodes in the middle of being killed and happily taking over. The clean solution would be to tell kswapd to leave those inodes alone and let our final iput deal with them. I.e. add a new flag (I_FORCED_FREEING), set it before write_inode_now() there and make prune_icache() leave those alone. Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
-rw-r--r--fs/inode.c16
-rw-r--r--include/linux/fs.h1
2 files changed, 11 insertions, 6 deletions
diff --git a/fs/inode.c b/fs/inode.c
index 801fe7f36280..1f9a3a2b89bc 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -500,7 +500,7 @@ repeat:
continue;
if (!test(inode, data))
continue;
- if (inode->i_state & (I_FREEING|I_CLEAR)) {
+ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
__wait_on_freeing_inode(inode);
goto repeat;
}
@@ -525,7 +525,7 @@ repeat:
continue;
if (inode->i_sb != sb)
continue;
- if (inode->i_state & (I_FREEING|I_CLEAR)) {
+ if (inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
__wait_on_freeing_inode(inode);
goto repeat;
}
@@ -727,7 +727,7 @@ EXPORT_SYMBOL(iunique);
struct inode *igrab(struct inode *inode)
{
spin_lock(&inode_lock);
- if (!(inode->i_state & I_FREEING))
+ if (!(inode->i_state & (I_FREEING|I_WILL_FREE)))
__iget(inode);
else
/*
@@ -1024,17 +1024,21 @@ static void generic_forget_inode(struct inode *inode)
if (!(inode->i_state & (I_DIRTY|I_LOCK)))
list_move(&inode->i_list, &inode_unused);
inodes_stat.nr_unused++;
- spin_unlock(&inode_lock);
- if (!sb || (sb->s_flags & MS_ACTIVE))
+ if (!sb || (sb->s_flags & MS_ACTIVE)) {
+ spin_unlock(&inode_lock);
return;
+ }
+ inode->i_state |= I_WILL_FREE;
+ spin_unlock(&inode_lock);
write_inode_now(inode, 1);
spin_lock(&inode_lock);
+ inode->i_state &= ~I_WILL_FREE;
inodes_stat.nr_unused--;
hlist_del_init(&inode->i_hash);
}
list_del_init(&inode->i_list);
list_del_init(&inode->i_sb_list);
- inode->i_state|=I_FREEING;
+ inode->i_state |= I_FREEING;
inodes_stat.nr_inodes--;
spin_unlock(&inode_lock);
if (inode->i_data.nrpages)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e5a8db00df29..3622e952e98c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1025,6 +1025,7 @@ struct super_operations {
#define I_FREEING 16
#define I_CLEAR 32
#define I_NEW 64
+#define I_WILL_FREE 128
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)