From 79eb4dde742fe2e9c9e301432b894a7410261ce7 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:24 +1100 Subject: cifs: dont overwrite dentry name in d_revalidate Use vfat's method for dealing with negative dentries to preserve case, rather than overwrite dentry name in d_revalidate, which is a bit ugly and also gets in the way of doing lock-free path walking. Signed-off-by: Nick Piggin --- fs/cifs/dir.c | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) (limited to 'fs/cifs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 3840eddbfb7a..521d841b1fd1 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -656,22 +656,34 @@ lookup_out: static int cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) { - int isValid = 1; - if (direntry->d_inode) { if (cifs_revalidate_dentry(direntry)) return 0; - } else { - cFYI(1, "neg dentry 0x%p name = %s", - direntry, direntry->d_name.name); - if (time_after(jiffies, direntry->d_time + HZ) || - !lookupCacheEnabled) { - d_drop(direntry); - isValid = 0; - } + else + return 1; } - return isValid; + /* + * This may be nfsd (or something), anyway, we can't see the + * intent of this. So, since this can be for creation, drop it. + */ + if (!nd) + return 0; + + /* + * Drop the negative dentry, in order to make sure to use the + * case sensitive name which is specified by user if this is + * for creation. 
+ */ + if (!(nd->flags & (LOOKUP_CONTINUE | LOOKUP_PARENT))) { + if (nd->flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET)) + return 0; + } + + if (time_after(jiffies, direntry->d_time + HZ) || !lookupCacheEnabled) + return 0; + + return 1; } /* static int cifs_d_delete(struct dentry *direntry) @@ -709,15 +721,8 @@ static int cifs_ci_compare(struct dentry *dentry, struct qstr *a, struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls; if ((a->len == b->len) && - (nls_strnicmp(codepage, a->name, b->name, a->len) == 0)) { - /* - * To preserve case, don't let an existing negative dentry's - * case take precedence. If a is not a negative dentry, this - * should have no side effects - */ - memcpy((void *)a->name, b->name, a->len); + (nls_strnicmp(codepage, a->name, b->name, a->len) == 0)) return 0; - } return 1; } -- cgit v1.2.3 From 621e155a3591962420eacdd39f6f0aa29ceb221e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:27 +1100 Subject: fs: change d_compare for rcu-walk Change d_compare so it may be called from lock-free RCU lookups. This does put significant restrictions on what may be done from the callback, however there don't seem to have been any problems with in-tree fses. If some strange use case pops up that _really_ cannot cope with the rcu-walk rules, we can just add new rcu-unaware callbacks, which would cause name lookup to drop out of rcu-walk mode. For in-tree filesystems, this is just a mechanical change. 
Signed-off-by: Nick Piggin --- fs/cifs/dir.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'fs/cifs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 521d841b1fd1..c60133f0d8e4 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -715,13 +715,15 @@ static int cifs_ci_hash(struct dentry *dentry, struct qstr *q) return 0; } -static int cifs_ci_compare(struct dentry *dentry, struct qstr *a, - struct qstr *b) +static int cifs_ci_compare(const struct dentry *parent, + const struct inode *pinode, + const struct dentry *dentry, const struct inode *inode, + unsigned int len, const char *str, const struct qstr *name) { - struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls; + struct nls_table *codepage = CIFS_SB(pinode->i_sb)->local_nls; - if ((a->len == b->len) && - (nls_strnicmp(codepage, a->name, b->name, a->len) == 0)) + if ((name->len == len) && + (nls_strnicmp(codepage, name->name, str, len) == 0)) return 0; return 1; } -- cgit v1.2.3 From b1e6a015a580ad145689ad1d6b4aa0e03e6c868b Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:28 +1100 Subject: fs: change d_hash for rcu-walk Change d_hash so it may be called from lock-free RCU lookups. See similar patch for d_compare for details. For in-tree filesystems, this is just a mechanical change. 
Signed-off-by: Nick Piggin --- fs/cifs/dir.c | 5 +++-- fs/cifs/readdir.c | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'fs/cifs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index c60133f0d8e4..88bfe686ac00 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -700,9 +700,10 @@ const struct dentry_operations cifs_dentry_ops = { /* d_delete: cifs_d_delete, */ /* not needed except for debugging */ }; -static int cifs_ci_hash(struct dentry *dentry, struct qstr *q) +static int cifs_ci_hash(const struct dentry *dentry, const struct inode *inode, + struct qstr *q) { - struct nls_table *codepage = CIFS_SB(dentry->d_inode->i_sb)->local_nls; + struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls; unsigned long hash; int i; diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index a73eb9f4bdaf..ee463aeca0b0 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -79,7 +79,7 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, cFYI(1, "For %s", name->name); if (parent->d_op && parent->d_op->d_hash) - parent->d_op->d_hash(parent, name); + parent->d_op->d_hash(parent, parent->d_inode, name); else name->hash = full_name_hash(name->name, name->len); -- cgit v1.2.3 From b23fb0a60379a95e10c671f646b259ea2558421e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:35 +1100 Subject: fs: scale inode alias list Add a new lock, dcache_inode_lock, to protect the inode's i_dentry list from concurrent modification. d_alias is also protected by d_lock. 
Signed-off-by: Nick Piggin --- fs/cifs/inode.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/cifs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 589f3e3f6e00..003698365ece 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -810,12 +810,15 @@ inode_has_hashed_dentries(struct inode *inode) struct dentry *dentry; spin_lock(&dcache_lock); + spin_lock(&dcache_inode_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); return true; } } + spin_unlock(&dcache_inode_lock); spin_unlock(&dcache_lock); return false; } -- cgit v1.2.3 From b5c84bf6f6fa3a7dfdcb556023a62953574b60ee Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:38 +1100 Subject: fs: dcache remove dcache_lock dcache_lock no longer protects anything. remove it. Signed-off-by: Nick Piggin --- fs/cifs/inode.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'fs/cifs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 003698365ece..99b9a2cc14b7 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -809,17 +809,14 @@ inode_has_hashed_dentries(struct inode *inode) { struct dentry *dentry; - spin_lock(&dcache_lock); spin_lock(&dcache_inode_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return true; } } spin_unlock(&dcache_inode_lock); - spin_unlock(&dcache_lock); return false; } -- cgit v1.2.3 From fa0d7e3de6d6fc5004ad9dea0dd6b286af8f03e9 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:49 +1100 Subject: fs: icache RCU free inodes RCU free the struct inode. This will allow: - Subsequent store-free path walking patch. The inode must be consulted for permissions when walking, so an RCU inode reference is a must. 
- sb_inode_list_lock to be moved inside i_lock because sb list walkers who want to take i_lock no longer need to take sb_inode_list_lock to walk the list in the first place. This will simplify and optimize locking. - Could remove some nested trylock loops in dcache code - Could potentially simplify things a bit in VM land. Do not need to take the page lock to follow page->mapping. The downside of this is the performance cost of using RCU. In a simple creat/unlink microbenchmark, performance drops by about 10% due to inability to reuse cache-hot slab objects. As iterations increase and RCU freeing starts kicking over, this increases to about 20%. In cases where inode lifetimes are longer (i.e. many inodes may be allocated during the average life span of a single inode), a lot of this cache reuse is not applicable, so the regression caused by this patch is smaller. The cache-hot regression could largely be avoided by using SLAB_DESTROY_BY_RCU; however, this adds some complexity to list walking and store-free path walking, so I prefer to implement this at a later date, if it is shown to be a win in real situations. I haven't found a regression in any non-micro benchmark so I doubt it will be a problem. 
Signed-off-by: Nick Piggin --- fs/cifs/cifsfs.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/cifs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 3936aa7f2c22..223717dcc401 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -334,10 +334,17 @@ cifs_alloc_inode(struct super_block *sb) return &cifs_inode->vfs_inode; } +static void cifs_i_callback(struct rcu_head *head) +{ + struct inode *inode = container_of(head, struct inode, i_rcu); + INIT_LIST_HEAD(&inode->i_dentry); + kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); +} + static void cifs_destroy_inode(struct inode *inode) { - kmem_cache_free(cifs_inode_cachep, CIFS_I(inode)); + call_rcu(&inode->i_rcu, cifs_i_callback); } static void -- cgit v1.2.3 From fb045adb99d9b7c562dc7fef834857f78249daa1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:55 +1100 Subject: fs: dcache reduce branches in lookup path Reduce some branches and memory accesses in dcache lookup by adding dentry flags to indicate common d_ops are set, rather than having to check them. This saves a pointer memory access (dentry->d_op) in common path lookup situations, and saves another pointer load and branch in cases where we have d_op but not the particular operation. 
Patched with: git grep -E '[.>]([[:space:]])*d_op([[:space:]])*=' | xargs sed -e 's/\([^\t ]*\)->d_op = \(.*\);/d_set_d_op(\1, \2);/' -e 's/\([^\t ]*\)\.d_op = \(.*\);/d_set_d_op(\&\1, \2);/' -i Signed-off-by: Nick Piggin --- fs/cifs/dir.c | 16 ++++++++-------- fs/cifs/inode.c | 8 ++++---- fs/cifs/link.c | 4 ++-- fs/cifs/readdir.c | 4 ++-- 4 files changed, 16 insertions(+), 16 deletions(-) (limited to 'fs/cifs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 88bfe686ac00..e3b10ca6d453 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -135,9 +135,9 @@ static void setup_cifs_dentry(struct cifsTconInfo *tcon, struct inode *newinode) { if (tcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); } @@ -421,9 +421,9 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, int mode, rc = cifs_get_inode_info_unix(&newinode, full_path, inode->i_sb, xid); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); if (rc == 0) d_instantiate(direntry, newinode); @@ -604,9 +604,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, if ((rc == 0) && (newInode != NULL)) { if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_add(direntry, newInode); if (posix_open) { filp = lookup_instantiate_filp(nd, direntry, @@ -634,9 +634,9 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, rc = 0; direntry->d_time = jiffies; if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_add(direntry, 
NULL); /* if it was once a directory (but how can we tell?) we could do shrink_dcache_parent(direntry); */ diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 99b9a2cc14b7..2a239d878e85 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1319,9 +1319,9 @@ int cifs_mkdir(struct inode *inode, struct dentry *direntry, int mode) to set uid/gid */ inc_nlink(inode); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); cifs_unix_basic_to_fattr(&fattr, pInfo, cifs_sb); cifs_fill_uniqueid(inode->i_sb, &fattr); @@ -1363,9 +1363,9 @@ mkdir_get_info: inode->i_sb, xid, NULL); if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); /* setting nlink not necessary except in cases where we * failed to get it from the server or was set bogus */ diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 85cdbf831e7b..fe2f6a93c49e 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -525,9 +525,9 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) rc); } else { if (pTcon->nocase) - direntry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(direntry, &cifs_ci_dentry_ops); else - direntry->d_op = &cifs_dentry_ops; + d_set_d_op(direntry, &cifs_dentry_ops); d_instantiate(direntry, newinode); } } diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index ee463aeca0b0..ec5b68e3b928 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -103,9 +103,9 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name, } if (cifs_sb_master_tcon(CIFS_SB(sb))->nocase) - dentry->d_op = &cifs_ci_dentry_ops; + d_set_d_op(dentry, &cifs_ci_dentry_ops); else - dentry->d_op = &cifs_dentry_ops; + d_set_d_op(dentry, &cifs_dentry_ops); alias = d_materialise_unique(dentry, inode); if (alias != 
NULL) { -- cgit v1.2.3 From 34286d6662308d82aed891852d04c7c3a2649b16 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:57 +1100 Subject: fs: rcu-walk aware d_revalidate method Require filesystems be aware of .d_revalidate being called in rcu-walk mode (nd->flags & LOOKUP_RCU). For now do a simple push down, returning -ECHILD from all implementations. Signed-off-by: Nick Piggin --- fs/cifs/dir.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/cifs') diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index e3b10ca6d453..db2a58c00f7b 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -656,6 +656,9 @@ lookup_out: static int cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) { + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + if (direntry->d_inode) { if (cifs_revalidate_dentry(direntry)) return 0; -- cgit v1.2.3 From b74c79e99389cd79b31fcc08f82c24e492e63c7e Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:49:58 +1100 Subject: fs: provide rcu-walk aware permission i_ops Signed-off-by: Nick Piggin --- fs/cifs/cifsfs.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/cifs') diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 223717dcc401..8e21e0fe65d5 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -283,10 +283,13 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } -static int cifs_permission(struct inode *inode, int mask) +static int cifs_permission(struct inode *inode, int mask, unsigned int flags) { struct cifs_sb_info *cifs_sb; + if (flags & IPERM_FLAG_RCU) + return -ECHILD; + cifs_sb = CIFS_SB(inode->i_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM) { @@ -298,7 +301,7 @@ static int cifs_permission(struct inode *inode, int mask) on the client (above and beyond ACL on servers) for servers which do not support setting and viewing mode bits, so allowing client to check permissions is useful */ - return generic_permission(inode, mask, NULL); + return 
generic_permission(inode, mask, flags, NULL); } static struct kmem_cache *cifs_inode_cachep; -- cgit v1.2.3 From 873feea09ebc980cbd3631b767356ce1eee65ec1 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Fri, 7 Jan 2011 17:50:06 +1100 Subject: fs: dcache per-inode inode alias locking dcache_inode_lock can be replaced with per-inode locking. Use existing inode->i_lock for this. This is slightly non-trivial because we sometimes need to find the inode from the dentry, which requires d_inode to be stabilised (either with refcount or d_lock). Signed-off-by: Nick Piggin --- fs/cifs/inode.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/cifs') diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 2a239d878e85..a853a89857a5 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -809,14 +809,14 @@ inode_has_hashed_dentries(struct inode *inode) { struct dentry *dentry; - spin_lock(&dcache_inode_lock); + spin_lock(&inode->i_lock); list_for_each_entry(dentry, &inode->i_dentry, d_alias) { if (!d_unhashed(dentry) || IS_ROOT(dentry)) { - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); return true; } } - spin_unlock(&dcache_inode_lock); + spin_unlock(&inode->i_lock); return false; } -- cgit v1.2.3