diff options
Diffstat (limited to 'fs')
126 files changed, 1883 insertions, 1695 deletions
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index fb9ffcb43277..0923f2cf3c80 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -149,8 +149,6 @@ extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d); extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d); extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry); -extern void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, - void *p); extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid, struct super_block *sb, int new); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 703342e309f5..510040b04c96 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1224,100 +1224,43 @@ ino_t v9fs_qid2ino(struct p9_qid *qid) } /** - * v9fs_readlink - read a symlink's location (internal version) + * v9fs_vfs_follow_link - follow a symlink path * @dentry: dentry for symlink - * @buffer: buffer to load symlink location into - * @buflen: length of buffer - * + * @cookie: place to pass the data to put_link() */ -static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen) +static const char *v9fs_vfs_follow_link(struct dentry *dentry, void **cookie) { - int retval; - - struct v9fs_session_info *v9ses; - struct p9_fid *fid; + struct v9fs_session_info *v9ses = v9fs_dentry2v9ses(dentry); + struct p9_fid *fid = v9fs_fid_lookup(dentry); struct p9_wstat *st; + char *res; + + p9_debug(P9_DEBUG_VFS, "%pd\n", dentry); - p9_debug(P9_DEBUG_VFS, " %pd\n", dentry); - retval = -EPERM; - v9ses = v9fs_dentry2v9ses(dentry); - fid = v9fs_fid_lookup(dentry); if (IS_ERR(fid)) - return PTR_ERR(fid); + return ERR_CAST(fid); if (!v9fs_proto_dotu(v9ses)) - return -EBADF; + return ERR_PTR(-EBADF); st = p9_client_stat(fid); if (IS_ERR(st)) - return PTR_ERR(st); + return ERR_CAST(st); if (!(st->mode & P9_DMSYMLINK)) { - retval = -EINVAL; - goto done; + p9stat_free(st); + kfree(st); + return ERR_PTR(-EINVAL); } + res = st->extension; + st->extension = NULL; + if (strlen(res) >= PATH_MAX) + res[PATH_MAX - 1] = '\0'; - /* copy extension buffer into buffer */ - retval = min(strlen(st->extension)+1, (size_t)buflen); - memcpy(buffer, st->extension, retval); - - p9_debug(P9_DEBUG_VFS, "%pd -> %s (%.*s)\n", - dentry, st->extension, buflen, buffer); - -done: p9stat_free(st); kfree(st); - return retval; -} - -/** - * v9fs_vfs_follow_link - follow a symlink path - * @dentry: dentry for symlink - * @nd: nameidata - * - */ - -static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - int len = 0; - char *link = __getname(); - - p9_debug(P9_DEBUG_VFS, "%pd\n", dentry); - - if (!link) - link = ERR_PTR(-ENOMEM); - else { - len = v9fs_readlink(dentry, link, PATH_MAX); - - if (len < 0) { - __putname(link); - link = ERR_PTR(len); - } else - link[min(len, PATH_MAX-1)] = 0; - } - nd_set_link(nd, link); - - return NULL; -} - -/** - * v9fs_vfs_put_link - release a symlink path - * @dentry: dentry for symlink - * @nd: nameidata - * @p: unused - * - */ - -void -v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p) -{ - char *s = nd_get_link(nd); - - p9_debug(P9_DEBUG_VFS, " %pd %s\n", - dentry, IS_ERR(s) ? "<error>" : s); - if (!IS_ERR(s)) - __putname(s); + return *cookie = res; } /** @@ -1370,6 +1313,8 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname) return v9fs_vfs_mkspecial(dir, dentry, P9_DMSYMLINK, symname); } +#define U32_MAX_DIGITS 10 + /** * v9fs_vfs_link - create a hardlink * @old_dentry: dentry for file to link to @@ -1383,7 +1328,7 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) { int retval; - char *name; + char name[1 + U32_MAX_DIGITS + 2]; /* sign + number + \n + \0 */ struct p9_fid *oldfid; p9_debug(P9_DEBUG_VFS, " %lu,%pd,%pd\n", @@ -1393,20 +1338,12 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir, if (IS_ERR(oldfid)) return PTR_ERR(oldfid); - name = __getname(); - if (unlikely(!name)) { - retval = -ENOMEM; - goto clunk_fid; - } - sprintf(name, "%d\n", oldfid->fid); retval = v9fs_vfs_mkspecial(dir, dentry, P9_DMLINK, name); - __putname(name); if (!retval) { v9fs_refresh_inode(oldfid, d_inode(old_dentry)); v9fs_invalidate_inode_attr(dir); } -clunk_fid: p9_client_clunk(oldfid); return retval; } @@ -1425,7 +1362,7 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rde { struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir); int retval; - char *name; + char name[2 + U32_MAX_DIGITS + 1 + U32_MAX_DIGITS + 1]; u32 perm; p9_debug(P9_DEBUG_VFS, " %lu,%pd mode: %hx MAJOR: %u MINOR: %u\n", @@ -1435,26 +1372,16 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rde if (!new_valid_dev(rdev)) return -EINVAL; - name = __getname(); - if (!name) - return -ENOMEM; /* build extension */ if (S_ISBLK(mode)) sprintf(name, "b %u %u", MAJOR(rdev), MINOR(rdev)); else if (S_ISCHR(mode)) sprintf(name, "c %u %u", MAJOR(rdev), MINOR(rdev)); - else if (S_ISFIFO(mode)) - *name = 0; - else if (S_ISSOCK(mode)) + else *name = 0; - else { - __putname(name); - return -EINVAL; - } perm = unixmode2p9mode(v9ses, mode); retval = v9fs_vfs_mkspecial(dir, dentry, perm, name); - __putname(name); return retval; } @@ -1530,7 +1457,7 @@ static const struct inode_operations v9fs_file_inode_operations = { static const struct inode_operations v9fs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = v9fs_vfs_follow_link, - .put_link = v9fs_vfs_put_link, + .put_link = kfree_put_link, .getattr = v9fs_vfs_getattr, .setattr = v9fs_vfs_setattr, }; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 9861c7c951a6..09e4433717b8 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -905,41 +905,24 @@ error: /** * v9fs_vfs_follow_link_dotl - follow a symlink path * @dentry: dentry for symlink - * @nd: nameidata - * + * @cookie: place to pass the data to put_link() */ -static void * -v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd) +static const char * +v9fs_vfs_follow_link_dotl(struct dentry *dentry, void **cookie) { - int retval; - struct p9_fid *fid; - char *link = __getname(); + struct p9_fid *fid = v9fs_fid_lookup(dentry); char *target; + int retval; p9_debug(P9_DEBUG_VFS, "%pd\n", dentry); - if (!link) { - link = ERR_PTR(-ENOMEM); - goto ndset; - } - fid = v9fs_fid_lookup(dentry); - if (IS_ERR(fid)) { - __putname(link); - link = ERR_CAST(fid); - goto ndset; - } + if (IS_ERR(fid)) + return ERR_CAST(fid); retval = p9_client_readlink(fid, &target); - if (!retval) { - strcpy(link, target); - kfree(target); - goto ndset; - } - __putname(link); - link = ERR_PTR(retval); -ndset: - nd_set_link(nd, link); - return NULL; + if (retval) + return ERR_PTR(retval); + return *cookie = target; } int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) @@ -1006,7 +989,7 @@ const struct inode_operations v9fs_file_inode_operations_dotl = { const struct inode_operations v9fs_symlink_inode_operations_dotl = { .readlink = generic_readlink, .follow_link = v9fs_vfs_follow_link_dotl, - .put_link = v9fs_vfs_put_link, + .put_link = kfree_put_link, .getattr = v9fs_vfs_getattr_dotl, .setattr = v9fs_vfs_setattr_dotl, .setxattr = generic_setxattr, diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c index de58cc7b8076..da0c33481bc0 100644 --- a/fs/autofs4/symlink.c +++ b/fs/autofs4/symlink.c @@ -12,14 +12,13 @@ #include "autofs_i.h" -static void *autofs4_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *autofs4_follow_link(struct dentry *dentry, void **cookie) { struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb); struct autofs_info *ino = autofs4_dentry_ino(dentry); if (ino && !autofs4_oz_mode(sbi)) ino->last_used = jiffies; - nd_set_link(nd, d_inode(dentry)->i_private); - return NULL; + return d_inode(dentry)->i_private; } const struct inode_operations autofs4_symlink_inode_operations = { diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 7943533c3868..46aedacfa6a8 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -42,8 +42,7 @@ static struct inode *befs_iget(struct super_block *, unsigned long); static struct inode *befs_alloc_inode(struct super_block *sb); static void befs_destroy_inode(struct inode *inode); static void befs_destroy_inodecache(void); -static void *befs_follow_link(struct dentry *, struct nameidata *); -static void *befs_fast_follow_link(struct dentry *, struct nameidata *); +static const char *befs_follow_link(struct dentry *, void **); static int befs_utf2nls(struct super_block *sb, const char *in, int in_len, char **out, int *out_len); static int befs_nls2utf(struct super_block *sb, const char *in, int in_len, @@ -80,11 +79,6 @@ static const struct address_space_operations befs_aops = { .bmap = befs_bmap, }; -static const struct inode_operations befs_fast_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = befs_fast_follow_link, -}; - static const struct inode_operations befs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = befs_follow_link, @@ -403,10 +397,12 @@ static struct inode *befs_iget(struct super_block *sb, unsigned long ino) inode->i_op = &befs_dir_inode_operations; inode->i_fop = &befs_dir_operations; } else if (S_ISLNK(inode->i_mode)) { - if (befs_ino->i_flags & BEFS_LONG_SYMLINK) + if (befs_ino->i_flags & BEFS_LONG_SYMLINK) { inode->i_op = &befs_symlink_inode_operations; - else - inode->i_op = &befs_fast_symlink_inode_operations; + } else { + inode->i_link = befs_ino->i_data.symlink; + inode->i_op = &simple_symlink_inode_operations; + } } else { befs_error(sb, "Inode %lu is not a regular file, " "directory or symlink. THAT IS WRONG! BeFS has no " @@ -467,8 +463,8 @@ befs_destroy_inodecache(void) * The data stream become link name. Unless the LONG_SYMLINK * flag is set. */ -static void * -befs_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char * +befs_follow_link(struct dentry *dentry, void **cookie) { struct super_block *sb = dentry->d_sb; struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry)); @@ -478,33 +474,20 @@ befs_follow_link(struct dentry *dentry, struct nameidata *nd) if (len == 0) { befs_error(sb, "Long symlink with illegal length"); - link = ERR_PTR(-EIO); - } else { - befs_debug(sb, "Follow long symlink"); - - link = kmalloc(len, GFP_NOFS); - if (!link) { - link = ERR_PTR(-ENOMEM); - } else if (befs_read_lsymlink(sb, data, link, len) != len) { - kfree(link); - befs_error(sb, "Failed to read entire long symlink"); - link = ERR_PTR(-EIO); - } else { - link[len - 1] = '\0'; - } + return ERR_PTR(-EIO); } - nd_set_link(nd, link); - return NULL; -} - - -static void * -befs_fast_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct befs_inode_info *befs_ino = BEFS_I(d_inode(dentry)); + befs_debug(sb, "Follow long symlink"); - nd_set_link(nd, befs_ino->i_data.symlink); - return NULL; + link = kmalloc(len, GFP_NOFS); + if (!link) + return ERR_PTR(-ENOMEM); + if (befs_read_lsymlink(sb, data, link, len) != len) { + kfree(link); + befs_error(sb, "Failed to read entire long symlink"); + return ERR_PTR(-EIO); + } + link[len - 1] = '\0'; + return *cookie = link; } /* diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 241ef68d2893..cd46e4158830 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -918,7 +918,7 @@ static int load_elf_binary(struct linux_binprm *bprm) total_size = total_mapping_size(elf_phdata, loc->elf_ex.e_phnum); if (!total_size) { - error = -EINVAL; + retval = -EINVAL; goto out_free_dentry; } } diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 9de772ee0031..614aaa1969bd 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -880,6 +880,8 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info, * indirect refs to their parent bytenr. * When roots are found, they're added to the roots list * + * NOTE: This can return values > 0 + * * FIXME some caching might speed things up */ static int find_parent_nodes(struct btrfs_trans_handle *trans, @@ -1198,6 +1200,19 @@ int btrfs_find_all_roots(struct btrfs_trans_handle *trans, return ret; } +/** + * btrfs_check_shared - tell us whether an extent is shared + * + * @trans: optional trans handle + * + * btrfs_check_shared uses the backref walking code but will short + * circuit as soon as it finds a root or inode that doesn't match the + * one passed in. This provides a significant performance benefit for + * callers (such as fiemap) which want to know whether the extent is + * shared but do not need a ref count. + * + * Return: 0 if extent is not shared, 1 if it is shared, < 0 on error. + */ int btrfs_check_shared(struct btrfs_trans_handle *trans, struct btrfs_fs_info *fs_info, u64 root_objectid, u64 inum, u64 bytenr) @@ -1226,11 +1241,13 @@ int btrfs_check_shared(struct btrfs_trans_handle *trans, ret = find_parent_nodes(trans, fs_info, bytenr, elem.seq, tmp, roots, NULL, root_objectid, inum); if (ret == BACKREF_FOUND_SHARED) { + /* this is the only condition under which we return 1 */ ret = 1; break; } if (ret < 0 && ret != -ENOENT) break; + ret = 0; node = ulist_next(tmp, &uiter); if (!node) break; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 0ec8e228b89f..0ec3acd14cbf 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3180,8 +3180,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(leaf); fail: btrfs_release_path(path); - if (ret) - btrfs_abort_transaction(trans, root, ret); return ret; } @@ -3487,8 +3485,30 @@ again: ret = 0; } } - if (!ret) + if (!ret) { ret = write_one_cache_group(trans, root, path, cache); + /* + * Our block group might still be attached to the list + * of new block groups in the transaction handle of some + * other task (struct btrfs_trans_handle->new_bgs). This + * means its block group item isn't yet in the extent + * tree. If this happens ignore the error, as we will + * try again later in the critical section of the + * transaction commit. + */ + if (ret == -ENOENT) { + ret = 0; + spin_lock(&cur_trans->dirty_bgs_lock); + if (list_empty(&cache->dirty_list)) { + list_add_tail(&cache->dirty_list, + &cur_trans->dirty_bgs); + btrfs_get_block_group(cache); + } + spin_unlock(&cur_trans->dirty_bgs_lock); + } else if (ret) { + btrfs_abort_transaction(trans, root, ret); + } + } /* if its not on the io list, we need to put the block group */ if (should_put) @@ -3597,8 +3617,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, ret = 0; } } - if (!ret) + if (!ret) { ret = write_one_cache_group(trans, root, path, cache); + if (ret) + btrfs_abort_transaction(trans, root, ret); + } /* if its not on the io list, we need to put the block group */ if (should_put) @@ -8806,6 +8829,24 @@ again: goto again; } + /* + * if we are changing raid levels, try to allocate a corresponding + * block group with the new raid level. + */ + alloc_flags = update_block_group_flags(root, cache->flags); + if (alloc_flags != cache->flags) { + ret = do_chunk_alloc(trans, root, alloc_flags, + CHUNK_ALLOC_FORCE); + /* + * ENOSPC is allowed here, we may have enough space + * already allocated at the new raid level to + * carry on + */ + if (ret == -ENOSPC) + ret = 0; + if (ret < 0) + goto out; + } ret = set_block_group_ro(cache, 0); if (!ret) @@ -8819,7 +8860,9 @@ again: out: if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) { alloc_flags = update_block_group_flags(root, cache->flags); + lock_chunks(root->fs_info->chunk_root); check_system_chunk(trans, root, alloc_flags); + unlock_chunks(root->fs_info->chunk_root); } mutex_unlock(&root->fs_info->ro_block_group_mutex); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 43af5a61ad25..c32d226bfecc 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4772,6 +4772,25 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info, start >> PAGE_CACHE_SHIFT); if (eb && atomic_inc_not_zero(&eb->refs)) { rcu_read_unlock(); + /* + * Lock our eb's refs_lock to avoid races with + * free_extent_buffer. When we get our eb it might be flagged + * with EXTENT_BUFFER_STALE and another task running + * free_extent_buffer might have seen that flag set, + * eb->refs == 2, that the buffer isn't under IO (dirty and + * writeback flags not set) and it's still in the tree (flag + * EXTENT_BUFFER_TREE_REF set), therefore being in the process + * of decrementing the extent buffer's reference count twice. + * So here we could race and increment the eb's reference count, + * clear its stale flag, mark it as dirty and drop our reference + * before the other task finishes executing free_extent_buffer, + * which would later result in an attempt to free an extent + * buffer that is dirty. + */ + if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) { + spin_lock(&eb->refs_lock); + spin_unlock(&eb->refs_lock); + } mark_extent_buffer_accessed(eb, NULL); return eb; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 41c510b7cc11..9dbe5b548fa6 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -86,7 +86,7 @@ static struct inode *__lookup_free_space_inode(struct btrfs_root *root, mapping_set_gfp_mask(inode->i_mapping, mapping_gfp_mask(inode->i_mapping) & - ~(GFP_NOFS & ~__GFP_HIGHMEM)); + ~(__GFP_FS | __GFP_HIGHMEM)); return inode; } @@ -3466,6 +3466,7 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, struct btrfs_free_space_ctl *ctl = root->free_ino_ctl; int ret; struct btrfs_io_ctl io_ctl; + bool release_metadata = true; if (!btrfs_test_opt(root, INODE_MAP_CACHE)) return 0; @@ -3473,11 +3474,20 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root, memset(&io_ctl, 0, sizeof(io_ctl)); ret = __btrfs_write_out_cache(root, inode, ctl, NULL, &io_ctl, trans, path, 0); - if (!ret) + if (!ret) { + /* + * At this point writepages() didn't error out, so our metadata + * reservation is released when the writeback finishes, at + * inode.c:btrfs_finish_ordered_io(), regardless of it finishing + * with or without an error. + */ + release_metadata = false; ret = btrfs_wait_cache_io(root, trans, NULL, &io_ctl, path, 0); + } if (ret) { - btrfs_delalloc_release_metadata(inode, inode->i_size); + if (release_metadata) + btrfs_delalloc_release_metadata(inode, inode->i_size); #ifdef DEBUG btrfs_err(root->fs_info, "failed to write free ino cache for root %llu", diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 157cc54fc634..760c4a5e096b 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -722,6 +722,7 @@ void btrfs_start_ordered_extent(struct inode *inode, int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) { int ret = 0; + int ret_wb = 0; u64 end; u64 orig_end; struct btrfs_ordered_extent *ordered; @@ -741,9 +742,14 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) if (ret) return ret; - ret = filemap_fdatawait_range(inode->i_mapping, start, orig_end); - if (ret) - return ret; + /* + * If we have a writeback error don't return immediately. Wait first + * for any ordered extents that haven't completed yet. This is to make + * sure no one can dirty the same page ranges and call writepages() + * before the ordered extents complete - to avoid failures (-EEXIST) + * when adding the new ordered extents to the ordered tree. + */ + ret_wb = filemap_fdatawait_range(inode->i_mapping, start, orig_end); end = orig_end; while (1) { @@ -767,7 +773,7 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len) break; end--; } - return ret; + return ret_wb ? ret_wb : ret; } /* diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 96aebf3bcd5b..174f5e1e00ab 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4625,6 +4625,7 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, { u64 chunk_offset; + ASSERT(mutex_is_locked(&extent_root->fs_info->chunk_mutex)); chunk_offset = find_next_chunk(extent_root->fs_info); return __btrfs_alloc_chunk(trans, extent_root, chunk_offset, type); } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e876e1944519..571acd88606c 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -6,7 +6,6 @@ #include <linux/string.h> #include <linux/uaccess.h> #include <linux/kernel.h> -#include <linux/namei.h> #include <linux/writeback.h> #include <linux/vmalloc.h> #include <linux/posix_acl.h> @@ -819,6 +818,7 @@ static int fill_inode(struct inode *inode, struct page *locked_page, else kfree(sym); /* lost a race */ } + inode->i_link = ci->i_symlink; break; case S_IFDIR: inode->i_op = &ceph_dir_iops; @@ -1691,16 +1691,9 @@ retry: /* * symlinks */ -static void *ceph_sym_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct ceph_inode_info *ci = ceph_inode(d_inode(dentry)); - nd_set_link(nd, ci->i_symlink); - return NULL; -} - static const struct inode_operations ceph_symlink_iops = { .readlink = generic_readlink, - .follow_link = ceph_sym_follow_link, + .follow_link = simple_follow_link, .setattr = ceph_setattr, .getattr = ceph_getattr, .setxattr = ceph_setxattr, diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index 430e0348c99e..7dc886c9a78f 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -24,6 +24,7 @@ #include "cifsfs.h" #include "dns_resolve.h" #include "cifs_debug.h" +#include "cifs_unicode.h" static LIST_HEAD(cifs_dfs_automount_list); @@ -312,7 +313,7 @@ static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) xid = get_xid(); rc = get_dfs_path(xid, ses, full_path + 1, cifs_sb->local_nls, &num_referrals, &referrals, - cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); free_xid(xid); cifs_put_tlink(tlink); diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c index 0303c6793d90..5a53ac6b1e02 100644 --- a/fs/cifs/cifs_unicode.c +++ b/fs/cifs/cifs_unicode.c @@ -27,41 +27,6 @@ #include "cifsglob.h" #include "cifs_debug.h" -/* - * cifs_utf16_bytes - how long will a string be after conversion? - * @utf16 - pointer to input string - * @maxbytes - don't go past this many bytes of input string - * @codepage - destination codepage - * - * Walk a utf16le string and return the number of bytes that the string will - * be after being converted to the given charset, not including any null - * termination required. Don't walk past maxbytes in the source buffer. - */ -int -cifs_utf16_bytes(const __le16 *from, int maxbytes, - const struct nls_table *codepage) -{ - int i; - int charlen, outlen = 0; - int maxwords = maxbytes / 2; - char tmp[NLS_MAX_CHARSET_SIZE]; - __u16 ftmp; - - for (i = 0; i < maxwords; i++) { - ftmp = get_unaligned_le16(&from[i]); - if (ftmp == 0) - break; - - charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE); - if (charlen > 0) - outlen += charlen; - else - outlen++; - } - - return outlen; -} - int cifs_remap(struct cifs_sb_info *cifs_sb) { int map_type; @@ -155,10 +120,13 @@ convert_sfm_char(const __u16 src_char, char *target) * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). */ static int -cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, +cifs_mapchar(char *target, const __u16 *from, const struct nls_table *cp, int maptype) { int len = 1; + __u16 src_char; + + src_char = *from; if ((maptype == SFM_MAP_UNI_RSVD) && convert_sfm_char(src_char, target)) return len; @@ -168,10 +136,23 @@ cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, /* if character not one of seven in special remap set */ len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); - if (len <= 0) { - *target = '?'; - len = 1; - } + if (len <= 0) + goto surrogate_pair; + + return len; + +surrogate_pair: + /* convert SURROGATE_PAIR and IVS */ + if (strcmp(cp->charset, "utf8")) + goto unknown; + len = utf16s_to_utf8s(from, 3, UTF16_LITTLE_ENDIAN, target, 6); + if (len <= 0) + goto unknown; + return len; + +unknown: + *target = '?'; + len = 1; return len; } @@ -206,7 +187,7 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, int nullsize = nls_nullsize(codepage); int fromwords = fromlen / 2; char tmp[NLS_MAX_CHARSET_SIZE]; - __u16 ftmp; + __u16 ftmp[3]; /* ftmp[3] = 3array x 2bytes = 6bytes UTF-16 */ /* * because the chars can be of varying widths, we need to take care @@ -217,9 +198,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); for (i = 0; i < fromwords; i++) { - ftmp = get_unaligned_le16(&from[i]); - if (ftmp == 0) + ftmp[0] = get_unaligned_le16(&from[i]); + if (ftmp[0] == 0) break; + if (i + 1 < fromwords) + ftmp[1] = get_unaligned_le16(&from[i + 1]); + else + ftmp[1] = 0; + if (i + 2 < fromwords) + ftmp[2] = get_unaligned_le16(&from[i + 2]); + else + ftmp[2] = 0; /* * check to see if converting this character might make the @@ -234,6 +223,17 @@ cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, /* put converted char into 'to' buffer */ charlen = cifs_mapchar(&to[outlen], ftmp, codepage, map_type); outlen += charlen; + + /* charlen (=bytes of UTF-8 for 1 character) + * 4bytes UTF-8(surrogate pair) is charlen=4 + * (4bytes UTF-16 code) + * 7-8bytes UTF-8(IVS) is charlen=3+4 or 4+4 + * (2 UTF-8 pairs divided to 2 UTF-16 pairs) */ + if (charlen == 4) + i++; + else if (charlen >= 5) + /* 5-6bytes UTF-8 */ + i += 2; } /* properly null-terminate string */ @@ -296,6 +296,46 @@ success: } /* + * cifs_utf16_bytes - how long will a string be after conversion? + * @utf16 - pointer to input string + * @maxbytes - don't go past this many bytes of input string + * @codepage - destination codepage + * + * Walk a utf16le string and return the number of bytes that the string will + * be after being converted to the given charset, not including any null + * termination required. Don't walk past maxbytes in the source buffer. + */ +int +cifs_utf16_bytes(const __le16 *from, int maxbytes, + const struct nls_table *codepage) +{ + int i; + int charlen, outlen = 0; + int maxwords = maxbytes / 2; + char tmp[NLS_MAX_CHARSET_SIZE]; + __u16 ftmp[3]; + + for (i = 0; i < maxwords; i++) { + ftmp[0] = get_unaligned_le16(&from[i]); + if (ftmp[0] == 0) + break; + if (i + 1 < maxwords) + ftmp[1] = get_unaligned_le16(&from[i + 1]); + else + ftmp[1] = 0; + if (i + 2 < maxwords) + ftmp[2] = get_unaligned_le16(&from[i + 2]); + else + ftmp[2] = 0; + + charlen = cifs_mapchar(tmp, ftmp, codepage, NO_MAP_UNI_RSVD); + outlen += charlen; + } + + return outlen; +} + +/* * cifs_strndup_from_utf16 - copy a string from wire format to the local * codepage * @src - source string @@ -409,10 +449,15 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, char src_char; __le16 dst_char; wchar_t tmp; + wchar_t *wchar_to; /* UTF-16 */ + int ret; + unicode_t u; if (map_chars == NO_MAP_UNI_RSVD) return cifs_strtoUTF16(target, source, PATH_MAX, cp); + wchar_to = kzalloc(6, GFP_KERNEL); + for (i = 0; i < srclen; j++) { src_char = source[i]; charlen = 1; @@ -441,11 +486,55 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, * if no match, use question mark, which at least in * some cases serves as wild card */ - if (charlen < 1) { - dst_char = cpu_to_le16(0x003f); - charlen = 1; + if (charlen > 0) + goto ctoUTF16; + + /* convert SURROGATE_PAIR */ + if (strcmp(cp->charset, "utf8") || !wchar_to) + goto unknown; + if (*(source + i) & 0x80) { + charlen = utf8_to_utf32(source + i, 6, &u); + if (charlen < 0) + goto unknown; + } else + goto unknown; + ret = utf8s_to_utf16s(source + i, charlen, + UTF16_LITTLE_ENDIAN, + wchar_to, 6); + if (ret < 0) + goto unknown; + + i += charlen; + dst_char = cpu_to_le16(*wchar_to); + if (charlen <= 3) + /* 1-3bytes UTF-8 to 2bytes UTF-16 */ + put_unaligned(dst_char, &target[j]); + else if (charlen == 4) { + /* 4bytes UTF-8(surrogate pair) to 4bytes UTF-16 + * 7-8bytes UTF-8(IVS) divided to 2 UTF-16 + * (charlen=3+4 or 4+4) */ + put_unaligned(dst_char, &target[j]); + dst_char = cpu_to_le16(*(wchar_to + 1)); + j++; + put_unaligned(dst_char, &target[j]); + } else if (charlen >= 5) { + /* 5-6bytes UTF-8 to 6bytes UTF-16 */ + put_unaligned(dst_char, &target[j]); + dst_char = cpu_to_le16(*(wchar_to + 1)); + j++; + put_unaligned(dst_char, &target[j]); + dst_char = cpu_to_le16(*(wchar_to + 2)); + j++; + put_unaligned(dst_char, &target[j]); } + continue; + +unknown: + dst_char = cpu_to_le16(0x003f); + charlen = 1; } + +ctoUTF16: /* * character may take more than one byte in the source string, * but will take exactly two bytes in the target string @@ -456,6 +545,7 @@ cifsConvertToUTF16(__le16 *target, const char *source, int srclen, ctoUTF16_out: put_unaligned(0, &target[j]); /* Null terminate target unicode string */ + kfree(wchar_to); return j; } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f5089bde3635..0a9fb6b53126 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -469,6 +469,8 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_puts(s, ",nouser_xattr"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR) seq_puts(s, ",mapchars"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SFM_CHR) + seq_puts(s, ",mapposix"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL) seq_puts(s, ",sfu"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL) diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 252f5c15806b..a782b22904e4 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -120,7 +120,7 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path); #endif /* Functions related to symlinks */ -extern void *cifs_follow_link(struct dentry *direntry, struct nameidata *nd); +extern const char *cifs_follow_link(struct dentry *direntry, void **cookie); extern int cifs_readlink(struct dentry *direntry, char __user *buffer, int buflen); extern int cifs_symlink(struct inode *inode, struct dentry *direntry, diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index c31ce98c1704..c63fd1dde25b 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -361,11 +361,11 @@ extern int CIFSUnixCreateHardLink(const unsigned int xid, extern int CIFSUnixCreateSymLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, - const struct nls_table *nls_codepage); + const struct nls_table *nls_codepage, int remap); extern int CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char **syminfo, - const struct nls_table *nls_codepage); + const struct nls_table *nls_codepage, int remap); extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, __u16 fid, char **symlinkinfo, const struct nls_table *nls_codepage); diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 84650a51c7c4..f26ffbfc64d8 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -2784,7 +2784,7 @@ copyRetry: int CIFSUnixCreateSymLink(const unsigned int xid, struct cifs_tcon *tcon, const char *fromName, const char *toName, - const struct nls_table *nls_codepage) + const struct nls_table *nls_codepage, int remap) { TRANSACTION2_SPI_REQ *pSMB = NULL; TRANSACTION2_SPI_RSP *pSMBr = NULL; @@ -2804,9 +2804,9 @@ createSymLinkRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifs_strtoUTF16((__le16 *) pSMB->FileName, fromName, - /* find define for this maxpathcomponent */ - PATH_MAX, nls_codepage); + cifsConvertToUTF16((__le16 *) pSMB->FileName, fromName, + /* find define for this maxpathcomponent */ + PATH_MAX, nls_codepage, remap); name_len++; /* trailing null */ name_len *= 2; @@ -2828,9 +2828,9 @@ createSymLinkRetry: data_offset = (char *) (&pSMB->hdr.Protocol) + offset; if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len_target = - cifs_strtoUTF16((__le16 *) data_offset, toName, PATH_MAX - /* find define for this maxpathcomponent */ - , nls_codepage); + cifsConvertToUTF16((__le16 *) data_offset, toName, + /* find define for this maxpathcomponent */ + PATH_MAX, nls_codepage, remap); name_len_target++; /* trailing null */ name_len_target *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -3034,7 +3034,7 @@ winCreateHardLinkRetry: int CIFSSMBUnixQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon, const unsigned char *searchName, char **symlinkinfo, - const struct nls_table *nls_codepage) + const struct nls_table *nls_codepage, int remap) { /* SMB_QUERY_FILE_UNIX_LINK */ TRANSACTION2_QPI_REQ *pSMB = NULL; @@ -3055,8 +3055,9 @@ querySymLinkRetry: if (pSMB->hdr.Flags2 & SMBFLG2_UNICODE) { name_len = - cifs_strtoUTF16((__le16 *) pSMB->FileName, searchName, - PATH_MAX, nls_codepage); + cifsConvertToUTF16((__le16 *) pSMB->FileName, + searchName, PATH_MAX, nls_codepage, + remap); name_len++; /* trailing null */ name_len *= 2; } else { /* BB improve the check for buffer overruns BB */ @@ -4917,7 +4918,7 @@ getDFSRetry: strncpy(pSMB->RequestFileName, search_name, name_len); } - if (ses->server && ses->server->sign) + if (ses->server->sign) pSMB->hdr.Flags2 |= SMBFLG2_SECURITY_SIGNATURE; pSMB->hdr.Uid = ses->Suid; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index f3bfe08e177b..8383d5ea4202 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -386,6 +386,7 @@ cifs_reconnect(struct TCP_Server_Info *server) rc = generic_ip_connect(server); if (rc) { cifs_dbg(FYI, "reconnect error %d\n", rc); + mutex_unlock(&server->srv_mutex); msleep(3000); } else { atomic_inc(&tcpSesReconnectCount); @@ -393,8 +394,8 @@ cifs_reconnect(struct TCP_Server_Info *server) if (server->tcpStatus != CifsExiting) server->tcpStatus = CifsNeedNegotiate; spin_unlock(&GlobalMid_Lock); + mutex_unlock(&server->srv_mutex); } - mutex_unlock(&server->srv_mutex); } while (server->tcpStatus == CifsNeedReconnect); return rc; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 338d56936f6a..c3eb998a99bd 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -620,8 +620,7 @@ int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode, } rc = CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); if (rc) goto mknod_out; diff --git a/fs/cifs/file.c b/fs/cifs/file.c index cafbf10521d5..3f50cee79df9 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -140,8 +140,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode, posix_flags = cifs_posix_convert_flags(f_flags); rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data, poplock, full_path, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); cifs_put_tlink(tlink); if (rc) @@ -1553,8 +1552,8 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type, rc = server->ops->mand_unlock_range(cfile, flock, xid); out: - if (flock->fl_flags & FL_POSIX) - posix_lock_file_wait(file, flock); + if (flock->fl_flags & FL_POSIX && !rc) + rc = posix_lock_file_wait(file, flock); return rc; } diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 55b58112d122..f621b44cb800 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -373,8 +373,7 @@ int cifs_get_inode_info_unix(struct inode **pinode, /* could have done a find first instead but this returns more info */ rc = CIFSSMBUnixQPathInfo(xid, tcon, full_path, &find_data, - cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_sb->local_nls, cifs_remap(cifs_sb)); cifs_put_tlink(tlink); if (!rc) { @@ -402,9 +401,25 @@ int cifs_get_inode_info_unix(struct inode **pinode, rc = -ENOMEM; } else { /* we already have inode, update it */ + + /* if uniqueid is different, return error */ + if (unlikely(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM && + CIFS_I(*pinode)->uniqueid != fattr.cf_uniqueid)) { + rc = -ESTALE; + goto cgiiu_exit; + } + + /* if filetype is different, return error */ + if (unlikely(((*pinode)->i_mode & S_IFMT) != + (fattr.cf_mode & S_IFMT))) { + rc = -ESTALE; + goto cgiiu_exit; + } + cifs_fattr_to_inode(*pinode, &fattr); } +cgiiu_exit: return rc; } @@ -839,6 +854,15 @@ cifs_get_inode_info(struct inode **inode, const char *full_path, if (!*inode) rc = -ENOMEM; } else { + /* we already have inode, update it */ + + /* if filetype is different, return error */ + if (unlikely(((*inode)->i_mode & S_IFMT) != + (fattr.cf_mode & S_IFMT))) { + rc = -ESTALE; + goto cgii_exit; + } + cifs_fattr_to_inode(*inode, &fattr); } @@ -2215,8 +2239,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) pTcon = tlink_tcon(tlink); rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args, cifs_sb->local_nls, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); + cifs_remap(cifs_sb)); cifs_put_tlink(tlink); } diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 252e672d5604..e3548f73bdea 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -626,8 +626,8 @@ cifs_hl_exit: return rc; } -void * -cifs_follow_link(struct dentry *direntry, struct nameidata *nd) +const char * +cifs_follow_link(struct dentry *direntry, void **cookie) { struct inode *inode = d_inode(direntry); int rc = -ENOMEM; @@ -643,16 +643,18 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) { - rc = PTR_ERR(tlink); - tlink = NULL; - goto out; + free_xid(xid); + return ERR_CAST(tlink); } tcon = tlink_tcon(tlink); server = tcon->ses->server; full_path = build_path_from_dentry(direntry); - if (!full_path) - goto out; + if (!full_path) { + free_xid(xid); + cifs_put_tlink(tlink); + return ERR_PTR(-ENOMEM); + } cifs_dbg(FYI, "Full path: %s inode = 0x%p\n", full_path, inode); @@ -670,17 +672,13 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd) &target_path, cifs_sb); kfree(full_path); -out: + free_xid(xid); + cifs_put_tlink(tlink); if (rc != 0) { kfree(target_path); - target_path = ERR_PTR(rc); + return ERR_PTR(rc); } - - free_xid(xid); - if (tlink) - cifs_put_tlink(tlink); - nd_set_link(nd, target_path); - return NULL; + return *cookie = target_path; } int @@ -717,7 +715,8 @@ cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname) rc = create_mf_symlink(xid, pTcon, cifs_sb, full_path, symname); else if (pTcon->unix_ext) rc = CIFSUnixCreateSymLink(xid, pTcon, full_path, symname, - cifs_sb->local_nls); + cifs_sb->local_nls, + cifs_remap(cifs_sb)); /* else rc = CIFSCreateReparseSymLink(xid, pTcon, fromName, toName, cifs_sb_target->local_nls); */ diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index b4a47237486b..b1eede3678a9 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -90,6 +90,8 @@ cifs_prime_dcache(struct dentry *parent, struct qstr *name, if (dentry) { inode = d_inode(dentry); if (inode) { + if (d_mountpoint(dentry)) + goto out; /* * If we're generating inode numbers, then we don't * want to clobber the existing one with the one that diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c index 7bfdd6066276..fc537c29044e 100644 --- a/fs/cifs/smb1ops.c +++ b/fs/cifs/smb1ops.c @@ -960,7 +960,8 @@ cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon, /* Check for unix extensions */ if (cap_unix(tcon->ses)) { rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, target_path, - cifs_sb->local_nls); + cifs_sb->local_nls, + cifs_remap(cifs_sb)); if (rc == -EREMOTE) rc = cifs_unix_dfs_readlink(xid, tcon, full_path, target_path, diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 65cd7a84c8bc..54cbe19d9c08 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -110,7 +110,7 @@ smb2_hdr_assemble(struct smb2_hdr *hdr, __le16 smb2_cmd /* command */ , /* GLOBAL_CAP_LARGE_MTU will only be set if dialect > SMB2.02 */ /* See sections 2.2.4 and 3.2.4.1.5 of MS-SMB2 */ - if ((tcon->ses) && + if ((tcon->ses) && (tcon->ses->server) && (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU)) hdr->CreditCharge = cpu_to_le16(1); /* else CreditCharge MBZ */ diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index da94e41bdbf6..537356742091 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -173,5 +173,5 @@ MODULE_LICENSE("GPL"); MODULE_VERSION("0.0.2"); MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration."); -module_init(configfs_init); +core_initcall(configfs_init); module_exit(configfs_exit); diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c index cc9f2546ea4a..ec5c8325b503 100644 --- a/fs/configfs/symlink.c +++ b/fs/configfs/symlink.c @@ -279,36 +279,27 @@ static int configfs_getlink(struct dentry *dentry, char * path) } -static void *configfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *configfs_follow_link(struct dentry *dentry, void **cookie) { - int error = -ENOMEM; unsigned long page = get_zeroed_page(GFP_KERNEL); + int error; - if (page) { - error = configfs_getlink(dentry, (char *)page); - if (!error) { - nd_set_link(nd, (char *)page); - return (void *)page; - } - } - - nd_set_link(nd, ERR_PTR(error)); - return NULL; -} + if (!page) + return ERR_PTR(-ENOMEM); -static void configfs_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) -{ - if (cookie) { - unsigned long page = (unsigned long)cookie; - free_page(page); + error = configfs_getlink(dentry, (char *)page); + if (!error) { + return *cookie = (void *)page; } + + free_page(page); + return ERR_PTR(error); } const struct inode_operations configfs_symlink_inode_operations = { .follow_link = configfs_follow_link, .readlink = generic_readlink, - .put_link = configfs_put_link, + .put_link = free_page_put_link, .setattr = configfs_setattr, }; diff --git a/fs/dcache.c b/fs/dcache.c index 656ce522a218..592c4b582495 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -322,17 +322,17 @@ static void dentry_free(struct dentry *dentry) } /** - * dentry_rcuwalk_barrier - invalidate in-progress rcu-walk lookups + * dentry_rcuwalk_invalidate - invalidate in-progress rcu-walk lookups * @dentry: the target dentry * After this call, in-progress rcu-walk path lookup will fail. This * should be called after unhashing, and after changing d_inode (if * the dentry has not already been unhashed). */ -static inline void dentry_rcuwalk_barrier(struct dentry *dentry) +static inline void dentry_rcuwalk_invalidate(struct dentry *dentry) { - assert_spin_locked(&dentry->d_lock); - /* Go through a barrier */ - write_seqcount_barrier(&dentry->d_seq); + lockdep_assert_held(&dentry->d_lock); + /* Go through am invalidation barrier */ + write_seqcount_invalidate(&dentry->d_seq); } /* @@ -372,7 +372,7 @@ static void dentry_unlink_inode(struct dentry * dentry) struct inode *inode = dentry->d_inode; __d_clear_type_and_inode(dentry); hlist_del_init(&dentry->d_u.d_alias); - dentry_rcuwalk_barrier(dentry); + dentry_rcuwalk_invalidate(dentry); spin_unlock(&dentry->d_lock); spin_unlock(&inode->i_lock); if (!inode->i_nlink) @@ -494,7 +494,7 @@ void __d_drop(struct dentry *dentry) __hlist_bl_del(&dentry->d_hash); dentry->d_hash.pprev = NULL; hlist_bl_unlock(b); - dentry_rcuwalk_barrier(dentry); + dentry_rcuwalk_invalidate(dentry); } } EXPORT_SYMBOL(__d_drop); @@ -1239,13 +1239,13 @@ ascend: /* might go back up the wrong parent if we have had a rename. */ if (need_seqretry(&rename_lock, seq)) goto rename_retry; - next = child->d_child.next; - while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)) { + /* go into the first sibling still alive */ + do { + next = child->d_child.next; if (next == &this_parent->d_subdirs) goto ascend; child = list_entry(next, struct dentry, d_child); - next = next->next; - } + } while (unlikely(child->d_flags & DCACHE_DENTRY_KILLED)); rcu_read_unlock(); goto resume; } @@ -1752,7 +1752,7 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode) if (inode) hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry); __d_set_inode_and_type(dentry, inode, add_flags); - dentry_rcuwalk_barrier(dentry); + dentry_rcuwalk_invalidate(dentry); spin_unlock(&dentry->d_lock); fsnotify_d_instantiate(dentry, inode); } diff --git a/fs/debugfs/file.c b/fs/debugfs/file.c index 830a7e76f5c6..284f9aa0028b 100644 --- a/fs/debugfs/file.c +++ b/fs/debugfs/file.c @@ -17,7 +17,6 @@ #include <linux/fs.h> #include <linux/seq_file.h> #include <linux/pagemap.h> -#include <linux/namei.h> #include <linux/debugfs.h> #include <linux/io.h> #include <linux/slab.h> @@ -43,17 +42,6 @@ const struct file_operations debugfs_file_operations = { .llseek = noop_llseek, }; -static void *debugfs_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - nd_set_link(nd, d_inode(dentry)->i_private); - return NULL; -} - -const struct inode_operations debugfs_link_operations = { - .readlink = generic_readlink, - .follow_link = debugfs_follow_link, -}; - static int debugfs_u8_set(void *data, u64 val) { *(u8 *)data = val; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index c1e7ffb0dab6..7eaec88ea970 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -174,7 +174,7 @@ static void debugfs_evict_inode(struct inode *inode) truncate_inode_pages_final(&inode->i_data); clear_inode(inode); if (S_ISLNK(inode->i_mode)) - kfree(inode->i_private); + kfree(inode->i_link); } static const struct super_operations debugfs_super_operations = { @@ -511,8 +511,8 @@ struct dentry *debugfs_create_symlink(const char *name, struct dentry *parent, return failed_creating(dentry); } inode->i_mode = S_IFLNK | S_IRWXUGO; - inode->i_op = &debugfs_link_operations; - inode->i_private = link; + inode->i_op = &simple_symlink_inode_operations; + inode->i_link = link; d_instantiate(dentry, inode); return end_creating(dentry); } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index fc850b55db67..3c4db1172d22 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -170,7 +170,6 @@ out_unlock: * @directory_inode: inode of the new file's dentry's parent in ecryptfs * @ecryptfs_dentry: New file's dentry in ecryptfs * @mode: The mode of the new file - * @nd: nameidata of ecryptfs' parent's dentry & vfsmount * * Creates the underlying file and the eCryptfs inode which will link to * it. It will also update the eCryptfs directory inode to mimic the @@ -384,7 +383,7 @@ static int ecryptfs_lookup_interpose(struct dentry *dentry, * ecryptfs_lookup * @ecryptfs_dir_inode: The eCryptfs directory inode * @ecryptfs_dentry: The eCryptfs dentry that we are looking up - * @ecryptfs_nd: nameidata; may be NULL + * @flags: lookup flags * * Find a file on disk. If the file does not exist, then we'll add it to the * dentry cache and continue on to read it from the disk. @@ -675,18 +674,16 @@ out: return rc ? ERR_PTR(rc) : buf; } -static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *ecryptfs_follow_link(struct dentry *dentry, void **cookie) { size_t len; char *buf = ecryptfs_readlink_lower(dentry, &len); if (IS_ERR(buf)) - goto out; + return buf; fsstack_copy_attr_atime(d_inode(dentry), d_inode(ecryptfs_dentry_to_lower(dentry))); buf[len] = '\0'; -out: - nd_set_link(nd, buf); - return NULL; + return *cookie = buf; } /** diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c index 59fedbcf8798..86a2121828c3 100644 --- a/fs/efivarfs/super.c +++ b/fs/efivarfs/super.c @@ -121,7 +121,7 @@ static int efivarfs_callback(efi_char16_t *name16, efi_guid_t vendor, int len, i; int err = -ENOMEM; - entry = kmalloc(sizeof(*entry), GFP_KERNEL); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return err; diff --git a/fs/exec.c b/fs/exec.c index 49a1c61433b7..1977c2a553ac 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -659,6 +659,9 @@ int setup_arg_pages(struct linux_binprm *bprm, if (stack_base > STACK_SIZE_MAX) stack_base = STACK_SIZE_MAX; + /* Add space for stack randomization. */ + stack_base += (STACK_RND_MASK << PAGE_SHIFT); + /* Make sure we didn't let the argument array grow too large. */ if (vma->vm_end - vma->vm_start > stack_base) return -ENOMEM; diff --git a/fs/exofs/Kbuild b/fs/exofs/Kbuild index b47c7b8dc275..a364fd0965ec 100644 --- a/fs/exofs/Kbuild +++ b/fs/exofs/Kbuild @@ -16,5 +16,5 @@ libore-y := ore.o ore_raid.o obj-$(CONFIG_ORE) += libore.o -exofs-y := inode.o file.o symlink.o namei.o dir.o super.o sys.o +exofs-y := inode.o file.o namei.o dir.o super.o sys.o obj-$(CONFIG_EXOFS_FS) += exofs.o diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h index ad9cac670a47..2e86086bc940 100644 --- a/fs/exofs/exofs.h +++ b/fs/exofs/exofs.h @@ -207,10 +207,6 @@ extern const struct address_space_operations exofs_aops; extern const struct inode_operations exofs_dir_inode_operations; extern const struct inode_operations exofs_special_inode_operations; -/* symlink.c */ -extern const struct inode_operations exofs_symlink_inode_operations; -extern const struct inode_operations exofs_fast_symlink_inode_operations; - /* exofs_init_comps will initialize an ore_components device array * pointing to a single ore_comp struct, and a round-robin view * of the device table. diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 786e4cc8c889..73c64daa0f55 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -1222,10 +1222,11 @@ struct inode *exofs_iget(struct super_block *sb, unsigned long ino) inode->i_fop = &exofs_dir_operations; inode->i_mapping->a_ops = &exofs_aops; } else if (S_ISLNK(inode->i_mode)) { - if (exofs_inode_is_fast_symlink(inode)) - inode->i_op = &exofs_fast_symlink_inode_operations; - else { - inode->i_op = &exofs_symlink_inode_operations; + if (exofs_inode_is_fast_symlink(inode)) { + inode->i_op = &simple_symlink_inode_operations; + inode->i_link = (char *)oi->i_data; + } else { + inode->i_op = &page_symlink_inode_operations; inode->i_mapping->a_ops = &exofs_aops; } } else { diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index 5ae25e431191..09a6bb1ad63c 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -113,7 +113,7 @@ static int exofs_symlink(struct inode *dir, struct dentry *dentry, oi = exofs_i(inode); if (l > sizeof(oi->i_data)) { /* slow symlink */ - inode->i_op = &exofs_symlink_inode_operations; + inode->i_op = &page_symlink_inode_operations; inode->i_mapping->a_ops = &exofs_aops; memset(oi->i_data, 0, sizeof(oi->i_data)); @@ -122,7 +122,8 @@ static int exofs_symlink(struct inode *dir, struct dentry *dentry, goto out_fail; } else { /* fast symlink */ - inode->i_op = &exofs_fast_symlink_inode_operations; + inode->i_op = &simple_symlink_inode_operations; + inode->i_link = (char *)oi->i_data; memcpy(oi->i_data, symname, l); inode->i_size = l-1; } diff --git a/fs/exofs/symlink.c b/fs/exofs/symlink.c deleted file mode 100644 index 6f6f3a4c1365..000000000000 --- a/fs/exofs/symlink.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (C) 2005, 2006 - * Avishay Traeger (avishay@gmail.com) - * Copyright (C) 2008, 2009 - * Boaz Harrosh <ooo@electrozaur.com> - * - * Copyrights for code taken from ext2: - * Copyright (C) 1992, 1993, 1994, 1995 - * Remy Card (card@masi.ibp.fr) - * Laboratoire MASI - Institut Blaise Pascal - * Universite Pierre et Marie Curie (Paris VI) - * from - * linux/fs/minix/inode.c - * Copyright (C) 1991, 1992 Linus Torvalds - * - * This file is part of exofs. - * - * exofs is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation. Since it is based on ext2, and the only - * valid version of GPL for the Linux kernel is version 2, the only valid - * version of GPL for exofs is version 2. - * - * exofs is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with exofs; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include <linux/namei.h> - -#include "exofs.h" - -static void *exofs_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct exofs_i_info *oi = exofs_i(d_inode(dentry)); - - nd_set_link(nd, (char *)oi->i_data); - return NULL; -} - -const struct inode_operations exofs_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = page_follow_link_light, - .put_link = page_put_link, -}; - -const struct inode_operations exofs_fast_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = exofs_follow_link, -}; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index f460ae36d5b7..5c09776d347f 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -1403,6 +1403,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino) inode->i_mapping->a_ops = &ext2_aops; } else if (S_ISLNK(inode->i_mode)) { if (ext2_inode_is_fast_symlink(inode)) { + inode->i_link = (char *)ei->i_data; inode->i_op = &ext2_fast_symlink_inode_operations; nd_terminate_link(ei->i_data, inode->i_size, sizeof(ei->i_data) - 1); diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 3e074a9ccbe6..13ec54a99c96 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -189,7 +189,8 @@ static int ext2_symlink (struct inode * dir, struct dentry * dentry, } else { /* fast symlink */ inode->i_op = &ext2_fast_symlink_inode_operations; - memcpy((char*)(EXT2_I(inode)->i_data),symname,l); + inode->i_link = (char*)EXT2_I(inode)->i_data; + memcpy(inode->i_link, symname, l); inode->i_size = l-1; } mark_inode_dirty(inode); diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c index 20608f17c2e5..ae17179f3810 100644 --- a/fs/ext2/symlink.c +++ b/fs/ext2/symlink.c @@ -19,14 +19,6 @@ #include "ext2.h" #include "xattr.h" -#include <linux/namei.h> - -static void *ext2_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct ext2_inode_info *ei = EXT2_I(d_inode(dentry)); - nd_set_link(nd, (char *)ei->i_data); - return NULL; -} const struct inode_operations ext2_symlink_inode_operations = { .readlink = generic_readlink, @@ -43,7 +35,7 @@ const struct inode_operations ext2_symlink_inode_operations = { const struct inode_operations ext2_fast_symlink_inode_operations = { .readlink = generic_readlink, - .follow_link = ext2_follow_link, + .follow_link = simple_follow_link, .setattr = ext2_setattr, #ifdef CONFIG_EXT2_FS_XATTR .setxattr = generic_setxattr, diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 2ee2dc4351d1..6c7e5468a2f8 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2999,6 +2999,7 @@ struct inode *ext3_iget(struct super_block *sb, unsigned long ino) inode->i_op = &ext3_fast_symlink_inode_operations; nd_terminate_link(ei->i_data, inode->i_size, sizeof(ei->i_data) - 1); + inode->i_link = (char *)ei->i_data; } else { inode->i_op = &ext3_symlink_inode_operations; ext3_set_aops(inode); diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 4264b9bd0002..c9e767cd4b67 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2308,7 +2308,8 @@ retry: } } else { inode->i_op = &ext3_fast_symlink_inode_operations; - memcpy((char*)&EXT3_I(inode)->i_data,symname,l); + inode->i_link = (char*)&EXT3_I(inode)->i_data; + memcpy(inode->i_link, symname, l); inode->i_size = l-1; } EXT3_I(inode)->i_disksize = inode->i_size; diff --git a/fs/ext3/symlink.c b/fs/ext3/symlink.c index ea96df3c58db..c08c59094ae6 100644 --- a/fs/ext3/symlink.c +++ b/fs/ext3/symlink.c @@ -17,17 +17,9 @@ * ext3 symlink handling code */ -#include <linux/namei.h> #include "ext3.h" #include "xattr.h" -static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct ext3_inode_info *ei = EXT3_I(d_inode(dentry)); - nd_set_link(nd, (char*)ei->i_data); - return NULL; -} - const struct inode_operations ext3_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, @@ -43,7 +35,7 @@ const struct inode_operations ext3_symlink_inode_operations = { const struct inode_operations ext3_fast_symlink_inode_operations = { .readlink = generic_readlink, - .follow_link = ext3_follow_link, + .follow_link = simple_follow_link, .setattr = ext3_setattr, #ifdef CONFIG_EXT3_FS_XATTR .setxattr = generic_setxattr, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 009a0590b20f..0a3b72d1d458 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2847,6 +2847,7 @@ extern int ext4_mpage_readpages(struct address_space *mapping, unsigned nr_pages); /* symlink.c */ +extern const struct inode_operations ext4_encrypted_symlink_inode_operations; extern const struct inode_operations ext4_symlink_inode_operations; extern const struct inode_operations ext4_fast_symlink_inode_operations; @@ -2889,7 +2890,6 @@ extern int ext4_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); extern int ext4_ext_calc_metadata_amount(struct inode *inode, ext4_lblk_t lblocks); -extern int ext4_extent_tree_init(handle_t *, struct inode *); extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode, int num, struct ext4_ext_path *path); diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c index 3445035c7e01..d41843181818 100644 --- a/fs/ext4/ext4_jbd2.c +++ b/fs/ext4/ext4_jbd2.c @@ -87,6 +87,12 @@ int __ext4_journal_stop(const char *where, unsigned int line, handle_t *handle) ext4_put_nojournal(handle); return 0; } + + if (!handle->h_transaction) { + err = jbd2_journal_stop(handle); + return handle->h_err ? handle->h_err : err; + } + sb = handle->h_transaction->t_journal->j_private; err = handle->h_err; rc = jbd2_journal_stop(handle); diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index d74e08029643..e003a1e81dc3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -377,7 +377,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) ext4_lblk_t lblock = le32_to_cpu(ext->ee_block); ext4_lblk_t last = lblock + len - 1; - if (lblock > last) + if (len == 0 || lblock > last) return 0; return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); } @@ -5396,6 +5396,14 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len) loff_t new_size, ioffset; int ret; + /* + * We need to test this early because xfstests assumes that a + * collapse range of (0, 1) will return EOPNOTSUPP if the file + * system does not support collapse range. + */ + if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) + return -EOPNOTSUPP; + /* Collapse range works only on fs block size aligned offsets. */ if (offset & (EXT4_CLUSTER_SIZE(sb) - 1) || len & (EXT4_CLUSTER_SIZE(sb) - 1)) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 55b187c3bac1..5168c9b56880 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4213,8 +4213,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) inode->i_op = &ext4_dir_inode_operations; inode->i_fop = &ext4_dir_operations; } else if (S_ISLNK(inode->i_mode)) { - if (ext4_inode_is_fast_symlink(inode) && - !ext4_encrypted_inode(inode)) { + if (ext4_encrypted_inode(inode)) { + inode->i_op = &ext4_encrypted_symlink_inode_operations; + ext4_set_aops(inode); + } else if (ext4_inode_is_fast_symlink(inode)) { + inode->i_link = (char *)ei->i_data; inode->i_op = &ext4_fast_symlink_inode_operations; nd_terminate_link(ei->i_data, inode->i_size, sizeof(ei->i_data) - 1); @@ -4345,7 +4348,7 @@ static void ext4_update_other_inodes_time(struct super_block *sb, int inode_size = EXT4_INODE_SIZE(sb); oi.orig_ino = orig_ino; - ino = orig_ino & ~(inodes_per_block - 1); + ino = (orig_ino & ~(inodes_per_block - 1)) + 1; for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { if (ino == orig_ino) continue; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 814f3beb4369..5fdb9f6aa869 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -3206,10 +3206,12 @@ static int ext4_symlink(struct inode *dir, goto err_drop_inode; sd->len = cpu_to_le16(ostr.len); disk_link.name = (char *) sd; + inode->i_op = &ext4_encrypted_symlink_inode_operations; } if ((disk_link.len > EXT4_N_BLOCKS * 4)) { - inode->i_op = &ext4_symlink_inode_operations; + if (!encryption_required) + inode->i_op = &ext4_symlink_inode_operations; ext4_set_aops(inode); /* * We cannot call page_symlink() with transaction started @@ -3249,9 +3251,10 @@ static int ext4_symlink(struct inode *dir, } else { /* clear the extent format for fast symlink */ ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); - inode->i_op = encryption_required ? - &ext4_symlink_inode_operations : - &ext4_fast_symlink_inode_operations; + if (!encryption_required) { + inode->i_op = &ext4_fast_symlink_inode_operations; + inode->i_link = (char *)&EXT4_I(inode)->i_data; + } memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name, disk_link.len); inode->i_size = disk_link.len - 1; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f06d0589ddba..ca9d4a2fed41 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -294,6 +294,8 @@ static void __save_error_info(struct super_block *sb, const char *func, struct ext4_super_block *es = EXT4_SB(sb)->s_es; EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + if (bdev_read_only(sb->s_bdev)) + return; es->s_state |= cpu_to_le16(EXT4_ERROR_FS); es->s_last_error_time = cpu_to_le32(get_seconds()); strncpy(es->s_last_error_func, func, sizeof(es->s_last_error_func)); diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 187b78920314..ba5bd18a9825 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -23,7 +23,7 @@ #include "xattr.h" #ifdef CONFIG_EXT4_FS_ENCRYPTION -static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *ext4_follow_link(struct dentry *dentry, void **cookie) { struct page *cpage = NULL; char *caddr, *paddr = NULL; @@ -35,12 +35,9 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) int res; u32 plen, max_size = inode->i_sb->s_blocksize; - if (!ext4_encrypted_inode(inode)) - return page_follow_link_light(dentry, nd); - ctx = ext4_get_fname_crypto_ctx(inode, inode->i_sb->s_blocksize); if (IS_ERR(ctx)) - return ctx; + return ERR_CAST(ctx); if (ext4_inode_is_fast_symlink(inode)) { caddr = (char *) EXT4_I(inode)->i_data; @@ -49,7 +46,7 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) cpage = read_mapping_page(inode->i_mapping, 0, NULL); if (IS_ERR(cpage)) { ext4_put_fname_crypto_ctx(&ctx); - return cpage; + return ERR_CAST(cpage); } caddr = kmap(cpage); caddr[size] = 0; @@ -80,13 +77,12 @@ static void *ext4_follow_link(struct dentry *dentry, struct nameidata *nd) /* Null-terminate the name */ if (res <= plen) paddr[res] = '\0'; - nd_set_link(nd, paddr); ext4_put_fname_crypto_ctx(&ctx); if (cpage) { kunmap(cpage); page_cache_release(cpage); } - return NULL; + return *cookie = paddr; errout: ext4_put_fname_crypto_ctx(&ctx); if (cpage) { @@ -97,36 +93,22 @@ errout: return ERR_PTR(res); } -static void ext4_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) -{ - struct page *page = cookie; - - if (!page) { - kfree(nd_get_link(nd)); - } else { - kunmap(page); - page_cache_release(page); - } -} +const struct inode_operations ext4_encrypted_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = ext4_follow_link, + .put_link = kfree_put_link, + .setattr = ext4_setattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext4_listxattr, + .removexattr = generic_removexattr, +}; #endif -static void *ext4_follow_fast_link(struct dentry *dentry, struct nameidata *nd) -{ - struct ext4_inode_info *ei = EXT4_I(d_inode(dentry)); - nd_set_link(nd, (char *) ei->i_data); - return NULL; -} - const struct inode_operations ext4_symlink_inode_operations = { .readlink = generic_readlink, -#ifdef CONFIG_EXT4_FS_ENCRYPTION - .follow_link = ext4_follow_link, - .put_link = ext4_put_link, -#else .follow_link = page_follow_link_light, .put_link = page_put_link, -#endif .setattr = ext4_setattr, .setxattr = generic_setxattr, .getxattr = generic_getxattr, @@ -136,7 +118,7 @@ const struct inode_operations ext4_symlink_inode_operations = { const struct inode_operations ext4_fast_symlink_inode_operations = { .readlink = generic_readlink, - .follow_link = ext4_follow_fast_link, + .follow_link = simple_follow_link, .setattr = ext4_setattr, .setxattr = generic_setxattr, .getxattr = generic_getxattr, diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index b91b0e10678e..1e1aae669fa8 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1513,6 +1513,7 @@ static int f2fs_write_data_pages(struct address_space *mapping, { struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); + bool locked = false; int ret; long diff; @@ -1533,7 +1534,13 @@ static int f2fs_write_data_pages(struct address_space *mapping, diff = nr_pages_to_write(sbi, DATA, wbc); + if (!S_ISDIR(inode->i_mode)) { + mutex_lock(&sbi->writepages); + locked = true; + } ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); + if (locked) + mutex_unlock(&sbi->writepages); f2fs_submit_merged_bio(sbi, DATA, WRITE); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d8921cf2ba9a..8de34ab6d5b1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -625,6 +625,7 @@ struct f2fs_sb_info { struct mutex cp_mutex; /* checkpoint procedure lock */ struct rw_semaphore cp_rwsem; /* blocking FS operations */ struct rw_semaphore node_write; /* locking node writes */ + struct mutex writepages; /* mutex for writepages() */ wait_queue_head_t cp_wait; struct inode_management im[MAX_INO_ENTRY]; /* manage inode cache */ diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c index 7e3794edae42..71765d062914 100644 --- a/fs/f2fs/namei.c +++ b/fs/f2fs/namei.c @@ -296,21 +296,15 @@ fail: return err; } -static void *f2fs_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *f2fs_follow_link(struct dentry *dentry, void **cookie) { - struct page *page; - - page = page_follow_link_light(dentry, nd); - if (IS_ERR(page)) - return page; - - /* this is broken symlink case */ - if (*nd_get_link(nd) == 0) { - kunmap(page); - page_cache_release(page); - return ERR_PTR(-ENOENT); + const char *link = page_follow_link_light(dentry, cookie); + if (!IS_ERR(link) && !*link) { + /* this is broken symlink case */ + page_put_link(NULL, *cookie); + link = ERR_PTR(-ENOENT); } - return page; + return link; } static int f2fs_symlink(struct inode *dir, struct dentry *dentry, diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 160b88346b24..b2dd1b01f076 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1035,6 +1035,7 @@ try_onemore: sbi->raw_super = raw_super; sbi->raw_super_buf = raw_super_buf; mutex_init(&sbi->gc_mutex); + mutex_init(&sbi->writepages); mutex_init(&sbi->cp_mutex); init_rwsem(&sbi->node_write); clear_sbi_flag(sbi, SBI_POR_DOING); diff --git a/fs/fhandle.c b/fs/fhandle.c index 999ff5c3cab0..d59712dfa3e7 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -195,8 +195,9 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, goto out_err; } /* copy the full handle */ - if (copy_from_user(handle, ufh, - sizeof(struct file_handle) + + *handle = f_handle; + if (copy_from_user(&handle->f_handle, + &ufh->f_handle, f_handle.handle_bytes)) { retval = -EFAULT; goto out_handle; diff --git a/fs/freevxfs/vxfs_extern.h b/fs/freevxfs/vxfs_extern.h index 881aa3d217f0..e3dcb4467d92 100644 --- a/fs/freevxfs/vxfs_extern.h +++ b/fs/freevxfs/vxfs_extern.h @@ -50,9 +50,6 @@ extern daddr_t vxfs_bmap1(struct inode *, long); /* vxfs_fshead.c */ extern int vxfs_read_fshead(struct super_block *); -/* vxfs_immed.c */ -extern const struct inode_operations vxfs_immed_symlink_iops; - /* vxfs_inode.c */ extern const struct address_space_operations vxfs_immed_aops; extern struct kmem_cache *vxfs_inode_cachep; diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c index 8b9229e2ca5c..cb84f0fcc72a 100644 --- a/fs/freevxfs/vxfs_immed.c +++ b/fs/freevxfs/vxfs_immed.c @@ -32,29 +32,15 @@ */ #include <linux/fs.h> #include <linux/pagemap.h> -#include <linux/namei.h> #include "vxfs.h" #include "vxfs_extern.h" #include "vxfs_inode.h" -static void * vxfs_immed_follow_link(struct dentry *, struct nameidata *); - static int vxfs_immed_readpage(struct file *, struct page *); /* - * Inode operations for immed symlinks. - * - * Unliked all other operations we do not go through the pagecache, - * but do all work directly on the inode. - */ -const struct inode_operations vxfs_immed_symlink_iops = { - .readlink = generic_readlink, - .follow_link = vxfs_immed_follow_link, -}; - -/* * Address space operations for immed files and directories. */ const struct address_space_operations vxfs_immed_aops = { @@ -62,26 +48,6 @@ const struct address_space_operations vxfs_immed_aops = { }; /** - * vxfs_immed_follow_link - follow immed symlink - * @dp: dentry for the link - * @np: pathname lookup data for the current path walk - * - * Description: - * vxfs_immed_follow_link restarts the pathname lookup with - * the data obtained from @dp. - * - * Returns: - * Zero on success, else a negative error code. - */ -static void * -vxfs_immed_follow_link(struct dentry *dp, struct nameidata *np) -{ - struct vxfs_inode_info *vip = VXFS_INO(d_inode(dp)); - nd_set_link(np, vip->vii_immed.vi_immed); - return NULL; -} - -/** * vxfs_immed_readpage - read part of an immed inode into pagecache * @file: file context (unused) * @page: page frame to fill in. diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 363e3ae25f6b..ef73ed674a27 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -35,6 +35,7 @@ #include <linux/pagemap.h> #include <linux/kernel.h> #include <linux/slab.h> +#include <linux/namei.h> #include "vxfs.h" #include "vxfs_inode.h" @@ -327,8 +328,10 @@ vxfs_iget(struct super_block *sbp, ino_t ino) ip->i_op = &page_symlink_inode_operations; ip->i_mapping->a_ops = &vxfs_aops; } else { - ip->i_op = &vxfs_immed_symlink_iops; - vip->vii_immed.vi_immed[ip->i_size] = '\0'; + ip->i_op = &simple_symlink_inode_operations; + ip->i_link = vip->vii_immed.vi_immed; + nd_terminate_link(ip->i_link, ip->i_size, + sizeof(vip->vii_immed.vi_immed) - 1); } } else init_special_inode(ip, ip->i_mode, old_decode_dev(vip->vii_rdev)); diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 0572bca49f15..5e2e08712d3b 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1365,7 +1365,7 @@ static int fuse_readdir(struct file *file, struct dir_context *ctx) return err; } -static char *read_link(struct dentry *dentry) +static const char *fuse_follow_link(struct dentry *dentry, void **cookie) { struct inode *inode = d_inode(dentry); struct fuse_conn *fc = get_fuse_conn(inode); @@ -1389,28 +1389,12 @@ static char *read_link(struct dentry *dentry) link = ERR_PTR(ret); } else { link[ret] = '\0'; + *cookie = link; } fuse_invalidate_atime(inode); return link; } -static void free_link(char *link) -{ - if (!IS_ERR(link)) - free_page((unsigned long) link); -} - -static void *fuse_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - nd_set_link(nd, read_link(dentry)); - return NULL; -} - -static void fuse_put_link(struct dentry *dentry, struct nameidata *nd, void *c) -{ - free_link(nd_get_link(nd)); -} - static int fuse_dir_open(struct inode *inode, struct file *file) { return fuse_open_common(inode, file, true); @@ -1926,7 +1910,7 @@ static const struct inode_operations fuse_common_inode_operations = { static const struct inode_operations fuse_symlink_inode_operations = { .setattr = fuse_setattr, .follow_link = fuse_follow_link, - .put_link = fuse_put_link, + .put_link = free_page_put_link, .readlink = generic_readlink, .getattr = fuse_getattr, .setxattr = fuse_setxattr, diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 1b3ca7a2e3fc..3a1461de1551 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1548,7 +1548,7 @@ out: * Returns: 0 on success or error code */ -static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *gfs2_follow_link(struct dentry *dentry, void **cookie) { struct gfs2_inode *ip = GFS2_I(d_inode(dentry)); struct gfs2_holder i_gh; @@ -1561,8 +1561,7 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) error = gfs2_glock_nq(&i_gh); if (error) { gfs2_holder_uninit(&i_gh); - nd_set_link(nd, ERR_PTR(error)); - return NULL; + return ERR_PTR(error); } size = (unsigned int)i_size_read(&ip->i_inode); @@ -1586,8 +1585,9 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd) brelse(dibh); out: gfs2_glock_dq_uninit(&i_gh); - nd_set_link(nd, buf); - return NULL; + if (!IS_ERR(buf)) + *cookie = buf; + return buf; } /** diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index ef263174acd2..059597b23f67 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -581,7 +581,7 @@ static int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, if (name == NULL) goto out_put; - fd = file_create(name, mode & S_IFMT); + fd = file_create(name, mode & 0777); if (fd < 0) error = fd; else @@ -892,7 +892,7 @@ static const struct inode_operations hostfs_dir_iops = { .setattr = hostfs_setattr, }; -static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *hostfs_follow_link(struct dentry *dentry, void **cookie) { char *link = __getname(); if (link) { @@ -906,21 +906,18 @@ static void *hostfs_follow_link(struct dentry *dentry, struct nameidata *nd) } if (err < 0) { __putname(link); - link = ERR_PTR(err); + return ERR_PTR(err); } } else { - link = ERR_PTR(-ENOMEM); + return ERR_PTR(-ENOMEM); } - nd_set_link(nd, link); - return NULL; + return *cookie = link; } -static void hostfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) +static void hostfs_put_link(struct inode *unused, void *cookie) { - char *s = nd_get_link(nd); - if (!IS_ERR(s)) - __putname(s); + __putname(cookie); } static const struct inode_operations hostfs_link_iops = { diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index fa2bd5366ecf..2867837909a9 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -642,20 +642,19 @@ static int hppfs_readlink(struct dentry *dentry, char __user *buffer, buflen); } -static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *hppfs_follow_link(struct dentry *dentry, void **cookie) { struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry; - return d_inode(proc_dentry)->i_op->follow_link(proc_dentry, nd); + return d_inode(proc_dentry)->i_op->follow_link(proc_dentry, cookie); } -static void hppfs_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) +static void hppfs_put_link(struct inode *inode, void *cookie) { - struct dentry *proc_dentry = HPPFS_I(d_inode(dentry))->proc_dentry; + struct inode *proc_inode = d_inode(HPPFS_I(inode)->proc_dentry); - if (d_inode(proc_dentry)->i_op->put_link) - d_inode(proc_dentry)->i_op->put_link(proc_dentry, nd, cookie); + if (proc_inode->i_op->put_link) + proc_inode->i_op->put_link(proc_inode, cookie); } static const struct inode_operations hppfs_dir_iops = { diff --git a/fs/inode.c b/fs/inode.c index ea37cd17b53f..e8d62688ed91 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -152,6 +152,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_pipe = NULL; inode->i_bdev = NULL; inode->i_cdev = NULL; + inode->i_link = NULL; inode->i_rdev = 0; inode->dirtied_when = 0; @@ -1584,36 +1585,47 @@ static int update_time(struct inode *inode, struct timespec *time, int flags) * This function automatically handles read only file systems and media, * as well as the "noatime" flag and inode specific "noatime" markers. */ -void touch_atime(const struct path *path) +bool atime_needs_update(const struct path *path, struct inode *inode) { struct vfsmount *mnt = path->mnt; - struct inode *inode = d_inode(path->dentry); struct timespec now; if (inode->i_flags & S_NOATIME) - return; + return false; if (IS_NOATIME(inode)) - return; + return false; if ((inode->i_sb->s_flags & MS_NODIRATIME) && S_ISDIR(inode->i_mode)) - return; + return false; if (mnt->mnt_flags & MNT_NOATIME) - return; + return false; if ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode)) - return; + return false; now = current_fs_time(inode->i_sb); if (!relatime_need_update(mnt, inode, now)) - return; + return false; if (timespec_equal(&inode->i_atime, &now)) + return false; + + return true; +} + +void touch_atime(const struct path *path) +{ + struct vfsmount *mnt = path->mnt; + struct inode *inode = d_inode(path->dentry); + struct timespec now; + + if (!atime_needs_update(path, inode)) return; if (!sb_start_write_trylock(inode->i_sb)) return; - if (__mnt_want_write(mnt)) + if (__mnt_want_write(mnt) != 0) goto skip_update; /* * File systems can error out when updating inodes if they need to @@ -1624,6 +1636,7 @@ void touch_atime(const struct path *path) * We may also fail on filesystems that have the ability to make parts * of the fs read only, e.g. subvolumes in Btrfs. */ + now = current_fs_time(inode->i_sb); update_time(inode, &now, S_ATIME); __mnt_drop_write(mnt); skip_update: diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index b5128c6e63ad..a9079d035ae5 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -842,15 +842,23 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, { jbd2_journal_revoke_header_t *header; int offset, max; + int csum_size = 0; + __u32 rcount; int record_len = 4; header = (jbd2_journal_revoke_header_t *) bh->b_data; offset = sizeof(jbd2_journal_revoke_header_t); - max = be32_to_cpu(header->r_count); + rcount = be32_to_cpu(header->r_count); if (!jbd2_revoke_block_csum_verify(journal, header)) return -EINVAL; + if (jbd2_journal_has_csum_v2or3(journal)) + csum_size = sizeof(struct jbd2_journal_revoke_tail); + if (rcount > journal->j_blocksize - csum_size) + return -EINVAL; + max = rcount; + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) record_len = 8; diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index c6cbaef2bda1..14214da80eb8 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -577,7 +577,7 @@ static void write_one_revoke_record(journal_t *journal, { int csum_size = 0; struct buffer_head *descriptor; - int offset; + int sz, offset; journal_header_t *header; /* If we are already aborting, this all becomes a noop. We @@ -594,9 +594,14 @@ static void write_one_revoke_record(journal_t *journal, if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_revoke_tail); + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + sz = 8; + else + sz = 4; + /* Make sure we have a descriptor with space left for the record */ if (descriptor) { - if (offset >= journal->j_blocksize - csum_size) { + if (offset + sz > journal->j_blocksize - csum_size) { flush_descriptor(journal, descriptor, offset, write_op); descriptor = NULL; } @@ -619,16 +624,13 @@ static void write_one_revoke_record(journal_t *journal, *descriptorp = descriptor; } - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) * ((__be64 *)(&descriptor->b_data[offset])) = cpu_to_be64(record->blocknr); - offset += 8; - - } else { + else * ((__be32 *)(&descriptor->b_data[offset])) = cpu_to_be32(record->blocknr); - offset += 4; - } + offset += sz; *offsetp = offset; } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 5f09370c90a8..ff2f2e6ad311 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -551,7 +551,6 @@ int jbd2_journal_extend(handle_t *handle, int nblocks) int result; int wanted; - WARN_ON(!transaction); if (is_handle_aborted(handle)) return -EROFS; journal = transaction->t_journal; @@ -627,7 +626,6 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) tid_t tid; int need_to_start, ret; - WARN_ON(!transaction); /* If we've had an abort of any type, don't even think about * actually doing the restart! */ if (is_handle_aborted(handle)) @@ -785,7 +783,6 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, int need_copy = 0; unsigned long start_lock, time_lock; - WARN_ON(!transaction); if (is_handle_aborted(handle)) return -EROFS; journal = transaction->t_journal; @@ -1051,7 +1048,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) int err; jbd_debug(5, "journal_head %p\n", jh); - WARN_ON(!transaction); err = -EROFS; if (is_handle_aborted(handle)) goto out; @@ -1266,7 +1262,6 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) struct journal_head *jh; int ret = 0; - WARN_ON(!transaction); if (is_handle_aborted(handle)) return -EROFS; journal = transaction->t_journal; @@ -1397,7 +1392,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) int err = 0; int was_modified = 0; - WARN_ON(!transaction); if (is_handle_aborted(handle)) return -EROFS; journal = transaction->t_journal; @@ -1530,8 +1524,22 @@ int jbd2_journal_stop(handle_t *handle) tid_t tid; pid_t pid; - if (!transaction) - goto free_and_exit; + if (!transaction) { + /* + * Handle is already detached from the transaction so + * there is nothing to do other than decrease a refcount, + * or free the handle if refcount drops to zero + */ + if (--handle->h_ref > 0) { + jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, + handle->h_ref); + return err; + } else { + if (handle->h_rsv_handle) + jbd2_free_handle(handle->h_rsv_handle); + goto free_and_exit; + } + } journal = transaction->t_journal; J_ASSERT(journal_current_handle() == handle); @@ -2373,7 +2381,6 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) transaction_t *transaction = handle->h_transaction; journal_t *journal; - WARN_ON(!transaction); if (is_handle_aborted(handle)) return -EROFS; journal = transaction->t_journal; diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 1ba5c97943b8..81180022923f 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -354,6 +354,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char ret = -ENOMEM; goto fail; } + inode->i_link = f->target; jffs2_dbg(1, "%s(): symlink's target '%s' cached\n", __func__, (char *)f->target); diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index fe5ea080b4ec..60d86e8fba6e 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -294,6 +294,7 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino) case S_IFLNK: inode->i_op = &jffs2_symlink_inode_operations; + inode->i_link = f->target; break; case S_IFDIR: diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c index 1fefa25d0fa5..8ce2f240125b 100644 --- a/fs/jffs2/symlink.c +++ b/fs/jffs2/symlink.c @@ -9,58 +9,15 @@ * */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/kernel.h> -#include <linux/fs.h> -#include <linux/namei.h> #include "nodelist.h" -static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd); - const struct inode_operations jffs2_symlink_inode_operations = { .readlink = generic_readlink, - .follow_link = jffs2_follow_link, + .follow_link = simple_follow_link, .setattr = jffs2_setattr, .setxattr = jffs2_setxattr, .getxattr = jffs2_getxattr, .listxattr = jffs2_listxattr, .removexattr = jffs2_removexattr }; - -static void *jffs2_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct jffs2_inode_info *f = JFFS2_INODE_INFO(d_inode(dentry)); - char *p = (char *)f->target; - - /* - * We don't acquire the f->sem mutex here since the only data we - * use is f->target. - * - * 1. If we are here the inode has already built and f->target has - * to point to the target path. - * 2. Nobody uses f->target (if the inode is symlink's inode). The - * exception is inode freeing function which frees f->target. But - * it can't be called while we are here and before VFS has - * stopped using our f->target string which we provide by means of - * nd_set_link() call. - */ - - if (!p) { - pr_err("%s(): can't find symlink target\n", __func__); - p = ERR_PTR(-EIO); - } - jffs2_dbg(1, "%s(): target path is '%s'\n", - __func__, (char *)f->target); - - nd_set_link(nd, p); - - /* - * We will unlock the f->sem mutex but VFS will use the f->target string. This is safe - * since the only way that may cause f->target to be changed is iput() operation. - * But VFS will not use f->target after iput() has been called. - */ - return NULL; -} - diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 070dc4b33544..6f1cb2b5ee28 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -63,11 +63,12 @@ struct inode *jfs_iget(struct super_block *sb, unsigned long ino) inode->i_mapping->a_ops = &jfs_aops; } else { inode->i_op = &jfs_fast_symlink_inode_operations; + inode->i_link = JFS_IP(inode)->i_inline; /* * The inline data should be null-terminated, but * don't let on-disk corruption crash the kernel */ - JFS_IP(inode)->i_inline[inode->i_size] = '\0'; + inode->i_link[inode->i_size] = '\0'; } } else { inode->i_op = &jfs_file_inode_operations; diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 66db7bc0ed10..e33be921aa41 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -880,7 +880,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, int ssize; /* source pathname size */ struct btstack btstack; struct inode *ip = d_inode(dentry); - unchar *i_fastsymlink; s64 xlen = 0; int bmask = 0, xsize; s64 xaddr; @@ -946,8 +945,8 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, if (ssize <= IDATASIZE) { ip->i_op = &jfs_fast_symlink_inode_operations; - i_fastsymlink = JFS_IP(ip)->i_inline; - memcpy(i_fastsymlink, name, ssize); + ip->i_link = JFS_IP(ip)->i_inline; + memcpy(ip->i_link, name, ssize); ip->i_size = ssize - 1; /* diff --git a/fs/jfs/symlink.c b/fs/jfs/symlink.c index 80f42bcc4ef1..5929e2363cb8 100644 --- a/fs/jfs/symlink.c +++ b/fs/jfs/symlink.c @@ -17,21 +17,13 @@ */ #include <linux/fs.h> -#include <linux/namei.h> #include "jfs_incore.h" #include "jfs_inode.h" #include "jfs_xattr.h" -static void *jfs_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - char *s = JFS_IP(d_inode(dentry))->i_inline; - nd_set_link(nd, s); - return NULL; -} - const struct inode_operations jfs_fast_symlink_inode_operations = { .readlink = generic_readlink, - .follow_link = jfs_follow_link, + .follow_link = simple_follow_link, .setattr = jfs_setattr, .setxattr = jfs_setxattr, .getxattr = jfs_getxattr, diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index f131fc23ffc4..fffca9517321 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -518,7 +518,14 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root, if (!kn) goto err_out1; - ret = ida_simple_get(&root->ino_ida, 1, 0, GFP_KERNEL); + /* + * If the ino of the sysfs entry created for a kmem cache gets + * allocated from an ida layer, which is accounted to the memcg that + * owns the cache, the memcg will get pinned forever. So do not account + * ino ida allocations. + */ + ret = ida_simple_get(&root->ino_ida, 1, 0, + GFP_KERNEL | __GFP_NOACCOUNT); if (ret < 0) goto err_out2; kn->ino = ret; diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c index 8a198898e39a..db272528ab5b 100644 --- a/fs/kernfs/symlink.c +++ b/fs/kernfs/symlink.c @@ -112,25 +112,18 @@ static int kernfs_getlink(struct dentry *dentry, char *path) return error; } -static void *kernfs_iop_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *kernfs_iop_follow_link(struct dentry *dentry, void **cookie) { int error = -ENOMEM; unsigned long page = get_zeroed_page(GFP_KERNEL); - if (page) { - error = kernfs_getlink(dentry, (char *) page); - if (error < 0) - free_page((unsigned long)page); - } - nd_set_link(nd, error ? ERR_PTR(error) : (char *)page); - return NULL; -} - -static void kernfs_iop_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) -{ - char *page = nd_get_link(nd); - if (!IS_ERR(page)) + if (!page) + return ERR_PTR(-ENOMEM); + error = kernfs_getlink(dentry, (char *)page); + if (unlikely(error < 0)) { free_page((unsigned long)page); + return ERR_PTR(error); + } + return *cookie = (char *)page; } const struct inode_operations kernfs_symlink_iops = { @@ -140,7 +133,7 @@ const struct inode_operations kernfs_symlink_iops = { .listxattr = kernfs_iop_listxattr, .readlink = generic_readlink, .follow_link = kernfs_iop_follow_link, - .put_link = kernfs_iop_put_link, + .put_link = free_page_put_link, .setattr = kernfs_iop_setattr, .getattr = kernfs_iop_getattr, .permission = kernfs_iop_permission, diff --git a/fs/libfs.c b/fs/libfs.c index cb1fb4b9b637..65e1feca8b98 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1024,15 +1024,18 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync) } EXPORT_SYMBOL(noop_fsync); -void kfree_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) +void kfree_put_link(struct inode *unused, void *cookie) { - char *s = nd_get_link(nd); - if (!IS_ERR(s)) - kfree(s); + kfree(cookie); } EXPORT_SYMBOL(kfree_put_link); +void free_page_put_link(struct inode *unused, void *cookie) +{ + free_page((unsigned long) cookie); +} +EXPORT_SYMBOL(free_page_put_link); + /* * nop .set_page_dirty method so that people can use .page_mkwrite on * anon inodes. @@ -1093,3 +1096,15 @@ simple_nosetlease(struct file *filp, long arg, struct file_lock **flp, return -EINVAL; } EXPORT_SYMBOL(simple_nosetlease); + +const char *simple_follow_link(struct dentry *dentry, void **cookie) +{ + return d_inode(dentry)->i_link; +} +EXPORT_SYMBOL(simple_follow_link); + +const struct inode_operations simple_symlink_inode_operations = { + .follow_link = simple_follow_link, + .readlink = generic_readlink +}; +EXPORT_SYMBOL(simple_symlink_inode_operations); diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 4cf38f118549..f9b45d46d4c4 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -779,6 +779,7 @@ fail: const struct inode_operations logfs_symlink_iops = { .readlink = generic_readlink, .follow_link = page_follow_link_light, + .put_link = page_put_link, }; const struct inode_operations logfs_dir_iops = { diff --git a/fs/mount.h b/fs/mount.h index 6a61c2b3e385..b5b8082bfa42 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -88,6 +88,7 @@ static inline int is_mounted(struct vfsmount *mnt) extern struct mount *__lookup_mnt(struct vfsmount *, struct dentry *); extern struct mount *__lookup_mnt_last(struct vfsmount *, struct dentry *); +extern int __legitimize_mnt(struct vfsmount *, unsigned); extern bool legitimize_mnt(struct vfsmount *, unsigned); extern void __detach_mounts(struct dentry *dentry); diff --git a/fs/namei.c b/fs/namei.c index 4a8d998b7274..2dad0eaf91d3 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -492,6 +492,7 @@ void path_put(const struct path *path) } EXPORT_SYMBOL(path_put); +#define EMBEDDED_LEVELS 2 struct nameidata { struct path path; struct qstr last; @@ -501,10 +502,139 @@ struct nameidata { unsigned seq, m_seq; int last_type; unsigned depth; - struct file *base; - char *saved_names[MAX_NESTED_LINKS + 1]; + int total_link_count; + struct saved { + struct path link; + void *cookie; + const char *name; + struct inode *inode; + unsigned seq; + } *stack, internal[EMBEDDED_LEVELS]; + struct filename *name; + struct nameidata *saved; + unsigned root_seq; + int dfd; }; +static void set_nameidata(struct nameidata *p, int dfd, struct filename *name) +{ + struct nameidata *old = current->nameidata; + p->stack = p->internal; + p->dfd = dfd; + p->name = name; + p->total_link_count = old ? old->total_link_count : 0; + p->saved = old; + current->nameidata = p; +} + +static void restore_nameidata(void) +{ + struct nameidata *now = current->nameidata, *old = now->saved; + + current->nameidata = old; + if (old) + old->total_link_count = now->total_link_count; + if (now->stack != now->internal) { + kfree(now->stack); + now->stack = now->internal; + } +} + +static int __nd_alloc_stack(struct nameidata *nd) +{ + struct saved *p; + + if (nd->flags & LOOKUP_RCU) { + p= kmalloc(MAXSYMLINKS * sizeof(struct saved), + GFP_ATOMIC); + if (unlikely(!p)) + return -ECHILD; + } else { + p= kmalloc(MAXSYMLINKS * sizeof(struct saved), + GFP_KERNEL); + if (unlikely(!p)) + return -ENOMEM; + } + memcpy(p, nd->internal, sizeof(nd->internal)); + nd->stack = p; + return 0; +} + +static inline int nd_alloc_stack(struct nameidata *nd) +{ + if (likely(nd->depth != EMBEDDED_LEVELS)) + return 0; + if (likely(nd->stack != nd->internal)) + return 0; + return __nd_alloc_stack(nd); +} + +static void drop_links(struct nameidata *nd) +{ + int i = nd->depth; + while (i--) { + struct saved *last = nd->stack + i; + struct inode *inode = last->inode; + if (last->cookie && inode->i_op->put_link) { + inode->i_op->put_link(inode, last->cookie); + last->cookie = NULL; + } + } +} + +static void terminate_walk(struct nameidata *nd) +{ + drop_links(nd); + if (!(nd->flags & LOOKUP_RCU)) { + int i; + path_put(&nd->path); + for (i = 0; i < nd->depth; i++) + path_put(&nd->stack[i].link); + if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { + path_put(&nd->root); + nd->root.mnt = NULL; + } + } else { + nd->flags &= ~LOOKUP_RCU; + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; + rcu_read_unlock(); + } + nd->depth = 0; +} + +/* path_put is needed afterwards regardless of success or failure */ +static bool legitimize_path(struct nameidata *nd, + struct path *path, unsigned seq) +{ + int res = __legitimize_mnt(path->mnt, nd->m_seq); + if (unlikely(res)) { + if (res > 0) + path->mnt = NULL; + path->dentry = NULL; + return false; + } + if (unlikely(!lockref_get_not_dead(&path->dentry->d_lockref))) { + path->dentry = NULL; + return false; + } + return !read_seqcount_retry(&path->dentry->d_seq, seq); +} + +static bool legitimize_links(struct nameidata *nd) +{ + int i; + for (i = 0; i < nd->depth; i++) { + struct saved *last = nd->stack + i; + if (unlikely(!legitimize_path(nd, &last->link, last->seq))) { + drop_links(nd); + nd->depth = i + 1; + return false; + } + } + return true; +} + /* * Path walking has 2 modes, rcu-walk and ref-walk (see * Documentation/filesystems/path-lookup.txt). In situations when we can't @@ -520,35 +650,28 @@ struct nameidata { * unlazy_walk - try to switch to ref-walk mode. * @nd: nameidata pathwalk data * @dentry: child of nd->path.dentry or NULL + * @seq: seq number to check dentry against * Returns: 0 on success, -ECHILD on failure * * unlazy_walk attempts to legitimize the current nd->path, nd->root and dentry * for ref-walk mode. @dentry must be a path found by a do_lookup call on * @nd or NULL. Must be called from rcu-walk context. + * Nothing should touch nameidata between unlazy_walk() failure and + * terminate_walk(). */ -static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) +static int unlazy_walk(struct nameidata *nd, struct dentry *dentry, unsigned seq) { - struct fs_struct *fs = current->fs; struct dentry *parent = nd->path.dentry; BUG_ON(!(nd->flags & LOOKUP_RCU)); - /* - * After legitimizing the bastards, terminate_walk() - * will do the right thing for non-RCU mode, and all our - * subsequent exit cases should rcu_read_unlock() - * before returning. Do vfsmount first; if dentry - * can't be legitimized, just set nd->path.dentry to NULL - * and rely on dput(NULL) being a no-op. - */ - if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) - return -ECHILD; nd->flags &= ~LOOKUP_RCU; - - if (!lockref_get_not_dead(&parent->d_lockref)) { - nd->path.dentry = NULL; - goto out; - } + if (unlikely(!legitimize_links(nd))) + goto out2; + if (unlikely(!legitimize_mnt(nd->path.mnt, nd->m_seq))) + goto out2; + if (unlikely(!lockref_get_not_dead(&parent->d_lockref))) + goto out1; /* * For a negative lookup, the lookup sequence point is the parents @@ -568,7 +691,7 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) } else { if (!lockref_get_not_dead(&dentry->d_lockref)) goto out; - if (read_seqcount_retry(&dentry->d_seq, nd->seq)) + if (read_seqcount_retry(&dentry->d_seq, seq)) goto drop_dentry; } @@ -577,22 +700,24 @@ static int unlazy_walk(struct nameidata *nd, struct dentry *dentry) * still valid and get it if required. */ if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { - spin_lock(&fs->lock); - if (nd->root.mnt != fs->root.mnt || nd->root.dentry != fs->root.dentry) - goto unlock_and_drop_dentry; - path_get(&nd->root); - spin_unlock(&fs->lock); + if (unlikely(!legitimize_path(nd, &nd->root, nd->root_seq))) { + rcu_read_unlock(); + dput(dentry); + return -ECHILD; + } } rcu_read_unlock(); return 0; -unlock_and_drop_dentry: - spin_unlock(&fs->lock); drop_dentry: rcu_read_unlock(); dput(dentry); goto drop_root_mnt; +out2: + nd->path.mnt = NULL; +out1: + nd->path.dentry = NULL; out: rcu_read_unlock(); drop_root_mnt: @@ -601,6 +726,24 @@ drop_root_mnt: return -ECHILD; } +static int unlazy_link(struct nameidata *nd, struct path *link, unsigned seq) +{ + if (unlikely(!legitimize_path(nd, link, seq))) { + drop_links(nd); + nd->depth = 0; + nd->flags &= ~LOOKUP_RCU; + nd->path.mnt = NULL; + nd->path.dentry = NULL; + if (!(nd->flags & LOOKUP_ROOT)) + nd->root.mnt = NULL; + rcu_read_unlock(); + } else if (likely(unlazy_walk(nd, NULL, 0)) == 0) { + return 0; + } + path_put(link); + return -ECHILD; +} + static inline int d_revalidate(struct dentry *dentry, unsigned int flags) { return dentry->d_op->d_revalidate(dentry, flags); @@ -622,26 +765,10 @@ static int complete_walk(struct nameidata *nd) int status; if (nd->flags & LOOKUP_RCU) { - nd->flags &= ~LOOKUP_RCU; if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; - - if (!legitimize_mnt(nd->path.mnt, nd->m_seq)) { - rcu_read_unlock(); + if (unlikely(unlazy_walk(nd, NULL, 0))) return -ECHILD; - } - if (unlikely(!lockref_get_not_dead(&dentry->d_lockref))) { - rcu_read_unlock(); - mntput(nd->path.mnt); - return -ECHILD; - } - if (read_seqcount_retry(&dentry->d_seq, nd->seq)) { - rcu_read_unlock(); - dput(dentry); - mntput(nd->path.mnt); - return -ECHILD; - } - rcu_read_unlock(); } if (likely(!(nd->flags & LOOKUP_JUMPED))) @@ -657,28 +784,25 @@ static int complete_walk(struct nameidata *nd) if (!status) status = -ESTALE; - path_put(&nd->path); return status; } -static __always_inline void set_root(struct nameidata *nd) +static void set_root(struct nameidata *nd) { get_fs_root(current->fs, &nd->root); } -static int link_path_walk(const char *, struct nameidata *); - -static __always_inline unsigned set_root_rcu(struct nameidata *nd) +static unsigned set_root_rcu(struct nameidata *nd) { struct fs_struct *fs = current->fs; - unsigned seq, res; + unsigned seq; do { seq = read_seqcount_begin(&fs->seq); nd->root = fs->root; - res = __read_seqcount_begin(&nd->root.dentry->d_seq); + nd->root_seq = __read_seqcount_begin(&nd->root.dentry->d_seq); } while (read_seqcount_retry(&fs->seq, seq)); - return res; + return nd->root_seq; } static void path_put_conditional(struct path *path, struct nameidata *nd) @@ -704,8 +828,9 @@ static inline void path_to_nameidata(const struct path *path, * Helper to directly jump to a known parsed path from ->follow_link, * caller must have taken a reference to path beforehand. */ -void nd_jump_link(struct nameidata *nd, struct path *path) +void nd_jump_link(struct path *path) { + struct nameidata *nd = current->nameidata; path_put(&nd->path); nd->path = *path; @@ -713,24 +838,14 @@ void nd_jump_link(struct nameidata *nd, struct path *path) nd->flags |= LOOKUP_JUMPED; } -void nd_set_link(struct nameidata *nd, char *path) +static inline void put_link(struct nameidata *nd) { - nd->saved_names[nd->depth] = path; -} -EXPORT_SYMBOL(nd_set_link); - -char *nd_get_link(struct nameidata *nd) -{ - return nd->saved_names[nd->depth]; -} -EXPORT_SYMBOL(nd_get_link); - -static inline void put_link(struct nameidata *nd, struct path *link, void *cookie) -{ - struct inode *inode = link->dentry->d_inode; - if (inode->i_op->put_link) - inode->i_op->put_link(link->dentry, nd, cookie); - path_put(link); + struct saved *last = nd->stack + --nd->depth; + struct inode *inode = last->inode; + if (last->cookie && inode->i_op->put_link) + inode->i_op->put_link(inode, last->cookie); + if (!(nd->flags & LOOKUP_RCU)) + path_put(&last->link); } int sysctl_protected_symlinks __read_mostly = 0; @@ -738,7 +853,6 @@ int sysctl_protected_hardlinks __read_mostly = 0; /** * may_follow_link - Check symlink following for unsafe situations - * @link: The path of the symlink * @nd: nameidata pathwalk data * * In the case of the sysctl_protected_symlinks sysctl being enabled, @@ -752,7 +866,7 @@ int sysctl_protected_hardlinks __read_mostly = 0; * * Returns 0 if following the symlink is allowed, -ve on error. */ -static inline int may_follow_link(struct path *link, struct nameidata *nd) +static inline int may_follow_link(struct nameidata *nd) { const struct inode *inode; const struct inode *parent; @@ -761,7 +875,7 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd) return 0; /* Allowed if owner and follower match. */ - inode = link->dentry->d_inode; + inode = nd->stack[0].inode; if (uid_eq(current_cred()->fsuid, inode->i_uid)) return 0; @@ -774,9 +888,10 @@ static inline int may_follow_link(struct path *link, struct nameidata *nd) if (uid_eq(parent->i_uid, inode->i_uid)) return 0; - audit_log_link_denied("follow_link", link); - path_put_conditional(link, nd); - path_put(&nd->path); + if (nd->flags & LOOKUP_RCU) + return -ECHILD; + + audit_log_link_denied("follow_link", &nd->stack[0].link); return -EACCES; } @@ -849,82 +964,68 @@ static int may_linkat(struct path *link) return -EPERM; } -static __always_inline int -follow_link(struct path *link, struct nameidata *nd, void **p) +static __always_inline +const char *get_link(struct nameidata *nd) { - struct dentry *dentry = link->dentry; + struct saved *last = nd->stack + nd->depth - 1; + struct dentry *dentry = last->link.dentry; + struct inode *inode = last->inode; int error; - char *s; - - BUG_ON(nd->flags & LOOKUP_RCU); - - if (link->mnt == nd->path.mnt) - mntget(link->mnt); + const char *res; - error = -ELOOP; - if (unlikely(current->total_link_count >= 40)) - goto out_put_nd_path; - - cond_resched(); - current->total_link_count++; - - touch_atime(link); - nd_set_link(nd, NULL); + if (!(nd->flags & LOOKUP_RCU)) { + touch_atime(&last->link); + cond_resched(); + } else if (atime_needs_update(&last->link, inode)) { + if (unlikely(unlazy_walk(nd, NULL, 0))) + return ERR_PTR(-ECHILD); + touch_atime(&last->link); + } - error = security_inode_follow_link(link->dentry, nd); - if (error) - goto out_put_nd_path; + error = security_inode_follow_link(dentry, inode, + nd->flags & LOOKUP_RCU); + if (unlikely(error)) + return ERR_PTR(error); nd->last_type = LAST_BIND; - *p = dentry->d_inode->i_op->follow_link(dentry, nd); - error = PTR_ERR(*p); - if (IS_ERR(*p)) - goto out_put_nd_path; - - error = 0; - s = nd_get_link(nd); - if (s) { - if (unlikely(IS_ERR(s))) { - path_put(&nd->path); - put_link(nd, link, *p); - return PTR_ERR(s); + res = inode->i_link; + if (!res) { + if (nd->flags & LOOKUP_RCU) { + if (unlikely(unlazy_walk(nd, NULL, 0))) + return ERR_PTR(-ECHILD); } - if (*s == '/') { + res = inode->i_op->follow_link(dentry, &last->cookie); + if (IS_ERR_OR_NULL(res)) { + last->cookie = NULL; + return res; + } + } + if (*res == '/') { + if (nd->flags & LOOKUP_RCU) { + struct dentry *d; + if (!nd->root.mnt) + set_root_rcu(nd); + nd->path = nd->root; + d = nd->path.dentry; + nd->inode = d->d_inode; + nd->seq = nd->root_seq; + if (unlikely(read_seqcount_retry(&d->d_seq, nd->seq))) + return ERR_PTR(-ECHILD); + } else { if (!nd->root.mnt) set_root(nd); path_put(&nd->path); nd->path = nd->root; path_get(&nd->root); - nd->flags |= LOOKUP_JUMPED; + nd->inode = nd->path.dentry->d_inode; } - nd->inode = nd->path.dentry->d_inode; - error = link_path_walk(s, nd); - if (unlikely(error)) - put_link(nd, link, *p); + nd->flags |= LOOKUP_JUMPED; + while (unlikely(*++res == '/')) + ; } - - return error; - -out_put_nd_path: - *p = NULL; - path_put(&nd->path); - path_put(link); - return error; -} - -static int follow_up_rcu(struct path *path) -{ - struct mount *mnt = real_mount(path->mnt); - struct mount *parent; - struct dentry *mountpoint; - - parent = mnt->mnt_parent; - if (&parent->mnt == path->mnt) - return 0; - mountpoint = mnt->mnt_mountpoint; - path->dentry = mountpoint; - path->mnt = &parent->mnt; - return 1; + if (!*res) + res = NULL; + return res; } /* @@ -965,7 +1066,7 @@ EXPORT_SYMBOL(follow_up); * - return -EISDIR to tell follow_managed() to stop and return the path we * were called with. */ -static int follow_automount(struct path *path, unsigned flags, +static int follow_automount(struct path *path, struct nameidata *nd, bool *need_mntput) { struct vfsmount *mnt; @@ -985,13 +1086,13 @@ static int follow_automount(struct path *path, unsigned flags, * as being automount points. These will need the attentions * of the daemon to instantiate them before they can be used. */ - if (!(flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | - LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && + if (!(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY | + LOOKUP_OPEN | LOOKUP_CREATE | LOOKUP_AUTOMOUNT)) && path->dentry->d_inode) return -EISDIR; - current->total_link_count++; - if (current->total_link_count >= 40) + nd->total_link_count++; + if (nd->total_link_count >= 40) return -ELOOP; mnt = path->dentry->d_op->d_automount(path); @@ -1005,7 +1106,7 @@ static int follow_automount(struct path *path, unsigned flags, * the path being looked up; if it wasn't then the remainder of * the path is inaccessible and we should say so. */ - if (PTR_ERR(mnt) == -EISDIR && (flags & LOOKUP_PARENT)) + if (PTR_ERR(mnt) == -EISDIR && (nd->flags & LOOKUP_PARENT)) return -EREMOTE; return PTR_ERR(mnt); } @@ -1045,7 +1146,7 @@ static int follow_automount(struct path *path, unsigned flags, * * Serialization is taken care of in namespace.c */ -static int follow_managed(struct path *path, unsigned flags) +static int follow_managed(struct path *path, struct nameidata *nd) { struct vfsmount *mnt = path->mnt; /* held by caller, must be left alone */ unsigned managed; @@ -1089,7 +1190,7 @@ static int follow_managed(struct path *path, unsigned flags) /* Handle an automount point */ if (managed & DCACHE_NEED_AUTOMOUNT) { - ret = follow_automount(path, flags, &need_mntput); + ret = follow_automount(path, nd, &need_mntput); if (ret < 0) break; continue; @@ -1103,7 +1204,11 @@ static int follow_managed(struct path *path, unsigned flags) mntput(path->mnt); if (ret == -EISDIR) ret = 0; - return ret < 0 ? ret : need_mntput; + if (need_mntput) + nd->flags |= LOOKUP_JUMPED; + if (unlikely(ret < 0)) + path_put_conditional(path, nd); + return ret; } int follow_down_one(struct path *path) @@ -1133,7 +1238,7 @@ static inline int managed_dentry_rcu(struct dentry *dentry) * we meet a managed dentry that would need blocking. */ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, - struct inode **inode) + struct inode **inode, unsigned *seqp) { for (;;) { struct mount *mounted; @@ -1160,7 +1265,7 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path, path->mnt = &mounted->mnt; path->dentry = mounted->mnt.mnt_root; nd->flags |= LOOKUP_JUMPED; - nd->seq = read_seqcount_begin(&path->dentry->d_seq); + *seqp = read_seqcount_begin(&path->dentry->d_seq); /* * Update the inode too. We don't need to re-check the * dentry sequence number here after this d_inode read, @@ -1179,10 +1284,8 @@ static int follow_dotdot_rcu(struct nameidata *nd) set_root_rcu(nd); while (1) { - if (nd->path.dentry == nd->root.dentry && - nd->path.mnt == nd->root.mnt) { + if (path_equal(&nd->path, &nd->root)) break; - } if (nd->path.dentry != nd->path.mnt->mnt_root) { struct dentry *old = nd->path.dentry; struct dentry *parent = old->d_parent; @@ -1190,38 +1293,42 @@ static int follow_dotdot_rcu(struct nameidata *nd) inode = parent->d_inode; seq = read_seqcount_begin(&parent->d_seq); - if (read_seqcount_retry(&old->d_seq, nd->seq)) - goto failed; + if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq))) + return -ECHILD; nd->path.dentry = parent; nd->seq = seq; break; + } else { + struct mount *mnt = real_mount(nd->path.mnt); + struct mount *mparent = mnt->mnt_parent; + struct dentry *mountpoint = mnt->mnt_mountpoint; + struct inode *inode2 = mountpoint->d_inode; + unsigned seq = read_seqcount_begin(&mountpoint->d_seq); + if (unlikely(read_seqretry(&mount_lock, nd->m_seq))) + return -ECHILD; + if (&mparent->mnt == nd->path.mnt) + break; + /* we know that mountpoint was pinned */ + nd->path.dentry = mountpoint; + nd->path.mnt = &mparent->mnt; + inode = inode2; + nd->seq = seq; } - if (!follow_up_rcu(&nd->path)) - break; - inode = nd->path.dentry->d_inode; - nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } - while (d_mountpoint(nd->path.dentry)) { + while (unlikely(d_mountpoint(nd->path.dentry))) { struct mount *mounted; mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry); + if (unlikely(read_seqretry(&mount_lock, nd->m_seq))) + return -ECHILD; if (!mounted) break; nd->path.mnt = &mounted->mnt; nd->path.dentry = mounted->mnt.mnt_root; inode = nd->path.dentry->d_inode; nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); - if (read_seqretry(&mount_lock, nd->m_seq)) - goto failed; } nd->inode = inode; return 0; - -failed: - nd->flags &= ~LOOKUP_RCU; - if (!(nd->flags & LOOKUP_ROOT)) - nd->root.mnt = NULL; - rcu_read_unlock(); - return -ECHILD; } /* @@ -1400,7 +1507,8 @@ static struct dentry *__lookup_hash(struct qstr *name, * It _is_ time-critical. */ static int lookup_fast(struct nameidata *nd, - struct path *path, struct inode **inode) + struct path *path, struct inode **inode, + unsigned *seqp) { struct vfsmount *mnt = nd->path.mnt; struct dentry *dentry, *parent = nd->path.dentry; @@ -1415,6 +1523,7 @@ static int lookup_fast(struct nameidata *nd, */ if (nd->flags & LOOKUP_RCU) { unsigned seq; + bool negative; dentry = __d_lookup_rcu(parent, &nd->last, &seq); if (!dentry) goto unlazy; @@ -1423,9 +1532,12 @@ static int lookup_fast(struct nameidata *nd, * This sequence count validates that the inode matches * the dentry name information from lookup. */ - *inode = dentry->d_inode; + *inode = d_backing_inode(dentry); + negative = d_is_negative(dentry); if (read_seqcount_retry(&dentry->d_seq, seq)) return -ECHILD; + if (negative) + return -ENOENT; /* * This sequence count validates that the parent had no @@ -1436,8 +1548,8 @@ static int lookup_fast(struct nameidata *nd, */ if (__read_seqcount_retry(&parent->d_seq, nd->seq)) return -ECHILD; - nd->seq = seq; + *seqp = seq; if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) { status = d_revalidate(dentry, nd->flags); if (unlikely(status <= 0)) { @@ -1448,10 +1560,10 @@ static int lookup_fast(struct nameidata *nd, } path->mnt = mnt; path->dentry = dentry; - if (likely(__follow_mount_rcu(nd, path, inode))) + if (likely(__follow_mount_rcu(nd, path, inode, seqp))) return 0; unlazy: - if (unlazy_walk(nd, dentry)) + if (unlazy_walk(nd, dentry, seq)) return -ECHILD; } else { dentry = __d_lookup(parent, &nd->last); @@ -1472,17 +1584,16 @@ unlazy: goto need_lookup; } + if (unlikely(d_is_negative(dentry))) { + dput(dentry); + return -ENOENT; + } path->mnt = mnt; path->dentry = dentry; - err = follow_managed(path, nd->flags); - if (unlikely(err < 0)) { - path_put_conditional(path, nd); - return err; - } - if (err) - nd->flags |= LOOKUP_JUMPED; - *inode = path->dentry->d_inode; - return 0; + err = follow_managed(path, nd); + if (likely(!err)) + *inode = d_backing_inode(path->dentry); + return err; need_lookup: return 1; @@ -1492,7 +1603,6 @@ need_lookup: static int lookup_slow(struct nameidata *nd, struct path *path) { struct dentry *dentry, *parent; - int err; parent = nd->path.dentry; BUG_ON(nd->inode != parent->d_inode); @@ -1504,14 +1614,7 @@ static int lookup_slow(struct nameidata *nd, struct path *path) return PTR_ERR(dentry); path->mnt = nd->path.mnt; path->dentry = dentry; - err = follow_managed(path, nd->flags); - if (unlikely(err < 0)) { - path_put_conditional(path, nd); - return err; - } - if (err) - nd->flags |= LOOKUP_JUMPED; - return 0; + return follow_managed(path, nd); } static inline int may_lookup(struct nameidata *nd) @@ -1520,7 +1623,7 @@ static inline int may_lookup(struct nameidata *nd) int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK); if (err != -ECHILD) return err; - if (unlazy_walk(nd, NULL)) + if (unlazy_walk(nd, NULL, 0)) return -ECHILD; } return inode_permission(nd->inode, MAY_EXEC); @@ -1530,24 +1633,45 @@ static inline int handle_dots(struct nameidata *nd, int type) { if (type == LAST_DOTDOT) { if (nd->flags & LOOKUP_RCU) { - if (follow_dotdot_rcu(nd)) - return -ECHILD; + return follow_dotdot_rcu(nd); } else follow_dotdot(nd); } return 0; } -static void terminate_walk(struct nameidata *nd) +static int pick_link(struct nameidata *nd, struct path *link, + struct inode *inode, unsigned seq) { + int error; + struct saved *last; + if (unlikely(nd->total_link_count++ >= MAXSYMLINKS)) { + path_to_nameidata(link, nd); + return -ELOOP; + } if (!(nd->flags & LOOKUP_RCU)) { - path_put(&nd->path); - } else { - nd->flags &= ~LOOKUP_RCU; - if (!(nd->flags & LOOKUP_ROOT)) - nd->root.mnt = NULL; - rcu_read_unlock(); + if (link->mnt == nd->path.mnt) + mntget(link->mnt); + } + error = nd_alloc_stack(nd); + if (unlikely(error)) { + if (error == -ECHILD) { + if (unlikely(unlazy_link(nd, link, seq))) + return -ECHILD; + error = nd_alloc_stack(nd); + } + if (error) { + path_put(link); + return error; + } } + + last = nd->stack + nd->depth++; + last->link = *link; + last->cookie = NULL; + last->inode = inode; + last->seq = seq; + return 1; } /* @@ -1556,98 +1680,68 @@ static void terminate_walk(struct nameidata *nd) * so we keep a cache of "no, this doesn't need follow_link" * for the common case. */ -static inline int should_follow_link(struct dentry *dentry, int follow) +static inline int should_follow_link(struct nameidata *nd, struct path *link, + int follow, + struct inode *inode, unsigned seq) { - return unlikely(d_is_symlink(dentry)) ? follow : 0; + if (likely(!d_is_symlink(link->dentry))) + return 0; + if (!follow) + return 0; + return pick_link(nd, link, inode, seq); } -static inline int walk_component(struct nameidata *nd, struct path *path, - int follow) +enum {WALK_GET = 1, WALK_PUT = 2}; + +static int walk_component(struct nameidata *nd, int flags) { + struct path path; struct inode *inode; + unsigned seq; int err; /* * "." and ".." are special - ".." especially so because it has * to be able to know about the current root directory and * parent relationships. */ - if (unlikely(nd->last_type != LAST_NORM)) - return handle_dots(nd, nd->last_type); - err = lookup_fast(nd, path, &inode); + if (unlikely(nd->last_type != LAST_NORM)) { + err = handle_dots(nd, nd->last_type); + if (flags & WALK_PUT) + put_link(nd); + return err; + } + err = lookup_fast(nd, &path, &inode, &seq); if (unlikely(err)) { if (err < 0) - goto out_err; + return err; - err = lookup_slow(nd, path); + err = lookup_slow(nd, &path); if (err < 0) - goto out_err; + return err; - inode = path->dentry->d_inode; + inode = d_backing_inode(path.dentry); + seq = 0; /* we are already out of RCU mode */ + err = -ENOENT; + if (d_is_negative(path.dentry)) + goto out_path_put; } - err = -ENOENT; - if (d_is_negative(path->dentry)) - goto out_path_put; - if (should_follow_link(path->dentry, follow)) { - if (nd->flags & LOOKUP_RCU) { - if (unlikely(nd->path.mnt != path->mnt || - unlazy_walk(nd, path->dentry))) { - err = -ECHILD; - goto out_err; - } - } - BUG_ON(inode != path->dentry->d_inode); - return 1; - } - path_to_nameidata(path, nd); + if (flags & WALK_PUT) + put_link(nd); + err = should_follow_link(nd, &path, flags & WALK_GET, inode, seq); + if (unlikely(err)) + return err; + path_to_nameidata(&path, nd); nd->inode = inode; + nd->seq = seq; return 0; out_path_put: - path_to_nameidata(path, nd); -out_err: - terminate_walk(nd); + path_to_nameidata(&path, nd); return err; } /* - * This limits recursive symlink follows to 8, while - * limiting consecutive symlinks to 40. - * - * Without that kind of total limit, nasty chains of consecutive - * symlinks can cause almost arbitrarily long lookups. - */ -static inline int nested_symlink(struct path *path, struct nameidata *nd) -{ - int res; - - if (unlikely(current->link_count >= MAX_NESTED_LINKS)) { - path_put_conditional(path, nd); - path_put(&nd->path); - return -ELOOP; - } - BUG_ON(nd->depth >= MAX_NESTED_LINKS); - - nd->depth++; - current->link_count++; - - do { - struct path link = *path; - void *cookie; - - res = follow_link(&link, nd, &cookie); - if (res) - break; - res = walk_component(nd, path, LOOKUP_FOLLOW); - put_link(nd, &link, cookie); - } while (res > 0); - - current->link_count--; - nd->depth--; - return res; -} - -/* * We can do the critical dentry name comparison and hashing * operations one word at a time, but we are limited to: * @@ -1773,9 +1867,8 @@ static inline u64 hash_name(const char *name) */ static int link_path_walk(const char *name, struct nameidata *nd) { - struct path next; int err; - + while (*name=='/') name++; if (!*name) @@ -1788,7 +1881,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) err = may_lookup(nd); if (err) - break; + return err; hash_len = hash_name(name); @@ -1810,7 +1903,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) struct qstr this = { { .hash_len = hash_len }, .name = name }; err = parent->d_op->d_hash(parent, &this); if (err < 0) - break; + return err; hash_len = this.hash_len; name = this.name; } @@ -1822,7 +1915,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) name += hashlen_len(hash_len); if (!*name) - return 0; + goto OK; /* * If it wasn't NUL, we know it was '/'. Skip that * slash, and continue until no more slashes. @@ -1830,57 +1923,73 @@ static int link_path_walk(const char *name, struct nameidata *nd) do { name++; } while (unlikely(*name == '/')); - if (!*name) - return 0; - - err = walk_component(nd, &next, LOOKUP_FOLLOW); + if (unlikely(!*name)) { +OK: + /* pathname body, done */ + if (!nd->depth) + return 0; + name = nd->stack[nd->depth - 1].name; + /* trailing symlink, done */ + if (!name) + return 0; + /* last component of nested symlink */ + err = walk_component(nd, WALK_GET | WALK_PUT); + } else { + err = walk_component(nd, WALK_GET); + } if (err < 0) return err; if (err) { - err = nested_symlink(&next, nd); - if (err) - return err; - } - if (!d_can_lookup(nd->path.dentry)) { - err = -ENOTDIR; - break; + const char *s = get_link(nd); + + if (unlikely(IS_ERR(s))) + return PTR_ERR(s); + err = 0; + if (unlikely(!s)) { + /* jumped */ + put_link(nd); + } else { + nd->stack[nd->depth - 1].name = name; + name = s; + continue; + } } + if (unlikely(!d_can_lookup(nd->path.dentry))) + return -ENOTDIR; } - terminate_walk(nd); - return err; } -static int path_init(int dfd, const struct filename *name, unsigned int flags, - struct nameidata *nd) +static const char *path_init(struct nameidata *nd, unsigned flags) { int retval = 0; - const char *s = name->name; + const char *s = nd->name->name; nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; nd->depth = 0; - nd->base = NULL; + nd->total_link_count = 0; if (flags & LOOKUP_ROOT) { struct dentry *root = nd->root.dentry; struct inode *inode = root->d_inode; if (*s) { if (!d_can_lookup(root)) - return -ENOTDIR; + return ERR_PTR(-ENOTDIR); retval = inode_permission(inode, MAY_EXEC); if (retval) - return retval; + return ERR_PTR(retval); } nd->path = nd->root; nd->inode = inode; if (flags & LOOKUP_RCU) { rcu_read_lock(); nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); + nd->root_seq = nd->seq; nd->m_seq = read_seqbegin(&mount_lock); } else { path_get(&nd->path); } - goto done; + return s; } nd->root.mnt = NULL; @@ -1895,7 +2004,7 @@ static int path_init(int dfd, const struct filename *name, unsigned int flags, path_get(&nd->root); } nd->path = nd->root; - } else if (dfd == AT_FDCWD) { + } else if (nd->dfd == AT_FDCWD) { if (flags & LOOKUP_RCU) { struct fs_struct *fs = current->fs; unsigned seq; @@ -1912,180 +2021,205 @@ static int path_init(int dfd, const struct filename *name, unsigned int flags, } } else { /* Caller must check execute permissions on the starting path component */ - struct fd f = fdget_raw(dfd); + struct fd f = fdget_raw(nd->dfd); struct dentry *dentry; if (!f.file) - return -EBADF; + return ERR_PTR(-EBADF); dentry = f.file->f_path.dentry; if (*s) { if (!d_can_lookup(dentry)) { fdput(f); - return -ENOTDIR; + return ERR_PTR(-ENOTDIR); } } nd->path = f.file->f_path; if (flags & LOOKUP_RCU) { - if (f.flags & FDPUT_FPUT) - nd->base = f.file; - nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq); rcu_read_lock(); + nd->inode = nd->path.dentry->d_inode; + nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq); } else { path_get(&nd->path); - fdput(f); + nd->inode = nd->path.dentry->d_inode; } + fdput(f); + return s; } nd->inode = nd->path.dentry->d_inode; if (!(flags & LOOKUP_RCU)) - goto done; + return s; if (likely(!read_seqcount_retry(&nd->path.dentry->d_seq, nd->seq))) - goto done; + return s; if (!(nd->flags & LOOKUP_ROOT)) nd->root.mnt = NULL; rcu_read_unlock(); - return -ECHILD; -done: - current->total_link_count = 0; - return link_path_walk(s, nd); + return ERR_PTR(-ECHILD); } -static void path_cleanup(struct nameidata *nd) +static const char *trailing_symlink(struct nameidata *nd) { - if (nd->root.mnt && !(nd->flags & LOOKUP_ROOT)) { - path_put(&nd->root); - nd->root.mnt = NULL; - } - if (unlikely(nd->base)) - fput(nd->base); + const char *s; + int error = may_follow_link(nd); + if (unlikely(error)) + return ERR_PTR(error); + nd->flags |= LOOKUP_PARENT; + nd->stack[0].name = NULL; + s = get_link(nd); + return s ? s : ""; } -static inline int lookup_last(struct nameidata *nd, struct path *path) +static inline int lookup_last(struct nameidata *nd) { if (nd->last_type == LAST_NORM && nd->last.name[nd->last.len]) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; nd->flags &= ~LOOKUP_PARENT; - return walk_component(nd, path, nd->flags & LOOKUP_FOLLOW); + return walk_component(nd, + nd->flags & LOOKUP_FOLLOW + ? nd->depth + ? WALK_PUT | WALK_GET + : WALK_GET + : 0); } /* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ -static int path_lookupat(int dfd, const struct filename *name, - unsigned int flags, struct nameidata *nd) +static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path) { - struct path path; + const char *s = path_init(nd, flags); int err; - /* - * Path walking is largely split up into 2 different synchronisation - * schemes, rcu-walk and ref-walk (explained in - * Documentation/filesystems/path-lookup.txt). These share much of the - * path walk code, but some things particularly setup, cleanup, and - * following mounts are sufficiently divergent that functions are - * duplicated. Typically there is a function foo(), and its RCU - * analogue, foo_rcu(). - * - * -ECHILD is the error number of choice (just to avoid clashes) that - * is returned if some aspect of an rcu-walk fails. Such an error must - * be handled by restarting a traditional ref-walk (which will always - * be able to complete). - */ - err = path_init(dfd, name, flags, nd); - if (!err && !(flags & LOOKUP_PARENT)) { - err = lookup_last(nd, &path); - while (err > 0) { - void *cookie; - struct path link = path; - err = may_follow_link(&link, nd); - if (unlikely(err)) - break; - nd->flags |= LOOKUP_PARENT; - err = follow_link(&link, nd, &cookie); - if (err) - break; - err = lookup_last(nd, &path); - put_link(nd, &link, cookie); + if (IS_ERR(s)) + return PTR_ERR(s); + while (!(err = link_path_walk(s, nd)) + && ((err = lookup_last(nd)) > 0)) { + s = trailing_symlink(nd); + if (IS_ERR(s)) { + err = PTR_ERR(s); + break; } } - if (!err) err = complete_walk(nd); - if (!err && nd->flags & LOOKUP_DIRECTORY) { - if (!d_can_lookup(nd->path.dentry)) { - path_put(&nd->path); + if (!err && nd->flags & LOOKUP_DIRECTORY) + if (!d_can_lookup(nd->path.dentry)) err = -ENOTDIR; - } + if (!err) { + *path = nd->path; + nd->path.mnt = NULL; + nd->path.dentry = NULL; } - - path_cleanup(nd); + terminate_walk(nd); return err; } -static int filename_lookup(int dfd, struct filename *name, - unsigned int flags, struct nameidata *nd) +static int filename_lookup(int dfd, struct filename *name, unsigned flags, + struct path *path, struct path *root) { - int retval = path_lookupat(dfd, name, flags | LOOKUP_RCU, nd); + int retval; + struct nameidata nd; + if (IS_ERR(name)) + return PTR_ERR(name); + if (unlikely(root)) { + nd.root = *root; + flags |= LOOKUP_ROOT; + } + set_nameidata(&nd, dfd, name); + retval = path_lookupat(&nd, flags | LOOKUP_RCU, path); if (unlikely(retval == -ECHILD)) - retval = path_lookupat(dfd, name, flags, nd); + retval = path_lookupat(&nd, flags, path); if (unlikely(retval == -ESTALE)) - retval = path_lookupat(dfd, name, flags | LOOKUP_REVAL, nd); + retval = path_lookupat(&nd, flags | LOOKUP_REVAL, path); if (likely(!retval)) - audit_inode(name, nd->path.dentry, flags & LOOKUP_PARENT); + audit_inode(name, path->dentry, flags & LOOKUP_PARENT); + restore_nameidata(); + putname(name); return retval; } +/* Returns 0 and nd will be valid on success; Retuns error, otherwise. */ +static int path_parentat(struct nameidata *nd, unsigned flags, + struct path *parent) +{ + const char *s = path_init(nd, flags); + int err; + if (IS_ERR(s)) + return PTR_ERR(s); + err = link_path_walk(s, nd); + if (!err) + err = complete_walk(nd); + if (!err) { + *parent = nd->path; + nd->path.mnt = NULL; + nd->path.dentry = NULL; + } + terminate_walk(nd); + return err; +} + +static struct filename *filename_parentat(int dfd, struct filename *name, + unsigned int flags, struct path *parent, + struct qstr *last, int *type) +{ + int retval; + struct nameidata nd; + + if (IS_ERR(name)) + return name; + set_nameidata(&nd, dfd, name); + retval = path_parentat(&nd, flags | LOOKUP_RCU, parent); + if (unlikely(retval == -ECHILD)) + retval = path_parentat(&nd, flags, parent); + if (unlikely(retval == -ESTALE)) + retval = path_parentat(&nd, flags | LOOKUP_REVAL, parent); + if (likely(!retval)) { + *last = nd.last; + *type = nd.last_type; + audit_inode(name, parent->dentry, LOOKUP_PARENT); + } else { + putname(name); + name = ERR_PTR(retval); + } + restore_nameidata(); + return name; +} + /* does lookup, returns the object with parent locked */ struct dentry *kern_path_locked(const char *name, struct path *path) { - struct filename *filename = getname_kernel(name); - struct nameidata nd; + struct filename *filename; struct dentry *d; - int err; + struct qstr last; + int type; + filename = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path, + &last, &type); if (IS_ERR(filename)) return ERR_CAST(filename); - - err = filename_lookup(AT_FDCWD, filename, LOOKUP_PARENT, &nd); - if (err) { - d = ERR_PTR(err); - goto out; - } - if (nd.last_type != LAST_NORM) { - path_put(&nd.path); - d = ERR_PTR(-EINVAL); - goto out; + if (unlikely(type != LAST_NORM)) { + path_put(path); + putname(filename); + return ERR_PTR(-EINVAL); } - mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - d = __lookup_hash(&nd.last, nd.path.dentry, 0); + mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT); + d = __lookup_hash(&last, path->dentry, 0); if (IS_ERR(d)) { - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - path_put(&nd.path); - goto out; + mutex_unlock(&path->dentry->d_inode->i_mutex); + path_put(path); } - *path = nd.path; -out: putname(filename); return d; } int kern_path(const char *name, unsigned int flags, struct path *path) { - struct nameidata nd; - struct filename *filename = getname_kernel(name); - int res = PTR_ERR(filename); - - if (!IS_ERR(filename)) { - res = filename_lookup(AT_FDCWD, filename, flags, &nd); - putname(filename); - if (!res) - *path = nd.path; - } - return res; + return filename_lookup(AT_FDCWD, getname_kernel(name), + flags, path, NULL); } EXPORT_SYMBOL(kern_path); @@ -2101,36 +2235,13 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, const char *name, unsigned int flags, struct path *path) { - struct filename *filename = getname_kernel(name); - int err = PTR_ERR(filename); - - BUG_ON(flags & LOOKUP_PARENT); - - /* the first argument of filename_lookup() is ignored with LOOKUP_ROOT */ - if (!IS_ERR(filename)) { - struct nameidata nd; - nd.root.dentry = dentry; - nd.root.mnt = mnt; - err = filename_lookup(AT_FDCWD, filename, - flags | LOOKUP_ROOT, &nd); - if (!err) - *path = nd.path; - putname(filename); - } - return err; + struct path root = {.mnt = mnt, .dentry = dentry}; + /* the first argument of filename_lookup() is ignored with root */ + return filename_lookup(AT_FDCWD, getname_kernel(name), + flags , path, &root); } EXPORT_SYMBOL(vfs_path_lookup); -/* - * Restricted form of lookup. Doesn't follow links, single-component only, - * needs parent already locked. Doesn't follow mounts. - * SMP-safe. - */ -static struct dentry *lookup_hash(struct nameidata *nd) -{ - return __lookup_hash(&nd->last, nd->path.dentry, nd->flags); -} - /** * lookup_one_len - filesystem helper to lookup single pathname component * @name: pathname component to lookup @@ -2185,27 +2296,10 @@ EXPORT_SYMBOL(lookup_one_len); int user_path_at_empty(int dfd, const char __user *name, unsigned flags, struct path *path, int *empty) { - struct nameidata nd; - struct filename *tmp = getname_flags(name, flags, empty); - int err = PTR_ERR(tmp); - if (!IS_ERR(tmp)) { - - BUG_ON(flags & LOOKUP_PARENT); - - err = filename_lookup(dfd, tmp, flags, &nd); - putname(tmp); - if (!err) - *path = nd.path; - } - return err; -} - -int user_path_at(int dfd, const char __user *name, unsigned flags, - struct path *path) -{ - return user_path_at_empty(dfd, name, flags, path, NULL); + return filename_lookup(dfd, getname_flags(name, flags, empty), + flags, path, NULL); } -EXPORT_SYMBOL(user_path_at); +EXPORT_SYMBOL(user_path_at_empty); /* * NB: most callers don't do anything directly with the reference to the @@ -2213,26 +2307,16 @@ EXPORT_SYMBOL(user_path_at); * allocated by getname. So we must hold the reference to it until all * path-walking is complete. */ -static struct filename * -user_path_parent(int dfd, const char __user *path, struct nameidata *nd, +static inline struct filename * +user_path_parent(int dfd, const char __user *path, + struct path *parent, + struct qstr *last, + int *type, unsigned int flags) { - struct filename *s = getname(path); - int error; - /* only LOOKUP_REVAL is allowed in extra flags */ - flags &= LOOKUP_REVAL; - - if (IS_ERR(s)) - return s; - - error = filename_lookup(dfd, s, flags | LOOKUP_PARENT, nd); - if (error) { - putname(s); - return ERR_PTR(error); - } - - return s; + return filename_parentat(dfd, getname(path), flags & LOOKUP_REVAL, + parent, last, type); } /** @@ -2271,10 +2355,8 @@ mountpoint_last(struct nameidata *nd, struct path *path) /* If we're in rcuwalk, drop out of it to handle last component */ if (nd->flags & LOOKUP_RCU) { - if (unlazy_walk(nd, NULL)) { - error = -ECHILD; - goto out; - } + if (unlazy_walk(nd, NULL, 0)) + return -ECHILD; } nd->flags &= ~LOOKUP_PARENT; @@ -2282,7 +2364,7 @@ mountpoint_last(struct nameidata *nd, struct path *path) if (unlikely(nd->last_type != LAST_NORM)) { error = handle_dots(nd, nd->last_type); if (error) - goto out; + return error; dentry = dget(nd->path.dentry); goto done; } @@ -2297,74 +2379,60 @@ mountpoint_last(struct nameidata *nd, struct path *path) */ dentry = d_alloc(dir, &nd->last); if (!dentry) { - error = -ENOMEM; mutex_unlock(&dir->d_inode->i_mutex); - goto out; + return -ENOMEM; } dentry = lookup_real(dir->d_inode, dentry, nd->flags); - error = PTR_ERR(dentry); if (IS_ERR(dentry)) { mutex_unlock(&dir->d_inode->i_mutex); - goto out; + return PTR_ERR(dentry); } } mutex_unlock(&dir->d_inode->i_mutex); done: if (d_is_negative(dentry)) { - error = -ENOENT; dput(dentry); - goto out; + return -ENOENT; } + if (nd->depth) + put_link(nd); path->dentry = dentry; path->mnt = nd->path.mnt; - if (should_follow_link(dentry, nd->flags & LOOKUP_FOLLOW)) - return 1; + error = should_follow_link(nd, path, nd->flags & LOOKUP_FOLLOW, + d_backing_inode(dentry), 0); + if (unlikely(error)) + return error; mntget(path->mnt); follow_mount(path); - error = 0; -out: - terminate_walk(nd); - return error; + return 0; } /** * path_mountpoint - look up a path to be umounted - * @dfd: directory file descriptor to start walk from - * @name: full pathname to walk - * @path: pointer to container for result + * @nameidata: lookup context * @flags: lookup flags + * @path: pointer to container for result * * Look up the given name, but don't attempt to revalidate the last component. * Returns 0 and "path" will be valid on success; Returns error otherwise. */ static int -path_mountpoint(int dfd, const struct filename *name, struct path *path, - unsigned int flags) +path_mountpoint(struct nameidata *nd, unsigned flags, struct path *path) { - struct nameidata nd; + const char *s = path_init(nd, flags); int err; - - err = path_init(dfd, name, flags, &nd); - if (unlikely(err)) - goto out; - - err = mountpoint_last(&nd, path); - while (err > 0) { - void *cookie; - struct path link = *path; - err = may_follow_link(&link, &nd); - if (unlikely(err)) - break; - nd.flags |= LOOKUP_PARENT; - err = follow_link(&link, &nd, &cookie); - if (err) + if (IS_ERR(s)) + return PTR_ERR(s); + while (!(err = link_path_walk(s, nd)) && + (err = mountpoint_last(nd, path)) > 0) { + s = trailing_symlink(nd); + if (IS_ERR(s)) { + err = PTR_ERR(s); break; - err = mountpoint_last(&nd, path); - put_link(&nd, &link, cookie); + } } -out: - path_cleanup(&nd); + terminate_walk(nd); return err; } @@ -2372,16 +2440,19 @@ static int filename_mountpoint(int dfd, struct filename *name, struct path *path, unsigned int flags) { + struct nameidata nd; int error; if (IS_ERR(name)) return PTR_ERR(name); - error = path_mountpoint(dfd, name, path, flags | LOOKUP_RCU); + set_nameidata(&nd, dfd, name); + error = path_mountpoint(&nd, flags | LOOKUP_RCU, path); if (unlikely(error == -ECHILD)) - error = path_mountpoint(dfd, name, path, flags); + error = path_mountpoint(&nd, flags, path); if (unlikely(error == -ESTALE)) - error = path_mountpoint(dfd, name, path, flags | LOOKUP_REVAL); + error = path_mountpoint(&nd, flags | LOOKUP_REVAL, path); if (likely(!error)) audit_inode(name, path->dentry, 0); + restore_nameidata(); putname(name); return error; } @@ -2448,7 +2519,7 @@ EXPORT_SYMBOL(__check_sticky); */ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir) { - struct inode *inode = victim->d_inode; + struct inode *inode = d_backing_inode(victim); int error; if (d_is_negative(victim)) @@ -2914,18 +2985,19 @@ out_dput: /* * Handle the last step of open() */ -static int do_last(struct nameidata *nd, struct path *path, +static int do_last(struct nameidata *nd, struct file *file, const struct open_flags *op, - int *opened, struct filename *name) + int *opened) { struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; bool will_truncate = (open_flag & O_TRUNC) != 0; bool got_write = false; int acc_mode = op->acc_mode; + unsigned seq; struct inode *inode; - bool symlink_ok = false; struct path save_parent = { .dentry = NULL, .mnt = NULL }; + struct path path; bool retried = false; int error; @@ -2934,7 +3006,7 @@ static int do_last(struct nameidata *nd, struct path *path, if (nd->last_type != LAST_NORM) { error = handle_dots(nd, nd->last_type); - if (error) + if (unlikely(error)) return error; goto finish_open; } @@ -2942,15 +3014,13 @@ static int do_last(struct nameidata *nd, struct path *path, if (!(open_flag & O_CREAT)) { if (nd->last.name[nd->last.len]) nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; - if (open_flag & O_PATH && !(nd->flags & LOOKUP_FOLLOW)) - symlink_ok = true; /* we _can_ be in RCU mode here */ - error = lookup_fast(nd, path, &inode); + error = lookup_fast(nd, &path, &inode, &seq); if (likely(!error)) goto finish_lookup; if (error < 0) - goto out; + return error; BUG_ON(nd->inode != dir->d_inode); } else { @@ -2964,11 +3034,10 @@ static int do_last(struct nameidata *nd, struct path *path, if (error) return error; - audit_inode(name, dir, LOOKUP_PARENT); - error = -EISDIR; + audit_inode(nd->name, dir, LOOKUP_PARENT); /* trailing slashes? */ - if (nd->last.name[nd->last.len]) - goto out; + if (unlikely(nd->last.name[nd->last.len])) + return -EISDIR; } retry_lookup: @@ -2983,7 +3052,7 @@ retry_lookup: */ } mutex_lock(&dir->d_inode->i_mutex); - error = lookup_open(nd, path, file, op, got_write, opened); + error = lookup_open(nd, &path, file, op, got_write, opened); mutex_unlock(&dir->d_inode->i_mutex); if (error <= 0) { @@ -2994,7 +3063,7 @@ retry_lookup: !S_ISREG(file_inode(file)->i_mode)) will_truncate = false; - audit_inode(name, file->f_path.dentry, 0); + audit_inode(nd->name, file->f_path.dentry, 0); goto opened; } @@ -3003,15 +3072,15 @@ retry_lookup: open_flag &= ~O_TRUNC; will_truncate = false; acc_mode = MAY_OPEN; - path_to_nameidata(path, nd); + path_to_nameidata(&path, nd); goto finish_open_created; } /* * create/update audit record if it already exists. */ - if (d_is_positive(path->dentry)) - audit_inode(name, path->dentry, 0); + if (d_is_positive(path.dentry)) + audit_inode(nd->name, path.dentry, 0); /* * If atomic_open() acquired write access it is dropped now due to @@ -3023,48 +3092,45 @@ retry_lookup: got_write = false; } - error = -EEXIST; - if ((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT)) - goto exit_dput; - - error = follow_managed(path, nd->flags); - if (error < 0) - goto exit_dput; + if (unlikely((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))) { + path_to_nameidata(&path, nd); + return -EEXIST; + } - if (error) - nd->flags |= LOOKUP_JUMPED; + error = follow_managed(&path, nd); + if (unlikely(error < 0)) + return error; BUG_ON(nd->flags & LOOKUP_RCU); - inode = path->dentry->d_inode; -finish_lookup: - /* we _can_ be in RCU mode here */ - error = -ENOENT; - if (d_is_negative(path->dentry)) { - path_to_nameidata(path, nd); - goto out; + inode = d_backing_inode(path.dentry); + seq = 0; /* out of RCU mode, so the value doesn't matter */ + if (unlikely(d_is_negative(path.dentry))) { + path_to_nameidata(&path, nd); + return -ENOENT; } +finish_lookup: + if (nd->depth) + put_link(nd); + error = should_follow_link(nd, &path, nd->flags & LOOKUP_FOLLOW, + inode, seq); + if (unlikely(error)) + return error; - if (should_follow_link(path->dentry, !symlink_ok)) { - if (nd->flags & LOOKUP_RCU) { - if (unlikely(nd->path.mnt != path->mnt || - unlazy_walk(nd, path->dentry))) { - error = -ECHILD; - goto out; - } - } - BUG_ON(inode != path->dentry->d_inode); - return 1; + if (unlikely(d_is_symlink(path.dentry)) && !(open_flag & O_PATH)) { + path_to_nameidata(&path, nd); + return -ELOOP; } - if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) { - path_to_nameidata(path, nd); + if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path.mnt) { + path_to_nameidata(&path, nd); } else { save_parent.dentry = nd->path.dentry; - save_parent.mnt = mntget(path->mnt); - nd->path.dentry = path->dentry; + save_parent.mnt = mntget(path.mnt); + nd->path.dentry = path.dentry; } nd->inode = inode; + nd->seq = seq; /* Why this, you ask? _Now_ we might have grown LOOKUP_JUMPED... */ finish_open: error = complete_walk(nd); @@ -3072,7 +3138,7 @@ finish_open: path_put(&save_parent); return error; } - audit_inode(name, nd->path.dentry, 0); + audit_inode(nd->name, nd->path.dentry, 0); error = -EISDIR; if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry)) goto out; @@ -3119,12 +3185,8 @@ out: if (got_write) mnt_drop_write(nd->path.mnt); path_put(&save_parent); - terminate_walk(nd); return error; -exit_dput: - path_put_conditional(path, nd); - goto out; exit_fput: fput(file); goto out; @@ -3148,50 +3210,46 @@ stale_open: goto retry_lookup; } -static int do_tmpfile(int dfd, struct filename *pathname, - struct nameidata *nd, int flags, +static int do_tmpfile(struct nameidata *nd, unsigned flags, const struct open_flags *op, struct file *file, int *opened) { static const struct qstr name = QSTR_INIT("/", 1); - struct dentry *dentry, *child; + struct dentry *child; struct inode *dir; - int error = path_lookupat(dfd, pathname, - flags | LOOKUP_DIRECTORY, nd); + struct path path; + int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path); if (unlikely(error)) return error; - error = mnt_want_write(nd->path.mnt); + error = mnt_want_write(path.mnt); if (unlikely(error)) goto out; + dir = path.dentry->d_inode; /* we want directory to be writable */ - error = inode_permission(nd->inode, MAY_WRITE | MAY_EXEC); + error = inode_permission(dir, MAY_WRITE | MAY_EXEC); if (error) goto out2; - dentry = nd->path.dentry; - dir = dentry->d_inode; if (!dir->i_op->tmpfile) { error = -EOPNOTSUPP; goto out2; } - child = d_alloc(dentry, &name); + child = d_alloc(path.dentry, &name); if (unlikely(!child)) { error = -ENOMEM; goto out2; } - nd->flags &= ~LOOKUP_DIRECTORY; - nd->flags |= op->intent; - dput(nd->path.dentry); - nd->path.dentry = child; - error = dir->i_op->tmpfile(dir, nd->path.dentry, op->mode); + dput(path.dentry); + path.dentry = child; + error = dir->i_op->tmpfile(dir, child, op->mode); if (error) goto out2; - audit_inode(pathname, nd->path.dentry, 0); + audit_inode(nd->name, child, 0); /* Don't check for other permissions, the inode was just created */ - error = may_open(&nd->path, MAY_OPEN, op->open_flag); + error = may_open(&path, MAY_OPEN, op->open_flag); if (error) goto out2; - file->f_path.mnt = nd->path.mnt; - error = finish_open(file, nd->path.dentry, NULL, opened); + file->f_path.mnt = path.mnt; + error = finish_open(file, child, NULL, opened); if (error) goto out2; error = open_check_o_direct(file); @@ -3204,17 +3262,17 @@ static int do_tmpfile(int dfd, struct filename *pathname, spin_unlock(&inode->i_lock); } out2: - mnt_drop_write(nd->path.mnt); + mnt_drop_write(path.mnt); out: - path_put(&nd->path); + path_put(&path); return error; } -static struct file *path_openat(int dfd, struct filename *pathname, - struct nameidata *nd, const struct open_flags *op, int flags) +static struct file *path_openat(struct nameidata *nd, + const struct open_flags *op, unsigned flags) { + const char *s; struct file *file; - struct path path; int opened = 0; int error; @@ -3225,37 +3283,26 @@ static struct file *path_openat(int dfd, struct filename *pathname, file->f_flags = op->open_flag; if (unlikely(file->f_flags & __O_TMPFILE)) { - error = do_tmpfile(dfd, pathname, nd, flags, op, file, &opened); - goto out; + error = do_tmpfile(nd, flags, op, file, &opened); + goto out2; } - error = path_init(dfd, pathname, flags, nd); - if (unlikely(error)) - goto out; - - error = do_last(nd, &path, file, op, &opened, pathname); - while (unlikely(error > 0)) { /* trailing symlink */ - struct path link = path; - void *cookie; - if (!(nd->flags & LOOKUP_FOLLOW)) { - path_put_conditional(&path, nd); - path_put(&nd->path); - error = -ELOOP; - break; - } - error = may_follow_link(&link, nd); - if (unlikely(error)) - break; - nd->flags |= LOOKUP_PARENT; + s = path_init(nd, flags); + if (IS_ERR(s)) { + put_filp(file); + return ERR_CAST(s); + } + while (!(error = link_path_walk(s, nd)) && + (error = do_last(nd, file, op, &opened)) > 0) { nd->flags &= ~(LOOKUP_OPEN|LOOKUP_CREATE|LOOKUP_EXCL); - error = follow_link(&link, nd, &cookie); - if (unlikely(error)) + s = trailing_symlink(nd); + if (IS_ERR(s)) { + error = PTR_ERR(s); break; - error = do_last(nd, &path, file, op, &opened, pathname); - put_link(nd, &link, cookie); + } } -out: - path_cleanup(nd); + terminate_walk(nd); +out2: if (!(opened & FILE_OPENED)) { BUG_ON(!error); put_filp(file); @@ -3279,11 +3326,13 @@ struct file *do_filp_open(int dfd, struct filename *pathname, int flags = op->lookup_flags; struct file *filp; - filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_RCU); + set_nameidata(&nd, dfd, pathname); + filp = path_openat(&nd, op, flags | LOOKUP_RCU); if (unlikely(filp == ERR_PTR(-ECHILD))) - filp = path_openat(dfd, pathname, &nd, op, flags); + filp = path_openat(&nd, op, flags); if (unlikely(filp == ERR_PTR(-ESTALE))) - filp = path_openat(dfd, pathname, &nd, op, flags | LOOKUP_REVAL); + filp = path_openat(&nd, op, flags | LOOKUP_REVAL); + restore_nameidata(); return filp; } @@ -3305,11 +3354,13 @@ struct file *do_file_open_root(struct dentry *dentry, struct vfsmount *mnt, if (unlikely(IS_ERR(filename))) return ERR_CAST(filename); - file = path_openat(-1, filename, &nd, op, flags | LOOKUP_RCU); + set_nameidata(&nd, -1, filename); + file = path_openat(&nd, op, flags | LOOKUP_RCU); if (unlikely(file == ERR_PTR(-ECHILD))) - file = path_openat(-1, filename, &nd, op, flags); + file = path_openat(&nd, op, flags); if (unlikely(file == ERR_PTR(-ESTALE))) - file = path_openat(-1, filename, &nd, op, flags | LOOKUP_REVAL); + file = path_openat(&nd, op, flags | LOOKUP_REVAL); + restore_nameidata(); putname(filename); return file; } @@ -3318,7 +3369,8 @@ static struct dentry *filename_create(int dfd, struct filename *name, struct path *path, unsigned int lookup_flags) { struct dentry *dentry = ERR_PTR(-EEXIST); - struct nameidata nd; + struct qstr last; + int type; int err2; int error; bool is_dir = (lookup_flags & LOOKUP_DIRECTORY); @@ -3329,26 +3381,25 @@ static struct dentry *filename_create(int dfd, struct filename *name, */ lookup_flags &= LOOKUP_REVAL; - error = filename_lookup(dfd, name, LOOKUP_PARENT|lookup_flags, &nd); - if (error) - return ERR_PTR(error); + name = filename_parentat(dfd, name, lookup_flags, path, &last, &type); + if (IS_ERR(name)) + return ERR_CAST(name); /* * Yucky last component or no last component at all? * (foo/., foo/.., /////) */ - if (nd.last_type != LAST_NORM) + if (unlikely(type != LAST_NORM)) goto out; - nd.flags &= ~LOOKUP_PARENT; - nd.flags |= LOOKUP_CREATE | LOOKUP_EXCL; /* don't fail immediately if it's r/o, at least try to report other errors */ - err2 = mnt_want_write(nd.path.mnt); + err2 = mnt_want_write(path->mnt); /* * Do the final lookup. */ - mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - dentry = lookup_hash(&nd); + lookup_flags |= LOOKUP_CREATE | LOOKUP_EXCL; + mutex_lock_nested(&path->dentry->d_inode->i_mutex, I_MUTEX_PARENT); + dentry = __lookup_hash(&last, path->dentry, lookup_flags); if (IS_ERR(dentry)) goto unlock; @@ -3362,7 +3413,7 @@ static struct dentry *filename_create(int dfd, struct filename *name, * all is fine. Let's be bastards - you had / on the end, you've * been asking for (non-existent) directory. -ENOENT for you. */ - if (unlikely(!is_dir && nd.last.name[nd.last.len])) { + if (unlikely(!is_dir && last.name[last.len])) { error = -ENOENT; goto fail; } @@ -3370,31 +3421,26 @@ static struct dentry *filename_create(int dfd, struct filename *name, error = err2; goto fail; } - *path = nd.path; + putname(name); return dentry; fail: dput(dentry); dentry = ERR_PTR(error); unlock: - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + mutex_unlock(&path->dentry->d_inode->i_mutex); if (!err2) - mnt_drop_write(nd.path.mnt); + mnt_drop_write(path->mnt); out: - path_put(&nd.path); + path_put(path); + putname(name); return dentry; } struct dentry *kern_path_create(int dfd, const char *pathname, struct path *path, unsigned int lookup_flags) { - struct filename *filename = getname_kernel(pathname); - struct dentry *res; - - if (IS_ERR(filename)) - return ERR_CAST(filename); - res = filename_create(dfd, filename, path, lookup_flags); - putname(filename); - return res; + return filename_create(dfd, getname_kernel(pathname), + path, lookup_flags); } EXPORT_SYMBOL(kern_path_create); @@ -3407,16 +3453,10 @@ void done_path_create(struct path *path, struct dentry *dentry) } EXPORT_SYMBOL(done_path_create); -struct dentry *user_path_create(int dfd, const char __user *pathname, +inline struct dentry *user_path_create(int dfd, const char __user *pathname, struct path *path, unsigned int lookup_flags) { - struct filename *tmp = getname(pathname); - struct dentry *res; - if (IS_ERR(tmp)) - return ERR_CAST(tmp); - res = filename_create(dfd, tmp, path, lookup_flags); - putname(tmp); - return res; + return filename_create(dfd, getname(pathname), path, lookup_flags); } EXPORT_SYMBOL(user_path_create); @@ -3637,14 +3677,17 @@ static long do_rmdir(int dfd, const char __user *pathname) int error = 0; struct filename *name; struct dentry *dentry; - struct nameidata nd; + struct path path; + struct qstr last; + int type; unsigned int lookup_flags = 0; retry: - name = user_path_parent(dfd, pathname, &nd, lookup_flags); + name = user_path_parent(dfd, pathname, + &path, &last, &type, lookup_flags); if (IS_ERR(name)) return PTR_ERR(name); - switch(nd.last_type) { + switch (type) { case LAST_DOTDOT: error = -ENOTEMPTY; goto exit1; @@ -3656,13 +3699,12 @@ retry: goto exit1; } - nd.flags &= ~LOOKUP_PARENT; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (error) goto exit1; - mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - dentry = lookup_hash(&nd); + mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); + dentry = __lookup_hash(&last, path.dentry, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit2; @@ -3670,17 +3712,17 @@ retry: error = -ENOENT; goto exit3; } - error = security_path_rmdir(&nd.path, dentry); + error = security_path_rmdir(&path, dentry); if (error) goto exit3; - error = vfs_rmdir(nd.path.dentry->d_inode, dentry); + error = vfs_rmdir(path.dentry->d_inode, dentry); exit3: dput(dentry); exit2: - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); - mnt_drop_write(nd.path.mnt); + mutex_unlock(&path.dentry->d_inode->i_mutex); + mnt_drop_write(path.mnt); exit1: - path_put(&nd.path); + path_put(&path); putname(name); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; @@ -3763,43 +3805,45 @@ static long do_unlinkat(int dfd, const char __user *pathname) int error; struct filename *name; struct dentry *dentry; - struct nameidata nd; + struct path path; + struct qstr last; + int type; struct inode *inode = NULL; struct inode *delegated_inode = NULL; unsigned int lookup_flags = 0; retry: - name = user_path_parent(dfd, pathname, &nd, lookup_flags); + name = user_path_parent(dfd, pathname, + &path, &last, &type, lookup_flags); if (IS_ERR(name)) return PTR_ERR(name); error = -EISDIR; - if (nd.last_type != LAST_NORM) + if (type != LAST_NORM) goto exit1; - nd.flags &= ~LOOKUP_PARENT; - error = mnt_want_write(nd.path.mnt); + error = mnt_want_write(path.mnt); if (error) goto exit1; retry_deleg: - mutex_lock_nested(&nd.path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); - dentry = lookup_hash(&nd); + mutex_lock_nested(&path.dentry->d_inode->i_mutex, I_MUTEX_PARENT); + dentry = __lookup_hash(&last, path.dentry, lookup_flags); error = PTR_ERR(dentry); if (!IS_ERR(dentry)) { /* Why not before? Because we want correct error value */ - if (nd.last.name[nd.last.len]) + if (last.name[last.len]) goto slashes; inode = dentry->d_inode; if (d_is_negative(dentry)) goto slashes; ihold(inode); - error = security_path_unlink(&nd.path, dentry); + error = security_path_unlink(&path, dentry); if (error) goto exit2; - error = vfs_unlink(nd.path.dentry->d_inode, dentry, &delegated_inode); + error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode); exit2: dput(dentry); } - mutex_unlock(&nd.path.dentry->d_inode->i_mutex); + mutex_unlock(&path.dentry->d_inode->i_mutex); if (inode) iput(inode); /* truncate the inode here */ inode = NULL; @@ -3808,9 +3852,9 @@ exit2: if (!error) goto retry_deleg; } - mnt_drop_write(nd.path.mnt); + mnt_drop_write(path.mnt); exit1: - path_put(&nd.path); + path_put(&path); putname(name); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; @@ -4240,14 +4284,15 @@ EXPORT_SYMBOL(vfs_rename); SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, int, newdfd, const char __user *, newname, unsigned int, flags) { - struct dentry *old_dir, *new_dir; struct dentry *old_dentry, *new_dentry; struct dentry *trap; - struct nameidata oldnd, newnd; + struct path old_path, new_path; + struct qstr old_last, new_last; + int old_type, new_type; struct inode *delegated_inode = NULL; struct filename *from; struct filename *to; - unsigned int lookup_flags = 0; + unsigned int lookup_flags = 0, target_flags = LOOKUP_RENAME_TARGET; bool should_retry = false; int error; @@ -4261,47 +4306,45 @@ SYSCALL_DEFINE5(renameat2, int, olddfd, const char __user *, oldname, if ((flags & RENAME_WHITEOUT) && !capable(CAP_MKNOD)) return -EPERM; + if (flags & RENAME_EXCHANGE) + target_flags = 0; + retry: - from = user_path_parent(olddfd, oldname, &oldnd, lookup_flags); + from = user_path_parent(olddfd, oldname, + &old_path, &old_last, &old_type, lookup_flags); if (IS_ERR(from)) { error = PTR_ERR(from); goto exit; } - to = user_path_parent(newdfd, newname, &newnd, lookup_flags); + to = user_path_parent(newdfd, newname, + &new_path, &new_last, &new_type, lookup_flags); if (IS_ERR(to)) { error = PTR_ERR(to); goto exit1; } error = -EXDEV; - if (oldnd.path.mnt != newnd.path.mnt) + if (old_path.mnt != new_path.mnt) goto exit2; - old_dir = oldnd.path.dentry; error = -EBUSY; - if (oldnd.last_type != LAST_NORM) + if (old_type != LAST_NORM) goto exit2; - new_dir = newnd.path.dentry; if (flags & RENAME_NOREPLACE) error = -EEXIST; - if (newnd.last_type != LAST_NORM) + if (new_type != LAST_NORM) goto exit2; - error = mnt_want_write(oldnd.path.mnt); + error = mnt_want_write(old_path.mnt); if (error) goto exit2; - oldnd.flags &= ~LOOKUP_PARENT; - newnd.flags &= ~LOOKUP_PARENT; - if (!(flags & RENAME_EXCHANGE)) - newnd.flags |= LOOKUP_RENAME_TARGET; - retry_deleg: - trap = lock_rename(new_dir, old_dir); + trap = lock_rename(new_path.dentry, old_path.dentry); - old_dentry = lookup_hash(&oldnd); + old_dentry = __lookup_hash(&old_last, old_path.dentry, lookup_flags); error = PTR_ERR(old_dentry); if (IS_ERR(old_dentry)) goto exit3; @@ -4309,7 +4352,7 @@ retry_deleg: error = -ENOENT; if (d_is_negative(old_dentry)) goto exit4; - new_dentry = lookup_hash(&newnd); + new_dentry = __lookup_hash(&new_last, new_path.dentry, lookup_flags | target_flags); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto exit4; @@ -4323,16 +4366,16 @@ retry_deleg: if (!d_is_dir(new_dentry)) { error = -ENOTDIR; - if (newnd.last.name[newnd.last.len]) + if (new_last.name[new_last.len]) goto exit5; } } /* unless the source is a directory trailing slashes give -ENOTDIR */ if (!d_is_dir(old_dentry)) { error = -ENOTDIR; - if (oldnd.last.name[oldnd.last.len]) + if (old_last.name[old_last.len]) goto exit5; - if (!(flags & RENAME_EXCHANGE) && newnd.last.name[newnd.last.len]) + if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len]) goto exit5; } /* source should not be ancestor of target */ @@ -4345,32 +4388,32 @@ retry_deleg: if (new_dentry == trap) goto exit5; - error = security_path_rename(&oldnd.path, old_dentry, - &newnd.path, new_dentry, flags); + error = security_path_rename(&old_path, old_dentry, + &new_path, new_dentry, flags); if (error) goto exit5; - error = vfs_rename(old_dir->d_inode, old_dentry, - new_dir->d_inode, new_dentry, + error = vfs_rename(old_path.dentry->d_inode, old_dentry, + new_path.dentry->d_inode, new_dentry, &delegated_inode, flags); exit5: dput(new_dentry); exit4: dput(old_dentry); exit3: - unlock_rename(new_dir, old_dir); + unlock_rename(new_path.dentry, old_path.dentry); if (delegated_inode) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } - mnt_drop_write(oldnd.path.mnt); + mnt_drop_write(old_path.mnt); exit2: if (retry_estale(error, lookup_flags)) should_retry = true; - path_put(&newnd.path); + path_put(&new_path); putname(to); exit1: - path_put(&oldnd.path); + path_put(&old_path); putname(from); if (should_retry) { should_retry = false; @@ -4429,18 +4472,19 @@ EXPORT_SYMBOL(readlink_copy); */ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen) { - struct nameidata nd; void *cookie; + struct inode *inode = d_inode(dentry); + const char *link = inode->i_link; int res; - nd.depth = 0; - cookie = dentry->d_inode->i_op->follow_link(dentry, &nd); - if (IS_ERR(cookie)) - return PTR_ERR(cookie); - - res = readlink_copy(buffer, buflen, nd_get_link(&nd)); - if (dentry->d_inode->i_op->put_link) - dentry->d_inode->i_op->put_link(dentry, &nd, cookie); + if (!link) { + link = inode->i_op->follow_link(dentry, &cookie); + if (IS_ERR(link)) + return PTR_ERR(link); + } + res = readlink_copy(buffer, buflen, link); + if (inode->i_op->put_link) + inode->i_op->put_link(inode, cookie); return res; } EXPORT_SYMBOL(generic_readlink); @@ -4472,22 +4516,21 @@ int page_readlink(struct dentry *dentry, char __user *buffer, int buflen) } EXPORT_SYMBOL(page_readlink); -void *page_follow_link_light(struct dentry *dentry, struct nameidata *nd) +const char *page_follow_link_light(struct dentry *dentry, void **cookie) { struct page *page = NULL; - nd_set_link(nd, page_getlink(dentry, &page)); - return page; + char *res = page_getlink(dentry, &page); + if (!IS_ERR(res)) + *cookie = page; + return res; } EXPORT_SYMBOL(page_follow_link_light); -void page_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) +void page_put_link(struct inode *unused, void *cookie) { struct page *page = cookie; - - if (page) { - kunmap(page); - page_cache_release(page); - } + kunmap(page); + page_cache_release(page); } EXPORT_SYMBOL(page_put_link); diff --git a/fs/namespace.c b/fs/namespace.c index 1f4f9dac6e5a..9c1c43d0d4f1 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -590,24 +590,35 @@ static void delayed_free_vfsmnt(struct rcu_head *head) } /* call under rcu_read_lock */ -bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) +int __legitimize_mnt(struct vfsmount *bastard, unsigned seq) { struct mount *mnt; if (read_seqretry(&mount_lock, seq)) - return false; + return 1; if (bastard == NULL) - return true; + return 0; mnt = real_mount(bastard); mnt_add_count(mnt, 1); if (likely(!read_seqretry(&mount_lock, seq))) - return true; + return 0; if (bastard->mnt_flags & MNT_SYNC_UMOUNT) { mnt_add_count(mnt, -1); - return false; + return 1; + } + return -1; +} + +/* call under rcu_read_lock */ +bool legitimize_mnt(struct vfsmount *bastard, unsigned seq) +{ + int res = __legitimize_mnt(bastard, seq); + if (likely(!res)) + return true; + if (unlikely(res < 0)) { + rcu_read_unlock(); + mntput(bastard); + rcu_read_lock(); } - rcu_read_unlock(); - mntput(bastard); - rcu_read_lock(); return false; } @@ -3179,6 +3190,12 @@ bool fs_fully_visible(struct file_system_type *type) if (mnt->mnt.mnt_sb->s_type != type) continue; + /* This mount is not fully visible if it's root directory + * is not the root directory of the filesystem. + */ + if (mnt->mnt.mnt_root != mnt->mnt.mnt_sb->s_root) + continue; + /* This mount is not fully visible if there are any child mounts * that cover anything except for empty directories. */ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 45b35b9b1e36..55e1e3af23a3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -38,6 +38,7 @@ #include <linux/mm.h> #include <linux/delay.h> #include <linux/errno.h> +#include <linux/file.h> #include <linux/string.h> #include <linux/ratelimit.h> #include <linux/printk.h> @@ -5604,6 +5605,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl, p->server = server; atomic_inc(&lsp->ls_count); p->ctx = get_nfs_open_context(ctx); + get_file(fl->fl_file); memcpy(&p->fl, fl, sizeof(p->fl)); return p; out_free_seqid: @@ -5716,6 +5718,7 @@ static void nfs4_lock_release(void *calldata) nfs_free_seqid(data->arg.lock_seqid); nfs4_put_lock_state(data->lsp); put_nfs_open_context(data->ctx); + fput(data->fl.fl_file); kfree(data); dprintk("%s: done!\n", __func__); } diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 2d56200655fe..b6de433da5db 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -20,7 +20,6 @@ #include <linux/stat.h> #include <linux/mm.h> #include <linux/string.h> -#include <linux/namei.h> /* Symlink caching in the page cache is even more simplistic * and straight-forward than readdir caching. @@ -43,7 +42,7 @@ error: return -EIO; } -static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *nfs_follow_link(struct dentry *dentry, void **cookie) { struct inode *inode = d_inode(dentry); struct page *page; @@ -51,19 +50,13 @@ static void *nfs_follow_link(struct dentry *dentry, struct nameidata *nd) err = ERR_PTR(nfs_revalidate_mapping(inode, inode->i_mapping)); if (err) - goto read_failed; + return err; page = read_cache_page(&inode->i_data, 0, (filler_t *)nfs_symlink_filler, inode); - if (IS_ERR(page)) { - err = page; - goto read_failed; - } - nd_set_link(nd, kmap(page)); - return page; - -read_failed: - nd_set_link(nd, err); - return NULL; + if (IS_ERR(page)) + return ERR_CAST(page); + *cookie = page; + return kmap(page); } /* diff --git a/fs/nfs/write.c b/fs/nfs/write.c index d12a4be613a5..dfc19f1575a1 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1845,12 +1845,15 @@ int nfs_wb_all(struct inode *inode) trace_nfs_writeback_inode_enter(inode); ret = filemap_write_and_wait(inode->i_mapping); - if (!ret) { - ret = nfs_commit_inode(inode, FLUSH_SYNC); - if (!ret) - pnfs_sync_inode(inode, true); - } + if (ret) + goto out; + ret = nfs_commit_inode(inode, FLUSH_SYNC); + if (ret < 0) + goto out; + pnfs_sync_inode(inode, true); + ret = 0; +out: trace_nfs_writeback_inode_exit(inode, ret); return ret; } diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c index 03d647bf195d..cdefaa331a07 100644 --- a/fs/nfsd/blocklayout.c +++ b/fs/nfsd/blocklayout.c @@ -181,6 +181,17 @@ nfsd4_block_proc_layoutcommit(struct inode *inode, } const struct nfsd4_layout_ops bl_layout_ops = { + /* + * Pretend that we send notification to the client. This is a blatant + * lie to force recent Linux clients to cache our device IDs. + * We rarely ever change the device ID, so the harm of leaking deviceids + * for a while isn't too bad. Unfortunately RFC5661 is a complete mess + * in this regard, but I filed errata 4119 for this a while ago, and + * hopefully the Linux client will eventually start caching deviceids + * without this again. + */ + .notify_types = + NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, .proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo, .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo, .proc_layoutget = nfsd4_block_proc_layoutget, diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 58277859a467..5694cfb7a47b 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -224,7 +224,7 @@ static int nfs_cb_stat_to_errno(int status) } static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected, - enum nfsstat4 *status) + int *status) { __be32 *p; u32 op; @@ -235,7 +235,7 @@ static int decode_cb_op_status(struct xdr_stream *xdr, enum nfs_opnum4 expected, op = be32_to_cpup(p++); if (unlikely(op != expected)) goto out_unexpected; - *status = be32_to_cpup(p); + *status = nfs_cb_stat_to_errno(be32_to_cpup(p)); return 0; out_overflow: print_overflow_msg(__func__, xdr); @@ -446,22 +446,16 @@ out_overflow: static int decode_cb_sequence4res(struct xdr_stream *xdr, struct nfsd4_callback *cb) { - enum nfsstat4 nfserr; int status; if (cb->cb_minorversion == 0) return 0; - status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &nfserr); - if (unlikely(status)) - goto out; - if (unlikely(nfserr != NFS4_OK)) - goto out_default; - status = decode_cb_sequence4resok(xdr, cb); -out: - return status; -out_default: - return nfs_cb_stat_to_errno(nfserr); + status = decode_cb_op_status(xdr, OP_CB_SEQUENCE, &cb->cb_status); + if (unlikely(status || cb->cb_status)) + return status; + + return decode_cb_sequence4resok(xdr, cb); } /* @@ -524,26 +518,19 @@ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, struct nfsd4_callback *cb) { struct nfs4_cb_compound_hdr hdr; - enum nfsstat4 nfserr; int status; status = decode_cb_compound4res(xdr, &hdr); if (unlikely(status)) - goto out; + return status; if (cb != NULL) { status = decode_cb_sequence4res(xdr, cb); - if (unlikely(status)) - goto out; + if (unlikely(status || cb->cb_status)) + return status; } - status = decode_cb_op_status(xdr, OP_CB_RECALL, &nfserr); - if (unlikely(status)) - goto out; - if (unlikely(nfserr != NFS4_OK)) - status = nfs_cb_stat_to_errno(nfserr); -out: - return status; + return decode_cb_op_status(xdr, OP_CB_RECALL, &cb->cb_status); } #ifdef CONFIG_NFSD_PNFS @@ -621,24 +608,18 @@ static int nfs4_xdr_dec_cb_layout(struct rpc_rqst *rqstp, struct nfsd4_callback *cb) { struct nfs4_cb_compound_hdr hdr; - enum nfsstat4 nfserr; int status; status = decode_cb_compound4res(xdr, &hdr); if (unlikely(status)) - goto out; + return status; + if (cb) { status = decode_cb_sequence4res(xdr, cb); - if (unlikely(status)) - goto out; + if (unlikely(status || cb->cb_status)) + return status; } - status = decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &nfserr); - if (unlikely(status)) - goto out; - if (unlikely(nfserr != NFS4_OK)) - status = nfs_cb_stat_to_errno(nfserr); -out: - return status; + return decode_cb_op_status(xdr, OP_CB_LAYOUTRECALL, &cb->cb_status); } #endif /* CONFIG_NFSD_PNFS */ @@ -898,13 +879,6 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata) if (!nfsd41_cb_get_slot(clp, task)) return; } - spin_lock(&clp->cl_lock); - if (list_empty(&cb->cb_per_client)) { - /* This is the first call, not a restart */ - cb->cb_done = false; - list_add(&cb->cb_per_client, &clp->cl_callbacks); - } - spin_unlock(&clp->cl_lock); rpc_call_start(task); } @@ -918,22 +892,33 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) if (clp->cl_minorversion) { /* No need for lock, access serialized in nfsd4_cb_prepare */ - ++clp->cl_cb_session->se_cb_seq_nr; + if (!task->tk_status) + ++clp->cl_cb_session->se_cb_seq_nr; clear_bit(0, &clp->cl_cb_slot_busy); rpc_wake_up_next(&clp->cl_cb_waitq); dprintk("%s: freed slot, new seqid=%d\n", __func__, clp->cl_cb_session->se_cb_seq_nr); } - if (clp->cl_cb_client != task->tk_client) { - /* We're shutting down or changing cl_cb_client; leave - * it to nfsd4_process_cb_update to restart the call if - * necessary. */ + /* + * If the backchannel connection was shut down while this + * task was queued, we need to resubmit it after setting up + * a new backchannel connection. + * + * Note that if we lost our callback connection permanently + * the submission code will error out, so we don't need to + * handle that case here. + */ + if (task->tk_flags & RPC_TASK_KILLED) { + task->tk_status = 0; + cb->cb_need_restart = true; return; } - if (cb->cb_done) - return; + if (cb->cb_status) { + WARN_ON_ONCE(task->tk_status); + task->tk_status = cb->cb_status; + } switch (cb->cb_ops->done(cb, task)) { case 0: @@ -949,21 +934,17 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata) default: BUG(); } - cb->cb_done = true; } static void nfsd4_cb_release(void *calldata) { struct nfsd4_callback *cb = calldata; - struct nfs4_client *clp = cb->cb_clp; - - if (cb->cb_done) { - spin_lock(&clp->cl_lock); - list_del(&cb->cb_per_client); - spin_unlock(&clp->cl_lock); + if (cb->cb_need_restart) + nfsd4_run_cb(cb); + else cb->cb_ops->release(cb); - } + } static const struct rpc_call_ops nfsd4_cb_ops = { @@ -1058,9 +1039,6 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb) nfsd4_mark_cb_down(clp, err); return; } - /* Yay, the callback channel's back! Restart any callbacks: */ - list_for_each_entry(cb, &clp->cl_callbacks, cb_per_client) - queue_work(callback_wq, &cb->cb_work); } static void @@ -1071,8 +1049,12 @@ nfsd4_run_cb_work(struct work_struct *work) struct nfs4_client *clp = cb->cb_clp; struct rpc_clnt *clnt; - if (cb->cb_ops && cb->cb_ops->prepare) - cb->cb_ops->prepare(cb); + if (cb->cb_need_restart) { + cb->cb_need_restart = false; + } else { + if (cb->cb_ops && cb->cb_ops->prepare) + cb->cb_ops->prepare(cb); + } if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK) nfsd4_process_cb_update(cb); @@ -1084,6 +1066,15 @@ nfsd4_run_cb_work(struct work_struct *work) cb->cb_ops->release(cb); return; } + + /* + * Don't send probe messages for 4.1 or later. + */ + if (!cb->cb_ops && clp->cl_minorversion) { + clp->cl_cb_state = NFSD4_CB_UP; + return; + } + cb->cb_msg.rpc_cred = clp->cl_cb_cred; rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | RPC_TASK_SOFTCONN, cb->cb_ops ? &nfsd4_cb_ops : &nfsd4_cb_probe_ops, cb); @@ -1098,8 +1089,8 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp, cb->cb_msg.rpc_resp = cb; cb->cb_ops = ops; INIT_WORK(&cb->cb_work, nfsd4_run_cb_work); - INIT_LIST_HEAD(&cb->cb_per_client); - cb->cb_done = true; + cb->cb_status = 0; + cb->cb_need_restart = false; } void nfsd4_run_cb(struct nfsd4_callback *cb) diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 38f2d7abe3a7..039f9c8a95e8 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -94,6 +94,7 @@ static struct kmem_cache *lockowner_slab; static struct kmem_cache *file_slab; static struct kmem_cache *stateid_slab; static struct kmem_cache *deleg_slab; +static struct kmem_cache *odstate_slab; static void free_session(struct nfsd4_session *); @@ -281,6 +282,7 @@ put_nfs4_file(struct nfs4_file *fi) if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) { hlist_del_rcu(&fi->fi_hash); spin_unlock(&state_lock); + WARN_ON_ONCE(!list_empty(&fi->fi_clnt_odstate)); WARN_ON_ONCE(!list_empty(&fi->fi_delegations)); call_rcu(&fi->fi_rcu, nfsd4_free_file_rcu); } @@ -471,6 +473,86 @@ static void nfs4_file_put_access(struct nfs4_file *fp, u32 access) __nfs4_file_put_access(fp, O_RDONLY); } +/* + * Allocate a new open/delegation state counter. This is needed for + * pNFS for proper return on close semantics. + * + * Note that we only allocate it for pNFS-enabled exports, otherwise + * all pointers to struct nfs4_clnt_odstate are always NULL. + */ +static struct nfs4_clnt_odstate * +alloc_clnt_odstate(struct nfs4_client *clp) +{ + struct nfs4_clnt_odstate *co; + + co = kmem_cache_zalloc(odstate_slab, GFP_KERNEL); + if (co) { + co->co_client = clp; + atomic_set(&co->co_odcount, 1); + } + return co; +} + +static void +hash_clnt_odstate_locked(struct nfs4_clnt_odstate *co) +{ + struct nfs4_file *fp = co->co_file; + + lockdep_assert_held(&fp->fi_lock); + list_add(&co->co_perfile, &fp->fi_clnt_odstate); +} + +static inline void +get_clnt_odstate(struct nfs4_clnt_odstate *co) +{ + if (co) + atomic_inc(&co->co_odcount); +} + +static void +put_clnt_odstate(struct nfs4_clnt_odstate *co) +{ + struct nfs4_file *fp; + + if (!co) + return; + + fp = co->co_file; + if (atomic_dec_and_lock(&co->co_odcount, &fp->fi_lock)) { + list_del(&co->co_perfile); + spin_unlock(&fp->fi_lock); + + nfsd4_return_all_file_layouts(co->co_client, fp); + kmem_cache_free(odstate_slab, co); + } +} + +static struct nfs4_clnt_odstate * +find_or_hash_clnt_odstate(struct nfs4_file *fp, struct nfs4_clnt_odstate *new) +{ + struct nfs4_clnt_odstate *co; + struct nfs4_client *cl; + + if (!new) + return NULL; + + cl = new->co_client; + + spin_lock(&fp->fi_lock); + list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) { + if (co->co_client == cl) { + get_clnt_odstate(co); + goto out; + } + } + co = new; + co->co_file = fp; + hash_clnt_odstate_locked(new); +out: + spin_unlock(&fp->fi_lock); + return co; +} + struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab) { @@ -606,7 +688,8 @@ static void block_delegations(struct knfsd_fh *fh) } static struct nfs4_delegation * -alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh) +alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh, + struct nfs4_clnt_odstate *odstate) { struct nfs4_delegation *dp; long n; @@ -631,6 +714,8 @@ alloc_init_deleg(struct nfs4_client *clp, struct svc_fh *current_fh) INIT_LIST_HEAD(&dp->dl_perfile); INIT_LIST_HEAD(&dp->dl_perclnt); INIT_LIST_HEAD(&dp->dl_recall_lru); + dp->dl_clnt_odstate = odstate; + get_clnt_odstate(odstate); dp->dl_type = NFS4_OPEN_DELEGATE_READ; dp->dl_retries = 1; nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client, @@ -714,6 +799,7 @@ static void destroy_delegation(struct nfs4_delegation *dp) spin_lock(&state_lock); unhash_delegation_locked(dp); spin_unlock(&state_lock); + put_clnt_odstate(dp->dl_clnt_odstate); nfs4_put_deleg_lease(dp->dl_stid.sc_file); nfs4_put_stid(&dp->dl_stid); } @@ -724,6 +810,7 @@ static void revoke_delegation(struct nfs4_delegation *dp) WARN_ON(!list_empty(&dp->dl_recall_lru)); + put_clnt_odstate(dp->dl_clnt_odstate); nfs4_put_deleg_lease(dp->dl_stid.sc_file); if (clp->cl_minorversion == 0) @@ -933,6 +1020,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid) { struct nfs4_ol_stateid *stp = openlockstateid(stid); + put_clnt_odstate(stp->st_clnt_odstate); release_all_access(stp); if (stp->st_stateowner) nfs4_put_stateowner(stp->st_stateowner); @@ -1538,7 +1626,6 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) INIT_LIST_HEAD(&clp->cl_openowners); INIT_LIST_HEAD(&clp->cl_delegations); INIT_LIST_HEAD(&clp->cl_lru); - INIT_LIST_HEAD(&clp->cl_callbacks); INIT_LIST_HEAD(&clp->cl_revoked); #ifdef CONFIG_NFSD_PNFS INIT_LIST_HEAD(&clp->cl_lo_states); @@ -1634,6 +1721,7 @@ __destroy_client(struct nfs4_client *clp) while (!list_empty(&reaplist)) { dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); + put_clnt_odstate(dp->dl_clnt_odstate); nfs4_put_deleg_lease(dp->dl_stid.sc_file); nfs4_put_stid(&dp->dl_stid); } @@ -3057,6 +3145,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval, spin_lock_init(&fp->fi_lock); INIT_LIST_HEAD(&fp->fi_stateids); INIT_LIST_HEAD(&fp->fi_delegations); + INIT_LIST_HEAD(&fp->fi_clnt_odstate); fh_copy_shallow(&fp->fi_fhandle, fh); fp->fi_deleg_file = NULL; fp->fi_had_conflict = false; @@ -3073,6 +3162,7 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval, void nfsd4_free_slabs(void) { + kmem_cache_destroy(odstate_slab); kmem_cache_destroy(openowner_slab); kmem_cache_destroy(lockowner_slab); kmem_cache_destroy(file_slab); @@ -3103,8 +3193,14 @@ nfsd4_init_slabs(void) sizeof(struct nfs4_delegation), 0, 0, NULL); if (deleg_slab == NULL) goto out_free_stateid_slab; + odstate_slab = kmem_cache_create("nfsd4_odstate", + sizeof(struct nfs4_clnt_odstate), 0, 0, NULL); + if (odstate_slab == NULL) + goto out_free_deleg_slab; return 0; +out_free_deleg_slab: + kmem_cache_destroy(deleg_slab); out_free_stateid_slab: kmem_cache_destroy(stateid_slab); out_free_file_slab: @@ -3581,6 +3677,14 @@ alloc_stateid: open->op_stp = nfs4_alloc_open_stateid(clp); if (!open->op_stp) return nfserr_jukebox; + + if (nfsd4_has_session(cstate) && + (cstate->current_fh.fh_export->ex_flags & NFSEXP_PNFS)) { + open->op_odstate = alloc_clnt_odstate(clp); + if (!open->op_odstate) + return nfserr_jukebox; + } + return nfs_ok; } @@ -3869,7 +3973,7 @@ out_fput: static struct nfs4_delegation * nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, - struct nfs4_file *fp) + struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate) { int status; struct nfs4_delegation *dp; @@ -3877,7 +3981,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, if (fp->fi_had_conflict) return ERR_PTR(-EAGAIN); - dp = alloc_init_deleg(clp, fh); + dp = alloc_init_deleg(clp, fh, odstate); if (!dp) return ERR_PTR(-ENOMEM); @@ -3903,6 +4007,7 @@ out_unlock: spin_unlock(&state_lock); out: if (status) { + put_clnt_odstate(dp->dl_clnt_odstate); nfs4_put_stid(&dp->dl_stid); return ERR_PTR(status); } @@ -3980,7 +4085,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, default: goto out_no_deleg; } - dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file); + dp = nfs4_set_delegation(clp, fh, stp->st_stid.sc_file, stp->st_clnt_odstate); if (IS_ERR(dp)) goto out_no_deleg; @@ -4069,6 +4174,11 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf release_open_stateid(stp); goto out; } + + stp->st_clnt_odstate = find_or_hash_clnt_odstate(fp, + open->op_odstate); + if (stp->st_clnt_odstate == open->op_odstate) + open->op_odstate = NULL; } update_stateid(&stp->st_stid.sc_stateid); memcpy(&open->op_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); @@ -4129,6 +4239,8 @@ void nfsd4_cleanup_open_state(struct nfsd4_compound_state *cstate, kmem_cache_free(file_slab, open->op_file); if (open->op_stp) nfs4_put_stid(&open->op_stp->st_stid); + if (open->op_odstate) + kmem_cache_free(odstate_slab, open->op_odstate); } __be32 @@ -4385,10 +4497,17 @@ static __be32 check_stateid_generation(stateid_t *in, stateid_t *ref, bool has_s return nfserr_old_stateid; } +static __be32 nfsd4_check_openowner_confirmed(struct nfs4_ol_stateid *ols) +{ + if (ols->st_stateowner->so_is_open_owner && + !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) + return nfserr_bad_stateid; + return nfs_ok; +} + static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) { struct nfs4_stid *s; - struct nfs4_ol_stateid *ols; __be32 status = nfserr_bad_stateid; if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) @@ -4418,13 +4537,7 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) break; case NFS4_OPEN_STID: case NFS4_LOCK_STID: - ols = openlockstateid(s); - if (ols->st_stateowner->so_is_open_owner - && !(openowner(ols->st_stateowner)->oo_flags - & NFS4_OO_CONFIRMED)) - status = nfserr_bad_stateid; - else - status = nfs_ok; + status = nfsd4_check_openowner_confirmed(openlockstateid(s)); break; default: printk("unknown stateid type %x\n", s->sc_type); @@ -4516,8 +4629,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate, status = nfs4_check_fh(current_fh, stp); if (status) goto out; - if (stp->st_stateowner->so_is_open_owner - && !(openowner(stp->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED)) + status = nfsd4_check_openowner_confirmed(stp); + if (status) goto out; status = nfs4_check_openmode(stp, flags); if (status) @@ -4852,9 +4965,6 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, update_stateid(&stp->st_stid.sc_stateid); memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); - nfsd4_return_all_file_layouts(stp->st_stateowner->so_client, - stp->st_stid.sc_file); - nfsd4_close_open_stateid(stp); /* put reference from nfs4_preprocess_seqid_op */ @@ -6488,6 +6598,7 @@ nfs4_state_shutdown_net(struct net *net) list_for_each_safe(pos, next, &reaplist) { dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru); list_del_init(&dp->dl_recall_lru); + put_clnt_odstate(dp->dl_clnt_odstate); nfs4_put_deleg_lease(dp->dl_stid.sc_file); nfs4_put_stid(&dp->dl_stid); } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 4f3bfeb11766..dbc4f85a5008 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -63,12 +63,12 @@ typedef struct { struct nfsd4_callback { struct nfs4_client *cb_clp; - struct list_head cb_per_client; u32 cb_minorversion; struct rpc_message cb_msg; struct nfsd4_callback_ops *cb_ops; struct work_struct cb_work; - bool cb_done; + int cb_status; + bool cb_need_restart; }; struct nfsd4_callback_ops { @@ -126,6 +126,7 @@ struct nfs4_delegation { struct list_head dl_perfile; struct list_head dl_perclnt; struct list_head dl_recall_lru; /* delegation recalled */ + struct nfs4_clnt_odstate *dl_clnt_odstate; u32 dl_type; time_t dl_time; /* For recall: */ @@ -332,7 +333,6 @@ struct nfs4_client { int cl_cb_state; struct nfsd4_callback cl_cb_null; struct nfsd4_session *cl_cb_session; - struct list_head cl_callbacks; /* list of in-progress callbacks */ /* for all client information that callback code might need: */ spinlock_t cl_lock; @@ -465,6 +465,17 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so) } /* + * Per-client state indicating no. of opens and outstanding delegations + * on a file from a particular client.'od' stands for 'open & delegation' + */ +struct nfs4_clnt_odstate { + struct nfs4_client *co_client; + struct nfs4_file *co_file; + struct list_head co_perfile; + atomic_t co_odcount; +}; + +/* * nfs4_file: a file opened by some number of (open) nfs4_stateowners. * * These objects are global. nfsd keeps one instance of a nfs4_file per @@ -485,6 +496,7 @@ struct nfs4_file { struct list_head fi_delegations; struct rcu_head fi_rcu; }; + struct list_head fi_clnt_odstate; /* One each for O_RDONLY, O_WRONLY, O_RDWR: */ struct file * fi_fds[3]; /* @@ -526,6 +538,7 @@ struct nfs4_ol_stateid { struct list_head st_perstateowner; struct list_head st_locks; struct nfs4_stateowner * st_stateowner; + struct nfs4_clnt_odstate * st_clnt_odstate; unsigned char st_access_bmap; unsigned char st_deny_bmap; struct nfs4_ol_stateid * st_openstp; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index f982ae84f0cd..2f8c092be2b3 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -247,6 +247,7 @@ struct nfsd4_open { struct nfs4_openowner *op_openowner; /* used during processing */ struct nfs4_file *op_file; /* used during processing */ struct nfs4_ol_stateid *op_stp; /* used during processing */ + struct nfs4_clnt_odstate *op_odstate; /* used during processing */ struct nfs4_acl *op_acl; struct xdr_netobj op_label; }; diff --git a/fs/nilfs2/btree.c b/fs/nilfs2/btree.c index 059f37137f9a..919fd5bb14a8 100644 --- a/fs/nilfs2/btree.c +++ b/fs/nilfs2/btree.c @@ -388,7 +388,7 @@ static int nilfs_btree_root_broken(const struct nilfs_btree_node *node, nchildren = nilfs_btree_node_get_nchildren(node); if (unlikely(level < NILFS_BTREE_LEVEL_NODE_MIN || - level > NILFS_BTREE_LEVEL_MAX || + level >= NILFS_BTREE_LEVEL_MAX || nchildren < 0 || nchildren > NILFS_BTREE_ROOT_NCHILDREN_MAX)) { pr_crit("NILFS: bad btree root (inode number=%lu): level = %d, flags = 0x%x, nchildren = %d\n", diff --git a/fs/ntfs/namei.c b/fs/ntfs/namei.c index 0f35b80d17fe..443abecf01b7 100644 --- a/fs/ntfs/namei.c +++ b/fs/ntfs/namei.c @@ -35,7 +35,7 @@ * ntfs_lookup - find the inode represented by a dentry in a directory inode * @dir_ino: directory inode in which to look for the inode * @dent: dentry representing the inode to look for - * @nd: lookup nameidata + * @flags: lookup flags * * In short, ntfs_lookup() looks for the inode represented by the dentry @dent * in the directory inode @dir_ino and if found attaches the inode to the diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index a6944b25fd5b..fdf4b41d0609 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -757,6 +757,19 @@ lookup: if (tmpres) { spin_unlock(&dlm->spinlock); spin_lock(&tmpres->spinlock); + + /* + * Right after dlm spinlock was released, dlm_thread could have + * purged the lockres. Check if lockres got unhashed. If so + * start over. + */ + if (hlist_unhashed(&tmpres->hash_node)) { + spin_unlock(&tmpres->spinlock); + dlm_lockres_put(tmpres); + tmpres = NULL; + goto lookup; + } + /* Wait on the thread that is mastering the resource */ if (tmpres->owner == DLM_LOCK_RES_OWNER_UNKNOWN) { __dlm_wait_on_lockres(tmpres); diff --git a/fs/omfs/bitmap.c b/fs/omfs/bitmap.c index 082234581d05..83f4e76511c2 100644 --- a/fs/omfs/bitmap.c +++ b/fs/omfs/bitmap.c @@ -159,7 +159,7 @@ int omfs_allocate_range(struct super_block *sb, goto out; found: - *return_block = i * bits_per_entry + bit; + *return_block = (u64) i * bits_per_entry + bit; *return_size = run; ret = set_run(sb, i, bits_per_entry, bit, run, 1); diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index 138321b0c6c2..3d935c81789a 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -306,7 +306,8 @@ static const struct super_operations omfs_sops = { */ static int omfs_get_imap(struct super_block *sb) { - unsigned int bitmap_size, count, array_size; + unsigned int bitmap_size, array_size; + int count; struct omfs_sb_info *sbi = OMFS_SB(sb); struct buffer_head *bh; unsigned long **ptr; @@ -359,7 +360,7 @@ nomem: } enum { - Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask + Opt_uid, Opt_gid, Opt_umask, Opt_dmask, Opt_fmask, Opt_err }; static const match_table_t tokens = { @@ -368,6 +369,7 @@ static const match_table_t tokens = { {Opt_umask, "umask=%o"}, {Opt_dmask, "dmask=%o"}, {Opt_fmask, "fmask=%o"}, + {Opt_err, NULL}, }; static int parse_options(char *options, struct omfs_sb_info *sbi) @@ -548,8 +550,10 @@ static int omfs_fill_super(struct super_block *sb, void *data, int silent) } sb->s_root = d_make_root(root); - if (!sb->s_root) + if (!sb->s_root) { + ret = -ENOMEM; goto out_brelse_bh2; + } printk(KERN_DEBUG "omfs: Mounted volume %s\n", omfs_rb->r_name); ret = 0; diff --git a/fs/open.c b/fs/open.c index 98e5a52dc68c..e0250bdcc440 100644 --- a/fs/open.c +++ b/fs/open.c @@ -367,7 +367,7 @@ retry: if (res) goto out; - inode = path.dentry->d_inode; + inode = d_backing_inode(path.dentry); if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) { /* diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 24f640441bd9..84d693d37428 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -299,6 +299,9 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, struct cred *override_cred; char *link = NULL; + if (WARN_ON(!workdir)) + return -EROFS; + ovl_path_upper(parent, &parentpath); upperdir = parentpath.dentry; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index d139405d2bfa..692ceda3bc21 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -222,6 +222,9 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, struct kstat stat; int err; + if (WARN_ON(!workdir)) + return ERR_PTR(-EROFS); + err = ovl_lock_rename_workdir(workdir, upperdir); if (err) goto out; @@ -322,6 +325,9 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode, struct dentry *newdentry; int err; + if (WARN_ON(!workdir)) + return -EROFS; + err = ovl_lock_rename_workdir(workdir, upperdir); if (err) goto out; @@ -506,11 +512,28 @@ static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir) struct dentry *opaquedir = NULL; int err; - if (is_dir && OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) { - opaquedir = ovl_check_empty_and_clear(dentry); - err = PTR_ERR(opaquedir); - if (IS_ERR(opaquedir)) - goto out; + if (WARN_ON(!workdir)) + return -EROFS; + + if (is_dir) { + if (OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) { + opaquedir = ovl_check_empty_and_clear(dentry); + err = PTR_ERR(opaquedir); + if (IS_ERR(opaquedir)) + goto out; + } else { + LIST_HEAD(list); + + /* + * When removing an empty opaque directory, then it + * makes no sense to replace it with an exact replica of + * itself. But emptiness still needs to be checked. + */ + err = ovl_check_empty_dir(dentry, &list); + ovl_cache_free(&list); + if (err) + goto out; + } } err = ovl_lock_rename_workdir(workdir, upperdir); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 04f124884687..308379b2d0b2 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -140,11 +140,12 @@ struct ovl_link_data { void *cookie; }; -static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *ovl_follow_link(struct dentry *dentry, void **cookie) { - void *ret; struct dentry *realdentry; struct inode *realinode; + struct ovl_link_data *data = NULL; + const char *ret; realdentry = ovl_dentry_real(dentry); realinode = realdentry->d_inode; @@ -152,28 +153,28 @@ static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd) if (WARN_ON(!realinode->i_op->follow_link)) return ERR_PTR(-EPERM); - ret = realinode->i_op->follow_link(realdentry, nd); - if (IS_ERR(ret)) - return ret; - if (realinode->i_op->put_link) { - struct ovl_link_data *data; - data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL); - if (!data) { - realinode->i_op->put_link(realdentry, nd, ret); + if (!data) return ERR_PTR(-ENOMEM); - } data->realdentry = realdentry; - data->cookie = ret; + } - return data; - } else { - return NULL; + ret = realinode->i_op->follow_link(realdentry, cookie); + if (IS_ERR_OR_NULL(ret)) { + kfree(data); + return ret; } + + if (data) + data->cookie = *cookie; + + *cookie = data; + + return ret; } -static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c) +static void ovl_put_link(struct inode *unused, void *c) { struct inode *realinode; struct ovl_link_data *data = c; @@ -182,7 +183,7 @@ static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c) return; realinode = data->realdentry->d_inode; - realinode->i_op->put_link(data->realdentry, nd, data->cookie); + realinode->i_op->put_link(realinode, data->cookie); kfree(data); } diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 5f0d1993e6e3..bf8537c7f455 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -529,7 +529,7 @@ static int ovl_remount(struct super_block *sb, int *flags, char *data) { struct ovl_fs *ufs = sb->s_fs_info; - if (!(*flags & MS_RDONLY) && !ufs->upper_mnt) + if (!(*flags & MS_RDONLY) && (!ufs->upper_mnt || !ufs->workdir)) return -EROFS; return 0; @@ -925,9 +925,10 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) ufs->workdir = ovl_workdir_create(ufs->upper_mnt, workpath.dentry); err = PTR_ERR(ufs->workdir); if (IS_ERR(ufs->workdir)) { - pr_err("overlayfs: failed to create directory %s/%s\n", - ufs->config.workdir, OVL_WORKDIR_NAME); - goto out_put_upper_mnt; + pr_warn("overlayfs: failed to create directory %s/%s (errno: %i); mounting read-only\n", + ufs->config.workdir, OVL_WORKDIR_NAME, -err); + sb->s_flags |= MS_RDONLY; + ufs->workdir = NULL; } } @@ -997,7 +998,6 @@ out_put_lower_mnt: kfree(ufs->lower_mnt); out_put_workdir: dput(ufs->workdir); -out_put_upper_mnt: mntput(ufs->upper_mnt); out_put_lowerpath: for (i = 0; i < numlower; i++) diff --git a/fs/proc/base.c b/fs/proc/base.c index 093ca14f5701..286a422f440e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1380,7 +1380,7 @@ static int proc_exe_link(struct dentry *dentry, struct path *exe_path) return -ENOENT; } -static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *proc_pid_follow_link(struct dentry *dentry, void **cookie) { struct inode *inode = d_inode(dentry); struct path path; @@ -1394,7 +1394,7 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd) if (error) goto out; - nd_jump_link(nd, &path); + nd_jump_link(&path); return NULL; out: return ERR_PTR(error); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 8272aaba1bb0..afe232b9df6e 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -23,7 +23,6 @@ #include <linux/slab.h> #include <linux/mount.h> #include <linux/magic.h> -#include <linux/namei.h> #include <asm/uaccess.h> @@ -394,16 +393,16 @@ static const struct file_operations proc_reg_file_ops_no_compat = { }; #endif -static void *proc_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *proc_follow_link(struct dentry *dentry, void **cookie) { struct proc_dir_entry *pde = PDE(d_inode(dentry)); if (unlikely(!use_pde(pde))) return ERR_PTR(-EINVAL); - nd_set_link(nd, pde->data); - return pde; + *cookie = pde; + return pde->data; } -static void proc_put_link(struct dentry *dentry, struct nameidata *nd, void *p) +static void proc_put_link(struct inode *unused, void *p) { unuse_pde(p); } diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index e512642dbbdc..f6e8354b8cea 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -30,7 +30,7 @@ static const struct proc_ns_operations *ns_entries[] = { &mntns_operations, }; -static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *proc_ns_follow_link(struct dentry *dentry, void **cookie) { struct inode *inode = d_inode(dentry); const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; @@ -45,7 +45,7 @@ static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) if (ptrace_may_access(task, PTRACE_MODE_READ)) { error = ns_get_path(&ns_path, task, ns_ops); if (!error) - nd_jump_link(nd, &ns_path); + nd_jump_link(&ns_path); } put_task_struct(task); return error; diff --git a/fs/proc/self.c b/fs/proc/self.c index 6195b4a7c3b1..113b8d061fc0 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -1,5 +1,4 @@ #include <linux/sched.h> -#include <linux/namei.h> #include <linux/slab.h> #include <linux/pid_namespace.h> #include "internal.h" @@ -19,21 +18,20 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer, return readlink_copy(buffer, buflen, tmp); } -static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *proc_self_follow_link(struct dentry *dentry, void **cookie) { struct pid_namespace *ns = dentry->d_sb->s_fs_info; pid_t tgid = task_tgid_nr_ns(current, ns); - char *name = ERR_PTR(-ENOENT); - if (tgid) { - /* 11 for max length of signed int in decimal + NULL term */ - name = kmalloc(12, GFP_KERNEL); - if (!name) - name = ERR_PTR(-ENOMEM); - else - sprintf(name, "%d", tgid); - } - nd_set_link(nd, name); - return NULL; + char *name; + + if (!tgid) + return ERR_PTR(-ENOENT); + /* 11 for max length of signed int in decimal + NULL term */ + name = kmalloc(12, GFP_KERNEL); + if (!name) + return ERR_PTR(-ENOMEM); + sprintf(name, "%d", tgid); + return *cookie = name; } static const struct inode_operations proc_self_inode_operations = { diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index a8371993b4fb..947b0f4fd0a1 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -1,5 +1,4 @@ #include <linux/sched.h> -#include <linux/namei.h> #include <linux/slab.h> #include <linux/pid_namespace.h> #include "internal.h" @@ -20,21 +19,20 @@ static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer, return readlink_copy(buffer, buflen, tmp); } -static void *proc_thread_self_follow_link(struct dentry *dentry, struct nameidata *nd) +static const char *proc_thread_self_follow_link(struct dentry *dentry, void **cookie) { struct pid_namespace *ns = dentry->d_sb->s_fs_info; pid_t tgid = task_tgid_nr_ns(current, ns); pid_t pid = task_pid_nr_ns(current, ns); - char *name = ERR_PTR(-ENOENT); - if (pid) { - name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL); - if (!name) - name = ERR_PTR(-ENOMEM); - else - sprintf(name, "%d/task/%d", tgid, pid); - } - nd_set_link(nd, name); - return NULL; + char *name; + + if (!pid) + return ERR_PTR(-ENOENT); + name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL); + if (!name) + return ERR_PTR(-ENOMEM); + sprintf(name, "%d/task/%d", tgid, pid); + return *cookie = name; } static const struct inode_operations proc_thread_self_inode_operations = { diff --git a/fs/select.c b/fs/select.c index f684c750e08a..015547330e88 100644 --- a/fs/select.c +++ b/fs/select.c @@ -189,7 +189,7 @@ static int __pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) * doesn't imply write barrier and the users expect write * barrier semantics on wakeup functions. The following * smp_wmb() is equivalent to smp_wmb() in try_to_wake_up() - * and is paired with set_mb() in poll_schedule_timeout. + * and is paired with smp_store_mb() in poll_schedule_timeout. */ smp_wmb(); pwq->triggered = 1; @@ -244,7 +244,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, /* * Prepare for the next iteration. * - * The following set_mb() serves two purposes. First, it's + * The following smp_store_mb() serves two purposes. First, it's * the counterpart rmb of the wmb in pollwake() such that data * written before wake up is always visible after wake up. * Second, the full barrier guarantees that triggered clearing @@ -252,7 +252,7 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state, * this problem doesn't exist for the first iteration as * add_wait_queue() has full barrier semantics. */ - set_mb(pwq->triggered, 0); + smp_store_mb(pwq->triggered, 0); return rc; } diff --git a/fs/splice.c b/fs/splice.c index 476024bb6546..bfe62ae40f40 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1161,7 +1161,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, long ret, bytes; umode_t i_mode; size_t len; - int i, flags; + int i, flags, more; /* * We require the input being a regular file, as we don't want to @@ -1204,6 +1204,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, * Don't block on output, we have to drain the direct pipe. */ sd->flags &= ~SPLICE_F_NONBLOCK; + more = sd->flags & SPLICE_F_MORE; while (len) { size_t read_len; @@ -1217,6 +1218,15 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, sd->total_len = read_len; /* + * If more data is pending, set SPLICE_F_MORE + * If this is the last data and SPLICE_F_MORE was not set + * initially, clears it. + */ + if (read_len < len) + sd->flags |= SPLICE_F_MORE; + else if (!more) + sd->flags &= ~SPLICE_F_MORE; + /* * NOTE: nonblocking mode only applies to the input. We * must not do the output in nonblocking mode as then we * could get stuck data in the internal pipe: diff --git a/fs/sysv/Makefile b/fs/sysv/Makefile index 3591f9d7a48a..7a75e70a4b61 100644 --- a/fs/sysv/Makefile +++ b/fs/sysv/Makefile @@ -5,4 +5,4 @@ obj-$(CONFIG_SYSV_FS) += sysv.o sysv-objs := ialloc.o balloc.o inode.o itree.o file.o dir.o \ - namei.o super.o symlink.o + namei.o super.o diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 88956309cc86..590ad9206e3f 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -166,8 +166,9 @@ void sysv_set_inode(struct inode *inode, dev_t rdev) inode->i_op = &sysv_symlink_inode_operations; inode->i_mapping->a_ops = &sysv_aops; } else { - inode->i_op = &sysv_fast_symlink_inode_operations; - nd_terminate_link(SYSV_I(inode)->i_data, inode->i_size, + inode->i_op = &simple_symlink_inode_operations; + inode->i_link = (char *)SYSV_I(inode)->i_data; + nd_terminate_link(inode->i_link, inode->i_size, sizeof(SYSV_I(inode)->i_data) - 1); } } else diff --git a/fs/sysv/symlink.c b/fs/sysv/symlink.c deleted file mode 100644 index d3fa0d703314..000000000000 --- a/fs/sysv/symlink.c +++ /dev/null @@ -1,20 +0,0 @@ -/* - * linux/fs/sysv/symlink.c - * - * Handling of System V filesystem fast symlinks extensions. - * Aug 2001, Christoph Hellwig (hch@infradead.org) - */ - -#include "sysv.h" -#include <linux/namei.h> - -static void *sysv_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - nd_set_link(nd, (char *)SYSV_I(d_inode(dentry))->i_data); - return NULL; -} - -const struct inode_operations sysv_fast_symlink_inode_operations = { - .readlink = generic_readlink, - .follow_link = sysv_follow_link, -}; diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 69d488986cce..2c13525131cd 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -161,7 +161,6 @@ extern ino_t sysv_inode_by_name(struct dentry *); extern const struct inode_operations sysv_file_inode_operations; extern const struct inode_operations sysv_dir_inode_operations; -extern const struct inode_operations sysv_fast_symlink_inode_operations; extern const struct file_operations sysv_file_operations; extern const struct file_operations sysv_dir_operations; extern const struct address_space_operations sysv_aops; diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 27060fc855d4..5c27c66c224a 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -889,6 +889,7 @@ static int ubifs_symlink(struct inode *dir, struct dentry *dentry, memcpy(ui->data, symname, len); ((char *)ui->data)[len] = '\0'; + inode->i_link = ui->data; /* * The terminating zero byte is not written to the flash media and it * is put just to make later in-memory string processing simpler. Thus, diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 35efc103c39c..a3dfe2ae79f2 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -51,7 +51,6 @@ #include "ubifs.h" #include <linux/mount.h> -#include <linux/namei.h> #include <linux/slab.h> static int read_block(struct inode *inode, void *addr, unsigned int block, @@ -1300,14 +1299,6 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset, ClearPageChecked(page); } -static void *ubifs_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct ubifs_inode *ui = ubifs_inode(d_inode(dentry)); - - nd_set_link(nd, ui->data); - return NULL; -} - int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; @@ -1570,7 +1561,7 @@ const struct inode_operations ubifs_file_inode_operations = { const struct inode_operations ubifs_symlink_inode_operations = { .readlink = generic_readlink, - .follow_link = ubifs_follow_link, + .follow_link = simple_follow_link, .setattr = ubifs_setattr, .getattr = ubifs_getattr, .setxattr = ubifs_setxattr, diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index 75e6f04bb795..20f5dbd7c6a8 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -195,6 +195,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) } memcpy(ui->data, ino->data, ui->data_len); ((char *)ui->data)[ui->data_len] = '\0'; + inode->i_link = ui->data; break; case S_IFBLK: case S_IFCHR: diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index be7d42c7d938..99aaf5c9bf4d 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -572,9 +572,10 @@ static void ufs_set_inode_ops(struct inode *inode) inode->i_fop = &ufs_dir_operations; inode->i_mapping->a_ops = &ufs_aops; } else if (S_ISLNK(inode->i_mode)) { - if (!inode->i_blocks) + if (!inode->i_blocks) { inode->i_op = &ufs_fast_symlink_inode_operations; - else { + inode->i_link = (char *)UFS_I(inode)->i_u1.i_symlink; + } else { inode->i_op = &ufs_symlink_inode_operations; inode->i_mapping->a_ops = &ufs_aops; } diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index e491a93a7e9a..f773deb1d2e3 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -144,7 +144,8 @@ static int ufs_symlink (struct inode * dir, struct dentry * dentry, } else { /* fast symlink */ inode->i_op = &ufs_fast_symlink_inode_operations; - memcpy(UFS_I(inode)->i_u1.i_symlink, symname, l); + inode->i_link = (char *)UFS_I(inode)->i_u1.i_symlink; + memcpy(inode->i_link, symname, l); inode->i_size = l-1; } mark_inode_dirty(inode); diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c index 5b537e2fdda3..874480bb43e9 100644 --- a/fs/ufs/symlink.c +++ b/fs/ufs/symlink.c @@ -25,23 +25,12 @@ * ext2 symlink handling code */ -#include <linux/fs.h> -#include <linux/namei.h> - #include "ufs_fs.h" #include "ufs.h" - -static void *ufs_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct ufs_inode_info *p = UFS_I(d_inode(dentry)); - nd_set_link(nd, (char*)p->i_u1.i_symlink); - return NULL; -} - const struct inode_operations ufs_fast_symlink_inode_operations = { .readlink = generic_readlink, - .follow_link = ufs_follow_link, + .follow_link = simple_follow_link, .setattr = ufs_setattr, }; diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c index 04e79d57bca6..e9d401ce93bb 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.c +++ b/fs/xfs/libxfs/xfs_attr_leaf.c @@ -574,8 +574,8 @@ xfs_attr_shortform_add(xfs_da_args_t *args, int forkoff) * After the last attribute is removed revert to original inode format, * making all literal area available to the data fork once more. */ -STATIC void -xfs_attr_fork_reset( +void +xfs_attr_fork_remove( struct xfs_inode *ip, struct xfs_trans *tp) { @@ -641,7 +641,7 @@ xfs_attr_shortform_remove(xfs_da_args_t *args) (mp->m_flags & XFS_MOUNT_ATTR2) && (dp->i_d.di_format != XFS_DINODE_FMT_BTREE) && !(args->op_flags & XFS_DA_OP_ADDNAME)) { - xfs_attr_fork_reset(dp, args->trans); + xfs_attr_fork_remove(dp, args->trans); } else { xfs_idata_realloc(dp, -size, XFS_ATTR_FORK); dp->i_d.di_forkoff = xfs_attr_shortform_bytesfit(dp, totsize); @@ -905,7 +905,7 @@ xfs_attr3_leaf_to_shortform( if (forkoff == -1) { ASSERT(dp->i_mount->m_flags & XFS_MOUNT_ATTR2); ASSERT(dp->i_d.di_format != XFS_DINODE_FMT_BTREE); - xfs_attr_fork_reset(dp, args->trans); + xfs_attr_fork_remove(dp, args->trans); goto out; } diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h index 025c4b820c03..882c8d338891 100644 --- a/fs/xfs/libxfs/xfs_attr_leaf.h +++ b/fs/xfs/libxfs/xfs_attr_leaf.h @@ -53,7 +53,7 @@ int xfs_attr_shortform_remove(struct xfs_da_args *args); int xfs_attr_shortform_list(struct xfs_attr_list_context *context); int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); int xfs_attr_shortform_bytesfit(xfs_inode_t *dp, int bytes); - +void xfs_attr_fork_remove(struct xfs_inode *ip, struct xfs_trans *tp); /* * Internal routines when attribute fork size == XFS_LBSIZE(mp). diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index aeffeaaac0ec..f1026e86dabc 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3224,12 +3224,24 @@ xfs_bmap_extsize_align( align_alen += temp; align_off -= temp; } + + /* Same adjustment for the end of the requested area. */ + temp = (align_alen % extsz); + if (temp) + align_alen += extsz - temp; + /* - * Same adjustment for the end of the requested area. + * For large extent hint sizes, the aligned extent might be larger than + * MAXEXTLEN. In that case, reduce the size by an extsz so that it pulls + * the length back under MAXEXTLEN. The outer allocation loops handle + * short allocation just fine, so it is safe to do this. We only want to + * do it when we are forced to, though, because it means more allocation + * operations are required. */ - if ((temp = (align_alen % extsz))) { - align_alen += extsz - temp; - } + while (align_alen > MAXEXTLEN) + align_alen -= extsz; + ASSERT(align_alen <= MAXEXTLEN); + /* * If the previous block overlaps with this proposed allocation * then move the start forward without adjusting the length. @@ -3318,7 +3330,9 @@ xfs_bmap_extsize_align( return -EINVAL; } else { ASSERT(orig_off >= align_off); - ASSERT(orig_end <= align_off + align_alen); + /* see MAXEXTLEN handling above */ + ASSERT(orig_end <= align_off + align_alen || + align_alen + extsz > MAXEXTLEN); } #ifdef DEBUG @@ -4099,13 +4113,6 @@ xfs_bmapi_reserve_delalloc( /* Figure out the extent size, adjust alen */ extsz = xfs_get_extsz_hint(ip); if (extsz) { - /* - * Make sure we don't exceed a single extent length when we - * align the extent by reducing length we are going to - * allocate by the maximum amount extent size aligment may - * require. - */ - alen = XFS_FILBLKS_MIN(len, MAXEXTLEN - (2 * extsz - 1)); error = xfs_bmap_extsize_align(mp, got, prev, extsz, rt, eof, 1, 0, &aoff, &alen); ASSERT(!error); diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 07349a183a11..1c9e75521250 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -376,7 +376,7 @@ xfs_ialloc_ag_alloc( */ newlen = args.mp->m_ialloc_inos; if (args.mp->m_maxicount && - percpu_counter_read(&args.mp->m_icount) + newlen > + percpu_counter_read_positive(&args.mp->m_icount) + newlen > args.mp->m_maxicount) return -ENOSPC; args.minlen = args.maxlen = args.mp->m_ialloc_blks; @@ -1339,10 +1339,13 @@ xfs_dialloc( * If we have already hit the ceiling of inode blocks then clear * okalloc so we scan all available agi structures for a free * inode. + * + * Read rough value of mp->m_icount by percpu_counter_read_positive, + * which will sacrifice the preciseness but improve the performance. */ if (mp->m_maxicount && - percpu_counter_read(&mp->m_icount) + mp->m_ialloc_inos > - mp->m_maxicount) { + percpu_counter_read_positive(&mp->m_icount) + mp->m_ialloc_inos + > mp->m_maxicount) { noroom = 1; okalloc = 0; } diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index f9c1c64782d3..3fbf167cfb4c 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -380,23 +380,31 @@ xfs_attr3_root_inactive( return error; } +/* + * xfs_attr_inactive kills all traces of an attribute fork on an inode. It + * removes both the on-disk and in-memory inode fork. Note that this also has to + * handle the condition of inodes without attributes but with an attribute fork + * configured, so we can't use xfs_inode_hasattr() here. + * + * The in-memory attribute fork is removed even on error. + */ int -xfs_attr_inactive(xfs_inode_t *dp) +xfs_attr_inactive( + struct xfs_inode *dp) { - xfs_trans_t *trans; - xfs_mount_t *mp; - int error; + struct xfs_trans *trans; + struct xfs_mount *mp; + int cancel_flags = 0; + int lock_mode = XFS_ILOCK_SHARED; + int error = 0; mp = dp->i_mount; ASSERT(! XFS_NOT_DQATTACHED(mp, dp)); - xfs_ilock(dp, XFS_ILOCK_SHARED); - if (!xfs_inode_hasattr(dp) || - dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { - xfs_iunlock(dp, XFS_ILOCK_SHARED); - return 0; - } - xfs_iunlock(dp, XFS_ILOCK_SHARED); + xfs_ilock(dp, lock_mode); + if (!XFS_IFORK_Q(dp)) + goto out_destroy_fork; + xfs_iunlock(dp, lock_mode); /* * Start our first transaction of the day. @@ -408,13 +416,18 @@ xfs_attr_inactive(xfs_inode_t *dp) * the inode in every transaction to let it float upward through * the log. */ + lock_mode = 0; trans = xfs_trans_alloc(mp, XFS_TRANS_ATTRINVAL); error = xfs_trans_reserve(trans, &M_RES(mp)->tr_attrinval, 0, 0); - if (error) { - xfs_trans_cancel(trans, 0); - return error; - } - xfs_ilock(dp, XFS_ILOCK_EXCL); + if (error) + goto out_cancel; + + lock_mode = XFS_ILOCK_EXCL; + cancel_flags = XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT; + xfs_ilock(dp, lock_mode); + + if (!XFS_IFORK_Q(dp)) + goto out_cancel; /* * No need to make quota reservations here. We expect to release some @@ -422,29 +435,31 @@ xfs_attr_inactive(xfs_inode_t *dp) */ xfs_trans_ijoin(trans, dp, 0); - /* - * Decide on what work routines to call based on the inode size. - */ - if (!xfs_inode_hasattr(dp) || - dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { - error = 0; - goto out; + /* invalidate and truncate the attribute fork extents */ + if (dp->i_d.di_aformat != XFS_DINODE_FMT_LOCAL) { + error = xfs_attr3_root_inactive(&trans, dp); + if (error) + goto out_cancel; + + error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); + if (error) + goto out_cancel; } - error = xfs_attr3_root_inactive(&trans, dp); - if (error) - goto out; - error = xfs_itruncate_extents(&trans, dp, XFS_ATTR_FORK, 0); - if (error) - goto out; + /* Reset the attribute fork - this also destroys the in-core fork */ + xfs_attr_fork_remove(dp, trans); error = xfs_trans_commit(trans, XFS_TRANS_RELEASE_LOG_RES); - xfs_iunlock(dp, XFS_ILOCK_EXCL); - + xfs_iunlock(dp, lock_mode); return error; -out: - xfs_trans_cancel(trans, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); - xfs_iunlock(dp, XFS_ILOCK_EXCL); +out_cancel: + xfs_trans_cancel(trans, cancel_flags); +out_destroy_fork: + /* kill the in-core attr fork before we drop the inode lock */ + if (dp->i_afp) + xfs_idestroy_fork(dp, XFS_ATTR_FORK); + if (lock_mode) + xfs_iunlock(dp, lock_mode); return error; } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 8121e75352ee..3b7591224f4a 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -124,7 +124,7 @@ xfs_iozero( status = 0; } while (count); - return (-status); + return status; } int diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index d6ebc85192b7..539a85fddbc2 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1946,21 +1946,17 @@ xfs_inactive( /* * If there are attributes associated with the file then blow them away * now. The code calls a routine that recursively deconstructs the - * attribute fork. We need to just commit the current transaction - * because we can't use it for xfs_attr_inactive(). + * attribute fork. If also blows away the in-core attribute fork. */ - if (ip->i_d.di_anextents > 0) { - ASSERT(ip->i_d.di_forkoff != 0); - + if (XFS_IFORK_Q(ip)) { error = xfs_attr_inactive(ip); if (error) return; } - if (ip->i_afp) - xfs_idestroy_fork(ip, XFS_ATTR_FORK); - + ASSERT(!ip->i_afp); ASSERT(ip->i_d.di_anextents == 0); + ASSERT(ip->i_d.di_forkoff == 0); /* * Free the inode. @@ -2883,7 +2879,13 @@ xfs_rename_alloc_whiteout( if (error) return error; - /* Satisfy xfs_bumplink that this is a real tmpfile */ + /* + * Prepare the tmpfile inode as if it were created through the VFS. + * Otherwise, the link increment paths will complain about nlink 0->1. + * Drop the link count as done by d_tmpfile(), complete the inode setup + * and flag it as linkable. + */ + drop_nlink(VFS_I(tmpfile)); xfs_finish_inode_setup(tmpfile); VFS_I(tmpfile)->i_state |= I_LINKABLE; @@ -3151,7 +3153,7 @@ xfs_rename( * intermediate state on disk. */ if (wip) { - ASSERT(wip->i_d.di_nlink == 0); + ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0); error = xfs_bumplink(tp, wip); if (error) goto out_trans_abort; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index f4cd7204e236..7f51f39f8acc 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -41,7 +41,6 @@ #include <linux/capability.h> #include <linux/xattr.h> -#include <linux/namei.h> #include <linux/posix_acl.h> #include <linux/security.h> #include <linux/fiemap.h> @@ -414,10 +413,10 @@ xfs_vn_rename( * we need to be very careful about how much stack we use. * uio is kmalloced for this reason... */ -STATIC void * +STATIC const char * xfs_vn_follow_link( struct dentry *dentry, - struct nameidata *nd) + void **cookie) { char *link; int error = -ENOMEM; @@ -430,14 +429,12 @@ xfs_vn_follow_link( if (unlikely(error)) goto out_kfree; - nd_set_link(nd, link); - return NULL; + return *cookie = link; out_kfree: kfree(link); out_err: - nd_set_link(nd, ERR_PTR(error)); - return NULL; + return ERR_PTR(error); } STATIC int diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 2ce7ee3b4ec1..6f23fbdfb365 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1084,14 +1084,18 @@ xfs_log_sbcount(xfs_mount_t *mp) return xfs_sync_sb(mp, true); } +/* + * Deltas for the inode count are +/-64, hence we use a large batch size + * of 128 so we don't need to take the counter lock on every update. + */ +#define XFS_ICOUNT_BATCH 128 int xfs_mod_icount( struct xfs_mount *mp, int64_t delta) { - /* deltas are +/-64, hence the large batch size of 128. */ - __percpu_counter_add(&mp->m_icount, delta, 128); - if (percpu_counter_compare(&mp->m_icount, 0) < 0) { + __percpu_counter_add(&mp->m_icount, delta, XFS_ICOUNT_BATCH); + if (__percpu_counter_compare(&mp->m_icount, 0, XFS_ICOUNT_BATCH) < 0) { ASSERT(0); percpu_counter_add(&mp->m_icount, -delta); return -EINVAL; @@ -1113,6 +1117,14 @@ xfs_mod_ifree( return 0; } +/* + * Deltas for the block count can vary from 1 to very large, but lock contention + * only occurs on frequent small block count updates such as in the delayed + * allocation path for buffered writes (page a time updates). Hence we set + * a large batch count (1024) to minimise global counter updates except when + * we get near to ENOSPC and we have to be very accurate with our updates. + */ +#define XFS_FDBLOCKS_BATCH 1024 int xfs_mod_fdblocks( struct xfs_mount *mp, @@ -1151,25 +1163,19 @@ xfs_mod_fdblocks( * Taking blocks away, need to be more accurate the closer we * are to zero. * - * batch size is set to a maximum of 1024 blocks - if we are - * allocating of freeing extents larger than this then we aren't - * going to be hammering the counter lock so a lock per update - * is not a problem. - * * If the counter has a value of less than 2 * max batch size, * then make everything serialise as we are real close to * ENOSPC. */ -#define __BATCH 1024 - if (percpu_counter_compare(&mp->m_fdblocks, 2 * __BATCH) < 0) + if (__percpu_counter_compare(&mp->m_fdblocks, 2 * XFS_FDBLOCKS_BATCH, + XFS_FDBLOCKS_BATCH) < 0) batch = 1; else - batch = __BATCH; -#undef __BATCH + batch = XFS_FDBLOCKS_BATCH; __percpu_counter_add(&mp->m_fdblocks, delta, batch); - if (percpu_counter_compare(&mp->m_fdblocks, - XFS_ALLOC_SET_ASIDE(mp)) >= 0) { + if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp), + XFS_FDBLOCKS_BATCH) >= 0) { /* we had space! */ return 0; } |