From 00422483ad415d6267d36711f0e51f4bbbd653ed Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 29 Jun 2017 06:34:53 -0700 Subject: nfs: add export operations This support for opening files on NFS by file handle, both through the open_by_handle syscall, and for re-exporting NFS (for example using a different version). The support is very basic for now, as each open by handle will have to do an NFSv4 open operation on the wire. In the future this will hopefully be mitigated by an open file cache, as well as various optimizations in NFS for this specific case. Signed-off-by: Peng Tao [hch: incorporated various changes, resplit the patches, new changelog] Signed-off-by: Christoph Hellwig --- fs/nfs/Makefile | 2 +- fs/nfs/export.c | 177 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/internal.h | 2 + fs/nfs/super.c | 2 + 4 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 fs/nfs/export.c (limited to 'fs') diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 98f4e5728a67..1fb118902d57 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_NFS_FS) += nfs.o CFLAGS_nfstrace.o += -I$(src) nfs-y := client.o dir.o file.o getroot.o inode.o super.o \ io.o direct.o pagelist.o read.o symlink.o unlink.o \ - write.o namespace.o mount_clnt.o nfstrace.o + write.o namespace.o mount_clnt.o nfstrace.o export.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o nfs-$(CONFIG_SYSCTL) += sysctl.o nfs-$(CONFIG_NFS_FSCACHE) += fscache.o fscache-index.o diff --git a/fs/nfs/export.c b/fs/nfs/export.c new file mode 100644 index 000000000000..249cb96cc5b5 --- /dev/null +++ b/fs/nfs/export.c @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2015, Primary Data, Inc. All rights reserved. + * + * Tao Peng + */ +#include +#include +#include +#include + +#include "internal.h" +#include "nfstrace.h" + +#define NFSDBG_FACILITY NFSDBG_VFS + +enum { + FILEID_HIGH_OFF = 0, /* inode fileid high */ + FILEID_LOW_OFF, /* inode fileid low */ + FILE_I_TYPE_OFF, /* inode type */ + EMBED_FH_OFF /* embeded server fh */ +}; + + +static struct nfs_fh *nfs_exp_embedfh(__u32 *p) +{ + return (struct nfs_fh *)(p + EMBED_FH_OFF); +} + +/* + * Let's break subtree checking for now... otherwise we'll have to embed parent fh + * but there might not be enough space. + */ +static int +nfs_encode_fh(struct inode *inode, __u32 *p, int *max_len, struct inode *parent) +{ + struct nfs_fh *server_fh = NFS_FH(inode); + struct nfs_fh *clnt_fh = nfs_exp_embedfh(p); + size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size; + int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size); + + dprintk("%s: max fh len %d inode %p parent %p", + __func__, *max_len, inode, parent); + + if (*max_len < len || IS_AUTOMOUNT(inode)) { + dprintk("%s: fh len %d too small, required %d\n", + __func__, *max_len, len); + *max_len = len; + return FILEID_INVALID; + } + if (IS_AUTOMOUNT(inode)) { + *max_len = FILEID_INVALID; + goto out; + } + + p[FILEID_HIGH_OFF] = NFS_FILEID(inode) >> 32; + p[FILEID_LOW_OFF] = NFS_FILEID(inode); + p[FILE_I_TYPE_OFF] = inode->i_mode & S_IFMT; + p[len - 1] = 0; /* Padding */ + nfs_copy_fh(clnt_fh, server_fh); + *max_len = len; +out: + dprintk("%s: result fh fileid %llu mode %u size %d\n", + __func__, NFS_FILEID(inode), inode->i_mode, *max_len); + return *max_len; +} + +static struct dentry * +nfs_fh_to_dentry(struct super_block *sb, struct fid *fid, + int fh_len, int fh_type) +{ + struct nfs4_label *label = NULL; + struct nfs_fattr *fattr = NULL; + struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw); + size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size; + const struct nfs_rpc_ops *rpc_ops; + struct dentry *dentry; + struct inode *inode; + int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size); + u32 *p = fid->raw; + int ret; + + /* NULL translates to ESTALE */ + if (fh_len < len || fh_type != len) + return NULL; + + fattr = nfs_alloc_fattr(); + if (fattr == NULL) { + dentry = ERR_PTR(-ENOMEM); + goto out; + } + + fattr->fileid = ((u64)p[FILEID_HIGH_OFF] << 32) + p[FILEID_LOW_OFF]; + fattr->mode = p[FILE_I_TYPE_OFF]; + fattr->valid |= NFS_ATTR_FATTR_FILEID | NFS_ATTR_FATTR_TYPE; + + dprintk("%s: fileid %llu mode %d\n", __func__, fattr->fileid, fattr->mode); + + inode = nfs_ilookup(sb, fattr, server_fh); + if (inode) + goto out_found; + + label = nfs4_label_alloc(NFS_SB(sb), GFP_KERNEL); + if (IS_ERR(label)) { + dentry = ERR_CAST(label); + goto out_free_fattr; + } + + rpc_ops = NFS_SB(sb)->nfs_client->rpc_ops; + ret = rpc_ops->getattr(NFS_SB(sb), server_fh, fattr, label); + if (ret) { + dprintk("%s: getattr failed %d\n", __func__, ret); + dentry = ERR_PTR(ret); + goto out_free_label; + } + + inode = nfs_fhget(sb, server_fh, fattr, label); + +out_found: + dentry = d_obtain_alias(inode); + +out_free_label: + nfs4_label_free(label); +out_free_fattr: + nfs_free_fattr(fattr); +out: + return dentry; +} + +static struct dentry * +nfs_get_parent(struct dentry *dentry) +{ + int ret; + struct inode *inode = d_inode(dentry), *pinode; + struct super_block *sb = inode->i_sb; + struct nfs_server *server = NFS_SB(sb); + struct nfs_fattr *fattr = NULL; + struct nfs4_label *label = NULL; + struct dentry *parent; + struct nfs_rpc_ops const *ops = server->nfs_client->rpc_ops; + struct nfs_fh fh; + + if (!ops->lookupp) + return ERR_PTR(-EACCES); + + fattr = nfs_alloc_fattr(); + if (fattr == NULL) { + parent = ERR_PTR(-ENOMEM); + goto out; + } + + label = nfs4_label_alloc(server, GFP_KERNEL); + if (IS_ERR(label)) { + parent = ERR_CAST(label); + goto out_free_fattr; + } + + ret = ops->lookupp(inode, &fh, fattr, label); + if (ret) { + parent = ERR_PTR(ret); + goto out_free_label; + } + + pinode = nfs_fhget(sb, &fh, fattr, label); + parent = d_obtain_alias(pinode); +out_free_label: + nfs4_label_free(label); +out_free_fattr: + nfs_free_fattr(fattr); +out: + return parent; +} + +const struct export_operations nfs_export_ops = { + .encode_fh = nfs_encode_fh, + .fh_to_dentry = nfs_fh_to_dentry, + .get_parent = nfs_get_parent, +}; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index c5054edb0157..2ebd57498986 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -10,6 +10,8 @@ #define NFS_MS_MASK (MS_RDONLY|MS_NOSUID|MS_NODEV|MS_NOEXEC|MS_SYNCHRONOUS) +extern const struct export_operations nfs_export_ops; + struct nfs_string; /* Maximum number of readahead requests diff --git a/fs/nfs/super.c b/fs/nfs/super.c index b4176393f049..b5271644b472 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2339,6 +2339,7 @@ void nfs_fill_super(struct super_block *sb, struct nfs_mount_info *mount_info) */ sb->s_flags |= MS_POSIXACL; sb->s_time_gran = 1; + sb->s_export_op = &nfs_export_ops; } nfs_initialise_sb(sb); @@ -2360,6 +2361,7 @@ static void nfs_clone_super(struct super_block *sb, sb->s_xattr = old_sb->s_xattr; sb->s_op = old_sb->s_op; sb->s_time_gran = 1; + sb->s_export_op = old_sb->s_export_op; if (server->nfs_client->rpc_ops->version != 2) { /* The VFS shouldn't apply the umask to mode bits. We will do -- cgit v1.2.3 From 301bfa483016d48b7fb9cbad87c0a04a15c25b90 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Jul 2017 17:53:48 -0400 Subject: NFS: Don't run wake_up_bit() when nobody is waiting... "perf lock" shows fairly heavy contention for the bit waitqueue locks when doing an I/O heavy workload. Use a bit to tell whether or not there has been contention for a lock so that we can optimise away the bit waitqueue options in those cases. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pagelist.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 8a23e2b40b04..de9066a92c0d 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -155,9 +155,12 @@ nfs_page_group_lock(struct nfs_page *req, bool nonblock) if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags)) return 0; - if (!nonblock) + if (!nonblock) { + set_bit(PG_CONTENDED1, &head->wb_flags); + smp_mb__after_atomic(); return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, TASK_UNINTERRUPTIBLE); + } return -EAGAIN; } @@ -175,6 +178,10 @@ nfs_page_group_lock_wait(struct nfs_page *req) WARN_ON_ONCE(head != head->wb_head); + if (!test_bit(PG_HEADLOCK, &head->wb_flags)) + return; + set_bit(PG_CONTENDED1, &head->wb_flags); + smp_mb__after_atomic(); wait_on_bit(&head->wb_flags, PG_HEADLOCK, TASK_UNINTERRUPTIBLE); } @@ -193,6 +200,8 @@ nfs_page_group_unlock(struct nfs_page *req) smp_mb__before_atomic(); clear_bit(PG_HEADLOCK, &head->wb_flags); smp_mb__after_atomic(); + if (!test_bit(PG_CONTENDED1, &head->wb_flags)) + return; wake_up_bit(&head->wb_flags, PG_HEADLOCK); } @@ -383,6 +392,8 @@ void nfs_unlock_request(struct nfs_page *req) smp_mb__before_atomic(); clear_bit(PG_BUSY, &req->wb_flags); smp_mb__after_atomic(); + if (!test_bit(PG_CONTENDED2, &req->wb_flags)) + return; wake_up_bit(&req->wb_flags, PG_BUSY); } @@ -465,6 +476,10 @@ void nfs_release_request(struct nfs_page *req) int nfs_wait_on_request(struct nfs_page *req) { + if (!test_bit(PG_BUSY, &req->wb_flags)) + return 0; + set_bit(PG_CONTENDED2, &req->wb_flags); + smp_mb__after_atomic(); return wait_on_bit_io(&req->wb_flags, PG_BUSY, TASK_UNINTERRUPTIBLE); } -- cgit v1.2.3 From 89a6814d9b665b196aa3a102f96b6dc7e8cb669e Mon Sep 17 00:00:00 2001 From: Steve Dickson Date: Thu, 29 Jun 2017 11:48:26 -0400 Subject: mount: copy the port field into the cloned nfs_server structure. Doing this copy eliminates the "port=0" entry in the /proc/mounts entries Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=69241 Signed-off-by: Steve Dickson Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index ee5ddbd36088..efebe6cf4378 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -820,6 +820,7 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour target->caps = source->caps; target->options = source->options; target->auth_info = source->auth_info; + target->port = source->port; } EXPORT_SYMBOL_GPL(nfs_server_copy_userdata); -- cgit v1.2.3 From e39928f942ff7a9d1cb9005423e9e35a0a4bb2e0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 12 Jul 2017 19:10:56 -0400 Subject: NFS: Fix a COMMIT race in pNFS We must make sure that cinfo->ds->nwritten is in sync with the commit list, since it is checked as part of pnfs_scan_commit_lists(). Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pnfs_nfs.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index d40755a0984b..3e6de85faf42 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -159,13 +159,18 @@ void pnfs_generic_recover_commit_reqs(struct list_head *dst, { struct pnfs_commit_bucket *b; struct pnfs_layout_segment *freeme; + int nwritten; int i; lockdep_assert_held(&cinfo->inode->i_lock); restart: for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { - if (pnfs_generic_transfer_commit_list(&b->written, dst, - cinfo, 0)) { + nwritten = pnfs_generic_transfer_commit_list(&b->written, + dst, cinfo, 0); + if (!nwritten) + continue; + cinfo->ds->nwritten -= nwritten; + if (list_empty(&b->written)) { freeme = b->wlseg; b->wlseg = NULL; spin_unlock(&cinfo->inode->i_lock); @@ -174,7 +179,6 @@ restart: goto restart; } } - cinfo->ds->nwritten = 0; } EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs); -- cgit v1.2.3 From 41181886459b281ed1346369f27b3c3bb8e2dde3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 12 Jul 2017 19:10:57 -0400 Subject: NFS: Fix another COMMIT race in pNFS We must make sure that cinfo->ds->ncommitting is in sync with the commit list, since it is checked as part of pnfs_commit_list(). Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pnfs_nfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 3e6de85faf42..7ceb86627e54 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -187,6 +187,7 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds; struct pnfs_commit_bucket *bucket; struct pnfs_layout_segment *freeme; + struct list_head *pos; LIST_HEAD(pages); int i; @@ -197,6 +198,8 @@ static void pnfs_generic_retry_commit(struct nfs_commit_info *cinfo, int idx) continue; freeme = bucket->clseg; bucket->clseg = NULL; + list_for_each(pos, &bucket->committing) + cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, &pages); spin_unlock(&cinfo->inode->i_lock); nfs_retry_commit(&pages, freeme, cinfo, i); @@ -247,9 +250,12 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages, struct nfs_commit_info *cinfo) { struct pnfs_commit_bucket *bucket; + struct list_head *pos; bucket = &cinfo->ds->buckets[data->ds_commit_index]; spin_lock(&cinfo->inode->i_lock); + list_for_each(pos, &bucket->committing) + cinfo->ds->ncommitting--; list_splice_init(&bucket->committing, pages); data->lseg = bucket->clseg; bucket->clseg = NULL; @@ -334,7 +340,6 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages, } } out: - cinfo->ds->ncommitting = 0; return PNFS_ATTEMPTED; } EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist); -- cgit v1.2.3 From 4b75053e9bb6db4b700526d2d67c67a0d07f867e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 12 Jul 2017 19:10:58 -0400 Subject: pNFS/flexfiles: Handle expired layout segments in ff_layout_initiate_commit() If the layout has expired due to a fencing event, then we should not attempt to commit to the DS. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/flexfilelayout/flexfilelayout.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 1f2ac3dd0fe5..b0fa83a60754 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1842,6 +1842,10 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) int vers, ret; struct nfs_fh *fh; + if (!lseg || !(pnfs_is_valid_lseg(lseg) || + test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))) + goto out_err; + idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); ds = nfs4_ff_layout_prepare_ds(lseg, idx, true); if (!ds) -- cgit v1.2.3 From 213297369cf4900eba906dd32ce845074e30f487 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 12 Jul 2017 19:10:59 -0400 Subject: Revert commit 722f0b891198 ("pNFS: Don't send COMMITs to the DSes if...") Doing the test without taking any locks is racy, and so really it makes more sense to do it in the flexfiles code (which is the only case that cares). Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/pnfs_nfs.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'fs') diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 7ceb86627e54..25f28fa64c57 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -224,13 +224,6 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo, for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { if (list_empty(&bucket->committing)) continue; - /* - * If the layout segment is invalid, then let - * pnfs_generic_retry_commit() clean up the bucket. - */ - if (bucket->clseg && !pnfs_is_valid_lseg(bucket->clseg) && - !test_bit(NFS_LSEG_LAYOUTRETURN, &bucket->clseg->pls_flags)) - break; data = nfs_commitdata_alloc(false); if (!data) break; -- cgit v1.2.3 From ecc7b435d2beb025d7506af74cf749af2cef5734 Mon Sep 17 00:00:00 2001 From: Eryu Guan Date: Tue, 18 Jul 2017 13:32:32 +0800 Subject: nfs: count correct array for mnt3_counts array size Array size of mnt3_counts should be the size of array mnt3_procedures, not mnt_procedures, though they're same in size right now. Found this by code inspection. Fixes: 1c5876ddbdb4 ("sunrpc: move p_count out of struct rpc_procinfo") Cc: Christoph Hellwig Signed-off-by: Eryu Guan Reviewed-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- fs/nfs/mount_clnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c index 3efe946672be..60bad882c123 100644 --- a/fs/nfs/mount_clnt.c +++ b/fs/nfs/mount_clnt.c @@ -512,7 +512,7 @@ static const struct rpc_version mnt_version1 = { .counts = mnt_counts, }; -static unsigned int mnt3_counts[ARRAY_SIZE(mnt_procedures)]; +static unsigned int mnt3_counts[ARRAY_SIZE(mnt3_procedures)]; static const struct rpc_version mnt_version3 = { .number = 3, .nrprocs = ARRAY_SIZE(mnt3_procedures), -- cgit v1.2.3 From 15d4b73ac2232d6f2beb61d8b2400ea66e4da606 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Jul 2017 17:54:32 -0400 Subject: NFS: Refactor NFS access to kernel access mask calculation Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 1255891e5695..24b3a6748062 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2375,16 +2375,31 @@ void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) } EXPORT_SYMBOL_GPL(nfs_access_add_cache); +#define NFS_MAY_READ (NFS4_ACCESS_READ) +#define NFS_MAY_WRITE (NFS4_ACCESS_MODIFY | \ + NFS4_ACCESS_EXTEND | \ + NFS4_ACCESS_DELETE) +#define NFS_MAY_LOOKUP (NFS4_ACCESS_LOOKUP) +#define NFS_MAY_EXECUTE (NFS4_ACCESS_EXECUTE) +static int +nfs_access_calc_mask(u32 access_result) +{ + int mask = 0; + + if (access_result & NFS_MAY_READ) + mask |= MAY_READ; + if (access_result & NFS_MAY_WRITE) + mask |= MAY_WRITE; + if (access_result & NFS_MAY_LOOKUP) + mask |= MAY_EXEC; + if (access_result & NFS_MAY_EXECUTE) + mask |= MAY_EXEC; + return mask; +} + void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result) { - entry->mask = 0; - if (access_result & NFS4_ACCESS_READ) - entry->mask |= MAY_READ; - if (access_result & - (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE)) - entry->mask |= MAY_WRITE; - if (access_result & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE)) - entry->mask |= MAY_EXEC; + entry->mask = nfs_access_calc_mask(access_result); } EXPORT_SYMBOL_GPL(nfs_access_set_mask); -- cgit v1.2.3 From eda3e20847788c453aa7ab478aeaceb56ed29cb6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Jul 2017 17:54:33 -0400 Subject: NFSv3: Convert nfs3_proc_access() to use nfs_access_set_mask() Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs3proc.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index df4a7d3ab915..d1e87ec0df84 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -220,15 +220,8 @@ static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_refresh_inode(inode, res.fattr); - if (status == 0) { - entry->mask = 0; - if (res.access & NFS3_ACCESS_READ) - entry->mask |= MAY_READ; - if (res.access & (NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE)) - entry->mask |= MAY_WRITE; - if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE)) - entry->mask |= MAY_EXEC; - } + if (status == 0) + nfs_access_set_mask(entry, res.access); nfs_free_fattr(res.fattr); out: dprintk("NFS reply access: %d\n", status); -- cgit v1.2.3 From bd8b2441742b49c76bec707757bd9c028ea9838e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Jul 2017 17:54:34 -0400 Subject: NFS: Store the raw NFS access mask in the inode's access cache Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 24b3a6748062..8fae8b00b8f5 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2399,7 +2399,7 @@ nfs_access_calc_mask(u32 access_result) void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result) { - entry->mask = nfs_access_calc_mask(access_result); + entry->mask = access_result; } EXPORT_SYMBOL_GPL(nfs_access_set_mask); @@ -2407,6 +2407,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) { struct nfs_access_entry cache; bool may_block = (mask & MAY_NOT_BLOCK) == 0; + int cache_mask; int status; trace_nfs_access_enter(inode); @@ -2422,7 +2423,8 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) goto out; /* Be clever: ask server to check for all possible rights */ - cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; + cache.mask = NFS_MAY_LOOKUP | NFS_MAY_EXECUTE + | NFS_MAY_WRITE | NFS_MAY_READ; cache.cred = cred; cache.jiffies = jiffies; status = NFS_PROTO(inode)->access(inode, &cache); @@ -2436,7 +2438,8 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) } nfs_access_add_cache(inode, &cache); out_cached: - if ((mask & ~cache.mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) + cache_mask = nfs_access_calc_mask(cache.mask); + if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) status = -EACCES; out: trace_nfs_access_exit(inode, status); -- cgit v1.2.3 From ecbb903c56745d59c301db26dd7d8b74b520eb84 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Jul 2017 17:54:35 -0400 Subject: NFS: Be more careful about mapping file permissions When mapping a directory, we want the MAY_WRITE permissions to reflect whether or not we have permission to modify, add and delete the directory entries. MAY_EXEC must map to lookup permissions. On the other hand, for files, we want MAY_WRITE to reflect a permission to modify and extend the file. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8fae8b00b8f5..37a6180ee2e8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2379,21 +2379,30 @@ EXPORT_SYMBOL_GPL(nfs_access_add_cache); #define NFS_MAY_WRITE (NFS4_ACCESS_MODIFY | \ NFS4_ACCESS_EXTEND | \ NFS4_ACCESS_DELETE) +#define NFS_FILE_MAY_WRITE (NFS4_ACCESS_MODIFY | \ + NFS4_ACCESS_EXTEND) +#define NFS_DIR_MAY_WRITE NFS_MAY_WRITE #define NFS_MAY_LOOKUP (NFS4_ACCESS_LOOKUP) #define NFS_MAY_EXECUTE (NFS4_ACCESS_EXECUTE) static int -nfs_access_calc_mask(u32 access_result) +nfs_access_calc_mask(u32 access_result, umode_t umode) { int mask = 0; if (access_result & NFS_MAY_READ) mask |= MAY_READ; - if (access_result & NFS_MAY_WRITE) - mask |= MAY_WRITE; - if (access_result & NFS_MAY_LOOKUP) - mask |= MAY_EXEC; - if (access_result & NFS_MAY_EXECUTE) - mask |= MAY_EXEC; + if (S_ISDIR(umode)) { + if ((access_result & NFS_DIR_MAY_WRITE) == NFS_DIR_MAY_WRITE) + mask |= MAY_WRITE; + if ((access_result & NFS_MAY_LOOKUP) == NFS_MAY_LOOKUP) + mask |= MAY_EXEC; + } else if (S_ISREG(umode)) { + if ((access_result & NFS_FILE_MAY_WRITE) == NFS_FILE_MAY_WRITE) + mask |= MAY_WRITE; + if ((access_result & NFS_MAY_EXECUTE) == NFS_MAY_EXECUTE) + mask |= MAY_EXEC; + } else if (access_result & NFS_MAY_WRITE) + mask |= MAY_WRITE; return mask; } @@ -2438,7 +2447,7 @@ static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) } nfs_access_add_cache(inode, &cache); out_cached: - cache_mask = nfs_access_calc_mask(cache.mask); + cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode); if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0) status = -EACCES; out: -- cgit v1.2.3 From 1ebf980127924c639e2b85c08468311ba1c95b70 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 20 Jul 2017 17:00:02 -0400 Subject: NFS/filelayout: Fix racy setting of fl->dsaddr in filelayout_check_deviceid() We must set fl->dsaddr once, and once only, even if there are multiple processes calling filelayout_check_deviceid() for the same layout segment. Reported-by: Olga Kornievskaia Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/filelayout/filelayout.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 080fc6b278bd..44c638b7876c 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -542,6 +542,10 @@ filelayout_check_deviceid(struct pnfs_layout_hdr *lo, struct nfs4_file_layout_dsaddr *dsaddr; int status = -EINVAL; + /* Is the deviceid already set? If so, we're good. */ + if (fl->dsaddr != NULL) + return 0; + /* find and reference the deviceid */ d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode), &fl->deviceid, lo->plh_lc_cred, gfp_flags); @@ -553,8 +557,6 @@ filelayout_check_deviceid(struct pnfs_layout_hdr *lo, if (filelayout_test_devid_unavailable(&dsaddr->id_node)) goto out_put; - fl->dsaddr = dsaddr; - if (fl->first_stripe_index >= dsaddr->stripe_count) { dprintk("%s Bad first_stripe_index %u\n", __func__, fl->first_stripe_index); @@ -570,6 +572,13 @@ filelayout_check_deviceid(struct pnfs_layout_hdr *lo, goto out_put; } status = 0; + + /* + * Atomic compare and xchange to ensure we don't scribble + * over a non-NULL pointer. + */ + if (cmpxchg(&fl->dsaddr, NULL, dsaddr) != NULL) + goto out_put; out: return status; out_put: -- cgit v1.2.3