From 5482e09a8840ee3bb1848d73d05301eafc0e6199 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 24 Nov 2020 19:15:18 -0500 Subject: NFS: Fix rpcrdma_inline_fixup() crash with new LISTXATTRS operation By switching to an XFS-backed export, I am able to reproduce the ibcomp worker crash on my client with xfstests generic/013. For the failing LISTXATTRS operation, xdr_inline_pages() is called with page_len=12 and buflen=128. - When ->send_request() is called, rpcrdma_marshal_req() does not set up a Reply chunk because buflen is smaller than the inline threshold. Thus rpcrdma_convert_iovs() does not get invoked at all and the transport's XDRBUF_SPARSE_PAGES logic is not invoked on the receive buffer. - During reply processing, rpcrdma_inline_fixup() tries to copy received data into rq_rcv_buf->pages because page_len is positive. But there are no receive pages because rpcrdma_marshal_req() never allocated them. The result is that the ibcomp worker faults and dies. Sometimes that causes a visible crash, and sometimes it results in a transport hang without other symptoms. RPC/RDMA's XDRBUF_SPARSE_PAGES support is not entirely correct, and should eventually be fixed or replaced. However, my preference is that upper-layer operations should explicitly allocate their receive buffers (using GFP_KERNEL) when possible, rather than relying on XDRBUF_SPARSE_PAGES. Reported-by: Olga kornievskaia Suggested-by: Olga kornievskaia Fixes: c10a75145feb ("NFSv4.2: add the extended attribute proc functions.") Signed-off-by: Chuck Lever Reviewed-by: Olga kornievskaia Reviewed-by: Frank van der Linden Tested-by: Olga kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs42proc.c | 21 +++++++++++++-------- fs/nfs/nfs42xdr.c | 1 - 2 files changed, 13 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 2b2211d1234e..4fc61e3d098d 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1241,12 +1241,13 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf, .rpc_resp = &res, }; u32 xdrlen; - int ret, np; + int ret, np, i; + ret = -ENOMEM; res.scratch = alloc_page(GFP_KERNEL); if (!res.scratch) - return -ENOMEM; + goto out; xdrlen = nfs42_listxattr_xdrsize(buflen); if (xdrlen > server->lxasize) @@ -1254,9 +1255,12 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf, np = xdrlen / PAGE_SIZE + 1; pages = kcalloc(np, sizeof(struct page *), GFP_KERNEL); - if (pages == NULL) { - __free_page(res.scratch); - return -ENOMEM; + if (!pages) + goto out_free_scratch; + for (i = 0; i < np; i++) { + pages[i] = alloc_page(GFP_KERNEL); + if (!pages[i]) + goto out_free_pages; } arg.xattr_pages = pages; @@ -1271,14 +1275,15 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf, *eofp = res.eof; } +out_free_pages: while (--np >= 0) { if (pages[np]) __free_page(pages[np]); } - - __free_page(res.scratch); kfree(pages); - +out_free_scratch: + __free_page(res.scratch); +out: return ret; } diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 6e060a88f98c..8432bd6b95f0 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1528,7 +1528,6 @@ static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req, rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, hdr.replen); - req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; encode_nops(&hdr); } -- cgit v1.2.3 From bd75475c2fa161acec0017e030f6e8c01cb0d107 Mon Sep 17 00:00:00 2001 From: Dai Ngo Date: Mon, 23 Nov 2020 22:15:17 -0500 Subject: NFSv4.2: Fix 5 seconds delay when doing inter server copy Since commit b4868b44c5628 ("NFSv4: Wait for stateid updates after CLOSE/OPEN_DOWNGRADE"), every inter server copy operation suffers 5 seconds delay regardless of the size of the copy. The delay is from nfs_set_open_stateid_locked when the check by nfs_stateid_is_sequential fails because the seqid in both nfs4_state and nfs4_stateid are 0. Fix __nfs42_ssc_open to delay setting of NFS_OPEN_STATE in nfs4_state, until after the call to update_open_stateid, to indicate this is the 1st open. This fix is part of a 2 patches, the other patch is the fix in the source server to return the stateid for COPY_NOTIFY request with seqid 1 instead of 0. Fixes: ce0887ac96d3 ("NFSD add nfs4 inter ssc to nfsd4_copy") Signed-off-by: Dai Ngo Signed-off-by: Anna Schumaker --- fs/nfs/nfs4file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 9d354de613da..57b3821d975a 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -377,10 +377,10 @@ static struct file *__nfs42_ssc_open(struct vfsmount *ss_mnt, goto out_stateowner; set_bit(NFS_SRV_SSC_COPY_STATE, &ctx->state->flags); - set_bit(NFS_OPEN_STATE, &ctx->state->flags); memcpy(&ctx->state->open_stateid.other, &stateid->other, NFS4_STATEID_OTHER_SIZE); update_open_stateid(ctx->state, stateid, NULL, filep->f_mode); + set_bit(NFS_OPEN_STATE, &ctx->state->flags); nfs_file_set_open_context(filep, ctx); put_nfs_open_context(ctx); -- cgit v1.2.3 From 5f447cb88123857d69f0c1a665356688037c6ad3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Oct 2020 12:40:59 -0400 Subject: NFSv3: Refactor nfs3_proc_lookup() to split out the dentry We want to reuse the lookup code in NFSv3 in order to emulate the NFSv4 lookupp operation. Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 2397ceedba8a..acbdf7496d31 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -154,14 +154,14 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, } static int -nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, - struct nfs_fh *fhandle, struct nfs_fattr *fattr, - struct nfs4_label *label) +__nfs3_proc_lookup(struct inode *dir, const char *name, size_t len, + struct nfs_fh *fhandle, struct nfs_fattr *fattr, + unsigned short task_flags) { struct nfs3_diropargs arg = { .fh = NFS_FH(dir), - .name = dentry->d_name.name, - .len = dentry->d_name.len + .name = name, + .len = len }; struct nfs3_diropres res = { .fh = fhandle, @@ -173,17 +173,11 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, .rpc_resp = &res, }; int status; - unsigned short task_flags = 0; - - /* Is this is an attribute revalidation, subject to softreval? */ - if (nfs_lookup_is_soft_revalidate(dentry)) - task_flags |= RPC_TASK_TIMEOUT; res.dir_attr = nfs_alloc_fattr(); if (res.dir_attr == NULL) return -ENOMEM; - dprintk("NFS call lookup %pd2\n", dentry); nfs_fattr_init(fattr); status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags); nfs_refresh_inode(dir, res.dir_attr); @@ -198,6 +192,23 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, return status; } +static int +nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, + struct nfs_fh *fhandle, struct nfs_fattr *fattr, + struct nfs4_label *label) +{ + unsigned short task_flags = 0; + + /* Is this is an attribute revalidation, subject to softreval? */ + if (nfs_lookup_is_soft_revalidate(dentry)) + task_flags |= RPC_TASK_TIMEOUT; + + dprintk("NFS call lookup %pd2\n", dentry); + return __nfs3_proc_lookup(dir, dentry->d_name.name, + dentry->d_name.len, fhandle, fattr, + task_flags); +} + static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) { struct nfs3_accessargs arg = { -- cgit v1.2.3 From 3c5e9a59faa6b1bb3110b961e695502c7ee8699b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 19 Oct 2020 12:45:53 -0400 Subject: NFSv3: Add emulation of the lookupp() operation In order to use the open_by_filehandle() operations on NFSv3, we need to be able to emulate lookupp() so that nfs_get_parent() can be used to convert disconnected dentries into connected ones. Signed-off-by: Trond Myklebust --- fs/nfs/nfs3proc.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index acbdf7496d31..6b66b73a50eb 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -209,6 +209,20 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry, task_flags); } +static int nfs3_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle, + struct nfs_fattr *fattr, struct nfs4_label *label) +{ + const char dotdot[] = ".."; + const size_t len = strlen(dotdot); + unsigned short task_flags = 0; + + if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL) + task_flags |= RPC_TASK_TIMEOUT; + + return __nfs3_proc_lookup(inode, dotdot, len, fhandle, fattr, + task_flags); +} + static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) { struct nfs3_accessargs arg = { @@ -1015,6 +1029,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = { .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, .lookup = nfs3_proc_lookup, + .lookupp = nfs3_proc_lookupp, .access = nfs3_proc_access, .readlink = nfs3_proc_readlink, .create = nfs3_proc_create, -- cgit v1.2.3 From 76998ebb91582812540f591f9e148daa0f08c76d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 20 Oct 2020 14:30:35 -0400 Subject: NFSv4: Observe the NFS_MOUNT_SOFTREVAL flag in _nfs4_proc_lookupp We need to respect the NFS_MOUNT_SOFTREVAL flag in _nfs4_proc_lookupp, by timing out if the server is unavailable. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 9e0ca9b2b210..a5b9356bee6a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4397,6 +4397,10 @@ static int _nfs4_proc_lookupp(struct inode *inode, .rpc_argp = &args, .rpc_resp = &res, }; + unsigned short task_flags = 0; + + if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL) + task_flags |= RPC_TASK_TIMEOUT; args.bitmask = nfs4_bitmask(server, label); @@ -4404,7 +4408,7 @@ static int _nfs4_proc_lookupp(struct inode *inode, dprintk("NFS call lookupp ino=0x%lx\n", inode->i_ino); status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, - &res.seq_res, 0); + &res.seq_res, task_flags); dprintk("NFS reply lookupp: %d\n", status); return status; } -- cgit v1.2.3 From 05ad917561fca39a03338cb21fe9622f998b0f9c Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 6 Nov 2020 16:03:38 -0500 Subject: NFSv4.2: condition READDIR's mask for security label based on LSM state Currently, the client will always ask for security_labels if the server returns that it supports that feature regardless of any LSM modules (such as Selinux) enforcing security policy. This adds performance penalty to the READDIR operation. Client adjusts superblock's support of the security_label based on the server's support but also current client's configuration of the LSM modules. Thus, prior to using the default bitmask in READDIR, this patch checks the server's capabilities and then instructs READDIR to remove FATTR4_WORD2_SECURITY_LABEL from the bitmask. v5: fixing silly mistakes of the rushed v4 v4: simplifying logic v3: changing label's initialization per Ondrej's comment v2: dropping selinux hook and using the sb cap. Suggested-by: Ondrej Mosnacek Suggested-by: Scott Mayhew Signed-off-by: Olga Kornievskaia Fixes: 2b0143b5c986 ("VFS: normal filesystems (and lustre): d_inode() annotations") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index a5b9356bee6a..66f1f4a5c74c 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4965,12 +4965,12 @@ static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred, u64 cookie, struct page **pages, unsigned int count, bool plus) { struct inode *dir = d_inode(dentry); + struct nfs_server *server = NFS_SERVER(dir); struct nfs4_readdir_arg args = { .fh = NFS_FH(dir), .pages = pages, .pgbase = 0, .count = count, - .bitmask = NFS_SERVER(d_inode(dentry))->attr_bitmask, .plus = plus, }; struct nfs4_readdir_res res; @@ -4985,9 +4985,15 @@ static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred, dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__, dentry, (unsigned long long)cookie); + if (!(server->caps & NFS_CAP_SECURITY_LABEL)) + args.bitmask = server->attr_bitmask_nl; + else + args.bitmask = server->attr_bitmask; + nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); res.pgbase = args.pgbase; - status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0); + status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, + &res.seq_res, 0); if (status >= 0) { memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE); status += args.pgbase; -- cgit v1.2.3 From 2e7a46417952ae480cb5091ed5ade73078630b40 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Nov 2020 09:56:18 -0500 Subject: NFS: Ensure contents of struct nfs_open_dir_context are consistent Ensure that the contents of struct nfs_open_dir_context are consistent by setting them under the file->f_lock from a private copy (that is known to be consistent). Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 72 ++++++++++++++++++++++++++++++++++++------------------------ 1 file changed, 43 insertions(+), 29 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4e011adaf967..67d8595cd6e5 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -144,20 +144,23 @@ struct nfs_cache_array { struct nfs_cache_array_entry array[]; }; -typedef struct { +typedef struct nfs_readdir_descriptor { struct file *file; struct page *page; struct dir_context *ctx; unsigned long page_index; - u64 *dir_cookie; + u64 dir_cookie; u64 last_cookie; + u64 dup_cookie; loff_t current_index; loff_t prev_index; unsigned long dir_verifier; unsigned long timestamp; unsigned long gencount; + unsigned long attr_gencount; unsigned int cache_entry_index; + signed char duped; bool plus; bool eof; } nfs_readdir_descriptor_t; @@ -273,7 +276,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri } index = (unsigned int)diff; - *desc->dir_cookie = array->array[index].cookie; + desc->dir_cookie = array->array[index].cookie; desc->cache_entry_index = index; return 0; out_eof: @@ -298,33 +301,32 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des int status = -EAGAIN; for (i = 0; i < array->size; i++) { - if (array->array[i].cookie == *desc->dir_cookie) { + if (array->array[i].cookie == desc->dir_cookie) { struct nfs_inode *nfsi = NFS_I(file_inode(desc->file)); - struct nfs_open_dir_context *ctx = desc->file->private_data; new_pos = desc->current_index + i; - if (ctx->attr_gencount != nfsi->attr_gencount || + if (desc->attr_gencount != nfsi->attr_gencount || !nfs_readdir_inode_mapping_valid(nfsi)) { - ctx->duped = 0; - ctx->attr_gencount = nfsi->attr_gencount; + desc->duped = 0; + desc->attr_gencount = nfsi->attr_gencount; } else if (new_pos < desc->prev_index) { - if (ctx->duped > 0 - && ctx->dup_cookie == *desc->dir_cookie) { + if (desc->duped > 0 + && desc->dup_cookie == desc->dir_cookie) { if (printk_ratelimit()) { pr_notice("NFS: directory %pD2 contains a readdir loop." "Please contact your server vendor. " "The file: %.*s has duplicate cookie %llu\n", desc->file, array->array[i].string.len, - array->array[i].string.name, *desc->dir_cookie); + array->array[i].string.name, desc->dir_cookie); } status = -ELOOP; goto out; } - ctx->dup_cookie = *desc->dir_cookie; - ctx->duped = -1; + desc->dup_cookie = desc->dir_cookie; + desc->duped = -1; } if (nfs_readdir_use_cookie(desc->file)) - desc->ctx->pos = *desc->dir_cookie; + desc->ctx->pos = desc->dir_cookie; else desc->ctx->pos = new_pos; desc->prev_index = new_pos; @@ -334,7 +336,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des } if (array->eof_index >= 0) { status = -EBADCOOKIE; - if (*desc->dir_cookie == array->last_cookie) + if (desc->dir_cookie == array->last_cookie) desc->eof = true; } out: @@ -349,7 +351,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) array = kmap(desc->page); - if (*desc->dir_cookie == 0) + if (desc->dir_cookie == 0) status = nfs_readdir_search_for_pos(array, desc); else status = nfs_readdir_search_for_cookie(array, desc); @@ -801,7 +803,6 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) int i = 0; int res = 0; struct nfs_cache_array *array = NULL; - struct nfs_open_dir_context *ctx = file->private_data; array = kmap(desc->page); for (i = desc->cache_entry_index; i < array->size; i++) { @@ -814,22 +815,22 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) break; } if (i < (array->size-1)) - *desc->dir_cookie = array->array[i+1].cookie; + desc->dir_cookie = array->array[i+1].cookie; else - *desc->dir_cookie = array->last_cookie; + desc->dir_cookie = array->last_cookie; if (nfs_readdir_use_cookie(file)) - desc->ctx->pos = *desc->dir_cookie; + desc->ctx->pos = desc->dir_cookie; else desc->ctx->pos++; - if (ctx->duped != 0) - ctx->duped = 1; + if (desc->duped != 0) + desc->duped = 1; } if (array->eof_index >= 0) desc->eof = true; kunmap(desc->page); dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", - (unsigned long long)*desc->dir_cookie, res); + (unsigned long long)desc->dir_cookie, res); return res; } @@ -851,10 +852,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) struct page *page = NULL; int status; struct inode *inode = file_inode(desc->file); - struct nfs_open_dir_context *ctx = desc->file->private_data; dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", - (unsigned long long)*desc->dir_cookie); + (unsigned long long)desc->dir_cookie); page = alloc_page(GFP_HIGHUSER); if (!page) { @@ -863,9 +863,9 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) } desc->page_index = 0; - desc->last_cookie = *desc->dir_cookie; + desc->last_cookie = desc->dir_cookie; desc->page = page; - ctx->duped = 0; + desc->duped = 0; status = nfs_readdir_xdr_to_array(desc, page, inode); if (status < 0) @@ -894,7 +894,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) nfs_readdir_descriptor_t my_desc = { .file = file, .ctx = ctx, - .dir_cookie = &dir_ctx->dir_cookie, .plus = nfs_use_readdirplus(inode, ctx), }, *desc = &my_desc; @@ -915,13 +914,20 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) if (res < 0) goto out; + spin_lock(&file->f_lock); + desc->dir_cookie = dir_ctx->dir_cookie; + desc->dup_cookie = dir_ctx->dup_cookie; + desc->duped = dir_ctx->duped; + desc->attr_gencount = dir_ctx->attr_gencount; + spin_unlock(&file->f_lock); + do { res = readdir_search_pagecache(desc); if (res == -EBADCOOKIE) { res = 0; /* This means either end of directory */ - if (*desc->dir_cookie && !desc->eof) { + if (desc->dir_cookie && !desc->eof) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc); if (res == 0) @@ -946,6 +952,14 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) if (res < 0) break; } while (!desc->eof); + + spin_lock(&file->f_lock); + dir_ctx->dir_cookie = desc->dir_cookie; + dir_ctx->dup_cookie = desc->dup_cookie; + dir_ctx->duped = desc->duped; + dir_ctx->attr_gencount = desc->attr_gencount; + spin_unlock(&file->f_lock); + out: if (res > 0) res = 0; -- cgit v1.2.3 From b1e21c97437f64d0a00f8fea1f9e64e77e0e4242 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Nov 2020 13:45:55 -0500 Subject: NFS: Clean up readdir struct nfs_cache_array Since the 'eof_index' is only ever used as a flag, make it so. Also add a flag to detect if the page has been completely filled. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 66 ++++++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 17 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 67d8595cd6e5..604ebe015387 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -138,9 +138,10 @@ struct nfs_cache_array_entry { }; struct nfs_cache_array { - int size; - int eof_index; u64 last_cookie; + unsigned int size; + unsigned char page_full : 1, + page_is_eof : 1; struct nfs_cache_array_entry array[]; }; @@ -172,7 +173,6 @@ void nfs_readdir_init_array(struct page *page) array = kmap_atomic(page); memset(array, 0, sizeof(struct nfs_cache_array)); - array->eof_index = -1; kunmap_atomic(array); } @@ -192,6 +192,17 @@ void nfs_readdir_clear_array(struct page *page) kunmap_atomic(array); } +static void nfs_readdir_array_set_eof(struct nfs_cache_array *array) +{ + array->page_is_eof = 1; + array->page_full = 1; +} + +static bool nfs_readdir_array_is_full(struct nfs_cache_array *array) +{ + return array->page_full; +} + /* * the caller is responsible for freeing qstr.name * when called by nfs_readdir_add_to_array, the strings will be freed in @@ -213,6 +224,23 @@ int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int le return 0; } +/* + * Check that the next array entry lies entirely within the page bounds + */ +static int nfs_readdir_array_can_expand(struct nfs_cache_array *array) +{ + struct nfs_cache_array_entry *cache_entry; + + if (array->page_full) + return -ENOSPC; + cache_entry = &array->array[array->size + 1]; + if ((char *)cache_entry - (char *)array > PAGE_SIZE) { + array->page_full = 1; + return -ENOSPC; + } + return 0; +} + static int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) { @@ -220,13 +248,11 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) struct nfs_cache_array_entry *cache_entry; int ret; - cache_entry = &array->array[array->size]; - - /* Check that this entry lies within the page bounds */ - ret = -ENOSPC; - if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE) + ret = nfs_readdir_array_can_expand(array); + if (ret) goto out; + cache_entry = &array->array[array->size]; cache_entry->cookie = entry->prev_cookie; cache_entry->ino = entry->ino; cache_entry->d_type = entry->d_type; @@ -236,12 +262,21 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) array->last_cookie = entry->cookie; array->size++; if (entry->eof != 0) - array->eof_index = array->size; + nfs_readdir_array_set_eof(array); out: kunmap(page); return ret; } +static void nfs_readdir_page_set_eof(struct page *page) +{ + struct nfs_cache_array *array; + + array = kmap_atomic(page); + nfs_readdir_array_set_eof(array); + kunmap_atomic(array); +} + static inline int is_32bit_api(void) { @@ -270,7 +305,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri if (diff < 0) goto out_eof; if (diff >= array->size) { - if (array->eof_index >= 0) + if (array->page_is_eof) goto out_eof; return -EAGAIN; } @@ -334,7 +369,7 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des return 0; } } - if (array->eof_index >= 0) { + if (array->page_is_eof) { status = -EBADCOOKIE; if (desc->dir_cookie == array->last_cookie) desc->eof = true; @@ -566,7 +601,6 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en struct xdr_stream stream; struct xdr_buf buf; struct page *scratch; - struct nfs_cache_array *array; unsigned int count = 0; int status; @@ -604,10 +638,8 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en out_nopages: if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) { - array = kmap(page); - array->eof_index = array->size; + nfs_readdir_page_set_eof(page); status = 0; - kunmap(page); } put_page(scratch); @@ -689,7 +721,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, status = 0; break; } - } while (array->eof_index < 0); + } while (!nfs_readdir_array_is_full(array)); nfs_readdir_free_pages(pages, array_size); out_release_array: @@ -825,7 +857,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) if (desc->duped != 0) desc->duped = 1; } - if (array->eof_index >= 0) + if (array->page_is_eof) desc->eof = true; kunmap(desc->page); -- cgit v1.2.3 From 972bcdf233096d36b2f3e02f34a80d0f073b6b05 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Nov 2020 17:15:43 -0500 Subject: NFS: Clean up nfs_readdir_page_filler() Clean up handling of the case where there are no entries in the readdir reply. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 604ebe015387..68acbde3f914 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -601,16 +601,12 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en struct xdr_stream stream; struct xdr_buf buf; struct page *scratch; - unsigned int count = 0; int status; scratch = alloc_page(GFP_KERNEL); if (scratch == NULL) return -ENOMEM; - if (buflen == 0) - goto out_nopages; - xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen); xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); @@ -619,27 +615,27 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en entry->label->len = NFS4_MAXLABELLEN; status = xdr_decode(desc, entry, &stream); - if (status != 0) { - if (status == -EAGAIN) - status = 0; + if (status != 0) break; - } - - count++; if (desc->plus) nfs_prime_dcache(file_dentry(desc->file), entry, desc->dir_verifier); status = nfs_readdir_add_to_array(entry, page); - if (status != 0) - break; - } while (!entry->eof); + } while (!status && !entry->eof); -out_nopages: - if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) { - nfs_readdir_page_set_eof(page); + switch (status) { + case -EBADCOOKIE: + if (entry->eof) { + nfs_readdir_page_set_eof(page); + status = 0; + } + break; + case -ENOSPC: + case -EAGAIN: status = 0; + break; } put_page(scratch); @@ -714,14 +710,15 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, if (status < 0) break; + pglen = status; - status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen); - if (status < 0) { - if (status == -ENOSPC) - status = 0; + if (pglen == 0) { + nfs_readdir_page_set_eof(page); break; } - } while (!nfs_readdir_array_is_full(array)); + + status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen); + } while (!status && !nfs_readdir_array_is_full(array)); nfs_readdir_free_pages(pages, array_size); out_release_array: -- cgit v1.2.3 From 1f1d4aa4e4bcb4721d3c51f4c07dda790b6accd9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Nov 2020 12:34:43 -0500 Subject: NFS: Clean up directory array handling Refactor to use pagecache_get_page() so that we can fill the page in multiple stages. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 138 +++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 77 insertions(+), 61 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 68acbde3f914..842f69120a01 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -149,7 +149,7 @@ typedef struct nfs_readdir_descriptor { struct file *file; struct page *page; struct dir_context *ctx; - unsigned long page_index; + pgoff_t page_index; u64 dir_cookie; u64 last_cookie; u64 dup_cookie; @@ -166,13 +166,18 @@ typedef struct nfs_readdir_descriptor { bool eof; } nfs_readdir_descriptor_t; -static -void nfs_readdir_init_array(struct page *page) +static void nfs_readdir_array_init(struct nfs_cache_array *array) +{ + memset(array, 0, sizeof(struct nfs_cache_array)); +} + +static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie) { struct nfs_cache_array *array; array = kmap_atomic(page); - memset(array, 0, sizeof(struct nfs_cache_array)); + nfs_readdir_array_init(array); + array->last_cookie = last_cookie; kunmap_atomic(array); } @@ -188,7 +193,7 @@ void nfs_readdir_clear_array(struct page *page) array = kmap_atomic(page); for (i = 0; i < array->size; i++) kfree(array->array[i].string.name); - array->size = 0; + nfs_readdir_array_init(array); kunmap_atomic(array); } @@ -268,6 +273,44 @@ out: return ret; } +static struct page *nfs_readdir_page_get_locked(struct address_space *mapping, + pgoff_t index, u64 last_cookie) +{ + struct page *page; + + page = grab_cache_page(mapping, index); + if (page && !PageUptodate(page)) { + nfs_readdir_page_init_array(page, last_cookie); + if (invalidate_inode_pages2_range(mapping, index + 1, -1) < 0) + nfs_zap_mapping(mapping->host, mapping); + SetPageUptodate(page); + } + + return page; +} + +static u64 nfs_readdir_page_last_cookie(struct page *page) +{ + struct nfs_cache_array *array; + u64 ret; + + array = kmap_atomic(page); + ret = array->last_cookie; + kunmap_atomic(array); + return ret; +} + +static bool nfs_readdir_page_needs_filling(struct page *page) +{ + struct nfs_cache_array *array; + bool ret; + + array = kmap_atomic(page); + ret = !nfs_readdir_array_is_full(array); + kunmap_atomic(array); + return ret; +} + static void nfs_readdir_page_set_eof(struct page *page) { struct nfs_cache_array *array; @@ -682,10 +725,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, int status = -ENOMEM; unsigned int array_size = ARRAY_SIZE(pages); - nfs_readdir_init_array(page); - entry.prev_cookie = 0; - entry.cookie = desc->last_cookie; + entry.cookie = nfs_readdir_page_last_cookie(page); entry.eof = 0; entry.fh = nfs_alloc_fhandle(); entry.fattr = nfs_alloc_fattr(); @@ -730,48 +771,25 @@ out: return status; } -/* - * Now we cache directories properly, by converting xdr information - * to an array that can be used for lookups later. This results in - * fewer cache pages, since we can store more information on each page. - * We only need to convert from xdr once so future lookups are much simpler - */ -static -int nfs_readdir_filler(void *data, struct page* page) +static void nfs_readdir_page_put(struct nfs_readdir_descriptor *desc) { - nfs_readdir_descriptor_t *desc = data; - struct inode *inode = file_inode(desc->file); - int ret; - - ret = nfs_readdir_xdr_to_array(desc, page, inode); - if (ret < 0) - goto error; - SetPageUptodate(page); - - if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) { - /* Should never happen */ - nfs_zap_mapping(inode, inode->i_mapping); - } - unlock_page(page); - return 0; - error: - nfs_readdir_clear_array(page); - unlock_page(page); - return ret; + put_page(desc->page); + desc->page = NULL; } -static -void cache_page_release(nfs_readdir_descriptor_t *desc) +static void +nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc) { - put_page(desc->page); - desc->page = NULL; + unlock_page(desc->page); + nfs_readdir_page_put(desc); } -static -struct page *get_cache_page(nfs_readdir_descriptor_t *desc) +static struct page * +nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc) { - return read_cache_page(desc->file->f_mapping, desc->page_index, - nfs_readdir_filler, desc); + return nfs_readdir_page_get_locked(desc->file->f_mapping, + desc->page_index, + desc->last_cookie); } /* @@ -785,23 +803,21 @@ int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc) struct nfs_inode *nfsi = NFS_I(inode); int res; - desc->page = get_cache_page(desc); - if (IS_ERR(desc->page)) - return PTR_ERR(desc->page); - res = lock_page_killable(desc->page); - if (res != 0) - goto error; - res = -EAGAIN; - if (desc->page->mapping != NULL) { - res = nfs_readdir_search_array(desc); - if (res == 0) { - nfsi->page_index = desc->page_index; - return 0; - } + desc->page = nfs_readdir_page_get_cached(desc); + if (!desc->page) + return -ENOMEM; + if (nfs_readdir_page_needs_filling(desc->page)) { + res = nfs_readdir_xdr_to_array(desc, desc->page, inode); + if (res < 0) + goto error; + } + res = nfs_readdir_search_array(desc); + if (res == 0) { + nfsi->page_index = desc->page_index; + return 0; } - unlock_page(desc->page); error: - cache_page_release(desc); + nfs_readdir_page_unlock_and_put_cached(desc); return res; } @@ -896,6 +912,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) desc->page = page; desc->duped = 0; + nfs_readdir_page_init_array(page, desc->dir_cookie); status = nfs_readdir_xdr_to_array(desc, page, inode); if (status < 0) goto out_release; @@ -904,7 +921,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) out_release: nfs_readdir_clear_array(desc->page); - cache_page_release(desc); + nfs_readdir_page_put(desc); out: dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); @@ -976,8 +993,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) break; res = nfs_do_filldir(desc); - unlock_page(desc->page); - cache_page_release(desc); + nfs_readdir_page_unlock_and_put_cached(desc); if (res < 0) break; } while (!desc->eof); -- cgit v1.2.3 From 3b2a09f127e025674945e82c1ec0c88d6740280e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Nov 2020 13:14:10 -0500 Subject: NFS: Don't discard readdir results If a readdir call returns more data than we can fit into one page cache page, then allocate a new one for that data rather than discarding the data. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 46 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 842f69120a01..f7248145c333 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -320,6 +320,26 @@ static void nfs_readdir_page_set_eof(struct page *page) kunmap_atomic(array); } +static void nfs_readdir_page_unlock_and_put(struct page *page) +{ + unlock_page(page); + put_page(page); +} + +static struct page *nfs_readdir_page_get_next(struct address_space *mapping, + pgoff_t index, u64 cookie) +{ + struct page *page; + + page = nfs_readdir_page_get_locked(mapping, index, cookie); + if (page) { + if (nfs_readdir_page_last_cookie(page) == cookie) + return page; + nfs_readdir_page_unlock_and_put(page); + } + return NULL; +} + static inline int is_32bit_api(void) { @@ -637,13 +657,15 @@ out: } /* Perform conversion from xdr to cache array */ -static -int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, - struct page **xdr_pages, struct page *page, unsigned int buflen) +static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc, + struct nfs_entry *entry, + struct page **xdr_pages, + struct page *fillme, unsigned int buflen) { + struct address_space *mapping = desc->file->f_mapping; struct xdr_stream stream; struct xdr_buf buf; - struct page *scratch; + struct page *scratch, *new, *page = fillme; int status; scratch = alloc_page(GFP_KERNEL); @@ -666,6 +688,19 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en desc->dir_verifier); status = nfs_readdir_add_to_array(entry, page); + if (status != -ENOSPC) + continue; + + if (page->mapping != mapping) + break; + new = nfs_readdir_page_get_next(mapping, page->index + 1, + entry->prev_cookie); + if (!new) + break; + if (page != fillme) + nfs_readdir_page_unlock_and_put(page); + page = new; + status = nfs_readdir_add_to_array(entry, page); } while (!status && !entry->eof); switch (status) { @@ -681,6 +716,9 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en break; } + if (page != fillme) + nfs_readdir_page_unlock_and_put(page); + put_page(scratch); return status; } -- cgit v1.2.3 From e762a639816015a70bb1af8ea4baf54f4facb591 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Nov 2020 13:31:20 -0500 Subject: NFS: Remove unnecessary kmap in nfs_readdir_xdr_to_array() The kmapped pointer is only used once per loop to check if we need to exit. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index f7248145c333..e8b0fcc1bc9e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -759,7 +759,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct page *pages[NFS_MAX_READDIR_PAGES]; struct nfs_entry entry; struct file *file = desc->file; - struct nfs_cache_array *array; int status = -ENOMEM; unsigned int array_size = ARRAY_SIZE(pages); @@ -778,11 +777,9 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, goto out; } - array = kmap(page); - status = nfs_readdir_alloc_pages(pages, array_size); if (status < 0) - goto out_release_array; + goto out_release_label; do { unsigned int pglen; status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode); @@ -797,11 +794,10 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, } status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen); - } while (!status && !nfs_readdir_array_is_full(array)); + } while (!status && nfs_readdir_page_needs_filling(page)); nfs_readdir_free_pages(pages, array_size); -out_release_array: - kunmap(page); +out_release_label: nfs4_label_free(entry.label); out: nfs_free_fattr(entry.fattr); -- cgit v1.2.3 From ed09222d651dbd30e707f96180628229146b885c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Nov 2020 13:34:32 -0500 Subject: NFS: Replace kmap() with kmap_atomic() in nfs_readdir_search_array() Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e8b0fcc1bc9e..b9001123ec84 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -447,7 +447,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) struct nfs_cache_array *array; int status; - array = kmap(desc->page); + array = kmap_atomic(desc->page); if (desc->dir_cookie == 0) status = nfs_readdir_search_for_pos(array, desc); @@ -459,7 +459,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) desc->current_index += array->size; desc->page_index++; } - kunmap(desc->page); + kunmap_atomic(array); return status; } -- cgit v1.2.3 From a52a8a6adad99e1162c27f70cd6495626a48064d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Nov 2020 19:17:29 -0500 Subject: NFS: Simplify struct nfs_cache_array_entry We don't need to store a hash, so replace struct qstr with a simple const char pointer and length. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b9001123ec84..be0e2891fecc 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -133,7 +133,8 @@ nfs_closedir(struct inode *inode, struct file *filp) struct nfs_cache_array_entry { u64 cookie; u64 ino; - struct qstr string; + const char *name; + unsigned int name_len; unsigned char d_type; }; @@ -192,7 +193,7 @@ void nfs_readdir_clear_array(struct page *page) array = kmap_atomic(page); for (i = 0; i < array->size; i++) - kfree(array->array[i].string.name); + kfree(array->array[i].name); nfs_readdir_array_init(array); kunmap_atomic(array); } @@ -213,20 +214,17 @@ static bool nfs_readdir_array_is_full(struct nfs_cache_array *array) * when called by nfs_readdir_add_to_array, the strings will be freed in * nfs_clear_readdir_array() */ -static -int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len) +static const char *nfs_readdir_copy_name(const char *name, unsigned int len) { - string->len = len; - string->name = kmemdup_nul(name, len, GFP_KERNEL); - if (string->name == NULL) - return -ENOMEM; + const char *ret = kmemdup_nul(name, len, GFP_KERNEL); + /* * Avoid a kmemleak false positive. The pointer to the name is stored * in a page cache page which kmemleak does not scan. */ - kmemleak_not_leak(string->name); - string->hash = full_name_hash(NULL, name, len); - return 0; + if (ret != NULL) + kmemleak_not_leak(ret); + return ret; } /* @@ -249,27 +247,34 @@ static int nfs_readdir_array_can_expand(struct nfs_cache_array *array) static int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) { - struct nfs_cache_array *array = kmap(page); + struct nfs_cache_array *array; struct nfs_cache_array_entry *cache_entry; + const char *name; int ret; + name = nfs_readdir_copy_name(entry->name, entry->len); + if (!name) + return -ENOMEM; + + array = kmap_atomic(page); ret = nfs_readdir_array_can_expand(array); - if (ret) + if (ret) { + kfree(name); goto out; + } cache_entry = &array->array[array->size]; cache_entry->cookie = entry->prev_cookie; cache_entry->ino = entry->ino; cache_entry->d_type = entry->d_type; - ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len); - if (ret) - goto out; + cache_entry->name_len = entry->len; + cache_entry->name = name; array->last_cookie = entry->cookie; array->size++; if (entry->eof != 0) nfs_readdir_array_set_eof(array); out: - kunmap(page); + kunmap_atomic(array); return ret; } @@ -413,9 +418,8 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des if (printk_ratelimit()) { pr_notice("NFS: directory %pD2 contains a readdir loop." "Please contact your server vendor. " - "The file: %.*s has duplicate cookie %llu\n", - desc->file, array->array[i].string.len, - array->array[i].string.name, desc->dir_cookie); + "The file: %s has duplicate cookie %llu\n", + desc->file, array->array[i].name, desc->dir_cookie); } status = -ELOOP; goto out; @@ -888,7 +892,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) struct nfs_cache_array_entry *ent; ent = &array->array[i]; - if (!dir_emit(desc->ctx, ent->string.name, ent->string.len, + if (!dir_emit(desc->ctx, ent->name, ent->name_len, nfs_compat_user_ino64(ent->ino), ent->d_type)) { desc->eof = true; break; -- cgit v1.2.3 From 1a34c8c9a49ee10ccaf91091ddd98c25e4d567dd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Nov 2020 14:26:47 -0500 Subject: NFS: Support larger readdir buffers Support readdir buffers of up to 1MB in size so that we can read large directories using few RPC calls. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/client.c | 4 ++-- fs/nfs/dir.c | 33 +++++++++++++++++++-------------- fs/nfs/internal.h | 6 ------ 3 files changed, 21 insertions(+), 22 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index 4b8cc93913f7..f6454ba53d05 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -781,8 +781,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server, server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); server->dtsize = nfs_block_size(fsinfo->dtpref, NULL); - if (server->dtsize > PAGE_SIZE * NFS_MAX_READDIR_PAGES) - server->dtsize = PAGE_SIZE * NFS_MAX_READDIR_PAGES; + if (server->dtsize > NFS_MAX_FILE_IO_SIZE) + server->dtsize = NFS_MAX_FILE_IO_SIZE; if (server->dtsize > server->rsize) server->dtsize = server->rsize; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index be0e2891fecc..438906dae083 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -727,44 +727,47 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc, return status; } -static -void nfs_readdir_free_pages(struct page **pages, unsigned int npages) +static void nfs_readdir_free_pages(struct page **pages, size_t npages) { - unsigned int i; - for (i = 0; i < npages; i++) - put_page(pages[i]); + while (npages--) + put_page(pages[npages]); + kfree(pages); } /* * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call * to nfs_readdir_free_pages() */ -static -int nfs_readdir_alloc_pages(struct page **pages, unsigned int npages) +static struct page **nfs_readdir_alloc_pages(size_t npages) { - unsigned int i; + struct page **pages; + size_t i; + pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL); + if (!pages) + return NULL; for (i = 0; i < npages; i++) { struct page *page = alloc_page(GFP_KERNEL); if (page == NULL) goto out_freepages; pages[i] = page; } - return 0; + return pages; out_freepages: nfs_readdir_free_pages(pages, i); - return -ENOMEM; + return NULL; } static int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode) { - struct page *pages[NFS_MAX_READDIR_PAGES]; + struct page **pages; struct nfs_entry entry; struct file *file = desc->file; + size_t array_size; + size_t dtsize = NFS_SERVER(inode)->dtsize; int status = -ENOMEM; - unsigned int array_size = ARRAY_SIZE(pages); entry.prev_cookie = 0; entry.cookie = nfs_readdir_page_last_cookie(page); @@ -781,9 +784,11 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, goto out; } - status = nfs_readdir_alloc_pages(pages, array_size); - if (status < 0) + array_size = (dtsize + PAGE_SIZE - 1) >> PAGE_SHIFT; + pages = nfs_readdir_alloc_pages(array_size); + if (!pages) goto out_release_label; + do { unsigned int pglen; status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 6673a77884d9..b840d0a91c9d 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -56,12 +56,6 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry) #define NFS_UNSPEC_RETRANS (UINT_MAX) #define NFS_UNSPEC_TIMEO (UINT_MAX) -/* - * Maximum number of pages that readdir can use for creating - * a vmapped array of pages. - */ -#define NFS_MAX_READDIR_PAGES 8 - struct nfs_client_initdata { unsigned long init_flags; const char *hostname; /* Hostname of the server */ -- cgit v1.2.3 From 93b8959a0a8cf1b1a493efee9e8328681e111862 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Nov 2020 15:24:41 -0500 Subject: NFS: More readdir cleanups Remove the redundant caching of the credential in struct nfs_open_dir_context. Pass the buffer size as an argument to nfs_readdir_xdr_filler(). Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 438906dae083..bc366bd8e8f3 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -68,7 +68,7 @@ const struct address_space_operations nfs_dir_aops = { .freepage = nfs_readdir_clear_array, }; -static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, const struct cred *cred) +static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir) { struct nfs_inode *nfsi = NFS_I(dir); struct nfs_open_dir_context *ctx; @@ -78,7 +78,6 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir ctx->attr_gencount = nfsi->attr_gencount; ctx->dir_cookie = 0; ctx->dup_cookie = 0; - ctx->cred = get_cred(cred); spin_lock(&dir->i_lock); if (list_empty(&nfsi->open_files) && (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER)) @@ -96,7 +95,6 @@ static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_cont spin_lock(&dir->i_lock); list_del(&ctx->list); spin_unlock(&dir->i_lock); - put_cred(ctx->cred); kfree(ctx); } @@ -113,7 +111,7 @@ nfs_opendir(struct inode *inode, struct file *filp) nfs_inc_stats(inode, NFSIOS_VFSOPEN); - ctx = alloc_nfs_open_dir_context(inode, current_cred()); + ctx = alloc_nfs_open_dir_context(inode); if (IS_ERR(ctx)) { res = PTR_ERR(ctx); goto out; @@ -468,12 +466,12 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) } /* Fill a page with xdr information before transferring to the cache page */ -static -int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, - struct nfs_entry *entry, struct file *file, struct inode *inode) +static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc, + u64 cookie, struct page **pages, + size_t bufsize) { - struct nfs_open_dir_context *ctx = file->private_data; - const struct cred *cred = ctx->cred; + struct file *file = desc->file; + struct inode *inode = file_inode(file); unsigned long timestamp, gencount; int error; @@ -481,8 +479,8 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, timestamp = jiffies; gencount = nfs_inc_attr_generation_counter(); desc->dir_verifier = nfs_save_change_attribute(inode); - error = NFS_PROTO(inode)->readdir(file_dentry(file), cred, entry->cookie, pages, - NFS_SERVER(inode)->dtsize, desc->plus); + error = NFS_PROTO(inode)->readdir(file_dentry(file), file->f_cred, + cookie, pages, bufsize, desc->plus); if (error < 0) { /* We requested READDIRPLUS, but the server doesn't grok it */ if (error == -ENOTSUPP && desc->plus) { @@ -764,7 +762,6 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, { struct page **pages; struct nfs_entry entry; - struct file *file = desc->file; size_t array_size; size_t dtsize = NFS_SERVER(inode)->dtsize; int status = -ENOMEM; @@ -791,8 +788,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, do { unsigned int pglen; - status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode); - + status = nfs_readdir_xdr_filler(desc, entry.cookie, + pages, dtsize); if (status < 0) break; -- cgit v1.2.3 From dbeaf8c984ca689c2c0966c41bd78dee178b5dfe Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 1 Nov 2020 18:20:03 -0500 Subject: NFS: nfs_do_filldir() does not return a value Clean up nfs_do_filldir(). Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index bc366bd8e8f3..48856cee10de 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -881,13 +881,11 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) /* * Once we've found the start of the dirent within a page: fill 'er up... */ -static -int nfs_do_filldir(nfs_readdir_descriptor_t *desc) +static void nfs_do_filldir(struct nfs_readdir_descriptor *desc) { struct file *file = desc->file; - int i = 0; - int res = 0; - struct nfs_cache_array *array = NULL; + struct nfs_cache_array *array; + unsigned int i = 0; array = kmap(desc->page); for (i = desc->cache_entry_index; i < array->size; i++) { @@ -914,9 +912,8 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc) desc->eof = true; kunmap(desc->page); - dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", - (unsigned long long)desc->dir_cookie, res); - return res; + dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n", + (unsigned long long)desc->dir_cookie); } /* @@ -957,7 +954,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc) if (status < 0) goto out_release; - status = nfs_do_filldir(desc); + nfs_do_filldir(desc); out_release: nfs_readdir_clear_array(desc->page); @@ -1032,10 +1029,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) if (res < 0) break; - res = nfs_do_filldir(desc); + nfs_do_filldir(desc); nfs_readdir_page_unlock_and_put_cached(desc); - if (res < 0) - break; } while (!desc->eof); spin_lock(&file->f_lock); @@ -1046,8 +1041,6 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) spin_unlock(&file->f_lock); out: - if (res > 0) - res = 0; dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res); return res; } -- cgit v1.2.3 From 6b75cf9e309d18664f964889ac026096ba0d1919 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Nov 2020 08:55:03 -0500 Subject: NFS: Reduce readdir stack usage The descriptor and the struct nfs_entry are both large structures, so don't allocate them from the stack. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 58 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 25 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 48856cee10de..c3af54640f6c 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -761,23 +761,24 @@ static int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode) { struct page **pages; - struct nfs_entry entry; + struct nfs_entry *entry; size_t array_size; size_t dtsize = NFS_SERVER(inode)->dtsize; int status = -ENOMEM; - entry.prev_cookie = 0; - entry.cookie = nfs_readdir_page_last_cookie(page); - entry.eof = 0; - entry.fh = nfs_alloc_fhandle(); - entry.fattr = nfs_alloc_fattr(); - entry.server = NFS_SERVER(inode); - if (entry.fh == NULL || entry.fattr == NULL) + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return -ENOMEM; + entry->cookie = nfs_readdir_page_last_cookie(page); + entry->fh = nfs_alloc_fhandle(); + entry->fattr = nfs_alloc_fattr(); + entry->server = NFS_SERVER(inode); + if (entry->fh == NULL || entry->fattr == NULL) goto out; - entry.label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); - if (IS_ERR(entry.label)) { - status = PTR_ERR(entry.label); + entry->label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT); + if (IS_ERR(entry->label)) { + status = PTR_ERR(entry->label); goto out; } @@ -788,7 +789,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, do { unsigned int pglen; - status = nfs_readdir_xdr_filler(desc, entry.cookie, + status = nfs_readdir_xdr_filler(desc, entry->cookie, pages, dtsize); if (status < 0) break; @@ -799,15 +800,16 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, break; } - status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen); + status = nfs_readdir_page_filler(desc, entry, pages, page, pglen); } while (!status && nfs_readdir_page_needs_filling(page)); nfs_readdir_free_pages(pages, array_size); out_release_label: - nfs4_label_free(entry.label); + nfs4_label_free(entry->label); out: - nfs_free_fattr(entry.fattr); - nfs_free_fhandle(entry.fh); + nfs_free_fattr(entry->fattr); + nfs_free_fhandle(entry->fh); + kfree(entry); return status; } @@ -974,13 +976,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) struct dentry *dentry = file_dentry(file); struct inode *inode = d_inode(dentry); struct nfs_open_dir_context *dir_ctx = file->private_data; - nfs_readdir_descriptor_t my_desc = { - .file = file, - .ctx = ctx, - .plus = nfs_use_readdirplus(inode, ctx), - }, - *desc = &my_desc; - int res = 0; + struct nfs_readdir_descriptor *desc; + int res; dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n", file, (long long)ctx->pos); @@ -992,10 +989,19 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) * to either find the entry with the appropriate number or * revalidate the cookie. */ - if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) + if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) { res = nfs_revalidate_mapping(inode, file->f_mapping); - if (res < 0) + if (res < 0) + goto out; + } + + res = -ENOMEM; + desc = kzalloc(sizeof(*desc), GFP_KERNEL); + if (!desc) goto out; + desc->file = file; + desc->ctx = ctx; + desc->plus = nfs_use_readdirplus(inode, ctx); spin_lock(&file->f_lock); desc->dir_cookie = dir_ctx->dir_cookie; @@ -1040,6 +1046,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) dir_ctx->attr_gencount = desc->attr_gencount; spin_unlock(&file->f_lock); + kfree(desc); + out: dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res); return res; -- cgit v1.2.3 From 6c981eff23b894ce429281dc45a5589359eef2c1 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 3 Nov 2020 07:42:04 -0500 Subject: NFS: Cleanup to remove nfs_readdir_descriptor_t typedef Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c3af54640f6c..b226f6f3ae96 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -144,7 +144,7 @@ struct nfs_cache_array { struct nfs_cache_array_entry array[]; }; -typedef struct nfs_readdir_descriptor { +struct nfs_readdir_descriptor { struct file *file; struct page *page; struct dir_context *ctx; @@ -163,7 +163,7 @@ typedef struct nfs_readdir_descriptor { signed char duped; bool plus; bool eof; -} nfs_readdir_descriptor_t; +}; static void nfs_readdir_array_init(struct nfs_cache_array *array) { @@ -362,8 +362,8 @@ bool nfs_readdir_use_cookie(const struct file *filp) return true; } -static -int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) +static int nfs_readdir_search_for_pos(struct nfs_cache_array *array, + struct nfs_readdir_descriptor *desc) { loff_t diff = desc->ctx->pos - desc->current_index; unsigned int index; @@ -394,8 +394,8 @@ nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi) return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags); } -static -int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) +static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, + struct nfs_readdir_descriptor *desc) { int i; loff_t new_pos; @@ -443,8 +443,7 @@ out: return status; } -static -int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc) +static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc) { struct nfs_cache_array *array; int status; @@ -497,7 +496,7 @@ error: return error; } -static int xdr_decode(nfs_readdir_descriptor_t *desc, +static int xdr_decode(struct nfs_readdir_descriptor *desc, struct nfs_entry *entry, struct xdr_stream *xdr) { struct inode *inode = file_inode(desc->file); @@ -757,8 +756,8 @@ out_freepages: return NULL; } -static -int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode) +static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, + struct page *page, struct inode *inode) { struct page **pages; struct nfs_entry *entry; @@ -838,8 +837,7 @@ nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc) * Returns 0 if desc->dir_cookie was found on page desc->page_index * and locks the page to prevent removal from the page cache. */ -static -int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc) +static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) { struct inode *inode = file_inode(desc->file); struct nfs_inode *nfsi = NFS_I(inode); @@ -864,8 +862,7 @@ error: } /* Search for desc->dir_cookie from the beginning of the page cache */ -static inline -int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) +static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc) { int res; @@ -930,8 +927,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc) * we should already have a complete representation of the * directory in the page cache by the time we get here. */ -static inline -int uncached_readdir(nfs_readdir_descriptor_t *desc) +static int uncached_readdir(struct nfs_readdir_descriptor *desc) { struct page *page = NULL; int status; -- cgit v1.2.3 From 82e22a5e6245873779db1607d3b0fec6f9ca07d0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Nov 2020 17:34:23 -0500 Subject: NFS: Allow the NFS generic code to pass in a verifier to readdir If we're ever going to allow support for servers that use the readdir verifier, then that use needs to be managed by the middle layers as those need to be able to reject cookies from other verifiers. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 23 ++++++++++++++++++----- fs/nfs/nfs3proc.c | 35 +++++++++++++++++------------------ fs/nfs/nfs4proc.c | 36 +++++++++++++++++------------------- fs/nfs/proc.c | 18 +++++++++--------- 4 files changed, 61 insertions(+), 51 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b226f6f3ae96..3ee0668a9719 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -469,8 +469,20 @@ static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc, u64 cookie, struct page **pages, size_t bufsize) { - struct file *file = desc->file; - struct inode *inode = file_inode(file); + struct inode *inode = file_inode(desc->file); + __be32 verf_res[2]; + struct nfs_readdir_arg arg = { + .dentry = file_dentry(desc->file), + .cred = desc->file->f_cred, + .verf = NFS_I(inode)->cookieverf, + .cookie = cookie, + .pages = pages, + .page_len = bufsize, + .plus = desc->plus, + }; + struct nfs_readdir_res res = { + .verf = verf_res, + }; unsigned long timestamp, gencount; int error; @@ -478,20 +490,21 @@ static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc, timestamp = jiffies; gencount = nfs_inc_attr_generation_counter(); desc->dir_verifier = nfs_save_change_attribute(inode); - error = NFS_PROTO(inode)->readdir(file_dentry(file), file->f_cred, - cookie, pages, bufsize, desc->plus); + error = NFS_PROTO(inode)->readdir(&arg, &res); if (error < 0) { /* We requested READDIRPLUS, but the server doesn't grok it */ if (error == -ENOTSUPP && desc->plus) { NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS; clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); - desc->plus = false; + desc->plus = arg.plus = false; goto again; } goto error; } desc->timestamp = timestamp; desc->gencount = gencount; + memcpy(NFS_I(inode)->cookieverf, res.verf, + sizeof(NFS_I(inode)->cookieverf)); error: return error; } diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 6b66b73a50eb..5c4e23abc345 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -662,37 +662,36 @@ out: * Also note that this implementation handles both plain readdir and * readdirplus. */ -static int -nfs3_proc_readdir(struct dentry *dentry, const struct cred *cred, - u64 cookie, struct page **pages, unsigned int count, bool plus) +static int nfs3_proc_readdir(struct nfs_readdir_arg *nr_arg, + struct nfs_readdir_res *nr_res) { - struct inode *dir = d_inode(dentry); - __be32 *verf = NFS_I(dir)->cookieverf; + struct inode *dir = d_inode(nr_arg->dentry); struct nfs3_readdirargs arg = { .fh = NFS_FH(dir), - .cookie = cookie, - .verf = {verf[0], verf[1]}, - .plus = plus, - .count = count, - .pages = pages + .cookie = nr_arg->cookie, + .plus = nr_arg->plus, + .count = nr_arg->page_len, + .pages = nr_arg->pages }; struct nfs3_readdirres res = { - .verf = verf, - .plus = plus + .verf = nr_res->verf, + .plus = nr_arg->plus, }; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_READDIR], .rpc_argp = &arg, .rpc_resp = &res, - .rpc_cred = cred, + .rpc_cred = nr_arg->cred, }; int status = -ENOMEM; - if (plus) + if (nr_arg->plus) msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS]; + if (arg.cookie) + memcpy(arg.verf, nr_arg->verf, sizeof(arg.verf)); - dprintk("NFS call readdir%s %d\n", - plus? "plus" : "", (unsigned int) cookie); + dprintk("NFS call readdir%s %llu\n", nr_arg->plus ? "plus" : "", + (unsigned long long)nr_arg->cookie); res.dir_attr = nfs_alloc_fattr(); if (res.dir_attr == NULL) @@ -705,8 +704,8 @@ nfs3_proc_readdir(struct dentry *dentry, const struct cred *cred, nfs_free_fattr(res.dir_attr); out: - dprintk("NFS reply readdir%s: %d\n", - plus? "plus" : "", status); + dprintk("NFS reply readdir%s: %d\n", nr_arg->plus ? "plus" : "", + status); return status; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 66f1f4a5c74c..adcaba68eaed 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4961,41 +4961,40 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry, return err; } -static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred, - u64 cookie, struct page **pages, unsigned int count, bool plus) +static int _nfs4_proc_readdir(struct nfs_readdir_arg *nr_arg, + struct nfs_readdir_res *nr_res) { - struct inode *dir = d_inode(dentry); + struct inode *dir = d_inode(nr_arg->dentry); struct nfs_server *server = NFS_SERVER(dir); struct nfs4_readdir_arg args = { .fh = NFS_FH(dir), - .pages = pages, + .pages = nr_arg->pages, .pgbase = 0, - .count = count, - .plus = plus, + .count = nr_arg->page_len, + .plus = nr_arg->plus, }; struct nfs4_readdir_res res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READDIR], .rpc_argp = &args, .rpc_resp = &res, - .rpc_cred = cred, + .rpc_cred = nr_arg->cred, }; int status; - dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__, - dentry, - (unsigned long long)cookie); + dprintk("%s: dentry = %pd2, cookie = %llu\n", __func__, + nr_arg->dentry, (unsigned long long)nr_arg->cookie); if (!(server->caps & NFS_CAP_SECURITY_LABEL)) args.bitmask = server->attr_bitmask_nl; else args.bitmask = server->attr_bitmask; - nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args); + nfs4_setup_readdir(nr_arg->cookie, nr_arg->verf, nr_arg->dentry, &args); res.pgbase = args.pgbase; status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); if (status >= 0) { - memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE); + memcpy(nr_res->verf, res.verifier.data, NFS4_VERIFIER_SIZE); status += args.pgbase; } @@ -5005,19 +5004,18 @@ static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred, return status; } -static int nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred, - u64 cookie, struct page **pages, unsigned int count, bool plus) +static int nfs4_proc_readdir(struct nfs_readdir_arg *arg, + struct nfs_readdir_res *res) { struct nfs4_exception exception = { .interruptible = true, }; int err; do { - err = _nfs4_proc_readdir(dentry, cred, cookie, - pages, count, plus); - trace_nfs4_readdir(d_inode(dentry), err); - err = nfs4_handle_exception(NFS_SERVER(d_inode(dentry)), err, - &exception); + err = _nfs4_proc_readdir(arg, res); + trace_nfs4_readdir(d_inode(arg->dentry), err); + err = nfs4_handle_exception(NFS_SERVER(d_inode(arg->dentry)), + err, &exception); } while (exception.retry); return err; } diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c index 15c865cc837f..73ab7c59d3a7 100644 --- a/fs/nfs/proc.c +++ b/fs/nfs/proc.c @@ -499,26 +499,26 @@ nfs_proc_rmdir(struct inode *dir, const struct qstr *name) * sure it is syntactically correct; the entries itself are decoded * from nfs_readdir by calling the decode_entry function directly. */ -static int -nfs_proc_readdir(struct dentry *dentry, const struct cred *cred, - u64 cookie, struct page **pages, unsigned int count, bool plus) +static int nfs_proc_readdir(struct nfs_readdir_arg *nr_arg, + struct nfs_readdir_res *nr_res) { - struct inode *dir = d_inode(dentry); + struct inode *dir = d_inode(nr_arg->dentry); struct nfs_readdirargs arg = { .fh = NFS_FH(dir), - .cookie = cookie, - .count = count, - .pages = pages, + .cookie = nr_arg->cookie, + .count = nr_arg->page_len, + .pages = nr_arg->pages, }; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_READDIR], .rpc_argp = &arg, - .rpc_cred = cred, + .rpc_cred = nr_arg->cred, }; int status; - dprintk("NFS call readdir %d\n", (unsigned int)cookie); + dprintk("NFS call readdir %llu\n", (unsigned long long)nr_arg->cookie); status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + nr_res->verf[0] = nr_res->verf[1] = 0; nfs_invalidate_atime(dir); -- cgit v1.2.3 From 9fff59ed4c4d239125f8529a9971c46defd2e2b0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Nov 2020 20:11:32 -0500 Subject: NFS: Handle NFS4ERR_NOT_SAME and NFSERR_BADCOOKIE from readdir calls If the server returns NFS4ERR_NOT_SAME or tells us that the cookie is bad in response to a READDIR call, then we should empty the page cache so that we can fill it from scratch again. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 24 ++++++++++++++++-------- fs/nfs/nfs4proc.c | 2 ++ 2 files changed, 18 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3ee0668a9719..3b44bef3a1b4 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -861,15 +861,21 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) return -ENOMEM; if (nfs_readdir_page_needs_filling(desc->page)) { res = nfs_readdir_xdr_to_array(desc, desc->page, inode); - if (res < 0) - goto error; + if (res < 0) { + nfs_readdir_page_unlock_and_put_cached(desc); + if (res == -EBADCOOKIE || res == -ENOTSYNC) { + invalidate_inode_pages2(desc->file->f_mapping); + desc->page_index = 0; + return -EAGAIN; + } + return res; + } } res = nfs_readdir_search_array(desc); if (res == 0) { nfsi->page_index = desc->page_index; return 0; } -error: nfs_readdir_page_unlock_and_put_cached(desc); return res; } @@ -879,12 +885,12 @@ static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc) { int res; - if (desc->page_index == 0) { - desc->current_index = 0; - desc->prev_index = 0; - desc->last_cookie = 0; - } do { + if (desc->page_index == 0) { + desc->current_index = 0; + desc->prev_index = 0; + desc->last_cookie = 0; + } res = find_and_lock_cache_page(desc); } while (res == -EAGAIN); return res; @@ -1030,6 +1036,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) res = uncached_readdir(desc); if (res == 0) continue; + if (res == -EBADCOOKIE || res == -ENOTSYNC) + res = 0; } break; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index adcaba68eaed..7ab40d0e6a74 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -184,6 +184,8 @@ static int nfs4_map_errors(int err) return -EPROTONOSUPPORT; case -NFS4ERR_FILE_OPEN: return -EBUSY; + case -NFS4ERR_NOT_SAME: + return -ENOTSYNC; default: dprintk("%s could not handle NFSv4 error %d\n", __func__, -err); -- cgit v1.2.3 From b593c09f83a2732a0f0298c8f3468236a83cdd9f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 2 Nov 2020 20:06:12 -0500 Subject: NFS: Improve handling of directory verifiers If the server insists on using the readdir verifiers in order to allow cookies to expire, then we should ensure that we cache the verifier with the cookie, so that we can return an error if the application tries to use the expired cookie. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 35 +++++++++++++++++++++++------------ fs/nfs/inode.c | 7 ------- 2 files changed, 23 insertions(+), 19 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 3b44bef3a1b4..454377228167 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -155,6 +155,7 @@ struct nfs_readdir_descriptor { loff_t current_index; loff_t prev_index; + __be32 verf[NFS_DIR_VERIFIER_SIZE]; unsigned long dir_verifier; unsigned long timestamp; unsigned long gencount; @@ -466,15 +467,15 @@ static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc) /* Fill a page with xdr information before transferring to the cache page */ static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc, - u64 cookie, struct page **pages, - size_t bufsize) + __be32 *verf, u64 cookie, + struct page **pages, size_t bufsize, + __be32 *verf_res) { struct inode *inode = file_inode(desc->file); - __be32 verf_res[2]; struct nfs_readdir_arg arg = { .dentry = file_dentry(desc->file), .cred = desc->file->f_cred, - .verf = NFS_I(inode)->cookieverf, + .verf = verf, .cookie = cookie, .pages = pages, .page_len = bufsize, @@ -503,8 +504,6 @@ static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc, } desc->timestamp = timestamp; desc->gencount = gencount; - memcpy(NFS_I(inode)->cookieverf, res.verf, - sizeof(NFS_I(inode)->cookieverf)); error: return error; } @@ -770,11 +769,13 @@ out_freepages: } static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, - struct page *page, struct inode *inode) + struct page *page, __be32 *verf_arg, + __be32 *verf_res) { struct page **pages; struct nfs_entry *entry; size_t array_size; + struct inode *inode = file_inode(desc->file); size_t dtsize = NFS_SERVER(inode)->dtsize; int status = -ENOMEM; @@ -801,8 +802,9 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, do { unsigned int pglen; - status = nfs_readdir_xdr_filler(desc, entry->cookie, - pages, dtsize); + status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie, + pages, dtsize, + verf_res); if (status < 0) break; @@ -854,13 +856,15 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) { struct inode *inode = file_inode(desc->file); struct nfs_inode *nfsi = NFS_I(inode); + __be32 verf[NFS_DIR_VERIFIER_SIZE]; int res; desc->page = nfs_readdir_page_get_cached(desc); if (!desc->page) return -ENOMEM; if (nfs_readdir_page_needs_filling(desc->page)) { - res = nfs_readdir_xdr_to_array(desc, desc->page, inode); + res = nfs_readdir_xdr_to_array(desc, desc->page, + nfsi->cookieverf, verf); if (res < 0) { nfs_readdir_page_unlock_and_put_cached(desc); if (res == -EBADCOOKIE || res == -ENOTSYNC) { @@ -870,6 +874,7 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) } return res; } + memcpy(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf)); } res = nfs_readdir_search_array(desc); if (res == 0) { @@ -902,6 +907,7 @@ static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc) static void nfs_do_filldir(struct nfs_readdir_descriptor *desc) { struct file *file = desc->file; + struct nfs_inode *nfsi = NFS_I(file_inode(file)); struct nfs_cache_array *array; unsigned int i = 0; @@ -915,6 +921,7 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc) desc->eof = true; break; } + memcpy(desc->verf, nfsi->cookieverf, sizeof(desc->verf)); if (i < (array->size-1)) desc->dir_cookie = array->array[i+1].cookie; else @@ -949,8 +956,8 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc) static int uncached_readdir(struct nfs_readdir_descriptor *desc) { struct page *page = NULL; + __be32 verf[NFS_DIR_VERIFIER_SIZE]; int status; - struct inode *inode = file_inode(desc->file); dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)desc->dir_cookie); @@ -967,7 +974,7 @@ static int uncached_readdir(struct nfs_readdir_descriptor *desc) desc->duped = 0; nfs_readdir_page_init_array(page, desc->dir_cookie); - status = nfs_readdir_xdr_to_array(desc, page, inode); + status = nfs_readdir_xdr_to_array(desc, page, desc->verf, verf); if (status < 0) goto out_release; @@ -1023,6 +1030,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) desc->dup_cookie = dir_ctx->dup_cookie; desc->duped = dir_ctx->duped; desc->attr_gencount = dir_ctx->attr_gencount; + memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf)); spin_unlock(&file->f_lock); do { @@ -1061,6 +1069,7 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx) dir_ctx->dup_cookie = desc->dup_cookie; dir_ctx->duped = desc->duped; dir_ctx->attr_gencount = desc->attr_gencount; + memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf)); spin_unlock(&file->f_lock); kfree(desc); @@ -1101,6 +1110,8 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) dir_ctx->dir_cookie = offset; else dir_ctx->dir_cookie = 0; + if (offset == 0) + memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf)); dir_ctx->duped = 0; } spin_unlock(&filp->f_lock); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index aa6493905bbe..9b765a900b28 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -229,7 +229,6 @@ static void nfs_zap_caches_locked(struct inode *inode) nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; - memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf)); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA @@ -1237,7 +1236,6 @@ EXPORT_SYMBOL_GPL(nfs_revalidate_inode); static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping) { - struct nfs_inode *nfsi = NFS_I(inode); int ret; if (mapping->nrpages != 0) { @@ -1250,11 +1248,6 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map if (ret < 0) return ret; } - if (S_ISDIR(inode->i_mode)) { - spin_lock(&inode->i_lock); - memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); - spin_unlock(&inode->i_lock); - } nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); nfs_fscache_wait_on_invalidate(inode); -- cgit v1.2.3 From 762567b7c798afd08c22811ecfc66885a2b50f91 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 4 Nov 2020 08:32:19 -0500 Subject: NFS: Optimisations for monotonically increasing readdir cookies If the server is handing out monotonically increasing readdir cookie values, then we can optimise away searches through pages that contain cookies that lie outside our search range. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 454377228167..b6c3501e8f61 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -140,7 +140,8 @@ struct nfs_cache_array { u64 last_cookie; unsigned int size; unsigned char page_full : 1, - page_is_eof : 1; + page_is_eof : 1, + cookies_are_ordered : 1; struct nfs_cache_array_entry array[]; }; @@ -178,6 +179,7 @@ static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie) array = kmap_atomic(page); nfs_readdir_array_init(array); array->last_cookie = last_cookie; + array->cookies_are_ordered = 1; kunmap_atomic(array); } @@ -269,6 +271,8 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) cache_entry->name_len = entry->len; cache_entry->name = name; array->last_cookie = entry->cookie; + if (array->last_cookie <= cache_entry->cookie) + array->cookies_are_ordered = 0; array->size++; if (entry->eof != 0) nfs_readdir_array_set_eof(array); @@ -395,6 +399,19 @@ nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi) return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags); } +static bool nfs_readdir_array_cookie_in_range(struct nfs_cache_array *array, + u64 cookie) +{ + if (!array->cookies_are_ordered) + return true; + /* Optimisation for monotonically increasing cookies */ + if (cookie >= array->last_cookie) + return false; + if (array->size && cookie < array->array[0].cookie) + return false; + return true; +} + static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, struct nfs_readdir_descriptor *desc) { @@ -402,6 +419,9 @@ static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, loff_t new_pos; int status = -EAGAIN; + if (!nfs_readdir_array_cookie_in_range(array, desc->dir_cookie)) + goto check_eof; + for (i = 0; i < array->size; i++) { if (array->array[i].cookie == desc->dir_cookie) { struct nfs_inode *nfsi = NFS_I(file_inode(desc->file)); @@ -435,6 +455,7 @@ static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, return 0; } } +check_eof: if (array->page_is_eof) { status = -EBADCOOKIE; if (desc->dir_cookie == array->last_cookie) -- cgit v1.2.3 From 35df59d3ef693292840a61cdb04b39d8c9412f4e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 6 Nov 2020 20:38:47 -0500 Subject: NFS: Reduce number of RPC calls when doing uncached readdir If we're doing uncached readdir, allocate multiple pages in order to try to avoid duplicate RPC calls for the same getdents() call. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 105 +++++++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 69 insertions(+), 36 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index b6c3501e8f61..93c6f22cdc62 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -199,6 +199,23 @@ void nfs_readdir_clear_array(struct page *page) kunmap_atomic(array); } +static struct page * +nfs_readdir_page_array_alloc(u64 last_cookie, gfp_t gfp_flags) +{ + struct page *page = alloc_page(gfp_flags); + if (page) + nfs_readdir_page_init_array(page, last_cookie); + return page; +} + +static void nfs_readdir_page_array_free(struct page *page) +{ + if (page) { + nfs_readdir_clear_array(page); + put_page(page); + } +} + static void nfs_readdir_array_set_eof(struct nfs_cache_array *array) { array->page_is_eof = 1; @@ -694,12 +711,14 @@ out: static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc, struct nfs_entry *entry, struct page **xdr_pages, - struct page *fillme, unsigned int buflen) + unsigned int buflen, + struct page **arrays, + size_t narrays) { struct address_space *mapping = desc->file->f_mapping; struct xdr_stream stream; struct xdr_buf buf; - struct page *scratch, *new, *page = fillme; + struct page *scratch, *new, *page = *arrays; int status; scratch = alloc_page(GFP_KERNEL); @@ -725,15 +744,25 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc, if (status != -ENOSPC) continue; - if (page->mapping != mapping) - break; - new = nfs_readdir_page_get_next(mapping, page->index + 1, - entry->prev_cookie); - if (!new) - break; - if (page != fillme) - nfs_readdir_page_unlock_and_put(page); - page = new; + if (page->mapping != mapping) { + if (!--narrays) + break; + new = nfs_readdir_page_array_alloc(entry->prev_cookie, + GFP_KERNEL); + if (!new) + break; + arrays++; + *arrays = page = new; + } else { + new = nfs_readdir_page_get_next(mapping, + page->index + 1, + entry->prev_cookie); + if (!new) + break; + if (page != *arrays) + nfs_readdir_page_unlock_and_put(page); + page = new; + } status = nfs_readdir_add_to_array(entry, page); } while (!status && !entry->eof); @@ -750,7 +779,7 @@ static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc, break; } - if (page != fillme) + if (page != *arrays) nfs_readdir_page_unlock_and_put(page); put_page(scratch); @@ -790,10 +819,11 @@ out_freepages: } static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, - struct page *page, __be32 *verf_arg, - __be32 *verf_res) + __be32 *verf_arg, __be32 *verf_res, + struct page **arrays, size_t narrays) { struct page **pages; + struct page *page = *arrays; struct nfs_entry *entry; size_t array_size; struct inode *inode = file_inode(desc->file); @@ -835,7 +865,8 @@ static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc, break; } - status = nfs_readdir_page_filler(desc, entry, pages, page, pglen); + status = nfs_readdir_page_filler(desc, entry, pages, pglen, + arrays, narrays); } while (!status && nfs_readdir_page_needs_filling(page)); nfs_readdir_free_pages(pages, array_size); @@ -884,8 +915,8 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) if (!desc->page) return -ENOMEM; if (nfs_readdir_page_needs_filling(desc->page)) { - res = nfs_readdir_xdr_to_array(desc, desc->page, - nfsi->cookieverf, verf); + res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf, + &desc->page, 1); if (res < 0) { nfs_readdir_page_unlock_and_put_cached(desc); if (res == -EBADCOOKIE || res == -ENOTSYNC) { @@ -976,37 +1007,39 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc) */ static int uncached_readdir(struct nfs_readdir_descriptor *desc) { - struct page *page = NULL; + struct page **arrays; + size_t i, sz = 512; __be32 verf[NFS_DIR_VERIFIER_SIZE]; - int status; + int status = -ENOMEM; - dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n", + dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %llu\n", (unsigned long long)desc->dir_cookie); - page = alloc_page(GFP_HIGHUSER); - if (!page) { - status = -ENOMEM; + arrays = kcalloc(sz, sizeof(*arrays), GFP_KERNEL); + if (!arrays) + goto out; + arrays[0] = nfs_readdir_page_array_alloc(desc->dir_cookie, GFP_KERNEL); + if (!arrays[0]) goto out; - } desc->page_index = 0; desc->last_cookie = desc->dir_cookie; - desc->page = page; desc->duped = 0; - nfs_readdir_page_init_array(page, desc->dir_cookie); - status = nfs_readdir_xdr_to_array(desc, page, desc->verf, verf); - if (status < 0) - goto out_release; + status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz); - nfs_do_filldir(desc); + for (i = 0; !desc->eof && i < sz && arrays[i]; i++) { + desc->page = arrays[i]; + nfs_do_filldir(desc); + } + desc->page = NULL; - out_release: - nfs_readdir_clear_array(desc->page); - nfs_readdir_page_put(desc); - out: - dfprintk(DIRCACHE, "NFS: %s: returns %d\n", - __func__, status); + + for (i = 0; i < sz && arrays[i]; i++) + nfs_readdir_page_array_free(arrays[i]); +out: + kfree(arrays); + dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status); return status; } -- cgit v1.2.3 From 794092c57f89c2c833da00f82f38a0afcb4033bc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 6 Nov 2020 20:47:05 -0500 Subject: NFS: Do uncached readdir when we're seeking a cookie in an empty page cache If the directory is changing, causing the page cache to get invalidated while we are listing the contents, then the NFS client is currently forced to read in the entire directory contents from scratch, because it needs to perform a linear search for the readdir cookie. While this is not an issue for small directories, it does not scale to directories with millions of entries. In order to be able to deal with large directories that are changing, add a heuristic to ensure that if the page cache is empty, and we are searching for a cookie that is not the zero cookie, we just default to performing uncached readdir. Signed-off-by: Trond Myklebust Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Tested-by: Dave Wysochanski --- fs/nfs/dir.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 93c6f22cdc62..3f70697729d8 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -937,11 +937,28 @@ static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc) return res; } +static bool nfs_readdir_dont_search_cache(struct nfs_readdir_descriptor *desc) +{ + struct address_space *mapping = desc->file->f_mapping; + struct inode *dir = file_inode(desc->file); + unsigned int dtsize = NFS_SERVER(dir)->dtsize; + loff_t size = i_size_read(dir); + + /* + * Default to uncached readdir if the page cache is empty, and + * we're looking for a non-zero cookie in a large directory. + */ + return desc->dir_cookie != 0 && mapping->nrpages == 0 && size > dtsize; +} + /* Search for desc->dir_cookie from the beginning of the page cache */ static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc) { int res; + if (nfs_readdir_dont_search_cache(desc)) + return -EBADCOOKIE; + do { if (desc->page_index == 0) { desc->current_index = 0; -- cgit v1.2.3 From 1c3695d0bb3869fedcde4538d831003519576ece Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Nov 2020 10:30:35 -0500 Subject: NFS: Switch mount code to use xprt_find_transport_ident() Switch the mount code to use xprt_find_transport_ident() and to check the results before allowing the mount to proceed. Signed-off-by: Trond Myklebust --- fs/nfs/fs_context.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index 29ec8b09a52d..06894bcdea2d 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -510,13 +510,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP; break; case Opt_tcp: - ctx->flags |= NFS_MOUNT_TCP; - ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP; - break; case Opt_rdma: ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */ - ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - xprt_load_transport(param->key); + ret = xprt_find_transport_ident(param->key); + if (ret < 0) + goto out_bad_transport; + ctx->nfs_server.protocol = ret; break; case Opt_acl: if (result.negated) @@ -670,11 +669,13 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, case Opt_xprt_rdma: /* vector side protocols to TCP */ ctx->flags |= NFS_MOUNT_TCP; - ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA; - xprt_load_transport(param->string); + ret = xprt_find_transport_ident(param->string); + if (ret < 0) + goto out_bad_transport; + ctx->nfs_server.protocol = ret; break; default: - return nfs_invalf(fc, "NFS: Unrecognized transport protocol"); + goto out_bad_transport; } ctx->protofamily = protofamily; @@ -697,7 +698,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc, break; case Opt_xprt_rdma: /* not used for side protocols */ default: - return nfs_invalf(fc, "NFS: Unrecognized transport protocol"); + goto out_bad_transport; } ctx->mountfamily = mountfamily; break; @@ -787,6 +788,8 @@ out_invalid_address: return nfs_invalf(fc, "NFS: Bad IP address specified"); out_of_bounds: return nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key); +out_bad_transport: + return nfs_invalf(fc, "NFS: Unrecognized transport protocol"); } /* -- cgit v1.2.3 From a12f996d3413ae41b6c0952013cd7a11396e14eb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 6 Nov 2020 16:58:53 -0500 Subject: NFSv4/pNFS: Use connections to a DS that are all of the same protocol family If the pNFS metadata server advertises multiple addresses for the same data server, we should try to connect to just one protocol family and transport type on the assumption that homogeneity will improve performance. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 679767ac258d..ee6d003db844 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -860,6 +860,9 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, .addrlen = da->da_addrlen, .servername = clp->cl_hostname, }; + + if (da->da_addr.ss_family != clp->cl_addr.ss_family) + continue; /* Add this address as an alias */ rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args, rpc_clnt_test_and_add_xprt, NULL); @@ -913,17 +916,19 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, }; struct nfs4_add_xprt_data xprtdata = { .clp = clp, - .cred = nfs4_get_clid_cred(clp), }; struct rpc_add_xprt_test rpcdata = { .add_xprt_test = clp->cl_mvops->session_trunk, .data = &xprtdata, }; + if (da->da_addr.ss_family != clp->cl_addr.ss_family) + continue; /** * Test this address for session trunking and * add as an alias */ + xprtdata.cred = nfs4_get_clid_cred(clp), rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args, rpc_clnt_setup_test_and_add_xprt, &rpcdata); -- cgit v1.2.3 From 190c75a31fe65e311f3628bd97bd58cff50d221f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Nov 2020 09:21:48 -0500 Subject: pNFS: Add helpers for allocation/free of struct nfs4_pnfs_ds_addr Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index ee6d003db844..c3c04a763985 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -661,6 +661,20 @@ _data_server_lookup_locked(const struct list_head *dsaddrs) return NULL; } +static struct nfs4_pnfs_ds_addr *nfs4_pnfs_ds_addr_alloc(gfp_t gfp_flags) +{ + struct nfs4_pnfs_ds_addr *da = kzalloc(sizeof(*da), gfp_flags); + if (da) + INIT_LIST_HEAD(&da->da_node); + return da; +} + +static void nfs4_pnfs_ds_addr_free(struct nfs4_pnfs_ds_addr *da) +{ + kfree(da->da_remotestr); + kfree(da); +} + static void destroy_ds(struct nfs4_pnfs_ds *ds) { struct nfs4_pnfs_ds_addr *da; @@ -676,8 +690,7 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds) struct nfs4_pnfs_ds_addr, da_node); list_del_init(&da->da_node); - kfree(da->da_remotestr); - kfree(da); + nfs4_pnfs_ds_addr_free(da); } kfree(ds->ds_remotestr); @@ -1094,12 +1107,10 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) } *portstr = '\0'; - da = kzalloc(sizeof(*da), gfp_flags); + da = nfs4_pnfs_ds_addr_alloc(gfp_flags); if (unlikely(!da)) goto out_free_buf; - INIT_LIST_HEAD(&da->da_node); - if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr, sizeof(da->da_addr))) { dprintk("%s: error parsing address %s\n", __func__, buf); -- cgit v1.2.3 From 4be78d26810bc506769773ca2f81b4de65f43fd5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 6 Nov 2020 16:10:52 -0500 Subject: NFSv4/pNFS: Store the transport type in struct nfs4_pnfs_ds_addr We want to enable RDMA and UDP as valid transport methods if a GETDEVICEINFO call specifies it. Do so by adding a parser for the netid that translates it to an appropriate argument for the RPC transport layer. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.h | 2 ++ fs/nfs/pnfs_nfs.c | 34 +++++++++++++++++++--------------- 2 files changed, 21 insertions(+), 15 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 2661c44c62db..f618c49697bb 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -51,6 +51,8 @@ struct nfs4_pnfs_ds_addr { size_t da_addrlen; struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ char *da_remotestr; /* human readable addr+port */ + const char *da_netid; + int da_transport; }; struct nfs4_pnfs_ds { diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index c3c04a763985..7a97643acf3f 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -672,6 +672,7 @@ static struct nfs4_pnfs_ds_addr *nfs4_pnfs_ds_addr_alloc(gfp_t gfp_flags) static void nfs4_pnfs_ds_addr_free(struct nfs4_pnfs_ds_addr *da) { kfree(da->da_remotestr); + kfree(da->da_netid); kfree(da); } @@ -867,13 +868,15 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, if (!IS_ERR(clp)) { struct xprt_create xprt_args = { - .ident = XPRT_TRANSPORT_TCP, + .ident = da->da_transport, .net = clp->cl_net, .dstaddr = (struct sockaddr *)&da->da_addr, .addrlen = da->da_addrlen, .servername = clp->cl_hostname, }; + if (da->da_transport != clp->cl_proto) + continue; if (da->da_addr.ss_family != clp->cl_addr.ss_family) continue; /* Add this address as an alias */ @@ -883,7 +886,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, } clp = get_v3_ds_connect(mds_srv, (struct sockaddr *)&da->da_addr, - da->da_addrlen, IPPROTO_TCP, + da->da_addrlen, da->da_transport, timeo, retrans); if (IS_ERR(clp)) continue; @@ -921,7 +924,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, if (!IS_ERR(clp) && clp->cl_mvops->session_trunk) { struct xprt_create xprt_args = { - .ident = XPRT_TRANSPORT_TCP, + .ident = da->da_transport, .net = clp->cl_net, .dstaddr = (struct sockaddr *)&da->da_addr, .addrlen = da->da_addrlen, @@ -935,6 +938,8 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, .data = &xprtdata, }; + if (da->da_transport != clp->cl_proto) + continue; if (da->da_addr.ss_family != clp->cl_addr.ss_family) continue; /** @@ -950,8 +955,9 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, } else { clp = nfs4_set_ds_client(mds_srv, (struct sockaddr *)&da->da_addr, - da->da_addrlen, IPPROTO_TCP, - timeo, retrans, minor_version); + da->da_addrlen, + da->da_transport, timeo, + retrans, minor_version); if (IS_ERR(clp)) continue; @@ -1042,8 +1048,8 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) int nlen, rlen; int tmp[2]; __be32 *p; - char *netid, *match_netid; - size_t len, match_netid_len; + char *netid; + size_t len; char *startsep = ""; char *endsep = ""; @@ -1125,15 +1131,11 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) case AF_INET: ((struct sockaddr_in *)&da->da_addr)->sin_port = port; da->da_addrlen = sizeof(struct sockaddr_in); - match_netid = "tcp"; - match_netid_len = 3; break; case AF_INET6: ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; da->da_addrlen = sizeof(struct sockaddr_in6); - match_netid = "tcp6"; - match_netid_len = 4; startsep = "["; endsep = "]"; break; @@ -1144,12 +1146,15 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) goto out_free_da; } - if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { - dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", - __func__, netid, match_netid); + da->da_transport = xprt_find_transport_ident(netid); + if (da->da_transport < 0) { + dprintk("%s: ERROR: unknown r_netid \"%s\"\n", + __func__, netid); goto out_free_da; } + da->da_netid = netid; + /* save human readable address */ len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; da->da_remotestr = kzalloc(len, gfp_flags); @@ -1161,7 +1166,6 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); kfree(buf); - kfree(netid); return da; out_free_da: -- cgit v1.2.3 From 9a7016319e1e7c6348a960931182f5f71b95f24e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Nov 2020 15:42:03 -0500 Subject: pNFS/flexfiles: Fix up layoutstats reporting for non-TCP transports Ensure that we report the correct netid when using UDP or RDMA transports to the DSes. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 24bf5797f88a..b7c26836b2cb 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -2284,7 +2284,6 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da) struct sockaddr *sap = (struct sockaddr *)&da->da_addr; char portbuf[RPCBIND_MAXUADDRPLEN]; char addrbuf[RPCBIND_MAXUADDRLEN]; - char *netid; unsigned short port; int len, netid_len; __be32 *p; @@ -2294,18 +2293,13 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da) if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0) return; port = ntohs(((struct sockaddr_in *)sap)->sin_port); - netid = "tcp"; - netid_len = 3; break; case AF_INET6: if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0) return; port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port); - netid = "tcp6"; - netid_len = 4; break; default: - /* we only support tcp and tcp6 */ WARN_ON_ONCE(1); return; } @@ -2313,8 +2307,9 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da) snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff); len = strlcat(addrbuf, portbuf, sizeof(addrbuf)); + netid_len = strlen(da->da_netid); p = xdr_reserve_space(xdr, 4 + netid_len); - xdr_encode_opaque(p, netid, netid_len); + xdr_encode_opaque(p, da->da_netid, netid_len); p = xdr_reserve_space(xdr, 4 + len); xdr_encode_opaque(p, addrbuf, len); -- cgit v1.2.3 From 988998134996a397a47cf758627def5f20dc1e88 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 9 Nov 2020 16:06:15 -0500 Subject: pNFS: Clean up open coded xdr string decoding Use the existing xdr_stream_decode_string_dup() to safely decode into kmalloced strings. Signed-off-by: Trond Myklebust --- fs/nfs/pnfs_nfs.c | 43 +++++++------------------------------------ 1 file changed, 7 insertions(+), 36 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index 7a97643acf3f..2efcfdd348a1 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -1045,9 +1045,8 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) struct nfs4_pnfs_ds_addr *da = NULL; char *buf, *portstr; __be16 port; - int nlen, rlen; + ssize_t nlen, rlen; int tmp[2]; - __be32 *p; char *netid; size_t len; char *startsep = ""; @@ -1055,45 +1054,17 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags) /* r_netid */ - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) + nlen = xdr_stream_decode_string_dup(xdr, &netid, XDR_MAX_NETOBJ, + gfp_flags); + if (unlikely(nlen < 0)) goto out_err; - nlen = be32_to_cpup(p++); - - p = xdr_inline_decode(xdr, nlen); - if (unlikely(!p)) - goto out_err; - - netid = kmalloc(nlen+1, gfp_flags); - if (unlikely(!netid)) - goto out_err; - - netid[nlen] = '\0'; - memcpy(netid, p, nlen); /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ - p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - goto out_free_netid; - rlen = be32_to_cpup(p); - - p = xdr_inline_decode(xdr, rlen); - if (unlikely(!p)) - goto out_free_netid; - /* port is ".ABC.DEF", 8 chars max */ - if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { - dprintk("%s: Invalid address, length %d\n", __func__, - rlen); + rlen = xdr_stream_decode_string_dup(xdr, &buf, INET6_ADDRSTRLEN + + IPV6_SCOPE_ID_LEN + 8, gfp_flags); + if (unlikely(rlen < 0)) goto out_free_netid; - } - buf = kmalloc(rlen + 1, gfp_flags); - if (!buf) { - dprintk("%s: Not enough memory\n", __func__); - goto out_free_netid; - } - buf[rlen] = '\0'; - memcpy(buf, p, rlen); /* replace port '.' with '-' */ portstr = strrchr(buf, '.'); -- cgit v1.2.3 From 046e5ccb4198b990190e11fb52fd9cfd264402eb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 13 Nov 2020 21:42:16 -0500 Subject: NFSv4: Fix the alignment of page data in the getdeviceinfo reply We can fit the device_addr4 opaque data padding in the pages. Fixes: cf500bac8fd4 ("SUNRPC: Introduce rpc_prepare_reply_pages()") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c6dbfcae7517..c16b93df1bc1 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3009,15 +3009,19 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; + uint32_t replen; encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); + + replen = hdr.replen + op_decode_hdr_maxsz; + encode_getdeviceinfo(xdr, args, &hdr); - /* set up reply kvec. Subtract notification bitmap max size (2) - * so that notification bitmap is put in xdr_buf tail */ + /* set up reply kvec. device_addr4 opaque data is read into the + * pages */ rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase, - args->pdev->pglen, hdr.replen - 2); + args->pdev->pglen, replen + 2 + 1); encode_nops(&hdr); } -- cgit v1.2.3 From 9ed5af268e88f6e5b65376be98d652b37cb20d7b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 21 Nov 2020 20:46:18 -0500 Subject: SUNRPC: Clean up the handling of page padding in rpc_prepare_reply_pages() rpc_prepare_reply_pages() currently expects the 'hdrsize' argument to contain the length of the data that we expect to want placed in the head kvec plus a count of 1 word of padding that is placed after the page data. This is very confusing when trying to read the code, and sometimes leads to callers adding an arbitrary value of '1' just in order to satisfy the requirement (whether or not the page data actually needs such padding). This patch aims to clarify the code by changing the 'hdrsize' argument to remove that 1 word of padding. This means we need to subtract the padding from all the existing callers. Fixes: 02ef04e432ba ("NFS: Account for XDR pad of buf->pages") Signed-off-by: Trond Myklebust --- fs/nfs/nfs2xdr.c | 19 ++++++++++--------- fs/nfs/nfs3xdr.c | 29 ++++++++++++++++------------- fs/nfs/nfs4xdr.c | 36 +++++++++++++++++++----------------- 3 files changed, 45 insertions(+), 39 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index f6676af37d5d..7fba7711e6b3 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -34,6 +34,7 @@ * Declare the space requirements for NFS arguments and replies as * number of 32bit-words */ +#define NFS_pagepad_sz (1) /* Page padding */ #define NFS_fhandle_sz (8) #define NFS_sattr_sz (8) #define NFS_filename_sz (1+(NFS2_MAXNAMLEN>>2)) @@ -56,11 +57,11 @@ #define NFS_attrstat_sz (1+NFS_fattr_sz) #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz) -#define NFS_readlinkres_sz (2+1) -#define NFS_readres_sz (1+NFS_fattr_sz+1+1) +#define NFS_readlinkres_sz (2+NFS_pagepad_sz) +#define NFS_readres_sz (1+NFS_fattr_sz+1+NFS_pagepad_sz) #define NFS_writeres_sz (NFS_attrstat_sz) #define NFS_stat_sz (1) -#define NFS_readdirres_sz (1+1) +#define NFS_readdirres_sz (1+NFS_pagepad_sz) #define NFS_statfsres_sz (1+NFS_info_sz) static int nfs_stat_to_errno(enum nfs_stat); @@ -592,8 +593,8 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req, const struct nfs_readlinkargs *args = data; encode_fhandle(xdr, args->fh); - rpc_prepare_reply_pages(req, args->pages, args->pgbase, - args->pglen, NFS_readlinkres_sz); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->pglen, + NFS_readlinkres_sz - NFS_pagepad_sz); } /* @@ -628,8 +629,8 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, const struct nfs_pgio_args *args = data; encode_readargs(xdr, args); - rpc_prepare_reply_pages(req, args->pages, args->pgbase, - args->count, NFS_readres_sz); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->count, + NFS_readres_sz - NFS_pagepad_sz); req->rq_rcv_buf.flags |= XDRBUF_READ; } @@ -786,8 +787,8 @@ static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req, const struct nfs_readdirargs *args = data; encode_readdirargs(xdr, args); - rpc_prepare_reply_pages(req, args->pages, 0, - args->count, NFS_readdirres_sz); + rpc_prepare_reply_pages(req, args->pages, 0, args->count, + NFS_readdirres_sz - NFS_pagepad_sz); } /* diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 69971f6c840d..ca10072644ff 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -33,6 +33,7 @@ * Declare the space requirements for NFS arguments and replies as * number of 32bit-words */ +#define NFS3_pagepad_sz (1) /* Page padding */ #define NFS3_fhandle_sz (1+16) #define NFS3_fh_sz (NFS3_fhandle_sz) /* shorthand */ #define NFS3_sattr_sz (15) @@ -69,13 +70,13 @@ #define NFS3_removeres_sz (NFS3_setattrres_sz) #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz)) #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1) -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1) -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1) +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+NFS3_pagepad_sz) +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+NFS3_pagepad_sz) #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4) #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz)) #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1) +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+NFS3_pagepad_sz) #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13) #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12) #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6) @@ -85,7 +86,8 @@ #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \ XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)) #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \ - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1) + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+\ + NFS3_pagepad_sz) #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) static int nfs3_stat_to_errno(enum nfs_stat); @@ -909,8 +911,8 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req, const struct nfs3_readlinkargs *args = data; encode_nfs_fh3(xdr, args->fh); - rpc_prepare_reply_pages(req, args->pages, args->pgbase, - args->pglen, NFS3_readlinkres_sz); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->pglen, + NFS3_readlinkres_sz - NFS3_pagepad_sz); } /* @@ -939,7 +941,8 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, const void *data) { const struct nfs_pgio_args *args = data; - unsigned int replen = args->replen ? args->replen : NFS3_readres_sz; + unsigned int replen = args->replen ? args->replen : + NFS3_readres_sz - NFS3_pagepad_sz; encode_read3args(xdr, args); rpc_prepare_reply_pages(req, args->pages, args->pgbase, @@ -1239,8 +1242,8 @@ static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req, const struct nfs3_readdirargs *args = data; encode_readdir3args(xdr, args); - rpc_prepare_reply_pages(req, args->pages, 0, - args->count, NFS3_readdirres_sz); + rpc_prepare_reply_pages(req, args->pages, 0, args->count, + NFS3_readdirres_sz - NFS3_pagepad_sz); } /* @@ -1281,8 +1284,8 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req, const struct nfs3_readdirargs *args = data; encode_readdirplus3args(xdr, args); - rpc_prepare_reply_pages(req, args->pages, 0, - args->count, NFS3_readdirres_sz); + rpc_prepare_reply_pages(req, args->pages, 0, args->count, + NFS3_readdirres_sz - NFS3_pagepad_sz); } /* @@ -1328,7 +1331,7 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req, if (args->mask & (NFS_ACL | NFS_DFACL)) { rpc_prepare_reply_pages(req, args->pages, 0, NFSACL_MAXPAGES << PAGE_SHIFT, - ACL3_getaclres_sz); + ACL3_getaclres_sz - NFS3_pagepad_sz); req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; } } @@ -1648,7 +1651,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr, result->op_status = status; if (status != NFS3_OK) goto out_status; - result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2); + result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2); error = decode_read3resok(xdr, result); out: return error; diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c16b93df1bc1..3899ef3047f4 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -84,6 +84,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, /* lock,open owner id: * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) */ +#define pagepad_maxsz (1) #define open_owner_id_maxsz (1 + 2 + 1 + 1 + 2) #define lock_owner_id_maxsz (1 + 1 + 4) #define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) @@ -215,14 +216,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, nfs4_fattr_bitmap_maxsz) #define encode_read_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 3) -#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1) +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + pagepad_maxsz) #define encode_readdir_maxsz (op_encode_hdr_maxsz + \ 2 + encode_verifier_maxsz + 5 + \ nfs4_label_maxsz) #define decode_readdir_maxsz (op_decode_hdr_maxsz + \ - decode_verifier_maxsz + 1) + decode_verifier_maxsz + pagepad_maxsz) #define encode_readlink_maxsz (op_encode_hdr_maxsz) -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1) +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz) #define encode_write_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 4) #define decode_write_maxsz (op_decode_hdr_maxsz + \ @@ -284,14 +285,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, #define decode_delegreturn_maxsz (op_decode_hdr_maxsz) #define encode_getacl_maxsz (encode_getattr_maxsz) #define decode_getacl_maxsz (op_decode_hdr_maxsz + \ - nfs4_fattr_bitmap_maxsz + 1 + 1) + nfs4_fattr_bitmap_maxsz + 1 + pagepad_maxsz) #define encode_setacl_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + 3) #define decode_setacl_maxsz (decode_setattr_maxsz) #define encode_fs_locations_maxsz \ (encode_getattr_maxsz) #define decode_fs_locations_maxsz \ - (1) + (pagepad_maxsz) #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz) #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4)) @@ -393,12 +394,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req, /* devaddr4 payload is read into page */ \ 1 /* notification bitmap length */ + \ 1 /* notification bitmap, word 0 */ + \ - 1 /* possible XDR padding */) + pagepad_maxsz /* possible XDR padding */) #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \ encode_stateid_maxsz) #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ decode_stateid_maxsz + \ - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1) + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + \ + pagepad_maxsz) #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \ 2 /* offset */ + \ 2 /* length */ + \ @@ -2342,7 +2344,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr, encode_layoutget(xdr, args->lg_args, &hdr); rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0, args->lg_args->layout.pglen, - hdr.replen); + hdr.replen - pagepad_maxsz); } encode_nops(&hdr); } @@ -2388,7 +2390,7 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req, encode_layoutget(xdr, args->lg_args, &hdr); rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0, args->lg_args->layout.pglen, - hdr.replen); + hdr.replen - pagepad_maxsz); } encode_nops(&hdr); } @@ -2499,7 +2501,7 @@ static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr, encode_readlink(xdr, args, req, &hdr); rpc_prepare_reply_pages(req, args->pages, args->pgbase, - args->pglen, hdr.replen); + args->pglen, hdr.replen - pagepad_maxsz); encode_nops(&hdr); } @@ -2520,7 +2522,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr, encode_readdir(xdr, args, req, &hdr); rpc_prepare_reply_pages(req, args->pages, args->pgbase, - args->count, hdr.replen); + args->count, hdr.replen - pagepad_maxsz); encode_nops(&hdr); } @@ -2541,7 +2543,7 @@ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr, encode_read(xdr, args, &hdr); rpc_prepare_reply_pages(req, args->pages, args->pgbase, - args->count, hdr.replen); + args->count, hdr.replen - pagepad_maxsz); req->rq_rcv_buf.flags |= XDRBUF_READ; encode_nops(&hdr); } @@ -2588,7 +2590,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr, ARRAY_SIZE(nfs4_acl_bitmap), &hdr); rpc_prepare_reply_pages(req, args->acl_pages, 0, - args->acl_len, replen + 1); + args->acl_len, replen); encode_nops(&hdr); } @@ -2810,7 +2812,7 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req, } rpc_prepare_reply_pages(req, (struct page **)&args->page, 0, - PAGE_SIZE, replen + 1); + PAGE_SIZE, replen); encode_nops(&hdr); } @@ -3014,14 +3016,14 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req, encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); - replen = hdr.replen + op_decode_hdr_maxsz; + replen = hdr.replen + op_decode_hdr_maxsz + 2; encode_getdeviceinfo(xdr, args, &hdr); /* set up reply kvec. device_addr4 opaque data is read into the * pages */ rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase, - args->pdev->pglen, replen + 2 + 1); + args->pdev->pglen, replen); encode_nops(&hdr); } @@ -3043,7 +3045,7 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req, encode_layoutget(xdr, args, &hdr); rpc_prepare_reply_pages(req, args->layout.pages, 0, - args->layout.pglen, hdr.replen); + args->layout.pglen, hdr.replen - pagepad_maxsz); encode_nops(&hdr); } -- cgit v1.2.3 From 17068466ad02d3ec07ab1b8f3f97928598affc9a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 21 Nov 2020 21:41:08 -0500 Subject: NFSv4: Fix open coded xdr_stream_remaining() Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 3899ef3047f4..de69276cfd22 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -5337,11 +5337,11 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, res->acl_len = attrlen; /* Check for receive buffer overflow */ - if (res->acl_len > (xdr->nwords << 2) || + if (res->acl_len > xdr_stream_remaining(xdr) || res->acl_len + res->acl_data_offset > xdr->buf->page_len) { res->acl_flags |= NFS4_ACL_TRUNC; - dprintk("NFS: acl reply: attrlen %u > page_len %u\n", - attrlen, xdr->nwords << 2); + dprintk("NFS: acl reply: attrlen %u > page_len %zu\n", + attrlen, xdr_stream_remaining(xdr)); } } else status = -EOPNOTSUPP; -- cgit v1.2.3 From b6d49ecd1081740b6e632366428b960461f8158b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 25 Nov 2020 12:06:14 -0500 Subject: NFSv4: Fix a pNFS layout related use-after-free race when freeing the inode When returning the layout in nfs4_evict_inode(), we need to ensure that the layout is actually done being freed before we can proceed to free the inode itself. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4super.c | 2 +- fs/nfs/pnfs.c | 33 +++++++++++++++++++++++++++++++-- fs/nfs/pnfs.h | 5 +++++ 3 files changed, 37 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c index 93f5c1678ec2..984cc42ee54d 100644 --- a/fs/nfs/nfs4super.c +++ b/fs/nfs/nfs4super.c @@ -67,7 +67,7 @@ static void nfs4_evict_inode(struct inode *inode) nfs_inode_evict_delegation(inode); /* Note that above delegreturn would trigger pnfs return-on-close */ pnfs_return_layout(inode); - pnfs_destroy_layout(NFS_I(inode)); + pnfs_destroy_layout_final(NFS_I(inode)); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); nfs4_xattr_cache_zap(inode); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0e50b9d45c32..07f59dc8cb2e 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -294,6 +294,7 @@ void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) { struct inode *inode; + unsigned long i_state; if (!lo) return; @@ -304,8 +305,12 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo) if (!list_empty(&lo->plh_segs)) WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n"); pnfs_detach_layout_hdr(lo); + i_state = inode->i_state; spin_unlock(&inode->i_lock); pnfs_free_layout_hdr(lo); + /* Notify pnfs_destroy_layout_final() that we're done */ + if (i_state & (I_FREEING | I_CLEAR)) + wake_up_var(lo); } } @@ -734,8 +739,7 @@ pnfs_free_lseg_list(struct list_head *free_me) } } -void -pnfs_destroy_layout(struct nfs_inode *nfsi) +static struct pnfs_layout_hdr *__pnfs_destroy_layout(struct nfs_inode *nfsi) { struct pnfs_layout_hdr *lo; LIST_HEAD(tmp_list); @@ -753,9 +757,34 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) pnfs_put_layout_hdr(lo); } else spin_unlock(&nfsi->vfs_inode.i_lock); + return lo; +} + +void pnfs_destroy_layout(struct nfs_inode *nfsi) +{ + __pnfs_destroy_layout(nfsi); } EXPORT_SYMBOL_GPL(pnfs_destroy_layout); +static bool pnfs_layout_removed(struct nfs_inode *nfsi, + struct pnfs_layout_hdr *lo) +{ + bool ret; + + spin_lock(&nfsi->vfs_inode.i_lock); + ret = nfsi->layout != lo; + spin_unlock(&nfsi->vfs_inode.i_lock); + return ret; +} + +void pnfs_destroy_layout_final(struct nfs_inode *nfsi) +{ + struct pnfs_layout_hdr *lo = __pnfs_destroy_layout(nfsi); + + if (lo) + wait_var_event(lo, pnfs_layout_removed(nfsi, lo)); +} + static bool pnfs_layout_add_bulk_destroy_list(struct inode *inode, struct list_head *layout_list) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index f618c49697bb..bbd3de1025f2 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -268,6 +268,7 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_layoutget_free(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); void pnfs_destroy_layout(struct nfs_inode *); +void pnfs_destroy_layout_final(struct nfs_inode *); void pnfs_destroy_all_layouts(struct nfs_client *); int pnfs_destroy_layouts_byfsid(struct nfs_client *clp, struct nfs_fsid *fsid, @@ -712,6 +713,10 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi) { } +static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi) +{ +} + static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { -- cgit v1.2.3 From d18a9d3fa0f27a47706fb67f1ee0f4d971587c4e Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Thu, 12 Nov 2020 02:09:51 -0800 Subject: NFS: NFSv2/NFSv3: Use cred from fs_context during mount There was refactoring done to use the fs_context for mounting done in: 62a55d088cd87: NFS: Additional refactoring for fs_context conversion This made it so that the net_ns is fetched from the fs_context (the netns that fsopen is called in). This change also makes it so that the credential fetched during fsopen is used as well as the net_ns. NFS has already had a number of changes to prepare it for user namespaces: 1a58e8a0e5c1: NFS: Store the credential of the mount process in the nfs_server 264d948ce7d0: NFS: Convert NFSv3 to use the container user namespace c207db2f5da5: NFS: Convert NFSv2 to use the container user namespace Previously, different credentials could be used for creation of the fs_context versus creation of the nfs_server, as FSCONFIG_CMD_CREATE did the actual credential check, and that's where current_creds() were fetched. This meant that the user namespace which fsopen was called in could be a non-init user namespace. This still requires that the user that calls FSCONFIG_CMD_CREATE has CAP_SYS_ADMIN in the init user ns. This roughly allows a privileged user to mount on behalf of an unprivileged usernamespace, by forking off and calling fsopen in the unprivileged user namespace. It can then pass back that fsfd to the privileged process which can configure the NFS mount, and then it can call FSCONFIG_CMD_CREATE before switching back into the mount namespace of the container, and finish up the mounting process and call fsmount and move_mount. Signed-off-by: Sargun Dhillon Tested-by: Alban Crequy Fixes: 62a55d088cd8 ("NFS: Additional refactoring for fs_context conversion") Signed-off-by: Trond Myklebust --- fs/nfs/client.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/client.c b/fs/nfs/client.c index f6454ba53d05..ff5c4d0d6d13 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -571,7 +571,7 @@ static int nfs_start_lockd(struct nfs_server *server) 1 : 0, .net = clp->cl_net, .nlmclnt_ops = clp->cl_nfs_mod->rpc_ops->nlmclnt_ops, - .cred = current_cred(), + .cred = server->cred, }; if (nlm_init.nfs_version > 3) @@ -985,7 +985,7 @@ struct nfs_server *nfs_create_server(struct fs_context *fc) if (!server) return ERR_PTR(-ENOMEM); - server->cred = get_cred(current_cred()); + server->cred = get_cred(fc->cred); error = -ENOMEM; fattr = nfs_alloc_fattr(); -- cgit v1.2.3 From d3ff46fe693683cb9660e9b93e8c932cc8e0c1f8 Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Thu, 12 Nov 2020 02:09:52 -0800 Subject: NFSv4: Refactor to use user namespaces for nfs4idmap In several patches work has been done to enable NFSv4 to use user namespaces: 58002399da65: NFSv4: Convert the NFS client idmapper to use the container user namespace 3b7eb5e35d0f: NFS: When mounting, don't share filesystems between different user namespaces Unfortunately, the userspace APIs were only such that the userspace facing side of the filesystem (superblock s_user_ns) could be set to a non init user namespace. This furthers the fs_context related refactoring, and piggybacks on top of that logic, so the superblock user namespace, and the NFS user namespace are the same. Users can still use rpc.idmapd if they choose to, but there are complexities with user namespaces and request-key that have yet to be addresssed. Eventually, we will need to at least: * Come up with an upcall mechanism that can be triggered inside of the container, or safely triggered outside, with the requisite context to do the right mapping. * Handle whatever refactoring needs to be done in net/sunrpc. Signed-off-by: Sargun Dhillon Tested-by: Alban Crequy Fixes: 62a55d088cd8 ("NFS: Additional refactoring for fs_context conversion") Signed-off-by: Trond Myklebust --- fs/nfs/nfs4client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index be7915c861ce..86acffe7335c 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1153,7 +1153,7 @@ struct nfs_server *nfs4_create_server(struct fs_context *fc) if (!server) return ERR_PTR(-ENOMEM); - server->cred = get_cred(current_cred()); + server->cred = get_cred(fc->cred); auth_probe = ctx->auth_info.flavor_len < 1; -- cgit v1.2.3 From bf701b765eaa82dd164d65edc5747ec7288bb5c3 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 27 Nov 2020 11:24:33 +1100 Subject: NFS: switch nfsiod to be an UNBOUND workqueue. nfsiod is currently a concurrency-managed workqueue (CMWQ). This means that workitems scheduled to nfsiod on a given CPU are queued behind all other work items queued on any CMWQ on the same CPU. This can introduce unexpected latency. Occaionally nfsiod can even cause excessive latency. If the work item to complete a CLOSE request calls the final iput() on an inode, the address_space of that inode will be dismantled. This takes time proportional to the number of in-memory pages, which on a large host working on large files (e.g.. 5TB), can be a large number of pages resulting in a noticable number of seconds. We can avoid these latency problems by switching nfsiod to WQ_UNBOUND. This causes each concurrent work item to gets a dedicated thread which can be scheduled to an idle CPU. There is precedent for this as several other filesystems use WQ_UNBOUND workqueue for handling various async events. Signed-off-by: NeilBrown Fixes: ada609ee2ac2 ("workqueue: use WQ_MEM_RECLAIM instead of WQ_RESCUER") Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 9b765a900b28..522aa10a1a3e 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2173,7 +2173,7 @@ static int nfsiod_start(void) { struct workqueue_struct *wq; dprintk("RPC: creating workqueue nfsiod\n"); - wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0); + wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0); if (wq == NULL) return -ENOMEM; nfsiod_workqueue = wq; -- cgit v1.2.3 From fa94a951bf3553fee66e2c879b97da97bcf00589 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Dec 2020 12:04:51 -0500 Subject: NFSv4.2: Fix up the get/listxattr calls to rpc_prepare_reply_pages() Ensure that both getxattr and listxattr page array are correctly aligned, and that getxattr correctly accounts for the page padding word. Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 8432bd6b95f0..103978ff76c9 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -191,7 +191,7 @@ #define encode_getxattr_maxsz (op_encode_hdr_maxsz + 1 + \ nfs4_xattr_name_maxsz) -#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + 1) +#define decode_getxattr_maxsz (op_decode_hdr_maxsz + 1 + pagepad_maxsz) #define encode_setxattr_maxsz (op_encode_hdr_maxsz + \ 1 + nfs4_xattr_name_maxsz + 1) #define decode_setxattr_maxsz (op_decode_hdr_maxsz + decode_change_info_maxsz) @@ -1476,17 +1476,18 @@ static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr, struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; + uint32_t replen; size_t plen; encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); + replen = hdr.replen + op_decode_hdr_maxsz + 1; encode_getxattr(xdr, args->xattr_name, &hdr); plen = args->xattr_len ? args->xattr_len : XATTR_SIZE_MAX; - rpc_prepare_reply_pages(req, args->xattr_pages, 0, plen, - hdr.replen); + rpc_prepare_reply_pages(req, args->xattr_pages, 0, plen, replen); req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; encode_nops(&hdr); @@ -1520,14 +1521,15 @@ static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req, struct compound_hdr hdr = { .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; + uint32_t replen; encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); encode_putfh(xdr, args->fh, &hdr); + replen = hdr.replen + op_decode_hdr_maxsz + 2 + 1; encode_listxattrs(xdr, args, &hdr); - rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, - hdr.replen); + rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, replen); encode_nops(&hdr); } -- cgit v1.2.3 From a1f26739ccdcc6967617998bd200dd907f7ff80a Mon Sep 17 00:00:00 2001 From: Frank van der Linden Date: Wed, 2 Dec 2020 00:34:11 +0000 Subject: NFSv4.2: improve page handling for GETXATTR XDRBUF_SPARSE_PAGES can cause problems for the RDMA transport, and it's easy enough to allocate enough pages for the request up front, so do that. Also, since we've allocated the pages anyway, use the full page aligned length for the receive buffer. This will allow caching of valid replies that are too large for the caller, but that still fit in the allocated pages. Signed-off-by: Frank van der Linden Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 47 ++++++++++++++++++++++++++++++++++++----------- fs/nfs/nfs42xdr.c | 13 ++++++++----- 2 files changed, 44 insertions(+), 16 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 4fc61e3d098d..b9836e2ce4a2 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1173,14 +1173,12 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name, } static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name, - void *buf, size_t buflen) + void *buf, size_t buflen, struct page **pages, + size_t plen) { struct nfs_server *server = NFS_SERVER(inode); - struct page *pages[NFS4XATTR_MAXPAGES] = {}; struct nfs42_getxattrargs arg = { .fh = NFS_FH(inode), - .xattr_pages = pages, - .xattr_len = buflen, .xattr_name = name, }; struct nfs42_getxattrres res; @@ -1189,7 +1187,10 @@ static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name, .rpc_argp = &arg, .rpc_resp = &res, }; - int ret, np; + ssize_t ret; + + arg.xattr_len = plen; + arg.xattr_pages = pages; ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 0); @@ -1214,10 +1215,6 @@ static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name, _copy_from_pages(buf, pages, 0, res.xattr_len); } - np = DIV_ROUND_UP(res.xattr_len, PAGE_SIZE); - while (--np >= 0) - __free_page(pages[np]); - return res.xattr_len; } @@ -1292,16 +1289,44 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name, void *buf, size_t buflen) { struct nfs4_exception exception = { }; - ssize_t err; + ssize_t err, np, i; + struct page **pages; + np = nfs_page_array_len(0, buflen ?: XATTR_SIZE_MAX); + pages = kmalloc_array(np, sizeof(*pages), GFP_KERNEL); + if (!pages) + return -ENOMEM; + + for (i = 0; i < np; i++) { + pages[i] = alloc_page(GFP_KERNEL); + if (!pages[i]) { + np = i + 1; + goto out; + } + } + + /* + * The GETXATTR op has no length field in the call, and the + * xattr data is at the end of the reply. + * + * There is no downside in using the page-aligned length. It will + * allow receiving and caching xattrs that are too large for the + * caller but still fit in the page-rounded value. + */ do { - err = _nfs42_proc_getxattr(inode, name, buf, buflen); + err = _nfs42_proc_getxattr(inode, name, buf, buflen, + pages, np * PAGE_SIZE); if (err >= 0) break; err = nfs4_handle_exception(NFS_SERVER(inode), err, &exception); } while (exception.retry); +out: + while (--np >= 0) + __free_page(pages[np]); + kfree(pages); + return err; } diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 103978ff76c9..c0b8fcd266c9 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -489,6 +489,12 @@ static int decode_getxattr(struct xdr_stream *xdr, return -EIO; len = be32_to_cpup(p); + + /* + * Only check against the page length here. The actual + * requested length may be smaller, but that is only + * checked against after possibly caching a valid reply. + */ if (len > req->rq_rcv_buf.page_len) return -ERANGE; @@ -1477,7 +1483,6 @@ static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr, .minorversion = nfs4_xdr_minorversion(&args->seq_args), }; uint32_t replen; - size_t plen; encode_compound_hdr(xdr, req, &hdr); encode_sequence(xdr, &args->seq_args, &hdr); @@ -1485,10 +1490,8 @@ static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr, replen = hdr.replen + op_decode_hdr_maxsz + 1; encode_getxattr(xdr, args->xattr_name, &hdr); - plen = args->xattr_len ? args->xattr_len : XATTR_SIZE_MAX; - - rpc_prepare_reply_pages(req, args->xattr_pages, 0, plen, replen); - req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES; + rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->xattr_len, + replen); encode_nops(&hdr); } -- cgit v1.2.3 From 1f70ea700909d77d5658c33b6bf13e9123416ff1 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 7 Dec 2020 17:03:52 +0800 Subject: NFSv4.1: use BITS_PER_LONG macro in nfs4session.h Use the existing BITS_PER_LONG macro instead of calculating the value. Signed-off-by: Geliang Tang Signed-off-by: Trond Myklebust --- fs/nfs/nfs4session.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h index b996ee23f1ba..3de425f59b3a 100644 --- a/fs/nfs/nfs4session.h +++ b/fs/nfs/nfs4session.h @@ -34,7 +34,7 @@ enum nfs4_slot_tbl_state { NFS4_SLOT_TBL_DRAINING, }; -#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long)) +#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, BITS_PER_LONG) struct nfs4_slot_table { struct nfs4_session *session; /* Parent session */ struct nfs4_slot *slots; /* seqid per slot */ -- cgit v1.2.3 From 1ee6310119a5b4460324111a8c4536054356b963 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 8 Dec 2020 13:11:11 -0500 Subject: NFSv4.2: Ensure we always reset the result->count in decode_read_plus() Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index c0b8fcd266c9..1c21db640f4d 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1087,6 +1087,7 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) if (unlikely(!p)) return -EIO; + res->count = 0; eof = be32_to_cpup(p++); segments = be32_to_cpup(p++); if (segments == 0) -- cgit v1.2.3 From 5c4afe2ab624cb8156e987ff929e00632fb56aeb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 8 Dec 2020 07:40:04 -0500 Subject: NFSv4.2: decode_read_plus_data() must skip padding after data segment All XDR opaque object sizes are 32-bit aligned, and a data segment is no exception. Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 1c21db640f4d..4c6bce3dbaeb 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1038,7 +1038,9 @@ static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *re p = xdr_decode_hyper(p, &offset); count = be32_to_cpup(p); - recvd = xdr_align_data(xdr, res->count, count); + recvd = xdr_align_data(xdr, res->count, xdr_align_size(count)); + if (recvd > count) + recvd = count; res->count += recvd; if (count > recvd) { -- cgit v1.2.3 From 82f98c8b116bd769a47688ca5227f94826ae8a2a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 8 Dec 2020 08:14:55 -0500 Subject: NFSv4.2: decode_read_plus_hole() needs to check the extent offset The server is allowed to return a hole extent with an offset that starts before the offset supplied in the READ_PLUS argument. Ensure that we support that case too. Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 4c6bce3dbaeb..f9faa131a4f5 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1053,8 +1053,9 @@ static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *re return 0; } -static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *res, - uint32_t *eof) +static int decode_read_plus_hole(struct xdr_stream *xdr, + struct nfs_pgio_args *args, + struct nfs_pgio_res *res, uint32_t *eof) { uint64_t offset, length, recvd; __be32 *p; @@ -1065,6 +1066,20 @@ static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *re p = xdr_decode_hyper(p, &offset); p = xdr_decode_hyper(p, &length); + if (offset != args->offset + res->count) { + /* Server returned an out-of-sequence extent */ + if (offset > args->offset + res->count || + offset + length < args->offset + res->count) { + dprintk("NFS: server returned out of sequence extent: " + "offset/size = %llu/%llu != expected %llu\n", + (unsigned long long)offset, + (unsigned long long)length, + (unsigned long long)(args->offset + + res->count)); + return 1; + } + length -= args->offset + res->count - offset; + } recvd = xdr_expand_hole(xdr, res->count, length); res->count += recvd; @@ -1077,6 +1092,9 @@ static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *re static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) { + struct nfs_pgio_header *hdr = + container_of(res, struct nfs_pgio_header, res); + struct nfs_pgio_args *args = &hdr->args; uint32_t eof, segments, type; int status, i; __be32 *p; @@ -1104,7 +1122,7 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) if (type == NFS4_CONTENT_DATA) status = decode_read_plus_data(xdr, res, &eof); else if (type == NFS4_CONTENT_HOLE) - status = decode_read_plus_hole(xdr, res, &eof); + status = decode_read_plus_hole(xdr, args, res, &eof); else return -EINVAL; -- cgit v1.2.3 From dac3b1059b499c570f02cd94f3172d8c8df3a9dd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 8 Dec 2020 08:41:01 -0500 Subject: NFSv4.2: Handle hole lengths that exceed the READ_PLUS read buffer If a hole extends beyond the READ_PLUS read buffer, then we want to fill just the remaining buffer with zeros. Also ignore eof... Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index f9faa131a4f5..6ba2a28e7e03 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1080,6 +1080,12 @@ static int decode_read_plus_hole(struct xdr_stream *xdr, } length -= args->offset + res->count - offset; } + if (length + res->count > args->count) { + *eof = 0; + if (unlikely(res->count >= args->count)) + return 1; + length = args->count - res->count; + } recvd = xdr_expand_hole(xdr, res->count, length); res->count += recvd; -- cgit v1.2.3 From 503b934a752f7e789a5f33217520e0a79f3096ac Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 8 Dec 2020 07:51:29 -0500 Subject: NFSv4.2: Don't error when exiting early on a READ_PLUS buffer overflow Expanding the READ_PLUS extents can cause the read buffer to overflow. If it does, then don't error, but just exit early. Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 36 +++++++++++++++++------------------- 1 file changed, 17 insertions(+), 19 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 6ba2a28e7e03..9ef5261a1a70 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1025,16 +1025,16 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re return decode_op_hdr(xdr, OP_DEALLOCATE); } -static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res, - uint32_t *eof) +static int decode_read_plus_data(struct xdr_stream *xdr, + struct nfs_pgio_res *res) { uint32_t count, recvd; uint64_t offset; __be32 *p; p = xdr_inline_decode(xdr, 8 + 4); - if (unlikely(!p)) - return -EIO; + if (!p) + return 1; p = xdr_decode_hyper(p, &offset); count = be32_to_cpup(p); @@ -1043,13 +1043,8 @@ static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *re recvd = count; res->count += recvd; - if (count > recvd) { - dprintk("NFS: server cheating in read reply: " - "count %u > recvd %u\n", count, recvd); - *eof = 0; + if (count > recvd) return 1; - } - return 0; } @@ -1061,8 +1056,8 @@ static int decode_read_plus_hole(struct xdr_stream *xdr, __be32 *p; p = xdr_inline_decode(xdr, 8 + 8); - if (unlikely(!p)) - return -EIO; + if (!p) + return 1; p = xdr_decode_hyper(p, &offset); p = xdr_decode_hyper(p, &length); @@ -1089,10 +1084,8 @@ static int decode_read_plus_hole(struct xdr_stream *xdr, recvd = xdr_expand_hole(xdr, res->count, length); res->count += recvd; - if (recvd < length) { - *eof = 0; + if (recvd < length) return 1; - } return 0; } @@ -1121,12 +1114,12 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) for (i = 0; i < segments; i++) { p = xdr_inline_decode(xdr, 4); - if (unlikely(!p)) - return -EIO; + if (!p) + goto early_out; type = be32_to_cpup(p++); if (type == NFS4_CONTENT_DATA) - status = decode_read_plus_data(xdr, res, &eof); + status = decode_read_plus_data(xdr, res); else if (type == NFS4_CONTENT_HOLE) status = decode_read_plus_hole(xdr, args, res, &eof); else @@ -1135,12 +1128,17 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) if (status < 0) return status; if (status > 0) - break; + goto early_out; } out: res->eof = eof; return 0; +early_out: + if (unlikely(!i)) + return -EIO; + res->eof = 0; + return 0; } static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res) -- cgit v1.2.3 From 7aedc687c9f62e0d22b3231a100030e02344be1a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 8 Dec 2020 09:03:51 -0500 Subject: NFSv4.2: Deal with potential READ_PLUS data extent buffer overflow If the server returns more data than we have buffer space for, then we need to truncate and exit early. Signed-off-by: Trond Myklebust --- fs/nfs/nfs42xdr.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 9ef5261a1a70..8386ca45a43f 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1026,6 +1026,7 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re } static int decode_read_plus_data(struct xdr_stream *xdr, + struct nfs_pgio_args *args, struct nfs_pgio_res *res) { uint32_t count, recvd; @@ -1041,8 +1042,12 @@ static int decode_read_plus_data(struct xdr_stream *xdr, recvd = xdr_align_data(xdr, res->count, xdr_align_size(count)); if (recvd > count) recvd = count; + if (res->count + recvd > args->count) { + if (args->count > res->count) + res->count += args->count - res->count; + return 1; + } res->count += recvd; - if (count > recvd) return 1; return 0; @@ -1119,7 +1124,7 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) type = be32_to_cpup(p++); if (type == NFS4_CONTENT_DATA) - status = decode_read_plus_data(xdr, res); + status = decode_read_plus_data(xdr, args, res); else if (type == NFS4_CONTENT_HOLE) status = decode_read_plus_hole(xdr, args, res, &eof); else -- cgit v1.2.3 From 5c3485bb12c90945f86d6b1c901bbe76aa8b45c9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 10 Dec 2020 09:34:34 -0500 Subject: NFSv4.2/pnfs: Don't use READ_PLUS with pNFS yet We have no way of tracking server READ_PLUS support in pNFS for now, so just disable it. Reported-by: "Mkrtchyan, Tigran" Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7ab40d0e6a74..61a07dcb963d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5320,17 +5320,17 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) } #ifdef CONFIG_NFS_V4_2 -static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg) +static void nfs42_read_plus_support(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { - if (server->caps & NFS_CAP_READ_PLUS) + /* Note: We don't use READ_PLUS with pNFS yet */ + if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp) msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS]; - else - msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; } #else -static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg) +static void nfs42_read_plus_support(struct nfs_pgio_header *hdr, + struct rpc_message *msg) { - msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; } #endif /* CONFIG_NFS_V4_2 */ @@ -5340,7 +5340,8 @@ static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr, hdr->timestamp = jiffies; if (!hdr->pgio_done_cb) hdr->pgio_done_cb = nfs4_read_done_cb; - nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg); + msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; + nfs42_read_plus_support(hdr, msg); nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0); } -- cgit v1.2.3 From 7be9b38afafbfcc58ede3be66bfc4ea415b3d5f1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Wed, 16 Dec 2020 12:25:13 +0000 Subject: NFSv4.2: fix error return on memory allocation failure Currently when an alloc_page fails the error return is not set in variable err and a garbage initialized value is returned. Fix this by setting err to -ENOMEM before taking the error return path. Addresses-Coverity: ("Uninitialized scalar variable") Fixes: a1f26739ccdc ("NFSv4.2: improve page handling for GETXATTR") Signed-off-by: Colin Ian King Signed-off-by: Trond Myklebust --- fs/nfs/nfs42proc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index b9836e2ce4a2..f3fd935620fc 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -1301,6 +1301,7 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name, pages[i] = alloc_page(GFP_KERNEL); if (!pages[i]) { np = i + 1; + err = -ENOMEM; goto out; } } -- cgit v1.2.3 From cac1d3a2b8f7f0817ac4feab76f5d3b12e4b02d7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Dec 2020 16:31:26 -0500 Subject: NFSv4/pnfs: Add tracing for the deviceid cache Add tracepoints to allow debugging of the deviceid cache. Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 2 ++ fs/nfs/nfs4trace.h | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfs/pnfs_dev.c | 23 +++++++++++------ 3 files changed, 92 insertions(+), 8 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 61a07dcb963d..8e03a647c61a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -9662,6 +9662,8 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, if (res.notification != args.notify_types) pdev->nocache = 1; + trace_nfs4_getdeviceinfo(server, &pdev->dev_id, status); + dprintk("<-- %s status=%d\n", __func__, status); return status; diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h index 484c1da96dea..48d761e593fb 100644 --- a/fs/nfs/nfs4trace.h +++ b/fs/nfs/nfs4trace.h @@ -2189,6 +2189,81 @@ DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_done); DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_pagelist); DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_pagelist); +DECLARE_EVENT_CLASS(nfs4_deviceid_event, + TP_PROTO( + const struct nfs_client *clp, + const struct nfs4_deviceid *deviceid + ), + + TP_ARGS(clp, deviceid), + + TP_STRUCT__entry( + __string(dstaddr, clp->cl_hostname) + __array(unsigned char, deviceid, NFS4_DEVICEID4_SIZE) + ), + + TP_fast_assign( + __assign_str(dstaddr, clp->cl_hostname); + memcpy(__entry->deviceid, deviceid->data, + NFS4_DEVICEID4_SIZE); + ), + + TP_printk( + "deviceid=%s, dstaddr=%s", + __print_hex(__entry->deviceid, NFS4_DEVICEID4_SIZE), + __get_str(dstaddr) + ) +); +#define DEFINE_PNFS_DEVICEID_EVENT(name) \ + DEFINE_EVENT(nfs4_deviceid_event, name, \ + TP_PROTO(const struct nfs_client *clp, \ + const struct nfs4_deviceid *deviceid \ + ), \ + TP_ARGS(clp, deviceid)) +DEFINE_PNFS_DEVICEID_EVENT(nfs4_deviceid_free); + +DECLARE_EVENT_CLASS(nfs4_deviceid_status, + TP_PROTO( + const struct nfs_server *server, + const struct nfs4_deviceid *deviceid, + int status + ), + + TP_ARGS(server, deviceid, status), + + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, status) + __string(dstaddr, server->nfs_client->cl_hostname) + __array(unsigned char, deviceid, NFS4_DEVICEID4_SIZE) + ), + + TP_fast_assign( + __entry->dev = server->s_dev; + __entry->status = status; + __assign_str(dstaddr, server->nfs_client->cl_hostname); + memcpy(__entry->deviceid, deviceid->data, + NFS4_DEVICEID4_SIZE); + ), + + TP_printk( + "dev=%02x:%02x: deviceid=%s, dstaddr=%s, status=%d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __print_hex(__entry->deviceid, NFS4_DEVICEID4_SIZE), + __get_str(dstaddr), + __entry->status + ) +); +#define DEFINE_PNFS_DEVICEID_STATUS(name) \ + DEFINE_EVENT(nfs4_deviceid_status, name, \ + TP_PROTO(const struct nfs_server *server, \ + const struct nfs4_deviceid *deviceid, \ + int status \ + ), \ + TP_ARGS(server, deviceid, status)) +DEFINE_PNFS_DEVICEID_STATUS(nfs4_getdeviceinfo); +DEFINE_PNFS_DEVICEID_STATUS(nfs4_find_deviceid); + DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event, TP_PROTO( const struct nfs_pgio_header *hdr diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 537b80d693f1..ddbbf4fcda86 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -34,6 +34,8 @@ #include "internal.h" #include "pnfs.h" +#include "nfs4trace.h" + #define NFSDBG_FACILITY NFSDBG_PNFS /* @@ -192,24 +194,28 @@ nfs4_find_get_deviceid(struct nfs_server *server, d = __nfs4_find_get_deviceid(server, id, hash); if (d) - return d; + goto found; new = nfs4_get_device_info(server, id, cred, gfp_mask); - if (!new) + if (!new) { + trace_nfs4_find_deviceid(server, id, -ENOENT); return new; + } spin_lock(&nfs4_deviceid_lock); d = __nfs4_find_get_deviceid(server, id, hash); if (d) { spin_unlock(&nfs4_deviceid_lock); server->pnfs_curr_ld->free_deviceid_node(new); - return d; + } else { + atomic_inc(&new->ref); + hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); + spin_unlock(&nfs4_deviceid_lock); + d = new; } - hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]); - atomic_inc(&new->ref); - spin_unlock(&nfs4_deviceid_lock); - - return new; +found: + trace_nfs4_find_deviceid(server, id, 0); + return d; } EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); @@ -278,6 +284,7 @@ nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) } if (!atomic_dec_and_test(&d->ref)) return false; + trace_nfs4_deviceid_free(d->nfs_client, &d->deviceid); d->ld->free_deviceid_node(d); return true; } -- cgit v1.2.3 From 9bfffea3524b49d0268d01f8e7967f06c4d0a942 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Dec 2020 14:01:07 -0500 Subject: pNFS/flexfiles: Avoid spurious layout returns in ff_layout_choose_ds_for_read The callers of ff_layout_choose_ds_for_read() should decide whether or not they want to return the layout on error. Sometimes, we may just want to retry from the beginning. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index b7c26836b2cb..439169301d56 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -740,16 +740,12 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg, struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); struct nfs4_ff_layout_mirror *mirror; struct nfs4_pnfs_ds *ds; - bool fail_return = false; u32 idx; /* mirrors are initially sorted by efficiency */ for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) { - if (idx+1 == fls->mirror_array_cnt) - fail_return = !check_device; - mirror = FF_LAYOUT_COMP(lseg, idx); - ds = nfs4_ff_layout_prepare_ds(lseg, mirror, fail_return); + ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false); if (!ds) continue; -- cgit v1.2.3 From 52104f274e2d7f134d34bab11cada8913d4544e2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 16 Dec 2020 17:17:45 -0500 Subject: NFS/pNFS: Fix a typo in ff_layout_resend_pnfs_read() Don't bump the index twice. Fixes: 563c53e73b8b ("NFS: Fix flexfiles read failover") Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 439169301d56..43ed743d10ea 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1052,7 +1052,7 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr) u32 idx = hdr->pgio_mirror_idx + 1; u32 new_idx = 0; - if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx)) + if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx)) ff_layout_send_layouterror(hdr->lseg); else pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg); -- cgit v1.2.3