From 1530827b90025cdf80c9b0d07a166d045a0a7b81 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 5 Dec 2023 10:05:01 -0500 Subject: blocklayoutdriver: Fix reference leak of pnfs_device_node The error path for blocklayout's device lookup is missing a reference drop for the case where a lookup finds the device, but the device is marked with NFS_DEVICEID_UNAVAILABLE. Fixes: b3dce6a2f060 ("pnfs/blocklayout: handle transient devices") Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/blocklayout/blocklayout.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 943aeea1eb16..1d1d7abc3205 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -580,6 +580,8 @@ retry: nfs4_delete_deviceid(node->ld, node->nfs_client, id); goto retry; } + + nfs4_put_deviceid_node(node); return ERR_PTR(-ENODEV); } -- cgit v1.2.3 From d76c769c8db4c321aa4d697e810e0ed0b30bcc91 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 5 Dec 2023 10:05:02 -0500 Subject: pnfs/blocklayout: Don't add zero-length pnfs_block_dev We noticed a SCSI device that refused to allow READ CAPACITY when the device had a PR with exclusive access, registrants only. The result of this situation is that the blocklayout driver adds a pnfs_block_dev of zero length which always fails the offset_in_map tests. Instead of continuously trying to do pNFS for this case, just mark the device as unavailable which will allow the client to fallback to the MDS for the duration of PNFS_DEVICE_RETRY_TIMEOUT. Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/blocklayout/dev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index f318a05a80e1..c97ebc42ec0f 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -351,6 +351,9 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, d->map = bl_map_simple; d->pr_key = v->scsi.pr_key; + if (d->len == 0) + return -ENODEV; + pr_info("pNFS: using block device %s (reservation key 0x%llx)\n", d->bdev_handle->bdev->bd_disk->disk_name, d->pr_key); -- cgit v1.2.3 From b4d4fd60f884e75d52bf9254bc56b3ea41ea2686 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 6 Dec 2023 08:10:22 -0500 Subject: NFSv4: Always ask for type with READDIR Again we have claimed regressions for walking a directory tree, this time with the "find" utility which always tries to optimize away asking for any attributes until it has a complete list of entries. This behavior makes the readdir plus heuristic do the wrong thing, which causes a storm of GETATTRs to determine each entry's type in order to continue the walk. For v4 add the type attribute to each READDIR request to include it no matter the heuristic. This allows a simple `find` command to proceed quickly through a directory tree. Suggested-by: Jeff Layton Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Reviewed-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- fs/nfs/nfs4xdr.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index deec76cf5afe..69406e60f391 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -1602,7 +1602,8 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) { uint32_t attrs[3] = { - FATTR4_WORD0_RDATTR_ERROR, + FATTR4_WORD0_TYPE + | FATTR4_WORD0_RDATTR_ERROR, FATTR4_WORD1_MOUNTED_ON_FILEID, }; uint32_t dircount = readdir->count; @@ -1612,12 +1613,20 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg unsigned int i; if (readdir->plus) { - attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| - FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE|FATTR4_WORD0_FILEID; - attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER| - FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV| - FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| - FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; + attrs[0] |= FATTR4_WORD0_CHANGE + | FATTR4_WORD0_SIZE + | FATTR4_WORD0_FSID + | FATTR4_WORD0_FILEHANDLE + | FATTR4_WORD0_FILEID; + attrs[1] |= FATTR4_WORD1_MODE + | FATTR4_WORD1_NUMLINKS + | FATTR4_WORD1_OWNER + | FATTR4_WORD1_OWNER_GROUP + | FATTR4_WORD1_RAWDEV + | FATTR4_WORD1_SPACE_USED + | FATTR4_WORD1_TIME_ACCESS + | FATTR4_WORD1_TIME_METADATA + | FATTR4_WORD1_TIME_MODIFY; attrs[2] |= FATTR4_WORD2_SECURITY_LABEL; } /* Use mounted_on_fileid only if the server supports it */ -- cgit v1.2.3 From a10a9233073d984b239e22358ba21825e27e2e88 Mon Sep 17 00:00:00 2001 From: Scott Mayhew Date: Tue, 5 Dec 2023 09:10:54 -0500 Subject: NFS: Use parent's objective cred in nfs_access_login_time() The subjective cred (task->cred) can potentially be overridden and subsquently freed in non-RCU context, which could lead to a panic if we try to use it in cred_fscmp(). Use __task_cred(), which returns the objective cred (task->real_cred) instead. Fixes: 0eb43812c027 ("NFS: Clear the file access cache upon login") Fixes: 5e9a7b9c2ea1 ("NFS: Fix up a sparse warning") Signed-off-by: Scott Mayhew Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 13dffe4201e6..273c0b68abf4 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2963,7 +2963,7 @@ static u64 nfs_access_login_time(const struct task_struct *task, rcu_read_lock(); for (;;) { parent = rcu_dereference(task->real_parent); - pcred = rcu_dereference(parent->cred); + pcred = __task_cred(parent); if (parent == task || cred_fscmp(pcred, cred) != 0) break; task = parent; -- cgit v1.2.3 From e3fd54e7dc5a7f3fb7bb7c33bee1361ca0c36ed3 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 15 Nov 2023 13:55:27 -0500 Subject: NFSv4: Track the number of referring calls in struct cb_process_state When the server gives us a set of referring calls, to tell us that the NFSv4.1 callback needs to be ordered with respect to those calls, then we may want to make that information available to the operations. In certain cases, it may allow them to optimise their behaviour due to the extra knowledge. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/callback.h | 5 +++-- fs/nfs/callback_proc.c | 11 ++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index ccd4f245cae2..cc1b6620a0c2 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -40,11 +40,12 @@ enum nfs4_callback_opnum { struct nfs4_slot; struct cb_process_state { - __be32 drc_status; struct nfs_client *clp; struct nfs4_slot *slot; - u32 minorversion; struct net *net; + u32 minorversion; + __be32 drc_status; + unsigned int referring_calls; }; struct cb_compound_hdr_arg { diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 96a4923080ae..5d7fe37d226c 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -450,6 +450,7 @@ static int referring_call_exists(struct nfs_client *clp, __acquires(lock) { int status = 0; + int found = 0; int i, j; struct nfs4_session *session; struct nfs4_slot_table *tbl; @@ -478,11 +479,12 @@ static int referring_call_exists(struct nfs_client *clp, spin_lock(lock); if (status) goto out; + found++; } } out: - return status; + return status < 0 ? status : found; } __be32 nfs4_callback_sequence(void *argp, void *resp, @@ -493,6 +495,7 @@ __be32 nfs4_callback_sequence(void *argp, void *resp, struct nfs4_slot_table *tbl; struct nfs4_slot *slot; struct nfs_client *clp; + int ret; int i; __be32 status = htonl(NFS4ERR_BADSESSION); @@ -552,11 +555,13 @@ __be32 nfs4_callback_sequence(void *argp, void *resp, * related callback was received before the response to the original * call. */ - if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists, - &tbl->slot_tbl_lock) < 0) { + ret = referring_call_exists(clp, args->csa_nrclists, args->csa_rclists, + &tbl->slot_tbl_lock); + if (ret < 0) { status = htonl(NFS4ERR_DELAY); goto out_unlock; } + cps->referring_calls = ret; /* * RFC5661 20.9.3 -- cgit v1.2.3 From dce72920c81b7e2ee6e0c9b2fde8762160b9992a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 15 Nov 2023 13:55:28 -0500 Subject: NFSv4.1: if referring calls are complete, trust the stateid argument If the server is recalling a layout, and sends us a list of referring calls that we can see are complete, then we should just trust that the stateid argument is correct, even if the sequence id doesn't match the one we hold. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/callback_proc.c | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index 5d7fe37d226c..76cea34477ae 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -207,7 +207,8 @@ static struct inode *nfs_layout_find_inode(struct nfs_client *clp, * Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing) */ static u32 pnfs_check_callback_stateid(struct pnfs_layout_hdr *lo, - const nfs4_stateid *new) + const nfs4_stateid *new, + struct cb_process_state *cps) { u32 oldseq, newseq; @@ -221,28 +222,29 @@ static u32 pnfs_check_callback_stateid(struct pnfs_layout_hdr *lo, newseq = be32_to_cpu(new->seqid); /* Are we already in a layout recall situation? */ - if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) && - lo->plh_return_seq != 0) { - if (newseq < lo->plh_return_seq) - return NFS4ERR_OLD_STATEID; - if (newseq > lo->plh_return_seq) - return NFS4ERR_DELAY; - goto out; - } + if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags)) + return NFS4ERR_DELAY; - /* Check that the stateid matches what we think it should be. */ + /* + * Check that the stateid matches what we think it should be. + * Note that if the server sent us a list of referring calls, + * and we know that those have completed, then we trust the + * stateid argument is correct. + */ oldseq = be32_to_cpu(lo->plh_stateid.seqid); - if (newseq > oldseq + 1) + if (newseq > oldseq + 1 && !cps->referring_calls) return NFS4ERR_DELAY; + /* Crazy server! */ if (newseq <= oldseq) return NFS4ERR_OLD_STATEID; -out: + return NFS_OK; } static u32 initiate_file_draining(struct nfs_client *clp, - struct cb_layoutrecallargs *args) + struct cb_layoutrecallargs *args, + struct cb_process_state *cps) { struct inode *ino; struct pnfs_layout_hdr *lo; @@ -266,7 +268,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, goto out; } pnfs_get_layout_hdr(lo); - rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid); + rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid, cps); if (rv != NFS_OK) goto unlock; @@ -326,10 +328,11 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, } static u32 do_callback_layoutrecall(struct nfs_client *clp, - struct cb_layoutrecallargs *args) + struct cb_layoutrecallargs *args, + struct cb_process_state *cps) { if (args->cbl_recall_type == RETURN_FILE) - return initiate_file_draining(clp, args); + return initiate_file_draining(clp, args, cps); return initiate_bulk_draining(clp, args); } @@ -340,11 +343,12 @@ __be32 nfs4_callback_layoutrecall(void *argp, void *resp, u32 res = NFS4ERR_OP_NOT_IN_SESSION; if (cps->clp) - res = do_callback_layoutrecall(cps->clp, args); + res = do_callback_layoutrecall(cps->clp, args, cps); return cpu_to_be32(res); } -static void pnfs_recall_all_layouts(struct nfs_client *clp) +static void pnfs_recall_all_layouts(struct nfs_client *clp, + struct cb_process_state *cps) { struct cb_layoutrecallargs args; @@ -352,7 +356,7 @@ static void pnfs_recall_all_layouts(struct nfs_client *clp) memset(&args, 0, sizeof(args)); args.cbl_recall_type = RETURN_ALL; /* FIXME we ignore errors, what should we do? */ - do_callback_layoutrecall(clp, &args); + do_callback_layoutrecall(clp, &args, cps); } __be32 nfs4_callback_devicenotify(void *argp, void *resp, @@ -622,7 +626,7 @@ __be32 nfs4_callback_recallany(void *argp, void *resp, nfs_expire_unused_delegation_types(cps->clp, flags); if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_FILE_LAYOUT)) - pnfs_recall_all_layouts(cps->clp); + pnfs_recall_all_layouts(cps->clp, cps); if (args->craa_type_mask & BIT(PNFS_FF_RCA4_TYPE_MASK_READ)) { set_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &cps->clp->cl_state); -- cgit v1.2.3 From 037e56a22ff37f9a9c2330b66cff55d3d1ff9b90 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 15 Nov 2023 13:55:29 -0500 Subject: NFSv4.1/pnfs: Ensure we handle the error NFS4ERR_RETURNCONFLICT Once the client has processed the CB_LAYOUTRECALL, but has not yet successfully returned the layout, the server is supposed to switch to returning NFS4ERR_RETURNCONFLICT. This patch ensures that we handle that return value correctly. Fixes: 183d9e7b112a ("pnfs: rework LAYOUTGET retry handling") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/nfs4proc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8a943fffaad5..23819a756508 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -170,6 +170,7 @@ static int nfs4_map_errors(int err) case -NFS4ERR_RESOURCE: case -NFS4ERR_LAYOUTTRYLATER: case -NFS4ERR_RECALLCONFLICT: + case -NFS4ERR_RETURNCONFLICT: return -EREMOTEIO; case -NFS4ERR_WRONGSEC: case -NFS4ERR_WRONG_CRED: @@ -558,6 +559,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server, case -NFS4ERR_GRACE: case -NFS4ERR_LAYOUTTRYLATER: case -NFS4ERR_RECALLCONFLICT: + case -NFS4ERR_RETURNCONFLICT: exception->delay = 1; return 0; @@ -9691,6 +9693,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task, status = -EBUSY; break; case -NFS4ERR_RECALLCONFLICT: + case -NFS4ERR_RETURNCONFLICT: status = -ERECALLCONFLICT; break; case -NFS4ERR_DELEG_REVOKED: -- cgit v1.2.3 From 283064fca3e8c6a2f3d1d70bb56f5c01ee01118c Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Nov 2023 06:10:49 -0400 Subject: nfs: add new tracepoint at nfs4 revalidate entry point Add a call to the v4 d_revalidate entrypoint, just like the v3 one. Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 273c0b68abf4..c8ecbe999059 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2194,6 +2194,8 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry, { struct inode *inode; + trace_nfs_lookup_revalidate_enter(dir, dentry, flags); + if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY)) goto full_reval; if (d_mountpoint(dentry)) -- cgit v1.2.3 From 310b1f89ea81ff753f55cba7f6ec457b6bbf6549 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Nov 2023 06:10:50 -0400 Subject: nfs: rename the nfs_async_rename_done tracepoint We do async renames in other cases besides sillyrenames now. This tracepoint name is now misleading. Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfstrace.h | 2 +- fs/nfs/unlink.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 4e90ca531176..8ad1bed09295 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -893,7 +893,7 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done, DEFINE_NFS_RENAME_EVENT(nfs_rename_enter); DEFINE_NFS_RENAME_EVENT_DONE(nfs_rename_exit); -DEFINE_NFS_RENAME_EVENT_DONE(nfs_sillyrename_rename); +DEFINE_NFS_RENAME_EVENT_DONE(nfs_async_rename_done); TRACE_EVENT(nfs_sillyrename_unlink, TP_PROTO( diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 150a953a8be9..0110299643a2 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -267,7 +267,7 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) struct inode *new_dir = data->new_dir; struct dentry *old_dentry = data->old_dentry; - trace_nfs_sillyrename_rename(old_dir, old_dentry, + trace_nfs_async_rename_done(old_dir, old_dentry, new_dir, data->new_dentry, task->tk_status); if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { rpc_restart_call_prepare(task); -- cgit v1.2.3 From f6e70c59edee4203e9541e790e8c416a9250f190 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 3 Nov 2023 06:10:51 -0400 Subject: nfs: print fileid in lookup tracepoints With this we can see the dentry -> inode linkage that's being revalidated. A fileid of 0 means "negative dentry". Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- fs/nfs/nfstrace.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index 8ad1bed09295..de3adc4a2ce0 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -400,6 +400,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event, __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) + __field(u64, fileid) __string(name, dentry->d_name.name) ), @@ -407,16 +408,18 @@ DECLARE_EVENT_CLASS(nfs_lookup_event, __entry->dev = dir->i_sb->s_dev; __entry->dir = NFS_FILEID(dir); __entry->flags = flags; + __entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry)); __assign_str(name, dentry->d_name.name); ), TP_printk( - "flags=0x%lx (%s) name=%02x:%02x:%llu/%s", + "flags=0x%lx (%s) name=%02x:%02x:%llu/%s fileid=%llu", __entry->flags, show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, - __get_str(name) + __get_str(name), + __entry->fileid ) ); @@ -444,6 +447,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, __field(unsigned long, flags) __field(dev_t, dev) __field(u64, dir) + __field(u64, fileid) __string(name, dentry->d_name.name) ), @@ -452,17 +456,19 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done, __entry->dir = NFS_FILEID(dir); __entry->error = error < 0 ? -error : 0; __entry->flags = flags; + __entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry)); __assign_str(name, dentry->d_name.name); ), TP_printk( - "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s", + "error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s fileid=%llu", -__entry->error, show_nfs_status(__entry->error), __entry->flags, show_fs_lookup_flags(__entry->flags), MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->dir, - __get_str(name) + __get_str(name), + __entry->fileid ) ); -- cgit v1.2.3 From 8a6291bf3b0eae1bf26621e6419a91682f2d6227 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 17 Nov 2023 06:25:13 -0500 Subject: pNFS: Fix the pnfs block driver's calculation of layoutget size Instead of relying on the value of the 'bytes_left' field, we should calculate the layout size based on the offset of the request that is being written out. Reported-by: Benjamin Coddington Signed-off-by: Trond Myklebust Fixes: 954998b60caa ("NFS: Fix error handling for O_DIRECT write scheduling") Reviewed-by: Benjamin Coddington Tested-by: Benjamin Coddington Reviewed-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- fs/nfs/blocklayout/blocklayout.c | 5 ++--- fs/nfs/direct.c | 5 +++-- fs/nfs/internal.h | 2 +- fs/nfs/pnfs.c | 3 ++- 4 files changed, 8 insertions(+), 7 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 1d1d7abc3205..6be13e0ec170 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -895,10 +895,9 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) } if (pgio->pg_dreq == NULL) - wb_size = pnfs_num_cont_bytes(pgio->pg_inode, - req->wb_index); + wb_size = pnfs_num_cont_bytes(pgio->pg_inode, req->wb_index); else - wb_size = nfs_dreq_bytes_left(pgio->pg_dreq); + wb_size = nfs_dreq_bytes_left(pgio->pg_dreq, req_offset(req)); pnfs_generic_pg_init_write(pgio, req, wb_size); diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index f6c74f424691..5918c67dae0d 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -205,9 +205,10 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq) kref_put(&dreq->kref, nfs_direct_req_free); } -ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq) +ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset) { - return dreq->bytes_left; + loff_t start = offset - dreq->io_start; + return dreq->max_count - start; } EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9c9cf764f600..b1fa81c9dff6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -655,7 +655,7 @@ extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry); /* direct.c */ void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, struct nfs_direct_req *dreq); -extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq); +extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset); /* nfs4proc.c */ extern struct nfs_client *nfs4_init_client(struct nfs_client *clp, diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 21a365357629..0c0fed1ecd0b 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2733,7 +2733,8 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r if (pgio->pg_dreq == NULL) rd_size = i_size_read(pgio->pg_inode) - req_offset(req); else - rd_size = nfs_dreq_bytes_left(pgio->pg_dreq); + rd_size = nfs_dreq_bytes_left(pgio->pg_dreq, + req_offset(req)); pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req), -- cgit v1.2.3 From 1fd5394e6ab8b11465a5d0867f188fad1835a762 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Fri, 17 Nov 2023 06:25:14 -0500 Subject: NFS: drop unused nfs_direct_req bytes_left Now that we're calculating how large a remaining IO should be based on the current request's offset, we no longer need to track bytes_left on each struct nfs_direct_req. Drop the field, and clean up the direct request tracepoints. Signed-off-by: Benjamin Coddington Reviewed-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 6 ++---- fs/nfs/internal.h | 1 - fs/nfs/nfstrace.h | 6 ++---- 3 files changed, 4 insertions(+), 9 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 5918c67dae0d..c03926a1cc73 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -369,7 +369,6 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, bytes -= req_len; requested_bytes += req_len; pos += req_len; - dreq->bytes_left -= req_len; } nfs_direct_release_pages(pagevec, npages); kvfree(pagevec); @@ -441,7 +440,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, goto out; dreq->inode = inode; - dreq->bytes_left = dreq->max_count = count; + dreq->max_count = count; dreq->io_start = iocb->ki_pos; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); @@ -874,7 +873,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, bytes -= req_len; requested_bytes += req_len; pos += req_len; - dreq->bytes_left -= req_len; if (defer) { nfs_mark_request_commit(req, NULL, &cinfo, 0); @@ -981,7 +979,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, goto out; dreq->inode = inode; - dreq->bytes_left = dreq->max_count = count; + dreq->max_count = count; dreq->io_start = pos; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index b1fa81c9dff6..e3722ce6722e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -936,7 +936,6 @@ struct nfs_direct_req { loff_t io_start; /* Start offset for I/O */ ssize_t count, /* bytes actually processed */ max_count, /* max expected count */ - bytes_left, /* bytes left to be sent */ error; /* any reported error */ struct completion completion; /* wait for i/o completion */ diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h index de3adc4a2ce0..afedb449b54f 100644 --- a/fs/nfs/nfstrace.h +++ b/fs/nfs/nfstrace.h @@ -1545,7 +1545,6 @@ DECLARE_EVENT_CLASS(nfs_direct_req_class, __field(u32, fhandle) __field(loff_t, offset) __field(ssize_t, count) - __field(ssize_t, bytes_left) __field(ssize_t, error) __field(int, flags) ), @@ -1560,19 +1559,18 @@ DECLARE_EVENT_CLASS(nfs_direct_req_class, __entry->fhandle = nfs_fhandle_hash(fh); __entry->offset = dreq->io_start; __entry->count = dreq->count; - __entry->bytes_left = dreq->bytes_left; __entry->error = dreq->error; __entry->flags = dreq->flags; ), TP_printk( "error=%zd fileid=%02x:%02x:%llu fhandle=0x%08x " - "offset=%lld count=%zd bytes_left=%zd flags=%s", + "offset=%lld count=%zd flags=%s", __entry->error, MAJOR(__entry->dev), MINOR(__entry->dev), (unsigned long long)__entry->fileid, __entry->fhandle, __entry->offset, - __entry->count, __entry->bytes_left, + __entry->count, nfs_show_direct_req_flags(__entry->flags) ) ); -- cgit v1.2.3 From 12fc0a963128b54b82e98b9909f463e784b90b07 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 15 Dec 2023 20:47:07 +0000 Subject: nfs: Remove writepage NFS already has writepages and migrate_folio, so it does not need to implement writepage. The writepage operation is deprecated as it leads to worse performance under high memory pressure due to folios being written out in LRU order rather than sequentially within a file. Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 1 - fs/nfs/write.c | 11 ----------- 2 files changed, 12 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 3f9768810427..e437de2a5ad4 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -558,7 +558,6 @@ const struct address_space_operations nfs_file_aops = { .read_folio = nfs_read_folio, .readahead = nfs_readahead, .dirty_folio = filemap_dirty_folio, - .writepage = nfs_writepage, .writepages = nfs_writepages, .write_begin = nfs_write_begin, .write_end = nfs_write_end, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index b664caea8b4e..0b4fdad511d9 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -680,17 +680,6 @@ static int nfs_writepage_locked(struct folio *folio, return err; } -int nfs_writepage(struct page *page, struct writeback_control *wbc) -{ - struct folio *folio = page_folio(page); - int ret; - - ret = nfs_writepage_locked(folio, wbc); - if (ret != AOP_WRITEPAGE_ACTIVATE) - unlock_page(page); - return ret; -} - static int nfs_writepages_callback(struct folio *folio, struct writeback_control *wbc, void *data) { -- cgit v1.2.3 From 597a421798033cd150586b5f2607389fa4052550 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 29 Dec 2023 13:18:56 +0100 Subject: rpc_pipefs: Replace one label in bl_resolve_deviceid() The kfree() function was called in one case by the bl_resolve_deviceid() function during error handling even if the passed data structure member contained a null pointer. This issue was detected by using the Coccinelle software. Thus use an other label. Signed-off-by: Markus Elfring Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/blocklayout/rpc_pipefs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c index 6c977288cc28..d8d50a88de04 100644 --- a/fs/nfs/blocklayout/rpc_pipefs.c +++ b/fs/nfs/blocklayout/rpc_pipefs.c @@ -75,7 +75,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b, msg->len = sizeof(*bl_msg) + b->simple.len; msg->data = kzalloc(msg->len, gfp_mask); if (!msg->data) - goto out_free_data; + goto out_unlock; bl_msg = msg->data; bl_msg->type = BL_DEVICE_MOUNT; -- cgit v1.2.3 From 57331a59ac0d680f606403eb24edd3c35aecba31 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 4 Jan 2024 09:58:46 -0500 Subject: NFSv4.1: Use the nfs_client's rpc timeouts for backchannel For backchannel requests that lookup the appropriate nfs_client, use the state-management rpc_clnt's rpc_timeout parameters for the backchannel's response. When the nfs_client cannot be found, fall back to using the xprt's default timeout parameters. Signed-off-by: Benjamin Coddington Tested-by: Chuck Lever Tested-by: Jeff Layton Signed-off-by: Anna Schumaker --- fs/nfs/callback_xdr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 321af81c456e..9369488f2ed4 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -967,6 +967,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp) nops--; } + if (svc_is_backchannel(rqstp) && cps.clp) { + rqstp->bc_to_initval = cps.clp->cl_rpcclient->cl_timeout->to_initval; + rqstp->bc_to_retries = cps.clp->cl_rpcclient->cl_timeout->to_retries; + } + *hdr_res.status = status; *hdr_res.nops = htonl(nops); nfs4_cb_free_slot(&cps); -- cgit v1.2.3