From 1530827b90025cdf80c9b0d07a166d045a0a7b81 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Tue, 5 Dec 2023 10:05:01 -0500
Subject: blocklayoutdriver: Fix reference leak of pnfs_device_node

The error path for blocklayout's device lookup is missing a reference drop
for the case where a lookup finds the device, but the device is marked with
NFS_DEVICEID_UNAVAILABLE.

Fixes: b3dce6a2f060 ("pnfs/blocklayout: handle transient devices")
Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/blocklayout/blocklayout.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/nfs')

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 943aeea1eb16..1d1d7abc3205 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -580,6 +580,8 @@ retry:
 		nfs4_delete_deviceid(node->ld, node->nfs_client, id);
 		goto retry;
 	}
+
+	nfs4_put_deviceid_node(node);
 	return ERR_PTR(-ENODEV);
 }
 
-- 
cgit v1.2.3


From d76c769c8db4c321aa4d697e810e0ed0b30bcc91 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Tue, 5 Dec 2023 10:05:02 -0500
Subject: pnfs/blocklayout: Don't add zero-length pnfs_block_dev

We noticed a SCSI device that refused to allow READ CAPACITY when the
device had a PR with exclusive access, registrants only.  The result of
this situation is that the blocklayout driver adds a pnfs_block_dev of zero
length which always fails the offset_in_map tests.  Instead of continuously
trying to do pNFS for this case, just mark the device as unavailable which
will allow the client to fallback to the MDS for the duration of
PNFS_DEVICE_RETRY_TIMEOUT.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/blocklayout/dev.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs/nfs')

diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index f318a05a80e1..c97ebc42ec0f 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -351,6 +351,9 @@ bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
 	d->map = bl_map_simple;
 	d->pr_key = v->scsi.pr_key;
 
+	if (d->len == 0)
+		return -ENODEV;
+
 	pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
 		d->bdev_handle->bdev->bd_disk->disk_name, d->pr_key);
 
-- 
cgit v1.2.3


From b4d4fd60f884e75d52bf9254bc56b3ea41ea2686 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Wed, 6 Dec 2023 08:10:22 -0500
Subject: NFSv4: Always ask for type with READDIR

Again we have claimed regressions for walking a directory tree, this time
with the "find" utility which always tries to optimize away asking for any
attributes until it has a complete list of entries.  This behavior makes
the readdir plus heuristic do the wrong thing, which causes a storm of
GETATTRs to determine each entry's type in order to continue the walk.

For v4 add the type attribute to each READDIR request to include it no
matter the heuristic.  This allows a simple `find` command to proceed
quickly through a directory tree.

Suggested-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4xdr.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index deec76cf5afe..69406e60f391 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1602,7 +1602,8 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args
 static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
 {
 	uint32_t attrs[3] = {
-		FATTR4_WORD0_RDATTR_ERROR,
+		FATTR4_WORD0_TYPE
+		| FATTR4_WORD0_RDATTR_ERROR,
 		FATTR4_WORD1_MOUNTED_ON_FILEID,
 	};
 	uint32_t dircount = readdir->count;
@@ -1612,12 +1613,20 @@ static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
 	unsigned int i;
 
 	if (readdir->plus) {
-		attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
-			FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE|FATTR4_WORD0_FILEID;
-		attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER|
-			FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV|
-			FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS|
-			FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
+		attrs[0] |= FATTR4_WORD0_CHANGE
+			| FATTR4_WORD0_SIZE
+			| FATTR4_WORD0_FSID
+			| FATTR4_WORD0_FILEHANDLE
+			| FATTR4_WORD0_FILEID;
+		attrs[1] |= FATTR4_WORD1_MODE
+			| FATTR4_WORD1_NUMLINKS
+			| FATTR4_WORD1_OWNER
+			| FATTR4_WORD1_OWNER_GROUP
+			| FATTR4_WORD1_RAWDEV
+			| FATTR4_WORD1_SPACE_USED
+			| FATTR4_WORD1_TIME_ACCESS
+			| FATTR4_WORD1_TIME_METADATA
+			| FATTR4_WORD1_TIME_MODIFY;
 		attrs[2] |= FATTR4_WORD2_SECURITY_LABEL;
 	}
 	/* Use mounted_on_fileid only if the server supports it */
-- 
cgit v1.2.3


From a10a9233073d984b239e22358ba21825e27e2e88 Mon Sep 17 00:00:00 2001
From: Scott Mayhew <smayhew@redhat.com>
Date: Tue, 5 Dec 2023 09:10:54 -0500
Subject: NFS: Use parent's objective cred in nfs_access_login_time()

The subjective cred (task->cred) can potentially be overridden and
subsquently freed in non-RCU context, which could lead to a panic if we
try to use it in cred_fscmp().  Use __task_cred(), which returns the
objective cred (task->real_cred) instead.

Fixes: 0eb43812c027 ("NFS: Clear the file access cache upon login")
Fixes: 5e9a7b9c2ea1 ("NFS: Fix up a sparse warning")

Signed-off-by: Scott Mayhew <smayhew@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 13dffe4201e6..273c0b68abf4 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2963,7 +2963,7 @@ static u64 nfs_access_login_time(const struct task_struct *task,
 	rcu_read_lock();
 	for (;;) {
 		parent = rcu_dereference(task->real_parent);
-		pcred = rcu_dereference(parent->cred);
+		pcred = __task_cred(parent);
 		if (parent == task || cred_fscmp(pcred, cred) != 0)
 			break;
 		task = parent;
-- 
cgit v1.2.3


From e3fd54e7dc5a7f3fb7bb7c33bee1361ca0c36ed3 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Wed, 15 Nov 2023 13:55:27 -0500
Subject: NFSv4: Track the number of referring calls in struct cb_process_state

When the server gives us a set of referring calls, to tell us that the
NFSv4.1 callback needs to be ordered with respect to those calls, then
we may want to make that information available to the operations. In
certain cases, it may allow them to optimise their behaviour due to the
extra knowledge.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/callback.h      |  5 +++--
 fs/nfs/callback_proc.c | 11 ++++++++---
 2 files changed, 11 insertions(+), 5 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index ccd4f245cae2..cc1b6620a0c2 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -40,11 +40,12 @@ enum nfs4_callback_opnum {
 
 struct nfs4_slot;
 struct cb_process_state {
-	__be32			drc_status;
 	struct nfs_client	*clp;
 	struct nfs4_slot	*slot;
-	u32			minorversion;
 	struct net		*net;
+	u32			minorversion;
+	__be32			drc_status;
+	unsigned int		referring_calls;
 };
 
 struct cb_compound_hdr_arg {
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 96a4923080ae..5d7fe37d226c 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -450,6 +450,7 @@ static int referring_call_exists(struct nfs_client *clp,
 	__acquires(lock)
 {
 	int status = 0;
+	int found = 0;
 	int i, j;
 	struct nfs4_session *session;
 	struct nfs4_slot_table *tbl;
@@ -478,11 +479,12 @@ static int referring_call_exists(struct nfs_client *clp,
 			spin_lock(lock);
 			if (status)
 				goto out;
+			found++;
 		}
 	}
 
 out:
-	return status;
+	return status < 0 ? status : found;
 }
 
 __be32 nfs4_callback_sequence(void *argp, void *resp,
@@ -493,6 +495,7 @@ __be32 nfs4_callback_sequence(void *argp, void *resp,
 	struct nfs4_slot_table *tbl;
 	struct nfs4_slot *slot;
 	struct nfs_client *clp;
+	int ret;
 	int i;
 	__be32 status = htonl(NFS4ERR_BADSESSION);
 
@@ -552,11 +555,13 @@ __be32 nfs4_callback_sequence(void *argp, void *resp,
 	 * related callback was received before the response to the original
 	 * call.
 	 */
-	if (referring_call_exists(clp, args->csa_nrclists, args->csa_rclists,
-				&tbl->slot_tbl_lock) < 0) {
+	ret = referring_call_exists(clp, args->csa_nrclists, args->csa_rclists,
+				    &tbl->slot_tbl_lock);
+	if (ret < 0) {
 		status = htonl(NFS4ERR_DELAY);
 		goto out_unlock;
 	}
+	cps->referring_calls = ret;
 
 	/*
 	 * RFC5661 20.9.3
-- 
cgit v1.2.3


From dce72920c81b7e2ee6e0c9b2fde8762160b9992a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Wed, 15 Nov 2023 13:55:28 -0500
Subject: NFSv4.1: if referring calls are complete, trust the stateid argument

If the server is recalling a layout, and sends us a list of referring
calls that we can see are complete, then we should just trust that the
stateid argument is correct, even if the sequence id doesn't match the
one we hold.

Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/callback_proc.c | 44 ++++++++++++++++++++++++--------------------
 1 file changed, 24 insertions(+), 20 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 5d7fe37d226c..76cea34477ae 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -207,7 +207,8 @@ static struct inode *nfs_layout_find_inode(struct nfs_client *clp,
  * Enforce RFC5661 section 12.5.5.2.1. (Layout Recall and Return Sequencing)
  */
 static u32 pnfs_check_callback_stateid(struct pnfs_layout_hdr *lo,
-					const nfs4_stateid *new)
+					const nfs4_stateid *new,
+					struct cb_process_state *cps)
 {
 	u32 oldseq, newseq;
 
@@ -221,28 +222,29 @@ static u32 pnfs_check_callback_stateid(struct pnfs_layout_hdr *lo,
 
 	newseq = be32_to_cpu(new->seqid);
 	/* Are we already in a layout recall situation? */
-	if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags) &&
-	    lo->plh_return_seq != 0) {
-		if (newseq < lo->plh_return_seq)
-			return NFS4ERR_OLD_STATEID;
-		if (newseq > lo->plh_return_seq)
-			return NFS4ERR_DELAY;
-		goto out;
-	}
+	if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags))
+		return NFS4ERR_DELAY;
 
-	/* Check that the stateid matches what we think it should be. */
+	/*
+	 * Check that the stateid matches what we think it should be.
+	 * Note that if the server sent us a list of referring calls,
+	 * and we know that those have completed, then we trust the
+	 * stateid argument is correct.
+	 */
 	oldseq = be32_to_cpu(lo->plh_stateid.seqid);
-	if (newseq > oldseq + 1)
+	if (newseq > oldseq + 1 && !cps->referring_calls)
 		return NFS4ERR_DELAY;
+
 	/* Crazy server! */
 	if (newseq <= oldseq)
 		return NFS4ERR_OLD_STATEID;
-out:
+
 	return NFS_OK;
 }
 
 static u32 initiate_file_draining(struct nfs_client *clp,
-				  struct cb_layoutrecallargs *args)
+				  struct cb_layoutrecallargs *args,
+				  struct cb_process_state *cps)
 {
 	struct inode *ino;
 	struct pnfs_layout_hdr *lo;
@@ -266,7 +268,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
 		goto out;
 	}
 	pnfs_get_layout_hdr(lo);
-	rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid);
+	rv = pnfs_check_callback_stateid(lo, &args->cbl_stateid, cps);
 	if (rv != NFS_OK)
 		goto unlock;
 
@@ -326,10 +328,11 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
 }
 
 static u32 do_callback_layoutrecall(struct nfs_client *clp,
-				    struct cb_layoutrecallargs *args)
+				    struct cb_layoutrecallargs *args,
+				    struct cb_process_state *cps)
 {
 	if (args->cbl_recall_type == RETURN_FILE)
-		return initiate_file_draining(clp, args);
+		return initiate_file_draining(clp, args, cps);
 	return initiate_bulk_draining(clp, args);
 }
 
@@ -340,11 +343,12 @@ __be32 nfs4_callback_layoutrecall(void *argp, void *resp,
 	u32 res = NFS4ERR_OP_NOT_IN_SESSION;
 
 	if (cps->clp)
-		res = do_callback_layoutrecall(cps->clp, args);
+		res = do_callback_layoutrecall(cps->clp, args, cps);
 	return cpu_to_be32(res);
 }
 
-static void pnfs_recall_all_layouts(struct nfs_client *clp)
+static void pnfs_recall_all_layouts(struct nfs_client *clp,
+				    struct cb_process_state *cps)
 {
 	struct cb_layoutrecallargs args;
 
@@ -352,7 +356,7 @@ static void pnfs_recall_all_layouts(struct nfs_client *clp)
 	memset(&args, 0, sizeof(args));
 	args.cbl_recall_type = RETURN_ALL;
 	/* FIXME we ignore errors, what should we do? */
-	do_callback_layoutrecall(clp, &args);
+	do_callback_layoutrecall(clp, &args, cps);
 }
 
 __be32 nfs4_callback_devicenotify(void *argp, void *resp,
@@ -622,7 +626,7 @@ __be32 nfs4_callback_recallany(void *argp, void *resp,
 		nfs_expire_unused_delegation_types(cps->clp, flags);
 
 	if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_FILE_LAYOUT))
-		pnfs_recall_all_layouts(cps->clp);
+		pnfs_recall_all_layouts(cps->clp, cps);
 
 	if (args->craa_type_mask & BIT(PNFS_FF_RCA4_TYPE_MASK_READ)) {
 		set_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &cps->clp->cl_state);
-- 
cgit v1.2.3


From 037e56a22ff37f9a9c2330b66cff55d3d1ff9b90 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Wed, 15 Nov 2023 13:55:29 -0500
Subject: NFSv4.1/pnfs: Ensure we handle the error NFS4ERR_RETURNCONFLICT

Once the client has processed the CB_LAYOUTRECALL, but has not yet
successfully returned the layout, the server is supposed to switch to
returning NFS4ERR_RETURNCONFLICT. This patch ensures that we handle
that return value correctly.

Fixes: 183d9e7b112a ("pnfs: rework LAYOUTGET retry handling")
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfs4proc.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs/nfs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8a943fffaad5..23819a756508 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -170,6 +170,7 @@ static int nfs4_map_errors(int err)
 	case -NFS4ERR_RESOURCE:
 	case -NFS4ERR_LAYOUTTRYLATER:
 	case -NFS4ERR_RECALLCONFLICT:
+	case -NFS4ERR_RETURNCONFLICT:
 		return -EREMOTEIO;
 	case -NFS4ERR_WRONGSEC:
 	case -NFS4ERR_WRONG_CRED:
@@ -558,6 +559,7 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
 		case -NFS4ERR_GRACE:
 		case -NFS4ERR_LAYOUTTRYLATER:
 		case -NFS4ERR_RECALLCONFLICT:
+		case -NFS4ERR_RETURNCONFLICT:
 			exception->delay = 1;
 			return 0;
 
@@ -9691,6 +9693,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
 		status = -EBUSY;
 		break;
 	case -NFS4ERR_RECALLCONFLICT:
+	case -NFS4ERR_RETURNCONFLICT:
 		status = -ERECALLCONFLICT;
 		break;
 	case -NFS4ERR_DELEG_REVOKED:
-- 
cgit v1.2.3


From 283064fca3e8c6a2f3d1d70bb56f5c01ee01118c Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Fri, 3 Nov 2023 06:10:49 -0400
Subject: nfs: add new tracepoint at nfs4 revalidate entry point

Add a call to the v4 d_revalidate entrypoint, just like the v3 one.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/dir.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs/nfs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 273c0b68abf4..c8ecbe999059 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2194,6 +2194,8 @@ nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
 {
 	struct inode *inode;
 
+	trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
+
 	if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
 		goto full_reval;
 	if (d_mountpoint(dentry))
-- 
cgit v1.2.3


From 310b1f89ea81ff753f55cba7f6ec457b6bbf6549 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Fri, 3 Nov 2023 06:10:50 -0400
Subject: nfs: rename the nfs_async_rename_done tracepoint

We do async renames in other cases besides sillyrenames now. This
tracepoint name is now misleading.

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfstrace.h | 2 +-
 fs/nfs/unlink.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 4e90ca531176..8ad1bed09295 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -893,7 +893,7 @@ DECLARE_EVENT_CLASS(nfs_rename_event_done,
 DEFINE_NFS_RENAME_EVENT(nfs_rename_enter);
 DEFINE_NFS_RENAME_EVENT_DONE(nfs_rename_exit);
 
-DEFINE_NFS_RENAME_EVENT_DONE(nfs_sillyrename_rename);
+DEFINE_NFS_RENAME_EVENT_DONE(nfs_async_rename_done);
 
 TRACE_EVENT(nfs_sillyrename_unlink,
 		TP_PROTO(
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 150a953a8be9..0110299643a2 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -267,7 +267,7 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
 	struct inode *new_dir = data->new_dir;
 	struct dentry *old_dentry = data->old_dentry;
 
-	trace_nfs_sillyrename_rename(old_dir, old_dentry,
+	trace_nfs_async_rename_done(old_dir, old_dentry,
 			new_dir, data->new_dentry, task->tk_status);
 	if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
 		rpc_restart_call_prepare(task);
-- 
cgit v1.2.3


From f6e70c59edee4203e9541e790e8c416a9250f190 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@kernel.org>
Date: Fri, 3 Nov 2023 06:10:51 -0400
Subject: nfs: print fileid in lookup tracepoints

With this we can see the dentry -> inode linkage that's being
revalidated. A fileid of 0 means "negative dentry".

Signed-off-by: Jeff Layton <jlayton@kernel.org>
Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/nfstrace.h | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 8ad1bed09295..de3adc4a2ce0 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -400,6 +400,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event,
 			__field(unsigned long, flags)
 			__field(dev_t, dev)
 			__field(u64, dir)
+			__field(u64, fileid)
 			__string(name, dentry->d_name.name)
 		),
 
@@ -407,16 +408,18 @@ DECLARE_EVENT_CLASS(nfs_lookup_event,
 			__entry->dev = dir->i_sb->s_dev;
 			__entry->dir = NFS_FILEID(dir);
 			__entry->flags = flags;
+			__entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry));
 			__assign_str(name, dentry->d_name.name);
 		),
 
 		TP_printk(
-			"flags=0x%lx (%s) name=%02x:%02x:%llu/%s",
+			"flags=0x%lx (%s) name=%02x:%02x:%llu/%s fileid=%llu",
 			__entry->flags,
 			show_fs_lookup_flags(__entry->flags),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->dir,
-			__get_str(name)
+			__get_str(name),
+			__entry->fileid
 		)
 );
 
@@ -444,6 +447,7 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done,
 			__field(unsigned long, flags)
 			__field(dev_t, dev)
 			__field(u64, dir)
+			__field(u64, fileid)
 			__string(name, dentry->d_name.name)
 		),
 
@@ -452,17 +456,19 @@ DECLARE_EVENT_CLASS(nfs_lookup_event_done,
 			__entry->dir = NFS_FILEID(dir);
 			__entry->error = error < 0 ? -error : 0;
 			__entry->flags = flags;
+			__entry->fileid = d_is_negative(dentry) ? 0 : NFS_FILEID(d_inode(dentry));
 			__assign_str(name, dentry->d_name.name);
 		),
 
 		TP_printk(
-			"error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s",
+			"error=%ld (%s) flags=0x%lx (%s) name=%02x:%02x:%llu/%s fileid=%llu",
 			-__entry->error, show_nfs_status(__entry->error),
 			__entry->flags,
 			show_fs_lookup_flags(__entry->flags),
 			MAJOR(__entry->dev), MINOR(__entry->dev),
 			(unsigned long long)__entry->dir,
-			__get_str(name)
+			__get_str(name),
+			__entry->fileid
 		)
 );
 
-- 
cgit v1.2.3


From 8a6291bf3b0eae1bf26621e6419a91682f2d6227 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Fri, 17 Nov 2023 06:25:13 -0500
Subject: pNFS: Fix the pnfs block driver's calculation of layoutget size

Instead of relying on the value of the 'bytes_left' field, we should
calculate the layout size based on the offset of the request that is
being written out.

Reported-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Fixes: 954998b60caa ("NFS: Fix error handling for O_DIRECT write scheduling")
Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
Tested-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/blocklayout/blocklayout.c | 5 ++---
 fs/nfs/direct.c                  | 5 +++--
 fs/nfs/internal.h                | 2 +-
 fs/nfs/pnfs.c                    | 3 ++-
 4 files changed, 8 insertions(+), 7 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 1d1d7abc3205..6be13e0ec170 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -895,10 +895,9 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
 	}
 
 	if (pgio->pg_dreq == NULL)
-		wb_size = pnfs_num_cont_bytes(pgio->pg_inode,
-					      req->wb_index);
+		wb_size = pnfs_num_cont_bytes(pgio->pg_inode, req->wb_index);
 	else
-		wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
+		wb_size = nfs_dreq_bytes_left(pgio->pg_dreq, req_offset(req));
 
 	pnfs_generic_pg_init_write(pgio, req, wb_size);
 
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f6c74f424691..5918c67dae0d 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -205,9 +205,10 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq)
 	kref_put(&dreq->kref, nfs_direct_req_free);
 }
 
-ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq)
+ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset)
 {
-	return dreq->bytes_left;
+	loff_t start = offset - dreq->io_start;
+	return dreq->max_count - start;
 }
 EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left);
 
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9c9cf764f600..b1fa81c9dff6 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -655,7 +655,7 @@ extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry);
 /* direct.c */
 void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
 			      struct nfs_direct_req *dreq);
-extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
+extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset);
 
 /* nfs4proc.c */
 extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 21a365357629..0c0fed1ecd0b 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -2733,7 +2733,8 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
 		if (pgio->pg_dreq == NULL)
 			rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
 		else
-			rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);
+			rd_size = nfs_dreq_bytes_left(pgio->pg_dreq,
+						      req_offset(req));
 
 		pgio->pg_lseg =
 			pnfs_update_layout(pgio->pg_inode, nfs_req_openctx(req),
-- 
cgit v1.2.3


From 1fd5394e6ab8b11465a5d0867f188fad1835a762 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Fri, 17 Nov 2023 06:25:14 -0500
Subject: NFS: drop unused nfs_direct_req bytes_left

Now that we're calculating how large a remaining IO should be based
on the current request's offset, we no longer need to track bytes_left on
each struct nfs_direct_req.  Drop the field, and clean up the direct
request tracepoints.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/direct.c   | 6 ++----
 fs/nfs/internal.h | 1 -
 fs/nfs/nfstrace.h | 6 ++----
 3 files changed, 4 insertions(+), 9 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 5918c67dae0d..c03926a1cc73 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -369,7 +369,6 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 			bytes -= req_len;
 			requested_bytes += req_len;
 			pos += req_len;
-			dreq->bytes_left -= req_len;
 		}
 		nfs_direct_release_pages(pagevec, npages);
 		kvfree(pagevec);
@@ -441,7 +440,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
 		goto out;
 
 	dreq->inode = inode;
-	dreq->bytes_left = dreq->max_count = count;
+	dreq->max_count = count;
 	dreq->io_start = iocb->ki_pos;
 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
 	l_ctx = nfs_get_lock_context(dreq->ctx);
@@ -874,7 +873,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 			bytes -= req_len;
 			requested_bytes += req_len;
 			pos += req_len;
-			dreq->bytes_left -= req_len;
 
 			if (defer) {
 				nfs_mark_request_commit(req, NULL, &cinfo, 0);
@@ -981,7 +979,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
 		goto out;
 
 	dreq->inode = inode;
-	dreq->bytes_left = dreq->max_count = count;
+	dreq->max_count = count;
 	dreq->io_start = pos;
 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
 	l_ctx = nfs_get_lock_context(dreq->ctx);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b1fa81c9dff6..e3722ce6722e 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -936,7 +936,6 @@ struct nfs_direct_req {
 	loff_t			io_start;	/* Start offset for I/O */
 	ssize_t			count,		/* bytes actually processed */
 				max_count,	/* max expected count */
-				bytes_left,	/* bytes left to be sent */
 				error;		/* any reported error */
 	struct completion	completion;	/* wait for i/o completion */
 
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index de3adc4a2ce0..afedb449b54f 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -1545,7 +1545,6 @@ DECLARE_EVENT_CLASS(nfs_direct_req_class,
 			__field(u32, fhandle)
 			__field(loff_t, offset)
 			__field(ssize_t, count)
-			__field(ssize_t, bytes_left)
 			__field(ssize_t, error)
 			__field(int, flags)
 		),
@@ -1560,19 +1559,18 @@ DECLARE_EVENT_CLASS(nfs_direct_req_class,
 			__entry->fhandle = nfs_fhandle_hash(fh);
 			__entry->offset = dreq->io_start;
 			__entry->count = dreq->count;
-			__entry->bytes_left = dreq->bytes_left;
 			__entry->error = dreq->error;
 			__entry->flags = dreq->flags;
 		),
 
 		TP_printk(
 			"error=%zd fileid=%02x:%02x:%llu fhandle=0x%08x "
-			"offset=%lld count=%zd bytes_left=%zd flags=%s",
+			"offset=%lld count=%zd flags=%s",
 			__entry->error, MAJOR(__entry->dev),
 			MINOR(__entry->dev),
 			(unsigned long long)__entry->fileid,
 			__entry->fhandle, __entry->offset,
-			__entry->count, __entry->bytes_left,
+			__entry->count,
 			nfs_show_direct_req_flags(__entry->flags)
 		)
 );
-- 
cgit v1.2.3


From 12fc0a963128b54b82e98b9909f463e784b90b07 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Fri, 15 Dec 2023 20:47:07 +0000
Subject: nfs: Remove writepage

NFS already has writepages and migrate_folio, so it does not need to
implement writepage.  The writepage operation is deprecated as it leads
to worse performance under high memory pressure due to folios being
written out in LRU order rather than sequentially within a file.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/file.c  |  1 -
 fs/nfs/write.c | 11 -----------
 2 files changed, 12 deletions(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 3f9768810427..e437de2a5ad4 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -558,7 +558,6 @@ const struct address_space_operations nfs_file_aops = {
 	.read_folio = nfs_read_folio,
 	.readahead = nfs_readahead,
 	.dirty_folio = filemap_dirty_folio,
-	.writepage = nfs_writepage,
 	.writepages = nfs_writepages,
 	.write_begin = nfs_write_begin,
 	.write_end = nfs_write_end,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index b664caea8b4e..0b4fdad511d9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -680,17 +680,6 @@ static int nfs_writepage_locked(struct folio *folio,
 	return err;
 }
 
-int nfs_writepage(struct page *page, struct writeback_control *wbc)
-{
-	struct folio *folio = page_folio(page);
-	int ret;
-
-	ret = nfs_writepage_locked(folio, wbc);
-	if (ret != AOP_WRITEPAGE_ACTIVATE)
-		unlock_page(page);
-	return ret;
-}
-
 static int nfs_writepages_callback(struct folio *folio,
 				   struct writeback_control *wbc, void *data)
 {
-- 
cgit v1.2.3


From 597a421798033cd150586b5f2607389fa4052550 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Fri, 29 Dec 2023 13:18:56 +0100
Subject: rpc_pipefs: Replace one label in bl_resolve_deviceid()

The kfree() function was called in one case by
the bl_resolve_deviceid() function during error handling
even if the passed data structure member contained a null pointer.
This issue was detected by using the Coccinelle software.

Thus use an other label.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Reviewed-by: Benjamin Coddington <bcodding@redhat.com>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/blocklayout/rpc_pipefs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs/nfs')

diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index 6c977288cc28..d8d50a88de04 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -75,7 +75,7 @@ bl_resolve_deviceid(struct nfs_server *server, struct pnfs_block_volume *b,
 	msg->len = sizeof(*bl_msg) + b->simple.len;
 	msg->data = kzalloc(msg->len, gfp_mask);
 	if (!msg->data)
-		goto out_free_data;
+		goto out_unlock;
 
 	bl_msg = msg->data;
 	bl_msg->type = BL_DEVICE_MOUNT;
-- 
cgit v1.2.3


From 57331a59ac0d680f606403eb24edd3c35aecba31 Mon Sep 17 00:00:00 2001
From: Benjamin Coddington <bcodding@redhat.com>
Date: Thu, 4 Jan 2024 09:58:46 -0500
Subject: NFSv4.1: Use the nfs_client's rpc timeouts for backchannel

For backchannel requests that lookup the appropriate nfs_client, use the
state-management rpc_clnt's rpc_timeout parameters for the backchannel's
response.  When the nfs_client cannot be found, fall back to using the
xprt's default timeout parameters.

Signed-off-by: Benjamin Coddington <bcodding@redhat.com>
Tested-by: Chuck Lever <chuck.lever@oracle.com>
Tested-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
---
 fs/nfs/callback_xdr.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs/nfs')

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 321af81c456e..9369488f2ed4 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -967,6 +967,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
 		nops--;
 	}
 
+	if (svc_is_backchannel(rqstp) && cps.clp) {
+		rqstp->bc_to_initval = cps.clp->cl_rpcclient->cl_timeout->to_initval;
+		rqstp->bc_to_retries = cps.clp->cl_rpcclient->cl_timeout->to_retries;
+	}
+
 	*hdr_res.status = status;
 	*hdr_res.nops = htonl(nops);
 	nfs4_cb_free_slot(&cps);
-- 
cgit v1.2.3