From 2785259631697ebb0749a3782cca206e2e542939 Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Mon, 4 Feb 2008 23:48:37 -0800
Subject: nfs: use GFP_NOFS preloads for radix-tree insertion

NFS should use GFP_NOFS mode radix tree preloads rather than GFP_ATOMIC
allocations at radix-tree insertion-time.  This is important to reduce the
atomic memory requirement.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f55c437124a2..7be42e6eb63a 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -360,15 +360,13 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 /*
  * Insert a write request into an inode
  */
-static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
+static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 {
 	struct nfs_inode *nfsi = NFS_I(inode);
 	int error;
 
 	error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
-	BUG_ON(error == -EEXIST);
-	if (error)
-		return error;
+	BUG_ON(error);
 	if (!nfsi->npages) {
 		igrab(inode);
 		if (nfs_have_delegation(inode, FMODE_WRITE))
@@ -378,8 +376,8 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 	set_page_private(req->wb_page, (unsigned long)req);
 	nfsi->npages++;
 	kref_get(&req->wb_kref);
-	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
-	return 0;
+	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
+				NFS_PAGE_TAG_LOCKED);
 }
 
 /*
@@ -591,6 +589,13 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 		/* Loop over all inode entries and see if we find
 		 * A request for the page we wish to update
 		 */
+		if (new) {
+			if (radix_tree_preload(GFP_NOFS)) {
+				nfs_release_request(new);
+				return ERR_PTR(-ENOMEM);
+			}
+		}
+
 		spin_lock(&inode->i_lock);
 		req = nfs_page_find_request_locked(page);
 		if (req) {
@@ -601,28 +606,27 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 				error = nfs_wait_on_request(req);
 				nfs_release_request(req);
 				if (error < 0) {
-					if (new)
+					if (new) {
+						radix_tree_preload_end();
 						nfs_release_request(new);
+					}
 					return ERR_PTR(error);
 				}
 				continue;
 			}
 			spin_unlock(&inode->i_lock);
-			if (new)
+			if (new) {
+				radix_tree_preload_end();
 				nfs_release_request(new);
+			}
 			break;
 		}
 
 		if (new) {
-			int error;
 			nfs_lock_request_dontget(new);
-			error = nfs_inode_add_request(inode, new);
-			if (error) {
-				spin_unlock(&inode->i_lock);
-				nfs_unlock_request(new);
-				return ERR_PTR(error);
-			}
+			nfs_inode_add_request(inode, new);
 			spin_unlock(&inode->i_lock);
+			radix_tree_preload_end();
 			req = new;
 			goto zero_page;
 		}
-- 
cgit v1.2.3


From 4b5621f6b127bce9218998c187bd25bf7f9fc371 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 25 Feb 2008 15:56:29 -0800
Subject: NFS: Fix an f_mode/f_flags confusion in fs/nfs/write.c

O_SYNC is stored in filp->f_flags.
Thanks to Al Viro for pointing out the bug.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index f55c437124a2..80c61fdb2720 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -734,7 +734,7 @@ int nfs_updatepage(struct file *file, struct page *page,
 	 */
 	if (nfs_write_pageuptodate(page, inode) &&
 			inode->i_flock == NULL &&
-			!(file->f_mode & O_SYNC)) {
+			!(file->f_flags & O_SYNC)) {
 		count = max(count + offset, nfs_page_length(page));
 		offset = 0;
 	}
-- 
cgit v1.2.3


From 383ba71938519959be8e0b598ec658f0c211ff45 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 19 Feb 2008 20:04:20 -0500
Subject: NFS: Fix a deadlock with lazy umount

We can't allow rpc callback functions like task->tk_ops->rpc_call_prepare()
and task->tk_ops->rpc_call_done() to call mntput() in any way, since
that will cause a deadlock when the call to rpc_shutdown_client() attempts
to wait on 'task' to complete.

We can avoid the above deadlock by moving calls to mntput to
task->tk_ops->rpc_release() callback, since at that time the task will be
marked as completed, and so rpc_shutdown_client won't attempt to wait on
it.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c |  5 +++--
 fs/nfs/inode.c  |  6 +++++-
 fs/nfs/read.c   |  7 +++++--
 fs/nfs/write.c  | 13 ++++++++++---
 4 files changed, 23 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 16844f98f50e..e0170407a885 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -323,7 +323,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		data->inode = inode;
 		data->cred = msg.rpc_cred;
 		data->args.fh = NFS_FH(inode);
-		data->args.context = ctx;
+		data->args.context = get_nfs_open_context(ctx);
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
 		data->args.pages = data->pagevec;
@@ -546,6 +546,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 	data->args.fh = NFS_FH(data->inode);
 	data->args.offset = 0;
 	data->args.count = 0;
+	data->args.context = get_nfs_open_context(dreq->ctx);
 	data->res.count = 0;
 	data->res.fattr = &data->fattr;
 	data->res.verf = &data->verf;
@@ -728,7 +729,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		data->inode = inode;
 		data->cred = msg.rpc_cred;
 		data->args.fh = NFS_FH(inode);
-		data->args.context = ctx;
+		data->args.context = get_nfs_open_context(ctx);
 		data->args.offset = pos;
 		data->args.pgbase = pgbase;
 		data->args.pages = data->pagevec;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 966a8850aa30..a499fb58d858 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -521,8 +521,12 @@ struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
 
 static void __put_nfs_open_context(struct nfs_open_context *ctx, int wait)
 {
-	struct inode *inode = ctx->path.dentry->d_inode;
+	struct inode *inode;
 
+	if (ctx == NULL)
+		return;
+
+	inode = ctx->path.dentry->d_inode;
 	if (!atomic_dec_and_lock(&ctx->count, &inode->i_lock))
 		return;
 	list_del(&ctx->list);
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 3d7d9631e125..fab0d3720a03 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -73,7 +73,10 @@ static void nfs_readdata_free(struct nfs_read_data *rdata)
 
 void nfs_readdata_release(void *data)
 {
-        nfs_readdata_free(data);
+	struct nfs_read_data *rdata = data;
+
+	put_nfs_open_context(rdata->args.context);
+	nfs_readdata_free(rdata);
 }
 
 static
@@ -186,7 +189,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	data->args.pgbase = req->wb_pgbase + offset;
 	data->args.pages  = data->pagevec;
 	data->args.count  = count;
-	data->args.context = req->wb_context;
+	data->args.context = get_nfs_open_context(req->wb_context);
 
 	data->res.fattr   = &data->fattr;
 	data->res.count   = count;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 80c61fdb2720..69b4158d9a10 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -105,8 +105,11 @@ static void nfs_writedata_free(struct nfs_write_data *wdata)
 	call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free);
 }
 
-void nfs_writedata_release(void *wdata)
+void nfs_writedata_release(void *data)
 {
+	struct nfs_write_data *wdata = data;
+
+	put_nfs_open_context(wdata->args.context);
 	nfs_writedata_free(wdata);
 }
 
@@ -816,7 +819,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 	data->args.pgbase = req->wb_pgbase + offset;
 	data->args.pages  = data->pagevec;
 	data->args.count  = count;
-	data->args.context = req->wb_context;
+	data->args.context = get_nfs_open_context(req->wb_context);
 	data->args.stable  = NFS_UNSTABLE;
 	if (how & FLUSH_STABLE) {
 		data->args.stable = NFS_DATA_SYNC;
@@ -1153,8 +1156,11 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-void nfs_commit_release(void *wdata)
+void nfs_commit_release(void *data)
 {
+	struct nfs_write_data *wdata = data;
+
+	put_nfs_open_context(wdata->args.context);
 	nfs_commit_free(wdata);
 }
 
@@ -1197,6 +1203,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 	/* Note: we always request a commit of the entire inode */
 	data->args.offset = 0;
 	data->args.count  = 0;
+	data->args.context = get_nfs_open_context(first->wb_context);
 	data->res.count   = 0;
 	data->res.fattr   = &data->fattr;
 	data->res.verf    = &data->verf;
-- 
cgit v1.2.3


From 5746006f1d17d9d5a3015051ea54de4341cb31f9 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 19 Feb 2008 20:04:22 -0500
Subject: NFS: Add an nfsiod workqueue

NFS post-rpciod cleanups often involve tasks that cannot be safely
performed within the rpciod context (due to deadlock concerns). We
therefore add a dedicated NFS workqueue that can perform tasks like
cleaning up state after an interrupted NFSv4 open() call, or calling
put_nfs_open_context() after an asynchronous read or write call.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c    | 37 +++++++++++++++++++++++++++++++++++++
 fs/nfs/internal.h |  1 +
 2 files changed, 38 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index a499fb58d858..36cb99985d22 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1219,6 +1219,36 @@ static void nfs_destroy_inodecache(void)
 	kmem_cache_destroy(nfs_inode_cachep);
 }
 
+struct workqueue_struct *nfsiod_workqueue;
+
+/*
+ * start up the nfsiod workqueue
+ */
+static int nfsiod_start(void)
+{
+	struct workqueue_struct *wq;
+	dprintk("RPC:       creating workqueue nfsiod\n");
+	wq = create_singlethread_workqueue("nfsiod");
+	if (wq == NULL)
+		return -ENOMEM;
+	nfsiod_workqueue = wq;
+	return 0;
+}
+
+/*
+ * Destroy the nfsiod workqueue
+ */
+static void nfsiod_stop(void)
+{
+	struct workqueue_struct *wq;
+
+	wq = nfsiod_workqueue;
+	if (wq == NULL)
+		return;
+	nfsiod_workqueue = NULL;
+	destroy_workqueue(wq);
+}
+
 /*
  * Initialize NFS
  */
@@ -1226,6 +1256,10 @@ static int __init init_nfs_fs(void)
 {
 	int err;
 
+	err = nfsiod_start();
+	if (err)
+		goto out6;
+
 	err = nfs_fs_proc_init();
 	if (err)
 		goto out5;
@@ -1272,6 +1306,8 @@ out3:
 out4:
 	nfs_fs_proc_exit();
 out5:
+	nfsiod_stop();
+out6:
 	return err;
 }
 
@@ -1287,6 +1323,7 @@ static void __exit exit_nfs_fs(void)
 #endif
 	unregister_nfs_fs();
 	nfs_fs_proc_exit();
+	nfsiod_stop();
 }
 
 /* Not quite true; I just maintain it */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 0f5619611b8d..985dc2931031 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -143,6 +143,7 @@ extern struct rpc_procinfo nfs4_procedures[];
 extern int nfs_access_cache_shrinker(int nr_to_scan, gfp_t gfp_mask);
 
 /* inode.c */
+extern struct workqueue_struct *nfsiod_workqueue;
 extern struct inode *nfs_alloc_inode(struct super_block *sb);
 extern void nfs_destroy_inode(struct inode *);
 extern int nfs_write_inode(struct inode *,int);
-- 
cgit v1.2.3


From 101070ca2fe67186f5f5517b66cb4757b17f4e29 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 19 Feb 2008 20:04:23 -0500
Subject: NFS: Ensure that the asynchronous RPC calls complete on nfsiod.

We want to ensure that rpc_call_ops that involve mntput() are run on nfsiod
rather than on rpciod, so that they don't deadlock when the resulting
umount calls rpc_shutdown_client(). Hence we specify that read, write and
commit calls must complete on nfsiod.
Ditto for NFSv4 open, lock, locku and close asynchronous calls.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c   | 4 ++++
 fs/nfs/nfs4proc.c | 6 ++++++
 fs/nfs/read.c     | 1 +
 fs/nfs/write.c    | 2 ++
 4 files changed, 13 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e0170407a885..e44200579c8d 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -280,6 +280,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		.rpc_client = NFS_CLIENT(inode),
 		.rpc_message = &msg,
 		.callback_ops = &nfs_read_direct_ops,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 	unsigned int pgbase;
@@ -446,6 +447,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 	struct rpc_task_setup task_setup_data = {
 		.rpc_client = NFS_CLIENT(inode),
 		.callback_ops = &nfs_write_direct_ops,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 
@@ -537,6 +539,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 		.rpc_message = &msg,
 		.callback_ops = &nfs_commit_direct_ops,
 		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 
@@ -683,6 +686,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		.rpc_client = NFS_CLIENT(inode),
 		.rpc_message = &msg,
 		.callback_ops = &nfs_write_direct_ops,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 	size_t wsize = NFS_SERVER(inode)->wsize;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 7ce07862c2fb..b6db833f33e6 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,6 +51,7 @@
 
 #include "nfs4_fs.h"
 #include "delegation.h"
+#include "internal.h"
 #include "iostat.h"
 
 #define NFSDBG_FACILITY		NFSDBG_PROC
@@ -773,6 +774,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
 		.rpc_message = &msg,
 		.callback_ops = &nfs4_open_confirm_ops,
 		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 	int status;
@@ -910,6 +912,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 		.rpc_message = &msg,
 		.callback_ops = &nfs4_open_ops,
 		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 	int status;
@@ -1315,6 +1318,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
 		.rpc_client = server->client,
 		.rpc_message = &msg,
 		.callback_ops = &nfs4_close_ops,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 	int status = -ENOMEM;
@@ -3235,6 +3239,7 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
 		.rpc_client = NFS_CLIENT(lsp->ls_state->inode),
 		.rpc_message = &msg,
 		.callback_ops = &nfs4_locku_ops,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 
@@ -3419,6 +3424,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
 		.rpc_client = NFS_CLIENT(state->inode),
 		.rpc_message = &msg,
 		.callback_ops = &nfs4_lock_ops,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC,
 	};
 	int ret;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index fab0d3720a03..87546cd277d5 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -177,6 +177,7 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 		.rpc_message = &msg,
 		.callback_ops = call_ops,
 		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
 		.flags = RPC_TASK_ASYNC | swap_flags,
 	};
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 69b4158d9a10..84495ef90728 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -803,6 +803,7 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 		.rpc_message = &msg,
 		.callback_ops = call_ops,
 		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
 		.flags = flags,
 		.priority = priority,
 	};
@@ -1187,6 +1188,7 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 		.rpc_message = &msg,
 		.callback_ops = &nfs_commit_ops,
 		.callback_data = data,
+		.workqueue = nfsiod_workqueue,
 		.flags = flags,
 		.priority = priority,
 	};
-- 
cgit v1.2.3


From fda1393938035559b417dd5b26b9cc293a7aee00 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 22 Feb 2008 16:34:12 -0500
Subject: SUNRPC: Convert users of rpc_wake_up_task to use
 rpc_wake_up_queued_task

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b6db833f33e6..54743396b666 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2768,7 +2768,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 			rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL);
 			nfs4_schedule_state_recovery(clp);
 			if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0)
-				rpc_wake_up_task(task);
+				rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
 			task->tk_status = 0;
 			return -EAGAIN;
 		case -NFS4ERR_DELAY:
-- 
cgit v1.2.3


From 5d00837b90340af9106dcd93af75fd664c8eb87f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 22 Feb 2008 16:34:17 -0500
Subject: SUNRPC: Run rpc timeout functions as callbacks instead of in softirqs

An audit of the current RPC timeout functions shows that they don't really
ever need to run in the softirq context. As long as the softirq is
able to signal that the wakeup is due to a timeout (which it can do by
setting task->tk_status to -ETIMEDOUT) then the callback functions can just
run as standard task->tk_callback functions (in the rpciod/process
context).

The only possible border-line case would be xprt_timer() for the case of
UDP, when the callback is used to reduce the size of the transport
congestion window. In testing, however, the effect of moving that update
to a callback would appear to be minor.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c  | 2 +-
 fs/nfs/nfs4state.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 54743396b666..bbb0d58ee6ac 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2765,7 +2765,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server)
 		case -NFS4ERR_STALE_CLIENTID:
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
-			rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL);
+			rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
 			nfs4_schedule_state_recovery(clp);
 			if (test_bit(NFS4CLNT_STATE_RECOVER, &clp->cl_state) == 0)
 				rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index b962397004c1..a2ef02824aa8 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -731,7 +731,7 @@ int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task)
 		list_add_tail(&seqid->list, &sequence->list);
 	if (list_first_entry(&sequence->list, struct nfs_seqid, list) == seqid)
 		goto unlock;
-	rpc_sleep_on(&sequence->wait, task, NULL, NULL);
+	rpc_sleep_on(&sequence->wait, task, NULL);
 	status = -EAGAIN;
 unlock:
 	spin_unlock(&sequence->lock);
-- 
cgit v1.2.3


From f6a1cc89309f0ae847a9b6fe418d1c4215e5bc55 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 22 Feb 2008 17:06:55 -0500
Subject: SUNRPC: Add a (empty for the moment) destructor for rpc_wait_queues

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c    | 2 ++
 fs/nfs/nfs4state.c | 6 +++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index c5c0175898f6..06f064d8fbbe 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -170,6 +170,8 @@ static void nfs4_shutdown_client(struct nfs_client *clp)
 	BUG_ON(!RB_EMPTY_ROOT(&clp->cl_state_owners));
 	if (__test_and_clear_bit(NFS_CS_IDMAP, &clp->cl_res_state))
 		nfs_idmap_delete(clp);
+
+	rpc_destroy_wait_queue(&clp->cl_rpcwaitq);
 #endif
 }
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index a2ef02824aa8..7775435ea7a5 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -292,8 +292,10 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server, struct
 	spin_unlock(&clp->cl_lock);
 	if (sp == new)
 		get_rpccred(cred);
-	else
+	else {
+		rpc_destroy_wait_queue(&new->so_sequence.wait);
 		kfree(new);
+	}
 	return sp;
 }
 
@@ -310,6 +312,7 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 		return;
 	nfs4_remove_state_owner(clp, sp);
 	spin_unlock(&clp->cl_lock);
+	rpc_destroy_wait_queue(&sp->so_sequence.wait);
 	put_rpccred(cred);
 	kfree(sp);
 }
@@ -529,6 +532,7 @@ static void nfs4_free_lock_state(struct nfs4_lock_state *lsp)
 	spin_lock(&clp->cl_lock);
 	nfs_free_unique_id(&clp->cl_lockowner_id, &lsp->ls_id);
 	spin_unlock(&clp->cl_lock);
+	rpc_destroy_wait_queue(&lsp->ls_sequence.wait);
 	kfree(lsp);
 }
 
-- 
cgit v1.2.3


From 5e4424af9a1f062c6451681dff24a26e27741cc6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 25 Feb 2008 21:53:49 -0800
Subject: SUNRPC: Remove now-redundant RCU-safe rpc_task free path

Now that we've tightened up the locking rules for RPC queue wakeups, we can
remove the RCU-safe kfree calls...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/read.c  |  8 +-------
 fs/nfs/write.c | 16 ++--------------
 2 files changed, 3 insertions(+), 21 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 87546cd277d5..be9e8270f4d7 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -58,19 +58,13 @@ struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
 	return p;
 }
 
-static void nfs_readdata_rcu_free(struct rcu_head *head)
+static void nfs_readdata_free(struct nfs_read_data *p)
 {
-	struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
 	mempool_free(p, nfs_rdata_mempool);
 }
 
-static void nfs_readdata_free(struct nfs_read_data *rdata)
-{
-	call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
-}
-
 void nfs_readdata_release(void *data)
 {
 	struct nfs_read_data *rdata = data;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 84495ef90728..30513029c00c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -58,19 +58,13 @@ struct nfs_write_data *nfs_commit_alloc(void)
 	return p;
 }
 
-static void nfs_commit_rcu_free(struct rcu_head *head)
+void nfs_commit_free(struct nfs_write_data *p)
 {
-	struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
 	mempool_free(p, nfs_commit_mempool);
 }
 
-void nfs_commit_free(struct nfs_write_data *wdata)
-{
-	call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free);
-}
-
 struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
 {
 	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);
@@ -92,19 +86,13 @@ struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
 	return p;
 }
 
-static void nfs_writedata_rcu_free(struct rcu_head *head)
+static void nfs_writedata_free(struct nfs_write_data *p)
 {
-	struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
 	if (p && (p->pagevec != &p->page_array[0]))
 		kfree(p->pagevec);
 	mempool_free(p, nfs_wdata_mempool);
 }
 
-static void nfs_writedata_free(struct nfs_write_data *wdata)
-{
-	call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free);
-}
-
 void nfs_writedata_release(void *data)
 {
 	struct nfs_write_data *wdata = data;
-- 
cgit v1.2.3


From 98a8e3239427051f5d44f2025b398bdcc3918f37 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 12 Mar 2008 12:25:28 -0400
Subject: SUNRPC: Add a helper rpcauth_lookup_generic_cred()

The NFSv4 protocol allows clients to negotiate security protocols on the
fly in the case where an administrator on the server changes the export
settings and/or in the case where we may have a filesystem migration event.

Instead of having the NFS client code cache credentials that are tied to a
particular AUTH method it is therefore preferable to have a generic credential
that can be converted into whatever AUTH is in use by the RPC client when
the read/write/sillyrename/... is put on the wire.

We do this by means of the new "generic" credential, which basically just
caches the minimal information that is needed to look up an RPCSEC_GSS,
AUTH_SYS, or AUTH_NULL credential.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/dir.c      | 2 +-
 fs/nfs/inode.c    | 2 +-
 fs/nfs/nfs4proc.c | 8 ++++----
 fs/nfs/unlink.c   | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 6cea7479c5b4..d583654a0b39 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1966,7 +1966,7 @@ force_lookup:
 	if (!NFS_PROTO(inode)->access)
 		goto out_notsup;
 
-	cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
+	cred = rpc_lookup_cred();
 	if (!IS_ERR(cred)) {
 		res = nfs_do_access(inode, cred, mask);
 		put_rpccred(cred);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index c49f6d8b42d2..15f787355d27 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -613,7 +613,7 @@ int nfs_open(struct inode *inode, struct file *filp)
 	struct nfs_open_context *ctx;
 	struct rpc_cred *cred;
 
-	cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
+	cred = rpc_lookup_cred();
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
 	ctx = alloc_nfs_open_context(filp->f_path.mnt, filp->f_path.dentry, cred);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index bbb0d58ee6ac..f38d0573be18 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1408,7 +1408,7 @@ nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 		BUG_ON(nd->intent.open.flags & O_CREAT);
 	}
 
-	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
+	cred = rpc_lookup_cred();
 	if (IS_ERR(cred))
 		return (struct dentry *)cred;
 	parent = dentry->d_parent;
@@ -1443,7 +1443,7 @@ nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags, st
 	struct rpc_cred *cred;
 	struct nfs4_state *state;
 
-	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
+	cred = rpc_lookup_cred();
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
 	state = nfs4_do_open(dir, &path, openflags, NULL, cred);
@@ -1660,7 +1660,7 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 
 	nfs_fattr_init(fattr);
 	
-	cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
+	cred = rpc_lookup_cred();
 	if (IS_ERR(cred))
 		return PTR_ERR(cred);
 
@@ -1896,7 +1896,7 @@ nfs4_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
 	struct rpc_cred *cred;
 	int status = 0;
 
-	cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
+	cred = rpc_lookup_cred();
 	if (IS_ERR(cred)) {
 		status = PTR_ERR(cred);
 		goto out;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 757415363422..3adf8b266461 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -234,7 +234,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
 	if (data == NULL)
 		goto out;
 
-	data->cred = rpcauth_lookupcred(NFS_CLIENT(dir)->cl_auth, 0);
+	data->cred = rpc_lookup_cred();
 	if (IS_ERR(data->cred)) {
 		status = PTR_ERR(data->cred);
 		goto out_free;
-- 
cgit v1.2.3


From 2f42b5d043ee271d1e5d30ecd77186b6c4d4e534 Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@citi.umich.edu>
Date: Thu, 13 Mar 2008 15:26:30 +0200
Subject: NFS: fix encode_fsinfo_maxsz

The previous value was not taking into account space for bitmap array size.

Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index db1ed9c46ede..37421dd4805d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -110,7 +110,7 @@ static int nfs4_stat_to_errno(int);
 #define decode_savefh_maxsz     (op_decode_hdr_maxsz)
 #define encode_restorefh_maxsz  (op_encode_hdr_maxsz)
 #define decode_restorefh_maxsz  (op_decode_hdr_maxsz)
-#define encode_fsinfo_maxsz	(op_encode_hdr_maxsz + 2)
+#define encode_fsinfo_maxsz	(encode_getattr_maxsz)
 #define decode_fsinfo_maxsz	(op_decode_hdr_maxsz + 11)
 #define encode_renew_maxsz	(op_encode_hdr_maxsz + 3)
 #define decode_renew_maxsz	(op_decode_hdr_maxsz)
-- 
cgit v1.2.3


From 6d884e8fc8114dc8877218f06a9a9a1d801901e4 Mon Sep 17 00:00:00 2001
From: Fred <iisaman@citi.umich.edu>
Date: Wed, 19 Mar 2008 11:24:38 -0400
Subject: nfs: nfs_redirty_request

Both flush functions have the same error handling routine.  Pull
it out as a function.

Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 4cb88df12f83..ce40cadb15db 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -282,8 +282,6 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
 	spin_unlock(&inode->i_lock);
 	if (!nfs_pageio_add_request(pgio, req)) {
 		nfs_redirty_request(req);
-		nfs_end_page_writeback(page);
-		nfs_clear_page_tag_locked(req);
 		return pgio->pg_error;
 	}
 	return 0;
@@ -402,7 +400,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 }
 
 static void
-nfs_redirty_request(struct nfs_page *req)
+nfs_mark_request_dirty(struct nfs_page *req)
 {
 	__set_page_dirty_nobuffers(req->wb_page);
 }
@@ -456,7 +454,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
 		return 1;
 	}
 	if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
-		nfs_redirty_request(req);
+		nfs_mark_request_dirty(req);
 		return 1;
 	}
 	return 0;
@@ -847,6 +845,17 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 		rpc_put_task(task);
 }
 
+/* If a nfs_flush_* function fails, it should remove reqs from @head and
+ * call this on each, which will prepare them to be retried on next
+ * writeback using standard nfs.
+ */
+static void nfs_redirty_request(struct nfs_page *req)
+{
+	nfs_mark_request_dirty(req);
+	nfs_end_page_writeback(req->wb_page);
+	nfs_clear_page_tag_locked(req);
+}
+
 /*
  * Generate multiple small requests to write out a single
  * contiguous dirty area on one page.
@@ -902,8 +911,6 @@ out_bad:
 		nfs_writedata_release(data);
 	}
 	nfs_redirty_request(req);
-	nfs_end_page_writeback(req->wb_page);
-	nfs_clear_page_tag_locked(req);
 	return -ENOMEM;
 }
 
@@ -944,8 +951,6 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
 		nfs_redirty_request(req);
-		nfs_end_page_writeback(req->wb_page);
-		nfs_clear_page_tag_locked(req);
 	}
 	return -ENOMEM;
 }
@@ -1298,7 +1303,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 		}
 		/* We have a mismatch. Write the page again */
 		dprintk(" mismatch\n");
-		nfs_redirty_request(req);
+		nfs_mark_request_dirty(req);
 	next:
 		nfs_clear_page_tag_locked(req);
 	}
-- 
cgit v1.2.3


From 4af68bffac444a23f027e18ff244101e63b79227 Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@citi.umich.edu>
Date: Wed, 19 Mar 2008 11:54:04 -0400
Subject: nfs: remove duplicate initializations of nfs_read_data field

Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/read.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index ab2f7d233e01..d333f5fedca1 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -251,7 +251,6 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
 		data = nfs_readdata_alloc(1);
 		if (!data)
 			goto out_bad;
-		INIT_LIST_HEAD(&data->pages);
 		list_add(&data->pages, &list);
 		requests++;
 		nbytes -= len;
@@ -298,7 +297,6 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned
 	if (!data)
 		goto out_bad;
 
-	INIT_LIST_HEAD(&data->pages);
 	pages = data->pagevec;
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
-- 
cgit v1.2.3


From caa02bd540618e4b447a1f776363ba27c4c79090 Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 22 Feb 2008 14:49:59 -0500
Subject: NFS: clean up short packet handling for NFSv2 readdir

Currently, the NFS readdir decoders have a workaround for buggy servers
that send an empty readdir response with the EOF bit unset. If the
server sends a malformed response in some cases, this workaround kicks
in and just returns an empty response rather than returning a proper
error to the caller.

This patch does 3 things:

1) have malformed responses with no entries return error (-EIO)

2) preserve existing workaround for servers that send empty
   responses with the EOF marker unset.

3) Add some comments to clarify the logic in nfs_xdr_readdirres().

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs2xdr.c | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 1f7ea675e0c5..86a80b33ec82 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -428,7 +428,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
 	size_t hdrlen;
 	unsigned int pglen, recvd;
 	u32 len;
-	int status, nr;
+	int status, nr = 0;
 	__be32 *end, *entry, *kaddr;
 
 	if ((status = ntohl(*p++)))
@@ -452,7 +452,12 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
 	kaddr = p = kmap_atomic(*page, KM_USER0);
 	end = (__be32 *)((char *)p + pglen);
 	entry = p;
-	for (nr = 0; *p++; nr++) {
+
+	/* Make sure the packet actually has a value_follows and EOF entry */
+	if ((entry + 1) > end)
+		goto short_pkt;
+
+	for (; *p++; nr++) {
 		if (p + 2 > end)
 			goto short_pkt;
 		p++; /* fileid */
@@ -467,18 +472,32 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
 			goto short_pkt;
 		entry = p;
 	}
-	if (!nr && (entry[0] != 0 || entry[1] == 0))
-		goto short_pkt;
+
+	/*
+	 * Apparently some server sends responses that are a valid size, but
+	 * contain no entries, and have value_follows==0 and EOF==0. For
+	 * those, just set the EOF marker.
+	 */
+	if (!nr && entry[1] == 0) {
+		dprintk("NFS: readdir reply truncated!\n");
+		entry[1] = 1;
+	}
  out:
 	kunmap_atomic(kaddr, KM_USER0);
 	return nr;
  short_pkt:
+	/*
+	 * When we get a short packet there are 2 possibilities. We can
+	 * return an error, or fix up the response to look like a valid
+	 * response and return what we have so far. If there are no
+	 * entries and the packet was short, then return -EIO. If there
+	 * are valid entries in the response, return them and pretend that
+	 * the call was successful, but incomplete. The caller can retry the
+	 * readdir starting at the last cookie.
+	 */
 	entry[0] = entry[1] = 0;
-	/* truncate listing ? */
-	if (!nr) {
-		dprintk("NFS: readdir reply truncated!\n");
-		entry[1] = 1;
-	}
+	if (!nr)
+		nr = -errno_NFSERR_IO;
 	goto out;
 err_unmap:
 	nr = -errno_NFSERR_IO;
-- 
cgit v1.2.3


From 643f81115baca3630e544f6874567648b605efae Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 22 Feb 2008 14:50:00 -0500
Subject: NFS: clean up short packet handling for NFSv3 readdir

Currently, the NFS readdir decoders have a workaround for buggy servers
that send an empty readdir response with the EOF bit unset. If the
server sends a malformed response in some cases, this workaround kicks
in and just returns an empty response rather than returning a proper
error to the caller.

This patch does 3 things:

1) have malformed responses with no entries return error (-EIO)

2) preserve existing workaround for servers that send empty
   responses with the EOF marker unset.

3) Add some comments to clarify the logic in nfs3_xdr_readdirres().

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs3xdr.c | 37 ++++++++++++++++++++++++++++---------
 1 file changed, 28 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 3917e2fa4e40..fb03048ac650 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -508,7 +508,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
 	struct page **page;
 	size_t hdrlen;
 	u32 len, recvd, pglen;
-	int status, nr;
+	int status, nr = 0;
 	__be32 *entry, *end, *kaddr;
 
 	status = ntohl(*p++);
@@ -542,7 +542,12 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
 	kaddr = p = kmap_atomic(*page, KM_USER0);
 	end = (__be32 *)((char *)p + pglen);
 	entry = p;
-	for (nr = 0; *p++; nr++) {
+
+	/* Make sure the packet actually has a value_follows and EOF entry */
+	if ((entry + 1) > end)
+		goto short_pkt;
+
+	for (; *p++; nr++) {
 		if (p + 3 > end)
 			goto short_pkt;
 		p += 2;				/* inode # */
@@ -581,18 +586,32 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
 			goto short_pkt;
 		entry = p;
 	}
-	if (!nr && (entry[0] != 0 || entry[1] == 0))
-		goto short_pkt;
+
+	/*
+	 * Apparently some server sends responses that are a valid size, but
+	 * contain no entries, and have value_follows==0 and EOF==0. For
+	 * those, just set the EOF marker.
+	 */
+	if (!nr && entry[1] == 0) {
+		dprintk("NFS: readdir reply truncated!\n");
+		entry[1] = 1;
+	}
  out:
 	kunmap_atomic(kaddr, KM_USER0);
 	return nr;
  short_pkt:
+	/*
+	 * When we get a short packet there are 2 possibilities. We can
+	 * return an error, or fix up the response to look like a valid
+	 * response and return what we have so far. If there are no
+	 * entries and the packet was short, then return -EIO. If there
+	 * are valid entries in the response, return them and pretend that
+	 * the call was successful, but incomplete. The caller can retry the
+	 * readdir starting at the last cookie.
+	 */
 	entry[0] = entry[1] = 0;
-	/* truncate listing ? */
-	if (!nr) {
-		dprintk("NFS: readdir reply truncated!\n");
-		entry[1] = 1;
-	}
+	if (!nr)
+		nr = -errno_NFSERR_IO;
 	goto out;
 err_unmap:
 	nr = -errno_NFSERR_IO;
-- 
cgit v1.2.3


From 7bda2cdf484a00e52b0ed925e99d4bf4696b2c7a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 22 Feb 2008 14:50:01 -0500
Subject: NFS: clean up short packet handling for NFSv4 readdir

Currently, the NFS readdir decoders have a workaround for buggy servers
that send an empty readdir response with the EOF bit unset. If the
server sends a malformed response in some cases, this workaround kicks
in and just returns an empty response rather than returning a proper
error to the caller.

This patch does 3 things:

1) have malformed responses with no entries return error (-EIO)

2) preserve existing workaround for servers that send empty
   responses with the EOF marker unset.

3) Add some comments to clarify the logic in decode_readdir().

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c | 37 +++++++++++++++++++++++++++----------
 1 file changed, 27 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 37421dd4805d..2b519f6325f9 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3481,7 +3481,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 	size_t		hdrlen;
 	u32		recvd, pglen = rcvbuf->page_len;
 	__be32		*end, *entry, *p, *kaddr;
-	unsigned int	nr;
+	unsigned int	nr = 0;
 	int		status;
 
 	status = decode_op_hdr(xdr, OP_READDIR);
@@ -3505,7 +3505,12 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 	kaddr = p = kmap_atomic(page, KM_USER0);
 	end = p + ((pglen + readdir->pgbase) >> 2);
 	entry = p;
-	for (nr = 0; *p++; nr++) {
+
+	/* Make sure the packet actually has a value_follows and EOF entry */
+	if ((entry + 1) > end)
+		goto short_pkt;
+
+	for (; *p++; nr++) {
 		u32 len, attrlen, xlen;
 		if (end - p < 3)
 			goto short_pkt;
@@ -3532,20 +3537,32 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 		p += attrlen;		/* attributes */
 		entry = p;
 	}
-	if (!nr && (entry[0] != 0 || entry[1] == 0))
-		goto short_pkt;
+	/*
+	 * Apparently some server sends responses that are a valid size, but
+	 * contain no entries, and have value_follows==0 and EOF==0. For
+	 * those, just set the EOF marker.
+	 */
+	if (!nr && entry[1] == 0) {
+		dprintk("NFS: readdir reply truncated!\n");
+		entry[1] = 1;
+	}
 out:	
 	kunmap_atomic(kaddr, KM_USER0);
 	return 0;
 short_pkt:
+	/*
+	 * When we get a short packet there are 2 possibilities. We can
+	 * return an error, or fix up the response to look like a valid
+	 * response and return what we have so far. If there are no
+	 * entries and the packet was short, then return -EIO. If there
+	 * are valid entries in the response, return them and pretend that
+	 * the call was successful, but incomplete. The caller can retry the
+	 * readdir starting at the last cookie.
+	 */
 	dprintk("%s: short packet at entry %d\n", __FUNCTION__, nr);
 	entry[0] = entry[1] = 0;
-	/* truncate listing ? */
-	if (!nr) {
-		dprintk("NFS: readdir reply truncated!\n");
-		entry[1] = 1;
-	}
-	goto out;
+	if (nr)
+		goto out;
 err_unmap:
 	kunmap_atomic(kaddr, KM_USER0);
 	return -errno_NFSERR_IO;
-- 
cgit v1.2.3


From 2d76743227028a26f884284aade03d1e43f4f249 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:10:08 -0400
Subject: NFS: numeric mount parameters are unsigned

Clean up: use %u instead of %d when displaying NFS mount options.

Nit: Fix reporting of "namlen=" option in nfs_show_mount_stats.  The mount
option is called "namlen" without the "e".

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index fcf4b982c885..53a67c6d4d2a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -463,17 +463,17 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 	const struct proc_nfs_info *nfs_infop;
 	struct nfs_client *clp = nfss->nfs_client;
 
-	seq_printf(m, ",vers=%d", clp->rpc_ops->version);
-	seq_printf(m, ",rsize=%d", nfss->rsize);
-	seq_printf(m, ",wsize=%d", nfss->wsize);
+	seq_printf(m, ",vers=%u", clp->rpc_ops->version);
+	seq_printf(m, ",rsize=%u", nfss->rsize);
+	seq_printf(m, ",wsize=%u", nfss->wsize);
 	if (nfss->acregmin != 3*HZ || showdefaults)
-		seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ);
+		seq_printf(m, ",acregmin=%u", nfss->acregmin/HZ);
 	if (nfss->acregmax != 60*HZ || showdefaults)
-		seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ);
+		seq_printf(m, ",acregmax=%u", nfss->acregmax/HZ);
 	if (nfss->acdirmin != 30*HZ || showdefaults)
-		seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ);
+		seq_printf(m, ",acdirmin=%u", nfss->acdirmin/HZ);
 	if (nfss->acdirmax != 60*HZ || showdefaults)
-		seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ);
+		seq_printf(m, ",acdirmax=%u", nfss->acdirmax/HZ);
 	for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) {
 		if (nfss->flags & nfs_infop->flag)
 			seq_puts(m, nfs_infop->str);
@@ -529,10 +529,10 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
 
 	seq_printf(m, "\n\tcaps:\t");
 	seq_printf(m, "caps=0x%x", nfss->caps);
-	seq_printf(m, ",wtmult=%d", nfss->wtmult);
-	seq_printf(m, ",dtsize=%d", nfss->dtsize);
-	seq_printf(m, ",bsize=%d", nfss->bsize);
-	seq_printf(m, ",namelen=%d", nfss->namelen);
+	seq_printf(m, ",wtmult=%u", nfss->wtmult);
+	seq_printf(m, ",dtsize=%u", nfss->dtsize);
+	seq_printf(m, ",bsize=%u", nfss->bsize);
+	seq_printf(m, ",namlen=%u", nfss->namelen);
 
 #ifdef CONFIG_NFS_V4
 	if (nfss->nfs_client->rpc_ops->version == 4) {
@@ -546,9 +546,9 @@ static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt)
 	/*
 	 * Display security flavor in effect for this mount
 	 */
-	seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor);
+	seq_printf(m, "\n\tsec:\tflavor=%u", auth->au_ops->au_flavor);
 	if (auth->au_flavor)
-		seq_printf(m, ",pseudoflavor=%d", auth->au_flavor);
+		seq_printf(m, ",pseudoflavor=%u", auth->au_flavor);
 
 	/*
 	 * Display superblock I/O counters
-- 
cgit v1.2.3


From 78fa701f341564e60461de91cd08ff5f7fb09b31 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:10:15 -0400
Subject: NFS: Fix up data types of fields in nfs_parsed_mount_options

Clean up: make data types of fields in nfs_parsed_mount_options more
consistent with other uses.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/internal.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 4c1122a13844..e89688a955bf 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -46,9 +46,9 @@ struct nfs_parsed_mount_data {
 		struct sockaddr_storage	address;
 		size_t			addrlen;
 		char			*hostname;
-		unsigned int		version;
+		u32			version;
 		unsigned short		port;
-		int			protocol;
+		unsigned short		protocol;
 	} mount_server;
 
 	struct {
@@ -56,7 +56,7 @@ struct nfs_parsed_mount_data {
 		size_t			addrlen;
 		char			*hostname;
 		char			*export_path;
-		int			protocol;
+		unsigned short		protocol;
 	} nfs_server;
 
 	struct security_mnt_opts lsm_opts;
-- 
cgit v1.2.3


From f22d6d79fe227245363a8849ea8c85fe6c6598c3 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:10:22 -0400
Subject: NFS: Save the value of the "port=" mount option

During a remount based on the mount options displayed in /proc/mounts, we
want to preserve the original behavior of the mount request.  Let's save
the original setting of the "port=" mount option in the mount's nfs_server
structure.

This allows us to simplify the default behavior of port setting for NFSv4
mounts: by default, NFSv2/3 mounts first try an RPC bind to determine the
NFS server's port, unless the user specified the "port=" mount option;
Users can force the client to skip the RPC bind by explicitly specifying
"port=<value>".

NFSv4, by contrast, assumes the NFS server port is 2049 and skips the RPC
bind, unless the user specifies "port=".  Users can force an RPC bind for
NFSv4 by explicitly specifying "port=0".

I added a couple of extra comments to clarify this behavior.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c   |  4 ++++
 fs/nfs/internal.h |  1 +
 fs/nfs/super.c    | 37 ++++++-------------------------------
 3 files changed, 11 insertions(+), 31 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 06f064d8fbbe..874018113d0e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -682,6 +682,8 @@ static int nfs_init_server(struct nfs_server *server,
 	if (error < 0)
 		goto error;
 
+	server->port = data->nfs_server.port;
+
 	error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
 	if (error < 0)
 		goto error;
@@ -1064,6 +1066,8 @@ static int nfs4_init_server(struct nfs_server *server,
 	server->acdirmin = data->acdirmin * HZ;
 	server->acdirmax = data->acdirmax * HZ;
 
+	server->port = data->nfs_server.port;
+
 	error = nfs_init_server_rpcclient(server, &timeparms, data->auth_flavors[0]);
 
 error:
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index e89688a955bf..999ad8ee0645 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -56,6 +56,7 @@ struct nfs_parsed_mount_data {
 		size_t			addrlen;
 		char			*hostname;
 		char			*export_path;
+		unsigned short		port;
 		unsigned short		protocol;
 	} nfs_server;
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 53a67c6d4d2a..3c6f53aa7317 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -685,7 +685,6 @@ static int nfs_parse_mount_options(char *raw,
 				   struct nfs_parsed_mount_data *mnt)
 {
 	char *p, *string, *secdata;
-	unsigned short port = 0;
 	int rc;
 
 	if (!raw) {
@@ -800,7 +799,7 @@ static int nfs_parse_mount_options(char *raw,
 				return 0;
 			if (option < 0 || option > 65535)
 				return 0;
-			port = option;
+			mnt->nfs_server.port = option;
 			break;
 		case Opt_rsize:
 			if (match_int(args, &mnt->rsize))
@@ -1050,7 +1049,8 @@ static int nfs_parse_mount_options(char *raw,
 		}
 	}
 
-	nfs_set_port((struct sockaddr *)&mnt->nfs_server.address, port);
+	nfs_set_port((struct sockaddr *)&mnt->nfs_server.address,
+				mnt->nfs_server.port);
 
 	return 1;
 
@@ -1171,7 +1171,9 @@ static int nfs_validate_mount_data(void *options,
 	args->acregmax		= 60;
 	args->acdirmin		= 30;
 	args->acdirmax		= 60;
+	args->mount_server.port	= 0;	/* autobind unless user sets port */
 	args->mount_server.protocol = XPRT_TRANSPORT_UDP;
+	args->nfs_server.port	= 0;	/* autobind unless user sets port */
 	args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
 
 	switch (data->version) {
@@ -1707,28 +1709,6 @@ static void nfs4_fill_super(struct super_block *sb)
 	nfs_initialise_sb(sb);
 }
 
-/*
- * If the user didn't specify a port, set the port number to
- * the NFS version 4 default port.
- */
-static void nfs4_default_port(struct sockaddr *sap)
-{
-	switch (sap->sa_family) {
-	case AF_INET: {
-		struct sockaddr_in *ap = (struct sockaddr_in *)sap;
-		if (ap->sin_port == 0)
-			ap->sin_port = htons(NFS_PORT);
-		break;
-	}
-	case AF_INET6: {
-		struct sockaddr_in6 *ap = (struct sockaddr_in6 *)sap;
-		if (ap->sin6_port == 0)
-			ap->sin6_port = htons(NFS_PORT);
-		break;
-	}
-	}
-}
-
 /*
  * Validate NFSv4 mount options
  */
@@ -1753,6 +1733,7 @@ static int nfs4_validate_mount_data(void *options,
 	args->acregmax		= 60;
 	args->acdirmin		= 30;
 	args->acdirmax		= 60;
+	args->nfs_server.port	= NFS_PORT; /* 2049 unless user set port= */
 	args->nfs_server.protocol = XPRT_TRANSPORT_TCP;
 
 	switch (data->version) {
@@ -1769,9 +1750,6 @@ static int nfs4_validate_mount_data(void *options,
 						&args->nfs_server.address))
 			goto out_no_address;
 
-		nfs4_default_port((struct sockaddr *)
-				  &args->nfs_server.address);
-
 		switch (data->auth_flavourlen) {
 		case 0:
 			args->auth_flavors[0] = RPC_AUTH_UNIX;
@@ -1829,9 +1807,6 @@ static int nfs4_validate_mount_data(void *options,
 						&args->nfs_server.address))
 			return -EINVAL;
 
-		nfs4_default_port((struct sockaddr *)
-				  &args->nfs_server.address);
-
 		switch (args->auth_flavor_len) {
 		case 0:
 			args->auth_flavors[0] = RPC_AUTH_UNIX;
-- 
cgit v1.2.3


From 3f8400d1f1f9d5fb175bdbf6236e564dde454f28 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:10:30 -0400
Subject: NFS: Save the values of the "mount*=" mount options

Save the value of the mountproto= mountport= mountvers= and mountaddr=
options so that these values can be displayed later via
nfs_show_options().

This preserves the intent of the original mount options, should the file
system need to be remounted based on what's displayed in /proc/mounts.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 874018113d0e..93dfd75aba7c 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -688,6 +688,16 @@ static int nfs_init_server(struct nfs_server *server,
 	if (error < 0)
 		goto error;
 
+	/* Preserve the values of mount_server-related mount options */
+	if (data->mount_server.addrlen) {
+		memcpy(&server->mountd_address, &data->mount_server.address,
+			data->mount_server.addrlen);
+		server->mountd_addrlen = data->mount_server.addrlen;
+	}
+	server->mountd_version = data->mount_server.version;
+	server->mountd_port = data->mount_server.port;
+	server->mountd_protocol = data->mount_server.protocol;
+
 	server->namelen  = data->namlen;
 	/* Create a client RPC handle for the NFSv3 ACL management interface */
 	nfs_init_server_aclclient(server);
-- 
cgit v1.2.3


From 82d101d58a2312297ee79f96d44c1d8c7fe1032d Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:10:37 -0400
Subject: NFS: Show most mount options via nfs_show_options()

Display all mount options in /proc/mount which may be needed to reconstruct
a previous mount.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 65 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3c6f53aa7317..c99ca1f992ce 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -441,10 +441,52 @@ static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour)
 	return sec_flavours[i].str;
 }
 
+static void nfs_show_mountd_options(struct seq_file *m, struct nfs_server *nfss,
+				    int showdefaults)
+{
+	struct sockaddr *sap = (struct sockaddr *)&nfss->mountd_address;
+
+	switch (sap->sa_family) {
+	case AF_INET: {
+		struct sockaddr_in *sin = (struct sockaddr_in *)sap;
+		seq_printf(m, ",mountaddr=" NIPQUAD_FMT,
+				NIPQUAD(sin->sin_addr.s_addr));
+		break;
+	}
+	case AF_INET6: {
+		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
+		seq_printf(m, ",mountaddr=" NIP6_FMT,
+				NIP6(sin6->sin6_addr));
+		break;
+	}
+	default:
+		if (showdefaults)
+			seq_printf(m, ",mountaddr=unspecified");
+	}
+
+	if (nfss->mountd_version || showdefaults)
+		seq_printf(m, ",mountvers=%u", nfss->mountd_version);
+	if (nfss->mountd_port || showdefaults)
+		seq_printf(m, ",mountport=%u", nfss->mountd_port);
+
+	switch (nfss->mountd_protocol) {
+	case IPPROTO_UDP:
+		seq_printf(m, ",mountproto=udp");
+		break;
+	case IPPROTO_TCP:
+		seq_printf(m, ",mountproto=tcp");
+		break;
+	default:
+		if (showdefaults)
+			seq_printf(m, ",mountproto=auto");
+	}
+}
+
 /*
  * Describe the mount options in force on this server representation
  */
-static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults)
+static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
+				   int showdefaults)
 {
 	static const struct proc_nfs_info {
 		int flag;
@@ -452,6 +494,8 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 		const char *nostr;
 	} nfs_info[] = {
 		{ NFS_MOUNT_SOFT, ",soft", ",hard" },
+		{ NFS_MOUNT_INTR, ",intr", ",nointr" },
+		{ NFS_MOUNT_POSIX, ",posix", "" },
 		{ NFS_MOUNT_NOCTO, ",nocto", "" },
 		{ NFS_MOUNT_NOAC, ",noac", "" },
 		{ NFS_MOUNT_NONLM, ",nolock", "" },
@@ -462,10 +506,14 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 	};
 	const struct proc_nfs_info *nfs_infop;
 	struct nfs_client *clp = nfss->nfs_client;
+	u32 version = clp->rpc_ops->version;
 
-	seq_printf(m, ",vers=%u", clp->rpc_ops->version);
+	seq_printf(m, ",vers=%u", version);
 	seq_printf(m, ",rsize=%u", nfss->rsize);
 	seq_printf(m, ",wsize=%u", nfss->wsize);
+	if (nfss->bsize != 0)
+		seq_printf(m, ",bsize=%u", nfss->bsize);
+	seq_printf(m, ",namlen=%u", nfss->namelen);
 	if (nfss->acregmin != 3*HZ || showdefaults)
 		seq_printf(m, ",acregmin=%u", nfss->acregmin/HZ);
 	if (nfss->acregmax != 60*HZ || showdefaults)
@@ -482,9 +530,24 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
 	}
 	seq_printf(m, ",proto=%s",
 		   rpc_peeraddr2str(nfss->client, RPC_DISPLAY_PROTO));
+	if (version == 4) {
+		if (nfss->port != NFS_PORT)
+			seq_printf(m, ",port=%u", nfss->port);
+	} else
+		if (nfss->port)
+			seq_printf(m, ",port=%u", nfss->port);
+
 	seq_printf(m, ",timeo=%lu", 10U * nfss->client->cl_timeout->to_initval / HZ);
 	seq_printf(m, ",retrans=%u", nfss->client->cl_timeout->to_retries);
 	seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor));
+
+	if (version != 4)
+		nfs_show_mountd_options(m, nfss, showdefaults);
+
+#ifdef CONFIG_NFS_V4
+	if (clp->rpc_ops->version == 4)
+		seq_printf(m, ",clientaddr=%s", clp->cl_ipaddr);
+#endif
 }
 
 /*
-- 
cgit v1.2.3


From ecfc555a8327ff09b07066d73a98c04115007eec Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:14:56 -0400
Subject: NFS: Always enable NFS direct I/O

Since O_DIRECT is a standard feature that is enabled in most distros,
eliminate the CONFIG_NFS_DIRECTIO build option, and change the
fs/nfs/Makefile to always build in the NFS direct I/O engine.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/Kconfig        | 24 ------------------------
 fs/nfs/Makefile   |  3 +--
 fs/nfs/file.c     |  6 ------
 fs/nfs/internal.h |  5 -----
 4 files changed, 1 insertion(+), 37 deletions(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index d7312825592b..55c1a0374e13 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1637,30 +1637,6 @@ config NFS_V4
 
 	  If unsure, say N.
 
-config NFS_DIRECTIO
-	bool "Allow direct I/O on NFS files"
-	depends on NFS_FS
-	help
-	  This option enables applications to perform uncached I/O on files
-	  in NFS file systems using the O_DIRECT open() flag.  When O_DIRECT
-	  is set for a file, its data is not cached in the system's page
-	  cache.  Data is moved to and from user-level application buffers
-	  directly.  Unlike local disk-based file systems, NFS O_DIRECT has
-	  no alignment restrictions.
-
-	  Unless your program is designed to use O_DIRECT properly, you are
-	  much better off allowing the NFS client to manage data caching for
-	  you.  Misusing O_DIRECT can cause poor server performance or network
-	  storms.  This kernel build option defaults OFF to avoid exposing
-	  system administrators unwittingly to a potentially hazardous
-	  feature.
-
-	  For more details on NFS O_DIRECT, see fs/nfs/direct.c.
-
-	  If unsure, say N.  This reduces the size of the NFS client, and
-	  causes open() to return EINVAL if a file residing in NFS is
-	  opened with the O_DIRECT flag.
-
 config NFSD
 	tristate "NFS server support"
 	depends on INET
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index df0f41e09885..ac6170c594a3 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -5,7 +5,7 @@
 obj-$(CONFIG_NFS_FS) += nfs.o
 
 nfs-y 			:= client.o dir.o file.o getroot.o inode.o super.o nfs2xdr.o \
-			   pagelist.o proc.o read.o symlink.o unlink.o \
+			   direct.o pagelist.o proc.o read.o symlink.o unlink.o \
 			   write.o namespace.o mount_clnt.o
 nfs-$(CONFIG_ROOT_NFS)	+= nfsroot.o
 nfs-$(CONFIG_NFS_V3)	+= nfs3proc.o nfs3xdr.o
@@ -14,5 +14,4 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
 			   delegation.o idmap.o \
 			   callback.o callback_xdr.o callback_proc.o \
 			   nfs4namespace.o
-nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
 nfs-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index ef57a5ae5904..10e8b807e7ff 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -234,10 +234,8 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t result;
 	size_t count = iov_length(iov, nr_segs);
 
-#ifdef CONFIG_NFS_DIRECTIO
 	if (iocb->ki_filp->f_flags & O_DIRECT)
 		return nfs_file_direct_read(iocb, iov, nr_segs, pos);
-#endif
 
 	dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -383,9 +381,7 @@ const struct address_space_operations nfs_file_aops = {
 	.write_end = nfs_write_end,
 	.invalidatepage = nfs_invalidate_page,
 	.releasepage = nfs_release_page,
-#ifdef CONFIG_NFS_DIRECTIO
 	.direct_IO = nfs_direct_IO,
-#endif
 	.launder_page = nfs_launder_page,
 };
 
@@ -443,10 +439,8 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
 	ssize_t result;
 	size_t count = iov_length(iov, nr_segs);
 
-#ifdef CONFIG_NFS_DIRECTIO
 	if (iocb->ki_filp->f_flags & O_DIRECT)
 		return nfs_file_direct_write(iocb, iov, nr_segs, pos);
-#endif
 
 	dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n",
 		dentry->d_parent->d_name.name, dentry->d_name.name,
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 999ad8ee0645..04ae867dddba 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -116,13 +116,8 @@ extern void nfs_destroy_readpagecache(void);
 extern int __init nfs_init_writepagecache(void);
 extern void nfs_destroy_writepagecache(void);
 
-#ifdef CONFIG_NFS_DIRECTIO
 extern int __init nfs_init_directcache(void);
 extern void nfs_destroy_directcache(void);
-#else
-#define nfs_init_directcache() (0)
-#define nfs_destroy_directcache() do {} while(0)
-#endif
 
 /* nfs2xdr.c */
 extern int nfs_stat_to_errno(int);
-- 
cgit v1.2.3


From 327a299d8abd0f4580098cbd6c58b0f017417005 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:15:11 -0400
Subject: SUNRPC: Update help Kconfig text

Clean up: refresh the help text for Kconfig items related to the sunrpc
module.  Remove obsolete URLs, and make the language consistent among
the options.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/Kconfig | 45 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 55c1a0374e13..83201856b362 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1757,15 +1757,32 @@ config SUNRPC_XPRT_RDMA
 	tristate
 	depends on SUNRPC && INFINIBAND && EXPERIMENTAL
 	default SUNRPC && INFINIBAND
+	help
+	  This option enables an RPC client transport capability that
+	  allows the NFS client to mount servers via an RDMA-enabled
+	  transport.
+
+	  To compile RPC client RDMA transport support as a module,
+	  choose M here: the module will be called xprtrdma.
+
+	  If unsure, say N.
 
 config SUNRPC_BIND34
 	bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)"
 	depends on SUNRPC && EXPERIMENTAL
 	help
-	  Provides kernel support for querying rpcbind servers via versions 3
-	  and 4 of the rpcbind protocol.  The kernel automatically falls back
-	  to version 2 if a remote rpcbind service does not support versions
-	  3 or 4.
+	  RPC requests over IPv6 networks require support for larger
+	  addresses when performing an RPC bind.  Sun added support for
+	  IPv6 addressing by creating two new versions of the rpcbind
+	  protocol (RFC 1833).
+
+	  This option enables support in the kernel RPC client for
+	  querying rpcbind servers via versions 3 and 4 of the rpcbind
+	  protocol.  The kernel automatically falls back to version 2
+	  if a remote rpcbind service does not support versions 3 or 4.
+	  By themselves, these new versions do not provide support for
+	  RPC over IPv6, but the new protocol versions are necessary to
+	  support it.
 
 	  If unsure, say N to get traditional behavior (version 2 rpcbind
 	  requests only).
@@ -1779,12 +1796,13 @@ config RPCSEC_GSS_KRB5
 	select CRYPTO_DES
 	select CRYPTO_CBC
 	help
-	  Provides for secure RPC calls by means of a gss-api
-	  mechanism based on Kerberos V5. This is required for
-	  NFSv4.
+	  Choose Y here to enable Secure RPC using the Kerberos version 5
+	  GSS-API mechanism (RFC 1964).
 
-	  Note: Requires an auxiliary userspace daemon which may be found on
-		http://www.citi.umich.edu/projects/nfsv4/
+	  Secure RPC calls with Kerberos require an auxiliary user-space
+	  daemon which may be found in the Linux nfs-utils package
+	  available from http://linux-nfs.org/.  In addition, user-space
+	  Kerberos support should be installed.
 
 	  If unsure, say N.
 
@@ -1798,11 +1816,12 @@ config RPCSEC_GSS_SPKM3
 	select CRYPTO_CAST5
 	select CRYPTO_CBC
 	help
-	  Provides for secure RPC calls by means of a gss-api
-	  mechanism based on the SPKM3 public-key mechanism.
+	  Choose Y here to enable Secure RPC using the SPKM3 public key
+	  GSS-API mechansim (RFC 2025).
 
-	  Note: Requires an auxiliary userspace daemon which may be found on
-	  	http://www.citi.umich.edu/projects/nfsv4/
+	  Secure RPC calls with SPKM3 require an auxiliary userspace
+	  daemon which may be found in the Linux nfs-utils package
+	  available from http://linux-nfs.org/.
 
 	  If unsure, say N.
 
-- 
cgit v1.2.3


From 1e40316bc44851c5920490df01482ed862e4dbc4 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:15:18 -0400
Subject: SUNRPC: Add a default setting for CONFIG_SUNRPC_BIND34

Most distros will want support for rpcbind protocols 3 and 4 to default off
until they have integrated user-space support for the new rpcbind daemon
which supports IPv6 RPC services.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/Kconfig | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/Kconfig b/fs/Kconfig
index 83201856b362..4115deb567e1 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -1770,6 +1770,7 @@ config SUNRPC_XPRT_RDMA
 config SUNRPC_BIND34
 	bool "Support for rpcbind versions 3 & 4 (EXPERIMENTAL)"
 	depends on SUNRPC && EXPERIMENTAL
+	default n
 	help
 	  RPC requests over IPv6 networks require support for larger
 	  addresses when performing an RPC bind.  Sun added support for
-- 
cgit v1.2.3


From 90d5b18061656993410dfd57ddb88aa5a3f34563 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:18:30 -0400
Subject: NLM: LOCKD fails to load if CONFIG_SYSCTL is not set

Bruce Fields says:
"By the way, we've got another config-related nit here:

	http://bugzilla.linux-nfs.org/show_bug.cgi?id=156

You can build lockd without CONFIG_SYSCTL set, but then the module will
fail to load."

For now, disable the sysctl registration calls in lockd if CONFIG_SYSCTL
is not enabled.  This allows the kernel to build properly if PROC_FS or
SYSCTL is not enabled, but an NFS client is desired.

In the long run, we would like to be able to build the kernel with an
NFS client but without lockd.  This makes sense, for example, if you want
an NFSv4-only NFS client, as NFSv4 doesn't use NLM at all.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/svc.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

(limited to 'fs')

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 1ed8bd4de941..38c2f0b1dd7d 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -74,7 +74,9 @@ static const unsigned long	nlm_timeout_min = 3;
 static const unsigned long	nlm_timeout_max = 20;
 static const int		nlm_port_min = 0, nlm_port_max = 65535;
 
+#ifdef CONFIG_SYSCTL
 static struct ctl_table_header * nlm_sysctl_table;
+#endif
 
 static unsigned long get_lockd_grace_period(void)
 {
@@ -359,6 +361,8 @@ out:
 }
 EXPORT_SYMBOL(lockd_down);
 
+#ifdef CONFIG_SYSCTL
+
 /*
  * Sysctl parameters (same as module parameters, different interface).
  */
@@ -443,6 +447,8 @@ static ctl_table nlm_sysctl_root[] = {
 	{ .ctl_name = 0 }
 };
 
+#endif	/* CONFIG_SYSCTL */
+
 /*
  * Module (and sysfs) parameters.
  */
@@ -516,15 +522,21 @@ module_param(nsm_use_hostnames, bool, 0644);
 
 static int __init init_nlm(void)
 {
+#ifdef CONFIG_SYSCTL
 	nlm_sysctl_table = register_sysctl_table(nlm_sysctl_root);
 	return nlm_sysctl_table ? 0 : -ENOMEM;
+#else
+	return 0;
+#endif
 }
 
 static void __exit exit_nlm(void)
 {
 	/* FIXME: delete all NLM clients */
 	nlm_shutdown_hosts();
+#ifdef CONFIG_SYSCTL
 	unregister_sysctl_table(nlm_sysctl_table);
+#endif
 }
 
 module_init(init_nlm);
-- 
cgit v1.2.3


From eb18860e1385bfc7f08fcb7ba362e4a5156c8324 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:18:37 -0400
Subject: NLM: NLM protocol version numbers are u32

Clean up: RPC protocol version numbers are u32.  Make sure we use an
appropriate type for NLM version numbers when calling nlm_lookup_host().

Eliminates a harmless mixed sign comparison in nlm_host_lookup().

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/host.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index f1ef49fff118..f23750db1650 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -42,11 +42,12 @@ static struct nsm_handle *	nsm_find(const struct sockaddr_in *sin,
 /*
  * Common host lookup routine for server & client
  */
-static struct nlm_host *
-nlm_lookup_host(int server, const struct sockaddr_in *sin,
-		int proto, int version, const char *hostname,
-		unsigned int hostname_len,
-		const struct sockaddr_in *ssin)
+static struct nlm_host *nlm_lookup_host(int server,
+					const struct sockaddr_in *sin,
+					int proto, u32 version,
+					const char *hostname,
+					unsigned int hostname_len,
+					const struct sockaddr_in *ssin)
 {
 	struct hlist_head *chain;
 	struct hlist_node *pos;
@@ -55,7 +56,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
 	int		hash;
 
 	dprintk("lockd: nlm_lookup_host("NIPQUAD_FMT"->"NIPQUAD_FMT
-			", p=%d, v=%d, my role=%s, name=%.*s)\n",
+			", p=%d, v=%u, my role=%s, name=%.*s)\n",
 			NIPQUAD(ssin->sin_addr.s_addr),
 			NIPQUAD(sin->sin_addr.s_addr), proto, version,
 			server? "server" : "client",
@@ -175,9 +176,10 @@ nlm_destroy_host(struct nlm_host *host)
 /*
  * Find an NLM server handle in the cache. If there is none, create it.
  */
-struct nlm_host *
-nlmclnt_lookup_host(const struct sockaddr_in *sin, int proto, int version,
-			const char *hostname, unsigned int hostname_len)
+struct nlm_host *nlmclnt_lookup_host(const struct sockaddr_in *sin,
+				     int proto, u32 version,
+				     const char *hostname,
+				     unsigned int hostname_len)
 {
 	struct sockaddr_in ssin = {0};
 
-- 
cgit v1.2.3


From 099bd05f27ff24a3041d54e7ed42d2eb681484b9 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:25:32 -0400
Subject: lockd: Ensure NSM strings aren't longer than protocol allows

Introduce a special helper function to check the length of NSM strings
before they are placed on the wire.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/mon.c | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'fs')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 908b23fadd05..84fd84cb67b7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -149,6 +149,15 @@ nsm_create(void)
  * XDR functions for NSM.
  */
 
+static __be32 *xdr_encode_nsm_string(__be32 *p, char *string)
+{
+	size_t len = strlen(string);
+
+	if (len > SM_MAXSTRLEN)
+		len = SM_MAXSTRLEN;
+	return xdr_encode_opaque(p, string, len);
+}
+
 static __be32 *
 xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
 {
-- 
cgit v1.2.3


From 496951743100b2d2ccd8b268257e79425e489851 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:25:39 -0400
Subject: lockd: refactor SM_MON mon_name argument encoder

Clean up: introduce a new XDR encoder specifically for the mon_name
argument of SM_MON requests.  This will be updated later to support IPv6
addresses in addition to IPv4 addresses.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/mon.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'fs')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 84fd84cb67b7..87bd4e4d65dd 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -18,6 +18,8 @@
 
 #define NLMDBG_FACILITY		NLMDBG_MONITOR
 
+#define XDR_ADDRBUF_LEN		(20)
+
 static struct rpc_clnt *	nsm_create(void);
 
 static struct rpc_program	nsm_program;
@@ -158,6 +160,29 @@ static __be32 *xdr_encode_nsm_string(__be32 *p, char *string)
 	return xdr_encode_opaque(p, string, len);
 }
 
+/*
+ * "mon_name" specifies the host to be monitored.
+ *
+ * Linux uses a text version of the IP address of the remote
+ * host as the host identifier (the "mon_name" argument).
+ *
+ * Linux statd always looks up the canonical hostname first for
+ * whatever remote hostname it receives, so this works alright.
+ */
+static __be32 *xdr_encode_mon_name(__be32 *p, struct nsm_args *argp)
+{
+	char	buffer[XDR_ADDRBUF_LEN + 1];
+	char	*name = argp->mon_name;
+
+	if (!nsm_use_hostnames) {
+		snprintf(buffer, XDR_ADDRBUF_LEN,
+			 NIPQUAD_FMT, NIPQUAD(argp->addr));
+		name = buffer;
+	}
+
+	return xdr_encode_nsm_string(p, name);
+}
+
 static __be32 *
 xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
 {
-- 
cgit v1.2.3


From 850c95fd07b63704d06909c9a081c51272d32db1 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:25:46 -0400
Subject: lockd: refactor SM_MON my_id argument encoder

Clean up: introduce a new XDR encoder specifically for the my_id
argument of SM_MON requests.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/mon.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'fs')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 87bd4e4d65dd..06216d6715f7 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -183,6 +183,25 @@ static __be32 *xdr_encode_mon_name(__be32 *p, struct nsm_args *argp)
 	return xdr_encode_nsm_string(p, name);
 }
 
+/*
+ * The "my_id" argument specifies the hostname and RPC procedure
+ * to be called when the status manager receives notification
+ * (via the SM_NOTIFY call) that the state of host "mon_name"
+ * has changed.
+ */
+static __be32 *xdr_encode_my_id(__be32 *p, struct nsm_args *argp)
+{
+	p = xdr_encode_nsm_string(p, utsname()->nodename);
+	if (!p)
+		return ERR_PTR(-EIO);
+
+	*p++ = htonl(argp->prog);
+	*p++ = htonl(argp->vers);
+	*p++ = htonl(argp->proc);
+
+	return p;
+}
+
 static __be32 *
 xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
 {
-- 
cgit v1.2.3


From ea72a7f170e686baf00ceee57b6197bef686889c Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:25:53 -0400
Subject: lockd: document use of mon_id argument in SM_MON requests

Clean up: document the argument type that xdr_encode_common() is
marshalling by introducing a new function.  The new function will replace
xdr_encode_common() in just a sec.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/mon.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'fs')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 06216d6715f7..3935d7b604f5 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -202,6 +202,19 @@ static __be32 *xdr_encode_my_id(__be32 *p, struct nsm_args *argp)
 	return p;
 }
 
+/*
+ * The "mon_id" argument specifies the non-private arguments
+ * of an SM_MON or SM_UNMON call.
+ */
+static __be32 *xdr_encode_mon_id(__be32 *p, struct nsm_args *argp)
+{
+	p = xdr_encode_mon_name(p, argp);
+	if (!p)
+		return ERR_PTR(-EIO);
+
+	return xdr_encode_my_id(p, argp);
+}
+
 static __be32 *
 xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
 {
-- 
cgit v1.2.3


From 2ca7754d4c96d68e1475690422a202ba5f0443d8 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:26:01 -0400
Subject: lockd: Fix up incorrect RPC buffer size calculations.

Switch to using the new mon_id encoder function.

Now that we've refactored the encoding of SM_MON requests, we've
discovered that the pre-computed buffer length maximums are
incorrect!

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/mon.c | 39 ++++++++-------------------------------
 1 file changed, 8 insertions(+), 31 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 3935d7b604f5..f2507fec7c6f 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -149,6 +149,9 @@ nsm_create(void)
 
 /*
  * XDR functions for NSM.
+ *
+ * See http://www.opengroup.org/ for details on the Network
+ * Status Monitor wire protocol.
  */
 
 static __be32 *xdr_encode_nsm_string(__be32 *p, char *string)
@@ -215,37 +218,10 @@ static __be32 *xdr_encode_mon_id(__be32 *p, struct nsm_args *argp)
 	return xdr_encode_my_id(p, argp);
 }
 
-static __be32 *
-xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
-{
-	char	buffer[20], *name;
-
-	/*
-	 * Use the dotted-quad IP address of the remote host as
-	 * identifier. Linux statd always looks up the canonical
-	 * hostname first for whatever remote hostname it receives,
-	 * so this works alright.
-	 */
-	if (nsm_use_hostnames) {
-		name = argp->mon_name;
-	} else {
-		sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(argp->addr));
-		name = buffer;
-	}
-	if (!(p = xdr_encode_string(p, name))
-	 || !(p = xdr_encode_string(p, utsname()->nodename)))
-		return ERR_PTR(-EIO);
-	*p++ = htonl(argp->prog);
-	*p++ = htonl(argp->vers);
-	*p++ = htonl(argp->proc);
-
-	return p;
-}
-
 static int
 xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
 {
-	p = xdr_encode_common(rqstp, p, argp);
+	p = xdr_encode_mon_id(p, argp);
 	if (IS_ERR(p))
 		return PTR_ERR(p);
 
@@ -261,7 +237,7 @@ xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
 static int
 xdr_encode_unmon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
 {
-	p = xdr_encode_common(rqstp, p, argp);
+	p = xdr_encode_mon_id(p, argp);
 	if (IS_ERR(p))
 		return PTR_ERR(p);
 	rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p);
@@ -286,8 +262,9 @@ xdr_decode_stat(struct rpc_rqst *rqstp, __be32 *p, struct nsm_res *resp)
 }
 
 #define SM_my_name_sz	(1+XDR_QUADLEN(SM_MAXSTRLEN))
-#define SM_my_id_sz	(3+1+SM_my_name_sz)
-#define SM_mon_id_sz	(1+XDR_QUADLEN(20)+SM_my_id_sz)
+#define SM_my_id_sz	(SM_my_name_sz+3)
+#define SM_mon_name_sz	(1+XDR_QUADLEN(SM_MAXSTRLEN))
+#define SM_mon_id_sz	(SM_mon_name_sz+SM_my_id_sz)
 #define SM_mon_sz	(SM_mon_id_sz+4)
 #define SM_monres_sz	2
 #define SM_unmonres_sz	1
-- 
cgit v1.2.3


From 0490a54a00c14212f22c5948c8c13a4553d745bd Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 14 Mar 2008 14:26:08 -0400
Subject: lockd: introduce new function to encode private argument in SM_MON
 requests

Clean up: refactor the encoding of the opaque 16-byte private argument in
xdr_encode_mon().  This will be updated later to support IPv6 addresses.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/mon.c | 30 ++++++++++++++++++++++++------
 1 file changed, 24 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index f2507fec7c6f..e4d563543b11 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -218,6 +218,24 @@ static __be32 *xdr_encode_mon_id(__be32 *p, struct nsm_args *argp)
 	return xdr_encode_my_id(p, argp);
 }
 
+/*
+ * The "priv" argument may contain private information required
+ * by the SM_MON call. This information will be supplied in the
+ * SM_NOTIFY call.
+ *
+ * Linux provides the raw IP address of the monitored host,
+ * left in network byte order.
+ */
+static __be32 *xdr_encode_priv(__be32 *p, struct nsm_args *argp)
+{
+	*p++ = argp->addr;
+	*p++ = 0;
+	*p++ = 0;
+	*p++ = 0;
+
+	return p;
+}
+
 static int
 xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
 {
@@ -225,11 +243,10 @@ xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp)
 	if (IS_ERR(p))
 		return PTR_ERR(p);
 
-	/* Surprise - there may even be room for an IPv6 address now */
-	*p++ = argp->addr;
-	*p++ = 0;
-	*p++ = 0;
-	*p++ = 0;
+	p = xdr_encode_priv(p, argp);
+	if (IS_ERR(p))
+		return PTR_ERR(p);
+
 	rqstp->rq_slen = xdr_adjust_iovec(rqstp->rq_svec, p);
 	return 0;
 }
@@ -265,7 +282,8 @@ xdr_decode_stat(struct rpc_rqst *rqstp, __be32 *p, struct nsm_res *resp)
 #define SM_my_id_sz	(SM_my_name_sz+3)
 #define SM_mon_name_sz	(1+XDR_QUADLEN(SM_MAXSTRLEN))
 #define SM_mon_id_sz	(SM_mon_name_sz+SM_my_id_sz)
-#define SM_mon_sz	(SM_mon_id_sz+4)
+#define SM_priv_sz	(XDR_QUADLEN(SM_PRIV_SIZE))
+#define SM_mon_sz	(SM_mon_id_sz+SM_priv_sz)
 #define SM_monres_sz	2
 #define SM_unmonres_sz	1
 
-- 
cgit v1.2.3


From 73e3302f60c0e11a0db0b34b903f591139c4f937 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 11 Apr 2008 16:03:54 -0400
Subject: NFS: Fix nfs_wb_page() to always exit with an error or a clean page

It is possible for nfs_wb_page() to sometimes exit with 0 return value, yet
the page is left in a dirty state.
For instance in the case where the server rebooted, and the COMMIT request
failed, then all the previously "clean" pages which were cached by the
server, but were not guaranteed to have been writted out to disk,
have to be redirtied and resent to the server.
The fix is to have nfs_wb_page_priority() check that the page is clean
before it exits...

This fixes a condition that triggers the BUG_ON(PagePrivate(page)) in
nfs_create_request() when we're in the nfs_readpage() path.

Also eliminate a redundant BUG_ON(!PageLocked(page)) while we're at it. It
turns out that clear_page_dirty_for_io() has the exact same test.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ce40cadb15db..997b42aa3702 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1493,18 +1493,19 @@ static int nfs_wb_page_priority(struct inode *inode, struct page *page,
 	};
 	int ret;
 
-	BUG_ON(!PageLocked(page));
-	if (clear_page_dirty_for_io(page)) {
-		ret = nfs_writepage_locked(page, &wbc);
+	do {
+		if (clear_page_dirty_for_io(page)) {
+			ret = nfs_writepage_locked(page, &wbc);
+			if (ret < 0)
+				goto out_error;
+		} else if (!PagePrivate(page))
+			break;
+		ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
 		if (ret < 0)
-			goto out;
-	}
-	if (!PagePrivate(page))
-		return 0;
-	ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
-	if (ret >= 0)
-		return 0;
-out:
+			goto out_error;
+	} while (PagePrivate(page));
+	return 0;
+out_error:
 	__mark_inode_dirty(inode, I_DIRTY_PAGES);
 	return ret;
 }
-- 
cgit v1.2.3


From fdd1e74c89fe39259a29c494209abad63ff76f82 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 15 Apr 2008 16:33:58 -0400
Subject: NFS: Ensure that the read code cleans up properly when rpc_run_task()
 fails

In the case of readpage() we need to ensure that the pages get unlocked,
and that the error is flagged.

In the case of O_DIRECT, we need to ensure that the pages are all released.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 19 +++++++++++++------
 fs/nfs/read.c   | 53 +++++++++++++++++++++++++++++++++--------------------
 2 files changed, 46 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e44200579c8d..9d9085b93a32 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -229,14 +229,20 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
 static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_read_data *data = calldata;
-	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
 
-	if (nfs_readpage_result(task, data) != 0)
-		return;
+	nfs_readpage_result(task, data);
+}
+
+static void nfs_direct_read_release(void *calldata)
+{
+
+	struct nfs_read_data *data = calldata;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+	int status = data->task.tk_status;
 
 	spin_lock(&dreq->lock);
-	if (unlikely(task->tk_status < 0)) {
-		dreq->error = task->tk_status;
+	if (unlikely(status < 0)) {
+		dreq->error = status;
 		spin_unlock(&dreq->lock);
 	} else {
 		dreq->count += data->res.count;
@@ -249,11 +255,12 @@ static void nfs_direct_read_result(struct rpc_task *task, void *calldata)
 
 	if (put_dreq(dreq))
 		nfs_direct_complete(dreq);
+	nfs_readdata_release(calldata);
 }
 
 static const struct rpc_call_ops nfs_read_direct_ops = {
 	.rpc_call_done = nfs_direct_read_result,
-	.rpc_release = nfs_readdata_release,
+	.rpc_release = nfs_direct_read_release,
 };
 
 /*
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index d333f5fedca1..6f9208a549a0 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -338,26 +338,25 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
 	return 0;
 }
 
-static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
 {
 	struct nfs_readargs *argp = &data->args;
 	struct nfs_readres *resp = &data->res;
 
 	if (resp->eof || resp->count == argp->count)
-		return 0;
+		return;
 
 	/* This is a short read! */
 	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
 	/* Has the server at least made some progress? */
 	if (resp->count == 0)
-		return 0;
+		return;
 
 	/* Yes, so retry the read at the end of the data */
 	argp->offset += resp->count;
 	argp->pgbase += resp->count;
 	argp->count -= resp->count;
 	rpc_restart_call(task);
-	return -EAGAIN;
 }
 
 /*
@@ -366,29 +365,37 @@ static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
 static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
 {
 	struct nfs_read_data *data = calldata;
-	struct nfs_page *req = data->req;
-	struct page *page = req->wb_page;
  
 	if (nfs_readpage_result(task, data) != 0)
 		return;
+	if (task->tk_status < 0)
+		return;
 
-	if (likely(task->tk_status >= 0)) {
-		nfs_readpage_truncate_uninitialised_page(data);
-		if (nfs_readpage_retry(task, data) != 0)
-			return;
-	}
-	if (unlikely(task->tk_status < 0))
+	nfs_readpage_truncate_uninitialised_page(data);
+	nfs_readpage_retry(task, data);
+}
+
+static void nfs_readpage_release_partial(void *calldata)
+{
+	struct nfs_read_data *data = calldata;
+	struct nfs_page *req = data->req;
+	struct page *page = req->wb_page;
+	int status = data->task.tk_status;
+
+	if (status < 0)
 		SetPageError(page);
+
 	if (atomic_dec_and_test(&req->wb_complete)) {
 		if (!PageError(page))
 			SetPageUptodate(page);
 		nfs_readpage_release(req);
 	}
+	nfs_readdata_release(calldata);
 }
 
 static const struct rpc_call_ops nfs_read_partial_ops = {
 	.rpc_call_done = nfs_readpage_result_partial,
-	.rpc_release = nfs_readdata_release,
+	.rpc_release = nfs_readpage_release_partial,
 };
 
 static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
@@ -423,29 +430,35 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 
 	if (nfs_readpage_result(task, data) != 0)
 		return;
+	if (task->tk_status < 0)
+		return;
 	/*
 	 * Note: nfs_readpage_retry may change the values of
 	 * data->args. In the multi-page case, we therefore need
 	 * to ensure that we call nfs_readpage_set_pages_uptodate()
 	 * first.
 	 */
-	if (likely(task->tk_status >= 0)) {
-		nfs_readpage_truncate_uninitialised_page(data);
-		nfs_readpage_set_pages_uptodate(data);
-		if (nfs_readpage_retry(task, data) != 0)
-			return;
-	}
+	nfs_readpage_truncate_uninitialised_page(data);
+	nfs_readpage_set_pages_uptodate(data);
+	nfs_readpage_retry(task, data);
+}
+
+static void nfs_readpage_release_full(void *calldata)
+{
+	struct nfs_read_data *data = calldata;
+
 	while (!list_empty(&data->pages)) {
 		struct nfs_page *req = nfs_list_entry(data->pages.next);
 
 		nfs_list_remove_request(req);
 		nfs_readpage_release(req);
 	}
+	nfs_readdata_release(calldata);
 }
 
 static const struct rpc_call_ops nfs_read_full_ops = {
 	.rpc_call_done = nfs_readpage_result_full,
-	.rpc_release = nfs_readdata_release,
+	.rpc_release = nfs_readpage_release_full,
 };
 
 /*
-- 
cgit v1.2.3


From c9d8f89d9816c1d16ada492aa547a4d692508c0d Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 15 Apr 2008 16:56:39 -0400
Subject: NFS: Ensure that the write code cleans up properly when
 rpc_run_task() fails

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 50 +++++++++++++++++++++++-------------------
 fs/nfs/write.c  | 67 ++++++++++++++++++++++++++++++++++++---------------------
 2 files changed, 71 insertions(+), 46 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 9d9085b93a32..abf8e0286e35 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -508,27 +508,34 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 static void nfs_direct_commit_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
-	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
 
 	/* Call the NFS version-specific code */
-	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
-		return;
-	if (unlikely(task->tk_status < 0)) {
+	NFS_PROTO(data->inode)->commit_done(task, data);
+}
+
+static void nfs_direct_commit_release(void *calldata)
+{
+	struct nfs_write_data *data = calldata;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+	int status = data->task.tk_status;
+
+	if (status < 0) {
 		dprintk("NFS: %5u commit failed with error %d.\n",
-				task->tk_pid, task->tk_status);
+				data->task.tk_pid, status);
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 	} else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
-		dprintk("NFS: %5u commit verify failed\n", task->tk_pid);
+		dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 	}
 
-	dprintk("NFS: %5u commit returned %d\n", task->tk_pid, task->tk_status);
+	dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
 	nfs_direct_write_complete(dreq, data->inode);
+	nfs_commitdata_release(calldata);
 }
 
 static const struct rpc_call_ops nfs_commit_direct_ops = {
 	.rpc_call_done = nfs_direct_commit_result,
-	.rpc_release = nfs_commit_release,
+	.rpc_release = nfs_direct_commit_release,
 };
 
 static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
@@ -596,7 +603,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 
 static void nfs_alloc_commit_data(struct nfs_direct_req *dreq)
 {
-	dreq->commit_data = nfs_commit_alloc();
+	dreq->commit_data = nfs_commitdata_alloc();
 	if (dreq->commit_data != NULL)
 		dreq->commit_data->req = (struct nfs_page *) dreq;
 }
@@ -617,11 +624,20 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data *data = calldata;
-	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
-	int status = task->tk_status;
 
 	if (nfs_writeback_done(task, data) != 0)
 		return;
+}
+
+/*
+ * NB: Return the value of the first error return code.  Subsequent
+ *     errors after the first one are ignored.
+ */
+static void nfs_direct_write_release(void *calldata)
+{
+	struct nfs_write_data *data = calldata;
+	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
+	int status = data->task.tk_status;
 
 	spin_lock(&dreq->lock);
 
@@ -643,23 +659,13 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
 				break;
 			case NFS_ODIRECT_DO_COMMIT:
 				if (memcmp(&dreq->verf, &data->verf, sizeof(dreq->verf))) {
-					dprintk("NFS: %5u write verify failed\n", task->tk_pid);
+					dprintk("NFS: %5u write verify failed\n", data->task.tk_pid);
 					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 				}
 		}
 	}
 out_unlock:
 	spin_unlock(&dreq->lock);
-}
-
-/*
- * NB: Return the value of the first error return code.  Subsequent
- *     errors after the first one are ignored.
- */
-static void nfs_direct_write_release(void *calldata)
-{
-	struct nfs_write_data *data = calldata;
-	struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req;
 
 	if (put_dreq(dreq))
 		nfs_direct_write_complete(dreq, data->inode);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 997b42aa3702..31681bb5e4c1 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -48,7 +48,7 @@ static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
 static mempool_t *nfs_commit_mempool;
 
-struct nfs_write_data *nfs_commit_alloc(void)
+struct nfs_write_data *nfs_commitdata_alloc(void)
 {
 	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
 
@@ -973,7 +973,6 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
 	struct nfs_page		*req = data->req;
-	struct page		*page = req->wb_page;
 
 	dprintk("NFS: write (%s/%Ld %d@%Ld)",
 		req->wb_context->path.dentry->d_inode->i_sb->s_id,
@@ -981,13 +980,20 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 		req->wb_bytes,
 		(long long)req_offset(req));
 
-	if (nfs_writeback_done(task, data) != 0)
-		return;
+	nfs_writeback_done(task, data);
+}
 
-	if (task->tk_status < 0) {
+static void nfs_writeback_release_partial(void *calldata)
+{
+	struct nfs_write_data	*data = calldata;
+	struct nfs_page		*req = data->req;
+	struct page		*page = req->wb_page;
+	int status = data->task.tk_status;
+
+	if (status < 0) {
 		nfs_set_pageerror(page);
-		nfs_context_set_write_error(req->wb_context, task->tk_status);
-		dprintk(", error = %d\n", task->tk_status);
+		nfs_context_set_write_error(req->wb_context, status);
+		dprintk(", error = %d\n", status);
 		goto out;
 	}
 
@@ -1012,11 +1018,12 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 out:
 	if (atomic_dec_and_test(&req->wb_complete))
 		nfs_writepage_release(req);
+	nfs_writedata_release(calldata);
 }
 
 static const struct rpc_call_ops nfs_write_partial_ops = {
 	.rpc_call_done = nfs_writeback_done_partial,
-	.rpc_release = nfs_writedata_release,
+	.rpc_release = nfs_writeback_release_partial,
 };
 
 /*
@@ -1029,17 +1036,21 @@ static const struct rpc_call_ops nfs_write_partial_ops = {
 static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
-	struct nfs_page		*req;
-	struct page		*page;
 
-	if (nfs_writeback_done(task, data) != 0)
-		return;
+	nfs_writeback_done(task, data);
+}
+
+static void nfs_writeback_release_full(void *calldata)
+{
+	struct nfs_write_data	*data = calldata;
+	int status = data->task.tk_status;
 
 	/* Update attributes as result of writeback. */
 	while (!list_empty(&data->pages)) {
-		req = nfs_list_entry(data->pages.next);
+		struct nfs_page *req = nfs_list_entry(data->pages.next);
+		struct page *page = req->wb_page;
+
 		nfs_list_remove_request(req);
-		page = req->wb_page;
 
 		dprintk("NFS: write (%s/%Ld %d@%Ld)",
 			req->wb_context->path.dentry->d_inode->i_sb->s_id,
@@ -1047,10 +1058,10 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 			req->wb_bytes,
 			(long long)req_offset(req));
 
-		if (task->tk_status < 0) {
+		if (status < 0) {
 			nfs_set_pageerror(page);
-			nfs_context_set_write_error(req->wb_context, task->tk_status);
-			dprintk(", error = %d\n", task->tk_status);
+			nfs_context_set_write_error(req->wb_context, status);
+			dprintk(", error = %d\n", status);
 			goto remove_request;
 		}
 
@@ -1070,11 +1081,12 @@ remove_request:
 	next:
 		nfs_clear_page_tag_locked(req);
 	}
+	nfs_writedata_release(calldata);
 }
 
 static const struct rpc_call_ops nfs_write_full_ops = {
 	.rpc_call_done = nfs_writeback_done_full,
-	.rpc_release = nfs_writedata_release,
+	.rpc_release = nfs_writeback_release_full,
 };
 
 
@@ -1160,7 +1172,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-void nfs_commit_release(void *data)
+void nfs_commitdata_release(void *data)
 {
 	struct nfs_write_data *wdata = data;
 
@@ -1233,7 +1245,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 	struct nfs_write_data	*data;
 	struct nfs_page         *req;
 
-	data = nfs_commit_alloc();
+	data = nfs_commitdata_alloc();
 
 	if (!data)
 		goto out_bad;
@@ -1261,7 +1273,6 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 static void nfs_commit_done(struct rpc_task *task, void *calldata)
 {
 	struct nfs_write_data	*data = calldata;
-	struct nfs_page		*req;
 
         dprintk("NFS: %5u nfs_commit_done (status %d)\n",
                                 task->tk_pid, task->tk_status);
@@ -1269,6 +1280,13 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 	/* Call the NFS version-specific code */
 	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
 		return;
+}
+
+static void nfs_commit_release(void *calldata)
+{
+	struct nfs_write_data	*data = calldata;
+	struct nfs_page		*req;
+	int status = data->task.tk_status;
 
 	while (!list_empty(&data->pages)) {
 		req = nfs_list_entry(data->pages.next);
@@ -1283,10 +1301,10 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
 			req->wb_bytes,
 			(long long)req_offset(req));
-		if (task->tk_status < 0) {
-			nfs_context_set_write_error(req->wb_context, task->tk_status);
+		if (status < 0) {
+			nfs_context_set_write_error(req->wb_context, status);
 			nfs_inode_remove_request(req);
-			dprintk(", error = %d\n", task->tk_status);
+			dprintk(", error = %d\n", status);
 			goto next;
 		}
 
@@ -1307,6 +1325,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
 	next:
 		nfs_clear_page_tag_locked(req);
 	}
+	nfs_commitdata_release(calldata);
 }
 
 static const struct rpc_call_ops nfs_commit_ops = {
-- 
cgit v1.2.3


From dbae4c73f08b8a7980cc912954ade3d4c1fb6147 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 14 Apr 2008 14:54:53 -0400
Subject: NFS: Ensure that rpc_run_task() errors are propagated back to the
 caller

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c | 10 ++++++----
 fs/nfs/read.c   | 23 +++++++++++++++--------
 fs/nfs/write.c  | 33 +++++++++++++++++++--------------
 3 files changed, 40 insertions(+), 26 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index abf8e0286e35..4757a2b326a1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -347,8 +347,9 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
 		NFS_PROTO(inode)->read_setup(data, &msg);
 
 		task = rpc_run_task(&task_setup_data);
-		if (!IS_ERR(task))
-			rpc_put_task(task);
+		if (IS_ERR(task))
+			break;
+		rpc_put_task(task);
 
 		dprintk("NFS: %5u initiated direct read call "
 			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
@@ -763,8 +764,9 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
 		NFS_PROTO(inode)->write_setup(data, &msg);
 
 		task = rpc_run_task(&task_setup_data);
-		if (!IS_ERR(task))
-			rpc_put_task(task);
+		if (IS_ERR(task))
+			break;
+		rpc_put_task(task);
 
 		dprintk("NFS: %5u initiated direct write call "
 			"(req %s/%Ld, %zu bytes @ offset %Lu)\n",
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 6f9208a549a0..16f57e0af999 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -153,7 +153,7 @@ static void nfs_readpage_release(struct nfs_page *req)
 /*
  * Set up the NFS read request struct
  */
-static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
+static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 		const struct rpc_call_ops *call_ops,
 		unsigned int count, unsigned int offset)
 {
@@ -202,8 +202,10 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 			(unsigned long long)data->args.offset);
 
 	task = rpc_run_task(&task_setup_data);
-	if (!IS_ERR(task))
-		rpc_put_task(task);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	rpc_put_task(task);
+	return 0;
 }
 
 static void
@@ -240,6 +242,7 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
 	size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
 	unsigned int offset;
 	int requests = 0;
+	int ret = 0;
 	LIST_HEAD(list);
 
 	nfs_list_remove_request(req);
@@ -261,6 +264,8 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
 	offset = 0;
 	nbytes = count;
 	do {
+		int ret2;
+
 		data = list_entry(list.next, struct nfs_read_data, pages);
 		list_del_init(&data->pages);
 
@@ -268,13 +273,15 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
 
 		if (nbytes < rsize)
 			rsize = nbytes;
-		nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
+		ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
 				  rsize, offset);
+		if (ret == 0)
+			ret = ret2;
 		offset += rsize;
 		nbytes -= rsize;
 	} while (nbytes != 0);
 
-	return 0;
+	return ret;
 
 out_bad:
 	while (!list_empty(&list)) {
@@ -292,6 +299,7 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned
 	struct nfs_page		*req;
 	struct page		**pages;
 	struct nfs_read_data	*data;
+	int ret = -ENOMEM;
 
 	data = nfs_readdata_alloc(npages);
 	if (!data)
@@ -307,11 +315,10 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned
 	}
 	req = nfs_list_entry(data->pages.next);
 
-	nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
-	return 0;
+	return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
 out_bad:
 	nfs_async_read_error(head);
-	return -ENOMEM;
+	return ret;
 }
 
 /*
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 31681bb5e4c1..1ade11d1ba07 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -778,7 +778,7 @@ static int flush_task_priority(int how)
 /*
  * Set up the argument/result storage required for the RPC call.
  */
-static void nfs_write_rpcsetup(struct nfs_page *req,
+static int nfs_write_rpcsetup(struct nfs_page *req,
 		struct nfs_write_data *data,
 		const struct rpc_call_ops *call_ops,
 		unsigned int count, unsigned int offset,
@@ -841,8 +841,10 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 		(unsigned long long)data->args.offset);
 
 	task = rpc_run_task(&task_setup_data);
-	if (!IS_ERR(task))
-		rpc_put_task(task);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	rpc_put_task(task);
+	return 0;
 }
 
 /* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -868,6 +870,7 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
 	size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
 	unsigned int offset;
 	int requests = 0;
+	int ret = 0;
 	LIST_HEAD(list);
 
 	nfs_list_remove_request(req);
@@ -889,6 +892,8 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
 	offset = 0;
 	nbytes = count;
 	do {
+		int ret2;
+
 		data = list_entry(list.next, struct nfs_write_data, pages);
 		list_del_init(&data->pages);
 
@@ -896,13 +901,15 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
 
 		if (nbytes < wsize)
 			wsize = nbytes;
-		nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
+		ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
 				   wsize, offset, how);
+		if (ret == 0)
+			ret = ret2;
 		offset += wsize;
 		nbytes -= wsize;
 	} while (nbytes != 0);
 
-	return 0;
+	return ret;
 
 out_bad:
 	while (!list_empty(&list)) {
@@ -943,9 +950,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
 	req = nfs_list_entry(data->pages.next);
 
 	/* Set up the argument struct */
-	nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
-
-	return 0;
+	return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
  out_bad:
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
@@ -1183,7 +1188,7 @@ void nfs_commitdata_release(void *data)
 /*
  * Set up the argument/result storage required for the RPC call.
  */
-static void nfs_commit_rpcsetup(struct list_head *head,
+static int nfs_commit_rpcsetup(struct list_head *head,
 		struct nfs_write_data *data,
 		int how)
 {
@@ -1232,8 +1237,10 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
 
 	task = rpc_run_task(&task_setup_data);
-	if (!IS_ERR(task))
-		rpc_put_task(task);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	rpc_put_task(task);
+	return 0;
 }
 
 /*
@@ -1251,9 +1258,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 		goto out_bad;
 
 	/* Set up the argument struct */
-	nfs_commit_rpcsetup(head, data, how);
-
-	return 0;
+	return nfs_commit_rpcsetup(head, data, how);
  out_bad:
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
-- 
cgit v1.2.3


From 35d05778e25ee16dbddb60331be0bc1309efba19 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 5 Apr 2008 15:54:17 -0400
Subject: NFSv4: Remove bogus call to nfs4_drop_state_owner() in
 _nfs4_open_expired()

There should be no need to invalidate a perfectly good state owner just
because of a stale filehandle. Doing so can cause the state recovery code
to break, since nfs4_get_renew_cred() and nfs4_get_setclientid_cred() rely
on finding active state owners.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f38d0573be18..424aa206470f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -982,11 +982,8 @@ static int _nfs4_open_expired(struct nfs_open_context *ctx, struct nfs4_state *s
 	if (IS_ERR(opendata))
 		return PTR_ERR(opendata);
 	ret = nfs4_open_recover(opendata, state);
-	if (ret == -ESTALE) {
-		/* Invalidate the state owner so we don't ever use it again */
-		nfs4_drop_state_owner(state->owner);
+	if (ret == -ESTALE)
 		d_drop(ctx->path.dentry);
-	}
 	nfs4_opendata_put(opendata);
 	return ret;
 }
-- 
cgit v1.2.3


From c1d519312dcdf11532fed9f99a8ecc3547ffd9d6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 7 Apr 2008 13:20:54 -0400
Subject: NFSv4: Only increment the sequence id if the server saw it

It is quite possible that the OPEN, CLOSE, LOCK, LOCKU,... compounds fail
before the actual stateful operation has been executed (for instance in the
PUTFH call). There is no way to tell from the overall status result which
operations were executed from the COMPOUND.

The fix is to move incrementing of the sequence id into the XDR layer,
so that we do it as we process the results from the stateful operation.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 12 ++++++------
 fs/nfs/nfs4xdr.c  | 18 +++++++++++++++++-
 2 files changed, 23 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 424aa206470f..9f2759da74eb 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -240,6 +240,8 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
 {
 	p->o_res.f_attr = &p->f_attr;
 	p->o_res.dir_attr = &p->dir_attr;
+	p->o_res.seqid = p->o_arg.seqid;
+	p->c_res.seqid = p->c_arg.seqid;
 	p->o_res.server = p->o_arg.server;
 	nfs_fattr_init(&p->f_attr);
 	nfs_fattr_init(&p->dir_attr);
@@ -730,7 +732,6 @@ static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
 		renew_lease(data->o_res.server, data->timestamp);
 		data->rpc_done = 1;
 	}
-	nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
 }
 
 static void nfs4_open_confirm_release(void *calldata)
@@ -860,7 +861,6 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
 		if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
 			nfs_confirm_seqid(&data->owner->so_seqid, 0);
 	}
-	nfs_increment_open_seqid(data->rpc_status, data->o_arg.seqid);
 	data->rpc_done = 1;
 }
 
@@ -1226,7 +1226,6 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
         /* hmm. we are done with the inode, and in the process of freeing
 	 * the state_owner. we keep this around to process errors
 	 */
-	nfs_increment_open_seqid(task->tk_status, calldata->arg.seqid);
 	switch (task->tk_status) {
 		case 0:
 			nfs_set_open_stateid(state, &calldata->res.stateid, 0);
@@ -1333,6 +1332,7 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
 		goto out_free_calldata;
 	calldata->arg.bitmask = server->attr_bitmask;
 	calldata->res.fattr = &calldata->fattr;
+	calldata->res.seqid = calldata->arg.seqid;
 	calldata->res.server = server;
 	calldata->path.mnt = mntget(path->mnt);
 	calldata->path.dentry = dget(path->dentry);
@@ -3159,6 +3159,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
 	p->arg.fh = NFS_FH(inode);
 	p->arg.fl = &p->fl;
 	p->arg.seqid = seqid;
+	p->res.seqid = seqid;
 	p->arg.stateid = &lsp->ls_stateid;
 	p->lsp = lsp;
 	atomic_inc(&lsp->ls_count);
@@ -3184,7 +3185,6 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 
 	if (RPC_ASSASSINATED(task))
 		return;
-	nfs_increment_lock_seqid(task->tk_status, calldata->arg.seqid);
 	switch (task->tk_status) {
 		case 0:
 			memcpy(calldata->lsp->ls_stateid.data,
@@ -3322,6 +3322,7 @@ static struct nfs4_lockdata *nfs4_alloc_lockdata(struct file_lock *fl,
 	p->arg.lock_stateid = &lsp->ls_stateid;
 	p->arg.lock_owner.clientid = server->nfs_client->cl_clientid;
 	p->arg.lock_owner.id = lsp->ls_id.id;
+	p->res.lock_seqid = p->arg.lock_seqid;
 	p->lsp = lsp;
 	atomic_inc(&lsp->ls_count);
 	p->ctx = get_nfs_open_context(ctx);
@@ -3348,6 +3349,7 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
 			return;
 		data->arg.open_stateid = &state->stateid;
 		data->arg.new_lock_owner = 1;
+		data->res.open_seqid = data->arg.open_seqid;
 	} else
 		data->arg.new_lock_owner = 0;
 	data->timestamp = jiffies;
@@ -3365,7 +3367,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
 	if (RPC_ASSASSINATED(task))
 		goto out;
 	if (data->arg.new_lock_owner != 0) {
-		nfs_increment_open_seqid(data->rpc_status, data->arg.open_seqid);
 		if (data->rpc_status == 0)
 			nfs_confirm_seqid(&data->lsp->ls_seqid, 0);
 		else
@@ -3377,7 +3378,6 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
 		data->lsp->ls_flags |= NFS_LOCK_INITIALIZED;
 		renew_lease(NFS_SERVER(data->ctx->path.dentry->d_inode), data->timestamp);
 	}
-	nfs_increment_lock_seqid(data->rpc_status, data->arg.lock_seqid);
 out:
 	dprintk("%s: done, ret = %d!\n", __FUNCTION__, data->rpc_status);
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 2b519f6325f9..b8b2e391d183 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3005,6 +3005,8 @@ static int decode_close(struct xdr_stream *xdr, struct nfs_closeres *res)
 	int status;
 
 	status = decode_op_hdr(xdr, OP_CLOSE);
+	if (status != -EIO)
+		nfs_increment_open_seqid(status, res->seqid);
 	if (status)
 		return status;
 	READ_BUF(NFS4_STATEID_SIZE);
@@ -3296,11 +3298,17 @@ static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res)
 	int status;
 
 	status = decode_op_hdr(xdr, OP_LOCK);
+	if (status == -EIO)
+		goto out;
 	if (status == 0) {
 		READ_BUF(NFS4_STATEID_SIZE);
 		COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
 	} else if (status == -NFS4ERR_DENIED)
-		return decode_lock_denied(xdr, NULL);
+		status = decode_lock_denied(xdr, NULL);
+	if (res->open_seqid != NULL)
+		nfs_increment_open_seqid(status, res->open_seqid);
+	nfs_increment_lock_seqid(status, res->lock_seqid);
+out:
 	return status;
 }
 
@@ -3319,6 +3327,8 @@ static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
 	int status;
 
 	status = decode_op_hdr(xdr, OP_LOCKU);
+	if (status != -EIO)
+		nfs_increment_lock_seqid(status, res->seqid);
 	if (status == 0) {
 		READ_BUF(NFS4_STATEID_SIZE);
 		COPYMEM(res->stateid.data, NFS4_STATEID_SIZE);
@@ -3384,6 +3394,8 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
         int status;
 
         status = decode_op_hdr(xdr, OP_OPEN);
+	if (status != -EIO)
+		nfs_increment_open_seqid(status, res->seqid);
         if (status)
                 return status;
         READ_BUF(NFS4_STATEID_SIZE);
@@ -3416,6 +3428,8 @@ static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmre
 	int status;
 
         status = decode_op_hdr(xdr, OP_OPEN_CONFIRM);
+	if (status != -EIO)
+		nfs_increment_open_seqid(status, res->seqid);
         if (status)
                 return status;
         READ_BUF(NFS4_STATEID_SIZE);
@@ -3429,6 +3443,8 @@ static int decode_open_downgrade(struct xdr_stream *xdr, struct nfs_closeres *re
 	int status;
 
 	status = decode_op_hdr(xdr, OP_OPEN_DOWNGRADE);
+	if (status != -EIO)
+		nfs_increment_open_seqid(status, res->seqid);
 	if (status)
 		return status;
 	READ_BUF(NFS4_STATEID_SIZE);
-- 
cgit v1.2.3


From 4a9af59fee0701d9db99bc148d87b8852d6d6dd8 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 1 Apr 2008 18:57:06 -0400
Subject: NLM/lockd: Ensure we don't corrupt fl->fl_flags in nlmclnt_unlock()

Also fix up nlmclnt_lock() so that it doesn't pass modified versions of
fl->fl_flags to nlmclnt_cancel() and other helpers.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index b6b74a60e1eb..4e1c0123b45d 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -493,6 +493,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 	}
 	fl->fl_flags |= FL_ACCESS;
 	status = do_vfs_lock(fl);
+	fl->fl_flags = fl_flags;
 	if (status < 0)
 		goto out;
 
@@ -530,10 +531,11 @@ again:
 			goto again;
 		}
 		/* Ensure the resulting lock will get added to granted list */
-		fl->fl_flags = fl_flags | FL_SLEEP;
+		fl->fl_flags |= FL_SLEEP;
 		if (do_vfs_lock(fl) < 0)
 			printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__);
 		up_read(&host->h_rwsem);
+		fl->fl_flags = fl_flags;
 	}
 	status = nlm_stat_to_errno(resp->status);
 out_unblock:
@@ -543,7 +545,6 @@ out_unblock:
 		nlmclnt_cancel(host, req->a_args.block, fl);
 out:
 	nlm_release_call(req);
-	fl->fl_flags = fl_flags;
 	return status;
 }
 
@@ -598,7 +599,8 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 {
 	struct nlm_host	*host = req->a_host;
 	struct nlm_res	*resp = &req->a_res;
-	int status = 0;
+	int status;
+	unsigned char fl_flags = fl->fl_flags;
 
 	/*
 	 * Note: the server is supposed to either grant us the unlock
@@ -607,11 +609,13 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 	 */
 	fl->fl_flags |= FL_EXISTS;
 	down_read(&host->h_rwsem);
-	if (do_vfs_lock(fl) == -ENOENT) {
-		up_read(&host->h_rwsem);
+	status = do_vfs_lock(fl);
+	up_read(&host->h_rwsem);
+	fl->fl_flags = fl_flags;
+	if (status == -ENOENT) {
+		status = 0;
 		goto out;
 	}
-	up_read(&host->h_rwsem);
 
 	if (req->a_flags & RPC_TASK_ASYNC)
 		return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
-- 
cgit v1.2.3


From 536ff0f809b0f4d56e1c41e66768d330668e0a55 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 4 Apr 2008 15:08:02 -0400
Subject: NFSv4: Ensure we don't corrupt fl->fl_flags in nfs4_proc_unlck

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9f2759da74eb..a1069325b876 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3263,6 +3263,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
 	struct nfs4_lock_state *lsp;
 	struct rpc_task *task;
 	int status = 0;
+	unsigned char fl_flags = request->fl_flags;
 
 	status = nfs4_set_lock_state(state, request);
 	/* Unlock _before_ we do the RPC call */
@@ -3286,6 +3287,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
 	status = nfs4_wait_for_completion_rpc_task(task);
 	rpc_put_task(task);
 out:
+	request->fl_flags = fl_flags;
 	return status;
 }
 
-- 
cgit v1.2.3


From 5e7f37a76fa5b604949020b7317962262812b2dd Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 1 Apr 2008 18:58:49 -0400
Subject: NLM/lockd: Add a reference counter to struct nlm_rqst

When we replace the existing synchronous RPC calls with asynchronous calls,
the reference count will be needed in order to allow us to examine the
result of the RPC call.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 4e1c0123b45d..749eb5328cb0 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -221,6 +221,7 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
 	for(;;) {
 		call = kzalloc(sizeof(*call), GFP_KERNEL);
 		if (call != NULL) {
+			atomic_set(&call->a_count, 1);
 			locks_init_lock(&call->a_args.lock.fl);
 			locks_init_lock(&call->a_res.lock.fl);
 			call->a_host = host;
@@ -237,6 +238,8 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
 
 void nlm_release_call(struct nlm_rqst *call)
 {
+	if (!atomic_dec_and_test(&call->a_count))
+		return;
 	nlm_release_host(call->a_host);
 	nlmclnt_release_lockargs(call);
 	kfree(call);
-- 
cgit v1.2.3


From dc9d8d048168ff61c458bec06b28996cb90b182a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 28 Mar 2008 16:04:36 -0400
Subject: NLM/lockd: convert __nlm_async_call to use rpc_run_task()

Peter Staubach comments:

> In the course of investigating testing failures in the locking phase of
> the Connectathon testsuite, I discovered a couple of things.  One was
> that one of the tests in the locking tests was racy when it didn't seem
> to need to be and two, that the NFS client asynchronously releases locks
> when a process is exiting.
...
> The Single UNIX Specification Version 3 specifies that:  "All locks
> associated with a file for a given process shall be removed when a file
> descriptor for that file is closed by that process or the process holding
> that file descriptor terminates.".
>
> This does not specify whether those locks must be released prior to the
> completion of the exit processing for the process or not.  However,
> general assumptions seem to be that those locks will be released.  This
> leads to more deterministic behavior under normal circumstances.

The following patch converts the NFSv2/v3 locking code to use the same
mechanism as NFSv4 for sending asynchronous RPC calls and then waiting for
them to complete. This ensures that the UNLOCK and CANCEL RPC calls will
complete even if the user interrupts the call, yet satisfies the
above request for synchronous behaviour on process exit.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 64 ++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 54 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 749eb5328cb0..a34b709006a1 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -346,10 +346,16 @@ in_grace_period:
 /*
  * Generic NLM call, async version.
  */
-static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops)
+static struct rpc_task *__nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops)
 {
 	struct nlm_host	*host = req->a_host;
 	struct rpc_clnt	*clnt;
+	struct rpc_task_setup task_setup_data = {
+		.rpc_message = msg,
+		.callback_ops = tk_ops,
+		.callback_data = req,
+		.flags = RPC_TASK_ASYNC,
+	};
 
 	dprintk("lockd: call procedure %d on %s (async)\n",
 			(int)proc, host->h_name);
@@ -359,21 +365,36 @@ static int __nlm_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *
 	if (clnt == NULL)
 		goto out_err;
 	msg->rpc_proc = &clnt->cl_procinfo[proc];
+	task_setup_data.rpc_client = clnt;
 
         /* bootstrap and kick off the async RPC call */
-        return rpc_call_async(clnt, msg, RPC_TASK_ASYNC, tk_ops, req);
+	return rpc_run_task(&task_setup_data);
 out_err:
 	tk_ops->rpc_release(req);
-	return -ENOLCK;
+	return ERR_PTR(-ENOLCK);
+}
+
+static int nlm_do_async_call(struct nlm_rqst *req, u32 proc, struct rpc_message *msg, const struct rpc_call_ops *tk_ops)
+{
+	struct rpc_task *task;
+
+	task = __nlm_async_call(req, proc, msg, tk_ops);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	rpc_put_task(task);
+	return 0;
 }
 
+/*
+ * NLM asynchronous call.
+ */
 int nlm_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
 {
 	struct rpc_message msg = {
 		.rpc_argp	= &req->a_args,
 		.rpc_resp	= &req->a_res,
 	};
-	return __nlm_async_call(req, proc, &msg, tk_ops);
+	return nlm_do_async_call(req, proc, &msg, tk_ops);
 }
 
 int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
@@ -381,7 +402,32 @@ int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *t
 	struct rpc_message msg = {
 		.rpc_argp	= &req->a_res,
 	};
-	return __nlm_async_call(req, proc, &msg, tk_ops);
+	return nlm_do_async_call(req, proc, &msg, tk_ops);
+}
+
+/*
+ * NLM client asynchronous call.
+ *
+ * Note that although the calls are asynchronous, and are therefore
+ *      guaranteed to complete, we still always attempt to wait for
+ *      completion in order to be able to correctly track the lock
+ *      state.
+ */
+static int nlmclnt_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+{
+	struct rpc_message msg = {
+		.rpc_argp	= &req->a_args,
+		.rpc_resp	= &req->a_res,
+	};
+	struct rpc_task *task;
+	int err;
+
+	task = __nlm_async_call(req, proc, &msg, tk_ops);
+	if (IS_ERR(task))
+		return PTR_ERR(task);
+	err = rpc_wait_for_completion_task(task);
+	rpc_put_task(task);
+	return err;
 }
 
 /*
@@ -620,10 +666,8 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 		goto out;
 	}
 
-	if (req->a_flags & RPC_TASK_ASYNC)
-		return nlm_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
-
-	status = nlmclnt_call(req, NLMPROC_UNLOCK);
+	atomic_inc(&req->a_count);
+	status = nlmclnt_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
 	if (status < 0)
 		goto out;
 
@@ -697,7 +741,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
 	nlmclnt_setlockargs(req, fl);
 	req->a_args.block = block;
 
-	status = nlm_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
+	status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
 
 	spin_lock_irqsave(&current->sighand->siglock, flags);
 	current->blocked = oldset;
-- 
cgit v1.2.3


From 8ec7ff74448f65ac963e330795d771ab14ec8408 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 28 Mar 2008 16:04:51 -0400
Subject: NLM: Remove the signal masking in nlmclnt_proc/nlmclnt_cancel

The signal masks have been rendered obsolete by the preceding patch.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 42 +-----------------------------------------
 1 file changed, 1 insertion(+), 41 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index a34b709006a1..5f13e0363b28 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -155,8 +155,6 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req)
 int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
 {
 	struct nlm_rqst		*call;
-	sigset_t		oldset;
-	unsigned long		flags;
 	int			status;
 
 	nlm_get_host(host);
@@ -168,22 +166,6 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
 	/* Set up the argument struct */
 	nlmclnt_setlockargs(call, fl);
 
-	/* Keep the old signal mask */
-	spin_lock_irqsave(&current->sighand->siglock, flags);
-	oldset = current->blocked;
-
-	/* If we're cleaning up locks because the process is exiting,
-	 * perform the RPC call asynchronously. */
-	if ((IS_SETLK(cmd) || IS_SETLKW(cmd))
-	    && fl->fl_type == F_UNLCK
-	    && (current->flags & PF_EXITING)) {
-		sigfillset(&current->blocked);	/* Mask all signals */
-		recalc_sigpending();
-
-		call->a_flags = RPC_TASK_ASYNC;
-	}
-	spin_unlock_irqrestore(&current->sighand->siglock, flags);
-
 	if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
 		if (fl->fl_type != F_UNLCK) {
 			call->a_args.block = IS_SETLKW(cmd) ? 1 : 0;
@@ -198,11 +180,6 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
 	fl->fl_ops->fl_release_private(fl);
 	fl->fl_ops = NULL;
 
-	spin_lock_irqsave(&current->sighand->siglock, flags);
-	current->blocked = oldset;
-	recalc_sigpending();
-	spin_unlock_irqrestore(&current->sighand->siglock, flags);
-
 	dprintk("lockd: clnt proc returns %d\n", status);
 	return status;
 }
@@ -722,16 +699,6 @@ static const struct rpc_call_ops nlmclnt_unlock_ops = {
 static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl)
 {
 	struct nlm_rqst	*req;
-	unsigned long	flags;
-	sigset_t	oldset;
-	int		status;
-
-	/* Block all signals while setting up call */
-	spin_lock_irqsave(&current->sighand->siglock, flags);
-	oldset = current->blocked;
-	sigfillset(&current->blocked);
-	recalc_sigpending();
-	spin_unlock_irqrestore(&current->sighand->siglock, flags);
 
 	req = nlm_alloc_call(nlm_get_host(host));
 	if (!req)
@@ -741,14 +708,7 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
 	nlmclnt_setlockargs(req, fl);
 	req->a_args.block = block;
 
-	status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
-
-	spin_lock_irqsave(&current->sighand->siglock, flags);
-	current->blocked = oldset;
-	recalc_sigpending();
-	spin_unlock_irqrestore(&current->sighand->siglock, flags);
-
-	return status;
+	return nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
 }
 
 static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
-- 
cgit v1.2.3


From 6b4b3a752b3464f2fd9fe2837fb19270c23c1d6b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 2 Apr 2008 14:40:53 -0400
Subject: NLM/lockd: Ensure that nlmclnt_cancel() returns results of the CANCEL
 call

Currently, it returns success as long as the RPC call was sent. We'd like
to know if the CANCEL operation succeeded on the server.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 5f13e0363b28..ea1a6940af22 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -699,6 +699,10 @@ static const struct rpc_call_ops nlmclnt_unlock_ops = {
 static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl)
 {
 	struct nlm_rqst	*req;
+	int status;
+
+	dprintk("lockd: blocking lock attempt was interrupted by a signal.\n"
+		"       Attempting to cancel lock.\n");
 
 	req = nlm_alloc_call(nlm_get_host(host));
 	if (!req)
@@ -708,7 +712,12 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
 	nlmclnt_setlockargs(req, fl);
 	req->a_args.block = block;
 
-	return nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
+	atomic_inc(&req->a_count);
+	status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
+	if (status == 0 && req->a_res.status == nlm_lck_denied)
+		status = -ENOLCK;
+	nlm_release_call(req);
+	return status;
 }
 
 static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
-- 
cgit v1.2.3


From 5f50c0c6d644d6c8180d9079c13c5d9de3adeb34 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 1 Apr 2008 20:26:22 -0400
Subject: NLM/lockd: Fix a race when cancelling a blocking lock

We shouldn't remove the lock from the list of blocked locks until the
CANCEL call has completed since we may be racing with a GRANTED callback.

Also ensure that we send an UNLOCK if the CANCEL request failed. Normally
that should only happen if the process gets hit with a fatal signal.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 43 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index ea1a6940af22..37d1aa20a607 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -510,6 +510,7 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 	struct nlm_res	*resp = &req->a_res;
 	struct nlm_wait *block = NULL;
 	unsigned char fl_flags = fl->fl_flags;
+	unsigned char fl_type;
 	int status = -ENOLCK;
 
 	if (nsm_monitor(host) < 0) {
@@ -525,13 +526,16 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 
 	block = nlmclnt_prepare_block(host, fl);
 again:
+	/*
+	 * Initialise resp->status to a valid non-zero value,
+	 * since 0 == nlm_lck_granted
+	 */
+	resp->status = nlm_lck_blocked;
 	for(;;) {
 		/* Reboot protection */
 		fl->fl_u.nfs_fl.state = host->h_state;
 		status = nlmclnt_call(req, NLMPROC_LOCK);
 		if (status < 0)
-			goto out_unblock;
-		if (!req->a_args.block)
 			break;
 		/* Did a reclaimer thread notify us of a server reboot? */
 		if (resp->status ==  nlm_lck_denied_grace_period)
@@ -540,15 +544,22 @@ again:
 			break;
 		/* Wait on an NLM blocking lock */
 		status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
-		/* if we were interrupted. Send a CANCEL request to the server
-		 * and exit
-		 */
 		if (status < 0)
-			goto out_unblock;
+			break;
 		if (resp->status != nlm_lck_blocked)
 			break;
 	}
 
+	/* if we were interrupted while blocking, then cancel the lock request
+	 * and exit
+	 */
+	if (resp->status == nlm_lck_blocked) {
+		if (!req->a_args.block)
+			goto out_unlock;
+		if (nlmclnt_cancel(host, req->a_args.block, fl) == 0)
+			goto out_unblock;
+	}
+
 	if (resp->status == nlm_granted) {
 		down_read(&host->h_rwsem);
 		/* Check whether or not the server has rebooted */
@@ -562,16 +573,30 @@ again:
 			printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", __FUNCTION__);
 		up_read(&host->h_rwsem);
 		fl->fl_flags = fl_flags;
+		status = 0;
 	}
+	if (status < 0)
+		goto out_unlock;
 	status = nlm_stat_to_errno(resp->status);
 out_unblock:
 	nlmclnt_finish_block(block);
-	/* Cancel the blocked request if it is still pending */
-	if (resp->status == nlm_lck_blocked)
-		nlmclnt_cancel(host, req->a_args.block, fl);
 out:
 	nlm_release_call(req);
 	return status;
+out_unlock:
+	/* Fatal error: ensure that we remove the lock altogether */
+	dprintk("lockd: lock attempt ended in fatal error.\n"
+		"       Attempting to unlock.\n");
+	nlmclnt_finish_block(block);
+	fl_type = fl->fl_type;
+	fl->fl_type = F_UNLCK;
+	down_read(&host->h_rwsem);
+	do_vfs_lock(fl);
+	up_read(&host->h_rwsem);
+	fl->fl_type = fl_type;
+	fl->fl_flags = fl_flags;
+	nlmclnt_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
+	return status;
 }
 
 /*
-- 
cgit v1.2.3


From c4d7c402b788b73dc24f1e54a57f89d3dc5eb7bc Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 1 Apr 2008 20:26:52 -0400
Subject: NFS: Remove the buggy lock-if-signalled case from do_setlk()

Both NLM and NFSv4 should be able to clean up adequately in the case where
the user interrupts the RPC call...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/file.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 10e8b807e7ff..742cb745cb47 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -566,17 +566,9 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
 
 	lock_kernel();
 	/* Use local locking if mounted with "-onolock" */
-	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) {
+	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
 		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
-		/* If we were signalled we still need to ensure that
-		 * we clean up any state on the server. We therefore
-		 * record the lock call as having succeeded in order to
-		 * ensure that locks_remove_posix() cleans it out when
-		 * the process exits.
-		 */
-		if (status == -EINTR || status == -ERESTARTSYS)
-			do_vfs_lock(filp, fl);
-	} else
+	else
 		status = do_vfs_lock(filp, fl);
 	unlock_kernel();
 	if (status < 0)
-- 
cgit v1.2.3


From d11d10cc05c94a32632d6928d15a1034200dd9a5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Wed, 2 Apr 2008 14:44:05 -0400
Subject: NLM/lockd: Ensure client locking calls use correct credentials

Now that we've added the 'generic' credentials (that are independent of the
rpc_client) to the nfs_open_context, we can use those in the NLM client to
ensure that the lock/unlock requests are authenticated to whoever
originally opened the file.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 37d1aa20a607..40b16f23e49a 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -247,7 +247,7 @@ static int nlm_wait_on_grace(wait_queue_head_t *queue)
  * Generic NLM call
  */
 static int
-nlmclnt_call(struct nlm_rqst *req, u32 proc)
+nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc)
 {
 	struct nlm_host	*host = req->a_host;
 	struct rpc_clnt	*clnt;
@@ -256,6 +256,7 @@ nlmclnt_call(struct nlm_rqst *req, u32 proc)
 	struct rpc_message msg = {
 		.rpc_argp	= argp,
 		.rpc_resp	= resp,
+		.rpc_cred	= cred,
 	};
 	int		status;
 
@@ -390,11 +391,12 @@ int nlm_async_reply(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *t
  *      completion in order to be able to correctly track the lock
  *      state.
  */
-static int nlmclnt_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
+static int nlmclnt_async_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
 {
 	struct rpc_message msg = {
 		.rpc_argp	= &req->a_args,
 		.rpc_resp	= &req->a_res,
+		.rpc_cred	= cred,
 	};
 	struct rpc_task *task;
 	int err;
@@ -415,7 +417,7 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
 {
 	int	status;
 
-	status = nlmclnt_call(req, NLMPROC_TEST);
+	status = nlmclnt_call(nfs_file_cred(fl->fl_file), req, NLMPROC_TEST);
 	if (status < 0)
 		goto out;
 
@@ -506,6 +508,7 @@ static int do_vfs_lock(struct file_lock *fl)
 static int
 nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
 {
+	struct rpc_cred *cred = nfs_file_cred(fl->fl_file);
 	struct nlm_host	*host = req->a_host;
 	struct nlm_res	*resp = &req->a_res;
 	struct nlm_wait *block = NULL;
@@ -534,7 +537,7 @@ again:
 	for(;;) {
 		/* Reboot protection */
 		fl->fl_u.nfs_fl.state = host->h_state;
-		status = nlmclnt_call(req, NLMPROC_LOCK);
+		status = nlmclnt_call(cred, req, NLMPROC_LOCK);
 		if (status < 0)
 			break;
 		/* Did a reclaimer thread notify us of a server reboot? */
@@ -595,7 +598,7 @@ out_unlock:
 	up_read(&host->h_rwsem);
 	fl->fl_type = fl_type;
 	fl->fl_flags = fl_flags;
-	nlmclnt_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
+	nlmclnt_async_call(cred, req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
 	return status;
 }
 
@@ -619,8 +622,8 @@ nlmclnt_reclaim(struct nlm_host *host, struct file_lock *fl)
 	nlmclnt_setlockargs(req, fl);
 	req->a_args.reclaim = 1;
 
-	if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0
-	 && req->a_res.status == nlm_granted)
+	status = nlmclnt_call(nfs_file_cred(fl->fl_file), req, NLMPROC_LOCK);
+	if (status >= 0 && req->a_res.status == nlm_granted)
 		return 0;
 
 	printk(KERN_WARNING "lockd: failed to reclaim lock for pid %d "
@@ -669,7 +672,8 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 	}
 
 	atomic_inc(&req->a_count);
-	status = nlmclnt_async_call(req, NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
+	status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req,
+			NLMPROC_UNLOCK, &nlmclnt_unlock_ops);
 	if (status < 0)
 		goto out;
 
@@ -738,7 +742,8 @@ static int nlmclnt_cancel(struct nlm_host *host, int block, struct file_lock *fl
 	req->a_args.block = block;
 
 	atomic_inc(&req->a_count);
-	status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
+	status = nlmclnt_async_call(nfs_file_cred(fl->fl_file), req,
+			NLMPROC_CANCEL, &nlmclnt_cancel_ops);
 	if (status == 0 && req->a_res.status == nlm_lck_denied)
 		status = -ENOLCK;
 	nlm_release_call(req);
-- 
cgit v1.2.3


From 856dff3d3875bdc8b88e4a65779873af76776a69 Mon Sep 17 00:00:00 2001
From: Benny Halevy <bhalevy@panasas.com>
Date: Mon, 31 Mar 2008 17:39:06 +0300
Subject: nfs: return negative error value from nfs{,4}_stat_to_errno

All use sites for nfs{,4}_stat_to_errno negate their return value.
It's more efficient to return a negative error from the stat_to_errno convertors
rather than negating its return value everywhere. This also produces slightly
smaller code.

Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs2xdr.c | 76 ++++++++++++++++++++++++++---------------------------
 fs/nfs/nfs3xdr.c | 34 ++++++++++++------------
 fs/nfs/nfs4xdr.c | 80 ++++++++++++++++++++++++++++----------------------------
 3 files changed, 95 insertions(+), 95 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 86a80b33ec82..28bab67d1519 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -267,7 +267,7 @@ nfs_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
 	int status;
 
 	if ((status = ntohl(*p++)))
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 	p = xdr_decode_fattr(p, res->fattr);
 
 	count = ntohl(*p++);
@@ -432,7 +432,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
 	__be32 *end, *entry, *kaddr;
 
 	if ((status = ntohl(*p++)))
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 
 	hdrlen = (u8 *) p - (u8 *) iov->iov_base;
 	if (iov->iov_len < hdrlen) {
@@ -537,7 +537,7 @@ nfs_xdr_stat(struct rpc_rqst *req, __be32 *p, void *dummy)
 	int	status;
 
 	if ((status = ntohl(*p++)) != 0)
-		status = -nfs_stat_to_errno(status);
+		status = nfs_stat_to_errno(status);
 	return status;
 }
 
@@ -551,7 +551,7 @@ nfs_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 	int	status;
 
 	if ((status = ntohl(*p++)))
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 	xdr_decode_fattr(p, fattr);
 	return 0;
 }
@@ -566,7 +566,7 @@ nfs_xdr_diropres(struct rpc_rqst *req, __be32 *p, struct nfs_diropok *res)
 	int	status;
 
 	if ((status = ntohl(*p++)))
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 	p = xdr_decode_fhandle(p, res->fh);
 	xdr_decode_fattr(p, res->fattr);
 	return 0;
@@ -604,7 +604,7 @@ nfs_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, void *dummy)
 	int	status;
 
 	if ((status = ntohl(*p++)))
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 	/* Convert length of symlink */
 	len = ntohl(*p++);
 	if (len >= rcvbuf->page_len) {
@@ -653,7 +653,7 @@ nfs_xdr_statfsres(struct rpc_rqst *req, __be32 *p, struct nfs2_fsstat *res)
 	int	status;
 
 	if ((status = ntohl(*p++)))
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 
 	res->tsize  = ntohl(*p++);
 	res->bsize  = ntohl(*p++);
@@ -672,39 +672,39 @@ static struct {
 	int errno;
 } nfs_errtbl[] = {
 	{ NFS_OK,		0		},
-	{ NFSERR_PERM,		EPERM		},
-	{ NFSERR_NOENT,		ENOENT		},
-	{ NFSERR_IO,		errno_NFSERR_IO	},
-	{ NFSERR_NXIO,		ENXIO		},
-/*	{ NFSERR_EAGAIN,	EAGAIN		}, */
-	{ NFSERR_ACCES,		EACCES		},
-	{ NFSERR_EXIST,		EEXIST		},
-	{ NFSERR_XDEV,		EXDEV		},
-	{ NFSERR_NODEV,		ENODEV		},
-	{ NFSERR_NOTDIR,	ENOTDIR		},
-	{ NFSERR_ISDIR,		EISDIR		},
-	{ NFSERR_INVAL,		EINVAL		},
-	{ NFSERR_FBIG,		EFBIG		},
-	{ NFSERR_NOSPC,		ENOSPC		},
-	{ NFSERR_ROFS,		EROFS		},
-	{ NFSERR_MLINK,		EMLINK		},
-	{ NFSERR_NAMETOOLONG,	ENAMETOOLONG	},
-	{ NFSERR_NOTEMPTY,	ENOTEMPTY	},
-	{ NFSERR_DQUOT,		EDQUOT		},
-	{ NFSERR_STALE,		ESTALE		},
-	{ NFSERR_REMOTE,	EREMOTE		},
+	{ NFSERR_PERM,		-EPERM		},
+	{ NFSERR_NOENT,		-ENOENT		},
+	{ NFSERR_IO,		-errno_NFSERR_IO},
+	{ NFSERR_NXIO,		-ENXIO		},
+/*	{ NFSERR_EAGAIN,	-EAGAIN		}, */
+	{ NFSERR_ACCES,		-EACCES		},
+	{ NFSERR_EXIST,		-EEXIST		},
+	{ NFSERR_XDEV,		-EXDEV		},
+	{ NFSERR_NODEV,		-ENODEV		},
+	{ NFSERR_NOTDIR,	-ENOTDIR	},
+	{ NFSERR_ISDIR,		-EISDIR		},
+	{ NFSERR_INVAL,		-EINVAL		},
+	{ NFSERR_FBIG,		-EFBIG		},
+	{ NFSERR_NOSPC,		-ENOSPC		},
+	{ NFSERR_ROFS,		-EROFS		},
+	{ NFSERR_MLINK,		-EMLINK		},
+	{ NFSERR_NAMETOOLONG,	-ENAMETOOLONG	},
+	{ NFSERR_NOTEMPTY,	-ENOTEMPTY	},
+	{ NFSERR_DQUOT,		-EDQUOT		},
+	{ NFSERR_STALE,		-ESTALE		},
+	{ NFSERR_REMOTE,	-EREMOTE	},
 #ifdef EWFLUSH
-	{ NFSERR_WFLUSH,	EWFLUSH		},
+	{ NFSERR_WFLUSH,	-EWFLUSH	},
 #endif
-	{ NFSERR_BADHANDLE,	EBADHANDLE	},
-	{ NFSERR_NOT_SYNC,	ENOTSYNC	},
-	{ NFSERR_BAD_COOKIE,	EBADCOOKIE	},
-	{ NFSERR_NOTSUPP,	ENOTSUPP	},
-	{ NFSERR_TOOSMALL,	ETOOSMALL	},
-	{ NFSERR_SERVERFAULT,	ESERVERFAULT	},
-	{ NFSERR_BADTYPE,	EBADTYPE	},
-	{ NFSERR_JUKEBOX,	EJUKEBOX	},
-	{ -1,			EIO		}
+	{ NFSERR_BADHANDLE,	-EBADHANDLE	},
+	{ NFSERR_NOT_SYNC,	-ENOTSYNC	},
+	{ NFSERR_BAD_COOKIE,	-EBADCOOKIE	},
+	{ NFSERR_NOTSUPP,	-ENOTSUPP	},
+	{ NFSERR_TOOSMALL,	-ETOOSMALL	},
+	{ NFSERR_SERVERFAULT,	-ESERVERFAULT	},
+	{ NFSERR_BADTYPE,	-EBADTYPE	},
+	{ NFSERR_JUKEBOX,	-EJUKEBOX	},
+	{ -1,			-EIO		}
 };
 
 /*
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index fb03048ac650..11cdddec1432 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -515,7 +515,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
 	/* Decode post_op_attrs */
 	p = xdr_decode_post_op_attr(p, res->dir_attr);
 	if (status)
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 	/* Decode verifier cookie */
 	if (res->verf) {
 		res->verf[0] = *p++;
@@ -751,7 +751,7 @@ nfs3_xdr_attrstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 	int	status;
 
 	if ((status = ntohl(*p++)))
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 	xdr_decode_fattr(p, fattr);
 	return 0;
 }
@@ -766,7 +766,7 @@ nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 	int	status;
 
 	if ((status = ntohl(*p++)))
-		status = -nfs_stat_to_errno(status);
+		status = nfs_stat_to_errno(status);
 	xdr_decode_wcc_data(p, fattr);
 	return status;
 }
@@ -786,7 +786,7 @@ nfs3_xdr_lookupres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
 	int	status;
 
 	if ((status = ntohl(*p++))) {
-		status = -nfs_stat_to_errno(status);
+		status = nfs_stat_to_errno(status);
 	} else {
 		if (!(p = xdr_decode_fhandle(p, res->fh)))
 			return -errno_NFSERR_IO;
@@ -806,7 +806,7 @@ nfs3_xdr_accessres(struct rpc_rqst *req, __be32 *p, struct nfs3_accessres *res)
 
 	p = xdr_decode_post_op_attr(p, res->fattr);
 	if (status)
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 	res->access = ntohl(*p++);
 	return 0;
 }
@@ -843,7 +843,7 @@ nfs3_xdr_readlinkres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 	p = xdr_decode_post_op_attr(p, fattr);
 
 	if (status != 0)
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 
 	/* Convert length of symlink */
 	len = ntohl(*p++);
@@ -891,7 +891,7 @@ nfs3_xdr_readres(struct rpc_rqst *req, __be32 *p, struct nfs_readres *res)
 	p = xdr_decode_post_op_attr(p, res->fattr);
 
 	if (status != 0)
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 
 	/* Decode reply count and EOF flag. NFSv3 is somewhat redundant
 	 * in that it puts the count both in the res struct and in the
@@ -941,7 +941,7 @@ nfs3_xdr_writeres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
 	p = xdr_decode_wcc_data(p, res->fattr);
 
 	if (status != 0)
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 
 	res->count = ntohl(*p++);
 	res->verf->committed = (enum nfs3_stable_how)ntohl(*p++);
@@ -972,7 +972,7 @@ nfs3_xdr_createres(struct rpc_rqst *req, __be32 *p, struct nfs3_diropres *res)
 			res->fattr->valid = 0;
 		}
 	} else {
-		status = -nfs_stat_to_errno(status);
+		status = nfs_stat_to_errno(status);
 	}
 	p = xdr_decode_wcc_data(p, res->dir_attr);
 	return status;
@@ -987,7 +987,7 @@ nfs3_xdr_renameres(struct rpc_rqst *req, __be32 *p, struct nfs3_renameres *res)
 	int	status;
 
 	if ((status = ntohl(*p++)) != 0)
-		status = -nfs_stat_to_errno(status);
+		status = nfs_stat_to_errno(status);
 	p = xdr_decode_wcc_data(p, res->fromattr);
 	p = xdr_decode_wcc_data(p, res->toattr);
 	return status;
@@ -1002,7 +1002,7 @@ nfs3_xdr_linkres(struct rpc_rqst *req, __be32 *p, struct nfs3_linkres *res)
 	int	status;
 
 	if ((status = ntohl(*p++)) != 0)
-		status = -nfs_stat_to_errno(status);
+		status = nfs_stat_to_errno(status);
 	p = xdr_decode_post_op_attr(p, res->fattr);
 	p = xdr_decode_wcc_data(p, res->dir_attr);
 	return status;
@@ -1020,7 +1020,7 @@ nfs3_xdr_fsstatres(struct rpc_rqst *req, __be32 *p, struct nfs_fsstat *res)
 
 	p = xdr_decode_post_op_attr(p, res->fattr);
 	if (status != 0)
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 
 	p = xdr_decode_hyper(p, &res->tbytes);
 	p = xdr_decode_hyper(p, &res->fbytes);
@@ -1045,7 +1045,7 @@ nfs3_xdr_fsinfores(struct rpc_rqst *req, __be32 *p, struct nfs_fsinfo *res)
 
 	p = xdr_decode_post_op_attr(p, res->fattr);
 	if (status != 0)
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 
 	res->rtmax  = ntohl(*p++);
 	res->rtpref = ntohl(*p++);
@@ -1073,7 +1073,7 @@ nfs3_xdr_pathconfres(struct rpc_rqst *req, __be32 *p, struct nfs_pathconf *res)
 
 	p = xdr_decode_post_op_attr(p, res->fattr);
 	if (status != 0)
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 	res->max_link = ntohl(*p++);
 	res->max_namelen = ntohl(*p++);
 
@@ -1092,7 +1092,7 @@ nfs3_xdr_commitres(struct rpc_rqst *req, __be32 *p, struct nfs_writeres *res)
 	status = ntohl(*p++);
 	p = xdr_decode_wcc_data(p, res->fattr);
 	if (status != 0)
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 
 	res->verf->verifier[0] = *p++;
 	res->verf->verifier[1] = *p++;
@@ -1114,7 +1114,7 @@ nfs3_xdr_getaclres(struct rpc_rqst *req, __be32 *p,
 	int err, base;
 
 	if (status != 0)
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 	p = xdr_decode_post_op_attr(p, res->fattr);
 	res->mask = ntohl(*p++);
 	if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT))
@@ -1141,7 +1141,7 @@ nfs3_xdr_setaclres(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 	int status = ntohl(*p++);
 
 	if (status)
-		return -nfs_stat_to_errno(status);
+		return nfs_stat_to_errno(status);
 	xdr_decode_post_op_attr(p, fattr);
 	return 0;
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index b8b2e391d183..809ef0d787e3 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -2241,7 +2241,7 @@ static int decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected)
 	}
 	READ32(nfserr);
 	if (nfserr != NFS_OK)
-		return -nfs4_stat_to_errno(nfserr);
+		return nfs4_stat_to_errno(nfserr);
 	return 0;
 }
 
@@ -3760,7 +3760,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
 		READ_BUF(len);
 		return -NFSERR_CLID_INUSE;
 	} else
-		return -nfs4_stat_to_errno(nfserr);
+		return nfs4_stat_to_errno(nfserr);
 
 	return 0;
 }
@@ -4422,7 +4422,7 @@ static int nfs4_xdr_dec_fsinfo(struct rpc_rqst *req, __be32 *p, struct nfs_fsinf
 	if (!status)
 		status = decode_fsinfo(&xdr, fsinfo);
 	if (!status)
-		status = -nfs4_stat_to_errno(hdr.status);
+		status = nfs4_stat_to_errno(hdr.status);
 	return status;
 }
 
@@ -4512,7 +4512,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
 	if (!status)
 		status = decode_setclientid(&xdr, clp);
 	if (!status)
-		status = -nfs4_stat_to_errno(hdr.status);
+		status = nfs4_stat_to_errno(hdr.status);
 	return status;
 }
 
@@ -4534,7 +4534,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str
 	if (!status)
 		status = decode_fsinfo(&xdr, fsinfo);
 	if (!status)
-		status = -nfs4_stat_to_errno(hdr.status);
+		status = nfs4_stat_to_errno(hdr.status);
 	return status;
 }
 
@@ -4644,42 +4644,42 @@ static struct {
 	int errno;
 } nfs_errtbl[] = {
 	{ NFS4_OK,		0		},
-	{ NFS4ERR_PERM,		EPERM		},
-	{ NFS4ERR_NOENT,	ENOENT		},
-	{ NFS4ERR_IO,		errno_NFSERR_IO	},
-	{ NFS4ERR_NXIO,		ENXIO		},
-	{ NFS4ERR_ACCESS,	EACCES		},
-	{ NFS4ERR_EXIST,	EEXIST		},
-	{ NFS4ERR_XDEV,		EXDEV		},
-	{ NFS4ERR_NOTDIR,	ENOTDIR		},
-	{ NFS4ERR_ISDIR,	EISDIR		},
-	{ NFS4ERR_INVAL,	EINVAL		},
-	{ NFS4ERR_FBIG,		EFBIG		},
-	{ NFS4ERR_NOSPC,	ENOSPC		},
-	{ NFS4ERR_ROFS,		EROFS		},
-	{ NFS4ERR_MLINK,	EMLINK		},
-	{ NFS4ERR_NAMETOOLONG,	ENAMETOOLONG	},
-	{ NFS4ERR_NOTEMPTY,	ENOTEMPTY	},
-	{ NFS4ERR_DQUOT,	EDQUOT		},
-	{ NFS4ERR_STALE,	ESTALE		},
-	{ NFS4ERR_BADHANDLE,	EBADHANDLE	},
-	{ NFS4ERR_BADOWNER,	EINVAL		},
-	{ NFS4ERR_BADNAME,	EINVAL		},
-	{ NFS4ERR_BAD_COOKIE,	EBADCOOKIE	},
-	{ NFS4ERR_NOTSUPP,	ENOTSUPP	},
-	{ NFS4ERR_TOOSMALL,	ETOOSMALL	},
-	{ NFS4ERR_SERVERFAULT,	ESERVERFAULT	},
-	{ NFS4ERR_BADTYPE,	EBADTYPE	},
-	{ NFS4ERR_LOCKED,	EAGAIN		},
-	{ NFS4ERR_RESOURCE,	EREMOTEIO	},
-	{ NFS4ERR_SYMLINK,	ELOOP		},
-	{ NFS4ERR_OP_ILLEGAL,	EOPNOTSUPP	},
-	{ NFS4ERR_DEADLOCK,	EDEADLK		},
-	{ NFS4ERR_WRONGSEC,	EPERM		}, /* FIXME: this needs
+	{ NFS4ERR_PERM,		-EPERM		},
+	{ NFS4ERR_NOENT,	-ENOENT		},
+	{ NFS4ERR_IO,		-errno_NFSERR_IO},
+	{ NFS4ERR_NXIO,		-ENXIO		},
+	{ NFS4ERR_ACCESS,	-EACCES		},
+	{ NFS4ERR_EXIST,	-EEXIST		},
+	{ NFS4ERR_XDEV,		-EXDEV		},
+	{ NFS4ERR_NOTDIR,	-ENOTDIR	},
+	{ NFS4ERR_ISDIR,	-EISDIR		},
+	{ NFS4ERR_INVAL,	-EINVAL		},
+	{ NFS4ERR_FBIG,		-EFBIG		},
+	{ NFS4ERR_NOSPC,	-ENOSPC		},
+	{ NFS4ERR_ROFS,		-EROFS		},
+	{ NFS4ERR_MLINK,	-EMLINK		},
+	{ NFS4ERR_NAMETOOLONG,	-ENAMETOOLONG	},
+	{ NFS4ERR_NOTEMPTY,	-ENOTEMPTY	},
+	{ NFS4ERR_DQUOT,	-EDQUOT		},
+	{ NFS4ERR_STALE,	-ESTALE		},
+	{ NFS4ERR_BADHANDLE,	-EBADHANDLE	},
+	{ NFS4ERR_BADOWNER,	-EINVAL		},
+	{ NFS4ERR_BADNAME,	-EINVAL		},
+	{ NFS4ERR_BAD_COOKIE,	-EBADCOOKIE	},
+	{ NFS4ERR_NOTSUPP,	-ENOTSUPP	},
+	{ NFS4ERR_TOOSMALL,	-ETOOSMALL	},
+	{ NFS4ERR_SERVERFAULT,	-ESERVERFAULT	},
+	{ NFS4ERR_BADTYPE,	-EBADTYPE	},
+	{ NFS4ERR_LOCKED,	-EAGAIN		},
+	{ NFS4ERR_RESOURCE,	-EREMOTEIO	},
+	{ NFS4ERR_SYMLINK,	-ELOOP		},
+	{ NFS4ERR_OP_ILLEGAL,	-EOPNOTSUPP	},
+	{ NFS4ERR_DEADLOCK,	-EDEADLK	},
+	{ NFS4ERR_WRONGSEC,	-EPERM		}, /* FIXME: this needs
 						    * to be handled by a
 						    * middle-layer.
 						    */
-	{ -1,			EIO		}
+	{ -1,			-EIO		}
 };
 
 /*
@@ -4696,14 +4696,14 @@ nfs4_stat_to_errno(int stat)
 	}
 	if (stat <= 10000 || stat > 10100) {
 		/* The server is looney tunes. */
-		return ESERVERFAULT;
+		return -ESERVERFAULT;
 	}
 	/* If we cannot translate the error, the recovery routines should
 	 * handle it.
 	 * Note: remaining NFSv4 error codes have values > 10000, so should
 	 * not conflict with native Linux error codes.
 	 */
-	return stat;
+	return -stat;
 }
 
 #define PROC(proc, argtype, restype)				\
-- 
cgit v1.2.3


From 441092415770ddec648800701895913c4bfd60c1 Mon Sep 17 00:00:00 2001
From: Fred Isaman <iisaman@citi.umich.edu>
Date: Wed, 2 Apr 2008 15:21:15 +0300
Subject: nfs: fix printout of multiword bitfields

Benny points out that zero-padding of multiword bitfields is necessary,
and that delimiting each word is nice to avoid endianess confusion.

bhalevy: without zero padding output can be ambiguous. Also,
since the printed array of two 32-bit unsigned integers is not a
64-bit number, delimiting the output with a semicolon makes more sense.

Signed-off-by: Fred Isaman <iisaman@citi.umich.edu>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4xdr.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 809ef0d787e3..5a2d64927b35 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1191,8 +1191,8 @@ static int encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg
 		attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
 	WRITE32(attrs[0] & readdir->bitmask[0]);
 	WRITE32(attrs[1] & readdir->bitmask[1]);
-	dprintk("%s: cookie = %Lu, verifier = 0x%x%x, bitmap = 0x%x%x\n",
-			__FUNCTION__,
+	dprintk("%s: cookie = %Lu, verifier = %08x:%08x, bitmap = %08x:%08x\n",
+			__func__,
 			(unsigned long long)readdir->cookie,
 			((u32 *)readdir->verifier.data)[0],
 			((u32 *)readdir->verifier.data)[1],
@@ -2291,7 +2291,7 @@ static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint3
 		bitmap[0] &= ~FATTR4_WORD0_SUPPORTED_ATTRS;
 	} else
 		bitmask[0] = bitmask[1] = 0;
-	dprintk("%s: bitmask=0x%x%x\n", __FUNCTION__, bitmask[0], bitmask[1]);
+	dprintk("%s: bitmask=%08x:%08x\n", __func__, bitmask[0], bitmask[1]);
 	return 0;
 }
 
@@ -3505,8 +3505,8 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 		return status;
 	READ_BUF(8);
 	COPYMEM(readdir->verifier.data, 8);
-	dprintk("%s: verifier = 0x%x%x\n",
-			__FUNCTION__,
+	dprintk("%s: verifier = %08x:%08x\n",
+			__func__,
 			((u32 *)readdir->verifier.data)[0],
 			((u32 *)readdir->verifier.data)[1]);
 
-- 
cgit v1.2.3


From 78ea323be6380a9313e87fe241809e912e8ae401 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 7 Apr 2008 20:49:28 -0400
Subject: NFSv4: Don't use cred->cr_ops->cr_name in nfs4_proc_setclientid()

With the recent change to generic creds, we can no longer use
cred->cr_ops->cr_name to distinguish between RPCSEC_GSS principals and
AUTH_SYS/AUTH_NULL identities. Replace it with the rpc_authops->au_name
instead...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index a1069325b876..dbc09271af02 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2885,7 +2885,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po
 							RPC_DISPLAY_ADDR),
 				rpc_peeraddr2str(clp->cl_rpcclient,
 							RPC_DISPLAY_PROTO),
-				cred->cr_ops->cr_name,
+				clp->cl_rpcclient->cl_auth->au_ops->au_name,
 				clp->cl_id_uniquifier);
 		setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
 				sizeof(setclientid.sc_netid),
-- 
cgit v1.2.3


From 7c67db3a8a98045744f06fcd6d8f476d9df0ba5c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Mon, 7 Apr 2008 20:50:11 -0400
Subject: NFSv4: Reintroduce machine creds

We need to try to ensure that we always use the same credentials whenever
we re-establish the clientid on the server. If not, the server won't
recognise that we're the same client, and so may not allow us to recover
state.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 93dfd75aba7c..f2f3b284e6dd 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -112,6 +112,7 @@ struct nfs_client_initdata {
 static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
 {
 	struct nfs_client *clp;
+	struct rpc_cred *cred;
 
 	if ((clp = kzalloc(sizeof(*clp), GFP_KERNEL)) == NULL)
 		goto error_0;
@@ -150,6 +151,9 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
 	clp->cl_boot_time = CURRENT_TIME;
 	clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
 #endif
+	cred = rpc_lookup_machine_cred();
+	if (!IS_ERR(cred))
+		clp->cl_machine_cred = cred;
 
 	return clp;
 
@@ -191,6 +195,9 @@ static void nfs_free_client(struct nfs_client *clp)
 	if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
 		nfs_callback_down();
 
+	if (clp->cl_machine_cred != NULL)
+		put_rpccred(clp->cl_machine_cred);
+
 	kfree(clp->cl_hostname);
 	kfree(clp);
 
-- 
cgit v1.2.3


From a2b2bb8822c78806930bbb4d4c5bb3ae69648fd0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 8 Apr 2008 16:02:17 -0400
Subject: NFSv4: Attempt to use machine credentials in SETCLIENTID calls

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4state.c | 41 +++++++++++++++++++++++++++++++++++++----
 1 file changed, 37 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 7775435ea7a5..46eb624e4f16 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -71,6 +71,29 @@ static int nfs4_init_client(struct nfs_client *clp, struct rpc_cred *cred)
 	return status;
 }
 
+static struct rpc_cred *nfs4_get_machine_cred(struct nfs_client *clp)
+{
+	struct rpc_cred *cred = NULL;
+
+	spin_lock(&clp->cl_lock);
+	if (clp->cl_machine_cred != NULL)
+		cred = get_rpccred(clp->cl_machine_cred);
+	spin_unlock(&clp->cl_lock);
+	return cred;
+}
+
+static void nfs4_clear_machine_cred(struct nfs_client *clp)
+{
+	struct rpc_cred *cred;
+
+	spin_lock(&clp->cl_lock);
+	cred = clp->cl_machine_cred;
+	clp->cl_machine_cred = NULL;
+	spin_unlock(&clp->cl_lock);
+	if (cred != NULL)
+		put_rpccred(cred);
+}
+
 struct rpc_cred *nfs4_get_renew_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
@@ -91,13 +114,18 @@ static struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp)
 {
 	struct nfs4_state_owner *sp;
 	struct rb_node *pos;
+	struct rpc_cred *cred;
 
+	cred = nfs4_get_machine_cred(clp);
+	if (cred != NULL)
+		goto out;
 	pos = rb_first(&clp->cl_state_owners);
 	if (pos != NULL) {
 		sp = rb_entry(pos, struct nfs4_state_owner, so_client_node);
-		return get_rpccred(sp->so_cred);
+		cred = get_rpccred(sp->so_cred);
 	}
-	return NULL;
+out:
+	return cred;
 }
 
 static void nfs_alloc_unique_id(struct rb_root *root, struct nfs_unique_id *new,
@@ -924,10 +952,10 @@ restart_loop:
 	if (cred != NULL) {
 		/* Yes there are: try to renew the old lease */
 		status = nfs4_proc_renew(clp, cred);
+		put_rpccred(cred);
 		switch (status) {
 			case 0:
 			case -NFS4ERR_CB_PATH_DOWN:
-				put_rpccred(cred);
 				goto out;
 			case -NFS4ERR_STALE_CLIENTID:
 			case -NFS4ERR_LEASE_MOVED:
@@ -936,14 +964,19 @@ restart_loop:
 	} else {
 		/* "reboot" to ensure we clear all state on the server */
 		clp->cl_boot_time = CURRENT_TIME;
-		cred = nfs4_get_setclientid_cred(clp);
 	}
 	/* We're going to have to re-establish a clientid */
 	nfs4_state_mark_reclaim(clp);
 	status = -ENOENT;
+	cred = nfs4_get_setclientid_cred(clp);
 	if (cred != NULL) {
 		status = nfs4_init_client(clp, cred);
 		put_rpccred(cred);
+		/* Handle case where the user hasn't set up machine creds */
+		if (status == -EACCES && cred == clp->cl_machine_cred) {
+			nfs4_clear_machine_cred(clp);
+			goto restart_loop;
+		}
 	}
 	if (status)
 		goto out_error;
-- 
cgit v1.2.3


From 63649bd7080a6a50fabcb1935f4b7c4e64155066 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@gmail.com>
Date: Thu, 17 Apr 2008 20:42:09 +0400
Subject: NFS - fix potential NULL pointer dereference v2

There is possible NULL pointer dereference if kstr[n]dup failed.
So fix them for safety.

Signed-off-by: Cyrill Gorcunov <gorcunov@gmail.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index c99ca1f992ce..2215bcd24bd2 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1297,6 +1297,8 @@ static int nfs_validate_mount_data(void *options,
 		args->namlen		= data->namlen;
 		args->bsize		= data->bsize;
 		args->auth_flavors[0]	= data->pseudoflavor;
+		if (!args->nfs_server.hostname)
+			goto out_nomem;
 
 		/*
 		 * The legacy version 6 binary mount data from userspace has a
@@ -1343,6 +1345,8 @@ static int nfs_validate_mount_data(void *options,
 		len = c - dev_name;
 		/* N.B. caller will free nfs_server.hostname in all cases */
 		args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
+		if (!args->nfs_server.hostname)
+			goto out_nomem;
 
 		c++;
 		if (strlen(c) > NFS_MAXPATHLEN)
@@ -1386,6 +1390,10 @@ out_v3_not_compiled:
 	return -EPROTONOSUPPORT;
 #endif /* !CONFIG_NFS_V3 */
 
+out_nomem:
+	dfprintk(MOUNT, "NFS: not enough memory to handle mount options\n");
+	return -ENOMEM;
+
 out_no_address:
 	dfprintk(MOUNT, "NFS: mount program didn't pass remote address\n");
 	return -EINVAL;
@@ -1892,12 +1900,16 @@ static int nfs4_validate_mount_data(void *options,
 			return -ENAMETOOLONG;
 		/* N.B. caller will free nfs_server.hostname in all cases */
 		args->nfs_server.hostname = kstrndup(dev_name, len, GFP_KERNEL);
+		if (!args->nfs_server.hostname)
+			goto out_nomem;
 
 		c++;			/* step over the ':' */
 		len = strlen(c);
 		if (len > NFS4_MAXPATHLEN)
 			return -ENAMETOOLONG;
 		args->nfs_server.export_path = kstrndup(c, len, GFP_KERNEL);
+		if (!args->nfs_server.export_path)
+			goto out_nomem;
 
 		dprintk("NFS: MNTPATH: '%s'\n", args->nfs_server.export_path);
 
@@ -1919,6 +1931,10 @@ out_inval_auth:
 		 data->auth_flavourlen);
 	return -EINVAL;
 
+out_nomem:
+	dfprintk(MOUNT, "NFS4: not enough memory to handle mount options\n");
+	return -ENOMEM;
+
 out_no_address:
 	dfprintk(MOUNT, "NFS4: mount program didn't pass remote address\n");
 	return -EINVAL;
-- 
cgit v1.2.3


From daa7da5fd3040e08e3d7878cd0667c1f4cfd338a Mon Sep 17 00:00:00 2001
From: Jeff Layton <jlayton@redhat.com>
Date: Fri, 11 Apr 2008 11:50:58 -0400
Subject: NFS: remove duplicate flags assignment from nfs_validate_mount_data

Signed-off-by: Jeff Layton <jlayton@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/super.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2215bcd24bd2..88a0ecd84c65 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1275,7 +1275,6 @@ static int nfs_validate_mount_data(void *options,
 		args->flags		= data->flags;
 		args->rsize		= data->rsize;
 		args->wsize		= data->wsize;
-		args->flags		= data->flags;
 		args->timeo		= data->timeo;
 		args->retrans		= data->retrans;
 		args->acregmin		= data->acregmin;
-- 
cgit v1.2.3


From a3dab293539031b0970585b9b355cebbc91ecbd4 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@kernel.org>
Date: Mon, 14 Apr 2008 21:41:32 +0300
Subject: make nfs_automount_list static

nfs_automount_list can now become static.

Signed-off-by: Adrian Bunk <bunk@kernel.org>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/namespace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 607f6eb9cdb5..af4d0f1e402c 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -20,7 +20,7 @@
 
 static void nfs_expire_automounts(struct work_struct *work);
 
-LIST_HEAD(nfs_automount_list);
+static LIST_HEAD(nfs_automount_list);
 static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
 int nfs_mountpoint_expiry_timeout = 500 * HZ;
 
-- 
cgit v1.2.3