summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/lockd/Makefile6
-rw-r--r--fs/lockd/clntlock.c58
-rw-r--r--fs/lockd/clntproc.c42
-rw-r--r--fs/lockd/host.c1
-rw-r--r--fs/lockd/svclock.c21
-rw-r--r--fs/lockd/trace.c3
-rw-r--r--fs/lockd/trace.h106
-rw-r--r--fs/nfs/export.c9
-rw-r--r--fs/nfs/internal.h15
-rw-r--r--fs/nfsd/export.c64
-rw-r--r--fs/nfsd/export.h1
-rw-r--r--fs/nfsd/filecache.c430
-rw-r--r--fs/nfsd/filecache.h9
-rw-r--r--fs/nfsd/nfs4idmap.c8
-rw-r--r--fs/nfsd/vfs.c13
-rw-r--r--include/linux/exportfs.h1
-rw-r--r--include/linux/lockd/lockd.h29
-rw-r--r--include/linux/nfs.h20
-rw-r--r--include/linux/sunrpc/cache.h15
-rw-r--r--include/linux/sunrpc/svc.h16
-rw-r--r--include/linux/sunrpc/svc_xprt.h5
-rw-r--r--include/linux/sunrpc/svcsock.h4
-rw-r--r--include/net/tls.h2
-rw-r--r--include/trace/events/sunrpc.h41
-rw-r--r--include/uapi/linux/nfsd/export.h13
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c12
-rw-r--r--net/sunrpc/svc.c49
-rw-r--r--net/sunrpc/svc_xprt.c33
-rw-r--r--net/sunrpc/svcauth_unix.c23
-rw-r--r--net/sunrpc/svcsock.c174
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma.c21
31 files changed, 802 insertions, 442 deletions
diff --git a/fs/lockd/Makefile b/fs/lockd/Makefile
index 6d5e83ed4476..ac9f9d84510e 100644
--- a/fs/lockd/Makefile
+++ b/fs/lockd/Makefile
@@ -3,10 +3,12 @@
# Makefile for the linux lock manager stuff
#
+ccflags-y += -I$(src) # needed for trace events
+
obj-$(CONFIG_LOCKD) += lockd.o
-lockd-objs-y := clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
- svcshare.o svcproc.o svcsubs.o mon.o xdr.o
+lockd-objs-y += clntlock.o clntproc.o clntxdr.o host.o svc.o svclock.o \
+ svcshare.o svcproc.o svcsubs.o mon.o trace.o xdr.o
lockd-objs-$(CONFIG_LOCKD_V4) += clnt4xdr.o xdr4.o svc4proc.o
lockd-objs-$(CONFIG_PROC_FS) += procfs.o
lockd-objs := $(lockd-objs-y)
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 82b19a30e0f0..e3972aa3045a 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -14,9 +14,12 @@
#include <linux/nfs_fs.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc.h>
+#include <linux/sunrpc/svc_xprt.h>
#include <linux/lockd/lockd.h>
#include <linux/kthread.h>
+#include "trace.h"
+
#define NLMDBG_FACILITY NLMDBG_CLIENT
/*
@@ -29,18 +32,6 @@ static int reclaimer(void *ptr);
* client perspective.
*/
-/*
- * This is the representation of a blocked client lock.
- */
-struct nlm_wait {
- struct list_head b_list; /* linked list */
- wait_queue_head_t b_wait; /* where to wait on */
- struct nlm_host * b_host;
- struct file_lock * b_lock; /* local file lock */
- unsigned short b_reclaim; /* got to reclaim lock */
- __be32 b_status; /* grant callback status */
-};
-
static LIST_HEAD(nlm_blocked);
static DEFINE_SPINLOCK(nlm_blocked_lock);
@@ -94,41 +85,42 @@ void nlmclnt_done(struct nlm_host *host)
}
EXPORT_SYMBOL_GPL(nlmclnt_done);
+void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host, struct file_lock *fl)
+{
+ block->b_host = host;
+ block->b_lock = fl;
+ init_waitqueue_head(&block->b_wait);
+ block->b_status = nlm_lck_blocked;
+}
+
/*
* Queue up a lock for blocking so that the GRANTED request can see it
*/
-struct nlm_wait *nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl)
+void nlmclnt_queue_block(struct nlm_wait *block)
{
- struct nlm_wait *block;
-
- block = kmalloc(sizeof(*block), GFP_KERNEL);
- if (block != NULL) {
- block->b_host = host;
- block->b_lock = fl;
- init_waitqueue_head(&block->b_wait);
- block->b_status = nlm_lck_blocked;
-
- spin_lock(&nlm_blocked_lock);
- list_add(&block->b_list, &nlm_blocked);
- spin_unlock(&nlm_blocked_lock);
- }
- return block;
+ spin_lock(&nlm_blocked_lock);
+ list_add(&block->b_list, &nlm_blocked);
+ spin_unlock(&nlm_blocked_lock);
}
-void nlmclnt_finish_block(struct nlm_wait *block)
+/*
+ * Dequeue the block and return its final status
+ */
+__be32 nlmclnt_dequeue_block(struct nlm_wait *block)
{
- if (block == NULL)
- return;
+ __be32 status;
+
spin_lock(&nlm_blocked_lock);
list_del(&block->b_list);
+ status = block->b_status;
spin_unlock(&nlm_blocked_lock);
- kfree(block);
+ return status;
}
/*
* Block on a lock
*/
-int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
+int nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
{
long ret;
@@ -154,7 +146,6 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout)
/* Reset the lock status after a server reboot so we resend */
if (block->b_status == nlm_lck_denied_grace_period)
block->b_status = nlm_lck_blocked;
- req->a_res.status = block->b_status;
return 0;
}
@@ -198,6 +189,7 @@ __be32 nlmclnt_grant(const struct sockaddr *addr, const struct nlm_lock *lock)
res = nlm_granted;
}
spin_unlock(&nlm_blocked_lock);
+ trace_nlmclnt_grant(lock, addr, svc_addr_len(addr), res);
return res;
}
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 16b4de868cd2..fba6c7fa7474 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -20,6 +20,8 @@
#include <linux/sunrpc/svc.h>
#include <linux/lockd/lockd.h>
+#include "trace.h"
+
#define NLMDBG_FACILITY NLMDBG_CLIENT
#define NLMCLNT_GRACE_WAIT (5*HZ)
#define NLMCLNT_POLL_TIMEOUT (30*HZ)
@@ -451,6 +453,9 @@ nlmclnt_test(struct nlm_rqst *req, struct file_lock *fl)
status = nlm_stat_to_errno(req->a_res.status);
}
out:
+ trace_nlmclnt_test(&req->a_args.lock,
+ (const struct sockaddr *)&req->a_host->h_addr,
+ req->a_host->h_addrlen, req->a_res.status);
nlmclnt_release_call(req);
return status;
}
@@ -516,9 +521,10 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
const struct cred *cred = nfs_file_cred(fl->fl_file);
struct nlm_host *host = req->a_host;
struct nlm_res *resp = &req->a_res;
- struct nlm_wait *block = NULL;
+ struct nlm_wait block;
unsigned char fl_flags = fl->fl_flags;
unsigned char fl_type;
+ __be32 b_status;
int status = -ENOLCK;
if (nsm_monitor(host) < 0)
@@ -531,31 +537,41 @@ nlmclnt_lock(struct nlm_rqst *req, struct file_lock *fl)
if (status < 0)
goto out;
- block = nlmclnt_prepare_block(host, fl);
+ nlmclnt_prepare_block(&block, host, fl);
again:
/*
* Initialise resp->status to a valid non-zero value,
* since 0 == nlm_lck_granted
*/
resp->status = nlm_lck_blocked;
- for(;;) {
+
+ /*
+ * A GRANTED callback can come at any time -- even before the reply
+ * to the LOCK request arrives, so we queue the wait before
+ * requesting the lock.
+ */
+ nlmclnt_queue_block(&block);
+ for (;;) {
/* Reboot protection */
fl->fl_u.nfs_fl.state = host->h_state;
status = nlmclnt_call(cred, req, NLMPROC_LOCK);
if (status < 0)
break;
/* Did a reclaimer thread notify us of a server reboot? */
- if (resp->status == nlm_lck_denied_grace_period)
+ if (resp->status == nlm_lck_denied_grace_period)
continue;
if (resp->status != nlm_lck_blocked)
break;
/* Wait on an NLM blocking lock */
- status = nlmclnt_block(block, req, NLMCLNT_POLL_TIMEOUT);
+ status = nlmclnt_wait(&block, req, NLMCLNT_POLL_TIMEOUT);
if (status < 0)
break;
- if (resp->status != nlm_lck_blocked)
+ if (block.b_status != nlm_lck_blocked)
break;
}
+ b_status = nlmclnt_dequeue_block(&block);
+ if (resp->status == nlm_lck_blocked)
+ resp->status = b_status;
/* if we were interrupted while blocking, then cancel the lock request
* and exit
@@ -564,7 +580,7 @@ again:
if (!req->a_args.block)
goto out_unlock;
if (nlmclnt_cancel(host, req->a_args.block, fl) == 0)
- goto out_unblock;
+ goto out;
}
if (resp->status == nlm_granted) {
@@ -593,16 +609,19 @@ again:
status = -ENOLCK;
else
status = nlm_stat_to_errno(resp->status);
-out_unblock:
- nlmclnt_finish_block(block);
out:
+ trace_nlmclnt_lock(&req->a_args.lock,
+ (const struct sockaddr *)&req->a_host->h_addr,
+ req->a_host->h_addrlen, req->a_res.status);
nlmclnt_release_call(req);
return status;
out_unlock:
/* Fatal error: ensure that we remove the lock altogether */
+ trace_nlmclnt_lock(&req->a_args.lock,
+ (const struct sockaddr *)&req->a_host->h_addr,
+ req->a_host->h_addrlen, req->a_res.status);
dprintk("lockd: lock attempt ended in fatal error.\n"
" Attempting to unlock.\n");
- nlmclnt_finish_block(block);
fl_type = fl->fl_type;
fl->fl_type = F_UNLCK;
down_read(&host->h_rwsem);
@@ -696,6 +715,9 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
/* What to do now? I'm out of my depth... */
status = -ENOLCK;
out:
+ trace_nlmclnt_unlock(&req->a_args.lock,
+ (const struct sockaddr *)&req->a_host->h_addr,
+ req->a_host->h_addrlen, req->a_res.status);
nlmclnt_release_call(req);
return status;
}
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index cdc8e12cdac4..127a728fcbc8 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -629,6 +629,7 @@ nlm_shutdown_hosts_net(struct net *net)
rpc_shutdown_client(host->h_rpcclnt);
host->h_rpcclnt = NULL;
}
+ nlmsvc_free_host_resources(host);
}
/* Then, perform a garbage collection pass */
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 4e30f3c50970..c43ccdf28ed9 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -954,19 +954,32 @@ void
nlmsvc_grant_reply(struct nlm_cookie *cookie, __be32 status)
{
struct nlm_block *block;
+ struct file_lock *fl;
+ int error;
dprintk("grant_reply: looking for cookie %x, s=%d \n",
*(unsigned int *)(cookie->data), status);
if (!(block = nlmsvc_find_block(cookie)))
return;
- if (status == nlm_lck_denied_grace_period) {
+ switch (status) {
+ case nlm_lck_denied_grace_period:
/* Try again in a couple of seconds */
nlmsvc_insert_block(block, 10 * HZ);
- } else {
+ break;
+ case nlm_lck_denied:
+ /* Client doesn't want it, just unlock it */
+ nlmsvc_unlink_block(block);
+ fl = &block->b_call->a_args.lock.fl;
+ fl->fl_type = F_UNLCK;
+ error = vfs_lock_file(fl->fl_file, F_SETLK, fl, NULL);
+ if (error)
+ pr_warn("lockd: unable to unlock lock rejected by client!\n");
+ break;
+ default:
/*
- * Lock is now held by client, or has been rejected.
- * In both cases, the block should be removed.
+ * Either it was accepted or the status makes no sense
+ * just unlink it either way.
*/
nlmsvc_unlink_block(block);
}
diff --git a/fs/lockd/trace.c b/fs/lockd/trace.c
new file mode 100644
index 000000000000..d9a6ff6e673c
--- /dev/null
+++ b/fs/lockd/trace.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+#define CREATE_TRACE_POINTS
+#include "trace.h"
diff --git a/fs/lockd/trace.h b/fs/lockd/trace.h
new file mode 100644
index 000000000000..7461b13b6e74
--- /dev/null
+++ b/fs/lockd/trace.h
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM lockd
+
+#if !defined(_TRACE_LOCKD_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_LOCKD_H
+
+#include <linux/tracepoint.h>
+#include <linux/crc32.h>
+#include <linux/nfs.h>
+#include <linux/lockd/lockd.h>
+
+#ifdef CONFIG_LOCKD_V4
+#define NLM_STATUS_LIST \
+ nlm_status_code(LCK_GRANTED) \
+ nlm_status_code(LCK_DENIED) \
+ nlm_status_code(LCK_DENIED_NOLOCKS) \
+ nlm_status_code(LCK_BLOCKED) \
+ nlm_status_code(LCK_DENIED_GRACE_PERIOD) \
+ nlm_status_code(DEADLCK) \
+ nlm_status_code(ROFS) \
+ nlm_status_code(STALE_FH) \
+ nlm_status_code(FBIG) \
+ nlm_status_code_end(FAILED)
+#else
+#define NLM_STATUS_LIST \
+ nlm_status_code(LCK_GRANTED) \
+ nlm_status_code(LCK_DENIED) \
+ nlm_status_code(LCK_DENIED_NOLOCKS) \
+ nlm_status_code(LCK_BLOCKED) \
+ nlm_status_code_end(LCK_DENIED_GRACE_PERIOD)
+#endif
+
+#undef nlm_status_code
+#undef nlm_status_code_end
+#define nlm_status_code(x) TRACE_DEFINE_ENUM(NLM_##x);
+#define nlm_status_code_end(x) TRACE_DEFINE_ENUM(NLM_##x);
+
+NLM_STATUS_LIST
+
+#undef nlm_status_code
+#undef nlm_status_code_end
+#define nlm_status_code(x) { NLM_##x, #x },
+#define nlm_status_code_end(x) { NLM_##x, #x }
+
+#define show_nlm_status(x) __print_symbolic(x, NLM_STATUS_LIST)
+
+DECLARE_EVENT_CLASS(nlmclnt_lock_event,
+ TP_PROTO(
+ const struct nlm_lock *lock,
+ const struct sockaddr *addr,
+ unsigned int addrlen,
+ __be32 status
+ ),
+
+ TP_ARGS(lock, addr, addrlen, status),
+
+ TP_STRUCT__entry(
+ __field(u32, oh)
+ __field(u32, svid)
+ __field(u32, fh)
+ __field(unsigned long, status)
+ __field(u64, start)
+ __field(u64, len)
+ __sockaddr(addr, addrlen)
+ ),
+
+ TP_fast_assign(
+ __entry->oh = ~crc32_le(0xffffffff, lock->oh.data, lock->oh.len);
+ __entry->svid = lock->svid;
+ __entry->fh = nfs_fhandle_hash(&lock->fh);
+ __entry->start = lock->lock_start;
+ __entry->len = lock->lock_len;
+ __entry->status = be32_to_cpu(status);
+ __assign_sockaddr(addr, addr, addrlen);
+ ),
+
+ TP_printk(
+ "addr=%pISpc oh=0x%08x svid=0x%08x fh=0x%08x start=%llu len=%llu status=%s",
+ __get_sockaddr(addr), __entry->oh, __entry->svid,
+ __entry->fh, __entry->start, __entry->len,
+ show_nlm_status(__entry->status)
+ )
+);
+
+#define DEFINE_NLMCLNT_EVENT(name) \
+ DEFINE_EVENT(nlmclnt_lock_event, name, \
+ TP_PROTO( \
+ const struct nlm_lock *lock, \
+ const struct sockaddr *addr, \
+ unsigned int addrlen, \
+ __be32 status \
+ ), \
+ TP_ARGS(lock, addr, addrlen, status))
+
+DEFINE_NLMCLNT_EVENT(nlmclnt_test);
+DEFINE_NLMCLNT_EVENT(nlmclnt_lock);
+DEFINE_NLMCLNT_EVENT(nlmclnt_unlock);
+DEFINE_NLMCLNT_EVENT(nlmclnt_grant);
+
+#endif /* _TRACE_LOCKD_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace
+#include <trace/define_trace.h>
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index d6a6d1ebb8fd..be686b8e0c54 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -149,7 +149,10 @@ const struct export_operations nfs_export_ops = {
.encode_fh = nfs_encode_fh,
.fh_to_dentry = nfs_fh_to_dentry,
.get_parent = nfs_get_parent,
- .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
- EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
- EXPORT_OP_NOATOMIC_ATTR,
+ .flags = EXPORT_OP_NOWCC |
+ EXPORT_OP_NOSUBTREECHK |
+ EXPORT_OP_CLOSE_BEFORE_UNLINK |
+ EXPORT_OP_REMOTE_FS |
+ EXPORT_OP_NOATOMIC_ATTR |
+ EXPORT_OP_FLUSH_ON_CLOSE,
};
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9ba07553eab8..3cc027d3bd58 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -855,27 +855,12 @@ u64 nfs_timespec_to_change_attr(const struct timespec64 *ts)
}
#ifdef CONFIG_CRC32
-/**
- * nfs_fhandle_hash - calculate the crc32 hash for the filehandle
- * @fh - pointer to filehandle
- *
- * returns a crc32 hash for the filehandle that is compatible with
- * the one displayed by "wireshark".
- */
-static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
-{
- return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size);
-}
static inline u32 nfs_stateid_hash(const nfs4_stateid *stateid)
{
return ~crc32_le(0xFFFFFFFF, &stateid->other[0],
NFS4_STATEID_OTHER_SIZE);
}
#else
-static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
-{
- return 0;
-}
static inline u32 nfs_stateid_hash(nfs4_stateid *stateid)
{
return 0;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 668c7527b17e..ae85257b4238 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -123,11 +123,11 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
/* OK, we seem to have a valid key */
key.h.flags = 0;
- key.h.expiry_time = get_expiry(&mesg);
- if (key.h.expiry_time == 0)
+ err = get_expiry(&mesg, &key.h.expiry_time);
+ if (err)
goto out;
- key.ek_client = dom;
+ key.ek_client = dom;
key.ek_fsidtype = fsidtype;
memcpy(key.ek_fsid, buf, len);
@@ -439,7 +439,6 @@ static int check_export(struct path *path, int *flags, unsigned char *uuid)
return -EINVAL;
}
return 0;
-
}
#ifdef CONFIG_NFSD_V4
@@ -546,6 +545,29 @@ static inline int
secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
#endif
+static int xprtsec_parse(char **mesg, char *buf, struct svc_export *exp)
+{
+ unsigned int i, mode, listsize;
+ int err;
+
+ err = get_uint(mesg, &listsize);
+ if (err)
+ return err;
+ if (listsize > NFSEXP_XPRTSEC_NUM)
+ return -EINVAL;
+
+ exp->ex_xprtsec_modes = 0;
+ for (i = 0; i < listsize; i++) {
+ err = get_uint(mesg, &mode);
+ if (err)
+ return err;
+ if (mode > NFSEXP_XPRTSEC_MTLS)
+ return -EINVAL;
+ exp->ex_xprtsec_modes |= mode;
+ }
+ return 0;
+}
+
static inline int
nfsd_uuid_parse(char **mesg, char *buf, unsigned char **puuid)
{
@@ -608,11 +630,11 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
exp.ex_client = dom;
exp.cd = cd;
exp.ex_devid_map = NULL;
+ exp.ex_xprtsec_modes = NFSEXP_XPRTSEC_ALL;
/* expiry */
- err = -EINVAL;
- exp.h.expiry_time = get_expiry(&mesg);
- if (exp.h.expiry_time == 0)
+ err = get_expiry(&mesg, &exp.h.expiry_time);
+ if (err)
goto out3;
/* flags */
@@ -624,7 +646,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
if (err || an_int < 0)
goto out3;
exp.ex_flags= an_int;
-
+
/* anon uid */
err = get_int(&mesg, &an_int);
if (err)
@@ -650,6 +672,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
err = nfsd_uuid_parse(&mesg, buf, &exp.ex_uuid);
else if (strcmp(buf, "secinfo") == 0)
err = secinfo_parse(&mesg, buf, &exp);
+ else if (strcmp(buf, "xprtsec") == 0)
+ err = xprtsec_parse(&mesg, buf, &exp);
else
/* quietly ignore unknown words and anything
* following. Newer user-space can try to set
@@ -663,6 +687,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
err = check_export(&exp.ex_path, &exp.ex_flags, exp.ex_uuid);
if (err)
goto out4;
+
/*
* No point caching this if it would immediately expire.
* Also, this protects exportfs's dummy export from the
@@ -824,6 +849,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
for (i = 0; i < MAX_SECINFO_LIST; i++) {
new->ex_flavors[i] = item->ex_flavors[i];
}
+ new->ex_xprtsec_modes = item->ex_xprtsec_modes;
}
static struct cache_head *svc_export_alloc(void)
@@ -1035,9 +1061,26 @@ static struct svc_export *exp_find(struct cache_detail *cd,
__be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
{
- struct exp_flavor_info *f;
- struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
+ struct exp_flavor_info *f, *end = exp->ex_flavors + exp->ex_nflavors;
+ struct svc_xprt *xprt = rqstp->rq_xprt;
+
+ if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_NONE) {
+ if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags))
+ goto ok;
+ }
+ if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_TLS) {
+ if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
+ !test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
+ goto ok;
+ }
+ if (exp->ex_xprtsec_modes & NFSEXP_XPRTSEC_MTLS) {
+ if (test_bit(XPT_TLS_SESSION, &xprt->xpt_flags) &&
+ test_bit(XPT_PEER_AUTH, &xprt->xpt_flags))
+ goto ok;
+ }
+ goto denied;
+ok:
/* legacy gss-only clients are always OK: */
if (exp->ex_client == rqstp->rq_gssclient)
return 0;
@@ -1062,6 +1105,7 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
if (nfsd4_spo_must_allow(rqstp))
return 0;
+denied:
return rqstp->rq_vers < 4 ? nfserr_acces : nfserr_wrongsec;
}
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index d03f7f6a8642..2df8ae25aad3 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -77,6 +77,7 @@ struct svc_export {
struct cache_detail *cd;
struct rcu_head ex_rcu;
struct export_stats ex_stats;
+ unsigned long ex_xprtsec_modes;
};
/* an "export key" (expkey) maps a filehandlefragement to an
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 6e8712bd7c99..ee9c923192e0 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -74,70 +74,9 @@ static struct list_lru nfsd_file_lru;
static unsigned long nfsd_file_flags;
static struct fsnotify_group *nfsd_file_fsnotify_group;
static struct delayed_work nfsd_filecache_laundrette;
-static struct rhashtable nfsd_file_rhash_tbl
+static struct rhltable nfsd_file_rhltable
____cacheline_aligned_in_smp;
-enum nfsd_file_lookup_type {
- NFSD_FILE_KEY_INODE,
- NFSD_FILE_KEY_FULL,
-};
-
-struct nfsd_file_lookup_key {
- struct inode *inode;
- struct net *net;
- const struct cred *cred;
- unsigned char need;
- bool gc;
- enum nfsd_file_lookup_type type;
-};
-
-/*
- * The returned hash value is based solely on the address of an in-code
- * inode, a pointer to a slab-allocated object. The entropy in such a
- * pointer is concentrated in its middle bits.
- */
-static u32 nfsd_file_inode_hash(const struct inode *inode, u32 seed)
-{
- unsigned long ptr = (unsigned long)inode;
- u32 k;
-
- k = ptr >> L1_CACHE_SHIFT;
- k &= 0x00ffffff;
- return jhash2(&k, 1, seed);
-}
-
-/**
- * nfsd_file_key_hashfn - Compute the hash value of a lookup key
- * @data: key on which to compute the hash value
- * @len: rhash table's key_len parameter (unused)
- * @seed: rhash table's random seed of the day
- *
- * Return value:
- * Computed 32-bit hash value
- */
-static u32 nfsd_file_key_hashfn(const void *data, u32 len, u32 seed)
-{
- const struct nfsd_file_lookup_key *key = data;
-
- return nfsd_file_inode_hash(key->inode, seed);
-}
-
-/**
- * nfsd_file_obj_hashfn - Compute the hash value of an nfsd_file
- * @data: object on which to compute the hash value
- * @len: rhash table's key_len parameter (unused)
- * @seed: rhash table's random seed of the day
- *
- * Return value:
- * Computed 32-bit hash value
- */
-static u32 nfsd_file_obj_hashfn(const void *data, u32 len, u32 seed)
-{
- const struct nfsd_file *nf = data;
-
- return nfsd_file_inode_hash(nf->nf_inode, seed);
-}
-
static bool
nfsd_match_cred(const struct cred *c1, const struct cred *c2)
{
@@ -158,53 +97,16 @@ nfsd_match_cred(const struct cred *c1, const struct cred *c2)
return true;
}
-/**
- * nfsd_file_obj_cmpfn - Match a cache item against search criteria
- * @arg: search criteria
- * @ptr: cache item to check
- *
- * Return values:
- * %0 - Item matches search criteria
- * %1 - Item does not match search criteria
- */
-static int nfsd_file_obj_cmpfn(struct rhashtable_compare_arg *arg,
- const void *ptr)
-{
- const struct nfsd_file_lookup_key *key = arg->key;
- const struct nfsd_file *nf = ptr;
-
- switch (key->type) {
- case NFSD_FILE_KEY_INODE:
- if (nf->nf_inode != key->inode)
- return 1;
- break;
- case NFSD_FILE_KEY_FULL:
- if (nf->nf_inode != key->inode)
- return 1;
- if (nf->nf_may != key->need)
- return 1;
- if (nf->nf_net != key->net)
- return 1;
- if (!nfsd_match_cred(nf->nf_cred, key->cred))
- return 1;
- if (!!test_bit(NFSD_FILE_GC, &nf->nf_flags) != key->gc)
- return 1;
- if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
- return 1;
- break;
- }
- return 0;
-}
-
static const struct rhashtable_params nfsd_file_rhash_params = {
.key_len = sizeof_field(struct nfsd_file, nf_inode),
.key_offset = offsetof(struct nfsd_file, nf_inode),
- .head_offset = offsetof(struct nfsd_file, nf_rhash),
- .hashfn = nfsd_file_key_hashfn,
- .obj_hashfn = nfsd_file_obj_hashfn,
- .obj_cmpfn = nfsd_file_obj_cmpfn,
- /* Reduce resizing churn on light workloads */
- .min_size = 512, /* buckets */
+ .head_offset = offsetof(struct nfsd_file, nf_rlist),
+
+ /*
+ * Start with a single page hash table to reduce resizing churn
+ * on light workloads.
+ */
+ .min_size = 256,
.automatic_shrinking = true,
};
@@ -307,27 +209,27 @@ nfsd_file_mark_find_or_create(struct nfsd_file *nf, struct inode *inode)
}
static struct nfsd_file *
-nfsd_file_alloc(struct nfsd_file_lookup_key *key, unsigned int may)
+nfsd_file_alloc(struct net *net, struct inode *inode, unsigned char need,
+ bool want_gc)
{
struct nfsd_file *nf;
nf = kmem_cache_alloc(nfsd_file_slab, GFP_KERNEL);
- if (nf) {
- INIT_LIST_HEAD(&nf->nf_lru);
- nf->nf_birthtime = ktime_get();
- nf->nf_file = NULL;
- nf->nf_cred = get_current_cred();
- nf->nf_net = key->net;
- nf->nf_flags = 0;
- __set_bit(NFSD_FILE_HASHED, &nf->nf_flags);
- __set_bit(NFSD_FILE_PENDING, &nf->nf_flags);
- if (key->gc)
- __set_bit(NFSD_FILE_GC, &nf->nf_flags);
- nf->nf_inode = key->inode;
- refcount_set(&nf->nf_ref, 1);
- nf->nf_may = key->need;
- nf->nf_mark = NULL;
- }
+ if (unlikely(!nf))
+ return NULL;
+
+ INIT_LIST_HEAD(&nf->nf_lru);
+ nf->nf_birthtime = ktime_get();
+ nf->nf_file = NULL;
+ nf->nf_cred = get_current_cred();
+ nf->nf_net = net;
+ nf->nf_flags = want_gc ?
+ BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING) | BIT(NFSD_FILE_GC) :
+ BIT(NFSD_FILE_HASHED) | BIT(NFSD_FILE_PENDING);
+ nf->nf_inode = inode;
+ refcount_set(&nf->nf_ref, 1);
+ nf->nf_may = need;
+ nf->nf_mark = NULL;
return nf;
}
@@ -352,8 +254,8 @@ static void
nfsd_file_hash_remove(struct nfsd_file *nf)
{
trace_nfsd_file_unhash(nf);
- rhashtable_remove_fast(&nfsd_file_rhash_tbl, &nf->nf_rhash,
- nfsd_file_rhash_params);
+ rhltable_remove(&nfsd_file_rhltable, &nf->nf_rlist,
+ nfsd_file_rhash_params);
}
static bool
@@ -380,10 +282,8 @@ nfsd_file_free(struct nfsd_file *nf)
if (nf->nf_mark)
nfsd_file_mark_put(nf->nf_mark);
if (nf->nf_file) {
- get_file(nf->nf_file);
- filp_close(nf->nf_file, NULL);
nfsd_file_check_write_error(nf);
- fput(nf->nf_file);
+ filp_close(nf->nf_file, NULL);
}
/*
@@ -402,13 +302,23 @@ nfsd_file_check_writeback(struct nfsd_file *nf)
struct file *file = nf->nf_file;
struct address_space *mapping;
- if (!file || !(file->f_mode & FMODE_WRITE))
+ /* File not open for write? */
+ if (!(file->f_mode & FMODE_WRITE))
+ return false;
+
+ /*
+ * Some filesystems (e.g. NFS) flush all dirty data on close.
+ * On others, there is no need to wait for writeback.
+ */
+ if (!(file_inode(file)->i_sb->s_export_op->flags & EXPORT_OP_FLUSH_ON_CLOSE))
return false;
+
mapping = file->f_mapping;
return mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) ||
mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK);
}
+
static bool nfsd_file_lru_add(struct nfsd_file *nf)
{
set_bit(NFSD_FILE_REFERENCED, &nf->nf_flags);
@@ -492,49 +402,26 @@ nfsd_file_dispose_list(struct list_head *dispose)
}
}
-static void
-nfsd_file_list_remove_disposal(struct list_head *dst,
- struct nfsd_fcache_disposal *l)
-{
- spin_lock(&l->lock);
- list_splice_init(&l->freeme, dst);
- spin_unlock(&l->lock);
-}
-
-static void
-nfsd_file_list_add_disposal(struct list_head *files, struct net *net)
-{
- struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- struct nfsd_fcache_disposal *l = nn->fcache_disposal;
-
- spin_lock(&l->lock);
- list_splice_tail_init(files, &l->freeme);
- spin_unlock(&l->lock);
- queue_work(nfsd_filecache_wq, &l->work);
-}
-
-static void
-nfsd_file_list_add_pernet(struct list_head *dst, struct list_head *src,
- struct net *net)
-{
- struct nfsd_file *nf, *tmp;
-
- list_for_each_entry_safe(nf, tmp, src, nf_lru) {
- if (nf->nf_net == net)
- list_move_tail(&nf->nf_lru, dst);
- }
-}
-
+/**
+ * nfsd_file_dispose_list_delayed - move list of dead files to net's freeme list
+ * @dispose: list of nfsd_files to be disposed
+ *
+ * Transfers each file to the "freeme" list for its nfsd_net, to eventually
+ * be disposed of by the per-net garbage collector.
+ */
static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
- LIST_HEAD(list);
- struct nfsd_file *nf;
-
while(!list_empty(dispose)) {
- nf = list_first_entry(dispose, struct nfsd_file, nf_lru);
- nfsd_file_list_add_pernet(&list, dispose, nf->nf_net);
- nfsd_file_list_add_disposal(&list, nf->nf_net);
+ struct nfsd_file *nf = list_first_entry(dispose,
+ struct nfsd_file, nf_lru);
+ struct nfsd_net *nn = net_generic(nf->nf_net, nfsd_net_id);
+ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+
+ spin_lock(&l->lock);
+ list_move_tail(&nf->nf_lru, &l->freeme);
+ spin_unlock(&l->lock);
+ queue_work(nfsd_filecache_wq, &l->work);
}
}
@@ -678,8 +565,8 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
* @inode: inode on which to close out nfsd_files
* @dispose: list on which to gather nfsd_files to close out
*
- * An nfsd_file represents a struct file being held open on behalf of nfsd. An
- * open file however can block other activity (such as leases), or cause
+ * An nfsd_file represents a struct file being held open on behalf of nfsd.
+ * An open file however can block other activity (such as leases), or cause
* undesirable behavior (e.g. spurious silly-renames when reexporting NFS).
*
* This function is intended to find open nfsd_files when this sort of
@@ -692,20 +579,17 @@ nfsd_file_cond_queue(struct nfsd_file *nf, struct list_head *dispose)
static void
nfsd_file_queue_for_close(struct inode *inode, struct list_head *dispose)
{
- struct nfsd_file_lookup_key key = {
- .type = NFSD_FILE_KEY_INODE,
- .inode = inode,
- };
+ struct rhlist_head *tmp, *list;
struct nfsd_file *nf;
rcu_read_lock();
- do {
- nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
- nfsd_file_rhash_params);
- if (!nf)
- break;
+ list = rhltable_lookup(&nfsd_file_rhltable, &inode,
+ nfsd_file_rhash_params);
+ rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
+ if (!test_bit(NFSD_FILE_GC, &nf->nf_flags))
+ continue;
nfsd_file_cond_queue(nf, dispose);
- } while (1);
+ }
rcu_read_unlock();
}
@@ -758,8 +642,8 @@ nfsd_file_close_inode_sync(struct inode *inode)
* nfsd_file_delayed_close - close unused nfsd_files
* @work: dummy
*
- * Walk the LRU list and destroy any entries that have not been used since
- * the last scan.
+ * Scrape the freeme list for this nfsd_net, and then dispose of them
+ * all.
*/
static void
nfsd_file_delayed_close(struct work_struct *work)
@@ -768,7 +652,10 @@ nfsd_file_delayed_close(struct work_struct *work)
struct nfsd_fcache_disposal *l = container_of(work,
struct nfsd_fcache_disposal, work);
- nfsd_file_list_remove_disposal(&head, l);
+ spin_lock(&l->lock);
+ list_splice_init(&l->freeme, &head);
+ spin_unlock(&l->lock);
+
nfsd_file_dispose_list(&head);
}
@@ -829,7 +716,7 @@ nfsd_file_cache_init(void)
if (test_and_set_bit(NFSD_FILE_CACHE_UP, &nfsd_file_flags) == 1)
return 0;
- ret = rhashtable_init(&nfsd_file_rhash_tbl, &nfsd_file_rhash_params);
+ ret = rhltable_init(&nfsd_file_rhltable, &nfsd_file_rhash_params);
if (ret)
return ret;
@@ -897,7 +784,7 @@ out_err:
nfsd_file_mark_slab = NULL;
destroy_workqueue(nfsd_filecache_wq);
nfsd_filecache_wq = NULL;
- rhashtable_destroy(&nfsd_file_rhash_tbl);
+ rhltable_destroy(&nfsd_file_rhltable);
goto out;
}
@@ -906,7 +793,8 @@ out_err:
* @net: net-namespace to shut down the cache (may be NULL)
*
* Walk the nfsd_file cache and close out any that match @net. If @net is NULL,
- * then close out everything. Called when an nfsd instance is being shut down.
+ * then close out everything. Called when an nfsd instance is being shut down,
+ * and when the exports table is flushed.
*/
static void
__nfsd_file_cache_purge(struct net *net)
@@ -915,7 +803,7 @@ __nfsd_file_cache_purge(struct net *net)
struct nfsd_file *nf;
LIST_HEAD(dispose);
- rhashtable_walk_enter(&nfsd_file_rhash_tbl, &iter);
+ rhltable_walk_enter(&nfsd_file_rhltable, &iter);
do {
rhashtable_walk_start(&iter);
@@ -1021,7 +909,7 @@ nfsd_file_cache_shutdown(void)
nfsd_file_mark_slab = NULL;
destroy_workqueue(nfsd_filecache_wq);
nfsd_filecache_wq = NULL;
- rhashtable_destroy(&nfsd_file_rhash_tbl);
+ rhltable_destroy(&nfsd_file_rhltable);
for_each_possible_cpu(i) {
per_cpu(nfsd_file_cache_hits, i) = 0;
@@ -1032,6 +920,35 @@ nfsd_file_cache_shutdown(void)
}
}
+static struct nfsd_file *
+nfsd_file_lookup_locked(const struct net *net, const struct cred *cred,
+ struct inode *inode, unsigned char need,
+ bool want_gc)
+{
+ struct rhlist_head *tmp, *list;
+ struct nfsd_file *nf;
+
+ list = rhltable_lookup(&nfsd_file_rhltable, &inode,
+ nfsd_file_rhash_params);
+ rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist) {
+ if (nf->nf_may != need)
+ continue;
+ if (nf->nf_net != net)
+ continue;
+ if (!nfsd_match_cred(nf->nf_cred, cred))
+ continue;
+ if (test_bit(NFSD_FILE_GC, &nf->nf_flags) != want_gc)
+ continue;
+ if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0)
+ continue;
+
+ if (!nfsd_file_get(nf))
+ continue;
+ return nf;
+ }
+ return NULL;
+}
+
/**
* nfsd_file_is_cached - are there any cached open files for this inode?
* @inode: inode to check
@@ -1046,15 +963,20 @@ nfsd_file_cache_shutdown(void)
bool
nfsd_file_is_cached(struct inode *inode)
{
- struct nfsd_file_lookup_key key = {
- .type = NFSD_FILE_KEY_INODE,
- .inode = inode,
- };
+ struct rhlist_head *tmp, *list;
+ struct nfsd_file *nf;
bool ret = false;
- if (rhashtable_lookup_fast(&nfsd_file_rhash_tbl, &key,
- nfsd_file_rhash_params) != NULL)
- ret = true;
+ rcu_read_lock();
+ list = rhltable_lookup(&nfsd_file_rhltable, &inode,
+ nfsd_file_rhash_params);
+ rhl_for_each_entry_rcu(nf, tmp, list, nf_rlist)
+ if (test_bit(NFSD_FILE_GC, &nf->nf_flags)) {
+ ret = true;
+ break;
+ }
+ rcu_read_unlock();
+
trace_nfsd_file_is_cached(inode, (int)ret);
return ret;
}
@@ -1064,14 +986,12 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct file *file,
struct nfsd_file **pnf, bool want_gc)
{
- struct nfsd_file_lookup_key key = {
- .type = NFSD_FILE_KEY_FULL,
- .need = may_flags & NFSD_FILE_MAY_MASK,
- .net = SVC_NET(rqstp),
- .gc = want_gc,
- };
+ unsigned char need = may_flags & NFSD_FILE_MAY_MASK;
+ struct net *net = SVC_NET(rqstp);
+ struct nfsd_file *new, *nf;
+ const struct cred *cred;
bool open_retry = true;
- struct nfsd_file *nf;
+ struct inode *inode;
__be32 status;
int ret;
@@ -1079,80 +999,88 @@ nfsd_file_do_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
may_flags|NFSD_MAY_OWNER_OVERRIDE);
if (status != nfs_ok)
return status;
- key.inode = d_inode(fhp->fh_dentry);
- key.cred = get_current_cred();
+ inode = d_inode(fhp->fh_dentry);
+ cred = get_current_cred();
retry:
rcu_read_lock();
- nf = rhashtable_lookup(&nfsd_file_rhash_tbl, &key,
- nfsd_file_rhash_params);
- nf = nfsd_file_get(nf);
+ nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc);
rcu_read_unlock();
if (nf) {
+ /*
+ * If the nf is on the LRU then it holds an extra reference
+ * that must be put if it's removed. It had better not be
+ * the last one however, since we should hold another.
+ */
if (nfsd_file_lru_remove(nf))
WARN_ON_ONCE(refcount_dec_and_test(&nf->nf_ref));
goto wait_for_construction;
}
- nf = nfsd_file_alloc(&key, may_flags);
- if (!nf) {
+ new = nfsd_file_alloc(net, inode, need, want_gc);
+ if (!new) {
status = nfserr_jukebox;
- goto out_status;
+ goto out;
}
- ret = rhashtable_lookup_insert_key(&nfsd_file_rhash_tbl,
- &key, &nf->nf_rhash,
- nfsd_file_rhash_params);
+ rcu_read_lock();
+ spin_lock(&inode->i_lock);
+ nf = nfsd_file_lookup_locked(net, cred, inode, need, want_gc);
+ if (unlikely(nf)) {
+ spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
+ nfsd_file_slab_free(&new->nf_rcu);
+ goto wait_for_construction;
+ }
+ nf = new;
+ ret = rhltable_insert(&nfsd_file_rhltable, &nf->nf_rlist,
+ nfsd_file_rhash_params);
+ spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
if (likely(ret == 0))
goto open_file;
- nfsd_file_slab_free(&nf->nf_rcu);
- nf = NULL;
if (ret == -EEXIST)
goto retry;
- trace_nfsd_file_insert_err(rqstp, key.inode, may_flags, ret);
+ trace_nfsd_file_insert_err(rqstp, inode, may_flags, ret);
status = nfserr_jukebox;
- goto out_status;
+ goto construction_err;
wait_for_construction:
wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE);
/* Did construction of this file fail? */
if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) {
- trace_nfsd_file_cons_err(rqstp, key.inode, may_flags, nf);
+ trace_nfsd_file_cons_err(rqstp, inode, may_flags, nf);
if (!open_retry) {
status = nfserr_jukebox;
- goto out;
+ goto construction_err;
}
open_retry = false;
- if (refcount_dec_and_test(&nf->nf_ref))
- nfsd_file_free(nf);
goto retry;
}
-
this_cpu_inc(nfsd_file_cache_hits);
status = nfserrno(nfsd_open_break_lease(file_inode(nf->nf_file), may_flags));
+ if (status != nfs_ok) {
+ nfsd_file_put(nf);
+ nf = NULL;
+ }
+
out:
if (status == nfs_ok) {
this_cpu_inc(nfsd_file_acquisitions);
nfsd_file_check_write_error(nf);
*pnf = nf;
- } else {
- if (refcount_dec_and_test(&nf->nf_ref))
- nfsd_file_free(nf);
- nf = NULL;
}
-
-out_status:
- put_cred(key.cred);
- trace_nfsd_file_acquire(rqstp, key.inode, may_flags, nf, status);
+ put_cred(cred);
+ trace_nfsd_file_acquire(rqstp, inode, may_flags, nf, status);
return status;
open_file:
trace_nfsd_file_alloc(nf);
- nf->nf_mark = nfsd_file_mark_find_or_create(nf, key.inode);
+ nf->nf_mark = nfsd_file_mark_find_or_create(nf, inode);
if (nf->nf_mark) {
if (file) {
get_file(file);
@@ -1170,13 +1098,16 @@ open_file:
* If construction failed, or we raced with a call to unlink()
* then unhash.
*/
- if (status == nfs_ok && key.inode->i_nlink == 0)
- status = nfserr_jukebox;
- if (status != nfs_ok)
+ if (status != nfs_ok || inode->i_nlink == 0)
nfsd_file_unhash(nf);
- clear_bit_unlock(NFSD_FILE_PENDING, &nf->nf_flags);
- smp_mb__after_atomic();
- wake_up_bit(&nf->nf_flags, NFSD_FILE_PENDING);
+ clear_and_wake_up_bit(NFSD_FILE_PENDING, &nf->nf_flags);
+ if (status == nfs_ok)
+ goto out;
+
+construction_err:
+ if (refcount_dec_and_test(&nf->nf_ref))
+ nfsd_file_free(nf);
+ nf = NULL;
goto out;
}
@@ -1192,8 +1123,11 @@ open_file:
* seconds after the final nfsd_file_put() in case the caller
* wants to re-use it.
*
- * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
- * network byte order is returned.
+ * Return values:
+ * %nfs_ok - @pnf points to an nfsd_file with its reference
+ * count boosted.
+ *
+ * On error, an nfsstat value in network byte order is returned.
*/
__be32
nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
@@ -1213,8 +1147,11 @@ nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
* but not garbage-collected. The object is unhashed after the
* final nfsd_file_put().
*
- * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
- * network byte order is returned.
+ * Return values:
+ * %nfs_ok - @pnf points to an nfsd_file with its reference
+ * count boosted.
+ *
+ * On error, an nfsstat value in network byte order is returned.
*/
__be32
nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
@@ -1235,8 +1172,11 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp,
* and @file is non-NULL, use it to instantiate a new nfsd_file instead of
* opening a new one.
*
- * Returns nfs_ok and sets @pnf on success; otherwise an nfsstat in
- * network byte order is returned.
+ * Return values:
+ * %nfs_ok - @pnf points to an nfsd_file with its reference
+ * count boosted.
+ *
+ * On error, an nfsstat value in network byte order is returned.
*/
__be32
nfsd_file_acquire_opened(struct svc_rqst *rqstp, struct svc_fh *fhp,
@@ -1267,7 +1207,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
lru = list_lru_count(&nfsd_file_lru);
rcu_read_lock();
- ht = &nfsd_file_rhash_tbl;
+ ht = &nfsd_file_rhltable.ht;
count = atomic_read(&ht->nelems);
tbl = rht_dereference_rcu(ht->tbl, ht);
buckets = tbl->size;
@@ -1283,7 +1223,7 @@ int nfsd_file_cache_stats_show(struct seq_file *m, void *v)
evictions += per_cpu(nfsd_file_evictions, i);
}
- seq_printf(m, "total entries: %u\n", count);
+ seq_printf(m, "total inodes: %u\n", count);
seq_printf(m, "hash buckets: %u\n", buckets);
seq_printf(m, "lru entries: %lu\n", lru);
seq_printf(m, "cache hits: %lu\n", hits);
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index 41516a4263ea..e54165a3224f 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -29,9 +29,8 @@ struct nfsd_file_mark {
* never be dereferenced, only used for comparison.
*/
struct nfsd_file {
- struct rhash_head nf_rhash;
- struct list_head nf_lru;
- struct rcu_head nf_rcu;
+ struct rhlist_head nf_rlist;
+ void *nf_inode;
struct file *nf_file;
const struct cred *nf_cred;
struct net *nf_net;
@@ -40,10 +39,12 @@ struct nfsd_file {
#define NFSD_FILE_REFERENCED (2)
#define NFSD_FILE_GC (3)
unsigned long nf_flags;
- struct inode *nf_inode; /* don't deref */
refcount_t nf_ref;
unsigned char nf_may;
+
struct nfsd_file_mark *nf_mark;
+ struct list_head nf_lru;
+ struct rcu_head nf_rcu;
ktime_t nf_birthtime;
};
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 5e9809aff37e..7a806ac13e31 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -240,8 +240,8 @@ idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
goto out;
/* expiry */
- ent.h.expiry_time = get_expiry(&buf);
- if (ent.h.expiry_time == 0)
+ error = get_expiry(&buf, &ent.h.expiry_time);
+ if (error)
goto out;
error = -ENOMEM;
@@ -408,8 +408,8 @@ nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
memcpy(ent.name, buf1, sizeof(ent.name));
/* expiry */
- ent.h.expiry_time = get_expiry(&buf);
- if (ent.h.expiry_time == 0)
+ error = get_expiry(&buf, &ent.h.expiry_time);
+ if (error)
goto out;
/* ID */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 5783209f17fc..bb9d47172162 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -930,6 +930,9 @@ nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags,
* Grab and keep cached pages associated with a file in the svc_rqst
* so that they can be passed to the network sendmsg/sendpage routines
* directly. They will be released after the sending has completed.
+ *
+ * Return values: Number of bytes consumed, or -EIO if there are no
+ * remaining pages in rqstp->rq_pages.
*/
static int
nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
@@ -948,7 +951,8 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
*/
if (page == *(rqstp->rq_next_page - 1))
continue;
- svc_rqst_replace_page(rqstp, page);
+ if (unlikely(!svc_rqst_replace_page(rqstp, page)))
+ return -EIO;
}
if (rqstp->rq_res.page_len == 0) // first call
rqstp->rq_res.page_base = offset % PAGE_SIZE;
@@ -2164,7 +2168,7 @@ nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
goto out;
}
- buf = kvmalloc(len, GFP_KERNEL | GFP_NOFS);
+ buf = kvmalloc(len, GFP_KERNEL);
if (buf == NULL) {
err = nfserr_jukebox;
goto out;
@@ -2227,10 +2231,7 @@ nfsd_listxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char **bufp,
goto out;
}
- /*
- * We're holding i_rwsem - use GFP_NOFS.
- */
- buf = kvmalloc(len, GFP_KERNEL | GFP_NOFS);
+ buf = kvmalloc(len, GFP_KERNEL);
if (buf == NULL) {
err = nfserr_jukebox;
goto out;
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 601700fedc91..9edb29101ec8 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -220,6 +220,7 @@ struct export_operations {
#define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply
atomic attribute updates
*/
+#define EXPORT_OP_FLUSH_ON_CLOSE (0x20) /* fs flushes file data on close */
unsigned long flags;
};
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 0168ac9fdda8..f42594a9efe0 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -99,21 +99,11 @@ struct nsm_handle {
/*
* Rigorous type checking on sockaddr type conversions
*/
-static inline struct sockaddr_in *nlm_addr_in(const struct nlm_host *host)
-{
- return (struct sockaddr_in *)&host->h_addr;
-}
-
static inline struct sockaddr *nlm_addr(const struct nlm_host *host)
{
return (struct sockaddr *)&host->h_addr;
}
-static inline struct sockaddr_in *nlm_srcaddr_in(const struct nlm_host *host)
-{
- return (struct sockaddr_in *)&host->h_srcaddr;
-}
-
static inline struct sockaddr *nlm_srcaddr(const struct nlm_host *host)
{
return (struct sockaddr *)&host->h_srcaddr;
@@ -131,7 +121,16 @@ struct nlm_lockowner {
uint32_t pid;
};
-struct nlm_wait;
+/*
+ * This is the representation of a blocked client lock.
+ */
+struct nlm_wait {
+ struct list_head b_list; /* linked list */
+ wait_queue_head_t b_wait; /* where to wait on */
+ struct nlm_host *b_host;
+ struct file_lock *b_lock; /* local file lock */
+ __be32 b_status; /* grant callback status */
+};
/*
* Memory chunk for NLM client RPC request.
@@ -212,9 +211,11 @@ struct nlm_rqst * nlm_alloc_call(struct nlm_host *host);
int nlm_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *);
void nlmclnt_release_call(struct nlm_rqst *);
-struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl);
-void nlmclnt_finish_block(struct nlm_wait *block);
-int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
+void nlmclnt_prepare_block(struct nlm_wait *block, struct nlm_host *host,
+ struct file_lock *fl);
+void nlmclnt_queue_block(struct nlm_wait *block);
+__be32 nlmclnt_dequeue_block(struct nlm_wait *block);
+int nlmclnt_wait(struct nlm_wait *block, struct nlm_rqst *req, long timeout);
__be32 nlmclnt_grant(const struct sockaddr *addr,
const struct nlm_lock *lock);
void nlmclnt_recovery(struct nlm_host *);
diff --git a/include/linux/nfs.h b/include/linux/nfs.h
index b06375e88e58..ceb70a926b95 100644
--- a/include/linux/nfs.h
+++ b/include/linux/nfs.h
@@ -10,6 +10,7 @@
#include <linux/sunrpc/msg_prot.h>
#include <linux/string.h>
+#include <linux/crc32.h>
#include <uapi/linux/nfs.h>
/*
@@ -44,4 +45,23 @@ enum nfs3_stable_how {
/* used by direct.c to mark verf as invalid */
NFS_INVALID_STABLE_HOW = -1
};
+
+#ifdef CONFIG_CRC32
+/**
+ * nfs_fhandle_hash - calculate the crc32 hash for the filehandle
+ * @fh - pointer to filehandle
+ *
+ * returns a crc32 hash for the filehandle that is compatible with
+ * the one displayed by "wireshark".
+ */
+static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
+{
+ return ~crc32_le(0xFFFFFFFF, &fh->data[0], fh->size);
+}
+#else /* CONFIG_CRC32 */
+static inline u32 nfs_fhandle_hash(const struct nfs_fh *fh)
+{
+ return 0;
+}
+#endif /* CONFIG_CRC32 */
#endif /* _LINUX_NFS_H */
diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h
index ec5a555df96f..518bd28f5ab8 100644
--- a/include/linux/sunrpc/cache.h
+++ b/include/linux/sunrpc/cache.h
@@ -300,17 +300,18 @@ static inline int get_time(char **bpp, time64_t *time)
return 0;
}
-static inline time64_t get_expiry(char **bpp)
+static inline int get_expiry(char **bpp, time64_t *rvp)
{
- time64_t rv;
+ int error;
struct timespec64 boot;
- if (get_time(bpp, &rv))
- return 0;
- if (rv < 0)
- return 0;
+ error = get_time(bpp, rvp);
+ if (error)
+ return error;
+
getboottime64(&boot);
- return rv - boot.tv_sec;
+ (*rvp) -= boot.tv_sec;
+ return 0;
}
#endif /* _LINUX_SUNRPC_CACHE_H_ */
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 877891536c2f..762d7231e574 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -309,17 +309,6 @@ static inline struct sockaddr *svc_daddr(const struct svc_rqst *rqst)
return (struct sockaddr *) &rqst->rq_daddr;
}
-static inline void svc_free_res_pages(struct svc_rqst *rqstp)
-{
- while (rqstp->rq_next_page != rqstp->rq_respages) {
- struct page **pp = --rqstp->rq_next_page;
- if (*pp) {
- put_page(*pp);
- *pp = NULL;
- }
- }
-}
-
struct svc_deferred_req {
u32 prot; /* protocol (UDP or TCP) */
struct svc_xprt *xprt;
@@ -422,15 +411,16 @@ struct svc_serv *svc_create(struct svc_program *, unsigned int,
int (*threadfn)(void *data));
struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
struct svc_pool *pool, int node);
-void svc_rqst_replace_page(struct svc_rqst *rqstp,
+bool svc_rqst_replace_page(struct svc_rqst *rqstp,
struct page *page);
+void svc_rqst_release_pages(struct svc_rqst *rqstp);
void svc_rqst_free(struct svc_rqst *);
void svc_exit_thread(struct svc_rqst *);
struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
int (*threadfn)(void *data));
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
int svc_pool_stats_open(struct svc_serv *serv, struct file *file);
-int svc_process(struct svc_rqst *);
+void svc_process(struct svc_rqst *rqstp);
int bc_svc_process(struct svc_serv *, struct rpc_rqst *,
struct svc_rqst *);
int svc_register(const struct svc_serv *, struct net *, const int,
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index 775368802762..867479204840 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -27,7 +27,7 @@ struct svc_xprt_ops {
void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *);
void (*xpo_kill_temp_xprt)(struct svc_xprt *);
- void (*xpo_start_tls)(struct svc_xprt *);
+ void (*xpo_handshake)(struct svc_xprt *xprt);
};
struct svc_xprt_class {
@@ -70,6 +70,9 @@ struct svc_xprt {
#define XPT_LOCAL 12 /* connection from loopback interface */
#define XPT_KILL_TEMP 13 /* call xpo_kill_temp_xprt before closing */
#define XPT_CONG_CTRL 14 /* has congestion control */
+#define XPT_HANDSHAKE 15 /* xprt requests a handshake */
+#define XPT_TLS_SESSION 16 /* transport-layer security established */
+#define XPT_PEER_AUTH 17 /* peer has been authenticated */
struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */
diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h
index bcc555c7ae9c..d16ae621782c 100644
--- a/include/linux/sunrpc/svcsock.h
+++ b/include/linux/sunrpc/svcsock.h
@@ -38,6 +38,8 @@ struct svc_sock {
/* Number of queued send requests */
atomic_t sk_sendqlen;
+ struct completion sk_handshake_done;
+
struct page * sk_pages[RPCSVC_MAXPAGES]; /* received data */
};
@@ -56,7 +58,7 @@ static inline u32 svc_sock_final_rec(struct svc_sock *svsk)
*/
void svc_close_net(struct svc_serv *, struct net *);
int svc_recv(struct svc_rqst *, long);
-int svc_send(struct svc_rqst *);
+void svc_send(struct svc_rqst *rqstp);
void svc_drop(struct svc_rqst *);
void svc_sock_update_bufs(struct svc_serv *serv);
bool svc_alien_sock(struct net *net, int fd);
diff --git a/include/net/tls.h b/include/net/tls.h
index 154949c7b0c8..6056ce5a2aa5 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -69,6 +69,8 @@ extern const struct tls_cipher_size_desc tls_cipher_size_desc[];
#define TLS_CRYPTO_INFO_READY(info) ((info)->cipher_type)
+#define TLS_RECORD_TYPE_ALERT 0x15
+#define TLS_RECORD_TYPE_HANDSHAKE 0x16
#define TLS_RECORD_TYPE_DATA 0x17
#define TLS_AAD_SPACE_SIZE 13
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 3ca54536f8f7..31bc7025cb44 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1790,6 +1790,31 @@ DEFINE_EVENT(svc_rqst_status, svc_send,
TP_PROTO(const struct svc_rqst *rqst, int status),
TP_ARGS(rqst, status));
+TRACE_EVENT(svc_replace_page_err,
+ TP_PROTO(const struct svc_rqst *rqst),
+
+ TP_ARGS(rqst),
+ TP_STRUCT__entry(
+ SVC_RQST_ENDPOINT_FIELDS(rqst)
+
+ __field(const void *, begin)
+ __field(const void *, respages)
+ __field(const void *, nextpage)
+ ),
+
+ TP_fast_assign(
+ SVC_RQST_ENDPOINT_ASSIGNMENTS(rqst);
+
+ __entry->begin = rqst->rq_pages;
+ __entry->respages = rqst->rq_respages;
+ __entry->nextpage = rqst->rq_next_page;
+ ),
+
+ TP_printk(SVC_RQST_ENDPOINT_FORMAT " begin=%p respages=%p nextpage=%p",
+ SVC_RQST_ENDPOINT_VARARGS,
+ __entry->begin, __entry->respages, __entry->nextpage)
+);
+
TRACE_EVENT(svc_stats_latency,
TP_PROTO(
const struct svc_rqst *rqst
@@ -1832,7 +1857,10 @@ TRACE_EVENT(svc_stats_latency,
{ BIT(XPT_CACHE_AUTH), "CACHE_AUTH" }, \
{ BIT(XPT_LOCAL), "LOCAL" }, \
{ BIT(XPT_KILL_TEMP), "KILL_TEMP" }, \
- { BIT(XPT_CONG_CTRL), "CONG_CTRL" })
+ { BIT(XPT_CONG_CTRL), "CONG_CTRL" }, \
+ { BIT(XPT_HANDSHAKE), "HANDSHAKE" }, \
+ { BIT(XPT_TLS_SESSION), "TLS_SESSION" }, \
+ { BIT(XPT_PEER_AUTH), "PEER_AUTH" })
TRACE_EVENT(svc_xprt_create_err,
TP_PROTO(
@@ -1965,6 +1993,17 @@ DEFINE_SVC_XPRT_EVENT(close);
DEFINE_SVC_XPRT_EVENT(detach);
DEFINE_SVC_XPRT_EVENT(free);
+#define DEFINE_SVC_TLS_EVENT(name) \
+ DEFINE_EVENT(svc_xprt_event, svc_tls_##name, \
+ TP_PROTO(const struct svc_xprt *xprt), \
+ TP_ARGS(xprt))
+
+DEFINE_SVC_TLS_EVENT(start);
+DEFINE_SVC_TLS_EVENT(upcall);
+DEFINE_SVC_TLS_EVENT(unavailable);
+DEFINE_SVC_TLS_EVENT(not_started);
+DEFINE_SVC_TLS_EVENT(timed_out);
+
TRACE_EVENT(svc_xprt_accept,
TP_PROTO(
const struct svc_xprt *xprt,
diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h
index 2124ba904779..a73ca3703abb 100644
--- a/include/uapi/linux/nfsd/export.h
+++ b/include/uapi/linux/nfsd/export.h
@@ -62,5 +62,18 @@
| NFSEXP_ALLSQUASH \
| NFSEXP_INSECURE_PORT)
+/*
+ * Transport layer security policies that are permitted to access
+ * an export
+ */
+#define NFSEXP_XPRTSEC_NONE 0x0001
+#define NFSEXP_XPRTSEC_TLS 0x0002
+#define NFSEXP_XPRTSEC_MTLS 0x0004
+
+#define NFSEXP_XPRTSEC_NUM (3)
+
+#define NFSEXP_XPRTSEC_ALL (NFSEXP_XPRTSEC_NONE | \
+ NFSEXP_XPRTSEC_TLS | \
+ NFSEXP_XPRTSEC_MTLS)
#endif /* _UAPINFSD_EXPORT_H */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 9c843974bb48..c4a566737085 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -257,11 +257,11 @@ static int rsi_parse(struct cache_detail *cd,
rsii.h.flags = 0;
/* expiry */
- expiry = get_expiry(&mesg);
- status = -EINVAL;
- if (expiry == 0)
+ status = get_expiry(&mesg, &expiry);
+ if (status)
goto out;
+ status = -EINVAL;
/* major/minor */
len = qword_get(&mesg, buf, mlen);
if (len <= 0)
@@ -483,11 +483,11 @@ static int rsc_parse(struct cache_detail *cd,
rsci.h.flags = 0;
/* expiry */
- expiry = get_expiry(&mesg);
- status = -EINVAL;
- if (expiry == 0)
+ status = get_expiry(&mesg, &expiry);
+ if (status)
goto out;
+ status = -EINVAL;
rscp = rsc_lookup(cd, &rsci);
if (!rscp)
goto out;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index fea7ce8fba14..26367cf4c17a 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -649,6 +649,8 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
if (!rqstp)
return rqstp;
+ pagevec_init(&rqstp->rq_pvec);
+
__set_bit(RQ_BUSY, &rqstp->rq_flags);
rqstp->rq_server = serv;
rqstp->rq_pool = pool;
@@ -842,9 +844,21 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads);
*
* When replacing a page in rq_pages, batch the release of the
* replaced pages to avoid hammering the page allocator.
+ *
+ * Return values:
+ * %true: page replaced
+ * %false: array bounds checking failed
*/
-void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
+bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
{
+ struct page **begin = rqstp->rq_pages;
+ struct page **end = &rqstp->rq_pages[RPCSVC_MAXPAGES];
+
+ if (unlikely(rqstp->rq_next_page < begin || rqstp->rq_next_page > end)) {
+ trace_svc_replace_page_err(rqstp);
+ return false;
+ }
+
if (*rqstp->rq_next_page) {
if (!pagevec_space(&rqstp->rq_pvec))
__pagevec_release(&rqstp->rq_pvec);
@@ -853,9 +867,28 @@ void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
get_page(page);
*(rqstp->rq_next_page++) = page;
+ return true;
}
EXPORT_SYMBOL_GPL(svc_rqst_replace_page);
+/**
+ * svc_rqst_release_pages - Release Reply buffer pages
+ * @rqstp: RPC transaction context
+ *
+ * Release response pages that might still be in flight after
+ * svc_send, and any spliced filesystem-owned pages.
+ */
+void svc_rqst_release_pages(struct svc_rqst *rqstp)
+{
+ int i, count = rqstp->rq_next_page - rqstp->rq_respages;
+
+ if (count) {
+ release_pages(rqstp->rq_respages, count);
+ for (i = 0; i < count; i++)
+ rqstp->rq_respages[i] = NULL;
+ }
+}
+
/*
* Called from a server thread as it's exiting. Caller must hold the "service
* mutex" for the service.
@@ -863,6 +896,7 @@ EXPORT_SYMBOL_GPL(svc_rqst_replace_page);
void
svc_rqst_free(struct svc_rqst *rqstp)
{
+ pagevec_release(&rqstp->rq_pvec);
svc_release_buffer(rqstp);
if (rqstp->rq_scratch_page)
put_page(rqstp->rq_scratch_page);
@@ -1431,11 +1465,12 @@ err_system_err:
goto sendit;
}
-/*
- * Process the RPC request.
+/**
+ * svc_process - Execute one RPC transaction
+ * @rqstp: RPC transaction context
+ *
*/
-int
-svc_process(struct svc_rqst *rqstp)
+void svc_process(struct svc_rqst *rqstp)
{
struct kvec *resv = &rqstp->rq_res.head[0];
__be32 *p;
@@ -1471,7 +1506,8 @@ svc_process(struct svc_rqst *rqstp)
if (!svc_process_common(rqstp))
goto out_drop;
- return svc_send(rqstp);
+ svc_send(rqstp);
+ return;
out_baddir:
svc_printk(rqstp, "bad direction 0x%08x, dropping request\n",
@@ -1479,7 +1515,6 @@ out_baddir:
rqstp->rq_server->sv_stats->rpcbadfmt++;
out_drop:
svc_drop(rqstp);
- return 0;
}
EXPORT_SYMBOL_GPL(svc_process);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index ba629297da4e..84e5d7d31481 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
if (xpt_flags & BIT(XPT_BUSY))
return false;
- if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
+ if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE)))
return true;
if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
@@ -541,8 +541,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
kfree(rqstp->rq_deferred);
rqstp->rq_deferred = NULL;
- pagevec_release(&rqstp->rq_pvec);
- svc_free_res_pages(rqstp);
+ svc_rqst_release_pages(rqstp);
rqstp->rq_res.page_len = 0;
rqstp->rq_res.page_base = 0;
@@ -667,8 +666,6 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
struct xdr_buf *arg = &rqstp->rq_arg;
unsigned long pages, filled, ret;
- pagevec_init(&rqstp->rq_pvec);
-
pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT;
if (pages > RPCSVC_MAXPAGES) {
pr_warn_once("svc: warning: pages=%lu > RPCSVC_MAXPAGES=%lu\n",
@@ -704,6 +701,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
arg->page_len = (pages-2)*PAGE_SIZE;
arg->len = (pages-1)*PAGE_SIZE;
arg->tail[0].iov_len = 0;
+
+ rqstp->rq_xid = xdr_zero;
return 0;
}
@@ -829,6 +828,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
module_put(xprt->xpt_class->xcl_owner);
}
svc_xprt_received(xprt);
+ } else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) {
+ xprt->xpt_ops->xpo_handshake(xprt);
+ svc_xprt_received(xprt);
} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
@@ -909,18 +911,20 @@ void svc_drop(struct svc_rqst *rqstp)
}
EXPORT_SYMBOL_GPL(svc_drop);
-/*
- * Return reply to client.
+/**
+ * svc_send - Return reply to client
+ * @rqstp: RPC transaction context
+ *
*/
-int svc_send(struct svc_rqst *rqstp)
+void svc_send(struct svc_rqst *rqstp)
{
struct svc_xprt *xprt;
- int len = -EFAULT;
struct xdr_buf *xb;
+ int status;
xprt = rqstp->rq_xprt;
if (!xprt)
- goto out;
+ return;
/* calculate over-all length */
xb = &rqstp->rq_res;
@@ -930,15 +934,10 @@ int svc_send(struct svc_rqst *rqstp)
trace_svc_xdr_sendto(rqstp->rq_xid, xb);
trace_svc_stats_latency(rqstp);
- len = xprt->xpt_ops->xpo_sendto(rqstp);
+ status = xprt->xpt_ops->xpo_sendto(rqstp);
- trace_svc_send(rqstp, len);
+ trace_svc_send(rqstp, status);
svc_xprt_release(rqstp);
-
- if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
- len = 0;
-out:
- return len;
}
/*
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 4246363cb095..174783f804fa 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -17,8 +17,9 @@
#include <net/ipv6.h>
#include <linux/kernel.h>
#include <linux/user_namespace.h>
-#define RPCDBG_FACILITY RPCDBG_AUTH
+#include <trace/events/sunrpc.h>
+#define RPCDBG_FACILITY RPCDBG_AUTH
#include "netns.h"
@@ -225,9 +226,9 @@ static int ip_map_parse(struct cache_detail *cd,
return -EINVAL;
}
- expiry = get_expiry(&mesg);
- if (expiry ==0)
- return -EINVAL;
+ err = get_expiry(&mesg, &expiry);
+ if (err)
+ return err;
/* domainname, or empty for NEGATIVE */
len = qword_get(&mesg, buf, mlen);
@@ -506,9 +507,9 @@ static int unix_gid_parse(struct cache_detail *cd,
uid = make_kuid(current_user_ns(), id);
ug.uid = uid;
- expiry = get_expiry(&mesg);
- if (expiry == 0)
- return -EINVAL;
+ err = get_expiry(&mesg, &expiry);
+ if (err)
+ return err;
rv = get_int(&mesg, &gids);
if (rv || gids < 0 || gids > 8192)
@@ -832,6 +833,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
{
struct xdr_stream *xdr = &rqstp->rq_arg_stream;
struct svc_cred *cred = &rqstp->rq_cred;
+ struct svc_xprt *xprt = rqstp->rq_xprt;
u32 flavor, len;
void *body;
__be32 *p;
@@ -865,14 +867,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
if (cred->cr_group_info == NULL)
return SVC_CLOSE;
- if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
+ if (xprt->xpt_ops->xpo_handshake) {
p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8);
if (!p)
return SVC_CLOSE;
+ trace_svc_tls_start(xprt);
*p++ = rpc_auth_null;
*p++ = cpu_to_be32(8);
memcpy(p, "STARTTLS", 8);
+
+ set_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
+ svc_xprt_enqueue(xprt);
} else {
+ trace_svc_tls_unavailable(xprt);
if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
RPC_AUTH_NULL, NULL, 0) < 0)
return SVC_CLOSE;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 03a4f5615086..a51c9b989d58 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -43,9 +43,12 @@
#include <net/udp.h>
#include <net/tcp.h>
#include <net/tcp_states.h>
+#include <net/tls.h>
+#include <net/handshake.h>
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <asm/ioctls.h>
+#include <linux/key.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/clnt.h>
@@ -63,6 +66,12 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
+/* To-do: to avoid tying up an nfsd thread while waiting for a
+ * handshake request, the request could instead be deferred.
+ */
+enum {
+ SVC_HANDSHAKE_TO = 5U * HZ
+};
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
int flags);
@@ -216,6 +225,49 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
return len;
}
+static int
+svc_tcp_sock_process_cmsg(struct svc_sock *svsk, struct msghdr *msg,
+ struct cmsghdr *cmsg, int ret)
+{
+ if (cmsg->cmsg_level == SOL_TLS &&
+ cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
+ u8 content_type = *((u8 *)CMSG_DATA(cmsg));
+
+ switch (content_type) {
+ case TLS_RECORD_TYPE_DATA:
+ /* TLS sets EOR at the end of each application data
+ * record, even though there might be more frames
+ * waiting to be decrypted.
+ */
+ msg->msg_flags &= ~MSG_EOR;
+ break;
+ case TLS_RECORD_TYPE_ALERT:
+ ret = -ENOTCONN;
+ break;
+ default:
+ ret = -EAGAIN;
+ }
+ }
+ return ret;
+}
+
+static int
+svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg)
+{
+ union {
+ struct cmsghdr cmsg;
+ u8 buf[CMSG_SPACE(sizeof(u8))];
+ } u;
+ int ret;
+
+ msg->msg_control = &u;
+ msg->msg_controllen = sizeof(u);
+ ret = sock_recvmsg(svsk->sk_sock, msg, MSG_DONTWAIT);
+ if (unlikely(msg->msg_controllen != sizeof(u)))
+ ret = svc_tcp_sock_process_cmsg(svsk, msg, &u.cmsg, ret);
+ return ret;
+}
+
#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek)
{
@@ -263,7 +315,7 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
iov_iter_advance(&msg.msg_iter, seek);
buflen -= seek;
}
- len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
+ len = svc_tcp_sock_recv_cmsg(svsk, &msg);
if (len > 0)
svc_flush_bvec(bvec, len, seek);
@@ -315,6 +367,8 @@ static void svc_data_ready(struct sock *sk)
rmb();
svsk->sk_odata(sk);
trace_svcsock_data_ready(&svsk->sk_xprt, 0);
+ if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags))
+ return;
if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
svc_xprt_enqueue(&svsk->sk_xprt);
}
@@ -352,6 +406,88 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
sock_no_linger(svsk->sk_sock->sk);
}
+/**
+ * svc_tcp_handshake_done - Handshake completion handler
+ * @data: address of xprt to wake
+ * @status: status of handshake
+ * @peerid: serial number of key containing the remote peer's identity
+ *
+ * If a security policy is specified as an export option, we don't
+ * have a specific export here to check. So we set a "TLS session
+ * is present" flag on the xprt and let an upper layer enforce local
+ * security policy.
+ */
+static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid)
+{
+ struct svc_xprt *xprt = data;
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+
+ if (!status) {
+ if (peerid != TLS_NO_PEERID)
+ set_bit(XPT_PEER_AUTH, &xprt->xpt_flags);
+ set_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
+ }
+ clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
+ complete_all(&svsk->sk_handshake_done);
+}
+
+/**
+ * svc_tcp_handshake - Perform a transport-layer security handshake
+ * @xprt: connected transport endpoint
+ *
+ */
+static void svc_tcp_handshake(struct svc_xprt *xprt)
+{
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ struct sock *sk = svsk->sk_sock->sk;
+ struct tls_handshake_args args = {
+ .ta_sock = svsk->sk_sock,
+ .ta_done = svc_tcp_handshake_done,
+ .ta_data = xprt,
+ };
+ int ret;
+
+ trace_svc_tls_upcall(xprt);
+
+ clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
+ init_completion(&svsk->sk_handshake_done);
+
+ ret = tls_server_hello_x509(&args, GFP_KERNEL);
+ if (ret) {
+ trace_svc_tls_not_started(xprt);
+ goto out_failed;
+ }
+
+ ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done,
+ SVC_HANDSHAKE_TO);
+ if (ret <= 0) {
+ if (tls_handshake_cancel(sk)) {
+ trace_svc_tls_timed_out(xprt);
+ goto out_close;
+ }
+ }
+
+ if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) {
+ trace_svc_tls_unavailable(xprt);
+ goto out_close;
+ }
+
+ /* Mark the transport ready in case the remote sent RPC
+ * traffic before the kernel received the handshake
+ * completion downcall.
+ */
+ set_bit(XPT_DATA, &xprt->xpt_flags);
+ svc_xprt_enqueue(xprt);
+ return;
+
+out_close:
+ set_bit(XPT_CLOSE, &xprt->xpt_flags);
+out_failed:
+ clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
+ set_bit(XPT_DATA, &xprt->xpt_flags);
+ svc_xprt_enqueue(xprt);
+}
+
/*
* See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
*/
@@ -877,7 +1013,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen;
iov.iov_len = want;
iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want);
- len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
+ len = svc_tcp_sock_recv_cmsg(svsk, &msg);
if (len < 0)
return len;
svsk->sk_tcplen += len;
@@ -1213,6 +1349,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
.xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
+ .xpo_handshake = svc_tcp_handshake,
};
static struct svc_xprt_class svc_tcp_class = {
@@ -1293,26 +1430,37 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
struct socket *sock,
int flags)
{
+ struct file *filp = NULL;
struct svc_sock *svsk;
struct sock *inet;
int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
- int err = 0;
svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
if (!svsk)
return ERR_PTR(-ENOMEM);
+ if (!sock->file) {
+ filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+ if (IS_ERR(filp)) {
+ kfree(svsk);
+ return ERR_CAST(filp);
+ }
+ }
+
inet = sock->sk;
- /* Register socket with portmapper */
- if (pmap_register)
+ if (pmap_register) {
+ int err;
+
err = svc_register(serv, sock_net(sock->sk), inet->sk_family,
inet->sk_protocol,
ntohs(inet_sk(inet)->inet_sport));
-
- if (err < 0) {
- kfree(svsk);
- return ERR_PTR(err);
+ if (err < 0) {
+ if (filp)
+ fput(filp);
+ kfree(svsk);
+ return ERR_PTR(err);
+ }
}
svsk->sk_sock = sock;
@@ -1525,10 +1673,12 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
static void svc_sock_free(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+ struct socket *sock = svsk->sk_sock;
- if (svsk->sk_sock->file)
- sockfd_put(svsk->sk_sock);
+ tls_handshake_cancel(sock->sk);
+ if (sock->file)
+ sockfd_put(sock);
else
- sock_release(svsk->sk_sock);
+ sock_release(sock);
kfree(svsk);
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 5bc20e9d09cd..f0d5eeed4c88 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -212,24 +212,6 @@ static struct ctl_table svcrdma_parm_table[] = {
{ },
};
-static struct ctl_table svcrdma_table[] = {
- {
- .procname = "svc_rdma",
- .mode = 0555,
- .child = svcrdma_parm_table
- },
- { },
-};
-
-static struct ctl_table svcrdma_root_table[] = {
- {
- .procname = "sunrpc",
- .mode = 0555,
- .child = svcrdma_table
- },
- { },
-};
-
static void svc_rdma_proc_cleanup(void)
{
if (!svcrdma_table_header)
@@ -263,7 +245,8 @@ static int svc_rdma_proc_init(void)
if (rc)
goto out_err;
- svcrdma_table_header = register_sysctl_table(svcrdma_root_table);
+ svcrdma_table_header = register_sysctl("sunrpc/svc_rdma",
+ svcrdma_parm_table);
return 0;
out_err: