summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS1
-rw-r--r--fs/lockd/svc.c3
-rw-r--r--fs/nfs/callback.c3
-rw-r--r--fs/nfsd/blocklayout.c4
-rw-r--r--fs/nfsd/cache.h2
-rw-r--r--fs/nfsd/filecache.c76
-rw-r--r--fs/nfsd/filecache.h1
-rw-r--r--fs/nfsd/netns.h29
-rw-r--r--fs/nfsd/nfs3proc.c6
-rw-r--r--fs/nfsd/nfs3xdr.c5
-rw-r--r--fs/nfsd/nfs4callback.c191
-rw-r--r--fs/nfsd/nfs4layouts.c63
-rw-r--r--fs/nfsd/nfs4proc.c13
-rw-r--r--fs/nfsd/nfs4state.c826
-rw-r--r--fs/nfsd/nfs4xdr.c58
-rw-r--r--fs/nfsd/nfscache.c43
-rw-r--r--fs/nfsd/nfsctl.c17
-rw-r--r--fs/nfsd/nfsd.h3
-rw-r--r--fs/nfsd/nfsfh.c3
-rw-r--r--fs/nfsd/nfsproc.c6
-rw-r--r--fs/nfsd/nfssvc.c16
-rw-r--r--fs/nfsd/pnfs.h8
-rw-r--r--fs/nfsd/state.h83
-rw-r--r--fs/nfsd/stats.c52
-rw-r--r--fs/nfsd/stats.h70
-rw-r--r--fs/nfsd/trace.h212
-rw-r--r--fs/nfsd/vfs.c84
-rw-r--r--fs/nfsd/vfs.h4
-rw-r--r--fs/nfsd/xdr3.h2
-rw-r--r--fs/nfsd/xdr4cb.h18
-rw-r--r--include/linux/sunrpc/svc.h5
-rw-r--r--include/linux/sunrpc/svc_rdma.h55
-rw-r--r--include/trace/events/rpcrdma.h4
-rw-r--r--include/trace/misc/nfs.h34
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c14
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_mech.c11
-rw-r--r--net/sunrpc/auth_gss/gss_rpc_xdr.c27
-rw-r--r--net/sunrpc/stats.c2
-rw-r--r--net/sunrpc/svc.c40
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_backchannel.c2
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_rw.c245
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_sendto.c151
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c15
-rw-r--r--net/sunrpc/xprtsock.c9
44 files changed, 1763 insertions, 753 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index c660e9be8ce6..f31f987bda42 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8172,6 +8172,7 @@ F: include/uapi/scsi/fc/
FILE LOCKING (flock() and fcntl()/lockf())
M: Jeff Layton <jlayton@kernel.org>
M: Chuck Lever <chuck.lever@oracle.com>
+R: Alexander Aring <alex.aring@gmail.com>
L: linux-fsdevel@vger.kernel.org
S: Maintained
F: fs/fcntl.c
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index ce5862482097..ab8042a5b895 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -710,8 +710,6 @@ static const struct svc_version *nlmsvc_version[] = {
#endif
};
-static struct svc_stat nlmsvc_stats;
-
#define NLM_NRVERS ARRAY_SIZE(nlmsvc_version)
static struct svc_program nlmsvc_program = {
.pg_prog = NLM_PROGRAM, /* program number */
@@ -719,7 +717,6 @@ static struct svc_program nlmsvc_program = {
.pg_vers = nlmsvc_version, /* version table */
.pg_name = "lockd", /* service name */
.pg_class = "nfsd", /* share authentication with nfsd */
- .pg_stats = &nlmsvc_stats, /* stats table */
.pg_authenticate = &lockd_authenticate, /* export authentication */
.pg_init_request = svc_generic_init_request,
.pg_rpcbind_set = svc_generic_rpcbind_set,
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 760d27dd7225..8adfcd4c8c1a 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -356,15 +356,12 @@ static const struct svc_version *nfs4_callback_version[] = {
[4] = &nfs4_callback_version4,
};
-static struct svc_stat nfs4_callback_stats;
-
static struct svc_program nfs4_callback_program = {
.pg_prog = NFS4_CALLBACK, /* RPC service number */
.pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */
.pg_vers = nfs4_callback_version, /* version table */
.pg_name = "NFSv4 callback", /* service name */
.pg_class = "nfs", /* authentication class */
- .pg_stats = &nfs4_callback_stats,
.pg_authenticate = nfs_callback_authenticate,
.pg_init_request = svc_generic_init_request,
.pg_rpcbind_set = svc_generic_rpcbind_set,
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 46fd74d91ea9..3c040c81c77d 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -328,10 +328,10 @@ nfsd4_scsi_proc_layoutcommit(struct inode *inode,
}
static void
-nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
+nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
{
struct nfs4_client *clp = ls->ls_stid.sc_client;
- struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev;
+ struct block_device *bdev = file->nf_file->f_path.mnt->mnt_sb->s_bdev;
bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
nfsd4_scsi_pr_key(clp), 0, true);
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
index 4cbe0434cbb8..66a05fefae98 100644
--- a/fs/nfsd/cache.h
+++ b/fs/nfsd/cache.h
@@ -80,8 +80,6 @@ enum {
int nfsd_drc_slab_create(void);
void nfsd_drc_slab_free(void);
-int nfsd_net_reply_cache_init(struct nfsd_net *nn);
-void nfsd_net_reply_cache_destroy(struct nfsd_net *nn);
int nfsd_reply_cache_init(struct nfsd_net *);
void nfsd_reply_cache_shutdown(struct nfsd_net *);
int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index b86d8494052c..ddd3e0d9cfa6 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -61,13 +61,10 @@ static DEFINE_PER_CPU(unsigned long, nfsd_file_total_age);
static DEFINE_PER_CPU(unsigned long, nfsd_file_evictions);
struct nfsd_fcache_disposal {
- struct work_struct work;
spinlock_t lock;
struct list_head freeme;
};
-static struct workqueue_struct *nfsd_filecache_wq __read_mostly;
-
static struct kmem_cache *nfsd_file_slab;
static struct kmem_cache *nfsd_file_mark_slab;
static struct list_lru nfsd_file_lru;
@@ -283,7 +280,7 @@ nfsd_file_free(struct nfsd_file *nf)
nfsd_file_mark_put(nf->nf_mark);
if (nf->nf_file) {
nfsd_file_check_write_error(nf);
- filp_close(nf->nf_file, NULL);
+ nfsd_filp_close(nf->nf_file);
}
/*
@@ -421,7 +418,37 @@ nfsd_file_dispose_list_delayed(struct list_head *dispose)
spin_lock(&l->lock);
list_move_tail(&nf->nf_lru, &l->freeme);
spin_unlock(&l->lock);
- queue_work(nfsd_filecache_wq, &l->work);
+ svc_wake_up(nn->nfsd_serv);
+ }
+}
+
+/**
+ * nfsd_file_net_dispose - deal with nfsd_files waiting to be disposed.
+ * @nn: nfsd_net in which to find files to be disposed.
+ *
+ * When files held open for nfsv3 are removed from the filecache, whether
+ * due to memory pressure or garbage collection, they are queued to
+ * a per-net-ns queue. This function completes the disposal, either
+ * directly or by waking another nfsd thread to help with the work.
+ */
+void nfsd_file_net_dispose(struct nfsd_net *nn)
+{
+ struct nfsd_fcache_disposal *l = nn->fcache_disposal;
+
+ if (!list_empty(&l->freeme)) {
+ LIST_HEAD(dispose);
+ int i;
+
+ spin_lock(&l->lock);
+ for (i = 0; i < 8 && !list_empty(&l->freeme); i++)
+ list_move(l->freeme.next, &dispose);
+ spin_unlock(&l->lock);
+ if (!list_empty(&l->freeme))
+ /* Wake up another thread to share the work
+ * *before* doing any actual disposing.
+ */
+ svc_wake_up(nn->nfsd_serv);
+ nfsd_file_dispose_list(&dispose);
}
}
@@ -631,28 +658,6 @@ nfsd_file_close_inode_sync(struct inode *inode)
list_del_init(&nf->nf_lru);
nfsd_file_free(nf);
}
- flush_delayed_fput();
-}
-
-/**
- * nfsd_file_delayed_close - close unused nfsd_files
- * @work: dummy
- *
- * Scrape the freeme list for this nfsd_net, and then dispose of them
- * all.
- */
-static void
-nfsd_file_delayed_close(struct work_struct *work)
-{
- LIST_HEAD(head);
- struct nfsd_fcache_disposal *l = container_of(work,
- struct nfsd_fcache_disposal, work);
-
- spin_lock(&l->lock);
- list_splice_init(&l->freeme, &head);
- spin_unlock(&l->lock);
-
- nfsd_file_dispose_list(&head);
}
static int
@@ -717,25 +722,18 @@ nfsd_file_cache_init(void)
return ret;
ret = -ENOMEM;
- nfsd_filecache_wq = alloc_workqueue("nfsd_filecache", WQ_UNBOUND, 0);
- if (!nfsd_filecache_wq)
- goto out;
-
- nfsd_file_slab = kmem_cache_create("nfsd_file",
- sizeof(struct nfsd_file), 0, 0, NULL);
+ nfsd_file_slab = KMEM_CACHE(nfsd_file, 0);
if (!nfsd_file_slab) {
pr_err("nfsd: unable to create nfsd_file_slab\n");
goto out_err;
}
- nfsd_file_mark_slab = kmem_cache_create("nfsd_file_mark",
- sizeof(struct nfsd_file_mark), 0, 0, NULL);
+ nfsd_file_mark_slab = KMEM_CACHE(nfsd_file_mark, 0);
if (!nfsd_file_mark_slab) {
pr_err("nfsd: unable to create nfsd_file_mark_slab\n");
goto out_err;
}
-
ret = list_lru_init(&nfsd_file_lru);
if (ret) {
pr_err("nfsd: failed to init nfsd_file_lru: %d\n", ret);
@@ -785,8 +783,6 @@ out_err:
nfsd_file_slab = NULL;
kmem_cache_destroy(nfsd_file_mark_slab);
nfsd_file_mark_slab = NULL;
- destroy_workqueue(nfsd_filecache_wq);
- nfsd_filecache_wq = NULL;
rhltable_destroy(&nfsd_file_rhltable);
goto out;
}
@@ -832,7 +828,6 @@ nfsd_alloc_fcache_disposal(void)
l = kmalloc(sizeof(*l), GFP_KERNEL);
if (!l)
return NULL;
- INIT_WORK(&l->work, nfsd_file_delayed_close);
spin_lock_init(&l->lock);
INIT_LIST_HEAD(&l->freeme);
return l;
@@ -841,7 +836,6 @@ nfsd_alloc_fcache_disposal(void)
static void
nfsd_free_fcache_disposal(struct nfsd_fcache_disposal *l)
{
- cancel_work_sync(&l->work);
nfsd_file_dispose_list(&l->freeme);
kfree(l);
}
@@ -910,8 +904,6 @@ nfsd_file_cache_shutdown(void)
fsnotify_wait_marks_destroyed();
kmem_cache_destroy(nfsd_file_mark_slab);
nfsd_file_mark_slab = NULL;
- destroy_workqueue(nfsd_filecache_wq);
- nfsd_filecache_wq = NULL;
rhltable_destroy(&nfsd_file_rhltable);
for_each_possible_cpu(i) {
diff --git a/fs/nfsd/filecache.h b/fs/nfsd/filecache.h
index e54165a3224f..c61884def906 100644
--- a/fs/nfsd/filecache.h
+++ b/fs/nfsd/filecache.h
@@ -56,6 +56,7 @@ void nfsd_file_cache_shutdown_net(struct net *net);
void nfsd_file_put(struct nfsd_file *nf);
struct nfsd_file *nfsd_file_get(struct nfsd_file *nf);
void nfsd_file_close_inode_sync(struct inode *inode);
+void nfsd_file_net_dispose(struct nfsd_net *nn);
bool nfsd_file_is_cached(struct inode *inode);
__be32 nfsd_file_acquire_gc(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned int may_flags, struct nfsd_file **nfp);
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 74b4360779a1..d4be519b5734 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -11,8 +11,10 @@
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <linux/filelock.h>
+#include <linux/nfs4.h>
#include <linux/percpu_counter.h>
#include <linux/siphash.h>
+#include <linux/sunrpc/stats.h>
/* Hash tables for nfs4_clientid state */
#define CLIENT_HASH_BITS 4
@@ -26,10 +28,22 @@ struct nfsd4_client_tracking_ops;
enum {
/* cache misses due only to checksum comparison failures */
- NFSD_NET_PAYLOAD_MISSES,
+ NFSD_STATS_PAYLOAD_MISSES,
/* amount of memory (in bytes) currently consumed by the DRC */
- NFSD_NET_DRC_MEM_USAGE,
- NFSD_NET_COUNTERS_NUM
+ NFSD_STATS_DRC_MEM_USAGE,
+ NFSD_STATS_RC_HITS, /* repcache hits */
+ NFSD_STATS_RC_MISSES, /* repcache misses */
+ NFSD_STATS_RC_NOCACHE, /* uncached reqs */
+ NFSD_STATS_FH_STALE, /* FH stale error */
+ NFSD_STATS_IO_READ, /* bytes returned to read requests */
+ NFSD_STATS_IO_WRITE, /* bytes passed in write requests */
+#ifdef CONFIG_NFSD_V4
+ NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
+ NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
+#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
+ NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
+#endif
+ NFSD_STATS_COUNTERS_NUM
};
/*
@@ -164,7 +178,10 @@ struct nfsd_net {
atomic_t num_drc_entries;
/* Per-netns stats counters */
- struct percpu_counter counter[NFSD_NET_COUNTERS_NUM];
+ struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
+
+ /* sunrpc svc stats */
+ struct svc_stat nfsd_svcstats;
/* longest hash chain seen */
unsigned int longest_chain;
@@ -192,6 +209,10 @@ struct nfsd_net {
atomic_t nfsd_courtesy_clients;
struct shrinker *nfsd_client_shrinker;
struct work_struct nfsd_shrinker_work;
+
+ /* last time an admin-revoke happened for NFSv4.0 */
+ time64_t nfs40_last_revoke;
+
};
/* Simple check to find out if a given net was properly initialized */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index b78eceebd945..dfcc957e460d 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -71,13 +71,15 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp)
struct nfsd_attrs attrs = {
.na_iattr = &argp->attrs,
};
+ const struct timespec64 *guardtime = NULL;
dprintk("nfsd: SETATTR(3) %s\n",
SVCFH_fmt(&argp->fh));
fh_copy(&resp->fh, &argp->fh);
- resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs,
- argp->check_guard, argp->guardtime);
+ if (argp->check_guard)
+ guardtime = &argp->guardtime;
+ resp->status = nfsd_setattr(rqstp, &resp->fh, &attrs, guardtime);
return rpc_success;
}
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index f32128955ec8..a7a07470c1f8 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -295,17 +295,14 @@ svcxdr_decode_sattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr,
static bool
svcxdr_decode_sattrguard3(struct xdr_stream *xdr, struct nfsd3_sattrargs *args)
{
- __be32 *p;
u32 check;
if (xdr_stream_decode_bool(xdr, &check) < 0)
return false;
if (check) {
- p = xdr_inline_decode(xdr, XDR_UNIT * 2);
- if (!p)
+ if (!svcxdr_decode_nfstime3(xdr, &args->guardtime))
return false;
args->check_guard = 1;
- args->guardtime = be32_to_cpup(p);
} else
args->check_guard = 0;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 32d23ef3e5de..87c9547989f6 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -45,7 +45,7 @@
#define NFSDDBG_FACILITY NFSDDBG_PROC
-static void nfsd4_mark_cb_fault(struct nfs4_client *, int reason);
+static void nfsd4_mark_cb_fault(struct nfs4_client *clp);
#define NFSPROC4_CB_NULL 0
#define NFSPROC4_CB_COMPOUND 1
@@ -85,7 +85,21 @@ static void encode_uint32(struct xdr_stream *xdr, u32 n)
static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap,
size_t len)
{
- WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0);
+ xdr_stream_encode_uint32_array(xdr, bitmap, len);
+}
+
+static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap,
+ struct nfs4_cb_fattr *fattr)
+{
+ fattr->ncf_cb_change = 0;
+ fattr->ncf_cb_fsize = 0;
+ if (bitmap[0] & FATTR4_WORD0_CHANGE)
+ if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0)
+ return -NFSERR_BAD_XDR;
+ if (bitmap[0] & FATTR4_WORD0_SIZE)
+ if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0)
+ return -NFSERR_BAD_XDR;
+ return 0;
}
static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op)
@@ -334,6 +348,30 @@ encode_cb_recallany4args(struct xdr_stream *xdr,
}
/*
+ * CB_GETATTR4args
+ * struct CB_GETATTR4args {
+ * nfs_fh4 fh;
+ * bitmap4 attr_request;
+ * };
+ *
+ * The size and change attributes are the only one
+ * guaranteed to be serviced by the client.
+ */
+static void
+encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr,
+ struct nfs4_cb_fattr *fattr)
+{
+ struct nfs4_delegation *dp =
+ container_of(fattr, struct nfs4_delegation, dl_cb_fattr);
+ struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle;
+
+ encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR);
+ encode_nfs_fh4(xdr, fh);
+ encode_bitmap4(xdr, fattr->ncf_cb_bmap, ARRAY_SIZE(fattr->ncf_cb_bmap));
+ hdr->nops++;
+}
+
+/*
* CB_SEQUENCE4args
*
* struct CB_SEQUENCE4args {
@@ -469,6 +507,26 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
}
/*
+ * 20.1. Operation 3: CB_GETATTR - Get Attributes
+ */
+static void nfs4_xdr_enc_cb_getattr(struct rpc_rqst *req,
+ struct xdr_stream *xdr, const void *data)
+{
+ const struct nfsd4_callback *cb = data;
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+ struct nfs4_cb_compound_hdr hdr = {
+ .ident = cb->cb_clp->cl_cb_ident,
+ .minorversion = cb->cb_clp->cl_minorversion,
+ };
+
+ encode_cb_compound4args(xdr, &hdr);
+ encode_cb_sequence4args(xdr, cb, &hdr);
+ encode_cb_getattr4args(xdr, &hdr, ncf);
+ encode_cb_nops(&hdr);
+}
+
+/*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/
static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr,
@@ -524,6 +582,42 @@ static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr,
}
/*
+ * 20.1. Operation 3: CB_GETATTR - Get Attributes
+ */
+static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ void *data)
+{
+ struct nfsd4_callback *cb = data;
+ struct nfs4_cb_compound_hdr hdr;
+ int status;
+ u32 bitmap[3] = {0};
+ u32 attrlen;
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+
+ status = decode_cb_compound4res(xdr, &hdr);
+ if (unlikely(status))
+ return status;
+
+ status = decode_cb_sequence4res(xdr, cb);
+ if (unlikely(status || cb->cb_seq_status))
+ return status;
+
+ status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status);
+ if (status)
+ return status;
+ if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0)
+ return -NFSERR_BAD_XDR;
+ if (xdr_stream_decode_u32(xdr, &attrlen) < 0)
+ return -NFSERR_BAD_XDR;
+ if (attrlen > (sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize)))
+ return -NFSERR_BAD_XDR;
+ status = decode_cb_fattr4(xdr, bitmap, ncf);
+ return status;
+}
+
+/*
* 20.2. Operation 4: CB_RECALL - Recall a Delegation
*/
static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp,
@@ -831,6 +925,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = {
PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock),
PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload),
PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any),
+ PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr),
};
static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)];
@@ -887,7 +982,16 @@ static struct workqueue_struct *callback_wq;
static bool nfsd4_queue_cb(struct nfsd4_callback *cb)
{
- return queue_work(callback_wq, &cb->cb_work);
+ trace_nfsd_cb_queue(cb->cb_clp, cb);
+ return queue_delayed_work(callback_wq, &cb->cb_work, 0);
+}
+
+static void nfsd4_queue_cb_delayed(struct nfsd4_callback *cb,
+ unsigned long msecs)
+{
+ trace_nfsd_cb_queue(cb->cb_clp, cb);
+ queue_delayed_work(callback_wq, &cb->cb_work,
+ msecs_to_jiffies(msecs));
}
static void nfsd41_cb_inflight_begin(struct nfs4_client *clp)
@@ -999,18 +1103,18 @@ static void nfsd4_mark_cb_state(struct nfs4_client *clp, int newstate)
{
if (clp->cl_cb_state != newstate) {
clp->cl_cb_state = newstate;
- trace_nfsd_cb_state(clp);
+ trace_nfsd_cb_new_state(clp);
}
}
-static void nfsd4_mark_cb_down(struct nfs4_client *clp, int reason)
+static void nfsd4_mark_cb_down(struct nfs4_client *clp)
{
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return;
nfsd4_mark_cb_state(clp, NFSD4_CB_DOWN);
}
-static void nfsd4_mark_cb_fault(struct nfs4_client *clp, int reason)
+static void nfsd4_mark_cb_fault(struct nfs4_client *clp)
{
if (test_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags))
return;
@@ -1022,7 +1126,7 @@ static void nfsd4_cb_probe_done(struct rpc_task *task, void *calldata)
struct nfs4_client *clp = container_of(calldata, struct nfs4_client, cl_cb_null);
if (task->tk_status)
- nfsd4_mark_cb_down(clp, task->tk_status);
+ nfsd4_mark_cb_down(clp);
else
nfsd4_mark_cb_state(clp, NFSD4_CB_UP);
}
@@ -1106,6 +1210,7 @@ static void nfsd41_destroy_cb(struct nfsd4_callback *cb)
{
struct nfs4_client *clp = cb->cb_clp;
+ trace_nfsd_cb_destroy(clp, cb);
nfsd41_cb_release_slot(cb);
if (cb->cb_ops && cb->cb_ops->release)
cb->cb_ops->release(cb);
@@ -1158,6 +1263,8 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
if (!cb->cb_holds_slot)
goto need_restart;
+ /* This is the operation status code for CB_SEQUENCE */
+ trace_nfsd_cb_seq_status(task, cb);
switch (cb->cb_seq_status) {
case 0:
/*
@@ -1171,13 +1278,23 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
break;
case -ESERVERFAULT:
++session->se_cb_seq_nr;
- fallthrough;
+ nfsd4_mark_cb_fault(cb->cb_clp);
+ ret = false;
+ break;
case 1:
+ /*
+ * cb_seq_status remains 1 if an RPC Reply was never
+ * received. NFSD can't know if the client processed
+ * the CB_SEQUENCE operation. Ask the client to send a
+ * DESTROY_SESSION to recover.
+ */
+ fallthrough;
case -NFS4ERR_BADSESSION:
- nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
+ nfsd4_mark_cb_fault(cb->cb_clp);
ret = false;
- break;
+ goto need_restart;
case -NFS4ERR_DELAY:
+ cb->cb_seq_status = 1;
if (!rpc_restart_call(task))
goto out;
@@ -1192,14 +1309,11 @@ static bool nfsd4_cb_sequence_done(struct rpc_task *task, struct nfsd4_callback
}
break;
default:
- nfsd4_mark_cb_fault(cb->cb_clp, cb->cb_seq_status);
- dprintk("%s: unprocessed error %d\n", __func__,
- cb->cb_seq_status);
+ nfsd4_mark_cb_fault(cb->cb_clp);
}
-
nfsd41_cb_release_slot(cb);
- dprintk("%s: freed slot, new seqid=%d\n", __func__,
- clp->cl_cb_session->se_cb_seq_nr);
+
+ trace_nfsd_cb_free_slot(task, cb);
if (RPC_SIGNALLED(task))
goto need_restart;
@@ -1211,6 +1325,7 @@ retry_nowait:
goto out;
need_restart:
if (!test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags)) {
+ trace_nfsd_cb_restart(clp, cb);
task->tk_status = 0;
cb->cb_need_restart = true;
}
@@ -1240,7 +1355,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
case -EIO:
case -ETIMEDOUT:
case -EACCES:
- nfsd4_mark_cb_down(clp, task->tk_status);
+ nfsd4_mark_cb_down(clp);
}
break;
default:
@@ -1295,12 +1410,13 @@ void nfsd4_shutdown_callback(struct nfs4_client *clp)
nfsd41_cb_inflight_wait_complete(clp);
}
-/* requires cl_lock: */
static struct nfsd4_conn * __nfsd4_find_backchannel(struct nfs4_client *clp)
{
struct nfsd4_session *s;
struct nfsd4_conn *c;
+ lockdep_assert_held(&clp->cl_lock);
+
list_for_each_entry(s, &clp->cl_sessions, se_perclnt) {
list_for_each_entry(c, &s->se_conns, cn_persession) {
if (c->cn_flags & NFS4_CDFC4_BACK)
@@ -1324,11 +1440,14 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
struct nfsd4_conn *c;
int err;
+ trace_nfsd_cb_bc_update(clp, cb);
+
/*
* This is either an update, or the client dying; in either case,
* kill the old client:
*/
if (clp->cl_cb_client) {
+ trace_nfsd_cb_bc_shutdown(clp, cb);
rpc_shutdown_client(clp->cl_cb_client);
clp->cl_cb_client = NULL;
put_cred(clp->cl_cb_cred);
@@ -1340,13 +1459,15 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
}
if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
return;
+
spin_lock(&clp->cl_lock);
/*
* Only serialized callback code is allowed to clear these
* flags; main nfsd code can only set them:
*/
- BUG_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK));
+ WARN_ON(!(clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK));
clear_bit(NFSD4_CLIENT_CB_UPDATE, &clp->cl_flags);
+
memcpy(&conn, &cb->cb_clp->cl_cb_conn, sizeof(struct nfs4_cb_conn));
c = __nfsd4_find_backchannel(clp);
if (c) {
@@ -1358,7 +1479,7 @@ static void nfsd4_process_cb_update(struct nfsd4_callback *cb)
err = setup_callback_client(clp, &conn, ses);
if (err) {
- nfsd4_mark_cb_down(clp, err);
+ nfsd4_mark_cb_down(clp);
if (c)
svc_xprt_put(c->cn_xprt);
return;
@@ -1369,25 +1490,28 @@ static void
nfsd4_run_cb_work(struct work_struct *work)
{
struct nfsd4_callback *cb =
- container_of(work, struct nfsd4_callback, cb_work);
+ container_of(work, struct nfsd4_callback, cb_work.work);
struct nfs4_client *clp = cb->cb_clp;
struct rpc_clnt *clnt;
int flags;
- if (cb->cb_need_restart) {
- cb->cb_need_restart = false;
- } else {
- if (cb->cb_ops && cb->cb_ops->prepare)
- cb->cb_ops->prepare(cb);
- }
+ trace_nfsd_cb_start(clp);
if (clp->cl_flags & NFSD4_CLIENT_CB_FLAG_MASK)
nfsd4_process_cb_update(cb);
clnt = clp->cl_cb_client;
if (!clnt) {
- /* Callback channel broken, or client killed; give up: */
- nfsd41_destroy_cb(cb);
+ if (test_bit(NFSD4_CLIENT_CB_KILL, &clp->cl_flags))
+ nfsd41_destroy_cb(cb);
+ else {
+ /*
+ * XXX: Ideally, we could wait for the client to
+ * reconnect, but I haven't figured out how
+ * to do that yet.
+ */
+ nfsd4_queue_cb_delayed(cb, 25);
+ }
return;
}
@@ -1400,6 +1524,12 @@ nfsd4_run_cb_work(struct work_struct *work)
return;
}
+ if (cb->cb_need_restart) {
+ cb->cb_need_restart = false;
+ } else {
+ if (cb->cb_ops && cb->cb_ops->prepare)
+ cb->cb_ops->prepare(cb);
+ }
cb->cb_msg.rpc_cred = clp->cl_cb_cred;
flags = clp->cl_minorversion ? RPC_TASK_NOCONNECT : RPC_TASK_SOFTCONN;
rpc_call_async(clnt, &cb->cb_msg, RPC_TASK_SOFT | flags,
@@ -1414,8 +1544,7 @@ void nfsd4_init_cb(struct nfsd4_callback *cb, struct nfs4_client *clp,
cb->cb_msg.rpc_argp = cb;
cb->cb_msg.rpc_resp = cb;
cb->cb_ops = ops;
- INIT_WORK(&cb->cb_work, nfsd4_run_cb_work);
- cb->cb_seq_status = 1;
+ INIT_DELAYED_WORK(&cb->cb_work, nfsd4_run_cb_work);
cb->cb_status = 0;
cb->cb_need_restart = false;
cb->cb_holds_slot = false;
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index 4c0d00bdfbb1..4f3072b5979a 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -152,6 +152,23 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
#endif
}
+void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
+{
+ struct nfsd_file *fl;
+
+ spin_lock(&ls->ls_stid.sc_file->fi_lock);
+ fl = ls->ls_file;
+ ls->ls_file = NULL;
+ spin_unlock(&ls->ls_stid.sc_file->fi_lock);
+
+ if (fl) {
+ if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
+ kernel_setlease(fl->nf_file, F_UNLCK, NULL,
+ (void **)&ls);
+ nfsd_file_put(fl);
+ }
+}
+
static void
nfsd4_free_layout_stateid(struct nfs4_stid *stid)
{
@@ -169,9 +186,7 @@ nfsd4_free_layout_stateid(struct nfs4_stid *stid)
list_del_init(&ls->ls_perfile);
spin_unlock(&fp->fi_lock);
- if (!nfsd4_layout_ops[ls->ls_layout_type]->disable_recalls)
- kernel_setlease(ls->ls_file->nf_file, F_UNLCK, NULL, (void **)&ls);
- nfsd_file_put(ls->ls_file);
+ nfsd4_close_layout(ls);
if (ls->ls_recalled)
atomic_dec(&ls->ls_stid.sc_file->fi_lo_recalls);
@@ -235,7 +250,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
nfsd4_init_cb(&ls->ls_recall, clp, &nfsd4_cb_layout_ops,
NFSPROC4_CLNT_CB_LAYOUT);
- if (parent->sc_type == NFS4_DELEG_STID)
+ if (parent->sc_type == SC_TYPE_DELEG)
ls->ls_file = nfsd_file_get(fp->fi_deleg_file);
else
ls->ls_file = find_any_file(fp);
@@ -249,7 +264,7 @@ nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
}
spin_lock(&clp->cl_lock);
- stp->sc_type = NFS4_LAYOUT_STID;
+ stp->sc_type = SC_TYPE_LAYOUT;
list_add(&ls->ls_perclnt, &clp->cl_lo_states);
spin_unlock(&clp->cl_lock);
@@ -268,13 +283,13 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
{
struct nfs4_layout_stateid *ls;
struct nfs4_stid *stid;
- unsigned char typemask = NFS4_LAYOUT_STID;
+ unsigned short typemask = SC_TYPE_LAYOUT;
__be32 status;
if (create)
- typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID);
+ typemask |= (SC_TYPE_OPEN | SC_TYPE_LOCK | SC_TYPE_DELEG);
- status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid,
+ status = nfsd4_lookup_stateid(cstate, stateid, typemask, 0, &stid,
net_generic(SVC_NET(rqstp), nfsd_net_id));
if (status)
goto out;
@@ -285,7 +300,7 @@ nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
goto out_put_stid;
}
- if (stid->sc_type != NFS4_LAYOUT_STID) {
+ if (stid->sc_type != SC_TYPE_LAYOUT) {
ls = nfsd4_alloc_layout_stateid(cstate, stid, layout_type);
nfs4_put_stid(stid);
@@ -517,7 +532,7 @@ nfsd4_return_file_layouts(struct svc_rqst *rqstp,
lrp->lrs_present = true;
} else {
trace_nfsd_layoutstate_unhash(&ls->ls_stid.sc_stateid);
- nfs4_unhash_stid(&ls->ls_stid);
+ ls->ls_stid.sc_status |= SC_STATUS_CLOSED;
lrp->lrs_present = false;
}
spin_unlock(&ls->ls_lock);
@@ -604,7 +619,7 @@ nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp)
}
static void
-nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
+nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls, struct nfsd_file *file)
{
struct nfs4_client *clp = ls->ls_stid.sc_client;
char addr_str[INET6_ADDRSTRLEN];
@@ -626,7 +641,7 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
argv[0] = (char *)nfsd_recall_failed;
argv[1] = addr_str;
- argv[2] = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_id;
+ argv[2] = file->nf_file->f_path.mnt->mnt_sb->s_id;
argv[3] = NULL;
error = call_usermodehelper(nfsd_recall_failed, argv, envp,
@@ -656,6 +671,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
struct nfsd_net *nn;
ktime_t now, cutoff;
const struct nfsd4_layout_ops *ops;
+ struct nfsd_file *fl;
trace_nfsd_cb_layout_done(&ls->ls_stid.sc_stateid, task);
switch (task->tk_status) {
@@ -687,12 +703,17 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
* Unknown error or non-responding client, we'll need to fence.
*/
trace_nfsd_layout_recall_fail(&ls->ls_stid.sc_stateid);
-
- ops = nfsd4_layout_ops[ls->ls_layout_type];
- if (ops->fence_client)
- ops->fence_client(ls);
- else
- nfsd4_cb_layout_fail(ls);
+ rcu_read_lock();
+ fl = nfsd_file_get(ls->ls_file);
+ rcu_read_unlock();
+ if (fl) {
+ ops = nfsd4_layout_ops[ls->ls_layout_type];
+ if (ops->fence_client)
+ ops->fence_client(ls, fl);
+ else
+ nfsd4_cb_layout_fail(ls, fl);
+ nfsd_file_put(fl);
+ }
return 1;
case -NFS4ERR_NOMATCHING_LAYOUT:
trace_nfsd_layout_recall_done(&ls->ls_stid.sc_stateid);
@@ -755,13 +776,11 @@ nfsd4_init_pnfs(void)
for (i = 0; i < DEVID_HASH_SIZE; i++)
INIT_LIST_HEAD(&nfsd_devid_hash[i]);
- nfs4_layout_cache = kmem_cache_create("nfs4_layout",
- sizeof(struct nfs4_layout), 0, 0, NULL);
+ nfs4_layout_cache = KMEM_CACHE(nfs4_layout, 0);
if (!nfs4_layout_cache)
return -ENOMEM;
- nfs4_layout_stateid_cache = kmem_cache_create("nfs4_layout_stateid",
- sizeof(struct nfs4_layout_stateid), 0, 0, NULL);
+ nfs4_layout_stateid_cache = KMEM_CACHE(nfs4_layout_stateid, 0);
if (!nfs4_layout_stateid_cache) {
kmem_cache_destroy(nfs4_layout_cache);
return -ENOMEM;
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 14712fa08f76..2927b1263f08 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1143,6 +1143,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
};
struct inode *inode;
__be32 status = nfs_ok;
+ bool save_no_wcc;
int err;
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
@@ -1168,8 +1169,10 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (status)
goto out;
- status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs,
- 0, (time64_t)0);
+ save_no_wcc = cstate->current_fh.fh_no_wcc;
+ cstate->current_fh.fh_no_wcc = true;
+ status = nfsd_setattr(rqstp, &cstate->current_fh, &attrs, NULL);
+ cstate->current_fh.fh_no_wcc = save_no_wcc;
if (!status)
status = nfserrno(attrs.na_labelerr);
if (!status)
@@ -2490,10 +2493,10 @@ nfsd4_proc_null(struct svc_rqst *rqstp)
return rpc_success;
}
-static inline void nfsd4_increment_op_stats(u32 opnum)
+static inline void nfsd4_increment_op_stats(struct nfsd_net *nn, u32 opnum)
{
if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP)
- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_NFS4_OP(opnum)]);
+ percpu_counter_inc(&nn->counter[NFSD_STATS_NFS4_OP(opnum)]);
}
static const struct nfsd4_operation nfsd4_ops[];
@@ -2768,7 +2771,7 @@ encode_op:
status, nfsd4_op_name(op->opnum));
nfsd4_cstate_clear_replay(cstate);
- nfsd4_increment_op_stats(op->opnum);
+ nfsd4_increment_op_stats(nn, op->opnum);
}
fh_put(current_fh);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9257425cbd1a..1a93c7fcf76c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -87,6 +87,7 @@ static void nfs4_free_ol_stateid(struct nfs4_stid *stid);
void nfsd4_end_grace(struct nfsd_net *nn);
static void _free_cpntf_state_locked(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
static void nfsd4_file_hash_remove(struct nfs4_file *fi);
+static void deleg_reaper(struct nfsd_net *nn);
/* Locking: */
@@ -127,6 +128,7 @@ static void free_session(struct nfsd4_session *);
static const struct nfsd4_callback_ops nfsd4_cb_recall_ops;
static const struct nfsd4_callback_ops nfsd4_cb_notify_lock_ops;
+static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops;
static struct workqueue_struct *laundry_wq;
@@ -318,6 +320,7 @@ free_nbl(struct kref *kref)
struct nfsd4_blocked_lock *nbl;
nbl = container_of(kref, struct nfsd4_blocked_lock, nbl_kref);
+ locks_release_private(&nbl->nbl_lock);
kfree(nbl);
}
@@ -325,7 +328,6 @@ static void
free_blocked_lock(struct nfsd4_blocked_lock *nbl)
{
locks_delete_block(&nbl->nbl_lock);
- locks_release_private(&nbl->nbl_lock);
kref_put(&nbl->nbl_kref, free_nbl);
}
@@ -1189,6 +1191,10 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_file *fp,
dp->dl_recalled = false;
nfsd4_init_cb(&dp->dl_recall, dp->dl_stid.sc_client,
&nfsd4_cb_recall_ops, NFSPROC4_CLNT_CB_RECALL);
+ nfsd4_init_cb(&dp->dl_cb_fattr.ncf_getattr, dp->dl_stid.sc_client,
+ &nfsd4_cb_getattr_ops, NFSPROC4_CLNT_CB_GETATTR);
+ dp->dl_cb_fattr.ncf_file_modified = false;
+ dp->dl_cb_fattr.ncf_cb_bmap[0] = FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE;
get_nfs4_file(fp);
dp->dl_stid.sc_file = fp;
return dp;
@@ -1210,6 +1216,8 @@ nfs4_put_stid(struct nfs4_stid *s)
return;
}
idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+ if (s->sc_status & SC_STATUS_ADMIN_REVOKED)
+ atomic_dec(&s->sc_client->cl_admin_revoked);
nfs4_free_cpntf_statelist(clp->net, s);
spin_unlock(&clp->cl_lock);
s->sc_free(s);
@@ -1260,11 +1268,6 @@ static void destroy_unhashed_deleg(struct nfs4_delegation *dp)
nfs4_put_stid(&dp->dl_stid);
}
-void nfs4_unhash_stid(struct nfs4_stid *s)
-{
- s->sc_type = 0;
-}
-
/**
* nfs4_delegation_exists - Discover if this delegation already exists
* @clp: a pointer to the nfs4_client we're granting a delegation to
@@ -1312,11 +1315,12 @@ hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
lockdep_assert_held(&state_lock);
lockdep_assert_held(&fp->fi_lock);
+ lockdep_assert_held(&clp->cl_lock);
if (nfs4_delegation_exists(clp, fp))
return -EAGAIN;
refcount_inc(&dp->dl_stid.sc_count);
- dp->dl_stid.sc_type = NFS4_DELEG_STID;
+ dp->dl_stid.sc_type = SC_TYPE_DELEG;
list_add(&dp->dl_perfile, &fp->fi_delegations);
list_add(&dp->dl_perclnt, &clp->cl_delegations);
return 0;
@@ -1328,7 +1332,7 @@ static bool delegation_hashed(struct nfs4_delegation *dp)
}
static bool
-unhash_delegation_locked(struct nfs4_delegation *dp)
+unhash_delegation_locked(struct nfs4_delegation *dp, unsigned short statusmask)
{
struct nfs4_file *fp = dp->dl_stid.sc_file;
@@ -1337,7 +1341,13 @@ unhash_delegation_locked(struct nfs4_delegation *dp)
if (!delegation_hashed(dp))
return false;
- dp->dl_stid.sc_type = NFS4_CLOSED_DELEG_STID;
+ if (statusmask == SC_STATUS_REVOKED &&
+ dp->dl_stid.sc_client->cl_minorversion == 0)
+ statusmask = SC_STATUS_CLOSED;
+ dp->dl_stid.sc_status |= statusmask;
+ if (statusmask & SC_STATUS_ADMIN_REVOKED)
+ atomic_inc(&dp->dl_stid.sc_client->cl_admin_revoked);
+
/* Ensure that deleg break won't try to requeue it */
++dp->dl_time;
spin_lock(&fp->fi_lock);
@@ -1353,7 +1363,7 @@ static void destroy_delegation(struct nfs4_delegation *dp)
bool unhashed;
spin_lock(&state_lock);
- unhashed = unhash_delegation_locked(dp);
+ unhashed = unhash_delegation_locked(dp, SC_STATUS_CLOSED);
spin_unlock(&state_lock);
if (unhashed)
destroy_unhashed_deleg(dp);
@@ -1367,9 +1377,9 @@ static void revoke_delegation(struct nfs4_delegation *dp)
trace_nfsd_stid_revoke(&dp->dl_stid);
- if (clp->cl_minorversion) {
+ if (dp->dl_stid.sc_status &
+ (SC_STATUS_REVOKED | SC_STATUS_ADMIN_REVOKED)) {
spin_lock(&clp->cl_lock);
- dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
refcount_inc(&dp->dl_stid.sc_count);
list_add(&dp->dl_recall_lru, &clp->cl_revoked);
spin_unlock(&clp->cl_lock);
@@ -1377,8 +1387,8 @@ static void revoke_delegation(struct nfs4_delegation *dp)
destroy_unhashed_deleg(dp);
}
-/*
- * SETCLIENTID state
+/*
+ * SETCLIENTID state
*/
static unsigned int clientid_hashval(u32 id)
@@ -1531,6 +1541,8 @@ static void put_ol_stateid_locked(struct nfs4_ol_stateid *stp,
}
idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
+ if (s->sc_status & SC_STATUS_ADMIN_REVOKED)
+ atomic_dec(&s->sc_client->cl_admin_revoked);
list_add(&stp->st_locks, reaplist);
}
@@ -1541,7 +1553,7 @@ static bool unhash_lock_stateid(struct nfs4_ol_stateid *stp)
if (!unhash_ol_stateid(stp))
return false;
list_del_init(&stp->st_locks);
- nfs4_unhash_stid(&stp->st_stid);
+ stp->st_stid.sc_status |= SC_STATUS_CLOSED;
return true;
}
@@ -1599,7 +1611,7 @@ static void release_open_stateid_locks(struct nfs4_ol_stateid *open_stp,
while (!list_empty(&open_stp->st_locks)) {
stp = list_entry(open_stp->st_locks.next,
struct nfs4_ol_stateid, st_locks);
- WARN_ON(!unhash_lock_stateid(stp));
+ unhash_lock_stateid(stp);
put_ol_stateid_locked(stp, reaplist);
}
}
@@ -1620,6 +1632,7 @@ static void release_open_stateid(struct nfs4_ol_stateid *stp)
LIST_HEAD(reaplist);
spin_lock(&stp->st_stid.sc_client->cl_lock);
+ stp->st_stid.sc_status |= SC_STATUS_CLOSED;
if (unhash_open_stateid(stp, &reaplist))
put_ol_stateid_locked(stp, &reaplist);
spin_unlock(&stp->st_stid.sc_client->cl_lock);
@@ -1675,6 +1688,136 @@ static void release_openowner(struct nfs4_openowner *oo)
nfs4_put_stateowner(&oo->oo_owner);
}
+static struct nfs4_stid *find_one_sb_stid(struct nfs4_client *clp,
+ struct super_block *sb,
+ unsigned int sc_types)
+{
+ unsigned long id, tmp;
+ struct nfs4_stid *stid;
+
+ spin_lock(&clp->cl_lock);
+ idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
+ if ((stid->sc_type & sc_types) &&
+ stid->sc_status == 0 &&
+ stid->sc_file->fi_inode->i_sb == sb) {
+ refcount_inc(&stid->sc_count);
+ break;
+ }
+ spin_unlock(&clp->cl_lock);
+ return stid;
+}
+
+/**
+ * nfsd4_revoke_states - revoke all nfsv4 states associated with given filesystem
+ * @net: used to identify instance of nfsd (there is one per net namespace)
+ * @sb: super_block used to identify target filesystem
+ *
+ * All nfs4 states (open, lock, delegation, layout) held by the server instance
+ * and associated with a file on the given filesystem will be revoked resulting
+ * in any files being closed and so all references from nfsd to the filesystem
+ * being released. Thus nfsd will no longer prevent the filesystem from being
+ * unmounted.
+ *
+ * The clients which own the states will subsequently being notified that the
+ * states have been "admin-revoked".
+ */
+void nfsd4_revoke_states(struct net *net, struct super_block *sb)
+{
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ unsigned int idhashval;
+ unsigned int sc_types;
+
+ sc_types = SC_TYPE_OPEN | SC_TYPE_LOCK | SC_TYPE_DELEG | SC_TYPE_LAYOUT;
+
+ spin_lock(&nn->client_lock);
+ for (idhashval = 0; idhashval < CLIENT_HASH_MASK; idhashval++) {
+ struct list_head *head = &nn->conf_id_hashtbl[idhashval];
+ struct nfs4_client *clp;
+ retry:
+ list_for_each_entry(clp, head, cl_idhash) {
+ struct nfs4_stid *stid = find_one_sb_stid(clp, sb,
+ sc_types);
+ if (stid) {
+ struct nfs4_ol_stateid *stp;
+ struct nfs4_delegation *dp;
+ struct nfs4_layout_stateid *ls;
+
+ spin_unlock(&nn->client_lock);
+ switch (stid->sc_type) {
+ case SC_TYPE_OPEN:
+ stp = openlockstateid(stid);
+ mutex_lock_nested(&stp->st_mutex,
+ OPEN_STATEID_MUTEX);
+
+ spin_lock(&clp->cl_lock);
+ if (stid->sc_status == 0) {
+ stid->sc_status |=
+ SC_STATUS_ADMIN_REVOKED;
+ atomic_inc(&clp->cl_admin_revoked);
+ spin_unlock(&clp->cl_lock);
+ release_all_access(stp);
+ } else
+ spin_unlock(&clp->cl_lock);
+ mutex_unlock(&stp->st_mutex);
+ break;
+ case SC_TYPE_LOCK:
+ stp = openlockstateid(stid);
+ mutex_lock_nested(&stp->st_mutex,
+ LOCK_STATEID_MUTEX);
+ spin_lock(&clp->cl_lock);
+ if (stid->sc_status == 0) {
+ struct nfs4_lockowner *lo =
+ lockowner(stp->st_stateowner);
+ struct nfsd_file *nf;
+
+ stid->sc_status |=
+ SC_STATUS_ADMIN_REVOKED;
+ atomic_inc(&clp->cl_admin_revoked);
+ spin_unlock(&clp->cl_lock);
+ nf = find_any_file(stp->st_stid.sc_file);
+ if (nf) {
+ get_file(nf->nf_file);
+ filp_close(nf->nf_file,
+ (fl_owner_t)lo);
+ nfsd_file_put(nf);
+ }
+ release_all_access(stp);
+ } else
+ spin_unlock(&clp->cl_lock);
+ mutex_unlock(&stp->st_mutex);
+ break;
+ case SC_TYPE_DELEG:
+ dp = delegstateid(stid);
+ spin_lock(&state_lock);
+ if (!unhash_delegation_locked(
+ dp, SC_STATUS_ADMIN_REVOKED))
+ dp = NULL;
+ spin_unlock(&state_lock);
+ if (dp)
+ revoke_delegation(dp);
+ break;
+ case SC_TYPE_LAYOUT:
+ ls = layoutstateid(stid);
+ nfsd4_close_layout(ls);
+ break;
+ }
+ nfs4_put_stid(stid);
+ spin_lock(&nn->client_lock);
+ if (clp->cl_minorversion == 0)
+ /* Allow cleanup after a lease period.
+ * store_release ensures cleanup will
+ * see any newly revoked states if it
+ * sees the time updated.
+ */
+ nn->nfs40_last_revoke =
+ ktime_get_boottime_seconds();
+ goto retry;
+ }
+ }
+ }
+ spin_unlock(&nn->client_lock);
+}
+
static inline int
hash_sessionid(struct nfs4_sessionid *sessionid)
{
@@ -2228,7 +2371,7 @@ __destroy_client(struct nfs4_client *clp)
spin_lock(&state_lock);
while (!list_empty(&clp->cl_delegations)) {
dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
- WARN_ON(!unhash_delegation_locked(dp));
+ unhash_delegation_locked(dp, SC_STATUS_CLOSED);
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
@@ -2460,14 +2603,16 @@ find_stateid_locked(struct nfs4_client *cl, stateid_t *t)
}
static struct nfs4_stid *
-find_stateid_by_type(struct nfs4_client *cl, stateid_t *t, char typemask)
+find_stateid_by_type(struct nfs4_client *cl, stateid_t *t,
+ unsigned short typemask, unsigned short ok_states)
{
struct nfs4_stid *s;
spin_lock(&cl->cl_lock);
s = find_stateid_locked(cl, t);
if (s != NULL) {
- if (typemask & s->sc_type)
+ if ((s->sc_status & ~ok_states) == 0 &&
+ (typemask & s->sc_type))
refcount_inc(&s->sc_count);
else
s = NULL;
@@ -2487,9 +2632,9 @@ static struct nfs4_client *get_nfsdfs_clp(struct inode *inode)
static void seq_quote_mem(struct seq_file *m, char *data, int len)
{
- seq_printf(m, "\"");
+ seq_puts(m, "\"");
seq_escape_mem(m, data, len, ESCAPE_HEX | ESCAPE_NAP | ESCAPE_APPEND, "\"\\");
- seq_printf(m, "\"");
+ seq_puts(m, "\"");
}
static const char *cb_state2str(int state)
@@ -2530,20 +2675,22 @@ static int client_info_show(struct seq_file *m, void *v)
seq_puts(m, "status: unconfirmed\n");
seq_printf(m, "seconds from last renew: %lld\n",
ktime_get_boottime_seconds() - clp->cl_time);
- seq_printf(m, "name: ");
+ seq_puts(m, "name: ");
seq_quote_mem(m, clp->cl_name.data, clp->cl_name.len);
seq_printf(m, "\nminor version: %d\n", clp->cl_minorversion);
if (clp->cl_nii_domain.data) {
- seq_printf(m, "Implementation domain: ");
+ seq_puts(m, "Implementation domain: ");
seq_quote_mem(m, clp->cl_nii_domain.data,
clp->cl_nii_domain.len);
- seq_printf(m, "\nImplementation name: ");
+ seq_puts(m, "\nImplementation name: ");
seq_quote_mem(m, clp->cl_nii_name.data, clp->cl_nii_name.len);
seq_printf(m, "\nImplementation time: [%lld, %ld]\n",
clp->cl_nii_time.tv_sec, clp->cl_nii_time.tv_nsec);
}
seq_printf(m, "callback state: %s\n", cb_state2str(clp->cl_cb_state));
seq_printf(m, "callback address: %pISpc\n", &clp->cl_cb_conn.cb_addr);
+ seq_printf(m, "admin-revoked states: %d\n",
+ atomic_read(&clp->cl_admin_revoked));
drop_client(clp);
return 0;
@@ -2602,7 +2749,7 @@ static void nfs4_show_superblock(struct seq_file *s, struct nfsd_file *f)
static void nfs4_show_owner(struct seq_file *s, struct nfs4_stateowner *oo)
{
- seq_printf(s, "owner: ");
+ seq_puts(s, "owner: ");
seq_quote_mem(s, oo->so_owner.data, oo->so_owner.len);
}
@@ -2620,20 +2767,13 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
struct nfs4_stateowner *oo;
unsigned int access, deny;
- if (st->sc_type != NFS4_OPEN_STID && st->sc_type != NFS4_LOCK_STID)
- return 0; /* XXX: or SEQ_SKIP? */
ols = openlockstateid(st);
oo = ols->st_stateowner;
nf = st->sc_file;
- spin_lock(&nf->fi_lock);
- file = find_any_file_locked(nf);
- if (!file)
- goto out;
-
- seq_printf(s, "- ");
+ seq_puts(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
- seq_printf(s, ": { type: open, ");
+ seq_puts(s, ": { type: open, ");
access = bmap_to_share_mode(ols->st_access_bmap);
deny = bmap_to_share_mode(ols->st_deny_bmap);
@@ -2645,14 +2785,19 @@ static int nfs4_show_open(struct seq_file *s, struct nfs4_stid *st)
deny & NFS4_SHARE_ACCESS_READ ? "r" : "-",
deny & NFS4_SHARE_ACCESS_WRITE ? "w" : "-");
- nfs4_show_superblock(s, file);
- seq_printf(s, ", ");
- nfs4_show_fname(s, file);
- seq_printf(s, ", ");
- nfs4_show_owner(s, oo);
- seq_printf(s, " }\n");
-out:
+ spin_lock(&nf->fi_lock);
+ file = find_any_file_locked(nf);
+ if (file) {
+ nfs4_show_superblock(s, file);
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ seq_puts(s, ", ");
+ }
spin_unlock(&nf->fi_lock);
+ nfs4_show_owner(s, oo);
+ if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
+ seq_puts(s, ", admin-revoked");
+ seq_puts(s, " }\n");
return 0;
}
@@ -2666,30 +2811,31 @@ static int nfs4_show_lock(struct seq_file *s, struct nfs4_stid *st)
ols = openlockstateid(st);
oo = ols->st_stateowner;
nf = st->sc_file;
- spin_lock(&nf->fi_lock);
- file = find_any_file_locked(nf);
- if (!file)
- goto out;
- seq_printf(s, "- ");
+ seq_puts(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
- seq_printf(s, ": { type: lock, ");
+ seq_puts(s, ": { type: lock, ");
- /*
- * Note: a lock stateid isn't really the same thing as a lock,
- * it's the locking state held by one owner on a file, and there
- * may be multiple (or no) lock ranges associated with it.
- * (Same for the matter is true of open stateids.)
- */
+ spin_lock(&nf->fi_lock);
+ file = find_any_file_locked(nf);
+ if (file) {
+ /*
+ * Note: a lock stateid isn't really the same thing as a lock,
+ * it's the locking state held by one owner on a file, and there
+ * may be multiple (or no) lock ranges associated with it.
+ * (Same for the matter is true of open stateids.)
+ */
- nfs4_show_superblock(s, file);
- /* XXX: open stateid? */
- seq_printf(s, ", ");
- nfs4_show_fname(s, file);
- seq_printf(s, ", ");
+ nfs4_show_superblock(s, file);
+ /* XXX: open stateid? */
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ seq_puts(s, ", ");
+ }
nfs4_show_owner(s, oo);
- seq_printf(s, " }\n");
-out:
+ if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
+ seq_puts(s, ", admin-revoked");
+ seq_puts(s, " }\n");
spin_unlock(&nf->fi_lock);
return 0;
}
@@ -2702,27 +2848,28 @@ static int nfs4_show_deleg(struct seq_file *s, struct nfs4_stid *st)
ds = delegstateid(st);
nf = st->sc_file;
- spin_lock(&nf->fi_lock);
- file = nf->fi_deleg_file;
- if (!file)
- goto out;
- seq_printf(s, "- ");
+ seq_puts(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
- seq_printf(s, ": { type: deleg, ");
+ seq_puts(s, ": { type: deleg, ");
- /* Kinda dead code as long as we only support read delegs: */
- seq_printf(s, "access: %s, ",
- ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
+ seq_printf(s, "access: %s",
+ ds->dl_type == NFS4_OPEN_DELEGATE_READ ? "r" : "w");
/* XXX: lease time, whether it's being recalled. */
- nfs4_show_superblock(s, file);
- seq_printf(s, ", ");
- nfs4_show_fname(s, file);
- seq_printf(s, " }\n");
-out:
+ spin_lock(&nf->fi_lock);
+ file = nf->fi_deleg_file;
+ if (file) {
+ seq_puts(s, ", ");
+ nfs4_show_superblock(s, file);
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ }
spin_unlock(&nf->fi_lock);
+ if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
+ seq_puts(s, ", admin-revoked");
+ seq_puts(s, " }\n");
return 0;
}
@@ -2732,18 +2879,25 @@ static int nfs4_show_layout(struct seq_file *s, struct nfs4_stid *st)
struct nfsd_file *file;
ls = container_of(st, struct nfs4_layout_stateid, ls_stid);
- file = ls->ls_file;
- seq_printf(s, "- ");
+ seq_puts(s, "- ");
nfs4_show_stateid(s, &st->sc_stateid);
- seq_printf(s, ": { type: layout, ");
+ seq_puts(s, ": { type: layout");
/* XXX: What else would be useful? */
- nfs4_show_superblock(s, file);
- seq_printf(s, ", ");
- nfs4_show_fname(s, file);
- seq_printf(s, " }\n");
+ spin_lock(&ls->ls_stid.sc_file->fi_lock);
+ file = ls->ls_file;
+ if (file) {
+ seq_puts(s, ", ");
+ nfs4_show_superblock(s, file);
+ seq_puts(s, ", ");
+ nfs4_show_fname(s, file);
+ }
+ spin_unlock(&ls->ls_stid.sc_file->fi_lock);
+ if (st->sc_status & SC_STATUS_ADMIN_REVOKED)
+ seq_puts(s, ", admin-revoked");
+ seq_puts(s, " }\n");
return 0;
}
@@ -2753,13 +2907,13 @@ static int states_show(struct seq_file *s, void *v)
struct nfs4_stid *st = v;
switch (st->sc_type) {
- case NFS4_OPEN_STID:
+ case SC_TYPE_OPEN:
return nfs4_show_open(s, st);
- case NFS4_LOCK_STID:
+ case SC_TYPE_LOCK:
return nfs4_show_lock(s, st);
- case NFS4_DELEG_STID:
+ case SC_TYPE_DELEG:
return nfs4_show_deleg(s, st);
- case NFS4_LAYOUT_STID:
+ case SC_TYPE_LAYOUT:
return nfs4_show_layout(s, st);
default:
return 0; /* XXX: or SEQ_SKIP? */
@@ -2896,11 +3050,59 @@ nfsd4_cb_recall_any_release(struct nfsd4_callback *cb)
spin_unlock(&nn->client_lock);
}
+static int
+nfsd4_cb_getattr_done(struct nfsd4_callback *cb, struct rpc_task *task)
+{
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+
+ ncf->ncf_cb_status = task->tk_status;
+ switch (task->tk_status) {
+ case -NFS4ERR_DELAY:
+ rpc_delay(task, 2 * HZ);
+ return 0;
+ default:
+ return 1;
+ }
+}
+
+static void
+nfsd4_cb_getattr_release(struct nfsd4_callback *cb)
+{
+ struct nfs4_cb_fattr *ncf =
+ container_of(cb, struct nfs4_cb_fattr, ncf_getattr);
+ struct nfs4_delegation *dp =
+ container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
+
+ nfs4_put_stid(&dp->dl_stid);
+ clear_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags);
+ wake_up_bit(&ncf->ncf_cb_flags, CB_GETATTR_BUSY);
+}
+
static const struct nfsd4_callback_ops nfsd4_cb_recall_any_ops = {
.done = nfsd4_cb_recall_any_done,
.release = nfsd4_cb_recall_any_release,
};
+static const struct nfsd4_callback_ops nfsd4_cb_getattr_ops = {
+ .done = nfsd4_cb_getattr_done,
+ .release = nfsd4_cb_getattr_release,
+};
+
+static void nfs4_cb_getattr(struct nfs4_cb_fattr *ncf)
+{
+ struct nfs4_delegation *dp =
+ container_of(ncf, struct nfs4_delegation, dl_cb_fattr);
+
+ if (test_and_set_bit(CB_GETATTR_BUSY, &ncf->ncf_cb_flags))
+ return;
+ /* set to proper status when nfsd4_cb_getattr_done runs */
+ ncf->ncf_cb_status = NFS4ERR_IO;
+
+ refcount_inc(&dp->dl_stid.sc_count);
+ nfsd4_run_cb(&ncf->ncf_getattr);
+}
+
static struct nfs4_client *create_client(struct xdr_netobj name,
struct svc_rqst *rqstp, nfs4_verifier *verf)
{
@@ -3414,6 +3616,9 @@ out_new:
new->cl_spo_must_allow.u.words[0] = exid->spo_must_allow[0];
new->cl_spo_must_allow.u.words[1] = exid->spo_must_allow[1];
+ /* Contrived initial CREATE_SESSION response */
+ new->cl_cs_slot.sl_status = nfserr_seq_misordered;
+
add_to_unconfirmed(new);
swap(new, conf);
out_copy:
@@ -3584,10 +3789,10 @@ nfsd4_create_session(struct svc_rqst *rqstp,
struct nfsd4_create_session *cr_ses = &u->create_session;
struct sockaddr *sa = svc_addr(rqstp);
struct nfs4_client *conf, *unconf;
+ struct nfsd4_clid_slot *cs_slot;
struct nfs4_client *old = NULL;
struct nfsd4_session *new;
struct nfsd4_conn *conn;
- struct nfsd4_clid_slot *cs_slot = NULL;
__be32 status = 0;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
@@ -3611,53 +3816,60 @@ nfsd4_create_session(struct svc_rqst *rqstp,
goto out_free_session;
spin_lock(&nn->client_lock);
+
+ /* RFC 8881 Section 18.36.4 Phase 1: Client record look-up. */
unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn);
conf = find_confirmed_client(&cr_ses->clientid, true, nn);
- WARN_ON_ONCE(conf && unconf);
+ if (!conf && !unconf) {
+ status = nfserr_stale_clientid;
+ goto out_free_conn;
+ }
- if (conf) {
- status = nfserr_wrong_cred;
- if (!nfsd4_mach_creds_match(conf, rqstp))
- goto out_free_conn;
+ /* RFC 8881 Section 18.36.4 Phase 2: Sequence ID processing. */
+ if (conf)
cs_slot = &conf->cl_cs_slot;
- status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
- if (status) {
- if (status == nfserr_replay_cache)
- status = nfsd4_replay_create_session(cr_ses, cs_slot);
+ else
+ cs_slot = &unconf->cl_cs_slot;
+ status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
+ if (status) {
+ if (status == nfserr_replay_cache) {
+ status = nfsd4_replay_create_session(cr_ses, cs_slot);
goto out_free_conn;
}
- } else if (unconf) {
+ goto out_cache_error;
+ }
+ cs_slot->sl_seqid++;
+ cr_ses->seqid = cs_slot->sl_seqid;
+
+ /* RFC 8881 Section 18.36.4 Phase 3: Client ID confirmation. */
+ if (conf) {
+ status = nfserr_wrong_cred;
+ if (!nfsd4_mach_creds_match(conf, rqstp))
+ goto out_cache_error;
+ } else {
status = nfserr_clid_inuse;
if (!same_creds(&unconf->cl_cred, &rqstp->rq_cred) ||
!rpc_cmp_addr(sa, (struct sockaddr *) &unconf->cl_addr)) {
trace_nfsd_clid_cred_mismatch(unconf, rqstp);
- goto out_free_conn;
+ goto out_cache_error;
}
status = nfserr_wrong_cred;
if (!nfsd4_mach_creds_match(unconf, rqstp))
- goto out_free_conn;
- cs_slot = &unconf->cl_cs_slot;
- status = check_slot_seqid(cr_ses->seqid, cs_slot->sl_seqid, 0);
- if (status) {
- /* an unconfirmed replay returns misordered */
- status = nfserr_seq_misordered;
- goto out_free_conn;
- }
+ goto out_cache_error;
old = find_confirmed_client_by_name(&unconf->cl_name, nn);
if (old) {
status = mark_client_expired_locked(old);
if (status) {
old = NULL;
- goto out_free_conn;
+ goto out_cache_error;
}
trace_nfsd_clid_replaced(&old->cl_clientid);
}
move_to_confirmed(unconf);
conf = unconf;
- } else {
- status = nfserr_stale_clientid;
- goto out_free_conn;
}
+
+ /* RFC 8881 Section 18.36.4 Phase 4: Session creation. */
status = nfs_ok;
/* Persistent sessions are not supported */
cr_ses->flags &= ~SESSION4_PERSIST;
@@ -3669,8 +3881,6 @@ nfsd4_create_session(struct svc_rqst *rqstp,
memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
NFS4_MAX_SESSIONID_LEN);
- cs_slot->sl_seqid++;
- cr_ses->seqid = cs_slot->sl_seqid;
/* cache solo and embedded create sessions under the client_lock */
nfsd4_cache_create_session(cr_ses, cs_slot, status);
@@ -3683,6 +3893,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
if (old)
expire_client(old);
return status;
+
+out_cache_error:
+ nfsd4_cache_create_session(cr_ses, cs_slot, status);
out_free_conn:
spin_unlock(&nn->client_lock);
free_conn(conn);
@@ -4058,6 +4271,9 @@ out:
}
if (!list_empty(&clp->cl_revoked))
seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
+ if (atomic_read(&clp->cl_admin_revoked))
+ seq->status_flags |= SEQ4_STATUS_ADMIN_STATE_REVOKED;
+ trace_nfsd_seq4_status(rqstp, seq);
out_no_session:
if (conn)
free_conn(conn);
@@ -4352,32 +4568,25 @@ nfsd4_free_slabs(void)
int
nfsd4_init_slabs(void)
{
- client_slab = kmem_cache_create("nfsd4_clients",
- sizeof(struct nfs4_client), 0, 0, NULL);
+ client_slab = KMEM_CACHE(nfs4_client, 0);
if (client_slab == NULL)
goto out;
- openowner_slab = kmem_cache_create("nfsd4_openowners",
- sizeof(struct nfs4_openowner), 0, 0, NULL);
+ openowner_slab = KMEM_CACHE(nfs4_openowner, 0);
if (openowner_slab == NULL)
goto out_free_client_slab;
- lockowner_slab = kmem_cache_create("nfsd4_lockowners",
- sizeof(struct nfs4_lockowner), 0, 0, NULL);
+ lockowner_slab = KMEM_CACHE(nfs4_lockowner, 0);
if (lockowner_slab == NULL)
goto out_free_openowner_slab;
- file_slab = kmem_cache_create("nfsd4_files",
- sizeof(struct nfs4_file), 0, 0, NULL);
+ file_slab = KMEM_CACHE(nfs4_file, 0);
if (file_slab == NULL)
goto out_free_lockowner_slab;
- stateid_slab = kmem_cache_create("nfsd4_stateids",
- sizeof(struct nfs4_ol_stateid), 0, 0, NULL);
+ stateid_slab = KMEM_CACHE(nfs4_ol_stateid, 0);
if (stateid_slab == NULL)
goto out_free_file_slab;
- deleg_slab = kmem_cache_create("nfsd4_delegations",
- sizeof(struct nfs4_delegation), 0, 0, NULL);
+ deleg_slab = KMEM_CACHE(nfs4_delegation, 0);
if (deleg_slab == NULL)
goto out_free_stateid_slab;
- odstate_slab = kmem_cache_create("nfsd4_odstate",
- sizeof(struct nfs4_clnt_odstate), 0, 0, NULL);
+ odstate_slab = KMEM_CACHE(nfs4_clnt_odstate, 0);
if (odstate_slab == NULL)
goto out_free_deleg_slab;
return 0;
@@ -4531,7 +4740,8 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
continue;
if (local->st_stateowner != &oo->oo_owner)
continue;
- if (local->st_stid.sc_type == NFS4_OPEN_STID) {
+ if (local->st_stid.sc_type == SC_TYPE_OPEN &&
+ !local->st_stid.sc_status) {
ret = local;
refcount_inc(&ret->st_stid.sc_count);
break;
@@ -4540,22 +4750,75 @@ nfsd4_find_existing_open(struct nfs4_file *fp, struct nfsd4_open *open)
return ret;
}
-static __be32
-nfsd4_verify_open_stid(struct nfs4_stid *s)
+static void nfsd4_drop_revoked_stid(struct nfs4_stid *s)
+ __releases(&s->sc_client->cl_lock)
{
- __be32 ret = nfs_ok;
+ struct nfs4_client *cl = s->sc_client;
+ LIST_HEAD(reaplist);
+ struct nfs4_ol_stateid *stp;
+ struct nfs4_delegation *dp;
+ bool unhashed;
switch (s->sc_type) {
- default:
+ case SC_TYPE_OPEN:
+ stp = openlockstateid(s);
+ if (unhash_open_stateid(stp, &reaplist))
+ put_ol_stateid_locked(stp, &reaplist);
+ spin_unlock(&cl->cl_lock);
+ free_ol_stateid_reaplist(&reaplist);
break;
- case 0:
- case NFS4_CLOSED_STID:
- case NFS4_CLOSED_DELEG_STID:
- ret = nfserr_bad_stateid;
+ case SC_TYPE_LOCK:
+ stp = openlockstateid(s);
+ unhashed = unhash_lock_stateid(stp);
+ spin_unlock(&cl->cl_lock);
+ if (unhashed)
+ nfs4_put_stid(s);
break;
- case NFS4_REVOKED_DELEG_STID:
- ret = nfserr_deleg_revoked;
+ case SC_TYPE_DELEG:
+ dp = delegstateid(s);
+ list_del_init(&dp->dl_recall_lru);
+ spin_unlock(&cl->cl_lock);
+ nfs4_put_stid(s);
+ break;
+ default:
+ spin_unlock(&cl->cl_lock);
}
+}
+
+static void nfsd40_drop_revoked_stid(struct nfs4_client *cl,
+ stateid_t *stid)
+{
+ /* NFSv4.0 has no way for the client to tell the server
+ * that it can forget an admin-revoked stateid.
+ * So we keep it around until the first time that the
+ * client uses it, and drop it the first time
+ * nfserr_admin_revoked is returned.
+ * For v4.1 and later we wait until explicitly told
+ * to free the stateid.
+ */
+ if (cl->cl_minorversion == 0) {
+ struct nfs4_stid *st;
+
+ spin_lock(&cl->cl_lock);
+ st = find_stateid_locked(cl, stid);
+ if (st)
+ nfsd4_drop_revoked_stid(st);
+ else
+ spin_unlock(&cl->cl_lock);
+ }
+}
+
+static __be32
+nfsd4_verify_open_stid(struct nfs4_stid *s)
+{
+ __be32 ret = nfs_ok;
+
+ if (s->sc_status & SC_STATUS_ADMIN_REVOKED)
+ ret = nfserr_admin_revoked;
+ else if (s->sc_status & SC_STATUS_REVOKED)
+ ret = nfserr_deleg_revoked;
+ else if (s->sc_status & SC_STATUS_CLOSED)
+ ret = nfserr_bad_stateid;
return ret;
}
@@ -4567,6 +4830,10 @@ nfsd4_lock_ol_stateid(struct nfs4_ol_stateid *stp)
mutex_lock_nested(&stp->st_mutex, LOCK_STATEID_MUTEX);
ret = nfsd4_verify_open_stid(&stp->st_stid);
+ if (ret == nfserr_admin_revoked)
+ nfsd40_drop_revoked_stid(stp->st_stid.sc_client,
+ &stp->st_stid.sc_stateid);
+
if (ret != nfs_ok)
mutex_unlock(&stp->st_mutex);
return ret;
@@ -4641,7 +4908,7 @@ retry:
open->op_stp = NULL;
refcount_inc(&stp->st_stid.sc_count);
- stp->st_stid.sc_type = NFS4_OPEN_STID;
+ stp->st_stid.sc_type = SC_TYPE_OPEN;
INIT_LIST_HEAD(&stp->st_locks);
stp->st_stateowner = nfs4_get_stateowner(&oo->oo_owner);
get_nfs4_file(fp);
@@ -4868,9 +5135,9 @@ static int nfsd4_cb_recall_done(struct nfsd4_callback *cb,
trace_nfsd_cb_recall_done(&dp->dl_stid.sc_stateid, task);
- if (dp->dl_stid.sc_type == NFS4_CLOSED_DELEG_STID ||
- dp->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID)
- return 1;
+ if (dp->dl_stid.sc_status)
+ /* CLOSED or REVOKED */
+ return 1;
switch (task->tk_status) {
case 0:
@@ -5113,12 +5380,12 @@ static int share_access_to_flags(u32 share_access)
return share_access == NFS4_SHARE_ACCESS_READ ? RD_STATE : WR_STATE;
}
-static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl, stateid_t *s)
+static struct nfs4_delegation *find_deleg_stateid(struct nfs4_client *cl,
+ stateid_t *s)
{
struct nfs4_stid *ret;
- ret = find_stateid_by_type(cl, s,
- NFS4_DELEG_STID|NFS4_REVOKED_DELEG_STID);
+ ret = find_stateid_by_type(cl, s, SC_TYPE_DELEG, SC_STATUS_REVOKED);
if (!ret)
return NULL;
return delegstateid(ret);
@@ -5141,10 +5408,15 @@ nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
deleg = find_deleg_stateid(cl, &open->op_delegate_stateid);
if (deleg == NULL)
goto out;
- if (deleg->dl_stid.sc_type == NFS4_REVOKED_DELEG_STID) {
+ if (deleg->dl_stid.sc_status & SC_STATUS_ADMIN_REVOKED) {
nfs4_put_stid(&deleg->dl_stid);
- if (cl->cl_minorversion)
- status = nfserr_deleg_revoked;
+ status = nfserr_admin_revoked;
+ goto out;
+ }
+ if (deleg->dl_stid.sc_status & SC_STATUS_REVOKED) {
+ nfs4_put_stid(&deleg->dl_stid);
+ nfsd40_drop_revoked_stid(cl, &open->op_delegate_stateid);
+ status = nfserr_deleg_revoked;
goto out;
}
flags = share_access_to_flags(open->op_share_access);
@@ -5189,7 +5461,7 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh,
return 0;
if (!(open->op_share_access & NFS4_SHARE_ACCESS_WRITE))
return nfserr_inval;
- return nfsd_setattr(rqstp, fh, &attrs, 0, (time64_t)0);
+ return nfsd_setattr(rqstp, fh, &attrs, NULL);
}
static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp,
@@ -5560,9 +5832,11 @@ nfs4_set_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
goto out_unlock;
spin_lock(&state_lock);
+ spin_lock(&clp->cl_lock);
spin_lock(&fp->fi_lock);
status = hash_delegation_locked(dp, fp);
spin_unlock(&fp->fi_lock);
+ spin_unlock(&clp->cl_lock);
spin_unlock(&state_lock);
if (status)
@@ -5634,6 +5908,8 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
struct svc_fh *parent = NULL;
int cb_up;
int status = 0;
+ struct kstat stat;
+ struct path path;
cb_up = nfsd4_cb_channel_good(oo->oo_owner.so_client);
open->op_recall = false;
@@ -5671,6 +5947,18 @@ nfs4_open_delegation(struct nfsd4_open *open, struct nfs4_ol_stateid *stp,
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) {
open->op_delegate_type = NFS4_OPEN_DELEGATE_WRITE;
trace_nfsd_deleg_write(&dp->dl_stid.sc_stateid);
+ path.mnt = currentfh->fh_export->ex_path.mnt;
+ path.dentry = currentfh->fh_dentry;
+ if (vfs_getattr(&path, &stat,
+ (STATX_SIZE | STATX_CTIME | STATX_CHANGE_COOKIE),
+ AT_STATX_SYNC_AS_STAT)) {
+ nfs4_put_stid(&dp->dl_stid);
+ destroy_delegation(dp);
+ goto out_no_deleg;
+ }
+ dp->dl_cb_fattr.ncf_cur_fsize = stat.size;
+ dp->dl_cb_fattr.ncf_initial_cinfo =
+ nfsd4_change_attribute(&stat, d_inode(currentfh->fh_dentry));
} else {
open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
trace_nfsd_deleg_read(&dp->dl_stid.sc_stateid);
@@ -5774,7 +6062,6 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
} else {
status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open, true);
if (status) {
- stp->st_stid.sc_type = NFS4_CLOSED_STID;
release_open_stateid(stp);
mutex_unlock(&stp->st_mutex);
goto out;
@@ -6128,6 +6415,43 @@ nfs4_process_client_reaplist(struct list_head *reaplist)
}
}
+static void nfs40_clean_admin_revoked(struct nfsd_net *nn,
+ struct laundry_time *lt)
+{
+ struct nfs4_client *clp;
+
+ spin_lock(&nn->client_lock);
+ if (nn->nfs40_last_revoke == 0 ||
+ nn->nfs40_last_revoke > lt->cutoff) {
+ spin_unlock(&nn->client_lock);
+ return;
+ }
+ nn->nfs40_last_revoke = 0;
+
+retry:
+ list_for_each_entry(clp, &nn->client_lru, cl_lru) {
+ unsigned long id, tmp;
+ struct nfs4_stid *stid;
+
+ if (atomic_read(&clp->cl_admin_revoked) == 0)
+ continue;
+
+ spin_lock(&clp->cl_lock);
+ idr_for_each_entry_ul(&clp->cl_stateids, stid, tmp, id)
+ if (stid->sc_status & SC_STATUS_ADMIN_REVOKED) {
+ refcount_inc(&stid->sc_count);
+ spin_unlock(&nn->client_lock);
+ /* this function drops ->cl_lock */
+ nfsd4_drop_revoked_stid(stid);
+ nfs4_put_stid(stid);
+ spin_lock(&nn->client_lock);
+ goto retry;
+ }
+ spin_unlock(&clp->cl_lock);
+ }
+ spin_unlock(&nn->client_lock);
+}
+
static time64_t
nfs4_laundromat(struct nfsd_net *nn)
{
@@ -6161,12 +6485,14 @@ nfs4_laundromat(struct nfsd_net *nn)
nfs4_get_client_reaplist(nn, &reaplist, &lt);
nfs4_process_client_reaplist(&reaplist);
+ nfs40_clean_admin_revoked(nn, &lt);
+
spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
if (!state_expired(&lt, dp->dl_time))
break;
- WARN_ON(!unhash_delegation_locked(dp));
+ unhash_delegation_locked(dp, SC_STATUS_REVOKED);
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
@@ -6225,6 +6551,8 @@ nfs4_laundromat(struct nfsd_net *nn)
/* service the server-to-server copy delayed unmount list */
nfsd4_ssc_expire_umount(nn);
#endif
+ if (atomic_long_read(&num_delegations) >= max_delegations)
+ deleg_reaper(nn);
out:
return max_t(time64_t, lt.new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
}
@@ -6286,6 +6614,8 @@ deleg_reaper(struct nfsd_net *nn)
list_del_init(&clp->cl_ra_cblist);
clp->cl_ra->ra_keep = 0;
clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG);
+ clp->cl_ra->ra_bmval[0] = BIT(RCA4_TYPE_MASK_RDATA_DLG) |
+ BIT(RCA4_TYPE_MASK_WDATA_DLG);
trace_nfsd_cb_recall_any(clp->cl_ra);
nfsd4_run_cb(&clp->cl_ra->ra_cb);
}
@@ -6379,6 +6709,9 @@ static __be32 nfsd4_stid_check_stateid_generation(stateid_t *in, struct nfs4_sti
if (ret == nfs_ok)
ret = check_stateid_generation(in, &s->sc_stateid, has_session);
spin_unlock(&s->sc_lock);
+ if (ret == nfserr_admin_revoked)
+ nfsd40_drop_revoked_stid(s->sc_client,
+ &s->sc_stateid);
return ret;
}
@@ -6405,32 +6738,33 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
status = nfsd4_stid_check_stateid_generation(stateid, s, 1);
if (status)
goto out_unlock;
+ status = nfsd4_verify_open_stid(s);
+ if (status)
+ goto out_unlock;
+
switch (s->sc_type) {
- case NFS4_DELEG_STID:
+ case SC_TYPE_DELEG:
status = nfs_ok;
break;
- case NFS4_REVOKED_DELEG_STID:
- status = nfserr_deleg_revoked;
- break;
- case NFS4_OPEN_STID:
- case NFS4_LOCK_STID:
+ case SC_TYPE_OPEN:
+ case SC_TYPE_LOCK:
status = nfsd4_check_openowner_confirmed(openlockstateid(s));
break;
default:
printk("unknown stateid type %x\n", s->sc_type);
- fallthrough;
- case NFS4_CLOSED_STID:
- case NFS4_CLOSED_DELEG_STID:
status = nfserr_bad_stateid;
}
out_unlock:
spin_unlock(&cl->cl_lock);
+ if (status == nfserr_admin_revoked)
+ nfsd40_drop_revoked_stid(cl, stateid);
return status;
}
__be32
nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
- stateid_t *stateid, unsigned char typemask,
+ stateid_t *stateid,
+ unsigned short typemask, unsigned short statusmask,
struct nfs4_stid **s, struct nfsd_net *nn)
{
__be32 status;
@@ -6441,10 +6775,15 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
* only return revoked delegations if explicitly asked.
* otherwise we report revoked or bad_stateid status.
*/
- if (typemask & NFS4_REVOKED_DELEG_STID)
+ if (statusmask & SC_STATUS_REVOKED)
return_revoked = true;
- else if (typemask & NFS4_DELEG_STID)
- typemask |= NFS4_REVOKED_DELEG_STID;
+ if (typemask & SC_TYPE_DELEG)
+ /* Always allow REVOKED for DELEG so we can
+ * retturn the appropriate error.
+ */
+ statusmask |= SC_STATUS_REVOKED;
+
+ statusmask |= SC_STATUS_ADMIN_REVOKED;
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid) ||
CLOSE_STATEID(stateid))
@@ -6457,14 +6796,17 @@ nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
}
if (status)
return status;
- stid = find_stateid_by_type(cstate->clp, stateid, typemask);
+ stid = find_stateid_by_type(cstate->clp, stateid, typemask, statusmask);
if (!stid)
return nfserr_bad_stateid;
- if ((stid->sc_type == NFS4_REVOKED_DELEG_STID) && !return_revoked) {
+ if ((stid->sc_status & SC_STATUS_REVOKED) && !return_revoked) {
nfs4_put_stid(stid);
- if (cstate->minorversion)
- return nfserr_deleg_revoked;
- return nfserr_bad_stateid;
+ return nfserr_deleg_revoked;
+ }
+ if (stid->sc_status & SC_STATUS_ADMIN_REVOKED) {
+ nfsd40_drop_revoked_stid(cstate->clp, stateid);
+ nfs4_put_stid(stid);
+ return nfserr_admin_revoked;
}
*s = stid;
return nfs_ok;
@@ -6475,17 +6817,17 @@ nfs4_find_file(struct nfs4_stid *s, int flags)
{
struct nfsd_file *ret = NULL;
- if (!s)
+ if (!s || s->sc_status)
return NULL;
switch (s->sc_type) {
- case NFS4_DELEG_STID:
+ case SC_TYPE_DELEG:
spin_lock(&s->sc_file->fi_lock);
ret = nfsd_file_get(s->sc_file->fi_deleg_file);
spin_unlock(&s->sc_file->fi_lock);
break;
- case NFS4_OPEN_STID:
- case NFS4_LOCK_STID:
+ case SC_TYPE_OPEN:
+ case SC_TYPE_LOCK:
if (flags & RD_STATE)
ret = find_readable_file(s->sc_file);
else
@@ -6598,7 +6940,8 @@ static __be32 find_cpntf_state(struct nfsd_net *nn, stateid_t *st,
goto out;
*stid = find_stateid_by_type(found, &cps->cp_p_stateid,
- NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID);
+ SC_TYPE_DELEG|SC_TYPE_OPEN|SC_TYPE_LOCK,
+ 0);
if (*stid)
status = nfs_ok;
else
@@ -6655,8 +6998,8 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
}
status = nfsd4_lookup_stateid(cstate, stateid,
- NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
- &s, nn);
+ SC_TYPE_DELEG|SC_TYPE_OPEN|SC_TYPE_LOCK,
+ 0, &s, nn);
if (status == nfserr_bad_stateid)
status = find_cpntf_state(nn, stateid, &s);
if (status)
@@ -6667,16 +7010,13 @@ nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
goto out;
switch (s->sc_type) {
- case NFS4_DELEG_STID:
+ case SC_TYPE_DELEG:
status = nfs4_check_delegmode(delegstateid(s), flags);
break;
- case NFS4_OPEN_STID:
- case NFS4_LOCK_STID:
+ case SC_TYPE_OPEN:
+ case SC_TYPE_LOCK:
status = nfs4_check_olstateid(openlockstateid(s), flags);
break;
- default:
- status = nfserr_bad_stateid;
- break;
}
if (status)
goto out;
@@ -6755,34 +7095,39 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
spin_lock(&cl->cl_lock);
s = find_stateid_locked(cl, stateid);
- if (!s)
+ if (!s || s->sc_status & SC_STATUS_CLOSED)
goto out_unlock;
+ if (s->sc_status & SC_STATUS_ADMIN_REVOKED) {
+ nfsd4_drop_revoked_stid(s);
+ ret = nfs_ok;
+ goto out;
+ }
spin_lock(&s->sc_lock);
switch (s->sc_type) {
- case NFS4_DELEG_STID:
+ case SC_TYPE_DELEG:
+ if (s->sc_status & SC_STATUS_REVOKED) {
+ spin_unlock(&s->sc_lock);
+ dp = delegstateid(s);
+ list_del_init(&dp->dl_recall_lru);
+ spin_unlock(&cl->cl_lock);
+ nfs4_put_stid(s);
+ ret = nfs_ok;
+ goto out;
+ }
ret = nfserr_locks_held;
break;
- case NFS4_OPEN_STID:
+ case SC_TYPE_OPEN:
ret = check_stateid_generation(stateid, &s->sc_stateid, 1);
if (ret)
break;
ret = nfserr_locks_held;
break;
- case NFS4_LOCK_STID:
+ case SC_TYPE_LOCK:
spin_unlock(&s->sc_lock);
refcount_inc(&s->sc_count);
spin_unlock(&cl->cl_lock);
ret = nfsd4_free_lock_stateid(stateid, s);
goto out;
- case NFS4_REVOKED_DELEG_STID:
- spin_unlock(&s->sc_lock);
- dp = delegstateid(s);
- list_del_init(&dp->dl_recall_lru);
- spin_unlock(&cl->cl_lock);
- nfs4_put_stid(s);
- ret = nfs_ok;
- goto out;
- /* Default falls through and returns nfserr_bad_stateid */
}
spin_unlock(&s->sc_lock);
out_unlock:
@@ -6824,6 +7169,7 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
* @seqid: seqid (provided by client)
* @stateid: stateid (provided by client)
* @typemask: mask of allowable types for this operation
+ * @statusmask: mask of allowed states: 0 or STID_CLOSED
* @stpp: return pointer for the stateid found
* @nn: net namespace for request
*
@@ -6833,7 +7179,8 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
*/
static __be32
nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
- stateid_t *stateid, char typemask,
+ stateid_t *stateid,
+ unsigned short typemask, unsigned short statusmask,
struct nfs4_ol_stateid **stpp,
struct nfsd_net *nn)
{
@@ -6844,7 +7191,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
trace_nfsd_preprocess(seqid, stateid);
*stpp = NULL;
- status = nfsd4_lookup_stateid(cstate, stateid, typemask, &s, nn);
+ status = nfsd4_lookup_stateid(cstate, stateid,
+ typemask, statusmask, &s, nn);
if (status)
return status;
stp = openlockstateid(s);
@@ -6866,7 +7214,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
struct nfs4_ol_stateid *stp;
status = nfs4_preprocess_seqid_op(cstate, seqid, stateid,
- NFS4_OPEN_STID, &stp, nn);
+ SC_TYPE_OPEN, 0, &stp, nn);
if (status)
return status;
oo = openowner(stp->st_stateowner);
@@ -6897,8 +7245,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return status;
status = nfs4_preprocess_seqid_op(cstate,
- oc->oc_seqid, &oc->oc_req_stateid,
- NFS4_OPEN_STID, &stp, nn);
+ oc->oc_seqid, &oc->oc_req_stateid,
+ SC_TYPE_OPEN, 0, &stp, nn);
if (status)
goto out;
oo = openowner(stp->st_stateowner);
@@ -7028,18 +7376,20 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- dprintk("NFSD: nfsd4_close on file %pd\n",
+ dprintk("NFSD: nfsd4_close on file %pd\n",
cstate->current_fh.fh_dentry);
status = nfs4_preprocess_seqid_op(cstate, close->cl_seqid,
- &close->cl_stateid,
- NFS4_OPEN_STID|NFS4_CLOSED_STID,
- &stp, nn);
+ &close->cl_stateid,
+ SC_TYPE_OPEN, SC_STATUS_CLOSED,
+ &stp, nn);
nfsd4_bump_seqid(cstate, status);
if (status)
- goto out;
+ goto out;
- stp->st_stid.sc_type = NFS4_CLOSED_STID;
+ spin_lock(&stp->st_stid.sc_client->cl_lock);
+ stp->st_stid.sc_status |= SC_STATUS_CLOSED;
+ spin_unlock(&stp->st_stid.sc_client->cl_lock);
/*
* Technically we don't _really_ have to increment or copy it, since
@@ -7081,7 +7431,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if ((status = fh_verify(rqstp, &cstate->current_fh, S_IFREG, 0)))
return status;
- status = nfsd4_lookup_stateid(cstate, stateid, NFS4_DELEG_STID, &s, nn);
+ status = nfsd4_lookup_stateid(cstate, stateid, SC_TYPE_DELEG, 0, &s, nn);
if (status)
goto out;
dp = delegstateid(s);
@@ -7348,7 +7698,7 @@ retry:
if (retstp)
goto out_found;
refcount_inc(&stp->st_stid.sc_count);
- stp->st_stid.sc_type = NFS4_LOCK_STID;
+ stp->st_stid.sc_type = SC_TYPE_LOCK;
stp->st_stateowner = nfs4_get_stateowner(&lo->lo_owner);
get_nfs4_file(fp);
stp->st_stid.sc_file = fp;
@@ -7535,9 +7885,10 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
&lock_stp, &new);
} else {
status = nfs4_preprocess_seqid_op(cstate,
- lock->lk_old_lock_seqid,
- &lock->lk_old_lock_stateid,
- NFS4_LOCK_STID, &lock_stp, nn);
+ lock->lk_old_lock_seqid,
+ &lock->lk_old_lock_stateid,
+ SC_TYPE_LOCK, 0, &lock_stp,
+ nn);
}
if (status)
goto out;
@@ -7850,8 +8201,8 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_inval;
status = nfs4_preprocess_seqid_op(cstate, locku->lu_seqid,
- &locku->lu_stateid, NFS4_LOCK_STID,
- &stp, nn);
+ &locku->lu_stateid, SC_TYPE_LOCK, 0,
+ &stp, nn);
if (status)
goto out;
nf = find_any_file(stp->st_stid.sc_file);
@@ -7996,7 +8347,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
stp = list_first_entry(&lo->lo_owner.so_stateids,
struct nfs4_ol_stateid,
st_perstateowner);
- WARN_ON(!unhash_lock_stateid(stp));
+ unhash_lock_stateid(stp);
put_ol_stateid_locked(stp, &reaplist);
}
spin_unlock(&clp->cl_lock);
@@ -8289,7 +8640,7 @@ nfs4_state_shutdown_net(struct net *net)
spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
- WARN_ON(!unhash_delegation_locked(dp));
+ unhash_delegation_locked(dp, SC_STATUS_CLOSED);
list_add(&dp->dl_recall_lru, &reaplist);
}
spin_unlock(&state_lock);
@@ -8431,6 +8782,8 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
* nfsd4_deleg_getattr_conflict - Recall if GETATTR causes conflict
* @rqstp: RPC transaction context
* @inode: file to be checked for a conflict
+ * @modified: return true if file was modified
+ * @size: new size of file if modified is true
*
* This function is called when there is a conflict between a write
* delegation and a change/size GETATTR from another client. The server
@@ -8439,21 +8792,22 @@ nfsd4_get_writestateid(struct nfsd4_compound_state *cstate,
* delegation before replying to the GETATTR. See RFC 8881 section
* 18.7.4.
*
- * The current implementation does not support CB_GETATTR yet. However
- * this can avoid recalling the delegation could be added in follow up
- * work.
- *
* Returns 0 if there is no conflict; otherwise an nfs_stat
* code is returned.
*/
__be32
-nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
+nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode,
+ bool *modified, u64 *size)
{
__be32 status;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct file_lock_context *ctx;
struct file_lease *fl;
struct nfs4_delegation *dp;
+ struct iattr attrs;
+ struct nfs4_cb_fattr *ncf;
+ *modified = false;
ctx = locks_inode_context(inode);
if (!ctx)
return 0;
@@ -8480,12 +8834,38 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode)
return 0;
}
break_lease:
+ nfsd_stats_wdeleg_getattr_inc(nn);
+ dp = fl->c.flc_owner;
+ ncf = &dp->dl_cb_fattr;
+ nfs4_cb_getattr(&dp->dl_cb_fattr);
spin_unlock(&ctx->flc_lock);
- nfsd_stats_wdeleg_getattr_inc();
- status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
- if (status != nfserr_jukebox ||
- !nfsd_wait_for_delegreturn(rqstp, inode))
- return status;
+ wait_on_bit_timeout(&ncf->ncf_cb_flags, CB_GETATTR_BUSY,
+ TASK_INTERRUPTIBLE, NFSD_CB_GETATTR_TIMEOUT);
+ if (ncf->ncf_cb_status) {
+ /* Recall delegation only if client didn't respond */
+ status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ));
+ if (status != nfserr_jukebox ||
+ !nfsd_wait_for_delegreturn(rqstp, inode))
+ return status;
+ }
+ if (!ncf->ncf_file_modified &&
+ (ncf->ncf_initial_cinfo != ncf->ncf_cb_change ||
+ ncf->ncf_cur_fsize != ncf->ncf_cb_fsize))
+ ncf->ncf_file_modified = true;
+ if (ncf->ncf_file_modified) {
+ /*
+ * Per section 10.4.3 of RFC 8881, the server would
+ * not update the file's metadata with the client's
+ * modified size
+ */
+ attrs.ia_mtime = attrs.ia_ctime = current_time(inode);
+ attrs.ia_valid = ATTR_MTIME | ATTR_CTIME;
+ setattr_copy(&nop_mnt_idmap, inode, &attrs);
+ mark_inode_dirty(inode);
+ ncf->ncf_cur_fsize = ncf->ncf_cb_fsize;
+ *size = ncf->ncf_cur_fsize;
+ *modified = true;
+ }
return 0;
}
break;
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c719c475a068..fac938f563ad 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3507,6 +3507,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
unsigned long mask[2];
} u;
unsigned long bit;
+ bool file_modified = false;
+ u64 size = 0;
WARN_ON_ONCE(bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1);
WARN_ON_ONCE(!nfsd_attrs_supported(minorversion, bmval));
@@ -3533,7 +3535,8 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
}
args.size = 0;
if (u.attrmask[0] & (FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE)) {
- status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry));
+ status = nfsd4_deleg_getattr_conflict(rqstp, d_inode(dentry),
+ &file_modified, &size);
if (status)
goto out;
}
@@ -3543,7 +3546,10 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr,
AT_STATX_SYNC_AS_STAT);
if (err)
goto out_nfserr;
- args.size = args.stat.size;
+ if (file_modified)
+ args.size = size;
+ else
+ args.size = args.stat.size;
if (!(args.stat.result_mask & STATX_BTIME))
/* underlying FS does not offer btime so we can't share it */
@@ -5386,16 +5392,11 @@ nfsd4_listxattr_validate_cookie(struct nfsd4_listxattrs *listxattrs,
/*
* If the cookie is larger than the maximum number we can fit
- * in either the buffer we just got back from vfs_listxattr, or,
- * XDR-encoded, in the return buffer, it's invalid.
+ * in the buffer we just got back from vfs_listxattr, it's invalid.
*/
if (cookie > (listxattrs->lsxa_len) / (XATTR_USER_PREFIX_LEN + 2))
return nfserr_badcookie;
- if (cookie > (listxattrs->lsxa_maxcount /
- (XDR_QUADLEN(XATTR_USER_PREFIX_LEN + 2) + 4)))
- return nfserr_badcookie;
-
*offsetp = (u32)cookie;
return 0;
}
@@ -5412,6 +5413,7 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
u64 cookie;
char *sp;
__be32 status, tmp;
+ __be64 wire_cookie;
__be32 *p;
u32 nuser;
@@ -5427,7 +5429,7 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
*/
cookie_offset = xdr->buf->len;
count_offset = cookie_offset + 8;
- p = xdr_reserve_space(xdr, 12);
+ p = xdr_reserve_space(xdr, XDR_UNIT * 3);
if (!p) {
status = nfserr_resource;
goto out;
@@ -5438,7 +5440,8 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
sp = listxattrs->lsxa_buf;
nuser = 0;
- xdrleft = listxattrs->lsxa_maxcount;
+ /* Bytes left is maxcount - 8 (cookie) - 4 (array count) */
+ xdrleft = listxattrs->lsxa_maxcount - XDR_UNIT * 3;
while (left > 0 && xdrleft > 0) {
slen = strlen(sp);
@@ -5451,7 +5454,8 @@ nfsd4_encode_listxattrs(struct nfsd4_compoundres *resp, __be32 nfserr,
slen -= XATTR_USER_PREFIX_LEN;
xdrlen = 4 + ((slen + 3) & ~3);
- if (xdrlen > xdrleft) {
+ /* Check if both entry and eof can fit in the XDR buffer */
+ if (xdrlen + XDR_UNIT > xdrleft) {
if (count == 0) {
/*
* Can't even fit the first attribute name.
@@ -5503,7 +5507,8 @@ wreof:
cookie = offset + count;
- write_bytes_to_xdr_buf(xdr->buf, cookie_offset, &cookie, 8);
+ wire_cookie = cpu_to_be64(cookie);
+ write_bytes_to_xdr_buf(xdr->buf, cookie_offset, &wire_cookie, 8);
tmp = cpu_to_be32(count);
write_bytes_to_xdr_buf(xdr->buf, count_offset, &tmp, 4);
out:
@@ -5727,27 +5732,24 @@ release:
rqstp->rq_next_page = xdr->page_ptr + 1;
}
-/*
- * Encode the reply stored in the stateowner reply cache
- *
- * XDR note: do not encode rp->rp_buflen: the buffer contains the
- * previously sent already encoded operation.
+/**
+ * nfsd4_encode_replay - encode a result stored in the stateowner reply cache
+ * @xdr: send buffer's XDR stream
+ * @op: operation being replayed
+ *
+ * @op->replay->rp_buf contains the previously-sent already-encoded result.
*/
-void
-nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
+void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
{
- __be32 *p;
struct nfs4_replay *rp = op->replay;
- p = xdr_reserve_space(xdr, 8 + rp->rp_buflen);
- if (!p) {
- WARN_ON_ONCE(1);
- return;
- }
- *p++ = cpu_to_be32(op->opnum);
- *p++ = rp->rp_status; /* already xdr'ed */
+ trace_nfsd_stateowner_replay(op->opnum, rp);
- p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen);
+ if (xdr_stream_encode_u32(xdr, op->opnum) != XDR_UNIT)
+ return;
+ if (xdr_stream_encode_be32(xdr, rp->rp_status) != XDR_UNIT)
+ return;
+ xdr_stream_encode_opaque_fixed(xdr, rp->rp_buf, rp->rp_buflen);
}
void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 5c1a4a0aa605..ba9d326b3de6 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -166,8 +166,7 @@ nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct nfsd_cacherep *rp,
int nfsd_drc_slab_create(void)
{
- drc_slab = kmem_cache_create("nfsd_drc",
- sizeof(struct nfsd_cacherep), 0, 0, NULL);
+ drc_slab = KMEM_CACHE(nfsd_cacherep, 0);
return drc_slab ? 0: -ENOMEM;
}
@@ -176,27 +175,6 @@ void nfsd_drc_slab_free(void)
kmem_cache_destroy(drc_slab);
}
-/**
- * nfsd_net_reply_cache_init - per net namespace reply cache set-up
- * @nn: nfsd_net being initialized
- *
- * Returns zero on succes; otherwise a negative errno is returned.
- */
-int nfsd_net_reply_cache_init(struct nfsd_net *nn)
-{
- return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM);
-}
-
-/**
- * nfsd_net_reply_cache_destroy - per net namespace reply cache tear-down
- * @nn: nfsd_net being freed
- *
- */
-void nfsd_net_reply_cache_destroy(struct nfsd_net *nn)
-{
- nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM);
-}
-
int nfsd_reply_cache_init(struct nfsd_net *nn)
{
unsigned int hashsize;
@@ -501,7 +479,7 @@ out:
int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
unsigned int len, struct nfsd_cacherep **cacherep)
{
- struct nfsd_net *nn;
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct nfsd_cacherep *rp, *found;
__wsum csum;
struct nfsd_drc_bucket *b;
@@ -510,7 +488,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
int rtn = RC_DOIT;
if (type == RC_NOCACHE) {
- nfsd_stats_rc_nocache_inc();
+ nfsd_stats_rc_nocache_inc(nn);
goto out;
}
@@ -520,7 +498,6 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
* Since the common case is a cache miss followed by an insert,
* preallocate an entry.
*/
- nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
rp = nfsd_cacherep_alloc(rqstp, csum, nn);
if (!rp)
goto out;
@@ -537,7 +514,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
nfsd_cacherep_dispose(&dispose);
- nfsd_stats_rc_misses_inc();
+ nfsd_stats_rc_misses_inc(nn);
atomic_inc(&nn->num_drc_entries);
nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp));
goto out;
@@ -545,7 +522,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start,
found_entry:
/* We found a matching entry which is either in progress or done. */
nfsd_reply_cache_free_locked(NULL, rp, nn);
- nfsd_stats_rc_hits_inc();
+ nfsd_stats_rc_hits_inc(nn);
rtn = RC_DROPIT;
rp = found;
@@ -687,15 +664,15 @@ int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
atomic_read(&nn->num_drc_entries));
seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits);
seq_printf(m, "mem usage: %lld\n",
- percpu_counter_sum_positive(&nn->counter[NFSD_NET_DRC_MEM_USAGE]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_DRC_MEM_USAGE]));
seq_printf(m, "cache hits: %lld\n",
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]));
seq_printf(m, "cache misses: %lld\n",
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]));
seq_printf(m, "not cached: %lld\n",
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]));
seq_printf(m, "payload misses: %lld\n",
- percpu_counter_sum_positive(&nn->counter[NFSD_NET_PAYLOAD_MISSES]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]));
seq_printf(m, "longest chain len: %u\n", nn->longest_chain);
seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize);
return 0;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f206ca32e7f5..ecd18bffeebc 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -281,6 +281,7 @@ static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size)
* 3. Is that directory the root of an exported file system?
*/
error = nlmsvc_unlock_all_by_sb(path.dentry->d_sb);
+ nfsd4_revoke_states(netns(file), path.dentry->d_sb);
path_put(&path);
return error;
@@ -1671,14 +1672,17 @@ static __net_init int nfsd_net_init(struct net *net)
retval = nfsd_idmap_init(net);
if (retval)
goto out_idmap_error;
- retval = nfsd_net_reply_cache_init(nn);
+ retval = nfsd_stat_counters_init(nn);
if (retval)
goto out_repcache_error;
+ memset(&nn->nfsd_svcstats, 0, sizeof(nn->nfsd_svcstats));
+ nn->nfsd_svcstats.program = &nfsd_program;
nn->nfsd_versions = NULL;
nn->nfsd4_minorversions = NULL;
nfsd4_init_leases_net(nn);
get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key));
seqlock_init(&nn->writeverf_lock);
+ nfsd_proc_stat_init(net);
return 0;
@@ -1699,7 +1703,8 @@ static __net_exit void nfsd_net_exit(struct net *net)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- nfsd_net_reply_cache_destroy(nn);
+ nfsd_proc_stat_shutdown(net);
+ nfsd_stat_counters_destroy(nn);
nfsd_idmap_shutdown(net);
nfsd_export_shutdown(net);
nfsd_netns_free_versions(nn);
@@ -1722,12 +1727,9 @@ static int __init init_nfsd(void)
retval = nfsd4_init_pnfs();
if (retval)
goto out_free_slabs;
- retval = nfsd_stat_init(); /* Statistics */
- if (retval)
- goto out_free_pnfs;
retval = nfsd_drc_slab_create();
if (retval)
- goto out_free_stat;
+ goto out_free_pnfs;
nfsd_lockd_init(); /* lockd->nfsd callbacks */
retval = create_proc_exports_entry();
if (retval)
@@ -1761,8 +1763,6 @@ out_free_exports:
out_free_lockd:
nfsd_lockd_shutdown();
nfsd_drc_slab_free();
-out_free_stat:
- nfsd_stat_shutdown();
out_free_pnfs:
nfsd4_exit_pnfs();
out_free_slabs:
@@ -1780,7 +1780,6 @@ static void __exit exit_nfsd(void)
nfsd_drc_slab_free();
remove_proc_entry("fs/nfs/exports", NULL);
remove_proc_entry("fs/nfs", NULL);
- nfsd_stat_shutdown();
nfsd_lockd_shutdown();
nfsd4_free_slabs();
nfsd4_exit_pnfs();
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 304e9728b929..16c5a05f340e 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -86,6 +86,7 @@ extern struct mutex nfsd_mutex;
extern spinlock_t nfsd_drc_lock;
extern unsigned long nfsd_drc_max_mem;
extern unsigned long nfsd_drc_mem_used;
+extern atomic_t nfsd_th_cnt; /* number of available threads */
extern const struct seq_operations nfs_exports_op;
@@ -274,6 +275,7 @@ void nfsd_lockd_shutdown(void);
#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
#define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
+#define nfserr_admin_revoked cpu_to_be32(NFS4ERR_ADMIN_REVOKED)
#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
#define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
@@ -365,6 +367,7 @@ void nfsd_lockd_shutdown(void);
#define NFSD_CLIENT_MAX_TRIM_PER_RUN 128
#define NFS4_CLIENTS_PER_GB 1024
#define NFSD_DELEGRETURN_TIMEOUT (HZ / 34) /* 30ms */
+#define NFSD_CB_GETATTR_TIMEOUT NFSD_DELEGRETURN_TIMEOUT
/*
* The following attributes are currently not supported by the NFSv4 server:
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index dbfa0ac13564..40fecf7b224f 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -327,6 +327,7 @@ out:
__be32
fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access)
{
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct svc_export *exp = NULL;
struct dentry *dentry;
__be32 error;
@@ -395,7 +396,7 @@ skip_pseudoflavor_check:
out:
trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error);
if (error == nfserr_stale)
- nfsd_stats_fh_stale_inc(exp);
+ nfsd_stats_fh_stale_inc(nn, exp);
return error;
}
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index a7315928a760..36370b957b63 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -103,7 +103,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
}
}
- resp->status = nfsd_setattr(rqstp, fhp, &attrs, 0, (time64_t)0);
+ resp->status = nfsd_setattr(rqstp, fhp, &attrs, NULL);
if (resp->status != nfs_ok)
goto out;
@@ -390,8 +390,8 @@ nfsd_proc_create(struct svc_rqst *rqstp)
*/
attr->ia_valid &= ATTR_SIZE;
if (attr->ia_valid)
- resp->status = nfsd_setattr(rqstp, newfhp, &attrs, 0,
- (time64_t)0);
+ resp->status = nfsd_setattr(rqstp, newfhp, &attrs,
+ NULL);
}
out_unlock:
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index a667802e08e7..c0d17b92b249 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -34,6 +34,7 @@
#define NFSDDBG_FACILITY NFSDDBG_SVC
+atomic_t nfsd_th_cnt = ATOMIC_INIT(0);
extern struct svc_program nfsd_program;
static int nfsd(void *vrqstp);
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
@@ -80,7 +81,6 @@ unsigned long nfsd_drc_max_mem;
unsigned long nfsd_drc_mem_used;
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
-static struct svc_stat nfsd_acl_svcstats;
static const struct svc_version *nfsd_acl_version[] = {
# if defined(CONFIG_NFSD_V2_ACL)
[2] = &nfsd_acl_version2,
@@ -99,15 +99,11 @@ static struct svc_program nfsd_acl_program = {
.pg_vers = nfsd_acl_version,
.pg_name = "nfsacl",
.pg_class = "nfsd",
- .pg_stats = &nfsd_acl_svcstats,
.pg_authenticate = &svc_set_client,
.pg_init_request = nfsd_acl_init_request,
.pg_rpcbind_set = nfsd_acl_rpcbind_set,
};
-static struct svc_stat nfsd_acl_svcstats = {
- .program = &nfsd_acl_program,
-};
#endif /* defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) */
static const struct svc_version *nfsd_version[] = {
@@ -132,7 +128,6 @@ struct svc_program nfsd_program = {
.pg_vers = nfsd_version, /* version table */
.pg_name = "nfsd", /* program name */
.pg_class = "nfsd", /* authentication class */
- .pg_stats = &nfsd_svcstats, /* version table */
.pg_authenticate = &svc_set_client, /* export authentication */
.pg_init_request = nfsd_init_request,
.pg_rpcbind_set = nfsd_rpcbind_set,
@@ -666,7 +661,8 @@ int nfsd_create_serv(struct net *net)
if (nfsd_max_blksize == 0)
nfsd_max_blksize = nfsd_get_default_max_blksize();
nfsd_reset_versions(nn);
- serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, nfsd);
+ serv = svc_create_pooled(&nfsd_program, &nn->nfsd_svcstats,
+ nfsd_max_blksize, nfsd);
if (serv == NULL)
return -ENOMEM;
@@ -929,7 +925,7 @@ nfsd(void *vrqstp)
current->fs->umask = 0;
- atomic_inc(&nfsdstats.th_cnt);
+ atomic_inc(&nfsd_th_cnt);
set_freezable();
@@ -941,9 +937,11 @@ nfsd(void *vrqstp)
rqstp->rq_server->sv_maxconn = nn->max_connections;
svc_recv(rqstp);
+
+ nfsd_file_net_dispose(nn);
}
- atomic_dec(&nfsdstats.th_cnt);
+ atomic_dec(&nfsd_th_cnt);
out:
/* Release the thread */
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
index de1e0dfed06a..925817f66917 100644
--- a/fs/nfsd/pnfs.h
+++ b/fs/nfsd/pnfs.h
@@ -37,7 +37,8 @@ struct nfsd4_layout_ops {
__be32 (*proc_layoutcommit)(struct inode *inode,
struct nfsd4_layoutcommit *lcp);
- void (*fence_client)(struct nfs4_layout_stateid *ls);
+ void (*fence_client)(struct nfs4_layout_stateid *ls,
+ struct nfsd_file *file);
};
extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
@@ -72,11 +73,13 @@ void nfsd4_setup_layout_type(struct svc_export *exp);
void nfsd4_return_all_client_layouts(struct nfs4_client *);
void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
struct nfs4_file *fp);
+void nfsd4_close_layout(struct nfs4_layout_stateid *ls);
int nfsd4_init_pnfs(void);
void nfsd4_exit_pnfs(void);
#else
struct nfs4_client;
struct nfs4_file;
+struct nfs4_layout_stateid;
static inline void nfsd4_setup_layout_type(struct svc_export *exp)
{
@@ -89,6 +92,9 @@ static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
struct nfs4_file *fp)
{
}
+static inline void nfsd4_close_layout(struct nfs4_layout_stateid *ls)
+{
+}
static inline void nfsd4_exit_pnfs(void)
{
}
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 41bdc913fa71..01c6f3445646 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -68,7 +68,7 @@ struct nfsd4_callback {
struct nfs4_client *cb_clp;
struct rpc_message cb_msg;
const struct nfsd4_callback_ops *cb_ops;
- struct work_struct cb_work;
+ struct delayed_work cb_work;
int cb_seq_status;
int cb_status;
bool cb_need_restart;
@@ -88,17 +88,34 @@ struct nfsd4_callback_ops {
*/
struct nfs4_stid {
refcount_t sc_count;
-#define NFS4_OPEN_STID 1
-#define NFS4_LOCK_STID 2
-#define NFS4_DELEG_STID 4
-/* For an open stateid kept around *only* to process close replays: */
-#define NFS4_CLOSED_STID 8
+
+ /* A new stateid is added to the cl_stateids idr early before it
+ * is fully initialised. Its sc_type is then zero. After
+ * initialisation the sc_type it set under cl_lock, and then
+ * never changes.
+ */
+#define SC_TYPE_OPEN BIT(0)
+#define SC_TYPE_LOCK BIT(1)
+#define SC_TYPE_DELEG BIT(2)
+#define SC_TYPE_LAYOUT BIT(3)
+ unsigned short sc_type;
+
+/* state_lock protects sc_status for delegation stateids.
+ * ->cl_lock protects sc_status for open and lock stateids.
+ * ->st_mutex also protect sc_status for open stateids.
+ * ->ls_lock protects sc_status for layout stateids.
+ */
+/*
+ * For an open stateid kept around *only* to process close replays.
+ * For deleg stateid, kept in idr until last reference is dropped.
+ */
+#define SC_STATUS_CLOSED BIT(0)
/* For a deleg stateid kept around only to process free_stateid's: */
-#define NFS4_REVOKED_DELEG_STID 16
-#define NFS4_CLOSED_DELEG_STID 32
-#define NFS4_LAYOUT_STID 64
+#define SC_STATUS_REVOKED BIT(1)
+#define SC_STATUS_ADMIN_REVOKED BIT(2)
+ unsigned short sc_status;
+
struct list_head sc_cp_list;
- unsigned char sc_type;
stateid_t sc_stateid;
spinlock_t sc_lock;
struct nfs4_client *sc_client;
@@ -117,6 +134,24 @@ struct nfs4_cpntf_state {
time64_t cpntf_time; /* last time stateid used */
};
+struct nfs4_cb_fattr {
+ struct nfsd4_callback ncf_getattr;
+ u32 ncf_cb_status;
+ u32 ncf_cb_bmap[1];
+
+ /* from CB_GETATTR reply */
+ u64 ncf_cb_change;
+ u64 ncf_cb_fsize;
+
+ unsigned long ncf_cb_flags;
+ bool ncf_file_modified;
+ u64 ncf_initial_cinfo;
+ u64 ncf_cur_fsize;
+};
+
+/* bits for ncf_cb_flags */
+#define CB_GETATTR_BUSY 0
+
/*
* Represents a delegation stateid. The nfs4_client holds references to these
* and they are put when it is being destroyed or when the delegation is
@@ -150,6 +185,9 @@ struct nfs4_delegation {
int dl_retries;
struct nfsd4_callback dl_recall;
bool dl_recalled;
+
+ /* for CB_GETATTR */
+ struct nfs4_cb_fattr dl_cb_fattr;
};
#define cb_to_delegation(cb) \
@@ -317,8 +355,9 @@ enum {
* 0. If they are not renewed within a lease period, they become eligible for
* destruction by the laundromat.
*
- * These objects can also be destroyed prematurely by the fault injection code,
- * or if the client sends certain forms of SETCLIENTID or EXCHANGE_ID updates.
+ * These objects can also be destroyed if the client sends certain forms of
+ * SETCLIENTID or EXCHANGE_ID operations.
+ *
* Care is taken *not* to do this however when the objects have an elevated
* refcount.
*
@@ -326,7 +365,7 @@ enum {
*
* o Each nfs4_clients is also hashed by name (the opaque quantity initially
* sent by the client to identify itself).
- *
+ *
* o cl_perclient list is used to ensure no dangling stateowner references
* when we expire the nfs4_client
*/
@@ -351,6 +390,7 @@ struct nfs4_client {
clientid_t cl_clientid; /* generated by server */
nfs4_verifier cl_confirm; /* generated by server */
u32 cl_minorversion;
+ atomic_t cl_admin_revoked; /* count of admin-revoked states */
/* NFSv4.1 client implementation id: */
struct xdr_netobj cl_nii_domain;
struct xdr_netobj cl_nii_name;
@@ -640,6 +680,7 @@ enum nfsd4_cb_op {
NFSPROC4_CLNT_CB_SEQUENCE,
NFSPROC4_CLNT_CB_NOTIFY_LOCK,
NFSPROC4_CLNT_CB_RECALL_ANY,
+ NFSPROC4_CLNT_CB_GETATTR,
};
/* Returns true iff a is later than b: */
@@ -672,15 +713,15 @@ extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
stateid_t *stateid, int flags, struct nfsd_file **filp,
struct nfs4_stid **cstid);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
- stateid_t *stateid, unsigned char typemask,
- struct nfs4_stid **s, struct nfsd_net *nn);
+ stateid_t *stateid, unsigned short typemask,
+ unsigned short statusmask,
+ struct nfs4_stid **s, struct nfsd_net *nn);
struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
void (*sc_free)(struct nfs4_stid *));
int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
void nfs4_free_copy_state(struct nfsd4_copy *copy);
struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
struct nfs4_stid *p_stid);
-void nfs4_unhash_stid(struct nfs4_stid *s);
void nfs4_put_stid(struct nfs4_stid *s);
void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
void nfs4_remove_reclaim_record(struct nfs4_client_reclaim *, struct nfsd_net *);
@@ -714,6 +755,14 @@ static inline void get_nfs4_file(struct nfs4_file *fi)
}
struct nfsd_file *find_any_file(struct nfs4_file *f);
+#ifdef CONFIG_NFSD_V4
+void nfsd4_revoke_states(struct net *net, struct super_block *sb);
+#else
+static inline void nfsd4_revoke_states(struct net *net, struct super_block *sb)
+{
+}
+#endif
+
/* grace period management */
void nfsd4_end_grace(struct nfsd_net *nn);
@@ -732,5 +781,5 @@ static inline bool try_to_expire_client(struct nfs4_client *clp)
}
extern __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp,
- struct inode *inode);
+ struct inode *inode, bool *file_modified, u64 *size);
#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 12d79f5d4eb1..be52fb1e928e 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -27,25 +27,22 @@
#include "nfsd.h"
-struct nfsd_stats nfsdstats;
-struct svc_stat nfsd_svcstats = {
- .program = &nfsd_program,
-};
-
static int nfsd_show(struct seq_file *seq, void *v)
{
+ struct net *net = pde_data(file_inode(seq->file));
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int i;
seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n",
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]),
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]),
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]),
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_FH_STALE]),
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_READ]),
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]),
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]),
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]),
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_FH_STALE]),
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_READ]),
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_WRITE]));
/* thread usage: */
- seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt));
+ seq_printf(seq, "th %u 0", atomic_read(&nfsd_th_cnt));
/* deprecated thread usage histogram stats */
for (i = 0; i < 10; i++)
@@ -55,7 +52,7 @@ static int nfsd_show(struct seq_file *seq, void *v)
seq_puts(seq, "\nra 0 0 0 0 0 0 0 0 0 0 0 0\n");
/* show my rpc info */
- svc_seq_show(seq, &nfsd_svcstats);
+ svc_seq_show(seq, &nn->nfsd_svcstats);
#ifdef CONFIG_NFSD_V4
/* Show count for individual nfsv4 operations */
@@ -63,10 +60,10 @@ static int nfsd_show(struct seq_file *seq, void *v)
seq_printf(seq, "proc4ops %u", LAST_NFS4_OP + 1);
for (i = 0; i <= LAST_NFS4_OP; i++) {
seq_printf(seq, " %lld",
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_NFS4_OP(i)]));
}
seq_printf(seq, "\nwdeleg_getattr %lld",
- percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]));
+ percpu_counter_sum_positive(&nn->counter[NFSD_STATS_WDELEG_GETATTR]));
seq_putc(seq, '\n');
#endif
@@ -108,31 +105,24 @@ void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num)
percpu_counter_destroy(&counters[i]);
}
-static int nfsd_stat_counters_init(void)
+int nfsd_stat_counters_init(struct nfsd_net *nn)
{
- return nfsd_percpu_counters_init(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM);
+ return nfsd_percpu_counters_init(nn->counter, NFSD_STATS_COUNTERS_NUM);
}
-static void nfsd_stat_counters_destroy(void)
+void nfsd_stat_counters_destroy(struct nfsd_net *nn)
{
- nfsd_percpu_counters_destroy(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM);
+ nfsd_percpu_counters_destroy(nn->counter, NFSD_STATS_COUNTERS_NUM);
}
-int nfsd_stat_init(void)
+void nfsd_proc_stat_init(struct net *net)
{
- int err;
-
- err = nfsd_stat_counters_init();
- if (err)
- return err;
+ struct nfsd_net *nn = net_generic(net, nfsd_net_id);
- svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops);
-
- return 0;
+ svc_proc_register(net, &nn->nfsd_svcstats, &nfsd_proc_ops);
}
-void nfsd_stat_shutdown(void)
+void nfsd_proc_stat_shutdown(struct net *net)
{
- nfsd_stat_counters_destroy();
- svc_proc_unregister(&init_net, "nfsd");
+ svc_proc_unregister(net, "nfsd");
}
diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h
index 14f50c660b61..d2753e975dfd 100644
--- a/fs/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -10,94 +10,72 @@
#include <uapi/linux/nfsd/stats.h>
#include <linux/percpu_counter.h>
-
-enum {
- NFSD_STATS_RC_HITS, /* repcache hits */
- NFSD_STATS_RC_MISSES, /* repcache misses */
- NFSD_STATS_RC_NOCACHE, /* uncached reqs */
- NFSD_STATS_FH_STALE, /* FH stale error */
- NFSD_STATS_IO_READ, /* bytes returned to read requests */
- NFSD_STATS_IO_WRITE, /* bytes passed in write requests */
-#ifdef CONFIG_NFSD_V4
- NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */
- NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP,
-#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op))
- NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */
-#endif
- NFSD_STATS_COUNTERS_NUM
-};
-
-struct nfsd_stats {
- struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM];
-
- atomic_t th_cnt; /* number of available threads */
-};
-
-extern struct nfsd_stats nfsdstats;
-
-extern struct svc_stat nfsd_svcstats;
-
int nfsd_percpu_counters_init(struct percpu_counter *counters, int num);
void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num);
void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num);
-int nfsd_stat_init(void);
-void nfsd_stat_shutdown(void);
+int nfsd_stat_counters_init(struct nfsd_net *nn);
+void nfsd_stat_counters_destroy(struct nfsd_net *nn);
+void nfsd_proc_stat_init(struct net *net);
+void nfsd_proc_stat_shutdown(struct net *net);
-static inline void nfsd_stats_rc_hits_inc(void)
+static inline void nfsd_stats_rc_hits_inc(struct nfsd_net *nn)
{
- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_HITS]);
+ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_HITS]);
}
-static inline void nfsd_stats_rc_misses_inc(void)
+static inline void nfsd_stats_rc_misses_inc(struct nfsd_net *nn)
{
- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_MISSES]);
+ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_MISSES]);
}
-static inline void nfsd_stats_rc_nocache_inc(void)
+static inline void nfsd_stats_rc_nocache_inc(struct nfsd_net *nn)
{
- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]);
+ percpu_counter_inc(&nn->counter[NFSD_STATS_RC_NOCACHE]);
}
-static inline void nfsd_stats_fh_stale_inc(struct svc_export *exp)
+static inline void nfsd_stats_fh_stale_inc(struct nfsd_net *nn,
+ struct svc_export *exp)
{
- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_FH_STALE]);
+ percpu_counter_inc(&nn->counter[NFSD_STATS_FH_STALE]);
if (exp && exp->ex_stats)
percpu_counter_inc(&exp->ex_stats->counter[EXP_STATS_FH_STALE]);
}
-static inline void nfsd_stats_io_read_add(struct svc_export *exp, s64 amount)
+static inline void nfsd_stats_io_read_add(struct nfsd_net *nn,
+ struct svc_export *exp, s64 amount)
{
- percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_READ], amount);
+ percpu_counter_add(&nn->counter[NFSD_STATS_IO_READ], amount);
if (exp && exp->ex_stats)
percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_READ], amount);
}
-static inline void nfsd_stats_io_write_add(struct svc_export *exp, s64 amount)
+static inline void nfsd_stats_io_write_add(struct nfsd_net *nn,
+ struct svc_export *exp, s64 amount)
{
- percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_WRITE], amount);
+ percpu_counter_add(&nn->counter[NFSD_STATS_IO_WRITE], amount);
if (exp && exp->ex_stats)
percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_WRITE], amount);
}
static inline void nfsd_stats_payload_misses_inc(struct nfsd_net *nn)
{
- percpu_counter_inc(&nn->counter[NFSD_NET_PAYLOAD_MISSES]);
+ percpu_counter_inc(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]);
}
static inline void nfsd_stats_drc_mem_usage_add(struct nfsd_net *nn, s64 amount)
{
- percpu_counter_add(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
+ percpu_counter_add(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount);
}
static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount)
{
- percpu_counter_sub(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount);
+ percpu_counter_sub(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount);
}
#ifdef CONFIG_NFSD_V4
-static inline void nfsd_stats_wdeleg_getattr_inc(void)
+static inline void nfsd_stats_wdeleg_getattr_inc(struct nfsd_net *nn)
{
- percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]);
+ percpu_counter_inc(&nn->counter[NFSD_STATS_WDELEG_GETATTR]);
}
#endif
#endif /* _NFSD_STATS_H */
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index d1e8cf079b0f..e545e92c4408 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -9,8 +9,10 @@
#define _NFSD_TRACE_H
#include <linux/tracepoint.h>
+#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/xprt.h>
#include <trace/misc/nfs.h>
+#include <trace/misc/sunrpc.h>
#include "export.h"
#include "nfsfh.h"
@@ -641,23 +643,18 @@ DEFINE_EVENT(nfsd_stateseqid_class, nfsd_##name, \
DEFINE_STATESEQID_EVENT(preprocess);
DEFINE_STATESEQID_EVENT(open_confirm);
-TRACE_DEFINE_ENUM(NFS4_OPEN_STID);
-TRACE_DEFINE_ENUM(NFS4_LOCK_STID);
-TRACE_DEFINE_ENUM(NFS4_DELEG_STID);
-TRACE_DEFINE_ENUM(NFS4_CLOSED_STID);
-TRACE_DEFINE_ENUM(NFS4_REVOKED_DELEG_STID);
-TRACE_DEFINE_ENUM(NFS4_CLOSED_DELEG_STID);
-TRACE_DEFINE_ENUM(NFS4_LAYOUT_STID);
-
#define show_stid_type(x) \
__print_flags(x, "|", \
- { NFS4_OPEN_STID, "OPEN" }, \
- { NFS4_LOCK_STID, "LOCK" }, \
- { NFS4_DELEG_STID, "DELEG" }, \
- { NFS4_CLOSED_STID, "CLOSED" }, \
- { NFS4_REVOKED_DELEG_STID, "REVOKED" }, \
- { NFS4_CLOSED_DELEG_STID, "CLOSED_DELEG" }, \
- { NFS4_LAYOUT_STID, "LAYOUT" })
+ { SC_TYPE_OPEN, "OPEN" }, \
+ { SC_TYPE_LOCK, "LOCK" }, \
+ { SC_TYPE_DELEG, "DELEG" }, \
+ { SC_TYPE_LAYOUT, "LAYOUT" })
+
+#define show_stid_status(x) \
+ __print_flags(x, "|", \
+ { SC_STATUS_CLOSED, "CLOSED" }, \
+ { SC_STATUS_REVOKED, "REVOKED" }, \
+ { SC_STATUS_ADMIN_REVOKED, "ADMIN_REVOKED" })
DECLARE_EVENT_CLASS(nfsd_stid_class,
TP_PROTO(
@@ -666,6 +663,7 @@ DECLARE_EVENT_CLASS(nfsd_stid_class,
TP_ARGS(stid),
TP_STRUCT__entry(
__field(unsigned long, sc_type)
+ __field(unsigned long, sc_status)
__field(int, sc_count)
__field(u32, cl_boot)
__field(u32, cl_id)
@@ -676,16 +674,18 @@ DECLARE_EVENT_CLASS(nfsd_stid_class,
const stateid_t *stp = &stid->sc_stateid;
__entry->sc_type = stid->sc_type;
+ __entry->sc_status = stid->sc_status;
__entry->sc_count = refcount_read(&stid->sc_count);
__entry->cl_boot = stp->si_opaque.so_clid.cl_boot;
__entry->cl_id = stp->si_opaque.so_clid.cl_id;
__entry->si_id = stp->si_opaque.so_id;
__entry->si_generation = stp->si_generation;
),
- TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s",
+ TP_printk("client %08x:%08x stateid %08x:%08x ref=%d type=%s state=%s",
__entry->cl_boot, __entry->cl_id,
__entry->si_id, __entry->si_generation,
- __entry->sc_count, show_stid_type(__entry->sc_type)
+ __entry->sc_count, show_stid_type(__entry->sc_type),
+ show_stid_status(__entry->sc_status)
)
);
@@ -696,6 +696,59 @@ DEFINE_EVENT(nfsd_stid_class, nfsd_stid_##name, \
DEFINE_STID_EVENT(revoke);
+TRACE_EVENT(nfsd_stateowner_replay,
+ TP_PROTO(
+ u32 opnum,
+ const struct nfs4_replay *rp
+ ),
+ TP_ARGS(opnum, rp),
+ TP_STRUCT__entry(
+ __field(unsigned long, status)
+ __field(u32, opnum)
+ ),
+ TP_fast_assign(
+ __entry->status = be32_to_cpu(rp->rp_status);
+ __entry->opnum = opnum;
+ ),
+ TP_printk("opnum=%u status=%lu",
+ __entry->opnum, __entry->status)
+);
+
+TRACE_EVENT_CONDITION(nfsd_seq4_status,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ const struct nfsd4_sequence *sequence
+ ),
+ TP_ARGS(rqstp, sequence),
+ TP_CONDITION(sequence->status_flags),
+ TP_STRUCT__entry(
+ __field(unsigned int, netns_ino)
+ __field(u32, xid)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, seqno)
+ __field(u32, reserved)
+ __field(unsigned long, status_flags)
+ ),
+ TP_fast_assign(
+ const struct nfsd4_sessionid *sid =
+ (struct nfsd4_sessionid *)&sequence->sessionid;
+
+ __entry->netns_ino = SVC_NET(rqstp)->ns.inum;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->cl_boot = sid->clientid.cl_boot;
+ __entry->cl_id = sid->clientid.cl_id;
+ __entry->seqno = sid->sequence;
+ __entry->reserved = sid->reserved;
+ __entry->status_flags = sequence->status_flags;
+ ),
+ TP_printk("xid=0x%08x sessionid=%08x:%08x:%08x:%08x status_flags=%s",
+ __entry->xid, __entry->cl_boot, __entry->cl_id,
+ __entry->seqno, __entry->reserved,
+ show_nfs4_seq4_status(__entry->status_flags)
+ )
+);
+
DECLARE_EVENT_CLASS(nfsd_clientid_class,
TP_PROTO(const clientid_t *clid),
TP_ARGS(clid),
@@ -1334,7 +1387,8 @@ DEFINE_EVENT(nfsd_cb_class, nfsd_cb_##name, \
TP_PROTO(const struct nfs4_client *clp), \
TP_ARGS(clp))
-DEFINE_NFSD_CB_EVENT(state);
+DEFINE_NFSD_CB_EVENT(start);
+DEFINE_NFSD_CB_EVENT(new_state);
DEFINE_NFSD_CB_EVENT(probe);
DEFINE_NFSD_CB_EVENT(lost);
DEFINE_NFSD_CB_EVENT(shutdown);
@@ -1405,6 +1459,128 @@ TRACE_EVENT(nfsd_cb_setup_err,
__entry->error)
);
+DECLARE_EVENT_CLASS(nfsd_cb_lifetime_class,
+ TP_PROTO(
+ const struct nfs4_client *clp,
+ const struct nfsd4_callback *cb
+ ),
+ TP_ARGS(clp, cb),
+ TP_STRUCT__entry(
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(const void *, cb)
+ __field(bool, need_restart)
+ __sockaddr(addr, clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_fast_assign(
+ __entry->cl_boot = clp->cl_clientid.cl_boot;
+ __entry->cl_id = clp->cl_clientid.cl_id;
+ __entry->cb = cb;
+ __entry->need_restart = cb->cb_need_restart;
+ __assign_sockaddr(addr, &clp->cl_cb_conn.cb_addr,
+ clp->cl_cb_conn.cb_addrlen)
+ ),
+ TP_printk("addr=%pISpc client %08x:%08x cb=%p%s",
+ __get_sockaddr(addr), __entry->cl_boot, __entry->cl_id,
+ __entry->cb, __entry->need_restart ?
+ " (need restart)" : " (first try)"
+ )
+);
+
+#define DEFINE_NFSD_CB_LIFETIME_EVENT(name) \
+DEFINE_EVENT(nfsd_cb_lifetime_class, nfsd_cb_##name, \
+ TP_PROTO( \
+ const struct nfs4_client *clp, \
+ const struct nfsd4_callback *cb \
+ ), \
+ TP_ARGS(clp, cb))
+
+DEFINE_NFSD_CB_LIFETIME_EVENT(queue);
+DEFINE_NFSD_CB_LIFETIME_EVENT(destroy);
+DEFINE_NFSD_CB_LIFETIME_EVENT(restart);
+DEFINE_NFSD_CB_LIFETIME_EVENT(bc_update);
+DEFINE_NFSD_CB_LIFETIME_EVENT(bc_shutdown);
+
+TRACE_EVENT(nfsd_cb_seq_status,
+ TP_PROTO(
+ const struct rpc_task *task,
+ const struct nfsd4_callback *cb
+ ),
+ TP_ARGS(task, cb),
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, seqno)
+ __field(u32, reserved)
+ __field(int, tk_status)
+ __field(int, seq_status)
+ ),
+ TP_fast_assign(
+ const struct nfs4_client *clp = cb->cb_clp;
+ const struct nfsd4_session *session = clp->cl_cb_session;
+ const struct nfsd4_sessionid *sid =
+ (struct nfsd4_sessionid *)&session->se_sessionid;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client ?
+ task->tk_client->cl_clid : -1;
+ __entry->cl_boot = sid->clientid.cl_boot;
+ __entry->cl_id = sid->clientid.cl_id;
+ __entry->seqno = sid->sequence;
+ __entry->reserved = sid->reserved;
+ __entry->tk_status = task->tk_status;
+ __entry->seq_status = cb->cb_seq_status;
+ ),
+ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
+ " sessionid=%08x:%08x:%08x:%08x tk_status=%d seq_status=%d\n",
+ __entry->task_id, __entry->client_id,
+ __entry->cl_boot, __entry->cl_id,
+ __entry->seqno, __entry->reserved,
+ __entry->tk_status, __entry->seq_status
+ )
+);
+
+TRACE_EVENT(nfsd_cb_free_slot,
+ TP_PROTO(
+ const struct rpc_task *task,
+ const struct nfsd4_callback *cb
+ ),
+ TP_ARGS(task, cb),
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, cl_boot)
+ __field(u32, cl_id)
+ __field(u32, seqno)
+ __field(u32, reserved)
+ __field(u32, slot_seqno)
+ ),
+ TP_fast_assign(
+ const struct nfs4_client *clp = cb->cb_clp;
+ const struct nfsd4_session *session = clp->cl_cb_session;
+ const struct nfsd4_sessionid *sid =
+ (struct nfsd4_sessionid *)&session->se_sessionid;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client ?
+ task->tk_client->cl_clid : -1;
+ __entry->cl_boot = sid->clientid.cl_boot;
+ __entry->cl_id = sid->clientid.cl_id;
+ __entry->seqno = sid->sequence;
+ __entry->reserved = sid->reserved;
+ __entry->slot_seqno = session->se_cb_seq_nr;
+ ),
+ TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
+ " sessionid=%08x:%08x:%08x:%08x new slot seqno=%u\n",
+ __entry->task_id, __entry->client_id,
+ __entry->cl_boot, __entry->cl_id,
+ __entry->seqno, __entry->reserved,
+ __entry->slot_seqno
+ )
+);
+
TRACE_EVENT_CONDITION(nfsd_cb_recall,
TP_PROTO(
const struct nfs4_stid *stid
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index b7c7a9273ea0..6a4c506038e0 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -476,7 +476,6 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
* @rqstp: controlling RPC transaction
* @fhp: filehandle of target
* @attr: attributes to set
- * @check_guard: set to 1 if guardtime is a valid timestamp
* @guardtime: do not act if ctime.tv_sec does not match this timestamp
*
* This call may adjust the contents of @attr (in particular, this
@@ -488,8 +487,7 @@ static int __nfsd_setattr(struct dentry *dentry, struct iattr *iap)
*/
__be32
nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
- struct nfsd_attrs *attr,
- int check_guard, time64_t guardtime)
+ struct nfsd_attrs *attr, const struct timespec64 *guardtime)
{
struct dentry *dentry;
struct inode *inode;
@@ -497,7 +495,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
int accmode = NFSD_MAY_SATTR;
umode_t ftype = 0;
__be32 err;
- int host_err;
+ int host_err = 0;
bool get_write_count;
bool size_change = (iap->ia_valid & ATTR_SIZE);
int retries;
@@ -538,9 +536,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_sanitize_attrs(inode, iap);
- if (check_guard && guardtime != inode_get_ctime_sec(inode))
- return nfserr_notsync;
-
/*
* The size case is special, it changes the file in addition to the
* attributes, and file systems don't expect it to be mixed with
@@ -555,6 +550,19 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
inode_lock(inode);
+ err = fh_fill_pre_attrs(fhp);
+ if (err)
+ goto out_unlock;
+
+ if (guardtime) {
+ struct timespec64 ctime = inode_get_ctime(inode);
+ if ((u32)guardtime->tv_sec != (u32)ctime.tv_sec ||
+ guardtime->tv_nsec != ctime.tv_nsec) {
+ err = nfserr_notsync;
+ goto out_fill_attrs;
+ }
+ }
+
for (retries = 1;;) {
struct iattr attrs;
@@ -582,13 +590,23 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
attr->na_aclerr = set_posix_acl(&nop_mnt_idmap,
dentry, ACL_TYPE_DEFAULT,
attr->na_dpacl);
+out_fill_attrs:
+ /*
+ * RFC 1813 Section 3.3.2 does not mandate that an NFS server
+ * returns wcc_data for SETATTR. Some client implementations
+ * depend on receiving wcc_data, however, to sort out partial
+ * updates (eg., the client requested that size and mode be
+ * modified, but the server changed only the file mode).
+ */
+ fh_fill_post_attrs(fhp);
+out_unlock:
inode_unlock(inode);
if (size_change)
put_write_access(inode);
out:
if (!host_err)
host_err = commit_metadata(fhp);
- return nfserrno(host_err);
+ return err != 0 ? err : nfserrno(host_err);
}
#if defined(CONFIG_NFSD_V4)
@@ -1002,7 +1020,9 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
unsigned long *count, u32 *eof, ssize_t host_err)
{
if (host_err >= 0) {
- nfsd_stats_io_read_add(fhp->fh_export, host_err);
+ struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
+
+ nfsd_stats_io_read_add(nn, fhp->fh_export, host_err);
*eof = nfsd_eof_on_read(file, offset, host_err, *count);
*count = host_err;
fsnotify_access(file);
@@ -1185,7 +1205,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
goto out_nfserr;
}
*cnt = host_err;
- nfsd_stats_io_write_add(exp, *cnt);
+ nfsd_stats_io_write_add(nn, exp, *cnt);
fsnotify_modify(file);
host_err = filemap_check_wb_err(file->f_mapping, since);
if (host_err < 0)
@@ -1404,7 +1424,7 @@ nfsd_create_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp,
* if the attributes have not changed.
*/
if (iap->ia_valid)
- status = nfsd_setattr(rqstp, resfhp, attrs, 0, (time64_t)0);
+ status = nfsd_setattr(rqstp, resfhp, attrs, NULL);
else
status = nfserrno(commit_metadata(resfhp));
@@ -1906,10 +1926,10 @@ out_unlock:
fh_drop_write(ffhp);
/*
- * If the target dentry has cached open files, then we need to try to
- * close them prior to doing the rename. Flushing delayed fput
- * shouldn't be done with locks held however, so we delay it until this
- * point and then reattempt the whole shebang.
+ * If the target dentry has cached open files, then we need to
+ * try to close them prior to doing the rename. Final fput
+ * shouldn't be done with locks held however, so we delay it
+ * until this point and then reattempt the whole shebang.
*/
if (close_cached) {
close_cached = false;
@@ -2177,11 +2197,43 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
if (err == nfserr_eof || err == nfserr_toosmall)
err = nfs_ok; /* can still be found in ->err */
out_close:
- fput(file);
+ nfsd_filp_close(file);
out:
return err;
}
+/**
+ * nfsd_filp_close: close a file synchronously
+ * @fp: the file to close
+ *
+ * nfsd_filp_close() is similar in behaviour to filp_close().
+ * The difference is that if this is the final close on the
+ * file, the that finalisation happens immediately, rather then
+ * being handed over to a work_queue, as it the case for
+ * filp_close().
+ * When a user-space process closes a file (even when using
+ * filp_close() the finalisation happens before returning to
+ * userspace, so it is effectively synchronous. When a kernel thread
+ * uses file_close(), on the other hand, the handling is completely
+ * asynchronous. This means that any cost imposed by that finalisation
+ * is not imposed on the nfsd thread, and nfsd could potentually
+ * close files more quickly than the work queue finalises the close,
+ * which would lead to unbounded growth in the queue.
+ *
+ * In some contexts is it not safe to synchronously wait for
+ * close finalisation (see comment for __fput_sync()), but nfsd
+ * does not match those contexts. In partcilarly it does not, at the
+ * time that this function is called, hold and locks and no finalisation
+ * of any file, socket, or device driver would have any cause to wait
+ * for nfsd to make progress.
+ */
+void nfsd_filp_close(struct file *fp)
+{
+ get_file(fp);
+ filp_close(fp, NULL);
+ __fput_sync(fp);
+}
+
/*
* Get file system stats
* N.B. After this call fhp needs an fh_put
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index 702fbc4483bf..c60fdb6200fd 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -69,7 +69,7 @@ __be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
const char *, unsigned int,
struct svc_export **, struct dentry **);
__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
- struct nfsd_attrs *, int, time64_t);
+ struct nfsd_attrs *, const struct timespec64 *);
int nfsd_mountpoint(struct dentry *, struct svc_export *);
#ifdef CONFIG_NFSD_V4
__be32 nfsd4_vfs_fallocate(struct svc_rqst *, struct svc_fh *,
@@ -148,6 +148,8 @@ __be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
struct dentry *, int);
+void nfsd_filp_close(struct file *fp);
+
static inline int fh_want_write(struct svc_fh *fh)
{
int ret;
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 03fe4e21306c..522067b7fd75 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -14,7 +14,7 @@ struct nfsd3_sattrargs {
struct svc_fh fh;
struct iattr attrs;
int check_guard;
- time64_t guardtime;
+ struct timespec64 guardtime;
};
struct nfsd3_diropargs {
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
index 0d39af1b00a0..e8b00309c449 100644
--- a/fs/nfsd/xdr4cb.h
+++ b/fs/nfsd/xdr4cb.h
@@ -54,3 +54,21 @@
#define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \
cb_sequence_dec_sz + \
op_dec_sz)
+
+/*
+ * 1: CB_GETATTR opcode (32-bit)
+ * N: file_handle
+ * 1: number of entry in attribute array (32-bit)
+ * 1: entry 0 in attribute array (32-bit)
+ */
+#define NFS4_enc_cb_getattr_sz (cb_compound_enc_hdr_sz + \
+ cb_sequence_enc_sz + \
+ 1 + enc_nfs4_fh_sz + 1 + 1)
+/*
+ * 4: fattr_bitmap_maxsz
+ * 1: attribute array len
+ * 2: change attr (64-bit)
+ * 2: size (64-bit)
+ */
+#define NFS4_dec_cb_getattr_sz (cb_compound_dec_hdr_sz + \
+ cb_sequence_dec_sz + 4 + 1 + 2 + 2 + op_dec_sz)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 67cf1c9efd80..23617da0e565 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -339,7 +339,6 @@ struct svc_program {
const struct svc_version **pg_vers; /* version array */
char * pg_name; /* service name */
char * pg_class; /* class name: services sharing authentication */
- struct svc_stat * pg_stats; /* rpc statistics */
enum svc_auth_status (*pg_authenticate)(struct svc_rqst *rqstp);
__be32 (*pg_init_request)(struct svc_rqst *,
const struct svc_program *,
@@ -411,7 +410,9 @@ bool svc_rqst_replace_page(struct svc_rqst *rqstp,
void svc_rqst_release_pages(struct svc_rqst *rqstp);
void svc_rqst_free(struct svc_rqst *);
void svc_exit_thread(struct svc_rqst *);
-struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
+struct svc_serv * svc_create_pooled(struct svc_program *prog,
+ struct svc_stat *stats,
+ unsigned int bufsize,
int (*threadfn)(void *data));
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
int svc_pool_stats_open(struct svc_info *si, struct file *file);
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index e7595ae62fe2..24cd199dd6f3 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -203,6 +203,30 @@ struct svc_rdma_recv_ctxt {
struct page *rc_pages[RPCSVC_MAXPAGES];
};
+/*
+ * State for sending a Write chunk.
+ * - Tracks progress of writing one chunk over all its segments
+ * - Stores arguments for the SGL constructor functions
+ */
+struct svc_rdma_write_info {
+ struct svcxprt_rdma *wi_rdma;
+ struct list_head wi_list;
+
+ const struct svc_rdma_chunk *wi_chunk;
+
+ /* write state of this chunk */
+ unsigned int wi_seg_off;
+ unsigned int wi_seg_no;
+
+ /* SGL constructor arguments */
+ const struct xdr_buf *wi_xdr;
+ unsigned char *wi_base;
+ unsigned int wi_next_off;
+
+ struct svc_rdma_chunk_ctxt wi_cc;
+ struct work_struct wi_work;
+};
+
struct svc_rdma_send_ctxt {
struct llist_node sc_node;
struct rpc_rdma_cid sc_cid;
@@ -210,9 +234,15 @@ struct svc_rdma_send_ctxt {
struct svcxprt_rdma *sc_rdma;
struct ib_send_wr sc_send_wr;
+ struct ib_send_wr *sc_wr_chain;
+ int sc_sqecount;
struct ib_cqe sc_cqe;
struct xdr_buf sc_hdrbuf;
struct xdr_stream sc_stream;
+
+ struct list_head sc_write_info_list;
+ struct svc_rdma_write_info sc_reply_info;
+
void *sc_xprt_buf;
int sc_page_count;
int sc_cur_sge_no;
@@ -236,18 +266,27 @@ extern void svc_rdma_release_ctxt(struct svc_xprt *xprt, void *ctxt);
extern int svc_rdma_recvfrom(struct svc_rqst *);
/* svc_rdma_rw.c */
+extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
+ struct svc_rdma_chunk_ctxt *cc);
extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc);
extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
struct svc_rdma_chunk_ctxt *cc,
enum dma_data_direction dir);
-extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_chunk *chunk,
- const struct xdr_buf *xdr);
-extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_recv_ctxt *rctxt,
- const struct xdr_buf *xdr);
+extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt);
+extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt);
+extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_pcl *write_pcl,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct xdr_buf *xdr);
+extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_pcl *write_pcl,
+ const struct svc_rdma_pcl *reply_pcl,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct xdr_buf *xdr);
extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp,
struct svc_rdma_recv_ctxt *head);
@@ -258,8 +297,8 @@ extern struct svc_rdma_send_ctxt *
svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma);
extern void svc_rdma_send_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *ctxt);
-extern int svc_rdma_send(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt);
+extern int svc_rdma_post_send(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt);
extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
const struct svc_rdma_pcl *write_pcl,
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 110c1475c527..027ac3ab457d 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -2118,6 +2118,10 @@ DEFINE_SIMPLE_CID_EVENT(svcrdma_wc_write);
DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_flush);
DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_write_err);
+DEFINE_SIMPLE_CID_EVENT(svcrdma_wc_reply);
+DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_reply_flush);
+DEFINE_SEND_FLUSH_EVENT(svcrdma_wc_reply_err);
+
TRACE_EVENT(svcrdma_qp_error,
TP_PROTO(
const struct ib_event *event,
diff --git a/include/trace/misc/nfs.h b/include/trace/misc/nfs.h
index 0d9d48dca38a..64ab5dac59ce 100644
--- a/include/trace/misc/nfs.h
+++ b/include/trace/misc/nfs.h
@@ -385,3 +385,37 @@ TRACE_DEFINE_ENUM(IOMODE_ANY);
{ SEQ4_STATUS_RESTART_RECLAIM_NEEDED, "RESTART_RECLAIM_NEEDED" }, \
{ SEQ4_STATUS_CB_PATH_DOWN_SESSION, "CB_PATH_DOWN_SESSION" }, \
{ SEQ4_STATUS_BACKCHANNEL_FAULT, "BACKCHANNEL_FAULT" })
+
+TRACE_DEFINE_ENUM(OP_CB_GETATTR);
+TRACE_DEFINE_ENUM(OP_CB_RECALL);
+TRACE_DEFINE_ENUM(OP_CB_LAYOUTRECALL);
+TRACE_DEFINE_ENUM(OP_CB_NOTIFY);
+TRACE_DEFINE_ENUM(OP_CB_PUSH_DELEG);
+TRACE_DEFINE_ENUM(OP_CB_RECALL_ANY);
+TRACE_DEFINE_ENUM(OP_CB_RECALLABLE_OBJ_AVAIL);
+TRACE_DEFINE_ENUM(OP_CB_RECALL_SLOT);
+TRACE_DEFINE_ENUM(OP_CB_SEQUENCE);
+TRACE_DEFINE_ENUM(OP_CB_WANTS_CANCELLED);
+TRACE_DEFINE_ENUM(OP_CB_NOTIFY_LOCK);
+TRACE_DEFINE_ENUM(OP_CB_NOTIFY_DEVICEID);
+TRACE_DEFINE_ENUM(OP_CB_OFFLOAD);
+TRACE_DEFINE_ENUM(OP_CB_ILLEGAL);
+
+#define show_nfs4_cb_op(x) \
+ __print_symbolic(x, \
+ { 0, "CB_NULL" }, \
+ { 1, "CB_COMPOUND" }, \
+ { OP_CB_GETATTR, "CB_GETATTR" }, \
+ { OP_CB_RECALL, "CB_RECALL" }, \
+ { OP_CB_LAYOUTRECALL, "CB_LAYOUTRECALL" }, \
+ { OP_CB_NOTIFY, "CB_NOTIFY" }, \
+ { OP_CB_PUSH_DELEG, "CB_PUSH_DELEG" }, \
+ { OP_CB_RECALL_ANY, "CB_RECALL_ANY" }, \
+ { OP_CB_RECALLABLE_OBJ_AVAIL, "CB_RECALLABLE_OBJ_AVAIL" }, \
+ { OP_CB_RECALL_SLOT, "CB_RECALL_SLOT" }, \
+ { OP_CB_SEQUENCE, "CB_SEQUENCE" }, \
+ { OP_CB_WANTS_CANCELLED, "CB_WANTS_CANCELLED" }, \
+ { OP_CB_NOTIFY_LOCK, "CB_NOTIFY_LOCK" }, \
+ { OP_CB_NOTIFY_DEVICEID, "CB_NOTIFY_DEVICEID" }, \
+ { OP_CB_OFFLOAD, "CB_OFFLOAD" }, \
+ { OP_CB_ILLEGAL, "CB_ILLEGAL" })
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index d2b02710ab07..b2c1b683a88e 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -921,6 +921,8 @@ out_err:
* Caller provides the truncation length of the output token (h) in
* cksumout.len.
*
+ * Note that for RPCSEC, the "initial cipher state" is always all zeroes.
+ *
* Return values:
* %GSS_S_COMPLETE: Digest computed, @cksumout filled in
* %GSS_S_FAILURE: Call failed
@@ -931,22 +933,19 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
int body_offset, struct xdr_netobj *cksumout)
{
unsigned int ivsize = crypto_sync_skcipher_ivsize(cipher);
+ static const u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
struct ahash_request *req;
struct scatterlist sg[1];
- u8 *iv, *checksumdata;
int err = -ENOMEM;
+ u8 *checksumdata;
checksumdata = kmalloc(crypto_ahash_digestsize(tfm), GFP_KERNEL);
if (!checksumdata)
return GSS_S_FAILURE;
- /* For RPCSEC, the "initial cipher state" is always all zeroes. */
- iv = kzalloc(ivsize, GFP_KERNEL);
- if (!iv)
- goto out_free_mem;
req = ahash_request_alloc(tfm, GFP_KERNEL);
if (!req)
- goto out_free_mem;
+ goto out_free_cksumdata;
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
err = crypto_ahash_init(req);
if (err)
@@ -970,8 +969,7 @@ u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
out_free_ahash:
ahash_request_free(req);
-out_free_mem:
- kfree(iv);
+out_free_cksumdata:
kfree_sensitive(checksumdata);
return err ? GSS_S_FAILURE : GSS_S_COMPLETE;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 64cff717c3d9..3366505bc669 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -398,6 +398,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
u64 seq_send64;
int keylen;
u32 time32;
+ int ret;
p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
if (IS_ERR(p))
@@ -450,8 +451,16 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
}
ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
- return gss_krb5_import_ctx_v2(ctx, gfp_mask);
+ ret = gss_krb5_import_ctx_v2(ctx, gfp_mask);
+ if (ret) {
+ p = ERR_PTR(ret);
+ goto out_free;
+ }
+ return 0;
+
+out_free:
+ kfree(ctx->mech_used.data);
out_err:
return PTR_ERR(p);
}
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index d79f12c2550a..cb32ab9a8395 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -250,8 +250,8 @@ static int gssx_dec_option_array(struct xdr_stream *xdr,
creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL);
if (!creds) {
- kfree(oa->data);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto free_oa;
}
oa->data[0].option.data = CREDS_VALUE;
@@ -265,29 +265,40 @@ static int gssx_dec_option_array(struct xdr_stream *xdr,
/* option buffer */
p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- return -ENOSPC;
+ if (unlikely(p == NULL)) {
+ err = -ENOSPC;
+ goto free_creds;
+ }
length = be32_to_cpup(p);
p = xdr_inline_decode(xdr, length);
- if (unlikely(p == NULL))
- return -ENOSPC;
+ if (unlikely(p == NULL)) {
+ err = -ENOSPC;
+ goto free_creds;
+ }
if (length == sizeof(CREDS_VALUE) &&
memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) {
/* We have creds here. parse them */
err = gssx_dec_linux_creds(xdr, creds);
if (err)
- return err;
+ goto free_creds;
oa->data[0].value.len = 1; /* presence */
} else {
/* consume uninteresting buffer */
err = gssx_dec_buffer(xdr, &dummy);
if (err)
- return err;
+ goto free_creds;
}
}
return 0;
+
+free_creds:
+ kfree(creds);
+free_oa:
+ kfree(oa->data);
+ oa->data = NULL;
+ return err;
}
static int gssx_dec_status(struct xdr_stream *xdr,
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 65fc1297c6df..383860cb1d5b 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -314,7 +314,7 @@ EXPORT_SYMBOL_GPL(rpc_proc_unregister);
struct proc_dir_entry *
svc_proc_register(struct net *net, struct svc_stat *statp, const struct proc_ops *proc_ops)
{
- return do_register(net, statp->program->pg_name, statp, proc_ops);
+ return do_register(net, statp->program->pg_name, net, proc_ops);
}
EXPORT_SYMBOL_GPL(svc_proc_register);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index b969e505c7b7..b33e429336fb 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -451,8 +451,8 @@ __svc_init_bc(struct svc_serv *serv)
* Create an RPC service
*/
static struct svc_serv *
-__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
- int (*threadfn)(void *data))
+__svc_create(struct svc_program *prog, struct svc_stat *stats,
+ unsigned int bufsize, int npools, int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int vers;
@@ -463,7 +463,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
return NULL;
serv->sv_name = prog->pg_name;
serv->sv_program = prog;
- serv->sv_stats = prog->pg_stats;
+ serv->sv_stats = stats;
if (bufsize > RPCSVC_MAXPAYLOAD)
bufsize = RPCSVC_MAXPAYLOAD;
serv->sv_max_payload = bufsize? bufsize : 4096;
@@ -529,26 +529,28 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize,
int (*threadfn)(void *data))
{
- return __svc_create(prog, bufsize, 1, threadfn);
+ return __svc_create(prog, NULL, bufsize, 1, threadfn);
}
EXPORT_SYMBOL_GPL(svc_create);
/**
* svc_create_pooled - Create an RPC service with pooled threads
* @prog: the RPC program the new service will handle
+ * @stats: the stats struct if desired
* @bufsize: maximum message size for @prog
* @threadfn: a function to service RPC requests for @prog
*
* Returns an instantiated struct svc_serv object or NULL.
*/
struct svc_serv *svc_create_pooled(struct svc_program *prog,
+ struct svc_stat *stats,
unsigned int bufsize,
int (*threadfn)(void *data))
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
- serv = __svc_create(prog, bufsize, npools, threadfn);
+ serv = __svc_create(prog, stats, bufsize, npools, threadfn);
if (!serv)
goto out_err;
return serv;
@@ -1375,7 +1377,8 @@ svc_process_common(struct svc_rqst *rqstp)
goto err_bad_proc;
/* Syntactic check complete */
- serv->sv_stats->rpccnt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpccnt++;
trace_svc_process(rqstp, progp->pg_name);
aoffset = xdr_stream_pos(xdr);
@@ -1427,7 +1430,8 @@ err_short_len:
goto close_xprt;
err_bad_rpc:
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
xdr_stream_encode_u32(xdr, RPC_MISMATCH);
/* Only RPCv2 supported */
@@ -1438,7 +1442,8 @@ err_bad_rpc:
err_bad_auth:
dprintk("svc: authentication failed (%d)\n",
be32_to_cpu(rqstp->rq_auth_stat));
- serv->sv_stats->rpcbadauth++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadauth++;
/* Restore write pointer to location of reply status: */
xdr_truncate_encode(xdr, XDR_UNIT * 2);
xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
@@ -1448,7 +1453,8 @@ err_bad_auth:
err_bad_prog:
dprintk("svc: unknown program %d\n", rqstp->rq_prog);
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_prog_unavail;
goto sendit;
@@ -1456,7 +1462,8 @@ err_bad_vers:
svc_printk(rqstp, "unknown version (%d for prog %d, %s)\n",
rqstp->rq_vers, rqstp->rq_prog, progp->pg_name);
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_prog_mismatch;
/*
@@ -1470,19 +1477,22 @@ err_bad_vers:
err_bad_proc:
svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc);
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_proc_unavail;
goto sendit;
err_garbage_args:
svc_printk(rqstp, "failed to decode RPC header\n");
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_garbage_args;
goto sendit;
err_system_err:
- serv->sv_stats->rpcbadfmt++;
+ if (serv->sv_stats)
+ serv->sv_stats->rpcbadfmt++;
*rqstp->rq_accept_statp = rpc_system_err;
goto sendit;
}
@@ -1534,7 +1544,8 @@ void svc_process(struct svc_rqst *rqstp)
out_baddir:
svc_printk(rqstp, "bad direction 0x%08x, dropping request\n",
be32_to_cpu(*p));
- rqstp->rq_server->sv_stats->rpcbadfmt++;
+ if (rqstp->rq_server->sv_stats)
+ rqstp->rq_server->sv_stats->rpcbadfmt++;
out_drop:
svc_drop(rqstp);
}
@@ -1612,7 +1623,6 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
rpc_put_task(task);
}
-EXPORT_SYMBOL_GPL(svc_process_bc);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
/**
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index c9be6778643b..e5a78b761012 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -90,7 +90,7 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
*/
get_page(virt_to_page(rqst->rq_buffer));
sctxt->sc_send_wr.opcode = IB_WR_SEND;
- return svc_rdma_send(rdma, sctxt);
+ return svc_rdma_post_send(rdma, sctxt);
}
/* Server-side transport endpoint wants a whole page for its send
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index c00fcce61d1e..f2a100c4c81f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -197,28 +197,6 @@ void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
llist_add_batch(first, last, &rdma->sc_rw_ctxts);
}
-/* State for sending a Write or Reply chunk.
- * - Tracks progress of writing one chunk over all its segments
- * - Stores arguments for the SGL constructor functions
- */
-struct svc_rdma_write_info {
- struct svcxprt_rdma *wi_rdma;
-
- const struct svc_rdma_chunk *wi_chunk;
-
- /* write state of this chunk */
- unsigned int wi_seg_off;
- unsigned int wi_seg_no;
-
- /* SGL constructor arguments */
- const struct xdr_buf *wi_xdr;
- unsigned char *wi_base;
- unsigned int wi_next_off;
-
- struct svc_rdma_chunk_ctxt wi_cc;
- struct work_struct wi_work;
-};
-
static struct svc_rdma_write_info *
svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma,
const struct svc_rdma_chunk *chunk)
@@ -253,6 +231,71 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
}
/**
+ * svc_rdma_write_chunk_release - Release Write chunk I/O resources
+ * @rdma: controlling transport
+ * @ctxt: Send context that is being released
+ */
+void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
+{
+ struct svc_rdma_write_info *info;
+ struct svc_rdma_chunk_ctxt *cc;
+
+ while (!list_empty(&ctxt->sc_write_info_list)) {
+ info = list_first_entry(&ctxt->sc_write_info_list,
+ struct svc_rdma_write_info, wi_list);
+ list_del(&info->wi_list);
+
+ cc = &info->wi_cc;
+ svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
+ svc_rdma_write_info_free(info);
+ }
+}
+
+/**
+ * svc_rdma_reply_chunk_release - Release Reply chunk I/O resources
+ * @rdma: controlling transport
+ * @ctxt: Send context that is being released
+ */
+void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
+{
+ struct svc_rdma_chunk_ctxt *cc = &ctxt->sc_reply_info.wi_cc;
+
+ if (!cc->cc_sqecount)
+ return;
+ svc_rdma_cc_release(rdma, cc, DMA_TO_DEVICE);
+}
+
+/**
+ * svc_rdma_reply_done - Reply chunk Write completion handler
+ * @cq: controlling Completion Queue
+ * @wc: Work Completion report
+ *
+ * Pages under I/O are released by a subsequent Send completion.
+ */
+static void svc_rdma_reply_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct svc_rdma_chunk_ctxt *cc =
+ container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
+ struct svcxprt_rdma *rdma = cq->cq_context;
+
+ switch (wc->status) {
+ case IB_WC_SUCCESS:
+ trace_svcrdma_wc_reply(&cc->cc_cid);
+ return;
+ case IB_WC_WR_FLUSH_ERR:
+ trace_svcrdma_wc_reply_flush(wc, &cc->cc_cid);
+ break;
+ default:
+ trace_svcrdma_wc_reply_err(wc, &cc->cc_cid);
+ }
+
+ svc_xprt_deferred_close(&rdma->sc_xprt);
+}
+
+/**
* svc_rdma_write_done - Write chunk completion
* @cq: controlling Completion Queue
* @wc: Work Completion
@@ -265,13 +308,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct svc_rdma_chunk_ctxt *cc =
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
- struct svc_rdma_write_info *info =
- container_of(cc, struct svc_rdma_write_info, wi_cc);
switch (wc->status) {
case IB_WC_SUCCESS:
trace_svcrdma_wc_write(&cc->cc_cid);
- break;
+ return;
case IB_WC_WR_FLUSH_ERR:
trace_svcrdma_wc_write_flush(wc, &cc->cc_cid);
break;
@@ -279,12 +320,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
trace_svcrdma_wc_write_err(wc, &cc->cc_cid);
}
- svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
-
- if (unlikely(wc->status != IB_WC_SUCCESS))
- svc_xprt_deferred_close(&rdma->sc_xprt);
-
- svc_rdma_write_info_free(info);
+ /* The RDMA Write has flushed, so the client won't get
+ * some of the outgoing RPC message. Signal the loss
+ * to the client by closing the connection.
+ */
+ svc_xprt_deferred_close(&rdma->sc_xprt);
}
/**
@@ -580,41 +620,54 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data)
return xdr->len;
}
-/**
- * svc_rdma_send_write_chunk - Write all segments in a Write chunk
- * @rdma: controlling RDMA transport
- * @chunk: Write chunk provided by the client
- * @xdr: xdr_buf containing the data payload
- *
- * Returns a non-negative number of bytes the chunk consumed, or
- * %-E2BIG if the payload was larger than the Write chunk,
- * %-EINVAL if client provided too many segments,
- * %-ENOMEM if rdma_rw context pool was exhausted,
- * %-ENOTCONN if posting failed (connection is lost),
- * %-EIO if rdma_rw initialization failed (DMA mapping, etc).
+/* Link Write WRs for @chunk onto @sctxt's WR chain.
*/
-int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_chunk *chunk,
- const struct xdr_buf *xdr)
+static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct svc_rdma_chunk *chunk,
+ const struct xdr_buf *xdr)
{
struct svc_rdma_write_info *info;
struct svc_rdma_chunk_ctxt *cc;
+ struct ib_send_wr *first_wr;
+ struct xdr_buf payload;
+ struct list_head *pos;
+ struct ib_cqe *cqe;
int ret;
+ if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position,
+ chunk->ch_payload_length))
+ return -EMSGSIZE;
+
info = svc_rdma_write_info_alloc(rdma, chunk);
if (!info)
return -ENOMEM;
cc = &info->wi_cc;
- ret = svc_rdma_xb_write(xdr, info);
- if (ret != xdr->len)
+ ret = svc_rdma_xb_write(&payload, info);
+ if (ret != payload.len)
goto out_err;
- trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
- ret = svc_rdma_post_chunk_ctxt(rdma, cc);
- if (ret < 0)
+ ret = -EINVAL;
+ if (unlikely(cc->cc_sqecount > rdma->sc_sq_depth))
goto out_err;
- return xdr->len;
+
+ first_wr = sctxt->sc_wr_chain;
+ cqe = &cc->cc_cqe;
+ list_for_each(pos, &cc->cc_rwctxts) {
+ struct svc_rdma_rw_ctxt *rwc;
+
+ rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
+ first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
+ rdma->sc_port_num, cqe, first_wr);
+ cqe = NULL;
+ }
+ sctxt->sc_wr_chain = first_wr;
+ sctxt->sc_sqecount += cc->cc_sqecount;
+ list_add(&info->wi_list, &sctxt->sc_write_info_list);
+
+ trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
+ return 0;
out_err:
svc_rdma_write_info_free(info);
@@ -622,9 +675,39 @@ out_err:
}
/**
- * svc_rdma_send_reply_chunk - Write all segments in the Reply chunk
+ * svc_rdma_prepare_write_list - Construct WR chain for sending Write list
* @rdma: controlling RDMA transport
- * @rctxt: Write and Reply chunks from client
+ * @write_pcl: Write list provisioned by the client
+ * @sctxt: Send WR resources
+ * @xdr: xdr_buf containing an RPC Reply message
+ *
+ * Returns zero on success, or a negative errno if one or more
+ * Write chunks could not be sent.
+ */
+int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_pcl *write_pcl,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct xdr_buf *xdr)
+{
+ struct svc_rdma_chunk *chunk;
+ int ret;
+
+ pcl_for_each_chunk(chunk, write_pcl) {
+ if (!chunk->ch_payload_length)
+ break;
+ ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr);
+ if (ret < 0)
+ return ret;
+ }
+ return 0;
+}
+
+/**
+ * svc_rdma_prepare_reply_chunk - Construct WR chain for writing the Reply chunk
+ * @rdma: controlling RDMA transport
+ * @write_pcl: Write chunk list provided by client
+ * @reply_pcl: Reply chunk provided by client
+ * @sctxt: Send WR resources
* @xdr: xdr_buf containing an RPC Reply
*
* Returns a non-negative number of bytes the chunk consumed, or
@@ -634,39 +717,45 @@ out_err:
* %-ENOTCONN if posting failed (connection is lost),
* %-EIO if rdma_rw initialization failed (DMA mapping, etc).
*/
-int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
- const struct svc_rdma_recv_ctxt *rctxt,
- const struct xdr_buf *xdr)
+int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_pcl *write_pcl,
+ const struct svc_rdma_pcl *reply_pcl,
+ struct svc_rdma_send_ctxt *sctxt,
+ const struct xdr_buf *xdr)
{
- struct svc_rdma_write_info *info;
- struct svc_rdma_chunk_ctxt *cc;
- struct svc_rdma_chunk *chunk;
+ struct svc_rdma_write_info *info = &sctxt->sc_reply_info;
+ struct svc_rdma_chunk_ctxt *cc = &info->wi_cc;
+ struct ib_send_wr *first_wr;
+ struct list_head *pos;
+ struct ib_cqe *cqe;
int ret;
- if (pcl_is_empty(&rctxt->rc_reply_pcl))
- return 0;
-
- chunk = pcl_first_chunk(&rctxt->rc_reply_pcl);
- info = svc_rdma_write_info_alloc(rdma, chunk);
- if (!info)
- return -ENOMEM;
- cc = &info->wi_cc;
+ info->wi_rdma = rdma;
+ info->wi_chunk = pcl_first_chunk(reply_pcl);
+ info->wi_seg_off = 0;
+ info->wi_seg_no = 0;
+ info->wi_cc.cc_cqe.done = svc_rdma_reply_done;
- ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ ret = pcl_process_nonpayloads(write_pcl, xdr,
svc_rdma_xb_write, info);
if (ret < 0)
- goto out_err;
+ return ret;
- trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount);
- ret = svc_rdma_post_chunk_ctxt(rdma, cc);
- if (ret < 0)
- goto out_err;
+ first_wr = sctxt->sc_wr_chain;
+ cqe = &cc->cc_cqe;
+ list_for_each(pos, &cc->cc_rwctxts) {
+ struct svc_rdma_rw_ctxt *rwc;
- return xdr->len;
+ rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
+ first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
+ rdma->sc_port_num, cqe, first_wr);
+ cqe = NULL;
+ }
+ sctxt->sc_wr_chain = first_wr;
+ sctxt->sc_sqecount += cc->cc_sqecount;
-out_err:
- svc_rdma_write_info_free(info);
- return ret;
+ trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount);
+ return xdr->len;
}
/**
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 1a49b7f02041..dfca39abd16c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -142,6 +142,7 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
ctxt->sc_cqe.done = svc_rdma_wc_send;
+ INIT_LIST_HEAD(&ctxt->sc_write_info_list);
ctxt->sc_xprt_buf = buffer;
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
rdma->sc_max_req_size);
@@ -205,9 +206,13 @@ out:
xdr_init_encode(&ctxt->sc_stream, &ctxt->sc_hdrbuf,
ctxt->sc_xprt_buf, NULL);
+ svc_rdma_cc_init(rdma, &ctxt->sc_reply_info.wi_cc);
ctxt->sc_send_wr.num_sge = 0;
ctxt->sc_cur_sge_no = 0;
ctxt->sc_page_count = 0;
+ ctxt->sc_wr_chain = &ctxt->sc_send_wr;
+ ctxt->sc_sqecount = 1;
+
return ctxt;
out_empty:
@@ -223,6 +228,9 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma,
struct ib_device *device = rdma->sc_cm_id->device;
unsigned int i;
+ svc_rdma_write_chunk_release(rdma, ctxt);
+ svc_rdma_reply_chunk_release(rdma, ctxt);
+
if (ctxt->sc_page_count)
release_pages(ctxt->sc_pages, ctxt->sc_page_count);
@@ -293,7 +301,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
struct svc_rdma_send_ctxt *ctxt =
container_of(cqe, struct svc_rdma_send_ctxt, sc_cqe);
- svc_rdma_wake_send_waiters(rdma, 1);
+ svc_rdma_wake_send_waiters(rdma, ctxt->sc_sqecount);
if (unlikely(wc->status != IB_WC_SUCCESS))
goto flushed;
@@ -312,51 +320,76 @@ flushed:
}
/**
- * svc_rdma_send - Post a single Send WR
- * @rdma: transport on which to post the WR
- * @ctxt: send ctxt with a Send WR ready to post
+ * svc_rdma_post_send - Post a WR chain to the Send Queue
+ * @rdma: transport context
+ * @ctxt: WR chain to post
+ *
+ * Copy fields in @ctxt to stack variables in order to guarantee
+ * that these values remain available after the ib_post_send() call.
+ * In some error flow cases, svc_rdma_wc_send() releases @ctxt.
+ *
+ * Note there is potential for starvation when the Send Queue is
+ * full because there is no order to when waiting threads are
+ * awoken. The transport is typically provisioned with a deep
+ * enough Send Queue that SQ exhaustion should be a rare event.
*
- * Returns zero if the Send WR was posted successfully. Otherwise, a
- * negative errno is returned.
+ * Return values:
+ * %0: @ctxt's WR chain was posted successfully
+ * %-ENOTCONN: The connection was lost
*/
-int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
+int svc_rdma_post_send(struct svcxprt_rdma *rdma,
+ struct svc_rdma_send_ctxt *ctxt)
{
- struct ib_send_wr *wr = &ctxt->sc_send_wr;
- int ret;
+ struct ib_send_wr *first_wr = ctxt->sc_wr_chain;
+ struct ib_send_wr *send_wr = &ctxt->sc_send_wr;
+ const struct ib_send_wr *bad_wr = first_wr;
+ struct rpc_rdma_cid cid = ctxt->sc_cid;
+ int ret, sqecount = ctxt->sc_sqecount;
might_sleep();
/* Sync the transport header buffer */
ib_dma_sync_single_for_device(rdma->sc_pd->device,
- wr->sg_list[0].addr,
- wr->sg_list[0].length,
+ send_wr->sg_list[0].addr,
+ send_wr->sg_list[0].length,
DMA_TO_DEVICE);
/* If the SQ is full, wait until an SQ entry is available */
- while (1) {
- if ((atomic_dec_return(&rdma->sc_sq_avail) < 0)) {
+ while (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags)) {
+ if (atomic_sub_return(sqecount, &rdma->sc_sq_avail) < 0) {
+ svc_rdma_wake_send_waiters(rdma, sqecount);
+
+ /* When the transport is torn down, assume
+ * ib_drain_sq() will trigger enough Send
+ * completions to wake us. The XPT_CLOSE test
+ * above should then cause the while loop to
+ * exit.
+ */
percpu_counter_inc(&svcrdma_stat_sq_starve);
- trace_svcrdma_sq_full(rdma, &ctxt->sc_cid);
- atomic_inc(&rdma->sc_sq_avail);
+ trace_svcrdma_sq_full(rdma, &cid);
wait_event(rdma->sc_send_wait,
- atomic_read(&rdma->sc_sq_avail) > 1);
- if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
- return -ENOTCONN;
- trace_svcrdma_sq_retry(rdma, &ctxt->sc_cid);
+ atomic_read(&rdma->sc_sq_avail) > 0);
+ trace_svcrdma_sq_retry(rdma, &cid);
continue;
}
trace_svcrdma_post_send(ctxt);
- ret = ib_post_send(rdma->sc_qp, wr, NULL);
- if (ret)
- break;
+ ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
+ if (ret) {
+ trace_svcrdma_sq_post_err(rdma, &cid, ret);
+ svc_xprt_deferred_close(&rdma->sc_xprt);
+
+ /* If even one WR was posted, there will be a
+ * Send completion that bumps sc_sq_avail.
+ */
+ if (bad_wr == first_wr) {
+ svc_rdma_wake_send_waiters(rdma, sqecount);
+ break;
+ }
+ }
return 0;
}
-
- trace_svcrdma_sq_post_err(rdma, &ctxt->sc_cid, ret);
- svc_xprt_deferred_close(&rdma->sc_xprt);
- wake_up(&rdma->sc_send_wait);
- return ret;
+ return -ENOTCONN;
}
/**
@@ -839,16 +872,10 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
* in sc_sges[0], and the RPC xdr_buf is prepared in following sges.
*
* Depending on whether a Write list or Reply chunk is present,
- * the server may send all, a portion of, or none of the xdr_buf.
+ * the server may Send all, a portion of, or none of the xdr_buf.
* In the latter case, only the transport header (sc_sges[0]) is
* transmitted.
*
- * RDMA Send is the last step of transmitting an RPC reply. Pages
- * involved in the earlier RDMA Writes are here transferred out
- * of the rqstp and into the sctxt's page array. These pages are
- * DMA unmapped by each Write completion, but the subsequent Send
- * completion finally releases these pages.
- *
* Assumptions:
* - The Reply's transport header will never be larger than a page.
*/
@@ -857,6 +884,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
const struct svc_rdma_recv_ctxt *rctxt,
struct svc_rqst *rqstp)
{
+ struct ib_send_wr *send_wr = &sctxt->sc_send_wr;
int ret;
ret = svc_rdma_map_reply_msg(rdma, sctxt, &rctxt->rc_write_pcl,
@@ -864,16 +892,19 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
if (ret < 0)
return ret;
+ /* Transfer pages involved in RDMA Writes to the sctxt's
+ * page array. Completion handling releases these pages.
+ */
svc_rdma_save_io_pages(rqstp, sctxt);
if (rctxt->rc_inv_rkey) {
- sctxt->sc_send_wr.opcode = IB_WR_SEND_WITH_INV;
- sctxt->sc_send_wr.ex.invalidate_rkey = rctxt->rc_inv_rkey;
+ send_wr->opcode = IB_WR_SEND_WITH_INV;
+ send_wr->ex.invalidate_rkey = rctxt->rc_inv_rkey;
} else {
- sctxt->sc_send_wr.opcode = IB_WR_SEND;
+ send_wr->opcode = IB_WR_SEND;
}
- return svc_rdma_send(rdma, sctxt);
+ return svc_rdma_post_send(rdma, sctxt);
}
/**
@@ -937,7 +968,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
sctxt->sc_send_wr.num_sge = 1;
sctxt->sc_send_wr.opcode = IB_WR_SEND;
sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len;
- if (svc_rdma_send(rdma, sctxt))
+ if (svc_rdma_post_send(rdma, sctxt))
goto put_ctxt;
return;
@@ -984,10 +1015,20 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
if (!p)
goto put_ctxt;
- ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res);
+ ret = svc_rdma_prepare_write_list(rdma, &rctxt->rc_write_pcl, sctxt,
+ &rqstp->rq_res);
if (ret < 0)
- goto reply_chunk;
- rc_size = ret;
+ goto put_ctxt;
+
+ rc_size = 0;
+ if (!pcl_is_empty(&rctxt->rc_reply_pcl)) {
+ ret = svc_rdma_prepare_reply_chunk(rdma, &rctxt->rc_write_pcl,
+ &rctxt->rc_reply_pcl, sctxt,
+ &rqstp->rq_res);
+ if (ret < 0)
+ goto reply_chunk;
+ rc_size = ret;
+ }
*p++ = *rdma_argp;
*p++ = *(rdma_argp + 1);
@@ -1030,45 +1071,33 @@ drop_connection:
/**
* svc_rdma_result_payload - special processing for a result payload
- * @rqstp: svc_rqst to operate on
- * @offset: payload's byte offset in @xdr
+ * @rqstp: RPC transaction context
+ * @offset: payload's byte offset in @rqstp->rq_res
* @length: size of payload, in bytes
*
+ * Assign the passed-in result payload to the current Write chunk,
+ * and advance to cur_result_payload to the next Write chunk, if
+ * there is one.
+ *
* Return values:
* %0 if successful or nothing needed to be done
- * %-EMSGSIZE on XDR buffer overflow
* %-E2BIG if the payload was larger than the Write chunk
- * %-EINVAL if client provided too many segments
- * %-ENOMEM if rdma_rw context pool was exhausted
- * %-ENOTCONN if posting failed (connection is lost)
- * %-EIO if rdma_rw initialization failed (DMA mapping, etc)
*/
int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
unsigned int length)
{
struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
struct svc_rdma_chunk *chunk;
- struct svcxprt_rdma *rdma;
- struct xdr_buf subbuf;
- int ret;
chunk = rctxt->rc_cur_result_payload;
if (!length || !chunk)
return 0;
rctxt->rc_cur_result_payload =
pcl_next_chunk(&rctxt->rc_write_pcl, chunk);
+
if (length > chunk->ch_length)
return -E2BIG;
-
chunk->ch_position = offset;
chunk->ch_payload_length = length;
-
- if (xdr_buf_subsegment(&rqstp->rq_res, &subbuf, offset, length))
- return -EMSGSIZE;
-
- rdma = container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
- ret = svc_rdma_send_write_chunk(rdma, chunk, &subbuf);
- if (ret < 0)
- return ret;
return 0;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 4f27325ace4a..2b1c16b9547d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -415,15 +415,20 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
if (newxprt->sc_max_send_sges > dev->attrs.max_send_sge)
newxprt->sc_max_send_sges = dev->attrs.max_send_sge;
rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests +
- newxprt->sc_recv_batch;
+ newxprt->sc_recv_batch + 1 /* drain */;
if (rq_depth > dev->attrs.max_qp_wr) {
rq_depth = dev->attrs.max_qp_wr;
newxprt->sc_recv_batch = 1;
newxprt->sc_max_requests = rq_depth - 2;
newxprt->sc_max_bc_requests = 2;
}
- ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
- ctxts *= newxprt->sc_max_requests;
+
+ /* Arbitrarily estimate the number of rw_ctxs needed for
+ * this transport. This is enough rw_ctxs to make forward
+ * progress even if the client is using one rkey per page
+ * in each Read chunk.
+ */
+ ctxts = 3 * RPCSVC_MAXPAGES;
newxprt->sc_sq_depth = rq_depth + ctxts;
if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr)
newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
@@ -460,12 +465,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.cap.max_send_wr, qp_attr.cap.max_recv_wr);
dprintk(" cap.max_send_sge = %d, cap.max_recv_sge = %d\n",
qp_attr.cap.max_send_sge, qp_attr.cap.max_recv_sge);
-
+ dprintk(" send CQ depth = %u, recv CQ depth = %u\n",
+ newxprt->sc_sq_depth, rq_depth);
ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr);
if (ret) {
trace_svcrdma_qp_err(newxprt, ret);
goto errout;
}
+ newxprt->sc_max_send_sges = qp_attr.cap.max_send_sge;
newxprt->sc_qp = newxprt->sc_cm_id->qp;
if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS))
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 58f3dc8d0d71..d92c13e78a56 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2987,20 +2987,11 @@ static int bc_send_request(struct rpc_rqst *req)
return len;
}
-/*
- * The close routine. Since this is client initiated, we do nothing
- */
-
static void bc_close(struct rpc_xprt *xprt)
{
xprt_disconnect_done(xprt);
}
-/*
- * The xprt destroy routine. Again, because this connection is client
- * initiated, we do nothing
- */
-
static void bc_destroy(struct rpc_xprt *xprt)
{
dprintk("RPC: bc_destroy xprt %p\n", xprt);