From 9eb190fca8f9056ea4502526dc55fe52318d9afc Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 20 Jul 2018 18:19:17 -0400 Subject: NFSD CB_OFFLOAD xdr Signed-off-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4callback.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/state.h | 1 + fs/nfsd/xdr4.h | 6 ++++ fs/nfsd/xdr4cb.h | 10 ++++++ 4 files changed, 115 insertions(+) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 601bf33c26a0..25987bcdf96f 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -39,6 +39,7 @@ #include "state.h" #include "netns.h" #include "xdr4cb.h" +#include "xdr4.h" #define NFSDDBG_FACILITY NFSDDBG_PROC @@ -105,6 +106,7 @@ enum nfs_cb_opnum4 { OP_CB_WANTS_CANCELLED = 12, OP_CB_NOTIFY_LOCK = 13, OP_CB_NOTIFY_DEVICEID = 14, + OP_CB_OFFLOAD = 15, OP_CB_ILLEGAL = 10044 }; @@ -682,6 +684,101 @@ static int nfs4_xdr_dec_cb_notify_lock(struct rpc_rqst *rqstp, return decode_cb_op_status(xdr, OP_CB_NOTIFY_LOCK, &cb->cb_status); } +/* + * struct write_response4 { + * stateid4 wr_callback_id<1>; + * length4 wr_count; + * stable_how4 wr_committed; + * verifier4 wr_writeverf; + * }; + * union offload_info4 switch (nfsstat4 coa_status) { + * case NFS4_OK: + * write_response4 coa_resok4; + * default: + * length4 coa_bytes_copied; + * }; + * struct CB_OFFLOAD4args { + * nfs_fh4 coa_fh; + * stateid4 coa_stateid; + * offload_info4 coa_offload_info; + * }; + */ +static void encode_offload_info4(struct xdr_stream *xdr, + __be32 nfserr, + const struct nfsd4_copy *cp) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 4); + *p++ = nfserr; + if (!nfserr) { + p = xdr_reserve_space(xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE); + p = xdr_encode_empty_array(p); + p = xdr_encode_hyper(p, cp->cp_res.wr_bytes_written); + *p++ = cpu_to_be32(cp->cp_res.wr_stable_how); + p = xdr_encode_opaque_fixed(p, cp->cp_res.wr_verifier.data, + NFS4_VERIFIER_SIZE); + } else { + p = xdr_reserve_space(xdr, 8); + /* We always return success if bytes were written */ + p = xdr_encode_hyper(p, 0); + } +} + +static void encode_cb_offload4args(struct xdr_stream *xdr, + __be32 nfserr, + const struct knfsd_fh *fh, + const struct nfsd4_copy *cp, + struct nfs4_cb_compound_hdr *hdr) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 4); + *p++ = cpu_to_be32(OP_CB_OFFLOAD); + encode_nfs_fh4(xdr, fh); + encode_stateid4(xdr, &cp->cp_res.cb_stateid); + encode_offload_info4(xdr, nfserr, cp); + + hdr->nops++; +} + +static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, + struct xdr_stream *xdr, + const void *data) +{ + const struct nfsd4_callback *cb = data; + const struct nfsd4_copy *cp = + container_of(cb, struct nfsd4_copy, cp_cb); + struct nfs4_cb_compound_hdr hdr = { + .ident = 0, + .minorversion = cb->cb_clp->cl_minorversion, + }; + + encode_cb_compound4args(xdr, &hdr); + encode_cb_sequence4args(xdr, cb, &hdr); + encode_cb_offload4args(xdr, cp->nfserr, &cp->fh, cp, &hdr); + encode_cb_nops(&hdr); +} + +static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfsd4_callback *cb = data; + struct nfs4_cb_compound_hdr hdr; + int status; + + status = decode_cb_compound4res(xdr, &hdr); + if (unlikely(status)) + return status; + + if (cb) { + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status || cb->cb_seq_status)) + return status; + } + return decode_cb_op_status(xdr, OP_CB_OFFLOAD, &cb->cb_status); +} /* * RPC procedure tables */ @@ -703,6 +800,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = { PROC(CB_LAYOUT, COMPOUND, cb_layout, cb_layout), #endif PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock), + PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), }; static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 0b15dac7e609..6e38d9927448 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -573,6 +573,7 @@ enum nfsd4_cb_op { NFSPROC4_CLNT_CB_NULL = 0, NFSPROC4_CLNT_CB_RECALL, NFSPROC4_CLNT_CB_LAYOUT, + NFSPROC4_CLNT_CB_OFFLOAD, NFSPROC4_CLNT_CB_SEQUENCE, NFSPROC4_CLNT_CB_NOTIFY_LOCK, }; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 17c453a7999c..b7c34f4a1222 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -511,6 +511,7 @@ struct nfsd42_write_res { u64 wr_bytes_written; u32 wr_stable_how; nfs4_verifier wr_verifier; + stateid_t cb_stateid; }; struct nfsd4_copy { @@ -526,6 +527,11 @@ struct nfsd4_copy { /* response */ struct nfsd42_write_res cp_res; + + /* for cb_offload */ + struct nfsd4_callback cp_cb; + __be32 nfserr; + struct knfsd_fh fh; }; struct nfsd4_seek { diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index 517239af0302..547cf07cf4e0 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h @@ -38,3 +38,13 @@ #define NFS4_dec_cb_notify_lock_sz (cb_compound_dec_hdr_sz + \ cb_sequence_dec_sz + \ op_dec_sz) +#define enc_cb_offload_info_sz (1 + 1 + 2 + 1 + \ + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) +#define NFS4_enc_cb_offload_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + enc_nfs4_fh_sz + \ + enc_stateid_sz + \ + enc_cb_offload_info_sz) +#define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + \ + op_dec_sz) -- cgit v1.2.3 From 6308bc98e86ee8c7bbd56a39839a257a16c9378c Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 20 Jul 2018 18:19:18 -0400 Subject: NFSD OFFLOAD_STATUS xdr Signed-off-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 20 ++++++++++++++++++++ fs/nfsd/nfs4xdr.c | 27 +++++++++++++++++++++++++-- fs/nfsd/xdr4.h | 10 ++++++++++ 3 files changed, 55 insertions(+), 2 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index b7bc6e1a85ac..aa05aa3947e0 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1144,6 +1144,13 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, fput(file); return status; } +static __be32 +nfsd4_offload_status(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + union nfsd4_op_u *u) +{ + return nfserr_notsupp; +} static __be32 nfsd4_allocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -2047,6 +2054,14 @@ static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) 1 /* cr_synchronous */) * sizeof(__be32); } +static inline u32 nfsd4_offload_status_rsize(struct svc_rqst *rqstp, + struct nfsd4_op *op) +{ + return (op_encode_hdr_size + + 2 /* osr_count */ + + 1 /* osr_complete<1> optional 0 for now */) * sizeof(__be32); +} + #ifdef CONFIG_NFSD_PNFS static inline u32 nfsd4_getdeviceinfo_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op) { @@ -2460,6 +2475,11 @@ static const struct nfsd4_operation nfsd4_ops[] = { .op_name = "OP_SEEK", .op_rsize_bop = nfsd4_seek_rsize, }, + [OP_OFFLOAD_STATUS] = { + .op_func = nfsd4_offload_status, + .op_name = "OP_OFFLOAD_STATUS", + .op_rsize_bop = nfsd4_offload_status_rsize, + }, }; /** diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 418fa9c78186..67b7e6f19ed6 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1767,6 +1767,13 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy) DECODE_TAIL; } +static __be32 +nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp, + struct nfsd4_offload_status *os) +{ + return nfsd4_decode_stateid(argp, &os->stateid); +} + static __be32 nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek) { @@ -1874,7 +1881,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = { [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status, [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp, @@ -4256,6 +4263,22 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, return 0; } +static __be32 +nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr, + struct nfsd4_offload_status *os) +{ + struct xdr_stream *xdr = &resp->xdr; + __be32 *p; + + p = xdr_reserve_space(xdr, 8 + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_hyper(p, os->count); + *p++ = cpu_to_be32(0); + + return nfserr; +} + static __be32 nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_seek *seek) @@ -4359,7 +4382,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = { [OP_LAYOUTERROR] = (nfsd4_enc)nfsd4_encode_noop, [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop, [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop, - [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_noop, + [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status, [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop, [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek, [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop, diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index b7c34f4a1222..06cf218944c5 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -545,6 +545,15 @@ struct nfsd4_seek { loff_t seek_pos; }; +struct nfsd4_offload_status { + /* request */ + stateid_t stateid; + + /* response */ + u64 count; + u32 status; +}; + struct nfsd4_op { int opnum; const struct nfsd4_operation * opdesc; @@ -603,6 +612,7 @@ struct nfsd4_op { struct nfsd4_fallocate deallocate; struct nfsd4_clone clone; struct nfsd4_copy copy; + struct nfsd4_offload_status offload_status; struct nfsd4_seek seek; } u; struct nfs4_replay * replay; -- cgit v1.2.3 From 885e2bf3ea5121975ade0d7866ab6226a8547dc9 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 20 Jul 2018 18:19:19 -0400 Subject: NFSD OFFLOAD_CANCEL xdr Signed-off-by: Olga Kornievskaia Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 14 ++++++++++++++ fs/nfsd/nfs4xdr.c | 2 +- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index aa05aa3947e0..0c7832321010 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1122,6 +1122,14 @@ out: return status; } +static __be32 +nfsd4_offload_cancel(struct svc_rqst *rqstp, + struct nfsd4_compound_state *cstate, + union nfsd4_op_u *u) +{ + return 0; +} + static __be32 nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_fallocate *fallocate, int flags) @@ -2480,6 +2488,12 @@ static const struct nfsd4_operation nfsd4_ops[] = { .op_name = "OP_OFFLOAD_STATUS", .op_rsize_bop = nfsd4_offload_status_rsize, }, + [OP_OFFLOAD_CANCEL] = { + .op_func = nfsd4_offload_cancel, + .op_flags = OP_MODIFIES_SOMETHING, + .op_name = "OP_OFFLOAD_CANCEL", + .op_rsize_bop = nfsd4_only_status_rsize, + }, }; /** diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 67b7e6f19ed6..b78280a8af73 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1880,7 +1880,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = { [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_LAYOUTERROR] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status, [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status, [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek, -- cgit v1.2.3 From e0639dc5805a9d4faaa2c07ad98fa853b9529dd3 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 20 Jul 2018 18:19:20 -0400 Subject: NFSD introduce async copy feature Upon receiving a request for async copy, create a new kthread. If we get asynchronous request, make sure to copy the needed arguments/state from the stack before starting the copy. Then start the thread and reply back to the client indicating copy is asynchronous. nfsd_copy_file_range() will copy in a loop over the total number of bytes is needed to copy. In case a failure happens in the middle, we ignore the error and return how much we copied so far. Once done creating a workitem for the callback workqueue and send CB_OFFLOAD with the results. The lifetime of the copy stateid is bound to the vfs copy. This way we don't need to keep the nfsd_net structure for the callback. We could keep it around longer so that an OFFLOAD_STATUS that came late would still get results, but clients should be able to deal without that. We handle OFFLOAD_CANCEL by sending a signal to the copy thread and calling kthread_stop. A client should cancel any ongoing copies before calling DESTROY_CLIENT; if not, we return a CLIENT_BUSY error. If the client is destroyed for some other reason (lease expiration, or server shutdown), we must clean up any ongoing copies ourselves. Signed-off-by: Olga Kornievskaia [colin.king@canonical.com: fix leak in error case] [bfields@fieldses.org: remove signalling, merge patches] Signed-off-by: J. Bruce Fields --- fs/nfsd/netns.h | 8 ++ fs/nfsd/nfs4proc.c | 261 ++++++++++++++++++++++++++++++++++++++++++++++++---- fs/nfsd/nfs4state.c | 38 +++++++- fs/nfsd/nfs4xdr.c | 21 ++++- fs/nfsd/nfsctl.c | 1 + fs/nfsd/state.h | 9 ++ fs/nfsd/xdr4.h | 12 +++ 7 files changed, 326 insertions(+), 24 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 426f55005697..32cb8c027483 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -123,6 +123,14 @@ struct nfsd_net { wait_queue_head_t ntf_wq; atomic_t ntf_refcnt; + + /* + * clientid and stateid data for construction of net unique COPY + * stateids. + */ + u32 s2s_cp_cl_id; + struct idr s2s_cp_stateids; + spinlock_t s2s_cp_lock; }; /* Simple check to find out if a given net was properly initialized */ diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 0c7832321010..edff074d38c7 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "idmap.h" #include "cache.h" @@ -1089,37 +1090,236 @@ out: return status; } +void nfs4_put_copy(struct nfsd4_copy *copy) +{ + if (!refcount_dec_and_test(©->refcount)) + return; + kfree(copy); +} + +static bool +check_and_set_stop_copy(struct nfsd4_copy *copy) +{ + bool value; + + spin_lock(©->cp_clp->async_lock); + value = copy->stopped; + if (!copy->stopped) + copy->stopped = true; + spin_unlock(©->cp_clp->async_lock); + return value; +} + +static void nfsd4_stop_copy(struct nfsd4_copy *copy) +{ + /* only 1 thread should stop the copy */ + if (!check_and_set_stop_copy(copy)) + kthread_stop(copy->copy_task); + nfs4_put_copy(copy); +} + +static struct nfsd4_copy *nfsd4_get_copy(struct nfs4_client *clp) +{ + struct nfsd4_copy *copy = NULL; + + spin_lock(&clp->async_lock); + if (!list_empty(&clp->async_copies)) { + copy = list_first_entry(&clp->async_copies, struct nfsd4_copy, + copies); + refcount_inc(©->refcount); + } + spin_unlock(&clp->async_lock); + return copy; +} + +void nfsd4_shutdown_copy(struct nfs4_client *clp) +{ + struct nfsd4_copy *copy; + + while ((copy = nfsd4_get_copy(clp)) != NULL) + nfsd4_stop_copy(copy); +} + +static void nfsd4_cb_offload_release(struct nfsd4_callback *cb) +{ + struct nfsd4_copy *copy = container_of(cb, struct nfsd4_copy, cp_cb); + + nfs4_put_copy(copy); +} + +static int nfsd4_cb_offload_done(struct nfsd4_callback *cb, + struct rpc_task *task) +{ + return 1; +} + +static const struct nfsd4_callback_ops nfsd4_cb_offload_ops = { + .release = nfsd4_cb_offload_release, + .done = nfsd4_cb_offload_done +}; + +static void nfsd4_init_copy_res(struct nfsd4_copy *copy, bool sync) +{ + copy->cp_res.wr_stable_how = NFS_UNSTABLE; + copy->cp_synchronous = sync; + gen_boot_verifier(©->cp_res.wr_verifier, copy->cp_clp->net); +} + +static ssize_t _nfsd_copy_file_range(struct nfsd4_copy *copy) +{ + ssize_t bytes_copied = 0; + size_t bytes_total = copy->cp_count; + u64 src_pos = copy->cp_src_pos; + u64 dst_pos = copy->cp_dst_pos; + + do { + if (kthread_should_stop()) + break; + bytes_copied = nfsd_copy_file_range(copy->file_src, src_pos, + copy->file_dst, dst_pos, bytes_total); + if (bytes_copied <= 0) + break; + bytes_total -= bytes_copied; + copy->cp_res.wr_bytes_written += bytes_copied; + src_pos += bytes_copied; + dst_pos += bytes_copied; + } while (bytes_total > 0 && !copy->cp_synchronous); + return bytes_copied; +} + +static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync) +{ + __be32 status; + ssize_t bytes; + + bytes = _nfsd_copy_file_range(copy); + /* for async copy, we ignore the error, client can always retry + * to get the error + */ + if (bytes < 0 && !copy->cp_res.wr_bytes_written) + status = nfserrno(bytes); + else { + nfsd4_init_copy_res(copy, sync); + status = nfs_ok; + } + + fput(copy->file_src); + fput(copy->file_dst); + return status; +} + +static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) +{ + dst->cp_src_pos = src->cp_src_pos; + dst->cp_dst_pos = src->cp_dst_pos; + dst->cp_count = src->cp_count; + dst->cp_synchronous = src->cp_synchronous; + memcpy(&dst->cp_res, &src->cp_res, sizeof(src->cp_res)); + memcpy(&dst->fh, &src->fh, sizeof(src->fh)); + dst->cp_clp = src->cp_clp; + dst->file_dst = get_file(src->file_dst); + dst->file_src = get_file(src->file_src); + memcpy(&dst->cp_stateid, &src->cp_stateid, sizeof(src->cp_stateid)); +} + +static void cleanup_async_copy(struct nfsd4_copy *copy) +{ + nfs4_free_cp_state(copy); + fput(copy->file_dst); + fput(copy->file_src); + spin_lock(©->cp_clp->async_lock); + list_del(©->copies); + spin_unlock(©->cp_clp->async_lock); + nfs4_put_copy(copy); +} + +static int nfsd4_do_async_copy(void *data) +{ + struct nfsd4_copy *copy = (struct nfsd4_copy *)data; + struct nfsd4_copy *cb_copy; + + copy->nfserr = nfsd4_do_copy(copy, 0); + cb_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); + if (!cb_copy) + goto out; + memcpy(&cb_copy->cp_res, ©->cp_res, sizeof(copy->cp_res)); + cb_copy->cp_clp = copy->cp_clp; + cb_copy->nfserr = copy->nfserr; + memcpy(&cb_copy->fh, ©->fh, sizeof(copy->fh)); + nfsd4_init_cb(&cb_copy->cp_cb, cb_copy->cp_clp, + &nfsd4_cb_offload_ops, NFSPROC4_CLNT_CB_OFFLOAD); + nfsd4_run_cb(&cb_copy->cp_cb); +out: + cleanup_async_copy(copy); + return 0; +} + static __be32 nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { struct nfsd4_copy *copy = &u->copy; - struct file *src, *dst; __be32 status; - ssize_t bytes; + struct nfsd4_copy *async_copy = NULL; - status = nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, &src, - ©->cp_dst_stateid, &dst); + status = nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, + ©->file_src, ©->cp_dst_stateid, + ©->file_dst); if (status) goto out; - bytes = nfsd_copy_file_range(src, copy->cp_src_pos, - dst, copy->cp_dst_pos, copy->cp_count); + copy->cp_clp = cstate->clp; + memcpy(©->fh, &cstate->current_fh.fh_handle, + sizeof(struct knfsd_fh)); + if (!copy->cp_synchronous) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); - if (bytes < 0) - status = nfserrno(bytes); - else { - copy->cp_res.wr_bytes_written = bytes; - copy->cp_res.wr_stable_how = NFS_UNSTABLE; - copy->cp_synchronous = 1; - gen_boot_verifier(©->cp_res.wr_verifier, SVC_NET(rqstp)); + status = nfserrno(-ENOMEM); + async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); + if (!async_copy) + goto out; + if (!nfs4_init_cp_state(nn, copy)) { + kfree(async_copy); + goto out; + } + refcount_set(&async_copy->refcount, 1); + memcpy(©->cp_res.cb_stateid, ©->cp_stateid, + sizeof(copy->cp_stateid)); + dup_copy_fields(copy, async_copy); + async_copy->copy_task = kthread_create(nfsd4_do_async_copy, + async_copy, "%s", "copy thread"); + if (IS_ERR(async_copy->copy_task)) + goto out_err; + spin_lock(&async_copy->cp_clp->async_lock); + list_add(&async_copy->copies, + &async_copy->cp_clp->async_copies); + spin_unlock(&async_copy->cp_clp->async_lock); + wake_up_process(async_copy->copy_task); status = nfs_ok; - } - - fput(src); - fput(dst); + } else + status = nfsd4_do_copy(copy, 1); out: return status; +out_err: + cleanup_async_copy(async_copy); + goto out; +} + +struct nfsd4_copy * +find_async_copy(struct nfs4_client *clp, stateid_t *stateid) +{ + struct nfsd4_copy *copy; + + spin_lock(&clp->async_lock); + list_for_each_entry(copy, &clp->async_copies, copies) { + if (memcmp(©->cp_stateid, stateid, NFS4_STATEID_SIZE)) + continue; + refcount_inc(©->refcount); + spin_unlock(&clp->async_lock); + return copy; + } + spin_unlock(&clp->async_lock); + return NULL; } static __be32 @@ -1127,7 +1327,18 @@ nfsd4_offload_cancel(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { - return 0; + struct nfsd4_offload_status *os = &u->offload_status; + __be32 status = 0; + struct nfsd4_copy *copy; + struct nfs4_client *clp = cstate->clp; + + copy = find_async_copy(clp, &os->stateid); + if (copy) + nfsd4_stop_copy(copy); + else + status = nfserr_bad_stateid; + + return status; } static __be32 @@ -1157,7 +1368,19 @@ nfsd4_offload_status(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) { - return nfserr_notsupp; + struct nfsd4_offload_status *os = &u->offload_status; + __be32 status = 0; + struct nfsd4_copy *copy; + struct nfs4_client *clp = cstate->clp; + + copy = find_async_copy(clp, &os->stateid); + if (copy) { + os->count = copy->cp_res.wr_bytes_written; + nfs4_put_copy(copy); + } else + status = nfserr_bad_stateid; + + return status; } static __be32 diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index b0ca0efd2875..07a57d024f95 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -713,6 +713,36 @@ out_free: return NULL; } +/* + * Create a unique stateid_t to represent each COPY. + */ +int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy) +{ + int new_id; + + idr_preload(GFP_KERNEL); + spin_lock(&nn->s2s_cp_lock); + new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT); + spin_unlock(&nn->s2s_cp_lock); + idr_preload_end(); + if (new_id < 0) + return 0; + copy->cp_stateid.si_opaque.so_id = new_id; + copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time; + copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; + return 1; +} + +void nfs4_free_cp_state(struct nfsd4_copy *copy) +{ + struct nfsd_net *nn; + + nn = net_generic(copy->cp_clp->net, nfsd_net_id); + spin_lock(&nn->s2s_cp_lock); + idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id); + spin_unlock(&nn->s2s_cp_lock); +} + static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) { struct nfs4_stid *stid; @@ -1827,6 +1857,8 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) #ifdef CONFIG_NFSD_PNFS INIT_LIST_HEAD(&clp->cl_lo_states); #endif + INIT_LIST_HEAD(&clp->async_copies); + spin_lock_init(&clp->async_lock); spin_lock_init(&clp->cl_lock); rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); return clp; @@ -1942,6 +1974,7 @@ __destroy_client(struct nfs4_client *clp) } } nfsd4_return_all_client_layouts(clp); + nfsd4_shutdown_copy(clp); nfsd4_shutdown_callback(clp); if (clp->cl_cb_conn.cb_xprt) svc_xprt_put(clp->cl_cb_conn.cb_xprt); @@ -2475,7 +2508,8 @@ static bool client_has_state(struct nfs4_client *clp) || !list_empty(&clp->cl_lo_states) #endif || !list_empty(&clp->cl_delegations) - || !list_empty(&clp->cl_sessions); + || !list_empty(&clp->cl_sessions) + || !list_empty(&clp->async_copies); } __be32 @@ -7161,6 +7195,8 @@ static int nfs4_state_create_net(struct net *net) INIT_LIST_HEAD(&nn->close_lru); INIT_LIST_HEAD(&nn->del_recall_lru); spin_lock_init(&nn->client_lock); + spin_lock_init(&nn->s2s_cp_lock); + idr_init(&nn->s2s_cp_stateids); spin_lock_init(&nn->blocked_locks_lock); INIT_LIST_HEAD(&nn->blocked_locks_lru); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index b78280a8af73..3de42a729093 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4231,15 +4231,27 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, #endif /* CONFIG_NFSD_PNFS */ static __be32 -nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct nfsd42_write_res *write) +nfsd42_encode_write_res(struct nfsd4_compoundres *resp, + struct nfsd42_write_res *write, bool sync) { __be32 *p; + p = xdr_reserve_space(&resp->xdr, 4); + if (!p) + return nfserr_resource; - p = xdr_reserve_space(&resp->xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE); + if (sync) + *p++ = cpu_to_be32(0); + else { + __be32 nfserr; + *p++ = cpu_to_be32(1); + nfserr = nfsd4_encode_stateid(&resp->xdr, &write->cb_stateid); + if (nfserr) + return nfserr; + } + p = xdr_reserve_space(&resp->xdr, 8 + 4 + NFS4_VERIFIER_SIZE); if (!p) return nfserr_resource; - *p++ = cpu_to_be32(0); p = xdr_encode_hyper(p, write->wr_bytes_written); *p++ = cpu_to_be32(write->wr_stable_how); p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, @@ -4253,7 +4265,8 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, { __be32 *p; - nfserr = nfsd42_encode_write_res(resp, ©->cp_res); + nfserr = nfsd42_encode_write_res(resp, ©->cp_res, + copy->cp_synchronous); if (nfserr) return nfserr; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 7fb9f7c667b1..6384c9b94898 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1242,6 +1242,7 @@ static __net_init int nfsd_init_net(struct net *net) nn->somebody_reclaimed = false; nn->clverifier_counter = prandom_u32(); nn->clientid_counter = prandom_u32(); + nn->s2s_cp_cl_id = nn->clientid_counter++; atomic_set(&nn->ntf_refcnt, 0); init_waitqueue_head(&nn->ntf_wq); diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 6e38d9927448..6aacb325b6a0 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -355,6 +355,8 @@ struct nfs4_client { struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */ /* wait here for slots */ struct net *net; + struct list_head async_copies; /* list of async copies */ + spinlock_t async_lock; /* lock for async copies */ }; /* struct nfs4_client_reset @@ -600,6 +602,7 @@ struct nfsd4_blocked_lock { struct nfsd4_compound_state; struct nfsd_net; +struct nfsd4_copy; extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct svc_fh *fhp, @@ -609,6 +612,8 @@ __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, struct nfs4_stid **s, struct nfsd_net *nn); struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, void (*sc_free)(struct nfs4_stid *)); +int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy); +void nfs4_free_cp_state(struct nfsd4_copy *copy); void nfs4_unhash_stid(struct nfs4_stid *s); void nfs4_put_stid(struct nfs4_stid *s); void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); @@ -627,6 +632,7 @@ extern void nfsd4_run_cb(struct nfsd4_callback *cb); extern int nfsd4_create_callback_queue(void); extern void nfsd4_destroy_callback_queue(void); extern void nfsd4_shutdown_callback(struct nfs4_client *); +extern void nfsd4_shutdown_copy(struct nfs4_client *clp); extern void nfsd4_prepare_cb_recall(struct nfs4_delegation *dp); extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, struct nfsd_net *nn); @@ -634,6 +640,9 @@ extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); struct nfs4_file *find_file(struct knfsd_fh *fh); void put_nfs4_file(struct nfs4_file *fi); +extern void nfs4_put_copy(struct nfsd4_copy *copy); +extern struct nfsd4_copy * +find_async_copy(struct nfs4_client *clp, stateid_t *staetid); static inline void get_nfs4_file(struct nfs4_file *fi) { refcount_inc(&fi->fi_ref); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 06cf218944c5..feeb6d4bdffd 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -532,6 +532,18 @@ struct nfsd4_copy { struct nfsd4_callback cp_cb; __be32 nfserr; struct knfsd_fh fh; + + struct nfs4_client *cp_clp; + + struct file *file_src; + struct file *file_dst; + + stateid_t cp_stateid; + + struct list_head copies; + struct task_struct *copy_task; + refcount_t refcount; + bool stopped; }; struct nfsd4_seek { -- cgit v1.2.3 From 7d20b6a2728fe3ea9fa8f4fc49cd438fcc781dd1 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Tue, 25 Sep 2018 11:22:53 +0000 Subject: nfsd: remove set but not used variable 'dirp' Fixes gcc '-Wunused-but-set-variable' warning: fs/nfsd/vfs.c: In function 'nfsd_create': fs/nfsd/vfs.c:1279:16: warning: variable 'dirp' set but not used [-Wunused-but-set-variable] Signed-off-by: YueHaibing Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 55a099e47ba2..ddbac8776bd3 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1275,7 +1275,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, dev_t rdev, struct svc_fh *resfhp) { struct dentry *dentry, *dchild = NULL; - struct inode *dirp; __be32 err; int host_err; @@ -1287,7 +1286,6 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; dentry = fhp->fh_dentry; - dirp = d_inode(dentry); host_err = fh_want_write(fhp); if (host_err) -- cgit v1.2.3 From 9ceddd9da13434a5906255c0fc528c385aded283 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:47 -0400 Subject: knfsd: Allow lockless lookups of the exports Convert structs svc_expkey and svc_export to allow RCU protected lookups. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/export.c | 14 +++++++------- fs/nfsd/export.h | 2 ++ 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index a1143f7c2201..802993d8912f 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -46,7 +46,7 @@ static void expkey_put(struct kref *ref) !test_bit(CACHE_NEGATIVE, &key->h.flags)) path_put(&key->ek_path); auth_domain_put(key->ek_client); - kfree(key); + kfree_rcu(key, ek_rcu); } static void expkey_request(struct cache_detail *cd, @@ -265,7 +265,7 @@ svc_expkey_lookup(struct cache_detail *cd, struct svc_expkey *item) struct cache_head *ch; int hash = svc_expkey_hash(item); - ch = sunrpc_cache_lookup(cd, &item->h, hash); + ch = sunrpc_cache_lookup_rcu(cd, &item->h, hash); if (ch) return container_of(ch, struct svc_expkey, h); else @@ -314,7 +314,7 @@ static void svc_export_put(struct kref *ref) auth_domain_put(exp->ex_client); nfsd4_fslocs_free(&exp->ex_fslocs); kfree(exp->ex_uuid); - kfree(exp); + kfree_rcu(exp, ex_rcu); } static void svc_export_request(struct cache_detail *cd, @@ -780,7 +780,7 @@ svc_export_lookup(struct svc_export *exp) struct cache_head *ch; int hash = svc_export_hash(exp); - ch = sunrpc_cache_lookup(exp->cd, &exp->h, hash); + ch = sunrpc_cache_lookup_rcu(exp->cd, &exp->h, hash); if (ch) return container_of(ch, struct svc_export, h); else @@ -1216,9 +1216,9 @@ static int e_show(struct seq_file *m, void *p) } const struct seq_operations nfs_exports_op = { - .start = cache_seq_start, - .next = cache_seq_next, - .stop = cache_seq_stop, + .start = cache_seq_start_rcu, + .next = cache_seq_next_rcu, + .stop = cache_seq_stop_rcu, .show = e_show, }; diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h index c8b74126ddaa..e7daa1f246f0 100644 --- a/fs/nfsd/export.h +++ b/fs/nfsd/export.h @@ -61,6 +61,7 @@ struct svc_export { u32 ex_layout_types; struct nfsd4_deviceid_map *ex_devid_map; struct cache_detail *cd; + struct rcu_head ex_rcu; }; /* an "export key" (expkey) maps a filehandlefragement to an @@ -75,6 +76,7 @@ struct svc_expkey { u32 ek_fsid[6]; struct path ek_path; + struct rcu_head ek_rcu; }; #define EX_ISSYNC(exp) (!((exp)->ex_flags & NFSEXP_ASYNC)) -- cgit v1.2.3 From 9d5afd9491c80779730686159aeec7fa06ead085 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:49 -0400 Subject: knfsd: Lockless lookup of NFSv4 identities. Enable RCU protected lookups of the NFSv4 idmap. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4idmap.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c index a5bb76593ce7..bf137fec33ff 100644 --- a/fs/nfsd/nfs4idmap.c +++ b/fs/nfsd/nfs4idmap.c @@ -65,6 +65,7 @@ struct ent { u32 id; char name[IDMAP_NAMESZ]; char authname[IDMAP_NAMESZ]; + struct rcu_head rcu_head; }; /* Common entry handling */ @@ -89,7 +90,7 @@ static void ent_put(struct kref *ref) { struct ent *map = container_of(ref, struct ent, h.ref); - kfree(map); + kfree_rcu(map, rcu_head); } static struct cache_head * @@ -264,8 +265,8 @@ out: static struct ent * idtoname_lookup(struct cache_detail *cd, struct ent *item) { - struct cache_head *ch = sunrpc_cache_lookup(cd, &item->h, - idtoname_hash(item)); + struct cache_head *ch = sunrpc_cache_lookup_rcu(cd, &item->h, + idtoname_hash(item)); if (ch) return container_of(ch, struct ent, h); else @@ -422,8 +423,8 @@ out: static struct ent * nametoid_lookup(struct cache_detail *cd, struct ent *item) { - struct cache_head *ch = sunrpc_cache_lookup(cd, &item->h, - nametoid_hash(item)); + struct cache_head *ch = sunrpc_cache_lookup_rcu(cd, &item->h, + nametoid_hash(item)); if (ch) return container_of(ch, struct ent, h); else -- cgit v1.2.3 From 3e87da5145fc25e18fb934eb496f4e7c4d038e71 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:54 -0400 Subject: knfsd: Remove dead code from nfsd_cache_lookup The preallocated cache entry is always set to type RC_NOCACHE, and that type isn't changed until we later call nfsd_cache_update(). Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfscache.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index dbdeb9d6af03..cef4686f87ef 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -446,14 +446,6 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) rp->c_csum = csum; lru_put_end(b, rp); - - /* release any buffer */ - if (rp->c_type == RC_REPLBUFF) { - drc_mem_usage -= rp->c_replvec.iov_len; - kfree(rp->c_replvec.iov_base); - rp->c_replvec.iov_base = NULL; - } - rp->c_type = RC_NOCACHE; out: spin_unlock(&b->cache_lock); return rtn; -- cgit v1.2.3 From 76ecec21197ab23bb821d8bf584949013efd0494 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:55 -0400 Subject: knfsd: Simplify NFS duplicate replay cache Simplify the duplicate replay cache by initialising the preallocated cache entry, so that we can use it as a key for the cache lookup. Note that the 99.999% case we want to optimise for is still the one where the lookup fails, and we have to add this entry to the cache, so preinitialising should not cause a performance penalty. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/nfscache.c | 94 +++++++++++++++++++++++++----------------------------- 1 file changed, 44 insertions(+), 50 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index cef4686f87ef..527ce4c65765 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -121,7 +121,7 @@ nfsd_cache_hash(__be32 xid) } static struct svc_cacherep * -nfsd_reply_cache_alloc(void) +nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum) { struct svc_cacherep *rp; @@ -130,6 +130,16 @@ nfsd_reply_cache_alloc(void) rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; INIT_LIST_HEAD(&rp->c_lru); + + rp->c_xid = rqstp->rq_xid; + rp->c_proc = rqstp->rq_proc; + memset(&rp->c_addr, 0, sizeof(rp->c_addr)); + rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp)); + rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp))); + rp->c_prot = rqstp->rq_prot; + rp->c_vers = rqstp->rq_vers; + rp->c_len = rqstp->rq_arg.len; + rp->c_csum = csum; } return rp; } @@ -141,9 +151,11 @@ nfsd_reply_cache_free_locked(struct svc_cacherep *rp) drc_mem_usage -= rp->c_replvec.iov_len; kfree(rp->c_replvec.iov_base); } - list_del(&rp->c_lru); - atomic_dec(&num_drc_entries); - drc_mem_usage -= sizeof(*rp); + if (rp->c_state != RC_UNUSED) { + list_del(&rp->c_lru); + atomic_dec(&num_drc_entries); + drc_mem_usage -= sizeof(*rp); + } kmem_cache_free(drc_slab, rp); } @@ -319,24 +331,23 @@ nfsd_cache_csum(struct svc_rqst *rqstp) } static bool -nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) +nfsd_cache_match(const struct svc_cacherep *key, const struct svc_cacherep *rp) { /* Check RPC XID first */ - if (rqstp->rq_xid != rp->c_xid) + if (key->c_xid != rp->c_xid) return false; /* compare checksum of NFS data */ - if (csum != rp->c_csum) { + if (key->c_csum != rp->c_csum) { ++payload_misses; return false; } /* Other discriminators */ - if (rqstp->rq_proc != rp->c_proc || - rqstp->rq_prot != rp->c_prot || - rqstp->rq_vers != rp->c_vers || - rqstp->rq_arg.len != rp->c_len || - !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) || - rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr)) + if (key->c_proc != rp->c_proc || + key->c_prot != rp->c_prot || + key->c_vers != rp->c_vers || + key->c_len != rp->c_len || + memcmp(&key->c_addr, &rp->c_addr, sizeof(key->c_addr)) != 0) return false; return true; @@ -345,19 +356,18 @@ nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp) /* * Search the request hash for an entry that matches the given rqstp. * Must be called with cache_lock held. Returns the found entry or - * NULL on failure. + * inserts an empty key on failure. */ static struct svc_cacherep * -nfsd_cache_search(struct nfsd_drc_bucket *b, struct svc_rqst *rqstp, - __wsum csum) +nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key) { - struct svc_cacherep *rp, *ret = NULL; + struct svc_cacherep *rp, *ret = key; struct list_head *rh = &b->lru_head; unsigned int entries = 0; list_for_each_entry(rp, rh, c_lru) { ++entries; - if (nfsd_cache_match(rqstp, csum, rp)) { + if (nfsd_cache_match(key, rp)) { ret = rp; break; } @@ -374,6 +384,7 @@ nfsd_cache_search(struct nfsd_drc_bucket *b, struct svc_rqst *rqstp, atomic_read(&num_drc_entries)); } + lru_put_end(b, ret); return ret; } @@ -389,9 +400,6 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) { struct svc_cacherep *rp, *found; __be32 xid = rqstp->rq_xid; - u32 proto = rqstp->rq_prot, - vers = rqstp->rq_vers, - proc = rqstp->rq_proc; __wsum csum; u32 hash = nfsd_cache_hash(xid); struct nfsd_drc_bucket *b = &drc_hashtbl[hash]; @@ -410,52 +418,38 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) * Since the common case is a cache miss followed by an insert, * preallocate an entry. */ - rp = nfsd_reply_cache_alloc(); - spin_lock(&b->cache_lock); - if (likely(rp)) { - atomic_inc(&num_drc_entries); - drc_mem_usage += sizeof(*rp); + rp = nfsd_reply_cache_alloc(rqstp, csum); + if (!rp) { + dprintk("nfsd: unable to allocate DRC entry!\n"); + return rtn; } - /* go ahead and prune the cache */ - prune_bucket(b); - - found = nfsd_cache_search(b, rqstp, csum); - if (found) { - if (likely(rp)) - nfsd_reply_cache_free_locked(rp); + spin_lock(&b->cache_lock); + found = nfsd_cache_insert(b, rp); + if (found != rp) { + nfsd_reply_cache_free_locked(rp); rp = found; goto found_entry; } - if (!rp) { - dprintk("nfsd: unable to allocate DRC entry!\n"); - goto out; - } - nfsdstats.rcmisses++; rqstp->rq_cacherep = rp; rp->c_state = RC_INPROG; - rp->c_xid = xid; - rp->c_proc = proc; - rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp)); - rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp))); - rp->c_prot = proto; - rp->c_vers = vers; - rp->c_len = rqstp->rq_arg.len; - rp->c_csum = csum; - lru_put_end(b, rp); + atomic_inc(&num_drc_entries); + drc_mem_usage += sizeof(*rp); + + /* go ahead and prune the cache */ + prune_bucket(b); out: spin_unlock(&b->cache_lock); return rtn; found_entry: - nfsdstats.rchits++; /* We found a matching entry which is either in progress or done. */ - lru_put_end(b, rp); - + nfsdstats.rchits++; rtn = RC_DROPIT; + /* Request being processed */ if (rp->c_state == RC_INPROG) goto out; -- cgit v1.2.3 From ed00c2f65267f3a5a8727ac74a90d32470f91679 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 3 Oct 2018 13:11:51 -0400 Subject: knfsd: Further simplify the cache lookup Order the structure so that the key can be compared using memcmp(). Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/cache.h | 19 +++++++++++-------- fs/nfsd/nfscache.c | 45 ++++++++++++++++----------------------------- 2 files changed, 27 insertions(+), 37 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index b7559c6f2b97..745c861237ca 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -19,18 +19,21 @@ * is much larger than a sockaddr_in6. */ struct svc_cacherep { - struct list_head c_lru; + struct { + /* Keep often-read xid, csum in the same cache line: */ + __be32 k_xid; + __wsum k_csum; + u32 k_proc; + u32 k_prot; + u32 k_vers; + unsigned int k_len; + struct sockaddr_in6 k_addr; + } c_key; + struct list_head c_lru; unsigned char c_state, /* unused, inprog, done */ c_type, /* status, buffer */ c_secure : 1; /* req came from port < 1024 */ - struct sockaddr_in6 c_addr; - __be32 c_xid; - u32 c_prot; - u32 c_proc; - u32 c_vers; - unsigned int c_len; - __wsum c_csum; unsigned long c_timestamp; union { struct kvec u_vec; diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 527ce4c65765..230cc83921ad 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -131,15 +131,15 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum) rp->c_type = RC_NOCACHE; INIT_LIST_HEAD(&rp->c_lru); - rp->c_xid = rqstp->rq_xid; - rp->c_proc = rqstp->rq_proc; - memset(&rp->c_addr, 0, sizeof(rp->c_addr)); - rpc_copy_addr((struct sockaddr *)&rp->c_addr, svc_addr(rqstp)); - rpc_set_port((struct sockaddr *)&rp->c_addr, rpc_get_port(svc_addr(rqstp))); - rp->c_prot = rqstp->rq_prot; - rp->c_vers = rqstp->rq_vers; - rp->c_len = rqstp->rq_arg.len; - rp->c_csum = csum; + memset(&rp->c_key, 0, sizeof(rp->c_key)); + rp->c_key.k_xid = rqstp->rq_xid; + rp->c_key.k_proc = rqstp->rq_proc; + rpc_copy_addr((struct sockaddr *)&rp->c_key.k_addr, svc_addr(rqstp)); + rpc_set_port((struct sockaddr *)&rp->c_key.k_addr, rpc_get_port(svc_addr(rqstp))); + rp->c_key.k_prot = rqstp->rq_prot; + rp->c_key.k_vers = rqstp->rq_vers; + rp->c_key.k_len = rqstp->rq_arg.len; + rp->c_key.k_csum = csum; } return rp; } @@ -330,27 +330,14 @@ nfsd_cache_csum(struct svc_rqst *rqstp) return csum; } -static bool -nfsd_cache_match(const struct svc_cacherep *key, const struct svc_cacherep *rp) +static int +nfsd_cache_key_cmp(const struct svc_cacherep *key, const struct svc_cacherep *rp) { - /* Check RPC XID first */ - if (key->c_xid != rp->c_xid) - return false; - /* compare checksum of NFS data */ - if (key->c_csum != rp->c_csum) { + if (key->c_key.k_xid == rp->c_key.k_xid && + key->c_key.k_csum != rp->c_key.k_csum) ++payload_misses; - return false; - } - - /* Other discriminators */ - if (key->c_proc != rp->c_proc || - key->c_prot != rp->c_prot || - key->c_vers != rp->c_vers || - key->c_len != rp->c_len || - memcmp(&key->c_addr, &rp->c_addr, sizeof(key->c_addr)) != 0) - return false; - return true; + return memcmp(&key->c_key, &rp->c_key, sizeof(key->c_key)); } /* @@ -367,7 +354,7 @@ nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key) list_for_each_entry(rp, rh, c_lru) { ++entries; - if (nfsd_cache_match(key, rp)) { + if (nfsd_cache_key_cmp(key, rp) == 0) { ret = rp; break; } @@ -510,7 +497,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) if (!rp) return; - hash = nfsd_cache_hash(rp->c_xid); + hash = nfsd_cache_hash(rp->c_key.k_xid); b = &drc_hashtbl[hash]; len = resv->iov_len - ((char*)statp - (char*)resv->iov_base); -- cgit v1.2.3 From 736c6625de666f3fd0b47428f10568154033151a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 1 Oct 2018 10:41:57 -0400 Subject: knfsd: Improve lookup performance in the duplicate reply cache using an rbtree Use an rbtree to ensure the lookup/insert of an entry in a DRC bucket is O(log(N)). Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- fs/nfsd/cache.h | 1 + fs/nfsd/nfscache.c | 37 ++++++++++++++++++++++++++----------- 2 files changed, 27 insertions(+), 11 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 745c861237ca..4a98537efb0f 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -30,6 +30,7 @@ struct svc_cacherep { struct sockaddr_in6 k_addr; } c_key; + struct rb_node c_node; struct list_head c_lru; unsigned char c_state, /* unused, inprog, done */ c_type, /* status, buffer */ diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 230cc83921ad..e2fe0e9ce0df 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -30,6 +30,7 @@ #define TARGET_BUCKET_SIZE 64 struct nfsd_drc_bucket { + struct rb_root rb_head; struct list_head lru_head; spinlock_t cache_lock; }; @@ -129,6 +130,7 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum) if (rp) { rp->c_state = RC_UNUSED; rp->c_type = RC_NOCACHE; + RB_CLEAR_NODE(&rp->c_node); INIT_LIST_HEAD(&rp->c_lru); memset(&rp->c_key, 0, sizeof(rp->c_key)); @@ -145,13 +147,14 @@ nfsd_reply_cache_alloc(struct svc_rqst *rqstp, __wsum csum) } static void -nfsd_reply_cache_free_locked(struct svc_cacherep *rp) +nfsd_reply_cache_free_locked(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) { if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) { drc_mem_usage -= rp->c_replvec.iov_len; kfree(rp->c_replvec.iov_base); } if (rp->c_state != RC_UNUSED) { + rb_erase(&rp->c_node, &b->rb_head); list_del(&rp->c_lru); atomic_dec(&num_drc_entries); drc_mem_usage -= sizeof(*rp); @@ -163,7 +166,7 @@ static void nfsd_reply_cache_free(struct nfsd_drc_bucket *b, struct svc_cacherep *rp) { spin_lock(&b->cache_lock); - nfsd_reply_cache_free_locked(rp); + nfsd_reply_cache_free_locked(b, rp); spin_unlock(&b->cache_lock); } @@ -219,7 +222,7 @@ void nfsd_reply_cache_shutdown(void) struct list_head *head = &drc_hashtbl[i].lru_head; while (!list_empty(head)) { rp = list_first_entry(head, struct svc_cacherep, c_lru); - nfsd_reply_cache_free_locked(rp); + nfsd_reply_cache_free_locked(&drc_hashtbl[i], rp); } } @@ -258,7 +261,7 @@ prune_bucket(struct nfsd_drc_bucket *b) if (atomic_read(&num_drc_entries) <= max_drc_entries && time_before(jiffies, rp->c_timestamp + RC_EXPIRE)) break; - nfsd_reply_cache_free_locked(rp); + nfsd_reply_cache_free_locked(b, rp); freed++; } return freed; @@ -349,17 +352,29 @@ static struct svc_cacherep * nfsd_cache_insert(struct nfsd_drc_bucket *b, struct svc_cacherep *key) { struct svc_cacherep *rp, *ret = key; - struct list_head *rh = &b->lru_head; + struct rb_node **p = &b->rb_head.rb_node, + *parent = NULL; unsigned int entries = 0; + int cmp; - list_for_each_entry(rp, rh, c_lru) { + while (*p != NULL) { ++entries; - if (nfsd_cache_key_cmp(key, rp) == 0) { + parent = *p; + rp = rb_entry(parent, struct svc_cacherep, c_node); + + cmp = nfsd_cache_key_cmp(key, rp); + if (cmp < 0) + p = &parent->rb_left; + else if (cmp > 0) + p = &parent->rb_right; + else { ret = rp; - break; + goto out; } } - + rb_link_node(&key->c_node, parent, p); + rb_insert_color(&key->c_node, &b->rb_head); +out: /* tally hash chain length stats */ if (entries > longest_chain) { longest_chain = entries; @@ -414,7 +429,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp) spin_lock(&b->cache_lock); found = nfsd_cache_insert(b, rp); if (found != rp) { - nfsd_reply_cache_free_locked(rp); + nfsd_reply_cache_free_locked(NULL, rp); rp = found; goto found_entry; } @@ -462,7 +477,7 @@ found_entry: break; default: printk(KERN_WARNING "nfsd: bad repcache type %d\n", rp->c_type); - nfsd_reply_cache_free_locked(rp); + nfsd_reply_cache_free_locked(b, rp); } goto out; -- cgit v1.2.3 From 0ac203cb1f03734606c0674eded43aaefb5a491a Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 2 Oct 2018 12:08:48 +0200 Subject: nfsd: fix fall-through annotations Replace "fallthru" with a proper "fall through" annotation. Also, add an annotation were it is expected to fall through. These fixes are part of the ongoing efforts to enabling -Wimplicit-fallthrough Signed-off-by: Gustavo A. R. Silva Signed-off-by: J. Bruce Fields --- fs/nfsd/vfs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index ddbac8776bd3..94412768b782 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1406,6 +1406,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, *created = 1; break; } + /* fall through */ case NFS4_CREATE_EXCLUSIVE4_1: if ( d_inode(dchild)->i_mtime.tv_sec == v_mtime && d_inode(dchild)->i_atime.tv_sec == v_atime @@ -1414,7 +1415,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, *created = 1; goto set_attr; } - /* fallthru */ + /* fall through */ case NFS3_CREATE_GUARDED: err = nfserr_exist; } -- cgit v1.2.3 From bd8d725078867cda250fe94b9c5a067b4a64ca74 Mon Sep 17 00:00:00 2001 From: Andrew Elble Date: Fri, 5 Oct 2018 09:32:08 -0400 Subject: nfsd: correctly decrement odstate refcount in error path alloc_init_deleg() both allocates an nfs4_delegation, and bumps the refcount on odstate. So after this point, we need to put_clnt_odstate() and nfs4_put_stid() to not leave the odstate refcount inappropriately bumped. Signed-off-by: Andrew Elble Reviewed-by: Jeff Layton Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/nfsd') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 07a57d024f95..f093fbe47133 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4398,7 +4398,7 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh, fl = nfs4_alloc_init_lease(dp, NFS4_OPEN_DELEGATE_READ); if (!fl) - goto out_stid; + goto out_clnt_odstate; status = vfs_setlease(fp->fi_deleg_file, fl->fl_type, &fl, NULL); if (fl) @@ -4423,7 +4423,6 @@ out_unlock: vfs_setlease(fp->fi_deleg_file, F_UNLCK, NULL, (void **)&dp); out_clnt_odstate: put_clnt_odstate(dp->dl_clnt_odstate); -out_stid: nfs4_put_stid(&dp->dl_stid); out_delegees: put_deleg_file(fp); -- cgit v1.2.3