commit 4e1c80ae5cf458792bec9815ee77bc3851046fb8
Merge: 0127f25b5dfcc3d0349eb29d692178183e101652 9280c577431401544e63dfb489a830a42bee25eb
Author:     Linus Torvalds <torvalds@linux-foundation.org>
AuthorDate: 2023-04-29 11:04:14 -0700
Commit:     Linus Torvalds <torvalds@linux-foundation.org>
CommitDate: 2023-04-29 11:04:14 -0700
Tree:       a69f8d6065ecb4e3e75e448372ec1c498108dbea
Merge tag 'nfsd-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux
Pull nfsd updates from Chuck Lever:
"The big ticket item for this release is that support for RPC-with-TLS
[RFC 9289] has been added to the Linux NFS server.
The goal is to provide a simple-to-deploy, low-overhead in-transit
confidentiality and peer authentication mechanism. It can supplement
NFS Kerberos and it can protect the use of legacy non-cryptographic
user authentication flavors such as AUTH_SYS. The TLS Record protocol
is handled entirely by kTLS, meaning it can use either software
encryption or offload encryption to smart NICs.

Aside from that, work continues on improving NFSD's open file cache.
Among the many clean-ups in that area is a patch to convert the
rhashtable to use the list-hashing version of that data structure"
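The "list-hashing version" mentioned above is the rhltable flavour of rhashtable, which keeps a per-bucket list so that several entries may share one key. The conversion itself lives under fs/nfsd and is outside the net/ diff shown below; the following is only a minimal sketch of that API with invented demo_* names, not code from this merge:

#include <linux/rhashtable.h>
#include <linux/slab.h>
#include <linux/errno.h>

/* Hypothetical object keyed by inode number; with rhltable, several
 * entries may legitimately share the same key. */
struct demo_entry {
	u64			ino;	/* hash key */
	struct rhlist_head	list;	/* links same-key entries */
};

static const struct rhashtable_params demo_params = {
	.key_len	= sizeof(u64),
	.key_offset	= offsetof(struct demo_entry, ino),
	.head_offset	= offsetof(struct demo_entry, list),
	.automatic_shrinking = true,
};

static struct rhltable demo_table;

static int demo_add(u64 ino)
{
	struct demo_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);

	if (!entry)
		return -ENOMEM;
	entry->ino = ino;
	/* Unlike plain rhashtable insertion, duplicate keys are allowed. */
	return rhltable_insert(&demo_table, &entry->list, demo_params);
}

static int demo_setup(void)
{
	return rhltable_init(&demo_table, &demo_params);
}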
* tag 'nfsd-6.4' of git://git.kernel.org/pub/scm/linux/kernel/git/cel/linux: (31 commits)
NFSD: Handle new xprtsec= export option
SUNRPC: Support TLS handshake in the server-side TCP socket code
NFSD: Clean up xattr memory allocation flags
NFSD: Fix problem of COMMIT and NFS4ERR_DELAY in infinite loop
SUNRPC: Clear rq_xid when receiving a new RPC Call
SUNRPC: Recognize control messages in server-side TCP socket code
SUNRPC: Be even lazier about releasing pages
SUNRPC: Convert svc_xprt_release() to the release_pages() API
SUNRPC: Relocate svc_free_res_pages()
nfsd: simplify the delayed disposal list code
SUNRPC: Ignore return value of ->xpo_sendto
SUNRPC: Ensure server-side sockets have a sock->file
NFSD: Watch for rq_pages bounds checking errors in nfsd_splice_actor()
sunrpc: simplify two-level sysctl registration for svcrdma_parm_table
SUNRPC: return proper error from get_expiry()
lockd: add some client-side tracepoints
nfs: move nfs_fhandle_hash to common include file
lockd: server should unlock lock if client rejects the grant
lockd: fix races in client GRANTED_MSG wait logic
lockd: move struct nlm_wait to lockd.h
...
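As background for the RPC-with-TLS item above: because the Record protocol is handled by kTLS, a receiver never parses TLS records itself; it only inspects the record type that the kernel delivers as a control message. The sketch below is a hypothetical user-space consumer (recv_app_data() and its error policy are invented, and the socket is assumed to already have TLS RX keys installed); the server-side patches in this pull apply the same rule in-kernel in svc_tcp_sock_recv_cmsg():

#include <errno.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <linux/tls.h>

#ifndef SOL_TLS
#define SOL_TLS 282		/* value used by the kernel's TLS ULP */
#endif

/* Receive one chunk of application data from a kTLS-enabled socket,
 * rejecting anything that is not an application_data record. */
static ssize_t recv_app_data(int fd, void *buf, size_t len)
{
	union {
		struct cmsghdr hdr;
		char space[CMSG_SPACE(sizeof(unsigned char))];
	} control;
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct msghdr msg = {
		.msg_iov = &iov,
		.msg_iovlen = 1,
		.msg_control = &control,
		.msg_controllen = sizeof(control),
	};
	struct cmsghdr *cmsg;
	ssize_t ret;

	ret = recvmsg(fd, &msg, 0);
	if (ret <= 0)
		return ret;

	cmsg = CMSG_FIRSTHDR(&msg);
	if (cmsg && cmsg->cmsg_level == SOL_TLS &&
	    cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
		unsigned char record_type = *CMSG_DATA(cmsg);

		if (record_type != 23)	/* 23 == application_data */
			return -EIO;	/* alert/handshake: let caller decide */
	}
	return ret;
}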
Diffstat (limited to 'net')
 net/sunrpc/auth_gss/svcauth_gss.c |  12
 net/sunrpc/svc.c                  |  49
 net/sunrpc/svc_xprt.c             |  33
 net/sunrpc/svcauth_unix.c         |  23
 net/sunrpc/svcsock.c              | 174
 net/sunrpc/xprtrdma/svc_rdma.c    |  21
 6 files changed, 243 insertions(+), 69 deletions(-)
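Several of the hunks that follow convert SUNRPC cache parsers from the old get_expiry() convention, where a return of zero meant failure, to a form that returns a status code and writes the parsed expiry through an out parameter, so callers can simply propagate the error. The helper itself lives in include/linux/sunrpc/cache.h and is outside this net/-only diffstat; the sketch below only illustrates the assumed shape of the new convention (example_get_expiry() and its use of get_time()/getboottime64() are illustrative, not quoted from the tree):

#include <linux/sunrpc/cache.h>	/* get_time() and friends */
#include <linux/timekeeping.h>	/* getboottime64() */

/* Assumed shape of the reworked helper: report failure by return
 * value, hand back the boot-relative expiry via *rvp. */
static int example_get_expiry(char **bpp, time64_t *rvp)
{
	struct timespec64 boot;
	int error;

	error = get_time(bpp, rvp);	/* parse the seconds field */
	if (error)
		return error;		/* typically -EINVAL */

	getboottime64(&boot);
	*rvp -= boot.tv_sec;		/* store relative to boot time */
	return 0;
}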
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 9c843974bb48..c4a566737085 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -257,11 +257,11 @@ static int rsi_parse(struct cache_detail *cd,
 	rsii.h.flags = 0;
 
 	/* expiry */
-	expiry = get_expiry(&mesg);
-	status = -EINVAL;
-	if (expiry == 0)
+	status = get_expiry(&mesg, &expiry);
+	if (status)
 		goto out;
 
+	status = -EINVAL;
 	/* major/minor */
 	len = qword_get(&mesg, buf, mlen);
 	if (len <= 0)
@@ -483,11 +483,11 @@ static int rsc_parse(struct cache_detail *cd,
 	rsci.h.flags = 0;
 
 	/* expiry */
-	expiry = get_expiry(&mesg);
-	status = -EINVAL;
-	if (expiry == 0)
+	status = get_expiry(&mesg, &expiry);
+	if (status)
 		goto out;
 
+	status = -EINVAL;
 	rscp = rsc_lookup(cd, &rsci);
 	if (!rscp)
 		goto out;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index fea7ce8fba14..26367cf4c17a 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -649,6 +649,8 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
 	if (!rqstp)
 		return rqstp;
 
+	pagevec_init(&rqstp->rq_pvec);
+
 	__set_bit(RQ_BUSY, &rqstp->rq_flags);
 	rqstp->rq_server = serv;
 	rqstp->rq_pool = pool;
@@ -842,9 +844,21 @@ EXPORT_SYMBOL_GPL(svc_set_num_threads);
  *
  * When replacing a page in rq_pages, batch the release of the
  * replaced pages to avoid hammering the page allocator.
+ *
+ * Return values:
+ *   %true: page replaced
+ *   %false: array bounds checking failed
  */
-void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
+bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
 {
+	struct page **begin = rqstp->rq_pages;
+	struct page **end = &rqstp->rq_pages[RPCSVC_MAXPAGES];
+
+	if (unlikely(rqstp->rq_next_page < begin || rqstp->rq_next_page > end)) {
+		trace_svc_replace_page_err(rqstp);
+		return false;
+	}
+
 	if (*rqstp->rq_next_page) {
 		if (!pagevec_space(&rqstp->rq_pvec))
 			__pagevec_release(&rqstp->rq_pvec);
@@ -853,9 +867,28 @@ void svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page)
 
 	get_page(page);
 	*(rqstp->rq_next_page++) = page;
+	return true;
 }
 EXPORT_SYMBOL_GPL(svc_rqst_replace_page);
 
+/**
+ * svc_rqst_release_pages - Release Reply buffer pages
+ * @rqstp: RPC transaction context
+ *
+ * Release response pages that might still be in flight after
+ * svc_send, and any spliced filesystem-owned pages.
+ */
+void svc_rqst_release_pages(struct svc_rqst *rqstp)
+{
+	int i, count = rqstp->rq_next_page - rqstp->rq_respages;
+
+	if (count) {
+		release_pages(rqstp->rq_respages, count);
+		for (i = 0; i < count; i++)
+			rqstp->rq_respages[i] = NULL;
+	}
+}
+
 /*
  * Called from a server thread as it's exiting. Caller must hold the "service
  * mutex" for the service.
@@ -863,6 +896,7 @@ EXPORT_SYMBOL_GPL(svc_rqst_replace_page);
 void
 svc_rqst_free(struct svc_rqst *rqstp)
 {
+	pagevec_release(&rqstp->rq_pvec);
 	svc_release_buffer(rqstp);
 	if (rqstp->rq_scratch_page)
 		put_page(rqstp->rq_scratch_page);
@@ -1431,11 +1465,12 @@ err_system_err:
 	goto sendit;
 }
 
-/*
- * Process the RPC request.
+/**
+ * svc_process - Execute one RPC transaction
+ * @rqstp: RPC transaction context
+ *
  */
-int
-svc_process(struct svc_rqst *rqstp)
+void svc_process(struct svc_rqst *rqstp)
 {
 	struct kvec *resv = &rqstp->rq_res.head[0];
 	__be32 *p;
@@ -1471,7 +1506,8 @@ svc_process(struct svc_rqst *rqstp)
 
 	if (!svc_process_common(rqstp))
 		goto out_drop;
-	return svc_send(rqstp);
+	svc_send(rqstp);
+	return;
 
 out_baddir:
 	svc_printk(rqstp, "bad direction 0x%08x, dropping request\n",
@@ -1479,7 +1515,6 @@ out_baddir:
 	rqstp->rq_server->sv_stats->rpcbadfmt++;
 out_drop:
 	svc_drop(rqstp);
-	return 0;
 }
 EXPORT_SYMBOL_GPL(svc_process);
 
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index ba629297da4e..84e5d7d31481 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -427,7 +427,7 @@ static bool svc_xprt_ready(struct svc_xprt *xprt)
 
 	if (xpt_flags & BIT(XPT_BUSY))
 		return false;
-	if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE)))
+	if (xpt_flags & (BIT(XPT_CONN) | BIT(XPT_CLOSE) | BIT(XPT_HANDSHAKE)))
 		return true;
 	if (xpt_flags & (BIT(XPT_DATA) | BIT(XPT_DEFERRED))) {
 		if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
@@ -541,8 +541,7 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
 	kfree(rqstp->rq_deferred);
 	rqstp->rq_deferred = NULL;
 
-	pagevec_release(&rqstp->rq_pvec);
-	svc_free_res_pages(rqstp);
+	svc_rqst_release_pages(rqstp);
 	rqstp->rq_res.page_len = 0;
 	rqstp->rq_res.page_base = 0;
 
@@ -667,8 +666,6 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
 	struct xdr_buf *arg = &rqstp->rq_arg;
 	unsigned long pages, filled, ret;
 
-	pagevec_init(&rqstp->rq_pvec);
-
 	pages = (serv->sv_max_mesg + 2 * PAGE_SIZE) >> PAGE_SHIFT;
 	if (pages > RPCSVC_MAXPAGES) {
 		pr_warn_once("svc: warning: pages=%lu > RPCSVC_MAXPAGES=%lu\n",
@@ -704,6 +701,8 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
 	arg->page_len = (pages-2)*PAGE_SIZE;
 	arg->len = (pages-1)*PAGE_SIZE;
 	arg->tail[0].iov_len = 0;
+
+	rqstp->rq_xid = xdr_zero;
 	return 0;
 }
 
@@ -829,6 +828,9 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
 				module_put(xprt->xpt_class->xcl_owner);
 		}
 		svc_xprt_received(xprt);
+	} else if (test_bit(XPT_HANDSHAKE, &xprt->xpt_flags)) {
+		xprt->xpt_ops->xpo_handshake(xprt);
+		svc_xprt_received(xprt);
 	} else if (svc_xprt_reserve_slot(rqstp, xprt)) {
 		/* XPT_DATA|XPT_DEFERRED case: */
 		dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
@@ -909,18 +911,20 @@ void svc_drop(struct svc_rqst *rqstp)
 }
 EXPORT_SYMBOL_GPL(svc_drop);
 
-/*
- * Return reply to client.
+/**
+ * svc_send - Return reply to client
+ * @rqstp: RPC transaction context
+ *
  */
-int svc_send(struct svc_rqst *rqstp)
+void svc_send(struct svc_rqst *rqstp)
 {
 	struct svc_xprt *xprt;
-	int len = -EFAULT;
 	struct xdr_buf *xb;
+	int status;
 
 	xprt = rqstp->rq_xprt;
 	if (!xprt)
-		goto out;
+		return;
 
 	/* calculate over-all length */
 	xb = &rqstp->rq_res;
@@ -930,15 +934,10 @@ int svc_send(struct svc_rqst *rqstp)
 	trace_svc_xdr_sendto(rqstp->rq_xid, xb);
 	trace_svc_stats_latency(rqstp);
 
-	len = xprt->xpt_ops->xpo_sendto(rqstp);
+	status = xprt->xpt_ops->xpo_sendto(rqstp);
 
-	trace_svc_send(rqstp, len);
+	trace_svc_send(rqstp, status);
 	svc_xprt_release(rqstp);
-
-	if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
-		len = 0;
-out:
-	return len;
 }
 
 /*
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 4246363cb095..174783f804fa 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -17,8 +17,9 @@
 #include <net/ipv6.h>
 #include <linux/kernel.h>
 #include <linux/user_namespace.h>
-#define RPCDBG_FACILITY	RPCDBG_AUTH
+#include <trace/events/sunrpc.h>
 
+#define RPCDBG_FACILITY	RPCDBG_AUTH
 
 #include "netns.h"
 
@@ -225,9 +226,9 @@ static int ip_map_parse(struct cache_detail *cd,
 		return -EINVAL;
 	}
 
-	expiry = get_expiry(&mesg);
-	if (expiry ==0)
-		return -EINVAL;
+	err = get_expiry(&mesg, &expiry);
+	if (err)
+		return err;
 
 	/* domainname, or empty for NEGATIVE */
 	len = qword_get(&mesg, buf, mlen);
@@ -506,9 +507,9 @@ static int unix_gid_parse(struct cache_detail *cd,
 	uid = make_kuid(current_user_ns(), id);
 	ug.uid = uid;
 
-	expiry = get_expiry(&mesg);
-	if (expiry == 0)
-		return -EINVAL;
+	err = get_expiry(&mesg, &expiry);
+	if (err)
+		return err;
 
 	rv = get_int(&mesg, &gids);
 	if (rv || gids < 0 || gids > 8192)
@@ -832,6 +833,7 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
 {
 	struct xdr_stream *xdr = &rqstp->rq_arg_stream;
 	struct svc_cred *cred = &rqstp->rq_cred;
+	struct svc_xprt *xprt = rqstp->rq_xprt;
 	u32 flavor, len;
 	void *body;
 	__be32 *p;
@@ -865,14 +867,19 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
 	if (cred->cr_group_info == NULL)
 		return SVC_CLOSE;
 
-	if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
+	if (xprt->xpt_ops->xpo_handshake) {
 		p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8);
 		if (!p)
 			return SVC_CLOSE;
+		trace_svc_tls_start(xprt);
 		*p++ = rpc_auth_null;
 		*p++ = cpu_to_be32(8);
 		memcpy(p, "STARTTLS", 8);
+
+		set_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
+		svc_xprt_enqueue(xprt);
 	} else {
+		trace_svc_tls_unavailable(xprt);
 		if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
 						  RPC_AUTH_NULL, NULL, 0) < 0)
 			return SVC_CLOSE;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 03a4f5615086..a51c9b989d58 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -43,9 +43,12 @@
 #include <net/udp.h>
 #include <net/tcp.h>
 #include <net/tcp_states.h>
+#include <net/tls.h>
+#include <net/handshake.h>
 #include <linux/uaccess.h>
 #include <linux/highmem.h>
 #include <asm/ioctls.h>
+#include <linux/key.h>
 
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/clnt.h>
@@ -63,6 +66,12 @@
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
+/* To-do: to avoid tying up an nfsd thread while waiting for a
+ * handshake request, the request could instead be deferred.
+ */
+enum {
+	SVC_HANDSHAKE_TO = 5U * HZ
+};
 
 static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
 					 int flags);
 
@@ -216,6 +225,49 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)
 	return len;
 }
 
+static int
+svc_tcp_sock_process_cmsg(struct svc_sock *svsk, struct msghdr *msg,
+			  struct cmsghdr *cmsg, int ret)
+{
+	if (cmsg->cmsg_level == SOL_TLS &&
+	    cmsg->cmsg_type == TLS_GET_RECORD_TYPE) {
+		u8 content_type = *((u8 *)CMSG_DATA(cmsg));
+
+		switch (content_type) {
+		case TLS_RECORD_TYPE_DATA:
+			/* TLS sets EOR at the end of each application data
+			 * record, even though there might be more frames
+			 * waiting to be decrypted.
+			 */
+			msg->msg_flags &= ~MSG_EOR;
+			break;
+		case TLS_RECORD_TYPE_ALERT:
+			ret = -ENOTCONN;
+			break;
+		default:
+			ret = -EAGAIN;
+		}
+	}
+	return ret;
+}
+
+static int
+svc_tcp_sock_recv_cmsg(struct svc_sock *svsk, struct msghdr *msg)
+{
+	union {
+		struct cmsghdr	cmsg;
+		u8		buf[CMSG_SPACE(sizeof(u8))];
+	} u;
+	int ret;
+
+	msg->msg_control = &u;
+	msg->msg_controllen = sizeof(u);
+	ret = sock_recvmsg(svsk->sk_sock, msg, MSG_DONTWAIT);
+	if (unlikely(msg->msg_controllen != sizeof(u)))
+		ret = svc_tcp_sock_process_cmsg(svsk, msg, &u.cmsg, ret);
+	return ret;
+}
+
 #if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
 static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek)
 {
@@ -263,7 +315,7 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
 		iov_iter_advance(&msg.msg_iter, seek);
 		buflen -= seek;
 	}
-	len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
+	len = svc_tcp_sock_recv_cmsg(svsk, &msg);
 	if (len > 0)
 		svc_flush_bvec(bvec, len, seek);
 
@@ -315,6 +367,8 @@ static void svc_data_ready(struct sock *sk)
 		rmb();
 		svsk->sk_odata(sk);
 		trace_svcsock_data_ready(&svsk->sk_xprt, 0);
+		if (test_bit(XPT_HANDSHAKE, &svsk->sk_xprt.xpt_flags))
+			return;
 		if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
 			svc_xprt_enqueue(&svsk->sk_xprt);
 	}
@@ -352,6 +406,88 @@ static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
 	sock_no_linger(svsk->sk_sock->sk);
 }
 
+/**
+ * svc_tcp_handshake_done - Handshake completion handler
+ * @data: address of xprt to wake
+ * @status: status of handshake
+ * @peerid: serial number of key containing the remote peer's identity
+ *
+ * If a security policy is specified as an export option, we don't
+ * have a specific export here to check. So we set a "TLS session
+ * is present" flag on the xprt and let an upper layer enforce local
+ * security policy.
+ */
+static void svc_tcp_handshake_done(void *data, int status, key_serial_t peerid)
+{
+	struct svc_xprt *xprt = data;
+	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+
+	if (!status) {
+		if (peerid != TLS_NO_PEERID)
+			set_bit(XPT_PEER_AUTH, &xprt->xpt_flags);
+		set_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
+	}
+	clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
+	complete_all(&svsk->sk_handshake_done);
+}
+
+/**
+ * svc_tcp_handshake - Perform a transport-layer security handshake
+ * @xprt: connected transport endpoint
+ *
+ */
+static void svc_tcp_handshake(struct svc_xprt *xprt)
+{
+	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+	struct sock *sk = svsk->sk_sock->sk;
+	struct tls_handshake_args args = {
+		.ta_sock	= svsk->sk_sock,
+		.ta_done	= svc_tcp_handshake_done,
+		.ta_data	= xprt,
+	};
+	int ret;
+
+	trace_svc_tls_upcall(xprt);
+
+	clear_bit(XPT_TLS_SESSION, &xprt->xpt_flags);
+	init_completion(&svsk->sk_handshake_done);
+
+	ret = tls_server_hello_x509(&args, GFP_KERNEL);
+	if (ret) {
+		trace_svc_tls_not_started(xprt);
+		goto out_failed;
+	}
+
+	ret = wait_for_completion_interruptible_timeout(&svsk->sk_handshake_done,
+							SVC_HANDSHAKE_TO);
+	if (ret <= 0) {
+		if (tls_handshake_cancel(sk)) {
+			trace_svc_tls_timed_out(xprt);
+			goto out_close;
+		}
+	}
+
+	if (!test_bit(XPT_TLS_SESSION, &xprt->xpt_flags)) {
+		trace_svc_tls_unavailable(xprt);
+		goto out_close;
+	}
+
+	/* Mark the transport ready in case the remote sent RPC
+	 * traffic before the kernel received the handshake
+	 * completion downcall.
+	 */
+	set_bit(XPT_DATA, &xprt->xpt_flags);
+	svc_xprt_enqueue(xprt);
+	return;
+
+out_close:
+	set_bit(XPT_CLOSE, &xprt->xpt_flags);
+out_failed:
+	clear_bit(XPT_HANDSHAKE, &xprt->xpt_flags);
+	set_bit(XPT_DATA, &xprt->xpt_flags);
+	svc_xprt_enqueue(xprt);
+}
+
 /*
  * See net/ipv6/ip_sockglue.c : ip_cmsg_recv_pktinfo
  */
@@ -877,7 +1013,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
 		iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen;
 		iov.iov_len = want;
 		iov_iter_kvec(&msg.msg_iter, ITER_DEST, &iov, 1, want);
-		len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT);
+		len = svc_tcp_sock_recv_cmsg(svsk, &msg);
 		if (len < 0)
 			return len;
 		svsk->sk_tcplen += len;
@@ -1213,6 +1349,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
 	.xpo_has_wspace = svc_tcp_has_wspace,
 	.xpo_accept = svc_tcp_accept,
 	.xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
+	.xpo_handshake = svc_tcp_handshake,
 };
 
 static struct svc_xprt_class svc_tcp_class = {
@@ -1293,26 +1430,37 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
 						struct socket *sock,
 						int flags)
 {
+	struct file *filp = NULL;
 	struct svc_sock *svsk;
 	struct sock *inet;
 	int pmap_register = !(flags & SVC_SOCK_ANONYMOUS);
-	int err = 0;
 
 	svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);
 	if (!svsk)
 		return ERR_PTR(-ENOMEM);
 
+	if (!sock->file) {
+		filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
+		if (IS_ERR(filp)) {
+			kfree(svsk);
+			return ERR_CAST(filp);
+		}
+	}
+
 	inet = sock->sk;
 
-	/* Register socket with portmapper */
-	if (pmap_register)
+	if (pmap_register) {
+		int err;
+
 		err = svc_register(serv, sock_net(sock->sk), inet->sk_family,
 				   inet->sk_protocol,
 				   ntohs(inet_sk(inet)->inet_sport));
-
-	if (err < 0) {
-		kfree(svsk);
-		return ERR_PTR(err);
+		if (err < 0) {
+			if (filp)
+				fput(filp);
+			kfree(svsk);
+			return ERR_PTR(err);
+		}
 	}
 
 	svsk->sk_sock = sock;
@@ -1525,10 +1673,12 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
 static void svc_sock_free(struct svc_xprt *xprt)
 {
 	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
+	struct socket *sock = svsk->sk_sock;
 
-	if (svsk->sk_sock->file)
-		sockfd_put(svsk->sk_sock);
+	tls_handshake_cancel(sock->sk);
+	if (sock->file)
+		sockfd_put(sock);
 	else
-		sock_release(svsk->sk_sock);
+		sock_release(sock);
 	kfree(svsk);
 }
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 5bc20e9d09cd..f0d5eeed4c88 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -212,24 +212,6 @@ static struct ctl_table svcrdma_parm_table[] = {
 	{ },
 };
 
-static struct ctl_table svcrdma_table[] = {
-	{
-		.procname = "svc_rdma",
-		.mode = 0555,
-		.child = svcrdma_parm_table
-	},
-	{ },
-};
-
-static struct ctl_table svcrdma_root_table[] = {
-	{
-		.procname = "sunrpc",
-		.mode = 0555,
-		.child = svcrdma_table
-	},
-	{ },
-};
-
 static void svc_rdma_proc_cleanup(void)
 {
 	if (!svcrdma_table_header)
@@ -263,7 +245,8 @@ static int svc_rdma_proc_init(void)
 	if (rc)
 		goto out_err;
 
-	svcrdma_table_header = register_sysctl_table(svcrdma_root_table);
+	svcrdma_table_header = register_sysctl("sunrpc/svc_rdma",
					       svcrdma_parm_table);
 	return 0;
 
 out_err:
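The closing svc_rdma.c hunk is part of the tree-wide sysctl clean-up: instead of hand-building parent ctl_table arrays chained through .child, a module now hands register_sysctl() the directory path as a string. A minimal sketch of that pattern, with invented example_* names and an invented "sunrpc/example" path (the real call above registers "sunrpc/svc_rdma" with svcrdma_parm_table):

#include <linux/sysctl.h>
#include <linux/init.h>
#include <linux/errno.h>

static unsigned int example_max_payload = 4096;	/* made-up tunable */
static struct ctl_table_header *example_sysctl_header;

static struct ctl_table example_parm_table[] = {
	{
		.procname	= "example_max_payload",
		.data		= &example_max_payload,
		.maxlen		= sizeof(example_max_payload),
		.mode		= 0644,
		.proc_handler	= proc_douintvec,
	},
	{ },	/* terminating entry, still required in this era */
};

static int __init example_sysctl_init(void)
{
	/* One call creates the whole /proc/sys/sunrpc/example directory;
	 * no intermediate tables with .child pointers are needed. */
	example_sysctl_header = register_sysctl("sunrpc/example",
						example_parm_table);
	return example_sysctl_header ? 0 : -ENOMEM;
}

static void example_sysctl_cleanup(void)
{
	if (example_sysctl_header) {
		unregister_sysctl_table(example_sysctl_header);
		example_sysctl_header = NULL;
	}
}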