diff options
Diffstat (limited to 'net/rxrpc')
-rw-r--r-- | net/rxrpc/Kconfig | 14 | ||||
-rw-r--r-- | net/rxrpc/Makefile | 1 | ||||
-rw-r--r-- | net/rxrpc/af_rxrpc.c | 175 | ||||
-rw-r--r-- | net/rxrpc/ar-internal.h | 832 | ||||
-rw-r--r-- | net/rxrpc/call_accept.c | 717 | ||||
-rw-r--r-- | net/rxrpc/call_event.c | 1426 | ||||
-rw-r--r-- | net/rxrpc/call_object.c | 796 | ||||
-rw-r--r-- | net/rxrpc/conn_client.c | 993 | ||||
-rw-r--r-- | net/rxrpc/conn_event.c | 271 | ||||
-rw-r--r-- | net/rxrpc/conn_object.c | 204 | ||||
-rw-r--r-- | net/rxrpc/conn_service.c | 117 | ||||
-rw-r--r-- | net/rxrpc/input.c | 1399 | ||||
-rw-r--r-- | net/rxrpc/insecure.c | 26 | ||||
-rw-r--r-- | net/rxrpc/local_event.c | 19 | ||||
-rw-r--r-- | net/rxrpc/local_object.c | 51 | ||||
-rw-r--r-- | net/rxrpc/misc.c | 192 | ||||
-rw-r--r-- | net/rxrpc/output.c | 933 | ||||
-rw-r--r-- | net/rxrpc/peer_event.c | 103 | ||||
-rw-r--r-- | net/rxrpc/peer_object.c | 199 | ||||
-rw-r--r-- | net/rxrpc/proc.c | 72 | ||||
-rw-r--r-- | net/rxrpc/recvmsg.c | 866 | ||||
-rw-r--r-- | net/rxrpc/rxkad.c | 209 | ||||
-rw-r--r-- | net/rxrpc/security.c | 18 | ||||
-rw-r--r-- | net/rxrpc/sendmsg.c | 606 | ||||
-rw-r--r-- | net/rxrpc/skbuff.c | 174 | ||||
-rw-r--r-- | net/rxrpc/sysctl.c | 45 | ||||
-rw-r--r-- | net/rxrpc/utils.c | 2 |
27 files changed, 6026 insertions, 4434 deletions
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 784c53163b7b..86f8853a038c 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -19,6 +19,20 @@ config AF_RXRPC See Documentation/networking/rxrpc.txt. +config AF_RXRPC_IPV6 + bool "IPv6 support for RxRPC" + depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC) + help + Say Y here to allow AF_RXRPC to use IPV6 UDP as well as IPV4 UDP as + its network transport. + +config AF_RXRPC_INJECT_LOSS + bool "Inject packet loss into RxRPC packet stream" + depends on AF_RXRPC + help + Say Y here to inject packet loss by discarding some received and some + transmitted packets. + config AF_RXRPC_DEBUG bool "RxRPC dynamic debugging" diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index 10f3f48a16a8..8fc6ea347182 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -22,6 +22,7 @@ af-rxrpc-y := \ peer_object.o \ recvmsg.o \ security.o \ + sendmsg.o \ skbuff.o \ utils.o diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 88effadd4b16..44c9c2b0b190 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -16,12 +16,14 @@ #include <linux/net.h> #include <linux/slab.h> #include <linux/skbuff.h> +#include <linux/random.h> #include <linux/poll.h> #include <linux/proc_fs.h> #include <linux/key-type.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/af_rxrpc.h> +#define CREATE_TRACE_POINTS #include "ar-internal.h" MODULE_DESCRIPTION("RxRPC network protocol"); @@ -43,7 +45,7 @@ u32 rxrpc_epoch; atomic_t rxrpc_debug_id; /* count of skbs currently in use */ -atomic_t rxrpc_n_skbs; +atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; struct workqueue_struct *rxrpc_workqueue; @@ -104,19 +106,25 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx, case AF_INET: if (srx->transport_len < sizeof(struct sockaddr_in)) return -EINVAL; - _debug("INET: %x @ %pI4", - ntohs(srx->transport.sin.sin_port), - &srx->transport.sin.sin_addr); tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad); break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: + if (srx->transport_len < sizeof(struct sockaddr_in6)) + return -EINVAL; + tail = offsetof(struct sockaddr_rxrpc, transport) + + sizeof(struct sockaddr_in6); + break; +#endif + default: return -EAFNOSUPPORT; } if (tail < len) memset((void *)srx + tail, 0, len - tail); + _debug("INET: %pISp", &srx->transport); return 0; } @@ -128,7 +136,8 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr; struct sock *sk = sock->sk; struct rxrpc_local *local; - struct rxrpc_sock *rx = rxrpc_sk(sk), *prx; + struct rxrpc_sock *rx = rxrpc_sk(sk); + u16 service_id = srx->srx_service; int ret; _enter("%p,%p,%d", rx, saddr, len); @@ -152,16 +161,13 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) goto error_unlock; } - if (rx->srx.srx_service) { - write_lock_bh(&local->services_lock); - list_for_each_entry(prx, &local->services, listen_link) { - if (prx->srx.srx_service == rx->srx.srx_service) - goto service_in_use; - } - + if (service_id) { + write_lock(&local->services_lock); + if (rcu_access_pointer(local->service)) + goto service_in_use; rx->local = local; - list_add_tail(&rx->listen_link, &local->services); - write_unlock_bh(&local->services_lock); + rcu_assign_pointer(local->service, rx); + write_unlock(&local->services_lock); rx->sk.sk_state = RXRPC_SERVER_BOUND; } else { @@ -174,7 +180,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) return 0; service_in_use: - write_unlock_bh(&local->services_lock); + write_unlock(&local->services_lock); rxrpc_put_local(local); ret = -EADDRINUSE; error_unlock: @@ -191,7 +197,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; struct rxrpc_sock *rx = rxrpc_sk(sk); - unsigned int max; + unsigned int max, old; int ret; _enter("%p,%d", rx, backlog); @@ -210,9 +216,13 @@ static int rxrpc_listen(struct socket *sock, int backlog) backlog = max; else if (backlog < 0 || backlog > max) break; + old = sk->sk_max_ack_backlog; sk->sk_max_ack_backlog = backlog; - rx->sk.sk_state = RXRPC_SERVER_LISTENING; - ret = 0; + ret = rxrpc_service_prealloc(rx, GFP_KERNEL); + if (ret == 0) + rx->sk.sk_state = RXRPC_SERVER_LISTENING; + else + sk->sk_max_ack_backlog = old; break; default: ret = -EBUSY; @@ -230,6 +240,8 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @srx: The address of the peer to contact * @key: The security context to use (defaults to socket setting) * @user_call_ID: The ID to use + * @gfp: The allocation constraints + * @notify_rx: Where to send notifications instead of socket queue * * Allow a kernel service to begin a call on the nominated socket. This just * sets up all the internal tracking structures and allocates connection and @@ -242,7 +254,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, struct sockaddr_rxrpc *srx, struct key *key, unsigned long user_call_ID, - gfp_t gfp) + gfp_t gfp, + rxrpc_notify_rx_t notify_rx) { struct rxrpc_conn_parameters cp; struct rxrpc_call *call; @@ -269,6 +282,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.exclusive = false; cp.service_id = srx->srx_service; call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp); + if (!IS_ERR(call)) + call->notify_rx = notify_rx; release_sock(&rx->sk); _leave(" = %p", call); @@ -278,40 +293,39 @@ EXPORT_SYMBOL(rxrpc_kernel_begin_call); /** * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using + * @sock: The socket the call is on * @call: The call to end * * Allow a kernel service to end a call it was using. The call must be * complete before this is called (the call should be aborted if necessary). */ -void rxrpc_kernel_end_call(struct rxrpc_call *call) +void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); - rxrpc_remove_user_ID(call->socket, call); - rxrpc_put_call(call); + rxrpc_release_call(rxrpc_sk(sock->sk), call); + rxrpc_put_call(call, rxrpc_call_put_kernel); } EXPORT_SYMBOL(rxrpc_kernel_end_call); /** - * rxrpc_kernel_intercept_rx_messages - Intercept received RxRPC messages + * rxrpc_kernel_new_call_notification - Get notifications of new calls * @sock: The socket to intercept received messages on - * @interceptor: The function to pass the messages to + * @notify_new_call: Function to be called when new calls appear + * @discard_new_call: Function to discard preallocated calls * - * Allow a kernel service to intercept messages heading for the Rx queue on an - * RxRPC socket. They get passed to the specified function instead. - * @interceptor should free the socket buffers it is given. @interceptor is - * called with the socket receive queue spinlock held and softirqs disabled - - * this ensures that the messages will be delivered in the right order. + * Allow a kernel service to be given notifications about new calls. */ -void rxrpc_kernel_intercept_rx_messages(struct socket *sock, - rxrpc_interceptor_t interceptor) +void rxrpc_kernel_new_call_notification( + struct socket *sock, + rxrpc_notify_new_call_t notify_new_call, + rxrpc_discard_new_call_t discard_new_call) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - _enter(""); - rx->interceptor = interceptor; + rx->notify_new_call = notify_new_call; + rx->discard_new_call = discard_new_call; } - -EXPORT_SYMBOL(rxrpc_kernel_intercept_rx_messages); +EXPORT_SYMBOL(rxrpc_kernel_new_call_notification); /* * connect an RxRPC socket @@ -391,6 +405,23 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) switch (rx->sk.sk_state) { case RXRPC_UNBOUND: + rx->srx.srx_family = AF_RXRPC; + rx->srx.srx_service = 0; + rx->srx.transport_type = SOCK_DGRAM; + rx->srx.transport.family = rx->family; + switch (rx->family) { + case AF_INET: + rx->srx.transport_len = sizeof(struct sockaddr_in); + break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + rx->srx.transport_len = sizeof(struct sockaddr_in6); + break; +#endif + default: + ret = -EAFNOSUPPORT; + goto error_unlock; + } local = rxrpc_lookup_local(&rx->srx); if (IS_ERR(local)) { ret = PTR_ERR(local); @@ -505,15 +536,16 @@ error: static unsigned int rxrpc_poll(struct file *file, struct socket *sock, poll_table *wait) { - unsigned int mask; struct sock *sk = sock->sk; + struct rxrpc_sock *rx = rxrpc_sk(sk); + unsigned int mask; sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* the socket is readable if there are any messages waiting on the Rx * queue */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!list_empty(&rx->recvmsg_q)) mask |= POLLIN | POLLRDNORM; /* the socket is writable if there is space to add new data to the @@ -540,7 +572,8 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, return -EAFNOSUPPORT; /* we support transport protocol UDP/UDP6 only */ - if (protocol != PF_INET) + if (protocol != PF_INET && + IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol != PF_INET6) return -EPROTONOSUPPORT; if (sock->type != SOCK_DGRAM) @@ -554,6 +587,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, return -ENOMEM; sock_init_data(sock, sk); + sock_set_flag(sk, SOCK_RCU_FREE); sk->sk_state = RXRPC_UNBOUND; sk->sk_write_space = rxrpc_write_space; sk->sk_max_ack_backlog = 0; @@ -563,9 +597,11 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, rx->family = protocol; rx->calls = RB_ROOT; - INIT_LIST_HEAD(&rx->listen_link); - INIT_LIST_HEAD(&rx->secureq); - INIT_LIST_HEAD(&rx->acceptq); + spin_lock_init(&rx->incoming_lock); + INIT_LIST_HEAD(&rx->sock_calls); + INIT_LIST_HEAD(&rx->to_be_accepted); + INIT_LIST_HEAD(&rx->recvmsg_q); + rwlock_init(&rx->recvmsg_lock); rwlock_init(&rx->call_lock); memset(&rx->srx, 0, sizeof(rx->srx)); @@ -574,6 +610,39 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, } /* + * Kill all the calls on a socket and shut it down. + */ +static int rxrpc_shutdown(struct socket *sock, int flags) +{ + struct sock *sk = sock->sk; + struct rxrpc_sock *rx = rxrpc_sk(sk); + int ret = 0; + + _enter("%p,%d", sk, flags); + + if (flags != SHUT_RDWR) + return -EOPNOTSUPP; + if (sk->sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; + + lock_sock(sk); + + spin_lock_bh(&sk->sk_receive_queue.lock); + if (sk->sk_state < RXRPC_CLOSE) { + sk->sk_state = RXRPC_CLOSE; + sk->sk_shutdown = SHUTDOWN_MASK; + } else { + ret = -ESHUTDOWN; + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + + rxrpc_discard_prealloc(rx); + + release_sock(sk); + return ret; +} + +/* * RxRPC socket destructor */ static void rxrpc_sock_destructor(struct sock *sk) @@ -609,15 +678,14 @@ static int rxrpc_release_sock(struct sock *sk) sk->sk_state = RXRPC_CLOSE; spin_unlock_bh(&sk->sk_receive_queue.lock); - ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1); - - if (!list_empty(&rx->listen_link)) { - write_lock_bh(&rx->local->services_lock); - list_del(&rx->listen_link); - write_unlock_bh(&rx->local->services_lock); + if (rx->local && rx->local->service == rx) { + write_lock(&rx->local->services_lock); + rx->local->service = NULL; + write_unlock(&rx->local->services_lock); } /* try to flush out this socket */ + rxrpc_discard_prealloc(rx); rxrpc_release_calls_on_socket(rx); flush_workqueue(rxrpc_workqueue); rxrpc_purge_queue(&sk->sk_receive_queue); @@ -666,7 +734,7 @@ static const struct proto_ops rxrpc_rpc_ops = { .poll = rxrpc_poll, .ioctl = sock_no_ioctl, .listen = rxrpc_listen, - .shutdown = sock_no_shutdown, + .shutdown = rxrpc_shutdown, .setsockopt = rxrpc_setsockopt, .getsockopt = sock_no_getsockopt, .sendmsg = rxrpc_sendmsg, @@ -697,7 +765,13 @@ static int __init af_rxrpc_init(void) BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb)); - rxrpc_epoch = get_seconds(); + get_random_bytes(&rxrpc_epoch, sizeof(rxrpc_epoch)); + rxrpc_epoch |= RXRPC_RANDOM_EPOCH; + get_random_bytes(&rxrpc_client_conn_ids.cur, + sizeof(rxrpc_client_conn_ids.cur)); + rxrpc_client_conn_ids.cur &= 0x3fffffff; + if (rxrpc_client_conn_ids.cur == 0) + rxrpc_client_conn_ids.cur = 1; ret = -ENOMEM; rxrpc_call_jar = kmem_cache_create( @@ -788,7 +862,8 @@ static void __exit af_rxrpc_exit(void) proto_unregister(&rxrpc_proto); rxrpc_destroy_all_calls(); rxrpc_destroy_all_connections(); - ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0); + ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0); + ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0); rxrpc_destroy_all_locals(); remove_proc_entry("rxrpc_conns", init_net.proc_net); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ff83fb1ddd47..d38dffd78085 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -35,11 +35,23 @@ struct rxrpc_crypt { #define rxrpc_queue_delayed_work(WS,D) \ queue_delayed_work(rxrpc_workqueue, (WS), (D)) -#define rxrpc_queue_call(CALL) rxrpc_queue_work(&(CALL)->processor) - struct rxrpc_connection; /* + * Mark applied to socket buffers. + */ +enum rxrpc_skb_mark { + RXRPC_SKB_MARK_DATA, /* data message */ + RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */ + RXRPC_SKB_MARK_BUSY, /* server busy message */ + RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */ + RXRPC_SKB_MARK_LOCAL_ABORT, /* local abort message */ + RXRPC_SKB_MARK_NET_ERROR, /* network error message */ + RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */ + RXRPC_SKB_MARK_NEW_CALL, /* local error message */ +}; + +/* * sk_state for RxRPC sockets */ enum { @@ -52,19 +64,44 @@ enum { }; /* + * Service backlog preallocation. + * + * This contains circular buffers of preallocated peers, connections and calls + * for incoming service calls and their head and tail pointers. This allows + * calls to be set up in the data_ready handler, thereby avoiding the need to + * shuffle packets around so much. + */ +struct rxrpc_backlog { + unsigned short peer_backlog_head; + unsigned short peer_backlog_tail; + unsigned short conn_backlog_head; + unsigned short conn_backlog_tail; + unsigned short call_backlog_head; + unsigned short call_backlog_tail; +#define RXRPC_BACKLOG_MAX 32 + struct rxrpc_peer *peer_backlog[RXRPC_BACKLOG_MAX]; + struct rxrpc_connection *conn_backlog[RXRPC_BACKLOG_MAX]; + struct rxrpc_call *call_backlog[RXRPC_BACKLOG_MAX]; +}; + +/* * RxRPC socket definition */ struct rxrpc_sock { /* WARNING: sk has to be the first member */ struct sock sk; - rxrpc_interceptor_t interceptor; /* kernel service Rx interceptor function */ + rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */ + rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */ struct rxrpc_local *local; /* local endpoint */ - struct list_head listen_link; /* link in the local endpoint's listen list */ - struct list_head secureq; /* calls awaiting connection security clearance */ - struct list_head acceptq; /* calls awaiting acceptance */ + struct rxrpc_backlog *backlog; /* Preallocation for services */ + spinlock_t incoming_lock; /* Incoming call vs service shutdown lock */ + struct list_head sock_calls; /* List of calls owned by this socket */ + struct list_head to_be_accepted; /* calls awaiting acceptance */ + struct list_head recvmsg_q; /* Calls awaiting recvmsg's attention */ + rwlock_t recvmsg_lock; /* Lock for recvmsg_q */ struct key *key; /* security for this socket */ struct key *securities; /* list of server security descriptors */ - struct rb_root calls; /* outstanding calls on this socket */ + struct rb_root calls; /* User ID -> call mapping */ unsigned long flags; #define RXRPC_SOCK_CONNECTED 0 /* connect_srx is set */ rwlock_t call_lock; /* lock for calls */ @@ -103,13 +140,11 @@ struct rxrpc_host_header { * - max 48 bytes (struct sk_buff::cb) */ struct rxrpc_skb_priv { - struct rxrpc_call *call; /* call with which associated */ - unsigned long resend_at; /* time in jiffies at which to resend */ union { - unsigned int offset; /* offset into buffer of next read */ + u8 nr_jumbo; /* Number of jumbo subpackets */ + }; + union { int remain; /* amount of space remaining for next write */ - u32 error; /* network error code */ - bool need_resend; /* T if needs resending */ }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ @@ -117,13 +152,6 @@ struct rxrpc_skb_priv { #define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb) -enum rxrpc_command { - RXRPC_CMD_SEND_DATA, /* send data message */ - RXRPC_CMD_SEND_ABORT, /* request abort generation */ - RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ - RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ -}; - /* * RxRPC security module interface */ @@ -150,7 +178,12 @@ struct rxrpc_security { void *); /* verify the security on a received packet */ - int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, u32 *); + int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, + unsigned int, unsigned int, rxrpc_seq_t, u16); + + /* Locate the data in a received packet that has been verified. */ + void (*locate_data)(struct rxrpc_call *, struct sk_buff *, + unsigned int *, unsigned int *); /* issue a challenge */ int (*issue_challenge)(struct rxrpc_connection *); @@ -180,9 +213,8 @@ struct rxrpc_local { struct list_head link; struct socket *socket; /* my UDP socket */ struct work_struct processor; - struct list_head services; /* services listening on this endpoint */ + struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ - struct sk_buff_head accept_queue; /* incoming calls awaiting acceptance */ struct sk_buff_head reject_queue; /* packets awaiting rejection */ struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */ struct rb_root client_conns; /* Client connections by socket params */ @@ -220,10 +252,12 @@ struct rxrpc_peer { /* calculated RTT cache */ #define RXRPC_RTT_CACHE_SIZE 32 - suseconds_t rtt; /* current RTT estimate (in uS) */ - unsigned int rtt_point; /* next entry at which to insert */ - unsigned int rtt_usage; /* amount of cache actually used */ - suseconds_t rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* calculated RTT cache */ + ktime_t rtt_last_req; /* Time of last RTT request */ + u64 rtt; /* Current RTT estimate (in nS) */ + u64 rtt_sum; /* Sum of cache contents */ + u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */ + u8 rtt_cursor; /* next entry at which to insert */ + u8 rtt_usage; /* amount of cache actually used */ }; /* @@ -255,6 +289,9 @@ enum rxrpc_conn_flag { RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */ RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */ RXRPC_CONN_IN_CLIENT_CONNS, /* Conn is in local->client_conns */ + RXRPC_CONN_EXPOSED, /* Conn has extra ref for exposure */ + RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ + RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */ }; /* @@ -265,17 +302,29 @@ enum rxrpc_conn_event { }; /* + * The connection cache state. + */ +enum rxrpc_conn_cache_state { + RXRPC_CONN_CLIENT_INACTIVE, /* Conn is not yet listed */ + RXRPC_CONN_CLIENT_WAITING, /* Conn is on wait list, waiting for capacity */ + RXRPC_CONN_CLIENT_ACTIVE, /* Conn is on active list, doing calls */ + RXRPC_CONN_CLIENT_CULLED, /* Conn is culled and delisted, doing calls */ + RXRPC_CONN_CLIENT_IDLE, /* Conn is on idle list, doing mostly nothing */ + RXRPC_CONN__NR_CACHE_STATES +}; + +/* * The connection protocol state. */ enum rxrpc_conn_proto_state { RXRPC_CONN_UNUSED, /* Connection not yet attempted */ RXRPC_CONN_CLIENT, /* Client connection */ + RXRPC_CONN_SERVICE_PREALLOC, /* Service connection preallocation */ RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */ RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */ RXRPC_CONN_SERVICE, /* Service secured connection */ RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */ RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */ - RXRPC_CONN_NETWORK_ERROR, /* Conn terminated by network error */ RXRPC_CONN__NR_STATES }; @@ -288,23 +337,33 @@ struct rxrpc_connection { struct rxrpc_conn_proto proto; struct rxrpc_conn_parameters params; - spinlock_t channel_lock; + atomic_t usage; + struct rcu_head rcu; + struct list_head cache_link; + spinlock_t channel_lock; + unsigned char active_chans; /* Mask of active channels */ +#define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1) + struct list_head waiting_calls; /* Calls waiting for channels */ struct rxrpc_channel { struct rxrpc_call __rcu *call; /* Active call */ u32 call_id; /* ID of current call */ u32 call_counter; /* Call ID counter */ u32 last_call; /* ID of last call */ - u32 last_result; /* Result of last call (0/abort) */ + u8 last_type; /* Type of last packet */ + u16 last_service_id; + union { + u32 last_seq; + u32 last_abort; + }; } channels[RXRPC_MAXCALLS]; - wait_queue_head_t channel_wq; /* queue to wait for channel to become available */ - struct rcu_head rcu; struct work_struct processor; /* connection event processor */ union { struct rb_node client_node; /* Node in local->client_conns */ struct rb_node service_node; /* Node in peer->service_conns */ }; + struct list_head proc_link; /* link in procfs list */ struct list_head link; /* link in master connection list */ struct sk_buff_head rx_queue; /* received conn-level packets */ const struct rxrpc_security *security; /* applied security module */ @@ -313,21 +372,18 @@ struct rxrpc_connection { struct rxrpc_crypt csum_iv; /* packet checksum base */ unsigned long flags; unsigned long events; - unsigned long put_time; /* Time at which last put */ + unsigned long idle_timestamp; /* Time at which last became idle */ spinlock_t state_lock; /* state-change lock */ - atomic_t usage; - enum rxrpc_conn_proto_state state : 8; /* current state of connection */ + enum rxrpc_conn_cache_state cache_state; + enum rxrpc_conn_proto_state state; /* current state of connection */ u32 local_abort; /* local abort code */ u32 remote_abort; /* remote abort code */ - int error; /* local error incurred */ int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ - atomic_t hi_serial; /* highest serial number received */ - atomic_t avail_chans; /* number of channels available */ + unsigned int hi_serial; /* highest serial number received */ + u32 security_nonce; /* response re-use preventer */ u8 size_align; /* data size alignment (for security) */ - u8 header_size; /* rxrpc + security header size */ u8 security_size; /* security header size */ - u32 security_nonce; /* response re-use preventer */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ }; @@ -337,37 +393,23 @@ struct rxrpc_connection { */ enum rxrpc_call_flag { RXRPC_CALL_RELEASED, /* call has been released - no more message to userspace */ - RXRPC_CALL_TERMINAL_MSG, /* call has given the socket its final message */ - RXRPC_CALL_RCVD_LAST, /* all packets received */ - RXRPC_CALL_RUN_RTIMER, /* Tx resend timer started */ - RXRPC_CALL_TX_SOFT_ACK, /* sent some soft ACKs */ - RXRPC_CALL_PROC_BUSY, /* the processor is busy */ - RXRPC_CALL_INIT_ACCEPT, /* acceptance was initiated */ RXRPC_CALL_HAS_USERID, /* has a user ID attached */ - RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */ + RXRPC_CALL_IS_SERVICE, /* Call is service call */ + RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ + RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ + RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ + RXRPC_CALL_PINGING, /* Ping in process */ + RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ }; /* * Events that can be raised on a call. */ enum rxrpc_call_event { - RXRPC_CALL_EV_RCVD_ACKALL, /* ACKALL or reply received */ - RXRPC_CALL_EV_RCVD_BUSY, /* busy packet received */ - RXRPC_CALL_EV_RCVD_ABORT, /* abort packet received */ - RXRPC_CALL_EV_RCVD_ERROR, /* network error received */ - RXRPC_CALL_EV_ACK_FINAL, /* need to generate final ACK (and release call) */ RXRPC_CALL_EV_ACK, /* need to generate ACK */ - RXRPC_CALL_EV_REJECT_BUSY, /* need to generate busy message */ RXRPC_CALL_EV_ABORT, /* need to generate abort */ - RXRPC_CALL_EV_CONN_ABORT, /* local connection abort generated */ - RXRPC_CALL_EV_RESEND_TIMER, /* Tx resend timer expired */ + RXRPC_CALL_EV_TIMER, /* Timer expired */ RXRPC_CALL_EV_RESEND, /* Tx resend required */ - RXRPC_CALL_EV_DRAIN_RX_OOS, /* drain the Rx out of sequence queue */ - RXRPC_CALL_EV_LIFE_TIMER, /* call's lifetimer ran out */ - RXRPC_CALL_EV_ACCEPTED, /* incoming call accepted by userspace app */ - RXRPC_CALL_EV_SECURED, /* incoming call's connection is now secure */ - RXRPC_CALL_EV_POST_ACCEPT, /* need to post an "accept?" message to the app */ - RXRPC_CALL_EV_RELEASE, /* need to release the call's resources */ }; /* @@ -379,20 +421,38 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */ RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ - RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */ + RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */ - RXRPC_CALL_COMPLETE, /* - call completed */ - RXRPC_CALL_SERVER_BUSY, /* - call rejected by busy server */ + RXRPC_CALL_COMPLETE, /* - call complete */ + NR__RXRPC_CALL_STATES +}; + +/* + * Call completion condition (state == RXRPC_CALL_COMPLETE). + */ +enum rxrpc_call_completion { + RXRPC_CALL_SUCCEEDED, /* - Normal termination */ RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ + RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ - RXRPC_CALL_DEAD, /* - call is dead */ - NR__RXRPC_CALL_STATES + NR__RXRPC_CALL_COMPLETIONS +}; + +/* + * Call Tx congestion management modes. + */ +enum rxrpc_congest_mode { + RXRPC_CALL_SLOW_START, + RXRPC_CALL_CONGEST_AVOIDANCE, + RXRPC_CALL_PACKET_LOSS, + RXRPC_CALL_FAST_RETRANSMIT, + NR__RXRPC_CONGEST_MODES }; /* @@ -402,92 +462,329 @@ enum rxrpc_call_state { struct rxrpc_call { struct rcu_head rcu; struct rxrpc_connection *conn; /* connection carrying call */ - struct rxrpc_sock *socket; /* socket responsible */ - struct timer_list lifetimer; /* lifetime remaining on call */ - struct timer_list deadspan; /* reap timer for re-ACK'ing, etc */ - struct timer_list ack_timer; /* ACK generation timer */ - struct timer_list resend_timer; /* Tx resend timer */ - struct work_struct destroyer; /* call destroyer */ - struct work_struct processor; /* packet processor and ACK generator */ + struct rxrpc_peer *peer; /* Peer record for remote address */ + struct rxrpc_sock __rcu *socket; /* socket responsible */ + ktime_t ack_at; /* When deferred ACK needs to happen */ + ktime_t resend_at; /* When next resend needs to happen */ + ktime_t expire_at; /* When the call times out */ + struct timer_list timer; /* Combined event timer */ + struct work_struct processor; /* Event processor */ + rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ + struct list_head chan_wait_link; /* Link in conn->waiting_calls */ struct hlist_node error_link; /* link in error distribution list */ - struct list_head accept_link; /* calls awaiting acceptance */ - struct rb_node sock_node; /* node in socket call tree */ - struct sk_buff_head rx_queue; /* received packets */ - struct sk_buff_head rx_oos_queue; /* packets received out of sequence */ + struct list_head accept_link; /* Link in rx->acceptq */ + struct list_head recvmsg_link; /* Link in rx->recvmsg_q */ + struct list_head sock_link; /* Link in rx->sock_calls */ + struct rb_node sock_node; /* Node in rx->calls */ struct sk_buff *tx_pending; /* Tx socket buffer being filled */ - wait_queue_head_t tx_waitq; /* wait for Tx window space to become available */ + wait_queue_head_t waitq; /* Wait queue for channel or Tx */ __be32 crypto_buf[2]; /* Temporary packet crypto buffer */ unsigned long user_call_ID; /* user-defined call ID */ - unsigned long creation_jif; /* time of call creation */ unsigned long flags; unsigned long events; spinlock_t lock; rwlock_t state_lock; /* lock for state transition */ - atomic_t usage; - atomic_t skb_count; /* Outstanding packets on this call */ - atomic_t sequence; /* Tx data packet sequence counter */ - u32 local_abort; /* local abort code */ - u32 remote_abort; /* remote abort code */ - int error_report; /* Network error (ICMP/local transport) */ + u32 abort_code; /* Local/remote abort code */ int error; /* Local error incurred */ - enum rxrpc_call_state state : 8; /* current state of call */ + enum rxrpc_call_state state; /* current state of call */ + enum rxrpc_call_completion completion; /* Call completion condition */ + atomic_t usage; + u16 service_id; /* service ID */ + u8 security_ix; /* Security type */ + u32 call_id; /* call ID on connection */ + u32 cid; /* connection ID plus channel index */ int debug_id; /* debug ID for printks */ - u8 channel; /* connection channel occupied by this call */ - - /* transmission-phase ACK management */ - u8 acks_head; /* offset into window of first entry */ - u8 acks_tail; /* offset into window of last entry */ - u8 acks_winsz; /* size of un-ACK'd window */ - u8 acks_unacked; /* lowest unacked packet in last ACK received */ - int acks_latest; /* serial number of latest ACK received */ - rxrpc_seq_t acks_hard; /* highest definitively ACK'd msg seq */ - unsigned long *acks_window; /* sent packet window - * - elements are pointers with LSB set if ACK'd + unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ + unsigned short rx_pkt_len; /* Current recvmsg packet len */ + + /* Rx/Tx circular buffer, depending on phase. + * + * In the Rx phase, packets are annotated with 0 or the number of the + * segment of a jumbo packet each buffer refers to. There can be up to + * 47 segments in a maximum-size UDP packet. + * + * In the Tx phase, packets are annotated with which buffers have been + * acked. + */ +#define RXRPC_RXTX_BUFF_SIZE 64 +#define RXRPC_RXTX_BUFF_MASK (RXRPC_RXTX_BUFF_SIZE - 1) +#define RXRPC_INIT_RX_WINDOW_SIZE 32 + struct sk_buff **rxtx_buffer; + u8 *rxtx_annotations; +#define RXRPC_TX_ANNO_ACK 0 +#define RXRPC_TX_ANNO_UNACK 1 +#define RXRPC_TX_ANNO_NAK 2 +#define RXRPC_TX_ANNO_RETRANS 3 +#define RXRPC_TX_ANNO_MASK 0x03 +#define RXRPC_TX_ANNO_LAST 0x04 +#define RXRPC_TX_ANNO_RESENT 0x08 + +#define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */ +#define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */ +#define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ + rxrpc_seq_t tx_hard_ack; /* Dead slot in buffer; the first transmitted but + * not hard-ACK'd packet follows this. + */ + rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ + + /* TCP-style slow-start congestion control [RFC5681]. Since the SMSS + * is fixed, we keep these numbers in terms of segments (ie. DATA + * packets) rather than bytes. + */ +#define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN + u8 cong_cwnd; /* Congestion window size */ + u8 cong_extra; /* Extra to send for congestion management */ + u8 cong_ssthresh; /* Slow-start threshold */ + enum rxrpc_congest_mode cong_mode:8; /* Congestion management mode */ + u8 cong_dup_acks; /* Count of ACKs showing missing packets */ + u8 cong_cumul_acks; /* Cumulative ACK count */ + ktime_t cong_tstamp; /* Last time cwnd was changed */ + + rxrpc_seq_t rx_hard_ack; /* Dead slot in buffer; the first received but not + * consumed packet follows this. */ + rxrpc_seq_t rx_top; /* Highest Rx slot allocated. */ + rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */ + u8 rx_winsize; /* Size of Rx window */ + u8 tx_winsize; /* Maximum size of Tx window */ + bool tx_phase; /* T if transmission phase, F if receive phase */ + u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */ /* receive-phase ACK management */ - rxrpc_seq_t rx_data_expect; /* next data seq ID expected to be received */ - rxrpc_seq_t rx_data_post; /* next data seq ID expected to be posted */ - rxrpc_seq_t rx_data_recv; /* last data seq ID encountered by recvmsg */ - rxrpc_seq_t rx_data_eaten; /* last data seq ID consumed by recvmsg */ - rxrpc_seq_t rx_first_oos; /* first packet in rx_oos_queue (or 0) */ - rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */ - rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ u8 ackr_reason; /* reason to ACK */ + u16 ackr_skew; /* skew on packet being ACK'd */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ - atomic_t ackr_not_idle; /* number of packets in Rx queue */ - - /* received packet records, 1 bit per record */ -#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG) - unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1]; + rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ + rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ + rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ + rxrpc_serial_t ackr_ping; /* Last ping sent */ + ktime_t ackr_ping_time; /* Time last ping sent */ - u8 in_clientflag; /* Copy of conn->in_clientflag */ - struct rxrpc_local *local; /* Local endpoint. */ - u32 call_id; /* call ID on connection */ - u32 cid; /* connection ID plus channel index */ - u32 epoch; /* epoch of this connection */ - u16 service_id; /* service ID */ + /* transmission-phase ACK management */ + ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ + rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ + rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ }; /* - * locally abort an RxRPC call + * Summary of a new ACK and the changes it made to the Tx buffer packet states. */ -static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code) -{ - write_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - call->local_abort = abort_code; - call->state = RXRPC_CALL_LOCALLY_ABORTED; - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - } - write_unlock_bh(&call->state_lock); -} +struct rxrpc_ack_summary { + u8 ack_reason; + u8 nr_acks; /* Number of ACKs in packet */ + u8 nr_nacks; /* Number of NACKs in packet */ + u8 nr_new_acks; /* Number of new ACKs in packet */ + u8 nr_new_nacks; /* Number of new NACKs in packet */ + u8 nr_rot_new_acks; /* Number of rotated new ACKs */ + bool new_low_nack; /* T if new low NACK found */ + bool retrans_timeo; /* T if reTx due to timeout happened */ + u8 flight_size; /* Number of unreceived transmissions */ + /* Place to stash values for tracing */ + enum rxrpc_congest_mode mode:8; + u8 cwnd; + u8 ssthresh; + u8 dup_acks; + u8 cumulative_acks; +}; + +enum rxrpc_skb_trace { + rxrpc_skb_rx_cleaned, + rxrpc_skb_rx_freed, + rxrpc_skb_rx_got, + rxrpc_skb_rx_lost, + rxrpc_skb_rx_received, + rxrpc_skb_rx_rotated, + rxrpc_skb_rx_purged, + rxrpc_skb_rx_seen, + rxrpc_skb_tx_cleaned, + rxrpc_skb_tx_freed, + rxrpc_skb_tx_got, + rxrpc_skb_tx_new, + rxrpc_skb_tx_rotated, + rxrpc_skb_tx_seen, + rxrpc_skb__nr_trace +}; + +extern const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7]; + +enum rxrpc_conn_trace { + rxrpc_conn_new_client, + rxrpc_conn_new_service, + rxrpc_conn_queued, + rxrpc_conn_seen, + rxrpc_conn_got, + rxrpc_conn_put_client, + rxrpc_conn_put_service, + rxrpc_conn__nr_trace +}; + +extern const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4]; + +enum rxrpc_client_trace { + rxrpc_client_activate_chans, + rxrpc_client_alloc, + rxrpc_client_chan_activate, + rxrpc_client_chan_disconnect, + rxrpc_client_chan_pass, + rxrpc_client_chan_unstarted, + rxrpc_client_cleanup, + rxrpc_client_count, + rxrpc_client_discard, + rxrpc_client_duplicate, + rxrpc_client_exposed, + rxrpc_client_replace, + rxrpc_client_to_active, + rxrpc_client_to_culled, + rxrpc_client_to_idle, + rxrpc_client_to_inactive, + rxrpc_client_to_waiting, + rxrpc_client_uncount, + rxrpc_client__nr_trace +}; + +extern const char rxrpc_client_traces[rxrpc_client__nr_trace][7]; +extern const char rxrpc_conn_cache_states[RXRPC_CONN__NR_CACHE_STATES][5]; + +enum rxrpc_call_trace { + rxrpc_call_new_client, + rxrpc_call_new_service, + rxrpc_call_queued, + rxrpc_call_queued_ref, + rxrpc_call_seen, + rxrpc_call_connected, + rxrpc_call_release, + rxrpc_call_got, + rxrpc_call_got_userid, + rxrpc_call_got_kernel, + rxrpc_call_put, + rxrpc_call_put_userid, + rxrpc_call_put_kernel, + rxrpc_call_put_noqueue, + rxrpc_call_error, + rxrpc_call__nr_trace +}; + +extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4]; + +enum rxrpc_transmit_trace { + rxrpc_transmit_wait, + rxrpc_transmit_queue, + rxrpc_transmit_queue_last, + rxrpc_transmit_rotate, + rxrpc_transmit_rotate_last, + rxrpc_transmit_await_reply, + rxrpc_transmit_end, + rxrpc_transmit__nr_trace +}; + +extern const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4]; + +enum rxrpc_receive_trace { + rxrpc_receive_incoming, + rxrpc_receive_queue, + rxrpc_receive_queue_last, + rxrpc_receive_front, + rxrpc_receive_rotate, + rxrpc_receive_end, + rxrpc_receive__nr_trace +}; + +extern const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4]; + +enum rxrpc_recvmsg_trace { + rxrpc_recvmsg_enter, + rxrpc_recvmsg_wait, + rxrpc_recvmsg_dequeue, + rxrpc_recvmsg_hole, + rxrpc_recvmsg_next, + rxrpc_recvmsg_cont, + rxrpc_recvmsg_full, + rxrpc_recvmsg_data_return, + rxrpc_recvmsg_terminal, + rxrpc_recvmsg_to_be_accepted, + rxrpc_recvmsg_return, + rxrpc_recvmsg__nr_trace +}; + +extern const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5]; + +enum rxrpc_rtt_tx_trace { + rxrpc_rtt_tx_ping, + rxrpc_rtt_tx_data, + rxrpc_rtt_tx__nr_trace +}; + +extern const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5]; + +enum rxrpc_rtt_rx_trace { + rxrpc_rtt_rx_ping_response, + rxrpc_rtt_rx_requested_ack, + rxrpc_rtt_rx__nr_trace +}; + +extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5]; + +enum rxrpc_timer_trace { + rxrpc_timer_begin, + rxrpc_timer_init_for_reply, + rxrpc_timer_expired, + rxrpc_timer_set_for_ack, + rxrpc_timer_set_for_resend, + rxrpc_timer_set_for_send, + rxrpc_timer__nr_trace +}; + +extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8]; + +enum rxrpc_propose_ack_trace { + rxrpc_propose_ack_client_tx_end, + rxrpc_propose_ack_input_data, + rxrpc_propose_ack_ping_for_lost_ack, + rxrpc_propose_ack_ping_for_lost_reply, + rxrpc_propose_ack_ping_for_params, + rxrpc_propose_ack_respond_to_ack, + rxrpc_propose_ack_respond_to_ping, + rxrpc_propose_ack_retry_tx, + rxrpc_propose_ack_rotate_rx, + rxrpc_propose_ack_terminal_ack, + rxrpc_propose_ack__nr_trace +}; + +enum rxrpc_propose_ack_outcome { + rxrpc_propose_ack_use, + rxrpc_propose_ack_update, + rxrpc_propose_ack_subsume, + rxrpc_propose_ack__nr_outcomes +}; + +extern const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8]; +extern const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes]; + +enum rxrpc_congest_change { + rxrpc_cong_begin_retransmission, + rxrpc_cong_cleared_nacks, + rxrpc_cong_new_low_nack, + rxrpc_cong_no_change, + rxrpc_cong_progress, + rxrpc_cong_retransmit_again, + rxrpc_cong_rtt_window_end, + rxrpc_cong_saw_nack, + rxrpc_congest__nr_change +}; + +extern const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10]; +extern const char rxrpc_congest_changes[rxrpc_congest__nr_change][9]; + +extern const char *const rxrpc_pkts[]; +extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4]; + +#include <trace/events/rxrpc.h> /* * af_rxrpc.c */ -extern atomic_t rxrpc_n_skbs; +extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; extern u32 rxrpc_epoch; extern atomic_t rxrpc_debug_id; extern struct workqueue_struct *rxrpc_workqueue; @@ -495,70 +792,178 @@ extern struct workqueue_struct *rxrpc_workqueue; /* * call_accept.c */ +int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); +void rxrpc_discard_prealloc(struct rxrpc_sock *); +struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, + struct rxrpc_connection *, + struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); -struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long); +struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, + rxrpc_notify_rx_t); int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool); -void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool); +void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); +void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, + enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); /* * call_object.c */ +extern const char *const rxrpc_call_states[]; +extern const char *const rxrpc_call_completions[]; extern unsigned int rxrpc_max_call_lifetime; -extern unsigned int rxrpc_dead_call_expiry; extern struct kmem_cache *rxrpc_call_jar; extern struct list_head rxrpc_calls; extern rwlock_t rxrpc_call_lock; struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); +struct rxrpc_call *rxrpc_alloc_call(gfp_t); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, unsigned long, gfp_t); -struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, - struct rxrpc_connection *, - struct sk_buff *); -void rxrpc_release_call(struct rxrpc_call *); +void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, + struct sk_buff *); +void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); -void __rxrpc_put_call(struct rxrpc_call *); +bool __rxrpc_queue_call(struct rxrpc_call *); +bool rxrpc_queue_call(struct rxrpc_call *); +void rxrpc_see_call(struct rxrpc_call *); +void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); +void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); +void rxrpc_cleanup_call(struct rxrpc_call *); void __exit rxrpc_destroy_all_calls(void); +static inline bool rxrpc_is_service_call(const struct rxrpc_call *call) +{ + return test_bit(RXRPC_CALL_IS_SERVICE, &call->flags); +} + +static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) +{ + return !rxrpc_is_service_call(call); +} + +/* + * Transition a call to the complete state. + */ +static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + if (call->state < RXRPC_CALL_COMPLETE) { + call->abort_code = abort_code; + call->error = error; + call->completion = compl, + call->state = RXRPC_CALL_COMPLETE; + wake_up(&call->waitq); + return true; + } + return false; +} + +static inline bool rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + bool ret; + + write_lock_bh(&call->state_lock); + ret = __rxrpc_set_call_completion(call, compl, abort_code, error); + write_unlock_bh(&call->state_lock); + return ret; +} + +/* + * Record that a call successfully completed. + */ +static inline bool __rxrpc_call_completed(struct rxrpc_call *call) +{ + return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); +} + +static inline bool rxrpc_call_completed(struct rxrpc_call *call) +{ + bool ret; + + write_lock_bh(&call->state_lock); + ret = __rxrpc_call_completed(call); + write_unlock_bh(&call->state_lock); + return ret; +} + +/* + * Record that a call is locally aborted. + */ +static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, + u32 abort_code, int error) +{ + trace_rxrpc_abort(why, call->cid, call->call_id, seq, + abort_code, error); + return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, + abort_code, error); +} + +static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) +{ + bool ret; + + write_lock_bh(&call->state_lock); + ret = __rxrpc_abort_call(why, call, seq, abort_code, error); + write_unlock_bh(&call->state_lock); + return ret; +} + /* * conn_client.c */ +extern unsigned int rxrpc_max_client_connections; +extern unsigned int rxrpc_reap_client_connections; +extern unsigned int rxrpc_conn_idle_client_expiry; +extern unsigned int rxrpc_conn_idle_client_fast_expiry; extern struct idr rxrpc_client_conn_ids; void rxrpc_destroy_client_conn_ids(void); int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, gfp_t); -void rxrpc_unpublish_client_conn(struct rxrpc_connection *); +void rxrpc_expose_client_call(struct rxrpc_call *); +void rxrpc_disconnect_client_call(struct rxrpc_call *); +void rxrpc_put_client_conn(struct rxrpc_connection *); +void __exit rxrpc_destroy_all_client_connections(void); /* * conn_event.c */ void rxrpc_process_connection(struct work_struct *); -void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *); -void rxrpc_reject_packets(struct rxrpc_local *); /* * conn_object.c */ extern unsigned int rxrpc_connection_expiry; extern struct list_head rxrpc_connections; +extern struct list_head rxrpc_connection_proc_list; extern rwlock_t rxrpc_connection_lock; int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, struct sk_buff *); -void __rxrpc_disconnect_call(struct rxrpc_call *); +void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *); void rxrpc_disconnect_call(struct rxrpc_call *); -void rxrpc_put_connection(struct rxrpc_connection *); +void rxrpc_kill_connection(struct rxrpc_connection *); +bool rxrpc_queue_conn(struct rxrpc_connection *); +void rxrpc_see_connection(struct rxrpc_connection *); +void rxrpc_get_connection(struct rxrpc_connection *); +struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *); +void rxrpc_put_service_conn(struct rxrpc_connection *); void __exit rxrpc_destroy_all_connections(void); static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn) @@ -571,24 +976,15 @@ static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn) return !rxrpc_conn_is_client(conn); } -static inline void rxrpc_get_connection(struct rxrpc_connection *conn) -{ - atomic_inc(&conn->usage); -} - -static inline -struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *conn) +static inline void rxrpc_put_connection(struct rxrpc_connection *conn) { - return atomic_inc_not_zero(&conn->usage) ? conn : NULL; -} + if (!conn) + return; -static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn) -{ - if (!rxrpc_get_connection_maybe(conn)) - return false; - if (!rxrpc_queue_work(&conn->processor)) - rxrpc_put_connection(conn); - return true; + if (rxrpc_conn_is_client(conn)) + rxrpc_put_client_conn(conn); + else + rxrpc_put_service_conn(conn); } /* @@ -596,17 +992,14 @@ static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn) */ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *, struct sk_buff *); -struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *, - struct sockaddr_rxrpc *, - struct sk_buff *); +struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t); +void rxrpc_new_incoming_connection(struct rxrpc_connection *, struct sk_buff *); void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* * input.c */ void rxrpc_data_ready(struct sock *); -int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, bool); -void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *); /* * insecure.c @@ -668,25 +1061,24 @@ extern unsigned int rxrpc_idle_ack_delay; extern unsigned int rxrpc_rx_window_size; extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; +extern unsigned int rxrpc_resend_timeout; -extern const char *const rxrpc_pkts[]; extern const s8 rxrpc_ack_priority[]; -extern const char *rxrpc_acks(u8 reason); - /* * output.c */ -extern unsigned int rxrpc_resend_timeout; - -int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *); -int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); +int rxrpc_send_call_packet(struct rxrpc_call *, u8); +int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); +void rxrpc_reject_packets(struct rxrpc_local *); /* * peer_event.c */ void rxrpc_error_report(struct sock *); void rxrpc_peer_error_distributor(struct work_struct *); +void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, + rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); /* * peer_object.c @@ -696,10 +1088,13 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); +struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *, + struct rxrpc_peer *); -static inline void rxrpc_get_peer(struct rxrpc_peer *peer) +static inline struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) { atomic_inc(&peer->usage); + return peer; } static inline @@ -718,14 +1113,13 @@ static inline void rxrpc_put_peer(struct rxrpc_peer *peer) /* * proc.c */ -extern const char *const rxrpc_call_states[]; extern const struct file_operations rxrpc_call_seq_fops; extern const struct file_operations rxrpc_connection_seq_fops; /* * recvmsg.c */ -void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *); +void rxrpc_notify_socket(struct rxrpc_call *); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* @@ -744,9 +1138,21 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *); int rxrpc_init_server_conn_security(struct rxrpc_connection *); /* + * sendmsg.c + */ +int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); + +/* * skbuff.c */ +void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_packet_destructor(struct sk_buff *); +void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_lose_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_purge_queue(struct sk_buff_head *); /* * sysctl.c @@ -764,6 +1170,23 @@ static inline void rxrpc_sysctl_exit(void) {} */ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); +static inline bool before(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) < 0; +} +static inline bool before_eq(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) <= 0; +} +static inline bool after(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) > 0; +} +static inline bool after_eq(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) >= 0; +} + /* * debug tracing */ @@ -846,11 +1269,12 @@ do { \ #define ASSERTCMP(X, OP, Y) \ do { \ - unsigned long _x = (unsigned long)(X); \ - unsigned long _y = (unsigned long)(Y); \ + __typeof__(X) _x = (X); \ + __typeof__(Y) _y = (__typeof__(X))(Y); \ if (unlikely(!(_x OP _y))) { \ - pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ - _x, _x, #OP, _y, _y); \ + pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ + (unsigned long)_x, (unsigned long)_x, #OP, \ + (unsigned long)_y, (unsigned long)_y); \ BUG(); \ } \ } while (0) @@ -865,11 +1289,12 @@ do { \ #define ASSERTIFCMP(C, X, OP, Y) \ do { \ - unsigned long _x = (unsigned long)(X); \ - unsigned long _y = (unsigned long)(Y); \ + __typeof__(X) _x = (X); \ + __typeof__(Y) _y = (__typeof__(X))(Y); \ if (unlikely((C) && !(_x OP _y))) { \ pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ - _x, _x, #OP, _y, _y); \ + (unsigned long)_x, (unsigned long)_x, #OP, \ + (unsigned long)_y, (unsigned long)_y); \ BUG(); \ } \ } while (0) @@ -893,54 +1318,3 @@ do { \ } while (0) #endif /* __KDEBUGALL */ - -/* - * socket buffer accounting / leak finding - */ -static inline void __rxrpc_new_skb(struct sk_buff *skb, const char *fn) -{ - //_net("new skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs)); - //atomic_inc(&rxrpc_n_skbs); -} - -#define rxrpc_new_skb(skb) __rxrpc_new_skb((skb), __func__) - -static inline void __rxrpc_kill_skb(struct sk_buff *skb, const char *fn) -{ - //_net("kill skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs)); - //atomic_dec(&rxrpc_n_skbs); -} - -#define rxrpc_kill_skb(skb) __rxrpc_kill_skb((skb), __func__) - -static inline void __rxrpc_free_skb(struct sk_buff *skb, const char *fn) -{ - if (skb) { - CHECK_SLAB_OKAY(&skb->users); - //_net("free skb %p %s [%d]", - // skb, fn, atomic_read(&rxrpc_n_skbs)); - //atomic_dec(&rxrpc_n_skbs); - kfree_skb(skb); - } -} - -#define rxrpc_free_skb(skb) __rxrpc_free_skb((skb), __func__) - -static inline void rxrpc_purge_queue(struct sk_buff_head *list) -{ - struct sk_buff *skb; - while ((skb = skb_dequeue((list))) != NULL) - rxrpc_free_skb(skb); -} - -#define rxrpc_get_call(CALL) \ -do { \ - CHECK_SLAB_OKAY(&(CALL)->usage); \ - if (atomic_inc_return(&(CALL)->usage) == 1) \ - BUG(); \ -} while (0) - -#define rxrpc_put_call(CALL) \ -do { \ - __rxrpc_put_call(CALL); \ -} while (0) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 9bae21e66d65..3cac231d8405 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -20,265 +20,409 @@ #include <linux/in6.h> #include <linux/icmp.h> #include <linux/gfp.h> +#include <linux/circ_buf.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include <net/ip.h> #include "ar-internal.h" /* - * generate a connection-level abort + * Preallocate a single service call, connection and peer and, if possible, + * give them a user ID and attach the user's side of the ID to them. */ -static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, - struct rxrpc_wire_header *whdr) +static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, + struct rxrpc_backlog *b, + rxrpc_notify_rx_t notify_rx, + rxrpc_user_attach_call_t user_attach_call, + unsigned long user_call_ID, gfp_t gfp) { - struct msghdr msg; - struct kvec iov[1]; - size_t len; - int ret; + const void *here = __builtin_return_address(0); + struct rxrpc_call *call; + int max, tmp; + unsigned int size = RXRPC_BACKLOG_MAX; + unsigned int head, tail, call_head, call_tail; + + max = rx->sk.sk_max_ack_backlog; + tmp = rx->sk.sk_ack_backlog; + if (tmp >= max) { + _leave(" = -ENOBUFS [full %u]", max); + return -ENOBUFS; + } + max -= tmp; + + /* We don't need more conns and peers than we have calls, but on the + * other hand, we shouldn't ever use more peers than conns or conns + * than calls. + */ + call_head = b->call_backlog_head; + call_tail = READ_ONCE(b->call_backlog_tail); + tmp = CIRC_CNT(call_head, call_tail, size); + if (tmp >= max) { + _leave(" = -ENOBUFS [enough %u]", tmp); + return -ENOBUFS; + } + max = tmp + 1; + + head = b->peer_backlog_head; + tail = READ_ONCE(b->peer_backlog_tail); + if (CIRC_CNT(head, tail, size) < max) { + struct rxrpc_peer *peer = rxrpc_alloc_peer(rx->local, gfp); + if (!peer) + return -ENOMEM; + b->peer_backlog[head] = peer; + smp_store_release(&b->peer_backlog_head, + (head + 1) & (size - 1)); + } - _enter("%d,,", local->debug_id); + head = b->conn_backlog_head; + tail = READ_ONCE(b->conn_backlog_tail); + if (CIRC_CNT(head, tail, size) < max) { + struct rxrpc_connection *conn; - whdr->type = RXRPC_PACKET_TYPE_BUSY; - whdr->serial = htonl(1); + conn = rxrpc_prealloc_service_connection(gfp); + if (!conn) + return -ENOMEM; + b->conn_backlog[head] = conn; + smp_store_release(&b->conn_backlog_head, + (head + 1) & (size - 1)); - msg.msg_name = &srx->transport.sin; - msg.msg_namelen = sizeof(srx->transport.sin); - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; + trace_rxrpc_conn(conn, rxrpc_conn_new_service, + atomic_read(&conn->usage), here); + } - iov[0].iov_base = whdr; - iov[0].iov_len = sizeof(*whdr); + /* Now it gets complicated, because calls get registered with the + * socket here, particularly if a user ID is preassigned by the user. + */ + call = rxrpc_alloc_call(gfp); + if (!call) + return -ENOMEM; + call->flags |= (1 << RXRPC_CALL_IS_SERVICE); + call->state = RXRPC_CALL_SERVER_PREALLOC; - len = iov[0].iov_len; + trace_rxrpc_call(call, rxrpc_call_new_service, + atomic_read(&call->usage), + here, (const void *)user_call_ID); - _proto("Tx BUSY %%1"); + write_lock(&rx->call_lock); + if (user_attach_call) { + struct rxrpc_call *xcall; + struct rb_node *parent, **pp; + + /* Check the user ID isn't already in use */ + pp = &rx->calls.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + xcall = rb_entry(parent, struct rxrpc_call, sock_node); + if (user_call_ID < call->user_call_ID) + pp = &(*pp)->rb_left; + else if (user_call_ID > call->user_call_ID) + pp = &(*pp)->rb_right; + else + goto id_in_use; + } - ret = kernel_sendmsg(local->socket, &msg, iov, 1, len); - if (ret < 0) { - _leave(" = -EAGAIN [sendmsg failed: %d]", ret); - return -EAGAIN; + call->user_call_ID = user_call_ID; + call->notify_rx = notify_rx; + rxrpc_get_call(call, rxrpc_call_got_kernel); + user_attach_call(call, user_call_ID); + rxrpc_get_call(call, rxrpc_call_got_userid); + rb_link_node(&call->sock_node, parent, pp); + rb_insert_color(&call->sock_node, &rx->calls); + set_bit(RXRPC_CALL_HAS_USERID, &call->flags); } - _leave(" = 0"); + list_add(&call->sock_link, &rx->sock_calls); + + write_unlock(&rx->call_lock); + + write_lock(&rxrpc_call_lock); + list_add_tail(&call->link, &rxrpc_calls); + write_unlock(&rxrpc_call_lock); + + b->call_backlog[call_head] = call; + smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); + _leave(" = 0 [%d -> %lx]", call->debug_id, user_call_ID); return 0; + +id_in_use: + write_unlock(&rx->call_lock); + rxrpc_cleanup_call(call); + _leave(" = -EBADSLT"); + return -EBADSLT; } /* - * accept an incoming call that needs peer, transport and/or connection setting - * up + * Preallocate sufficient service connections, calls and peers to cover the + * entire backlog of a socket. When a new call comes in, if we don't have + * sufficient of each available, the call gets rejected as busy or ignored. + * + * The backlog is replenished when a connection is accepted or rejected. */ -static int rxrpc_accept_incoming_call(struct rxrpc_local *local, - struct rxrpc_sock *rx, - struct sk_buff *skb, - struct sockaddr_rxrpc *srx) +int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) { - struct rxrpc_connection *conn; - struct rxrpc_skb_priv *sp, *nsp; - struct rxrpc_call *call; - struct sk_buff *notification; - int ret; + struct rxrpc_backlog *b = rx->backlog; - _enter(""); + if (!b) { + b = kzalloc(sizeof(struct rxrpc_backlog), gfp); + if (!b) + return -ENOMEM; + rx->backlog = b; + } + + if (rx->discard_new_call) + return 0; + + while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp) == 0) + ; - sp = rxrpc_skb(skb); + return 0; +} - /* get a notification message to send to the server app */ - notification = alloc_skb(0, GFP_NOFS); - if (!notification) { - _debug("no memory"); - ret = -ENOMEM; - goto error_nofree; +/* + * Discard the preallocation on a service. + */ +void rxrpc_discard_prealloc(struct rxrpc_sock *rx) +{ + struct rxrpc_backlog *b = rx->backlog; + unsigned int size = RXRPC_BACKLOG_MAX, head, tail; + + if (!b) + return; + rx->backlog = NULL; + + /* Make sure that there aren't any incoming calls in progress before we + * clear the preallocation buffers. + */ + spin_lock_bh(&rx->incoming_lock); + spin_unlock_bh(&rx->incoming_lock); + + head = b->peer_backlog_head; + tail = b->peer_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_peer *peer = b->peer_backlog[tail]; + kfree(peer); + tail = (tail + 1) & (size - 1); } - rxrpc_new_skb(notification); - notification->mark = RXRPC_SKB_MARK_NEW_CALL; - - conn = rxrpc_incoming_connection(local, srx, skb); - if (IS_ERR(conn)) { - _debug("no conn"); - ret = PTR_ERR(conn); - goto error; + + head = b->conn_backlog_head; + tail = b->conn_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_connection *conn = b->conn_backlog[tail]; + write_lock(&rxrpc_connection_lock); + list_del(&conn->link); + list_del(&conn->proc_link); + write_unlock(&rxrpc_connection_lock); + kfree(conn); + tail = (tail + 1) & (size - 1); } - call = rxrpc_incoming_call(rx, conn, skb); - rxrpc_put_connection(conn); - if (IS_ERR(call)) { - _debug("no call"); - ret = PTR_ERR(call); - goto error; + head = b->call_backlog_head; + tail = b->call_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_call *call = b->call_backlog[tail]; + if (rx->discard_new_call) { + _debug("discard %lx", call->user_call_ID); + rx->discard_new_call(call, call->user_call_ID); + rxrpc_put_call(call, rxrpc_call_put_kernel); + } + rxrpc_call_completed(call); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + tail = (tail + 1) & (size - 1); } - /* attach the call to the socket */ - read_lock_bh(&local->services_lock); - if (rx->sk.sk_state == RXRPC_CLOSE) - goto invalid_service; + kfree(b); +} - write_lock(&rx->call_lock); - if (!test_and_set_bit(RXRPC_CALL_INIT_ACCEPT, &call->flags)) { - rxrpc_get_call(call); - - spin_lock(&call->conn->state_lock); - if (sp->hdr.securityIndex > 0 && - call->conn->state == RXRPC_CONN_SERVICE_UNSECURED) { - _debug("await conn sec"); - list_add_tail(&call->accept_link, &rx->secureq); - call->conn->state = RXRPC_CONN_SERVICE_CHALLENGING; - set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); - rxrpc_queue_conn(call->conn); - } else { - _debug("conn ready"); - call->state = RXRPC_CALL_SERVER_ACCEPTING; - list_add_tail(&call->accept_link, &rx->acceptq); - rxrpc_get_call(call); - atomic_inc(&call->skb_count); - nsp = rxrpc_skb(notification); - nsp->call = call; - - ASSERTCMP(atomic_read(&call->usage), >=, 3); - - _debug("notify"); - spin_lock(&call->lock); - ret = rxrpc_queue_rcv_skb(call, notification, true, - false); - spin_unlock(&call->lock); - notification = NULL; - BUG_ON(ret < 0); +/* + * Allocate a new incoming call from the prealloc pool, along with a connection + * and a peer as necessary. + */ +static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, + struct rxrpc_local *local, + struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + struct rxrpc_backlog *b = rx->backlog; + struct rxrpc_peer *peer, *xpeer; + struct rxrpc_call *call; + unsigned short call_head, conn_head, peer_head; + unsigned short call_tail, conn_tail, peer_tail; + unsigned short call_count, conn_count; + + /* #calls >= #conns >= #peers must hold true. */ + call_head = smp_load_acquire(&b->call_backlog_head); + call_tail = b->call_backlog_tail; + call_count = CIRC_CNT(call_head, call_tail, RXRPC_BACKLOG_MAX); + conn_head = smp_load_acquire(&b->conn_backlog_head); + conn_tail = b->conn_backlog_tail; + conn_count = CIRC_CNT(conn_head, conn_tail, RXRPC_BACKLOG_MAX); + ASSERTCMP(conn_count, >=, call_count); + peer_head = smp_load_acquire(&b->peer_backlog_head); + peer_tail = b->peer_backlog_tail; + ASSERTCMP(CIRC_CNT(peer_head, peer_tail, RXRPC_BACKLOG_MAX), >=, + conn_count); + + if (call_count == 0) + return NULL; + + if (!conn) { + /* No connection. We're going to need a peer to start off + * with. If one doesn't yet exist, use a spare from the + * preallocation set. We dump the address into the spare in + * anticipation - and to save on stack space. + */ + xpeer = b->peer_backlog[peer_tail]; + if (rxrpc_extract_addr_from_skb(&xpeer->srx, skb) < 0) + return NULL; + + peer = rxrpc_lookup_incoming_peer(local, xpeer); + if (peer == xpeer) { + b->peer_backlog[peer_tail] = NULL; + smp_store_release(&b->peer_backlog_tail, + (peer_tail + 1) & + (RXRPC_BACKLOG_MAX - 1)); } - spin_unlock(&call->conn->state_lock); - _debug("queued"); + /* Now allocate and set up the connection */ + conn = b->conn_backlog[conn_tail]; + b->conn_backlog[conn_tail] = NULL; + smp_store_release(&b->conn_backlog_tail, + (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + rxrpc_get_local(local); + conn->params.local = local; + conn->params.peer = peer; + rxrpc_see_connection(conn); + rxrpc_new_incoming_connection(conn, skb); + } else { + rxrpc_get_connection(conn); } - write_unlock(&rx->call_lock); - _debug("process"); - rxrpc_fast_process_packet(call, skb); + /* And now we can allocate and set up a new call */ + call = b->call_backlog[call_tail]; + b->call_backlog[call_tail] = NULL; + smp_store_release(&b->call_backlog_tail, + (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); - _debug("done"); - read_unlock_bh(&local->services_lock); - rxrpc_free_skb(notification); - rxrpc_put_call(call); - _leave(" = 0"); - return 0; - -invalid_service: - _debug("invalid"); - read_unlock_bh(&local->services_lock); - - read_lock_bh(&call->state_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) { - rxrpc_get_call(call); - rxrpc_queue_call(call); - } - read_unlock_bh(&call->state_lock); - rxrpc_put_call(call); - ret = -ECONNREFUSED; -error: - rxrpc_free_skb(notification); -error_nofree: - _leave(" = %d", ret); - return ret; + rxrpc_see_call(call); + call->conn = conn; + call->peer = rxrpc_get_peer(conn->params.peer); + return call; } /* - * accept incoming calls that need peer, transport and/or connection setting up - * - the packets we get are all incoming client DATA packets that have seq == 1 + * Set up a new incoming call. Called in BH context with the RCU read lock + * held. + * + * If this is for a kernel service, when we allocate the call, it will have + * three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the + * retainer ref obtained from the backlog buffer. Prealloc calls for userspace + * services only have the ref from the backlog buffer. We want to pass this + * ref to non-BH context to dispose of. + * + * If we want to report an error, we mark the skb with the packet type and + * abort code and return NULL. */ -void rxrpc_accept_incoming_calls(struct rxrpc_local *local) +struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_connection *conn, + struct sk_buff *skb) { - struct rxrpc_skb_priv *sp; - struct sockaddr_rxrpc srx; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_sock *rx; - struct rxrpc_wire_header whdr; - struct sk_buff *skb; - int ret; + struct rxrpc_call *call; + u16 service_id = sp->hdr.serviceId; - _enter("%d", local->debug_id); + _enter(""); - skb = skb_dequeue(&local->accept_queue); - if (!skb) { - _leave("\n"); - return; + /* Get the socket providing the service */ + rx = rcu_dereference(local->service); + if (service_id == rx->srx.srx_service) + goto found_service; + + trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EOPNOTSUPP); + skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->priority = RX_INVALID_OPERATION; + _leave(" = NULL [service]"); + return NULL; + +found_service: + spin_lock(&rx->incoming_lock); + if (rx->sk.sk_state == RXRPC_CLOSE) { + trace_rxrpc_abort("CLS", sp->hdr.cid, sp->hdr.callNumber, + sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); + skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->priority = RX_INVALID_OPERATION; + _leave(" = NULL [close]"); + call = NULL; + goto out; } - _net("incoming call skb %p", skb); - - sp = rxrpc_skb(skb); - - /* Set up a response packet header in case we need it */ - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.seq = htonl(sp->hdr.seq); - whdr.serial = 0; - whdr.flags = 0; - whdr.type = 0; - whdr.userStatus = 0; - whdr.securityIndex = sp->hdr.securityIndex; - whdr._rsvd = 0; - whdr.serviceId = htons(sp->hdr.serviceId); - - if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) - goto drop; - - /* get the socket providing the service */ - read_lock_bh(&local->services_lock); - list_for_each_entry(rx, &local->services, listen_link) { - if (rx->srx.srx_service == sp->hdr.serviceId && - rx->sk.sk_state != RXRPC_CLOSE) - goto found_service; + call = rxrpc_alloc_incoming_call(rx, local, conn, skb); + if (!call) { + skb->mark = RXRPC_SKB_MARK_BUSY; + _leave(" = NULL [busy]"); + call = NULL; + goto out; } - read_unlock_bh(&local->services_lock); - goto invalid_service; -found_service: - _debug("found service %hd", rx->srx.srx_service); - if (sk_acceptq_is_full(&rx->sk)) - goto backlog_full; - sk_acceptq_added(&rx->sk); - sock_hold(&rx->sk); - read_unlock_bh(&local->services_lock); - - ret = rxrpc_accept_incoming_call(local, rx, skb, &srx); - if (ret < 0) - sk_acceptq_removed(&rx->sk); - sock_put(&rx->sk); - switch (ret) { - case -ECONNRESET: /* old calls are ignored */ - case -ECONNABORTED: /* aborted calls are reaborted or ignored */ - case 0: - return; - case -ECONNREFUSED: - goto invalid_service; - case -EBUSY: - goto busy; - case -EKEYREJECTED: - goto security_mismatch; + trace_rxrpc_receive(call, rxrpc_receive_incoming, + sp->hdr.serial, sp->hdr.seq); + + /* Make the call live. */ + rxrpc_incoming_call(rx, call, skb); + conn = call->conn; + + if (rx->notify_new_call) + rx->notify_new_call(&rx->sk, call, call->user_call_ID); + else + sk_acceptq_added(&rx->sk); + + spin_lock(&conn->state_lock); + switch (conn->state) { + case RXRPC_CONN_SERVICE_UNSECURED: + conn->state = RXRPC_CONN_SERVICE_CHALLENGING; + set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); + rxrpc_queue_conn(call->conn); + break; + + case RXRPC_CONN_SERVICE: + write_lock(&call->state_lock); + if (rx->discard_new_call) + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + else + call->state = RXRPC_CALL_SERVER_ACCEPTING; + write_unlock(&call->state_lock); + break; + + case RXRPC_CONN_REMOTELY_ABORTED: + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + conn->remote_abort, ECONNABORTED); + break; + case RXRPC_CONN_LOCALLY_ABORTED: + rxrpc_abort_call("CON", call, sp->hdr.seq, + conn->local_abort, ECONNABORTED); + break; default: BUG(); } + spin_unlock(&conn->state_lock); -backlog_full: - read_unlock_bh(&local->services_lock); -busy: - rxrpc_busy(local, &srx, &whdr); - rxrpc_free_skb(skb); - return; + if (call->state == RXRPC_CALL_SERVER_ACCEPTING) + rxrpc_notify_socket(call); -drop: - rxrpc_free_skb(skb); - return; + /* We have to discard the prealloc queue's ref here and rely on a + * combination of the RCU read lock and refs held either by the socket + * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel + * service to prevent the call from being deallocated too early. + */ + rxrpc_put_call(call, rxrpc_call_put); -invalid_service: - skb->priority = RX_INVALID_OPERATION; - rxrpc_reject_packet(local, skb); - return; - - /* can't change connection security type mid-flow */ -security_mismatch: - skb->priority = RX_PROTOCOL_ERROR; - rxrpc_reject_packet(local, skb); - return; + _leave(" = %p{%d}", call, call->debug_id); +out: + spin_unlock(&rx->incoming_lock); + return call; } /* @@ -286,7 +430,8 @@ security_mismatch: * - assign the user call ID to the call at the front of the queue */ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, - unsigned long user_call_ID) + unsigned long user_call_ID, + rxrpc_notify_rx_t notify_rx) { struct rxrpc_call *call; struct rb_node *parent, **pp; @@ -298,12 +443,13 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, write_lock(&rx->call_lock); - ret = -ENODATA; - if (list_empty(&rx->acceptq)) - goto out; + if (list_empty(&rx->to_be_accepted)) { + write_unlock(&rx->call_lock); + kleave(" = -ENODATA [empty]"); + return ERR_PTR(-ENODATA); + } /* check the user ID isn't already in use */ - ret = -EBADSLT; pp = &rx->calls.rb_node; parent = NULL; while (*pp) { @@ -315,62 +461,59 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, else if (user_call_ID > call->user_call_ID) pp = &(*pp)->rb_right; else - goto out; + goto id_in_use; } - /* dequeue the first call and check it's still valid */ - call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link); + /* Dequeue the first call and check it's still valid. We gain + * responsibility for the queue's reference. + */ + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); list_del_init(&call->accept_link); sk_acceptq_removed(&rx->sk); + rxrpc_see_call(call); write_lock_bh(&call->state_lock); switch (call->state) { case RXRPC_CALL_SERVER_ACCEPTING: call->state = RXRPC_CALL_SERVER_RECV_REQUEST; break; - case RXRPC_CALL_REMOTELY_ABORTED: - case RXRPC_CALL_LOCALLY_ABORTED: - ret = -ECONNABORTED; - goto out_release; - case RXRPC_CALL_NETWORK_ERROR: - ret = call->conn->error; + case RXRPC_CALL_COMPLETE: + ret = call->error; goto out_release; - case RXRPC_CALL_DEAD: - ret = -ETIME; - goto out_discard; default: BUG(); } /* formalise the acceptance */ + call->notify_rx = notify_rx; call->user_call_ID = user_call_ID; + rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags)) BUG(); - if (test_and_set_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) - BUG(); - rxrpc_queue_call(call); - rxrpc_get_call(call); write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); + rxrpc_notify_socket(call); + rxrpc_service_prealloc(rx, GFP_KERNEL); _leave(" = %p{%d}", call, call->debug_id); return call; - /* if the call is already dying or dead, then we leave the socket's ref - * on it to be released by rxrpc_dead_call_expired() as induced by - * rxrpc_release_call() */ out_release: _debug("release %p", call); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); -out_discard: write_unlock_bh(&call->state_lock); - _debug("discard %p", call); -out: write_unlock(&rx->call_lock); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + goto out; + +id_in_use: + ret = -EBADSLT; + write_unlock(&rx->call_lock); +out: + rxrpc_service_prealloc(rx, GFP_KERNEL); _leave(" = %d", ret); return ERR_PTR(ret); } @@ -382,6 +525,7 @@ out: int rxrpc_reject_call(struct rxrpc_sock *rx) { struct rxrpc_call *call; + bool abort = false; int ret; _enter(""); @@ -390,88 +534,73 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) write_lock(&rx->call_lock); - ret = -ENODATA; - if (list_empty(&rx->acceptq)) - goto out; + if (list_empty(&rx->to_be_accepted)) { + write_unlock(&rx->call_lock); + return -ENODATA; + } - /* dequeue the first call and check it's still valid */ - call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link); + /* Dequeue the first call and check it's still valid. We gain + * responsibility for the queue's reference. + */ + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); list_del_init(&call->accept_link); sk_acceptq_removed(&rx->sk); + rxrpc_see_call(call); write_lock_bh(&call->state_lock); switch (call->state) { case RXRPC_CALL_SERVER_ACCEPTING: - call->state = RXRPC_CALL_SERVER_BUSY; - if (test_and_set_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) - rxrpc_queue_call(call); - ret = 0; - goto out_release; - case RXRPC_CALL_REMOTELY_ABORTED: - case RXRPC_CALL_LOCALLY_ABORTED: - ret = -ECONNABORTED; - goto out_release; - case RXRPC_CALL_NETWORK_ERROR: - ret = call->conn->error; - goto out_release; - case RXRPC_CALL_DEAD: - ret = -ETIME; + __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, ECONNABORTED); + abort = true; + /* fall through */ + case RXRPC_CALL_COMPLETE: + ret = call->error; goto out_discard; default: BUG(); } - /* if the call is already dying or dead, then we leave the socket's ref - * on it to be released by rxrpc_dead_call_expired() as induced by - * rxrpc_release_call() */ -out_release: - _debug("release %p", call); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); out_discard: write_unlock_bh(&call->state_lock); - _debug("discard %p", call); -out: write_unlock(&rx->call_lock); + if (abort) { + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + } + rxrpc_service_prealloc(rx, GFP_KERNEL); _leave(" = %d", ret); return ret; } -/** - * rxrpc_kernel_accept_call - Allow a kernel service to accept an incoming call - * @sock: The socket on which the impending call is waiting - * @user_call_ID: The tag to attach to the call +/* + * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls + * @sock: The socket on which to preallocate + * @notify_rx: Event notification function for the call + * @user_attach_call: Func to attach call to user_call_ID + * @user_call_ID: The tag to attach to the preallocated call + * @gfp: The allocation conditions. * - * Allow a kernel service to accept an incoming call, assuming the incoming - * call is still valid. - */ -struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock, - unsigned long user_call_ID) -{ - struct rxrpc_call *call; - - _enter(",%lx", user_call_ID); - call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID); - _leave(" = %p", call); - return call; -} -EXPORT_SYMBOL(rxrpc_kernel_accept_call); - -/** - * rxrpc_kernel_reject_call - Allow a kernel service to reject an incoming call - * @sock: The socket on which the impending call is waiting + * Charge up the socket with preallocated calls, each with a user ID. A + * function should be provided to effect the attachment from the user's side. + * The user is given a ref to hold on the call. * - * Allow a kernel service to reject an incoming call with a BUSY message, - * assuming the incoming call is still valid. + * Note that the call may be come connected before this function returns. */ -int rxrpc_kernel_reject_call(struct socket *sock) +int rxrpc_kernel_charge_accept(struct socket *sock, + rxrpc_notify_rx_t notify_rx, + rxrpc_user_attach_call_t user_attach_call, + unsigned long user_call_ID, gfp_t gfp) { - int ret; + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + struct rxrpc_backlog *b = rx->backlog; - _enter(""); - ret = rxrpc_reject_call(rxrpc_sk(sock->sk)); - _leave(" = %d", ret); - return ret; + if (sock->sk->sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; + + return rxrpc_service_prealloc_one(rx, b, notify_rx, + user_attach_call, user_call_ID, + gfp); } -EXPORT_SYMBOL(rxrpc_kernel_reject_call); +EXPORT_SYMBOL(rxrpc_kernel_charge_accept); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index e60cf65c2232..4f00476630b9 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -22,1281 +22,351 @@ #include "ar-internal.h" /* - * propose an ACK be sent + * Set the timer */ -void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u32 serial, bool immediate) +void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, + ktime_t now) { - unsigned long expiry; - s8 prior = rxrpc_ack_priority[ack_reason]; - - ASSERTCMP(prior, >, 0); - - _enter("{%d},%s,%%%x,%u", - call->debug_id, rxrpc_acks(ack_reason), serial, immediate); - - if (prior < rxrpc_ack_priority[call->ackr_reason]) { - if (immediate) - goto cancel_timer; - return; - } + unsigned long t_j, now_j = jiffies; + ktime_t t; + bool queue = false; - /* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial - * numbers */ - if (prior == rxrpc_ack_priority[call->ackr_reason]) { - if (prior <= 4) - call->ackr_serial = serial; - if (immediate) - goto cancel_timer; - return; - } - - call->ackr_reason = ack_reason; - call->ackr_serial = serial; - - switch (ack_reason) { - case RXRPC_ACK_DELAY: - _debug("run delay timer"); - expiry = rxrpc_soft_ack_delay; - goto run_timer; - - case RXRPC_ACK_IDLE: - if (!immediate) { - _debug("run defer timer"); - expiry = rxrpc_idle_ack_delay; - goto run_timer; - } - goto cancel_timer; - - case RXRPC_ACK_REQUESTED: - expiry = rxrpc_requested_ack_delay; - if (!expiry) - goto cancel_timer; - if (!immediate || serial == 1) { - _debug("run defer timer"); - goto run_timer; - } - - default: - _debug("immediate ACK"); - goto cancel_timer; - } - -run_timer: - expiry += jiffies; - if (!timer_pending(&call->ack_timer) || - time_after(call->ack_timer.expires, expiry)) - mod_timer(&call->ack_timer, expiry); - return; - -cancel_timer: - _debug("cancel timer %%%u", serial); - try_to_del_timer_sync(&call->ack_timer); read_lock_bh(&call->state_lock); - if (call->state <= RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); -} - -/* - * propose an ACK be sent, locking the call structure - */ -void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u32 serial, bool immediate) -{ - s8 prior = rxrpc_ack_priority[ack_reason]; - - if (prior > rxrpc_ack_priority[call->ackr_reason]) { - spin_lock_bh(&call->lock); - __rxrpc_propose_ACK(call, ack_reason, serial, immediate); - spin_unlock_bh(&call->lock); - } -} -/* - * set the resend timer - */ -static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend, - unsigned long resend_at) -{ - read_lock_bh(&call->state_lock); - if (call->state >= RXRPC_CALL_COMPLETE) - resend = 0; - - if (resend & 1) { - _debug("SET RESEND"); - set_bit(RXRPC_CALL_EV_RESEND, &call->events); - } - - if (resend & 2) { - _debug("MODIFY RESEND TIMER"); - set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - mod_timer(&call->resend_timer, resend_at); - } else { - _debug("KILL RESEND TIMER"); - del_timer_sync(&call->resend_timer); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - } - read_unlock_bh(&call->state_lock); -} - -/* - * resend packets - */ -static void rxrpc_resend(struct rxrpc_call *call) -{ - struct rxrpc_wire_header *whdr; - struct rxrpc_skb_priv *sp; - struct sk_buff *txb; - unsigned long *p_txb, resend_at; - bool stop; - int loop; - u8 resend; - - _enter("{%d,%d,%d,%d},", - call->acks_hard, call->acks_unacked, - atomic_read(&call->sequence), - CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz)); - - stop = false; - resend = 0; - resend_at = 0; - - for (loop = call->acks_tail; - loop != call->acks_head || stop; - loop = (loop + 1) & (call->acks_winsz - 1) - ) { - p_txb = call->acks_window + loop; - smp_read_barrier_depends(); - if (*p_txb & 1) - continue; - - txb = (struct sk_buff *) *p_txb; - sp = rxrpc_skb(txb); - - if (sp->need_resend) { - sp->need_resend = false; - - /* each Tx packet has a new serial number */ - sp->hdr.serial = atomic_inc_return(&call->conn->serial); - - whdr = (struct rxrpc_wire_header *)txb->head; - whdr->serial = htonl(sp->hdr.serial); - - _proto("Tx DATA %%%u { #%d }", - sp->hdr.serial, sp->hdr.seq); - if (rxrpc_send_data_packet(call->conn, txb) < 0) { - stop = true; - sp->resend_at = jiffies + 3; - } else { - sp->resend_at = - jiffies + rxrpc_resend_timeout; - } + if (call->state < RXRPC_CALL_COMPLETE) { + t = call->expire_at; + if (!ktime_after(t, now)) + goto out; + + if (!ktime_after(call->resend_at, now)) { + call->resend_at = call->expire_at; + if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + queue = true; + } else if (ktime_before(call->resend_at, t)) { + t = call->resend_at; } - if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = true; - resend |= 1; - } else if (resend & 2) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; - } else { - resend_at = sp->resend_at; - resend |= 2; + if (!ktime_after(call->ack_at, now)) { + call->ack_at = call->expire_at; + if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) + queue = true; + } else if (ktime_before(call->ack_at, t)) { + t = call->ack_at; } - } - - rxrpc_set_resend(call, resend, resend_at); - _leave(""); -} - -/* - * handle resend timer expiry - */ -static void rxrpc_resend_timer(struct rxrpc_call *call) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *txb; - unsigned long *p_txb, resend_at; - int loop; - u8 resend; - - _enter("%d,%d,%d", - call->acks_tail, call->acks_unacked, call->acks_head); - - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - resend = 0; - resend_at = 0; - for (loop = call->acks_unacked; - loop != call->acks_head; - loop = (loop + 1) & (call->acks_winsz - 1) - ) { - p_txb = call->acks_window + loop; - smp_read_barrier_depends(); - txb = (struct sk_buff *) (*p_txb & ~1); - sp = rxrpc_skb(txb); + t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now))); + t_j += jiffies; - ASSERT(!(*p_txb & 1)); + /* We have to make sure that the calculated jiffies value falls + * at or after the nsec value, or we may loop ceaselessly + * because the timer times out, but we haven't reached the nsec + * timeout yet. + */ + t_j++; - if (sp->need_resend) { - ; - } else if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = true; - resend |= 1; - } else if (resend & 2) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; - } else { - resend_at = sp->resend_at; - resend |= 2; + if (call->timer.expires != t_j || !timer_pending(&call->timer)) { + mod_timer(&call->timer, t_j); + trace_rxrpc_timer(call, why, now, now_j); } + + if (queue) + rxrpc_queue_call(call); } - rxrpc_set_resend(call, resend, resend_at); - _leave(""); +out: + read_unlock_bh(&call->state_lock); } /* - * process soft ACKs of our transmitted packets - * - these indicate packets the peer has or has not received, but hasn't yet - * given to the consumer, and so can still be discarded and re-requested + * propose an ACK be sent */ -static int rxrpc_process_soft_ACKs(struct rxrpc_call *call, - struct rxrpc_ackpacket *ack, - struct sk_buff *skb) +static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, + u16 skew, u32 serial, bool immediate, + bool background, + enum rxrpc_propose_ack_trace why) { - struct rxrpc_skb_priv *sp; - struct sk_buff *txb; - unsigned long *p_txb, resend_at; - int loop; - u8 sacks[RXRPC_MAXACKS], resend; - - _enter("{%d,%d},{%d},", - call->acks_hard, - CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz), - ack->nAcks); - - if (skb_copy_bits(skb, 0, sacks, ack->nAcks) < 0) - goto protocol_error; - - resend = 0; - resend_at = 0; - for (loop = 0; loop < ack->nAcks; loop++) { - p_txb = call->acks_window; - p_txb += (call->acks_tail + loop) & (call->acks_winsz - 1); - smp_read_barrier_depends(); - txb = (struct sk_buff *) (*p_txb & ~1); - sp = rxrpc_skb(txb); - - switch (sacks[loop]) { - case RXRPC_ACK_TYPE_ACK: - sp->need_resend = false; - *p_txb |= 1; - break; - case RXRPC_ACK_TYPE_NACK: - sp->need_resend = true; - *p_txb &= ~1; - resend = 1; - break; - default: - _debug("Unsupported ACK type %d", sacks[loop]); - goto protocol_error; - } - } - - smp_mb(); - call->acks_unacked = (call->acks_tail + loop) & (call->acks_winsz - 1); - - /* anything not explicitly ACK'd is implicitly NACK'd, but may just not - * have been received or processed yet by the far end */ - for (loop = call->acks_unacked; - loop != call->acks_head; - loop = (loop + 1) & (call->acks_winsz - 1) - ) { - p_txb = call->acks_window + loop; - smp_read_barrier_depends(); - txb = (struct sk_buff *) (*p_txb & ~1); - sp = rxrpc_skb(txb); + enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; + unsigned int expiry = rxrpc_soft_ack_delay; + ktime_t now, ack_at; + s8 prior = rxrpc_ack_priority[ack_reason]; - if (*p_txb & 1) { - /* packet must have been discarded */ - sp->need_resend = true; - *p_txb &= ~1; - resend |= 1; - } else if (sp->need_resend) { - ; - } else if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = true; - resend |= 1; - } else if (resend & 2) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; - } else { - resend_at = sp->resend_at; - resend |= 2; + /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial + * numbers, but we don't alter the timeout. + */ + _debug("prior %u %u vs %u %u", + ack_reason, prior, + call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]); + if (ack_reason == call->ackr_reason) { + if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) { + outcome = rxrpc_propose_ack_update; + call->ackr_serial = serial; + call->ackr_skew = skew; } + if (!immediate) + goto trace; + } else if (prior > rxrpc_ack_priority[call->ackr_reason]) { + call->ackr_reason = ack_reason; + call->ackr_serial = serial; + call->ackr_skew = skew; + } else { + outcome = rxrpc_propose_ack_subsume; } - rxrpc_set_resend(call, resend, resend_at); - _leave(" = 0"); - return 0; - -protocol_error: - _leave(" = -EPROTO"); - return -EPROTO; -} - -/* - * discard hard-ACK'd packets from the Tx window - */ -static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard) -{ - unsigned long _skb; - int tail = call->acks_tail, old_tail; - int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz); + switch (ack_reason) { + case RXRPC_ACK_REQUESTED: + if (rxrpc_requested_ack_delay < expiry) + expiry = rxrpc_requested_ack_delay; + if (serial == 1) + immediate = false; + break; - _enter("{%u,%u},%u", call->acks_hard, win, hard); + case RXRPC_ACK_DELAY: + if (rxrpc_soft_ack_delay < expiry) + expiry = rxrpc_soft_ack_delay; + break; - ASSERTCMP(hard - call->acks_hard, <=, win); + case RXRPC_ACK_PING: + case RXRPC_ACK_IDLE: + if (rxrpc_idle_ack_delay < expiry) + expiry = rxrpc_idle_ack_delay; + break; - while (call->acks_hard < hard) { - smp_read_barrier_depends(); - _skb = call->acks_window[tail] & ~1; - rxrpc_free_skb((struct sk_buff *) _skb); - old_tail = tail; - tail = (tail + 1) & (call->acks_winsz - 1); - call->acks_tail = tail; - if (call->acks_unacked == old_tail) - call->acks_unacked = tail; - call->acks_hard++; + default: + immediate = true; + break; } - wake_up(&call->tx_waitq); -} - -/* - * clear the Tx window in the event of a failure - */ -static void rxrpc_clear_tx_window(struct rxrpc_call *call) -{ - rxrpc_rotate_tx_window(call, atomic_read(&call->sequence)); -} - -/* - * drain the out of sequence received packet queue into the packet Rx queue - */ -static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - bool terminal; - int ret; - - _enter("{%d,%d}", call->rx_data_post, call->rx_first_oos); - - spin_lock_bh(&call->lock); - - ret = -ECONNRESET; - if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) - goto socket_unavailable; - - skb = skb_dequeue(&call->rx_oos_queue); - if (skb) { - sp = rxrpc_skb(skb); - - _debug("drain OOS packet %d [%d]", - sp->hdr.seq, call->rx_first_oos); - - if (sp->hdr.seq != call->rx_first_oos) { - skb_queue_head(&call->rx_oos_queue, skb); - call->rx_first_oos = rxrpc_skb(skb)->hdr.seq; - _debug("requeue %p {%u}", skb, call->rx_first_oos); - } else { - skb->mark = RXRPC_SKB_MARK_DATA; - terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) && - !(sp->hdr.flags & RXRPC_CLIENT_INITIATED)); - ret = rxrpc_queue_rcv_skb(call, skb, true, terminal); - BUG_ON(ret < 0); - _debug("drain #%u", call->rx_data_post); - call->rx_data_post++; - - /* find out what the next packet is */ - skb = skb_peek(&call->rx_oos_queue); - if (skb) - call->rx_first_oos = rxrpc_skb(skb)->hdr.seq; - else - call->rx_first_oos = 0; - _debug("peek %p {%u}", skb, call->rx_first_oos); + if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { + _debug("already scheduled"); + } else if (immediate || expiry == 0) { + _debug("immediate ACK %lx", call->events); + if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events) && + background) + rxrpc_queue_call(call); + } else { + now = ktime_get_real(); + ack_at = ktime_add_ms(now, expiry); + if (ktime_before(ack_at, call->ack_at)) { + call->ack_at = ack_at; + rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now); } } - ret = 0; -socket_unavailable: - spin_unlock_bh(&call->lock); - _leave(" = %d", ret); - return ret; +trace: + trace_rxrpc_propose_ack(call, why, ack_reason, serial, immediate, + background, outcome); } /* - * insert an out of sequence packet into the buffer + * propose an ACK be sent, locking the call structure */ -static void rxrpc_insert_oos_packet(struct rxrpc_call *call, - struct sk_buff *skb) +void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, + u16 skew, u32 serial, bool immediate, bool background, + enum rxrpc_propose_ack_trace why) { - struct rxrpc_skb_priv *sp, *psp; - struct sk_buff *p; - u32 seq; - - sp = rxrpc_skb(skb); - seq = sp->hdr.seq; - _enter(",,{%u}", seq); - - skb->destructor = rxrpc_packet_destructor; - ASSERTCMP(sp->call, ==, NULL); - sp->call = call; - rxrpc_get_call(call); - atomic_inc(&call->skb_count); - - /* insert into the buffer in sequence order */ spin_lock_bh(&call->lock); - - skb_queue_walk(&call->rx_oos_queue, p) { - psp = rxrpc_skb(p); - if (psp->hdr.seq > seq) { - _debug("insert oos #%u before #%u", seq, psp->hdr.seq); - skb_insert(p, skb, &call->rx_oos_queue); - goto inserted; - } - } - - _debug("append oos #%u", seq); - skb_queue_tail(&call->rx_oos_queue, skb); -inserted: - - /* we might now have a new front to the queue */ - if (call->rx_first_oos == 0 || seq < call->rx_first_oos) - call->rx_first_oos = seq; - - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - call->rx_data_post == call->rx_first_oos) { - _debug("drain rx oos now"); - set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events); - } - read_unlock(&call->state_lock); - + __rxrpc_propose_ACK(call, ack_reason, skew, serial, + immediate, background, why); spin_unlock_bh(&call->lock); - _leave(" [stored #%u]", call->rx_first_oos); -} - -/* - * clear the Tx window on final ACK reception - */ -static void rxrpc_zap_tx_window(struct rxrpc_call *call) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - unsigned long _skb, *acks_window; - u8 winsz = call->acks_winsz; - int tail; - - acks_window = call->acks_window; - call->acks_window = NULL; - - while (CIRC_CNT(call->acks_head, call->acks_tail, winsz) > 0) { - tail = call->acks_tail; - smp_read_barrier_depends(); - _skb = acks_window[tail] & ~1; - smp_mb(); - call->acks_tail = (call->acks_tail + 1) & (winsz - 1); - - skb = (struct sk_buff *) _skb; - sp = rxrpc_skb(skb); - _debug("+++ clear Tx %u", sp->hdr.seq); - rxrpc_free_skb(skb); - } - - kfree(acks_window); } /* - * process the extra information that may be appended to an ACK packet + * Handle congestion being detected by the retransmit timeout. */ -static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, - unsigned int latest, int nAcks) +static void rxrpc_congestion_timeout(struct rxrpc_call *call) { - struct rxrpc_ackinfo ackinfo; - struct rxrpc_peer *peer; - unsigned int mtu; - - if (skb_copy_bits(skb, nAcks + 3, &ackinfo, sizeof(ackinfo)) < 0) { - _leave(" [no ackinfo]"); - return; - } - - _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", - latest, - ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU), - ntohl(ackinfo.rwind), ntohl(ackinfo.jumbo_max)); - - mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU)); - - peer = call->conn->params.peer; - if (mtu < peer->maxdata) { - spin_lock_bh(&peer->lock); - peer->maxdata = mtu; - peer->mtu = mtu + peer->hdrsize; - spin_unlock_bh(&peer->lock); - _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); - } + set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags); } /* - * process packets in the reception queue + * Perform retransmission of NAK'd and unack'd packets. */ -static int rxrpc_process_rx_queue(struct rxrpc_call *call, - u32 *_abort_code) +static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) { - struct rxrpc_ackpacket ack; struct rxrpc_skb_priv *sp; struct sk_buff *skb; - bool post_ACK; - int latest; - u32 hard, tx; - - _enter(""); - -process_further: - skb = skb_dequeue(&call->rx_queue); - if (!skb) - return -EAGAIN; - - _net("deferred skb %p", skb); - - sp = rxrpc_skb(skb); - - _debug("process %s [st %d]", rxrpc_pkts[sp->hdr.type], call->state); - - post_ACK = false; - - switch (sp->hdr.type) { - /* data packets that wind up here have been received out of - * order, need security processing or are jumbo packets */ - case RXRPC_PACKET_TYPE_DATA: - _proto("OOSQ DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); + rxrpc_seq_t cursor, seq, top; + ktime_t max_age, oldest, ack_ts; + int ix; + u8 annotation, anno_type, retrans = 0, unacked = 0; - /* secured packets must be verified and possibly decrypted */ - if (call->conn->security->verify_packet(call, skb, - _abort_code) < 0) - goto protocol_error; - - rxrpc_insert_oos_packet(call, skb); - goto process_further; - - /* partial ACK to process */ - case RXRPC_PACKET_TYPE_ACK: - if (skb_copy_bits(skb, 0, &ack, sizeof(ack)) < 0) { - _debug("extraction failure"); - goto protocol_error; - } - if (!skb_pull(skb, sizeof(ack))) - BUG(); - - latest = sp->hdr.serial; - hard = ntohl(ack.firstPacket); - tx = atomic_read(&call->sequence); - - _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", - latest, - ntohs(ack.maxSkew), - hard, - ntohl(ack.previousPacket), - ntohl(ack.serial), - rxrpc_acks(ack.reason), - ack.nAcks); - - rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks); - - if (ack.reason == RXRPC_ACK_PING) { - _proto("Rx ACK %%%u PING Request", latest); - rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, - sp->hdr.serial, true); - } - - /* discard any out-of-order or duplicate ACKs */ - if (latest - call->acks_latest <= 0) { - _debug("discard ACK %d <= %d", - latest, call->acks_latest); - goto discard; - } - call->acks_latest = latest; + _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && - call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY && - call->state != RXRPC_CALL_SERVER_SEND_REPLY && - call->state != RXRPC_CALL_SERVER_AWAIT_ACK) - goto discard; + max_age = ktime_sub_ms(now, rxrpc_resend_timeout); - _debug("Tx=%d H=%u S=%d", tx, call->acks_hard, call->state); - - if (hard > 0) { - if (hard - 1 > tx) { - _debug("hard-ACK'd packet %d not transmitted" - " (%d top)", - hard - 1, tx); - goto protocol_error; - } + spin_lock_bh(&call->lock); - if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY || - call->state == RXRPC_CALL_SERVER_AWAIT_ACK) && - hard > tx) { - call->acks_hard = tx; - goto all_acked; - } + cursor = call->tx_hard_ack; + top = call->tx_top; + ASSERT(before_eq(cursor, top)); + if (cursor == top) + goto out_unlock; + + /* Scan the packet list without dropping the lock and decide which of + * the packets in the Tx buffer we're going to resend and what the new + * resend timeout will be. + */ + oldest = now; + for (seq = cursor + 1; before_eq(seq, top); seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + annotation &= ~RXRPC_TX_ANNO_MASK; + if (anno_type == RXRPC_TX_ANNO_ACK) + continue; - smp_rmb(); - rxrpc_rotate_tx_window(call, hard - 1); - } + skb = call->rxtx_buffer[ix]; + rxrpc_see_skb(skb, rxrpc_skb_tx_seen); + sp = rxrpc_skb(skb); - if (ack.nAcks > 0) { - if (hard - 1 + ack.nAcks > tx) { - _debug("soft-ACK'd packet %d+%d not" - " transmitted (%d top)", - hard - 1, ack.nAcks, tx); - goto protocol_error; + if (anno_type == RXRPC_TX_ANNO_UNACK) { + if (ktime_after(skb->tstamp, max_age)) { + if (ktime_before(skb->tstamp, oldest)) + oldest = skb->tstamp; + continue; } - - if (rxrpc_process_soft_ACKs(call, &ack, skb) < 0) - goto protocol_error; + if (!(annotation & RXRPC_TX_ANNO_RESENT)) + unacked++; } - goto discard; - - /* complete ACK to process */ - case RXRPC_PACKET_TYPE_ACKALL: - goto all_acked; - - /* abort and busy are handled elsewhere */ - case RXRPC_PACKET_TYPE_BUSY: - case RXRPC_PACKET_TYPE_ABORT: - BUG(); - /* connection level events - also handled elsewhere */ - case RXRPC_PACKET_TYPE_CHALLENGE: - case RXRPC_PACKET_TYPE_RESPONSE: - case RXRPC_PACKET_TYPE_DEBUG: - BUG(); + /* Okay, we need to retransmit a packet. */ + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; + retrans++; + trace_rxrpc_retransmit(call, seq, annotation | anno_type, + ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } - /* if we've had a hard ACK that covers all the packets we've sent, then - * that ends that phase of the operation */ -all_acked: - write_lock_bh(&call->state_lock); - _debug("ack all %d", call->state); + call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); - switch (call->state) { - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; - break; - case RXRPC_CALL_SERVER_AWAIT_ACK: - _debug("srv complete"); - call->state = RXRPC_CALL_COMPLETE; - post_ACK = true; - break; - case RXRPC_CALL_CLIENT_SEND_REQUEST: - case RXRPC_CALL_SERVER_RECV_REQUEST: - goto protocol_error_unlock; /* can't occur yet */ - default: - write_unlock_bh(&call->state_lock); - goto discard; /* assume packet left over from earlier phase */ - } - - write_unlock_bh(&call->state_lock); + if (unacked) + rxrpc_congestion_timeout(call); - /* if all the packets we sent are hard-ACK'd, then we can discard - * whatever we've got left */ - _debug("clear Tx %d", - CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz)); - - del_timer_sync(&call->resend_timer); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - - if (call->acks_window) - rxrpc_zap_tx_window(call); - - if (post_ACK) { - /* post the final ACK message for userspace to pick up */ - _debug("post ACK"); - skb->mark = RXRPC_SKB_MARK_FINAL_ACK; - sp->call = call; - rxrpc_get_call(call); - atomic_inc(&call->skb_count); - spin_lock_bh(&call->lock); - if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0) - BUG(); + /* If there was nothing that needed retransmission then it's likely + * that an ACK got lost somewhere. Send a ping to find out instead of + * retransmitting data. + */ + if (!retrans) { + rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); spin_unlock_bh(&call->lock); - goto process_further; - } - -discard: - rxrpc_free_skb(skb); - goto process_further; - -protocol_error_unlock: - write_unlock_bh(&call->state_lock); -protocol_error: - rxrpc_free_skb(skb); - _leave(" = -EPROTO"); - return -EPROTO; -} - -/* - * post a message to the socket Rx queue for recvmsg() to pick up - */ -static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error, - bool fatal) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - int ret; - - _enter("{%d,%lx},%u,%u,%d", - call->debug_id, call->flags, mark, error, fatal); - - /* remove timers and things for fatal messages */ - if (fatal) { - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - } + ack_ts = ktime_sub(now, call->acks_latest_ts); + if (ktime_to_ns(ack_ts) < call->peer->rtt) + goto out; + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ack_ping_for_lost_ack); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + goto out; + } + + /* Now go through the Tx window and perform the retransmissions. We + * have to drop the lock for each send. If an ACK comes in whilst the + * lock is dropped, it may clear some of the retransmission markers for + * packets that it soft-ACKs. + */ + for (seq = cursor + 1; before_eq(seq, top); seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + if (anno_type != RXRPC_TX_ANNO_RETRANS) + continue; - if (mark != RXRPC_SKB_MARK_NEW_CALL && - !test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { - _leave("[no userid]"); - return 0; - } + skb = call->rxtx_buffer[ix]; + rxrpc_get_skb(skb, rxrpc_skb_tx_got); + spin_unlock_bh(&call->lock); - if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) { - skb = alloc_skb(0, GFP_NOFS); - if (!skb) - return -ENOMEM; + if (rxrpc_send_data_packet(call, skb, true) < 0) { + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + return; + } - rxrpc_new_skb(skb); + if (rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); - skb->mark = mark; + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + spin_lock_bh(&call->lock); - sp = rxrpc_skb(skb); - memset(sp, 0, sizeof(*sp)); - sp->error = error; - sp->call = call; - rxrpc_get_call(call); - atomic_inc(&call->skb_count); + /* We need to clear the retransmit state, but there are two + * things we need to be aware of: A new ACK/NAK might have been + * received and the packet might have been hard-ACK'd (in which + * case it will no longer be in the buffer). + */ + if (after(seq, call->tx_hard_ack)) { + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + if (anno_type == RXRPC_TX_ANNO_RETRANS || + anno_type == RXRPC_TX_ANNO_NAK) { + annotation &= ~RXRPC_TX_ANNO_MASK; + annotation |= RXRPC_TX_ANNO_UNACK; + } + annotation |= RXRPC_TX_ANNO_RESENT; + call->rxtx_annotations[ix] = annotation; + } - spin_lock_bh(&call->lock); - ret = rxrpc_queue_rcv_skb(call, skb, true, fatal); - spin_unlock_bh(&call->lock); - BUG_ON(ret < 0); + if (after(call->tx_hard_ack, seq)) + seq = call->tx_hard_ack; } - return 0; +out_unlock: + spin_unlock_bh(&call->lock); +out: + _leave(""); } /* - * handle background processing of incoming call packets and ACK / abort - * generation + * Handle retransmission and deferred ACK/abort generation. */ void rxrpc_process_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); - struct rxrpc_wire_header whdr; - struct rxrpc_ackpacket ack; - struct rxrpc_ackinfo ackinfo; - struct msghdr msg; - struct kvec iov[5]; - enum rxrpc_call_event genbit; - unsigned long bits; - __be32 data, pad; - size_t len; - int loop, nbit, ioc, ret, mtu; - u32 serial, abort_code = RX_PROTOCOL_ERROR; - u8 *acks = NULL; - - //printk("\n--------------------\n"); - _enter("{%d,%s,%lx} [%lu]", - call->debug_id, rxrpc_call_states[call->state], call->events, - (jiffies - call->creation_jif) / (HZ / 10)); - - if (test_and_set_bit(RXRPC_CALL_PROC_BUSY, &call->flags)) { - _debug("XXXXXXXXXXXXX RUNNING ON MULTIPLE CPUS XXXXXXXXXXXXX"); - return; - } - - if (!call->conn) - goto skip_msg_init; - - /* there's a good chance we're going to have to send a message, so set - * one up in advance */ - msg.msg_name = &call->conn->params.peer->srx.transport; - msg.msg_namelen = call->conn->params.peer->srx.transport_len; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - whdr.epoch = htonl(call->conn->proto.epoch); - whdr.cid = htonl(call->cid); - whdr.callNumber = htonl(call->call_id); - whdr.seq = 0; - whdr.type = RXRPC_PACKET_TYPE_ACK; - whdr.flags = call->conn->out_clientflag; - whdr.userStatus = 0; - whdr.securityIndex = call->conn->security_ix; - whdr._rsvd = 0; - whdr.serviceId = htons(call->service_id); - - memset(iov, 0, sizeof(iov)); - iov[0].iov_base = &whdr; - iov[0].iov_len = sizeof(whdr); -skip_msg_init: - - /* deal with events of a final nature */ - if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) { - enum rxrpc_skb_mark mark; - int error; - - clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); - clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events); - clear_bit(RXRPC_CALL_EV_ABORT, &call->events); - - error = call->error_report; - if (error < RXRPC_LOCAL_ERROR_OFFSET) { - mark = RXRPC_SKB_MARK_NET_ERROR; - _debug("post net error %d", error); - } else { - mark = RXRPC_SKB_MARK_LOCAL_ERROR; - error -= RXRPC_LOCAL_ERROR_OFFSET; - _debug("post net local error %d", error); - } - - if (rxrpc_post_message(call, mark, error, true) < 0) - goto no_mem; - clear_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events); - goto kill_ACKs; - } - - if (test_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events)) { - ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE); - - clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events); - clear_bit(RXRPC_CALL_EV_ABORT, &call->events); + ktime_t now; - _debug("post conn abort"); + rxrpc_see_call(call); - if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - call->conn->error, true) < 0) - goto no_mem; - clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); - goto kill_ACKs; - } - - if (test_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) { - whdr.type = RXRPC_PACKET_TYPE_BUSY; - genbit = RXRPC_CALL_EV_REJECT_BUSY; - goto send_message; - } - - if (test_bit(RXRPC_CALL_EV_ABORT, &call->events)) { - ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE); - - if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - ECONNABORTED, true) < 0) - goto no_mem; - whdr.type = RXRPC_PACKET_TYPE_ABORT; - data = htonl(call->local_abort); - iov[1].iov_base = &data; - iov[1].iov_len = sizeof(data); - genbit = RXRPC_CALL_EV_ABORT; - goto send_message; - } - - if (test_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) { - genbit = RXRPC_CALL_EV_ACK_FINAL; - - ack.bufferSpace = htons(8); - ack.maxSkew = 0; - ack.serial = 0; - ack.reason = RXRPC_ACK_IDLE; - ack.nAcks = 0; - call->ackr_reason = 0; - - spin_lock_bh(&call->lock); - ack.serial = htonl(call->ackr_serial); - ack.previousPacket = htonl(call->ackr_prev_seq); - ack.firstPacket = htonl(call->rx_data_eaten + 1); - spin_unlock_bh(&call->lock); - - pad = 0; + //printk("\n--------------------\n"); + _enter("{%d,%s,%lx}", + call->debug_id, rxrpc_call_states[call->state], call->events); - iov[1].iov_base = &ack; - iov[1].iov_len = sizeof(ack); - iov[2].iov_base = &pad; - iov[2].iov_len = 3; - iov[3].iov_base = &ackinfo; - iov[3].iov_len = sizeof(ackinfo); - goto send_ACK; +recheck_state: + if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) { + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + goto recheck_state; } - if (call->events & ((1 << RXRPC_CALL_EV_RCVD_BUSY) | - (1 << RXRPC_CALL_EV_RCVD_ABORT)) - ) { - u32 mark; - - if (test_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events)) - mark = RXRPC_SKB_MARK_REMOTE_ABORT; - else - mark = RXRPC_SKB_MARK_BUSY; - - _debug("post abort/busy"); - rxrpc_clear_tx_window(call); - if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0) - goto no_mem; - - clear_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events); - clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - goto kill_ACKs; + if (call->state == RXRPC_CALL_COMPLETE) { + del_timer_sync(&call->timer); + goto out_put; } - if (test_and_clear_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events)) { - _debug("do implicit ackall"); - rxrpc_clear_tx_window(call); + now = ktime_get_real(); + if (ktime_before(call->expire_at, now)) { + rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); + set_bit(RXRPC_CALL_EV_ABORT, &call->events); + goto recheck_state; } - if (test_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events)) { - write_lock_bh(&call->state_lock); - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = RX_CALL_TIMEOUT; - set_bit(RXRPC_CALL_EV_ABORT, &call->events); + if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) { + call->ack_at = call->expire_at; + if (call->ackr_reason) { + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + goto recheck_state; } - write_unlock_bh(&call->state_lock); - - _debug("post timeout"); - if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - ETIME, true) < 0) - goto no_mem; - - clear_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events); - goto kill_ACKs; } - /* deal with assorted inbound messages */ - if (!skb_queue_empty(&call->rx_queue)) { - switch (rxrpc_process_rx_queue(call, &abort_code)) { - case 0: - case -EAGAIN: - break; - case -ENOMEM: - goto no_mem; - case -EKEYEXPIRED: - case -EKEYREJECTED: - case -EPROTO: - rxrpc_abort_call(call, abort_code); - goto kill_ACKs; - } + if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) { + rxrpc_resend(call, now); + goto recheck_state; } - /* handle resending */ - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) - rxrpc_resend_timer(call); - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) - rxrpc_resend(call); - - /* consider sending an ordinary ACK */ - if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { - _debug("send ACK: window: %d - %d { %lx }", - call->rx_data_eaten, call->ackr_win_top, - call->ackr_window[0]); - - if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST && - call->ackr_reason != RXRPC_ACK_PING_RESPONSE) { - /* ACK by sending reply DATA packet in this state */ - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - goto maybe_reschedule; - } - - genbit = RXRPC_CALL_EV_ACK; - - acks = kzalloc(call->ackr_win_top - call->rx_data_eaten, - GFP_NOFS); - if (!acks) - goto no_mem; - - //hdr.flags = RXRPC_SLOW_START_OK; - ack.bufferSpace = htons(8); - ack.maxSkew = 0; - - spin_lock_bh(&call->lock); - ack.reason = call->ackr_reason; - ack.serial = htonl(call->ackr_serial); - ack.previousPacket = htonl(call->ackr_prev_seq); - ack.firstPacket = htonl(call->rx_data_eaten + 1); - - ack.nAcks = 0; - for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) { - nbit = loop * BITS_PER_LONG; - for (bits = call->ackr_window[loop]; bits; bits >>= 1 - ) { - _debug("- l=%d n=%d b=%lx", loop, nbit, bits); - if (bits & 1) { - acks[nbit] = RXRPC_ACK_TYPE_ACK; - ack.nAcks = nbit + 1; - } - nbit++; - } - } - call->ackr_reason = 0; - spin_unlock_bh(&call->lock); - - pad = 0; - - iov[1].iov_base = &ack; - iov[1].iov_len = sizeof(ack); - iov[2].iov_base = acks; - iov[2].iov_len = ack.nAcks; - iov[3].iov_base = &pad; - iov[3].iov_len = 3; - iov[4].iov_base = &ackinfo; - iov[4].iov_len = sizeof(ackinfo); - - switch (ack.reason) { - case RXRPC_ACK_REQUESTED: - case RXRPC_ACK_DUPLICATE: - case RXRPC_ACK_OUT_OF_SEQUENCE: - case RXRPC_ACK_EXCEEDS_WINDOW: - case RXRPC_ACK_NOSPACE: - case RXRPC_ACK_PING: - case RXRPC_ACK_PING_RESPONSE: - goto send_ACK_with_skew; - case RXRPC_ACK_DELAY: - case RXRPC_ACK_IDLE: - goto send_ACK; - } - } - - /* handle completion of security negotiations on an incoming - * connection */ - if (test_and_clear_bit(RXRPC_CALL_EV_SECURED, &call->events)) { - _debug("secured"); - spin_lock_bh(&call->lock); - - if (call->state == RXRPC_CALL_SERVER_SECURING) { - _debug("securing"); - write_lock(&call->socket->call_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) { - _debug("not released"); - call->state = RXRPC_CALL_SERVER_ACCEPTING; - list_move_tail(&call->accept_link, - &call->socket->acceptq); - } - write_unlock(&call->socket->call_lock); - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) - set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events); - read_unlock(&call->state_lock); - } - - spin_unlock_bh(&call->lock); - if (!test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events)) - goto maybe_reschedule; - } - - /* post a notification of an acceptable connection to the app */ - if (test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events)) { - _debug("post accept"); - if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL, - 0, false) < 0) - goto no_mem; - clear_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events); - goto maybe_reschedule; - } - - /* handle incoming call acceptance */ - if (test_and_clear_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) { - _debug("accepted"); - ASSERTCMP(call->rx_data_post, ==, 0); - call->rx_data_post = 1; - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) - set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events); - read_unlock_bh(&call->state_lock); - } - - /* drain the out of sequence received packet queue into the packet Rx - * queue */ - if (test_and_clear_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events)) { - while (call->rx_data_post == call->rx_first_oos) - if (rxrpc_drain_rx_oos_queue(call) < 0) - break; - goto maybe_reschedule; - } - - if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) { - rxrpc_release_call(call); - clear_bit(RXRPC_CALL_EV_RELEASE, &call->events); - } + rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); /* other events may have been raised since we started checking */ - goto maybe_reschedule; - -send_ACK_with_skew: - ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) - - ntohl(ack.serial)); -send_ACK: - mtu = call->conn->params.peer->if_mtu; - mtu -= call->conn->params.peer->hdrsize; - ackinfo.maxMTU = htonl(mtu); - ackinfo.rwind = htonl(rxrpc_rx_window_size); - - /* permit the peer to send us jumbo packets if it wants to */ - ackinfo.rxMTU = htonl(rxrpc_rx_mtu); - ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max); - - serial = atomic_inc_return(&call->conn->serial); - whdr.serial = htonl(serial); - _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", - serial, - ntohs(ack.maxSkew), - ntohl(ack.firstPacket), - ntohl(ack.previousPacket), - ntohl(ack.serial), - rxrpc_acks(ack.reason), - ack.nAcks); - - del_timer_sync(&call->ack_timer); - if (ack.nAcks > 0) - set_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags); - goto send_message_2; - -send_message: - _debug("send message"); - - serial = atomic_inc_return(&call->conn->serial); - whdr.serial = htonl(serial); - _proto("Tx %s %%%u", rxrpc_pkts[whdr.type], serial); -send_message_2: - - len = iov[0].iov_len; - ioc = 1; - if (iov[4].iov_len) { - ioc = 5; - len += iov[4].iov_len; - len += iov[3].iov_len; - len += iov[2].iov_len; - len += iov[1].iov_len; - } else if (iov[3].iov_len) { - ioc = 4; - len += iov[3].iov_len; - len += iov[2].iov_len; - len += iov[1].iov_len; - } else if (iov[2].iov_len) { - ioc = 3; - len += iov[2].iov_len; - len += iov[1].iov_len; - } else if (iov[1].iov_len) { - ioc = 2; - len += iov[1].iov_len; - } - - ret = kernel_sendmsg(call->conn->params.local->socket, - &msg, iov, ioc, len); - if (ret < 0) { - _debug("sendmsg failed: %d", ret); - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); - goto error; - } - - switch (genbit) { - case RXRPC_CALL_EV_ABORT: - clear_bit(genbit, &call->events); - clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - goto kill_ACKs; - - case RXRPC_CALL_EV_ACK_FINAL: - write_lock_bh(&call->state_lock); - if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK) - call->state = RXRPC_CALL_COMPLETE; - write_unlock_bh(&call->state_lock); - goto kill_ACKs; - - default: - clear_bit(genbit, &call->events); - switch (call->state) { - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - case RXRPC_CALL_CLIENT_RECV_REPLY: - case RXRPC_CALL_SERVER_RECV_REQUEST: - case RXRPC_CALL_SERVER_ACK_REQUEST: - _debug("start ACK timer"); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, - call->ackr_serial, false); - default: - break; - } - goto maybe_reschedule; - } - -kill_ACKs: - del_timer_sync(&call->ack_timer); - if (test_and_clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) - rxrpc_put_call(call); - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - -maybe_reschedule: - if (call->events || !skb_queue_empty(&call->rx_queue)) { - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); - } - - /* don't leave aborted connections on the accept queue */ - if (call->state >= RXRPC_CALL_COMPLETE && - !list_empty(&call->accept_link)) { - _debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }", - call, call->events, call->flags, call->conn->proto.cid); - - read_lock_bh(&call->state_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); - } - -error: - clear_bit(RXRPC_CALL_PROC_BUSY, &call->flags); - kfree(acks); - - /* because we don't want two CPUs both processing the work item for one - * call at the same time, we use a flag to note when it's busy; however - * this means there's a race between clearing the flag and setting the - * work pending bit and the work item being processed again */ - if (call->events && !work_pending(&call->processor)) { - _debug("jumpstart %x", call->conn->proto.cid); - rxrpc_queue_call(call); + if (call->events && call->state < RXRPC_CALL_COMPLETE) { + __rxrpc_queue_call(call); + goto out; } +out_put: + rxrpc_put_call(call, rxrpc_call_put); +out: _leave(""); - return; - -no_mem: - _debug("out of memory"); - goto maybe_reschedule; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index ae057e0740f3..364b42dc3dce 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -19,23 +19,13 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" -/* - * Maximum lifetime of a call (in jiffies). - */ -unsigned int rxrpc_max_call_lifetime = 60 * HZ; - -/* - * Time till dead call expires after last use (in jiffies). - */ -unsigned int rxrpc_dead_call_expiry = 2 * HZ; - const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { - [RXRPC_CALL_UNINITIALISED] = "Uninit", + [RXRPC_CALL_UNINITIALISED] = "Uninit ", [RXRPC_CALL_CLIENT_AWAIT_CONN] = "ClWtConn", [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq", [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", - [RXRPC_CALL_CLIENT_FINAL_ACK] = "ClFnlACK", + [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", [RXRPC_CALL_SERVER_SECURING] = "SvSecure", [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", @@ -43,22 +33,47 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", [RXRPC_CALL_SERVER_AWAIT_ACK] = "SvAwtACK", [RXRPC_CALL_COMPLETE] = "Complete", - [RXRPC_CALL_SERVER_BUSY] = "SvBusy ", +}; + +const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { + [RXRPC_CALL_SUCCEEDED] = "Complete", [RXRPC_CALL_REMOTELY_ABORTED] = "RmtAbort", [RXRPC_CALL_LOCALLY_ABORTED] = "LocAbort", + [RXRPC_CALL_LOCAL_ERROR] = "LocError", [RXRPC_CALL_NETWORK_ERROR] = "NetError", - [RXRPC_CALL_DEAD] = "Dead ", +}; + +const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = { + [rxrpc_call_new_client] = "NWc", + [rxrpc_call_new_service] = "NWs", + [rxrpc_call_queued] = "QUE", + [rxrpc_call_queued_ref] = "QUR", + [rxrpc_call_connected] = "CON", + [rxrpc_call_release] = "RLS", + [rxrpc_call_seen] = "SEE", + [rxrpc_call_got] = "GOT", + [rxrpc_call_got_userid] = "Gus", + [rxrpc_call_got_kernel] = "Gke", + [rxrpc_call_put] = "PUT", + [rxrpc_call_put_userid] = "Pus", + [rxrpc_call_put_kernel] = "Pke", + [rxrpc_call_put_noqueue] = "PNQ", + [rxrpc_call_error] = "*E*", }; struct kmem_cache *rxrpc_call_jar; LIST_HEAD(rxrpc_calls); DEFINE_RWLOCK(rxrpc_call_lock); -static void rxrpc_destroy_call(struct work_struct *work); -static void rxrpc_call_life_expired(unsigned long _call); -static void rxrpc_dead_call_expired(unsigned long _call); -static void rxrpc_ack_time_expired(unsigned long _call); -static void rxrpc_resend_time_expired(unsigned long _call); +static void rxrpc_call_timer_expired(unsigned long _call) +{ + struct rxrpc_call *call = (struct rxrpc_call *)_call; + + _enter("%d", call->debug_id); + + if (call->state < RXRPC_CALL_COMPLETE) + rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real()); +} /* * find an extant server call @@ -91,7 +106,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx, return NULL; found_extant_call: - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); read_unlock(&rx->call_lock); _leave(" = %p [%d]", call, atomic_read(&call->usage)); return call; @@ -100,7 +115,7 @@ found_extant_call: /* * allocate a new call */ -static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) +struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) { struct rxrpc_call *call; @@ -108,29 +123,25 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) if (!call) return NULL; - call->acks_winsz = 16; - call->acks_window = kmalloc(call->acks_winsz * sizeof(unsigned long), + call->rxtx_buffer = kcalloc(RXRPC_RXTX_BUFF_SIZE, + sizeof(struct sk_buff *), gfp); - if (!call->acks_window) { - kmem_cache_free(rxrpc_call_jar, call); - return NULL; - } + if (!call->rxtx_buffer) + goto nomem; + + call->rxtx_annotations = kcalloc(RXRPC_RXTX_BUFF_SIZE, sizeof(u8), gfp); + if (!call->rxtx_annotations) + goto nomem_2; - setup_timer(&call->lifetimer, &rxrpc_call_life_expired, - (unsigned long) call); - setup_timer(&call->deadspan, &rxrpc_dead_call_expired, - (unsigned long) call); - setup_timer(&call->ack_timer, &rxrpc_ack_time_expired, - (unsigned long) call); - setup_timer(&call->resend_timer, &rxrpc_resend_time_expired, - (unsigned long) call); - INIT_WORK(&call->destroyer, &rxrpc_destroy_call); + setup_timer(&call->timer, rxrpc_call_timer_expired, + (unsigned long)call); INIT_WORK(&call->processor, &rxrpc_process_call); INIT_LIST_HEAD(&call->link); + INIT_LIST_HEAD(&call->chan_wait_link); INIT_LIST_HEAD(&call->accept_link); - skb_queue_head_init(&call->rx_queue); - skb_queue_head_init(&call->rx_oos_queue); - init_waitqueue_head(&call->tx_waitq); + INIT_LIST_HEAD(&call->recvmsg_link); + INIT_LIST_HEAD(&call->sock_link); + init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); rwlock_init(&call->state_lock); atomic_set(&call->usage, 1); @@ -138,70 +149,65 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) memset(&call->sock_node, 0xed, sizeof(call->sock_node)); - call->rx_data_expect = 1; - call->rx_data_eaten = 0; - call->rx_first_oos = 0; - call->ackr_win_top = call->rx_data_eaten + 1 + rxrpc_rx_window_size; - call->creation_jif = jiffies; + /* Leave space in the ring to handle a maxed-out jumbo packet */ + call->rx_winsize = rxrpc_rx_window_size; + call->tx_winsize = 16; + call->rx_expect_next = 1; + + if (RXRPC_TX_SMSS > 2190) + call->cong_cwnd = 2; + else if (RXRPC_TX_SMSS > 1095) + call->cong_cwnd = 3; + else + call->cong_cwnd = 4; + call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1; return call; + +nomem_2: + kfree(call->rxtx_buffer); +nomem: + kmem_cache_free(rxrpc_call_jar, call); + return NULL; } /* * Allocate a new client call. */ -static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, - struct sockaddr_rxrpc *srx, +static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_call *call; + ktime_t now; _enter(""); - ASSERT(rx->local != NULL); - call = rxrpc_alloc_call(gfp); if (!call) return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; - - sock_hold(&rx->sk); - call->socket = rx; - call->rx_data_post = 1; - - call->local = rx->local; call->service_id = srx->srx_service; - call->in_clientflag = 0; + call->tx_phase = true; + now = ktime_get_real(); + call->acks_latest_ts = now; + call->cong_tstamp = now; _leave(" = %p", call); return call; } /* - * Begin client call. + * Initiate the call ack/resend/expiry timer. */ -static int rxrpc_begin_client_call(struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) +static void rxrpc_start_call_timer(struct rxrpc_call *call) { - int ret; - - /* Set up or get a connection record and set the protocol parameters, - * including channel number and call ID. - */ - ret = rxrpc_connect_call(call, cp, srx, gfp); - if (ret < 0) - return ret; - - call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; - - spin_lock(&call->conn->params.peer->lock); - hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets); - spin_unlock(&call->conn->params.peer->lock); - - call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime; - add_timer(&call->lifetimer); - return 0; + ktime_t now = ktime_get_real(), expire_at; + + expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime); + call->expire_at = expire_at; + call->ack_at = expire_at; + call->resend_at = expire_at; + call->timer.expires = jiffies + LONG_MAX / 2; + rxrpc_set_timer(call, rxrpc_timer_begin, now); } /* @@ -216,20 +222,21 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, { struct rxrpc_call *call, *xcall; struct rb_node *parent, **pp; + const void *here = __builtin_return_address(0); int ret; _enter("%p,%lx", rx, user_call_ID); - call = rxrpc_alloc_client_call(rx, srx, gfp); + call = rxrpc_alloc_client_call(srx, gfp); if (IS_ERR(call)) { _leave(" = %ld", PTR_ERR(call)); return call; } - /* Publish the call, even though it is incompletely set up as yet */ - call->user_call_ID = user_call_ID; - __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); + trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), + here, (const void *)user_call_ID); + /* Publish the call, even though it is incompletely set up as yet */ write_lock(&rx->call_lock); pp = &rx->calls.rb_node; @@ -243,369 +250,285 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, else if (user_call_ID > xcall->user_call_ID) pp = &(*pp)->rb_right; else - goto found_user_ID_now_present; + goto error_dup_user_ID; } - rxrpc_get_call(call); - + rcu_assign_pointer(call->socket, rx); + call->user_call_ID = user_call_ID; + __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); + rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); + list_add(&call->sock_link, &rx->sock_calls); + write_unlock(&rx->call_lock); - write_lock_bh(&rxrpc_call_lock); + write_lock(&rxrpc_call_lock); list_add_tail(&call->link, &rxrpc_calls); - write_unlock_bh(&rxrpc_call_lock); + write_unlock(&rxrpc_call_lock); - ret = rxrpc_begin_client_call(call, cp, srx, gfp); + /* Set up or get a connection record and set the protocol parameters, + * including channel number and call ID. + */ + ret = rxrpc_connect_call(call, cp, srx, gfp); if (ret < 0) goto error; - _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id); + trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage), + here, ERR_PTR(ret)); - _leave(" = %p [new]", call); - return call; + spin_lock_bh(&call->conn->params.peer->lock); + hlist_add_head(&call->error_link, + &call->conn->params.peer->error_targets); + spin_unlock_bh(&call->conn->params.peer->lock); -error: - write_lock(&rx->call_lock); - rb_erase(&call->sock_node, &rx->calls); - write_unlock(&rx->call_lock); - rxrpc_put_call(call); + rxrpc_start_call_timer(call); - write_lock_bh(&rxrpc_call_lock); - list_del_init(&call->link); - write_unlock_bh(&rxrpc_call_lock); + _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id); - set_bit(RXRPC_CALL_RELEASED, &call->flags); - call->state = RXRPC_CALL_DEAD; - rxrpc_put_call(call); - _leave(" = %d", ret); - return ERR_PTR(ret); + _leave(" = %p [new]", call); + return call; /* We unexpectedly found the user ID in the list after taking * the call_lock. This shouldn't happen unless the user races * with itself and tries to add the same user ID twice at the * same time in different threads. */ -found_user_ID_now_present: +error_dup_user_ID: write_unlock(&rx->call_lock); - set_bit(RXRPC_CALL_RELEASED, &call->flags); - call->state = RXRPC_CALL_DEAD; - rxrpc_put_call(call); - _leave(" = -EEXIST [%p]", call); - return ERR_PTR(-EEXIST); + ret = -EEXIST; + +error: + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, ret); + trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage), + here, ERR_PTR(ret)); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + _leave(" = %d", ret); + return ERR_PTR(ret); } /* - * set up an incoming call - * - called in process context with IRQs enabled + * Set up an incoming call. call->conn points to the connection. + * This is called in BH context and isn't allowed to fail. */ -struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, - struct rxrpc_connection *conn, - struct sk_buff *skb) +void rxrpc_incoming_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct sk_buff *skb) { + struct rxrpc_connection *conn = call->conn; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_call *call, *candidate; - u32 call_id, chan; - - _enter(",%d", conn->debug_id); - - ASSERT(rx != NULL); - - candidate = rxrpc_alloc_call(GFP_NOIO); - if (!candidate) - return ERR_PTR(-EBUSY); - - chan = sp->hdr.cid & RXRPC_CHANNELMASK; - candidate->socket = rx; - candidate->conn = conn; - candidate->cid = sp->hdr.cid; - candidate->call_id = sp->hdr.callNumber; - candidate->channel = chan; - candidate->rx_data_post = 0; - candidate->state = RXRPC_CALL_SERVER_ACCEPTING; - if (conn->security_ix > 0) - candidate->state = RXRPC_CALL_SERVER_SECURING; - - spin_lock(&conn->channel_lock); - - /* set the channel for this call */ - call = rcu_dereference_protected(conn->channels[chan].call, - lockdep_is_held(&conn->channel_lock)); - - _debug("channel[%u] is %p", candidate->channel, call); - if (call && call->call_id == sp->hdr.callNumber) { - /* already set; must've been a duplicate packet */ - _debug("extant call [%d]", call->state); - ASSERTCMP(call->conn, ==, conn); - - read_lock(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_LOCALLY_ABORTED: - if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) - rxrpc_queue_call(call); - case RXRPC_CALL_REMOTELY_ABORTED: - read_unlock(&call->state_lock); - goto aborted_call; - default: - rxrpc_get_call(call); - read_unlock(&call->state_lock); - goto extant_call; - } - } - - if (call) { - /* it seems the channel is still in use from the previous call - * - ditch the old binding if its call is now complete */ - _debug("CALL: %u { %s }", - call->debug_id, rxrpc_call_states[call->state]); - - if (call->state >= RXRPC_CALL_COMPLETE) { - __rxrpc_disconnect_call(call); - } else { - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = -EBUSY"); - return ERR_PTR(-EBUSY); - } - } - - /* check the call number isn't duplicate */ - _debug("check dup"); - call_id = sp->hdr.callNumber; - - /* We just ignore calls prior to the current call ID. Terminated calls - * are handled via the connection. + u32 chan; + + _enter(",%d", call->conn->debug_id); + + rcu_assign_pointer(call->socket, rx); + call->call_id = sp->hdr.callNumber; + call->service_id = sp->hdr.serviceId; + call->cid = sp->hdr.cid; + call->state = RXRPC_CALL_SERVER_ACCEPTING; + if (sp->hdr.securityIndex > 0) + call->state = RXRPC_CALL_SERVER_SECURING; + call->cong_tstamp = skb->tstamp; + + /* Set the channel for this call. We don't get channel_lock as we're + * only defending against the data_ready handler (which we're called + * from) and the RESPONSE packet parser (which is only really + * interested in call_counter and can cope with a disagreement with the + * call pointer). */ - if (call_id <= conn->channels[chan].call_counter) - goto old_call; /* TODO: Just drop packet */ - - /* make the call available */ - _debug("new call"); - call = candidate; - candidate = NULL; - conn->channels[chan].call_counter = call_id; + chan = sp->hdr.cid & RXRPC_CHANNELMASK; + conn->channels[chan].call_counter = call->call_id; + conn->channels[chan].call_id = call->call_id; rcu_assign_pointer(conn->channels[chan].call, call); - sock_hold(&rx->sk); - rxrpc_get_connection(conn); - spin_unlock(&conn->channel_lock); spin_lock(&conn->params.peer->lock); hlist_add_head(&call->error_link, &conn->params.peer->error_targets); spin_unlock(&conn->params.peer->lock); - write_lock_bh(&rxrpc_call_lock); - list_add_tail(&call->link, &rxrpc_calls); - write_unlock_bh(&rxrpc_call_lock); + _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); - call->local = conn->params.local; - call->epoch = conn->proto.epoch; - call->service_id = conn->params.service_id; - call->in_clientflag = RXRPC_CLIENT_INITIATED; + rxrpc_start_call_timer(call); + _leave(""); +} - _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); +/* + * Queue a call's work processor, getting a ref to pass to the work queue. + */ +bool rxrpc_queue_call(struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + int n = __atomic_add_unless(&call->usage, 1, 0); + if (n == 0) + return false; + if (rxrpc_queue_work(&call->processor)) + trace_rxrpc_call(call, rxrpc_call_queued, n + 1, here, NULL); + else + rxrpc_put_call(call, rxrpc_call_put_noqueue); + return true; +} - call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime; - add_timer(&call->lifetimer); - _leave(" = %p {%d} [new]", call, call->debug_id); - return call; +/* + * Queue a call's work processor, passing the callers ref to the work queue. + */ +bool __rxrpc_queue_call(struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + int n = atomic_read(&call->usage); + ASSERTCMP(n, >=, 1); + if (rxrpc_queue_work(&call->processor)) + trace_rxrpc_call(call, rxrpc_call_queued_ref, n, here, NULL); + else + rxrpc_put_call(call, rxrpc_call_put_noqueue); + return true; +} -extant_call: - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1); - return call; +/* + * Note the re-emergence of a call. + */ +void rxrpc_see_call(struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + if (call) { + int n = atomic_read(&call->usage); -aborted_call: - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = -ECONNABORTED"); - return ERR_PTR(-ECONNABORTED); - -old_call: - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = -ECONNRESET [old]"); - return ERR_PTR(-ECONNRESET); + trace_rxrpc_call(call, rxrpc_call_seen, n, here, NULL); + } } /* - * detach a call from a socket and set up for release + * Note the addition of a ref on a call. */ -void rxrpc_release_call(struct rxrpc_call *call) +void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(&call->usage); + + trace_rxrpc_call(call, op, n, here, NULL); +} + +/* + * Detach a call from its owning socket. + */ +void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); struct rxrpc_connection *conn = call->conn; - struct rxrpc_sock *rx = call->socket; + bool put = false; + int i; - _enter("{%d,%d,%d,%d}", - call->debug_id, atomic_read(&call->usage), - atomic_read(&call->ackr_not_idle), - call->rx_first_oos); + _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); + + trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage), + here, (const void *)call->flags); + + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); spin_lock_bh(&call->lock); if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); spin_unlock_bh(&call->lock); - /* dissociate from the socket - * - the socket's ref on the call is passed to the death timer - */ - _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); + del_timer_sync(&call->timer); - spin_lock(&conn->params.peer->lock); - hlist_del_init(&call->error_link); - spin_unlock(&conn->params.peer->lock); + /* Make sure we don't get any more notifications */ + write_lock_bh(&rx->recvmsg_lock); - write_lock_bh(&rx->call_lock); - if (!list_empty(&call->accept_link)) { + if (!list_empty(&call->recvmsg_link)) { _debug("unlinking once-pending call %p { e=%lx f=%lx }", call, call->events, call->flags); - ASSERT(!test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); - list_del_init(&call->accept_link); - sk_acceptq_removed(&rx->sk); - } else if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { - rb_erase(&call->sock_node, &rx->calls); - memset(&call->sock_node, 0xdd, sizeof(call->sock_node)); - clear_bit(RXRPC_CALL_HAS_USERID, &call->flags); + list_del(&call->recvmsg_link); + put = true; } - write_unlock_bh(&rx->call_lock); - /* free up the channel for reuse */ - write_lock_bh(&call->state_lock); + /* list_empty() must return false in rxrpc_notify_socket() */ + call->recvmsg_link.next = NULL; + call->recvmsg_link.prev = NULL; - if (call->state < RXRPC_CALL_COMPLETE && - call->state != RXRPC_CALL_CLIENT_FINAL_ACK) { - _debug("+++ ABORTING STATE %d +++\n", call->state); - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = RX_CALL_DEAD; - } - write_unlock_bh(&call->state_lock); + write_unlock_bh(&rx->recvmsg_lock); + if (put) + rxrpc_put_call(call, rxrpc_call_put); - rxrpc_disconnect_call(call); + write_lock(&rx->call_lock); - /* clean up the Rx queue */ - if (!skb_queue_empty(&call->rx_queue) || - !skb_queue_empty(&call->rx_oos_queue)) { - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; + if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { + rb_erase(&call->sock_node, &rx->calls); + memset(&call->sock_node, 0xdd, sizeof(call->sock_node)); + rxrpc_put_call(call, rxrpc_call_put_userid); + } - _debug("purge Rx queues"); + list_del(&call->sock_link); + write_unlock(&rx->call_lock); - spin_lock_bh(&call->lock); - while ((skb = skb_dequeue(&call->rx_queue)) || - (skb = skb_dequeue(&call->rx_oos_queue))) { - spin_unlock_bh(&call->lock); + _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); - sp = rxrpc_skb(skb); - _debug("- zap %s %%%u #%u", - rxrpc_pkts[sp->hdr.type], - sp->hdr.serial, sp->hdr.seq); - rxrpc_free_skb(skb); - spin_lock_bh(&call->lock); - } - spin_unlock_bh(&call->lock); + if (conn) + rxrpc_disconnect_call(call); - ASSERTCMP(call->state, !=, RXRPC_CALL_COMPLETE); + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { + rxrpc_free_skb(call->rxtx_buffer[i], + (call->tx_phase ? rxrpc_skb_tx_cleaned : + rxrpc_skb_rx_cleaned)); + call->rxtx_buffer[i] = NULL; } - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - del_timer_sync(&call->lifetimer); - call->deadspan.expires = jiffies + rxrpc_dead_call_expiry; - add_timer(&call->deadspan); - _leave(""); } /* - * handle a dead call being ready for reaping - */ -static void rxrpc_dead_call_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - write_lock_bh(&call->state_lock); - call->state = RXRPC_CALL_DEAD; - write_unlock_bh(&call->state_lock); - rxrpc_put_call(call); -} - -/* - * mark a call as to be released, aborting it if it's still in progress - * - called with softirqs disabled - */ -static void rxrpc_mark_call_released(struct rxrpc_call *call) -{ - bool sched; - - write_lock(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) { - sched = false; - if (call->state < RXRPC_CALL_COMPLETE) { - _debug("abort call %p", call); - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = RX_CALL_DEAD; - if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) - sched = true; - } - if (!test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - sched = true; - if (sched) - rxrpc_queue_call(call); - } - write_unlock(&call->state_lock); -} - -/* * release all the calls associated with a socket */ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) { struct rxrpc_call *call; - struct rb_node *p; _enter("%p", rx); - read_lock_bh(&rx->call_lock); - - /* mark all the calls as no longer wanting incoming packets */ - for (p = rb_first(&rx->calls); p; p = rb_next(p)) { - call = rb_entry(p, struct rxrpc_call, sock_node); - rxrpc_mark_call_released(call); - } - - /* kill the not-yet-accepted incoming calls */ - list_for_each_entry(call, &rx->secureq, accept_link) { - rxrpc_mark_call_released(call); + while (!list_empty(&rx->to_be_accepted)) { + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); + list_del(&call->accept_link); + rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_put_call(call, rxrpc_call_put); } - list_for_each_entry(call, &rx->acceptq, accept_link) { - rxrpc_mark_call_released(call); + while (!list_empty(&rx->sock_calls)) { + call = list_entry(rx->sock_calls.next, + struct rxrpc_call, sock_link); + rxrpc_get_call(call, rxrpc_call_got); + rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); } - read_unlock_bh(&rx->call_lock); _leave(""); } /* * release a call */ -void __rxrpc_put_call(struct rxrpc_call *call) +void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { + const void *here = __builtin_return_address(0); + int n; + ASSERT(call != NULL); - _enter("%p{u=%d}", call, atomic_read(&call->usage)); + n = atomic_dec_return(&call->usage); + trace_rxrpc_call(call, op, n, here, NULL); + ASSERTCMP(n, >=, 0); + if (n == 0) { + _debug("call %d dead", call->debug_id); + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); - ASSERTCMP(atomic_read(&call->usage), >, 0); + write_lock(&rxrpc_call_lock); + list_del_init(&call->link); + write_unlock(&rxrpc_call_lock); - if (atomic_dec_and_test(&call->usage)) { - _debug("call %d dead", call->debug_id); - WARN_ON(atomic_read(&call->skb_count) != 0); - ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); - rxrpc_queue_work(&call->destroyer); + rxrpc_cleanup_call(call); } - _leave(""); } /* @@ -615,187 +538,70 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) { struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); - rxrpc_purge_queue(&call->rx_queue); + rxrpc_put_peer(call->peer); + kfree(call->rxtx_buffer); + kfree(call->rxtx_annotations); kmem_cache_free(rxrpc_call_jar, call); } /* * clean up a call */ -static void rxrpc_cleanup_call(struct rxrpc_call *call) +void rxrpc_cleanup_call(struct rxrpc_call *call) { - _net("DESTROY CALL %d", call->debug_id); + int i; - ASSERT(call->socket); + _net("DESTROY CALL %d", call->debug_id); memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); - del_timer_sync(&call->lifetimer); - del_timer_sync(&call->deadspan); - del_timer_sync(&call->ack_timer); - del_timer_sync(&call->resend_timer); + del_timer_sync(&call->timer); + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); - ASSERTCMP(call->events, ==, 0); - if (work_pending(&call->processor)) { - _debug("defer destroy"); - rxrpc_queue_work(&call->destroyer); - return; - } - ASSERTCMP(call->conn, ==, NULL); - if (call->acks_window) { - _debug("kill Tx window %d", - CIRC_CNT(call->acks_head, call->acks_tail, - call->acks_winsz)); - smp_mb(); - while (CIRC_CNT(call->acks_head, call->acks_tail, - call->acks_winsz) > 0) { - struct rxrpc_skb_priv *sp; - unsigned long _skb; - - _skb = call->acks_window[call->acks_tail] & ~1; - sp = rxrpc_skb((struct sk_buff *)_skb); - _debug("+++ clear Tx %u", sp->hdr.seq); - rxrpc_free_skb((struct sk_buff *)_skb); - call->acks_tail = - (call->acks_tail + 1) & (call->acks_winsz - 1); - } - - kfree(call->acks_window); - } + /* Clean up the Rx/Tx buffer */ + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) + rxrpc_free_skb(call->rxtx_buffer[i], + (call->tx_phase ? rxrpc_skb_tx_cleaned : + rxrpc_skb_rx_cleaned)); - rxrpc_free_skb(call->tx_pending); + rxrpc_free_skb(call->tx_pending, rxrpc_skb_tx_cleaned); - rxrpc_purge_queue(&call->rx_queue); - ASSERT(skb_queue_empty(&call->rx_oos_queue)); - sock_put(&call->socket->sk); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } /* - * destroy a call - */ -static void rxrpc_destroy_call(struct work_struct *work) -{ - struct rxrpc_call *call = - container_of(work, struct rxrpc_call, destroyer); - - _enter("%p{%d,%d,%p}", - call, atomic_read(&call->usage), call->channel, call->conn); - - ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); - - write_lock_bh(&rxrpc_call_lock); - list_del_init(&call->link); - write_unlock_bh(&rxrpc_call_lock); - - rxrpc_cleanup_call(call); - _leave(""); -} - -/* - * preemptively destroy all the call records from a transport endpoint rather - * than waiting for them to time out + * Make sure that all calls are gone. */ void __exit rxrpc_destroy_all_calls(void) { struct rxrpc_call *call; _enter(""); - write_lock_bh(&rxrpc_call_lock); + + if (list_empty(&rxrpc_calls)) + return; + + write_lock(&rxrpc_call_lock); while (!list_empty(&rxrpc_calls)) { call = list_entry(rxrpc_calls.next, struct rxrpc_call, link); _debug("Zapping call %p", call); + rxrpc_see_call(call); list_del_init(&call->link); - switch (atomic_read(&call->usage)) { - case 0: - ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); - break; - case 1: - if (del_timer_sync(&call->deadspan) != 0 && - call->state != RXRPC_CALL_DEAD) - rxrpc_dead_call_expired((unsigned long) call); - if (call->state != RXRPC_CALL_DEAD) - break; - default: - pr_err("Call %p still in use (%d,%d,%s,%lx,%lx)!\n", - call, atomic_read(&call->usage), - atomic_read(&call->ackr_not_idle), - rxrpc_call_states[call->state], - call->flags, call->events); - if (!skb_queue_empty(&call->rx_queue)) - pr_err("Rx queue occupied\n"); - if (!skb_queue_empty(&call->rx_oos_queue)) - pr_err("OOS queue occupied\n"); - break; - } - - write_unlock_bh(&rxrpc_call_lock); - cond_resched(); - write_lock_bh(&rxrpc_call_lock); - } - - write_unlock_bh(&rxrpc_call_lock); - _leave(""); -} - -/* - * handle call lifetime being exceeded - */ -static void rxrpc_call_life_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; + pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", + call, atomic_read(&call->usage), + rxrpc_call_states[call->state], + call->flags, call->events); - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - _enter("{%d}", call->debug_id); - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - set_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events); - rxrpc_queue_call(call); + write_unlock(&rxrpc_call_lock); + cond_resched(); + write_lock(&rxrpc_call_lock); } - read_unlock_bh(&call->state_lock); -} - -/* - * handle resend timer expiry - * - may not take call->state_lock as this can deadlock against del_timer_sync() - */ -static void rxrpc_resend_time_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - if (!test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) - rxrpc_queue_call(call); -} - -/* - * handle ACK timer expiry - */ -static void rxrpc_ack_time_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - if (call->state >= RXRPC_CALL_COMPLETE) - return; - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); + write_unlock(&rxrpc_call_lock); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 9e91f27b0d0f..60ef9605167e 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -7,6 +7,68 @@ * modify it under the terms of the GNU General Public Licence * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. + * + * + * Client connections need to be cached for a little while after they've made a + * call so as to handle retransmitted DATA packets in case the server didn't + * receive the final ACK or terminating ABORT we sent it. + * + * Client connections can be in one of a number of cache states: + * + * (1) INACTIVE - The connection is not held in any list and may not have been + * exposed to the world. If it has been previously exposed, it was + * discarded from the idle list after expiring. + * + * (2) WAITING - The connection is waiting for the number of client conns to + * drop below the maximum capacity. Calls may be in progress upon it from + * when it was active and got culled. + * + * The connection is on the rxrpc_waiting_client_conns list which is kept + * in to-be-granted order. Culled conns with waiters go to the back of + * the queue just like new conns. + * + * (3) ACTIVE - The connection has at least one call in progress upon it, it + * may freely grant available channels to new calls and calls may be + * waiting on it for channels to become available. + * + * The connection is on the rxrpc_active_client_conns list which is kept + * in activation order for culling purposes. + * + * rxrpc_nr_active_client_conns is held incremented also. + * + * (4) CULLED - The connection got summarily culled to try and free up + * capacity. Calls currently in progress on the connection are allowed to + * continue, but new calls will have to wait. There can be no waiters in + * this state - the conn would have to go to the WAITING state instead. + * + * (5) IDLE - The connection has no calls in progress upon it and must have + * been exposed to the world (ie. the EXPOSED flag must be set). When it + * expires, the EXPOSED flag is cleared and the connection transitions to + * the INACTIVE state. + * + * The connection is on the rxrpc_idle_client_conns list which is kept in + * order of how soon they'll expire. + * + * There are flags of relevance to the cache: + * + * (1) EXPOSED - The connection ID got exposed to the world. If this flag is + * set, an extra ref is added to the connection preventing it from being + * reaped when it has no calls outstanding. This flag is cleared and the + * ref dropped when a conn is discarded from the idle list. + * + * This allows us to move terminal call state retransmission to the + * connection and to discard the call immediately we think it is done + * with. It also give us a chance to reuse the connection. + * + * (2) DONT_REUSE - The connection should be discarded as soon as possible and + * should not be reused. This is set when an exclusive connection is used + * or a call ID counter overflows. + * + * The caching state may only be changed if the cache lock is held. + * + * There are two idle client connection expiry durations. If the total number + * of connections is below the reap threshold, we use the normal duration; if + * it's above, we use the fast duration. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -16,27 +78,50 @@ #include <linux/timer.h> #include "ar-internal.h" +__read_mostly unsigned int rxrpc_max_client_connections = 1000; +__read_mostly unsigned int rxrpc_reap_client_connections = 900; +__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; +__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ; + +static unsigned int rxrpc_nr_client_conns; +static unsigned int rxrpc_nr_active_client_conns; +static __read_mostly bool rxrpc_kill_all_client_conns; + +static DEFINE_SPINLOCK(rxrpc_client_conn_cache_lock); +static DEFINE_SPINLOCK(rxrpc_client_conn_discard_mutex); +static LIST_HEAD(rxrpc_waiting_client_conns); +static LIST_HEAD(rxrpc_active_client_conns); +static LIST_HEAD(rxrpc_idle_client_conns); + /* * We use machine-unique IDs for our client connections. */ DEFINE_IDR(rxrpc_client_conn_ids); static DEFINE_SPINLOCK(rxrpc_conn_id_lock); +static void rxrpc_cull_active_client_conns(void); +static void rxrpc_discard_expired_client_conns(struct work_struct *); + +static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap, + rxrpc_discard_expired_client_conns); + +const char rxrpc_conn_cache_states[RXRPC_CONN__NR_CACHE_STATES][5] = { + [RXRPC_CONN_CLIENT_INACTIVE] = "Inac", + [RXRPC_CONN_CLIENT_WAITING] = "Wait", + [RXRPC_CONN_CLIENT_ACTIVE] = "Actv", + [RXRPC_CONN_CLIENT_CULLED] = "Cull", + [RXRPC_CONN_CLIENT_IDLE] = "Idle", +}; + /* * Get a connection ID and epoch for a client connection from the global pool. * The connection struct pointer is then recorded in the idr radix tree. The - * epoch is changed if this wraps. - * - * TODO: The IDR tree gets very expensive on memory if the connection IDs are - * widely scattered throughout the number space, so we shall need to retire - * connections that have, say, an ID more than four times the maximum number of - * client conns away from the current allocation point to try and keep the IDs - * concentrated. We will also need to retire connections from an old epoch. + * epoch doesn't change until the client is rebooted (or, at least, unless the + * module is unloaded). */ static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, gfp_t gfp) { - u32 epoch; int id; _enter(""); @@ -44,34 +129,18 @@ static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, idr_preload(gfp); spin_lock(&rxrpc_conn_id_lock); - epoch = rxrpc_epoch; - - /* We could use idr_alloc_cyclic() here, but we really need to know - * when the thing wraps so that we can advance the epoch. - */ - if (rxrpc_client_conn_ids.cur == 0) - rxrpc_client_conn_ids.cur = 1; - id = idr_alloc(&rxrpc_client_conn_ids, conn, - rxrpc_client_conn_ids.cur, 0x40000000, GFP_NOWAIT); - if (id < 0) { - if (id != -ENOSPC) - goto error; - id = idr_alloc(&rxrpc_client_conn_ids, conn, - 1, 0x40000000, GFP_NOWAIT); - if (id < 0) - goto error; - epoch++; - rxrpc_epoch = epoch; - } - rxrpc_client_conn_ids.cur = id + 1; + id = idr_alloc_cyclic(&rxrpc_client_conn_ids, conn, + 1, 0x40000000, GFP_NOWAIT); + if (id < 0) + goto error; spin_unlock(&rxrpc_conn_id_lock); idr_preload_end(); - conn->proto.epoch = epoch; + conn->proto.epoch = rxrpc_epoch; conn->proto.cid = id << RXRPC_CIDSHIFT; set_bit(RXRPC_CONN_HAS_IDR, &conn->flags); - _leave(" [CID %x:%x]", epoch, conn->proto.cid); + _leave(" [CID %x]", conn->proto.cid); return 0; error: @@ -114,8 +183,7 @@ void rxrpc_destroy_client_conn_ids(void) } /* - * Allocate a client connection. The caller must take care to clear any - * padding bytes in *cp. + * Allocate a client connection. */ static struct rxrpc_connection * rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) @@ -131,6 +199,10 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) return ERR_PTR(-ENOMEM); } + atomic_set(&conn->usage, 1); + if (cp->exclusive) + __set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + conn->params = *cp; conn->out_clientflag = RXRPC_CLIENT_INITIATED; conn->state = RXRPC_CONN_CLIENT; @@ -148,7 +220,7 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) goto error_2; write_lock(&rxrpc_connection_lock); - list_add_tail(&conn->link, &rxrpc_connections); + list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); write_unlock(&rxrpc_connection_lock); /* We steal the caller's peer ref. */ @@ -156,6 +228,9 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) rxrpc_get_local(conn->params.local); key_get(conn->params.key); + trace_rxrpc_conn(conn, rxrpc_conn_new_client, atomic_read(&conn->usage), + __builtin_return_address(0)); + trace_rxrpc_client(conn, -1, rxrpc_client_alloc); _leave(" = %p", conn); return conn; @@ -170,32 +245,68 @@ error_0: } /* - * find a connection for a call - * - called in process context with IRQs enabled + * Determine if a connection may be reused. */ -int rxrpc_connect_call(struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) +static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) +{ + int id_cursor, id, distance, limit; + + if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags)) + goto dont_reuse; + + if (conn->proto.epoch != rxrpc_epoch) + goto mark_dont_reuse; + + /* The IDR tree gets very expensive on memory if the connection IDs are + * widely scattered throughout the number space, so we shall want to + * kill off connections that, say, have an ID more than about four + * times the maximum number of client conns away from the current + * allocation point to try and keep the IDs concentrated. + */ + id_cursor = READ_ONCE(rxrpc_client_conn_ids.cur); + id = conn->proto.cid >> RXRPC_CIDSHIFT; + distance = id - id_cursor; + if (distance < 0) + distance = -distance; + limit = round_up(rxrpc_max_client_connections, IDR_SIZE) * 4; + if (distance > limit) + goto mark_dont_reuse; + + return true; + +mark_dont_reuse: + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); +dont_reuse: + return false; +} + +/* + * Create or find a client connection to use for a call. + * + * If we return with a connection, the call will be on its waiting list. It's + * left to the caller to assign a channel and wake up the call. + */ +static int rxrpc_get_client_conn(struct rxrpc_call *call, + struct rxrpc_conn_parameters *cp, + struct sockaddr_rxrpc *srx, + gfp_t gfp) { struct rxrpc_connection *conn, *candidate = NULL; struct rxrpc_local *local = cp->local; struct rb_node *p, **pp, *parent; long diff; - int chan; - - DECLARE_WAITQUEUE(myself, current); + int ret = -ENOMEM; _enter("{%d,%lx},", call->debug_id, call->user_call_ID); cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp); if (!cp->peer) - return -ENOMEM; + goto error; + /* If the connection is not meant to be exclusive, search the available + * connections to see if the connection we want to use already exists. + */ if (!cp->exclusive) { - /* Search for a existing client connection unless this is going - * to be a connection that's used exclusively for a single call. - */ _debug("search 1"); spin_lock(&local->client_conns_lock); p = local->client_conns.rb_node; @@ -206,39 +317,56 @@ int rxrpc_connect_call(struct rxrpc_call *call, diff = (cmp(peer) ?: cmp(key) ?: cmp(security_level)); - if (diff < 0) +#undef cmp + if (diff < 0) { p = p->rb_left; - else if (diff > 0) + } else if (diff > 0) { p = p->rb_right; - else - goto found_extant_conn; + } else { + if (rxrpc_may_reuse_conn(conn) && + rxrpc_get_connection_maybe(conn)) + goto found_extant_conn; + /* The connection needs replacing. It's better + * to effect that when we have something to + * replace it with so that we don't have to + * rebalance the tree twice. + */ + break; + } } spin_unlock(&local->client_conns_lock); } - /* We didn't find a connection or we want an exclusive one. */ - _debug("get new conn"); + /* There wasn't a connection yet or we need an exclusive connection. + * We need to create a candidate and then potentially redo the search + * in case we're racing with another thread also trying to connect on a + * shareable connection. + */ + _debug("new conn"); candidate = rxrpc_alloc_client_connection(cp, gfp); - if (!candidate) { - _leave(" = -ENOMEM"); - return -ENOMEM; + if (IS_ERR(candidate)) { + ret = PTR_ERR(candidate); + goto error_peer; } + /* Add the call to the new connection's waiting list in case we're + * going to have to wait for the connection to come live. It's our + * connection, so we want first dibs on the channel slots. We would + * normally have to take channel_lock but we do this before anyone else + * can see the connection. + */ + list_add_tail(&call->chan_wait_link, &candidate->waiting_calls); + if (cp->exclusive) { - /* Assign the call on an exclusive connection to channel 0 and - * don't add the connection to the endpoint's shareable conn - * lookup tree. - */ - _debug("exclusive chan 0"); - conn = candidate; - atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1); - spin_lock(&conn->channel_lock); - chan = 0; - goto found_channel; + call->conn = candidate; + call->security_ix = candidate->security_ix; + _leave(" = 0 [exclusive %d]", candidate->debug_id); + return 0; } - /* We need to redo the search before attempting to add a new connection - * lest we race with someone else adding a conflicting instance. + /* Publish the new connection for userspace to find. We need to redo + * the search before doing this lest we race with someone else adding a + * conflicting instance. */ _debug("search 2"); spin_lock(&local->client_conns_lock); @@ -249,124 +377,711 @@ int rxrpc_connect_call(struct rxrpc_call *call, parent = *pp; conn = rb_entry(parent, struct rxrpc_connection, client_node); +#define cmp(X) ((long)conn->params.X - (long)candidate->params.X) diff = (cmp(peer) ?: cmp(key) ?: cmp(security_level)); - if (diff < 0) +#undef cmp + if (diff < 0) { pp = &(*pp)->rb_left; - else if (diff > 0) + } else if (diff > 0) { pp = &(*pp)->rb_right; - else - goto found_extant_conn; + } else { + if (rxrpc_may_reuse_conn(conn) && + rxrpc_get_connection_maybe(conn)) + goto found_extant_conn; + /* The old connection is from an outdated epoch. */ + _debug("replace conn"); + clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags); + rb_replace_node(&conn->client_node, + &candidate->client_node, + &local->client_conns); + trace_rxrpc_client(conn, -1, rxrpc_client_replace); + goto candidate_published; + } } - /* The second search also failed; simply add the new connection with - * the new call in channel 0. Note that we need to take the channel - * lock before dropping the client conn lock. - */ _debug("new conn"); - set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); rb_link_node(&candidate->client_node, parent, pp); rb_insert_color(&candidate->client_node, &local->client_conns); -attached: - conn = candidate; - candidate = NULL; - atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1); - spin_lock(&conn->channel_lock); +candidate_published: + set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); + call->conn = candidate; + call->security_ix = candidate->security_ix; spin_unlock(&local->client_conns_lock); - chan = 0; + _leave(" = 0 [new %d]", candidate->debug_id); + return 0; -found_channel: - _debug("found chan"); - call->conn = conn; - call->channel = chan; - call->epoch = conn->proto.epoch; - call->cid = conn->proto.cid | chan; - call->call_id = ++conn->channels[chan].call_counter; - conn->channels[chan].call_id = call->call_id; - rcu_assign_pointer(conn->channels[chan].call, call); + /* We come here if we found a suitable connection already in existence. + * Discard any candidate we may have allocated, and try to get a + * channel on this one. + */ +found_extant_conn: + _debug("found conn"); + spin_unlock(&local->client_conns_lock); - _net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id); + if (candidate) { + trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate); + rxrpc_put_connection(candidate); + candidate = NULL; + } + spin_lock(&conn->channel_lock); + call->conn = conn; + call->security_ix = conn->security_ix; + list_add(&call->chan_wait_link, &conn->waiting_calls); spin_unlock(&conn->channel_lock); + _leave(" = 0 [extant %d]", conn->debug_id); + return 0; + +error_peer: rxrpc_put_peer(cp->peer); cp->peer = NULL; - _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage)); - return 0; +error: + _leave(" = %d", ret); + return ret; +} + +/* + * Activate a connection. + */ +static void rxrpc_activate_conn(struct rxrpc_connection *conn) +{ + trace_rxrpc_client(conn, -1, rxrpc_client_to_active); + conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE; + rxrpc_nr_active_client_conns++; + list_move_tail(&conn->cache_link, &rxrpc_active_client_conns); +} + +/* + * Attempt to animate a connection for a new call. + * + * If it's not exclusive, the connection is in the endpoint tree, and we're in + * the conn's list of those waiting to grab a channel. There is, however, a + * limit on the number of live connections allowed at any one time, so we may + * have to wait for capacity to become available. + * + * Note that a connection on the waiting queue might *also* have active + * channels if it has been culled to make space and then re-requested by a new + * call. + */ +static void rxrpc_animate_client_conn(struct rxrpc_connection *conn) +{ + unsigned int nr_conns; + + _enter("%d,%d", conn->debug_id, conn->cache_state); + + if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE) + goto out; + + spin_lock(&rxrpc_client_conn_cache_lock); + + nr_conns = rxrpc_nr_client_conns; + if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) { + trace_rxrpc_client(conn, -1, rxrpc_client_count); + rxrpc_nr_client_conns = nr_conns + 1; + } + + switch (conn->cache_state) { + case RXRPC_CONN_CLIENT_ACTIVE: + case RXRPC_CONN_CLIENT_WAITING: + break; + + case RXRPC_CONN_CLIENT_INACTIVE: + case RXRPC_CONN_CLIENT_CULLED: + case RXRPC_CONN_CLIENT_IDLE: + if (nr_conns >= rxrpc_max_client_connections) + goto wait_for_capacity; + goto activate_conn; + + default: + BUG(); + } + +out_unlock: + spin_unlock(&rxrpc_client_conn_cache_lock); +out: + _leave(" [%d]", conn->cache_state); + return; + +activate_conn: + _debug("activate"); + rxrpc_activate_conn(conn); + goto out_unlock; + +wait_for_capacity: + _debug("wait"); + trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); + conn->cache_state = RXRPC_CONN_CLIENT_WAITING; + list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns); + goto out_unlock; +} + +/* + * Deactivate a channel. + */ +static void rxrpc_deactivate_one_channel(struct rxrpc_connection *conn, + unsigned int channel) +{ + struct rxrpc_channel *chan = &conn->channels[channel]; + + rcu_assign_pointer(chan->call, NULL); + conn->active_chans &= ~(1 << channel); +} + +/* + * Assign a channel to the call at the front of the queue and wake the call up. + * We don't increment the callNumber counter until this number has been exposed + * to the world. + */ +static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, + unsigned int channel) +{ + struct rxrpc_channel *chan = &conn->channels[channel]; + struct rxrpc_call *call = list_entry(conn->waiting_calls.next, + struct rxrpc_call, chan_wait_link); + u32 call_id = chan->call_counter + 1; + + trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); + + write_lock_bh(&call->state_lock); + call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; + write_unlock_bh(&call->state_lock); + + rxrpc_see_call(call); + list_del_init(&call->chan_wait_link); + conn->active_chans |= 1 << channel; + call->peer = rxrpc_get_peer(conn->params.peer); + call->cid = conn->proto.cid | channel; + call->call_id = call_id; + + _net("CONNECT call %08x:%08x as call %d on conn %d", + call->cid, call->call_id, call->debug_id, conn->debug_id); - /* We found a potentially suitable connection already in existence. If - * we can reuse it (ie. its usage count hasn't been reduced to 0 by the - * reaper), discard any candidate we may have allocated, and try to get - * a channel on this one, otherwise we have to replace it. + /* Paired with the read barrier in rxrpc_wait_for_channel(). This + * orders cid and epoch in the connection wrt to call_id without the + * need to take the channel_lock. + * + * We provisionally assign a callNumber at this point, but we don't + * confirm it until the call is about to be exposed. + * + * TODO: Pair with a barrier in the data_ready handler when that looks + * at the call ID through a connection channel. */ -found_extant_conn: - _debug("found conn"); - if (!rxrpc_get_connection_maybe(conn)) { - set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); - rb_replace_node(&conn->client_node, - &candidate->client_node, - &local->client_conns); - clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags); - goto attached; + smp_wmb(); + chan->call_id = call_id; + rcu_assign_pointer(chan->call, call); + wake_up(&call->waitq); +} + +/* + * Assign channels and callNumbers to waiting calls with channel_lock + * held by caller. + */ +static void rxrpc_activate_channels_locked(struct rxrpc_connection *conn) +{ + u8 avail, mask; + + switch (conn->cache_state) { + case RXRPC_CONN_CLIENT_ACTIVE: + mask = RXRPC_ACTIVE_CHANS_MASK; + break; + default: + return; } - spin_unlock(&local->client_conns_lock); + while (!list_empty(&conn->waiting_calls) && + (avail = ~conn->active_chans, + avail &= mask, + avail != 0)) + rxrpc_activate_one_channel(conn, __ffs(avail)); +} + +/* + * Assign channels and callNumbers to waiting calls. + */ +static void rxrpc_activate_channels(struct rxrpc_connection *conn) +{ + _enter("%d", conn->debug_id); - rxrpc_put_connection(candidate); + trace_rxrpc_client(conn, -1, rxrpc_client_activate_chans); + + if (conn->active_chans == RXRPC_ACTIVE_CHANS_MASK) + return; + + spin_lock(&conn->channel_lock); + rxrpc_activate_channels_locked(conn); + spin_unlock(&conn->channel_lock); + _leave(""); +} + +/* + * Wait for a callNumber and a channel to be granted to a call. + */ +static int rxrpc_wait_for_channel(struct rxrpc_call *call, gfp_t gfp) +{ + int ret = 0; + + _enter("%d", call->debug_id); + + if (!call->call_id) { + DECLARE_WAITQUEUE(myself, current); - if (!atomic_add_unless(&conn->avail_chans, -1, 0)) { if (!gfpflags_allow_blocking(gfp)) { - rxrpc_put_connection(conn); - _leave(" = -EAGAIN"); - return -EAGAIN; + ret = -EAGAIN; + goto out; } - add_wait_queue(&conn->channel_wq, &myself); + add_wait_queue_exclusive(&call->waitq, &myself); for (;;) { set_current_state(TASK_INTERRUPTIBLE); - if (atomic_add_unless(&conn->avail_chans, -1, 0)) + if (call->call_id) + break; + if (signal_pending(current)) { + ret = -ERESTARTSYS; break; - if (signal_pending(current)) - goto interrupted; + } schedule(); } - remove_wait_queue(&conn->channel_wq, &myself); + remove_wait_queue(&call->waitq, &myself); __set_current_state(TASK_RUNNING); } - /* The connection allegedly now has a free channel and we can now - * attach the call to it. - */ + /* Paired with the write barrier in rxrpc_activate_one_channel(). */ + smp_rmb(); + +out: + _leave(" = %d", ret); + return ret; +} + +/* + * find a connection for a call + * - called in process context with IRQs enabled + */ +int rxrpc_connect_call(struct rxrpc_call *call, + struct rxrpc_conn_parameters *cp, + struct sockaddr_rxrpc *srx, + gfp_t gfp) +{ + int ret; + + _enter("{%d,%lx},", call->debug_id, call->user_call_ID); + + rxrpc_discard_expired_client_conns(NULL); + rxrpc_cull_active_client_conns(); + + ret = rxrpc_get_client_conn(call, cp, srx, gfp); + if (ret < 0) + return ret; + + rxrpc_animate_client_conn(call->conn); + rxrpc_activate_channels(call->conn); + + ret = rxrpc_wait_for_channel(call, gfp); + if (ret < 0) + rxrpc_disconnect_client_call(call); + + _leave(" = %d", ret); + return ret; +} + +/* + * Note that a connection is about to be exposed to the world. Once it is + * exposed, we maintain an extra ref on it that stops it from being summarily + * discarded before it's (a) had a chance to deal with retransmission and (b) + * had a chance at re-use (the per-connection security negotiation is + * expensive). + */ +static void rxrpc_expose_client_conn(struct rxrpc_connection *conn, + unsigned int channel) +{ + if (!test_and_set_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { + trace_rxrpc_client(conn, channel, rxrpc_client_exposed); + rxrpc_get_connection(conn); + } +} + +/* + * Note that a call, and thus a connection, is about to be exposed to the + * world. + */ +void rxrpc_expose_client_call(struct rxrpc_call *call) +{ + unsigned int channel = call->cid & RXRPC_CHANNELMASK; + struct rxrpc_connection *conn = call->conn; + struct rxrpc_channel *chan = &conn->channels[channel]; + + if (!test_and_set_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + /* Mark the call ID as being used. If the callNumber counter + * exceeds ~2 billion, we kill the connection after its + * outstanding calls have finished so that the counter doesn't + * wrap. + */ + chan->call_counter++; + if (chan->call_counter >= INT_MAX) + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + rxrpc_expose_client_conn(conn, channel); + } +} + +/* + * Disconnect a client call. + */ +void rxrpc_disconnect_client_call(struct rxrpc_call *call) +{ + unsigned int channel = call->cid & RXRPC_CHANNELMASK; + struct rxrpc_connection *conn = call->conn; + struct rxrpc_channel *chan = &conn->channels[channel]; + + trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); + call->conn = NULL; + spin_lock(&conn->channel_lock); - for (chan = 0; chan < RXRPC_MAXCALLS; chan++) - if (!conn->channels[chan].call) - goto found_channel; - BUG(); + /* Calls that have never actually been assigned a channel can simply be + * discarded. If the conn didn't get used either, it will follow + * immediately unless someone else grabs it in the meantime. + */ + if (!list_empty(&call->chan_wait_link)) { + _debug("call is waiting"); + ASSERTCMP(call->call_id, ==, 0); + ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); + list_del_init(&call->chan_wait_link); + + trace_rxrpc_client(conn, channel, rxrpc_client_chan_unstarted); + + /* We must deactivate or idle the connection if it's now + * waiting for nothing. + */ + spin_lock(&rxrpc_client_conn_cache_lock); + if (conn->cache_state == RXRPC_CONN_CLIENT_WAITING && + list_empty(&conn->waiting_calls) && + !conn->active_chans) + goto idle_connection; + goto out; + } + + ASSERTCMP(rcu_access_pointer(chan->call), ==, call); + + /* If a client call was exposed to the world, we save the result for + * retransmission. + * + * We use a barrier here so that the call number and abort code can be + * read without needing to take a lock. + * + * TODO: Make the incoming packet handler check this and handle + * terminal retransmission without requiring access to the call. + */ + if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + _debug("exposed %u,%u", call->call_id, call->abort_code); + __rxrpc_disconnect_call(conn, call); + } + + /* See if we can pass the channel directly to another call. */ + if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE && + !list_empty(&conn->waiting_calls)) { + trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); + rxrpc_activate_one_channel(conn, channel); + goto out_2; + } + + /* Things are more complex and we need the cache lock. We might be + * able to simply idle the conn or it might now be lurking on the wait + * list. It might even get moved back to the active list whilst we're + * waiting for the lock. + */ + spin_lock(&rxrpc_client_conn_cache_lock); + + switch (conn->cache_state) { + case RXRPC_CONN_CLIENT_ACTIVE: + if (list_empty(&conn->waiting_calls)) { + rxrpc_deactivate_one_channel(conn, channel); + if (!conn->active_chans) { + rxrpc_nr_active_client_conns--; + goto idle_connection; + } + goto out; + } + + trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); + rxrpc_activate_one_channel(conn, channel); + goto out; -interrupted: - remove_wait_queue(&conn->channel_wq, &myself); - __set_current_state(TASK_RUNNING); + case RXRPC_CONN_CLIENT_CULLED: + rxrpc_deactivate_one_channel(conn, channel); + ASSERT(list_empty(&conn->waiting_calls)); + if (!conn->active_chans) + goto idle_connection; + goto out; + + case RXRPC_CONN_CLIENT_WAITING: + rxrpc_deactivate_one_channel(conn, channel); + goto out; + + default: + BUG(); + } + +out: + spin_unlock(&rxrpc_client_conn_cache_lock); +out_2: + spin_unlock(&conn->channel_lock); rxrpc_put_connection(conn); - rxrpc_put_peer(cp->peer); - cp->peer = NULL; - _leave(" = -ERESTARTSYS"); - return -ERESTARTSYS; + _leave(""); + return; + +idle_connection: + /* As no channels remain active, the connection gets deactivated + * immediately or moved to the idle list for a short while. + */ + if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { + trace_rxrpc_client(conn, channel, rxrpc_client_to_idle); + conn->idle_timestamp = jiffies; + conn->cache_state = RXRPC_CONN_CLIENT_IDLE; + list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns); + if (rxrpc_idle_client_conns.next == &conn->cache_link && + !rxrpc_kill_all_client_conns) + queue_delayed_work(rxrpc_workqueue, + &rxrpc_client_conn_reap, + rxrpc_conn_idle_client_expiry); + } else { + trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive); + conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; + list_del_init(&conn->cache_link); + } + goto out; } /* - * Remove a client connection from the local endpoint's tree, thereby removing - * it as a target for reuse for new client calls. + * Clean up a dead client connection. */ -void rxrpc_unpublish_client_conn(struct rxrpc_connection *conn) +static struct rxrpc_connection * +rxrpc_put_one_client_conn(struct rxrpc_connection *conn) { + struct rxrpc_connection *next = NULL; struct rxrpc_local *local = conn->params.local; + unsigned int nr_conns; - spin_lock(&local->client_conns_lock); - if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) - rb_erase(&conn->client_node, &local->client_conns); - spin_unlock(&local->client_conns_lock); + trace_rxrpc_client(conn, -1, rxrpc_client_cleanup); + + if (test_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) { + spin_lock(&local->client_conns_lock); + if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, + &conn->flags)) + rb_erase(&conn->client_node, &local->client_conns); + spin_unlock(&local->client_conns_lock); + } rxrpc_put_client_connection_id(conn); + + ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_INACTIVE); + + if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) { + trace_rxrpc_client(conn, -1, rxrpc_client_uncount); + spin_lock(&rxrpc_client_conn_cache_lock); + nr_conns = --rxrpc_nr_client_conns; + + if (nr_conns < rxrpc_max_client_connections && + !list_empty(&rxrpc_waiting_client_conns)) { + next = list_entry(rxrpc_waiting_client_conns.next, + struct rxrpc_connection, cache_link); + rxrpc_get_connection(next); + rxrpc_activate_conn(next); + } + + spin_unlock(&rxrpc_client_conn_cache_lock); + } + + rxrpc_kill_connection(conn); + if (next) + rxrpc_activate_channels(next); + + /* We need to get rid of the temporary ref we took upon next, but we + * can't call rxrpc_put_connection() recursively. + */ + return next; +} + +/* + * Clean up a dead client connections. + */ +void rxrpc_put_client_conn(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int n; + + do { + n = atomic_dec_return(&conn->usage); + trace_rxrpc_conn(conn, rxrpc_conn_put_client, n, here); + if (n > 0) + return; + ASSERTCMP(n, >=, 0); + + conn = rxrpc_put_one_client_conn(conn); + } while (conn); +} + +/* + * Kill the longest-active client connections to make room for new ones. + */ +static void rxrpc_cull_active_client_conns(void) +{ + struct rxrpc_connection *conn; + unsigned int nr_conns = rxrpc_nr_client_conns; + unsigned int nr_active, limit; + + _enter(""); + + ASSERTCMP(nr_conns, >=, 0); + if (nr_conns < rxrpc_max_client_connections) { + _leave(" [ok]"); + return; + } + limit = rxrpc_reap_client_connections; + + spin_lock(&rxrpc_client_conn_cache_lock); + nr_active = rxrpc_nr_active_client_conns; + + while (nr_active > limit) { + ASSERT(!list_empty(&rxrpc_active_client_conns)); + conn = list_entry(rxrpc_active_client_conns.next, + struct rxrpc_connection, cache_link); + ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE); + + if (list_empty(&conn->waiting_calls)) { + trace_rxrpc_client(conn, -1, rxrpc_client_to_culled); + conn->cache_state = RXRPC_CONN_CLIENT_CULLED; + list_del_init(&conn->cache_link); + } else { + trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); + conn->cache_state = RXRPC_CONN_CLIENT_WAITING; + list_move_tail(&conn->cache_link, + &rxrpc_waiting_client_conns); + } + + nr_active--; + } + + rxrpc_nr_active_client_conns = nr_active; + spin_unlock(&rxrpc_client_conn_cache_lock); + ASSERTCMP(nr_active, >=, 0); + _leave(" [culled]"); +} + +/* + * Discard expired client connections from the idle list. Each conn in the + * idle list has been exposed and holds an extra ref because of that. + * + * This may be called from conn setup or from a work item so cannot be + * considered non-reentrant. + */ +static void rxrpc_discard_expired_client_conns(struct work_struct *work) +{ + struct rxrpc_connection *conn; + unsigned long expiry, conn_expires_at, now; + unsigned int nr_conns; + bool did_discard = false; + + _enter("%c", work ? 'w' : 'n'); + + if (list_empty(&rxrpc_idle_client_conns)) { + _leave(" [empty]"); + return; + } + + /* Don't double up on the discarding */ + if (!spin_trylock(&rxrpc_client_conn_discard_mutex)) { + _leave(" [already]"); + return; + } + + /* We keep an estimate of what the number of conns ought to be after + * we've discarded some so that we don't overdo the discarding. + */ + nr_conns = rxrpc_nr_client_conns; + +next: + spin_lock(&rxrpc_client_conn_cache_lock); + + if (list_empty(&rxrpc_idle_client_conns)) + goto out; + + conn = list_entry(rxrpc_idle_client_conns.next, + struct rxrpc_connection, cache_link); + ASSERT(test_bit(RXRPC_CONN_EXPOSED, &conn->flags)); + + if (!rxrpc_kill_all_client_conns) { + /* If the number of connections is over the reap limit, we + * expedite discard by reducing the expiry timeout. We must, + * however, have at least a short grace period to be able to do + * final-ACK or ABORT retransmission. + */ + expiry = rxrpc_conn_idle_client_expiry; + if (nr_conns > rxrpc_reap_client_connections) + expiry = rxrpc_conn_idle_client_fast_expiry; + + conn_expires_at = conn->idle_timestamp + expiry; + + now = READ_ONCE(jiffies); + if (time_after(conn_expires_at, now)) + goto not_yet_expired; + } + + trace_rxrpc_client(conn, -1, rxrpc_client_discard); + if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags)) + BUG(); + conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; + list_del_init(&conn->cache_link); + + spin_unlock(&rxrpc_client_conn_cache_lock); + + /* When we cleared the EXPOSED flag, we took on responsibility for the + * reference that that had on the usage count. We deal with that here. + * If someone re-sets the flag and re-gets the ref, that's fine. + */ + rxrpc_put_connection(conn); + did_discard = true; + nr_conns--; + goto next; + +not_yet_expired: + /* The connection at the front of the queue hasn't yet expired, so + * schedule the work item for that point if we discarded something. + * + * We don't worry if the work item is already scheduled - it can look + * after rescheduling itself at a later time. We could cancel it, but + * then things get messier. + */ + _debug("not yet"); + if (!rxrpc_kill_all_client_conns) + queue_delayed_work(rxrpc_workqueue, + &rxrpc_client_conn_reap, + conn_expires_at - now); + +out: + spin_unlock(&rxrpc_client_conn_cache_lock); + spin_unlock(&rxrpc_client_conn_discard_mutex); + _leave(""); +} + +/* + * Preemptively destroy all the client connection records rather than waiting + * for them to time out + */ +void __exit rxrpc_destroy_all_client_connections(void) +{ + _enter(""); + + spin_lock(&rxrpc_client_conn_cache_lock); + rxrpc_kill_all_client_conns = true; + spin_unlock(&rxrpc_client_conn_cache_lock); + + cancel_delayed_work(&rxrpc_client_conn_reap); + + if (!queue_delayed_work(rxrpc_workqueue, &rxrpc_client_conn_reap, 0)) + _debug("destroy: queue failed"); + + _leave(""); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index cee0f35bc1cf..3f9d8d7ec632 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -15,20 +15,128 @@ #include <linux/net.h> #include <linux/skbuff.h> #include <linux/errqueue.h> -#include <linux/udp.h> -#include <linux/in.h> -#include <linux/in6.h> -#include <linux/icmp.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include <net/ip.h> #include "ar-internal.h" /* + * Retransmit terminal ACK or ABORT of the previous call. + */ +static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_channel *chan; + struct msghdr msg; + struct kvec iov; + struct { + struct rxrpc_wire_header whdr; + union { + struct { + __be32 code; + } abort; + struct { + struct rxrpc_ackpacket ack; + u8 padding[3]; + struct rxrpc_ackinfo info; + }; + }; + } __attribute__((packed)) pkt; + size_t len; + u32 serial, mtu, call_id; + + _enter("%d", conn->debug_id); + + chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK]; + + /* If the last call got moved on whilst we were waiting to run, just + * ignore this packet. + */ + call_id = READ_ONCE(chan->last_call); + /* Sync with __rxrpc_disconnect_call() */ + smp_rmb(); + if (call_id != sp->hdr.callNumber) + return; + + msg.msg_name = &conn->params.peer->srx.transport; + msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + pkt.whdr.epoch = htonl(sp->hdr.epoch); + pkt.whdr.cid = htonl(sp->hdr.cid); + pkt.whdr.callNumber = htonl(sp->hdr.callNumber); + pkt.whdr.seq = 0; + pkt.whdr.type = chan->last_type; + pkt.whdr.flags = conn->out_clientflag; + pkt.whdr.userStatus = 0; + pkt.whdr.securityIndex = conn->security_ix; + pkt.whdr._rsvd = 0; + pkt.whdr.serviceId = htons(chan->last_service_id); + + len = sizeof(pkt.whdr); + switch (chan->last_type) { + case RXRPC_PACKET_TYPE_ABORT: + pkt.abort.code = htonl(chan->last_abort); + len += sizeof(pkt.abort); + break; + + case RXRPC_PACKET_TYPE_ACK: + mtu = conn->params.peer->if_mtu; + mtu -= conn->params.peer->hdrsize; + pkt.ack.bufferSpace = 0; + pkt.ack.maxSkew = htons(skb->priority); + pkt.ack.firstPacket = htonl(chan->last_seq); + pkt.ack.previousPacket = htonl(chan->last_seq - 1); + pkt.ack.serial = htonl(sp->hdr.serial); + pkt.ack.reason = RXRPC_ACK_DUPLICATE; + pkt.ack.nAcks = 0; + pkt.info.rxMTU = htonl(rxrpc_rx_mtu); + pkt.info.maxMTU = htonl(mtu); + pkt.info.rwind = htonl(rxrpc_rx_window_size); + pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); + pkt.whdr.flags |= RXRPC_SLOW_START_OK; + len += sizeof(pkt.ack) + sizeof(pkt.info); + break; + } + + /* Resync with __rxrpc_disconnect_call() and check that the last call + * didn't get advanced whilst we were filling out the packets. + */ + smp_rmb(); + if (READ_ONCE(chan->last_call) != call_id) + return; + + iov.iov_base = &pkt; + iov.iov_len = len; + + serial = atomic_inc_return(&conn->serial); + pkt.whdr.serial = htonl(serial); + + switch (chan->last_type) { + case RXRPC_PACKET_TYPE_ABORT: + _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort); + break; + case RXRPC_PACKET_TYPE_ACK: + trace_rxrpc_tx_ack(NULL, serial, chan->last_seq, 0, + RXRPC_ACK_DUPLICATE, 0); + _proto("Tx ACK %%%u [re]", serial); + break; + } + + kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len); + _leave(""); + return; +} + +/* * pass a connection-level abort onto all calls on that connection */ -static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state, - u32 abort_code) +static void rxrpc_abort_calls(struct rxrpc_connection *conn, + enum rxrpc_call_completion compl, + u32 abort_code, int error) { struct rxrpc_call *call; int i; @@ -41,19 +149,15 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state, call = rcu_dereference_protected( conn->channels[i].call, lockdep_is_held(&conn->channel_lock)); - write_lock_bh(&call->state_lock); - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = state; - if (state == RXRPC_CALL_LOCALLY_ABORTED) { - call->local_abort = conn->local_abort; - set_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); - } else { - call->remote_abort = conn->remote_abort; - set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - } - rxrpc_queue_call(call); + if (call) { + if (compl == RXRPC_CALL_LOCALLY_ABORTED) + trace_rxrpc_abort("CON", call->cid, + call->call_id, 0, + abort_code, error); + if (rxrpc_set_call_completion(call, compl, + abort_code, error)) + rxrpc_notify_socket(call); } - write_unlock_bh(&call->state_lock); } spin_unlock(&conn->channel_lock); @@ -78,17 +182,16 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, /* generate a connection-level abort */ spin_lock_bh(&conn->state_lock); - if (conn->state < RXRPC_CONN_REMOTELY_ABORTED) { - conn->state = RXRPC_CONN_LOCALLY_ABORTED; - conn->error = error; - spin_unlock_bh(&conn->state_lock); - } else { + if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { spin_unlock_bh(&conn->state_lock); _leave(" = 0 [already dead]"); return 0; } - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code); + conn->state = RXRPC_CONN_LOCALLY_ABORTED; + spin_unlock_bh(&conn->state_lock); + + rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error); msg.msg_name = &conn->params.peer->srx.transport; msg.msg_namelen = conn->params.peer->srx.transport_len; @@ -132,17 +235,18 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, /* * mark a call as being on a now-secured channel - * - must be called with softirqs disabled + * - must be called with BH's disabled. */ static void rxrpc_call_is_secure(struct rxrpc_call *call) { _enter("%p", call); if (call) { - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_SECURED, &call->events)) - rxrpc_queue_call(call); - read_unlock(&call->state_lock); + write_lock_bh(&call->state_lock); + if (call->state == RXRPC_CALL_SERVER_SECURING) { + call->state = RXRPC_CALL_SERVER_ACCEPTING; + rxrpc_notify_socket(call); + } + write_unlock_bh(&call->state_lock); } } @@ -159,22 +263,28 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, int loop, ret; if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { - kleave(" = -ECONNABORTED [%u]", conn->state); + _leave(" = -ECONNABORTED [%u]", conn->state); return -ECONNABORTED; } _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial); switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_DATA: + case RXRPC_PACKET_TYPE_ACK: + rxrpc_conn_retransmit_call(conn, skb); + return 0; + case RXRPC_PACKET_TYPE_ABORT: - if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &wtmp, sizeof(wtmp)) < 0) return -EPROTO; abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); conn->state = RXRPC_CONN_REMOTELY_ABORTED; rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, - abort_code); + abort_code, ECONNABORTED); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: @@ -199,14 +309,16 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { conn->state = RXRPC_CONN_SERVICE; + spin_unlock(&conn->state_lock); for (loop = 0; loop < RXRPC_MAXCALLS; loop++) rxrpc_call_is_secure( rcu_dereference_protected( conn->channels[loop].call, lockdep_is_held(&conn->channel_lock))); + } else { + spin_unlock(&conn->state_lock); } - spin_unlock(&conn->state_lock); spin_unlock(&conn->channel_lock); return 0; @@ -269,7 +381,7 @@ void rxrpc_process_connection(struct work_struct *work) u32 abort_code = RX_PROTOCOL_ERROR; int ret; - _enter("{%d}", conn->debug_id); + rxrpc_see_connection(conn); if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); @@ -277,6 +389,7 @@ void rxrpc_process_connection(struct work_struct *work) /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); ret = rxrpc_process_event(conn, skb, &abort_code); switch (ret) { case -EPROTO: @@ -287,7 +400,7 @@ void rxrpc_process_connection(struct work_struct *work) goto requeue_and_leave; case -ECONNABORTED: default: - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); break; } } @@ -304,91 +417,7 @@ requeue_and_leave: protocol_error: if (rxrpc_abort_connection(conn, -ret, abort_code) < 0) goto requeue_and_leave; - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); _leave(" [EPROTO]"); goto out; } - -/* - * put a packet up for transport-level abort - */ -void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) -{ - CHECK_SLAB_OKAY(&local->usage); - - skb_queue_tail(&local->reject_queue, skb); - rxrpc_queue_local(local); -} - -/* - * reject packets through the local endpoint - */ -void rxrpc_reject_packets(struct rxrpc_local *local) -{ - union { - struct sockaddr sa; - struct sockaddr_in sin; - } sa; - struct rxrpc_skb_priv *sp; - struct rxrpc_wire_header whdr; - struct sk_buff *skb; - struct msghdr msg; - struct kvec iov[2]; - size_t size; - __be32 code; - - _enter("%d", local->debug_id); - - iov[0].iov_base = &whdr; - iov[0].iov_len = sizeof(whdr); - iov[1].iov_base = &code; - iov[1].iov_len = sizeof(code); - size = sizeof(whdr) + sizeof(code); - - msg.msg_name = &sa; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - memset(&sa, 0, sizeof(sa)); - sa.sa.sa_family = local->srx.transport.family; - switch (sa.sa.sa_family) { - case AF_INET: - msg.msg_namelen = sizeof(sa.sin); - break; - default: - msg.msg_namelen = 0; - break; - } - - memset(&whdr, 0, sizeof(whdr)); - whdr.type = RXRPC_PACKET_TYPE_ABORT; - - while ((skb = skb_dequeue(&local->reject_queue))) { - sp = rxrpc_skb(skb); - switch (sa.sa.sa_family) { - case AF_INET: - sa.sin.sin_port = udp_hdr(skb)->source; - sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; - code = htonl(skb->priority); - - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.serviceId = htons(sp->hdr.serviceId); - whdr.flags = sp->hdr.flags; - whdr.flags ^= RXRPC_CLIENT_INITIATED; - whdr.flags &= RXRPC_CLIENT_INITIATED; - - kernel_sendmsg(local->socket, &msg, iov, 2, size); - break; - - default: - break; - } - - rxrpc_free_skb(skb); - } - - _leave(""); -} diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 896d84493a05..e1e83af47866 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -1,6 +1,6 @@ -/* RxRPC virtual connection handler +/* RxRPC virtual connection handler, common bits. * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -15,8 +15,6 @@ #include <linux/slab.h> #include <linux/net.h> #include <linux/skbuff.h> -#include <net/sock.h> -#include <net/af_rxrpc.h> #include "ar-internal.h" /* @@ -27,9 +25,12 @@ unsigned int rxrpc_connection_expiry = 10 * 60; static void rxrpc_connection_reaper(struct work_struct *work); LIST_HEAD(rxrpc_connections); +LIST_HEAD(rxrpc_connection_proc_list); DEFINE_RWLOCK(rxrpc_connection_lock); static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper); +static void rxrpc_destroy_connection(struct rcu_head *); + /* * allocate a new connection */ @@ -41,21 +42,18 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) conn = kzalloc(sizeof(struct rxrpc_connection), gfp); if (conn) { + INIT_LIST_HEAD(&conn->cache_link); spin_lock_init(&conn->channel_lock); - init_waitqueue_head(&conn->channel_wq); + INIT_LIST_HEAD(&conn->waiting_calls); INIT_WORK(&conn->processor, &rxrpc_process_connection); + INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); skb_queue_head_init(&conn->rx_queue); conn->security = &rxrpc_no_security; spin_lock_init(&conn->state_lock); - /* We maintain an extra ref on the connection whilst it is - * on the rxrpc_connections list. - */ - atomic_set(&conn->usage, 2); conn->debug_id = atomic_inc_return(&rxrpc_debug_id); - atomic_set(&conn->avail_chans, RXRPC_MAXCALLS); conn->size_align = 4; - conn->header_size = sizeof(struct rxrpc_wire_header); + conn->idle_timestamp = jiffies; } _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0); @@ -135,6 +133,16 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, srx.transport.sin.sin_addr.s_addr) goto not_found; break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + if (peer->srx.transport.sin6.sin6_port != + srx.transport.sin6.sin6_port || + memcmp(&peer->srx.transport.sin6.sin6_addr, + &srx.transport.sin6.sin6_addr, + sizeof(struct in6_addr)) != 0) + goto not_found; + break; +#endif default: BUG(); } @@ -153,25 +161,32 @@ not_found: * terminates. The caller must hold the channel_lock and must release the * call's ref on the connection. */ -void __rxrpc_disconnect_call(struct rxrpc_call *call) +void __rxrpc_disconnect_call(struct rxrpc_connection *conn, + struct rxrpc_call *call) { - struct rxrpc_connection *conn = call->conn; - struct rxrpc_channel *chan = &conn->channels[call->channel]; + struct rxrpc_channel *chan = + &conn->channels[call->cid & RXRPC_CHANNELMASK]; - _enter("%d,%d", conn->debug_id, call->channel); + _enter("%d,%x", conn->debug_id, call->cid); if (rcu_access_pointer(chan->call) == call) { /* Save the result of the call so that we can repeat it if necessary * through the channel, whilst disposing of the actual call record. */ - chan->last_result = call->local_abort; + chan->last_service_id = call->service_id; + if (call->abort_code) { + chan->last_abort = call->abort_code; + chan->last_type = RXRPC_PACKET_TYPE_ABORT; + } else { + chan->last_seq = call->rx_hard_ack; + chan->last_type = RXRPC_PACKET_TYPE_ACK; + } + /* Sync with rxrpc_conn_retransmit(). */ smp_wmb(); chan->last_call = chan->call_id; chan->call_id = chan->call_counter; rcu_assign_pointer(chan->call, NULL); - atomic_inc(&conn->avail_chans); - wake_up(&conn->channel_wq); } _leave(""); @@ -185,34 +200,122 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; + spin_lock_bh(&conn->params.peer->lock); + hlist_del_init(&call->error_link); + spin_unlock_bh(&conn->params.peer->lock); + + if (rxrpc_is_client_call(call)) + return rxrpc_disconnect_client_call(call); + spin_lock(&conn->channel_lock); - __rxrpc_disconnect_call(call); + __rxrpc_disconnect_call(conn, call); spin_unlock(&conn->channel_lock); call->conn = NULL; + conn->idle_timestamp = jiffies; rxrpc_put_connection(conn); } /* - * release a virtual connection + * Kill off a connection. */ -void rxrpc_put_connection(struct rxrpc_connection *conn) +void rxrpc_kill_connection(struct rxrpc_connection *conn) { - if (!conn) - return; + ASSERT(!rcu_access_pointer(conn->channels[0].call) && + !rcu_access_pointer(conn->channels[1].call) && + !rcu_access_pointer(conn->channels[2].call) && + !rcu_access_pointer(conn->channels[3].call)); + ASSERT(list_empty(&conn->cache_link)); - _enter("%p{u=%d,d=%d}", - conn, atomic_read(&conn->usage), conn->debug_id); + write_lock(&rxrpc_connection_lock); + list_del_init(&conn->proc_link); + write_unlock(&rxrpc_connection_lock); - ASSERTCMP(atomic_read(&conn->usage), >, 1); + /* Drain the Rx queue. Note that even though we've unpublished, an + * incoming packet could still be being added to our Rx queue, so we + * will need to drain it again in the RCU cleanup handler. + */ + rxrpc_purge_queue(&conn->rx_queue); - conn->put_time = ktime_get_seconds(); - if (atomic_dec_return(&conn->usage) == 1) { - _debug("zombie"); - rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); + /* Leave final destruction to RCU. The connection processor work item + * must carry a ref on the connection to prevent us getting here whilst + * it is queued or running. + */ + call_rcu(&conn->rcu, rxrpc_destroy_connection); +} + +/* + * Queue a connection's work processor, getting a ref to pass to the work + * queue. + */ +bool rxrpc_queue_conn(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int n = __atomic_add_unless(&conn->usage, 1, 0); + if (n == 0) + return false; + if (rxrpc_queue_work(&conn->processor)) + trace_rxrpc_conn(conn, rxrpc_conn_queued, n + 1, here); + else + rxrpc_put_connection(conn); + return true; +} + +/* + * Note the re-emergence of a connection. + */ +void rxrpc_see_connection(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + if (conn) { + int n = atomic_read(&conn->usage); + + trace_rxrpc_conn(conn, rxrpc_conn_seen, n, here); + } +} + +/* + * Get a ref on a connection. + */ +void rxrpc_get_connection(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(&conn->usage); + + trace_rxrpc_conn(conn, rxrpc_conn_got, n, here); +} + +/* + * Try to get a ref on a connection. + */ +struct rxrpc_connection * +rxrpc_get_connection_maybe(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + + if (conn) { + int n = __atomic_add_unless(&conn->usage, 1, 0); + if (n > 0) + trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here); + else + conn = NULL; } + return conn; +} - _leave(""); +/* + * Release a service connection + */ +void rxrpc_put_service_conn(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int n; + + n = atomic_dec_return(&conn->usage); + trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); + ASSERTCMP(n, >=, 0); + if (n == 0) + rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); } /* @@ -242,19 +345,19 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) } /* - * reap dead connections + * reap dead service connections */ static void rxrpc_connection_reaper(struct work_struct *work) { struct rxrpc_connection *conn, *_p; - unsigned long reap_older_than, earliest, put_time, now; + unsigned long reap_older_than, earliest, idle_timestamp, now; LIST_HEAD(graveyard); _enter(""); - now = ktime_get_seconds(); - reap_older_than = now - rxrpc_connection_expiry; + now = jiffies; + reap_older_than = now - rxrpc_connection_expiry * HZ; earliest = ULONG_MAX; write_lock(&rxrpc_connection_lock); @@ -262,11 +365,17 @@ static void rxrpc_connection_reaper(struct work_struct *work) ASSERTCMP(atomic_read(&conn->usage), >, 0); if (likely(atomic_read(&conn->usage) > 1)) continue; + if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) + continue; + + idle_timestamp = READ_ONCE(conn->idle_timestamp); + _debug("reap CONN %d { u=%d,t=%ld }", + conn->debug_id, atomic_read(&conn->usage), + (long)reap_older_than - (long)idle_timestamp); - put_time = READ_ONCE(conn->put_time); - if (time_after(put_time, reap_older_than)) { - if (time_before(put_time, earliest)) - earliest = put_time; + if (time_after(idle_timestamp, reap_older_than)) { + if (time_before(idle_timestamp, earliest)) + earliest = idle_timestamp; continue; } @@ -277,7 +386,7 @@ static void rxrpc_connection_reaper(struct work_struct *work) continue; if (rxrpc_conn_is_client(conn)) - rxrpc_unpublish_client_conn(conn); + BUG(); else rxrpc_unpublish_service_conn(conn); @@ -287,9 +396,9 @@ static void rxrpc_connection_reaper(struct work_struct *work) if (earliest != ULONG_MAX) { _debug("reschedule reaper %ld", (long) earliest - now); - ASSERTCMP(earliest, >, now); + ASSERT(time_after(earliest, now)); rxrpc_queue_delayed_work(&rxrpc_connection_reap, - (earliest - now) * HZ); + earliest - now); } while (!list_empty(&graveyard)) { @@ -298,16 +407,15 @@ static void rxrpc_connection_reaper(struct work_struct *work) list_del_init(&conn->link); ASSERTCMP(atomic_read(&conn->usage), ==, 0); - skb_queue_purge(&conn->rx_queue); - call_rcu(&conn->rcu, rxrpc_destroy_connection); + rxrpc_kill_connection(conn); } _leave(""); } /* - * preemptively destroy all the connection records rather than waiting for them - * to time out + * preemptively destroy all the service connection records rather than + * waiting for them to time out */ void __exit rxrpc_destroy_all_connections(void) { @@ -316,6 +424,8 @@ void __exit rxrpc_destroy_all_connections(void) _enter(""); + rxrpc_destroy_all_client_connections(); + rxrpc_connection_expiry = 0; cancel_delayed_work(&rxrpc_connection_reap); rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); @@ -330,6 +440,8 @@ void __exit rxrpc_destroy_all_connections(void) write_unlock(&rxrpc_connection_lock); BUG_ON(leak); + ASSERT(list_empty(&rxrpc_connection_proc_list)); + /* Make sure the local and peer records pinned by any dying connections * are released. */ diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index fd9027ccba8f..eef551f40dc2 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -65,9 +65,8 @@ done: * Insert a service connection into a peer's tree, thereby making it a target * for incoming packets. */ -static struct rxrpc_connection * -rxrpc_publish_service_conn(struct rxrpc_peer *peer, - struct rxrpc_connection *conn) +static void rxrpc_publish_service_conn(struct rxrpc_peer *peer, + struct rxrpc_connection *conn) { struct rxrpc_connection *cursor = NULL; struct rxrpc_conn_proto k = conn->proto; @@ -96,7 +95,7 @@ conn_published: set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags); write_sequnlock_bh(&peer->service_conn_lock); _leave(" = %d [new]", conn->debug_id); - return conn; + return; found_extant_conn: if (atomic_read(&cursor->usage) == 0) @@ -119,100 +118,58 @@ replace_old_connection: } /* - * get a record of an incoming connection + * Preallocate a service connection. The connection is placed on the proc and + * reap lists so that we don't have to get the lock from BH context. */ -struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local, - struct sockaddr_rxrpc *srx, - struct sk_buff *skb) +struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) { - struct rxrpc_connection *conn; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_peer *peer; - const char *new = "old"; - - _enter(""); + struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp); - peer = rxrpc_lookup_peer(local, srx, GFP_NOIO); - if (!peer) { - _debug("no peer"); - return ERR_PTR(-EBUSY); - } + if (conn) { + /* We maintain an extra ref on the connection whilst it is on + * the rxrpc_connections list. + */ + conn->state = RXRPC_CONN_SERVICE_PREALLOC; + atomic_set(&conn->usage, 2); - ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED); - - rcu_read_lock(); - peer = rxrpc_lookup_peer_rcu(local, srx); - if (peer) { - conn = rxrpc_find_service_conn_rcu(peer, skb); - if (conn) { - if (sp->hdr.securityIndex != conn->security_ix) - goto security_mismatch_rcu; - if (rxrpc_get_connection_maybe(conn)) - goto found_extant_connection_rcu; - - /* The conn has expired but we can't remove it without - * the appropriate lock, so we attempt to replace it - * when we have a new candidate. - */ - } + write_lock(&rxrpc_connection_lock); + list_add_tail(&conn->link, &rxrpc_connections); + list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); + write_unlock(&rxrpc_connection_lock); - if (!rxrpc_get_peer_maybe(peer)) - peer = NULL; + trace_rxrpc_conn(conn, rxrpc_conn_new_service, + atomic_read(&conn->usage), + __builtin_return_address(0)); } - rcu_read_unlock(); - if (!peer) { - peer = rxrpc_lookup_peer(local, srx, GFP_NOIO); - if (!peer) - goto enomem; - } + return conn; +} - /* We don't have a matching record yet. */ - conn = rxrpc_alloc_connection(GFP_NOIO); - if (!conn) - goto enomem_peer; +/* + * Set up an incoming connection. This is called in BH context with the RCU + * read lock held. + */ +void rxrpc_new_incoming_connection(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + _enter(""); conn->proto.epoch = sp->hdr.epoch; conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK; - conn->params.local = local; - conn->params.peer = peer; conn->params.service_id = sp->hdr.serviceId; conn->security_ix = sp->hdr.securityIndex; conn->out_clientflag = 0; - conn->state = RXRPC_CONN_SERVICE; - if (conn->params.service_id) + if (conn->security_ix) conn->state = RXRPC_CONN_SERVICE_UNSECURED; - - rxrpc_get_local(local); - - write_lock(&rxrpc_connection_lock); - list_add_tail(&conn->link, &rxrpc_connections); - write_unlock(&rxrpc_connection_lock); + else + conn->state = RXRPC_CONN_SERVICE; /* Make the connection a target for incoming packets. */ - rxrpc_publish_service_conn(peer, conn); - - new = "new"; - -success: - _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid); - _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage)); - return conn; - -found_extant_connection_rcu: - rcu_read_unlock(); - goto success; - -security_mismatch_rcu: - rcu_read_unlock(); - _leave(" = -EKEYREJECTED"); - return ERR_PTR(-EKEYREJECTED); + rxrpc_publish_service_conn(conn->params.peer, conn); -enomem_peer: - rxrpc_put_peer(peer); -enomem: - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); + _net("CONNECTION new %d {%x}", conn->debug_id, conn->proto.cid); } /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 70bb77818dea..3ad9f75031e3 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1,6 +1,6 @@ /* RxRPC packet reception * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -27,550 +27,920 @@ #include <net/net_namespace.h> #include "ar-internal.h" +static void rxrpc_proto_abort(const char *why, + struct rxrpc_call *call, rxrpc_seq_t seq) +{ + if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG)) { + set_bit(RXRPC_CALL_EV_ABORT, &call->events); + rxrpc_queue_call(call); + } +} + /* - * queue a packet for recvmsg to pass to userspace - * - the caller must hold a lock on call->lock - * - must not be called with interrupts disabled (sk_filter() disables BH's) - * - eats the packet whether successful or not - * - there must be just one reference to the packet, which the caller passes to - * this function + * Do TCP-style congestion management [RFC 5681]. */ -int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, - bool force, bool terminal) +static void rxrpc_congestion_management(struct rxrpc_call *call, + struct sk_buff *skb, + struct rxrpc_ack_summary *summary, + rxrpc_serial_t acked_serial) { - struct rxrpc_skb_priv *sp; - struct rxrpc_sock *rx = call->socket; - struct sock *sk; - int ret; + enum rxrpc_congest_change change = rxrpc_cong_no_change; + unsigned int cumulative_acks = call->cong_cumul_acks; + unsigned int cwnd = call->cong_cwnd; + bool resend = false; + + summary->flight_size = + (call->tx_top - call->tx_hard_ack) - summary->nr_acks; + + if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) { + summary->retrans_timeo = true; + call->cong_ssthresh = max_t(unsigned int, + summary->flight_size / 2, 2); + cwnd = 1; + if (cwnd >= call->cong_ssthresh && + call->cong_mode == RXRPC_CALL_SLOW_START) { + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_tstamp = skb->tstamp; + cumulative_acks = 0; + } + } - _enter(",,%d,%d", force, terminal); + cumulative_acks += summary->nr_new_acks; + cumulative_acks += summary->nr_rot_new_acks; + if (cumulative_acks > 255) + cumulative_acks = 255; + + summary->mode = call->cong_mode; + summary->cwnd = call->cong_cwnd; + summary->ssthresh = call->cong_ssthresh; + summary->cumulative_acks = cumulative_acks; + summary->dup_acks = call->cong_dup_acks; + + switch (call->cong_mode) { + case RXRPC_CALL_SLOW_START: + if (summary->nr_nacks > 0) + goto packet_loss_detected; + if (summary->cumulative_acks > 0) + cwnd += 1; + if (cwnd >= call->cong_ssthresh) { + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_tstamp = skb->tstamp; + } + goto out; - ASSERT(!irqs_disabled()); + case RXRPC_CALL_CONGEST_AVOIDANCE: + if (summary->nr_nacks > 0) + goto packet_loss_detected; - sp = rxrpc_skb(skb); - ASSERTCMP(sp->call, ==, call); - - /* if we've already posted the terminal message for a call, then we - * don't post any more */ - if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) { - _debug("already terminated"); - ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE); - rxrpc_free_skb(skb); - return 0; - } - - sk = &rx->sk; - - if (!force) { - /* cast skb->rcvbuf to unsigned... It's pointless, but - * reduces number of warnings when compiling with -W - * --ANK */ -// ret = -ENOBUFS; -// if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= -// (unsigned int) sk->sk_rcvbuf) -// goto out; - - ret = sk_filter(sk, skb); - if (ret < 0) + /* We analyse the number of packets that get ACK'd per RTT + * period and increase the window if we managed to fill it. + */ + if (call->peer->rtt_usage == 0) goto out; - } + if (ktime_before(skb->tstamp, + ktime_add_ns(call->cong_tstamp, + call->peer->rtt))) + goto out_no_clear_ca; + change = rxrpc_cong_rtt_window_end; + call->cong_tstamp = skb->tstamp; + if (cumulative_acks >= cwnd) + cwnd++; + goto out; - spin_lock_bh(&sk->sk_receive_queue.lock); - if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags) && - !test_bit(RXRPC_CALL_RELEASED, &call->flags) && - call->socket->sk.sk_state != RXRPC_CLOSE) { - skb->destructor = rxrpc_packet_destructor; - skb->dev = NULL; - skb->sk = sk; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); + case RXRPC_CALL_PACKET_LOSS: + if (summary->nr_nacks == 0) + goto resume_normality; - if (terminal) { - _debug("<<<< TERMINAL MESSAGE >>>>"); - set_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags); + if (summary->new_low_nack) { + change = rxrpc_cong_new_low_nack; + call->cong_dup_acks = 1; + if (call->cong_extra > 1) + call->cong_extra = 1; + goto send_extra_data; } - /* allow interception by a kernel service */ - if (rx->interceptor) { - rx->interceptor(sk, call->user_call_ID, skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - } else { - _net("post skb %p", skb); - __skb_queue_tail(&sk->sk_receive_queue, skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); + call->cong_dup_acks++; + if (call->cong_dup_acks < 3) + goto send_extra_data; + + change = rxrpc_cong_begin_retransmission; + call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT; + call->cong_ssthresh = max_t(unsigned int, + summary->flight_size / 2, 2); + cwnd = call->cong_ssthresh + 3; + call->cong_extra = 0; + call->cong_dup_acks = 0; + resend = true; + goto out; - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); + case RXRPC_CALL_FAST_RETRANSMIT: + if (!summary->new_low_nack) { + if (summary->nr_new_acks == 0) + cwnd += 1; + call->cong_dup_acks++; + if (call->cong_dup_acks == 2) { + change = rxrpc_cong_retransmit_again; + call->cong_dup_acks = 0; + resend = true; + } + } else { + change = rxrpc_cong_progress; + cwnd = call->cong_ssthresh; + if (summary->nr_nacks == 0) + goto resume_normality; } - skb = NULL; - } else { - spin_unlock_bh(&sk->sk_receive_queue.lock); + goto out; + + default: + BUG(); + goto out; } - ret = 0; +resume_normality: + change = rxrpc_cong_cleared_nacks; + call->cong_dup_acks = 0; + call->cong_extra = 0; + call->cong_tstamp = skb->tstamp; + if (cwnd < call->cong_ssthresh) + call->cong_mode = RXRPC_CALL_SLOW_START; + else + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; out: - rxrpc_free_skb(skb); + cumulative_acks = 0; +out_no_clear_ca: + if (cwnd >= RXRPC_RXTX_BUFF_SIZE - 1) + cwnd = RXRPC_RXTX_BUFF_SIZE - 1; + call->cong_cwnd = cwnd; + call->cong_cumul_acks = cumulative_acks; + trace_rxrpc_congest(call, summary, acked_serial, change); + if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); + return; - _leave(" = %d", ret); - return ret; +packet_loss_detected: + change = rxrpc_cong_saw_nack; + call->cong_mode = RXRPC_CALL_PACKET_LOSS; + call->cong_dup_acks = 0; + goto send_extra_data; + +send_extra_data: + /* Send some previously unsent DATA if we have some to advance the ACK + * state. + */ + if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & + RXRPC_TX_ANNO_LAST || + summary->nr_acks != call->tx_top - call->tx_hard_ack) { + call->cong_extra++; + wake_up(&call->waitq); + } + goto out_no_clear_ca; } /* - * process a DATA packet, posting the packet to the appropriate queue - * - eats the packet if successful + * Ping the other end to fill our RTT cache and to retrieve the rwind + * and MTU parameters. */ -static int rxrpc_fast_process_data(struct rxrpc_call *call, - struct sk_buff *skb, u32 seq) +static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, + int skew) { - struct rxrpc_skb_priv *sp; - bool terminal; - int ret, ackbit, ack; - u32 serial; - u8 flags; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + ktime_t now = skb->tstamp; - _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq); + if (call->peer->rtt_usage < 3 || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) + rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + true, true, + rxrpc_propose_ack_ping_for_params); +} - sp = rxrpc_skb(skb); - ASSERTCMP(sp->call, ==, NULL); - flags = sp->hdr.flags; - serial = sp->hdr.serial; +/* + * Apply a hard ACK by advancing the Tx window. + */ +static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, + struct rxrpc_ack_summary *summary) +{ + struct sk_buff *skb, *list = NULL; + int ix; + u8 annotation; + + if (call->acks_lowest_nak == call->tx_hard_ack) { + call->acks_lowest_nak = to; + } else if (before_eq(call->acks_lowest_nak, to)) { + summary->new_low_nack = true; + call->acks_lowest_nak = to; + } spin_lock(&call->lock); - if (call->state > RXRPC_CALL_COMPLETE) - goto discard; + while (before(call->tx_hard_ack, to)) { + call->tx_hard_ack++; + ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + annotation = call->rxtx_annotations[ix]; + rxrpc_see_skb(skb, rxrpc_skb_tx_rotated); + call->rxtx_buffer[ix] = NULL; + call->rxtx_annotations[ix] = 0; + skb->next = list; + list = skb; + + if (annotation & RXRPC_TX_ANNO_LAST) + set_bit(RXRPC_CALL_TX_LAST, &call->flags); + if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK) + summary->nr_rot_new_acks++; + } - ASSERTCMP(call->rx_data_expect, >=, call->rx_data_post); - ASSERTCMP(call->rx_data_post, >=, call->rx_data_recv); - ASSERTCMP(call->rx_data_recv, >=, call->rx_data_eaten); + spin_unlock(&call->lock); - if (seq < call->rx_data_post) { - _debug("dup #%u [-%u]", seq, call->rx_data_post); - ack = RXRPC_ACK_DUPLICATE; - ret = -ENOBUFS; - goto discard_and_ack; - } + trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ? + rxrpc_transmit_rotate_last : + rxrpc_transmit_rotate)); + wake_up(&call->waitq); - /* we may already have the packet in the out of sequence queue */ - ackbit = seq - (call->rx_data_eaten + 1); - ASSERTCMP(ackbit, >=, 0); - if (__test_and_set_bit(ackbit, call->ackr_window)) { - _debug("dup oos #%u [%u,%u]", - seq, call->rx_data_eaten, call->rx_data_post); - ack = RXRPC_ACK_DUPLICATE; - goto discard_and_ack; + while (list) { + skb = list; + list = skb->next; + skb->next = NULL; + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); } +} - if (seq >= call->ackr_win_top) { - _debug("exceed #%u [%u]", seq, call->ackr_win_top); - __clear_bit(ackbit, call->ackr_window); - ack = RXRPC_ACK_EXCEEDS_WINDOW; - goto discard_and_ack; - } +/* + * End the transmission phase of a call. + * + * This occurs when we get an ACKALL packet, the first DATA packet of a reply, + * or a final ACK packet. + */ +static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, + const char *abort_why) +{ - if (seq == call->rx_data_expect) { - clear_bit(RXRPC_CALL_EXPECT_OOS, &call->flags); - call->rx_data_expect++; - } else if (seq > call->rx_data_expect) { - _debug("oos #%u [%u]", seq, call->rx_data_expect); - call->rx_data_expect = seq + 1; - if (test_and_set_bit(RXRPC_CALL_EXPECT_OOS, &call->flags)) { - ack = RXRPC_ACK_OUT_OF_SEQUENCE; - goto enqueue_and_ack; - } - goto enqueue_packet; - } + ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); - if (seq != call->rx_data_post) { - _debug("ahead #%u [%u]", seq, call->rx_data_post); - goto enqueue_packet; - } + write_lock(&call->state_lock); - if (test_bit(RXRPC_CALL_RCVD_LAST, &call->flags)) - goto protocol_error; + switch (call->state) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + if (reply_begun) + call->state = RXRPC_CALL_CLIENT_RECV_REPLY; + else + call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + break; - /* if the packet need security things doing to it, then it goes down - * the slow path */ - if (call->conn->security_ix) - goto enqueue_packet; + case RXRPC_CALL_SERVER_AWAIT_ACK: + __rxrpc_call_completed(call); + rxrpc_notify_socket(call); + break; - sp->call = call; - rxrpc_get_call(call); - atomic_inc(&call->skb_count); - terminal = ((flags & RXRPC_LAST_PACKET) && - !(flags & RXRPC_CLIENT_INITIATED)); - ret = rxrpc_queue_rcv_skb(call, skb, false, terminal); - if (ret < 0) { - if (ret == -ENOMEM || ret == -ENOBUFS) { - __clear_bit(ackbit, call->ackr_window); - ack = RXRPC_ACK_NOSPACE; - goto discard_and_ack; - } - goto out; + default: + goto bad_state; } - skb = NULL; - sp = NULL; - - _debug("post #%u", seq); - ASSERTCMP(call->rx_data_post, ==, seq); - call->rx_data_post++; + write_unlock(&call->state_lock); + if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true, + rxrpc_propose_ack_client_tx_end); + trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); + } else { + trace_rxrpc_transmit(call, rxrpc_transmit_end); + } + _leave(" = ok"); + return true; + +bad_state: + write_unlock(&call->state_lock); + kdebug("end_tx %s", rxrpc_call_states[call->state]); + rxrpc_proto_abort(abort_why, call, call->tx_top); + return false; +} - if (flags & RXRPC_LAST_PACKET) - set_bit(RXRPC_CALL_RCVD_LAST, &call->flags); +/* + * Begin the reply reception phase of a call. + */ +static bool rxrpc_receiving_reply(struct rxrpc_call *call) +{ + struct rxrpc_ack_summary summary = { 0 }; + rxrpc_seq_t top = READ_ONCE(call->tx_top); + + if (call->ackr_reason) { + spin_lock_bh(&call->lock); + call->ackr_reason = 0; + call->resend_at = call->expire_at; + call->ack_at = call->expire_at; + spin_unlock_bh(&call->lock); + rxrpc_set_timer(call, rxrpc_timer_init_for_reply, + ktime_get_real()); + } - /* if we've reached an out of sequence packet then we need to drain - * that queue into the socket Rx queue now */ - if (call->rx_data_post == call->rx_first_oos) { - _debug("drain rx oos now"); - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events)) - rxrpc_queue_call(call); - read_unlock(&call->state_lock); + if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + rxrpc_rotate_tx_window(call, top, &summary); + if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { + rxrpc_proto_abort("TXL", call, top); + return false; } + if (!rxrpc_end_tx_phase(call, true, "ETD")) + return false; + call->tx_phase = false; + return true; +} - spin_unlock(&call->lock); - atomic_inc(&call->ackr_not_idle); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false); - _leave(" = 0 [posted]"); - return 0; +/* + * Scan a jumbo packet to validate its structure and to work out how many + * subpackets it contains. + * + * A jumbo packet is a collection of consecutive packets glued together with + * little headers between that indicate how to change the initial header for + * each subpacket. + * + * RXRPC_JUMBO_PACKET must be set on all but the last subpacket - and all but + * the last are RXRPC_JUMBO_DATALEN in size. The last subpacket may be of any + * size. + */ +static bool rxrpc_validate_jumbo(struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int offset = sizeof(struct rxrpc_wire_header); + unsigned int len = skb->len; + int nr_jumbo = 1; + u8 flags = sp->hdr.flags; -protocol_error: - ret = -EBADMSG; -out: - spin_unlock(&call->lock); - _leave(" = %d", ret); - return ret; + do { + nr_jumbo++; + if (len - offset < RXRPC_JUMBO_SUBPKTLEN) + goto protocol_error; + if (flags & RXRPC_LAST_PACKET) + goto protocol_error; + offset += RXRPC_JUMBO_DATALEN; + if (skb_copy_bits(skb, offset, &flags, 1) < 0) + goto protocol_error; + offset += sizeof(struct rxrpc_jumbo_header); + } while (flags & RXRPC_JUMBO_PACKET); -discard_and_ack: - _debug("discard and ACK packet %p", skb); - __rxrpc_propose_ACK(call, ack, serial, true); -discard: - spin_unlock(&call->lock); - rxrpc_free_skb(skb); - _leave(" = 0 [discarded]"); - return 0; + sp->nr_jumbo = nr_jumbo; + return true; -enqueue_and_ack: - __rxrpc_propose_ACK(call, ack, serial, true); -enqueue_packet: - _net("defer skb %p", skb); - spin_unlock(&call->lock); - skb_queue_tail(&call->rx_queue, skb); - atomic_inc(&call->ackr_not_idle); - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) - rxrpc_queue_call(call); - read_unlock(&call->state_lock); - _leave(" = 0 [queued]"); - return 0; +protocol_error: + return false; } /* - * assume an implicit ACKALL of the transmission phase of a client socket upon - * reception of the first reply packet + * Handle reception of a duplicate packet. + * + * We have to take care to avoid an attack here whereby we're given a series of + * jumbograms, each with a sequence number one before the preceding one and + * filled up to maximum UDP size. If they never send us the first packet in + * the sequence, they can cause us to have to hold on to around 2MiB of kernel + * space until the call times out. + * + * We limit the space usage by only accepting three duplicate jumbo packets per + * call. After that, we tell the other side we're no longer accepting jumbos + * (that information is encoded in the ACK packet). */ -static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial) +static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, + u8 annotation, bool *_jumbo_bad) { - write_lock_bh(&call->state_lock); - - switch (call->state) { - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; - call->acks_latest = serial; - - _debug("implicit ACKALL %%%u", call->acks_latest); - set_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events); - write_unlock_bh(&call->state_lock); - - if (try_to_del_timer_sync(&call->resend_timer) >= 0) { - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_EV_RESEND, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - } - break; + /* Discard normal packets that are duplicates. */ + if (annotation == 0) + return; - default: - write_unlock_bh(&call->state_lock); - break; + /* Skip jumbo subpackets that are duplicates. When we've had three or + * more partially duplicate jumbo packets, we refuse to take any more + * jumbos for this call. + */ + if (!*_jumbo_bad) { + call->nr_jumbo_bad++; + *_jumbo_bad = true; } } /* - * post an incoming packet to the nominated call to deal with - * - must get rid of the sk_buff, either by freeing it or by queuing it + * Process a DATA packet, adding the packet to the Rx ring. */ -void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) +static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, + u16 skew) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - __be32 wtmp; - u32 hi_serial, abort_code; + unsigned int offset = sizeof(struct rxrpc_wire_header); + unsigned int ix; + rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; + rxrpc_seq_t seq = sp->hdr.seq, hard_ack; + bool immediate_ack = false, jumbo_bad = false, queued; + u16 len; + u8 ack = 0, flags, annotation = 0; - _enter("%p,%p", call, skb); + _enter("{%u,%u},{%u,%u}", + call->rx_hard_ack, call->rx_top, skb->len, seq); - ASSERT(!irqs_disabled()); + _proto("Rx DATA %%%u { #%u f=%02x }", + sp->hdr.serial, seq, sp->hdr.flags); -#if 0 // INJECT RX ERROR - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) { - static int skip = 0; - if (++skip == 3) { - printk("DROPPED 3RD PACKET!!!!!!!!!!!!!\n"); - skip = 0; - goto free_packet; - } + if (call->state >= RXRPC_CALL_COMPLETE) + return; + + /* Received data implicitly ACKs all of the request packets we sent + * when we're acting as a client. + */ + if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST || + call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && + !rxrpc_receiving_reply(call)) + return; + + call->ackr_prev_seq = seq; + + hard_ack = READ_ONCE(call->rx_hard_ack); + if (after(seq, hard_ack + call->rx_winsize)) { + ack = RXRPC_ACK_EXCEEDS_WINDOW; + ack_serial = serial; + goto ack; } -#endif - /* track the latest serial number on this connection for ACK packet - * information */ - hi_serial = atomic_read(&call->conn->hi_serial); - while (sp->hdr.serial > hi_serial) - hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial, - sp->hdr.serial); + flags = sp->hdr.flags; + if (flags & RXRPC_JUMBO_PACKET) { + if (call->nr_jumbo_bad > 3) { + ack = RXRPC_ACK_NOSPACE; + ack_serial = serial; + goto ack; + } + annotation = 1; + } - /* request ACK generation for any ACK or DATA packet that requests - * it */ - if (sp->hdr.flags & RXRPC_REQUEST_ACK) { - _proto("ACK Requested on %%%u", sp->hdr.serial); - rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false); +next_subpacket: + queued = false; + ix = seq & RXRPC_RXTX_BUFF_MASK; + len = skb->len; + if (flags & RXRPC_JUMBO_PACKET) + len = RXRPC_JUMBO_DATALEN; + + if (flags & RXRPC_LAST_PACKET) { + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && + seq != call->rx_top) + return rxrpc_proto_abort("LSN", call, seq); + } else { + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && + after_eq(seq, call->rx_top)) + return rxrpc_proto_abort("LSA", call, seq); } - switch (sp->hdr.type) { - case RXRPC_PACKET_TYPE_ABORT: - _debug("abort"); + if (before_eq(seq, hard_ack)) { + ack = RXRPC_ACK_DUPLICATE; + ack_serial = serial; + goto skip; + } - if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0) - goto protocol_error; + if (flags & RXRPC_REQUEST_ACK && !ack) { + ack = RXRPC_ACK_REQUESTED; + ack_serial = serial; + } - abort_code = ntohl(wtmp); - _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); - - write_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - call->state = RXRPC_CALL_REMOTELY_ABORTED; - call->remote_abort = abort_code; - set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - rxrpc_queue_call(call); + if (call->rxtx_buffer[ix]) { + rxrpc_input_dup_data(call, seq, annotation, &jumbo_bad); + if (ack != RXRPC_ACK_DUPLICATE) { + ack = RXRPC_ACK_DUPLICATE; + ack_serial = serial; } - goto free_packet_unlock; + immediate_ack = true; + goto skip; + } - case RXRPC_PACKET_TYPE_BUSY: - _proto("Rx BUSY %%%u", sp->hdr.serial); + /* Queue the packet. We use a couple of memory barriers here as need + * to make sure that rx_top is perceived to be set after the buffer + * pointer and that the buffer pointer is set after the annotation and + * the skb data. + * + * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window() + * and also rxrpc_fill_out_ack(). + */ + rxrpc_get_skb(skb, rxrpc_skb_rx_got); + call->rxtx_annotations[ix] = annotation; + smp_wmb(); + call->rxtx_buffer[ix] = skb; + if (after(seq, call->rx_top)) { + smp_store_release(&call->rx_top, seq); + } else if (before(seq, call->rx_top)) { + /* Send an immediate ACK if we fill in a hole */ + if (!ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } + immediate_ack = true; + } + if (flags & RXRPC_LAST_PACKET) { + set_bit(RXRPC_CALL_RX_LAST, &call->flags); + trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq); + } else { + trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq); + } + queued = true; - if (rxrpc_conn_is_service(call->conn)) - goto protocol_error; + if (after_eq(seq, call->rx_expect_next)) { + if (after(seq, call->rx_expect_next)) { + _net("OOS %u > %u", seq, call->rx_expect_next); + ack = RXRPC_ACK_OUT_OF_SEQUENCE; + ack_serial = serial; + } + call->rx_expect_next = seq + 1; + } - write_lock_bh(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_CLIENT_SEND_REQUEST: - call->state = RXRPC_CALL_SERVER_BUSY; - set_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events); - rxrpc_queue_call(call); - case RXRPC_CALL_SERVER_BUSY: - goto free_packet_unlock; - default: - goto protocol_error_locked; +skip: + offset += len; + if (flags & RXRPC_JUMBO_PACKET) { + if (skb_copy_bits(skb, offset, &flags, 1) < 0) + return rxrpc_proto_abort("XJF", call, seq); + offset += sizeof(struct rxrpc_jumbo_header); + seq++; + serial++; + annotation++; + if (flags & RXRPC_JUMBO_PACKET) + annotation |= RXRPC_RX_ANNO_JLAST; + if (after(seq, hard_ack + call->rx_winsize)) { + ack = RXRPC_ACK_EXCEEDS_WINDOW; + ack_serial = serial; + if (!jumbo_bad) { + call->nr_jumbo_bad++; + jumbo_bad = true; + } + goto ack; } - default: - _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial); - goto protocol_error; + _proto("Rx DATA Jumbo %%%u", serial); + goto next_subpacket; + } - case RXRPC_PACKET_TYPE_DATA: - _proto("Rx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); + if (queued && flags & RXRPC_LAST_PACKET && !ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } - if (sp->hdr.seq == 0) - goto protocol_error; +ack: + if (ack) + rxrpc_propose_ACK(call, ack, skew, ack_serial, + immediate_ack, true, + rxrpc_propose_ack_input_data); - call->ackr_prev_seq = sp->hdr.seq; + if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1) + rxrpc_notify_socket(call); + _leave(" [queued]"); +} - /* received data implicitly ACKs all of the request packets we - * sent when we're acting as a client */ - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) - rxrpc_assume_implicit_ackall(call, sp->hdr.serial); +/* + * Process a requested ACK. + */ +static void rxrpc_input_requested_ack(struct rxrpc_call *call, + ktime_t resp_time, + rxrpc_serial_t orig_serial, + rxrpc_serial_t ack_serial) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + ktime_t sent_at; + int ix; + + for (ix = 0; ix < RXRPC_RXTX_BUFF_SIZE; ix++) { + skb = call->rxtx_buffer[ix]; + if (!skb) + continue; + + sp = rxrpc_skb(skb); + if (sp->hdr.serial != orig_serial) + continue; + smp_rmb(); + sent_at = skb->tstamp; + goto found; + } + return; - switch (rxrpc_fast_process_data(call, skb, sp->hdr.seq)) { - case 0: - skb = NULL; - goto done; +found: + rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_requested_ack, + orig_serial, ack_serial, sent_at, resp_time); +} - default: - BUG(); +/* + * Process a ping response. + */ +static void rxrpc_input_ping_response(struct rxrpc_call *call, + ktime_t resp_time, + rxrpc_serial_t orig_serial, + rxrpc_serial_t ack_serial) +{ + rxrpc_serial_t ping_serial; + ktime_t ping_time; - /* data packet received beyond the last packet */ - case -EBADMSG: - goto protocol_error; - } + ping_time = call->ackr_ping_time; + smp_rmb(); + ping_serial = call->ackr_ping; - case RXRPC_PACKET_TYPE_ACKALL: - case RXRPC_PACKET_TYPE_ACK: - /* ACK processing is done in process context */ - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) { - skb_queue_tail(&call->rx_queue, skb); - rxrpc_queue_call(call); - skb = NULL; - } - read_unlock_bh(&call->state_lock); - goto free_packet; + if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || + before(orig_serial, ping_serial)) + return; + clear_bit(RXRPC_CALL_PINGING, &call->flags); + if (after(orig_serial, ping_serial)) + return; + + rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_ping_response, + orig_serial, ack_serial, ping_time, resp_time); +} + +/* + * Process the extra information that may be appended to an ACK packet + */ +static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, + struct rxrpc_ackinfo *ackinfo) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_peer *peer; + unsigned int mtu; + u32 rwind = ntohl(ackinfo->rwind); + + _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", + sp->hdr.serial, + ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), + rwind, ntohl(ackinfo->jumbo_max)); + + if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) + rwind = RXRPC_RXTX_BUFF_SIZE - 1; + call->tx_winsize = rwind; + if (call->cong_ssthresh > rwind) + call->cong_ssthresh = rwind; + + mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU)); + + peer = call->peer; + if (mtu < peer->maxdata) { + spin_lock_bh(&peer->lock); + peer->maxdata = mtu; + peer->mtu = mtu + peer->hdrsize; + spin_unlock_bh(&peer->lock); + _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); } +} -protocol_error: - _debug("protocol error"); - write_lock_bh(&call->state_lock); -protocol_error_locked: - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = RX_PROTOCOL_ERROR; - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - rxrpc_queue_call(call); +/* + * Process individual soft ACKs. + * + * Each ACK in the array corresponds to one packet and can be either an ACK or + * a NAK. If we get find an explicitly NAK'd packet we resend immediately; + * packets that lie beyond the end of the ACK list are scheduled for resend by + * the timer on the basis that the peer might just not have processed them at + * the time the ACK was sent. + */ +static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, + rxrpc_seq_t seq, int nr_acks, + struct rxrpc_ack_summary *summary) +{ + int ix; + u8 annotation, anno_type; + + for (; nr_acks > 0; nr_acks--, seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + annotation &= ~RXRPC_TX_ANNO_MASK; + switch (*acks++) { + case RXRPC_ACK_TYPE_ACK: + summary->nr_acks++; + if (anno_type == RXRPC_TX_ANNO_ACK) + continue; + summary->nr_new_acks++; + call->rxtx_annotations[ix] = + RXRPC_TX_ANNO_ACK | annotation; + break; + case RXRPC_ACK_TYPE_NACK: + if (!summary->nr_nacks && + call->acks_lowest_nak != seq) { + call->acks_lowest_nak = seq; + summary->new_low_nack = true; + } + summary->nr_nacks++; + if (anno_type == RXRPC_TX_ANNO_NAK) + continue; + summary->nr_new_nacks++; + if (anno_type == RXRPC_TX_ANNO_RETRANS) + continue; + call->rxtx_annotations[ix] = + RXRPC_TX_ANNO_NAK | annotation; + break; + default: + return rxrpc_proto_abort("SFT", call, 0); + } } -free_packet_unlock: - write_unlock_bh(&call->state_lock); -free_packet: - rxrpc_free_skb(skb); -done: - _leave(""); } /* - * split up a jumbo data packet + * Process an ACK packet. + * + * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet + * in the ACK array. Anything before that is hard-ACK'd and may be discarded. + * + * A hard-ACK means that a packet has been processed and may be discarded; a + * soft-ACK means that the packet may be discarded and retransmission + * requested. A phase is complete when all packets are hard-ACK'd. */ -static void rxrpc_process_jumbo_packet(struct rxrpc_call *call, - struct sk_buff *jumbo) +static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, + u16 skew) { - struct rxrpc_jumbo_header jhdr; - struct rxrpc_skb_priv *sp; - struct sk_buff *part; + struct rxrpc_ack_summary summary = { 0 }; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + union { + struct rxrpc_ackpacket ack; + struct rxrpc_ackinfo info; + u8 acks[RXRPC_MAXACKS]; + } buf; + rxrpc_serial_t acked_serial; + rxrpc_seq_t first_soft_ack, hard_ack; + int nr_acks, offset, ioffset; + + _enter(""); + + offset = sizeof(struct rxrpc_wire_header); + if (skb_copy_bits(skb, offset, &buf.ack, sizeof(buf.ack)) < 0) { + _debug("extraction failure"); + return rxrpc_proto_abort("XAK", call, 0); + } + offset += sizeof(buf.ack); + + acked_serial = ntohl(buf.ack.serial); + first_soft_ack = ntohl(buf.ack.firstPacket); + hard_ack = first_soft_ack - 1; + nr_acks = buf.ack.nAcks; + summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? + buf.ack.reason : RXRPC_ACK__INVALID); + + trace_rxrpc_rx_ack(call, first_soft_ack, summary.ack_reason, nr_acks); + + _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", + sp->hdr.serial, + ntohs(buf.ack.maxSkew), + first_soft_ack, + ntohl(buf.ack.previousPacket), + acked_serial, + rxrpc_ack_names[summary.ack_reason], + buf.ack.nAcks); + + if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) + rxrpc_input_ping_response(call, skb->tstamp, acked_serial, + sp->hdr.serial); + if (buf.ack.reason == RXRPC_ACK_REQUESTED) + rxrpc_input_requested_ack(call, skb->tstamp, acked_serial, + sp->hdr.serial); + + if (buf.ack.reason == RXRPC_ACK_PING) { + _proto("Rx ACK %%%u PING Request", sp->hdr.serial); + rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, + skew, sp->hdr.serial, true, true, + rxrpc_propose_ack_respond_to_ping); + } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { + rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, + skew, sp->hdr.serial, true, true, + rxrpc_propose_ack_respond_to_ack); + } - _enter(",{%u,%u}", jumbo->data_len, jumbo->len); + ioffset = offset + nr_acks + 3; + if (skb->len >= ioffset + sizeof(buf.info)) { + if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) + return rxrpc_proto_abort("XAI", call, 0); + rxrpc_input_ackinfo(call, skb, &buf.info); + } - sp = rxrpc_skb(jumbo); + if (first_soft_ack == 0) + return rxrpc_proto_abort("AK0", call, 0); - do { - sp->hdr.flags &= ~RXRPC_JUMBO_PACKET; - - /* make a clone to represent the first subpacket in what's left - * of the jumbo packet */ - part = skb_clone(jumbo, GFP_ATOMIC); - if (!part) { - /* simply ditch the tail in the event of ENOMEM */ - pskb_trim(jumbo, RXRPC_JUMBO_DATALEN); - break; - } - rxrpc_new_skb(part); + /* Ignore ACKs unless we are or have just been transmitting. */ + switch (call->state) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + case RXRPC_CALL_SERVER_SEND_REPLY: + case RXRPC_CALL_SERVER_AWAIT_ACK: + break; + default: + return; + } - pskb_trim(part, RXRPC_JUMBO_DATALEN); + /* Discard any out-of-order or duplicate ACKs. */ + if (before_eq(sp->hdr.serial, call->acks_latest)) { + _debug("discard ACK %d <= %d", + sp->hdr.serial, call->acks_latest); + return; + } + call->acks_latest_ts = skb->tstamp; + call->acks_latest = sp->hdr.serial; + + if (before(hard_ack, call->tx_hard_ack) || + after(hard_ack, call->tx_top)) + return rxrpc_proto_abort("AKW", call, 0); + if (nr_acks > call->tx_top - hard_ack) + return rxrpc_proto_abort("AKN", call, 0); + + if (after(hard_ack, call->tx_hard_ack)) + rxrpc_rotate_tx_window(call, hard_ack, &summary); + + if (nr_acks > 0) { + if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) + return rxrpc_proto_abort("XSA", call, 0); + rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks, + &summary); + } - if (!pskb_pull(jumbo, RXRPC_JUMBO_DATALEN)) - goto protocol_error; + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { + rxrpc_end_tx_phase(call, false, "ETA"); + return; + } - if (skb_copy_bits(jumbo, 0, &jhdr, sizeof(jhdr)) < 0) - goto protocol_error; - if (!pskb_pull(jumbo, sizeof(jhdr))) - BUG(); + if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & + RXRPC_TX_ANNO_LAST && + summary.nr_acks == call->tx_top - hard_ack) + rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + false, true, + rxrpc_propose_ack_ping_for_lost_reply); - sp->hdr.seq += 1; - sp->hdr.serial += 1; - sp->hdr.flags = jhdr.flags; - sp->hdr._rsvd = ntohs(jhdr._rsvd); + return rxrpc_congestion_management(call, skb, &summary, acked_serial); +} - _proto("Rx DATA Jumbo %%%u", sp->hdr.serial - 1); +/* + * Process an ACKALL packet. + */ +static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_ack_summary summary = { 0 }; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - rxrpc_fast_process_packet(call, part); - part = NULL; + _proto("Rx ACKALL %%%u", sp->hdr.serial); - } while (sp->hdr.flags & RXRPC_JUMBO_PACKET); + rxrpc_rotate_tx_window(call, call->tx_top, &summary); + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + rxrpc_end_tx_phase(call, false, "ETL"); +} - rxrpc_fast_process_packet(call, jumbo); - _leave(""); - return; +/* + * Process an ABORT packet. + */ +static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + __be32 wtmp; + u32 abort_code = RX_CALL_DEAD; -protocol_error: - _debug("protocol error"); - rxrpc_free_skb(part); - rxrpc_free_skb(jumbo); - write_lock_bh(&call->state_lock); - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = RX_PROTOCOL_ERROR; - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - rxrpc_queue_call(call); - } - write_unlock_bh(&call->state_lock); - _leave(""); + _enter(""); + + if (skb->len >= 4 && + skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &wtmp, sizeof(wtmp)) >= 0) + abort_code = ntohl(wtmp); + + _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); + + if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + abort_code, ECONNABORTED)) + rxrpc_notify_socket(call); } /* - * post an incoming packet to the appropriate call/socket to deal with - * - must get rid of the sk_buff, either by freeing it or by queuing it + * Process an incoming call packet. */ -static void rxrpc_post_packet_to_call(struct rxrpc_call *call, - struct sk_buff *skb) +static void rxrpc_input_call_packet(struct rxrpc_call *call, + struct sk_buff *skb, u16 skew) { - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); _enter("%p,%p", call, skb); - sp = rxrpc_skb(skb); + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_DATA: + rxrpc_input_data(call, skb, skew); + break; - _debug("extant call [%d]", call->state); + case RXRPC_PACKET_TYPE_ACK: + rxrpc_input_ack(call, skb, skew); + break; + + case RXRPC_PACKET_TYPE_BUSY: + _proto("Rx BUSY %%%u", sp->hdr.serial); + + /* Just ignore BUSY packets from the server; the retry and + * lifespan timers will take care of business. BUSY packets + * from the client don't make sense. + */ + break; + + case RXRPC_PACKET_TYPE_ABORT: + rxrpc_input_abort(call, skb); + break; + + case RXRPC_PACKET_TYPE_ACKALL: + rxrpc_input_ackall(call, skb); + break; - read_lock(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_LOCALLY_ABORTED: - if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) { - rxrpc_queue_call(call); - goto free_unlock; - } - case RXRPC_CALL_REMOTELY_ABORTED: - case RXRPC_CALL_NETWORK_ERROR: - case RXRPC_CALL_DEAD: - goto dead_call; - case RXRPC_CALL_COMPLETE: - case RXRPC_CALL_CLIENT_FINAL_ACK: - /* complete server call */ - if (rxrpc_conn_is_service(call->conn)) - goto dead_call; - /* resend last packet of a completed call */ - _debug("final ack again"); - rxrpc_get_call(call); - set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - rxrpc_queue_call(call); - goto free_unlock; default: + _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial); break; } - read_unlock(&call->state_lock); - rxrpc_get_call(call); - - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && - sp->hdr.flags & RXRPC_JUMBO_PACKET) - rxrpc_process_jumbo_packet(call, skb); - else - rxrpc_fast_process_packet(call, skb); - - rxrpc_put_call(call); - goto done; - -dead_call: - if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { - skb->priority = RX_CALL_DEAD; - rxrpc_reject_packet(call->conn->params.local, skb); - goto unlock; - } -free_unlock: - rxrpc_free_skb(skb); -unlock: - read_unlock(&call->state_lock); -done: _leave(""); } /* * post connection-level events to the connection - * - this includes challenges, responses and some aborts + * - this includes challenges, responses, some aborts and call terminal packet + * retransmission. */ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, struct sk_buff *skb) @@ -595,6 +965,17 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local, } /* + * put a packet up for transport-level abort + */ +static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) +{ + CHECK_SLAB_OKAY(&local->usage); + + skb_queue_tail(&local->reject_queue, skb); + rxrpc_queue_local(local); +} + +/* * Extract the wire header from a packet and translate the byte order. */ static noinline @@ -605,8 +986,6 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) /* dig out the RxRPC connection details */ if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) return -EBADMSG; - if (!pskb_pull(skb, sizeof(whdr))) - BUG(); memset(sp, 0, sizeof(*sp)); sp->hdr.epoch = ntohl(whdr.epoch); @@ -631,19 +1010,22 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) * shut down and the local endpoint from going away, thus sk_user_data will not * be cleared until this function returns. */ -void rxrpc_data_ready(struct sock *sk) +void rxrpc_data_ready(struct sock *udp_sk) { struct rxrpc_connection *conn; + struct rxrpc_channel *chan; + struct rxrpc_call *call; struct rxrpc_skb_priv *sp; - struct rxrpc_local *local = sk->sk_user_data; + struct rxrpc_local *local = udp_sk->sk_user_data; struct sk_buff *skb; - int ret; + unsigned int channel; + int ret, skew; - _enter("%p", sk); + _enter("%p", udp_sk); ASSERT(!irqs_disabled()); - skb = skb_recv_datagram(sk, 0, 1, &ret); + skb = skb_recv_datagram(udp_sk, 0, 1, &ret); if (!skb) { if (ret == -EAGAIN) return; @@ -651,13 +1033,13 @@ void rxrpc_data_ready(struct sock *sk) return; } - rxrpc_new_skb(skb); + rxrpc_new_skb(skb, rxrpc_skb_rx_received); _net("recv skb %p", skb); /* we'll probably need to checksum it (didn't call sock_recvmsg) */ if (skb_checksum_complete(skb)) { - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0); _leave(" [CSUM failed]"); return; @@ -671,13 +1053,21 @@ void rxrpc_data_ready(struct sock *sk) skb_orphan(skb); sp = rxrpc_skb(skb); - _net("Rx UDP packet from %08x:%04hu", - ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source)); - /* dig out the RxRPC connection details */ if (rxrpc_extract_header(sp, skb) < 0) goto bad_message; + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { + static int lose; + if ((lose++ & 7) == 7) { + trace_rxrpc_rx_lose(sp); + rxrpc_lose_skb(skb, rxrpc_skb_rx_lost); + return; + } + } + + trace_rxrpc_rx_packet(sp); + _net("Rx RxRPC %s ep=%x call=%x:%x", sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient", sp->hdr.epoch, sp->hdr.cid, sp->hdr.callNumber); @@ -688,70 +1078,125 @@ void rxrpc_data_ready(struct sock *sk) goto bad_message; } - if (sp->hdr.type == RXRPC_PACKET_TYPE_VERSION) { + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_VERSION: rxrpc_post_packet_to_local(local, skb); goto out; - } - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && - (sp->hdr.callNumber == 0 || sp->hdr.seq == 0)) - goto bad_message; + case RXRPC_PACKET_TYPE_BUSY: + if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) + goto discard; + + case RXRPC_PACKET_TYPE_DATA: + if (sp->hdr.callNumber == 0) + goto bad_message; + if (sp->hdr.flags & RXRPC_JUMBO_PACKET && + !rxrpc_validate_jumbo(skb)) + goto bad_message; + break; + } rcu_read_lock(); conn = rxrpc_find_connection_rcu(local, skb); - if (!conn) - goto cant_route_call; + if (conn) { + if (sp->hdr.securityIndex != conn->security_ix) + goto wrong_security; + + if (sp->hdr.callNumber == 0) { + /* Connection-level packet */ + _debug("CONN %p {%d}", conn, conn->debug_id); + rxrpc_post_packet_to_conn(conn, skb); + goto out_unlock; + } + + /* Note the serial number skew here */ + skew = (int)sp->hdr.serial - (int)conn->hi_serial; + if (skew >= 0) { + if (skew > 0) + conn->hi_serial = sp->hdr.serial; + } else { + skew = -skew; + skew = min(skew, 65535); + } - if (sp->hdr.callNumber == 0) { - /* Connection-level packet */ - _debug("CONN %p {%d}", conn, conn->debug_id); - rxrpc_post_packet_to_conn(conn, skb); - } else { /* Call-bound packets are routed by connection channel. */ - unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK; - struct rxrpc_channel *chan = &conn->channels[channel]; - struct rxrpc_call *call = rcu_dereference(chan->call); + channel = sp->hdr.cid & RXRPC_CHANNELMASK; + chan = &conn->channels[channel]; + + /* Ignore really old calls */ + if (sp->hdr.callNumber < chan->last_call) + goto discard_unlock; + + if (sp->hdr.callNumber == chan->last_call) { + /* For the previous service call, if completed successfully, we + * discard all further packets. + */ + if (rxrpc_conn_is_service(conn) && + (chan->last_type == RXRPC_PACKET_TYPE_ACK || + sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)) + goto discard_unlock; + + /* But otherwise we need to retransmit the final packet from + * data cached in the connection record. + */ + rxrpc_post_packet_to_conn(conn, skb); + goto out_unlock; + } - if (!call || atomic_read(&call->usage) == 0) - goto cant_route_call; + call = rcu_dereference(chan->call); + } else { + skew = 0; + call = NULL; + } - rxrpc_post_packet_to_call(call, skb); + if (!call || atomic_read(&call->usage) == 0) { + if (!(sp->hdr.type & RXRPC_CLIENT_INITIATED) || + sp->hdr.callNumber == 0 || + sp->hdr.type != RXRPC_PACKET_TYPE_DATA) + goto bad_message_unlock; + if (sp->hdr.seq != 1) + goto discard_unlock; + call = rxrpc_new_incoming_call(local, conn, skb); + if (!call) { + rcu_read_unlock(); + goto reject_packet; + } + rxrpc_send_ping(call, skb, skew); } + rxrpc_input_call_packet(call, skb, skew); + goto discard_unlock; + +discard_unlock: rcu_read_unlock(); +discard: + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); out: + trace_rxrpc_rx_done(0, 0); return; -cant_route_call: +out_unlock: rcu_read_unlock(); + goto out; - _debug("can't route call"); - if (sp->hdr.flags & RXRPC_CLIENT_INITIATED && - sp->hdr.type == RXRPC_PACKET_TYPE_DATA) { - if (sp->hdr.seq == 1) { - _debug("first packet"); - skb_queue_tail(&local->accept_queue, skb); - rxrpc_queue_work(&local->processor); - _leave(" [incoming]"); - return; - } - skb->priority = RX_INVALID_OPERATION; - } else { - skb->priority = RX_CALL_DEAD; - } - - if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { - _debug("reject type %d",sp->hdr.type); - rxrpc_reject_packet(local, skb); - } else { - rxrpc_free_skb(skb); - } - _leave(" [no call]"); - return; +wrong_security: + rcu_read_unlock(); + trace_rxrpc_abort("SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RXKADINCONSISTENCY, EBADMSG); + skb->priority = RXKADINCONSISTENCY; + goto post_abort; +bad_message_unlock: + rcu_read_unlock(); bad_message: + trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_PROTOCOL_ERROR, EBADMSG); skb->priority = RX_PROTOCOL_ERROR; +post_abort: + skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; +reject_packet: + trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_reject_packet(local, skb); _leave(" [badmsg]"); } diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index c21ad213b337..7d4375e557e6 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -23,31 +23,36 @@ static int none_prime_packet_security(struct rxrpc_connection *conn) } static int none_secure_packet(struct rxrpc_call *call, - struct sk_buff *skb, - size_t data_size, - void *sechdr) + struct sk_buff *skb, + size_t data_size, + void *sechdr) { return 0; } -static int none_verify_packet(struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, + rxrpc_seq_t seq, u16 expected_cksum) { return 0; } +static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ +} + static int none_respond_to_challenge(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb, + u32 *_abort_code) { *_abort_code = RX_PROTOCOL_ERROR; return -EPROTO; } static int none_verify_response(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb, + u32 *_abort_code) { *_abort_code = RX_PROTOCOL_ERROR; return -EPROTO; @@ -78,6 +83,7 @@ const struct rxrpc_security rxrpc_no_security = { .prime_packet_security = none_prime_packet_security, .secure_packet = none_secure_packet, .verify_packet = none_verify_packet, + .locate_data = none_locate_data, .respond_to_challenge = none_respond_to_challenge, .verify_response = none_verify_response, .clear = none_clear, diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index 31a3f86ef2f6..540d3955c1bc 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -15,8 +15,6 @@ #include <linux/net.h> #include <linux/skbuff.h> #include <linux/slab.h> -#include <linux/udp.h> -#include <linux/ip.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include <generated/utsrelease.h> @@ -33,7 +31,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, { struct rxrpc_wire_header whdr; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct sockaddr_in sin; + struct sockaddr_rxrpc srx; struct msghdr msg; struct kvec iov[2]; size_t len; @@ -41,12 +39,11 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, _enter(""); - sin.sin_family = AF_INET; - sin.sin_port = udp_hdr(skb)->source; - sin.sin_addr.s_addr = ip_hdr(skb)->saddr; + if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) + return; - msg.msg_name = &sin; - msg.msg_namelen = sizeof(sin); + msg.msg_name = &srx.transport; + msg.msg_namelen = srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -93,11 +90,13 @@ void rxrpc_process_local_events(struct rxrpc_local *local) if (skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); _debug("{%d},{%u}", local->debug_id, sp->hdr.type); switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: - if (skb_copy_bits(skb, 0, &v, 1) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &v, 1) < 0) return; _proto("Rx VERSION { %02x }", v); if (v == 0) @@ -109,7 +108,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) break; } - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); } _leave(""); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index a753796fbe8f..ff4864d550b8 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -58,6 +58,17 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local, memcmp(&local->srx.transport.sin.sin_addr, &srx->transport.sin.sin_addr, sizeof(struct in_addr)); +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + /* If the choice of UDP6 port is left up to the transport, then + * the endpoint record doesn't match. + */ + return ((u16 __force)local->srx.transport.sin6.sin6_port - + (u16 __force)srx->transport.sin6.sin6_port) ?: + memcmp(&local->srx.transport.sin6.sin6_addr, + &srx->transport.sin6.sin6_addr, + sizeof(struct in6_addr)); +#endif default: BUG(); } @@ -75,9 +86,7 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) atomic_set(&local->usage, 1); INIT_LIST_HEAD(&local->link); INIT_WORK(&local->processor, rxrpc_local_processor); - INIT_LIST_HEAD(&local->services); init_rwsem(&local->defrag_sem); - skb_queue_head_init(&local->accept_queue); skb_queue_head_init(&local->reject_queue); skb_queue_head_init(&local->event_queue); local->client_conns = RB_ROOT; @@ -101,11 +110,12 @@ static int rxrpc_open_socket(struct rxrpc_local *local) struct sock *sock; int ret, opt; - _enter("%p{%d}", local, local->srx.transport_type); + _enter("%p{%d,%d}", + local, local->srx.transport_type, local->srx.transport.family); /* create a socket to represent the local endpoint */ - ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type, - IPPROTO_UDP, &local->socket); + ret = sock_create_kern(&init_net, local->srx.transport.family, + local->srx.transport_type, 0, &local->socket); if (ret < 0) { _leave(" = %d [socket]", ret); return ret; @@ -170,18 +180,8 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) long diff; int ret; - if (srx->transport.family == AF_INET) { - _enter("{%d,%u,%pI4+%hu}", - srx->transport_type, - srx->transport.family, - &srx->transport.sin.sin_addr, - ntohs(srx->transport.sin.sin_port)); - } else { - _enter("{%d,%u}", - srx->transport_type, - srx->transport.family); - return ERR_PTR(-EAFNOSUPPORT); - } + _enter("{%d,%d,%pISp}", + srx->transport_type, srx->transport.family, &srx->transport); mutex_lock(&rxrpc_local_mutex); @@ -234,13 +234,8 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) found: mutex_unlock(&rxrpc_local_mutex); - _net("LOCAL %s %d {%d,%u,%pI4+%hu}", - age, - local->debug_id, - local->srx.transport_type, - local->srx.transport.family, - &local->srx.transport.sin.sin_addr, - ntohs(local->srx.transport.sin.sin_port)); + _net("LOCAL %s %d {%pISp}", + age, local->debug_id, &local->srx.transport); _leave(" = %p", local); return local; @@ -296,7 +291,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) mutex_unlock(&rxrpc_local_mutex); ASSERT(RB_EMPTY_ROOT(&local->client_conns)); - ASSERT(list_empty(&local->services)); + ASSERT(!local->service); if (socket) { local->socket = NULL; @@ -308,7 +303,6 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) /* At this point, there should be no more packets coming in to the * local endpoint. */ - rxrpc_purge_queue(&local->accept_queue); rxrpc_purge_queue(&local->reject_queue); rxrpc_purge_queue(&local->event_queue); @@ -332,11 +326,6 @@ static void rxrpc_local_processor(struct work_struct *work) if (atomic_read(&local->usage) == 0) return rxrpc_local_destroyer(local); - if (!skb_queue_empty(&local->accept_queue)) { - rxrpc_accept_incoming_calls(local); - again = true; - } - if (!skb_queue_empty(&local->reject_queue)) { rxrpc_reject_packets(local); again = true; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index bdc5e42fe600..9d1c721bc4e8 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -21,28 +21,33 @@ unsigned int rxrpc_max_backlog __read_mostly = 10; /* + * Maximum lifetime of a call (in mx). + */ +unsigned int rxrpc_max_call_lifetime = 60 * 1000; + +/* * How long to wait before scheduling ACK generation after seeing a - * packet with RXRPC_REQUEST_ACK set (in jiffies). + * packet with RXRPC_REQUEST_ACK set (in ms). */ unsigned int rxrpc_requested_ack_delay = 1; /* - * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). + * How long to wait before scheduling an ACK with subtype DELAY (in ms). * * We use this when we've received new data packets. If those packets aren't * all consumed within this time we will send a DELAY ACK if an ACK was not * requested to let the sender know it doesn't need to resend. */ -unsigned int rxrpc_soft_ack_delay = 1 * HZ; +unsigned int rxrpc_soft_ack_delay = 1 * 1000; /* - * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). + * How long to wait before scheduling an ACK with subtype IDLE (in ms). * * We use this when we've consumed some previously soft-ACK'd packets when * further packets aren't immediately received to decide when to send an IDLE * ACK let the other end know that it can free up its Tx buffer space. */ -unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; +unsigned int rxrpc_idle_ack_delay = 0.5 * 1000; /* * Receive window size in packets. This indicates the maximum number of @@ -50,7 +55,10 @@ unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further * packets. */ -unsigned int rxrpc_rx_window_size = 32; +unsigned int rxrpc_rx_window_size = RXRPC_INIT_RX_WINDOW_SIZE; +#if (RXRPC_RXTX_BUFF_SIZE - 1) < RXRPC_INIT_RX_WINDOW_SIZE +#error Need to reduce RXRPC_INIT_RX_WINDOW_SIZE +#endif /* * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet @@ -64,6 +72,11 @@ unsigned int rxrpc_rx_mtu = 5692; */ unsigned int rxrpc_rx_jumbo_max = 4; +/* + * Time till packet resend (in milliseconds). + */ +unsigned int rxrpc_resend_timeout = 4 * 1000; + const char *const rxrpc_pkts[] = { "?00", "DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG", @@ -75,21 +88,152 @@ const s8 rxrpc_ack_priority[] = { [RXRPC_ACK_DELAY] = 1, [RXRPC_ACK_REQUESTED] = 2, [RXRPC_ACK_IDLE] = 3, - [RXRPC_ACK_PING_RESPONSE] = 4, - [RXRPC_ACK_DUPLICATE] = 5, - [RXRPC_ACK_OUT_OF_SEQUENCE] = 6, - [RXRPC_ACK_EXCEEDS_WINDOW] = 7, - [RXRPC_ACK_NOSPACE] = 8, -}; - -const char *rxrpc_acks(u8 reason) -{ - static const char *const str[] = { - "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", - "IDL", "-?-" - }; - - if (reason >= ARRAY_SIZE(str)) - reason = ARRAY_SIZE(str) - 1; - return str[reason]; -} + [RXRPC_ACK_DUPLICATE] = 4, + [RXRPC_ACK_OUT_OF_SEQUENCE] = 5, + [RXRPC_ACK_EXCEEDS_WINDOW] = 6, + [RXRPC_ACK_NOSPACE] = 7, + [RXRPC_ACK_PING_RESPONSE] = 8, + [RXRPC_ACK_PING] = 9, +}; + +const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = { + "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", + "IDL", "-?-" +}; + +const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7] = { + [rxrpc_skb_rx_cleaned] = "Rx CLN", + [rxrpc_skb_rx_freed] = "Rx FRE", + [rxrpc_skb_rx_got] = "Rx GOT", + [rxrpc_skb_rx_lost] = "Rx *L*", + [rxrpc_skb_rx_received] = "Rx RCV", + [rxrpc_skb_rx_purged] = "Rx PUR", + [rxrpc_skb_rx_rotated] = "Rx ROT", + [rxrpc_skb_rx_seen] = "Rx SEE", + [rxrpc_skb_tx_cleaned] = "Tx CLN", + [rxrpc_skb_tx_freed] = "Tx FRE", + [rxrpc_skb_tx_got] = "Tx GOT", + [rxrpc_skb_tx_new] = "Tx NEW", + [rxrpc_skb_tx_rotated] = "Tx ROT", + [rxrpc_skb_tx_seen] = "Tx SEE", +}; + +const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4] = { + [rxrpc_conn_new_client] = "NWc", + [rxrpc_conn_new_service] = "NWs", + [rxrpc_conn_queued] = "QUE", + [rxrpc_conn_seen] = "SEE", + [rxrpc_conn_got] = "GOT", + [rxrpc_conn_put_client] = "PTc", + [rxrpc_conn_put_service] = "PTs", +}; + +const char rxrpc_client_traces[rxrpc_client__nr_trace][7] = { + [rxrpc_client_activate_chans] = "Activa", + [rxrpc_client_alloc] = "Alloc ", + [rxrpc_client_chan_activate] = "ChActv", + [rxrpc_client_chan_disconnect] = "ChDisc", + [rxrpc_client_chan_pass] = "ChPass", + [rxrpc_client_chan_unstarted] = "ChUnst", + [rxrpc_client_cleanup] = "Clean ", + [rxrpc_client_count] = "Count ", + [rxrpc_client_discard] = "Discar", + [rxrpc_client_duplicate] = "Duplic", + [rxrpc_client_exposed] = "Expose", + [rxrpc_client_replace] = "Replac", + [rxrpc_client_to_active] = "->Actv", + [rxrpc_client_to_culled] = "->Cull", + [rxrpc_client_to_idle] = "->Idle", + [rxrpc_client_to_inactive] = "->Inac", + [rxrpc_client_to_waiting] = "->Wait", + [rxrpc_client_uncount] = "Uncoun", +}; + +const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4] = { + [rxrpc_transmit_wait] = "WAI", + [rxrpc_transmit_queue] = "QUE", + [rxrpc_transmit_queue_last] = "QLS", + [rxrpc_transmit_rotate] = "ROT", + [rxrpc_transmit_rotate_last] = "RLS", + [rxrpc_transmit_await_reply] = "AWR", + [rxrpc_transmit_end] = "END", +}; + +const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4] = { + [rxrpc_receive_incoming] = "INC", + [rxrpc_receive_queue] = "QUE", + [rxrpc_receive_queue_last] = "QLS", + [rxrpc_receive_front] = "FRN", + [rxrpc_receive_rotate] = "ROT", + [rxrpc_receive_end] = "END", +}; + +const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5] = { + [rxrpc_recvmsg_enter] = "ENTR", + [rxrpc_recvmsg_wait] = "WAIT", + [rxrpc_recvmsg_dequeue] = "DEQU", + [rxrpc_recvmsg_hole] = "HOLE", + [rxrpc_recvmsg_next] = "NEXT", + [rxrpc_recvmsg_cont] = "CONT", + [rxrpc_recvmsg_full] = "FULL", + [rxrpc_recvmsg_data_return] = "DATA", + [rxrpc_recvmsg_terminal] = "TERM", + [rxrpc_recvmsg_to_be_accepted] = "TBAC", + [rxrpc_recvmsg_return] = "RETN", +}; + +const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5] = { + [rxrpc_rtt_tx_ping] = "PING", + [rxrpc_rtt_tx_data] = "DATA", +}; + +const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = { + [rxrpc_rtt_rx_ping_response] = "PONG", + [rxrpc_rtt_rx_requested_ack] = "RACK", +}; + +const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { + [rxrpc_timer_begin] = "Begin ", + [rxrpc_timer_expired] = "*EXPR*", + [rxrpc_timer_init_for_reply] = "IniRpl", + [rxrpc_timer_set_for_ack] = "SetAck", + [rxrpc_timer_set_for_send] = "SetTx ", + [rxrpc_timer_set_for_resend] = "SetRTx", +}; + +const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { + [rxrpc_propose_ack_client_tx_end] = "ClTxEnd", + [rxrpc_propose_ack_input_data] = "DataIn ", + [rxrpc_propose_ack_ping_for_lost_ack] = "LostAck", + [rxrpc_propose_ack_ping_for_lost_reply] = "LostRpl", + [rxrpc_propose_ack_ping_for_params] = "Params ", + [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", + [rxrpc_propose_ack_respond_to_ping] = "Rsp2Png", + [rxrpc_propose_ack_retry_tx] = "RetryTx", + [rxrpc_propose_ack_rotate_rx] = "RxAck ", + [rxrpc_propose_ack_terminal_ack] = "ClTerm ", +}; + +const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes] = { + [rxrpc_propose_ack_use] = "", + [rxrpc_propose_ack_update] = " Update", + [rxrpc_propose_ack_subsume] = " Subsume", +}; + +const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10] = { + [RXRPC_CALL_SLOW_START] = "SlowStart", + [RXRPC_CALL_CONGEST_AVOIDANCE] = "CongAvoid", + [RXRPC_CALL_PACKET_LOSS] = "PktLoss ", + [RXRPC_CALL_FAST_RETRANSMIT] = "FastReTx ", +}; + +const char rxrpc_congest_changes[rxrpc_congest__nr_change][9] = { + [rxrpc_cong_begin_retransmission] = " Retrans", + [rxrpc_cong_cleared_nacks] = " Cleared", + [rxrpc_cong_new_low_nack] = " NewLowN", + [rxrpc_cong_no_change] = "", + [rxrpc_cong_progress] = " Progres", + [rxrpc_cong_retransmit_again] = " ReTxAgn", + [rxrpc_cong_rtt_window_end] = " RttWinE", + [rxrpc_cong_saw_nack] = " SawNack", +}; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f4bda06b7d2d..0d47db886f6e 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -14,336 +14,326 @@ #include <linux/net.h> #include <linux/gfp.h> #include <linux/skbuff.h> -#include <linux/circ_buf.h> #include <linux/export.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include "ar-internal.h" -/* - * Time till packet resend (in jiffies). - */ -unsigned int rxrpc_resend_timeout = 4 * HZ; - -static int rxrpc_send_data(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct msghdr *msg, size_t len); +struct rxrpc_pkt_buffer { + struct rxrpc_wire_header whdr; + union { + struct { + struct rxrpc_ackpacket ack; + u8 acks[255]; + u8 pad[3]; + }; + __be32 abort_code; + }; + struct rxrpc_ackinfo ackinfo; +}; /* - * extract control messages from the sendmsg() control buffer + * Fill out an ACK packet. */ -static int rxrpc_sendmsg_cmsg(struct msghdr *msg, - unsigned long *user_call_ID, - enum rxrpc_command *command, - u32 *abort_code, - bool *_exclusive) +static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, + struct rxrpc_pkt_buffer *pkt, + rxrpc_seq_t *_hard_ack, + rxrpc_seq_t *_top) { - struct cmsghdr *cmsg; - bool got_user_ID = false; - int len; - - *command = RXRPC_CMD_SEND_DATA; - - if (msg->msg_controllen == 0) - return -EINVAL; - - for_each_cmsghdr(cmsg, msg) { - if (!CMSG_OK(msg, cmsg)) - return -EINVAL; - - len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); - _debug("CMSG %d, %d, %d", - cmsg->cmsg_level, cmsg->cmsg_type, len); - - if (cmsg->cmsg_level != SOL_RXRPC) - continue; - - switch (cmsg->cmsg_type) { - case RXRPC_USER_CALL_ID: - if (msg->msg_flags & MSG_CMSG_COMPAT) { - if (len != sizeof(u32)) - return -EINVAL; - *user_call_ID = *(u32 *) CMSG_DATA(cmsg); - } else { - if (len != sizeof(unsigned long)) - return -EINVAL; - *user_call_ID = *(unsigned long *) - CMSG_DATA(cmsg); - } - _debug("User Call ID %lx", *user_call_ID); - got_user_ID = true; - break; - - case RXRPC_ABORT: - if (*command != RXRPC_CMD_SEND_DATA) - return -EINVAL; - *command = RXRPC_CMD_SEND_ABORT; - if (len != sizeof(*abort_code)) - return -EINVAL; - *abort_code = *(unsigned int *) CMSG_DATA(cmsg); - _debug("Abort %x", *abort_code); - if (*abort_code == 0) - return -EINVAL; - break; - - case RXRPC_ACCEPT: - if (*command != RXRPC_CMD_SEND_DATA) - return -EINVAL; - *command = RXRPC_CMD_ACCEPT; - if (len != 0) - return -EINVAL; - break; - - case RXRPC_EXCLUSIVE_CALL: - *_exclusive = true; - if (len != 0) - return -EINVAL; - break; - default: - return -EINVAL; - } + rxrpc_serial_t serial; + rxrpc_seq_t hard_ack, top, seq; + int ix; + u32 mtu, jmax; + u8 *ackp = pkt->acks; + + /* Barrier against rxrpc_input_data(). */ + serial = call->ackr_serial; + hard_ack = READ_ONCE(call->rx_hard_ack); + top = smp_load_acquire(&call->rx_top); + *_hard_ack = hard_ack; + *_top = top; + + pkt->ack.bufferSpace = htons(8); + pkt->ack.maxSkew = htons(call->ackr_skew); + pkt->ack.firstPacket = htonl(hard_ack + 1); + pkt->ack.previousPacket = htonl(call->ackr_prev_seq); + pkt->ack.serial = htonl(serial); + pkt->ack.reason = call->ackr_reason; + pkt->ack.nAcks = top - hard_ack; + + if (pkt->ack.reason == RXRPC_ACK_PING) + pkt->whdr.flags |= RXRPC_REQUEST_ACK; + + if (after(top, hard_ack)) { + seq = hard_ack + 1; + do { + ix = seq & RXRPC_RXTX_BUFF_MASK; + if (call->rxtx_buffer[ix]) + *ackp++ = RXRPC_ACK_TYPE_ACK; + else + *ackp++ = RXRPC_ACK_TYPE_NACK; + seq++; + } while (before_eq(seq, top)); } - if (!got_user_ID) - return -EINVAL; - _leave(" = 0"); - return 0; + mtu = call->conn->params.peer->if_mtu; + mtu -= call->conn->params.peer->hdrsize; + jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max; + pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); + pkt->ackinfo.maxMTU = htonl(mtu); + pkt->ackinfo.rwind = htonl(call->rx_winsize); + pkt->ackinfo.jumbo_max = htonl(jmax); + + *ackp++ = 0; + *ackp++ = 0; + *ackp++ = 0; + return top - hard_ack + 3; } /* - * abort a call, sending an ABORT packet to the peer + * Send an ACK or ABORT call packet. */ -static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) +int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) { - write_lock_bh(&call->state_lock); - - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = abort_code; - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - rxrpc_queue_call(call); + struct rxrpc_connection *conn = NULL; + struct rxrpc_pkt_buffer *pkt; + struct msghdr msg; + struct kvec iov[2]; + rxrpc_serial_t serial; + rxrpc_seq_t hard_ack, top; + size_t len, n; + bool ping = false; + int ioc, ret; + u32 abort_code; + + _enter("%u,%s", call->debug_id, rxrpc_pkts[type]); + + spin_lock_bh(&call->lock); + if (call->conn) + conn = rxrpc_get_connection_maybe(call->conn); + spin_unlock_bh(&call->lock); + if (!conn) + return -ECONNRESET; + + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) { + rxrpc_put_connection(conn); + return -ENOMEM; } - write_unlock_bh(&call->state_lock); -} - -/* - * Create a new client call for sendmsg(). - */ -static struct rxrpc_call * -rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, - unsigned long user_call_ID, bool exclusive) -{ - struct rxrpc_conn_parameters cp; - struct rxrpc_call *call; - struct key *key; - - DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name); + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + pkt->whdr.epoch = htonl(conn->proto.epoch); + pkt->whdr.cid = htonl(call->cid); + pkt->whdr.callNumber = htonl(call->call_id); + pkt->whdr.seq = 0; + pkt->whdr.type = type; + pkt->whdr.flags = conn->out_clientflag; + pkt->whdr.userStatus = 0; + pkt->whdr.securityIndex = call->security_ix; + pkt->whdr._rsvd = 0; + pkt->whdr.serviceId = htons(call->service_id); + + iov[0].iov_base = pkt; + iov[0].iov_len = sizeof(pkt->whdr); + len = sizeof(pkt->whdr); + + switch (type) { + case RXRPC_PACKET_TYPE_ACK: + spin_lock_bh(&call->lock); + if (!call->ackr_reason) { + spin_unlock_bh(&call->lock); + ret = 0; + goto out; + } + ping = (call->ackr_reason == RXRPC_ACK_PING); + n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top); + call->ackr_reason = 0; - _enter(""); + spin_unlock_bh(&call->lock); - if (!msg->msg_name) - return ERR_PTR(-EDESTADDRREQ); - key = rx->key; - if (key && !rx->key->payload.data[0]) - key = NULL; + pkt->whdr.flags |= RXRPC_SLOW_START_OK; - memset(&cp, 0, sizeof(cp)); - cp.local = rx->local; - cp.key = rx->key; - cp.security_level = rx->min_sec_level; - cp.exclusive = rx->exclusive | exclusive; - cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL); + iov[0].iov_len += sizeof(pkt->ack) + n; + iov[1].iov_base = &pkt->ackinfo; + iov[1].iov_len = sizeof(pkt->ackinfo); + len += sizeof(pkt->ack) + n + sizeof(pkt->ackinfo); + ioc = 2; + break; - _leave(" = %p\n", call); - return call; -} + case RXRPC_PACKET_TYPE_ABORT: + abort_code = call->abort_code; + pkt->abort_code = htonl(abort_code); + iov[0].iov_len += sizeof(pkt->abort_code); + len += sizeof(pkt->abort_code); + ioc = 1; + break; -/* - * send a message forming part of a client call through an RxRPC socket - * - caller holds the socket locked - * - the socket may be either a client socket or a server socket - */ -int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) -{ - enum rxrpc_command cmd; - struct rxrpc_call *call; - unsigned long user_call_ID = 0; - bool exclusive = false; - u32 abort_code = 0; - int ret; - - _enter(""); - - ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code, - &exclusive); - if (ret < 0) - return ret; - - if (cmd == RXRPC_CMD_ACCEPT) { - if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) - return -EINVAL; - call = rxrpc_accept_call(rx, user_call_ID); - if (IS_ERR(call)) - return PTR_ERR(call); - rxrpc_put_call(call); - return 0; + default: + BUG(); + ret = -ENOANO; + goto out; } - call = rxrpc_find_call_by_user_ID(rx, user_call_ID); - if (!call) { - if (cmd != RXRPC_CMD_SEND_DATA) - return -EBADSLT; - call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, - exclusive); - if (IS_ERR(call)) - return PTR_ERR(call); + serial = atomic_inc_return(&conn->serial); + pkt->whdr.serial = htonl(serial); + switch (type) { + case RXRPC_PACKET_TYPE_ACK: + trace_rxrpc_tx_ack(call, serial, + ntohl(pkt->ack.firstPacket), + ntohl(pkt->ack.serial), + pkt->ack.reason, pkt->ack.nAcks); + break; } - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - if (call->state >= RXRPC_CALL_COMPLETE) { - /* it's too late for this call */ - ret = -ECONNRESET; - } else if (cmd == RXRPC_CMD_SEND_ABORT) { - rxrpc_send_abort(call, abort_code); - ret = 0; - } else if (cmd != RXRPC_CMD_SEND_DATA) { - ret = -EINVAL; - } else if (!call->in_clientflag && - call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) { - /* request phase complete for this client call */ - ret = -EPROTO; - } else if (call->in_clientflag && - call->state != RXRPC_CALL_SERVER_ACK_REQUEST && - call->state != RXRPC_CALL_SERVER_SEND_REPLY) { - /* Reply phase not begun or not complete for service call. */ - ret = -EPROTO; - } else { - ret = rxrpc_send_data(rx, call, msg, len); + if (ping) { + call->ackr_ping = serial; + smp_wmb(); + /* We need to stick a time in before we send the packet in case + * the reply gets back before kernel_sendmsg() completes - but + * asking UDP to send the packet can take a relatively long + * time, so we update the time after, on the assumption that + * the packet transmission is more likely to happen towards the + * end of the kernel_sendmsg() call. + */ + call->ackr_ping_time = ktime_get_real(); + set_bit(RXRPC_CALL_PINGING, &call->flags); + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); } - - rxrpc_put_call(call); - _leave(" = %d", ret); - return ret; -} - -/** - * rxrpc_kernel_send_data - Allow a kernel service to send data on a call - * @call: The call to send data through - * @msg: The data to send - * @len: The amount of data to send - * - * Allow a kernel service to send data on a call. The call must be in an state - * appropriate to sending data. No control data should be supplied in @msg, - * nor should an address be supplied. MSG_MORE should be flagged if there's - * more data to come, otherwise this data will end the transmission phase. - */ -int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, - size_t len) -{ - int ret; - - _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); - - ASSERTCMP(msg->msg_name, ==, NULL); - ASSERTCMP(msg->msg_control, ==, NULL); - - lock_sock(&call->socket->sk); - - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - if (call->state >= RXRPC_CALL_COMPLETE) { - ret = -ESHUTDOWN; /* it's too late for this call */ - } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && - call->state != RXRPC_CALL_SERVER_ACK_REQUEST && - call->state != RXRPC_CALL_SERVER_SEND_REPLY) { - ret = -EPROTO; /* request phase complete for this client call */ - } else { - ret = rxrpc_send_data(call->socket, call, msg, len); + ret = kernel_sendmsg(conn->params.local->socket, + &msg, iov, ioc, len); + if (ping) + call->ackr_ping_time = ktime_get_real(); + + if (type == RXRPC_PACKET_TYPE_ACK && + call->state < RXRPC_CALL_COMPLETE) { + if (ret < 0) { + clear_bit(RXRPC_CALL_PINGING, &call->flags); + rxrpc_propose_ACK(call, pkt->ack.reason, + ntohs(pkt->ack.maxSkew), + ntohl(pkt->ack.serial), + true, true, + rxrpc_propose_ack_retry_tx); + } else { + spin_lock_bh(&call->lock); + if (after(hard_ack, call->ackr_consumed)) + call->ackr_consumed = hard_ack; + if (after(top, call->ackr_seen)) + call->ackr_seen = top; + spin_unlock_bh(&call->lock); + } } - release_sock(&call->socket->sk); - _leave(" = %d", ret); +out: + rxrpc_put_connection(conn); + kfree(pkt); return ret; } -EXPORT_SYMBOL(rxrpc_kernel_send_data); - -/** - * rxrpc_kernel_abort_call - Allow a kernel service to abort a call - * @call: The call to be aborted - * @abort_code: The abort code to stick into the ABORT packet - * - * Allow a kernel service to abort a call, if it's still in an abortable state. - */ -void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code) -{ - _enter("{%d},%d", call->debug_id, abort_code); - - lock_sock(&call->socket->sk); - - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - if (call->state < RXRPC_CALL_COMPLETE) - rxrpc_send_abort(call, abort_code); - - release_sock(&call->socket->sk); - _leave(""); -} - -EXPORT_SYMBOL(rxrpc_kernel_abort_call); - /* * send a packet through the transport endpoint */ -int rxrpc_send_data_packet(struct rxrpc_connection *conn, struct sk_buff *skb) +int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, + bool retrans) { - struct kvec iov[1]; + struct rxrpc_connection *conn = call->conn; + struct rxrpc_wire_header whdr; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct msghdr msg; + struct kvec iov[2]; + rxrpc_serial_t serial; + size_t len; + bool lost = false; int ret, opt; _enter(",{%d}", skb->len); - iov[0].iov_base = skb->head; - iov[0].iov_len = skb->len; + /* Each transmission of a Tx packet needs a new serial number */ + serial = atomic_inc_return(&conn->serial); + + whdr.epoch = htonl(conn->proto.epoch); + whdr.cid = htonl(call->cid); + whdr.callNumber = htonl(call->call_id); + whdr.seq = htonl(sp->hdr.seq); + whdr.serial = htonl(serial); + whdr.type = RXRPC_PACKET_TYPE_DATA; + whdr.flags = sp->hdr.flags; + whdr.userStatus = 0; + whdr.securityIndex = call->security_ix; + whdr._rsvd = htons(sp->hdr._rsvd); + whdr.serviceId = htons(call->service_id); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = skb->head; + iov[1].iov_len = skb->len; + len = iov[0].iov_len + iov[1].iov_len; - msg.msg_name = &conn->params.peer->srx.transport; - msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; - /* send the packet with the don't fragment bit set if we currently - * think it's small enough */ - if (skb->len - sizeof(struct rxrpc_wire_header) < conn->params.peer->maxdata) { - down_read(&conn->params.local->defrag_sem); - /* send the packet by UDP - * - returns -EMSGSIZE if UDP would have to fragment the packet - * to go out of the interface - * - in which case, we'll have processed the ICMP error - * message and update the peer record - */ - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1, - iov[0].iov_len); + /* If our RTT cache needs working on, request an ACK. Also request + * ACKs if a DATA packet appears to have been lost. + */ + if (retrans || + call->cong_mode == RXRPC_CALL_SLOW_START || + (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + ktime_get_real())) + whdr.flags |= RXRPC_REQUEST_ACK; + + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { + static int lose; + if ((lose++ & 7) == 7) { + ret = 0; + lost = true; + goto done; + } + } - up_read(&conn->params.local->defrag_sem); - if (ret == -EMSGSIZE) - goto send_fragmentable; + _proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq); - _leave(" = %d [%u]", ret, conn->params.peer->maxdata); - return ret; + /* send the packet with the don't fragment bit set if we currently + * think it's small enough */ + if (iov[1].iov_len >= call->peer->maxdata) + goto send_fragmentable; + + down_read(&conn->params.local->defrag_sem); + /* send the packet by UDP + * - returns -EMSGSIZE if UDP would have to fragment the packet + * to go out of the interface + * - in which case, we'll have processed the ICMP error + * message and update the peer record + */ + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); + + up_read(&conn->params.local->defrag_sem); + if (ret == -EMSGSIZE) + goto send_fragmentable; + +done: + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, + retrans, lost); + if (ret >= 0) { + ktime_t now = ktime_get_real(); + skb->tstamp = now; + smp_wmb(); + sp->hdr.serial = serial; + if (whdr.flags & RXRPC_REQUEST_ACK) { + call->peer->rtt_last_req = now; + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); + } } + _leave(" = %d [%u]", ret, call->peer->maxdata); + return ret; send_fragmentable: /* attempt to send this message with fragmentation enabled */ @@ -358,8 +348,8 @@ send_fragmentable: SOL_IP, IP_MTU_DISCOVER, (char *)&opt, sizeof(opt)); if (ret == 0) { - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1, - iov[0].iov_len); + ret = kernel_sendmsg(conn->params.local->socket, &msg, + iov, 2, len); opt = IP_PMTUDISC_DO; kernel_setsockopt(conn->params.local->socket, SOL_IP, @@ -367,355 +357,82 @@ send_fragmentable: (char *)&opt, sizeof(opt)); } break; - } - - up_write(&conn->params.local->defrag_sem); - _leave(" = %d [frag %u]", ret, conn->params.peer->maxdata); - return ret; -} -/* - * wait for space to appear in the transmit/ACK window - * - caller holds the socket locked - */ -static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, - struct rxrpc_call *call, - long *timeo) -{ - DECLARE_WAITQUEUE(myself, current); - int ret; - - _enter(",{%d},%ld", - CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail), - call->acks_winsz), - *timeo); - - add_wait_queue(&call->tx_waitq, &myself); - - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - ret = 0; - if (CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail), - call->acks_winsz) > 0) - break; - if (signal_pending(current)) { - ret = sock_intr_errno(*timeo); - break; - } - - release_sock(&rx->sk); - *timeo = schedule_timeout(*timeo); - lock_sock(&rx->sk); - } - - remove_wait_queue(&call->tx_waitq, &myself); - set_current_state(TASK_RUNNING); - _leave(" = %d", ret); - return ret; -} - -/* - * attempt to schedule an instant Tx resend - */ -static inline void rxrpc_instant_resend(struct rxrpc_call *call) -{ - read_lock_bh(&call->state_lock); - if (try_to_del_timer_sync(&call->resend_timer) >= 0) { - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) - rxrpc_queue_call(call); - } - read_unlock_bh(&call->state_lock); -} +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + opt = IPV6_PMTUDISC_DONT; + ret = kernel_setsockopt(conn->params.local->socket, + SOL_IPV6, IPV6_MTU_DISCOVER, + (char *)&opt, sizeof(opt)); + if (ret == 0) { + ret = kernel_sendmsg(conn->params.local->socket, &msg, + iov, 1, iov[0].iov_len); -/* - * queue a packet for transmission, set the resend timer and attempt - * to send the packet immediately - */ -static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, - bool last) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - int ret; - - _net("queue skb %p [%d]", skb, call->acks_head); - - ASSERT(call->acks_window != NULL); - call->acks_window[call->acks_head] = (unsigned long) skb; - smp_wmb(); - call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1); - - if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { - _debug("________awaiting reply/ACK__________"); - write_lock_bh(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_CLIENT_SEND_REQUEST: - call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; - break; - case RXRPC_CALL_SERVER_ACK_REQUEST: - call->state = RXRPC_CALL_SERVER_SEND_REPLY; - if (!last) - break; - case RXRPC_CALL_SERVER_SEND_REPLY: - call->state = RXRPC_CALL_SERVER_AWAIT_ACK; - break; - default: - break; + opt = IPV6_PMTUDISC_DO; + kernel_setsockopt(conn->params.local->socket, + SOL_IPV6, IPV6_MTU_DISCOVER, + (char *)&opt, sizeof(opt)); } - write_unlock_bh(&call->state_lock); - } - - _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - - sp->need_resend = false; - sp->resend_at = jiffies + rxrpc_resend_timeout; - if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) { - _debug("run timer"); - call->resend_timer.expires = sp->resend_at; - add_timer(&call->resend_timer); - } - - /* attempt to cancel the rx-ACK timer, deferring reply transmission if - * we're ACK'ing the request phase of an incoming call */ - ret = -EAGAIN; - if (try_to_del_timer_sync(&call->ack_timer) >= 0) { - /* the packet may be freed by rxrpc_process_call() before this - * returns */ - ret = rxrpc_send_data_packet(call->conn, skb); - _net("sent skb %p", skb); - } else { - _debug("failed to delete ACK timer"); - } - - if (ret < 0) { - _debug("need instant resend %d", ret); - sp->need_resend = true; - rxrpc_instant_resend(call); + break; +#endif } - _leave(""); -} - -/* - * Convert a host-endian header into a network-endian header. - */ -static void rxrpc_insert_header(struct sk_buff *skb) -{ - struct rxrpc_wire_header whdr; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.seq = htonl(sp->hdr.seq); - whdr.serial = htonl(sp->hdr.serial); - whdr.type = sp->hdr.type; - whdr.flags = sp->hdr.flags; - whdr.userStatus = sp->hdr.userStatus; - whdr.securityIndex = sp->hdr.securityIndex; - whdr._rsvd = htons(sp->hdr._rsvd); - whdr.serviceId = htons(sp->hdr.serviceId); - - memcpy(skb->head, &whdr, sizeof(whdr)); + up_write(&conn->params.local->defrag_sem); + goto done; } /* - * send data through a socket - * - must be called in process context - * - caller holds the socket locked + * reject packets through the local endpoint */ -static int rxrpc_send_data(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct msghdr *msg, size_t len) +void rxrpc_reject_packets(struct rxrpc_local *local) { + struct sockaddr_rxrpc srx; struct rxrpc_skb_priv *sp; + struct rxrpc_wire_header whdr; struct sk_buff *skb; - struct sock *sk = &rx->sk; - long timeo; - bool more; - int ret, copied; - - timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); - - /* this should be in poll */ - sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) - return -EPIPE; - - more = msg->msg_flags & MSG_MORE; - - skb = call->tx_pending; - call->tx_pending = NULL; - - copied = 0; - do { - if (!skb) { - size_t size, chunk, max, space; - - _debug("alloc"); - - if (CIRC_SPACE(call->acks_head, - ACCESS_ONCE(call->acks_tail), - call->acks_winsz) <= 0) { - ret = -EAGAIN; - if (msg->msg_flags & MSG_DONTWAIT) - goto maybe_error; - ret = rxrpc_wait_for_tx_window(rx, call, - &timeo); - if (ret < 0) - goto maybe_error; - } - - max = call->conn->params.peer->maxdata; - max -= call->conn->security_size; - max &= ~(call->conn->size_align - 1UL); - - chunk = max; - if (chunk > msg_data_left(msg) && !more) - chunk = msg_data_left(msg); - - space = chunk + call->conn->size_align; - space &= ~(call->conn->size_align - 1UL); - - size = space + call->conn->header_size; - - _debug("SIZE: %zu/%zu/%zu", chunk, space, size); - - /* create a buffer that we can retain until it's ACK'd */ - skb = sock_alloc_send_skb( - sk, size, msg->msg_flags & MSG_DONTWAIT, &ret); - if (!skb) - goto maybe_error; - - rxrpc_new_skb(skb); - - _debug("ALLOC SEND %p", skb); - - ASSERTCMP(skb->mark, ==, 0); + struct msghdr msg; + struct kvec iov[2]; + size_t size; + __be32 code; - _debug("HS: %u", call->conn->header_size); - skb_reserve(skb, call->conn->header_size); - skb->len += call->conn->header_size; + _enter("%d", local->debug_id); - sp = rxrpc_skb(skb); - sp->remain = chunk; - if (sp->remain > skb_tailroom(skb)) - sp->remain = skb_tailroom(skb); + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = &code; + iov[1].iov_len = sizeof(code); + size = sizeof(whdr) + sizeof(code); - _net("skb: hr %d, tr %d, hl %d, rm %d", - skb_headroom(skb), - skb_tailroom(skb), - skb_headlen(skb), - sp->remain); + msg.msg_name = &srx.transport; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; - skb->ip_summed = CHECKSUM_UNNECESSARY; - } + memset(&whdr, 0, sizeof(whdr)); + whdr.type = RXRPC_PACKET_TYPE_ABORT; - _debug("append"); + while ((skb = skb_dequeue(&local->reject_queue))) { + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); sp = rxrpc_skb(skb); - /* append next segment of data to the current buffer */ - if (msg_data_left(msg) > 0) { - int copy = skb_tailroom(skb); - ASSERTCMP(copy, >, 0); - if (copy > msg_data_left(msg)) - copy = msg_data_left(msg); - if (copy > sp->remain) - copy = sp->remain; - - _debug("add"); - ret = skb_add_data(skb, &msg->msg_iter, copy); - _debug("added"); - if (ret < 0) - goto efault; - sp->remain -= copy; - skb->mark += copy; - copied += copy; - } + if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { + msg.msg_namelen = srx.transport_len; - /* check for the far side aborting the call or a network error - * occurring */ - if (call->state > RXRPC_CALL_COMPLETE) - goto call_aborted; - - /* add the packet to the send queue if it's now full */ - if (sp->remain <= 0 || - (msg_data_left(msg) == 0 && !more)) { - struct rxrpc_connection *conn = call->conn; - uint32_t seq; - size_t pad; - - /* pad out if we're using security */ - if (conn->security_ix) { - pad = conn->security_size + skb->mark; - pad = conn->size_align - pad; - pad &= conn->size_align - 1; - _debug("pad %zu", pad); - if (pad) - memset(skb_put(skb, pad), 0, pad); - } - - seq = atomic_inc_return(&call->sequence); - - sp->hdr.epoch = conn->proto.epoch; - sp->hdr.cid = call->cid; - sp->hdr.callNumber = call->call_id; - sp->hdr.seq = seq; - sp->hdr.serial = atomic_inc_return(&conn->serial); - sp->hdr.type = RXRPC_PACKET_TYPE_DATA; - sp->hdr.userStatus = 0; - sp->hdr.securityIndex = conn->security_ix; - sp->hdr._rsvd = 0; - sp->hdr.serviceId = call->service_id; - - sp->hdr.flags = conn->out_clientflag; - if (msg_data_left(msg) == 0 && !more) - sp->hdr.flags |= RXRPC_LAST_PACKET; - else if (CIRC_SPACE(call->acks_head, - ACCESS_ONCE(call->acks_tail), - call->acks_winsz) > 1) - sp->hdr.flags |= RXRPC_MORE_PACKETS; - if (more && seq & 1) - sp->hdr.flags |= RXRPC_REQUEST_ACK; - - ret = conn->security->secure_packet( - call, skb, skb->mark, - skb->head + sizeof(struct rxrpc_wire_header)); - if (ret < 0) - goto out; - - rxrpc_insert_header(skb); - rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more); - skb = NULL; - } - } while (msg_data_left(msg) > 0); + code = htonl(skb->priority); -success: - ret = copied; -out: - call->tx_pending = skb; - _leave(" = %d", ret); - return ret; + whdr.epoch = htonl(sp->hdr.epoch); + whdr.cid = htonl(sp->hdr.cid); + whdr.callNumber = htonl(sp->hdr.callNumber); + whdr.serviceId = htons(sp->hdr.serviceId); + whdr.flags = sp->hdr.flags; + whdr.flags ^= RXRPC_CLIENT_INITIATED; + whdr.flags &= RXRPC_CLIENT_INITIATED; -call_aborted: - rxrpc_free_skb(skb); - if (call->state == RXRPC_CALL_NETWORK_ERROR) - ret = call->error_report < RXRPC_LOCAL_ERROR_OFFSET ? - call->error_report : - call->error_report - RXRPC_LOCAL_ERROR_OFFSET; - else - ret = -ECONNABORTED; - _leave(" = %d", ret); - return ret; + kernel_sendmsg(local->socket, &msg, iov, 2, size); + } -maybe_error: - if (copied) - goto success; - goto out; + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + } -efault: - ret = -EFAULT; - goto out; + _leave(""); } diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 8940674b5e08..bf13b8470c9a 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -66,6 +66,32 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, } break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + srx.transport.sin6.sin6_port = serr->port; + srx.transport_len = sizeof(struct sockaddr_in6); + switch (serr->ee.ee_origin) { + case SO_EE_ORIGIN_ICMP6: + _net("Rx ICMP6"); + memcpy(&srx.transport.sin6.sin6_addr, + skb_network_header(skb) + serr->addr_offset, + sizeof(struct in6_addr)); + break; + case SO_EE_ORIGIN_ICMP: + _net("Rx ICMP on v6 sock"); + memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12, + skb_network_header(skb) + serr->addr_offset, + sizeof(struct in_addr)); + break; + default: + memcpy(&srx.transport.sin6.sin6_addr, + &ipv6_hdr(skb)->saddr, + sizeof(struct in6_addr)); + break; + } + break; +#endif + default: BUG(); } @@ -129,22 +155,21 @@ void rxrpc_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } + rxrpc_new_skb(skb, rxrpc_skb_rx_received); serr = SKB_EXT_ERR(skb); if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { _leave("UDP empty message"); - kfree_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); return; } - rxrpc_new_skb(skb); - rcu_read_lock(); peer = rxrpc_lookup_peer_icmp_rcu(local, skb); if (peer && !rxrpc_get_peer_maybe(peer)) peer = NULL; if (!peer) { rcu_read_unlock(); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); _leave(" [no peer]"); return; } @@ -154,7 +179,7 @@ void rxrpc_error_report(struct sock *sk) serr->ee.ee_code == ICMP_FRAG_NEEDED)) { rxrpc_adjust_mtu(peer, serr); rcu_read_unlock(); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); rxrpc_put_peer(peer); _leave(" [MTU update]"); return; @@ -162,7 +187,7 @@ void rxrpc_error_report(struct sock *sk) rxrpc_store_error(peer, serr); rcu_read_unlock(); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); /* The ref we obtained is passed off to the work item */ rxrpc_queue_work(&peer->error_distributor); @@ -248,13 +273,20 @@ void rxrpc_peer_error_distributor(struct work_struct *work) struct rxrpc_peer *peer = container_of(work, struct rxrpc_peer, error_distributor); struct rxrpc_call *call; - int error_report; + enum rxrpc_call_completion compl; + int error; _enter(""); - error_report = READ_ONCE(peer->error_report); + error = READ_ONCE(peer->error_report); + if (error < RXRPC_LOCAL_ERROR_OFFSET) { + compl = RXRPC_CALL_NETWORK_ERROR; + } else { + compl = RXRPC_CALL_LOCAL_ERROR; + error -= RXRPC_LOCAL_ERROR_OFFSET; + } - _debug("ISSUE ERROR %d", error_report); + _debug("ISSUE ERROR %s %d", rxrpc_call_completions[compl], error); spin_lock_bh(&peer->lock); @@ -262,16 +294,10 @@ void rxrpc_peer_error_distributor(struct work_struct *work) call = hlist_entry(peer->error_targets.first, struct rxrpc_call, error_link); hlist_del_init(&call->error_link); + rxrpc_see_call(call); - write_lock(&call->state_lock); - if (call->state != RXRPC_CALL_COMPLETE && - call->state < RXRPC_CALL_NETWORK_ERROR) { - call->error_report = error_report; - call->state = RXRPC_CALL_NETWORK_ERROR; - set_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events); - rxrpc_queue_call(call); - } - write_unlock(&call->state_lock); + if (rxrpc_set_call_completion(call, compl, 0, error)) + rxrpc_notify_socket(call); } spin_unlock_bh(&peer->lock); @@ -279,3 +305,44 @@ void rxrpc_peer_error_distributor(struct work_struct *work) rxrpc_put_peer(peer); _leave(""); } + +/* + * Add RTT information to cache. This is called in softirq mode and has + * exclusive access to the peer RTT data. + */ +void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + ktime_t send_time, ktime_t resp_time) +{ + struct rxrpc_peer *peer = call->peer; + s64 rtt; + u64 sum = peer->rtt_sum, avg; + u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage; + + rtt = ktime_to_ns(ktime_sub(resp_time, send_time)); + if (rtt < 0) + return; + + /* Replace the oldest datum in the RTT buffer */ + sum -= peer->rtt_cache[cursor]; + sum += rtt; + peer->rtt_cache[cursor] = rtt; + peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1); + peer->rtt_sum = sum; + if (usage < RXRPC_RTT_CACHE_SIZE) { + usage++; + peer->rtt_usage = usage; + } + + /* Now recalculate the average */ + if (usage == RXRPC_RTT_CACHE_SIZE) { + avg = sum / RXRPC_RTT_CACHE_SIZE; + } else { + avg = sum; + do_div(avg, usage); + } + + peer->rtt = avg; + trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt, + usage, avg); +} diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 538e9831c699..941b724d523b 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -16,12 +16,14 @@ #include <linux/skbuff.h> #include <linux/udp.h> #include <linux/in.h> +#include <linux/in6.h> #include <linux/slab.h> #include <linux/hashtable.h> #include <net/sock.h> #include <net/af_rxrpc.h> #include <net/ip.h> #include <net/route.h> +#include <net/ip6_route.h> #include "ar-internal.h" static DEFINE_HASHTABLE(rxrpc_peer_hash, 10); @@ -50,6 +52,13 @@ static unsigned long rxrpc_peer_hash_key(struct rxrpc_local *local, size = sizeof(srx->transport.sin.sin_addr); p = (u16 *)&srx->transport.sin.sin_addr; break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + hash_key += (u16 __force)srx->transport.sin.sin_port; + size = sizeof(srx->transport.sin6.sin6_addr); + p = (u16 *)&srx->transport.sin6.sin6_addr; + break; +#endif default: WARN(1, "AF_RXRPC: Unsupported transport address family\n"); return 0; @@ -93,6 +102,14 @@ static long rxrpc_peer_cmp_key(const struct rxrpc_peer *peer, memcmp(&peer->srx.transport.sin.sin_addr, &srx->transport.sin.sin_addr, sizeof(struct in_addr)); +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + return ((u16 __force)peer->srx.transport.sin6.sin6_port - + (u16 __force)srx->transport.sin6.sin6_port) ?: + memcmp(&peer->srx.transport.sin6.sin6_addr, + &srx->transport.sin6.sin6_addr, + sizeof(struct in6_addr)); +#endif default: BUG(); } @@ -130,17 +147,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); if (peer) { - switch (srx->transport.family) { - case AF_INET: - _net("PEER %d {%d,%u,%pI4+%hu}", - peer->debug_id, - peer->srx.transport_type, - peer->srx.transport.family, - &peer->srx.transport.sin.sin_addr, - ntohs(peer->srx.transport.sin.sin_port)); - break; - } - + _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); } return peer; @@ -152,22 +159,53 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, */ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) { + struct dst_entry *dst; struct rtable *rt; - struct flowi4 fl4; + struct flowi fl; + struct flowi4 *fl4 = &fl.u.ip4; +#ifdef CONFIG_AF_RXRPC_IPV6 + struct flowi6 *fl6 = &fl.u.ip6; +#endif peer->if_mtu = 1500; - rt = ip_route_output_ports(&init_net, &fl4, NULL, - peer->srx.transport.sin.sin_addr.s_addr, 0, - htons(7000), htons(7001), - IPPROTO_UDP, 0, 0); - if (IS_ERR(rt)) { - _leave(" [route err %ld]", PTR_ERR(rt)); - return; + memset(&fl, 0, sizeof(fl)); + switch (peer->srx.transport.family) { + case AF_INET: + rt = ip_route_output_ports( + &init_net, fl4, NULL, + peer->srx.transport.sin.sin_addr.s_addr, 0, + htons(7000), htons(7001), IPPROTO_UDP, 0, 0); + if (IS_ERR(rt)) { + _leave(" [route err %ld]", PTR_ERR(rt)); + return; + } + dst = &rt->dst; + break; + +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + fl6->flowi6_iif = LOOPBACK_IFINDEX; + fl6->flowi6_scope = RT_SCOPE_UNIVERSE; + fl6->flowi6_proto = IPPROTO_UDP; + memcpy(&fl6->daddr, &peer->srx.transport.sin6.sin6_addr, + sizeof(struct in6_addr)); + fl6->fl6_dport = htons(7001); + fl6->fl6_sport = htons(7000); + dst = ip6_route_output(&init_net, NULL, fl6); + if (IS_ERR(dst)) { + _leave(" [route err %ld]", PTR_ERR(dst)); + return; + } + break; +#endif + + default: + BUG(); } - peer->if_mtu = dst_mtu(&rt->dst); - dst_release(&rt->dst); + peer->if_mtu = dst_mtu(dst); + dst_release(dst); _leave(" [if_mtu %u]", peer->if_mtu); } @@ -199,6 +237,41 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) } /* + * Initialise peer record. + */ +static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) +{ + peer->hash_key = hash_key; + rxrpc_assess_MTU_size(peer); + peer->mtu = peer->if_mtu; + peer->rtt_last_req = ktime_get_real(); + + switch (peer->srx.transport.family) { + case AF_INET: + peer->hdrsize = sizeof(struct iphdr); + break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + peer->hdrsize = sizeof(struct ipv6hdr); + break; +#endif + default: + BUG(); + } + + switch (peer->srx.transport_type) { + case SOCK_DGRAM: + peer->hdrsize += sizeof(struct udphdr); + break; + default: + BUG(); + } + + peer->hdrsize += sizeof(struct rxrpc_wire_header); + peer->maxdata = peer->mtu - peer->hdrsize; +} + +/* * Set up a new peer. */ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, @@ -212,31 +285,40 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, peer = rxrpc_alloc_peer(local, gfp); if (peer) { - peer->hash_key = hash_key; memcpy(&peer->srx, srx, sizeof(*srx)); + rxrpc_init_peer(peer, hash_key); + } - rxrpc_assess_MTU_size(peer); - peer->mtu = peer->if_mtu; - - if (srx->transport.family == AF_INET) { - peer->hdrsize = sizeof(struct iphdr); - switch (srx->transport_type) { - case SOCK_DGRAM: - peer->hdrsize += sizeof(struct udphdr); - break; - default: - BUG(); - break; - } - } else { - BUG(); - } + _leave(" = %p", peer); + return peer; +} + +/* + * Set up a new incoming peer. The address is prestored in the preallocated + * peer. + */ +struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local, + struct rxrpc_peer *prealloc) +{ + struct rxrpc_peer *peer; + unsigned long hash_key; + + hash_key = rxrpc_peer_hash_key(local, &prealloc->srx); + prealloc->local = local; + rxrpc_init_peer(prealloc, hash_key); - peer->hdrsize += sizeof(struct rxrpc_wire_header); - peer->maxdata = peer->mtu - peer->hdrsize; + spin_lock(&rxrpc_peer_hash_lock); + + /* Need to check that we aren't racing with someone else */ + peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key); + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (!peer) { + peer = prealloc; + hash_add_rcu(rxrpc_peer_hash, &peer->hash_link, hash_key); } - _leave(" = %p", peer); + spin_unlock(&rxrpc_peer_hash_lock); return peer; } @@ -249,11 +331,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, struct rxrpc_peer *peer, *candidate; unsigned long hash_key = rxrpc_peer_hash_key(local, srx); - _enter("{%d,%d,%pI4+%hu}", - srx->transport_type, - srx->transport_len, - &srx->transport.sin.sin_addr, - ntohs(srx->transport.sin.sin_port)); + _enter("{%pISp}", &srx->transport); /* search the peer list first */ rcu_read_lock(); @@ -272,7 +350,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, return NULL; } - spin_lock(&rxrpc_peer_hash_lock); + spin_lock_bh(&rxrpc_peer_hash_lock); /* Need to check that we aren't racing with someone else */ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); @@ -282,7 +360,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, hash_add_rcu(rxrpc_peer_hash, &candidate->hash_link, hash_key); - spin_unlock(&rxrpc_peer_hash_lock); + spin_unlock_bh(&rxrpc_peer_hash_lock); if (peer) kfree(candidate); @@ -290,11 +368,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, peer = candidate; } - _net("PEER %d {%d,%pI4+%hu}", - peer->debug_id, - peer->srx.transport_type, - &peer->srx.transport.sin.sin_addr, - ntohs(peer->srx.transport.sin.sin_port)); + _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); return peer; @@ -307,9 +381,24 @@ void __rxrpc_put_peer(struct rxrpc_peer *peer) { ASSERT(hlist_empty(&peer->error_targets)); - spin_lock(&rxrpc_peer_hash_lock); + spin_lock_bh(&rxrpc_peer_hash_lock); hash_del_rcu(&peer->hash_link); - spin_unlock(&rxrpc_peer_hash_lock); + spin_unlock_bh(&rxrpc_peer_hash_lock); kfree_rcu(peer, rcu); } + +/** + * rxrpc_kernel_get_peer - Get the peer address of a call + * @sock: The socket on which the call is in progress. + * @call: The call to query + * @_srx: Where to place the result + * + * Get the address of the remote peer in a call. + */ +void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, + struct sockaddr_rxrpc *_srx) +{ + *_srx = call->peer->srx; +} +EXPORT_SYMBOL(rxrpc_kernel_get_peer); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index ced5f07444e5..65cd980767fa 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -17,12 +17,12 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { [RXRPC_CONN_UNUSED] = "Unused ", [RXRPC_CONN_CLIENT] = "Client ", + [RXRPC_CONN_SERVICE_PREALLOC] = "SvPrealc", [RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ", [RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ", [RXRPC_CONN_SERVICE] = "SvSecure", [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort", [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort", - [RXRPC_CONN_NETWORK_ERROR] = "NetError", }; /* @@ -30,6 +30,7 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { */ static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos) { + rcu_read_lock(); read_lock(&rxrpc_call_lock); return seq_list_start_head(&rxrpc_calls, *_pos); } @@ -42,17 +43,21 @@ static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) { read_unlock(&rxrpc_call_lock); + rcu_read_unlock(); } static int rxrpc_call_seq_show(struct seq_file *seq, void *v) { - struct rxrpc_connection *conn; + struct rxrpc_local *local; + struct rxrpc_sock *rx; + struct rxrpc_peer *peer; struct rxrpc_call *call; - char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; + char lbuff[50], rbuff[50]; if (v == &rxrpc_calls) { seq_puts(seq, - "Proto Local Remote " + "Proto Local " + " Remote " " SvID ConnID CallID End Use State Abort " " UserID\n"); return 0; @@ -60,30 +65,35 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call = list_entry(v, struct rxrpc_call, link); - sprintf(lbuff, "%pI4:%u", - &call->local->srx.transport.sin.sin_addr, - ntohs(call->local->srx.transport.sin.sin_port)); + rx = rcu_dereference(call->socket); + if (rx) { + local = READ_ONCE(rx->local); + if (local) + sprintf(lbuff, "%pISpc", &local->srx.transport); + else + strcpy(lbuff, "no_local"); + } else { + strcpy(lbuff, "no_socket"); + } - conn = call->conn; - if (conn) - sprintf(rbuff, "%pI4:%u", - &conn->params.peer->srx.transport.sin.sin_addr, - ntohs(conn->params.peer->srx.transport.sin.sin_port)); + peer = call->peer; + if (peer) + sprintf(rbuff, "%pISpc", &peer->srx.transport); else strcpy(rbuff, "no_connection"); seq_printf(seq, - "UDP %-22.22s %-22.22s %4x %08x %08x %s %3u" + "UDP %-47.47s %-47.47s %4x %08x %08x %s %3u" " %-8.8s %08x %lx\n", lbuff, rbuff, call->service_id, call->cid, call->call_id, - call->in_clientflag ? "Svc" : "Clt", + rxrpc_is_service_call(call) ? "Svc" : "Clt", atomic_read(&call->usage), rxrpc_call_states[call->state], - call->remote_abort ?: call->local_abort, + call->abort_code, call->user_call_ID); return 0; @@ -115,13 +125,13 @@ const struct file_operations rxrpc_call_seq_fops = { static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos) { read_lock(&rxrpc_connection_lock); - return seq_list_start_head(&rxrpc_connections, *_pos); + return seq_list_start_head(&rxrpc_connection_proc_list, *_pos); } static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - return seq_list_next(v, &rxrpc_connections, pos); + return seq_list_next(v, &rxrpc_connection_proc_list, pos); } static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v) @@ -132,29 +142,31 @@ static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v) static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) { struct rxrpc_connection *conn; - char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; + char lbuff[50], rbuff[50]; - if (v == &rxrpc_connections) { + if (v == &rxrpc_connection_proc_list) { seq_puts(seq, - "Proto Local Remote " + "Proto Local " + " Remote " " SvID ConnID End Use State Key " " Serial ISerial\n" ); return 0; } - conn = list_entry(v, struct rxrpc_connection, link); - - sprintf(lbuff, "%pI4:%u", - &conn->params.local->srx.transport.sin.sin_addr, - ntohs(conn->params.local->srx.transport.sin.sin_port)); + conn = list_entry(v, struct rxrpc_connection, proc_link); + if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) { + strcpy(lbuff, "no_local"); + strcpy(rbuff, "no_connection"); + goto print; + } - sprintf(rbuff, "%pI4:%u", - &conn->params.peer->srx.transport.sin.sin_addr, - ntohs(conn->params.peer->srx.transport.sin.sin_port)); + sprintf(lbuff, "%pISpc", &conn->params.local->srx.transport); + sprintf(rbuff, "%pISpc", &conn->params.peer->srx.transport); +print: seq_printf(seq, - "UDP %-22.22s %-22.22s %4x %08x %s %3u" + "UDP %-47.47s %-47.47s %4x %08x %s %3u" " %s %08x %08x %08x\n", lbuff, rbuff, @@ -165,7 +177,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) rxrpc_conn_states[conn->state], key_serial(conn->params.key), atomic_read(&conn->serial), - atomic_read(&conn->hi_serial)); + conn->hi_serial); return 0; } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 9ed66d533002..f05ea0a88076 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -19,399 +19,645 @@ #include "ar-internal.h" /* - * removal a call's user ID from the socket tree to make the user ID available - * again and so that it won't be seen again in association with that call + * Post a call for attention by the socket or kernel service. Further + * notifications are suppressed by putting recvmsg_link on a dummy queue. */ -void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call) +void rxrpc_notify_socket(struct rxrpc_call *call) { - _debug("RELEASE CALL %d", call->debug_id); + struct rxrpc_sock *rx; + struct sock *sk; + + _enter("%d", call->debug_id); + + if (!list_empty(&call->recvmsg_link)) + return; + + rcu_read_lock(); + + rx = rcu_dereference(call->socket); + sk = &rx->sk; + if (rx && sk->sk_state < RXRPC_CLOSE) { + if (call->notify_rx) { + call->notify_rx(sk, call, call->user_call_ID); + } else { + write_lock_bh(&rx->recvmsg_lock); + if (list_empty(&call->recvmsg_link)) { + rxrpc_get_call(call, rxrpc_call_got); + list_add_tail(&call->recvmsg_link, &rx->recvmsg_q); + } + write_unlock_bh(&rx->recvmsg_lock); - if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { - write_lock_bh(&rx->call_lock); - rb_erase(&call->sock_node, &call->socket->calls); - clear_bit(RXRPC_CALL_HAS_USERID, &call->flags); - write_unlock_bh(&rx->call_lock); + if (!sock_flag(sk, SOCK_DEAD)) { + _debug("call %ps", sk->sk_data_ready); + sk->sk_data_ready(sk); + } + } } - read_lock_bh(&call->state_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); + rcu_read_unlock(); + _leave(""); } /* - * receive a message from an RxRPC socket - * - we need to be careful about two or more threads calling recvmsg - * simultaneously + * Pass a call terminating message to userspace. */ -int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, - int flags) +static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) { - struct rxrpc_skb_priv *sp; - struct rxrpc_call *call = NULL, *continue_call = NULL; - struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - struct sk_buff *skb; - long timeo; - int copy, ret, ullen, offset, copied = 0; - u32 abort_code; - - DEFINE_WAIT(wait); - - _enter(",,,%zu,%d", len, flags); - - if (flags & (MSG_OOB | MSG_TRUNC)) - return -EOPNOTSUPP; - - ullen = msg->msg_flags & MSG_CMSG_COMPAT ? 4 : sizeof(unsigned long); + u32 tmp = 0; + int ret; + + switch (call->completion) { + case RXRPC_CALL_SUCCEEDED: + ret = 0; + if (rxrpc_is_service_call(call)) + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &tmp); + break; + case RXRPC_CALL_REMOTELY_ABORTED: + tmp = call->abort_code; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); + break; + case RXRPC_CALL_LOCALLY_ABORTED: + tmp = call->abort_code; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); + break; + case RXRPC_CALL_NETWORK_ERROR: + tmp = call->error; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp); + break; + case RXRPC_CALL_LOCAL_ERROR: + tmp = call->error; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp); + break; + default: + pr_err("Invalid terminal call state %u\n", call->state); + BUG(); + break; + } - timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT); - msg->msg_flags |= MSG_MORE; + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_terminal, call->rx_hard_ack, + call->rx_pkt_offset, call->rx_pkt_len, ret); + return ret; +} - lock_sock(&rx->sk); +/* + * Pass back notification of a new call. The call is added to the + * to-be-accepted list. This means that the next call to be accepted might not + * be the last call seen awaiting acceptance, but unless we leave this on the + * front of the queue and block all other messages until someone gives us a + * user_ID for it, there's not a lot we can do. + */ +static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct msghdr *msg, int flags) +{ + int tmp = 0, ret; - for (;;) { - /* return immediately if a client socket has no outstanding - * calls */ - if (RB_EMPTY_ROOT(&rx->calls)) { - if (copied) - goto out; - if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) { - release_sock(&rx->sk); - if (continue_call) - rxrpc_put_call(continue_call); - return -ENODATA; - } - } + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &tmp); - /* get the next message on the Rx queue */ - skb = skb_peek(&rx->sk.sk_receive_queue); - if (!skb) { - /* nothing remains on the queue */ - if (copied && - (flags & MSG_PEEK || timeo == 0)) - goto out; + if (ret == 0 && !(flags & MSG_PEEK)) { + _debug("to be accepted"); + write_lock_bh(&rx->recvmsg_lock); + list_del_init(&call->recvmsg_link); + write_unlock_bh(&rx->recvmsg_lock); - /* wait for a message to turn up */ - release_sock(&rx->sk); - prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait, - TASK_INTERRUPTIBLE); - ret = sock_error(&rx->sk); - if (ret) - goto wait_error; - - if (skb_queue_empty(&rx->sk.sk_receive_queue)) { - if (signal_pending(current)) - goto wait_interrupted; - timeo = schedule_timeout(timeo); - } - finish_wait(sk_sleep(&rx->sk), &wait); - lock_sock(&rx->sk); - continue; - } + rxrpc_get_call(call, rxrpc_call_got); + write_lock(&rx->call_lock); + list_add_tail(&call->accept_link, &rx->to_be_accepted); + write_unlock(&rx->call_lock); + } - peek_next_packet: - sp = rxrpc_skb(skb); - call = sp->call; - ASSERT(call != NULL); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_to_be_accepted, 1, 0, 0, ret); + return ret; +} - _debug("next pkt %s", rxrpc_pkts[sp->hdr.type]); +/* + * End the packet reception phase. + */ +static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) +{ + _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); - /* make sure we wait for the state to be updated in this call */ - spin_lock_bh(&call->lock); - spin_unlock_bh(&call->lock); + trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top); + ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); - if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) { - _debug("packet from released call"); - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); - continue; - } + if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, + rxrpc_propose_ack_terminal_ack); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + } - /* determine whether to continue last data receive */ - if (continue_call) { - _debug("maybe cont"); - if (call != continue_call || - skb->mark != RXRPC_SKB_MARK_DATA) { - release_sock(&rx->sk); - rxrpc_put_call(continue_call); - _leave(" = %d [noncont]", copied); - return copied; - } - } + write_lock_bh(&call->state_lock); - rxrpc_get_call(call); + switch (call->state) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + __rxrpc_call_completed(call); + break; - /* copy the peer address and timestamp */ - if (!continue_call) { - if (msg->msg_name) { - size_t len = - sizeof(call->conn->params.peer->srx); - memcpy(msg->msg_name, - &call->conn->params.peer->srx, len); - msg->msg_namelen = len; - } - sock_recv_timestamp(msg, &rx->sk, skb); - } + case RXRPC_CALL_SERVER_RECV_REQUEST: + call->tx_phase = true; + call->state = RXRPC_CALL_SERVER_ACK_REQUEST; + break; + default: + break; + } - /* receive the message */ - if (skb->mark != RXRPC_SKB_MARK_DATA) - goto receive_non_data_message; + write_unlock_bh(&call->state_lock); +} - _debug("recvmsg DATA #%u { %d, %d }", - sp->hdr.seq, skb->len, sp->offset); +/* + * Discard a packet we've used up and advance the Rx window by one. + */ +static void rxrpc_rotate_rx_window(struct rxrpc_call *call) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + rxrpc_serial_t serial; + rxrpc_seq_t hard_ack, top; + u8 flags; + int ix; + + _enter("%d", call->debug_id); + + hard_ack = call->rx_hard_ack; + top = smp_load_acquire(&call->rx_top); + ASSERT(before(hard_ack, top)); + + hard_ack++; + ix = hard_ack & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + rxrpc_see_skb(skb, rxrpc_skb_rx_rotated); + sp = rxrpc_skb(skb); + flags = sp->hdr.flags; + serial = sp->hdr.serial; + if (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) + serial += (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) - 1; + + call->rxtx_buffer[ix] = NULL; + call->rxtx_annotations[ix] = 0; + /* Barrier against rxrpc_input_data(). */ + smp_store_release(&call->rx_hard_ack, hard_ack); + + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + + _debug("%u,%u,%02x", hard_ack, top, flags); + trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); + if (flags & RXRPC_LAST_PACKET) { + rxrpc_end_rx_phase(call, serial); + } else { + /* Check to see if there's an ACK that needs sending. */ + if (after_eq(hard_ack, call->ackr_consumed + 2) || + after_eq(top, call->ackr_seen + 2) || + (hard_ack == top && after(hard_ack, call->ackr_consumed))) + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, + true, false, + rxrpc_propose_ack_rotate_rx); + if (call->ackr_reason) + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + } +} - if (!continue_call) { - /* only set the control data once per recvmsg() */ - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, - ullen, &call->user_call_ID); - if (ret < 0) - goto copy_error; - ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); - } +/* + * Decrypt and verify a (sub)packet. The packet's length may be changed due to + * padding, but if this is the case, the packet length will be resident in the + * socket buffer. Note that we can't modify the master skb info as the skb may + * be the home to multiple subpackets. + */ +static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + u8 annotation, + unsigned int offset, unsigned int len) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + rxrpc_seq_t seq = sp->hdr.seq; + u16 cksum = sp->hdr.cksum; - ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv); - ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1); - call->rx_data_recv = sp->hdr.seq; + _enter(""); - ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); + /* For all but the head jumbo subpacket, the security checksum is in a + * jumbo header immediately prior to the data. + */ + if ((annotation & RXRPC_RX_ANNO_JUMBO) > 1) { + __be16 tmp; + if (skb_copy_bits(skb, offset - 2, &tmp, 2) < 0) + BUG(); + cksum = ntohs(tmp); + seq += (annotation & RXRPC_RX_ANNO_JUMBO) - 1; + } - offset = sp->offset; - copy = skb->len - offset; - if (copy > len - copied) - copy = len - copied; + return call->conn->security->verify_packet(call, skb, offset, len, + seq, cksum); +} - ret = skb_copy_datagram_msg(skb, offset, msg, copy); +/* + * Locate the data within a packet. This is complicated by: + * + * (1) An skb may contain a jumbo packet - so we have to find the appropriate + * subpacket. + * + * (2) The (sub)packets may be encrypted and, if so, the encrypted portion + * contains an extra header which includes the true length of the data, + * excluding any encrypted padding. + */ +static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + u8 *_annotation, + unsigned int *_offset, unsigned int *_len) +{ + unsigned int offset = sizeof(struct rxrpc_wire_header); + unsigned int len = *_len; + int ret; + u8 annotation = *_annotation; + + /* Locate the subpacket */ + len = skb->len - offset; + if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) { + offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) * + RXRPC_JUMBO_SUBPKTLEN); + len = (annotation & RXRPC_RX_ANNO_JLAST) ? + skb->len - offset : RXRPC_JUMBO_SUBPKTLEN; + } + if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) { + ret = rxrpc_verify_packet(call, skb, annotation, offset, len); if (ret < 0) - goto copy_error; - - /* handle piecemeal consumption of data packets */ - _debug("copied %d+%d", copy, copied); + return ret; + *_annotation |= RXRPC_RX_ANNO_VERIFIED; + } - offset += copy; - copied += copy; + *_offset = offset; + *_len = len; + call->conn->security->locate_data(call, skb, _offset, _len); + return 0; +} - if (!(flags & MSG_PEEK)) - sp->offset = offset; +/* + * Deliver messages to a call. This keeps processing packets until the buffer + * is filled and we find either more DATA (returns 0) or the end of the DATA + * (returns 1). If more packets are required, it returns -EAGAIN. + */ +static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, + struct msghdr *msg, struct iov_iter *iter, + size_t len, int flags, size_t *_offset) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + rxrpc_seq_t hard_ack, top, seq; + size_t remain; + bool last; + unsigned int rx_pkt_offset, rx_pkt_len; + int ix, copy, ret = -EAGAIN, ret2; + + rx_pkt_offset = call->rx_pkt_offset; + rx_pkt_len = call->rx_pkt_len; + + if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) { + seq = call->rx_hard_ack; + ret = 1; + goto done; + } - if (sp->offset < skb->len) { - _debug("buffer full"); - ASSERTCMP(copied, ==, len); + /* Barriers against rxrpc_input_data(). */ + hard_ack = call->rx_hard_ack; + top = smp_load_acquire(&call->rx_top); + for (seq = hard_ack + 1; before_eq(seq, top); seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + if (!skb) { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_hole, seq, + rx_pkt_offset, rx_pkt_len, 0); break; } + smp_rmb(); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); + sp = rxrpc_skb(skb); - /* we transferred the whole data packet */ if (!(flags & MSG_PEEK)) - rxrpc_kernel_data_consumed(call, skb); - - if (sp->hdr.flags & RXRPC_LAST_PACKET) { - _debug("last"); - if (rxrpc_conn_is_client(call->conn)) { - /* last byte of reply received */ - ret = copied; - goto terminal_message; + trace_rxrpc_receive(call, rxrpc_receive_front, + sp->hdr.serial, seq); + + if (msg) + sock_recv_timestamp(msg, sock->sk, skb); + + if (rx_pkt_offset == 0) { + ret2 = rxrpc_locate_data(call, skb, + &call->rxtx_annotations[ix], + &rx_pkt_offset, &rx_pkt_len); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq, + rx_pkt_offset, rx_pkt_len, ret2); + if (ret2 < 0) { + ret = ret2; + goto out; } + } else { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_cont, seq, + rx_pkt_offset, rx_pkt_len, 0); + } - /* last bit of request received */ - if (!(flags & MSG_PEEK)) { - _debug("eat packet"); - if (skb_dequeue(&rx->sk.sk_receive_queue) != - skb) - BUG(); - rxrpc_free_skb(skb); + /* We have to handle short, empty and used-up DATA packets. */ + remain = len - *_offset; + copy = rx_pkt_len; + if (copy > remain) + copy = remain; + if (copy > 0) { + ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, + copy); + if (ret2 < 0) { + ret = ret2; + goto out; } - msg->msg_flags &= ~MSG_MORE; - break; + + /* handle piecemeal consumption of data packets */ + rx_pkt_offset += copy; + rx_pkt_len -= copy; + *_offset += copy; } - /* move on to the next data message */ - _debug("next"); - if (!continue_call) - continue_call = sp->call; - else - rxrpc_put_call(call); - call = NULL; - - if (flags & MSG_PEEK) { - _debug("peek next"); - skb = skb->next; - if (skb == (struct sk_buff *) &rx->sk.sk_receive_queue) - break; - goto peek_next_packet; + if (rx_pkt_len > 0) { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_full, seq, + rx_pkt_offset, rx_pkt_len, 0); + ASSERTCMP(*_offset, ==, len); + ret = 0; + break; } - _debug("eat packet"); - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); + /* The whole packet has been transferred. */ + last = sp->hdr.flags & RXRPC_LAST_PACKET; + if (!(flags & MSG_PEEK)) + rxrpc_rotate_rx_window(call); + rx_pkt_offset = 0; + rx_pkt_len = 0; + + if (last) { + ASSERTCMP(seq, ==, READ_ONCE(call->rx_top)); + ret = 1; + goto out; + } } - /* end of non-terminal data packet reception for the moment */ - _debug("end rcv data"); out: - release_sock(&rx->sk); - if (call) - rxrpc_put_call(call); - if (continue_call) - rxrpc_put_call(continue_call); - _leave(" = %d [data]", copied); - return copied; - - /* handle non-DATA messages such as aborts, incoming connections and - * final ACKs */ -receive_non_data_message: - _debug("non-data"); - - if (skb->mark == RXRPC_SKB_MARK_NEW_CALL) { - _debug("RECV NEW CALL"); - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &abort_code); - if (ret < 0) - goto copy_error; - if (!(flags & MSG_PEEK)) { - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); - } - goto out; + if (!(flags & MSG_PEEK)) { + call->rx_pkt_offset = rx_pkt_offset; + call->rx_pkt_len = rx_pkt_len; } +done: + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq, + rx_pkt_offset, rx_pkt_len, ret); + return ret; +} - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, - ullen, &call->user_call_ID); - if (ret < 0) - goto copy_error; - ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); +/* + * Receive a message from an RxRPC socket + * - we need to be careful about two or more threads calling recvmsg + * simultaneously + */ +int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, + int flags) +{ + struct rxrpc_call *call; + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + struct list_head *l; + size_t copied = 0; + long timeo; + int ret; + + DEFINE_WAIT(wait); + + trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0, 0, 0, 0); + + if (flags & (MSG_OOB | MSG_TRUNC)) + return -EOPNOTSUPP; + + timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT); + +try_again: + lock_sock(&rx->sk); + + /* Return immediately if a client socket has no outstanding calls */ + if (RB_EMPTY_ROOT(&rx->calls) && + list_empty(&rx->recvmsg_q) && + rx->sk.sk_state != RXRPC_SERVER_LISTENING) { + release_sock(&rx->sk); + return -ENODATA; + } + + if (list_empty(&rx->recvmsg_q)) { + ret = -EWOULDBLOCK; + if (timeo == 0) { + call = NULL; + goto error_no_call; + } + + release_sock(&rx->sk); + + /* Wait for something to happen */ + prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait, + TASK_INTERRUPTIBLE); + ret = sock_error(&rx->sk); + if (ret) + goto wait_error; + + if (list_empty(&rx->recvmsg_q)) { + if (signal_pending(current)) + goto wait_interrupted; + trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait, + 0, 0, 0, 0); + timeo = schedule_timeout(timeo); + } + finish_wait(sk_sleep(&rx->sk), &wait); + goto try_again; + } - switch (skb->mark) { - case RXRPC_SKB_MARK_DATA: + /* Find the next call and dequeue it if we're not just peeking. If we + * do dequeue it, that comes with a ref that we will need to release. + */ + write_lock_bh(&rx->recvmsg_lock); + l = rx->recvmsg_q.next; + call = list_entry(l, struct rxrpc_call, recvmsg_link); + if (!(flags & MSG_PEEK)) + list_del_init(&call->recvmsg_link); + else + rxrpc_get_call(call, rxrpc_call_got); + write_unlock_bh(&rx->recvmsg_lock); + + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0); + + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); - case RXRPC_SKB_MARK_FINAL_ACK: - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &abort_code); - break; - case RXRPC_SKB_MARK_BUSY: - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code); - break; - case RXRPC_SKB_MARK_REMOTE_ABORT: - abort_code = call->remote_abort; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); - break; - case RXRPC_SKB_MARK_LOCAL_ABORT: - abort_code = call->local_abort; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); - break; - case RXRPC_SKB_MARK_NET_ERROR: - _debug("RECV NET ERROR %d", sp->error); - abort_code = sp->error; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &abort_code); + + if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { + if (flags & MSG_CMSG_COMPAT) { + unsigned int id32 = call->user_call_ID; + + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, + sizeof(unsigned int), &id32); + } else { + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, + sizeof(unsigned long), + &call->user_call_ID); + } + if (ret < 0) + goto error; + } + + if (msg->msg_name) { + size_t len = sizeof(call->conn->params.peer->srx); + memcpy(msg->msg_name, &call->conn->params.peer->srx, len); + msg->msg_namelen = len; + } + + switch (call->state) { + case RXRPC_CALL_SERVER_ACCEPTING: + ret = rxrpc_recvmsg_new_call(rx, call, msg, flags); break; - case RXRPC_SKB_MARK_LOCAL_ERROR: - _debug("RECV LOCAL ERROR %d", sp->error); - abort_code = sp->error; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, - &abort_code); + case RXRPC_CALL_CLIENT_RECV_REPLY: + case RXRPC_CALL_SERVER_RECV_REQUEST: + case RXRPC_CALL_SERVER_ACK_REQUEST: + ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len, + flags, &copied); + if (ret == -EAGAIN) + ret = 0; + + if (after(call->rx_top, call->rx_hard_ack) && + call->rxtx_buffer[(call->rx_hard_ack + 1) & RXRPC_RXTX_BUFF_MASK]) + rxrpc_notify_socket(call); break; default: - pr_err("Unknown packet mark %u\n", skb->mark); - BUG(); + ret = 0; break; } if (ret < 0) - goto copy_error; - -terminal_message: - _debug("terminal"); - msg->msg_flags &= ~MSG_MORE; - msg->msg_flags |= MSG_EOR; + goto error; - if (!(flags & MSG_PEEK)) { - _net("free terminal skb %p", skb); - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); - rxrpc_remove_user_ID(rx, call); + if (call->state == RXRPC_CALL_COMPLETE) { + ret = rxrpc_recvmsg_term(call, msg); + if (ret < 0) + goto error; + if (!(flags & MSG_PEEK)) + rxrpc_release_call(rx, call); + msg->msg_flags |= MSG_EOR; + ret = 1; } - release_sock(&rx->sk); - rxrpc_put_call(call); - if (continue_call) - rxrpc_put_call(continue_call); - _leave(" = %d", ret); - return ret; + if (ret == 0) + msg->msg_flags |= MSG_MORE; + else + msg->msg_flags &= ~MSG_MORE; + ret = copied; -copy_error: - _debug("copy error"); +error: + rxrpc_put_call(call, rxrpc_call_put); +error_no_call: release_sock(&rx->sk); - rxrpc_put_call(call); - if (continue_call) - rxrpc_put_call(continue_call); - _leave(" = %d", ret); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); return ret; wait_interrupted: ret = sock_intr_errno(timeo); wait_error: finish_wait(sk_sleep(&rx->sk), &wait); - if (continue_call) - rxrpc_put_call(continue_call); - if (copied) - copied = ret; - _leave(" = %d [waitfail %d]", copied, ret); - return copied; - + call = NULL; + goto error_no_call; } /** - * rxrpc_kernel_is_data_last - Determine if data message is last one - * @skb: Message holding data + * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info + * @sock: The socket that the call exists on + * @call: The call to send data through + * @buf: The buffer to receive into + * @size: The size of the buffer, including data already read + * @_offset: The running offset into the buffer. + * @want_more: True if more data is expected to be read + * @_abort: Where the abort code is stored if -ECONNABORTED is returned * - * Determine if data message is last one for the parent call. + * Allow a kernel service to receive data and pick up information about the + * state of a call. Returns 0 if got what was asked for and there's more + * available, 1 if we got what was asked for and we're at the end of the data + * and -EAGAIN if we need more data. + * + * Note that we may return -EAGAIN to drain empty packets at the end of the + * data, even if we've already copied over the requested data. + * + * This function adds the amount it transfers to *_offset, so this should be + * precleared as appropriate. Note that the amount remaining in the buffer is + * taken to be size - *_offset. + * + * *_abort should also be initialised to 0. */ -bool rxrpc_kernel_is_data_last(struct sk_buff *skb) +int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, + void *buf, size_t size, size_t *_offset, + bool want_more, u32 *_abort) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct iov_iter iter; + struct kvec iov; + int ret; - ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_DATA); + _enter("{%d,%s},%zu/%zu,%d", + call->debug_id, rxrpc_call_states[call->state], + *_offset, size, want_more); - return sp->hdr.flags & RXRPC_LAST_PACKET; -} + ASSERTCMP(*_offset, <=, size); + ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING); -EXPORT_SYMBOL(rxrpc_kernel_is_data_last); + iov.iov_base = buf + *_offset; + iov.iov_len = size - *_offset; + iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset); -/** - * rxrpc_kernel_get_abort_code - Get the abort code from an RxRPC abort message - * @skb: Message indicating an abort - * - * Get the abort code from an RxRPC abort message. - */ -u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + lock_sock(sock->sk); + + switch (call->state) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + case RXRPC_CALL_SERVER_RECV_REQUEST: + case RXRPC_CALL_SERVER_ACK_REQUEST: + ret = rxrpc_recvmsg_data(sock, call, NULL, &iter, size, 0, + _offset); + if (ret < 0) + goto out; + + /* We can only reach here with a partially full buffer if we + * have reached the end of the data. We must otherwise have a + * full buffer or have been given -EAGAIN. + */ + if (ret == 1) { + if (*_offset < size) + goto short_data; + if (!want_more) + goto read_phase_complete; + ret = 0; + goto out; + } + + if (!want_more) + goto excess_data; + goto out; + + case RXRPC_CALL_COMPLETE: + goto call_complete; - switch (skb->mark) { - case RXRPC_SKB_MARK_REMOTE_ABORT: - return sp->call->remote_abort; - case RXRPC_SKB_MARK_LOCAL_ABORT: - return sp->call->local_abort; default: - BUG(); + ret = -EINPROGRESS; + goto out; } -} - -EXPORT_SYMBOL(rxrpc_kernel_get_abort_code); -/** - * rxrpc_kernel_get_error - Get the error number from an RxRPC error message - * @skb: Message indicating an error - * - * Get the error number from an RxRPC error message. - */ -int rxrpc_kernel_get_error_number(struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); +read_phase_complete: + ret = 1; +out: + release_sock(sock->sk); + _leave(" = %d [%zu,%d]", ret, *_offset, *_abort); + return ret; - return sp->error; +short_data: + ret = -EBADMSG; + goto out; +excess_data: + ret = -EMSGSIZE; + goto out; +call_complete: + *_abort = call->abort_code; + ret = call->error; + if (call->completion == RXRPC_CALL_SUCCEEDED) { + ret = 1; + if (size > 0) + ret = -ECONNRESET; + } + goto out; } - -EXPORT_SYMBOL(rxrpc_kernel_get_error_number); +EXPORT_SYMBOL(rxrpc_kernel_recv_data); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 63afa9e9cc08..627abed5f999 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -80,12 +80,10 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn) case RXRPC_SECURITY_AUTH: conn->size_align = 8; conn->security_size = sizeof(struct rxkad_level1_hdr); - conn->header_size += sizeof(struct rxkad_level1_hdr); break; case RXRPC_SECURITY_ENCRYPT: conn->size_align = 8; conn->security_size = sizeof(struct rxkad_level2_hdr); - conn->header_size += sizeof(struct rxkad_level2_hdr); break; default: ret = -EKEYREJECTED; @@ -161,7 +159,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, _enter(""); - check = sp->hdr.seq ^ sp->hdr.callNumber; + check = sp->hdr.seq ^ call->call_id; data_size |= (u32)check << 16; hdr.data_size = htonl(data_size); @@ -205,7 +203,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, _enter(""); - check = sp->hdr.seq ^ sp->hdr.callNumber; + check = sp->hdr.seq ^ call->call_id; rxkhdr.data_size = htonl(data_size | (u32)check << 16); rxkhdr.checksum = 0; @@ -275,9 +273,9 @@ static int rxkad_secure_packet(struct rxrpc_call *call, memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); /* calculate the security checksum */ - x = call->channel << (32 - RXRPC_CIDSHIFT); + x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); x |= sp->hdr.seq & 0x3fffffff; - call->crypto_buf[0] = htonl(sp->hdr.callNumber); + call->crypto_buf[0] = htonl(call->call_id); call->crypto_buf[1] = htonl(x); sg_init_one(&sg, call->crypto_buf, 8); @@ -316,12 +314,11 @@ static int rxkad_secure_packet(struct rxrpc_call *call, /* * decrypt partial encryption on a packet (level 1 security) */ -static int rxkad_verify_packet_auth(const struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, + rxrpc_seq_t seq) { struct rxkad_level1_hdr sechdr; - struct rxrpc_skb_priv *sp; SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxrpc_crypt iv; struct scatterlist sg[16]; @@ -332,15 +329,20 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, _enter(""); - sp = rxrpc_skb(skb); + if (len < 8) { + rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO); + goto protocol_error; + } - /* we want to decrypt the skbuff in-place */ + /* Decrypt the skbuff in-place. TODO: We really want to decrypt + * directly into the target buffer. + */ nsg = skb_cow_data(skb, 0, &trailer); if (nsg < 0 || nsg > 16) goto nomem; sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, 8); + skb_to_sgvec(skb, sg, offset, 8); /* start the decryption afresh */ memset(&iv, 0, sizeof(iv)); @@ -351,35 +353,35 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, crypto_skcipher_decrypt(req); skcipher_request_zero(req); - /* remove the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) - goto datalen_error; - if (!skb_pull(skb, sizeof(sechdr))) - BUG(); + /* Extract the decrypted packet length */ + if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { + rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } + offset += sizeof(sechdr); + len -= sizeof(sechdr); buf = ntohl(sechdr.data_size); data_size = buf & 0xffff; check = buf >> 16; - check ^= sp->hdr.seq ^ sp->hdr.callNumber; + check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - *_abort_code = RXKADSEALEDINCON; + rxrpc_abort_call("V1C", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } - /* shorten the packet to remove the padding */ - if (data_size > skb->len) - goto datalen_error; - else if (data_size < skb->len) - skb->len = data_size; + if (data_size > len) { + rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } _leave(" = 0 [dlen=%x]", data_size); return 0; -datalen_error: - *_abort_code = RXKADDATALEN; protocol_error: + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); _leave(" = -EPROTO"); return -EPROTO; @@ -391,13 +393,12 @@ nomem: /* * wholly decrypt a packet (level 2 security) */ -static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, + rxrpc_seq_t seq) { const struct rxrpc_key_token *token; struct rxkad_level2_hdr sechdr; - struct rxrpc_skb_priv *sp; SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; @@ -408,9 +409,14 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, _enter(",{%d}", skb->len); - sp = rxrpc_skb(skb); + if (len < 8) { + rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO); + goto protocol_error; + } - /* we want to decrypt the skbuff in-place */ + /* Decrypt the skbuff in-place. TODO: We really want to decrypt + * directly into the target buffer. + */ nsg = skb_cow_data(skb, 0, &trailer); if (nsg < 0) goto nomem; @@ -423,7 +429,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, } sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, skb->len); + skb_to_sgvec(skb, sg, offset, len); /* decrypt from the session key */ token = call->conn->params.key->payload.data[0]; @@ -431,41 +437,41 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, skcipher_request_set_tfm(req, call->conn->cipher); skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x); + skcipher_request_set_crypt(req, sg, sg, len, iv.x); crypto_skcipher_decrypt(req); skcipher_request_zero(req); if (sg != _sg) kfree(sg); - /* remove the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) - goto datalen_error; - if (!skb_pull(skb, sizeof(sechdr))) - BUG(); + /* Extract the decrypted packet length */ + if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { + rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } + offset += sizeof(sechdr); + len -= sizeof(sechdr); buf = ntohl(sechdr.data_size); data_size = buf & 0xffff; check = buf >> 16; - check ^= sp->hdr.seq ^ sp->hdr.callNumber; + check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - *_abort_code = RXKADSEALEDINCON; + rxrpc_abort_call("V2C", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } - /* shorten the packet to remove the padding */ - if (data_size > skb->len) - goto datalen_error; - else if (data_size < skb->len) - skb->len = data_size; + if (data_size > len) { + rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } _leave(" = 0 [dlen=%x]", data_size); return 0; -datalen_error: - *_abort_code = RXKADDATALEN; protocol_error: + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); _leave(" = -EPROTO"); return -EPROTO; @@ -475,40 +481,31 @@ nomem: } /* - * verify the security on a received packet + * Verify the security on a received packet or subpacket (if part of a + * jumbo packet). */ -static int rxkad_verify_packet(struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, + rxrpc_seq_t seq, u16 expected_cksum) { SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); - struct rxrpc_skb_priv *sp; struct rxrpc_crypt iv; struct scatterlist sg; u16 cksum; u32 x, y; - int ret; - - sp = rxrpc_skb(skb); _enter("{%d{%x}},{#%u}", - call->debug_id, key_serial(call->conn->params.key), sp->hdr.seq); + call->debug_id, key_serial(call->conn->params.key), seq); if (!call->conn->cipher) return 0; - if (sp->hdr.securityIndex != RXRPC_SECURITY_RXKAD) { - *_abort_code = RXKADINCONSISTENCY; - _leave(" = -EPROTO [not rxkad]"); - return -EPROTO; - } - /* continue encrypting from where we left off */ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); /* validate the security checksum */ - x = call->channel << (32 - RXRPC_CIDSHIFT); - x |= sp->hdr.seq & 0x3fffffff; + x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); + x |= seq & 0x3fffffff; call->crypto_buf[0] = htonl(call->call_id); call->crypto_buf[1] = htonl(x); @@ -524,29 +521,69 @@ static int rxkad_verify_packet(struct rxrpc_call *call, if (cksum == 0) cksum = 1; /* zero checksums are not permitted */ - if (sp->hdr.cksum != cksum) { - *_abort_code = RXKADSEALEDINCON; + if (cksum != expected_cksum) { + rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); _leave(" = -EPROTO [csum failed]"); return -EPROTO; } switch (call->conn->params.security_level) { case RXRPC_SECURITY_PLAIN: - ret = 0; - break; + return 0; case RXRPC_SECURITY_AUTH: - ret = rxkad_verify_packet_auth(call, skb, _abort_code); - break; + return rxkad_verify_packet_1(call, skb, offset, len, seq); case RXRPC_SECURITY_ENCRYPT: - ret = rxkad_verify_packet_encrypt(call, skb, _abort_code); - break; + return rxkad_verify_packet_2(call, skb, offset, len, seq); default: - ret = -ENOANO; - break; + return -ENOANO; } +} - _leave(" = %d", ret); - return ret; +/* + * Locate the data contained in a packet that was partially encrypted. + */ +static void rxkad_locate_data_1(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + struct rxkad_level1_hdr sechdr; + + if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0) + BUG(); + *_offset += sizeof(sechdr); + *_len = ntohl(sechdr.data_size) & 0xffff; +} + +/* + * Locate the data contained in a packet that was completely encrypted. + */ +static void rxkad_locate_data_2(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + struct rxkad_level2_hdr sechdr; + + if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0) + BUG(); + *_offset += sizeof(sechdr); + *_len = ntohl(sechdr.data_size) & 0xffff; +} + +/* + * Locate the data contained in an already decrypted packet. + */ +static void rxkad_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + switch (call->conn->params.security_level) { + case RXRPC_SECURITY_AUTH: + rxkad_locate_data_1(call, skb, _offset, _len); + return; + case RXRPC_SECURITY_ENCRYPT: + rxkad_locate_data_2(call, skb, _offset, _len); + return; + default: + return; + } } /* @@ -716,7 +753,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, struct rxkad_challenge challenge; struct rxkad_response resp __attribute__((aligned(8))); /* must be aligned for crypto */ - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); u32 version, nonce, min_level, abort_code; int ret; @@ -734,8 +771,8 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, } abort_code = RXKADPACKETSHORT; - sp = rxrpc_skb(skb); - if (skb_copy_bits(skb, 0, &challenge, sizeof(challenge)) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &challenge, sizeof(challenge)) < 0) goto protocol_error; version = ntohl(challenge.version); @@ -981,7 +1018,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, { struct rxkad_response response __attribute__((aligned(8))); /* must be aligned for crypto */ - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; time_t expiry; void *ticket; @@ -992,7 +1029,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, 0, &response, sizeof(response)) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &response, sizeof(response)) < 0) goto protocol_error; if (!pskb_pull(skb, sizeof(response))) BUG(); @@ -1000,7 +1038,6 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, version = ntohl(response.version); ticket_len = ntohl(response.ticket_len); kvno = ntohl(response.kvno); - sp = rxrpc_skb(skb); _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }", sp->hdr.serial, version, kvno, ticket_len); @@ -1022,7 +1059,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, return -ENOMEM; abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, 0, ticket, ticket_len) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + ticket, ticket_len) < 0) goto protocol_error_free; ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key, @@ -1147,6 +1185,7 @@ const struct rxrpc_security rxkad = { .prime_packet_security = rxkad_prime_packet_security, .secure_packet = rxkad_secure_packet, .verify_packet = rxkad_verify_packet, + .locate_data = rxkad_locate_data, .issue_challenge = rxkad_issue_challenge, .respond_to_challenge = rxkad_respond_to_challenge, .verify_response = rxkad_verify_response, diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 814d285ff802..7d921e56e715 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -130,20 +130,20 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) } /* find the service */ - read_lock_bh(&local->services_lock); - list_for_each_entry(rx, &local->services, listen_link) { - if (rx->srx.srx_service == conn->params.service_id) - goto found_service; - } + read_lock(&local->services_lock); + rx = rcu_dereference_protected(local->service, + lockdep_is_held(&local->services_lock)); + if (rx && rx->srx.srx_service == conn->params.service_id) + goto found_service; /* the service appears to have died */ - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); _leave(" = -ENOENT"); return -ENOENT; found_service: if (!rx->securities) { - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); _leave(" = -ENOKEY"); return -ENOKEY; } @@ -152,13 +152,13 @@ found_service: kref = keyring_search(make_key_ref(rx->securities, 1UL), &key_type_rxrpc_s, kdesc); if (IS_ERR(kref)) { - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); _leave(" = %ld [search]", PTR_ERR(kref)); return PTR_ERR(kref); } key = key_ref_to_ptr(kref); - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); conn->server_key = key; conn->security = sec; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c new file mode 100644 index 000000000000..3322543d460a --- /dev/null +++ b/net/rxrpc/sendmsg.c @@ -0,0 +1,606 @@ +/* AF_RXRPC sendmsg() implementation. + * + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/net.h> +#include <linux/gfp.h> +#include <linux/skbuff.h> +#include <linux/export.h> +#include <net/sock.h> +#include <net/af_rxrpc.h> +#include "ar-internal.h" + +enum rxrpc_command { + RXRPC_CMD_SEND_DATA, /* send data message */ + RXRPC_CMD_SEND_ABORT, /* request abort generation */ + RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ + RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ +}; + +/* + * wait for space to appear in the transmit/ACK window + * - caller holds the socket locked + */ +static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, + struct rxrpc_call *call, + long *timeo) +{ + DECLARE_WAITQUEUE(myself, current); + int ret; + + _enter(",{%u,%u,%u}", + call->tx_hard_ack, call->tx_top, call->tx_winsize); + + add_wait_queue(&call->waitq, &myself); + + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + ret = 0; + if (call->tx_top - call->tx_hard_ack < + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra)) + break; + if (call->state >= RXRPC_CALL_COMPLETE) { + ret = -call->error; + break; + } + if (signal_pending(current)) { + ret = sock_intr_errno(*timeo); + break; + } + + trace_rxrpc_transmit(call, rxrpc_transmit_wait); + release_sock(&rx->sk); + *timeo = schedule_timeout(*timeo); + lock_sock(&rx->sk); + } + + remove_wait_queue(&call->waitq, &myself); + set_current_state(TASK_RUNNING); + _leave(" = %d", ret); + return ret; +} + +/* + * Schedule an instant Tx resend. + */ +static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix) +{ + spin_lock_bh(&call->lock); + + if (call->state < RXRPC_CALL_COMPLETE) { + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS; + if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); + } + + spin_unlock_bh(&call->lock); +} + +/* + * Queue a DATA packet for transmission, set the resend timeout and send the + * packet immediately + */ +static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, + bool last) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + rxrpc_seq_t seq = sp->hdr.seq; + int ret, ix; + u8 annotation = RXRPC_TX_ANNO_UNACK; + + _net("queue skb %p [%d]", skb, seq); + + ASSERTCMP(seq, ==, call->tx_top + 1); + + if (last) + annotation |= RXRPC_TX_ANNO_LAST; + + /* We have to set the timestamp before queueing as the retransmit + * algorithm can see the packet as soon as we queue it. + */ + skb->tstamp = ktime_get_real(); + + ix = seq & RXRPC_RXTX_BUFF_MASK; + rxrpc_get_skb(skb, rxrpc_skb_tx_got); + call->rxtx_annotations[ix] = annotation; + smp_wmb(); + call->rxtx_buffer[ix] = skb; + call->tx_top = seq; + if (last) + trace_rxrpc_transmit(call, rxrpc_transmit_queue_last); + else + trace_rxrpc_transmit(call, rxrpc_transmit_queue); + + if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { + _debug("________awaiting reply/ACK__________"); + write_lock_bh(&call->state_lock); + switch (call->state) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + break; + case RXRPC_CALL_SERVER_ACK_REQUEST: + call->state = RXRPC_CALL_SERVER_SEND_REPLY; + if (!last) + break; + case RXRPC_CALL_SERVER_SEND_REPLY: + call->state = RXRPC_CALL_SERVER_AWAIT_ACK; + break; + default: + break; + } + write_unlock_bh(&call->state_lock); + } + + if (seq == 1 && rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); + + ret = rxrpc_send_data_packet(call, skb, false); + if (ret < 0) { + _debug("need instant resend %d", ret); + rxrpc_instant_resend(call, ix); + } else { + ktime_t now = ktime_get_real(), resend_at; + + resend_at = ktime_add_ms(now, rxrpc_resend_timeout); + + if (ktime_before(resend_at, call->resend_at)) { + call->resend_at = resend_at; + rxrpc_set_timer(call, rxrpc_timer_set_for_send, now); + } + } + + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + _leave(""); +} + +/* + * send data through a socket + * - must be called in process context + * - caller holds the socket locked + */ +static int rxrpc_send_data(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct msghdr *msg, size_t len) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + struct sock *sk = &rx->sk; + long timeo; + bool more; + int ret, copied; + + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + + /* this should be in poll */ + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); + + if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + return -EPIPE; + + more = msg->msg_flags & MSG_MORE; + + skb = call->tx_pending; + call->tx_pending = NULL; + rxrpc_see_skb(skb, rxrpc_skb_tx_seen); + + copied = 0; + do { + /* Check to see if there's a ping ACK to reply to. */ + if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + + if (!skb) { + size_t size, chunk, max, space; + + _debug("alloc"); + + if (call->tx_top - call->tx_hard_ack >= + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra)) { + ret = -EAGAIN; + if (msg->msg_flags & MSG_DONTWAIT) + goto maybe_error; + ret = rxrpc_wait_for_tx_window(rx, call, + &timeo); + if (ret < 0) + goto maybe_error; + } + + max = RXRPC_JUMBO_DATALEN; + max -= call->conn->security_size; + max &= ~(call->conn->size_align - 1UL); + + chunk = max; + if (chunk > msg_data_left(msg) && !more) + chunk = msg_data_left(msg); + + space = chunk + call->conn->size_align; + space &= ~(call->conn->size_align - 1UL); + + size = space + call->conn->security_size; + + _debug("SIZE: %zu/%zu/%zu", chunk, space, size); + + /* create a buffer that we can retain until it's ACK'd */ + skb = sock_alloc_send_skb( + sk, size, msg->msg_flags & MSG_DONTWAIT, &ret); + if (!skb) + goto maybe_error; + + rxrpc_new_skb(skb, rxrpc_skb_tx_new); + + _debug("ALLOC SEND %p", skb); + + ASSERTCMP(skb->mark, ==, 0); + + _debug("HS: %u", call->conn->security_size); + skb_reserve(skb, call->conn->security_size); + skb->len += call->conn->security_size; + + sp = rxrpc_skb(skb); + sp->remain = chunk; + if (sp->remain > skb_tailroom(skb)) + sp->remain = skb_tailroom(skb); + + _net("skb: hr %d, tr %d, hl %d, rm %d", + skb_headroom(skb), + skb_tailroom(skb), + skb_headlen(skb), + sp->remain); + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + + _debug("append"); + sp = rxrpc_skb(skb); + + /* append next segment of data to the current buffer */ + if (msg_data_left(msg) > 0) { + int copy = skb_tailroom(skb); + ASSERTCMP(copy, >, 0); + if (copy > msg_data_left(msg)) + copy = msg_data_left(msg); + if (copy > sp->remain) + copy = sp->remain; + + _debug("add"); + ret = skb_add_data(skb, &msg->msg_iter, copy); + _debug("added"); + if (ret < 0) + goto efault; + sp->remain -= copy; + skb->mark += copy; + copied += copy; + } + + /* check for the far side aborting the call or a network error + * occurring */ + if (call->state == RXRPC_CALL_COMPLETE) + goto call_terminated; + + /* add the packet to the send queue if it's now full */ + if (sp->remain <= 0 || + (msg_data_left(msg) == 0 && !more)) { + struct rxrpc_connection *conn = call->conn; + uint32_t seq; + size_t pad; + + /* pad out if we're using security */ + if (conn->security_ix) { + pad = conn->security_size + skb->mark; + pad = conn->size_align - pad; + pad &= conn->size_align - 1; + _debug("pad %zu", pad); + if (pad) + memset(skb_put(skb, pad), 0, pad); + } + + seq = call->tx_top + 1; + + sp->hdr.seq = seq; + sp->hdr._rsvd = 0; + sp->hdr.flags = conn->out_clientflag; + + if (msg_data_left(msg) == 0 && !more) + sp->hdr.flags |= RXRPC_LAST_PACKET; + else if (call->tx_top - call->tx_hard_ack < + call->tx_winsize) + sp->hdr.flags |= RXRPC_MORE_PACKETS; + + ret = conn->security->secure_packet( + call, skb, skb->mark, skb->head); + if (ret < 0) + goto out; + + rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more); + skb = NULL; + } + } while (msg_data_left(msg) > 0); + +success: + ret = copied; +out: + call->tx_pending = skb; + _leave(" = %d", ret); + return ret; + +call_terminated: + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + _leave(" = %d", -call->error); + return -call->error; + +maybe_error: + if (copied) + goto success; + goto out; + +efault: + ret = -EFAULT; + goto out; +} + +/* + * extract control messages from the sendmsg() control buffer + */ +static int rxrpc_sendmsg_cmsg(struct msghdr *msg, + unsigned long *user_call_ID, + enum rxrpc_command *command, + u32 *abort_code, + bool *_exclusive) +{ + struct cmsghdr *cmsg; + bool got_user_ID = false; + int len; + + *command = RXRPC_CMD_SEND_DATA; + + if (msg->msg_controllen == 0) + return -EINVAL; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + + len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); + _debug("CMSG %d, %d, %d", + cmsg->cmsg_level, cmsg->cmsg_type, len); + + if (cmsg->cmsg_level != SOL_RXRPC) + continue; + + switch (cmsg->cmsg_type) { + case RXRPC_USER_CALL_ID: + if (msg->msg_flags & MSG_CMSG_COMPAT) { + if (len != sizeof(u32)) + return -EINVAL; + *user_call_ID = *(u32 *) CMSG_DATA(cmsg); + } else { + if (len != sizeof(unsigned long)) + return -EINVAL; + *user_call_ID = *(unsigned long *) + CMSG_DATA(cmsg); + } + _debug("User Call ID %lx", *user_call_ID); + got_user_ID = true; + break; + + case RXRPC_ABORT: + if (*command != RXRPC_CMD_SEND_DATA) + return -EINVAL; + *command = RXRPC_CMD_SEND_ABORT; + if (len != sizeof(*abort_code)) + return -EINVAL; + *abort_code = *(unsigned int *) CMSG_DATA(cmsg); + _debug("Abort %x", *abort_code); + if (*abort_code == 0) + return -EINVAL; + break; + + case RXRPC_ACCEPT: + if (*command != RXRPC_CMD_SEND_DATA) + return -EINVAL; + *command = RXRPC_CMD_ACCEPT; + if (len != 0) + return -EINVAL; + break; + + case RXRPC_EXCLUSIVE_CALL: + *_exclusive = true; + if (len != 0) + return -EINVAL; + break; + default: + return -EINVAL; + } + } + + if (!got_user_ID) + return -EINVAL; + _leave(" = 0"); + return 0; +} + +/* + * Create a new client call for sendmsg(). + */ +static struct rxrpc_call * +rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, + unsigned long user_call_ID, bool exclusive) +{ + struct rxrpc_conn_parameters cp; + struct rxrpc_call *call; + struct key *key; + + DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name); + + _enter(""); + + if (!msg->msg_name) + return ERR_PTR(-EDESTADDRREQ); + + key = rx->key; + if (key && !rx->key->payload.data[0]) + key = NULL; + + memset(&cp, 0, sizeof(cp)); + cp.local = rx->local; + cp.key = rx->key; + cp.security_level = rx->min_sec_level; + cp.exclusive = rx->exclusive | exclusive; + cp.service_id = srx->srx_service; + call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL); + + _leave(" = %p\n", call); + return call; +} + +/* + * send a message forming part of a client call through an RxRPC socket + * - caller holds the socket locked + * - the socket may be either a client socket or a server socket + */ +int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) +{ + enum rxrpc_command cmd; + struct rxrpc_call *call; + unsigned long user_call_ID = 0; + bool exclusive = false; + u32 abort_code = 0; + int ret; + + _enter(""); + + ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code, + &exclusive); + if (ret < 0) + return ret; + + if (cmd == RXRPC_CMD_ACCEPT) { + if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) + return -EINVAL; + call = rxrpc_accept_call(rx, user_call_ID, NULL); + if (IS_ERR(call)) + return PTR_ERR(call); + rxrpc_put_call(call, rxrpc_call_put); + return 0; + } + + call = rxrpc_find_call_by_user_ID(rx, user_call_ID); + if (!call) { + if (cmd != RXRPC_CMD_SEND_DATA) + return -EBADSLT; + call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, + exclusive); + if (IS_ERR(call)) + return PTR_ERR(call); + } + + _debug("CALL %d USR %lx ST %d on CONN %p", + call->debug_id, call->user_call_ID, call->state, call->conn); + + if (call->state >= RXRPC_CALL_COMPLETE) { + /* it's too late for this call */ + ret = -ESHUTDOWN; + } else if (cmd == RXRPC_CMD_SEND_ABORT) { + ret = 0; + if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED)) + ret = rxrpc_send_call_packet(call, + RXRPC_PACKET_TYPE_ABORT); + } else if (cmd != RXRPC_CMD_SEND_DATA) { + ret = -EINVAL; + } else if (rxrpc_is_client_call(call) && + call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) { + /* request phase complete for this client call */ + ret = -EPROTO; + } else if (rxrpc_is_service_call(call) && + call->state != RXRPC_CALL_SERVER_ACK_REQUEST && + call->state != RXRPC_CALL_SERVER_SEND_REPLY) { + /* Reply phase not begun or not complete for service call. */ + ret = -EPROTO; + } else { + ret = rxrpc_send_data(rx, call, msg, len); + } + + rxrpc_put_call(call, rxrpc_call_put); + _leave(" = %d", ret); + return ret; +} + +/** + * rxrpc_kernel_send_data - Allow a kernel service to send data on a call + * @sock: The socket the call is on + * @call: The call to send data through + * @msg: The data to send + * @len: The amount of data to send + * + * Allow a kernel service to send data on a call. The call must be in an state + * appropriate to sending data. No control data should be supplied in @msg, + * nor should an address be supplied. MSG_MORE should be flagged if there's + * more data to come, otherwise this data will end the transmission phase. + */ +int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, + struct msghdr *msg, size_t len) +{ + int ret; + + _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); + + ASSERTCMP(msg->msg_name, ==, NULL); + ASSERTCMP(msg->msg_control, ==, NULL); + + lock_sock(sock->sk); + + _debug("CALL %d USR %lx ST %d on CONN %p", + call->debug_id, call->user_call_ID, call->state, call->conn); + + if (call->state >= RXRPC_CALL_COMPLETE) { + ret = -ESHUTDOWN; /* it's too late for this call */ + } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && + call->state != RXRPC_CALL_SERVER_ACK_REQUEST && + call->state != RXRPC_CALL_SERVER_SEND_REPLY) { + ret = -EPROTO; /* request phase complete for this client call */ + } else { + ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); + } + + release_sock(sock->sk); + _leave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(rxrpc_kernel_send_data); + +/** + * rxrpc_kernel_abort_call - Allow a kernel service to abort a call + * @sock: The socket the call is on + * @call: The call to be aborted + * @abort_code: The abort code to stick into the ABORT packet + * @error: Local error value + * @why: 3-char string indicating why. + * + * Allow a kernel service to abort a call, if it's still in an abortable state. + */ +void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, + u32 abort_code, int error, const char *why) +{ + _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); + + lock_sock(sock->sk); + + if (rxrpc_abort_call(why, call, 0, abort_code, error)) + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + + release_sock(sock->sk); + _leave(""); +} + +EXPORT_SYMBOL(rxrpc_kernel_abort_call); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 06c51d4b622d..67b02c45271b 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -18,148 +18,82 @@ #include <net/af_rxrpc.h> #include "ar-internal.h" +#define select_skb_count(op) (op >= rxrpc_skb_tx_cleaned ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs) + /* - * set up for the ACK at the end of the receive phase when we discard the final - * receive phase data packet - * - called with softirqs disabled + * Note the allocation or reception of a socket buffer. */ -static void rxrpc_request_final_ACK(struct rxrpc_call *call) +void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { - /* the call may be aborted before we have a chance to ACK it */ - write_lock(&call->state_lock); - - switch (call->state) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - call->state = RXRPC_CALL_CLIENT_FINAL_ACK; - _debug("request final ACK"); - - /* get an extra ref on the call for the final-ACK generator to - * release */ - rxrpc_get_call(call); - set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - if (try_to_del_timer_sync(&call->ack_timer) >= 0) - rxrpc_queue_call(call); - break; - - case RXRPC_CALL_SERVER_RECV_REQUEST: - call->state = RXRPC_CALL_SERVER_ACK_REQUEST; - default: - break; - } - - write_unlock(&call->state_lock); + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); } /* - * drop the bottom ACK off of the call ACK window and advance the window + * Note the re-emergence of a socket buffer from a queue or buffer. */ -static void rxrpc_hard_ACK_data(struct rxrpc_call *call, - struct rxrpc_skb_priv *sp) +void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { - int loop; - u32 seq; - - spin_lock_bh(&call->lock); - - _debug("hard ACK #%u", sp->hdr.seq); - - for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) { - call->ackr_window[loop] >>= 1; - call->ackr_window[loop] |= - call->ackr_window[loop + 1] << (BITS_PER_LONG - 1); - } - - seq = sp->hdr.seq; - ASSERTCMP(seq, ==, call->rx_data_eaten + 1); - call->rx_data_eaten = seq; - - if (call->ackr_win_top < UINT_MAX) - call->ackr_win_top++; - - ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE, - call->rx_data_post, >=, call->rx_data_recv); - ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE, - call->rx_data_recv, >=, call->rx_data_eaten); - - if (sp->hdr.flags & RXRPC_LAST_PACKET) { - rxrpc_request_final_ACK(call); - } else if (atomic_dec_and_test(&call->ackr_not_idle) && - test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) { - /* We previously soft-ACK'd some received packets that have now - * been consumed, so send a hard-ACK if no more packets are - * immediately forthcoming to allow the transmitter to free up - * its Tx bufferage. - */ - _debug("send Rx idle ACK"); - __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial, - false); + const void *here = __builtin_return_address(0); + if (skb) { + int n = atomic_read(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); } - - spin_unlock_bh(&call->lock); } -/** - * rxrpc_kernel_data_consumed - Record consumption of data message - * @call: The call to which the message pertains. - * @skb: Message holding data - * - * Record the consumption of a data message and generate an ACK if appropriate. - * The call state is shifted if this was the final packet. The caller must be - * in process context with no spinlocks held. - * - * TODO: Actually generate the ACK here rather than punting this to the - * workqueue. +/* + * Note the addition of a ref on a socket buffer. */ -void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb) +void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - _enter("%d,%p{%u}", call->debug_id, skb, sp->hdr.seq); - - ASSERTCMP(sp->call, ==, call); - ASSERTCMP(sp->hdr.type, ==, RXRPC_PACKET_TYPE_DATA); - - /* TODO: Fix the sequence number tracking */ - ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv); - ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1); - ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); - - call->rx_data_recv = sp->hdr.seq; - rxrpc_hard_ACK_data(call, sp); + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + skb_get(skb); } -EXPORT_SYMBOL(rxrpc_kernel_data_consumed); /* - * Destroy a packet that has an RxRPC control buffer + * Note the destruction of a socket buffer. */ -void rxrpc_packet_destructor(struct sk_buff *skb) +void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_call *call = sp->call; - - _enter("%p{%p}", skb, call); - - if (call) { - if (atomic_dec_return(&call->skb_count) < 0) - BUG(); - rxrpc_put_call(call); - sp->call = NULL; + const void *here = __builtin_return_address(0); + if (skb) { + int n; + CHECK_SLAB_OKAY(&skb->users); + n = atomic_dec_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + kfree_skb(skb); } +} - if (skb->sk) - sock_rfree(skb); - _leave(""); +/* + * Note the injected loss of a socket buffer. + */ +void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +{ + const void *here = __builtin_return_address(0); + if (skb) { + int n; + CHECK_SLAB_OKAY(&skb->users); + n = atomic_dec_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + kfree_skb(skb); + } } -/** - * rxrpc_kernel_free_skb - Free an RxRPC socket buffer - * @skb: The socket buffer to be freed - * - * Let RxRPC free its own socket buffer, permitting it to maintain debug - * accounting. +/* + * Clear a queue of socket buffers. */ -void rxrpc_kernel_free_skb(struct sk_buff *skb) +void rxrpc_purge_queue(struct sk_buff_head *list) { - rxrpc_free_skb(skb); + const void *here = __builtin_return_address(0); + struct sk_buff *skb; + while ((skb = skb_dequeue((list))) != NULL) { + int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged)); + trace_rxrpc_skb(skb, rxrpc_skb_rx_purged, + atomic_read(&skb->users), n, here); + kfree_skb(skb); + } } -EXPORT_SYMBOL(rxrpc_kernel_free_skb); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 03ad08774d4e..34c706d2f79c 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -20,7 +20,7 @@ static const unsigned int one = 1; static const unsigned int four = 4; static const unsigned int thirtytwo = 32; static const unsigned int n_65535 = 65535; -static const unsigned int n_max_acks = RXRPC_MAXACKS; +static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; /* * RxRPC operating parameters. @@ -35,7 +35,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_requested_ack_delay, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&zero, }, { @@ -43,7 +43,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_soft_ack_delay, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, { @@ -51,7 +51,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_idle_ack_delay, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, { @@ -59,6 +59,22 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_resend_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = (void *)&one, + }, + { + .procname = "idle_conn_expiry", + .data = &rxrpc_conn_idle_client_expiry, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + .extra1 = (void *)&one, + }, + { + .procname = "idle_conn_fast_expiry", + .data = &rxrpc_conn_idle_client_fast_expiry, + .maxlen = sizeof(unsigned int), + .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, .extra1 = (void *)&one, }, @@ -69,29 +85,28 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_max_call_lifetime, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, + + /* Non-time values */ { - .procname = "dead_call_expiry", - .data = &rxrpc_dead_call_expiry, + .procname = "max_client_conns", + .data = &rxrpc_max_client_connections, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - .extra1 = (void *)&one, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&rxrpc_reap_client_connections, }, - - /* Values measured in seconds */ { - .procname = "connection_expiry", - .data = &rxrpc_connection_expiry, + .procname = "reap_client_conns", + .data = &rxrpc_reap_client_connections, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_minmax, .extra1 = (void *)&one, + .extra2 = (void *)&rxrpc_max_client_connections, }, - - /* Non-time values */ { .procname = "max_backlog", .data = &rxrpc_max_backlog, diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c index b88914d53ca5..ff7af71c4b49 100644 --- a/net/rxrpc/utils.c +++ b/net/rxrpc/utils.c @@ -30,6 +30,7 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb) srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; return 0; +#ifdef CONFIG_AF_RXRPC_IPV6 case ETH_P_IPV6: srx->transport_type = SOCK_DGRAM; srx->transport_len = sizeof(srx->transport.sin6); @@ -37,6 +38,7 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb) srx->transport.sin6.sin6_port = udp_hdr(skb)->source; srx->transport.sin6.sin6_addr = ipv6_hdr(skb)->saddr; return 0; +#endif default: pr_warn_ratelimited("AF_RXRPC: Unknown eth protocol %u\n", |