summaryrefslogtreecommitdiffstats
path: root/net/rxrpc
diff options
context:
space:
mode:
Diffstat (limited to 'net/rxrpc')
-rw-r--r--net/rxrpc/Kconfig14
-rw-r--r--net/rxrpc/Makefile1
-rw-r--r--net/rxrpc/af_rxrpc.c175
-rw-r--r--net/rxrpc/ar-internal.h832
-rw-r--r--net/rxrpc/call_accept.c717
-rw-r--r--net/rxrpc/call_event.c1426
-rw-r--r--net/rxrpc/call_object.c796
-rw-r--r--net/rxrpc/conn_client.c993
-rw-r--r--net/rxrpc/conn_event.c271
-rw-r--r--net/rxrpc/conn_object.c204
-rw-r--r--net/rxrpc/conn_service.c117
-rw-r--r--net/rxrpc/input.c1399
-rw-r--r--net/rxrpc/insecure.c26
-rw-r--r--net/rxrpc/local_event.c19
-rw-r--r--net/rxrpc/local_object.c51
-rw-r--r--net/rxrpc/misc.c192
-rw-r--r--net/rxrpc/output.c933
-rw-r--r--net/rxrpc/peer_event.c103
-rw-r--r--net/rxrpc/peer_object.c199
-rw-r--r--net/rxrpc/proc.c72
-rw-r--r--net/rxrpc/recvmsg.c866
-rw-r--r--net/rxrpc/rxkad.c209
-rw-r--r--net/rxrpc/security.c18
-rw-r--r--net/rxrpc/sendmsg.c606
-rw-r--r--net/rxrpc/skbuff.c174
-rw-r--r--net/rxrpc/sysctl.c45
-rw-r--r--net/rxrpc/utils.c2
27 files changed, 6026 insertions, 4434 deletions
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index 784c53163b7b..86f8853a038c 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -19,6 +19,20 @@ config AF_RXRPC
See Documentation/networking/rxrpc.txt.
+config AF_RXRPC_IPV6
+ bool "IPv6 support for RxRPC"
+ depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC)
+ help
+ Say Y here to allow AF_RXRPC to use IPV6 UDP as well as IPV4 UDP as
+ its network transport.
+
+config AF_RXRPC_INJECT_LOSS
+ bool "Inject packet loss into RxRPC packet stream"
+ depends on AF_RXRPC
+ help
+ Say Y here to inject packet loss by discarding some received and some
+ transmitted packets.
+
config AF_RXRPC_DEBUG
bool "RxRPC dynamic debugging"
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index 10f3f48a16a8..8fc6ea347182 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -22,6 +22,7 @@ af-rxrpc-y := \
peer_object.o \
recvmsg.o \
security.o \
+ sendmsg.o \
skbuff.o \
utils.o
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 88effadd4b16..44c9c2b0b190 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -16,12 +16,14 @@
#include <linux/net.h>
#include <linux/slab.h>
#include <linux/skbuff.h>
+#include <linux/random.h>
#include <linux/poll.h>
#include <linux/proc_fs.h>
#include <linux/key-type.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
+#define CREATE_TRACE_POINTS
#include "ar-internal.h"
MODULE_DESCRIPTION("RxRPC network protocol");
@@ -43,7 +45,7 @@ u32 rxrpc_epoch;
atomic_t rxrpc_debug_id;
/* count of skbs currently in use */
-atomic_t rxrpc_n_skbs;
+atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs;
struct workqueue_struct *rxrpc_workqueue;
@@ -104,19 +106,25 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx,
case AF_INET:
if (srx->transport_len < sizeof(struct sockaddr_in))
return -EINVAL;
- _debug("INET: %x @ %pI4",
- ntohs(srx->transport.sin.sin_port),
- &srx->transport.sin.sin_addr);
tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad);
break;
+#ifdef CONFIG_AF_RXRPC_IPV6
case AF_INET6:
+ if (srx->transport_len < sizeof(struct sockaddr_in6))
+ return -EINVAL;
+ tail = offsetof(struct sockaddr_rxrpc, transport) +
+ sizeof(struct sockaddr_in6);
+ break;
+#endif
+
default:
return -EAFNOSUPPORT;
}
if (tail < len)
memset((void *)srx + tail, 0, len - tail);
+ _debug("INET: %pISp", &srx->transport);
return 0;
}
@@ -128,7 +136,8 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr;
struct sock *sk = sock->sk;
struct rxrpc_local *local;
- struct rxrpc_sock *rx = rxrpc_sk(sk), *prx;
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+ u16 service_id = srx->srx_service;
int ret;
_enter("%p,%p,%d", rx, saddr, len);
@@ -152,16 +161,13 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
goto error_unlock;
}
- if (rx->srx.srx_service) {
- write_lock_bh(&local->services_lock);
- list_for_each_entry(prx, &local->services, listen_link) {
- if (prx->srx.srx_service == rx->srx.srx_service)
- goto service_in_use;
- }
-
+ if (service_id) {
+ write_lock(&local->services_lock);
+ if (rcu_access_pointer(local->service))
+ goto service_in_use;
rx->local = local;
- list_add_tail(&rx->listen_link, &local->services);
- write_unlock_bh(&local->services_lock);
+ rcu_assign_pointer(local->service, rx);
+ write_unlock(&local->services_lock);
rx->sk.sk_state = RXRPC_SERVER_BOUND;
} else {
@@ -174,7 +180,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
return 0;
service_in_use:
- write_unlock_bh(&local->services_lock);
+ write_unlock(&local->services_lock);
rxrpc_put_local(local);
ret = -EADDRINUSE;
error_unlock:
@@ -191,7 +197,7 @@ static int rxrpc_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
struct rxrpc_sock *rx = rxrpc_sk(sk);
- unsigned int max;
+ unsigned int max, old;
int ret;
_enter("%p,%d", rx, backlog);
@@ -210,9 +216,13 @@ static int rxrpc_listen(struct socket *sock, int backlog)
backlog = max;
else if (backlog < 0 || backlog > max)
break;
+ old = sk->sk_max_ack_backlog;
sk->sk_max_ack_backlog = backlog;
- rx->sk.sk_state = RXRPC_SERVER_LISTENING;
- ret = 0;
+ ret = rxrpc_service_prealloc(rx, GFP_KERNEL);
+ if (ret == 0)
+ rx->sk.sk_state = RXRPC_SERVER_LISTENING;
+ else
+ sk->sk_max_ack_backlog = old;
break;
default:
ret = -EBUSY;
@@ -230,6 +240,8 @@ static int rxrpc_listen(struct socket *sock, int backlog)
* @srx: The address of the peer to contact
* @key: The security context to use (defaults to socket setting)
* @user_call_ID: The ID to use
+ * @gfp: The allocation constraints
+ * @notify_rx: Where to send notifications instead of socket queue
*
* Allow a kernel service to begin a call on the nominated socket. This just
* sets up all the internal tracking structures and allocates connection and
@@ -242,7 +254,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
struct sockaddr_rxrpc *srx,
struct key *key,
unsigned long user_call_ID,
- gfp_t gfp)
+ gfp_t gfp,
+ rxrpc_notify_rx_t notify_rx)
{
struct rxrpc_conn_parameters cp;
struct rxrpc_call *call;
@@ -269,6 +282,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
cp.exclusive = false;
cp.service_id = srx->srx_service;
call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp);
+ if (!IS_ERR(call))
+ call->notify_rx = notify_rx;
release_sock(&rx->sk);
_leave(" = %p", call);
@@ -278,40 +293,39 @@ EXPORT_SYMBOL(rxrpc_kernel_begin_call);
/**
* rxrpc_kernel_end_call - Allow a kernel service to end a call it was using
+ * @sock: The socket the call is on
* @call: The call to end
*
* Allow a kernel service to end a call it was using. The call must be
* complete before this is called (the call should be aborted if necessary).
*/
-void rxrpc_kernel_end_call(struct rxrpc_call *call)
+void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call)
{
_enter("%d{%d}", call->debug_id, atomic_read(&call->usage));
- rxrpc_remove_user_ID(call->socket, call);
- rxrpc_put_call(call);
+ rxrpc_release_call(rxrpc_sk(sock->sk), call);
+ rxrpc_put_call(call, rxrpc_call_put_kernel);
}
EXPORT_SYMBOL(rxrpc_kernel_end_call);
/**
- * rxrpc_kernel_intercept_rx_messages - Intercept received RxRPC messages
+ * rxrpc_kernel_new_call_notification - Get notifications of new calls
* @sock: The socket to intercept received messages on
- * @interceptor: The function to pass the messages to
+ * @notify_new_call: Function to be called when new calls appear
+ * @discard_new_call: Function to discard preallocated calls
*
- * Allow a kernel service to intercept messages heading for the Rx queue on an
- * RxRPC socket. They get passed to the specified function instead.
- * @interceptor should free the socket buffers it is given. @interceptor is
- * called with the socket receive queue spinlock held and softirqs disabled -
- * this ensures that the messages will be delivered in the right order.
+ * Allow a kernel service to be given notifications about new calls.
*/
-void rxrpc_kernel_intercept_rx_messages(struct socket *sock,
- rxrpc_interceptor_t interceptor)
+void rxrpc_kernel_new_call_notification(
+ struct socket *sock,
+ rxrpc_notify_new_call_t notify_new_call,
+ rxrpc_discard_new_call_t discard_new_call)
{
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
- _enter("");
- rx->interceptor = interceptor;
+ rx->notify_new_call = notify_new_call;
+ rx->discard_new_call = discard_new_call;
}
-
-EXPORT_SYMBOL(rxrpc_kernel_intercept_rx_messages);
+EXPORT_SYMBOL(rxrpc_kernel_new_call_notification);
/*
* connect an RxRPC socket
@@ -391,6 +405,23 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
switch (rx->sk.sk_state) {
case RXRPC_UNBOUND:
+ rx->srx.srx_family = AF_RXRPC;
+ rx->srx.srx_service = 0;
+ rx->srx.transport_type = SOCK_DGRAM;
+ rx->srx.transport.family = rx->family;
+ switch (rx->family) {
+ case AF_INET:
+ rx->srx.transport_len = sizeof(struct sockaddr_in);
+ break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ rx->srx.transport_len = sizeof(struct sockaddr_in6);
+ break;
+#endif
+ default:
+ ret = -EAFNOSUPPORT;
+ goto error_unlock;
+ }
local = rxrpc_lookup_local(&rx->srx);
if (IS_ERR(local)) {
ret = PTR_ERR(local);
@@ -505,15 +536,16 @@ error:
static unsigned int rxrpc_poll(struct file *file, struct socket *sock,
poll_table *wait)
{
- unsigned int mask;
struct sock *sk = sock->sk;
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+ unsigned int mask;
sock_poll_wait(file, sk_sleep(sk), wait);
mask = 0;
/* the socket is readable if there are any messages waiting on the Rx
* queue */
- if (!skb_queue_empty(&sk->sk_receive_queue))
+ if (!list_empty(&rx->recvmsg_q))
mask |= POLLIN | POLLRDNORM;
/* the socket is writable if there is space to add new data to the
@@ -540,7 +572,8 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
return -EAFNOSUPPORT;
/* we support transport protocol UDP/UDP6 only */
- if (protocol != PF_INET)
+ if (protocol != PF_INET &&
+ IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol != PF_INET6)
return -EPROTONOSUPPORT;
if (sock->type != SOCK_DGRAM)
@@ -554,6 +587,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
return -ENOMEM;
sock_init_data(sock, sk);
+ sock_set_flag(sk, SOCK_RCU_FREE);
sk->sk_state = RXRPC_UNBOUND;
sk->sk_write_space = rxrpc_write_space;
sk->sk_max_ack_backlog = 0;
@@ -563,9 +597,11 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
rx->family = protocol;
rx->calls = RB_ROOT;
- INIT_LIST_HEAD(&rx->listen_link);
- INIT_LIST_HEAD(&rx->secureq);
- INIT_LIST_HEAD(&rx->acceptq);
+ spin_lock_init(&rx->incoming_lock);
+ INIT_LIST_HEAD(&rx->sock_calls);
+ INIT_LIST_HEAD(&rx->to_be_accepted);
+ INIT_LIST_HEAD(&rx->recvmsg_q);
+ rwlock_init(&rx->recvmsg_lock);
rwlock_init(&rx->call_lock);
memset(&rx->srx, 0, sizeof(rx->srx));
@@ -574,6 +610,39 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
}
/*
+ * Kill all the calls on a socket and shut it down.
+ */
+static int rxrpc_shutdown(struct socket *sock, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct rxrpc_sock *rx = rxrpc_sk(sk);
+ int ret = 0;
+
+ _enter("%p,%d", sk, flags);
+
+ if (flags != SHUT_RDWR)
+ return -EOPNOTSUPP;
+ if (sk->sk_state == RXRPC_CLOSE)
+ return -ESHUTDOWN;
+
+ lock_sock(sk);
+
+ spin_lock_bh(&sk->sk_receive_queue.lock);
+ if (sk->sk_state < RXRPC_CLOSE) {
+ sk->sk_state = RXRPC_CLOSE;
+ sk->sk_shutdown = SHUTDOWN_MASK;
+ } else {
+ ret = -ESHUTDOWN;
+ }
+ spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+ rxrpc_discard_prealloc(rx);
+
+ release_sock(sk);
+ return ret;
+}
+
+/*
* RxRPC socket destructor
*/
static void rxrpc_sock_destructor(struct sock *sk)
@@ -609,15 +678,14 @@ static int rxrpc_release_sock(struct sock *sk)
sk->sk_state = RXRPC_CLOSE;
spin_unlock_bh(&sk->sk_receive_queue.lock);
- ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1);
-
- if (!list_empty(&rx->listen_link)) {
- write_lock_bh(&rx->local->services_lock);
- list_del(&rx->listen_link);
- write_unlock_bh(&rx->local->services_lock);
+ if (rx->local && rx->local->service == rx) {
+ write_lock(&rx->local->services_lock);
+ rx->local->service = NULL;
+ write_unlock(&rx->local->services_lock);
}
/* try to flush out this socket */
+ rxrpc_discard_prealloc(rx);
rxrpc_release_calls_on_socket(rx);
flush_workqueue(rxrpc_workqueue);
rxrpc_purge_queue(&sk->sk_receive_queue);
@@ -666,7 +734,7 @@ static const struct proto_ops rxrpc_rpc_ops = {
.poll = rxrpc_poll,
.ioctl = sock_no_ioctl,
.listen = rxrpc_listen,
- .shutdown = sock_no_shutdown,
+ .shutdown = rxrpc_shutdown,
.setsockopt = rxrpc_setsockopt,
.getsockopt = sock_no_getsockopt,
.sendmsg = rxrpc_sendmsg,
@@ -697,7 +765,13 @@ static int __init af_rxrpc_init(void)
BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb));
- rxrpc_epoch = get_seconds();
+ get_random_bytes(&rxrpc_epoch, sizeof(rxrpc_epoch));
+ rxrpc_epoch |= RXRPC_RANDOM_EPOCH;
+ get_random_bytes(&rxrpc_client_conn_ids.cur,
+ sizeof(rxrpc_client_conn_ids.cur));
+ rxrpc_client_conn_ids.cur &= 0x3fffffff;
+ if (rxrpc_client_conn_ids.cur == 0)
+ rxrpc_client_conn_ids.cur = 1;
ret = -ENOMEM;
rxrpc_call_jar = kmem_cache_create(
@@ -788,7 +862,8 @@ static void __exit af_rxrpc_exit(void)
proto_unregister(&rxrpc_proto);
rxrpc_destroy_all_calls();
rxrpc_destroy_all_connections();
- ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0);
+ ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0);
+ ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0);
rxrpc_destroy_all_locals();
remove_proc_entry("rxrpc_conns", init_net.proc_net);
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index ff83fb1ddd47..d38dffd78085 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -35,11 +35,23 @@ struct rxrpc_crypt {
#define rxrpc_queue_delayed_work(WS,D) \
queue_delayed_work(rxrpc_workqueue, (WS), (D))
-#define rxrpc_queue_call(CALL) rxrpc_queue_work(&(CALL)->processor)
-
struct rxrpc_connection;
/*
+ * Mark applied to socket buffers.
+ */
+enum rxrpc_skb_mark {
+ RXRPC_SKB_MARK_DATA, /* data message */
+ RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */
+ RXRPC_SKB_MARK_BUSY, /* server busy message */
+ RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */
+ RXRPC_SKB_MARK_LOCAL_ABORT, /* local abort message */
+ RXRPC_SKB_MARK_NET_ERROR, /* network error message */
+ RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */
+ RXRPC_SKB_MARK_NEW_CALL, /* local error message */
+};
+
+/*
* sk_state for RxRPC sockets
*/
enum {
@@ -52,19 +64,44 @@ enum {
};
/*
+ * Service backlog preallocation.
+ *
+ * This contains circular buffers of preallocated peers, connections and calls
+ * for incoming service calls and their head and tail pointers. This allows
+ * calls to be set up in the data_ready handler, thereby avoiding the need to
+ * shuffle packets around so much.
+ */
+struct rxrpc_backlog {
+ unsigned short peer_backlog_head;
+ unsigned short peer_backlog_tail;
+ unsigned short conn_backlog_head;
+ unsigned short conn_backlog_tail;
+ unsigned short call_backlog_head;
+ unsigned short call_backlog_tail;
+#define RXRPC_BACKLOG_MAX 32
+ struct rxrpc_peer *peer_backlog[RXRPC_BACKLOG_MAX];
+ struct rxrpc_connection *conn_backlog[RXRPC_BACKLOG_MAX];
+ struct rxrpc_call *call_backlog[RXRPC_BACKLOG_MAX];
+};
+
+/*
* RxRPC socket definition
*/
struct rxrpc_sock {
/* WARNING: sk has to be the first member */
struct sock sk;
- rxrpc_interceptor_t interceptor; /* kernel service Rx interceptor function */
+ rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */
+ rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */
struct rxrpc_local *local; /* local endpoint */
- struct list_head listen_link; /* link in the local endpoint's listen list */
- struct list_head secureq; /* calls awaiting connection security clearance */
- struct list_head acceptq; /* calls awaiting acceptance */
+ struct rxrpc_backlog *backlog; /* Preallocation for services */
+ spinlock_t incoming_lock; /* Incoming call vs service shutdown lock */
+ struct list_head sock_calls; /* List of calls owned by this socket */
+ struct list_head to_be_accepted; /* calls awaiting acceptance */
+ struct list_head recvmsg_q; /* Calls awaiting recvmsg's attention */
+ rwlock_t recvmsg_lock; /* Lock for recvmsg_q */
struct key *key; /* security for this socket */
struct key *securities; /* list of server security descriptors */
- struct rb_root calls; /* outstanding calls on this socket */
+ struct rb_root calls; /* User ID -> call mapping */
unsigned long flags;
#define RXRPC_SOCK_CONNECTED 0 /* connect_srx is set */
rwlock_t call_lock; /* lock for calls */
@@ -103,13 +140,11 @@ struct rxrpc_host_header {
* - max 48 bytes (struct sk_buff::cb)
*/
struct rxrpc_skb_priv {
- struct rxrpc_call *call; /* call with which associated */
- unsigned long resend_at; /* time in jiffies at which to resend */
union {
- unsigned int offset; /* offset into buffer of next read */
+ u8 nr_jumbo; /* Number of jumbo subpackets */
+ };
+ union {
int remain; /* amount of space remaining for next write */
- u32 error; /* network error code */
- bool need_resend; /* T if needs resending */
};
struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
@@ -117,13 +152,6 @@ struct rxrpc_skb_priv {
#define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb)
-enum rxrpc_command {
- RXRPC_CMD_SEND_DATA, /* send data message */
- RXRPC_CMD_SEND_ABORT, /* request abort generation */
- RXRPC_CMD_ACCEPT, /* [server] accept incoming call */
- RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */
-};
-
/*
* RxRPC security module interface
*/
@@ -150,7 +178,12 @@ struct rxrpc_security {
void *);
/* verify the security on a received packet */
- int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, u32 *);
+ int (*verify_packet)(struct rxrpc_call *, struct sk_buff *,
+ unsigned int, unsigned int, rxrpc_seq_t, u16);
+
+ /* Locate the data in a received packet that has been verified. */
+ void (*locate_data)(struct rxrpc_call *, struct sk_buff *,
+ unsigned int *, unsigned int *);
/* issue a challenge */
int (*issue_challenge)(struct rxrpc_connection *);
@@ -180,9 +213,8 @@ struct rxrpc_local {
struct list_head link;
struct socket *socket; /* my UDP socket */
struct work_struct processor;
- struct list_head services; /* services listening on this endpoint */
+ struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */
struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
- struct sk_buff_head accept_queue; /* incoming calls awaiting acceptance */
struct sk_buff_head reject_queue; /* packets awaiting rejection */
struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */
struct rb_root client_conns; /* Client connections by socket params */
@@ -220,10 +252,12 @@ struct rxrpc_peer {
/* calculated RTT cache */
#define RXRPC_RTT_CACHE_SIZE 32
- suseconds_t rtt; /* current RTT estimate (in uS) */
- unsigned int rtt_point; /* next entry at which to insert */
- unsigned int rtt_usage; /* amount of cache actually used */
- suseconds_t rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* calculated RTT cache */
+ ktime_t rtt_last_req; /* Time of last RTT request */
+ u64 rtt; /* Current RTT estimate (in nS) */
+ u64 rtt_sum; /* Sum of cache contents */
+ u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */
+ u8 rtt_cursor; /* next entry at which to insert */
+ u8 rtt_usage; /* amount of cache actually used */
};
/*
@@ -255,6 +289,9 @@ enum rxrpc_conn_flag {
RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */
RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */
RXRPC_CONN_IN_CLIENT_CONNS, /* Conn is in local->client_conns */
+ RXRPC_CONN_EXPOSED, /* Conn has extra ref for exposure */
+ RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */
+ RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */
};
/*
@@ -265,17 +302,29 @@ enum rxrpc_conn_event {
};
/*
+ * The connection cache state.
+ */
+enum rxrpc_conn_cache_state {
+ RXRPC_CONN_CLIENT_INACTIVE, /* Conn is not yet listed */
+ RXRPC_CONN_CLIENT_WAITING, /* Conn is on wait list, waiting for capacity */
+ RXRPC_CONN_CLIENT_ACTIVE, /* Conn is on active list, doing calls */
+ RXRPC_CONN_CLIENT_CULLED, /* Conn is culled and delisted, doing calls */
+ RXRPC_CONN_CLIENT_IDLE, /* Conn is on idle list, doing mostly nothing */
+ RXRPC_CONN__NR_CACHE_STATES
+};
+
+/*
* The connection protocol state.
*/
enum rxrpc_conn_proto_state {
RXRPC_CONN_UNUSED, /* Connection not yet attempted */
RXRPC_CONN_CLIENT, /* Client connection */
+ RXRPC_CONN_SERVICE_PREALLOC, /* Service connection preallocation */
RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */
RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */
RXRPC_CONN_SERVICE, /* Service secured connection */
RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */
RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */
- RXRPC_CONN_NETWORK_ERROR, /* Conn terminated by network error */
RXRPC_CONN__NR_STATES
};
@@ -288,23 +337,33 @@ struct rxrpc_connection {
struct rxrpc_conn_proto proto;
struct rxrpc_conn_parameters params;
- spinlock_t channel_lock;
+ atomic_t usage;
+ struct rcu_head rcu;
+ struct list_head cache_link;
+ spinlock_t channel_lock;
+ unsigned char active_chans; /* Mask of active channels */
+#define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1)
+ struct list_head waiting_calls; /* Calls waiting for channels */
struct rxrpc_channel {
struct rxrpc_call __rcu *call; /* Active call */
u32 call_id; /* ID of current call */
u32 call_counter; /* Call ID counter */
u32 last_call; /* ID of last call */
- u32 last_result; /* Result of last call (0/abort) */
+ u8 last_type; /* Type of last packet */
+ u16 last_service_id;
+ union {
+ u32 last_seq;
+ u32 last_abort;
+ };
} channels[RXRPC_MAXCALLS];
- wait_queue_head_t channel_wq; /* queue to wait for channel to become available */
- struct rcu_head rcu;
struct work_struct processor; /* connection event processor */
union {
struct rb_node client_node; /* Node in local->client_conns */
struct rb_node service_node; /* Node in peer->service_conns */
};
+ struct list_head proc_link; /* link in procfs list */
struct list_head link; /* link in master connection list */
struct sk_buff_head rx_queue; /* received conn-level packets */
const struct rxrpc_security *security; /* applied security module */
@@ -313,21 +372,18 @@ struct rxrpc_connection {
struct rxrpc_crypt csum_iv; /* packet checksum base */
unsigned long flags;
unsigned long events;
- unsigned long put_time; /* Time at which last put */
+ unsigned long idle_timestamp; /* Time at which last became idle */
spinlock_t state_lock; /* state-change lock */
- atomic_t usage;
- enum rxrpc_conn_proto_state state : 8; /* current state of connection */
+ enum rxrpc_conn_cache_state cache_state;
+ enum rxrpc_conn_proto_state state; /* current state of connection */
u32 local_abort; /* local abort code */
u32 remote_abort; /* remote abort code */
- int error; /* local error incurred */
int debug_id; /* debug ID for printks */
atomic_t serial; /* packet serial number counter */
- atomic_t hi_serial; /* highest serial number received */
- atomic_t avail_chans; /* number of channels available */
+ unsigned int hi_serial; /* highest serial number received */
+ u32 security_nonce; /* response re-use preventer */
u8 size_align; /* data size alignment (for security) */
- u8 header_size; /* rxrpc + security header size */
u8 security_size; /* security header size */
- u32 security_nonce; /* response re-use preventer */
u8 security_ix; /* security type */
u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */
};
@@ -337,37 +393,23 @@ struct rxrpc_connection {
*/
enum rxrpc_call_flag {
RXRPC_CALL_RELEASED, /* call has been released - no more message to userspace */
- RXRPC_CALL_TERMINAL_MSG, /* call has given the socket its final message */
- RXRPC_CALL_RCVD_LAST, /* all packets received */
- RXRPC_CALL_RUN_RTIMER, /* Tx resend timer started */
- RXRPC_CALL_TX_SOFT_ACK, /* sent some soft ACKs */
- RXRPC_CALL_PROC_BUSY, /* the processor is busy */
- RXRPC_CALL_INIT_ACCEPT, /* acceptance was initiated */
RXRPC_CALL_HAS_USERID, /* has a user ID attached */
- RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */
+ RXRPC_CALL_IS_SERVICE, /* Call is service call */
+ RXRPC_CALL_EXPOSED, /* The call was exposed to the world */
+ RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */
+ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */
+ RXRPC_CALL_PINGING, /* Ping in process */
+ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */
};
/*
* Events that can be raised on a call.
*/
enum rxrpc_call_event {
- RXRPC_CALL_EV_RCVD_ACKALL, /* ACKALL or reply received */
- RXRPC_CALL_EV_RCVD_BUSY, /* busy packet received */
- RXRPC_CALL_EV_RCVD_ABORT, /* abort packet received */
- RXRPC_CALL_EV_RCVD_ERROR, /* network error received */
- RXRPC_CALL_EV_ACK_FINAL, /* need to generate final ACK (and release call) */
RXRPC_CALL_EV_ACK, /* need to generate ACK */
- RXRPC_CALL_EV_REJECT_BUSY, /* need to generate busy message */
RXRPC_CALL_EV_ABORT, /* need to generate abort */
- RXRPC_CALL_EV_CONN_ABORT, /* local connection abort generated */
- RXRPC_CALL_EV_RESEND_TIMER, /* Tx resend timer expired */
+ RXRPC_CALL_EV_TIMER, /* Timer expired */
RXRPC_CALL_EV_RESEND, /* Tx resend required */
- RXRPC_CALL_EV_DRAIN_RX_OOS, /* drain the Rx out of sequence queue */
- RXRPC_CALL_EV_LIFE_TIMER, /* call's lifetimer ran out */
- RXRPC_CALL_EV_ACCEPTED, /* incoming call accepted by userspace app */
- RXRPC_CALL_EV_SECURED, /* incoming call's connection is now secure */
- RXRPC_CALL_EV_POST_ACCEPT, /* need to post an "accept?" message to the app */
- RXRPC_CALL_EV_RELEASE, /* need to release the call's resources */
};
/*
@@ -379,20 +421,38 @@ enum rxrpc_call_state {
RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */
RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */
RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */
- RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */
+ RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */
RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */
RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */
RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */
RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */
RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */
RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */
- RXRPC_CALL_COMPLETE, /* - call completed */
- RXRPC_CALL_SERVER_BUSY, /* - call rejected by busy server */
+ RXRPC_CALL_COMPLETE, /* - call complete */
+ NR__RXRPC_CALL_STATES
+};
+
+/*
+ * Call completion condition (state == RXRPC_CALL_COMPLETE).
+ */
+enum rxrpc_call_completion {
+ RXRPC_CALL_SUCCEEDED, /* - Normal termination */
RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
+ RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */
RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
- RXRPC_CALL_DEAD, /* - call is dead */
- NR__RXRPC_CALL_STATES
+ NR__RXRPC_CALL_COMPLETIONS
+};
+
+/*
+ * Call Tx congestion management modes.
+ */
+enum rxrpc_congest_mode {
+ RXRPC_CALL_SLOW_START,
+ RXRPC_CALL_CONGEST_AVOIDANCE,
+ RXRPC_CALL_PACKET_LOSS,
+ RXRPC_CALL_FAST_RETRANSMIT,
+ NR__RXRPC_CONGEST_MODES
};
/*
@@ -402,92 +462,329 @@ enum rxrpc_call_state {
struct rxrpc_call {
struct rcu_head rcu;
struct rxrpc_connection *conn; /* connection carrying call */
- struct rxrpc_sock *socket; /* socket responsible */
- struct timer_list lifetimer; /* lifetime remaining on call */
- struct timer_list deadspan; /* reap timer for re-ACK'ing, etc */
- struct timer_list ack_timer; /* ACK generation timer */
- struct timer_list resend_timer; /* Tx resend timer */
- struct work_struct destroyer; /* call destroyer */
- struct work_struct processor; /* packet processor and ACK generator */
+ struct rxrpc_peer *peer; /* Peer record for remote address */
+ struct rxrpc_sock __rcu *socket; /* socket responsible */
+ ktime_t ack_at; /* When deferred ACK needs to happen */
+ ktime_t resend_at; /* When next resend needs to happen */
+ ktime_t expire_at; /* When the call times out */
+ struct timer_list timer; /* Combined event timer */
+ struct work_struct processor; /* Event processor */
+ rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
struct list_head link; /* link in master call list */
+ struct list_head chan_wait_link; /* Link in conn->waiting_calls */
struct hlist_node error_link; /* link in error distribution list */
- struct list_head accept_link; /* calls awaiting acceptance */
- struct rb_node sock_node; /* node in socket call tree */
- struct sk_buff_head rx_queue; /* received packets */
- struct sk_buff_head rx_oos_queue; /* packets received out of sequence */
+ struct list_head accept_link; /* Link in rx->acceptq */
+ struct list_head recvmsg_link; /* Link in rx->recvmsg_q */
+ struct list_head sock_link; /* Link in rx->sock_calls */
+ struct rb_node sock_node; /* Node in rx->calls */
struct sk_buff *tx_pending; /* Tx socket buffer being filled */
- wait_queue_head_t tx_waitq; /* wait for Tx window space to become available */
+ wait_queue_head_t waitq; /* Wait queue for channel or Tx */
__be32 crypto_buf[2]; /* Temporary packet crypto buffer */
unsigned long user_call_ID; /* user-defined call ID */
- unsigned long creation_jif; /* time of call creation */
unsigned long flags;
unsigned long events;
spinlock_t lock;
rwlock_t state_lock; /* lock for state transition */
- atomic_t usage;
- atomic_t skb_count; /* Outstanding packets on this call */
- atomic_t sequence; /* Tx data packet sequence counter */
- u32 local_abort; /* local abort code */
- u32 remote_abort; /* remote abort code */
- int error_report; /* Network error (ICMP/local transport) */
+ u32 abort_code; /* Local/remote abort code */
int error; /* Local error incurred */
- enum rxrpc_call_state state : 8; /* current state of call */
+ enum rxrpc_call_state state; /* current state of call */
+ enum rxrpc_call_completion completion; /* Call completion condition */
+ atomic_t usage;
+ u16 service_id; /* service ID */
+ u8 security_ix; /* Security type */
+ u32 call_id; /* call ID on connection */
+ u32 cid; /* connection ID plus channel index */
int debug_id; /* debug ID for printks */
- u8 channel; /* connection channel occupied by this call */
-
- /* transmission-phase ACK management */
- u8 acks_head; /* offset into window of first entry */
- u8 acks_tail; /* offset into window of last entry */
- u8 acks_winsz; /* size of un-ACK'd window */
- u8 acks_unacked; /* lowest unacked packet in last ACK received */
- int acks_latest; /* serial number of latest ACK received */
- rxrpc_seq_t acks_hard; /* highest definitively ACK'd msg seq */
- unsigned long *acks_window; /* sent packet window
- * - elements are pointers with LSB set if ACK'd
+ unsigned short rx_pkt_offset; /* Current recvmsg packet offset */
+ unsigned short rx_pkt_len; /* Current recvmsg packet len */
+
+ /* Rx/Tx circular buffer, depending on phase.
+ *
+ * In the Rx phase, packets are annotated with 0 or the number of the
+ * segment of a jumbo packet each buffer refers to. There can be up to
+ * 47 segments in a maximum-size UDP packet.
+ *
+ * In the Tx phase, packets are annotated with which buffers have been
+ * acked.
+ */
+#define RXRPC_RXTX_BUFF_SIZE 64
+#define RXRPC_RXTX_BUFF_MASK (RXRPC_RXTX_BUFF_SIZE - 1)
+#define RXRPC_INIT_RX_WINDOW_SIZE 32
+ struct sk_buff **rxtx_buffer;
+ u8 *rxtx_annotations;
+#define RXRPC_TX_ANNO_ACK 0
+#define RXRPC_TX_ANNO_UNACK 1
+#define RXRPC_TX_ANNO_NAK 2
+#define RXRPC_TX_ANNO_RETRANS 3
+#define RXRPC_TX_ANNO_MASK 0x03
+#define RXRPC_TX_ANNO_LAST 0x04
+#define RXRPC_TX_ANNO_RESENT 0x08
+
+#define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */
+#define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */
+#define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */
+ rxrpc_seq_t tx_hard_ack; /* Dead slot in buffer; the first transmitted but
+ * not hard-ACK'd packet follows this.
+ */
+ rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */
+
+ /* TCP-style slow-start congestion control [RFC5681]. Since the SMSS
+ * is fixed, we keep these numbers in terms of segments (ie. DATA
+ * packets) rather than bytes.
+ */
+#define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN
+ u8 cong_cwnd; /* Congestion window size */
+ u8 cong_extra; /* Extra to send for congestion management */
+ u8 cong_ssthresh; /* Slow-start threshold */
+ enum rxrpc_congest_mode cong_mode:8; /* Congestion management mode */
+ u8 cong_dup_acks; /* Count of ACKs showing missing packets */
+ u8 cong_cumul_acks; /* Cumulative ACK count */
+ ktime_t cong_tstamp; /* Last time cwnd was changed */
+
+ rxrpc_seq_t rx_hard_ack; /* Dead slot in buffer; the first received but not
+ * consumed packet follows this.
*/
+ rxrpc_seq_t rx_top; /* Highest Rx slot allocated. */
+ rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */
+ u8 rx_winsize; /* Size of Rx window */
+ u8 tx_winsize; /* Maximum size of Tx window */
+ bool tx_phase; /* T if transmission phase, F if receive phase */
+ u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */
/* receive-phase ACK management */
- rxrpc_seq_t rx_data_expect; /* next data seq ID expected to be received */
- rxrpc_seq_t rx_data_post; /* next data seq ID expected to be posted */
- rxrpc_seq_t rx_data_recv; /* last data seq ID encountered by recvmsg */
- rxrpc_seq_t rx_data_eaten; /* last data seq ID consumed by recvmsg */
- rxrpc_seq_t rx_first_oos; /* first packet in rx_oos_queue (or 0) */
- rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */
- rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */
u8 ackr_reason; /* reason to ACK */
+ u16 ackr_skew; /* skew on packet being ACK'd */
rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
- atomic_t ackr_not_idle; /* number of packets in Rx queue */
-
- /* received packet records, 1 bit per record */
-#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
- unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
+ rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */
+ rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */
+ rxrpc_seq_t ackr_seen; /* Highest packet shown seen */
+ rxrpc_serial_t ackr_ping; /* Last ping sent */
+ ktime_t ackr_ping_time; /* Time last ping sent */
- u8 in_clientflag; /* Copy of conn->in_clientflag */
- struct rxrpc_local *local; /* Local endpoint. */
- u32 call_id; /* call ID on connection */
- u32 cid; /* connection ID plus channel index */
- u32 epoch; /* epoch of this connection */
- u16 service_id; /* service ID */
+ /* transmission-phase ACK management */
+ ktime_t acks_latest_ts; /* Timestamp of latest ACK received */
+ rxrpc_serial_t acks_latest; /* serial number of latest ACK received */
+ rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */
};
/*
- * locally abort an RxRPC call
+ * Summary of a new ACK and the changes it made to the Tx buffer packet states.
*/
-static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code)
-{
- write_lock_bh(&call->state_lock);
- if (call->state < RXRPC_CALL_COMPLETE) {
- call->local_abort = abort_code;
- call->state = RXRPC_CALL_LOCALLY_ABORTED;
- set_bit(RXRPC_CALL_EV_ABORT, &call->events);
- }
- write_unlock_bh(&call->state_lock);
-}
+struct rxrpc_ack_summary {
+ u8 ack_reason;
+ u8 nr_acks; /* Number of ACKs in packet */
+ u8 nr_nacks; /* Number of NACKs in packet */
+ u8 nr_new_acks; /* Number of new ACKs in packet */
+ u8 nr_new_nacks; /* Number of new NACKs in packet */
+ u8 nr_rot_new_acks; /* Number of rotated new ACKs */
+ bool new_low_nack; /* T if new low NACK found */
+ bool retrans_timeo; /* T if reTx due to timeout happened */
+ u8 flight_size; /* Number of unreceived transmissions */
+ /* Place to stash values for tracing */
+ enum rxrpc_congest_mode mode:8;
+ u8 cwnd;
+ u8 ssthresh;
+ u8 dup_acks;
+ u8 cumulative_acks;
+};
+
+enum rxrpc_skb_trace {
+ rxrpc_skb_rx_cleaned,
+ rxrpc_skb_rx_freed,
+ rxrpc_skb_rx_got,
+ rxrpc_skb_rx_lost,
+ rxrpc_skb_rx_received,
+ rxrpc_skb_rx_rotated,
+ rxrpc_skb_rx_purged,
+ rxrpc_skb_rx_seen,
+ rxrpc_skb_tx_cleaned,
+ rxrpc_skb_tx_freed,
+ rxrpc_skb_tx_got,
+ rxrpc_skb_tx_new,
+ rxrpc_skb_tx_rotated,
+ rxrpc_skb_tx_seen,
+ rxrpc_skb__nr_trace
+};
+
+extern const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7];
+
+enum rxrpc_conn_trace {
+ rxrpc_conn_new_client,
+ rxrpc_conn_new_service,
+ rxrpc_conn_queued,
+ rxrpc_conn_seen,
+ rxrpc_conn_got,
+ rxrpc_conn_put_client,
+ rxrpc_conn_put_service,
+ rxrpc_conn__nr_trace
+};
+
+extern const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4];
+
+enum rxrpc_client_trace {
+ rxrpc_client_activate_chans,
+ rxrpc_client_alloc,
+ rxrpc_client_chan_activate,
+ rxrpc_client_chan_disconnect,
+ rxrpc_client_chan_pass,
+ rxrpc_client_chan_unstarted,
+ rxrpc_client_cleanup,
+ rxrpc_client_count,
+ rxrpc_client_discard,
+ rxrpc_client_duplicate,
+ rxrpc_client_exposed,
+ rxrpc_client_replace,
+ rxrpc_client_to_active,
+ rxrpc_client_to_culled,
+ rxrpc_client_to_idle,
+ rxrpc_client_to_inactive,
+ rxrpc_client_to_waiting,
+ rxrpc_client_uncount,
+ rxrpc_client__nr_trace
+};
+
+extern const char rxrpc_client_traces[rxrpc_client__nr_trace][7];
+extern const char rxrpc_conn_cache_states[RXRPC_CONN__NR_CACHE_STATES][5];
+
+enum rxrpc_call_trace {
+ rxrpc_call_new_client,
+ rxrpc_call_new_service,
+ rxrpc_call_queued,
+ rxrpc_call_queued_ref,
+ rxrpc_call_seen,
+ rxrpc_call_connected,
+ rxrpc_call_release,
+ rxrpc_call_got,
+ rxrpc_call_got_userid,
+ rxrpc_call_got_kernel,
+ rxrpc_call_put,
+ rxrpc_call_put_userid,
+ rxrpc_call_put_kernel,
+ rxrpc_call_put_noqueue,
+ rxrpc_call_error,
+ rxrpc_call__nr_trace
+};
+
+extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4];
+
+enum rxrpc_transmit_trace {
+ rxrpc_transmit_wait,
+ rxrpc_transmit_queue,
+ rxrpc_transmit_queue_last,
+ rxrpc_transmit_rotate,
+ rxrpc_transmit_rotate_last,
+ rxrpc_transmit_await_reply,
+ rxrpc_transmit_end,
+ rxrpc_transmit__nr_trace
+};
+
+extern const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4];
+
+enum rxrpc_receive_trace {
+ rxrpc_receive_incoming,
+ rxrpc_receive_queue,
+ rxrpc_receive_queue_last,
+ rxrpc_receive_front,
+ rxrpc_receive_rotate,
+ rxrpc_receive_end,
+ rxrpc_receive__nr_trace
+};
+
+extern const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4];
+
+enum rxrpc_recvmsg_trace {
+ rxrpc_recvmsg_enter,
+ rxrpc_recvmsg_wait,
+ rxrpc_recvmsg_dequeue,
+ rxrpc_recvmsg_hole,
+ rxrpc_recvmsg_next,
+ rxrpc_recvmsg_cont,
+ rxrpc_recvmsg_full,
+ rxrpc_recvmsg_data_return,
+ rxrpc_recvmsg_terminal,
+ rxrpc_recvmsg_to_be_accepted,
+ rxrpc_recvmsg_return,
+ rxrpc_recvmsg__nr_trace
+};
+
+extern const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5];
+
+enum rxrpc_rtt_tx_trace {
+ rxrpc_rtt_tx_ping,
+ rxrpc_rtt_tx_data,
+ rxrpc_rtt_tx__nr_trace
+};
+
+extern const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5];
+
+enum rxrpc_rtt_rx_trace {
+ rxrpc_rtt_rx_ping_response,
+ rxrpc_rtt_rx_requested_ack,
+ rxrpc_rtt_rx__nr_trace
+};
+
+extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5];
+
+enum rxrpc_timer_trace {
+ rxrpc_timer_begin,
+ rxrpc_timer_init_for_reply,
+ rxrpc_timer_expired,
+ rxrpc_timer_set_for_ack,
+ rxrpc_timer_set_for_resend,
+ rxrpc_timer_set_for_send,
+ rxrpc_timer__nr_trace
+};
+
+extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8];
+
+enum rxrpc_propose_ack_trace {
+ rxrpc_propose_ack_client_tx_end,
+ rxrpc_propose_ack_input_data,
+ rxrpc_propose_ack_ping_for_lost_ack,
+ rxrpc_propose_ack_ping_for_lost_reply,
+ rxrpc_propose_ack_ping_for_params,
+ rxrpc_propose_ack_respond_to_ack,
+ rxrpc_propose_ack_respond_to_ping,
+ rxrpc_propose_ack_retry_tx,
+ rxrpc_propose_ack_rotate_rx,
+ rxrpc_propose_ack_terminal_ack,
+ rxrpc_propose_ack__nr_trace
+};
+
+enum rxrpc_propose_ack_outcome {
+ rxrpc_propose_ack_use,
+ rxrpc_propose_ack_update,
+ rxrpc_propose_ack_subsume,
+ rxrpc_propose_ack__nr_outcomes
+};
+
+extern const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8];
+extern const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes];
+
+enum rxrpc_congest_change {
+ rxrpc_cong_begin_retransmission,
+ rxrpc_cong_cleared_nacks,
+ rxrpc_cong_new_low_nack,
+ rxrpc_cong_no_change,
+ rxrpc_cong_progress,
+ rxrpc_cong_retransmit_again,
+ rxrpc_cong_rtt_window_end,
+ rxrpc_cong_saw_nack,
+ rxrpc_congest__nr_change
+};
+
+extern const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10];
+extern const char rxrpc_congest_changes[rxrpc_congest__nr_change][9];
+
+extern const char *const rxrpc_pkts[];
+extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4];
+
+#include <trace/events/rxrpc.h>
/*
* af_rxrpc.c
*/
-extern atomic_t rxrpc_n_skbs;
+extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs;
extern u32 rxrpc_epoch;
extern atomic_t rxrpc_debug_id;
extern struct workqueue_struct *rxrpc_workqueue;
@@ -495,70 +792,178 @@ extern struct workqueue_struct *rxrpc_workqueue;
/*
* call_accept.c
*/
+int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t);
+void rxrpc_discard_prealloc(struct rxrpc_sock *);
+struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *,
+ struct rxrpc_connection *,
+ struct sk_buff *);
void rxrpc_accept_incoming_calls(struct rxrpc_local *);
-struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long);
+struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long,
+ rxrpc_notify_rx_t);
int rxrpc_reject_call(struct rxrpc_sock *);
/*
* call_event.c
*/
-void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
-void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool);
+void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t);
+void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool,
+ enum rxrpc_propose_ack_trace);
void rxrpc_process_call(struct work_struct *);
/*
* call_object.c
*/
+extern const char *const rxrpc_call_states[];
+extern const char *const rxrpc_call_completions[];
extern unsigned int rxrpc_max_call_lifetime;
-extern unsigned int rxrpc_dead_call_expiry;
extern struct kmem_cache *rxrpc_call_jar;
extern struct list_head rxrpc_calls;
extern rwlock_t rxrpc_call_lock;
struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long);
+struct rxrpc_call *rxrpc_alloc_call(gfp_t);
struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct rxrpc_conn_parameters *,
struct sockaddr_rxrpc *,
unsigned long, gfp_t);
-struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *,
- struct rxrpc_connection *,
- struct sk_buff *);
-void rxrpc_release_call(struct rxrpc_call *);
+void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
+ struct sk_buff *);
+void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
void rxrpc_release_calls_on_socket(struct rxrpc_sock *);
-void __rxrpc_put_call(struct rxrpc_call *);
+bool __rxrpc_queue_call(struct rxrpc_call *);
+bool rxrpc_queue_call(struct rxrpc_call *);
+void rxrpc_see_call(struct rxrpc_call *);
+void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace);
+void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace);
+void rxrpc_cleanup_call(struct rxrpc_call *);
void __exit rxrpc_destroy_all_calls(void);
+static inline bool rxrpc_is_service_call(const struct rxrpc_call *call)
+{
+ return test_bit(RXRPC_CALL_IS_SERVICE, &call->flags);
+}
+
+static inline bool rxrpc_is_client_call(const struct rxrpc_call *call)
+{
+ return !rxrpc_is_service_call(call);
+}
+
+/*
+ * Transition a call to the complete state.
+ */
+static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call,
+ enum rxrpc_call_completion compl,
+ u32 abort_code,
+ int error)
+{
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ call->abort_code = abort_code;
+ call->error = error;
+ call->completion = compl,
+ call->state = RXRPC_CALL_COMPLETE;
+ wake_up(&call->waitq);
+ return true;
+ }
+ return false;
+}
+
+static inline bool rxrpc_set_call_completion(struct rxrpc_call *call,
+ enum rxrpc_call_completion compl,
+ u32 abort_code,
+ int error)
+{
+ bool ret;
+
+ write_lock_bh(&call->state_lock);
+ ret = __rxrpc_set_call_completion(call, compl, abort_code, error);
+ write_unlock_bh(&call->state_lock);
+ return ret;
+}
+
+/*
+ * Record that a call successfully completed.
+ */
+static inline bool __rxrpc_call_completed(struct rxrpc_call *call)
+{
+ return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0);
+}
+
+static inline bool rxrpc_call_completed(struct rxrpc_call *call)
+{
+ bool ret;
+
+ write_lock_bh(&call->state_lock);
+ ret = __rxrpc_call_completed(call);
+ write_unlock_bh(&call->state_lock);
+ return ret;
+}
+
+/*
+ * Record that a call is locally aborted.
+ */
+static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call,
+ rxrpc_seq_t seq,
+ u32 abort_code, int error)
+{
+ trace_rxrpc_abort(why, call->cid, call->call_id, seq,
+ abort_code, error);
+ return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
+ abort_code, error);
+}
+
+static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
+ rxrpc_seq_t seq, u32 abort_code, int error)
+{
+ bool ret;
+
+ write_lock_bh(&call->state_lock);
+ ret = __rxrpc_abort_call(why, call, seq, abort_code, error);
+ write_unlock_bh(&call->state_lock);
+ return ret;
+}
+
/*
* conn_client.c
*/
+extern unsigned int rxrpc_max_client_connections;
+extern unsigned int rxrpc_reap_client_connections;
+extern unsigned int rxrpc_conn_idle_client_expiry;
+extern unsigned int rxrpc_conn_idle_client_fast_expiry;
extern struct idr rxrpc_client_conn_ids;
void rxrpc_destroy_client_conn_ids(void);
int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *,
struct sockaddr_rxrpc *, gfp_t);
-void rxrpc_unpublish_client_conn(struct rxrpc_connection *);
+void rxrpc_expose_client_call(struct rxrpc_call *);
+void rxrpc_disconnect_client_call(struct rxrpc_call *);
+void rxrpc_put_client_conn(struct rxrpc_connection *);
+void __exit rxrpc_destroy_all_client_connections(void);
/*
* conn_event.c
*/
void rxrpc_process_connection(struct work_struct *);
-void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *);
-void rxrpc_reject_packets(struct rxrpc_local *);
/*
* conn_object.c
*/
extern unsigned int rxrpc_connection_expiry;
extern struct list_head rxrpc_connections;
+extern struct list_head rxrpc_connection_proc_list;
extern rwlock_t rxrpc_connection_lock;
int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
struct rxrpc_connection *rxrpc_alloc_connection(gfp_t);
struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *,
struct sk_buff *);
-void __rxrpc_disconnect_call(struct rxrpc_call *);
+void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *);
void rxrpc_disconnect_call(struct rxrpc_call *);
-void rxrpc_put_connection(struct rxrpc_connection *);
+void rxrpc_kill_connection(struct rxrpc_connection *);
+bool rxrpc_queue_conn(struct rxrpc_connection *);
+void rxrpc_see_connection(struct rxrpc_connection *);
+void rxrpc_get_connection(struct rxrpc_connection *);
+struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *);
+void rxrpc_put_service_conn(struct rxrpc_connection *);
void __exit rxrpc_destroy_all_connections(void);
static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn)
@@ -571,24 +976,15 @@ static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn)
return !rxrpc_conn_is_client(conn);
}
-static inline void rxrpc_get_connection(struct rxrpc_connection *conn)
-{
- atomic_inc(&conn->usage);
-}
-
-static inline
-struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
+static inline void rxrpc_put_connection(struct rxrpc_connection *conn)
{
- return atomic_inc_not_zero(&conn->usage) ? conn : NULL;
-}
+ if (!conn)
+ return;
-static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn)
-{
- if (!rxrpc_get_connection_maybe(conn))
- return false;
- if (!rxrpc_queue_work(&conn->processor))
- rxrpc_put_connection(conn);
- return true;
+ if (rxrpc_conn_is_client(conn))
+ rxrpc_put_client_conn(conn);
+ else
+ rxrpc_put_service_conn(conn);
}
/*
@@ -596,17 +992,14 @@ static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn)
*/
struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *,
struct sk_buff *);
-struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *,
- struct sockaddr_rxrpc *,
- struct sk_buff *);
+struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t);
+void rxrpc_new_incoming_connection(struct rxrpc_connection *, struct sk_buff *);
void rxrpc_unpublish_service_conn(struct rxrpc_connection *);
/*
* input.c
*/
void rxrpc_data_ready(struct sock *);
-int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, bool);
-void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
/*
* insecure.c
@@ -668,25 +1061,24 @@ extern unsigned int rxrpc_idle_ack_delay;
extern unsigned int rxrpc_rx_window_size;
extern unsigned int rxrpc_rx_mtu;
extern unsigned int rxrpc_rx_jumbo_max;
+extern unsigned int rxrpc_resend_timeout;
-extern const char *const rxrpc_pkts[];
extern const s8 rxrpc_ack_priority[];
-extern const char *rxrpc_acks(u8 reason);
-
/*
* output.c
*/
-extern unsigned int rxrpc_resend_timeout;
-
-int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *);
-int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
+int rxrpc_send_call_packet(struct rxrpc_call *, u8);
+int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool);
+void rxrpc_reject_packets(struct rxrpc_local *);
/*
* peer_event.c
*/
void rxrpc_error_report(struct sock *);
void rxrpc_peer_error_distributor(struct work_struct *);
+void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace,
+ rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t);
/*
* peer_object.c
@@ -696,10 +1088,13 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *,
struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *,
struct sockaddr_rxrpc *, gfp_t);
struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t);
+struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *,
+ struct rxrpc_peer *);
-static inline void rxrpc_get_peer(struct rxrpc_peer *peer)
+static inline struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer)
{
atomic_inc(&peer->usage);
+ return peer;
}
static inline
@@ -718,14 +1113,13 @@ static inline void rxrpc_put_peer(struct rxrpc_peer *peer)
/*
* proc.c
*/
-extern const char *const rxrpc_call_states[];
extern const struct file_operations rxrpc_call_seq_fops;
extern const struct file_operations rxrpc_connection_seq_fops;
/*
* recvmsg.c
*/
-void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *);
+void rxrpc_notify_socket(struct rxrpc_call *);
int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
/*
@@ -744,9 +1138,21 @@ int rxrpc_init_client_conn_security(struct rxrpc_connection *);
int rxrpc_init_server_conn_security(struct rxrpc_connection *);
/*
+ * sendmsg.c
+ */
+int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
+
+/*
* skbuff.c
*/
+void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *);
void rxrpc_packet_destructor(struct sk_buff *);
+void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_lose_skb(struct sk_buff *, enum rxrpc_skb_trace);
+void rxrpc_purge_queue(struct sk_buff_head *);
/*
* sysctl.c
@@ -764,6 +1170,23 @@ static inline void rxrpc_sysctl_exit(void) {}
*/
int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *);
+static inline bool before(u32 seq1, u32 seq2)
+{
+ return (s32)(seq1 - seq2) < 0;
+}
+static inline bool before_eq(u32 seq1, u32 seq2)
+{
+ return (s32)(seq1 - seq2) <= 0;
+}
+static inline bool after(u32 seq1, u32 seq2)
+{
+ return (s32)(seq1 - seq2) > 0;
+}
+static inline bool after_eq(u32 seq1, u32 seq2)
+{
+ return (s32)(seq1 - seq2) >= 0;
+}
+
/*
* debug tracing
*/
@@ -846,11 +1269,12 @@ do { \
#define ASSERTCMP(X, OP, Y) \
do { \
- unsigned long _x = (unsigned long)(X); \
- unsigned long _y = (unsigned long)(Y); \
+ __typeof__(X) _x = (X); \
+ __typeof__(Y) _y = (__typeof__(X))(Y); \
if (unlikely(!(_x OP _y))) { \
- pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \
- _x, _x, #OP, _y, _y); \
+ pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \
+ (unsigned long)_x, (unsigned long)_x, #OP, \
+ (unsigned long)_y, (unsigned long)_y); \
BUG(); \
} \
} while (0)
@@ -865,11 +1289,12 @@ do { \
#define ASSERTIFCMP(C, X, OP, Y) \
do { \
- unsigned long _x = (unsigned long)(X); \
- unsigned long _y = (unsigned long)(Y); \
+ __typeof__(X) _x = (X); \
+ __typeof__(Y) _y = (__typeof__(X))(Y); \
if (unlikely((C) && !(_x OP _y))) { \
pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \
- _x, _x, #OP, _y, _y); \
+ (unsigned long)_x, (unsigned long)_x, #OP, \
+ (unsigned long)_y, (unsigned long)_y); \
BUG(); \
} \
} while (0)
@@ -893,54 +1318,3 @@ do { \
} while (0)
#endif /* __KDEBUGALL */
-
-/*
- * socket buffer accounting / leak finding
- */
-static inline void __rxrpc_new_skb(struct sk_buff *skb, const char *fn)
-{
- //_net("new skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
- //atomic_inc(&rxrpc_n_skbs);
-}
-
-#define rxrpc_new_skb(skb) __rxrpc_new_skb((skb), __func__)
-
-static inline void __rxrpc_kill_skb(struct sk_buff *skb, const char *fn)
-{
- //_net("kill skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs));
- //atomic_dec(&rxrpc_n_skbs);
-}
-
-#define rxrpc_kill_skb(skb) __rxrpc_kill_skb((skb), __func__)
-
-static inline void __rxrpc_free_skb(struct sk_buff *skb, const char *fn)
-{
- if (skb) {
- CHECK_SLAB_OKAY(&skb->users);
- //_net("free skb %p %s [%d]",
- // skb, fn, atomic_read(&rxrpc_n_skbs));
- //atomic_dec(&rxrpc_n_skbs);
- kfree_skb(skb);
- }
-}
-
-#define rxrpc_free_skb(skb) __rxrpc_free_skb((skb), __func__)
-
-static inline void rxrpc_purge_queue(struct sk_buff_head *list)
-{
- struct sk_buff *skb;
- while ((skb = skb_dequeue((list))) != NULL)
- rxrpc_free_skb(skb);
-}
-
-#define rxrpc_get_call(CALL) \
-do { \
- CHECK_SLAB_OKAY(&(CALL)->usage); \
- if (atomic_inc_return(&(CALL)->usage) == 1) \
- BUG(); \
-} while (0)
-
-#define rxrpc_put_call(CALL) \
-do { \
- __rxrpc_put_call(CALL); \
-} while (0)
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 9bae21e66d65..3cac231d8405 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -20,265 +20,409 @@
#include <linux/in6.h>
#include <linux/icmp.h>
#include <linux/gfp.h>
+#include <linux/circ_buf.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
#include "ar-internal.h"
/*
- * generate a connection-level abort
+ * Preallocate a single service call, connection and peer and, if possible,
+ * give them a user ID and attach the user's side of the ID to them.
*/
-static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx,
- struct rxrpc_wire_header *whdr)
+static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
+ struct rxrpc_backlog *b,
+ rxrpc_notify_rx_t notify_rx,
+ rxrpc_user_attach_call_t user_attach_call,
+ unsigned long user_call_ID, gfp_t gfp)
{
- struct msghdr msg;
- struct kvec iov[1];
- size_t len;
- int ret;
+ const void *here = __builtin_return_address(0);
+ struct rxrpc_call *call;
+ int max, tmp;
+ unsigned int size = RXRPC_BACKLOG_MAX;
+ unsigned int head, tail, call_head, call_tail;
+
+ max = rx->sk.sk_max_ack_backlog;
+ tmp = rx->sk.sk_ack_backlog;
+ if (tmp >= max) {
+ _leave(" = -ENOBUFS [full %u]", max);
+ return -ENOBUFS;
+ }
+ max -= tmp;
+
+ /* We don't need more conns and peers than we have calls, but on the
+ * other hand, we shouldn't ever use more peers than conns or conns
+ * than calls.
+ */
+ call_head = b->call_backlog_head;
+ call_tail = READ_ONCE(b->call_backlog_tail);
+ tmp = CIRC_CNT(call_head, call_tail, size);
+ if (tmp >= max) {
+ _leave(" = -ENOBUFS [enough %u]", tmp);
+ return -ENOBUFS;
+ }
+ max = tmp + 1;
+
+ head = b->peer_backlog_head;
+ tail = READ_ONCE(b->peer_backlog_tail);
+ if (CIRC_CNT(head, tail, size) < max) {
+ struct rxrpc_peer *peer = rxrpc_alloc_peer(rx->local, gfp);
+ if (!peer)
+ return -ENOMEM;
+ b->peer_backlog[head] = peer;
+ smp_store_release(&b->peer_backlog_head,
+ (head + 1) & (size - 1));
+ }
- _enter("%d,,", local->debug_id);
+ head = b->conn_backlog_head;
+ tail = READ_ONCE(b->conn_backlog_tail);
+ if (CIRC_CNT(head, tail, size) < max) {
+ struct rxrpc_connection *conn;
- whdr->type = RXRPC_PACKET_TYPE_BUSY;
- whdr->serial = htonl(1);
+ conn = rxrpc_prealloc_service_connection(gfp);
+ if (!conn)
+ return -ENOMEM;
+ b->conn_backlog[head] = conn;
+ smp_store_release(&b->conn_backlog_head,
+ (head + 1) & (size - 1));
- msg.msg_name = &srx->transport.sin;
- msg.msg_namelen = sizeof(srx->transport.sin);
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = 0;
+ trace_rxrpc_conn(conn, rxrpc_conn_new_service,
+ atomic_read(&conn->usage), here);
+ }
- iov[0].iov_base = whdr;
- iov[0].iov_len = sizeof(*whdr);
+ /* Now it gets complicated, because calls get registered with the
+ * socket here, particularly if a user ID is preassigned by the user.
+ */
+ call = rxrpc_alloc_call(gfp);
+ if (!call)
+ return -ENOMEM;
+ call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
+ call->state = RXRPC_CALL_SERVER_PREALLOC;
- len = iov[0].iov_len;
+ trace_rxrpc_call(call, rxrpc_call_new_service,
+ atomic_read(&call->usage),
+ here, (const void *)user_call_ID);
- _proto("Tx BUSY %%1");
+ write_lock(&rx->call_lock);
+ if (user_attach_call) {
+ struct rxrpc_call *xcall;
+ struct rb_node *parent, **pp;
+
+ /* Check the user ID isn't already in use */
+ pp = &rx->calls.rb_node;
+ parent = NULL;
+ while (*pp) {
+ parent = *pp;
+ xcall = rb_entry(parent, struct rxrpc_call, sock_node);
+ if (user_call_ID < call->user_call_ID)
+ pp = &(*pp)->rb_left;
+ else if (user_call_ID > call->user_call_ID)
+ pp = &(*pp)->rb_right;
+ else
+ goto id_in_use;
+ }
- ret = kernel_sendmsg(local->socket, &msg, iov, 1, len);
- if (ret < 0) {
- _leave(" = -EAGAIN [sendmsg failed: %d]", ret);
- return -EAGAIN;
+ call->user_call_ID = user_call_ID;
+ call->notify_rx = notify_rx;
+ rxrpc_get_call(call, rxrpc_call_got_kernel);
+ user_attach_call(call, user_call_ID);
+ rxrpc_get_call(call, rxrpc_call_got_userid);
+ rb_link_node(&call->sock_node, parent, pp);
+ rb_insert_color(&call->sock_node, &rx->calls);
+ set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
}
- _leave(" = 0");
+ list_add(&call->sock_link, &rx->sock_calls);
+
+ write_unlock(&rx->call_lock);
+
+ write_lock(&rxrpc_call_lock);
+ list_add_tail(&call->link, &rxrpc_calls);
+ write_unlock(&rxrpc_call_lock);
+
+ b->call_backlog[call_head] = call;
+ smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1));
+ _leave(" = 0 [%d -> %lx]", call->debug_id, user_call_ID);
return 0;
+
+id_in_use:
+ write_unlock(&rx->call_lock);
+ rxrpc_cleanup_call(call);
+ _leave(" = -EBADSLT");
+ return -EBADSLT;
}
/*
- * accept an incoming call that needs peer, transport and/or connection setting
- * up
+ * Preallocate sufficient service connections, calls and peers to cover the
+ * entire backlog of a socket. When a new call comes in, if we don't have
+ * sufficient of each available, the call gets rejected as busy or ignored.
+ *
+ * The backlog is replenished when a connection is accepted or rejected.
*/
-static int rxrpc_accept_incoming_call(struct rxrpc_local *local,
- struct rxrpc_sock *rx,
- struct sk_buff *skb,
- struct sockaddr_rxrpc *srx)
+int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp)
{
- struct rxrpc_connection *conn;
- struct rxrpc_skb_priv *sp, *nsp;
- struct rxrpc_call *call;
- struct sk_buff *notification;
- int ret;
+ struct rxrpc_backlog *b = rx->backlog;
- _enter("");
+ if (!b) {
+ b = kzalloc(sizeof(struct rxrpc_backlog), gfp);
+ if (!b)
+ return -ENOMEM;
+ rx->backlog = b;
+ }
+
+ if (rx->discard_new_call)
+ return 0;
+
+ while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp) == 0)
+ ;
- sp = rxrpc_skb(skb);
+ return 0;
+}
- /* get a notification message to send to the server app */
- notification = alloc_skb(0, GFP_NOFS);
- if (!notification) {
- _debug("no memory");
- ret = -ENOMEM;
- goto error_nofree;
+/*
+ * Discard the preallocation on a service.
+ */
+void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
+{
+ struct rxrpc_backlog *b = rx->backlog;
+ unsigned int size = RXRPC_BACKLOG_MAX, head, tail;
+
+ if (!b)
+ return;
+ rx->backlog = NULL;
+
+ /* Make sure that there aren't any incoming calls in progress before we
+ * clear the preallocation buffers.
+ */
+ spin_lock_bh(&rx->incoming_lock);
+ spin_unlock_bh(&rx->incoming_lock);
+
+ head = b->peer_backlog_head;
+ tail = b->peer_backlog_tail;
+ while (CIRC_CNT(head, tail, size) > 0) {
+ struct rxrpc_peer *peer = b->peer_backlog[tail];
+ kfree(peer);
+ tail = (tail + 1) & (size - 1);
}
- rxrpc_new_skb(notification);
- notification->mark = RXRPC_SKB_MARK_NEW_CALL;
-
- conn = rxrpc_incoming_connection(local, srx, skb);
- if (IS_ERR(conn)) {
- _debug("no conn");
- ret = PTR_ERR(conn);
- goto error;
+
+ head = b->conn_backlog_head;
+ tail = b->conn_backlog_tail;
+ while (CIRC_CNT(head, tail, size) > 0) {
+ struct rxrpc_connection *conn = b->conn_backlog[tail];
+ write_lock(&rxrpc_connection_lock);
+ list_del(&conn->link);
+ list_del(&conn->proc_link);
+ write_unlock(&rxrpc_connection_lock);
+ kfree(conn);
+ tail = (tail + 1) & (size - 1);
}
- call = rxrpc_incoming_call(rx, conn, skb);
- rxrpc_put_connection(conn);
- if (IS_ERR(call)) {
- _debug("no call");
- ret = PTR_ERR(call);
- goto error;
+ head = b->call_backlog_head;
+ tail = b->call_backlog_tail;
+ while (CIRC_CNT(head, tail, size) > 0) {
+ struct rxrpc_call *call = b->call_backlog[tail];
+ if (rx->discard_new_call) {
+ _debug("discard %lx", call->user_call_ID);
+ rx->discard_new_call(call, call->user_call_ID);
+ rxrpc_put_call(call, rxrpc_call_put_kernel);
+ }
+ rxrpc_call_completed(call);
+ rxrpc_release_call(rx, call);
+ rxrpc_put_call(call, rxrpc_call_put);
+ tail = (tail + 1) & (size - 1);
}
- /* attach the call to the socket */
- read_lock_bh(&local->services_lock);
- if (rx->sk.sk_state == RXRPC_CLOSE)
- goto invalid_service;
+ kfree(b);
+}
- write_lock(&rx->call_lock);
- if (!test_and_set_bit(RXRPC_CALL_INIT_ACCEPT, &call->flags)) {
- rxrpc_get_call(call);
-
- spin_lock(&call->conn->state_lock);
- if (sp->hdr.securityIndex > 0 &&
- call->conn->state == RXRPC_CONN_SERVICE_UNSECURED) {
- _debug("await conn sec");
- list_add_tail(&call->accept_link, &rx->secureq);
- call->conn->state = RXRPC_CONN_SERVICE_CHALLENGING;
- set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events);
- rxrpc_queue_conn(call->conn);
- } else {
- _debug("conn ready");
- call->state = RXRPC_CALL_SERVER_ACCEPTING;
- list_add_tail(&call->accept_link, &rx->acceptq);
- rxrpc_get_call(call);
- atomic_inc(&call->skb_count);
- nsp = rxrpc_skb(notification);
- nsp->call = call;
-
- ASSERTCMP(atomic_read(&call->usage), >=, 3);
-
- _debug("notify");
- spin_lock(&call->lock);
- ret = rxrpc_queue_rcv_skb(call, notification, true,
- false);
- spin_unlock(&call->lock);
- notification = NULL;
- BUG_ON(ret < 0);
+/*
+ * Allocate a new incoming call from the prealloc pool, along with a connection
+ * and a peer as necessary.
+ */
+static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
+ struct rxrpc_local *local,
+ struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ struct rxrpc_backlog *b = rx->backlog;
+ struct rxrpc_peer *peer, *xpeer;
+ struct rxrpc_call *call;
+ unsigned short call_head, conn_head, peer_head;
+ unsigned short call_tail, conn_tail, peer_tail;
+ unsigned short call_count, conn_count;
+
+ /* #calls >= #conns >= #peers must hold true. */
+ call_head = smp_load_acquire(&b->call_backlog_head);
+ call_tail = b->call_backlog_tail;
+ call_count = CIRC_CNT(call_head, call_tail, RXRPC_BACKLOG_MAX);
+ conn_head = smp_load_acquire(&b->conn_backlog_head);
+ conn_tail = b->conn_backlog_tail;
+ conn_count = CIRC_CNT(conn_head, conn_tail, RXRPC_BACKLOG_MAX);
+ ASSERTCMP(conn_count, >=, call_count);
+ peer_head = smp_load_acquire(&b->peer_backlog_head);
+ peer_tail = b->peer_backlog_tail;
+ ASSERTCMP(CIRC_CNT(peer_head, peer_tail, RXRPC_BACKLOG_MAX), >=,
+ conn_count);
+
+ if (call_count == 0)
+ return NULL;
+
+ if (!conn) {
+ /* No connection. We're going to need a peer to start off
+ * with. If one doesn't yet exist, use a spare from the
+ * preallocation set. We dump the address into the spare in
+ * anticipation - and to save on stack space.
+ */
+ xpeer = b->peer_backlog[peer_tail];
+ if (rxrpc_extract_addr_from_skb(&xpeer->srx, skb) < 0)
+ return NULL;
+
+ peer = rxrpc_lookup_incoming_peer(local, xpeer);
+ if (peer == xpeer) {
+ b->peer_backlog[peer_tail] = NULL;
+ smp_store_release(&b->peer_backlog_tail,
+ (peer_tail + 1) &
+ (RXRPC_BACKLOG_MAX - 1));
}
- spin_unlock(&call->conn->state_lock);
- _debug("queued");
+ /* Now allocate and set up the connection */
+ conn = b->conn_backlog[conn_tail];
+ b->conn_backlog[conn_tail] = NULL;
+ smp_store_release(&b->conn_backlog_tail,
+ (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1));
+ rxrpc_get_local(local);
+ conn->params.local = local;
+ conn->params.peer = peer;
+ rxrpc_see_connection(conn);
+ rxrpc_new_incoming_connection(conn, skb);
+ } else {
+ rxrpc_get_connection(conn);
}
- write_unlock(&rx->call_lock);
- _debug("process");
- rxrpc_fast_process_packet(call, skb);
+ /* And now we can allocate and set up a new call */
+ call = b->call_backlog[call_tail];
+ b->call_backlog[call_tail] = NULL;
+ smp_store_release(&b->call_backlog_tail,
+ (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1));
- _debug("done");
- read_unlock_bh(&local->services_lock);
- rxrpc_free_skb(notification);
- rxrpc_put_call(call);
- _leave(" = 0");
- return 0;
-
-invalid_service:
- _debug("invalid");
- read_unlock_bh(&local->services_lock);
-
- read_lock_bh(&call->state_lock);
- if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
- !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
- rxrpc_get_call(call);
- rxrpc_queue_call(call);
- }
- read_unlock_bh(&call->state_lock);
- rxrpc_put_call(call);
- ret = -ECONNREFUSED;
-error:
- rxrpc_free_skb(notification);
-error_nofree:
- _leave(" = %d", ret);
- return ret;
+ rxrpc_see_call(call);
+ call->conn = conn;
+ call->peer = rxrpc_get_peer(conn->params.peer);
+ return call;
}
/*
- * accept incoming calls that need peer, transport and/or connection setting up
- * - the packets we get are all incoming client DATA packets that have seq == 1
+ * Set up a new incoming call. Called in BH context with the RCU read lock
+ * held.
+ *
+ * If this is for a kernel service, when we allocate the call, it will have
+ * three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the
+ * retainer ref obtained from the backlog buffer. Prealloc calls for userspace
+ * services only have the ref from the backlog buffer. We want to pass this
+ * ref to non-BH context to dispose of.
+ *
+ * If we want to report an error, we mark the skb with the packet type and
+ * abort code and return NULL.
*/
-void rxrpc_accept_incoming_calls(struct rxrpc_local *local)
+struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local,
+ struct rxrpc_connection *conn,
+ struct sk_buff *skb)
{
- struct rxrpc_skb_priv *sp;
- struct sockaddr_rxrpc srx;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_sock *rx;
- struct rxrpc_wire_header whdr;
- struct sk_buff *skb;
- int ret;
+ struct rxrpc_call *call;
+ u16 service_id = sp->hdr.serviceId;
- _enter("%d", local->debug_id);
+ _enter("");
- skb = skb_dequeue(&local->accept_queue);
- if (!skb) {
- _leave("\n");
- return;
+ /* Get the socket providing the service */
+ rx = rcu_dereference(local->service);
+ if (service_id == rx->srx.srx_service)
+ goto found_service;
+
+ trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RX_INVALID_OPERATION, EOPNOTSUPP);
+ skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
+ skb->priority = RX_INVALID_OPERATION;
+ _leave(" = NULL [service]");
+ return NULL;
+
+found_service:
+ spin_lock(&rx->incoming_lock);
+ if (rx->sk.sk_state == RXRPC_CLOSE) {
+ trace_rxrpc_abort("CLS", sp->hdr.cid, sp->hdr.callNumber,
+ sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN);
+ skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
+ skb->priority = RX_INVALID_OPERATION;
+ _leave(" = NULL [close]");
+ call = NULL;
+ goto out;
}
- _net("incoming call skb %p", skb);
-
- sp = rxrpc_skb(skb);
-
- /* Set up a response packet header in case we need it */
- whdr.epoch = htonl(sp->hdr.epoch);
- whdr.cid = htonl(sp->hdr.cid);
- whdr.callNumber = htonl(sp->hdr.callNumber);
- whdr.seq = htonl(sp->hdr.seq);
- whdr.serial = 0;
- whdr.flags = 0;
- whdr.type = 0;
- whdr.userStatus = 0;
- whdr.securityIndex = sp->hdr.securityIndex;
- whdr._rsvd = 0;
- whdr.serviceId = htons(sp->hdr.serviceId);
-
- if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
- goto drop;
-
- /* get the socket providing the service */
- read_lock_bh(&local->services_lock);
- list_for_each_entry(rx, &local->services, listen_link) {
- if (rx->srx.srx_service == sp->hdr.serviceId &&
- rx->sk.sk_state != RXRPC_CLOSE)
- goto found_service;
+ call = rxrpc_alloc_incoming_call(rx, local, conn, skb);
+ if (!call) {
+ skb->mark = RXRPC_SKB_MARK_BUSY;
+ _leave(" = NULL [busy]");
+ call = NULL;
+ goto out;
}
- read_unlock_bh(&local->services_lock);
- goto invalid_service;
-found_service:
- _debug("found service %hd", rx->srx.srx_service);
- if (sk_acceptq_is_full(&rx->sk))
- goto backlog_full;
- sk_acceptq_added(&rx->sk);
- sock_hold(&rx->sk);
- read_unlock_bh(&local->services_lock);
-
- ret = rxrpc_accept_incoming_call(local, rx, skb, &srx);
- if (ret < 0)
- sk_acceptq_removed(&rx->sk);
- sock_put(&rx->sk);
- switch (ret) {
- case -ECONNRESET: /* old calls are ignored */
- case -ECONNABORTED: /* aborted calls are reaborted or ignored */
- case 0:
- return;
- case -ECONNREFUSED:
- goto invalid_service;
- case -EBUSY:
- goto busy;
- case -EKEYREJECTED:
- goto security_mismatch;
+ trace_rxrpc_receive(call, rxrpc_receive_incoming,
+ sp->hdr.serial, sp->hdr.seq);
+
+ /* Make the call live. */
+ rxrpc_incoming_call(rx, call, skb);
+ conn = call->conn;
+
+ if (rx->notify_new_call)
+ rx->notify_new_call(&rx->sk, call, call->user_call_ID);
+ else
+ sk_acceptq_added(&rx->sk);
+
+ spin_lock(&conn->state_lock);
+ switch (conn->state) {
+ case RXRPC_CONN_SERVICE_UNSECURED:
+ conn->state = RXRPC_CONN_SERVICE_CHALLENGING;
+ set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events);
+ rxrpc_queue_conn(call->conn);
+ break;
+
+ case RXRPC_CONN_SERVICE:
+ write_lock(&call->state_lock);
+ if (rx->discard_new_call)
+ call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
+ else
+ call->state = RXRPC_CALL_SERVER_ACCEPTING;
+ write_unlock(&call->state_lock);
+ break;
+
+ case RXRPC_CONN_REMOTELY_ABORTED:
+ rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+ conn->remote_abort, ECONNABORTED);
+ break;
+ case RXRPC_CONN_LOCALLY_ABORTED:
+ rxrpc_abort_call("CON", call, sp->hdr.seq,
+ conn->local_abort, ECONNABORTED);
+ break;
default:
BUG();
}
+ spin_unlock(&conn->state_lock);
-backlog_full:
- read_unlock_bh(&local->services_lock);
-busy:
- rxrpc_busy(local, &srx, &whdr);
- rxrpc_free_skb(skb);
- return;
+ if (call->state == RXRPC_CALL_SERVER_ACCEPTING)
+ rxrpc_notify_socket(call);
-drop:
- rxrpc_free_skb(skb);
- return;
+ /* We have to discard the prealloc queue's ref here and rely on a
+ * combination of the RCU read lock and refs held either by the socket
+ * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel
+ * service to prevent the call from being deallocated too early.
+ */
+ rxrpc_put_call(call, rxrpc_call_put);
-invalid_service:
- skb->priority = RX_INVALID_OPERATION;
- rxrpc_reject_packet(local, skb);
- return;
-
- /* can't change connection security type mid-flow */
-security_mismatch:
- skb->priority = RX_PROTOCOL_ERROR;
- rxrpc_reject_packet(local, skb);
- return;
+ _leave(" = %p{%d}", call, call->debug_id);
+out:
+ spin_unlock(&rx->incoming_lock);
+ return call;
}
/*
@@ -286,7 +430,8 @@ security_mismatch:
* - assign the user call ID to the call at the front of the queue
*/
struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
- unsigned long user_call_ID)
+ unsigned long user_call_ID,
+ rxrpc_notify_rx_t notify_rx)
{
struct rxrpc_call *call;
struct rb_node *parent, **pp;
@@ -298,12 +443,13 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
write_lock(&rx->call_lock);
- ret = -ENODATA;
- if (list_empty(&rx->acceptq))
- goto out;
+ if (list_empty(&rx->to_be_accepted)) {
+ write_unlock(&rx->call_lock);
+ kleave(" = -ENODATA [empty]");
+ return ERR_PTR(-ENODATA);
+ }
/* check the user ID isn't already in use */
- ret = -EBADSLT;
pp = &rx->calls.rb_node;
parent = NULL;
while (*pp) {
@@ -315,62 +461,59 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx,
else if (user_call_ID > call->user_call_ID)
pp = &(*pp)->rb_right;
else
- goto out;
+ goto id_in_use;
}
- /* dequeue the first call and check it's still valid */
- call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
+ /* Dequeue the first call and check it's still valid. We gain
+ * responsibility for the queue's reference.
+ */
+ call = list_entry(rx->to_be_accepted.next,
+ struct rxrpc_call, accept_link);
list_del_init(&call->accept_link);
sk_acceptq_removed(&rx->sk);
+ rxrpc_see_call(call);
write_lock_bh(&call->state_lock);
switch (call->state) {
case RXRPC_CALL_SERVER_ACCEPTING:
call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
break;
- case RXRPC_CALL_REMOTELY_ABORTED:
- case RXRPC_CALL_LOCALLY_ABORTED:
- ret = -ECONNABORTED;
- goto out_release;
- case RXRPC_CALL_NETWORK_ERROR:
- ret = call->conn->error;
+ case RXRPC_CALL_COMPLETE:
+ ret = call->error;
goto out_release;
- case RXRPC_CALL_DEAD:
- ret = -ETIME;
- goto out_discard;
default:
BUG();
}
/* formalise the acceptance */
+ call->notify_rx = notify_rx;
call->user_call_ID = user_call_ID;
+ rxrpc_get_call(call, rxrpc_call_got_userid);
rb_link_node(&call->sock_node, parent, pp);
rb_insert_color(&call->sock_node, &rx->calls);
if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags))
BUG();
- if (test_and_set_bit(RXRPC_CALL_EV_ACCEPTED, &call->events))
- BUG();
- rxrpc_queue_call(call);
- rxrpc_get_call(call);
write_unlock_bh(&call->state_lock);
write_unlock(&rx->call_lock);
+ rxrpc_notify_socket(call);
+ rxrpc_service_prealloc(rx, GFP_KERNEL);
_leave(" = %p{%d}", call, call->debug_id);
return call;
- /* if the call is already dying or dead, then we leave the socket's ref
- * on it to be released by rxrpc_dead_call_expired() as induced by
- * rxrpc_release_call() */
out_release:
_debug("release %p", call);
- if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
- !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
- rxrpc_queue_call(call);
-out_discard:
write_unlock_bh(&call->state_lock);
- _debug("discard %p", call);
-out:
write_unlock(&rx->call_lock);
+ rxrpc_release_call(rx, call);
+ rxrpc_put_call(call, rxrpc_call_put);
+ goto out;
+
+id_in_use:
+ ret = -EBADSLT;
+ write_unlock(&rx->call_lock);
+out:
+ rxrpc_service_prealloc(rx, GFP_KERNEL);
_leave(" = %d", ret);
return ERR_PTR(ret);
}
@@ -382,6 +525,7 @@ out:
int rxrpc_reject_call(struct rxrpc_sock *rx)
{
struct rxrpc_call *call;
+ bool abort = false;
int ret;
_enter("");
@@ -390,88 +534,73 @@ int rxrpc_reject_call(struct rxrpc_sock *rx)
write_lock(&rx->call_lock);
- ret = -ENODATA;
- if (list_empty(&rx->acceptq))
- goto out;
+ if (list_empty(&rx->to_be_accepted)) {
+ write_unlock(&rx->call_lock);
+ return -ENODATA;
+ }
- /* dequeue the first call and check it's still valid */
- call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link);
+ /* Dequeue the first call and check it's still valid. We gain
+ * responsibility for the queue's reference.
+ */
+ call = list_entry(rx->to_be_accepted.next,
+ struct rxrpc_call, accept_link);
list_del_init(&call->accept_link);
sk_acceptq_removed(&rx->sk);
+ rxrpc_see_call(call);
write_lock_bh(&call->state_lock);
switch (call->state) {
case RXRPC_CALL_SERVER_ACCEPTING:
- call->state = RXRPC_CALL_SERVER_BUSY;
- if (test_and_set_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events))
- rxrpc_queue_call(call);
- ret = 0;
- goto out_release;
- case RXRPC_CALL_REMOTELY_ABORTED:
- case RXRPC_CALL_LOCALLY_ABORTED:
- ret = -ECONNABORTED;
- goto out_release;
- case RXRPC_CALL_NETWORK_ERROR:
- ret = call->conn->error;
- goto out_release;
- case RXRPC_CALL_DEAD:
- ret = -ETIME;
+ __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, ECONNABORTED);
+ abort = true;
+ /* fall through */
+ case RXRPC_CALL_COMPLETE:
+ ret = call->error;
goto out_discard;
default:
BUG();
}
- /* if the call is already dying or dead, then we leave the socket's ref
- * on it to be released by rxrpc_dead_call_expired() as induced by
- * rxrpc_release_call() */
-out_release:
- _debug("release %p", call);
- if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
- !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
- rxrpc_queue_call(call);
out_discard:
write_unlock_bh(&call->state_lock);
- _debug("discard %p", call);
-out:
write_unlock(&rx->call_lock);
+ if (abort) {
+ rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_release_call(rx, call);
+ rxrpc_put_call(call, rxrpc_call_put);
+ }
+ rxrpc_service_prealloc(rx, GFP_KERNEL);
_leave(" = %d", ret);
return ret;
}
-/**
- * rxrpc_kernel_accept_call - Allow a kernel service to accept an incoming call
- * @sock: The socket on which the impending call is waiting
- * @user_call_ID: The tag to attach to the call
+/*
+ * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls
+ * @sock: The socket on which to preallocate
+ * @notify_rx: Event notification function for the call
+ * @user_attach_call: Func to attach call to user_call_ID
+ * @user_call_ID: The tag to attach to the preallocated call
+ * @gfp: The allocation conditions.
*
- * Allow a kernel service to accept an incoming call, assuming the incoming
- * call is still valid.
- */
-struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock,
- unsigned long user_call_ID)
-{
- struct rxrpc_call *call;
-
- _enter(",%lx", user_call_ID);
- call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID);
- _leave(" = %p", call);
- return call;
-}
-EXPORT_SYMBOL(rxrpc_kernel_accept_call);
-
-/**
- * rxrpc_kernel_reject_call - Allow a kernel service to reject an incoming call
- * @sock: The socket on which the impending call is waiting
+ * Charge up the socket with preallocated calls, each with a user ID. A
+ * function should be provided to effect the attachment from the user's side.
+ * The user is given a ref to hold on the call.
*
- * Allow a kernel service to reject an incoming call with a BUSY message,
- * assuming the incoming call is still valid.
+ * Note that the call may be come connected before this function returns.
*/
-int rxrpc_kernel_reject_call(struct socket *sock)
+int rxrpc_kernel_charge_accept(struct socket *sock,
+ rxrpc_notify_rx_t notify_rx,
+ rxrpc_user_attach_call_t user_attach_call,
+ unsigned long user_call_ID, gfp_t gfp)
{
- int ret;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ struct rxrpc_backlog *b = rx->backlog;
- _enter("");
- ret = rxrpc_reject_call(rxrpc_sk(sock->sk));
- _leave(" = %d", ret);
- return ret;
+ if (sock->sk->sk_state == RXRPC_CLOSE)
+ return -ESHUTDOWN;
+
+ return rxrpc_service_prealloc_one(rx, b, notify_rx,
+ user_attach_call, user_call_ID,
+ gfp);
}
-EXPORT_SYMBOL(rxrpc_kernel_reject_call);
+EXPORT_SYMBOL(rxrpc_kernel_charge_accept);
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index e60cf65c2232..4f00476630b9 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -22,1281 +22,351 @@
#include "ar-internal.h"
/*
- * propose an ACK be sent
+ * Set the timer
*/
-void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
- u32 serial, bool immediate)
+void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why,
+ ktime_t now)
{
- unsigned long expiry;
- s8 prior = rxrpc_ack_priority[ack_reason];
-
- ASSERTCMP(prior, >, 0);
-
- _enter("{%d},%s,%%%x,%u",
- call->debug_id, rxrpc_acks(ack_reason), serial, immediate);
-
- if (prior < rxrpc_ack_priority[call->ackr_reason]) {
- if (immediate)
- goto cancel_timer;
- return;
- }
+ unsigned long t_j, now_j = jiffies;
+ ktime_t t;
+ bool queue = false;
- /* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
- * numbers */
- if (prior == rxrpc_ack_priority[call->ackr_reason]) {
- if (prior <= 4)
- call->ackr_serial = serial;
- if (immediate)
- goto cancel_timer;
- return;
- }
-
- call->ackr_reason = ack_reason;
- call->ackr_serial = serial;
-
- switch (ack_reason) {
- case RXRPC_ACK_DELAY:
- _debug("run delay timer");
- expiry = rxrpc_soft_ack_delay;
- goto run_timer;
-
- case RXRPC_ACK_IDLE:
- if (!immediate) {
- _debug("run defer timer");
- expiry = rxrpc_idle_ack_delay;
- goto run_timer;
- }
- goto cancel_timer;
-
- case RXRPC_ACK_REQUESTED:
- expiry = rxrpc_requested_ack_delay;
- if (!expiry)
- goto cancel_timer;
- if (!immediate || serial == 1) {
- _debug("run defer timer");
- goto run_timer;
- }
-
- default:
- _debug("immediate ACK");
- goto cancel_timer;
- }
-
-run_timer:
- expiry += jiffies;
- if (!timer_pending(&call->ack_timer) ||
- time_after(call->ack_timer.expires, expiry))
- mod_timer(&call->ack_timer, expiry);
- return;
-
-cancel_timer:
- _debug("cancel timer %%%u", serial);
- try_to_del_timer_sync(&call->ack_timer);
read_lock_bh(&call->state_lock);
- if (call->state <= RXRPC_CALL_COMPLETE &&
- !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
- rxrpc_queue_call(call);
- read_unlock_bh(&call->state_lock);
-}
-
-/*
- * propose an ACK be sent, locking the call structure
- */
-void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
- u32 serial, bool immediate)
-{
- s8 prior = rxrpc_ack_priority[ack_reason];
-
- if (prior > rxrpc_ack_priority[call->ackr_reason]) {
- spin_lock_bh(&call->lock);
- __rxrpc_propose_ACK(call, ack_reason, serial, immediate);
- spin_unlock_bh(&call->lock);
- }
-}
-/*
- * set the resend timer
- */
-static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend,
- unsigned long resend_at)
-{
- read_lock_bh(&call->state_lock);
- if (call->state >= RXRPC_CALL_COMPLETE)
- resend = 0;
-
- if (resend & 1) {
- _debug("SET RESEND");
- set_bit(RXRPC_CALL_EV_RESEND, &call->events);
- }
-
- if (resend & 2) {
- _debug("MODIFY RESEND TIMER");
- set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
- mod_timer(&call->resend_timer, resend_at);
- } else {
- _debug("KILL RESEND TIMER");
- del_timer_sync(&call->resend_timer);
- clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
- clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
- }
- read_unlock_bh(&call->state_lock);
-}
-
-/*
- * resend packets
- */
-static void rxrpc_resend(struct rxrpc_call *call)
-{
- struct rxrpc_wire_header *whdr;
- struct rxrpc_skb_priv *sp;
- struct sk_buff *txb;
- unsigned long *p_txb, resend_at;
- bool stop;
- int loop;
- u8 resend;
-
- _enter("{%d,%d,%d,%d},",
- call->acks_hard, call->acks_unacked,
- atomic_read(&call->sequence),
- CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
-
- stop = false;
- resend = 0;
- resend_at = 0;
-
- for (loop = call->acks_tail;
- loop != call->acks_head || stop;
- loop = (loop + 1) & (call->acks_winsz - 1)
- ) {
- p_txb = call->acks_window + loop;
- smp_read_barrier_depends();
- if (*p_txb & 1)
- continue;
-
- txb = (struct sk_buff *) *p_txb;
- sp = rxrpc_skb(txb);
-
- if (sp->need_resend) {
- sp->need_resend = false;
-
- /* each Tx packet has a new serial number */
- sp->hdr.serial = atomic_inc_return(&call->conn->serial);
-
- whdr = (struct rxrpc_wire_header *)txb->head;
- whdr->serial = htonl(sp->hdr.serial);
-
- _proto("Tx DATA %%%u { #%d }",
- sp->hdr.serial, sp->hdr.seq);
- if (rxrpc_send_data_packet(call->conn, txb) < 0) {
- stop = true;
- sp->resend_at = jiffies + 3;
- } else {
- sp->resend_at =
- jiffies + rxrpc_resend_timeout;
- }
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ t = call->expire_at;
+ if (!ktime_after(t, now))
+ goto out;
+
+ if (!ktime_after(call->resend_at, now)) {
+ call->resend_at = call->expire_at;
+ if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+ queue = true;
+ } else if (ktime_before(call->resend_at, t)) {
+ t = call->resend_at;
}
- if (time_after_eq(jiffies + 1, sp->resend_at)) {
- sp->need_resend = true;
- resend |= 1;
- } else if (resend & 2) {
- if (time_before(sp->resend_at, resend_at))
- resend_at = sp->resend_at;
- } else {
- resend_at = sp->resend_at;
- resend |= 2;
+ if (!ktime_after(call->ack_at, now)) {
+ call->ack_at = call->expire_at;
+ if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
+ queue = true;
+ } else if (ktime_before(call->ack_at, t)) {
+ t = call->ack_at;
}
- }
-
- rxrpc_set_resend(call, resend, resend_at);
- _leave("");
-}
-
-/*
- * handle resend timer expiry
- */
-static void rxrpc_resend_timer(struct rxrpc_call *call)
-{
- struct rxrpc_skb_priv *sp;
- struct sk_buff *txb;
- unsigned long *p_txb, resend_at;
- int loop;
- u8 resend;
-
- _enter("%d,%d,%d",
- call->acks_tail, call->acks_unacked, call->acks_head);
-
- if (call->state >= RXRPC_CALL_COMPLETE)
- return;
-
- resend = 0;
- resend_at = 0;
- for (loop = call->acks_unacked;
- loop != call->acks_head;
- loop = (loop + 1) & (call->acks_winsz - 1)
- ) {
- p_txb = call->acks_window + loop;
- smp_read_barrier_depends();
- txb = (struct sk_buff *) (*p_txb & ~1);
- sp = rxrpc_skb(txb);
+ t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now)));
+ t_j += jiffies;
- ASSERT(!(*p_txb & 1));
+ /* We have to make sure that the calculated jiffies value falls
+ * at or after the nsec value, or we may loop ceaselessly
+ * because the timer times out, but we haven't reached the nsec
+ * timeout yet.
+ */
+ t_j++;
- if (sp->need_resend) {
- ;
- } else if (time_after_eq(jiffies + 1, sp->resend_at)) {
- sp->need_resend = true;
- resend |= 1;
- } else if (resend & 2) {
- if (time_before(sp->resend_at, resend_at))
- resend_at = sp->resend_at;
- } else {
- resend_at = sp->resend_at;
- resend |= 2;
+ if (call->timer.expires != t_j || !timer_pending(&call->timer)) {
+ mod_timer(&call->timer, t_j);
+ trace_rxrpc_timer(call, why, now, now_j);
}
+
+ if (queue)
+ rxrpc_queue_call(call);
}
- rxrpc_set_resend(call, resend, resend_at);
- _leave("");
+out:
+ read_unlock_bh(&call->state_lock);
}
/*
- * process soft ACKs of our transmitted packets
- * - these indicate packets the peer has or has not received, but hasn't yet
- * given to the consumer, and so can still be discarded and re-requested
+ * propose an ACK be sent
*/
-static int rxrpc_process_soft_ACKs(struct rxrpc_call *call,
- struct rxrpc_ackpacket *ack,
- struct sk_buff *skb)
+static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
+ u16 skew, u32 serial, bool immediate,
+ bool background,
+ enum rxrpc_propose_ack_trace why)
{
- struct rxrpc_skb_priv *sp;
- struct sk_buff *txb;
- unsigned long *p_txb, resend_at;
- int loop;
- u8 sacks[RXRPC_MAXACKS], resend;
-
- _enter("{%d,%d},{%d},",
- call->acks_hard,
- CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz),
- ack->nAcks);
-
- if (skb_copy_bits(skb, 0, sacks, ack->nAcks) < 0)
- goto protocol_error;
-
- resend = 0;
- resend_at = 0;
- for (loop = 0; loop < ack->nAcks; loop++) {
- p_txb = call->acks_window;
- p_txb += (call->acks_tail + loop) & (call->acks_winsz - 1);
- smp_read_barrier_depends();
- txb = (struct sk_buff *) (*p_txb & ~1);
- sp = rxrpc_skb(txb);
-
- switch (sacks[loop]) {
- case RXRPC_ACK_TYPE_ACK:
- sp->need_resend = false;
- *p_txb |= 1;
- break;
- case RXRPC_ACK_TYPE_NACK:
- sp->need_resend = true;
- *p_txb &= ~1;
- resend = 1;
- break;
- default:
- _debug("Unsupported ACK type %d", sacks[loop]);
- goto protocol_error;
- }
- }
-
- smp_mb();
- call->acks_unacked = (call->acks_tail + loop) & (call->acks_winsz - 1);
-
- /* anything not explicitly ACK'd is implicitly NACK'd, but may just not
- * have been received or processed yet by the far end */
- for (loop = call->acks_unacked;
- loop != call->acks_head;
- loop = (loop + 1) & (call->acks_winsz - 1)
- ) {
- p_txb = call->acks_window + loop;
- smp_read_barrier_depends();
- txb = (struct sk_buff *) (*p_txb & ~1);
- sp = rxrpc_skb(txb);
+ enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use;
+ unsigned int expiry = rxrpc_soft_ack_delay;
+ ktime_t now, ack_at;
+ s8 prior = rxrpc_ack_priority[ack_reason];
- if (*p_txb & 1) {
- /* packet must have been discarded */
- sp->need_resend = true;
- *p_txb &= ~1;
- resend |= 1;
- } else if (sp->need_resend) {
- ;
- } else if (time_after_eq(jiffies + 1, sp->resend_at)) {
- sp->need_resend = true;
- resend |= 1;
- } else if (resend & 2) {
- if (time_before(sp->resend_at, resend_at))
- resend_at = sp->resend_at;
- } else {
- resend_at = sp->resend_at;
- resend |= 2;
+ /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial
+ * numbers, but we don't alter the timeout.
+ */
+ _debug("prior %u %u vs %u %u",
+ ack_reason, prior,
+ call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]);
+ if (ack_reason == call->ackr_reason) {
+ if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) {
+ outcome = rxrpc_propose_ack_update;
+ call->ackr_serial = serial;
+ call->ackr_skew = skew;
}
+ if (!immediate)
+ goto trace;
+ } else if (prior > rxrpc_ack_priority[call->ackr_reason]) {
+ call->ackr_reason = ack_reason;
+ call->ackr_serial = serial;
+ call->ackr_skew = skew;
+ } else {
+ outcome = rxrpc_propose_ack_subsume;
}
- rxrpc_set_resend(call, resend, resend_at);
- _leave(" = 0");
- return 0;
-
-protocol_error:
- _leave(" = -EPROTO");
- return -EPROTO;
-}
-
-/*
- * discard hard-ACK'd packets from the Tx window
- */
-static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard)
-{
- unsigned long _skb;
- int tail = call->acks_tail, old_tail;
- int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz);
+ switch (ack_reason) {
+ case RXRPC_ACK_REQUESTED:
+ if (rxrpc_requested_ack_delay < expiry)
+ expiry = rxrpc_requested_ack_delay;
+ if (serial == 1)
+ immediate = false;
+ break;
- _enter("{%u,%u},%u", call->acks_hard, win, hard);
+ case RXRPC_ACK_DELAY:
+ if (rxrpc_soft_ack_delay < expiry)
+ expiry = rxrpc_soft_ack_delay;
+ break;
- ASSERTCMP(hard - call->acks_hard, <=, win);
+ case RXRPC_ACK_PING:
+ case RXRPC_ACK_IDLE:
+ if (rxrpc_idle_ack_delay < expiry)
+ expiry = rxrpc_idle_ack_delay;
+ break;
- while (call->acks_hard < hard) {
- smp_read_barrier_depends();
- _skb = call->acks_window[tail] & ~1;
- rxrpc_free_skb((struct sk_buff *) _skb);
- old_tail = tail;
- tail = (tail + 1) & (call->acks_winsz - 1);
- call->acks_tail = tail;
- if (call->acks_unacked == old_tail)
- call->acks_unacked = tail;
- call->acks_hard++;
+ default:
+ immediate = true;
+ break;
}
- wake_up(&call->tx_waitq);
-}
-
-/*
- * clear the Tx window in the event of a failure
- */
-static void rxrpc_clear_tx_window(struct rxrpc_call *call)
-{
- rxrpc_rotate_tx_window(call, atomic_read(&call->sequence));
-}
-
-/*
- * drain the out of sequence received packet queue into the packet Rx queue
- */
-static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call)
-{
- struct rxrpc_skb_priv *sp;
- struct sk_buff *skb;
- bool terminal;
- int ret;
-
- _enter("{%d,%d}", call->rx_data_post, call->rx_first_oos);
-
- spin_lock_bh(&call->lock);
-
- ret = -ECONNRESET;
- if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
- goto socket_unavailable;
-
- skb = skb_dequeue(&call->rx_oos_queue);
- if (skb) {
- sp = rxrpc_skb(skb);
-
- _debug("drain OOS packet %d [%d]",
- sp->hdr.seq, call->rx_first_oos);
-
- if (sp->hdr.seq != call->rx_first_oos) {
- skb_queue_head(&call->rx_oos_queue, skb);
- call->rx_first_oos = rxrpc_skb(skb)->hdr.seq;
- _debug("requeue %p {%u}", skb, call->rx_first_oos);
- } else {
- skb->mark = RXRPC_SKB_MARK_DATA;
- terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) &&
- !(sp->hdr.flags & RXRPC_CLIENT_INITIATED));
- ret = rxrpc_queue_rcv_skb(call, skb, true, terminal);
- BUG_ON(ret < 0);
- _debug("drain #%u", call->rx_data_post);
- call->rx_data_post++;
-
- /* find out what the next packet is */
- skb = skb_peek(&call->rx_oos_queue);
- if (skb)
- call->rx_first_oos = rxrpc_skb(skb)->hdr.seq;
- else
- call->rx_first_oos = 0;
- _debug("peek %p {%u}", skb, call->rx_first_oos);
+ if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) {
+ _debug("already scheduled");
+ } else if (immediate || expiry == 0) {
+ _debug("immediate ACK %lx", call->events);
+ if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events) &&
+ background)
+ rxrpc_queue_call(call);
+ } else {
+ now = ktime_get_real();
+ ack_at = ktime_add_ms(now, expiry);
+ if (ktime_before(ack_at, call->ack_at)) {
+ call->ack_at = ack_at;
+ rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now);
}
}
- ret = 0;
-socket_unavailable:
- spin_unlock_bh(&call->lock);
- _leave(" = %d", ret);
- return ret;
+trace:
+ trace_rxrpc_propose_ack(call, why, ack_reason, serial, immediate,
+ background, outcome);
}
/*
- * insert an out of sequence packet into the buffer
+ * propose an ACK be sent, locking the call structure
*/
-static void rxrpc_insert_oos_packet(struct rxrpc_call *call,
- struct sk_buff *skb)
+void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
+ u16 skew, u32 serial, bool immediate, bool background,
+ enum rxrpc_propose_ack_trace why)
{
- struct rxrpc_skb_priv *sp, *psp;
- struct sk_buff *p;
- u32 seq;
-
- sp = rxrpc_skb(skb);
- seq = sp->hdr.seq;
- _enter(",,{%u}", seq);
-
- skb->destructor = rxrpc_packet_destructor;
- ASSERTCMP(sp->call, ==, NULL);
- sp->call = call;
- rxrpc_get_call(call);
- atomic_inc(&call->skb_count);
-
- /* insert into the buffer in sequence order */
spin_lock_bh(&call->lock);
-
- skb_queue_walk(&call->rx_oos_queue, p) {
- psp = rxrpc_skb(p);
- if (psp->hdr.seq > seq) {
- _debug("insert oos #%u before #%u", seq, psp->hdr.seq);
- skb_insert(p, skb, &call->rx_oos_queue);
- goto inserted;
- }
- }
-
- _debug("append oos #%u", seq);
- skb_queue_tail(&call->rx_oos_queue, skb);
-inserted:
-
- /* we might now have a new front to the queue */
- if (call->rx_first_oos == 0 || seq < call->rx_first_oos)
- call->rx_first_oos = seq;
-
- read_lock(&call->state_lock);
- if (call->state < RXRPC_CALL_COMPLETE &&
- call->rx_data_post == call->rx_first_oos) {
- _debug("drain rx oos now");
- set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events);
- }
- read_unlock(&call->state_lock);
-
+ __rxrpc_propose_ACK(call, ack_reason, skew, serial,
+ immediate, background, why);
spin_unlock_bh(&call->lock);
- _leave(" [stored #%u]", call->rx_first_oos);
-}
-
-/*
- * clear the Tx window on final ACK reception
- */
-static void rxrpc_zap_tx_window(struct rxrpc_call *call)
-{
- struct rxrpc_skb_priv *sp;
- struct sk_buff *skb;
- unsigned long _skb, *acks_window;
- u8 winsz = call->acks_winsz;
- int tail;
-
- acks_window = call->acks_window;
- call->acks_window = NULL;
-
- while (CIRC_CNT(call->acks_head, call->acks_tail, winsz) > 0) {
- tail = call->acks_tail;
- smp_read_barrier_depends();
- _skb = acks_window[tail] & ~1;
- smp_mb();
- call->acks_tail = (call->acks_tail + 1) & (winsz - 1);
-
- skb = (struct sk_buff *) _skb;
- sp = rxrpc_skb(skb);
- _debug("+++ clear Tx %u", sp->hdr.seq);
- rxrpc_free_skb(skb);
- }
-
- kfree(acks_window);
}
/*
- * process the extra information that may be appended to an ACK packet
+ * Handle congestion being detected by the retransmit timeout.
*/
-static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
- unsigned int latest, int nAcks)
+static void rxrpc_congestion_timeout(struct rxrpc_call *call)
{
- struct rxrpc_ackinfo ackinfo;
- struct rxrpc_peer *peer;
- unsigned int mtu;
-
- if (skb_copy_bits(skb, nAcks + 3, &ackinfo, sizeof(ackinfo)) < 0) {
- _leave(" [no ackinfo]");
- return;
- }
-
- _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
- latest,
- ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU),
- ntohl(ackinfo.rwind), ntohl(ackinfo.jumbo_max));
-
- mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU));
-
- peer = call->conn->params.peer;
- if (mtu < peer->maxdata) {
- spin_lock_bh(&peer->lock);
- peer->maxdata = mtu;
- peer->mtu = mtu + peer->hdrsize;
- spin_unlock_bh(&peer->lock);
- _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
- }
+ set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags);
}
/*
- * process packets in the reception queue
+ * Perform retransmission of NAK'd and unack'd packets.
*/
-static int rxrpc_process_rx_queue(struct rxrpc_call *call,
- u32 *_abort_code)
+static void rxrpc_resend(struct rxrpc_call *call, ktime_t now)
{
- struct rxrpc_ackpacket ack;
struct rxrpc_skb_priv *sp;
struct sk_buff *skb;
- bool post_ACK;
- int latest;
- u32 hard, tx;
-
- _enter("");
-
-process_further:
- skb = skb_dequeue(&call->rx_queue);
- if (!skb)
- return -EAGAIN;
-
- _net("deferred skb %p", skb);
-
- sp = rxrpc_skb(skb);
-
- _debug("process %s [st %d]", rxrpc_pkts[sp->hdr.type], call->state);
-
- post_ACK = false;
-
- switch (sp->hdr.type) {
- /* data packets that wind up here have been received out of
- * order, need security processing or are jumbo packets */
- case RXRPC_PACKET_TYPE_DATA:
- _proto("OOSQ DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
+ rxrpc_seq_t cursor, seq, top;
+ ktime_t max_age, oldest, ack_ts;
+ int ix;
+ u8 annotation, anno_type, retrans = 0, unacked = 0;
- /* secured packets must be verified and possibly decrypted */
- if (call->conn->security->verify_packet(call, skb,
- _abort_code) < 0)
- goto protocol_error;
-
- rxrpc_insert_oos_packet(call, skb);
- goto process_further;
-
- /* partial ACK to process */
- case RXRPC_PACKET_TYPE_ACK:
- if (skb_copy_bits(skb, 0, &ack, sizeof(ack)) < 0) {
- _debug("extraction failure");
- goto protocol_error;
- }
- if (!skb_pull(skb, sizeof(ack)))
- BUG();
-
- latest = sp->hdr.serial;
- hard = ntohl(ack.firstPacket);
- tx = atomic_read(&call->sequence);
-
- _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
- latest,
- ntohs(ack.maxSkew),
- hard,
- ntohl(ack.previousPacket),
- ntohl(ack.serial),
- rxrpc_acks(ack.reason),
- ack.nAcks);
-
- rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks);
-
- if (ack.reason == RXRPC_ACK_PING) {
- _proto("Rx ACK %%%u PING Request", latest);
- rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
- sp->hdr.serial, true);
- }
-
- /* discard any out-of-order or duplicate ACKs */
- if (latest - call->acks_latest <= 0) {
- _debug("discard ACK %d <= %d",
- latest, call->acks_latest);
- goto discard;
- }
- call->acks_latest = latest;
+ _enter("{%d,%d}", call->tx_hard_ack, call->tx_top);
- if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
- call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY &&
- call->state != RXRPC_CALL_SERVER_SEND_REPLY &&
- call->state != RXRPC_CALL_SERVER_AWAIT_ACK)
- goto discard;
+ max_age = ktime_sub_ms(now, rxrpc_resend_timeout);
- _debug("Tx=%d H=%u S=%d", tx, call->acks_hard, call->state);
-
- if (hard > 0) {
- if (hard - 1 > tx) {
- _debug("hard-ACK'd packet %d not transmitted"
- " (%d top)",
- hard - 1, tx);
- goto protocol_error;
- }
+ spin_lock_bh(&call->lock);
- if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY ||
- call->state == RXRPC_CALL_SERVER_AWAIT_ACK) &&
- hard > tx) {
- call->acks_hard = tx;
- goto all_acked;
- }
+ cursor = call->tx_hard_ack;
+ top = call->tx_top;
+ ASSERT(before_eq(cursor, top));
+ if (cursor == top)
+ goto out_unlock;
+
+ /* Scan the packet list without dropping the lock and decide which of
+ * the packets in the Tx buffer we're going to resend and what the new
+ * resend timeout will be.
+ */
+ oldest = now;
+ for (seq = cursor + 1; before_eq(seq, top); seq++) {
+ ix = seq & RXRPC_RXTX_BUFF_MASK;
+ annotation = call->rxtx_annotations[ix];
+ anno_type = annotation & RXRPC_TX_ANNO_MASK;
+ annotation &= ~RXRPC_TX_ANNO_MASK;
+ if (anno_type == RXRPC_TX_ANNO_ACK)
+ continue;
- smp_rmb();
- rxrpc_rotate_tx_window(call, hard - 1);
- }
+ skb = call->rxtx_buffer[ix];
+ rxrpc_see_skb(skb, rxrpc_skb_tx_seen);
+ sp = rxrpc_skb(skb);
- if (ack.nAcks > 0) {
- if (hard - 1 + ack.nAcks > tx) {
- _debug("soft-ACK'd packet %d+%d not"
- " transmitted (%d top)",
- hard - 1, ack.nAcks, tx);
- goto protocol_error;
+ if (anno_type == RXRPC_TX_ANNO_UNACK) {
+ if (ktime_after(skb->tstamp, max_age)) {
+ if (ktime_before(skb->tstamp, oldest))
+ oldest = skb->tstamp;
+ continue;
}
-
- if (rxrpc_process_soft_ACKs(call, &ack, skb) < 0)
- goto protocol_error;
+ if (!(annotation & RXRPC_TX_ANNO_RESENT))
+ unacked++;
}
- goto discard;
-
- /* complete ACK to process */
- case RXRPC_PACKET_TYPE_ACKALL:
- goto all_acked;
-
- /* abort and busy are handled elsewhere */
- case RXRPC_PACKET_TYPE_BUSY:
- case RXRPC_PACKET_TYPE_ABORT:
- BUG();
- /* connection level events - also handled elsewhere */
- case RXRPC_PACKET_TYPE_CHALLENGE:
- case RXRPC_PACKET_TYPE_RESPONSE:
- case RXRPC_PACKET_TYPE_DEBUG:
- BUG();
+ /* Okay, we need to retransmit a packet. */
+ call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation;
+ retrans++;
+ trace_rxrpc_retransmit(call, seq, annotation | anno_type,
+ ktime_to_ns(ktime_sub(skb->tstamp, max_age)));
}
- /* if we've had a hard ACK that covers all the packets we've sent, then
- * that ends that phase of the operation */
-all_acked:
- write_lock_bh(&call->state_lock);
- _debug("ack all %d", call->state);
+ call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout);
- switch (call->state) {
- case RXRPC_CALL_CLIENT_AWAIT_REPLY:
- call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
- break;
- case RXRPC_CALL_SERVER_AWAIT_ACK:
- _debug("srv complete");
- call->state = RXRPC_CALL_COMPLETE;
- post_ACK = true;
- break;
- case RXRPC_CALL_CLIENT_SEND_REQUEST:
- case RXRPC_CALL_SERVER_RECV_REQUEST:
- goto protocol_error_unlock; /* can't occur yet */
- default:
- write_unlock_bh(&call->state_lock);
- goto discard; /* assume packet left over from earlier phase */
- }
-
- write_unlock_bh(&call->state_lock);
+ if (unacked)
+ rxrpc_congestion_timeout(call);
- /* if all the packets we sent are hard-ACK'd, then we can discard
- * whatever we've got left */
- _debug("clear Tx %d",
- CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz));
-
- del_timer_sync(&call->resend_timer);
- clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
- clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
-
- if (call->acks_window)
- rxrpc_zap_tx_window(call);
-
- if (post_ACK) {
- /* post the final ACK message for userspace to pick up */
- _debug("post ACK");
- skb->mark = RXRPC_SKB_MARK_FINAL_ACK;
- sp->call = call;
- rxrpc_get_call(call);
- atomic_inc(&call->skb_count);
- spin_lock_bh(&call->lock);
- if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0)
- BUG();
+ /* If there was nothing that needed retransmission then it's likely
+ * that an ACK got lost somewhere. Send a ping to find out instead of
+ * retransmitting data.
+ */
+ if (!retrans) {
+ rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now);
spin_unlock_bh(&call->lock);
- goto process_further;
- }
-
-discard:
- rxrpc_free_skb(skb);
- goto process_further;
-
-protocol_error_unlock:
- write_unlock_bh(&call->state_lock);
-protocol_error:
- rxrpc_free_skb(skb);
- _leave(" = -EPROTO");
- return -EPROTO;
-}
-
-/*
- * post a message to the socket Rx queue for recvmsg() to pick up
- */
-static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error,
- bool fatal)
-{
- struct rxrpc_skb_priv *sp;
- struct sk_buff *skb;
- int ret;
-
- _enter("{%d,%lx},%u,%u,%d",
- call->debug_id, call->flags, mark, error, fatal);
-
- /* remove timers and things for fatal messages */
- if (fatal) {
- del_timer_sync(&call->resend_timer);
- del_timer_sync(&call->ack_timer);
- clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
- }
+ ack_ts = ktime_sub(now, call->acks_latest_ts);
+ if (ktime_to_ns(ack_ts) < call->peer->rtt)
+ goto out;
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false,
+ rxrpc_propose_ack_ping_for_lost_ack);
+ rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+ goto out;
+ }
+
+ /* Now go through the Tx window and perform the retransmissions. We
+ * have to drop the lock for each send. If an ACK comes in whilst the
+ * lock is dropped, it may clear some of the retransmission markers for
+ * packets that it soft-ACKs.
+ */
+ for (seq = cursor + 1; before_eq(seq, top); seq++) {
+ ix = seq & RXRPC_RXTX_BUFF_MASK;
+ annotation = call->rxtx_annotations[ix];
+ anno_type = annotation & RXRPC_TX_ANNO_MASK;
+ if (anno_type != RXRPC_TX_ANNO_RETRANS)
+ continue;
- if (mark != RXRPC_SKB_MARK_NEW_CALL &&
- !test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
- _leave("[no userid]");
- return 0;
- }
+ skb = call->rxtx_buffer[ix];
+ rxrpc_get_skb(skb, rxrpc_skb_tx_got);
+ spin_unlock_bh(&call->lock);
- if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
- skb = alloc_skb(0, GFP_NOFS);
- if (!skb)
- return -ENOMEM;
+ if (rxrpc_send_data_packet(call, skb, true) < 0) {
+ rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+ return;
+ }
- rxrpc_new_skb(skb);
+ if (rxrpc_is_client_call(call))
+ rxrpc_expose_client_call(call);
- skb->mark = mark;
+ rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+ spin_lock_bh(&call->lock);
- sp = rxrpc_skb(skb);
- memset(sp, 0, sizeof(*sp));
- sp->error = error;
- sp->call = call;
- rxrpc_get_call(call);
- atomic_inc(&call->skb_count);
+ /* We need to clear the retransmit state, but there are two
+ * things we need to be aware of: A new ACK/NAK might have been
+ * received and the packet might have been hard-ACK'd (in which
+ * case it will no longer be in the buffer).
+ */
+ if (after(seq, call->tx_hard_ack)) {
+ annotation = call->rxtx_annotations[ix];
+ anno_type = annotation & RXRPC_TX_ANNO_MASK;
+ if (anno_type == RXRPC_TX_ANNO_RETRANS ||
+ anno_type == RXRPC_TX_ANNO_NAK) {
+ annotation &= ~RXRPC_TX_ANNO_MASK;
+ annotation |= RXRPC_TX_ANNO_UNACK;
+ }
+ annotation |= RXRPC_TX_ANNO_RESENT;
+ call->rxtx_annotations[ix] = annotation;
+ }
- spin_lock_bh(&call->lock);
- ret = rxrpc_queue_rcv_skb(call, skb, true, fatal);
- spin_unlock_bh(&call->lock);
- BUG_ON(ret < 0);
+ if (after(call->tx_hard_ack, seq))
+ seq = call->tx_hard_ack;
}
- return 0;
+out_unlock:
+ spin_unlock_bh(&call->lock);
+out:
+ _leave("");
}
/*
- * handle background processing of incoming call packets and ACK / abort
- * generation
+ * Handle retransmission and deferred ACK/abort generation.
*/
void rxrpc_process_call(struct work_struct *work)
{
struct rxrpc_call *call =
container_of(work, struct rxrpc_call, processor);
- struct rxrpc_wire_header whdr;
- struct rxrpc_ackpacket ack;
- struct rxrpc_ackinfo ackinfo;
- struct msghdr msg;
- struct kvec iov[5];
- enum rxrpc_call_event genbit;
- unsigned long bits;
- __be32 data, pad;
- size_t len;
- int loop, nbit, ioc, ret, mtu;
- u32 serial, abort_code = RX_PROTOCOL_ERROR;
- u8 *acks = NULL;
-
- //printk("\n--------------------\n");
- _enter("{%d,%s,%lx} [%lu]",
- call->debug_id, rxrpc_call_states[call->state], call->events,
- (jiffies - call->creation_jif) / (HZ / 10));
-
- if (test_and_set_bit(RXRPC_CALL_PROC_BUSY, &call->flags)) {
- _debug("XXXXXXXXXXXXX RUNNING ON MULTIPLE CPUS XXXXXXXXXXXXX");
- return;
- }
-
- if (!call->conn)
- goto skip_msg_init;
-
- /* there's a good chance we're going to have to send a message, so set
- * one up in advance */
- msg.msg_name = &call->conn->params.peer->srx.transport;
- msg.msg_namelen = call->conn->params.peer->srx.transport_len;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = 0;
-
- whdr.epoch = htonl(call->conn->proto.epoch);
- whdr.cid = htonl(call->cid);
- whdr.callNumber = htonl(call->call_id);
- whdr.seq = 0;
- whdr.type = RXRPC_PACKET_TYPE_ACK;
- whdr.flags = call->conn->out_clientflag;
- whdr.userStatus = 0;
- whdr.securityIndex = call->conn->security_ix;
- whdr._rsvd = 0;
- whdr.serviceId = htons(call->service_id);
-
- memset(iov, 0, sizeof(iov));
- iov[0].iov_base = &whdr;
- iov[0].iov_len = sizeof(whdr);
-skip_msg_init:
-
- /* deal with events of a final nature */
- if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) {
- enum rxrpc_skb_mark mark;
- int error;
-
- clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
- clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events);
- clear_bit(RXRPC_CALL_EV_ABORT, &call->events);
-
- error = call->error_report;
- if (error < RXRPC_LOCAL_ERROR_OFFSET) {
- mark = RXRPC_SKB_MARK_NET_ERROR;
- _debug("post net error %d", error);
- } else {
- mark = RXRPC_SKB_MARK_LOCAL_ERROR;
- error -= RXRPC_LOCAL_ERROR_OFFSET;
- _debug("post net local error %d", error);
- }
-
- if (rxrpc_post_message(call, mark, error, true) < 0)
- goto no_mem;
- clear_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events);
- goto kill_ACKs;
- }
-
- if (test_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events)) {
- ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
-
- clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events);
- clear_bit(RXRPC_CALL_EV_ABORT, &call->events);
+ ktime_t now;
- _debug("post conn abort");
+ rxrpc_see_call(call);
- if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
- call->conn->error, true) < 0)
- goto no_mem;
- clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
- goto kill_ACKs;
- }
-
- if (test_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) {
- whdr.type = RXRPC_PACKET_TYPE_BUSY;
- genbit = RXRPC_CALL_EV_REJECT_BUSY;
- goto send_message;
- }
-
- if (test_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
- ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE);
-
- if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
- ECONNABORTED, true) < 0)
- goto no_mem;
- whdr.type = RXRPC_PACKET_TYPE_ABORT;
- data = htonl(call->local_abort);
- iov[1].iov_base = &data;
- iov[1].iov_len = sizeof(data);
- genbit = RXRPC_CALL_EV_ABORT;
- goto send_message;
- }
-
- if (test_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) {
- genbit = RXRPC_CALL_EV_ACK_FINAL;
-
- ack.bufferSpace = htons(8);
- ack.maxSkew = 0;
- ack.serial = 0;
- ack.reason = RXRPC_ACK_IDLE;
- ack.nAcks = 0;
- call->ackr_reason = 0;
-
- spin_lock_bh(&call->lock);
- ack.serial = htonl(call->ackr_serial);
- ack.previousPacket = htonl(call->ackr_prev_seq);
- ack.firstPacket = htonl(call->rx_data_eaten + 1);
- spin_unlock_bh(&call->lock);
-
- pad = 0;
+ //printk("\n--------------------\n");
+ _enter("{%d,%s,%lx}",
+ call->debug_id, rxrpc_call_states[call->state], call->events);
- iov[1].iov_base = &ack;
- iov[1].iov_len = sizeof(ack);
- iov[2].iov_base = &pad;
- iov[2].iov_len = 3;
- iov[3].iov_base = &ackinfo;
- iov[3].iov_len = sizeof(ackinfo);
- goto send_ACK;
+recheck_state:
+ if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
+ rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ goto recheck_state;
}
- if (call->events & ((1 << RXRPC_CALL_EV_RCVD_BUSY) |
- (1 << RXRPC_CALL_EV_RCVD_ABORT))
- ) {
- u32 mark;
-
- if (test_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events))
- mark = RXRPC_SKB_MARK_REMOTE_ABORT;
- else
- mark = RXRPC_SKB_MARK_BUSY;
-
- _debug("post abort/busy");
- rxrpc_clear_tx_window(call);
- if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0)
- goto no_mem;
-
- clear_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events);
- clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
- goto kill_ACKs;
+ if (call->state == RXRPC_CALL_COMPLETE) {
+ del_timer_sync(&call->timer);
+ goto out_put;
}
- if (test_and_clear_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events)) {
- _debug("do implicit ackall");
- rxrpc_clear_tx_window(call);
+ now = ktime_get_real();
+ if (ktime_before(call->expire_at, now)) {
+ rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME);
+ set_bit(RXRPC_CALL_EV_ABORT, &call->events);
+ goto recheck_state;
}
- if (test_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events)) {
- write_lock_bh(&call->state_lock);
- if (call->state <= RXRPC_CALL_COMPLETE) {
- call->state = RXRPC_CALL_LOCALLY_ABORTED;
- call->local_abort = RX_CALL_TIMEOUT;
- set_bit(RXRPC_CALL_EV_ABORT, &call->events);
+ if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) {
+ call->ack_at = call->expire_at;
+ if (call->ackr_reason) {
+ rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+ goto recheck_state;
}
- write_unlock_bh(&call->state_lock);
-
- _debug("post timeout");
- if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR,
- ETIME, true) < 0)
- goto no_mem;
-
- clear_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events);
- goto kill_ACKs;
}
- /* deal with assorted inbound messages */
- if (!skb_queue_empty(&call->rx_queue)) {
- switch (rxrpc_process_rx_queue(call, &abort_code)) {
- case 0:
- case -EAGAIN:
- break;
- case -ENOMEM:
- goto no_mem;
- case -EKEYEXPIRED:
- case -EKEYREJECTED:
- case -EPROTO:
- rxrpc_abort_call(call, abort_code);
- goto kill_ACKs;
- }
+ if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) {
+ rxrpc_resend(call, now);
+ goto recheck_state;
}
- /* handle resending */
- if (test_and_clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events))
- rxrpc_resend_timer(call);
- if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events))
- rxrpc_resend(call);
-
- /* consider sending an ordinary ACK */
- if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) {
- _debug("send ACK: window: %d - %d { %lx }",
- call->rx_data_eaten, call->ackr_win_top,
- call->ackr_window[0]);
-
- if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST &&
- call->ackr_reason != RXRPC_ACK_PING_RESPONSE) {
- /* ACK by sending reply DATA packet in this state */
- clear_bit(RXRPC_CALL_EV_ACK, &call->events);
- goto maybe_reschedule;
- }
-
- genbit = RXRPC_CALL_EV_ACK;
-
- acks = kzalloc(call->ackr_win_top - call->rx_data_eaten,
- GFP_NOFS);
- if (!acks)
- goto no_mem;
-
- //hdr.flags = RXRPC_SLOW_START_OK;
- ack.bufferSpace = htons(8);
- ack.maxSkew = 0;
-
- spin_lock_bh(&call->lock);
- ack.reason = call->ackr_reason;
- ack.serial = htonl(call->ackr_serial);
- ack.previousPacket = htonl(call->ackr_prev_seq);
- ack.firstPacket = htonl(call->rx_data_eaten + 1);
-
- ack.nAcks = 0;
- for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
- nbit = loop * BITS_PER_LONG;
- for (bits = call->ackr_window[loop]; bits; bits >>= 1
- ) {
- _debug("- l=%d n=%d b=%lx", loop, nbit, bits);
- if (bits & 1) {
- acks[nbit] = RXRPC_ACK_TYPE_ACK;
- ack.nAcks = nbit + 1;
- }
- nbit++;
- }
- }
- call->ackr_reason = 0;
- spin_unlock_bh(&call->lock);
-
- pad = 0;
-
- iov[1].iov_base = &ack;
- iov[1].iov_len = sizeof(ack);
- iov[2].iov_base = acks;
- iov[2].iov_len = ack.nAcks;
- iov[3].iov_base = &pad;
- iov[3].iov_len = 3;
- iov[4].iov_base = &ackinfo;
- iov[4].iov_len = sizeof(ackinfo);
-
- switch (ack.reason) {
- case RXRPC_ACK_REQUESTED:
- case RXRPC_ACK_DUPLICATE:
- case RXRPC_ACK_OUT_OF_SEQUENCE:
- case RXRPC_ACK_EXCEEDS_WINDOW:
- case RXRPC_ACK_NOSPACE:
- case RXRPC_ACK_PING:
- case RXRPC_ACK_PING_RESPONSE:
- goto send_ACK_with_skew;
- case RXRPC_ACK_DELAY:
- case RXRPC_ACK_IDLE:
- goto send_ACK;
- }
- }
-
- /* handle completion of security negotiations on an incoming
- * connection */
- if (test_and_clear_bit(RXRPC_CALL_EV_SECURED, &call->events)) {
- _debug("secured");
- spin_lock_bh(&call->lock);
-
- if (call->state == RXRPC_CALL_SERVER_SECURING) {
- _debug("securing");
- write_lock(&call->socket->call_lock);
- if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
- !test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
- _debug("not released");
- call->state = RXRPC_CALL_SERVER_ACCEPTING;
- list_move_tail(&call->accept_link,
- &call->socket->acceptq);
- }
- write_unlock(&call->socket->call_lock);
- read_lock(&call->state_lock);
- if (call->state < RXRPC_CALL_COMPLETE)
- set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events);
- read_unlock(&call->state_lock);
- }
-
- spin_unlock_bh(&call->lock);
- if (!test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events))
- goto maybe_reschedule;
- }
-
- /* post a notification of an acceptable connection to the app */
- if (test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events)) {
- _debug("post accept");
- if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL,
- 0, false) < 0)
- goto no_mem;
- clear_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events);
- goto maybe_reschedule;
- }
-
- /* handle incoming call acceptance */
- if (test_and_clear_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) {
- _debug("accepted");
- ASSERTCMP(call->rx_data_post, ==, 0);
- call->rx_data_post = 1;
- read_lock_bh(&call->state_lock);
- if (call->state < RXRPC_CALL_COMPLETE)
- set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events);
- read_unlock_bh(&call->state_lock);
- }
-
- /* drain the out of sequence received packet queue into the packet Rx
- * queue */
- if (test_and_clear_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events)) {
- while (call->rx_data_post == call->rx_first_oos)
- if (rxrpc_drain_rx_oos_queue(call) < 0)
- break;
- goto maybe_reschedule;
- }
-
- if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) {
- rxrpc_release_call(call);
- clear_bit(RXRPC_CALL_EV_RELEASE, &call->events);
- }
+ rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now);
/* other events may have been raised since we started checking */
- goto maybe_reschedule;
-
-send_ACK_with_skew:
- ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) -
- ntohl(ack.serial));
-send_ACK:
- mtu = call->conn->params.peer->if_mtu;
- mtu -= call->conn->params.peer->hdrsize;
- ackinfo.maxMTU = htonl(mtu);
- ackinfo.rwind = htonl(rxrpc_rx_window_size);
-
- /* permit the peer to send us jumbo packets if it wants to */
- ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
- ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max);
-
- serial = atomic_inc_return(&call->conn->serial);
- whdr.serial = htonl(serial);
- _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
- serial,
- ntohs(ack.maxSkew),
- ntohl(ack.firstPacket),
- ntohl(ack.previousPacket),
- ntohl(ack.serial),
- rxrpc_acks(ack.reason),
- ack.nAcks);
-
- del_timer_sync(&call->ack_timer);
- if (ack.nAcks > 0)
- set_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags);
- goto send_message_2;
-
-send_message:
- _debug("send message");
-
- serial = atomic_inc_return(&call->conn->serial);
- whdr.serial = htonl(serial);
- _proto("Tx %s %%%u", rxrpc_pkts[whdr.type], serial);
-send_message_2:
-
- len = iov[0].iov_len;
- ioc = 1;
- if (iov[4].iov_len) {
- ioc = 5;
- len += iov[4].iov_len;
- len += iov[3].iov_len;
- len += iov[2].iov_len;
- len += iov[1].iov_len;
- } else if (iov[3].iov_len) {
- ioc = 4;
- len += iov[3].iov_len;
- len += iov[2].iov_len;
- len += iov[1].iov_len;
- } else if (iov[2].iov_len) {
- ioc = 3;
- len += iov[2].iov_len;
- len += iov[1].iov_len;
- } else if (iov[1].iov_len) {
- ioc = 2;
- len += iov[1].iov_len;
- }
-
- ret = kernel_sendmsg(call->conn->params.local->socket,
- &msg, iov, ioc, len);
- if (ret < 0) {
- _debug("sendmsg failed: %d", ret);
- read_lock_bh(&call->state_lock);
- if (call->state < RXRPC_CALL_DEAD)
- rxrpc_queue_call(call);
- read_unlock_bh(&call->state_lock);
- goto error;
- }
-
- switch (genbit) {
- case RXRPC_CALL_EV_ABORT:
- clear_bit(genbit, &call->events);
- clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
- goto kill_ACKs;
-
- case RXRPC_CALL_EV_ACK_FINAL:
- write_lock_bh(&call->state_lock);
- if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK)
- call->state = RXRPC_CALL_COMPLETE;
- write_unlock_bh(&call->state_lock);
- goto kill_ACKs;
-
- default:
- clear_bit(genbit, &call->events);
- switch (call->state) {
- case RXRPC_CALL_CLIENT_AWAIT_REPLY:
- case RXRPC_CALL_CLIENT_RECV_REPLY:
- case RXRPC_CALL_SERVER_RECV_REQUEST:
- case RXRPC_CALL_SERVER_ACK_REQUEST:
- _debug("start ACK timer");
- rxrpc_propose_ACK(call, RXRPC_ACK_DELAY,
- call->ackr_serial, false);
- default:
- break;
- }
- goto maybe_reschedule;
- }
-
-kill_ACKs:
- del_timer_sync(&call->ack_timer);
- if (test_and_clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events))
- rxrpc_put_call(call);
- clear_bit(RXRPC_CALL_EV_ACK, &call->events);
-
-maybe_reschedule:
- if (call->events || !skb_queue_empty(&call->rx_queue)) {
- read_lock_bh(&call->state_lock);
- if (call->state < RXRPC_CALL_DEAD)
- rxrpc_queue_call(call);
- read_unlock_bh(&call->state_lock);
- }
-
- /* don't leave aborted connections on the accept queue */
- if (call->state >= RXRPC_CALL_COMPLETE &&
- !list_empty(&call->accept_link)) {
- _debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }",
- call, call->events, call->flags, call->conn->proto.cid);
-
- read_lock_bh(&call->state_lock);
- if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
- !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
- rxrpc_queue_call(call);
- read_unlock_bh(&call->state_lock);
- }
-
-error:
- clear_bit(RXRPC_CALL_PROC_BUSY, &call->flags);
- kfree(acks);
-
- /* because we don't want two CPUs both processing the work item for one
- * call at the same time, we use a flag to note when it's busy; however
- * this means there's a race between clearing the flag and setting the
- * work pending bit and the work item being processed again */
- if (call->events && !work_pending(&call->processor)) {
- _debug("jumpstart %x", call->conn->proto.cid);
- rxrpc_queue_call(call);
+ if (call->events && call->state < RXRPC_CALL_COMPLETE) {
+ __rxrpc_queue_call(call);
+ goto out;
}
+out_put:
+ rxrpc_put_call(call, rxrpc_call_put);
+out:
_leave("");
- return;
-
-no_mem:
- _debug("out of memory");
- goto maybe_reschedule;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index ae057e0740f3..364b42dc3dce 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -19,23 +19,13 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-/*
- * Maximum lifetime of a call (in jiffies).
- */
-unsigned int rxrpc_max_call_lifetime = 60 * HZ;
-
-/*
- * Time till dead call expires after last use (in jiffies).
- */
-unsigned int rxrpc_dead_call_expiry = 2 * HZ;
-
const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = {
- [RXRPC_CALL_UNINITIALISED] = "Uninit",
+ [RXRPC_CALL_UNINITIALISED] = "Uninit ",
[RXRPC_CALL_CLIENT_AWAIT_CONN] = "ClWtConn",
[RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq",
[RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl",
[RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl",
- [RXRPC_CALL_CLIENT_FINAL_ACK] = "ClFnlACK",
+ [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc",
[RXRPC_CALL_SERVER_SECURING] = "SvSecure",
[RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept",
[RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq",
@@ -43,22 +33,47 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = {
[RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl",
[RXRPC_CALL_SERVER_AWAIT_ACK] = "SvAwtACK",
[RXRPC_CALL_COMPLETE] = "Complete",
- [RXRPC_CALL_SERVER_BUSY] = "SvBusy ",
+};
+
+const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = {
+ [RXRPC_CALL_SUCCEEDED] = "Complete",
[RXRPC_CALL_REMOTELY_ABORTED] = "RmtAbort",
[RXRPC_CALL_LOCALLY_ABORTED] = "LocAbort",
+ [RXRPC_CALL_LOCAL_ERROR] = "LocError",
[RXRPC_CALL_NETWORK_ERROR] = "NetError",
- [RXRPC_CALL_DEAD] = "Dead ",
+};
+
+const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = {
+ [rxrpc_call_new_client] = "NWc",
+ [rxrpc_call_new_service] = "NWs",
+ [rxrpc_call_queued] = "QUE",
+ [rxrpc_call_queued_ref] = "QUR",
+ [rxrpc_call_connected] = "CON",
+ [rxrpc_call_release] = "RLS",
+ [rxrpc_call_seen] = "SEE",
+ [rxrpc_call_got] = "GOT",
+ [rxrpc_call_got_userid] = "Gus",
+ [rxrpc_call_got_kernel] = "Gke",
+ [rxrpc_call_put] = "PUT",
+ [rxrpc_call_put_userid] = "Pus",
+ [rxrpc_call_put_kernel] = "Pke",
+ [rxrpc_call_put_noqueue] = "PNQ",
+ [rxrpc_call_error] = "*E*",
};
struct kmem_cache *rxrpc_call_jar;
LIST_HEAD(rxrpc_calls);
DEFINE_RWLOCK(rxrpc_call_lock);
-static void rxrpc_destroy_call(struct work_struct *work);
-static void rxrpc_call_life_expired(unsigned long _call);
-static void rxrpc_dead_call_expired(unsigned long _call);
-static void rxrpc_ack_time_expired(unsigned long _call);
-static void rxrpc_resend_time_expired(unsigned long _call);
+static void rxrpc_call_timer_expired(unsigned long _call)
+{
+ struct rxrpc_call *call = (struct rxrpc_call *)_call;
+
+ _enter("%d", call->debug_id);
+
+ if (call->state < RXRPC_CALL_COMPLETE)
+ rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real());
+}
/*
* find an extant server call
@@ -91,7 +106,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx,
return NULL;
found_extant_call:
- rxrpc_get_call(call);
+ rxrpc_get_call(call, rxrpc_call_got);
read_unlock(&rx->call_lock);
_leave(" = %p [%d]", call, atomic_read(&call->usage));
return call;
@@ -100,7 +115,7 @@ found_extant_call:
/*
* allocate a new call
*/
-static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
+struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
{
struct rxrpc_call *call;
@@ -108,29 +123,25 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
if (!call)
return NULL;
- call->acks_winsz = 16;
- call->acks_window = kmalloc(call->acks_winsz * sizeof(unsigned long),
+ call->rxtx_buffer = kcalloc(RXRPC_RXTX_BUFF_SIZE,
+ sizeof(struct sk_buff *),
gfp);
- if (!call->acks_window) {
- kmem_cache_free(rxrpc_call_jar, call);
- return NULL;
- }
+ if (!call->rxtx_buffer)
+ goto nomem;
+
+ call->rxtx_annotations = kcalloc(RXRPC_RXTX_BUFF_SIZE, sizeof(u8), gfp);
+ if (!call->rxtx_annotations)
+ goto nomem_2;
- setup_timer(&call->lifetimer, &rxrpc_call_life_expired,
- (unsigned long) call);
- setup_timer(&call->deadspan, &rxrpc_dead_call_expired,
- (unsigned long) call);
- setup_timer(&call->ack_timer, &rxrpc_ack_time_expired,
- (unsigned long) call);
- setup_timer(&call->resend_timer, &rxrpc_resend_time_expired,
- (unsigned long) call);
- INIT_WORK(&call->destroyer, &rxrpc_destroy_call);
+ setup_timer(&call->timer, rxrpc_call_timer_expired,
+ (unsigned long)call);
INIT_WORK(&call->processor, &rxrpc_process_call);
INIT_LIST_HEAD(&call->link);
+ INIT_LIST_HEAD(&call->chan_wait_link);
INIT_LIST_HEAD(&call->accept_link);
- skb_queue_head_init(&call->rx_queue);
- skb_queue_head_init(&call->rx_oos_queue);
- init_waitqueue_head(&call->tx_waitq);
+ INIT_LIST_HEAD(&call->recvmsg_link);
+ INIT_LIST_HEAD(&call->sock_link);
+ init_waitqueue_head(&call->waitq);
spin_lock_init(&call->lock);
rwlock_init(&call->state_lock);
atomic_set(&call->usage, 1);
@@ -138,70 +149,65 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
memset(&call->sock_node, 0xed, sizeof(call->sock_node));
- call->rx_data_expect = 1;
- call->rx_data_eaten = 0;
- call->rx_first_oos = 0;
- call->ackr_win_top = call->rx_data_eaten + 1 + rxrpc_rx_window_size;
- call->creation_jif = jiffies;
+ /* Leave space in the ring to handle a maxed-out jumbo packet */
+ call->rx_winsize = rxrpc_rx_window_size;
+ call->tx_winsize = 16;
+ call->rx_expect_next = 1;
+
+ if (RXRPC_TX_SMSS > 2190)
+ call->cong_cwnd = 2;
+ else if (RXRPC_TX_SMSS > 1095)
+ call->cong_cwnd = 3;
+ else
+ call->cong_cwnd = 4;
+ call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1;
return call;
+
+nomem_2:
+ kfree(call->rxtx_buffer);
+nomem:
+ kmem_cache_free(rxrpc_call_jar, call);
+ return NULL;
}
/*
* Allocate a new client call.
*/
-static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
- struct sockaddr_rxrpc *srx,
+static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx,
gfp_t gfp)
{
struct rxrpc_call *call;
+ ktime_t now;
_enter("");
- ASSERT(rx->local != NULL);
-
call = rxrpc_alloc_call(gfp);
if (!call)
return ERR_PTR(-ENOMEM);
call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
-
- sock_hold(&rx->sk);
- call->socket = rx;
- call->rx_data_post = 1;
-
- call->local = rx->local;
call->service_id = srx->srx_service;
- call->in_clientflag = 0;
+ call->tx_phase = true;
+ now = ktime_get_real();
+ call->acks_latest_ts = now;
+ call->cong_tstamp = now;
_leave(" = %p", call);
return call;
}
/*
- * Begin client call.
+ * Initiate the call ack/resend/expiry timer.
*/
-static int rxrpc_begin_client_call(struct rxrpc_call *call,
- struct rxrpc_conn_parameters *cp,
- struct sockaddr_rxrpc *srx,
- gfp_t gfp)
+static void rxrpc_start_call_timer(struct rxrpc_call *call)
{
- int ret;
-
- /* Set up or get a connection record and set the protocol parameters,
- * including channel number and call ID.
- */
- ret = rxrpc_connect_call(call, cp, srx, gfp);
- if (ret < 0)
- return ret;
-
- call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
-
- spin_lock(&call->conn->params.peer->lock);
- hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets);
- spin_unlock(&call->conn->params.peer->lock);
-
- call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime;
- add_timer(&call->lifetimer);
- return 0;
+ ktime_t now = ktime_get_real(), expire_at;
+
+ expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime);
+ call->expire_at = expire_at;
+ call->ack_at = expire_at;
+ call->resend_at = expire_at;
+ call->timer.expires = jiffies + LONG_MAX / 2;
+ rxrpc_set_timer(call, rxrpc_timer_begin, now);
}
/*
@@ -216,20 +222,21 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
{
struct rxrpc_call *call, *xcall;
struct rb_node *parent, **pp;
+ const void *here = __builtin_return_address(0);
int ret;
_enter("%p,%lx", rx, user_call_ID);
- call = rxrpc_alloc_client_call(rx, srx, gfp);
+ call = rxrpc_alloc_client_call(srx, gfp);
if (IS_ERR(call)) {
_leave(" = %ld", PTR_ERR(call));
return call;
}
- /* Publish the call, even though it is incompletely set up as yet */
- call->user_call_ID = user_call_ID;
- __set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+ trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage),
+ here, (const void *)user_call_ID);
+ /* Publish the call, even though it is incompletely set up as yet */
write_lock(&rx->call_lock);
pp = &rx->calls.rb_node;
@@ -243,369 +250,285 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
else if (user_call_ID > xcall->user_call_ID)
pp = &(*pp)->rb_right;
else
- goto found_user_ID_now_present;
+ goto error_dup_user_ID;
}
- rxrpc_get_call(call);
-
+ rcu_assign_pointer(call->socket, rx);
+ call->user_call_ID = user_call_ID;
+ __set_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+ rxrpc_get_call(call, rxrpc_call_got_userid);
rb_link_node(&call->sock_node, parent, pp);
rb_insert_color(&call->sock_node, &rx->calls);
+ list_add(&call->sock_link, &rx->sock_calls);
+
write_unlock(&rx->call_lock);
- write_lock_bh(&rxrpc_call_lock);
+ write_lock(&rxrpc_call_lock);
list_add_tail(&call->link, &rxrpc_calls);
- write_unlock_bh(&rxrpc_call_lock);
+ write_unlock(&rxrpc_call_lock);
- ret = rxrpc_begin_client_call(call, cp, srx, gfp);
+ /* Set up or get a connection record and set the protocol parameters,
+ * including channel number and call ID.
+ */
+ ret = rxrpc_connect_call(call, cp, srx, gfp);
if (ret < 0)
goto error;
- _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
+ trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage),
+ here, ERR_PTR(ret));
- _leave(" = %p [new]", call);
- return call;
+ spin_lock_bh(&call->conn->params.peer->lock);
+ hlist_add_head(&call->error_link,
+ &call->conn->params.peer->error_targets);
+ spin_unlock_bh(&call->conn->params.peer->lock);
-error:
- write_lock(&rx->call_lock);
- rb_erase(&call->sock_node, &rx->calls);
- write_unlock(&rx->call_lock);
- rxrpc_put_call(call);
+ rxrpc_start_call_timer(call);
- write_lock_bh(&rxrpc_call_lock);
- list_del_init(&call->link);
- write_unlock_bh(&rxrpc_call_lock);
+ _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id);
- set_bit(RXRPC_CALL_RELEASED, &call->flags);
- call->state = RXRPC_CALL_DEAD;
- rxrpc_put_call(call);
- _leave(" = %d", ret);
- return ERR_PTR(ret);
+ _leave(" = %p [new]", call);
+ return call;
/* We unexpectedly found the user ID in the list after taking
* the call_lock. This shouldn't happen unless the user races
* with itself and tries to add the same user ID twice at the
* same time in different threads.
*/
-found_user_ID_now_present:
+error_dup_user_ID:
write_unlock(&rx->call_lock);
- set_bit(RXRPC_CALL_RELEASED, &call->flags);
- call->state = RXRPC_CALL_DEAD;
- rxrpc_put_call(call);
- _leave(" = -EEXIST [%p]", call);
- return ERR_PTR(-EEXIST);
+ ret = -EEXIST;
+
+error:
+ __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
+ RX_CALL_DEAD, ret);
+ trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage),
+ here, ERR_PTR(ret));
+ rxrpc_release_call(rx, call);
+ rxrpc_put_call(call, rxrpc_call_put);
+ _leave(" = %d", ret);
+ return ERR_PTR(ret);
}
/*
- * set up an incoming call
- * - called in process context with IRQs enabled
+ * Set up an incoming call. call->conn points to the connection.
+ * This is called in BH context and isn't allowed to fail.
*/
-struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
- struct rxrpc_connection *conn,
- struct sk_buff *skb)
+void rxrpc_incoming_call(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ struct sk_buff *skb)
{
+ struct rxrpc_connection *conn = call->conn;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rxrpc_call *call, *candidate;
- u32 call_id, chan;
-
- _enter(",%d", conn->debug_id);
-
- ASSERT(rx != NULL);
-
- candidate = rxrpc_alloc_call(GFP_NOIO);
- if (!candidate)
- return ERR_PTR(-EBUSY);
-
- chan = sp->hdr.cid & RXRPC_CHANNELMASK;
- candidate->socket = rx;
- candidate->conn = conn;
- candidate->cid = sp->hdr.cid;
- candidate->call_id = sp->hdr.callNumber;
- candidate->channel = chan;
- candidate->rx_data_post = 0;
- candidate->state = RXRPC_CALL_SERVER_ACCEPTING;
- if (conn->security_ix > 0)
- candidate->state = RXRPC_CALL_SERVER_SECURING;
-
- spin_lock(&conn->channel_lock);
-
- /* set the channel for this call */
- call = rcu_dereference_protected(conn->channels[chan].call,
- lockdep_is_held(&conn->channel_lock));
-
- _debug("channel[%u] is %p", candidate->channel, call);
- if (call && call->call_id == sp->hdr.callNumber) {
- /* already set; must've been a duplicate packet */
- _debug("extant call [%d]", call->state);
- ASSERTCMP(call->conn, ==, conn);
-
- read_lock(&call->state_lock);
- switch (call->state) {
- case RXRPC_CALL_LOCALLY_ABORTED:
- if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events))
- rxrpc_queue_call(call);
- case RXRPC_CALL_REMOTELY_ABORTED:
- read_unlock(&call->state_lock);
- goto aborted_call;
- default:
- rxrpc_get_call(call);
- read_unlock(&call->state_lock);
- goto extant_call;
- }
- }
-
- if (call) {
- /* it seems the channel is still in use from the previous call
- * - ditch the old binding if its call is now complete */
- _debug("CALL: %u { %s }",
- call->debug_id, rxrpc_call_states[call->state]);
-
- if (call->state >= RXRPC_CALL_COMPLETE) {
- __rxrpc_disconnect_call(call);
- } else {
- spin_unlock(&conn->channel_lock);
- kmem_cache_free(rxrpc_call_jar, candidate);
- _leave(" = -EBUSY");
- return ERR_PTR(-EBUSY);
- }
- }
-
- /* check the call number isn't duplicate */
- _debug("check dup");
- call_id = sp->hdr.callNumber;
-
- /* We just ignore calls prior to the current call ID. Terminated calls
- * are handled via the connection.
+ u32 chan;
+
+ _enter(",%d", call->conn->debug_id);
+
+ rcu_assign_pointer(call->socket, rx);
+ call->call_id = sp->hdr.callNumber;
+ call->service_id = sp->hdr.serviceId;
+ call->cid = sp->hdr.cid;
+ call->state = RXRPC_CALL_SERVER_ACCEPTING;
+ if (sp->hdr.securityIndex > 0)
+ call->state = RXRPC_CALL_SERVER_SECURING;
+ call->cong_tstamp = skb->tstamp;
+
+ /* Set the channel for this call. We don't get channel_lock as we're
+ * only defending against the data_ready handler (which we're called
+ * from) and the RESPONSE packet parser (which is only really
+ * interested in call_counter and can cope with a disagreement with the
+ * call pointer).
*/
- if (call_id <= conn->channels[chan].call_counter)
- goto old_call; /* TODO: Just drop packet */
-
- /* make the call available */
- _debug("new call");
- call = candidate;
- candidate = NULL;
- conn->channels[chan].call_counter = call_id;
+ chan = sp->hdr.cid & RXRPC_CHANNELMASK;
+ conn->channels[chan].call_counter = call->call_id;
+ conn->channels[chan].call_id = call->call_id;
rcu_assign_pointer(conn->channels[chan].call, call);
- sock_hold(&rx->sk);
- rxrpc_get_connection(conn);
- spin_unlock(&conn->channel_lock);
spin_lock(&conn->params.peer->lock);
hlist_add_head(&call->error_link, &conn->params.peer->error_targets);
spin_unlock(&conn->params.peer->lock);
- write_lock_bh(&rxrpc_call_lock);
- list_add_tail(&call->link, &rxrpc_calls);
- write_unlock_bh(&rxrpc_call_lock);
+ _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
- call->local = conn->params.local;
- call->epoch = conn->proto.epoch;
- call->service_id = conn->params.service_id;
- call->in_clientflag = RXRPC_CLIENT_INITIATED;
+ rxrpc_start_call_timer(call);
+ _leave("");
+}
- _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
+/*
+ * Queue a call's work processor, getting a ref to pass to the work queue.
+ */
+bool rxrpc_queue_call(struct rxrpc_call *call)
+{
+ const void *here = __builtin_return_address(0);
+ int n = __atomic_add_unless(&call->usage, 1, 0);
+ if (n == 0)
+ return false;
+ if (rxrpc_queue_work(&call->processor))
+ trace_rxrpc_call(call, rxrpc_call_queued, n + 1, here, NULL);
+ else
+ rxrpc_put_call(call, rxrpc_call_put_noqueue);
+ return true;
+}
- call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime;
- add_timer(&call->lifetimer);
- _leave(" = %p {%d} [new]", call, call->debug_id);
- return call;
+/*
+ * Queue a call's work processor, passing the callers ref to the work queue.
+ */
+bool __rxrpc_queue_call(struct rxrpc_call *call)
+{
+ const void *here = __builtin_return_address(0);
+ int n = atomic_read(&call->usage);
+ ASSERTCMP(n, >=, 1);
+ if (rxrpc_queue_work(&call->processor))
+ trace_rxrpc_call(call, rxrpc_call_queued_ref, n, here, NULL);
+ else
+ rxrpc_put_call(call, rxrpc_call_put_noqueue);
+ return true;
+}
-extant_call:
- spin_unlock(&conn->channel_lock);
- kmem_cache_free(rxrpc_call_jar, candidate);
- _leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1);
- return call;
+/*
+ * Note the re-emergence of a call.
+ */
+void rxrpc_see_call(struct rxrpc_call *call)
+{
+ const void *here = __builtin_return_address(0);
+ if (call) {
+ int n = atomic_read(&call->usage);
-aborted_call:
- spin_unlock(&conn->channel_lock);
- kmem_cache_free(rxrpc_call_jar, candidate);
- _leave(" = -ECONNABORTED");
- return ERR_PTR(-ECONNABORTED);
-
-old_call:
- spin_unlock(&conn->channel_lock);
- kmem_cache_free(rxrpc_call_jar, candidate);
- _leave(" = -ECONNRESET [old]");
- return ERR_PTR(-ECONNRESET);
+ trace_rxrpc_call(call, rxrpc_call_seen, n, here, NULL);
+ }
}
/*
- * detach a call from a socket and set up for release
+ * Note the addition of a ref on a call.
*/
-void rxrpc_release_call(struct rxrpc_call *call)
+void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
{
+ const void *here = __builtin_return_address(0);
+ int n = atomic_inc_return(&call->usage);
+
+ trace_rxrpc_call(call, op, n, here, NULL);
+}
+
+/*
+ * Detach a call from its owning socket.
+ */
+void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
+{
+ const void *here = __builtin_return_address(0);
struct rxrpc_connection *conn = call->conn;
- struct rxrpc_sock *rx = call->socket;
+ bool put = false;
+ int i;
- _enter("{%d,%d,%d,%d}",
- call->debug_id, atomic_read(&call->usage),
- atomic_read(&call->ackr_not_idle),
- call->rx_first_oos);
+ _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage));
+
+ trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage),
+ here, (const void *)call->flags);
+
+ ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
spin_lock_bh(&call->lock);
if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags))
BUG();
spin_unlock_bh(&call->lock);
- /* dissociate from the socket
- * - the socket's ref on the call is passed to the death timer
- */
- _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
+ del_timer_sync(&call->timer);
- spin_lock(&conn->params.peer->lock);
- hlist_del_init(&call->error_link);
- spin_unlock(&conn->params.peer->lock);
+ /* Make sure we don't get any more notifications */
+ write_lock_bh(&rx->recvmsg_lock);
- write_lock_bh(&rx->call_lock);
- if (!list_empty(&call->accept_link)) {
+ if (!list_empty(&call->recvmsg_link)) {
_debug("unlinking once-pending call %p { e=%lx f=%lx }",
call, call->events, call->flags);
- ASSERT(!test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
- list_del_init(&call->accept_link);
- sk_acceptq_removed(&rx->sk);
- } else if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
- rb_erase(&call->sock_node, &rx->calls);
- memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
- clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
+ list_del(&call->recvmsg_link);
+ put = true;
}
- write_unlock_bh(&rx->call_lock);
- /* free up the channel for reuse */
- write_lock_bh(&call->state_lock);
+ /* list_empty() must return false in rxrpc_notify_socket() */
+ call->recvmsg_link.next = NULL;
+ call->recvmsg_link.prev = NULL;
- if (call->state < RXRPC_CALL_COMPLETE &&
- call->state != RXRPC_CALL_CLIENT_FINAL_ACK) {
- _debug("+++ ABORTING STATE %d +++\n", call->state);
- call->state = RXRPC_CALL_LOCALLY_ABORTED;
- call->local_abort = RX_CALL_DEAD;
- }
- write_unlock_bh(&call->state_lock);
+ write_unlock_bh(&rx->recvmsg_lock);
+ if (put)
+ rxrpc_put_call(call, rxrpc_call_put);
- rxrpc_disconnect_call(call);
+ write_lock(&rx->call_lock);
- /* clean up the Rx queue */
- if (!skb_queue_empty(&call->rx_queue) ||
- !skb_queue_empty(&call->rx_oos_queue)) {
- struct rxrpc_skb_priv *sp;
- struct sk_buff *skb;
+ if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+ rb_erase(&call->sock_node, &rx->calls);
+ memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
+ rxrpc_put_call(call, rxrpc_call_put_userid);
+ }
- _debug("purge Rx queues");
+ list_del(&call->sock_link);
+ write_unlock(&rx->call_lock);
- spin_lock_bh(&call->lock);
- while ((skb = skb_dequeue(&call->rx_queue)) ||
- (skb = skb_dequeue(&call->rx_oos_queue))) {
- spin_unlock_bh(&call->lock);
+ _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
- sp = rxrpc_skb(skb);
- _debug("- zap %s %%%u #%u",
- rxrpc_pkts[sp->hdr.type],
- sp->hdr.serial, sp->hdr.seq);
- rxrpc_free_skb(skb);
- spin_lock_bh(&call->lock);
- }
- spin_unlock_bh(&call->lock);
+ if (conn)
+ rxrpc_disconnect_call(call);
- ASSERTCMP(call->state, !=, RXRPC_CALL_COMPLETE);
+ for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) {
+ rxrpc_free_skb(call->rxtx_buffer[i],
+ (call->tx_phase ? rxrpc_skb_tx_cleaned :
+ rxrpc_skb_rx_cleaned));
+ call->rxtx_buffer[i] = NULL;
}
- del_timer_sync(&call->resend_timer);
- del_timer_sync(&call->ack_timer);
- del_timer_sync(&call->lifetimer);
- call->deadspan.expires = jiffies + rxrpc_dead_call_expiry;
- add_timer(&call->deadspan);
-
_leave("");
}
/*
- * handle a dead call being ready for reaping
- */
-static void rxrpc_dead_call_expired(unsigned long _call)
-{
- struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
- _enter("{%d}", call->debug_id);
-
- write_lock_bh(&call->state_lock);
- call->state = RXRPC_CALL_DEAD;
- write_unlock_bh(&call->state_lock);
- rxrpc_put_call(call);
-}
-
-/*
- * mark a call as to be released, aborting it if it's still in progress
- * - called with softirqs disabled
- */
-static void rxrpc_mark_call_released(struct rxrpc_call *call)
-{
- bool sched;
-
- write_lock(&call->state_lock);
- if (call->state < RXRPC_CALL_DEAD) {
- sched = false;
- if (call->state < RXRPC_CALL_COMPLETE) {
- _debug("abort call %p", call);
- call->state = RXRPC_CALL_LOCALLY_ABORTED;
- call->local_abort = RX_CALL_DEAD;
- if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events))
- sched = true;
- }
- if (!test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
- sched = true;
- if (sched)
- rxrpc_queue_call(call);
- }
- write_unlock(&call->state_lock);
-}
-
-/*
* release all the calls associated with a socket
*/
void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
{
struct rxrpc_call *call;
- struct rb_node *p;
_enter("%p", rx);
- read_lock_bh(&rx->call_lock);
-
- /* mark all the calls as no longer wanting incoming packets */
- for (p = rb_first(&rx->calls); p; p = rb_next(p)) {
- call = rb_entry(p, struct rxrpc_call, sock_node);
- rxrpc_mark_call_released(call);
- }
-
- /* kill the not-yet-accepted incoming calls */
- list_for_each_entry(call, &rx->secureq, accept_link) {
- rxrpc_mark_call_released(call);
+ while (!list_empty(&rx->to_be_accepted)) {
+ call = list_entry(rx->to_be_accepted.next,
+ struct rxrpc_call, accept_link);
+ list_del(&call->accept_link);
+ rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET);
+ rxrpc_put_call(call, rxrpc_call_put);
}
- list_for_each_entry(call, &rx->acceptq, accept_link) {
- rxrpc_mark_call_released(call);
+ while (!list_empty(&rx->sock_calls)) {
+ call = list_entry(rx->sock_calls.next,
+ struct rxrpc_call, sock_link);
+ rxrpc_get_call(call, rxrpc_call_got);
+ rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET);
+ rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+ rxrpc_release_call(rx, call);
+ rxrpc_put_call(call, rxrpc_call_put);
}
- read_unlock_bh(&rx->call_lock);
_leave("");
}
/*
* release a call
*/
-void __rxrpc_put_call(struct rxrpc_call *call)
+void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op)
{
+ const void *here = __builtin_return_address(0);
+ int n;
+
ASSERT(call != NULL);
- _enter("%p{u=%d}", call, atomic_read(&call->usage));
+ n = atomic_dec_return(&call->usage);
+ trace_rxrpc_call(call, op, n, here, NULL);
+ ASSERTCMP(n, >=, 0);
+ if (n == 0) {
+ _debug("call %d dead", call->debug_id);
+ ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
- ASSERTCMP(atomic_read(&call->usage), >, 0);
+ write_lock(&rxrpc_call_lock);
+ list_del_init(&call->link);
+ write_unlock(&rxrpc_call_lock);
- if (atomic_dec_and_test(&call->usage)) {
- _debug("call %d dead", call->debug_id);
- WARN_ON(atomic_read(&call->skb_count) != 0);
- ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
- rxrpc_queue_work(&call->destroyer);
+ rxrpc_cleanup_call(call);
}
- _leave("");
}
/*
@@ -615,187 +538,70 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu)
{
struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu);
- rxrpc_purge_queue(&call->rx_queue);
+ rxrpc_put_peer(call->peer);
+ kfree(call->rxtx_buffer);
+ kfree(call->rxtx_annotations);
kmem_cache_free(rxrpc_call_jar, call);
}
/*
* clean up a call
*/
-static void rxrpc_cleanup_call(struct rxrpc_call *call)
+void rxrpc_cleanup_call(struct rxrpc_call *call)
{
- _net("DESTROY CALL %d", call->debug_id);
+ int i;
- ASSERT(call->socket);
+ _net("DESTROY CALL %d", call->debug_id);
memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
- del_timer_sync(&call->lifetimer);
- del_timer_sync(&call->deadspan);
- del_timer_sync(&call->ack_timer);
- del_timer_sync(&call->resend_timer);
+ del_timer_sync(&call->timer);
+ ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
- ASSERTCMP(call->events, ==, 0);
- if (work_pending(&call->processor)) {
- _debug("defer destroy");
- rxrpc_queue_work(&call->destroyer);
- return;
- }
-
ASSERTCMP(call->conn, ==, NULL);
- if (call->acks_window) {
- _debug("kill Tx window %d",
- CIRC_CNT(call->acks_head, call->acks_tail,
- call->acks_winsz));
- smp_mb();
- while (CIRC_CNT(call->acks_head, call->acks_tail,
- call->acks_winsz) > 0) {
- struct rxrpc_skb_priv *sp;
- unsigned long _skb;
-
- _skb = call->acks_window[call->acks_tail] & ~1;
- sp = rxrpc_skb((struct sk_buff *)_skb);
- _debug("+++ clear Tx %u", sp->hdr.seq);
- rxrpc_free_skb((struct sk_buff *)_skb);
- call->acks_tail =
- (call->acks_tail + 1) & (call->acks_winsz - 1);
- }
-
- kfree(call->acks_window);
- }
+ /* Clean up the Rx/Tx buffer */
+ for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++)
+ rxrpc_free_skb(call->rxtx_buffer[i],
+ (call->tx_phase ? rxrpc_skb_tx_cleaned :
+ rxrpc_skb_rx_cleaned));
- rxrpc_free_skb(call->tx_pending);
+ rxrpc_free_skb(call->tx_pending, rxrpc_skb_tx_cleaned);
- rxrpc_purge_queue(&call->rx_queue);
- ASSERT(skb_queue_empty(&call->rx_oos_queue));
- sock_put(&call->socket->sk);
call_rcu(&call->rcu, rxrpc_rcu_destroy_call);
}
/*
- * destroy a call
- */
-static void rxrpc_destroy_call(struct work_struct *work)
-{
- struct rxrpc_call *call =
- container_of(work, struct rxrpc_call, destroyer);
-
- _enter("%p{%d,%d,%p}",
- call, atomic_read(&call->usage), call->channel, call->conn);
-
- ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
-
- write_lock_bh(&rxrpc_call_lock);
- list_del_init(&call->link);
- write_unlock_bh(&rxrpc_call_lock);
-
- rxrpc_cleanup_call(call);
- _leave("");
-}
-
-/*
- * preemptively destroy all the call records from a transport endpoint rather
- * than waiting for them to time out
+ * Make sure that all calls are gone.
*/
void __exit rxrpc_destroy_all_calls(void)
{
struct rxrpc_call *call;
_enter("");
- write_lock_bh(&rxrpc_call_lock);
+
+ if (list_empty(&rxrpc_calls))
+ return;
+
+ write_lock(&rxrpc_call_lock);
while (!list_empty(&rxrpc_calls)) {
call = list_entry(rxrpc_calls.next, struct rxrpc_call, link);
_debug("Zapping call %p", call);
+ rxrpc_see_call(call);
list_del_init(&call->link);
- switch (atomic_read(&call->usage)) {
- case 0:
- ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD);
- break;
- case 1:
- if (del_timer_sync(&call->deadspan) != 0 &&
- call->state != RXRPC_CALL_DEAD)
- rxrpc_dead_call_expired((unsigned long) call);
- if (call->state != RXRPC_CALL_DEAD)
- break;
- default:
- pr_err("Call %p still in use (%d,%d,%s,%lx,%lx)!\n",
- call, atomic_read(&call->usage),
- atomic_read(&call->ackr_not_idle),
- rxrpc_call_states[call->state],
- call->flags, call->events);
- if (!skb_queue_empty(&call->rx_queue))
- pr_err("Rx queue occupied\n");
- if (!skb_queue_empty(&call->rx_oos_queue))
- pr_err("OOS queue occupied\n");
- break;
- }
-
- write_unlock_bh(&rxrpc_call_lock);
- cond_resched();
- write_lock_bh(&rxrpc_call_lock);
- }
-
- write_unlock_bh(&rxrpc_call_lock);
- _leave("");
-}
-
-/*
- * handle call lifetime being exceeded
- */
-static void rxrpc_call_life_expired(unsigned long _call)
-{
- struct rxrpc_call *call = (struct rxrpc_call *) _call;
+ pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
+ call, atomic_read(&call->usage),
+ rxrpc_call_states[call->state],
+ call->flags, call->events);
- if (call->state >= RXRPC_CALL_COMPLETE)
- return;
-
- _enter("{%d}", call->debug_id);
- read_lock_bh(&call->state_lock);
- if (call->state < RXRPC_CALL_COMPLETE) {
- set_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events);
- rxrpc_queue_call(call);
+ write_unlock(&rxrpc_call_lock);
+ cond_resched();
+ write_lock(&rxrpc_call_lock);
}
- read_unlock_bh(&call->state_lock);
-}
-
-/*
- * handle resend timer expiry
- * - may not take call->state_lock as this can deadlock against del_timer_sync()
- */
-static void rxrpc_resend_time_expired(unsigned long _call)
-{
- struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
- _enter("{%d}", call->debug_id);
-
- if (call->state >= RXRPC_CALL_COMPLETE)
- return;
-
- clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
- if (!test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events))
- rxrpc_queue_call(call);
-}
-
-/*
- * handle ACK timer expiry
- */
-static void rxrpc_ack_time_expired(unsigned long _call)
-{
- struct rxrpc_call *call = (struct rxrpc_call *) _call;
-
- _enter("{%d}", call->debug_id);
-
- if (call->state >= RXRPC_CALL_COMPLETE)
- return;
- read_lock_bh(&call->state_lock);
- if (call->state < RXRPC_CALL_COMPLETE &&
- !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events))
- rxrpc_queue_call(call);
- read_unlock_bh(&call->state_lock);
+ write_unlock(&rxrpc_call_lock);
}
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 9e91f27b0d0f..60ef9605167e 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -7,6 +7,68 @@
* modify it under the terms of the GNU General Public Licence
* as published by the Free Software Foundation; either version
* 2 of the Licence, or (at your option) any later version.
+ *
+ *
+ * Client connections need to be cached for a little while after they've made a
+ * call so as to handle retransmitted DATA packets in case the server didn't
+ * receive the final ACK or terminating ABORT we sent it.
+ *
+ * Client connections can be in one of a number of cache states:
+ *
+ * (1) INACTIVE - The connection is not held in any list and may not have been
+ * exposed to the world. If it has been previously exposed, it was
+ * discarded from the idle list after expiring.
+ *
+ * (2) WAITING - The connection is waiting for the number of client conns to
+ * drop below the maximum capacity. Calls may be in progress upon it from
+ * when it was active and got culled.
+ *
+ * The connection is on the rxrpc_waiting_client_conns list which is kept
+ * in to-be-granted order. Culled conns with waiters go to the back of
+ * the queue just like new conns.
+ *
+ * (3) ACTIVE - The connection has at least one call in progress upon it, it
+ * may freely grant available channels to new calls and calls may be
+ * waiting on it for channels to become available.
+ *
+ * The connection is on the rxrpc_active_client_conns list which is kept
+ * in activation order for culling purposes.
+ *
+ * rxrpc_nr_active_client_conns is held incremented also.
+ *
+ * (4) CULLED - The connection got summarily culled to try and free up
+ * capacity. Calls currently in progress on the connection are allowed to
+ * continue, but new calls will have to wait. There can be no waiters in
+ * this state - the conn would have to go to the WAITING state instead.
+ *
+ * (5) IDLE - The connection has no calls in progress upon it and must have
+ * been exposed to the world (ie. the EXPOSED flag must be set). When it
+ * expires, the EXPOSED flag is cleared and the connection transitions to
+ * the INACTIVE state.
+ *
+ * The connection is on the rxrpc_idle_client_conns list which is kept in
+ * order of how soon they'll expire.
+ *
+ * There are flags of relevance to the cache:
+ *
+ * (1) EXPOSED - The connection ID got exposed to the world. If this flag is
+ * set, an extra ref is added to the connection preventing it from being
+ * reaped when it has no calls outstanding. This flag is cleared and the
+ * ref dropped when a conn is discarded from the idle list.
+ *
+ * This allows us to move terminal call state retransmission to the
+ * connection and to discard the call immediately we think it is done
+ * with. It also give us a chance to reuse the connection.
+ *
+ * (2) DONT_REUSE - The connection should be discarded as soon as possible and
+ * should not be reused. This is set when an exclusive connection is used
+ * or a call ID counter overflows.
+ *
+ * The caching state may only be changed if the cache lock is held.
+ *
+ * There are two idle client connection expiry durations. If the total number
+ * of connections is below the reap threshold, we use the normal duration; if
+ * it's above, we use the fast duration.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -16,27 +78,50 @@
#include <linux/timer.h>
#include "ar-internal.h"
+__read_mostly unsigned int rxrpc_max_client_connections = 1000;
+__read_mostly unsigned int rxrpc_reap_client_connections = 900;
+__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
+__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
+
+static unsigned int rxrpc_nr_client_conns;
+static unsigned int rxrpc_nr_active_client_conns;
+static __read_mostly bool rxrpc_kill_all_client_conns;
+
+static DEFINE_SPINLOCK(rxrpc_client_conn_cache_lock);
+static DEFINE_SPINLOCK(rxrpc_client_conn_discard_mutex);
+static LIST_HEAD(rxrpc_waiting_client_conns);
+static LIST_HEAD(rxrpc_active_client_conns);
+static LIST_HEAD(rxrpc_idle_client_conns);
+
/*
* We use machine-unique IDs for our client connections.
*/
DEFINE_IDR(rxrpc_client_conn_ids);
static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
+static void rxrpc_cull_active_client_conns(void);
+static void rxrpc_discard_expired_client_conns(struct work_struct *);
+
+static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap,
+ rxrpc_discard_expired_client_conns);
+
+const char rxrpc_conn_cache_states[RXRPC_CONN__NR_CACHE_STATES][5] = {
+ [RXRPC_CONN_CLIENT_INACTIVE] = "Inac",
+ [RXRPC_CONN_CLIENT_WAITING] = "Wait",
+ [RXRPC_CONN_CLIENT_ACTIVE] = "Actv",
+ [RXRPC_CONN_CLIENT_CULLED] = "Cull",
+ [RXRPC_CONN_CLIENT_IDLE] = "Idle",
+};
+
/*
* Get a connection ID and epoch for a client connection from the global pool.
* The connection struct pointer is then recorded in the idr radix tree. The
- * epoch is changed if this wraps.
- *
- * TODO: The IDR tree gets very expensive on memory if the connection IDs are
- * widely scattered throughout the number space, so we shall need to retire
- * connections that have, say, an ID more than four times the maximum number of
- * client conns away from the current allocation point to try and keep the IDs
- * concentrated. We will also need to retire connections from an old epoch.
+ * epoch doesn't change until the client is rebooted (or, at least, unless the
+ * module is unloaded).
*/
static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
gfp_t gfp)
{
- u32 epoch;
int id;
_enter("");
@@ -44,34 +129,18 @@ static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
idr_preload(gfp);
spin_lock(&rxrpc_conn_id_lock);
- epoch = rxrpc_epoch;
-
- /* We could use idr_alloc_cyclic() here, but we really need to know
- * when the thing wraps so that we can advance the epoch.
- */
- if (rxrpc_client_conn_ids.cur == 0)
- rxrpc_client_conn_ids.cur = 1;
- id = idr_alloc(&rxrpc_client_conn_ids, conn,
- rxrpc_client_conn_ids.cur, 0x40000000, GFP_NOWAIT);
- if (id < 0) {
- if (id != -ENOSPC)
- goto error;
- id = idr_alloc(&rxrpc_client_conn_ids, conn,
- 1, 0x40000000, GFP_NOWAIT);
- if (id < 0)
- goto error;
- epoch++;
- rxrpc_epoch = epoch;
- }
- rxrpc_client_conn_ids.cur = id + 1;
+ id = idr_alloc_cyclic(&rxrpc_client_conn_ids, conn,
+ 1, 0x40000000, GFP_NOWAIT);
+ if (id < 0)
+ goto error;
spin_unlock(&rxrpc_conn_id_lock);
idr_preload_end();
- conn->proto.epoch = epoch;
+ conn->proto.epoch = rxrpc_epoch;
conn->proto.cid = id << RXRPC_CIDSHIFT;
set_bit(RXRPC_CONN_HAS_IDR, &conn->flags);
- _leave(" [CID %x:%x]", epoch, conn->proto.cid);
+ _leave(" [CID %x]", conn->proto.cid);
return 0;
error:
@@ -114,8 +183,7 @@ void rxrpc_destroy_client_conn_ids(void)
}
/*
- * Allocate a client connection. The caller must take care to clear any
- * padding bytes in *cp.
+ * Allocate a client connection.
*/
static struct rxrpc_connection *
rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
@@ -131,6 +199,10 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
return ERR_PTR(-ENOMEM);
}
+ atomic_set(&conn->usage, 1);
+ if (cp->exclusive)
+ __set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+
conn->params = *cp;
conn->out_clientflag = RXRPC_CLIENT_INITIATED;
conn->state = RXRPC_CONN_CLIENT;
@@ -148,7 +220,7 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
goto error_2;
write_lock(&rxrpc_connection_lock);
- list_add_tail(&conn->link, &rxrpc_connections);
+ list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list);
write_unlock(&rxrpc_connection_lock);
/* We steal the caller's peer ref. */
@@ -156,6 +228,9 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp)
rxrpc_get_local(conn->params.local);
key_get(conn->params.key);
+ trace_rxrpc_conn(conn, rxrpc_conn_new_client, atomic_read(&conn->usage),
+ __builtin_return_address(0));
+ trace_rxrpc_client(conn, -1, rxrpc_client_alloc);
_leave(" = %p", conn);
return conn;
@@ -170,32 +245,68 @@ error_0:
}
/*
- * find a connection for a call
- * - called in process context with IRQs enabled
+ * Determine if a connection may be reused.
*/
-int rxrpc_connect_call(struct rxrpc_call *call,
- struct rxrpc_conn_parameters *cp,
- struct sockaddr_rxrpc *srx,
- gfp_t gfp)
+static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
+{
+ int id_cursor, id, distance, limit;
+
+ if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags))
+ goto dont_reuse;
+
+ if (conn->proto.epoch != rxrpc_epoch)
+ goto mark_dont_reuse;
+
+ /* The IDR tree gets very expensive on memory if the connection IDs are
+ * widely scattered throughout the number space, so we shall want to
+ * kill off connections that, say, have an ID more than about four
+ * times the maximum number of client conns away from the current
+ * allocation point to try and keep the IDs concentrated.
+ */
+ id_cursor = READ_ONCE(rxrpc_client_conn_ids.cur);
+ id = conn->proto.cid >> RXRPC_CIDSHIFT;
+ distance = id - id_cursor;
+ if (distance < 0)
+ distance = -distance;
+ limit = round_up(rxrpc_max_client_connections, IDR_SIZE) * 4;
+ if (distance > limit)
+ goto mark_dont_reuse;
+
+ return true;
+
+mark_dont_reuse:
+ set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+dont_reuse:
+ return false;
+}
+
+/*
+ * Create or find a client connection to use for a call.
+ *
+ * If we return with a connection, the call will be on its waiting list. It's
+ * left to the caller to assign a channel and wake up the call.
+ */
+static int rxrpc_get_client_conn(struct rxrpc_call *call,
+ struct rxrpc_conn_parameters *cp,
+ struct sockaddr_rxrpc *srx,
+ gfp_t gfp)
{
struct rxrpc_connection *conn, *candidate = NULL;
struct rxrpc_local *local = cp->local;
struct rb_node *p, **pp, *parent;
long diff;
- int chan;
-
- DECLARE_WAITQUEUE(myself, current);
+ int ret = -ENOMEM;
_enter("{%d,%lx},", call->debug_id, call->user_call_ID);
cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp);
if (!cp->peer)
- return -ENOMEM;
+ goto error;
+ /* If the connection is not meant to be exclusive, search the available
+ * connections to see if the connection we want to use already exists.
+ */
if (!cp->exclusive) {
- /* Search for a existing client connection unless this is going
- * to be a connection that's used exclusively for a single call.
- */
_debug("search 1");
spin_lock(&local->client_conns_lock);
p = local->client_conns.rb_node;
@@ -206,39 +317,56 @@ int rxrpc_connect_call(struct rxrpc_call *call,
diff = (cmp(peer) ?:
cmp(key) ?:
cmp(security_level));
- if (diff < 0)
+#undef cmp
+ if (diff < 0) {
p = p->rb_left;
- else if (diff > 0)
+ } else if (diff > 0) {
p = p->rb_right;
- else
- goto found_extant_conn;
+ } else {
+ if (rxrpc_may_reuse_conn(conn) &&
+ rxrpc_get_connection_maybe(conn))
+ goto found_extant_conn;
+ /* The connection needs replacing. It's better
+ * to effect that when we have something to
+ * replace it with so that we don't have to
+ * rebalance the tree twice.
+ */
+ break;
+ }
}
spin_unlock(&local->client_conns_lock);
}
- /* We didn't find a connection or we want an exclusive one. */
- _debug("get new conn");
+ /* There wasn't a connection yet or we need an exclusive connection.
+ * We need to create a candidate and then potentially redo the search
+ * in case we're racing with another thread also trying to connect on a
+ * shareable connection.
+ */
+ _debug("new conn");
candidate = rxrpc_alloc_client_connection(cp, gfp);
- if (!candidate) {
- _leave(" = -ENOMEM");
- return -ENOMEM;
+ if (IS_ERR(candidate)) {
+ ret = PTR_ERR(candidate);
+ goto error_peer;
}
+ /* Add the call to the new connection's waiting list in case we're
+ * going to have to wait for the connection to come live. It's our
+ * connection, so we want first dibs on the channel slots. We would
+ * normally have to take channel_lock but we do this before anyone else
+ * can see the connection.
+ */
+ list_add_tail(&call->chan_wait_link, &candidate->waiting_calls);
+
if (cp->exclusive) {
- /* Assign the call on an exclusive connection to channel 0 and
- * don't add the connection to the endpoint's shareable conn
- * lookup tree.
- */
- _debug("exclusive chan 0");
- conn = candidate;
- atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
- spin_lock(&conn->channel_lock);
- chan = 0;
- goto found_channel;
+ call->conn = candidate;
+ call->security_ix = candidate->security_ix;
+ _leave(" = 0 [exclusive %d]", candidate->debug_id);
+ return 0;
}
- /* We need to redo the search before attempting to add a new connection
- * lest we race with someone else adding a conflicting instance.
+ /* Publish the new connection for userspace to find. We need to redo
+ * the search before doing this lest we race with someone else adding a
+ * conflicting instance.
*/
_debug("search 2");
spin_lock(&local->client_conns_lock);
@@ -249,124 +377,711 @@ int rxrpc_connect_call(struct rxrpc_call *call,
parent = *pp;
conn = rb_entry(parent, struct rxrpc_connection, client_node);
+#define cmp(X) ((long)conn->params.X - (long)candidate->params.X)
diff = (cmp(peer) ?:
cmp(key) ?:
cmp(security_level));
- if (diff < 0)
+#undef cmp
+ if (diff < 0) {
pp = &(*pp)->rb_left;
- else if (diff > 0)
+ } else if (diff > 0) {
pp = &(*pp)->rb_right;
- else
- goto found_extant_conn;
+ } else {
+ if (rxrpc_may_reuse_conn(conn) &&
+ rxrpc_get_connection_maybe(conn))
+ goto found_extant_conn;
+ /* The old connection is from an outdated epoch. */
+ _debug("replace conn");
+ clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags);
+ rb_replace_node(&conn->client_node,
+ &candidate->client_node,
+ &local->client_conns);
+ trace_rxrpc_client(conn, -1, rxrpc_client_replace);
+ goto candidate_published;
+ }
}
- /* The second search also failed; simply add the new connection with
- * the new call in channel 0. Note that we need to take the channel
- * lock before dropping the client conn lock.
- */
_debug("new conn");
- set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
rb_link_node(&candidate->client_node, parent, pp);
rb_insert_color(&candidate->client_node, &local->client_conns);
-attached:
- conn = candidate;
- candidate = NULL;
- atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1);
- spin_lock(&conn->channel_lock);
+candidate_published:
+ set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
+ call->conn = candidate;
+ call->security_ix = candidate->security_ix;
spin_unlock(&local->client_conns_lock);
- chan = 0;
+ _leave(" = 0 [new %d]", candidate->debug_id);
+ return 0;
-found_channel:
- _debug("found chan");
- call->conn = conn;
- call->channel = chan;
- call->epoch = conn->proto.epoch;
- call->cid = conn->proto.cid | chan;
- call->call_id = ++conn->channels[chan].call_counter;
- conn->channels[chan].call_id = call->call_id;
- rcu_assign_pointer(conn->channels[chan].call, call);
+ /* We come here if we found a suitable connection already in existence.
+ * Discard any candidate we may have allocated, and try to get a
+ * channel on this one.
+ */
+found_extant_conn:
+ _debug("found conn");
+ spin_unlock(&local->client_conns_lock);
- _net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id);
+ if (candidate) {
+ trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate);
+ rxrpc_put_connection(candidate);
+ candidate = NULL;
+ }
+ spin_lock(&conn->channel_lock);
+ call->conn = conn;
+ call->security_ix = conn->security_ix;
+ list_add(&call->chan_wait_link, &conn->waiting_calls);
spin_unlock(&conn->channel_lock);
+ _leave(" = 0 [extant %d]", conn->debug_id);
+ return 0;
+
+error_peer:
rxrpc_put_peer(cp->peer);
cp->peer = NULL;
- _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
- return 0;
+error:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Activate a connection.
+ */
+static void rxrpc_activate_conn(struct rxrpc_connection *conn)
+{
+ trace_rxrpc_client(conn, -1, rxrpc_client_to_active);
+ conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE;
+ rxrpc_nr_active_client_conns++;
+ list_move_tail(&conn->cache_link, &rxrpc_active_client_conns);
+}
+
+/*
+ * Attempt to animate a connection for a new call.
+ *
+ * If it's not exclusive, the connection is in the endpoint tree, and we're in
+ * the conn's list of those waiting to grab a channel. There is, however, a
+ * limit on the number of live connections allowed at any one time, so we may
+ * have to wait for capacity to become available.
+ *
+ * Note that a connection on the waiting queue might *also* have active
+ * channels if it has been culled to make space and then re-requested by a new
+ * call.
+ */
+static void rxrpc_animate_client_conn(struct rxrpc_connection *conn)
+{
+ unsigned int nr_conns;
+
+ _enter("%d,%d", conn->debug_id, conn->cache_state);
+
+ if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE)
+ goto out;
+
+ spin_lock(&rxrpc_client_conn_cache_lock);
+
+ nr_conns = rxrpc_nr_client_conns;
+ if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) {
+ trace_rxrpc_client(conn, -1, rxrpc_client_count);
+ rxrpc_nr_client_conns = nr_conns + 1;
+ }
+
+ switch (conn->cache_state) {
+ case RXRPC_CONN_CLIENT_ACTIVE:
+ case RXRPC_CONN_CLIENT_WAITING:
+ break;
+
+ case RXRPC_CONN_CLIENT_INACTIVE:
+ case RXRPC_CONN_CLIENT_CULLED:
+ case RXRPC_CONN_CLIENT_IDLE:
+ if (nr_conns >= rxrpc_max_client_connections)
+ goto wait_for_capacity;
+ goto activate_conn;
+
+ default:
+ BUG();
+ }
+
+out_unlock:
+ spin_unlock(&rxrpc_client_conn_cache_lock);
+out:
+ _leave(" [%d]", conn->cache_state);
+ return;
+
+activate_conn:
+ _debug("activate");
+ rxrpc_activate_conn(conn);
+ goto out_unlock;
+
+wait_for_capacity:
+ _debug("wait");
+ trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting);
+ conn->cache_state = RXRPC_CONN_CLIENT_WAITING;
+ list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns);
+ goto out_unlock;
+}
+
+/*
+ * Deactivate a channel.
+ */
+static void rxrpc_deactivate_one_channel(struct rxrpc_connection *conn,
+ unsigned int channel)
+{
+ struct rxrpc_channel *chan = &conn->channels[channel];
+
+ rcu_assign_pointer(chan->call, NULL);
+ conn->active_chans &= ~(1 << channel);
+}
+
+/*
+ * Assign a channel to the call at the front of the queue and wake the call up.
+ * We don't increment the callNumber counter until this number has been exposed
+ * to the world.
+ */
+static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
+ unsigned int channel)
+{
+ struct rxrpc_channel *chan = &conn->channels[channel];
+ struct rxrpc_call *call = list_entry(conn->waiting_calls.next,
+ struct rxrpc_call, chan_wait_link);
+ u32 call_id = chan->call_counter + 1;
+
+ trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate);
+
+ write_lock_bh(&call->state_lock);
+ call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
+ write_unlock_bh(&call->state_lock);
+
+ rxrpc_see_call(call);
+ list_del_init(&call->chan_wait_link);
+ conn->active_chans |= 1 << channel;
+ call->peer = rxrpc_get_peer(conn->params.peer);
+ call->cid = conn->proto.cid | channel;
+ call->call_id = call_id;
+
+ _net("CONNECT call %08x:%08x as call %d on conn %d",
+ call->cid, call->call_id, call->debug_id, conn->debug_id);
- /* We found a potentially suitable connection already in existence. If
- * we can reuse it (ie. its usage count hasn't been reduced to 0 by the
- * reaper), discard any candidate we may have allocated, and try to get
- * a channel on this one, otherwise we have to replace it.
+ /* Paired with the read barrier in rxrpc_wait_for_channel(). This
+ * orders cid and epoch in the connection wrt to call_id without the
+ * need to take the channel_lock.
+ *
+ * We provisionally assign a callNumber at this point, but we don't
+ * confirm it until the call is about to be exposed.
+ *
+ * TODO: Pair with a barrier in the data_ready handler when that looks
+ * at the call ID through a connection channel.
*/
-found_extant_conn:
- _debug("found conn");
- if (!rxrpc_get_connection_maybe(conn)) {
- set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags);
- rb_replace_node(&conn->client_node,
- &candidate->client_node,
- &local->client_conns);
- clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags);
- goto attached;
+ smp_wmb();
+ chan->call_id = call_id;
+ rcu_assign_pointer(chan->call, call);
+ wake_up(&call->waitq);
+}
+
+/*
+ * Assign channels and callNumbers to waiting calls with channel_lock
+ * held by caller.
+ */
+static void rxrpc_activate_channels_locked(struct rxrpc_connection *conn)
+{
+ u8 avail, mask;
+
+ switch (conn->cache_state) {
+ case RXRPC_CONN_CLIENT_ACTIVE:
+ mask = RXRPC_ACTIVE_CHANS_MASK;
+ break;
+ default:
+ return;
}
- spin_unlock(&local->client_conns_lock);
+ while (!list_empty(&conn->waiting_calls) &&
+ (avail = ~conn->active_chans,
+ avail &= mask,
+ avail != 0))
+ rxrpc_activate_one_channel(conn, __ffs(avail));
+}
+
+/*
+ * Assign channels and callNumbers to waiting calls.
+ */
+static void rxrpc_activate_channels(struct rxrpc_connection *conn)
+{
+ _enter("%d", conn->debug_id);
- rxrpc_put_connection(candidate);
+ trace_rxrpc_client(conn, -1, rxrpc_client_activate_chans);
+
+ if (conn->active_chans == RXRPC_ACTIVE_CHANS_MASK)
+ return;
+
+ spin_lock(&conn->channel_lock);
+ rxrpc_activate_channels_locked(conn);
+ spin_unlock(&conn->channel_lock);
+ _leave("");
+}
+
+/*
+ * Wait for a callNumber and a channel to be granted to a call.
+ */
+static int rxrpc_wait_for_channel(struct rxrpc_call *call, gfp_t gfp)
+{
+ int ret = 0;
+
+ _enter("%d", call->debug_id);
+
+ if (!call->call_id) {
+ DECLARE_WAITQUEUE(myself, current);
- if (!atomic_add_unless(&conn->avail_chans, -1, 0)) {
if (!gfpflags_allow_blocking(gfp)) {
- rxrpc_put_connection(conn);
- _leave(" = -EAGAIN");
- return -EAGAIN;
+ ret = -EAGAIN;
+ goto out;
}
- add_wait_queue(&conn->channel_wq, &myself);
+ add_wait_queue_exclusive(&call->waitq, &myself);
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
- if (atomic_add_unless(&conn->avail_chans, -1, 0))
+ if (call->call_id)
+ break;
+ if (signal_pending(current)) {
+ ret = -ERESTARTSYS;
break;
- if (signal_pending(current))
- goto interrupted;
+ }
schedule();
}
- remove_wait_queue(&conn->channel_wq, &myself);
+ remove_wait_queue(&call->waitq, &myself);
__set_current_state(TASK_RUNNING);
}
- /* The connection allegedly now has a free channel and we can now
- * attach the call to it.
- */
+ /* Paired with the write barrier in rxrpc_activate_one_channel(). */
+ smp_rmb();
+
+out:
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * find a connection for a call
+ * - called in process context with IRQs enabled
+ */
+int rxrpc_connect_call(struct rxrpc_call *call,
+ struct rxrpc_conn_parameters *cp,
+ struct sockaddr_rxrpc *srx,
+ gfp_t gfp)
+{
+ int ret;
+
+ _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+
+ rxrpc_discard_expired_client_conns(NULL);
+ rxrpc_cull_active_client_conns();
+
+ ret = rxrpc_get_client_conn(call, cp, srx, gfp);
+ if (ret < 0)
+ return ret;
+
+ rxrpc_animate_client_conn(call->conn);
+ rxrpc_activate_channels(call->conn);
+
+ ret = rxrpc_wait_for_channel(call, gfp);
+ if (ret < 0)
+ rxrpc_disconnect_client_call(call);
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Note that a connection is about to be exposed to the world. Once it is
+ * exposed, we maintain an extra ref on it that stops it from being summarily
+ * discarded before it's (a) had a chance to deal with retransmission and (b)
+ * had a chance at re-use (the per-connection security negotiation is
+ * expensive).
+ */
+static void rxrpc_expose_client_conn(struct rxrpc_connection *conn,
+ unsigned int channel)
+{
+ if (!test_and_set_bit(RXRPC_CONN_EXPOSED, &conn->flags)) {
+ trace_rxrpc_client(conn, channel, rxrpc_client_exposed);
+ rxrpc_get_connection(conn);
+ }
+}
+
+/*
+ * Note that a call, and thus a connection, is about to be exposed to the
+ * world.
+ */
+void rxrpc_expose_client_call(struct rxrpc_call *call)
+{
+ unsigned int channel = call->cid & RXRPC_CHANNELMASK;
+ struct rxrpc_connection *conn = call->conn;
+ struct rxrpc_channel *chan = &conn->channels[channel];
+
+ if (!test_and_set_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+ /* Mark the call ID as being used. If the callNumber counter
+ * exceeds ~2 billion, we kill the connection after its
+ * outstanding calls have finished so that the counter doesn't
+ * wrap.
+ */
+ chan->call_counter++;
+ if (chan->call_counter >= INT_MAX)
+ set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+ rxrpc_expose_client_conn(conn, channel);
+ }
+}
+
+/*
+ * Disconnect a client call.
+ */
+void rxrpc_disconnect_client_call(struct rxrpc_call *call)
+{
+ unsigned int channel = call->cid & RXRPC_CHANNELMASK;
+ struct rxrpc_connection *conn = call->conn;
+ struct rxrpc_channel *chan = &conn->channels[channel];
+
+ trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
+ call->conn = NULL;
+
spin_lock(&conn->channel_lock);
- for (chan = 0; chan < RXRPC_MAXCALLS; chan++)
- if (!conn->channels[chan].call)
- goto found_channel;
- BUG();
+ /* Calls that have never actually been assigned a channel can simply be
+ * discarded. If the conn didn't get used either, it will follow
+ * immediately unless someone else grabs it in the meantime.
+ */
+ if (!list_empty(&call->chan_wait_link)) {
+ _debug("call is waiting");
+ ASSERTCMP(call->call_id, ==, 0);
+ ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags));
+ list_del_init(&call->chan_wait_link);
+
+ trace_rxrpc_client(conn, channel, rxrpc_client_chan_unstarted);
+
+ /* We must deactivate or idle the connection if it's now
+ * waiting for nothing.
+ */
+ spin_lock(&rxrpc_client_conn_cache_lock);
+ if (conn->cache_state == RXRPC_CONN_CLIENT_WAITING &&
+ list_empty(&conn->waiting_calls) &&
+ !conn->active_chans)
+ goto idle_connection;
+ goto out;
+ }
+
+ ASSERTCMP(rcu_access_pointer(chan->call), ==, call);
+
+ /* If a client call was exposed to the world, we save the result for
+ * retransmission.
+ *
+ * We use a barrier here so that the call number and abort code can be
+ * read without needing to take a lock.
+ *
+ * TODO: Make the incoming packet handler check this and handle
+ * terminal retransmission without requiring access to the call.
+ */
+ if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+ _debug("exposed %u,%u", call->call_id, call->abort_code);
+ __rxrpc_disconnect_call(conn, call);
+ }
+
+ /* See if we can pass the channel directly to another call. */
+ if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE &&
+ !list_empty(&conn->waiting_calls)) {
+ trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass);
+ rxrpc_activate_one_channel(conn, channel);
+ goto out_2;
+ }
+
+ /* Things are more complex and we need the cache lock. We might be
+ * able to simply idle the conn or it might now be lurking on the wait
+ * list. It might even get moved back to the active list whilst we're
+ * waiting for the lock.
+ */
+ spin_lock(&rxrpc_client_conn_cache_lock);
+
+ switch (conn->cache_state) {
+ case RXRPC_CONN_CLIENT_ACTIVE:
+ if (list_empty(&conn->waiting_calls)) {
+ rxrpc_deactivate_one_channel(conn, channel);
+ if (!conn->active_chans) {
+ rxrpc_nr_active_client_conns--;
+ goto idle_connection;
+ }
+ goto out;
+ }
+
+ trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass);
+ rxrpc_activate_one_channel(conn, channel);
+ goto out;
-interrupted:
- remove_wait_queue(&conn->channel_wq, &myself);
- __set_current_state(TASK_RUNNING);
+ case RXRPC_CONN_CLIENT_CULLED:
+ rxrpc_deactivate_one_channel(conn, channel);
+ ASSERT(list_empty(&conn->waiting_calls));
+ if (!conn->active_chans)
+ goto idle_connection;
+ goto out;
+
+ case RXRPC_CONN_CLIENT_WAITING:
+ rxrpc_deactivate_one_channel(conn, channel);
+ goto out;
+
+ default:
+ BUG();
+ }
+
+out:
+ spin_unlock(&rxrpc_client_conn_cache_lock);
+out_2:
+ spin_unlock(&conn->channel_lock);
rxrpc_put_connection(conn);
- rxrpc_put_peer(cp->peer);
- cp->peer = NULL;
- _leave(" = -ERESTARTSYS");
- return -ERESTARTSYS;
+ _leave("");
+ return;
+
+idle_connection:
+ /* As no channels remain active, the connection gets deactivated
+ * immediately or moved to the idle list for a short while.
+ */
+ if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) {
+ trace_rxrpc_client(conn, channel, rxrpc_client_to_idle);
+ conn->idle_timestamp = jiffies;
+ conn->cache_state = RXRPC_CONN_CLIENT_IDLE;
+ list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns);
+ if (rxrpc_idle_client_conns.next == &conn->cache_link &&
+ !rxrpc_kill_all_client_conns)
+ queue_delayed_work(rxrpc_workqueue,
+ &rxrpc_client_conn_reap,
+ rxrpc_conn_idle_client_expiry);
+ } else {
+ trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive);
+ conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
+ list_del_init(&conn->cache_link);
+ }
+ goto out;
}
/*
- * Remove a client connection from the local endpoint's tree, thereby removing
- * it as a target for reuse for new client calls.
+ * Clean up a dead client connection.
*/
-void rxrpc_unpublish_client_conn(struct rxrpc_connection *conn)
+static struct rxrpc_connection *
+rxrpc_put_one_client_conn(struct rxrpc_connection *conn)
{
+ struct rxrpc_connection *next = NULL;
struct rxrpc_local *local = conn->params.local;
+ unsigned int nr_conns;
- spin_lock(&local->client_conns_lock);
- if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags))
- rb_erase(&conn->client_node, &local->client_conns);
- spin_unlock(&local->client_conns_lock);
+ trace_rxrpc_client(conn, -1, rxrpc_client_cleanup);
+
+ if (test_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) {
+ spin_lock(&local->client_conns_lock);
+ if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS,
+ &conn->flags))
+ rb_erase(&conn->client_node, &local->client_conns);
+ spin_unlock(&local->client_conns_lock);
+ }
rxrpc_put_client_connection_id(conn);
+
+ ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_INACTIVE);
+
+ if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) {
+ trace_rxrpc_client(conn, -1, rxrpc_client_uncount);
+ spin_lock(&rxrpc_client_conn_cache_lock);
+ nr_conns = --rxrpc_nr_client_conns;
+
+ if (nr_conns < rxrpc_max_client_connections &&
+ !list_empty(&rxrpc_waiting_client_conns)) {
+ next = list_entry(rxrpc_waiting_client_conns.next,
+ struct rxrpc_connection, cache_link);
+ rxrpc_get_connection(next);
+ rxrpc_activate_conn(next);
+ }
+
+ spin_unlock(&rxrpc_client_conn_cache_lock);
+ }
+
+ rxrpc_kill_connection(conn);
+ if (next)
+ rxrpc_activate_channels(next);
+
+ /* We need to get rid of the temporary ref we took upon next, but we
+ * can't call rxrpc_put_connection() recursively.
+ */
+ return next;
+}
+
+/*
+ * Clean up a dead client connections.
+ */
+void rxrpc_put_client_conn(struct rxrpc_connection *conn)
+{
+ const void *here = __builtin_return_address(0);
+ int n;
+
+ do {
+ n = atomic_dec_return(&conn->usage);
+ trace_rxrpc_conn(conn, rxrpc_conn_put_client, n, here);
+ if (n > 0)
+ return;
+ ASSERTCMP(n, >=, 0);
+
+ conn = rxrpc_put_one_client_conn(conn);
+ } while (conn);
+}
+
+/*
+ * Kill the longest-active client connections to make room for new ones.
+ */
+static void rxrpc_cull_active_client_conns(void)
+{
+ struct rxrpc_connection *conn;
+ unsigned int nr_conns = rxrpc_nr_client_conns;
+ unsigned int nr_active, limit;
+
+ _enter("");
+
+ ASSERTCMP(nr_conns, >=, 0);
+ if (nr_conns < rxrpc_max_client_connections) {
+ _leave(" [ok]");
+ return;
+ }
+ limit = rxrpc_reap_client_connections;
+
+ spin_lock(&rxrpc_client_conn_cache_lock);
+ nr_active = rxrpc_nr_active_client_conns;
+
+ while (nr_active > limit) {
+ ASSERT(!list_empty(&rxrpc_active_client_conns));
+ conn = list_entry(rxrpc_active_client_conns.next,
+ struct rxrpc_connection, cache_link);
+ ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE);
+
+ if (list_empty(&conn->waiting_calls)) {
+ trace_rxrpc_client(conn, -1, rxrpc_client_to_culled);
+ conn->cache_state = RXRPC_CONN_CLIENT_CULLED;
+ list_del_init(&conn->cache_link);
+ } else {
+ trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting);
+ conn->cache_state = RXRPC_CONN_CLIENT_WAITING;
+ list_move_tail(&conn->cache_link,
+ &rxrpc_waiting_client_conns);
+ }
+
+ nr_active--;
+ }
+
+ rxrpc_nr_active_client_conns = nr_active;
+ spin_unlock(&rxrpc_client_conn_cache_lock);
+ ASSERTCMP(nr_active, >=, 0);
+ _leave(" [culled]");
+}
+
+/*
+ * Discard expired client connections from the idle list. Each conn in the
+ * idle list has been exposed and holds an extra ref because of that.
+ *
+ * This may be called from conn setup or from a work item so cannot be
+ * considered non-reentrant.
+ */
+static void rxrpc_discard_expired_client_conns(struct work_struct *work)
+{
+ struct rxrpc_connection *conn;
+ unsigned long expiry, conn_expires_at, now;
+ unsigned int nr_conns;
+ bool did_discard = false;
+
+ _enter("%c", work ? 'w' : 'n');
+
+ if (list_empty(&rxrpc_idle_client_conns)) {
+ _leave(" [empty]");
+ return;
+ }
+
+ /* Don't double up on the discarding */
+ if (!spin_trylock(&rxrpc_client_conn_discard_mutex)) {
+ _leave(" [already]");
+ return;
+ }
+
+ /* We keep an estimate of what the number of conns ought to be after
+ * we've discarded some so that we don't overdo the discarding.
+ */
+ nr_conns = rxrpc_nr_client_conns;
+
+next:
+ spin_lock(&rxrpc_client_conn_cache_lock);
+
+ if (list_empty(&rxrpc_idle_client_conns))
+ goto out;
+
+ conn = list_entry(rxrpc_idle_client_conns.next,
+ struct rxrpc_connection, cache_link);
+ ASSERT(test_bit(RXRPC_CONN_EXPOSED, &conn->flags));
+
+ if (!rxrpc_kill_all_client_conns) {
+ /* If the number of connections is over the reap limit, we
+ * expedite discard by reducing the expiry timeout. We must,
+ * however, have at least a short grace period to be able to do
+ * final-ACK or ABORT retransmission.
+ */
+ expiry = rxrpc_conn_idle_client_expiry;
+ if (nr_conns > rxrpc_reap_client_connections)
+ expiry = rxrpc_conn_idle_client_fast_expiry;
+
+ conn_expires_at = conn->idle_timestamp + expiry;
+
+ now = READ_ONCE(jiffies);
+ if (time_after(conn_expires_at, now))
+ goto not_yet_expired;
+ }
+
+ trace_rxrpc_client(conn, -1, rxrpc_client_discard);
+ if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags))
+ BUG();
+ conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE;
+ list_del_init(&conn->cache_link);
+
+ spin_unlock(&rxrpc_client_conn_cache_lock);
+
+ /* When we cleared the EXPOSED flag, we took on responsibility for the
+ * reference that that had on the usage count. We deal with that here.
+ * If someone re-sets the flag and re-gets the ref, that's fine.
+ */
+ rxrpc_put_connection(conn);
+ did_discard = true;
+ nr_conns--;
+ goto next;
+
+not_yet_expired:
+ /* The connection at the front of the queue hasn't yet expired, so
+ * schedule the work item for that point if we discarded something.
+ *
+ * We don't worry if the work item is already scheduled - it can look
+ * after rescheduling itself at a later time. We could cancel it, but
+ * then things get messier.
+ */
+ _debug("not yet");
+ if (!rxrpc_kill_all_client_conns)
+ queue_delayed_work(rxrpc_workqueue,
+ &rxrpc_client_conn_reap,
+ conn_expires_at - now);
+
+out:
+ spin_unlock(&rxrpc_client_conn_cache_lock);
+ spin_unlock(&rxrpc_client_conn_discard_mutex);
+ _leave("");
+}
+
+/*
+ * Preemptively destroy all the client connection records rather than waiting
+ * for them to time out
+ */
+void __exit rxrpc_destroy_all_client_connections(void)
+{
+ _enter("");
+
+ spin_lock(&rxrpc_client_conn_cache_lock);
+ rxrpc_kill_all_client_conns = true;
+ spin_unlock(&rxrpc_client_conn_cache_lock);
+
+ cancel_delayed_work(&rxrpc_client_conn_reap);
+
+ if (!queue_delayed_work(rxrpc_workqueue, &rxrpc_client_conn_reap, 0))
+ _debug("destroy: queue failed");
+
+ _leave("");
}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index cee0f35bc1cf..3f9d8d7ec632 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -15,20 +15,128 @@
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/errqueue.h>
-#include <linux/udp.h>
-#include <linux/in.h>
-#include <linux/in6.h>
-#include <linux/icmp.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
#include "ar-internal.h"
/*
+ * Retransmit terminal ACK or ABORT of the previous call.
+ */
+static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_channel *chan;
+ struct msghdr msg;
+ struct kvec iov;
+ struct {
+ struct rxrpc_wire_header whdr;
+ union {
+ struct {
+ __be32 code;
+ } abort;
+ struct {
+ struct rxrpc_ackpacket ack;
+ u8 padding[3];
+ struct rxrpc_ackinfo info;
+ };
+ };
+ } __attribute__((packed)) pkt;
+ size_t len;
+ u32 serial, mtu, call_id;
+
+ _enter("%d", conn->debug_id);
+
+ chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK];
+
+ /* If the last call got moved on whilst we were waiting to run, just
+ * ignore this packet.
+ */
+ call_id = READ_ONCE(chan->last_call);
+ /* Sync with __rxrpc_disconnect_call() */
+ smp_rmb();
+ if (call_id != sp->hdr.callNumber)
+ return;
+
+ msg.msg_name = &conn->params.peer->srx.transport;
+ msg.msg_namelen = conn->params.peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ pkt.whdr.epoch = htonl(sp->hdr.epoch);
+ pkt.whdr.cid = htonl(sp->hdr.cid);
+ pkt.whdr.callNumber = htonl(sp->hdr.callNumber);
+ pkt.whdr.seq = 0;
+ pkt.whdr.type = chan->last_type;
+ pkt.whdr.flags = conn->out_clientflag;
+ pkt.whdr.userStatus = 0;
+ pkt.whdr.securityIndex = conn->security_ix;
+ pkt.whdr._rsvd = 0;
+ pkt.whdr.serviceId = htons(chan->last_service_id);
+
+ len = sizeof(pkt.whdr);
+ switch (chan->last_type) {
+ case RXRPC_PACKET_TYPE_ABORT:
+ pkt.abort.code = htonl(chan->last_abort);
+ len += sizeof(pkt.abort);
+ break;
+
+ case RXRPC_PACKET_TYPE_ACK:
+ mtu = conn->params.peer->if_mtu;
+ mtu -= conn->params.peer->hdrsize;
+ pkt.ack.bufferSpace = 0;
+ pkt.ack.maxSkew = htons(skb->priority);
+ pkt.ack.firstPacket = htonl(chan->last_seq);
+ pkt.ack.previousPacket = htonl(chan->last_seq - 1);
+ pkt.ack.serial = htonl(sp->hdr.serial);
+ pkt.ack.reason = RXRPC_ACK_DUPLICATE;
+ pkt.ack.nAcks = 0;
+ pkt.info.rxMTU = htonl(rxrpc_rx_mtu);
+ pkt.info.maxMTU = htonl(mtu);
+ pkt.info.rwind = htonl(rxrpc_rx_window_size);
+ pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max);
+ pkt.whdr.flags |= RXRPC_SLOW_START_OK;
+ len += sizeof(pkt.ack) + sizeof(pkt.info);
+ break;
+ }
+
+ /* Resync with __rxrpc_disconnect_call() and check that the last call
+ * didn't get advanced whilst we were filling out the packets.
+ */
+ smp_rmb();
+ if (READ_ONCE(chan->last_call) != call_id)
+ return;
+
+ iov.iov_base = &pkt;
+ iov.iov_len = len;
+
+ serial = atomic_inc_return(&conn->serial);
+ pkt.whdr.serial = htonl(serial);
+
+ switch (chan->last_type) {
+ case RXRPC_PACKET_TYPE_ABORT:
+ _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort);
+ break;
+ case RXRPC_PACKET_TYPE_ACK:
+ trace_rxrpc_tx_ack(NULL, serial, chan->last_seq, 0,
+ RXRPC_ACK_DUPLICATE, 0);
+ _proto("Tx ACK %%%u [re]", serial);
+ break;
+ }
+
+ kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len);
+ _leave("");
+ return;
+}
+
+/*
* pass a connection-level abort onto all calls on that connection
*/
-static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
- u32 abort_code)
+static void rxrpc_abort_calls(struct rxrpc_connection *conn,
+ enum rxrpc_call_completion compl,
+ u32 abort_code, int error)
{
struct rxrpc_call *call;
int i;
@@ -41,19 +149,15 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state,
call = rcu_dereference_protected(
conn->channels[i].call,
lockdep_is_held(&conn->channel_lock));
- write_lock_bh(&call->state_lock);
- if (call->state <= RXRPC_CALL_COMPLETE) {
- call->state = state;
- if (state == RXRPC_CALL_LOCALLY_ABORTED) {
- call->local_abort = conn->local_abort;
- set_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events);
- } else {
- call->remote_abort = conn->remote_abort;
- set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
- }
- rxrpc_queue_call(call);
+ if (call) {
+ if (compl == RXRPC_CALL_LOCALLY_ABORTED)
+ trace_rxrpc_abort("CON", call->cid,
+ call->call_id, 0,
+ abort_code, error);
+ if (rxrpc_set_call_completion(call, compl,
+ abort_code, error))
+ rxrpc_notify_socket(call);
}
- write_unlock_bh(&call->state_lock);
}
spin_unlock(&conn->channel_lock);
@@ -78,17 +182,16 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
/* generate a connection-level abort */
spin_lock_bh(&conn->state_lock);
- if (conn->state < RXRPC_CONN_REMOTELY_ABORTED) {
- conn->state = RXRPC_CONN_LOCALLY_ABORTED;
- conn->error = error;
- spin_unlock_bh(&conn->state_lock);
- } else {
+ if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
spin_unlock_bh(&conn->state_lock);
_leave(" = 0 [already dead]");
return 0;
}
- rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code);
+ conn->state = RXRPC_CONN_LOCALLY_ABORTED;
+ spin_unlock_bh(&conn->state_lock);
+
+ rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error);
msg.msg_name = &conn->params.peer->srx.transport;
msg.msg_namelen = conn->params.peer->srx.transport_len;
@@ -132,17 +235,18 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn,
/*
* mark a call as being on a now-secured channel
- * - must be called with softirqs disabled
+ * - must be called with BH's disabled.
*/
static void rxrpc_call_is_secure(struct rxrpc_call *call)
{
_enter("%p", call);
if (call) {
- read_lock(&call->state_lock);
- if (call->state < RXRPC_CALL_COMPLETE &&
- !test_and_set_bit(RXRPC_CALL_EV_SECURED, &call->events))
- rxrpc_queue_call(call);
- read_unlock(&call->state_lock);
+ write_lock_bh(&call->state_lock);
+ if (call->state == RXRPC_CALL_SERVER_SECURING) {
+ call->state = RXRPC_CALL_SERVER_ACCEPTING;
+ rxrpc_notify_socket(call);
+ }
+ write_unlock_bh(&call->state_lock);
}
}
@@ -159,22 +263,28 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
int loop, ret;
if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
- kleave(" = -ECONNABORTED [%u]", conn->state);
+ _leave(" = -ECONNABORTED [%u]", conn->state);
return -ECONNABORTED;
}
_enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_DATA:
+ case RXRPC_PACKET_TYPE_ACK:
+ rxrpc_conn_retransmit_call(conn, skb);
+ return 0;
+
case RXRPC_PACKET_TYPE_ABORT:
- if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0)
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &wtmp, sizeof(wtmp)) < 0)
return -EPROTO;
abort_code = ntohl(wtmp);
_proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code);
conn->state = RXRPC_CONN_REMOTELY_ABORTED;
rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED,
- abort_code);
+ abort_code, ECONNABORTED);
return -ECONNABORTED;
case RXRPC_PACKET_TYPE_CHALLENGE:
@@ -199,14 +309,16 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
conn->state = RXRPC_CONN_SERVICE;
+ spin_unlock(&conn->state_lock);
for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
rxrpc_call_is_secure(
rcu_dereference_protected(
conn->channels[loop].call,
lockdep_is_held(&conn->channel_lock)));
+ } else {
+ spin_unlock(&conn->state_lock);
}
- spin_unlock(&conn->state_lock);
spin_unlock(&conn->channel_lock);
return 0;
@@ -269,7 +381,7 @@ void rxrpc_process_connection(struct work_struct *work)
u32 abort_code = RX_PROTOCOL_ERROR;
int ret;
- _enter("{%d}", conn->debug_id);
+ rxrpc_see_connection(conn);
if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
rxrpc_secure_connection(conn);
@@ -277,6 +389,7 @@ void rxrpc_process_connection(struct work_struct *work)
/* go through the conn-level event packets, releasing the ref on this
* connection that each one has when we've finished with it */
while ((skb = skb_dequeue(&conn->rx_queue))) {
+ rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
ret = rxrpc_process_event(conn, skb, &abort_code);
switch (ret) {
case -EPROTO:
@@ -287,7 +400,7 @@ void rxrpc_process_connection(struct work_struct *work)
goto requeue_and_leave;
case -ECONNABORTED:
default:
- rxrpc_free_skb(skb);
+ rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
break;
}
}
@@ -304,91 +417,7 @@ requeue_and_leave:
protocol_error:
if (rxrpc_abort_connection(conn, -ret, abort_code) < 0)
goto requeue_and_leave;
- rxrpc_free_skb(skb);
+ rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
_leave(" [EPROTO]");
goto out;
}
-
-/*
- * put a packet up for transport-level abort
- */
-void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
-{
- CHECK_SLAB_OKAY(&local->usage);
-
- skb_queue_tail(&local->reject_queue, skb);
- rxrpc_queue_local(local);
-}
-
-/*
- * reject packets through the local endpoint
- */
-void rxrpc_reject_packets(struct rxrpc_local *local)
-{
- union {
- struct sockaddr sa;
- struct sockaddr_in sin;
- } sa;
- struct rxrpc_skb_priv *sp;
- struct rxrpc_wire_header whdr;
- struct sk_buff *skb;
- struct msghdr msg;
- struct kvec iov[2];
- size_t size;
- __be32 code;
-
- _enter("%d", local->debug_id);
-
- iov[0].iov_base = &whdr;
- iov[0].iov_len = sizeof(whdr);
- iov[1].iov_base = &code;
- iov[1].iov_len = sizeof(code);
- size = sizeof(whdr) + sizeof(code);
-
- msg.msg_name = &sa;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = 0;
-
- memset(&sa, 0, sizeof(sa));
- sa.sa.sa_family = local->srx.transport.family;
- switch (sa.sa.sa_family) {
- case AF_INET:
- msg.msg_namelen = sizeof(sa.sin);
- break;
- default:
- msg.msg_namelen = 0;
- break;
- }
-
- memset(&whdr, 0, sizeof(whdr));
- whdr.type = RXRPC_PACKET_TYPE_ABORT;
-
- while ((skb = skb_dequeue(&local->reject_queue))) {
- sp = rxrpc_skb(skb);
- switch (sa.sa.sa_family) {
- case AF_INET:
- sa.sin.sin_port = udp_hdr(skb)->source;
- sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
- code = htonl(skb->priority);
-
- whdr.epoch = htonl(sp->hdr.epoch);
- whdr.cid = htonl(sp->hdr.cid);
- whdr.callNumber = htonl(sp->hdr.callNumber);
- whdr.serviceId = htons(sp->hdr.serviceId);
- whdr.flags = sp->hdr.flags;
- whdr.flags ^= RXRPC_CLIENT_INITIATED;
- whdr.flags &= RXRPC_CLIENT_INITIATED;
-
- kernel_sendmsg(local->socket, &msg, iov, 2, size);
- break;
-
- default:
- break;
- }
-
- rxrpc_free_skb(skb);
- }
-
- _leave("");
-}
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 896d84493a05..e1e83af47866 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -1,6 +1,6 @@
-/* RxRPC virtual connection handler
+/* RxRPC virtual connection handler, common bits.
*
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
@@ -15,8 +15,6 @@
#include <linux/slab.h>
#include <linux/net.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/af_rxrpc.h>
#include "ar-internal.h"
/*
@@ -27,9 +25,12 @@ unsigned int rxrpc_connection_expiry = 10 * 60;
static void rxrpc_connection_reaper(struct work_struct *work);
LIST_HEAD(rxrpc_connections);
+LIST_HEAD(rxrpc_connection_proc_list);
DEFINE_RWLOCK(rxrpc_connection_lock);
static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
+static void rxrpc_destroy_connection(struct rcu_head *);
+
/*
* allocate a new connection
*/
@@ -41,21 +42,18 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp)
conn = kzalloc(sizeof(struct rxrpc_connection), gfp);
if (conn) {
+ INIT_LIST_HEAD(&conn->cache_link);
spin_lock_init(&conn->channel_lock);
- init_waitqueue_head(&conn->channel_wq);
+ INIT_LIST_HEAD(&conn->waiting_calls);
INIT_WORK(&conn->processor, &rxrpc_process_connection);
+ INIT_LIST_HEAD(&conn->proc_link);
INIT_LIST_HEAD(&conn->link);
skb_queue_head_init(&conn->rx_queue);
conn->security = &rxrpc_no_security;
spin_lock_init(&conn->state_lock);
- /* We maintain an extra ref on the connection whilst it is
- * on the rxrpc_connections list.
- */
- atomic_set(&conn->usage, 2);
conn->debug_id = atomic_inc_return(&rxrpc_debug_id);
- atomic_set(&conn->avail_chans, RXRPC_MAXCALLS);
conn->size_align = 4;
- conn->header_size = sizeof(struct rxrpc_wire_header);
+ conn->idle_timestamp = jiffies;
}
_leave(" = %p{%d}", conn, conn ? conn->debug_id : 0);
@@ -135,6 +133,16 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local,
srx.transport.sin.sin_addr.s_addr)
goto not_found;
break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ if (peer->srx.transport.sin6.sin6_port !=
+ srx.transport.sin6.sin6_port ||
+ memcmp(&peer->srx.transport.sin6.sin6_addr,
+ &srx.transport.sin6.sin6_addr,
+ sizeof(struct in6_addr)) != 0)
+ goto not_found;
+ break;
+#endif
default:
BUG();
}
@@ -153,25 +161,32 @@ not_found:
* terminates. The caller must hold the channel_lock and must release the
* call's ref on the connection.
*/
-void __rxrpc_disconnect_call(struct rxrpc_call *call)
+void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
+ struct rxrpc_call *call)
{
- struct rxrpc_connection *conn = call->conn;
- struct rxrpc_channel *chan = &conn->channels[call->channel];
+ struct rxrpc_channel *chan =
+ &conn->channels[call->cid & RXRPC_CHANNELMASK];
- _enter("%d,%d", conn->debug_id, call->channel);
+ _enter("%d,%x", conn->debug_id, call->cid);
if (rcu_access_pointer(chan->call) == call) {
/* Save the result of the call so that we can repeat it if necessary
* through the channel, whilst disposing of the actual call record.
*/
- chan->last_result = call->local_abort;
+ chan->last_service_id = call->service_id;
+ if (call->abort_code) {
+ chan->last_abort = call->abort_code;
+ chan->last_type = RXRPC_PACKET_TYPE_ABORT;
+ } else {
+ chan->last_seq = call->rx_hard_ack;
+ chan->last_type = RXRPC_PACKET_TYPE_ACK;
+ }
+ /* Sync with rxrpc_conn_retransmit(). */
smp_wmb();
chan->last_call = chan->call_id;
chan->call_id = chan->call_counter;
rcu_assign_pointer(chan->call, NULL);
- atomic_inc(&conn->avail_chans);
- wake_up(&conn->channel_wq);
}
_leave("");
@@ -185,34 +200,122 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
+ spin_lock_bh(&conn->params.peer->lock);
+ hlist_del_init(&call->error_link);
+ spin_unlock_bh(&conn->params.peer->lock);
+
+ if (rxrpc_is_client_call(call))
+ return rxrpc_disconnect_client_call(call);
+
spin_lock(&conn->channel_lock);
- __rxrpc_disconnect_call(call);
+ __rxrpc_disconnect_call(conn, call);
spin_unlock(&conn->channel_lock);
call->conn = NULL;
+ conn->idle_timestamp = jiffies;
rxrpc_put_connection(conn);
}
/*
- * release a virtual connection
+ * Kill off a connection.
*/
-void rxrpc_put_connection(struct rxrpc_connection *conn)
+void rxrpc_kill_connection(struct rxrpc_connection *conn)
{
- if (!conn)
- return;
+ ASSERT(!rcu_access_pointer(conn->channels[0].call) &&
+ !rcu_access_pointer(conn->channels[1].call) &&
+ !rcu_access_pointer(conn->channels[2].call) &&
+ !rcu_access_pointer(conn->channels[3].call));
+ ASSERT(list_empty(&conn->cache_link));
- _enter("%p{u=%d,d=%d}",
- conn, atomic_read(&conn->usage), conn->debug_id);
+ write_lock(&rxrpc_connection_lock);
+ list_del_init(&conn->proc_link);
+ write_unlock(&rxrpc_connection_lock);
- ASSERTCMP(atomic_read(&conn->usage), >, 1);
+ /* Drain the Rx queue. Note that even though we've unpublished, an
+ * incoming packet could still be being added to our Rx queue, so we
+ * will need to drain it again in the RCU cleanup handler.
+ */
+ rxrpc_purge_queue(&conn->rx_queue);
- conn->put_time = ktime_get_seconds();
- if (atomic_dec_return(&conn->usage) == 1) {
- _debug("zombie");
- rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
+ /* Leave final destruction to RCU. The connection processor work item
+ * must carry a ref on the connection to prevent us getting here whilst
+ * it is queued or running.
+ */
+ call_rcu(&conn->rcu, rxrpc_destroy_connection);
+}
+
+/*
+ * Queue a connection's work processor, getting a ref to pass to the work
+ * queue.
+ */
+bool rxrpc_queue_conn(struct rxrpc_connection *conn)
+{
+ const void *here = __builtin_return_address(0);
+ int n = __atomic_add_unless(&conn->usage, 1, 0);
+ if (n == 0)
+ return false;
+ if (rxrpc_queue_work(&conn->processor))
+ trace_rxrpc_conn(conn, rxrpc_conn_queued, n + 1, here);
+ else
+ rxrpc_put_connection(conn);
+ return true;
+}
+
+/*
+ * Note the re-emergence of a connection.
+ */
+void rxrpc_see_connection(struct rxrpc_connection *conn)
+{
+ const void *here = __builtin_return_address(0);
+ if (conn) {
+ int n = atomic_read(&conn->usage);
+
+ trace_rxrpc_conn(conn, rxrpc_conn_seen, n, here);
+ }
+}
+
+/*
+ * Get a ref on a connection.
+ */
+void rxrpc_get_connection(struct rxrpc_connection *conn)
+{
+ const void *here = __builtin_return_address(0);
+ int n = atomic_inc_return(&conn->usage);
+
+ trace_rxrpc_conn(conn, rxrpc_conn_got, n, here);
+}
+
+/*
+ * Try to get a ref on a connection.
+ */
+struct rxrpc_connection *
+rxrpc_get_connection_maybe(struct rxrpc_connection *conn)
+{
+ const void *here = __builtin_return_address(0);
+
+ if (conn) {
+ int n = __atomic_add_unless(&conn->usage, 1, 0);
+ if (n > 0)
+ trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here);
+ else
+ conn = NULL;
}
+ return conn;
+}
- _leave("");
+/*
+ * Release a service connection
+ */
+void rxrpc_put_service_conn(struct rxrpc_connection *conn)
+{
+ const void *here = __builtin_return_address(0);
+ int n;
+
+ n = atomic_dec_return(&conn->usage);
+ trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here);
+ ASSERTCMP(n, >=, 0);
+ if (n == 0)
+ rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
}
/*
@@ -242,19 +345,19 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu)
}
/*
- * reap dead connections
+ * reap dead service connections
*/
static void rxrpc_connection_reaper(struct work_struct *work)
{
struct rxrpc_connection *conn, *_p;
- unsigned long reap_older_than, earliest, put_time, now;
+ unsigned long reap_older_than, earliest, idle_timestamp, now;
LIST_HEAD(graveyard);
_enter("");
- now = ktime_get_seconds();
- reap_older_than = now - rxrpc_connection_expiry;
+ now = jiffies;
+ reap_older_than = now - rxrpc_connection_expiry * HZ;
earliest = ULONG_MAX;
write_lock(&rxrpc_connection_lock);
@@ -262,11 +365,17 @@ static void rxrpc_connection_reaper(struct work_struct *work)
ASSERTCMP(atomic_read(&conn->usage), >, 0);
if (likely(atomic_read(&conn->usage) > 1))
continue;
+ if (conn->state == RXRPC_CONN_SERVICE_PREALLOC)
+ continue;
+
+ idle_timestamp = READ_ONCE(conn->idle_timestamp);
+ _debug("reap CONN %d { u=%d,t=%ld }",
+ conn->debug_id, atomic_read(&conn->usage),
+ (long)reap_older_than - (long)idle_timestamp);
- put_time = READ_ONCE(conn->put_time);
- if (time_after(put_time, reap_older_than)) {
- if (time_before(put_time, earliest))
- earliest = put_time;
+ if (time_after(idle_timestamp, reap_older_than)) {
+ if (time_before(idle_timestamp, earliest))
+ earliest = idle_timestamp;
continue;
}
@@ -277,7 +386,7 @@ static void rxrpc_connection_reaper(struct work_struct *work)
continue;
if (rxrpc_conn_is_client(conn))
- rxrpc_unpublish_client_conn(conn);
+ BUG();
else
rxrpc_unpublish_service_conn(conn);
@@ -287,9 +396,9 @@ static void rxrpc_connection_reaper(struct work_struct *work)
if (earliest != ULONG_MAX) {
_debug("reschedule reaper %ld", (long) earliest - now);
- ASSERTCMP(earliest, >, now);
+ ASSERT(time_after(earliest, now));
rxrpc_queue_delayed_work(&rxrpc_connection_reap,
- (earliest - now) * HZ);
+ earliest - now);
}
while (!list_empty(&graveyard)) {
@@ -298,16 +407,15 @@ static void rxrpc_connection_reaper(struct work_struct *work)
list_del_init(&conn->link);
ASSERTCMP(atomic_read(&conn->usage), ==, 0);
- skb_queue_purge(&conn->rx_queue);
- call_rcu(&conn->rcu, rxrpc_destroy_connection);
+ rxrpc_kill_connection(conn);
}
_leave("");
}
/*
- * preemptively destroy all the connection records rather than waiting for them
- * to time out
+ * preemptively destroy all the service connection records rather than
+ * waiting for them to time out
*/
void __exit rxrpc_destroy_all_connections(void)
{
@@ -316,6 +424,8 @@ void __exit rxrpc_destroy_all_connections(void)
_enter("");
+ rxrpc_destroy_all_client_connections();
+
rxrpc_connection_expiry = 0;
cancel_delayed_work(&rxrpc_connection_reap);
rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
@@ -330,6 +440,8 @@ void __exit rxrpc_destroy_all_connections(void)
write_unlock(&rxrpc_connection_lock);
BUG_ON(leak);
+ ASSERT(list_empty(&rxrpc_connection_proc_list));
+
/* Make sure the local and peer records pinned by any dying connections
* are released.
*/
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index fd9027ccba8f..eef551f40dc2 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -65,9 +65,8 @@ done:
* Insert a service connection into a peer's tree, thereby making it a target
* for incoming packets.
*/
-static struct rxrpc_connection *
-rxrpc_publish_service_conn(struct rxrpc_peer *peer,
- struct rxrpc_connection *conn)
+static void rxrpc_publish_service_conn(struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn)
{
struct rxrpc_connection *cursor = NULL;
struct rxrpc_conn_proto k = conn->proto;
@@ -96,7 +95,7 @@ conn_published:
set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags);
write_sequnlock_bh(&peer->service_conn_lock);
_leave(" = %d [new]", conn->debug_id);
- return conn;
+ return;
found_extant_conn:
if (atomic_read(&cursor->usage) == 0)
@@ -119,100 +118,58 @@ replace_old_connection:
}
/*
- * get a record of an incoming connection
+ * Preallocate a service connection. The connection is placed on the proc and
+ * reap lists so that we don't have to get the lock from BH context.
*/
-struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local,
- struct sockaddr_rxrpc *srx,
- struct sk_buff *skb)
+struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp)
{
- struct rxrpc_connection *conn;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rxrpc_peer *peer;
- const char *new = "old";
-
- _enter("");
+ struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp);
- peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
- if (!peer) {
- _debug("no peer");
- return ERR_PTR(-EBUSY);
- }
+ if (conn) {
+ /* We maintain an extra ref on the connection whilst it is on
+ * the rxrpc_connections list.
+ */
+ conn->state = RXRPC_CONN_SERVICE_PREALLOC;
+ atomic_set(&conn->usage, 2);
- ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED);
-
- rcu_read_lock();
- peer = rxrpc_lookup_peer_rcu(local, srx);
- if (peer) {
- conn = rxrpc_find_service_conn_rcu(peer, skb);
- if (conn) {
- if (sp->hdr.securityIndex != conn->security_ix)
- goto security_mismatch_rcu;
- if (rxrpc_get_connection_maybe(conn))
- goto found_extant_connection_rcu;
-
- /* The conn has expired but we can't remove it without
- * the appropriate lock, so we attempt to replace it
- * when we have a new candidate.
- */
- }
+ write_lock(&rxrpc_connection_lock);
+ list_add_tail(&conn->link, &rxrpc_connections);
+ list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list);
+ write_unlock(&rxrpc_connection_lock);
- if (!rxrpc_get_peer_maybe(peer))
- peer = NULL;
+ trace_rxrpc_conn(conn, rxrpc_conn_new_service,
+ atomic_read(&conn->usage),
+ __builtin_return_address(0));
}
- rcu_read_unlock();
- if (!peer) {
- peer = rxrpc_lookup_peer(local, srx, GFP_NOIO);
- if (!peer)
- goto enomem;
- }
+ return conn;
+}
- /* We don't have a matching record yet. */
- conn = rxrpc_alloc_connection(GFP_NOIO);
- if (!conn)
- goto enomem_peer;
+/*
+ * Set up an incoming connection. This is called in BH context with the RCU
+ * read lock held.
+ */
+void rxrpc_new_incoming_connection(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ _enter("");
conn->proto.epoch = sp->hdr.epoch;
conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK;
- conn->params.local = local;
- conn->params.peer = peer;
conn->params.service_id = sp->hdr.serviceId;
conn->security_ix = sp->hdr.securityIndex;
conn->out_clientflag = 0;
- conn->state = RXRPC_CONN_SERVICE;
- if (conn->params.service_id)
+ if (conn->security_ix)
conn->state = RXRPC_CONN_SERVICE_UNSECURED;
-
- rxrpc_get_local(local);
-
- write_lock(&rxrpc_connection_lock);
- list_add_tail(&conn->link, &rxrpc_connections);
- write_unlock(&rxrpc_connection_lock);
+ else
+ conn->state = RXRPC_CONN_SERVICE;
/* Make the connection a target for incoming packets. */
- rxrpc_publish_service_conn(peer, conn);
-
- new = "new";
-
-success:
- _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid);
- _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage));
- return conn;
-
-found_extant_connection_rcu:
- rcu_read_unlock();
- goto success;
-
-security_mismatch_rcu:
- rcu_read_unlock();
- _leave(" = -EKEYREJECTED");
- return ERR_PTR(-EKEYREJECTED);
+ rxrpc_publish_service_conn(conn->params.peer, conn);
-enomem_peer:
- rxrpc_put_peer(peer);
-enomem:
- _leave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
+ _net("CONNECTION new %d {%x}", conn->debug_id, conn->proto.cid);
}
/*
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 70bb77818dea..3ad9f75031e3 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -1,6 +1,6 @@
/* RxRPC packet reception
*
- * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
* Written by David Howells (dhowells@redhat.com)
*
* This program is free software; you can redistribute it and/or
@@ -27,550 +27,920 @@
#include <net/net_namespace.h>
#include "ar-internal.h"
+static void rxrpc_proto_abort(const char *why,
+ struct rxrpc_call *call, rxrpc_seq_t seq)
+{
+ if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG)) {
+ set_bit(RXRPC_CALL_EV_ABORT, &call->events);
+ rxrpc_queue_call(call);
+ }
+}
+
/*
- * queue a packet for recvmsg to pass to userspace
- * - the caller must hold a lock on call->lock
- * - must not be called with interrupts disabled (sk_filter() disables BH's)
- * - eats the packet whether successful or not
- * - there must be just one reference to the packet, which the caller passes to
- * this function
+ * Do TCP-style congestion management [RFC 5681].
*/
-int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb,
- bool force, bool terminal)
+static void rxrpc_congestion_management(struct rxrpc_call *call,
+ struct sk_buff *skb,
+ struct rxrpc_ack_summary *summary,
+ rxrpc_serial_t acked_serial)
{
- struct rxrpc_skb_priv *sp;
- struct rxrpc_sock *rx = call->socket;
- struct sock *sk;
- int ret;
+ enum rxrpc_congest_change change = rxrpc_cong_no_change;
+ unsigned int cumulative_acks = call->cong_cumul_acks;
+ unsigned int cwnd = call->cong_cwnd;
+ bool resend = false;
+
+ summary->flight_size =
+ (call->tx_top - call->tx_hard_ack) - summary->nr_acks;
+
+ if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) {
+ summary->retrans_timeo = true;
+ call->cong_ssthresh = max_t(unsigned int,
+ summary->flight_size / 2, 2);
+ cwnd = 1;
+ if (cwnd >= call->cong_ssthresh &&
+ call->cong_mode == RXRPC_CALL_SLOW_START) {
+ call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+ call->cong_tstamp = skb->tstamp;
+ cumulative_acks = 0;
+ }
+ }
- _enter(",,%d,%d", force, terminal);
+ cumulative_acks += summary->nr_new_acks;
+ cumulative_acks += summary->nr_rot_new_acks;
+ if (cumulative_acks > 255)
+ cumulative_acks = 255;
+
+ summary->mode = call->cong_mode;
+ summary->cwnd = call->cong_cwnd;
+ summary->ssthresh = call->cong_ssthresh;
+ summary->cumulative_acks = cumulative_acks;
+ summary->dup_acks = call->cong_dup_acks;
+
+ switch (call->cong_mode) {
+ case RXRPC_CALL_SLOW_START:
+ if (summary->nr_nacks > 0)
+ goto packet_loss_detected;
+ if (summary->cumulative_acks > 0)
+ cwnd += 1;
+ if (cwnd >= call->cong_ssthresh) {
+ call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+ call->cong_tstamp = skb->tstamp;
+ }
+ goto out;
- ASSERT(!irqs_disabled());
+ case RXRPC_CALL_CONGEST_AVOIDANCE:
+ if (summary->nr_nacks > 0)
+ goto packet_loss_detected;
- sp = rxrpc_skb(skb);
- ASSERTCMP(sp->call, ==, call);
-
- /* if we've already posted the terminal message for a call, then we
- * don't post any more */
- if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) {
- _debug("already terminated");
- ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE);
- rxrpc_free_skb(skb);
- return 0;
- }
-
- sk = &rx->sk;
-
- if (!force) {
- /* cast skb->rcvbuf to unsigned... It's pointless, but
- * reduces number of warnings when compiling with -W
- * --ANK */
-// ret = -ENOBUFS;
-// if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
-// (unsigned int) sk->sk_rcvbuf)
-// goto out;
-
- ret = sk_filter(sk, skb);
- if (ret < 0)
+ /* We analyse the number of packets that get ACK'd per RTT
+ * period and increase the window if we managed to fill it.
+ */
+ if (call->peer->rtt_usage == 0)
goto out;
- }
+ if (ktime_before(skb->tstamp,
+ ktime_add_ns(call->cong_tstamp,
+ call->peer->rtt)))
+ goto out_no_clear_ca;
+ change = rxrpc_cong_rtt_window_end;
+ call->cong_tstamp = skb->tstamp;
+ if (cumulative_acks >= cwnd)
+ cwnd++;
+ goto out;
- spin_lock_bh(&sk->sk_receive_queue.lock);
- if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags) &&
- !test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
- call->socket->sk.sk_state != RXRPC_CLOSE) {
- skb->destructor = rxrpc_packet_destructor;
- skb->dev = NULL;
- skb->sk = sk;
- atomic_add(skb->truesize, &sk->sk_rmem_alloc);
+ case RXRPC_CALL_PACKET_LOSS:
+ if (summary->nr_nacks == 0)
+ goto resume_normality;
- if (terminal) {
- _debug("<<<< TERMINAL MESSAGE >>>>");
- set_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags);
+ if (summary->new_low_nack) {
+ change = rxrpc_cong_new_low_nack;
+ call->cong_dup_acks = 1;
+ if (call->cong_extra > 1)
+ call->cong_extra = 1;
+ goto send_extra_data;
}
- /* allow interception by a kernel service */
- if (rx->interceptor) {
- rx->interceptor(sk, call->user_call_ID, skb);
- spin_unlock_bh(&sk->sk_receive_queue.lock);
- } else {
- _net("post skb %p", skb);
- __skb_queue_tail(&sk->sk_receive_queue, skb);
- spin_unlock_bh(&sk->sk_receive_queue.lock);
+ call->cong_dup_acks++;
+ if (call->cong_dup_acks < 3)
+ goto send_extra_data;
+
+ change = rxrpc_cong_begin_retransmission;
+ call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT;
+ call->cong_ssthresh = max_t(unsigned int,
+ summary->flight_size / 2, 2);
+ cwnd = call->cong_ssthresh + 3;
+ call->cong_extra = 0;
+ call->cong_dup_acks = 0;
+ resend = true;
+ goto out;
- if (!sock_flag(sk, SOCK_DEAD))
- sk->sk_data_ready(sk);
+ case RXRPC_CALL_FAST_RETRANSMIT:
+ if (!summary->new_low_nack) {
+ if (summary->nr_new_acks == 0)
+ cwnd += 1;
+ call->cong_dup_acks++;
+ if (call->cong_dup_acks == 2) {
+ change = rxrpc_cong_retransmit_again;
+ call->cong_dup_acks = 0;
+ resend = true;
+ }
+ } else {
+ change = rxrpc_cong_progress;
+ cwnd = call->cong_ssthresh;
+ if (summary->nr_nacks == 0)
+ goto resume_normality;
}
- skb = NULL;
- } else {
- spin_unlock_bh(&sk->sk_receive_queue.lock);
+ goto out;
+
+ default:
+ BUG();
+ goto out;
}
- ret = 0;
+resume_normality:
+ change = rxrpc_cong_cleared_nacks;
+ call->cong_dup_acks = 0;
+ call->cong_extra = 0;
+ call->cong_tstamp = skb->tstamp;
+ if (cwnd < call->cong_ssthresh)
+ call->cong_mode = RXRPC_CALL_SLOW_START;
+ else
+ call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
out:
- rxrpc_free_skb(skb);
+ cumulative_acks = 0;
+out_no_clear_ca:
+ if (cwnd >= RXRPC_RXTX_BUFF_SIZE - 1)
+ cwnd = RXRPC_RXTX_BUFF_SIZE - 1;
+ call->cong_cwnd = cwnd;
+ call->cong_cumul_acks = cumulative_acks;
+ trace_rxrpc_congest(call, summary, acked_serial, change);
+ if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+ rxrpc_queue_call(call);
+ return;
- _leave(" = %d", ret);
- return ret;
+packet_loss_detected:
+ change = rxrpc_cong_saw_nack;
+ call->cong_mode = RXRPC_CALL_PACKET_LOSS;
+ call->cong_dup_acks = 0;
+ goto send_extra_data;
+
+send_extra_data:
+ /* Send some previously unsent DATA if we have some to advance the ACK
+ * state.
+ */
+ if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
+ RXRPC_TX_ANNO_LAST ||
+ summary->nr_acks != call->tx_top - call->tx_hard_ack) {
+ call->cong_extra++;
+ wake_up(&call->waitq);
+ }
+ goto out_no_clear_ca;
}
/*
- * process a DATA packet, posting the packet to the appropriate queue
- * - eats the packet if successful
+ * Ping the other end to fill our RTT cache and to retrieve the rwind
+ * and MTU parameters.
*/
-static int rxrpc_fast_process_data(struct rxrpc_call *call,
- struct sk_buff *skb, u32 seq)
+static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb,
+ int skew)
{
- struct rxrpc_skb_priv *sp;
- bool terminal;
- int ret, ackbit, ack;
- u32 serial;
- u8 flags;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ ktime_t now = skb->tstamp;
- _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq);
+ if (call->peer->rtt_usage < 3 ||
+ ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now))
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
+ true, true,
+ rxrpc_propose_ack_ping_for_params);
+}
- sp = rxrpc_skb(skb);
- ASSERTCMP(sp->call, ==, NULL);
- flags = sp->hdr.flags;
- serial = sp->hdr.serial;
+/*
+ * Apply a hard ACK by advancing the Tx window.
+ */
+static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
+ struct rxrpc_ack_summary *summary)
+{
+ struct sk_buff *skb, *list = NULL;
+ int ix;
+ u8 annotation;
+
+ if (call->acks_lowest_nak == call->tx_hard_ack) {
+ call->acks_lowest_nak = to;
+ } else if (before_eq(call->acks_lowest_nak, to)) {
+ summary->new_low_nack = true;
+ call->acks_lowest_nak = to;
+ }
spin_lock(&call->lock);
- if (call->state > RXRPC_CALL_COMPLETE)
- goto discard;
+ while (before(call->tx_hard_ack, to)) {
+ call->tx_hard_ack++;
+ ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK;
+ skb = call->rxtx_buffer[ix];
+ annotation = call->rxtx_annotations[ix];
+ rxrpc_see_skb(skb, rxrpc_skb_tx_rotated);
+ call->rxtx_buffer[ix] = NULL;
+ call->rxtx_annotations[ix] = 0;
+ skb->next = list;
+ list = skb;
+
+ if (annotation & RXRPC_TX_ANNO_LAST)
+ set_bit(RXRPC_CALL_TX_LAST, &call->flags);
+ if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK)
+ summary->nr_rot_new_acks++;
+ }
- ASSERTCMP(call->rx_data_expect, >=, call->rx_data_post);
- ASSERTCMP(call->rx_data_post, >=, call->rx_data_recv);
- ASSERTCMP(call->rx_data_recv, >=, call->rx_data_eaten);
+ spin_unlock(&call->lock);
- if (seq < call->rx_data_post) {
- _debug("dup #%u [-%u]", seq, call->rx_data_post);
- ack = RXRPC_ACK_DUPLICATE;
- ret = -ENOBUFS;
- goto discard_and_ack;
- }
+ trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ?
+ rxrpc_transmit_rotate_last :
+ rxrpc_transmit_rotate));
+ wake_up(&call->waitq);
- /* we may already have the packet in the out of sequence queue */
- ackbit = seq - (call->rx_data_eaten + 1);
- ASSERTCMP(ackbit, >=, 0);
- if (__test_and_set_bit(ackbit, call->ackr_window)) {
- _debug("dup oos #%u [%u,%u]",
- seq, call->rx_data_eaten, call->rx_data_post);
- ack = RXRPC_ACK_DUPLICATE;
- goto discard_and_ack;
+ while (list) {
+ skb = list;
+ list = skb->next;
+ skb->next = NULL;
+ rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
}
+}
- if (seq >= call->ackr_win_top) {
- _debug("exceed #%u [%u]", seq, call->ackr_win_top);
- __clear_bit(ackbit, call->ackr_window);
- ack = RXRPC_ACK_EXCEEDS_WINDOW;
- goto discard_and_ack;
- }
+/*
+ * End the transmission phase of a call.
+ *
+ * This occurs when we get an ACKALL packet, the first DATA packet of a reply,
+ * or a final ACK packet.
+ */
+static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
+ const char *abort_why)
+{
- if (seq == call->rx_data_expect) {
- clear_bit(RXRPC_CALL_EXPECT_OOS, &call->flags);
- call->rx_data_expect++;
- } else if (seq > call->rx_data_expect) {
- _debug("oos #%u [%u]", seq, call->rx_data_expect);
- call->rx_data_expect = seq + 1;
- if (test_and_set_bit(RXRPC_CALL_EXPECT_OOS, &call->flags)) {
- ack = RXRPC_ACK_OUT_OF_SEQUENCE;
- goto enqueue_and_ack;
- }
- goto enqueue_packet;
- }
+ ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
- if (seq != call->rx_data_post) {
- _debug("ahead #%u [%u]", seq, call->rx_data_post);
- goto enqueue_packet;
- }
+ write_lock(&call->state_lock);
- if (test_bit(RXRPC_CALL_RCVD_LAST, &call->flags))
- goto protocol_error;
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ if (reply_begun)
+ call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
+ else
+ call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+ break;
- /* if the packet need security things doing to it, then it goes down
- * the slow path */
- if (call->conn->security_ix)
- goto enqueue_packet;
+ case RXRPC_CALL_SERVER_AWAIT_ACK:
+ __rxrpc_call_completed(call);
+ rxrpc_notify_socket(call);
+ break;
- sp->call = call;
- rxrpc_get_call(call);
- atomic_inc(&call->skb_count);
- terminal = ((flags & RXRPC_LAST_PACKET) &&
- !(flags & RXRPC_CLIENT_INITIATED));
- ret = rxrpc_queue_rcv_skb(call, skb, false, terminal);
- if (ret < 0) {
- if (ret == -ENOMEM || ret == -ENOBUFS) {
- __clear_bit(ackbit, call->ackr_window);
- ack = RXRPC_ACK_NOSPACE;
- goto discard_and_ack;
- }
- goto out;
+ default:
+ goto bad_state;
}
- skb = NULL;
- sp = NULL;
-
- _debug("post #%u", seq);
- ASSERTCMP(call->rx_data_post, ==, seq);
- call->rx_data_post++;
+ write_unlock(&call->state_lock);
+ if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) {
+ rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true,
+ rxrpc_propose_ack_client_tx_end);
+ trace_rxrpc_transmit(call, rxrpc_transmit_await_reply);
+ } else {
+ trace_rxrpc_transmit(call, rxrpc_transmit_end);
+ }
+ _leave(" = ok");
+ return true;
+
+bad_state:
+ write_unlock(&call->state_lock);
+ kdebug("end_tx %s", rxrpc_call_states[call->state]);
+ rxrpc_proto_abort(abort_why, call, call->tx_top);
+ return false;
+}
- if (flags & RXRPC_LAST_PACKET)
- set_bit(RXRPC_CALL_RCVD_LAST, &call->flags);
+/*
+ * Begin the reply reception phase of a call.
+ */
+static bool rxrpc_receiving_reply(struct rxrpc_call *call)
+{
+ struct rxrpc_ack_summary summary = { 0 };
+ rxrpc_seq_t top = READ_ONCE(call->tx_top);
+
+ if (call->ackr_reason) {
+ spin_lock_bh(&call->lock);
+ call->ackr_reason = 0;
+ call->resend_at = call->expire_at;
+ call->ack_at = call->expire_at;
+ spin_unlock_bh(&call->lock);
+ rxrpc_set_timer(call, rxrpc_timer_init_for_reply,
+ ktime_get_real());
+ }
- /* if we've reached an out of sequence packet then we need to drain
- * that queue into the socket Rx queue now */
- if (call->rx_data_post == call->rx_first_oos) {
- _debug("drain rx oos now");
- read_lock(&call->state_lock);
- if (call->state < RXRPC_CALL_COMPLETE &&
- !test_and_set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events))
- rxrpc_queue_call(call);
- read_unlock(&call->state_lock);
+ if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags))
+ rxrpc_rotate_tx_window(call, top, &summary);
+ if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
+ rxrpc_proto_abort("TXL", call, top);
+ return false;
}
+ if (!rxrpc_end_tx_phase(call, true, "ETD"))
+ return false;
+ call->tx_phase = false;
+ return true;
+}
- spin_unlock(&call->lock);
- atomic_inc(&call->ackr_not_idle);
- rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false);
- _leave(" = 0 [posted]");
- return 0;
+/*
+ * Scan a jumbo packet to validate its structure and to work out how many
+ * subpackets it contains.
+ *
+ * A jumbo packet is a collection of consecutive packets glued together with
+ * little headers between that indicate how to change the initial header for
+ * each subpacket.
+ *
+ * RXRPC_JUMBO_PACKET must be set on all but the last subpacket - and all but
+ * the last are RXRPC_JUMBO_DATALEN in size. The last subpacket may be of any
+ * size.
+ */
+static bool rxrpc_validate_jumbo(struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned int offset = sizeof(struct rxrpc_wire_header);
+ unsigned int len = skb->len;
+ int nr_jumbo = 1;
+ u8 flags = sp->hdr.flags;
-protocol_error:
- ret = -EBADMSG;
-out:
- spin_unlock(&call->lock);
- _leave(" = %d", ret);
- return ret;
+ do {
+ nr_jumbo++;
+ if (len - offset < RXRPC_JUMBO_SUBPKTLEN)
+ goto protocol_error;
+ if (flags & RXRPC_LAST_PACKET)
+ goto protocol_error;
+ offset += RXRPC_JUMBO_DATALEN;
+ if (skb_copy_bits(skb, offset, &flags, 1) < 0)
+ goto protocol_error;
+ offset += sizeof(struct rxrpc_jumbo_header);
+ } while (flags & RXRPC_JUMBO_PACKET);
-discard_and_ack:
- _debug("discard and ACK packet %p", skb);
- __rxrpc_propose_ACK(call, ack, serial, true);
-discard:
- spin_unlock(&call->lock);
- rxrpc_free_skb(skb);
- _leave(" = 0 [discarded]");
- return 0;
+ sp->nr_jumbo = nr_jumbo;
+ return true;
-enqueue_and_ack:
- __rxrpc_propose_ACK(call, ack, serial, true);
-enqueue_packet:
- _net("defer skb %p", skb);
- spin_unlock(&call->lock);
- skb_queue_tail(&call->rx_queue, skb);
- atomic_inc(&call->ackr_not_idle);
- read_lock(&call->state_lock);
- if (call->state < RXRPC_CALL_DEAD)
- rxrpc_queue_call(call);
- read_unlock(&call->state_lock);
- _leave(" = 0 [queued]");
- return 0;
+protocol_error:
+ return false;
}
/*
- * assume an implicit ACKALL of the transmission phase of a client socket upon
- * reception of the first reply packet
+ * Handle reception of a duplicate packet.
+ *
+ * We have to take care to avoid an attack here whereby we're given a series of
+ * jumbograms, each with a sequence number one before the preceding one and
+ * filled up to maximum UDP size. If they never send us the first packet in
+ * the sequence, they can cause us to have to hold on to around 2MiB of kernel
+ * space until the call times out.
+ *
+ * We limit the space usage by only accepting three duplicate jumbo packets per
+ * call. After that, we tell the other side we're no longer accepting jumbos
+ * (that information is encoded in the ACK packet).
*/
-static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial)
+static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq,
+ u8 annotation, bool *_jumbo_bad)
{
- write_lock_bh(&call->state_lock);
-
- switch (call->state) {
- case RXRPC_CALL_CLIENT_AWAIT_REPLY:
- call->state = RXRPC_CALL_CLIENT_RECV_REPLY;
- call->acks_latest = serial;
-
- _debug("implicit ACKALL %%%u", call->acks_latest);
- set_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events);
- write_unlock_bh(&call->state_lock);
-
- if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
- clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
- clear_bit(RXRPC_CALL_EV_RESEND, &call->events);
- clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
- }
- break;
+ /* Discard normal packets that are duplicates. */
+ if (annotation == 0)
+ return;
- default:
- write_unlock_bh(&call->state_lock);
- break;
+ /* Skip jumbo subpackets that are duplicates. When we've had three or
+ * more partially duplicate jumbo packets, we refuse to take any more
+ * jumbos for this call.
+ */
+ if (!*_jumbo_bad) {
+ call->nr_jumbo_bad++;
+ *_jumbo_bad = true;
}
}
/*
- * post an incoming packet to the nominated call to deal with
- * - must get rid of the sk_buff, either by freeing it or by queuing it
+ * Process a DATA packet, adding the packet to the Rx ring.
*/
-void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
+static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb,
+ u16 skew)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- __be32 wtmp;
- u32 hi_serial, abort_code;
+ unsigned int offset = sizeof(struct rxrpc_wire_header);
+ unsigned int ix;
+ rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0;
+ rxrpc_seq_t seq = sp->hdr.seq, hard_ack;
+ bool immediate_ack = false, jumbo_bad = false, queued;
+ u16 len;
+ u8 ack = 0, flags, annotation = 0;
- _enter("%p,%p", call, skb);
+ _enter("{%u,%u},{%u,%u}",
+ call->rx_hard_ack, call->rx_top, skb->len, seq);
- ASSERT(!irqs_disabled());
+ _proto("Rx DATA %%%u { #%u f=%02x }",
+ sp->hdr.serial, seq, sp->hdr.flags);
-#if 0 // INJECT RX ERROR
- if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
- static int skip = 0;
- if (++skip == 3) {
- printk("DROPPED 3RD PACKET!!!!!!!!!!!!!\n");
- skip = 0;
- goto free_packet;
- }
+ if (call->state >= RXRPC_CALL_COMPLETE)
+ return;
+
+ /* Received data implicitly ACKs all of the request packets we sent
+ * when we're acting as a client.
+ */
+ if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
+ call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
+ !rxrpc_receiving_reply(call))
+ return;
+
+ call->ackr_prev_seq = seq;
+
+ hard_ack = READ_ONCE(call->rx_hard_ack);
+ if (after(seq, hard_ack + call->rx_winsize)) {
+ ack = RXRPC_ACK_EXCEEDS_WINDOW;
+ ack_serial = serial;
+ goto ack;
}
-#endif
- /* track the latest serial number on this connection for ACK packet
- * information */
- hi_serial = atomic_read(&call->conn->hi_serial);
- while (sp->hdr.serial > hi_serial)
- hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial,
- sp->hdr.serial);
+ flags = sp->hdr.flags;
+ if (flags & RXRPC_JUMBO_PACKET) {
+ if (call->nr_jumbo_bad > 3) {
+ ack = RXRPC_ACK_NOSPACE;
+ ack_serial = serial;
+ goto ack;
+ }
+ annotation = 1;
+ }
- /* request ACK generation for any ACK or DATA packet that requests
- * it */
- if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
- _proto("ACK Requested on %%%u", sp->hdr.serial);
- rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false);
+next_subpacket:
+ queued = false;
+ ix = seq & RXRPC_RXTX_BUFF_MASK;
+ len = skb->len;
+ if (flags & RXRPC_JUMBO_PACKET)
+ len = RXRPC_JUMBO_DATALEN;
+
+ if (flags & RXRPC_LAST_PACKET) {
+ if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
+ seq != call->rx_top)
+ return rxrpc_proto_abort("LSN", call, seq);
+ } else {
+ if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
+ after_eq(seq, call->rx_top))
+ return rxrpc_proto_abort("LSA", call, seq);
}
- switch (sp->hdr.type) {
- case RXRPC_PACKET_TYPE_ABORT:
- _debug("abort");
+ if (before_eq(seq, hard_ack)) {
+ ack = RXRPC_ACK_DUPLICATE;
+ ack_serial = serial;
+ goto skip;
+ }
- if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0)
- goto protocol_error;
+ if (flags & RXRPC_REQUEST_ACK && !ack) {
+ ack = RXRPC_ACK_REQUESTED;
+ ack_serial = serial;
+ }
- abort_code = ntohl(wtmp);
- _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
-
- write_lock_bh(&call->state_lock);
- if (call->state < RXRPC_CALL_COMPLETE) {
- call->state = RXRPC_CALL_REMOTELY_ABORTED;
- call->remote_abort = abort_code;
- set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events);
- rxrpc_queue_call(call);
+ if (call->rxtx_buffer[ix]) {
+ rxrpc_input_dup_data(call, seq, annotation, &jumbo_bad);
+ if (ack != RXRPC_ACK_DUPLICATE) {
+ ack = RXRPC_ACK_DUPLICATE;
+ ack_serial = serial;
}
- goto free_packet_unlock;
+ immediate_ack = true;
+ goto skip;
+ }
- case RXRPC_PACKET_TYPE_BUSY:
- _proto("Rx BUSY %%%u", sp->hdr.serial);
+ /* Queue the packet. We use a couple of memory barriers here as need
+ * to make sure that rx_top is perceived to be set after the buffer
+ * pointer and that the buffer pointer is set after the annotation and
+ * the skb data.
+ *
+ * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window()
+ * and also rxrpc_fill_out_ack().
+ */
+ rxrpc_get_skb(skb, rxrpc_skb_rx_got);
+ call->rxtx_annotations[ix] = annotation;
+ smp_wmb();
+ call->rxtx_buffer[ix] = skb;
+ if (after(seq, call->rx_top)) {
+ smp_store_release(&call->rx_top, seq);
+ } else if (before(seq, call->rx_top)) {
+ /* Send an immediate ACK if we fill in a hole */
+ if (!ack) {
+ ack = RXRPC_ACK_DELAY;
+ ack_serial = serial;
+ }
+ immediate_ack = true;
+ }
+ if (flags & RXRPC_LAST_PACKET) {
+ set_bit(RXRPC_CALL_RX_LAST, &call->flags);
+ trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq);
+ } else {
+ trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq);
+ }
+ queued = true;
- if (rxrpc_conn_is_service(call->conn))
- goto protocol_error;
+ if (after_eq(seq, call->rx_expect_next)) {
+ if (after(seq, call->rx_expect_next)) {
+ _net("OOS %u > %u", seq, call->rx_expect_next);
+ ack = RXRPC_ACK_OUT_OF_SEQUENCE;
+ ack_serial = serial;
+ }
+ call->rx_expect_next = seq + 1;
+ }
- write_lock_bh(&call->state_lock);
- switch (call->state) {
- case RXRPC_CALL_CLIENT_SEND_REQUEST:
- call->state = RXRPC_CALL_SERVER_BUSY;
- set_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events);
- rxrpc_queue_call(call);
- case RXRPC_CALL_SERVER_BUSY:
- goto free_packet_unlock;
- default:
- goto protocol_error_locked;
+skip:
+ offset += len;
+ if (flags & RXRPC_JUMBO_PACKET) {
+ if (skb_copy_bits(skb, offset, &flags, 1) < 0)
+ return rxrpc_proto_abort("XJF", call, seq);
+ offset += sizeof(struct rxrpc_jumbo_header);
+ seq++;
+ serial++;
+ annotation++;
+ if (flags & RXRPC_JUMBO_PACKET)
+ annotation |= RXRPC_RX_ANNO_JLAST;
+ if (after(seq, hard_ack + call->rx_winsize)) {
+ ack = RXRPC_ACK_EXCEEDS_WINDOW;
+ ack_serial = serial;
+ if (!jumbo_bad) {
+ call->nr_jumbo_bad++;
+ jumbo_bad = true;
+ }
+ goto ack;
}
- default:
- _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial);
- goto protocol_error;
+ _proto("Rx DATA Jumbo %%%u", serial);
+ goto next_subpacket;
+ }
- case RXRPC_PACKET_TYPE_DATA:
- _proto("Rx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
+ if (queued && flags & RXRPC_LAST_PACKET && !ack) {
+ ack = RXRPC_ACK_DELAY;
+ ack_serial = serial;
+ }
- if (sp->hdr.seq == 0)
- goto protocol_error;
+ack:
+ if (ack)
+ rxrpc_propose_ACK(call, ack, skew, ack_serial,
+ immediate_ack, true,
+ rxrpc_propose_ack_input_data);
- call->ackr_prev_seq = sp->hdr.seq;
+ if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1)
+ rxrpc_notify_socket(call);
+ _leave(" [queued]");
+}
- /* received data implicitly ACKs all of the request packets we
- * sent when we're acting as a client */
- if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
- rxrpc_assume_implicit_ackall(call, sp->hdr.serial);
+/*
+ * Process a requested ACK.
+ */
+static void rxrpc_input_requested_ack(struct rxrpc_call *call,
+ ktime_t resp_time,
+ rxrpc_serial_t orig_serial,
+ rxrpc_serial_t ack_serial)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ ktime_t sent_at;
+ int ix;
+
+ for (ix = 0; ix < RXRPC_RXTX_BUFF_SIZE; ix++) {
+ skb = call->rxtx_buffer[ix];
+ if (!skb)
+ continue;
+
+ sp = rxrpc_skb(skb);
+ if (sp->hdr.serial != orig_serial)
+ continue;
+ smp_rmb();
+ sent_at = skb->tstamp;
+ goto found;
+ }
+ return;
- switch (rxrpc_fast_process_data(call, skb, sp->hdr.seq)) {
- case 0:
- skb = NULL;
- goto done;
+found:
+ rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_requested_ack,
+ orig_serial, ack_serial, sent_at, resp_time);
+}
- default:
- BUG();
+/*
+ * Process a ping response.
+ */
+static void rxrpc_input_ping_response(struct rxrpc_call *call,
+ ktime_t resp_time,
+ rxrpc_serial_t orig_serial,
+ rxrpc_serial_t ack_serial)
+{
+ rxrpc_serial_t ping_serial;
+ ktime_t ping_time;
- /* data packet received beyond the last packet */
- case -EBADMSG:
- goto protocol_error;
- }
+ ping_time = call->ackr_ping_time;
+ smp_rmb();
+ ping_serial = call->ackr_ping;
- case RXRPC_PACKET_TYPE_ACKALL:
- case RXRPC_PACKET_TYPE_ACK:
- /* ACK processing is done in process context */
- read_lock_bh(&call->state_lock);
- if (call->state < RXRPC_CALL_DEAD) {
- skb_queue_tail(&call->rx_queue, skb);
- rxrpc_queue_call(call);
- skb = NULL;
- }
- read_unlock_bh(&call->state_lock);
- goto free_packet;
+ if (!test_bit(RXRPC_CALL_PINGING, &call->flags) ||
+ before(orig_serial, ping_serial))
+ return;
+ clear_bit(RXRPC_CALL_PINGING, &call->flags);
+ if (after(orig_serial, ping_serial))
+ return;
+
+ rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_ping_response,
+ orig_serial, ack_serial, ping_time, resp_time);
+}
+
+/*
+ * Process the extra information that may be appended to an ACK packet
+ */
+static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
+ struct rxrpc_ackinfo *ackinfo)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct rxrpc_peer *peer;
+ unsigned int mtu;
+ u32 rwind = ntohl(ackinfo->rwind);
+
+ _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }",
+ sp->hdr.serial,
+ ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU),
+ rwind, ntohl(ackinfo->jumbo_max));
+
+ if (rwind > RXRPC_RXTX_BUFF_SIZE - 1)
+ rwind = RXRPC_RXTX_BUFF_SIZE - 1;
+ call->tx_winsize = rwind;
+ if (call->cong_ssthresh > rwind)
+ call->cong_ssthresh = rwind;
+
+ mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU));
+
+ peer = call->peer;
+ if (mtu < peer->maxdata) {
+ spin_lock_bh(&peer->lock);
+ peer->maxdata = mtu;
+ peer->mtu = mtu + peer->hdrsize;
+ spin_unlock_bh(&peer->lock);
+ _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata);
}
+}
-protocol_error:
- _debug("protocol error");
- write_lock_bh(&call->state_lock);
-protocol_error_locked:
- if (call->state <= RXRPC_CALL_COMPLETE) {
- call->state = RXRPC_CALL_LOCALLY_ABORTED;
- call->local_abort = RX_PROTOCOL_ERROR;
- set_bit(RXRPC_CALL_EV_ABORT, &call->events);
- rxrpc_queue_call(call);
+/*
+ * Process individual soft ACKs.
+ *
+ * Each ACK in the array corresponds to one packet and can be either an ACK or
+ * a NAK. If we get find an explicitly NAK'd packet we resend immediately;
+ * packets that lie beyond the end of the ACK list are scheduled for resend by
+ * the timer on the basis that the peer might just not have processed them at
+ * the time the ACK was sent.
+ */
+static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
+ rxrpc_seq_t seq, int nr_acks,
+ struct rxrpc_ack_summary *summary)
+{
+ int ix;
+ u8 annotation, anno_type;
+
+ for (; nr_acks > 0; nr_acks--, seq++) {
+ ix = seq & RXRPC_RXTX_BUFF_MASK;
+ annotation = call->rxtx_annotations[ix];
+ anno_type = annotation & RXRPC_TX_ANNO_MASK;
+ annotation &= ~RXRPC_TX_ANNO_MASK;
+ switch (*acks++) {
+ case RXRPC_ACK_TYPE_ACK:
+ summary->nr_acks++;
+ if (anno_type == RXRPC_TX_ANNO_ACK)
+ continue;
+ summary->nr_new_acks++;
+ call->rxtx_annotations[ix] =
+ RXRPC_TX_ANNO_ACK | annotation;
+ break;
+ case RXRPC_ACK_TYPE_NACK:
+ if (!summary->nr_nacks &&
+ call->acks_lowest_nak != seq) {
+ call->acks_lowest_nak = seq;
+ summary->new_low_nack = true;
+ }
+ summary->nr_nacks++;
+ if (anno_type == RXRPC_TX_ANNO_NAK)
+ continue;
+ summary->nr_new_nacks++;
+ if (anno_type == RXRPC_TX_ANNO_RETRANS)
+ continue;
+ call->rxtx_annotations[ix] =
+ RXRPC_TX_ANNO_NAK | annotation;
+ break;
+ default:
+ return rxrpc_proto_abort("SFT", call, 0);
+ }
}
-free_packet_unlock:
- write_unlock_bh(&call->state_lock);
-free_packet:
- rxrpc_free_skb(skb);
-done:
- _leave("");
}
/*
- * split up a jumbo data packet
+ * Process an ACK packet.
+ *
+ * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet
+ * in the ACK array. Anything before that is hard-ACK'd and may be discarded.
+ *
+ * A hard-ACK means that a packet has been processed and may be discarded; a
+ * soft-ACK means that the packet may be discarded and retransmission
+ * requested. A phase is complete when all packets are hard-ACK'd.
*/
-static void rxrpc_process_jumbo_packet(struct rxrpc_call *call,
- struct sk_buff *jumbo)
+static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb,
+ u16 skew)
{
- struct rxrpc_jumbo_header jhdr;
- struct rxrpc_skb_priv *sp;
- struct sk_buff *part;
+ struct rxrpc_ack_summary summary = { 0 };
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ union {
+ struct rxrpc_ackpacket ack;
+ struct rxrpc_ackinfo info;
+ u8 acks[RXRPC_MAXACKS];
+ } buf;
+ rxrpc_serial_t acked_serial;
+ rxrpc_seq_t first_soft_ack, hard_ack;
+ int nr_acks, offset, ioffset;
+
+ _enter("");
+
+ offset = sizeof(struct rxrpc_wire_header);
+ if (skb_copy_bits(skb, offset, &buf.ack, sizeof(buf.ack)) < 0) {
+ _debug("extraction failure");
+ return rxrpc_proto_abort("XAK", call, 0);
+ }
+ offset += sizeof(buf.ack);
+
+ acked_serial = ntohl(buf.ack.serial);
+ first_soft_ack = ntohl(buf.ack.firstPacket);
+ hard_ack = first_soft_ack - 1;
+ nr_acks = buf.ack.nAcks;
+ summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ?
+ buf.ack.reason : RXRPC_ACK__INVALID);
+
+ trace_rxrpc_rx_ack(call, first_soft_ack, summary.ack_reason, nr_acks);
+
+ _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
+ sp->hdr.serial,
+ ntohs(buf.ack.maxSkew),
+ first_soft_ack,
+ ntohl(buf.ack.previousPacket),
+ acked_serial,
+ rxrpc_ack_names[summary.ack_reason],
+ buf.ack.nAcks);
+
+ if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE)
+ rxrpc_input_ping_response(call, skb->tstamp, acked_serial,
+ sp->hdr.serial);
+ if (buf.ack.reason == RXRPC_ACK_REQUESTED)
+ rxrpc_input_requested_ack(call, skb->tstamp, acked_serial,
+ sp->hdr.serial);
+
+ if (buf.ack.reason == RXRPC_ACK_PING) {
+ _proto("Rx ACK %%%u PING Request", sp->hdr.serial);
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE,
+ skew, sp->hdr.serial, true, true,
+ rxrpc_propose_ack_respond_to_ping);
+ } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
+ rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED,
+ skew, sp->hdr.serial, true, true,
+ rxrpc_propose_ack_respond_to_ack);
+ }
- _enter(",{%u,%u}", jumbo->data_len, jumbo->len);
+ ioffset = offset + nr_acks + 3;
+ if (skb->len >= ioffset + sizeof(buf.info)) {
+ if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0)
+ return rxrpc_proto_abort("XAI", call, 0);
+ rxrpc_input_ackinfo(call, skb, &buf.info);
+ }
- sp = rxrpc_skb(jumbo);
+ if (first_soft_ack == 0)
+ return rxrpc_proto_abort("AK0", call, 0);
- do {
- sp->hdr.flags &= ~RXRPC_JUMBO_PACKET;
-
- /* make a clone to represent the first subpacket in what's left
- * of the jumbo packet */
- part = skb_clone(jumbo, GFP_ATOMIC);
- if (!part) {
- /* simply ditch the tail in the event of ENOMEM */
- pskb_trim(jumbo, RXRPC_JUMBO_DATALEN);
- break;
- }
- rxrpc_new_skb(part);
+ /* Ignore ACKs unless we are or have just been transmitting. */
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ case RXRPC_CALL_SERVER_SEND_REPLY:
+ case RXRPC_CALL_SERVER_AWAIT_ACK:
+ break;
+ default:
+ return;
+ }
- pskb_trim(part, RXRPC_JUMBO_DATALEN);
+ /* Discard any out-of-order or duplicate ACKs. */
+ if (before_eq(sp->hdr.serial, call->acks_latest)) {
+ _debug("discard ACK %d <= %d",
+ sp->hdr.serial, call->acks_latest);
+ return;
+ }
+ call->acks_latest_ts = skb->tstamp;
+ call->acks_latest = sp->hdr.serial;
+
+ if (before(hard_ack, call->tx_hard_ack) ||
+ after(hard_ack, call->tx_top))
+ return rxrpc_proto_abort("AKW", call, 0);
+ if (nr_acks > call->tx_top - hard_ack)
+ return rxrpc_proto_abort("AKN", call, 0);
+
+ if (after(hard_ack, call->tx_hard_ack))
+ rxrpc_rotate_tx_window(call, hard_ack, &summary);
+
+ if (nr_acks > 0) {
+ if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0)
+ return rxrpc_proto_abort("XSA", call, 0);
+ rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks,
+ &summary);
+ }
- if (!pskb_pull(jumbo, RXRPC_JUMBO_DATALEN))
- goto protocol_error;
+ if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
+ rxrpc_end_tx_phase(call, false, "ETA");
+ return;
+ }
- if (skb_copy_bits(jumbo, 0, &jhdr, sizeof(jhdr)) < 0)
- goto protocol_error;
- if (!pskb_pull(jumbo, sizeof(jhdr)))
- BUG();
+ if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] &
+ RXRPC_TX_ANNO_LAST &&
+ summary.nr_acks == call->tx_top - hard_ack)
+ rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial,
+ false, true,
+ rxrpc_propose_ack_ping_for_lost_reply);
- sp->hdr.seq += 1;
- sp->hdr.serial += 1;
- sp->hdr.flags = jhdr.flags;
- sp->hdr._rsvd = ntohs(jhdr._rsvd);
+ return rxrpc_congestion_management(call, skb, &summary, acked_serial);
+}
- _proto("Rx DATA Jumbo %%%u", sp->hdr.serial - 1);
+/*
+ * Process an ACKALL packet.
+ */
+static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_ack_summary summary = { 0 };
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- rxrpc_fast_process_packet(call, part);
- part = NULL;
+ _proto("Rx ACKALL %%%u", sp->hdr.serial);
- } while (sp->hdr.flags & RXRPC_JUMBO_PACKET);
+ rxrpc_rotate_tx_window(call, call->tx_top, &summary);
+ if (test_bit(RXRPC_CALL_TX_LAST, &call->flags))
+ rxrpc_end_tx_phase(call, false, "ETL");
+}
- rxrpc_fast_process_packet(call, jumbo);
- _leave("");
- return;
+/*
+ * Process an ABORT packet.
+ */
+static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ __be32 wtmp;
+ u32 abort_code = RX_CALL_DEAD;
-protocol_error:
- _debug("protocol error");
- rxrpc_free_skb(part);
- rxrpc_free_skb(jumbo);
- write_lock_bh(&call->state_lock);
- if (call->state <= RXRPC_CALL_COMPLETE) {
- call->state = RXRPC_CALL_LOCALLY_ABORTED;
- call->local_abort = RX_PROTOCOL_ERROR;
- set_bit(RXRPC_CALL_EV_ABORT, &call->events);
- rxrpc_queue_call(call);
- }
- write_unlock_bh(&call->state_lock);
- _leave("");
+ _enter("");
+
+ if (skb->len >= 4 &&
+ skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &wtmp, sizeof(wtmp)) >= 0)
+ abort_code = ntohl(wtmp);
+
+ _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code);
+
+ if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
+ abort_code, ECONNABORTED))
+ rxrpc_notify_socket(call);
}
/*
- * post an incoming packet to the appropriate call/socket to deal with
- * - must get rid of the sk_buff, either by freeing it or by queuing it
+ * Process an incoming call packet.
*/
-static void rxrpc_post_packet_to_call(struct rxrpc_call *call,
- struct sk_buff *skb)
+static void rxrpc_input_call_packet(struct rxrpc_call *call,
+ struct sk_buff *skb, u16 skew)
{
- struct rxrpc_skb_priv *sp;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
_enter("%p,%p", call, skb);
- sp = rxrpc_skb(skb);
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_DATA:
+ rxrpc_input_data(call, skb, skew);
+ break;
- _debug("extant call [%d]", call->state);
+ case RXRPC_PACKET_TYPE_ACK:
+ rxrpc_input_ack(call, skb, skew);
+ break;
+
+ case RXRPC_PACKET_TYPE_BUSY:
+ _proto("Rx BUSY %%%u", sp->hdr.serial);
+
+ /* Just ignore BUSY packets from the server; the retry and
+ * lifespan timers will take care of business. BUSY packets
+ * from the client don't make sense.
+ */
+ break;
+
+ case RXRPC_PACKET_TYPE_ABORT:
+ rxrpc_input_abort(call, skb);
+ break;
+
+ case RXRPC_PACKET_TYPE_ACKALL:
+ rxrpc_input_ackall(call, skb);
+ break;
- read_lock(&call->state_lock);
- switch (call->state) {
- case RXRPC_CALL_LOCALLY_ABORTED:
- if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) {
- rxrpc_queue_call(call);
- goto free_unlock;
- }
- case RXRPC_CALL_REMOTELY_ABORTED:
- case RXRPC_CALL_NETWORK_ERROR:
- case RXRPC_CALL_DEAD:
- goto dead_call;
- case RXRPC_CALL_COMPLETE:
- case RXRPC_CALL_CLIENT_FINAL_ACK:
- /* complete server call */
- if (rxrpc_conn_is_service(call->conn))
- goto dead_call;
- /* resend last packet of a completed call */
- _debug("final ack again");
- rxrpc_get_call(call);
- set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events);
- rxrpc_queue_call(call);
- goto free_unlock;
default:
+ _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial);
break;
}
- read_unlock(&call->state_lock);
- rxrpc_get_call(call);
-
- if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
- sp->hdr.flags & RXRPC_JUMBO_PACKET)
- rxrpc_process_jumbo_packet(call, skb);
- else
- rxrpc_fast_process_packet(call, skb);
-
- rxrpc_put_call(call);
- goto done;
-
-dead_call:
- if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
- skb->priority = RX_CALL_DEAD;
- rxrpc_reject_packet(call->conn->params.local, skb);
- goto unlock;
- }
-free_unlock:
- rxrpc_free_skb(skb);
-unlock:
- read_unlock(&call->state_lock);
-done:
_leave("");
}
/*
* post connection-level events to the connection
- * - this includes challenges, responses and some aborts
+ * - this includes challenges, responses, some aborts and call terminal packet
+ * retransmission.
*/
static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
struct sk_buff *skb)
@@ -595,6 +965,17 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local,
}
/*
+ * put a packet up for transport-level abort
+ */
+static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
+{
+ CHECK_SLAB_OKAY(&local->usage);
+
+ skb_queue_tail(&local->reject_queue, skb);
+ rxrpc_queue_local(local);
+}
+
+/*
* Extract the wire header from a packet and translate the byte order.
*/
static noinline
@@ -605,8 +986,6 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
/* dig out the RxRPC connection details */
if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
return -EBADMSG;
- if (!pskb_pull(skb, sizeof(whdr)))
- BUG();
memset(sp, 0, sizeof(*sp));
sp->hdr.epoch = ntohl(whdr.epoch);
@@ -631,19 +1010,22 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
* shut down and the local endpoint from going away, thus sk_user_data will not
* be cleared until this function returns.
*/
-void rxrpc_data_ready(struct sock *sk)
+void rxrpc_data_ready(struct sock *udp_sk)
{
struct rxrpc_connection *conn;
+ struct rxrpc_channel *chan;
+ struct rxrpc_call *call;
struct rxrpc_skb_priv *sp;
- struct rxrpc_local *local = sk->sk_user_data;
+ struct rxrpc_local *local = udp_sk->sk_user_data;
struct sk_buff *skb;
- int ret;
+ unsigned int channel;
+ int ret, skew;
- _enter("%p", sk);
+ _enter("%p", udp_sk);
ASSERT(!irqs_disabled());
- skb = skb_recv_datagram(sk, 0, 1, &ret);
+ skb = skb_recv_datagram(udp_sk, 0, 1, &ret);
if (!skb) {
if (ret == -EAGAIN)
return;
@@ -651,13 +1033,13 @@ void rxrpc_data_ready(struct sock *sk)
return;
}
- rxrpc_new_skb(skb);
+ rxrpc_new_skb(skb, rxrpc_skb_rx_received);
_net("recv skb %p", skb);
/* we'll probably need to checksum it (didn't call sock_recvmsg) */
if (skb_checksum_complete(skb)) {
- rxrpc_free_skb(skb);
+ rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
__UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0);
_leave(" [CSUM failed]");
return;
@@ -671,13 +1053,21 @@ void rxrpc_data_ready(struct sock *sk)
skb_orphan(skb);
sp = rxrpc_skb(skb);
- _net("Rx UDP packet from %08x:%04hu",
- ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source));
-
/* dig out the RxRPC connection details */
if (rxrpc_extract_header(sp, skb) < 0)
goto bad_message;
+ if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
+ static int lose;
+ if ((lose++ & 7) == 7) {
+ trace_rxrpc_rx_lose(sp);
+ rxrpc_lose_skb(skb, rxrpc_skb_rx_lost);
+ return;
+ }
+ }
+
+ trace_rxrpc_rx_packet(sp);
+
_net("Rx RxRPC %s ep=%x call=%x:%x",
sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient",
sp->hdr.epoch, sp->hdr.cid, sp->hdr.callNumber);
@@ -688,70 +1078,125 @@ void rxrpc_data_ready(struct sock *sk)
goto bad_message;
}
- if (sp->hdr.type == RXRPC_PACKET_TYPE_VERSION) {
+ switch (sp->hdr.type) {
+ case RXRPC_PACKET_TYPE_VERSION:
rxrpc_post_packet_to_local(local, skb);
goto out;
- }
- if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
- (sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
- goto bad_message;
+ case RXRPC_PACKET_TYPE_BUSY:
+ if (sp->hdr.flags & RXRPC_CLIENT_INITIATED)
+ goto discard;
+
+ case RXRPC_PACKET_TYPE_DATA:
+ if (sp->hdr.callNumber == 0)
+ goto bad_message;
+ if (sp->hdr.flags & RXRPC_JUMBO_PACKET &&
+ !rxrpc_validate_jumbo(skb))
+ goto bad_message;
+ break;
+ }
rcu_read_lock();
conn = rxrpc_find_connection_rcu(local, skb);
- if (!conn)
- goto cant_route_call;
+ if (conn) {
+ if (sp->hdr.securityIndex != conn->security_ix)
+ goto wrong_security;
+
+ if (sp->hdr.callNumber == 0) {
+ /* Connection-level packet */
+ _debug("CONN %p {%d}", conn, conn->debug_id);
+ rxrpc_post_packet_to_conn(conn, skb);
+ goto out_unlock;
+ }
+
+ /* Note the serial number skew here */
+ skew = (int)sp->hdr.serial - (int)conn->hi_serial;
+ if (skew >= 0) {
+ if (skew > 0)
+ conn->hi_serial = sp->hdr.serial;
+ } else {
+ skew = -skew;
+ skew = min(skew, 65535);
+ }
- if (sp->hdr.callNumber == 0) {
- /* Connection-level packet */
- _debug("CONN %p {%d}", conn, conn->debug_id);
- rxrpc_post_packet_to_conn(conn, skb);
- } else {
/* Call-bound packets are routed by connection channel. */
- unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK;
- struct rxrpc_channel *chan = &conn->channels[channel];
- struct rxrpc_call *call = rcu_dereference(chan->call);
+ channel = sp->hdr.cid & RXRPC_CHANNELMASK;
+ chan = &conn->channels[channel];
+
+ /* Ignore really old calls */
+ if (sp->hdr.callNumber < chan->last_call)
+ goto discard_unlock;
+
+ if (sp->hdr.callNumber == chan->last_call) {
+ /* For the previous service call, if completed successfully, we
+ * discard all further packets.
+ */
+ if (rxrpc_conn_is_service(conn) &&
+ (chan->last_type == RXRPC_PACKET_TYPE_ACK ||
+ sp->hdr.type == RXRPC_PACKET_TYPE_ABORT))
+ goto discard_unlock;
+
+ /* But otherwise we need to retransmit the final packet from
+ * data cached in the connection record.
+ */
+ rxrpc_post_packet_to_conn(conn, skb);
+ goto out_unlock;
+ }
- if (!call || atomic_read(&call->usage) == 0)
- goto cant_route_call;
+ call = rcu_dereference(chan->call);
+ } else {
+ skew = 0;
+ call = NULL;
+ }
- rxrpc_post_packet_to_call(call, skb);
+ if (!call || atomic_read(&call->usage) == 0) {
+ if (!(sp->hdr.type & RXRPC_CLIENT_INITIATED) ||
+ sp->hdr.callNumber == 0 ||
+ sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
+ goto bad_message_unlock;
+ if (sp->hdr.seq != 1)
+ goto discard_unlock;
+ call = rxrpc_new_incoming_call(local, conn, skb);
+ if (!call) {
+ rcu_read_unlock();
+ goto reject_packet;
+ }
+ rxrpc_send_ping(call, skb, skew);
}
+ rxrpc_input_call_packet(call, skb, skew);
+ goto discard_unlock;
+
+discard_unlock:
rcu_read_unlock();
+discard:
+ rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
out:
+ trace_rxrpc_rx_done(0, 0);
return;
-cant_route_call:
+out_unlock:
rcu_read_unlock();
+ goto out;
- _debug("can't route call");
- if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
- sp->hdr.type == RXRPC_PACKET_TYPE_DATA) {
- if (sp->hdr.seq == 1) {
- _debug("first packet");
- skb_queue_tail(&local->accept_queue, skb);
- rxrpc_queue_work(&local->processor);
- _leave(" [incoming]");
- return;
- }
- skb->priority = RX_INVALID_OPERATION;
- } else {
- skb->priority = RX_CALL_DEAD;
- }
-
- if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
- _debug("reject type %d",sp->hdr.type);
- rxrpc_reject_packet(local, skb);
- } else {
- rxrpc_free_skb(skb);
- }
- _leave(" [no call]");
- return;
+wrong_security:
+ rcu_read_unlock();
+ trace_rxrpc_abort("SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RXKADINCONSISTENCY, EBADMSG);
+ skb->priority = RXKADINCONSISTENCY;
+ goto post_abort;
+bad_message_unlock:
+ rcu_read_unlock();
bad_message:
+ trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ RX_PROTOCOL_ERROR, EBADMSG);
skb->priority = RX_PROTOCOL_ERROR;
+post_abort:
+ skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT;
+reject_packet:
+ trace_rxrpc_rx_done(skb->mark, skb->priority);
rxrpc_reject_packet(local, skb);
_leave(" [badmsg]");
}
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index c21ad213b337..7d4375e557e6 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -23,31 +23,36 @@ static int none_prime_packet_security(struct rxrpc_connection *conn)
}
static int none_secure_packet(struct rxrpc_call *call,
- struct sk_buff *skb,
- size_t data_size,
- void *sechdr)
+ struct sk_buff *skb,
+ size_t data_size,
+ void *sechdr)
{
return 0;
}
-static int none_verify_packet(struct rxrpc_call *call,
- struct sk_buff *skb,
- u32 *_abort_code)
+static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
+ unsigned int offset, unsigned int len,
+ rxrpc_seq_t seq, u16 expected_cksum)
{
return 0;
}
+static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
+ unsigned int *_offset, unsigned int *_len)
+{
+}
+
static int none_respond_to_challenge(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb,
+ u32 *_abort_code)
{
*_abort_code = RX_PROTOCOL_ERROR;
return -EPROTO;
}
static int none_verify_response(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb,
+ u32 *_abort_code)
{
*_abort_code = RX_PROTOCOL_ERROR;
return -EPROTO;
@@ -78,6 +83,7 @@ const struct rxrpc_security rxrpc_no_security = {
.prime_packet_security = none_prime_packet_security,
.secure_packet = none_secure_packet,
.verify_packet = none_verify_packet,
+ .locate_data = none_locate_data,
.respond_to_challenge = none_respond_to_challenge,
.verify_response = none_verify_response,
.clear = none_clear,
diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c
index 31a3f86ef2f6..540d3955c1bc 100644
--- a/net/rxrpc/local_event.c
+++ b/net/rxrpc/local_event.c
@@ -15,8 +15,6 @@
#include <linux/net.h>
#include <linux/skbuff.h>
#include <linux/slab.h>
-#include <linux/udp.h>
-#include <linux/ip.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <generated/utsrelease.h>
@@ -33,7 +31,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
{
struct rxrpc_wire_header whdr;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct sockaddr_in sin;
+ struct sockaddr_rxrpc srx;
struct msghdr msg;
struct kvec iov[2];
size_t len;
@@ -41,12 +39,11 @@ static void rxrpc_send_version_request(struct rxrpc_local *local,
_enter("");
- sin.sin_family = AF_INET;
- sin.sin_port = udp_hdr(skb)->source;
- sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
+ if (rxrpc_extract_addr_from_skb(&srx, skb) < 0)
+ return;
- msg.msg_name = &sin;
- msg.msg_namelen = sizeof(sin);
+ msg.msg_name = &srx.transport;
+ msg.msg_namelen = srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
@@ -93,11 +90,13 @@ void rxrpc_process_local_events(struct rxrpc_local *local)
if (skb) {
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
_debug("{%d},{%u}", local->debug_id, sp->hdr.type);
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_VERSION:
- if (skb_copy_bits(skb, 0, &v, 1) < 0)
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &v, 1) < 0)
return;
_proto("Rx VERSION { %02x }", v);
if (v == 0)
@@ -109,7 +108,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local)
break;
}
- rxrpc_free_skb(skb);
+ rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
}
_leave("");
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index a753796fbe8f..ff4864d550b8 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -58,6 +58,17 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local,
memcmp(&local->srx.transport.sin.sin_addr,
&srx->transport.sin.sin_addr,
sizeof(struct in_addr));
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ /* If the choice of UDP6 port is left up to the transport, then
+ * the endpoint record doesn't match.
+ */
+ return ((u16 __force)local->srx.transport.sin6.sin6_port -
+ (u16 __force)srx->transport.sin6.sin6_port) ?:
+ memcmp(&local->srx.transport.sin6.sin6_addr,
+ &srx->transport.sin6.sin6_addr,
+ sizeof(struct in6_addr));
+#endif
default:
BUG();
}
@@ -75,9 +86,7 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx)
atomic_set(&local->usage, 1);
INIT_LIST_HEAD(&local->link);
INIT_WORK(&local->processor, rxrpc_local_processor);
- INIT_LIST_HEAD(&local->services);
init_rwsem(&local->defrag_sem);
- skb_queue_head_init(&local->accept_queue);
skb_queue_head_init(&local->reject_queue);
skb_queue_head_init(&local->event_queue);
local->client_conns = RB_ROOT;
@@ -101,11 +110,12 @@ static int rxrpc_open_socket(struct rxrpc_local *local)
struct sock *sock;
int ret, opt;
- _enter("%p{%d}", local, local->srx.transport_type);
+ _enter("%p{%d,%d}",
+ local, local->srx.transport_type, local->srx.transport.family);
/* create a socket to represent the local endpoint */
- ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type,
- IPPROTO_UDP, &local->socket);
+ ret = sock_create_kern(&init_net, local->srx.transport.family,
+ local->srx.transport_type, 0, &local->socket);
if (ret < 0) {
_leave(" = %d [socket]", ret);
return ret;
@@ -170,18 +180,8 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx)
long diff;
int ret;
- if (srx->transport.family == AF_INET) {
- _enter("{%d,%u,%pI4+%hu}",
- srx->transport_type,
- srx->transport.family,
- &srx->transport.sin.sin_addr,
- ntohs(srx->transport.sin.sin_port));
- } else {
- _enter("{%d,%u}",
- srx->transport_type,
- srx->transport.family);
- return ERR_PTR(-EAFNOSUPPORT);
- }
+ _enter("{%d,%d,%pISp}",
+ srx->transport_type, srx->transport.family, &srx->transport);
mutex_lock(&rxrpc_local_mutex);
@@ -234,13 +234,8 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx)
found:
mutex_unlock(&rxrpc_local_mutex);
- _net("LOCAL %s %d {%d,%u,%pI4+%hu}",
- age,
- local->debug_id,
- local->srx.transport_type,
- local->srx.transport.family,
- &local->srx.transport.sin.sin_addr,
- ntohs(local->srx.transport.sin.sin_port));
+ _net("LOCAL %s %d {%pISp}",
+ age, local->debug_id, &local->srx.transport);
_leave(" = %p", local);
return local;
@@ -296,7 +291,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local)
mutex_unlock(&rxrpc_local_mutex);
ASSERT(RB_EMPTY_ROOT(&local->client_conns));
- ASSERT(list_empty(&local->services));
+ ASSERT(!local->service);
if (socket) {
local->socket = NULL;
@@ -308,7 +303,6 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local)
/* At this point, there should be no more packets coming in to the
* local endpoint.
*/
- rxrpc_purge_queue(&local->accept_queue);
rxrpc_purge_queue(&local->reject_queue);
rxrpc_purge_queue(&local->event_queue);
@@ -332,11 +326,6 @@ static void rxrpc_local_processor(struct work_struct *work)
if (atomic_read(&local->usage) == 0)
return rxrpc_local_destroyer(local);
- if (!skb_queue_empty(&local->accept_queue)) {
- rxrpc_accept_incoming_calls(local);
- again = true;
- }
-
if (!skb_queue_empty(&local->reject_queue)) {
rxrpc_reject_packets(local);
again = true;
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index bdc5e42fe600..9d1c721bc4e8 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -21,28 +21,33 @@
unsigned int rxrpc_max_backlog __read_mostly = 10;
/*
+ * Maximum lifetime of a call (in mx).
+ */
+unsigned int rxrpc_max_call_lifetime = 60 * 1000;
+
+/*
* How long to wait before scheduling ACK generation after seeing a
- * packet with RXRPC_REQUEST_ACK set (in jiffies).
+ * packet with RXRPC_REQUEST_ACK set (in ms).
*/
unsigned int rxrpc_requested_ack_delay = 1;
/*
- * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
+ * How long to wait before scheduling an ACK with subtype DELAY (in ms).
*
* We use this when we've received new data packets. If those packets aren't
* all consumed within this time we will send a DELAY ACK if an ACK was not
* requested to let the sender know it doesn't need to resend.
*/
-unsigned int rxrpc_soft_ack_delay = 1 * HZ;
+unsigned int rxrpc_soft_ack_delay = 1 * 1000;
/*
- * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
+ * How long to wait before scheduling an ACK with subtype IDLE (in ms).
*
* We use this when we've consumed some previously soft-ACK'd packets when
* further packets aren't immediately received to decide when to send an IDLE
* ACK let the other end know that it can free up its Tx buffer space.
*/
-unsigned int rxrpc_idle_ack_delay = 0.5 * HZ;
+unsigned int rxrpc_idle_ack_delay = 0.5 * 1000;
/*
* Receive window size in packets. This indicates the maximum number of
@@ -50,7 +55,10 @@ unsigned int rxrpc_idle_ack_delay = 0.5 * HZ;
* limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further
* packets.
*/
-unsigned int rxrpc_rx_window_size = 32;
+unsigned int rxrpc_rx_window_size = RXRPC_INIT_RX_WINDOW_SIZE;
+#if (RXRPC_RXTX_BUFF_SIZE - 1) < RXRPC_INIT_RX_WINDOW_SIZE
+#error Need to reduce RXRPC_INIT_RX_WINDOW_SIZE
+#endif
/*
* Maximum Rx MTU size. This indicates to the sender the size of jumbo packet
@@ -64,6 +72,11 @@ unsigned int rxrpc_rx_mtu = 5692;
*/
unsigned int rxrpc_rx_jumbo_max = 4;
+/*
+ * Time till packet resend (in milliseconds).
+ */
+unsigned int rxrpc_resend_timeout = 4 * 1000;
+
const char *const rxrpc_pkts[] = {
"?00",
"DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG",
@@ -75,21 +88,152 @@ const s8 rxrpc_ack_priority[] = {
[RXRPC_ACK_DELAY] = 1,
[RXRPC_ACK_REQUESTED] = 2,
[RXRPC_ACK_IDLE] = 3,
- [RXRPC_ACK_PING_RESPONSE] = 4,
- [RXRPC_ACK_DUPLICATE] = 5,
- [RXRPC_ACK_OUT_OF_SEQUENCE] = 6,
- [RXRPC_ACK_EXCEEDS_WINDOW] = 7,
- [RXRPC_ACK_NOSPACE] = 8,
-};
-
-const char *rxrpc_acks(u8 reason)
-{
- static const char *const str[] = {
- "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY",
- "IDL", "-?-"
- };
-
- if (reason >= ARRAY_SIZE(str))
- reason = ARRAY_SIZE(str) - 1;
- return str[reason];
-}
+ [RXRPC_ACK_DUPLICATE] = 4,
+ [RXRPC_ACK_OUT_OF_SEQUENCE] = 5,
+ [RXRPC_ACK_EXCEEDS_WINDOW] = 6,
+ [RXRPC_ACK_NOSPACE] = 7,
+ [RXRPC_ACK_PING_RESPONSE] = 8,
+ [RXRPC_ACK_PING] = 9,
+};
+
+const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = {
+ "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY",
+ "IDL", "-?-"
+};
+
+const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7] = {
+ [rxrpc_skb_rx_cleaned] = "Rx CLN",
+ [rxrpc_skb_rx_freed] = "Rx FRE",
+ [rxrpc_skb_rx_got] = "Rx GOT",
+ [rxrpc_skb_rx_lost] = "Rx *L*",
+ [rxrpc_skb_rx_received] = "Rx RCV",
+ [rxrpc_skb_rx_purged] = "Rx PUR",
+ [rxrpc_skb_rx_rotated] = "Rx ROT",
+ [rxrpc_skb_rx_seen] = "Rx SEE",
+ [rxrpc_skb_tx_cleaned] = "Tx CLN",
+ [rxrpc_skb_tx_freed] = "Tx FRE",
+ [rxrpc_skb_tx_got] = "Tx GOT",
+ [rxrpc_skb_tx_new] = "Tx NEW",
+ [rxrpc_skb_tx_rotated] = "Tx ROT",
+ [rxrpc_skb_tx_seen] = "Tx SEE",
+};
+
+const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4] = {
+ [rxrpc_conn_new_client] = "NWc",
+ [rxrpc_conn_new_service] = "NWs",
+ [rxrpc_conn_queued] = "QUE",
+ [rxrpc_conn_seen] = "SEE",
+ [rxrpc_conn_got] = "GOT",
+ [rxrpc_conn_put_client] = "PTc",
+ [rxrpc_conn_put_service] = "PTs",
+};
+
+const char rxrpc_client_traces[rxrpc_client__nr_trace][7] = {
+ [rxrpc_client_activate_chans] = "Activa",
+ [rxrpc_client_alloc] = "Alloc ",
+ [rxrpc_client_chan_activate] = "ChActv",
+ [rxrpc_client_chan_disconnect] = "ChDisc",
+ [rxrpc_client_chan_pass] = "ChPass",
+ [rxrpc_client_chan_unstarted] = "ChUnst",
+ [rxrpc_client_cleanup] = "Clean ",
+ [rxrpc_client_count] = "Count ",
+ [rxrpc_client_discard] = "Discar",
+ [rxrpc_client_duplicate] = "Duplic",
+ [rxrpc_client_exposed] = "Expose",
+ [rxrpc_client_replace] = "Replac",
+ [rxrpc_client_to_active] = "->Actv",
+ [rxrpc_client_to_culled] = "->Cull",
+ [rxrpc_client_to_idle] = "->Idle",
+ [rxrpc_client_to_inactive] = "->Inac",
+ [rxrpc_client_to_waiting] = "->Wait",
+ [rxrpc_client_uncount] = "Uncoun",
+};
+
+const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4] = {
+ [rxrpc_transmit_wait] = "WAI",
+ [rxrpc_transmit_queue] = "QUE",
+ [rxrpc_transmit_queue_last] = "QLS",
+ [rxrpc_transmit_rotate] = "ROT",
+ [rxrpc_transmit_rotate_last] = "RLS",
+ [rxrpc_transmit_await_reply] = "AWR",
+ [rxrpc_transmit_end] = "END",
+};
+
+const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4] = {
+ [rxrpc_receive_incoming] = "INC",
+ [rxrpc_receive_queue] = "QUE",
+ [rxrpc_receive_queue_last] = "QLS",
+ [rxrpc_receive_front] = "FRN",
+ [rxrpc_receive_rotate] = "ROT",
+ [rxrpc_receive_end] = "END",
+};
+
+const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5] = {
+ [rxrpc_recvmsg_enter] = "ENTR",
+ [rxrpc_recvmsg_wait] = "WAIT",
+ [rxrpc_recvmsg_dequeue] = "DEQU",
+ [rxrpc_recvmsg_hole] = "HOLE",
+ [rxrpc_recvmsg_next] = "NEXT",
+ [rxrpc_recvmsg_cont] = "CONT",
+ [rxrpc_recvmsg_full] = "FULL",
+ [rxrpc_recvmsg_data_return] = "DATA",
+ [rxrpc_recvmsg_terminal] = "TERM",
+ [rxrpc_recvmsg_to_be_accepted] = "TBAC",
+ [rxrpc_recvmsg_return] = "RETN",
+};
+
+const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5] = {
+ [rxrpc_rtt_tx_ping] = "PING",
+ [rxrpc_rtt_tx_data] = "DATA",
+};
+
+const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = {
+ [rxrpc_rtt_rx_ping_response] = "PONG",
+ [rxrpc_rtt_rx_requested_ack] = "RACK",
+};
+
+const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = {
+ [rxrpc_timer_begin] = "Begin ",
+ [rxrpc_timer_expired] = "*EXPR*",
+ [rxrpc_timer_init_for_reply] = "IniRpl",
+ [rxrpc_timer_set_for_ack] = "SetAck",
+ [rxrpc_timer_set_for_send] = "SetTx ",
+ [rxrpc_timer_set_for_resend] = "SetRTx",
+};
+
+const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = {
+ [rxrpc_propose_ack_client_tx_end] = "ClTxEnd",
+ [rxrpc_propose_ack_input_data] = "DataIn ",
+ [rxrpc_propose_ack_ping_for_lost_ack] = "LostAck",
+ [rxrpc_propose_ack_ping_for_lost_reply] = "LostRpl",
+ [rxrpc_propose_ack_ping_for_params] = "Params ",
+ [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack",
+ [rxrpc_propose_ack_respond_to_ping] = "Rsp2Png",
+ [rxrpc_propose_ack_retry_tx] = "RetryTx",
+ [rxrpc_propose_ack_rotate_rx] = "RxAck ",
+ [rxrpc_propose_ack_terminal_ack] = "ClTerm ",
+};
+
+const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes] = {
+ [rxrpc_propose_ack_use] = "",
+ [rxrpc_propose_ack_update] = " Update",
+ [rxrpc_propose_ack_subsume] = " Subsume",
+};
+
+const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10] = {
+ [RXRPC_CALL_SLOW_START] = "SlowStart",
+ [RXRPC_CALL_CONGEST_AVOIDANCE] = "CongAvoid",
+ [RXRPC_CALL_PACKET_LOSS] = "PktLoss ",
+ [RXRPC_CALL_FAST_RETRANSMIT] = "FastReTx ",
+};
+
+const char rxrpc_congest_changes[rxrpc_congest__nr_change][9] = {
+ [rxrpc_cong_begin_retransmission] = " Retrans",
+ [rxrpc_cong_cleared_nacks] = " Cleared",
+ [rxrpc_cong_new_low_nack] = " NewLowN",
+ [rxrpc_cong_no_change] = "",
+ [rxrpc_cong_progress] = " Progres",
+ [rxrpc_cong_retransmit_again] = " ReTxAgn",
+ [rxrpc_cong_rtt_window_end] = " RttWinE",
+ [rxrpc_cong_saw_nack] = " SawNack",
+};
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index f4bda06b7d2d..0d47db886f6e 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -14,336 +14,326 @@
#include <linux/net.h>
#include <linux/gfp.h>
#include <linux/skbuff.h>
-#include <linux/circ_buf.h>
#include <linux/export.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-/*
- * Time till packet resend (in jiffies).
- */
-unsigned int rxrpc_resend_timeout = 4 * HZ;
-
-static int rxrpc_send_data(struct rxrpc_sock *rx,
- struct rxrpc_call *call,
- struct msghdr *msg, size_t len);
+struct rxrpc_pkt_buffer {
+ struct rxrpc_wire_header whdr;
+ union {
+ struct {
+ struct rxrpc_ackpacket ack;
+ u8 acks[255];
+ u8 pad[3];
+ };
+ __be32 abort_code;
+ };
+ struct rxrpc_ackinfo ackinfo;
+};
/*
- * extract control messages from the sendmsg() control buffer
+ * Fill out an ACK packet.
*/
-static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
- unsigned long *user_call_ID,
- enum rxrpc_command *command,
- u32 *abort_code,
- bool *_exclusive)
+static size_t rxrpc_fill_out_ack(struct rxrpc_call *call,
+ struct rxrpc_pkt_buffer *pkt,
+ rxrpc_seq_t *_hard_ack,
+ rxrpc_seq_t *_top)
{
- struct cmsghdr *cmsg;
- bool got_user_ID = false;
- int len;
-
- *command = RXRPC_CMD_SEND_DATA;
-
- if (msg->msg_controllen == 0)
- return -EINVAL;
-
- for_each_cmsghdr(cmsg, msg) {
- if (!CMSG_OK(msg, cmsg))
- return -EINVAL;
-
- len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
- _debug("CMSG %d, %d, %d",
- cmsg->cmsg_level, cmsg->cmsg_type, len);
-
- if (cmsg->cmsg_level != SOL_RXRPC)
- continue;
-
- switch (cmsg->cmsg_type) {
- case RXRPC_USER_CALL_ID:
- if (msg->msg_flags & MSG_CMSG_COMPAT) {
- if (len != sizeof(u32))
- return -EINVAL;
- *user_call_ID = *(u32 *) CMSG_DATA(cmsg);
- } else {
- if (len != sizeof(unsigned long))
- return -EINVAL;
- *user_call_ID = *(unsigned long *)
- CMSG_DATA(cmsg);
- }
- _debug("User Call ID %lx", *user_call_ID);
- got_user_ID = true;
- break;
-
- case RXRPC_ABORT:
- if (*command != RXRPC_CMD_SEND_DATA)
- return -EINVAL;
- *command = RXRPC_CMD_SEND_ABORT;
- if (len != sizeof(*abort_code))
- return -EINVAL;
- *abort_code = *(unsigned int *) CMSG_DATA(cmsg);
- _debug("Abort %x", *abort_code);
- if (*abort_code == 0)
- return -EINVAL;
- break;
-
- case RXRPC_ACCEPT:
- if (*command != RXRPC_CMD_SEND_DATA)
- return -EINVAL;
- *command = RXRPC_CMD_ACCEPT;
- if (len != 0)
- return -EINVAL;
- break;
-
- case RXRPC_EXCLUSIVE_CALL:
- *_exclusive = true;
- if (len != 0)
- return -EINVAL;
- break;
- default:
- return -EINVAL;
- }
+ rxrpc_serial_t serial;
+ rxrpc_seq_t hard_ack, top, seq;
+ int ix;
+ u32 mtu, jmax;
+ u8 *ackp = pkt->acks;
+
+ /* Barrier against rxrpc_input_data(). */
+ serial = call->ackr_serial;
+ hard_ack = READ_ONCE(call->rx_hard_ack);
+ top = smp_load_acquire(&call->rx_top);
+ *_hard_ack = hard_ack;
+ *_top = top;
+
+ pkt->ack.bufferSpace = htons(8);
+ pkt->ack.maxSkew = htons(call->ackr_skew);
+ pkt->ack.firstPacket = htonl(hard_ack + 1);
+ pkt->ack.previousPacket = htonl(call->ackr_prev_seq);
+ pkt->ack.serial = htonl(serial);
+ pkt->ack.reason = call->ackr_reason;
+ pkt->ack.nAcks = top - hard_ack;
+
+ if (pkt->ack.reason == RXRPC_ACK_PING)
+ pkt->whdr.flags |= RXRPC_REQUEST_ACK;
+
+ if (after(top, hard_ack)) {
+ seq = hard_ack + 1;
+ do {
+ ix = seq & RXRPC_RXTX_BUFF_MASK;
+ if (call->rxtx_buffer[ix])
+ *ackp++ = RXRPC_ACK_TYPE_ACK;
+ else
+ *ackp++ = RXRPC_ACK_TYPE_NACK;
+ seq++;
+ } while (before_eq(seq, top));
}
- if (!got_user_ID)
- return -EINVAL;
- _leave(" = 0");
- return 0;
+ mtu = call->conn->params.peer->if_mtu;
+ mtu -= call->conn->params.peer->hdrsize;
+ jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max;
+ pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
+ pkt->ackinfo.maxMTU = htonl(mtu);
+ pkt->ackinfo.rwind = htonl(call->rx_winsize);
+ pkt->ackinfo.jumbo_max = htonl(jmax);
+
+ *ackp++ = 0;
+ *ackp++ = 0;
+ *ackp++ = 0;
+ return top - hard_ack + 3;
}
/*
- * abort a call, sending an ABORT packet to the peer
+ * Send an ACK or ABORT call packet.
*/
-static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code)
+int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type)
{
- write_lock_bh(&call->state_lock);
-
- if (call->state <= RXRPC_CALL_COMPLETE) {
- call->state = RXRPC_CALL_LOCALLY_ABORTED;
- call->local_abort = abort_code;
- set_bit(RXRPC_CALL_EV_ABORT, &call->events);
- del_timer_sync(&call->resend_timer);
- del_timer_sync(&call->ack_timer);
- clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events);
- clear_bit(RXRPC_CALL_EV_ACK, &call->events);
- clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
- rxrpc_queue_call(call);
+ struct rxrpc_connection *conn = NULL;
+ struct rxrpc_pkt_buffer *pkt;
+ struct msghdr msg;
+ struct kvec iov[2];
+ rxrpc_serial_t serial;
+ rxrpc_seq_t hard_ack, top;
+ size_t len, n;
+ bool ping = false;
+ int ioc, ret;
+ u32 abort_code;
+
+ _enter("%u,%s", call->debug_id, rxrpc_pkts[type]);
+
+ spin_lock_bh(&call->lock);
+ if (call->conn)
+ conn = rxrpc_get_connection_maybe(call->conn);
+ spin_unlock_bh(&call->lock);
+ if (!conn)
+ return -ECONNRESET;
+
+ pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
+ if (!pkt) {
+ rxrpc_put_connection(conn);
+ return -ENOMEM;
}
- write_unlock_bh(&call->state_lock);
-}
-
-/*
- * Create a new client call for sendmsg().
- */
-static struct rxrpc_call *
-rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
- unsigned long user_call_ID, bool exclusive)
-{
- struct rxrpc_conn_parameters cp;
- struct rxrpc_call *call;
- struct key *key;
-
- DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name);
+ msg.msg_name = &call->peer->srx.transport;
+ msg.msg_namelen = call->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ pkt->whdr.epoch = htonl(conn->proto.epoch);
+ pkt->whdr.cid = htonl(call->cid);
+ pkt->whdr.callNumber = htonl(call->call_id);
+ pkt->whdr.seq = 0;
+ pkt->whdr.type = type;
+ pkt->whdr.flags = conn->out_clientflag;
+ pkt->whdr.userStatus = 0;
+ pkt->whdr.securityIndex = call->security_ix;
+ pkt->whdr._rsvd = 0;
+ pkt->whdr.serviceId = htons(call->service_id);
+
+ iov[0].iov_base = pkt;
+ iov[0].iov_len = sizeof(pkt->whdr);
+ len = sizeof(pkt->whdr);
+
+ switch (type) {
+ case RXRPC_PACKET_TYPE_ACK:
+ spin_lock_bh(&call->lock);
+ if (!call->ackr_reason) {
+ spin_unlock_bh(&call->lock);
+ ret = 0;
+ goto out;
+ }
+ ping = (call->ackr_reason == RXRPC_ACK_PING);
+ n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top);
+ call->ackr_reason = 0;
- _enter("");
+ spin_unlock_bh(&call->lock);
- if (!msg->msg_name)
- return ERR_PTR(-EDESTADDRREQ);
- key = rx->key;
- if (key && !rx->key->payload.data[0])
- key = NULL;
+ pkt->whdr.flags |= RXRPC_SLOW_START_OK;
- memset(&cp, 0, sizeof(cp));
- cp.local = rx->local;
- cp.key = rx->key;
- cp.security_level = rx->min_sec_level;
- cp.exclusive = rx->exclusive | exclusive;
- cp.service_id = srx->srx_service;
- call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL);
+ iov[0].iov_len += sizeof(pkt->ack) + n;
+ iov[1].iov_base = &pkt->ackinfo;
+ iov[1].iov_len = sizeof(pkt->ackinfo);
+ len += sizeof(pkt->ack) + n + sizeof(pkt->ackinfo);
+ ioc = 2;
+ break;
- _leave(" = %p\n", call);
- return call;
-}
+ case RXRPC_PACKET_TYPE_ABORT:
+ abort_code = call->abort_code;
+ pkt->abort_code = htonl(abort_code);
+ iov[0].iov_len += sizeof(pkt->abort_code);
+ len += sizeof(pkt->abort_code);
+ ioc = 1;
+ break;
-/*
- * send a message forming part of a client call through an RxRPC socket
- * - caller holds the socket locked
- * - the socket may be either a client socket or a server socket
- */
-int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
-{
- enum rxrpc_command cmd;
- struct rxrpc_call *call;
- unsigned long user_call_ID = 0;
- bool exclusive = false;
- u32 abort_code = 0;
- int ret;
-
- _enter("");
-
- ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code,
- &exclusive);
- if (ret < 0)
- return ret;
-
- if (cmd == RXRPC_CMD_ACCEPT) {
- if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
- return -EINVAL;
- call = rxrpc_accept_call(rx, user_call_ID);
- if (IS_ERR(call))
- return PTR_ERR(call);
- rxrpc_put_call(call);
- return 0;
+ default:
+ BUG();
+ ret = -ENOANO;
+ goto out;
}
- call = rxrpc_find_call_by_user_ID(rx, user_call_ID);
- if (!call) {
- if (cmd != RXRPC_CMD_SEND_DATA)
- return -EBADSLT;
- call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
- exclusive);
- if (IS_ERR(call))
- return PTR_ERR(call);
+ serial = atomic_inc_return(&conn->serial);
+ pkt->whdr.serial = htonl(serial);
+ switch (type) {
+ case RXRPC_PACKET_TYPE_ACK:
+ trace_rxrpc_tx_ack(call, serial,
+ ntohl(pkt->ack.firstPacket),
+ ntohl(pkt->ack.serial),
+ pkt->ack.reason, pkt->ack.nAcks);
+ break;
}
- _debug("CALL %d USR %lx ST %d on CONN %p",
- call->debug_id, call->user_call_ID, call->state, call->conn);
-
- if (call->state >= RXRPC_CALL_COMPLETE) {
- /* it's too late for this call */
- ret = -ECONNRESET;
- } else if (cmd == RXRPC_CMD_SEND_ABORT) {
- rxrpc_send_abort(call, abort_code);
- ret = 0;
- } else if (cmd != RXRPC_CMD_SEND_DATA) {
- ret = -EINVAL;
- } else if (!call->in_clientflag &&
- call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
- /* request phase complete for this client call */
- ret = -EPROTO;
- } else if (call->in_clientflag &&
- call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
- /* Reply phase not begun or not complete for service call. */
- ret = -EPROTO;
- } else {
- ret = rxrpc_send_data(rx, call, msg, len);
+ if (ping) {
+ call->ackr_ping = serial;
+ smp_wmb();
+ /* We need to stick a time in before we send the packet in case
+ * the reply gets back before kernel_sendmsg() completes - but
+ * asking UDP to send the packet can take a relatively long
+ * time, so we update the time after, on the assumption that
+ * the packet transmission is more likely to happen towards the
+ * end of the kernel_sendmsg() call.
+ */
+ call->ackr_ping_time = ktime_get_real();
+ set_bit(RXRPC_CALL_PINGING, &call->flags);
+ trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial);
}
-
- rxrpc_put_call(call);
- _leave(" = %d", ret);
- return ret;
-}
-
-/**
- * rxrpc_kernel_send_data - Allow a kernel service to send data on a call
- * @call: The call to send data through
- * @msg: The data to send
- * @len: The amount of data to send
- *
- * Allow a kernel service to send data on a call. The call must be in an state
- * appropriate to sending data. No control data should be supplied in @msg,
- * nor should an address be supplied. MSG_MORE should be flagged if there's
- * more data to come, otherwise this data will end the transmission phase.
- */
-int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg,
- size_t len)
-{
- int ret;
-
- _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
-
- ASSERTCMP(msg->msg_name, ==, NULL);
- ASSERTCMP(msg->msg_control, ==, NULL);
-
- lock_sock(&call->socket->sk);
-
- _debug("CALL %d USR %lx ST %d on CONN %p",
- call->debug_id, call->user_call_ID, call->state, call->conn);
-
- if (call->state >= RXRPC_CALL_COMPLETE) {
- ret = -ESHUTDOWN; /* it's too late for this call */
- } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
- call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
- ret = -EPROTO; /* request phase complete for this client call */
- } else {
- ret = rxrpc_send_data(call->socket, call, msg, len);
+ ret = kernel_sendmsg(conn->params.local->socket,
+ &msg, iov, ioc, len);
+ if (ping)
+ call->ackr_ping_time = ktime_get_real();
+
+ if (type == RXRPC_PACKET_TYPE_ACK &&
+ call->state < RXRPC_CALL_COMPLETE) {
+ if (ret < 0) {
+ clear_bit(RXRPC_CALL_PINGING, &call->flags);
+ rxrpc_propose_ACK(call, pkt->ack.reason,
+ ntohs(pkt->ack.maxSkew),
+ ntohl(pkt->ack.serial),
+ true, true,
+ rxrpc_propose_ack_retry_tx);
+ } else {
+ spin_lock_bh(&call->lock);
+ if (after(hard_ack, call->ackr_consumed))
+ call->ackr_consumed = hard_ack;
+ if (after(top, call->ackr_seen))
+ call->ackr_seen = top;
+ spin_unlock_bh(&call->lock);
+ }
}
- release_sock(&call->socket->sk);
- _leave(" = %d", ret);
+out:
+ rxrpc_put_connection(conn);
+ kfree(pkt);
return ret;
}
-EXPORT_SYMBOL(rxrpc_kernel_send_data);
-
-/**
- * rxrpc_kernel_abort_call - Allow a kernel service to abort a call
- * @call: The call to be aborted
- * @abort_code: The abort code to stick into the ABORT packet
- *
- * Allow a kernel service to abort a call, if it's still in an abortable state.
- */
-void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code)
-{
- _enter("{%d},%d", call->debug_id, abort_code);
-
- lock_sock(&call->socket->sk);
-
- _debug("CALL %d USR %lx ST %d on CONN %p",
- call->debug_id, call->user_call_ID, call->state, call->conn);
-
- if (call->state < RXRPC_CALL_COMPLETE)
- rxrpc_send_abort(call, abort_code);
-
- release_sock(&call->socket->sk);
- _leave("");
-}
-
-EXPORT_SYMBOL(rxrpc_kernel_abort_call);
-
/*
* send a packet through the transport endpoint
*/
-int rxrpc_send_data_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
+int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
+ bool retrans)
{
- struct kvec iov[1];
+ struct rxrpc_connection *conn = call->conn;
+ struct rxrpc_wire_header whdr;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct msghdr msg;
+ struct kvec iov[2];
+ rxrpc_serial_t serial;
+ size_t len;
+ bool lost = false;
int ret, opt;
_enter(",{%d}", skb->len);
- iov[0].iov_base = skb->head;
- iov[0].iov_len = skb->len;
+ /* Each transmission of a Tx packet needs a new serial number */
+ serial = atomic_inc_return(&conn->serial);
+
+ whdr.epoch = htonl(conn->proto.epoch);
+ whdr.cid = htonl(call->cid);
+ whdr.callNumber = htonl(call->call_id);
+ whdr.seq = htonl(sp->hdr.seq);
+ whdr.serial = htonl(serial);
+ whdr.type = RXRPC_PACKET_TYPE_DATA;
+ whdr.flags = sp->hdr.flags;
+ whdr.userStatus = 0;
+ whdr.securityIndex = call->security_ix;
+ whdr._rsvd = htons(sp->hdr._rsvd);
+ whdr.serviceId = htons(call->service_id);
+
+ iov[0].iov_base = &whdr;
+ iov[0].iov_len = sizeof(whdr);
+ iov[1].iov_base = skb->head;
+ iov[1].iov_len = skb->len;
+ len = iov[0].iov_len + iov[1].iov_len;
- msg.msg_name = &conn->params.peer->srx.transport;
- msg.msg_namelen = conn->params.peer->srx.transport_len;
+ msg.msg_name = &call->peer->srx.transport;
+ msg.msg_namelen = call->peer->srx.transport_len;
msg.msg_control = NULL;
msg.msg_controllen = 0;
msg.msg_flags = 0;
- /* send the packet with the don't fragment bit set if we currently
- * think it's small enough */
- if (skb->len - sizeof(struct rxrpc_wire_header) < conn->params.peer->maxdata) {
- down_read(&conn->params.local->defrag_sem);
- /* send the packet by UDP
- * - returns -EMSGSIZE if UDP would have to fragment the packet
- * to go out of the interface
- * - in which case, we'll have processed the ICMP error
- * message and update the peer record
- */
- ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1,
- iov[0].iov_len);
+ /* If our RTT cache needs working on, request an ACK. Also request
+ * ACKs if a DATA packet appears to have been lost.
+ */
+ if (retrans ||
+ call->cong_mode == RXRPC_CALL_SLOW_START ||
+ (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) ||
+ ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
+ ktime_get_real()))
+ whdr.flags |= RXRPC_REQUEST_ACK;
+
+ if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
+ static int lose;
+ if ((lose++ & 7) == 7) {
+ ret = 0;
+ lost = true;
+ goto done;
+ }
+ }
- up_read(&conn->params.local->defrag_sem);
- if (ret == -EMSGSIZE)
- goto send_fragmentable;
+ _proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq);
- _leave(" = %d [%u]", ret, conn->params.peer->maxdata);
- return ret;
+ /* send the packet with the don't fragment bit set if we currently
+ * think it's small enough */
+ if (iov[1].iov_len >= call->peer->maxdata)
+ goto send_fragmentable;
+
+ down_read(&conn->params.local->defrag_sem);
+ /* send the packet by UDP
+ * - returns -EMSGSIZE if UDP would have to fragment the packet
+ * to go out of the interface
+ * - in which case, we'll have processed the ICMP error
+ * message and update the peer record
+ */
+ ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len);
+
+ up_read(&conn->params.local->defrag_sem);
+ if (ret == -EMSGSIZE)
+ goto send_fragmentable;
+
+done:
+ trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags,
+ retrans, lost);
+ if (ret >= 0) {
+ ktime_t now = ktime_get_real();
+ skb->tstamp = now;
+ smp_wmb();
+ sp->hdr.serial = serial;
+ if (whdr.flags & RXRPC_REQUEST_ACK) {
+ call->peer->rtt_last_req = now;
+ trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial);
+ }
}
+ _leave(" = %d [%u]", ret, call->peer->maxdata);
+ return ret;
send_fragmentable:
/* attempt to send this message with fragmentation enabled */
@@ -358,8 +348,8 @@ send_fragmentable:
SOL_IP, IP_MTU_DISCOVER,
(char *)&opt, sizeof(opt));
if (ret == 0) {
- ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1,
- iov[0].iov_len);
+ ret = kernel_sendmsg(conn->params.local->socket, &msg,
+ iov, 2, len);
opt = IP_PMTUDISC_DO;
kernel_setsockopt(conn->params.local->socket, SOL_IP,
@@ -367,355 +357,82 @@ send_fragmentable:
(char *)&opt, sizeof(opt));
}
break;
- }
-
- up_write(&conn->params.local->defrag_sem);
- _leave(" = %d [frag %u]", ret, conn->params.peer->maxdata);
- return ret;
-}
-/*
- * wait for space to appear in the transmit/ACK window
- * - caller holds the socket locked
- */
-static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
- struct rxrpc_call *call,
- long *timeo)
-{
- DECLARE_WAITQUEUE(myself, current);
- int ret;
-
- _enter(",{%d},%ld",
- CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail),
- call->acks_winsz),
- *timeo);
-
- add_wait_queue(&call->tx_waitq, &myself);
-
- for (;;) {
- set_current_state(TASK_INTERRUPTIBLE);
- ret = 0;
- if (CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail),
- call->acks_winsz) > 0)
- break;
- if (signal_pending(current)) {
- ret = sock_intr_errno(*timeo);
- break;
- }
-
- release_sock(&rx->sk);
- *timeo = schedule_timeout(*timeo);
- lock_sock(&rx->sk);
- }
-
- remove_wait_queue(&call->tx_waitq, &myself);
- set_current_state(TASK_RUNNING);
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
- * attempt to schedule an instant Tx resend
- */
-static inline void rxrpc_instant_resend(struct rxrpc_call *call)
-{
- read_lock_bh(&call->state_lock);
- if (try_to_del_timer_sync(&call->resend_timer) >= 0) {
- clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags);
- if (call->state < RXRPC_CALL_COMPLETE &&
- !test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events))
- rxrpc_queue_call(call);
- }
- read_unlock_bh(&call->state_lock);
-}
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ opt = IPV6_PMTUDISC_DONT;
+ ret = kernel_setsockopt(conn->params.local->socket,
+ SOL_IPV6, IPV6_MTU_DISCOVER,
+ (char *)&opt, sizeof(opt));
+ if (ret == 0) {
+ ret = kernel_sendmsg(conn->params.local->socket, &msg,
+ iov, 1, iov[0].iov_len);
-/*
- * queue a packet for transmission, set the resend timer and attempt
- * to send the packet immediately
- */
-static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
- bool last)
-{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- int ret;
-
- _net("queue skb %p [%d]", skb, call->acks_head);
-
- ASSERT(call->acks_window != NULL);
- call->acks_window[call->acks_head] = (unsigned long) skb;
- smp_wmb();
- call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1);
-
- if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
- _debug("________awaiting reply/ACK__________");
- write_lock_bh(&call->state_lock);
- switch (call->state) {
- case RXRPC_CALL_CLIENT_SEND_REQUEST:
- call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
- break;
- case RXRPC_CALL_SERVER_ACK_REQUEST:
- call->state = RXRPC_CALL_SERVER_SEND_REPLY;
- if (!last)
- break;
- case RXRPC_CALL_SERVER_SEND_REPLY:
- call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
- break;
- default:
- break;
+ opt = IPV6_PMTUDISC_DO;
+ kernel_setsockopt(conn->params.local->socket,
+ SOL_IPV6, IPV6_MTU_DISCOVER,
+ (char *)&opt, sizeof(opt));
}
- write_unlock_bh(&call->state_lock);
- }
-
- _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq);
-
- sp->need_resend = false;
- sp->resend_at = jiffies + rxrpc_resend_timeout;
- if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) {
- _debug("run timer");
- call->resend_timer.expires = sp->resend_at;
- add_timer(&call->resend_timer);
- }
-
- /* attempt to cancel the rx-ACK timer, deferring reply transmission if
- * we're ACK'ing the request phase of an incoming call */
- ret = -EAGAIN;
- if (try_to_del_timer_sync(&call->ack_timer) >= 0) {
- /* the packet may be freed by rxrpc_process_call() before this
- * returns */
- ret = rxrpc_send_data_packet(call->conn, skb);
- _net("sent skb %p", skb);
- } else {
- _debug("failed to delete ACK timer");
- }
-
- if (ret < 0) {
- _debug("need instant resend %d", ret);
- sp->need_resend = true;
- rxrpc_instant_resend(call);
+ break;
+#endif
}
- _leave("");
-}
-
-/*
- * Convert a host-endian header into a network-endian header.
- */
-static void rxrpc_insert_header(struct sk_buff *skb)
-{
- struct rxrpc_wire_header whdr;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- whdr.epoch = htonl(sp->hdr.epoch);
- whdr.cid = htonl(sp->hdr.cid);
- whdr.callNumber = htonl(sp->hdr.callNumber);
- whdr.seq = htonl(sp->hdr.seq);
- whdr.serial = htonl(sp->hdr.serial);
- whdr.type = sp->hdr.type;
- whdr.flags = sp->hdr.flags;
- whdr.userStatus = sp->hdr.userStatus;
- whdr.securityIndex = sp->hdr.securityIndex;
- whdr._rsvd = htons(sp->hdr._rsvd);
- whdr.serviceId = htons(sp->hdr.serviceId);
-
- memcpy(skb->head, &whdr, sizeof(whdr));
+ up_write(&conn->params.local->defrag_sem);
+ goto done;
}
/*
- * send data through a socket
- * - must be called in process context
- * - caller holds the socket locked
+ * reject packets through the local endpoint
*/
-static int rxrpc_send_data(struct rxrpc_sock *rx,
- struct rxrpc_call *call,
- struct msghdr *msg, size_t len)
+void rxrpc_reject_packets(struct rxrpc_local *local)
{
+ struct sockaddr_rxrpc srx;
struct rxrpc_skb_priv *sp;
+ struct rxrpc_wire_header whdr;
struct sk_buff *skb;
- struct sock *sk = &rx->sk;
- long timeo;
- bool more;
- int ret, copied;
-
- timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
-
- /* this should be in poll */
- sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
-
- if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
- return -EPIPE;
-
- more = msg->msg_flags & MSG_MORE;
-
- skb = call->tx_pending;
- call->tx_pending = NULL;
-
- copied = 0;
- do {
- if (!skb) {
- size_t size, chunk, max, space;
-
- _debug("alloc");
-
- if (CIRC_SPACE(call->acks_head,
- ACCESS_ONCE(call->acks_tail),
- call->acks_winsz) <= 0) {
- ret = -EAGAIN;
- if (msg->msg_flags & MSG_DONTWAIT)
- goto maybe_error;
- ret = rxrpc_wait_for_tx_window(rx, call,
- &timeo);
- if (ret < 0)
- goto maybe_error;
- }
-
- max = call->conn->params.peer->maxdata;
- max -= call->conn->security_size;
- max &= ~(call->conn->size_align - 1UL);
-
- chunk = max;
- if (chunk > msg_data_left(msg) && !more)
- chunk = msg_data_left(msg);
-
- space = chunk + call->conn->size_align;
- space &= ~(call->conn->size_align - 1UL);
-
- size = space + call->conn->header_size;
-
- _debug("SIZE: %zu/%zu/%zu", chunk, space, size);
-
- /* create a buffer that we can retain until it's ACK'd */
- skb = sock_alloc_send_skb(
- sk, size, msg->msg_flags & MSG_DONTWAIT, &ret);
- if (!skb)
- goto maybe_error;
-
- rxrpc_new_skb(skb);
-
- _debug("ALLOC SEND %p", skb);
-
- ASSERTCMP(skb->mark, ==, 0);
+ struct msghdr msg;
+ struct kvec iov[2];
+ size_t size;
+ __be32 code;
- _debug("HS: %u", call->conn->header_size);
- skb_reserve(skb, call->conn->header_size);
- skb->len += call->conn->header_size;
+ _enter("%d", local->debug_id);
- sp = rxrpc_skb(skb);
- sp->remain = chunk;
- if (sp->remain > skb_tailroom(skb))
- sp->remain = skb_tailroom(skb);
+ iov[0].iov_base = &whdr;
+ iov[0].iov_len = sizeof(whdr);
+ iov[1].iov_base = &code;
+ iov[1].iov_len = sizeof(code);
+ size = sizeof(whdr) + sizeof(code);
- _net("skb: hr %d, tr %d, hl %d, rm %d",
- skb_headroom(skb),
- skb_tailroom(skb),
- skb_headlen(skb),
- sp->remain);
+ msg.msg_name = &srx.transport;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
- skb->ip_summed = CHECKSUM_UNNECESSARY;
- }
+ memset(&whdr, 0, sizeof(whdr));
+ whdr.type = RXRPC_PACKET_TYPE_ABORT;
- _debug("append");
+ while ((skb = skb_dequeue(&local->reject_queue))) {
+ rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
sp = rxrpc_skb(skb);
- /* append next segment of data to the current buffer */
- if (msg_data_left(msg) > 0) {
- int copy = skb_tailroom(skb);
- ASSERTCMP(copy, >, 0);
- if (copy > msg_data_left(msg))
- copy = msg_data_left(msg);
- if (copy > sp->remain)
- copy = sp->remain;
-
- _debug("add");
- ret = skb_add_data(skb, &msg->msg_iter, copy);
- _debug("added");
- if (ret < 0)
- goto efault;
- sp->remain -= copy;
- skb->mark += copy;
- copied += copy;
- }
+ if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) {
+ msg.msg_namelen = srx.transport_len;
- /* check for the far side aborting the call or a network error
- * occurring */
- if (call->state > RXRPC_CALL_COMPLETE)
- goto call_aborted;
-
- /* add the packet to the send queue if it's now full */
- if (sp->remain <= 0 ||
- (msg_data_left(msg) == 0 && !more)) {
- struct rxrpc_connection *conn = call->conn;
- uint32_t seq;
- size_t pad;
-
- /* pad out if we're using security */
- if (conn->security_ix) {
- pad = conn->security_size + skb->mark;
- pad = conn->size_align - pad;
- pad &= conn->size_align - 1;
- _debug("pad %zu", pad);
- if (pad)
- memset(skb_put(skb, pad), 0, pad);
- }
-
- seq = atomic_inc_return(&call->sequence);
-
- sp->hdr.epoch = conn->proto.epoch;
- sp->hdr.cid = call->cid;
- sp->hdr.callNumber = call->call_id;
- sp->hdr.seq = seq;
- sp->hdr.serial = atomic_inc_return(&conn->serial);
- sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
- sp->hdr.userStatus = 0;
- sp->hdr.securityIndex = conn->security_ix;
- sp->hdr._rsvd = 0;
- sp->hdr.serviceId = call->service_id;
-
- sp->hdr.flags = conn->out_clientflag;
- if (msg_data_left(msg) == 0 && !more)
- sp->hdr.flags |= RXRPC_LAST_PACKET;
- else if (CIRC_SPACE(call->acks_head,
- ACCESS_ONCE(call->acks_tail),
- call->acks_winsz) > 1)
- sp->hdr.flags |= RXRPC_MORE_PACKETS;
- if (more && seq & 1)
- sp->hdr.flags |= RXRPC_REQUEST_ACK;
-
- ret = conn->security->secure_packet(
- call, skb, skb->mark,
- skb->head + sizeof(struct rxrpc_wire_header));
- if (ret < 0)
- goto out;
-
- rxrpc_insert_header(skb);
- rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
- skb = NULL;
- }
- } while (msg_data_left(msg) > 0);
+ code = htonl(skb->priority);
-success:
- ret = copied;
-out:
- call->tx_pending = skb;
- _leave(" = %d", ret);
- return ret;
+ whdr.epoch = htonl(sp->hdr.epoch);
+ whdr.cid = htonl(sp->hdr.cid);
+ whdr.callNumber = htonl(sp->hdr.callNumber);
+ whdr.serviceId = htons(sp->hdr.serviceId);
+ whdr.flags = sp->hdr.flags;
+ whdr.flags ^= RXRPC_CLIENT_INITIATED;
+ whdr.flags &= RXRPC_CLIENT_INITIATED;
-call_aborted:
- rxrpc_free_skb(skb);
- if (call->state == RXRPC_CALL_NETWORK_ERROR)
- ret = call->error_report < RXRPC_LOCAL_ERROR_OFFSET ?
- call->error_report :
- call->error_report - RXRPC_LOCAL_ERROR_OFFSET;
- else
- ret = -ECONNABORTED;
- _leave(" = %d", ret);
- return ret;
+ kernel_sendmsg(local->socket, &msg, iov, 2, size);
+ }
-maybe_error:
- if (copied)
- goto success;
- goto out;
+ rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+ }
-efault:
- ret = -EFAULT;
- goto out;
+ _leave("");
}
diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c
index 8940674b5e08..bf13b8470c9a 100644
--- a/net/rxrpc/peer_event.c
+++ b/net/rxrpc/peer_event.c
@@ -66,6 +66,32 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local,
}
break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ srx.transport.sin6.sin6_port = serr->port;
+ srx.transport_len = sizeof(struct sockaddr_in6);
+ switch (serr->ee.ee_origin) {
+ case SO_EE_ORIGIN_ICMP6:
+ _net("Rx ICMP6");
+ memcpy(&srx.transport.sin6.sin6_addr,
+ skb_network_header(skb) + serr->addr_offset,
+ sizeof(struct in6_addr));
+ break;
+ case SO_EE_ORIGIN_ICMP:
+ _net("Rx ICMP on v6 sock");
+ memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12,
+ skb_network_header(skb) + serr->addr_offset,
+ sizeof(struct in_addr));
+ break;
+ default:
+ memcpy(&srx.transport.sin6.sin6_addr,
+ &ipv6_hdr(skb)->saddr,
+ sizeof(struct in6_addr));
+ break;
+ }
+ break;
+#endif
+
default:
BUG();
}
@@ -129,22 +155,21 @@ void rxrpc_error_report(struct sock *sk)
_leave("UDP socket errqueue empty");
return;
}
+ rxrpc_new_skb(skb, rxrpc_skb_rx_received);
serr = SKB_EXT_ERR(skb);
if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) {
_leave("UDP empty message");
- kfree_skb(skb);
+ rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
return;
}
- rxrpc_new_skb(skb);
-
rcu_read_lock();
peer = rxrpc_lookup_peer_icmp_rcu(local, skb);
if (peer && !rxrpc_get_peer_maybe(peer))
peer = NULL;
if (!peer) {
rcu_read_unlock();
- rxrpc_free_skb(skb);
+ rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
_leave(" [no peer]");
return;
}
@@ -154,7 +179,7 @@ void rxrpc_error_report(struct sock *sk)
serr->ee.ee_code == ICMP_FRAG_NEEDED)) {
rxrpc_adjust_mtu(peer, serr);
rcu_read_unlock();
- rxrpc_free_skb(skb);
+ rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
rxrpc_put_peer(peer);
_leave(" [MTU update]");
return;
@@ -162,7 +187,7 @@ void rxrpc_error_report(struct sock *sk)
rxrpc_store_error(peer, serr);
rcu_read_unlock();
- rxrpc_free_skb(skb);
+ rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
/* The ref we obtained is passed off to the work item */
rxrpc_queue_work(&peer->error_distributor);
@@ -248,13 +273,20 @@ void rxrpc_peer_error_distributor(struct work_struct *work)
struct rxrpc_peer *peer =
container_of(work, struct rxrpc_peer, error_distributor);
struct rxrpc_call *call;
- int error_report;
+ enum rxrpc_call_completion compl;
+ int error;
_enter("");
- error_report = READ_ONCE(peer->error_report);
+ error = READ_ONCE(peer->error_report);
+ if (error < RXRPC_LOCAL_ERROR_OFFSET) {
+ compl = RXRPC_CALL_NETWORK_ERROR;
+ } else {
+ compl = RXRPC_CALL_LOCAL_ERROR;
+ error -= RXRPC_LOCAL_ERROR_OFFSET;
+ }
- _debug("ISSUE ERROR %d", error_report);
+ _debug("ISSUE ERROR %s %d", rxrpc_call_completions[compl], error);
spin_lock_bh(&peer->lock);
@@ -262,16 +294,10 @@ void rxrpc_peer_error_distributor(struct work_struct *work)
call = hlist_entry(peer->error_targets.first,
struct rxrpc_call, error_link);
hlist_del_init(&call->error_link);
+ rxrpc_see_call(call);
- write_lock(&call->state_lock);
- if (call->state != RXRPC_CALL_COMPLETE &&
- call->state < RXRPC_CALL_NETWORK_ERROR) {
- call->error_report = error_report;
- call->state = RXRPC_CALL_NETWORK_ERROR;
- set_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events);
- rxrpc_queue_call(call);
- }
- write_unlock(&call->state_lock);
+ if (rxrpc_set_call_completion(call, compl, 0, error))
+ rxrpc_notify_socket(call);
}
spin_unlock_bh(&peer->lock);
@@ -279,3 +305,44 @@ void rxrpc_peer_error_distributor(struct work_struct *work)
rxrpc_put_peer(peer);
_leave("");
}
+
+/*
+ * Add RTT information to cache. This is called in softirq mode and has
+ * exclusive access to the peer RTT data.
+ */
+void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why,
+ rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial,
+ ktime_t send_time, ktime_t resp_time)
+{
+ struct rxrpc_peer *peer = call->peer;
+ s64 rtt;
+ u64 sum = peer->rtt_sum, avg;
+ u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage;
+
+ rtt = ktime_to_ns(ktime_sub(resp_time, send_time));
+ if (rtt < 0)
+ return;
+
+ /* Replace the oldest datum in the RTT buffer */
+ sum -= peer->rtt_cache[cursor];
+ sum += rtt;
+ peer->rtt_cache[cursor] = rtt;
+ peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1);
+ peer->rtt_sum = sum;
+ if (usage < RXRPC_RTT_CACHE_SIZE) {
+ usage++;
+ peer->rtt_usage = usage;
+ }
+
+ /* Now recalculate the average */
+ if (usage == RXRPC_RTT_CACHE_SIZE) {
+ avg = sum / RXRPC_RTT_CACHE_SIZE;
+ } else {
+ avg = sum;
+ do_div(avg, usage);
+ }
+
+ peer->rtt = avg;
+ trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt,
+ usage, avg);
+}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 538e9831c699..941b724d523b 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -16,12 +16,14 @@
#include <linux/skbuff.h>
#include <linux/udp.h>
#include <linux/in.h>
+#include <linux/in6.h>
#include <linux/slab.h>
#include <linux/hashtable.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include <net/ip.h>
#include <net/route.h>
+#include <net/ip6_route.h>
#include "ar-internal.h"
static DEFINE_HASHTABLE(rxrpc_peer_hash, 10);
@@ -50,6 +52,13 @@ static unsigned long rxrpc_peer_hash_key(struct rxrpc_local *local,
size = sizeof(srx->transport.sin.sin_addr);
p = (u16 *)&srx->transport.sin.sin_addr;
break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ hash_key += (u16 __force)srx->transport.sin.sin_port;
+ size = sizeof(srx->transport.sin6.sin6_addr);
+ p = (u16 *)&srx->transport.sin6.sin6_addr;
+ break;
+#endif
default:
WARN(1, "AF_RXRPC: Unsupported transport address family\n");
return 0;
@@ -93,6 +102,14 @@ static long rxrpc_peer_cmp_key(const struct rxrpc_peer *peer,
memcmp(&peer->srx.transport.sin.sin_addr,
&srx->transport.sin.sin_addr,
sizeof(struct in_addr));
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ return ((u16 __force)peer->srx.transport.sin6.sin6_port -
+ (u16 __force)srx->transport.sin6.sin6_port) ?:
+ memcmp(&peer->srx.transport.sin6.sin6_addr,
+ &srx->transport.sin6.sin6_addr,
+ sizeof(struct in6_addr));
+#endif
default:
BUG();
}
@@ -130,17 +147,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
if (peer) {
- switch (srx->transport.family) {
- case AF_INET:
- _net("PEER %d {%d,%u,%pI4+%hu}",
- peer->debug_id,
- peer->srx.transport_type,
- peer->srx.transport.family,
- &peer->srx.transport.sin.sin_addr,
- ntohs(peer->srx.transport.sin.sin_port));
- break;
- }
-
+ _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
_leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
}
return peer;
@@ -152,22 +159,53 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
*/
static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer)
{
+ struct dst_entry *dst;
struct rtable *rt;
- struct flowi4 fl4;
+ struct flowi fl;
+ struct flowi4 *fl4 = &fl.u.ip4;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ struct flowi6 *fl6 = &fl.u.ip6;
+#endif
peer->if_mtu = 1500;
- rt = ip_route_output_ports(&init_net, &fl4, NULL,
- peer->srx.transport.sin.sin_addr.s_addr, 0,
- htons(7000), htons(7001),
- IPPROTO_UDP, 0, 0);
- if (IS_ERR(rt)) {
- _leave(" [route err %ld]", PTR_ERR(rt));
- return;
+ memset(&fl, 0, sizeof(fl));
+ switch (peer->srx.transport.family) {
+ case AF_INET:
+ rt = ip_route_output_ports(
+ &init_net, fl4, NULL,
+ peer->srx.transport.sin.sin_addr.s_addr, 0,
+ htons(7000), htons(7001), IPPROTO_UDP, 0, 0);
+ if (IS_ERR(rt)) {
+ _leave(" [route err %ld]", PTR_ERR(rt));
+ return;
+ }
+ dst = &rt->dst;
+ break;
+
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ fl6->flowi6_iif = LOOPBACK_IFINDEX;
+ fl6->flowi6_scope = RT_SCOPE_UNIVERSE;
+ fl6->flowi6_proto = IPPROTO_UDP;
+ memcpy(&fl6->daddr, &peer->srx.transport.sin6.sin6_addr,
+ sizeof(struct in6_addr));
+ fl6->fl6_dport = htons(7001);
+ fl6->fl6_sport = htons(7000);
+ dst = ip6_route_output(&init_net, NULL, fl6);
+ if (IS_ERR(dst)) {
+ _leave(" [route err %ld]", PTR_ERR(dst));
+ return;
+ }
+ break;
+#endif
+
+ default:
+ BUG();
}
- peer->if_mtu = dst_mtu(&rt->dst);
- dst_release(&rt->dst);
+ peer->if_mtu = dst_mtu(dst);
+ dst_release(dst);
_leave(" [if_mtu %u]", peer->if_mtu);
}
@@ -199,6 +237,41 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp)
}
/*
+ * Initialise peer record.
+ */
+static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key)
+{
+ peer->hash_key = hash_key;
+ rxrpc_assess_MTU_size(peer);
+ peer->mtu = peer->if_mtu;
+ peer->rtt_last_req = ktime_get_real();
+
+ switch (peer->srx.transport.family) {
+ case AF_INET:
+ peer->hdrsize = sizeof(struct iphdr);
+ break;
+#ifdef CONFIG_AF_RXRPC_IPV6
+ case AF_INET6:
+ peer->hdrsize = sizeof(struct ipv6hdr);
+ break;
+#endif
+ default:
+ BUG();
+ }
+
+ switch (peer->srx.transport_type) {
+ case SOCK_DGRAM:
+ peer->hdrsize += sizeof(struct udphdr);
+ break;
+ default:
+ BUG();
+ }
+
+ peer->hdrsize += sizeof(struct rxrpc_wire_header);
+ peer->maxdata = peer->mtu - peer->hdrsize;
+}
+
+/*
* Set up a new peer.
*/
static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local,
@@ -212,31 +285,40 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local,
peer = rxrpc_alloc_peer(local, gfp);
if (peer) {
- peer->hash_key = hash_key;
memcpy(&peer->srx, srx, sizeof(*srx));
+ rxrpc_init_peer(peer, hash_key);
+ }
- rxrpc_assess_MTU_size(peer);
- peer->mtu = peer->if_mtu;
-
- if (srx->transport.family == AF_INET) {
- peer->hdrsize = sizeof(struct iphdr);
- switch (srx->transport_type) {
- case SOCK_DGRAM:
- peer->hdrsize += sizeof(struct udphdr);
- break;
- default:
- BUG();
- break;
- }
- } else {
- BUG();
- }
+ _leave(" = %p", peer);
+ return peer;
+}
+
+/*
+ * Set up a new incoming peer. The address is prestored in the preallocated
+ * peer.
+ */
+struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local,
+ struct rxrpc_peer *prealloc)
+{
+ struct rxrpc_peer *peer;
+ unsigned long hash_key;
+
+ hash_key = rxrpc_peer_hash_key(local, &prealloc->srx);
+ prealloc->local = local;
+ rxrpc_init_peer(prealloc, hash_key);
- peer->hdrsize += sizeof(struct rxrpc_wire_header);
- peer->maxdata = peer->mtu - peer->hdrsize;
+ spin_lock(&rxrpc_peer_hash_lock);
+
+ /* Need to check that we aren't racing with someone else */
+ peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key);
+ if (peer && !rxrpc_get_peer_maybe(peer))
+ peer = NULL;
+ if (!peer) {
+ peer = prealloc;
+ hash_add_rcu(rxrpc_peer_hash, &peer->hash_link, hash_key);
}
- _leave(" = %p", peer);
+ spin_unlock(&rxrpc_peer_hash_lock);
return peer;
}
@@ -249,11 +331,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
struct rxrpc_peer *peer, *candidate;
unsigned long hash_key = rxrpc_peer_hash_key(local, srx);
- _enter("{%d,%d,%pI4+%hu}",
- srx->transport_type,
- srx->transport_len,
- &srx->transport.sin.sin_addr,
- ntohs(srx->transport.sin.sin_port));
+ _enter("{%pISp}", &srx->transport);
/* search the peer list first */
rcu_read_lock();
@@ -272,7 +350,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
return NULL;
}
- spin_lock(&rxrpc_peer_hash_lock);
+ spin_lock_bh(&rxrpc_peer_hash_lock);
/* Need to check that we aren't racing with someone else */
peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key);
@@ -282,7 +360,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
hash_add_rcu(rxrpc_peer_hash,
&candidate->hash_link, hash_key);
- spin_unlock(&rxrpc_peer_hash_lock);
+ spin_unlock_bh(&rxrpc_peer_hash_lock);
if (peer)
kfree(candidate);
@@ -290,11 +368,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
peer = candidate;
}
- _net("PEER %d {%d,%pI4+%hu}",
- peer->debug_id,
- peer->srx.transport_type,
- &peer->srx.transport.sin.sin_addr,
- ntohs(peer->srx.transport.sin.sin_port));
+ _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport);
_leave(" = %p {u=%d}", peer, atomic_read(&peer->usage));
return peer;
@@ -307,9 +381,24 @@ void __rxrpc_put_peer(struct rxrpc_peer *peer)
{
ASSERT(hlist_empty(&peer->error_targets));
- spin_lock(&rxrpc_peer_hash_lock);
+ spin_lock_bh(&rxrpc_peer_hash_lock);
hash_del_rcu(&peer->hash_link);
- spin_unlock(&rxrpc_peer_hash_lock);
+ spin_unlock_bh(&rxrpc_peer_hash_lock);
kfree_rcu(peer, rcu);
}
+
+/**
+ * rxrpc_kernel_get_peer - Get the peer address of a call
+ * @sock: The socket on which the call is in progress.
+ * @call: The call to query
+ * @_srx: Where to place the result
+ *
+ * Get the address of the remote peer in a call.
+ */
+void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
+ struct sockaddr_rxrpc *_srx)
+{
+ *_srx = call->peer->srx;
+}
+EXPORT_SYMBOL(rxrpc_kernel_get_peer);
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index ced5f07444e5..65cd980767fa 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -17,12 +17,12 @@
static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
[RXRPC_CONN_UNUSED] = "Unused ",
[RXRPC_CONN_CLIENT] = "Client ",
+ [RXRPC_CONN_SERVICE_PREALLOC] = "SvPrealc",
[RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ",
[RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ",
[RXRPC_CONN_SERVICE] = "SvSecure",
[RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort",
[RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort",
- [RXRPC_CONN_NETWORK_ERROR] = "NetError",
};
/*
@@ -30,6 +30,7 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
*/
static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos)
{
+ rcu_read_lock();
read_lock(&rxrpc_call_lock);
return seq_list_start_head(&rxrpc_calls, *_pos);
}
@@ -42,17 +43,21 @@ static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos)
static void rxrpc_call_seq_stop(struct seq_file *seq, void *v)
{
read_unlock(&rxrpc_call_lock);
+ rcu_read_unlock();
}
static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
{
- struct rxrpc_connection *conn;
+ struct rxrpc_local *local;
+ struct rxrpc_sock *rx;
+ struct rxrpc_peer *peer;
struct rxrpc_call *call;
- char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
+ char lbuff[50], rbuff[50];
if (v == &rxrpc_calls) {
seq_puts(seq,
- "Proto Local Remote "
+ "Proto Local "
+ " Remote "
" SvID ConnID CallID End Use State Abort "
" UserID\n");
return 0;
@@ -60,30 +65,35 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
call = list_entry(v, struct rxrpc_call, link);
- sprintf(lbuff, "%pI4:%u",
- &call->local->srx.transport.sin.sin_addr,
- ntohs(call->local->srx.transport.sin.sin_port));
+ rx = rcu_dereference(call->socket);
+ if (rx) {
+ local = READ_ONCE(rx->local);
+ if (local)
+ sprintf(lbuff, "%pISpc", &local->srx.transport);
+ else
+ strcpy(lbuff, "no_local");
+ } else {
+ strcpy(lbuff, "no_socket");
+ }
- conn = call->conn;
- if (conn)
- sprintf(rbuff, "%pI4:%u",
- &conn->params.peer->srx.transport.sin.sin_addr,
- ntohs(conn->params.peer->srx.transport.sin.sin_port));
+ peer = call->peer;
+ if (peer)
+ sprintf(rbuff, "%pISpc", &peer->srx.transport);
else
strcpy(rbuff, "no_connection");
seq_printf(seq,
- "UDP %-22.22s %-22.22s %4x %08x %08x %s %3u"
+ "UDP %-47.47s %-47.47s %4x %08x %08x %s %3u"
" %-8.8s %08x %lx\n",
lbuff,
rbuff,
call->service_id,
call->cid,
call->call_id,
- call->in_clientflag ? "Svc" : "Clt",
+ rxrpc_is_service_call(call) ? "Svc" : "Clt",
atomic_read(&call->usage),
rxrpc_call_states[call->state],
- call->remote_abort ?: call->local_abort,
+ call->abort_code,
call->user_call_ID);
return 0;
@@ -115,13 +125,13 @@ const struct file_operations rxrpc_call_seq_fops = {
static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos)
{
read_lock(&rxrpc_connection_lock);
- return seq_list_start_head(&rxrpc_connections, *_pos);
+ return seq_list_start_head(&rxrpc_connection_proc_list, *_pos);
}
static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v,
loff_t *pos)
{
- return seq_list_next(v, &rxrpc_connections, pos);
+ return seq_list_next(v, &rxrpc_connection_proc_list, pos);
}
static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
@@ -132,29 +142,31 @@ static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v)
static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
{
struct rxrpc_connection *conn;
- char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1];
+ char lbuff[50], rbuff[50];
- if (v == &rxrpc_connections) {
+ if (v == &rxrpc_connection_proc_list) {
seq_puts(seq,
- "Proto Local Remote "
+ "Proto Local "
+ " Remote "
" SvID ConnID End Use State Key "
" Serial ISerial\n"
);
return 0;
}
- conn = list_entry(v, struct rxrpc_connection, link);
-
- sprintf(lbuff, "%pI4:%u",
- &conn->params.local->srx.transport.sin.sin_addr,
- ntohs(conn->params.local->srx.transport.sin.sin_port));
+ conn = list_entry(v, struct rxrpc_connection, proc_link);
+ if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) {
+ strcpy(lbuff, "no_local");
+ strcpy(rbuff, "no_connection");
+ goto print;
+ }
- sprintf(rbuff, "%pI4:%u",
- &conn->params.peer->srx.transport.sin.sin_addr,
- ntohs(conn->params.peer->srx.transport.sin.sin_port));
+ sprintf(lbuff, "%pISpc", &conn->params.local->srx.transport);
+ sprintf(rbuff, "%pISpc", &conn->params.peer->srx.transport);
+print:
seq_printf(seq,
- "UDP %-22.22s %-22.22s %4x %08x %s %3u"
+ "UDP %-47.47s %-47.47s %4x %08x %s %3u"
" %s %08x %08x %08x\n",
lbuff,
rbuff,
@@ -165,7 +177,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
rxrpc_conn_states[conn->state],
key_serial(conn->params.key),
atomic_read(&conn->serial),
- atomic_read(&conn->hi_serial));
+ conn->hi_serial);
return 0;
}
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 9ed66d533002..f05ea0a88076 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -19,399 +19,645 @@
#include "ar-internal.h"
/*
- * removal a call's user ID from the socket tree to make the user ID available
- * again and so that it won't be seen again in association with that call
+ * Post a call for attention by the socket or kernel service. Further
+ * notifications are suppressed by putting recvmsg_link on a dummy queue.
*/
-void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call)
+void rxrpc_notify_socket(struct rxrpc_call *call)
{
- _debug("RELEASE CALL %d", call->debug_id);
+ struct rxrpc_sock *rx;
+ struct sock *sk;
+
+ _enter("%d", call->debug_id);
+
+ if (!list_empty(&call->recvmsg_link))
+ return;
+
+ rcu_read_lock();
+
+ rx = rcu_dereference(call->socket);
+ sk = &rx->sk;
+ if (rx && sk->sk_state < RXRPC_CLOSE) {
+ if (call->notify_rx) {
+ call->notify_rx(sk, call, call->user_call_ID);
+ } else {
+ write_lock_bh(&rx->recvmsg_lock);
+ if (list_empty(&call->recvmsg_link)) {
+ rxrpc_get_call(call, rxrpc_call_got);
+ list_add_tail(&call->recvmsg_link, &rx->recvmsg_q);
+ }
+ write_unlock_bh(&rx->recvmsg_lock);
- if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
- write_lock_bh(&rx->call_lock);
- rb_erase(&call->sock_node, &call->socket->calls);
- clear_bit(RXRPC_CALL_HAS_USERID, &call->flags);
- write_unlock_bh(&rx->call_lock);
+ if (!sock_flag(sk, SOCK_DEAD)) {
+ _debug("call %ps", sk->sk_data_ready);
+ sk->sk_data_ready(sk);
+ }
+ }
}
- read_lock_bh(&call->state_lock);
- if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) &&
- !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events))
- rxrpc_queue_call(call);
- read_unlock_bh(&call->state_lock);
+ rcu_read_unlock();
+ _leave("");
}
/*
- * receive a message from an RxRPC socket
- * - we need to be careful about two or more threads calling recvmsg
- * simultaneously
+ * Pass a call terminating message to userspace.
*/
-int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
- int flags)
+static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
{
- struct rxrpc_skb_priv *sp;
- struct rxrpc_call *call = NULL, *continue_call = NULL;
- struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
- struct sk_buff *skb;
- long timeo;
- int copy, ret, ullen, offset, copied = 0;
- u32 abort_code;
-
- DEFINE_WAIT(wait);
-
- _enter(",,,%zu,%d", len, flags);
-
- if (flags & (MSG_OOB | MSG_TRUNC))
- return -EOPNOTSUPP;
-
- ullen = msg->msg_flags & MSG_CMSG_COMPAT ? 4 : sizeof(unsigned long);
+ u32 tmp = 0;
+ int ret;
+
+ switch (call->completion) {
+ case RXRPC_CALL_SUCCEEDED:
+ ret = 0;
+ if (rxrpc_is_service_call(call))
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &tmp);
+ break;
+ case RXRPC_CALL_REMOTELY_ABORTED:
+ tmp = call->abort_code;
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp);
+ break;
+ case RXRPC_CALL_LOCALLY_ABORTED:
+ tmp = call->abort_code;
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp);
+ break;
+ case RXRPC_CALL_NETWORK_ERROR:
+ tmp = call->error;
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp);
+ break;
+ case RXRPC_CALL_LOCAL_ERROR:
+ tmp = call->error;
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp);
+ break;
+ default:
+ pr_err("Invalid terminal call state %u\n", call->state);
+ BUG();
+ break;
+ }
- timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT);
- msg->msg_flags |= MSG_MORE;
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_terminal, call->rx_hard_ack,
+ call->rx_pkt_offset, call->rx_pkt_len, ret);
+ return ret;
+}
- lock_sock(&rx->sk);
+/*
+ * Pass back notification of a new call. The call is added to the
+ * to-be-accepted list. This means that the next call to be accepted might not
+ * be the last call seen awaiting acceptance, but unless we leave this on the
+ * front of the queue and block all other messages until someone gives us a
+ * user_ID for it, there's not a lot we can do.
+ */
+static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ struct msghdr *msg, int flags)
+{
+ int tmp = 0, ret;
- for (;;) {
- /* return immediately if a client socket has no outstanding
- * calls */
- if (RB_EMPTY_ROOT(&rx->calls)) {
- if (copied)
- goto out;
- if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
- release_sock(&rx->sk);
- if (continue_call)
- rxrpc_put_call(continue_call);
- return -ENODATA;
- }
- }
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &tmp);
- /* get the next message on the Rx queue */
- skb = skb_peek(&rx->sk.sk_receive_queue);
- if (!skb) {
- /* nothing remains on the queue */
- if (copied &&
- (flags & MSG_PEEK || timeo == 0))
- goto out;
+ if (ret == 0 && !(flags & MSG_PEEK)) {
+ _debug("to be accepted");
+ write_lock_bh(&rx->recvmsg_lock);
+ list_del_init(&call->recvmsg_link);
+ write_unlock_bh(&rx->recvmsg_lock);
- /* wait for a message to turn up */
- release_sock(&rx->sk);
- prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait,
- TASK_INTERRUPTIBLE);
- ret = sock_error(&rx->sk);
- if (ret)
- goto wait_error;
-
- if (skb_queue_empty(&rx->sk.sk_receive_queue)) {
- if (signal_pending(current))
- goto wait_interrupted;
- timeo = schedule_timeout(timeo);
- }
- finish_wait(sk_sleep(&rx->sk), &wait);
- lock_sock(&rx->sk);
- continue;
- }
+ rxrpc_get_call(call, rxrpc_call_got);
+ write_lock(&rx->call_lock);
+ list_add_tail(&call->accept_link, &rx->to_be_accepted);
+ write_unlock(&rx->call_lock);
+ }
- peek_next_packet:
- sp = rxrpc_skb(skb);
- call = sp->call;
- ASSERT(call != NULL);
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_to_be_accepted, 1, 0, 0, ret);
+ return ret;
+}
- _debug("next pkt %s", rxrpc_pkts[sp->hdr.type]);
+/*
+ * End the packet reception phase.
+ */
+static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
+{
+ _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]);
- /* make sure we wait for the state to be updated in this call */
- spin_lock_bh(&call->lock);
- spin_unlock_bh(&call->lock);
+ trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top);
+ ASSERTCMP(call->rx_hard_ack, ==, call->rx_top);
- if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) {
- _debug("packet from released call");
- if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
- BUG();
- rxrpc_free_skb(skb);
- continue;
- }
+ if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) {
+ rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false,
+ rxrpc_propose_ack_terminal_ack);
+ rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+ }
- /* determine whether to continue last data receive */
- if (continue_call) {
- _debug("maybe cont");
- if (call != continue_call ||
- skb->mark != RXRPC_SKB_MARK_DATA) {
- release_sock(&rx->sk);
- rxrpc_put_call(continue_call);
- _leave(" = %d [noncont]", copied);
- return copied;
- }
- }
+ write_lock_bh(&call->state_lock);
- rxrpc_get_call(call);
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_RECV_REPLY:
+ __rxrpc_call_completed(call);
+ break;
- /* copy the peer address and timestamp */
- if (!continue_call) {
- if (msg->msg_name) {
- size_t len =
- sizeof(call->conn->params.peer->srx);
- memcpy(msg->msg_name,
- &call->conn->params.peer->srx, len);
- msg->msg_namelen = len;
- }
- sock_recv_timestamp(msg, &rx->sk, skb);
- }
+ case RXRPC_CALL_SERVER_RECV_REQUEST:
+ call->tx_phase = true;
+ call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
+ break;
+ default:
+ break;
+ }
- /* receive the message */
- if (skb->mark != RXRPC_SKB_MARK_DATA)
- goto receive_non_data_message;
+ write_unlock_bh(&call->state_lock);
+}
- _debug("recvmsg DATA #%u { %d, %d }",
- sp->hdr.seq, skb->len, sp->offset);
+/*
+ * Discard a packet we've used up and advance the Rx window by one.
+ */
+static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ rxrpc_serial_t serial;
+ rxrpc_seq_t hard_ack, top;
+ u8 flags;
+ int ix;
+
+ _enter("%d", call->debug_id);
+
+ hard_ack = call->rx_hard_ack;
+ top = smp_load_acquire(&call->rx_top);
+ ASSERT(before(hard_ack, top));
+
+ hard_ack++;
+ ix = hard_ack & RXRPC_RXTX_BUFF_MASK;
+ skb = call->rxtx_buffer[ix];
+ rxrpc_see_skb(skb, rxrpc_skb_rx_rotated);
+ sp = rxrpc_skb(skb);
+ flags = sp->hdr.flags;
+ serial = sp->hdr.serial;
+ if (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO)
+ serial += (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) - 1;
+
+ call->rxtx_buffer[ix] = NULL;
+ call->rxtx_annotations[ix] = 0;
+ /* Barrier against rxrpc_input_data(). */
+ smp_store_release(&call->rx_hard_ack, hard_ack);
+
+ rxrpc_free_skb(skb, rxrpc_skb_rx_freed);
+
+ _debug("%u,%u,%02x", hard_ack, top, flags);
+ trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack);
+ if (flags & RXRPC_LAST_PACKET) {
+ rxrpc_end_rx_phase(call, serial);
+ } else {
+ /* Check to see if there's an ACK that needs sending. */
+ if (after_eq(hard_ack, call->ackr_consumed + 2) ||
+ after_eq(top, call->ackr_seen + 2) ||
+ (hard_ack == top && after(hard_ack, call->ackr_consumed)))
+ rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial,
+ true, false,
+ rxrpc_propose_ack_rotate_rx);
+ if (call->ackr_reason)
+ rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+ }
+}
- if (!continue_call) {
- /* only set the control data once per recvmsg() */
- ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
- ullen, &call->user_call_ID);
- if (ret < 0)
- goto copy_error;
- ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
- }
+/*
+ * Decrypt and verify a (sub)packet. The packet's length may be changed due to
+ * padding, but if this is the case, the packet length will be resident in the
+ * socket buffer. Note that we can't modify the master skb info as the skb may
+ * be the home to multiple subpackets.
+ */
+static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
+ u8 annotation,
+ unsigned int offset, unsigned int len)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ rxrpc_seq_t seq = sp->hdr.seq;
+ u16 cksum = sp->hdr.cksum;
- ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
- ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
- call->rx_data_recv = sp->hdr.seq;
+ _enter("");
- ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
+ /* For all but the head jumbo subpacket, the security checksum is in a
+ * jumbo header immediately prior to the data.
+ */
+ if ((annotation & RXRPC_RX_ANNO_JUMBO) > 1) {
+ __be16 tmp;
+ if (skb_copy_bits(skb, offset - 2, &tmp, 2) < 0)
+ BUG();
+ cksum = ntohs(tmp);
+ seq += (annotation & RXRPC_RX_ANNO_JUMBO) - 1;
+ }
- offset = sp->offset;
- copy = skb->len - offset;
- if (copy > len - copied)
- copy = len - copied;
+ return call->conn->security->verify_packet(call, skb, offset, len,
+ seq, cksum);
+}
- ret = skb_copy_datagram_msg(skb, offset, msg, copy);
+/*
+ * Locate the data within a packet. This is complicated by:
+ *
+ * (1) An skb may contain a jumbo packet - so we have to find the appropriate
+ * subpacket.
+ *
+ * (2) The (sub)packets may be encrypted and, if so, the encrypted portion
+ * contains an extra header which includes the true length of the data,
+ * excluding any encrypted padding.
+ */
+static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
+ u8 *_annotation,
+ unsigned int *_offset, unsigned int *_len)
+{
+ unsigned int offset = sizeof(struct rxrpc_wire_header);
+ unsigned int len = *_len;
+ int ret;
+ u8 annotation = *_annotation;
+
+ /* Locate the subpacket */
+ len = skb->len - offset;
+ if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) {
+ offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) *
+ RXRPC_JUMBO_SUBPKTLEN);
+ len = (annotation & RXRPC_RX_ANNO_JLAST) ?
+ skb->len - offset : RXRPC_JUMBO_SUBPKTLEN;
+ }
+ if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) {
+ ret = rxrpc_verify_packet(call, skb, annotation, offset, len);
if (ret < 0)
- goto copy_error;
-
- /* handle piecemeal consumption of data packets */
- _debug("copied %d+%d", copy, copied);
+ return ret;
+ *_annotation |= RXRPC_RX_ANNO_VERIFIED;
+ }
- offset += copy;
- copied += copy;
+ *_offset = offset;
+ *_len = len;
+ call->conn->security->locate_data(call, skb, _offset, _len);
+ return 0;
+}
- if (!(flags & MSG_PEEK))
- sp->offset = offset;
+/*
+ * Deliver messages to a call. This keeps processing packets until the buffer
+ * is filled and we find either more DATA (returns 0) or the end of the DATA
+ * (returns 1). If more packets are required, it returns -EAGAIN.
+ */
+static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
+ struct msghdr *msg, struct iov_iter *iter,
+ size_t len, int flags, size_t *_offset)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ rxrpc_seq_t hard_ack, top, seq;
+ size_t remain;
+ bool last;
+ unsigned int rx_pkt_offset, rx_pkt_len;
+ int ix, copy, ret = -EAGAIN, ret2;
+
+ rx_pkt_offset = call->rx_pkt_offset;
+ rx_pkt_len = call->rx_pkt_len;
+
+ if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) {
+ seq = call->rx_hard_ack;
+ ret = 1;
+ goto done;
+ }
- if (sp->offset < skb->len) {
- _debug("buffer full");
- ASSERTCMP(copied, ==, len);
+ /* Barriers against rxrpc_input_data(). */
+ hard_ack = call->rx_hard_ack;
+ top = smp_load_acquire(&call->rx_top);
+ for (seq = hard_ack + 1; before_eq(seq, top); seq++) {
+ ix = seq & RXRPC_RXTX_BUFF_MASK;
+ skb = call->rxtx_buffer[ix];
+ if (!skb) {
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_hole, seq,
+ rx_pkt_offset, rx_pkt_len, 0);
break;
}
+ smp_rmb();
+ rxrpc_see_skb(skb, rxrpc_skb_rx_seen);
+ sp = rxrpc_skb(skb);
- /* we transferred the whole data packet */
if (!(flags & MSG_PEEK))
- rxrpc_kernel_data_consumed(call, skb);
-
- if (sp->hdr.flags & RXRPC_LAST_PACKET) {
- _debug("last");
- if (rxrpc_conn_is_client(call->conn)) {
- /* last byte of reply received */
- ret = copied;
- goto terminal_message;
+ trace_rxrpc_receive(call, rxrpc_receive_front,
+ sp->hdr.serial, seq);
+
+ if (msg)
+ sock_recv_timestamp(msg, sock->sk, skb);
+
+ if (rx_pkt_offset == 0) {
+ ret2 = rxrpc_locate_data(call, skb,
+ &call->rxtx_annotations[ix],
+ &rx_pkt_offset, &rx_pkt_len);
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq,
+ rx_pkt_offset, rx_pkt_len, ret2);
+ if (ret2 < 0) {
+ ret = ret2;
+ goto out;
}
+ } else {
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_cont, seq,
+ rx_pkt_offset, rx_pkt_len, 0);
+ }
- /* last bit of request received */
- if (!(flags & MSG_PEEK)) {
- _debug("eat packet");
- if (skb_dequeue(&rx->sk.sk_receive_queue) !=
- skb)
- BUG();
- rxrpc_free_skb(skb);
+ /* We have to handle short, empty and used-up DATA packets. */
+ remain = len - *_offset;
+ copy = rx_pkt_len;
+ if (copy > remain)
+ copy = remain;
+ if (copy > 0) {
+ ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter,
+ copy);
+ if (ret2 < 0) {
+ ret = ret2;
+ goto out;
}
- msg->msg_flags &= ~MSG_MORE;
- break;
+
+ /* handle piecemeal consumption of data packets */
+ rx_pkt_offset += copy;
+ rx_pkt_len -= copy;
+ *_offset += copy;
}
- /* move on to the next data message */
- _debug("next");
- if (!continue_call)
- continue_call = sp->call;
- else
- rxrpc_put_call(call);
- call = NULL;
-
- if (flags & MSG_PEEK) {
- _debug("peek next");
- skb = skb->next;
- if (skb == (struct sk_buff *) &rx->sk.sk_receive_queue)
- break;
- goto peek_next_packet;
+ if (rx_pkt_len > 0) {
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_full, seq,
+ rx_pkt_offset, rx_pkt_len, 0);
+ ASSERTCMP(*_offset, ==, len);
+ ret = 0;
+ break;
}
- _debug("eat packet");
- if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
- BUG();
- rxrpc_free_skb(skb);
+ /* The whole packet has been transferred. */
+ last = sp->hdr.flags & RXRPC_LAST_PACKET;
+ if (!(flags & MSG_PEEK))
+ rxrpc_rotate_rx_window(call);
+ rx_pkt_offset = 0;
+ rx_pkt_len = 0;
+
+ if (last) {
+ ASSERTCMP(seq, ==, READ_ONCE(call->rx_top));
+ ret = 1;
+ goto out;
+ }
}
- /* end of non-terminal data packet reception for the moment */
- _debug("end rcv data");
out:
- release_sock(&rx->sk);
- if (call)
- rxrpc_put_call(call);
- if (continue_call)
- rxrpc_put_call(continue_call);
- _leave(" = %d [data]", copied);
- return copied;
-
- /* handle non-DATA messages such as aborts, incoming connections and
- * final ACKs */
-receive_non_data_message:
- _debug("non-data");
-
- if (skb->mark == RXRPC_SKB_MARK_NEW_CALL) {
- _debug("RECV NEW CALL");
- ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &abort_code);
- if (ret < 0)
- goto copy_error;
- if (!(flags & MSG_PEEK)) {
- if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
- BUG();
- rxrpc_free_skb(skb);
- }
- goto out;
+ if (!(flags & MSG_PEEK)) {
+ call->rx_pkt_offset = rx_pkt_offset;
+ call->rx_pkt_len = rx_pkt_len;
}
+done:
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq,
+ rx_pkt_offset, rx_pkt_len, ret);
+ return ret;
+}
- ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
- ullen, &call->user_call_ID);
- if (ret < 0)
- goto copy_error;
- ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags));
+/*
+ * Receive a message from an RxRPC socket
+ * - we need to be careful about two or more threads calling recvmsg
+ * simultaneously
+ */
+int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
+{
+ struct rxrpc_call *call;
+ struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
+ struct list_head *l;
+ size_t copied = 0;
+ long timeo;
+ int ret;
+
+ DEFINE_WAIT(wait);
+
+ trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0, 0, 0, 0);
+
+ if (flags & (MSG_OOB | MSG_TRUNC))
+ return -EOPNOTSUPP;
+
+ timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT);
+
+try_again:
+ lock_sock(&rx->sk);
+
+ /* Return immediately if a client socket has no outstanding calls */
+ if (RB_EMPTY_ROOT(&rx->calls) &&
+ list_empty(&rx->recvmsg_q) &&
+ rx->sk.sk_state != RXRPC_SERVER_LISTENING) {
+ release_sock(&rx->sk);
+ return -ENODATA;
+ }
+
+ if (list_empty(&rx->recvmsg_q)) {
+ ret = -EWOULDBLOCK;
+ if (timeo == 0) {
+ call = NULL;
+ goto error_no_call;
+ }
+
+ release_sock(&rx->sk);
+
+ /* Wait for something to happen */
+ prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait,
+ TASK_INTERRUPTIBLE);
+ ret = sock_error(&rx->sk);
+ if (ret)
+ goto wait_error;
+
+ if (list_empty(&rx->recvmsg_q)) {
+ if (signal_pending(current))
+ goto wait_interrupted;
+ trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait,
+ 0, 0, 0, 0);
+ timeo = schedule_timeout(timeo);
+ }
+ finish_wait(sk_sleep(&rx->sk), &wait);
+ goto try_again;
+ }
- switch (skb->mark) {
- case RXRPC_SKB_MARK_DATA:
+ /* Find the next call and dequeue it if we're not just peeking. If we
+ * do dequeue it, that comes with a ref that we will need to release.
+ */
+ write_lock_bh(&rx->recvmsg_lock);
+ l = rx->recvmsg_q.next;
+ call = list_entry(l, struct rxrpc_call, recvmsg_link);
+ if (!(flags & MSG_PEEK))
+ list_del_init(&call->recvmsg_link);
+ else
+ rxrpc_get_call(call, rxrpc_call_got);
+ write_unlock_bh(&rx->recvmsg_lock);
+
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0);
+
+ if (test_bit(RXRPC_CALL_RELEASED, &call->flags))
BUG();
- case RXRPC_SKB_MARK_FINAL_ACK:
- ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &abort_code);
- break;
- case RXRPC_SKB_MARK_BUSY:
- ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code);
- break;
- case RXRPC_SKB_MARK_REMOTE_ABORT:
- abort_code = call->remote_abort;
- ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code);
- break;
- case RXRPC_SKB_MARK_LOCAL_ABORT:
- abort_code = call->local_abort;
- ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code);
- break;
- case RXRPC_SKB_MARK_NET_ERROR:
- _debug("RECV NET ERROR %d", sp->error);
- abort_code = sp->error;
- ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &abort_code);
+
+ if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
+ if (flags & MSG_CMSG_COMPAT) {
+ unsigned int id32 = call->user_call_ID;
+
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+ sizeof(unsigned int), &id32);
+ } else {
+ ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID,
+ sizeof(unsigned long),
+ &call->user_call_ID);
+ }
+ if (ret < 0)
+ goto error;
+ }
+
+ if (msg->msg_name) {
+ size_t len = sizeof(call->conn->params.peer->srx);
+ memcpy(msg->msg_name, &call->conn->params.peer->srx, len);
+ msg->msg_namelen = len;
+ }
+
+ switch (call->state) {
+ case RXRPC_CALL_SERVER_ACCEPTING:
+ ret = rxrpc_recvmsg_new_call(rx, call, msg, flags);
break;
- case RXRPC_SKB_MARK_LOCAL_ERROR:
- _debug("RECV LOCAL ERROR %d", sp->error);
- abort_code = sp->error;
- ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4,
- &abort_code);
+ case RXRPC_CALL_CLIENT_RECV_REPLY:
+ case RXRPC_CALL_SERVER_RECV_REQUEST:
+ case RXRPC_CALL_SERVER_ACK_REQUEST:
+ ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len,
+ flags, &copied);
+ if (ret == -EAGAIN)
+ ret = 0;
+
+ if (after(call->rx_top, call->rx_hard_ack) &&
+ call->rxtx_buffer[(call->rx_hard_ack + 1) & RXRPC_RXTX_BUFF_MASK])
+ rxrpc_notify_socket(call);
break;
default:
- pr_err("Unknown packet mark %u\n", skb->mark);
- BUG();
+ ret = 0;
break;
}
if (ret < 0)
- goto copy_error;
-
-terminal_message:
- _debug("terminal");
- msg->msg_flags &= ~MSG_MORE;
- msg->msg_flags |= MSG_EOR;
+ goto error;
- if (!(flags & MSG_PEEK)) {
- _net("free terminal skb %p", skb);
- if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
- BUG();
- rxrpc_free_skb(skb);
- rxrpc_remove_user_ID(rx, call);
+ if (call->state == RXRPC_CALL_COMPLETE) {
+ ret = rxrpc_recvmsg_term(call, msg);
+ if (ret < 0)
+ goto error;
+ if (!(flags & MSG_PEEK))
+ rxrpc_release_call(rx, call);
+ msg->msg_flags |= MSG_EOR;
+ ret = 1;
}
- release_sock(&rx->sk);
- rxrpc_put_call(call);
- if (continue_call)
- rxrpc_put_call(continue_call);
- _leave(" = %d", ret);
- return ret;
+ if (ret == 0)
+ msg->msg_flags |= MSG_MORE;
+ else
+ msg->msg_flags &= ~MSG_MORE;
+ ret = copied;
-copy_error:
- _debug("copy error");
+error:
+ rxrpc_put_call(call, rxrpc_call_put);
+error_no_call:
release_sock(&rx->sk);
- rxrpc_put_call(call);
- if (continue_call)
- rxrpc_put_call(continue_call);
- _leave(" = %d", ret);
+ trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret);
return ret;
wait_interrupted:
ret = sock_intr_errno(timeo);
wait_error:
finish_wait(sk_sleep(&rx->sk), &wait);
- if (continue_call)
- rxrpc_put_call(continue_call);
- if (copied)
- copied = ret;
- _leave(" = %d [waitfail %d]", copied, ret);
- return copied;
-
+ call = NULL;
+ goto error_no_call;
}
/**
- * rxrpc_kernel_is_data_last - Determine if data message is last one
- * @skb: Message holding data
+ * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info
+ * @sock: The socket that the call exists on
+ * @call: The call to send data through
+ * @buf: The buffer to receive into
+ * @size: The size of the buffer, including data already read
+ * @_offset: The running offset into the buffer.
+ * @want_more: True if more data is expected to be read
+ * @_abort: Where the abort code is stored if -ECONNABORTED is returned
*
- * Determine if data message is last one for the parent call.
+ * Allow a kernel service to receive data and pick up information about the
+ * state of a call. Returns 0 if got what was asked for and there's more
+ * available, 1 if we got what was asked for and we're at the end of the data
+ * and -EAGAIN if we need more data.
+ *
+ * Note that we may return -EAGAIN to drain empty packets at the end of the
+ * data, even if we've already copied over the requested data.
+ *
+ * This function adds the amount it transfers to *_offset, so this should be
+ * precleared as appropriate. Note that the amount remaining in the buffer is
+ * taken to be size - *_offset.
+ *
+ * *_abort should also be initialised to 0.
*/
-bool rxrpc_kernel_is_data_last(struct sk_buff *skb)
+int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
+ void *buf, size_t size, size_t *_offset,
+ bool want_more, u32 *_abort)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ struct iov_iter iter;
+ struct kvec iov;
+ int ret;
- ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_DATA);
+ _enter("{%d,%s},%zu/%zu,%d",
+ call->debug_id, rxrpc_call_states[call->state],
+ *_offset, size, want_more);
- return sp->hdr.flags & RXRPC_LAST_PACKET;
-}
+ ASSERTCMP(*_offset, <=, size);
+ ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING);
-EXPORT_SYMBOL(rxrpc_kernel_is_data_last);
+ iov.iov_base = buf + *_offset;
+ iov.iov_len = size - *_offset;
+ iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset);
-/**
- * rxrpc_kernel_get_abort_code - Get the abort code from an RxRPC abort message
- * @skb: Message indicating an abort
- *
- * Get the abort code from an RxRPC abort message.
- */
-u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb)
-{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ lock_sock(sock->sk);
+
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_RECV_REPLY:
+ case RXRPC_CALL_SERVER_RECV_REQUEST:
+ case RXRPC_CALL_SERVER_ACK_REQUEST:
+ ret = rxrpc_recvmsg_data(sock, call, NULL, &iter, size, 0,
+ _offset);
+ if (ret < 0)
+ goto out;
+
+ /* We can only reach here with a partially full buffer if we
+ * have reached the end of the data. We must otherwise have a
+ * full buffer or have been given -EAGAIN.
+ */
+ if (ret == 1) {
+ if (*_offset < size)
+ goto short_data;
+ if (!want_more)
+ goto read_phase_complete;
+ ret = 0;
+ goto out;
+ }
+
+ if (!want_more)
+ goto excess_data;
+ goto out;
+
+ case RXRPC_CALL_COMPLETE:
+ goto call_complete;
- switch (skb->mark) {
- case RXRPC_SKB_MARK_REMOTE_ABORT:
- return sp->call->remote_abort;
- case RXRPC_SKB_MARK_LOCAL_ABORT:
- return sp->call->local_abort;
default:
- BUG();
+ ret = -EINPROGRESS;
+ goto out;
}
-}
-
-EXPORT_SYMBOL(rxrpc_kernel_get_abort_code);
-/**
- * rxrpc_kernel_get_error - Get the error number from an RxRPC error message
- * @skb: Message indicating an error
- *
- * Get the error number from an RxRPC error message.
- */
-int rxrpc_kernel_get_error_number(struct sk_buff *skb)
-{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+read_phase_complete:
+ ret = 1;
+out:
+ release_sock(sock->sk);
+ _leave(" = %d [%zu,%d]", ret, *_offset, *_abort);
+ return ret;
- return sp->error;
+short_data:
+ ret = -EBADMSG;
+ goto out;
+excess_data:
+ ret = -EMSGSIZE;
+ goto out;
+call_complete:
+ *_abort = call->abort_code;
+ ret = call->error;
+ if (call->completion == RXRPC_CALL_SUCCEEDED) {
+ ret = 1;
+ if (size > 0)
+ ret = -ECONNRESET;
+ }
+ goto out;
}
-
-EXPORT_SYMBOL(rxrpc_kernel_get_error_number);
+EXPORT_SYMBOL(rxrpc_kernel_recv_data);
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index 63afa9e9cc08..627abed5f999 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -80,12 +80,10 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn)
case RXRPC_SECURITY_AUTH:
conn->size_align = 8;
conn->security_size = sizeof(struct rxkad_level1_hdr);
- conn->header_size += sizeof(struct rxkad_level1_hdr);
break;
case RXRPC_SECURITY_ENCRYPT:
conn->size_align = 8;
conn->security_size = sizeof(struct rxkad_level2_hdr);
- conn->header_size += sizeof(struct rxkad_level2_hdr);
break;
default:
ret = -EKEYREJECTED;
@@ -161,7 +159,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call,
_enter("");
- check = sp->hdr.seq ^ sp->hdr.callNumber;
+ check = sp->hdr.seq ^ call->call_id;
data_size |= (u32)check << 16;
hdr.data_size = htonl(data_size);
@@ -205,7 +203,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call,
_enter("");
- check = sp->hdr.seq ^ sp->hdr.callNumber;
+ check = sp->hdr.seq ^ call->call_id;
rxkhdr.data_size = htonl(data_size | (u32)check << 16);
rxkhdr.checksum = 0;
@@ -275,9 +273,9 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
/* calculate the security checksum */
- x = call->channel << (32 - RXRPC_CIDSHIFT);
+ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
x |= sp->hdr.seq & 0x3fffffff;
- call->crypto_buf[0] = htonl(sp->hdr.callNumber);
+ call->crypto_buf[0] = htonl(call->call_id);
call->crypto_buf[1] = htonl(x);
sg_init_one(&sg, call->crypto_buf, 8);
@@ -316,12 +314,11 @@ static int rxkad_secure_packet(struct rxrpc_call *call,
/*
* decrypt partial encryption on a packet (level 1 security)
*/
-static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
- struct sk_buff *skb,
- u32 *_abort_code)
+static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
+ unsigned int offset, unsigned int len,
+ rxrpc_seq_t seq)
{
struct rxkad_level1_hdr sechdr;
- struct rxrpc_skb_priv *sp;
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
struct scatterlist sg[16];
@@ -332,15 +329,20 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
_enter("");
- sp = rxrpc_skb(skb);
+ if (len < 8) {
+ rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO);
+ goto protocol_error;
+ }
- /* we want to decrypt the skbuff in-place */
+ /* Decrypt the skbuff in-place. TODO: We really want to decrypt
+ * directly into the target buffer.
+ */
nsg = skb_cow_data(skb, 0, &trailer);
if (nsg < 0 || nsg > 16)
goto nomem;
sg_init_table(sg, nsg);
- skb_to_sgvec(skb, sg, 0, 8);
+ skb_to_sgvec(skb, sg, offset, 8);
/* start the decryption afresh */
memset(&iv, 0, sizeof(iv));
@@ -351,35 +353,35 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call,
crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
- /* remove the decrypted packet length */
- if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
- goto datalen_error;
- if (!skb_pull(skb, sizeof(sechdr)))
- BUG();
+ /* Extract the decrypted packet length */
+ if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) {
+ rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO);
+ goto protocol_error;
+ }
+ offset += sizeof(sechdr);
+ len -= sizeof(sechdr);
buf = ntohl(sechdr.data_size);
data_size = buf & 0xffff;
check = buf >> 16;
- check ^= sp->hdr.seq ^ sp->hdr.callNumber;
+ check ^= seq ^ call->call_id;
check &= 0xffff;
if (check != 0) {
- *_abort_code = RXKADSEALEDINCON;
+ rxrpc_abort_call("V1C", call, seq, RXKADSEALEDINCON, EPROTO);
goto protocol_error;
}
- /* shorten the packet to remove the padding */
- if (data_size > skb->len)
- goto datalen_error;
- else if (data_size < skb->len)
- skb->len = data_size;
+ if (data_size > len) {
+ rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO);
+ goto protocol_error;
+ }
_leave(" = 0 [dlen=%x]", data_size);
return 0;
-datalen_error:
- *_abort_code = RXKADDATALEN;
protocol_error:
+ rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
_leave(" = -EPROTO");
return -EPROTO;
@@ -391,13 +393,12 @@ nomem:
/*
* wholly decrypt a packet (level 2 security)
*/
-static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
- struct sk_buff *skb,
- u32 *_abort_code)
+static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
+ unsigned int offset, unsigned int len,
+ rxrpc_seq_t seq)
{
const struct rxrpc_key_token *token;
struct rxkad_level2_hdr sechdr;
- struct rxrpc_skb_priv *sp;
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
struct rxrpc_crypt iv;
struct scatterlist _sg[4], *sg;
@@ -408,9 +409,14 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
_enter(",{%d}", skb->len);
- sp = rxrpc_skb(skb);
+ if (len < 8) {
+ rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO);
+ goto protocol_error;
+ }
- /* we want to decrypt the skbuff in-place */
+ /* Decrypt the skbuff in-place. TODO: We really want to decrypt
+ * directly into the target buffer.
+ */
nsg = skb_cow_data(skb, 0, &trailer);
if (nsg < 0)
goto nomem;
@@ -423,7 +429,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
}
sg_init_table(sg, nsg);
- skb_to_sgvec(skb, sg, 0, skb->len);
+ skb_to_sgvec(skb, sg, offset, len);
/* decrypt from the session key */
token = call->conn->params.key->payload.data[0];
@@ -431,41 +437,41 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call,
skcipher_request_set_tfm(req, call->conn->cipher);
skcipher_request_set_callback(req, 0, NULL, NULL);
- skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x);
+ skcipher_request_set_crypt(req, sg, sg, len, iv.x);
crypto_skcipher_decrypt(req);
skcipher_request_zero(req);
if (sg != _sg)
kfree(sg);
- /* remove the decrypted packet length */
- if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0)
- goto datalen_error;
- if (!skb_pull(skb, sizeof(sechdr)))
- BUG();
+ /* Extract the decrypted packet length */
+ if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) {
+ rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO);
+ goto protocol_error;
+ }
+ offset += sizeof(sechdr);
+ len -= sizeof(sechdr);
buf = ntohl(sechdr.data_size);
data_size = buf & 0xffff;
check = buf >> 16;
- check ^= sp->hdr.seq ^ sp->hdr.callNumber;
+ check ^= seq ^ call->call_id;
check &= 0xffff;
if (check != 0) {
- *_abort_code = RXKADSEALEDINCON;
+ rxrpc_abort_call("V2C", call, seq, RXKADSEALEDINCON, EPROTO);
goto protocol_error;
}
- /* shorten the packet to remove the padding */
- if (data_size > skb->len)
- goto datalen_error;
- else if (data_size < skb->len)
- skb->len = data_size;
+ if (data_size > len) {
+ rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO);
+ goto protocol_error;
+ }
_leave(" = 0 [dlen=%x]", data_size);
return 0;
-datalen_error:
- *_abort_code = RXKADDATALEN;
protocol_error:
+ rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
_leave(" = -EPROTO");
return -EPROTO;
@@ -475,40 +481,31 @@ nomem:
}
/*
- * verify the security on a received packet
+ * Verify the security on a received packet or subpacket (if part of a
+ * jumbo packet).
*/
-static int rxkad_verify_packet(struct rxrpc_call *call,
- struct sk_buff *skb,
- u32 *_abort_code)
+static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb,
+ unsigned int offset, unsigned int len,
+ rxrpc_seq_t seq, u16 expected_cksum)
{
SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher);
- struct rxrpc_skb_priv *sp;
struct rxrpc_crypt iv;
struct scatterlist sg;
u16 cksum;
u32 x, y;
- int ret;
-
- sp = rxrpc_skb(skb);
_enter("{%d{%x}},{#%u}",
- call->debug_id, key_serial(call->conn->params.key), sp->hdr.seq);
+ call->debug_id, key_serial(call->conn->params.key), seq);
if (!call->conn->cipher)
return 0;
- if (sp->hdr.securityIndex != RXRPC_SECURITY_RXKAD) {
- *_abort_code = RXKADINCONSISTENCY;
- _leave(" = -EPROTO [not rxkad]");
- return -EPROTO;
- }
-
/* continue encrypting from where we left off */
memcpy(&iv, call->conn->csum_iv.x, sizeof(iv));
/* validate the security checksum */
- x = call->channel << (32 - RXRPC_CIDSHIFT);
- x |= sp->hdr.seq & 0x3fffffff;
+ x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT);
+ x |= seq & 0x3fffffff;
call->crypto_buf[0] = htonl(call->call_id);
call->crypto_buf[1] = htonl(x);
@@ -524,29 +521,69 @@ static int rxkad_verify_packet(struct rxrpc_call *call,
if (cksum == 0)
cksum = 1; /* zero checksums are not permitted */
- if (sp->hdr.cksum != cksum) {
- *_abort_code = RXKADSEALEDINCON;
+ if (cksum != expected_cksum) {
+ rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO);
+ rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
_leave(" = -EPROTO [csum failed]");
return -EPROTO;
}
switch (call->conn->params.security_level) {
case RXRPC_SECURITY_PLAIN:
- ret = 0;
- break;
+ return 0;
case RXRPC_SECURITY_AUTH:
- ret = rxkad_verify_packet_auth(call, skb, _abort_code);
- break;
+ return rxkad_verify_packet_1(call, skb, offset, len, seq);
case RXRPC_SECURITY_ENCRYPT:
- ret = rxkad_verify_packet_encrypt(call, skb, _abort_code);
- break;
+ return rxkad_verify_packet_2(call, skb, offset, len, seq);
default:
- ret = -ENOANO;
- break;
+ return -ENOANO;
}
+}
- _leave(" = %d", ret);
- return ret;
+/*
+ * Locate the data contained in a packet that was partially encrypted.
+ */
+static void rxkad_locate_data_1(struct rxrpc_call *call, struct sk_buff *skb,
+ unsigned int *_offset, unsigned int *_len)
+{
+ struct rxkad_level1_hdr sechdr;
+
+ if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0)
+ BUG();
+ *_offset += sizeof(sechdr);
+ *_len = ntohl(sechdr.data_size) & 0xffff;
+}
+
+/*
+ * Locate the data contained in a packet that was completely encrypted.
+ */
+static void rxkad_locate_data_2(struct rxrpc_call *call, struct sk_buff *skb,
+ unsigned int *_offset, unsigned int *_len)
+{
+ struct rxkad_level2_hdr sechdr;
+
+ if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0)
+ BUG();
+ *_offset += sizeof(sechdr);
+ *_len = ntohl(sechdr.data_size) & 0xffff;
+}
+
+/*
+ * Locate the data contained in an already decrypted packet.
+ */
+static void rxkad_locate_data(struct rxrpc_call *call, struct sk_buff *skb,
+ unsigned int *_offset, unsigned int *_len)
+{
+ switch (call->conn->params.security_level) {
+ case RXRPC_SECURITY_AUTH:
+ rxkad_locate_data_1(call, skb, _offset, _len);
+ return;
+ case RXRPC_SECURITY_ENCRYPT:
+ rxkad_locate_data_2(call, skb, _offset, _len);
+ return;
+ default:
+ return;
+ }
}
/*
@@ -716,7 +753,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
struct rxkad_challenge challenge;
struct rxkad_response resp
__attribute__((aligned(8))); /* must be aligned for crypto */
- struct rxrpc_skb_priv *sp;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
u32 version, nonce, min_level, abort_code;
int ret;
@@ -734,8 +771,8 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
}
abort_code = RXKADPACKETSHORT;
- sp = rxrpc_skb(skb);
- if (skb_copy_bits(skb, 0, &challenge, sizeof(challenge)) < 0)
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &challenge, sizeof(challenge)) < 0)
goto protocol_error;
version = ntohl(challenge.version);
@@ -981,7 +1018,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
{
struct rxkad_response response
__attribute__((aligned(8))); /* must be aligned for crypto */
- struct rxrpc_skb_priv *sp;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt session_key;
time_t expiry;
void *ticket;
@@ -992,7 +1029,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
_enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key));
abort_code = RXKADPACKETSHORT;
- if (skb_copy_bits(skb, 0, &response, sizeof(response)) < 0)
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &response, sizeof(response)) < 0)
goto protocol_error;
if (!pskb_pull(skb, sizeof(response)))
BUG();
@@ -1000,7 +1038,6 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
version = ntohl(response.version);
ticket_len = ntohl(response.ticket_len);
kvno = ntohl(response.kvno);
- sp = rxrpc_skb(skb);
_proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }",
sp->hdr.serial, version, kvno, ticket_len);
@@ -1022,7 +1059,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
return -ENOMEM;
abort_code = RXKADPACKETSHORT;
- if (skb_copy_bits(skb, 0, ticket, ticket_len) < 0)
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ ticket, ticket_len) < 0)
goto protocol_error_free;
ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key,
@@ -1147,6 +1185,7 @@ const struct rxrpc_security rxkad = {
.prime_packet_security = rxkad_prime_packet_security,
.secure_packet = rxkad_secure_packet,
.verify_packet = rxkad_verify_packet,
+ .locate_data = rxkad_locate_data,
.issue_challenge = rxkad_issue_challenge,
.respond_to_challenge = rxkad_respond_to_challenge,
.verify_response = rxkad_verify_response,
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index 814d285ff802..7d921e56e715 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -130,20 +130,20 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn)
}
/* find the service */
- read_lock_bh(&local->services_lock);
- list_for_each_entry(rx, &local->services, listen_link) {
- if (rx->srx.srx_service == conn->params.service_id)
- goto found_service;
- }
+ read_lock(&local->services_lock);
+ rx = rcu_dereference_protected(local->service,
+ lockdep_is_held(&local->services_lock));
+ if (rx && rx->srx.srx_service == conn->params.service_id)
+ goto found_service;
/* the service appears to have died */
- read_unlock_bh(&local->services_lock);
+ read_unlock(&local->services_lock);
_leave(" = -ENOENT");
return -ENOENT;
found_service:
if (!rx->securities) {
- read_unlock_bh(&local->services_lock);
+ read_unlock(&local->services_lock);
_leave(" = -ENOKEY");
return -ENOKEY;
}
@@ -152,13 +152,13 @@ found_service:
kref = keyring_search(make_key_ref(rx->securities, 1UL),
&key_type_rxrpc_s, kdesc);
if (IS_ERR(kref)) {
- read_unlock_bh(&local->services_lock);
+ read_unlock(&local->services_lock);
_leave(" = %ld [search]", PTR_ERR(kref));
return PTR_ERR(kref);
}
key = key_ref_to_ptr(kref);
- read_unlock_bh(&local->services_lock);
+ read_unlock(&local->services_lock);
conn->server_key = key;
conn->security = sec;
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
new file mode 100644
index 000000000000..3322543d460a
--- /dev/null
+++ b/net/rxrpc/sendmsg.c
@@ -0,0 +1,606 @@
+/* AF_RXRPC sendmsg() implementation.
+ *
+ * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/net.h>
+#include <linux/gfp.h>
+#include <linux/skbuff.h>
+#include <linux/export.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+enum rxrpc_command {
+ RXRPC_CMD_SEND_DATA, /* send data message */
+ RXRPC_CMD_SEND_ABORT, /* request abort generation */
+ RXRPC_CMD_ACCEPT, /* [server] accept incoming call */
+ RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */
+};
+
+/*
+ * wait for space to appear in the transmit/ACK window
+ * - caller holds the socket locked
+ */
+static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ long *timeo)
+{
+ DECLARE_WAITQUEUE(myself, current);
+ int ret;
+
+ _enter(",{%u,%u,%u}",
+ call->tx_hard_ack, call->tx_top, call->tx_winsize);
+
+ add_wait_queue(&call->waitq, &myself);
+
+ for (;;) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ ret = 0;
+ if (call->tx_top - call->tx_hard_ack <
+ min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra))
+ break;
+ if (call->state >= RXRPC_CALL_COMPLETE) {
+ ret = -call->error;
+ break;
+ }
+ if (signal_pending(current)) {
+ ret = sock_intr_errno(*timeo);
+ break;
+ }
+
+ trace_rxrpc_transmit(call, rxrpc_transmit_wait);
+ release_sock(&rx->sk);
+ *timeo = schedule_timeout(*timeo);
+ lock_sock(&rx->sk);
+ }
+
+ remove_wait_queue(&call->waitq, &myself);
+ set_current_state(TASK_RUNNING);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
+ * Schedule an instant Tx resend.
+ */
+static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix)
+{
+ spin_lock_bh(&call->lock);
+
+ if (call->state < RXRPC_CALL_COMPLETE) {
+ call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS;
+ if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events))
+ rxrpc_queue_call(call);
+ }
+
+ spin_unlock_bh(&call->lock);
+}
+
+/*
+ * Queue a DATA packet for transmission, set the resend timeout and send the
+ * packet immediately
+ */
+static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
+ bool last)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ rxrpc_seq_t seq = sp->hdr.seq;
+ int ret, ix;
+ u8 annotation = RXRPC_TX_ANNO_UNACK;
+
+ _net("queue skb %p [%d]", skb, seq);
+
+ ASSERTCMP(seq, ==, call->tx_top + 1);
+
+ if (last)
+ annotation |= RXRPC_TX_ANNO_LAST;
+
+ /* We have to set the timestamp before queueing as the retransmit
+ * algorithm can see the packet as soon as we queue it.
+ */
+ skb->tstamp = ktime_get_real();
+
+ ix = seq & RXRPC_RXTX_BUFF_MASK;
+ rxrpc_get_skb(skb, rxrpc_skb_tx_got);
+ call->rxtx_annotations[ix] = annotation;
+ smp_wmb();
+ call->rxtx_buffer[ix] = skb;
+ call->tx_top = seq;
+ if (last)
+ trace_rxrpc_transmit(call, rxrpc_transmit_queue_last);
+ else
+ trace_rxrpc_transmit(call, rxrpc_transmit_queue);
+
+ if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
+ _debug("________awaiting reply/ACK__________");
+ write_lock_bh(&call->state_lock);
+ switch (call->state) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+ break;
+ case RXRPC_CALL_SERVER_ACK_REQUEST:
+ call->state = RXRPC_CALL_SERVER_SEND_REPLY;
+ if (!last)
+ break;
+ case RXRPC_CALL_SERVER_SEND_REPLY:
+ call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
+ break;
+ default:
+ break;
+ }
+ write_unlock_bh(&call->state_lock);
+ }
+
+ if (seq == 1 && rxrpc_is_client_call(call))
+ rxrpc_expose_client_call(call);
+
+ ret = rxrpc_send_data_packet(call, skb, false);
+ if (ret < 0) {
+ _debug("need instant resend %d", ret);
+ rxrpc_instant_resend(call, ix);
+ } else {
+ ktime_t now = ktime_get_real(), resend_at;
+
+ resend_at = ktime_add_ms(now, rxrpc_resend_timeout);
+
+ if (ktime_before(resend_at, call->resend_at)) {
+ call->resend_at = resend_at;
+ rxrpc_set_timer(call, rxrpc_timer_set_for_send, now);
+ }
+ }
+
+ rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+ _leave("");
+}
+
+/*
+ * send data through a socket
+ * - must be called in process context
+ * - caller holds the socket locked
+ */
+static int rxrpc_send_data(struct rxrpc_sock *rx,
+ struct rxrpc_call *call,
+ struct msghdr *msg, size_t len)
+{
+ struct rxrpc_skb_priv *sp;
+ struct sk_buff *skb;
+ struct sock *sk = &rx->sk;
+ long timeo;
+ bool more;
+ int ret, copied;
+
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
+ /* this should be in poll */
+ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
+
+ if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))
+ return -EPIPE;
+
+ more = msg->msg_flags & MSG_MORE;
+
+ skb = call->tx_pending;
+ call->tx_pending = NULL;
+ rxrpc_see_skb(skb, rxrpc_skb_tx_seen);
+
+ copied = 0;
+ do {
+ /* Check to see if there's a ping ACK to reply to. */
+ if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE)
+ rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK);
+
+ if (!skb) {
+ size_t size, chunk, max, space;
+
+ _debug("alloc");
+
+ if (call->tx_top - call->tx_hard_ack >=
+ min_t(unsigned int, call->tx_winsize,
+ call->cong_cwnd + call->cong_extra)) {
+ ret = -EAGAIN;
+ if (msg->msg_flags & MSG_DONTWAIT)
+ goto maybe_error;
+ ret = rxrpc_wait_for_tx_window(rx, call,
+ &timeo);
+ if (ret < 0)
+ goto maybe_error;
+ }
+
+ max = RXRPC_JUMBO_DATALEN;
+ max -= call->conn->security_size;
+ max &= ~(call->conn->size_align - 1UL);
+
+ chunk = max;
+ if (chunk > msg_data_left(msg) && !more)
+ chunk = msg_data_left(msg);
+
+ space = chunk + call->conn->size_align;
+ space &= ~(call->conn->size_align - 1UL);
+
+ size = space + call->conn->security_size;
+
+ _debug("SIZE: %zu/%zu/%zu", chunk, space, size);
+
+ /* create a buffer that we can retain until it's ACK'd */
+ skb = sock_alloc_send_skb(
+ sk, size, msg->msg_flags & MSG_DONTWAIT, &ret);
+ if (!skb)
+ goto maybe_error;
+
+ rxrpc_new_skb(skb, rxrpc_skb_tx_new);
+
+ _debug("ALLOC SEND %p", skb);
+
+ ASSERTCMP(skb->mark, ==, 0);
+
+ _debug("HS: %u", call->conn->security_size);
+ skb_reserve(skb, call->conn->security_size);
+ skb->len += call->conn->security_size;
+
+ sp = rxrpc_skb(skb);
+ sp->remain = chunk;
+ if (sp->remain > skb_tailroom(skb))
+ sp->remain = skb_tailroom(skb);
+
+ _net("skb: hr %d, tr %d, hl %d, rm %d",
+ skb_headroom(skb),
+ skb_tailroom(skb),
+ skb_headlen(skb),
+ sp->remain);
+
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ }
+
+ _debug("append");
+ sp = rxrpc_skb(skb);
+
+ /* append next segment of data to the current buffer */
+ if (msg_data_left(msg) > 0) {
+ int copy = skb_tailroom(skb);
+ ASSERTCMP(copy, >, 0);
+ if (copy > msg_data_left(msg))
+ copy = msg_data_left(msg);
+ if (copy > sp->remain)
+ copy = sp->remain;
+
+ _debug("add");
+ ret = skb_add_data(skb, &msg->msg_iter, copy);
+ _debug("added");
+ if (ret < 0)
+ goto efault;
+ sp->remain -= copy;
+ skb->mark += copy;
+ copied += copy;
+ }
+
+ /* check for the far side aborting the call or a network error
+ * occurring */
+ if (call->state == RXRPC_CALL_COMPLETE)
+ goto call_terminated;
+
+ /* add the packet to the send queue if it's now full */
+ if (sp->remain <= 0 ||
+ (msg_data_left(msg) == 0 && !more)) {
+ struct rxrpc_connection *conn = call->conn;
+ uint32_t seq;
+ size_t pad;
+
+ /* pad out if we're using security */
+ if (conn->security_ix) {
+ pad = conn->security_size + skb->mark;
+ pad = conn->size_align - pad;
+ pad &= conn->size_align - 1;
+ _debug("pad %zu", pad);
+ if (pad)
+ memset(skb_put(skb, pad), 0, pad);
+ }
+
+ seq = call->tx_top + 1;
+
+ sp->hdr.seq = seq;
+ sp->hdr._rsvd = 0;
+ sp->hdr.flags = conn->out_clientflag;
+
+ if (msg_data_left(msg) == 0 && !more)
+ sp->hdr.flags |= RXRPC_LAST_PACKET;
+ else if (call->tx_top - call->tx_hard_ack <
+ call->tx_winsize)
+ sp->hdr.flags |= RXRPC_MORE_PACKETS;
+
+ ret = conn->security->secure_packet(
+ call, skb, skb->mark, skb->head);
+ if (ret < 0)
+ goto out;
+
+ rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more);
+ skb = NULL;
+ }
+ } while (msg_data_left(msg) > 0);
+
+success:
+ ret = copied;
+out:
+ call->tx_pending = skb;
+ _leave(" = %d", ret);
+ return ret;
+
+call_terminated:
+ rxrpc_free_skb(skb, rxrpc_skb_tx_freed);
+ _leave(" = %d", -call->error);
+ return -call->error;
+
+maybe_error:
+ if (copied)
+ goto success;
+ goto out;
+
+efault:
+ ret = -EFAULT;
+ goto out;
+}
+
+/*
+ * extract control messages from the sendmsg() control buffer
+ */
+static int rxrpc_sendmsg_cmsg(struct msghdr *msg,
+ unsigned long *user_call_ID,
+ enum rxrpc_command *command,
+ u32 *abort_code,
+ bool *_exclusive)
+{
+ struct cmsghdr *cmsg;
+ bool got_user_ID = false;
+ int len;
+
+ *command = RXRPC_CMD_SEND_DATA;
+
+ if (msg->msg_controllen == 0)
+ return -EINVAL;
+
+ for_each_cmsghdr(cmsg, msg) {
+ if (!CMSG_OK(msg, cmsg))
+ return -EINVAL;
+
+ len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr));
+ _debug("CMSG %d, %d, %d",
+ cmsg->cmsg_level, cmsg->cmsg_type, len);
+
+ if (cmsg->cmsg_level != SOL_RXRPC)
+ continue;
+
+ switch (cmsg->cmsg_type) {
+ case RXRPC_USER_CALL_ID:
+ if (msg->msg_flags & MSG_CMSG_COMPAT) {
+ if (len != sizeof(u32))
+ return -EINVAL;
+ *user_call_ID = *(u32 *) CMSG_DATA(cmsg);
+ } else {
+ if (len != sizeof(unsigned long))
+ return -EINVAL;
+ *user_call_ID = *(unsigned long *)
+ CMSG_DATA(cmsg);
+ }
+ _debug("User Call ID %lx", *user_call_ID);
+ got_user_ID = true;
+ break;
+
+ case RXRPC_ABORT:
+ if (*command != RXRPC_CMD_SEND_DATA)
+ return -EINVAL;
+ *command = RXRPC_CMD_SEND_ABORT;
+ if (len != sizeof(*abort_code))
+ return -EINVAL;
+ *abort_code = *(unsigned int *) CMSG_DATA(cmsg);
+ _debug("Abort %x", *abort_code);
+ if (*abort_code == 0)
+ return -EINVAL;
+ break;
+
+ case RXRPC_ACCEPT:
+ if (*command != RXRPC_CMD_SEND_DATA)
+ return -EINVAL;
+ *command = RXRPC_CMD_ACCEPT;
+ if (len != 0)
+ return -EINVAL;
+ break;
+
+ case RXRPC_EXCLUSIVE_CALL:
+ *_exclusive = true;
+ if (len != 0)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
+ if (!got_user_ID)
+ return -EINVAL;
+ _leave(" = 0");
+ return 0;
+}
+
+/*
+ * Create a new client call for sendmsg().
+ */
+static struct rxrpc_call *
+rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
+ unsigned long user_call_ID, bool exclusive)
+{
+ struct rxrpc_conn_parameters cp;
+ struct rxrpc_call *call;
+ struct key *key;
+
+ DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name);
+
+ _enter("");
+
+ if (!msg->msg_name)
+ return ERR_PTR(-EDESTADDRREQ);
+
+ key = rx->key;
+ if (key && !rx->key->payload.data[0])
+ key = NULL;
+
+ memset(&cp, 0, sizeof(cp));
+ cp.local = rx->local;
+ cp.key = rx->key;
+ cp.security_level = rx->min_sec_level;
+ cp.exclusive = rx->exclusive | exclusive;
+ cp.service_id = srx->srx_service;
+ call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL);
+
+ _leave(" = %p\n", call);
+ return call;
+}
+
+/*
+ * send a message forming part of a client call through an RxRPC socket
+ * - caller holds the socket locked
+ * - the socket may be either a client socket or a server socket
+ */
+int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
+{
+ enum rxrpc_command cmd;
+ struct rxrpc_call *call;
+ unsigned long user_call_ID = 0;
+ bool exclusive = false;
+ u32 abort_code = 0;
+ int ret;
+
+ _enter("");
+
+ ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code,
+ &exclusive);
+ if (ret < 0)
+ return ret;
+
+ if (cmd == RXRPC_CMD_ACCEPT) {
+ if (rx->sk.sk_state != RXRPC_SERVER_LISTENING)
+ return -EINVAL;
+ call = rxrpc_accept_call(rx, user_call_ID, NULL);
+ if (IS_ERR(call))
+ return PTR_ERR(call);
+ rxrpc_put_call(call, rxrpc_call_put);
+ return 0;
+ }
+
+ call = rxrpc_find_call_by_user_ID(rx, user_call_ID);
+ if (!call) {
+ if (cmd != RXRPC_CMD_SEND_DATA)
+ return -EBADSLT;
+ call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID,
+ exclusive);
+ if (IS_ERR(call))
+ return PTR_ERR(call);
+ }
+
+ _debug("CALL %d USR %lx ST %d on CONN %p",
+ call->debug_id, call->user_call_ID, call->state, call->conn);
+
+ if (call->state >= RXRPC_CALL_COMPLETE) {
+ /* it's too late for this call */
+ ret = -ESHUTDOWN;
+ } else if (cmd == RXRPC_CMD_SEND_ABORT) {
+ ret = 0;
+ if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED))
+ ret = rxrpc_send_call_packet(call,
+ RXRPC_PACKET_TYPE_ABORT);
+ } else if (cmd != RXRPC_CMD_SEND_DATA) {
+ ret = -EINVAL;
+ } else if (rxrpc_is_client_call(call) &&
+ call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) {
+ /* request phase complete for this client call */
+ ret = -EPROTO;
+ } else if (rxrpc_is_service_call(call) &&
+ call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+ /* Reply phase not begun or not complete for service call. */
+ ret = -EPROTO;
+ } else {
+ ret = rxrpc_send_data(rx, call, msg, len);
+ }
+
+ rxrpc_put_call(call, rxrpc_call_put);
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/**
+ * rxrpc_kernel_send_data - Allow a kernel service to send data on a call
+ * @sock: The socket the call is on
+ * @call: The call to send data through
+ * @msg: The data to send
+ * @len: The amount of data to send
+ *
+ * Allow a kernel service to send data on a call. The call must be in an state
+ * appropriate to sending data. No control data should be supplied in @msg,
+ * nor should an address be supplied. MSG_MORE should be flagged if there's
+ * more data to come, otherwise this data will end the transmission phase.
+ */
+int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
+ struct msghdr *msg, size_t len)
+{
+ int ret;
+
+ _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
+
+ ASSERTCMP(msg->msg_name, ==, NULL);
+ ASSERTCMP(msg->msg_control, ==, NULL);
+
+ lock_sock(sock->sk);
+
+ _debug("CALL %d USR %lx ST %d on CONN %p",
+ call->debug_id, call->user_call_ID, call->state, call->conn);
+
+ if (call->state >= RXRPC_CALL_COMPLETE) {
+ ret = -ESHUTDOWN; /* it's too late for this call */
+ } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
+ call->state != RXRPC_CALL_SERVER_ACK_REQUEST &&
+ call->state != RXRPC_CALL_SERVER_SEND_REPLY) {
+ ret = -EPROTO; /* request phase complete for this client call */
+ } else {
+ ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len);
+ }
+
+ release_sock(sock->sk);
+ _leave(" = %d", ret);
+ return ret;
+}
+EXPORT_SYMBOL(rxrpc_kernel_send_data);
+
+/**
+ * rxrpc_kernel_abort_call - Allow a kernel service to abort a call
+ * @sock: The socket the call is on
+ * @call: The call to be aborted
+ * @abort_code: The abort code to stick into the ABORT packet
+ * @error: Local error value
+ * @why: 3-char string indicating why.
+ *
+ * Allow a kernel service to abort a call, if it's still in an abortable state.
+ */
+void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
+ u32 abort_code, int error, const char *why)
+{
+ _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
+
+ lock_sock(sock->sk);
+
+ if (rxrpc_abort_call(why, call, 0, abort_code, error))
+ rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT);
+
+ release_sock(sock->sk);
+ _leave("");
+}
+
+EXPORT_SYMBOL(rxrpc_kernel_abort_call);
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 06c51d4b622d..67b02c45271b 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -18,148 +18,82 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
+#define select_skb_count(op) (op >= rxrpc_skb_tx_cleaned ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs)
+
/*
- * set up for the ACK at the end of the receive phase when we discard the final
- * receive phase data packet
- * - called with softirqs disabled
+ * Note the allocation or reception of a socket buffer.
*/
-static void rxrpc_request_final_ACK(struct rxrpc_call *call)
+void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
{
- /* the call may be aborted before we have a chance to ACK it */
- write_lock(&call->state_lock);
-
- switch (call->state) {
- case RXRPC_CALL_CLIENT_RECV_REPLY:
- call->state = RXRPC_CALL_CLIENT_FINAL_ACK;
- _debug("request final ACK");
-
- /* get an extra ref on the call for the final-ACK generator to
- * release */
- rxrpc_get_call(call);
- set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events);
- if (try_to_del_timer_sync(&call->ack_timer) >= 0)
- rxrpc_queue_call(call);
- break;
-
- case RXRPC_CALL_SERVER_RECV_REQUEST:
- call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
- default:
- break;
- }
-
- write_unlock(&call->state_lock);
+ const void *here = __builtin_return_address(0);
+ int n = atomic_inc_return(select_skb_count(op));
+ trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
}
/*
- * drop the bottom ACK off of the call ACK window and advance the window
+ * Note the re-emergence of a socket buffer from a queue or buffer.
*/
-static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
- struct rxrpc_skb_priv *sp)
+void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
{
- int loop;
- u32 seq;
-
- spin_lock_bh(&call->lock);
-
- _debug("hard ACK #%u", sp->hdr.seq);
-
- for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) {
- call->ackr_window[loop] >>= 1;
- call->ackr_window[loop] |=
- call->ackr_window[loop + 1] << (BITS_PER_LONG - 1);
- }
-
- seq = sp->hdr.seq;
- ASSERTCMP(seq, ==, call->rx_data_eaten + 1);
- call->rx_data_eaten = seq;
-
- if (call->ackr_win_top < UINT_MAX)
- call->ackr_win_top++;
-
- ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
- call->rx_data_post, >=, call->rx_data_recv);
- ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE,
- call->rx_data_recv, >=, call->rx_data_eaten);
-
- if (sp->hdr.flags & RXRPC_LAST_PACKET) {
- rxrpc_request_final_ACK(call);
- } else if (atomic_dec_and_test(&call->ackr_not_idle) &&
- test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) {
- /* We previously soft-ACK'd some received packets that have now
- * been consumed, so send a hard-ACK if no more packets are
- * immediately forthcoming to allow the transmitter to free up
- * its Tx bufferage.
- */
- _debug("send Rx idle ACK");
- __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial,
- false);
+ const void *here = __builtin_return_address(0);
+ if (skb) {
+ int n = atomic_read(select_skb_count(op));
+ trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
}
-
- spin_unlock_bh(&call->lock);
}
-/**
- * rxrpc_kernel_data_consumed - Record consumption of data message
- * @call: The call to which the message pertains.
- * @skb: Message holding data
- *
- * Record the consumption of a data message and generate an ACK if appropriate.
- * The call state is shifted if this was the final packet. The caller must be
- * in process context with no spinlocks held.
- *
- * TODO: Actually generate the ACK here rather than punting this to the
- * workqueue.
+/*
+ * Note the addition of a ref on a socket buffer.
*/
-void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb)
+void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- _enter("%d,%p{%u}", call->debug_id, skb, sp->hdr.seq);
-
- ASSERTCMP(sp->call, ==, call);
- ASSERTCMP(sp->hdr.type, ==, RXRPC_PACKET_TYPE_DATA);
-
- /* TODO: Fix the sequence number tracking */
- ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv);
- ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1);
- ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten);
-
- call->rx_data_recv = sp->hdr.seq;
- rxrpc_hard_ACK_data(call, sp);
+ const void *here = __builtin_return_address(0);
+ int n = atomic_inc_return(select_skb_count(op));
+ trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+ skb_get(skb);
}
-EXPORT_SYMBOL(rxrpc_kernel_data_consumed);
/*
- * Destroy a packet that has an RxRPC control buffer
+ * Note the destruction of a socket buffer.
*/
-void rxrpc_packet_destructor(struct sk_buff *skb)
+void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- struct rxrpc_call *call = sp->call;
-
- _enter("%p{%p}", skb, call);
-
- if (call) {
- if (atomic_dec_return(&call->skb_count) < 0)
- BUG();
- rxrpc_put_call(call);
- sp->call = NULL;
+ const void *here = __builtin_return_address(0);
+ if (skb) {
+ int n;
+ CHECK_SLAB_OKAY(&skb->users);
+ n = atomic_dec_return(select_skb_count(op));
+ trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+ kfree_skb(skb);
}
+}
- if (skb->sk)
- sock_rfree(skb);
- _leave("");
+/*
+ * Note the injected loss of a socket buffer.
+ */
+void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op)
+{
+ const void *here = __builtin_return_address(0);
+ if (skb) {
+ int n;
+ CHECK_SLAB_OKAY(&skb->users);
+ n = atomic_dec_return(select_skb_count(op));
+ trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here);
+ kfree_skb(skb);
+ }
}
-/**
- * rxrpc_kernel_free_skb - Free an RxRPC socket buffer
- * @skb: The socket buffer to be freed
- *
- * Let RxRPC free its own socket buffer, permitting it to maintain debug
- * accounting.
+/*
+ * Clear a queue of socket buffers.
*/
-void rxrpc_kernel_free_skb(struct sk_buff *skb)
+void rxrpc_purge_queue(struct sk_buff_head *list)
{
- rxrpc_free_skb(skb);
+ const void *here = __builtin_return_address(0);
+ struct sk_buff *skb;
+ while ((skb = skb_dequeue((list))) != NULL) {
+ int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged));
+ trace_rxrpc_skb(skb, rxrpc_skb_rx_purged,
+ atomic_read(&skb->users), n, here);
+ kfree_skb(skb);
+ }
}
-EXPORT_SYMBOL(rxrpc_kernel_free_skb);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 03ad08774d4e..34c706d2f79c 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -20,7 +20,7 @@ static const unsigned int one = 1;
static const unsigned int four = 4;
static const unsigned int thirtytwo = 32;
static const unsigned int n_65535 = 65535;
-static const unsigned int n_max_acks = RXRPC_MAXACKS;
+static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1;
/*
* RxRPC operating parameters.
@@ -35,7 +35,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.data = &rxrpc_requested_ack_delay,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
+ .proc_handler = proc_dointvec,
.extra1 = (void *)&zero,
},
{
@@ -43,7 +43,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.data = &rxrpc_soft_ack_delay,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
+ .proc_handler = proc_dointvec,
.extra1 = (void *)&one,
},
{
@@ -51,7 +51,7 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.data = &rxrpc_idle_ack_delay,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec_ms_jiffies,
+ .proc_handler = proc_dointvec,
.extra1 = (void *)&one,
},
{
@@ -59,6 +59,22 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.data = &rxrpc_resend_timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
+ .proc_handler = proc_dointvec,
+ .extra1 = (void *)&one,
+ },
+ {
+ .procname = "idle_conn_expiry",
+ .data = &rxrpc_conn_idle_client_expiry,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_ms_jiffies,
+ .extra1 = (void *)&one,
+ },
+ {
+ .procname = "idle_conn_fast_expiry",
+ .data = &rxrpc_conn_idle_client_fast_expiry,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
.proc_handler = proc_dointvec_ms_jiffies,
.extra1 = (void *)&one,
},
@@ -69,29 +85,28 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.data = &rxrpc_max_call_lifetime,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
+ .proc_handler = proc_dointvec,
.extra1 = (void *)&one,
},
+
+ /* Non-time values */
{
- .procname = "dead_call_expiry",
- .data = &rxrpc_dead_call_expiry,
+ .procname = "max_client_conns",
+ .data = &rxrpc_max_client_connections,
.maxlen = sizeof(unsigned int),
.mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- .extra1 = (void *)&one,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = (void *)&rxrpc_reap_client_connections,
},
-
- /* Values measured in seconds */
{
- .procname = "connection_expiry",
- .data = &rxrpc_connection_expiry,
+ .procname = "reap_client_conns",
+ .data = &rxrpc_reap_client_connections,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = (void *)&one,
+ .extra2 = (void *)&rxrpc_max_client_connections,
},
-
- /* Non-time values */
{
.procname = "max_backlog",
.data = &rxrpc_max_backlog,
diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c
index b88914d53ca5..ff7af71c4b49 100644
--- a/net/rxrpc/utils.c
+++ b/net/rxrpc/utils.c
@@ -30,6 +30,7 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
return 0;
+#ifdef CONFIG_AF_RXRPC_IPV6
case ETH_P_IPV6:
srx->transport_type = SOCK_DGRAM;
srx->transport_len = sizeof(srx->transport.sin6);
@@ -37,6 +38,7 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb)
srx->transport.sin6.sin6_port = udp_hdr(skb)->source;
srx->transport.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
return 0;
+#endif
default:
pr_warn_ratelimited("AF_RXRPC: Unknown eth protocol %u\n",