diff options
author | David S. Miller <davem@davemloft.net> | 2020-06-29 17:29:38 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2020-06-29 17:29:38 -0700 |
commit | 13f1555c1b4c49b8ab9dd80b7353deec0526ccac (patch) | |
tree | d718dfc00e260081387271c1911143a6e4dda717 /net | |
parent | e11703330a5df48e1fd4167e4d22a102e517253e (diff) | |
parent | 8a05661b2b266b6dc45af255b3037b00ef31d85d (diff) | |
download | linux-13f1555c1b4c49b8ab9dd80b7353deec0526ccac.tar.gz linux-13f1555c1b4c49b8ab9dd80b7353deec0526ccac.tar.bz2 linux-13f1555c1b4c49b8ab9dd80b7353deec0526ccac.zip |
Merge branch 'MPTCP-improve-fallback-to-TCP'
Davide Caratti says:
====================
MPTCP: improve fallback to TCP
there are situations where MPTCP sockets should fall-back to regular TCP:
this series reworks the fallback code to pursue the following goals:
1) cleanup the non fallback code, removing most of 'if (<fallback>)' in
the data path
2) improve performance for non-fallback sockets, avoiding locks in poll()
further work will also leverage on this changes to achieve:
a) more consistent behavior of gestockopt()/setsockopt() on passive sockets
after fallback
b) support for "infinite maps" as per RFC8684, section 3.7
the series is made of the following items:
- patch 1 lets sendmsg() / recvmsg() / poll() use the main socket also
after fallback
- patch 2 fixes 'simultaneous connect' scenario after fallback. The
problem was present also before the rework, but the fix is much easier
to implement after patch 1
- patch 3, 4, 5 are clean-ups for code that is no more needed after the
fallback rework
- patch 6 fixes a race condition between close() and poll(). The problem
was theoretically present before the rework, but it became almost
systematic after patch 1
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/mptcp/options.c | 9 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 267 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 43 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 57 |
4 files changed, 175 insertions, 201 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c index df9a51425c6f..b96d3660562f 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -624,6 +624,9 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, opts->suboptions = 0; + if (unlikely(mptcp_check_fallback(sk))) + return false; + if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts)) ret = true; else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining, @@ -714,7 +717,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk, */ if (!mp_opt->mp_capable) { subflow->mp_capable = 0; - tcp_sk(sk)->is_mptcp = 0; + pr_fallback(msk); + __mptcp_do_fallback(msk); return false; } @@ -814,6 +818,9 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, struct mptcp_options_received mp_opt; struct mptcp_ext *mpext; + if (__mptcp_check_fallback(msk)) + return; + mptcp_get_options(skb, &mp_opt); if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index be09fd525f8f..28ec26d97f96 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -52,18 +52,10 @@ static struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk) return msk->subflow; } -static bool __mptcp_needs_tcp_fallback(const struct mptcp_sock *msk) -{ - return msk->first && !sk_is_mptcp(msk->first); -} - -static struct socket *mptcp_is_tcpsk(struct sock *sk) +static bool mptcp_is_tcpsk(struct sock *sk) { struct socket *sock = sk->sk_socket; - if (sock->sk != sk) - return NULL; - if (unlikely(sk->sk_prot == &tcp_prot)) { /* we are being invoked after mptcp_accept() has * accepted a non-mp-capable flow: sk is a tcp_sk, @@ -73,59 +65,37 @@ static struct socket *mptcp_is_tcpsk(struct sock *sk) * bypass mptcp. */ sock->ops = &inet_stream_ops; - return sock; + return true; #if IS_ENABLED(CONFIG_MPTCP_IPV6) } else if (unlikely(sk->sk_prot == &tcpv6_prot)) { sock->ops = &inet6_stream_ops; - return sock; + return true; #endif } - return NULL; + return false; } -static struct socket *__mptcp_tcp_fallback(struct mptcp_sock *msk) +static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk) { - struct socket *sock; - sock_owned_by_me((const struct sock *)msk); - sock = mptcp_is_tcpsk((struct sock *)msk); - if (unlikely(sock)) - return sock; - - if (likely(!__mptcp_needs_tcp_fallback(msk))) + if (likely(!__mptcp_check_fallback(msk))) return NULL; - return msk->subflow; -} - -static bool __mptcp_can_create_subflow(const struct mptcp_sock *msk) -{ - return !msk->first; + return msk->first; } -static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state) +static int __mptcp_socket_create(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; struct socket *ssock; int err; - ssock = __mptcp_tcp_fallback(msk); - if (unlikely(ssock)) - return ssock; - - ssock = __mptcp_nmpc_socket(msk); - if (ssock) - goto set_state; - - if (!__mptcp_can_create_subflow(msk)) - return ERR_PTR(-EINVAL); - err = mptcp_subflow_create_socket(sk, &ssock); if (err) - return ERR_PTR(err); + return err; msk->first = ssock->sk; msk->subflow = ssock; @@ -133,10 +103,12 @@ static struct socket *__mptcp_socket_create(struct mptcp_sock *msk, int state) list_add(&subflow->node, &msk->conn_list); subflow->request_mptcp = 1; -set_state: - if (state != MPTCP_SAME_STATE) - inet_sk_state_store(sk, state); - return ssock; + /* accept() will wait on first subflow sk_wq, and we always wakes up + * via msk->sk_socket + */ + RCU_INIT_POINTER(msk->first->sk_wq, &sk->sk_socket->wq); + + return 0; } static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk, @@ -229,6 +201,15 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, if (!skb) break; + if (__mptcp_check_fallback(msk)) { + /* if we are running under the workqueue, TCP could have + * collapsed skbs between dummy map creation and now + * be sure to adjust the size + */ + map_remaining = skb->len; + subflow->map_data_len = skb->len; + } + offset = seq - TCP_SKB_CB(skb)->seq; fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; if (fin) { @@ -466,8 +447,15 @@ static void mptcp_clean_una(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_data_frag *dtmp, *dfrag; - u64 snd_una = atomic64_read(&msk->snd_una); bool cleaned = false; + u64 snd_una; + + /* on fallback we just need to ignore snd_una, as this is really + * plain TCP + */ + if (__mptcp_check_fallback(msk)) + atomic64_set(&msk->snd_una, msk->write_seq); + snd_una = atomic64_read(&msk->snd_una); list_for_each_entry_safe(dfrag, dtmp, &msk->rtx_queue, list) { if (after64(dfrag->data_seq + dfrag->data_len, snd_una)) @@ -740,7 +728,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) int mss_now = 0, size_goal = 0, ret = 0; struct mptcp_sock *msk = mptcp_sk(sk); struct page_frag *pfrag; - struct socket *ssock; size_t copied = 0; struct sock *ssk; bool tx_ok; @@ -759,15 +746,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) goto out; } -fallback: - ssock = __mptcp_tcp_fallback(msk); - if (unlikely(ssock)) { - release_sock(sk); - pr_debug("fallback passthrough"); - ret = sock_sendmsg(ssock, msg); - return ret >= 0 ? ret + copied : (copied ? copied : ret); - } - pfrag = sk_page_frag(sk); restart: mptcp_clean_una(sk); @@ -819,17 +797,6 @@ wait_for_sndbuf: } break; } - if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) { - /* Can happen for passive sockets: - * 3WHS negotiated MPTCP, but first packet after is - * plain TCP (e.g. due to middlebox filtering unknown - * options). - * - * Fall back to TCP. - */ - release_sock(ssk); - goto fallback; - } copied += ret; @@ -972,7 +939,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int flags, int *addr_len) { struct mptcp_sock *msk = mptcp_sk(sk); - struct socket *ssock; int copied = 0; int target; long timeo; @@ -981,16 +947,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return -EOPNOTSUPP; lock_sock(sk); - ssock = __mptcp_tcp_fallback(msk); - if (unlikely(ssock)) { -fallback: - release_sock(sk); - pr_debug("fallback-read subflow=%p", - mptcp_subflow_ctx(ssock->sk)); - copied = sock_recvmsg(ssock, msg, flags); - return copied; - } - timeo = sock_rcvtimeo(sk, nonblock); len = min_t(size_t, len, INT_MAX); @@ -1056,9 +1012,6 @@ fallback: pr_debug("block timeout %ld", timeo); mptcp_wait_data(sk, &timeo); - ssock = __mptcp_tcp_fallback(msk); - if (unlikely(ssock)) - goto fallback; } if (skb_queue_empty(&sk->sk_receive_queue)) { @@ -1283,6 +1236,10 @@ static int mptcp_init_sock(struct sock *sk) if (ret) return ret; + ret = __mptcp_socket_create(mptcp_sk(sk)); + if (ret) + return ret; + sk_sockets_allocated_inc(sk); sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[2]; @@ -1335,8 +1292,6 @@ static void mptcp_subflow_shutdown(struct sock *ssk, int how, break; } - /* Wake up anyone sleeping in poll. */ - ssk->sk_state_change(ssk); release_sock(ssk); } @@ -1487,7 +1442,6 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, return NULL; pr_debug("msk=%p, subflow is mptcp=%d", msk, sk_is_mptcp(newsk)); - if (sk_is_mptcp(newsk)) { struct mptcp_subflow_context *subflow; struct sock *new_mptcp_sock; @@ -1544,7 +1498,7 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { struct mptcp_sock *msk = mptcp_sk(sk); - struct socket *ssock; + struct sock *ssk; pr_debug("msk=%p", msk); @@ -1555,11 +1509,10 @@ static int mptcp_setsockopt(struct sock *sk, int level, int optname, * to the one remaining subflow. */ lock_sock(sk); - ssock = __mptcp_tcp_fallback(msk); + ssk = __mptcp_tcp_fallback(msk); release_sock(sk); - if (ssock) - return tcp_setsockopt(ssock->sk, level, optname, optval, - optlen); + if (ssk) + return tcp_setsockopt(ssk, level, optname, optval, optlen); return -EOPNOTSUPP; } @@ -1568,7 +1521,7 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *option) { struct mptcp_sock *msk = mptcp_sk(sk); - struct socket *ssock; + struct sock *ssk; pr_debug("msk=%p", msk); @@ -1579,11 +1532,10 @@ static int mptcp_getsockopt(struct sock *sk, int level, int optname, * to the one remaining subflow. */ lock_sock(sk); - ssock = __mptcp_tcp_fallback(msk); + ssk = __mptcp_tcp_fallback(msk); release_sock(sk); - if (ssock) - return tcp_getsockopt(ssock->sk, level, optname, optval, - option); + if (ssk) + return tcp_getsockopt(ssk, level, optname, optval, option); return -EOPNOTSUPP; } @@ -1660,12 +1612,6 @@ void mptcp_finish_connect(struct sock *ssk) sk = subflow->conn; msk = mptcp_sk(sk); - if (!subflow->mp_capable) { - MPTCP_INC_STATS(sock_net(sk), - MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); - return; - } - pr_debug("msk=%p, token=%u", sk, subflow->token); mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq); @@ -1781,9 +1727,9 @@ static int mptcp_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) int err; lock_sock(sock->sk); - ssock = __mptcp_socket_create(msk, MPTCP_SAME_STATE); - if (IS_ERR(ssock)) { - err = PTR_ERR(ssock); + ssock = __mptcp_nmpc_socket(msk); + if (!ssock) { + err = -EINVAL; goto unlock; } @@ -1813,13 +1759,14 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, goto do_connect; } - mptcp_token_destroy(msk); - ssock = __mptcp_socket_create(msk, TCP_SYN_SENT); - if (IS_ERR(ssock)) { - err = PTR_ERR(ssock); + ssock = __mptcp_nmpc_socket(msk); + if (!ssock) { + err = -EINVAL; goto unlock; } + mptcp_token_destroy(msk); + inet_sk_state_store(sock->sk, TCP_SYN_SENT); subflow = mptcp_subflow_ctx(ssock->sk); #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -1848,42 +1795,6 @@ unlock: return err; } -static int mptcp_v4_getname(struct socket *sock, struct sockaddr *uaddr, - int peer) -{ - if (sock->sk->sk_prot == &tcp_prot) { - /* we are being invoked from __sys_accept4, after - * mptcp_accept() has just accepted a non-mp-capable - * flow: sk is a tcp_sk, not an mptcp one. - * - * Hand the socket over to tcp so all further socket ops - * bypass mptcp. - */ - sock->ops = &inet_stream_ops; - } - - return inet_getname(sock, uaddr, peer); -} - -#if IS_ENABLED(CONFIG_MPTCP_IPV6) -static int mptcp_v6_getname(struct socket *sock, struct sockaddr *uaddr, - int peer) -{ - if (sock->sk->sk_prot == &tcpv6_prot) { - /* we are being invoked from __sys_accept4 after - * mptcp_accept() has accepted a non-mp-capable - * subflow: sk is a tcp_sk, not mptcp. - * - * Hand the socket over to tcp so all further - * socket ops bypass mptcp. - */ - sock->ops = &inet6_stream_ops; - } - - return inet6_getname(sock, uaddr, peer); -} -#endif - static int mptcp_listen(struct socket *sock, int backlog) { struct mptcp_sock *msk = mptcp_sk(sock->sk); @@ -1893,13 +1804,14 @@ static int mptcp_listen(struct socket *sock, int backlog) pr_debug("msk=%p", msk); lock_sock(sock->sk); - mptcp_token_destroy(msk); - ssock = __mptcp_socket_create(msk, TCP_LISTEN); - if (IS_ERR(ssock)) { - err = PTR_ERR(ssock); + ssock = __mptcp_nmpc_socket(msk); + if (!ssock) { + err = -EINVAL; goto unlock; } + mptcp_token_destroy(msk); + inet_sk_state_store(sock->sk, TCP_LISTEN); sock_set_flag(sock->sk, SOCK_RCU_FREE); err = ssock->ops->listen(ssock, backlog); @@ -1912,15 +1824,6 @@ unlock: return err; } -static bool is_tcp_proto(const struct proto *p) -{ -#if IS_ENABLED(CONFIG_MPTCP_IPV6) - return p == &tcp_prot || p == &tcpv6_prot; -#else - return p == &tcp_prot; -#endif -} - static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, int flags, bool kern) { @@ -1938,11 +1841,12 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, if (!ssock) goto unlock_fail; + clear_bit(MPTCP_DATA_READY, &msk->flags); sock_hold(ssock->sk); release_sock(sock->sk); err = ssock->ops->accept(sock, newsock, flags, kern); - if (err == 0 && !is_tcp_proto(newsock->sk->sk_prot)) { + if (err == 0 && !mptcp_is_tcpsk(newsock->sk)) { struct mptcp_sock *msk = mptcp_sk(newsock->sk); struct mptcp_subflow_context *subflow; @@ -1958,6 +1862,8 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, } } + if (inet_csk_listen_poll(ssock->sk)) + set_bit(MPTCP_DATA_READY, &msk->flags); sock_put(ssock->sk); return err; @@ -1966,39 +1872,36 @@ unlock_fail: return -EINVAL; } +static __poll_t mptcp_check_readable(struct mptcp_sock *msk) +{ + return test_bit(MPTCP_DATA_READY, &msk->flags) ? EPOLLIN | EPOLLRDNORM : + 0; +} + static __poll_t mptcp_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait) { struct sock *sk = sock->sk; struct mptcp_sock *msk; - struct socket *ssock; __poll_t mask = 0; + int state; msk = mptcp_sk(sk); - lock_sock(sk); - ssock = __mptcp_tcp_fallback(msk); - if (!ssock) - ssock = __mptcp_nmpc_socket(msk); - if (ssock) { - mask = ssock->ops->poll(file, ssock, wait); - release_sock(sk); - return mask; - } - - release_sock(sk); sock_poll_wait(file, sock, wait); - lock_sock(sk); - if (test_bit(MPTCP_DATA_READY, &msk->flags)) - mask = EPOLLIN | EPOLLRDNORM; - if (sk_stream_is_writeable(sk) && - test_bit(MPTCP_SEND_SPACE, &msk->flags)) - mask |= EPOLLOUT | EPOLLWRNORM; + state = inet_sk_state_load(sk); + if (state == TCP_LISTEN) + return mptcp_check_readable(msk); + + if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) { + mask |= mptcp_check_readable(msk); + if (sk_stream_is_writeable(sk) && + test_bit(MPTCP_SEND_SPACE, &msk->flags)) + mask |= EPOLLOUT | EPOLLWRNORM; + } if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; - release_sock(sk); - return mask; } @@ -2006,18 +1909,11 @@ static int mptcp_shutdown(struct socket *sock, int how) { struct mptcp_sock *msk = mptcp_sk(sock->sk); struct mptcp_subflow_context *subflow; - struct socket *ssock; int ret = 0; pr_debug("sk=%p, how=%d", msk, how); lock_sock(sock->sk); - ssock = __mptcp_tcp_fallback(msk); - if (ssock) { - release_sock(sock->sk); - return inet_shutdown(ssock, how); - } - if (how == SHUT_WR || how == SHUT_RDWR) inet_sk_state_store(sock->sk, TCP_FIN_WAIT1); @@ -2043,6 +1939,9 @@ static int mptcp_shutdown(struct socket *sock, int how) mptcp_subflow_shutdown(tcp_sk, how, 1, msk->write_seq); } + /* Wake up anyone sleeping in poll. */ + sock->sk->sk_state_change(sock->sk); + out_unlock: release_sock(sock->sk); @@ -2057,7 +1956,7 @@ static const struct proto_ops mptcp_stream_ops = { .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, .accept = mptcp_stream_accept, - .getname = mptcp_v4_getname, + .getname = inet_getname, .poll = mptcp_poll, .ioctl = inet_ioctl, .gettstamp = sock_gettstamp, @@ -2111,7 +2010,7 @@ static const struct proto_ops mptcp_v6_stream_ops = { .connect = mptcp_stream_connect, .socketpair = sock_no_socketpair, .accept = mptcp_stream_accept, - .getname = mptcp_v6_getname, + .getname = inet6_getname, .poll = mptcp_poll, .ioctl = inet6_ioctl, .gettstamp = sock_gettstamp, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index c05552e5fa23..1d05d9841b5c 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -89,6 +89,7 @@ #define MPTCP_SEND_SPACE 1 #define MPTCP_WORK_RTX 2 #define MPTCP_WORK_EOF 3 +#define MPTCP_FALLBACK_DONE 4 struct mptcp_options_received { u64 sndr_key; @@ -457,4 +458,46 @@ static inline bool before64(__u64 seq1, __u64 seq2) void mptcp_diag_subflow_init(struct tcp_ulp_ops *ops); +static inline bool __mptcp_check_fallback(struct mptcp_sock *msk) +{ + return test_bit(MPTCP_FALLBACK_DONE, &msk->flags); +} + +static inline bool mptcp_check_fallback(struct sock *sk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + + return __mptcp_check_fallback(msk); +} + +static inline void __mptcp_do_fallback(struct mptcp_sock *msk) +{ + if (test_bit(MPTCP_FALLBACK_DONE, &msk->flags)) { + pr_debug("TCP fallback already done (msk=%p)", msk); + return; + } + set_bit(MPTCP_FALLBACK_DONE, &msk->flags); +} + +static inline void mptcp_do_fallback(struct sock *sk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_sock *msk = mptcp_sk(subflow->conn); + + __mptcp_do_fallback(msk); +} + +#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a) + +static inline bool subflow_simultaneous_connect(struct sock *sk) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct sock *parent = subflow->conn; + + return sk->sk_state == TCP_ESTABLISHED && + !mptcp_sk(parent)->pm.server_side && + !subflow->conn_finished; +} + #endif /* __MPTCP_PROTOCOL_H */ diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 102db8c88e97..548f9e347ff5 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -216,7 +216,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_options_received mp_opt; struct sock *parent = subflow->conn; - struct tcp_sock *tp = tcp_sk(sk); subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); @@ -230,6 +229,8 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) return; subflow->conn_finished = 1; + subflow->ssn_offset = TCP_SKB_CB(skb)->seq; + pr_debug("subflow=%p synack seq=%x", subflow, subflow->ssn_offset); mptcp_get_options(skb, &mp_opt); if (subflow->request_mptcp && mp_opt.mp_capable) { @@ -245,21 +246,20 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, subflow->thmac, subflow->remote_nonce); } else { - tp->is_mptcp = 0; + if (subflow->request_mptcp) + MPTCP_INC_STATS(sock_net(sk), + MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); + mptcp_do_fallback(sk); + pr_fallback(mptcp_sk(subflow->conn)); } - if (!tp->is_mptcp) + if (mptcp_check_fallback(sk)) return; if (subflow->mp_capable) { pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk), subflow->remote_key); mptcp_finish_connect(sk); - - if (skb) { - pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq); - subflow->ssn_offset = TCP_SKB_CB(skb)->seq; - } } else if (subflow->mp_join) { u8 hmac[SHA256_DIGEST_SIZE]; @@ -279,9 +279,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN); - if (skb) - subflow->ssn_offset = TCP_SKB_CB(skb)->seq; - if (!mptcp_finish_join(sk)) goto do_reset; @@ -557,7 +554,8 @@ enum mapping_status { MAPPING_OK, MAPPING_INVALID, MAPPING_EMPTY, - MAPPING_DATA_FIN + MAPPING_DATA_FIN, + MAPPING_DUMMY }; static u64 expand_seq(u64 old_seq, u16 old_data_len, u64 seq) @@ -621,6 +619,9 @@ static enum mapping_status get_mapping_status(struct sock *ssk) if (!skb) return MAPPING_EMPTY; + if (mptcp_check_fallback(ssk)) + return MAPPING_DUMMY; + mpext = mptcp_get_ext(skb); if (!mpext || !mpext->use_map) { if (!subflow->map_valid && !skb->len) { @@ -762,6 +763,16 @@ static bool subflow_check_data_avail(struct sock *ssk) ssk->sk_err = EBADMSG; goto fatal; } + if (status == MAPPING_DUMMY) { + __mptcp_do_fallback(msk); + skb = skb_peek(&ssk->sk_receive_queue); + subflow->map_valid = 1; + subflow->map_seq = READ_ONCE(msk->ack_seq); + subflow->map_data_len = skb->len; + subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq - + subflow->ssn_offset; + return true; + } if (status != MAPPING_OK) return false; @@ -885,14 +896,18 @@ static void subflow_data_ready(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct sock *parent = subflow->conn; + struct mptcp_sock *msk; - if (!subflow->mp_capable && !subflow->mp_join) { - subflow->tcp_data_ready(sk); - + msk = mptcp_sk(parent); + if (inet_sk_state_load(sk) == TCP_LISTEN) { + set_bit(MPTCP_DATA_READY, &msk->flags); parent->sk_data_ready(parent); return; } + WARN_ON_ONCE(!__mptcp_check_fallback(msk) && !subflow->mp_capable && + !subflow->mp_join); + if (mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); } @@ -1113,11 +1128,21 @@ static void subflow_state_change(struct sock *sk) __subflow_state_change(sk); + if (subflow_simultaneous_connect(sk)) { + mptcp_do_fallback(sk); + pr_fallback(mptcp_sk(parent)); + subflow->conn_finished = 1; + if (inet_sk_state_load(parent) == TCP_SYN_SENT) { + inet_sk_state_store(parent, TCP_ESTABLISHED); + parent->sk_state_change(parent); + } + } + /* as recvmsg() does not acquire the subflow socket for ssk selection * a fin packet carrying a DSS can be unnoticed if we don't trigger * the data available machinery here. */ - if (subflow->mp_capable && mptcp_subflow_data_available(sk)) + if (mptcp_subflow_data_available(sk)) mptcp_data_ready(parent, sk); if (!(parent->sk_shutdown & RCV_SHUTDOWN) && |