From e32d262c89e2b22cb0640223f953b548617ed8a6 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 8 Oct 2024 13:04:52 +0200 Subject: mptcp: handle consistently DSS corruption Bugged peer implementation can send corrupted DSS options, consistently hitting a few warning in the data path. Use DEBUG_NET assertions, to avoid the splat on some builds and handle consistently the error, dumping related MIBs and performing fallback and/or reset according to the subflow type. Fixes: 6771bfd9ee24 ("mptcp: update mptcp ack sequence from work queue") Cc: stable@vger.kernel.org Signed-off-by: Paolo Abeni Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241008-net-mptcp-fallback-fixes-v1-1-c6fb8e93e551@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/mib.c | 2 ++ net/mptcp/mib.h | 2 ++ net/mptcp/protocol.c | 24 +++++++++++++++++++++--- net/mptcp/subflow.c | 4 +++- 4 files changed, 28 insertions(+), 4 deletions(-) (limited to 'net/mptcp') diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 38c2efc82b94..ad88bd3c58df 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -32,6 +32,8 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPJoinSynTxBindErr", MPTCP_MIB_JOINSYNTXBINDERR), SNMP_MIB_ITEM("MPJoinSynTxConnectErr", MPTCP_MIB_JOINSYNTXCONNECTERR), SNMP_MIB_ITEM("DSSNotMatching", MPTCP_MIB_DSSNOMATCH), + SNMP_MIB_ITEM("DSSCorruptionFallback", MPTCP_MIB_DSSCORRUPTIONFALLBACK), + SNMP_MIB_ITEM("DSSCorruptionReset", MPTCP_MIB_DSSCORRUPTIONRESET), SNMP_MIB_ITEM("InfiniteMapTx", MPTCP_MIB_INFINITEMAPTX), SNMP_MIB_ITEM("InfiniteMapRx", MPTCP_MIB_INFINITEMAPRX), SNMP_MIB_ITEM("DSSNoMatchTCP", MPTCP_MIB_DSSTCPMISMATCH), diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index c8ffe18a8722..3206cdda8bb1 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -27,6 +27,8 @@ enum linux_mptcp_mib_field { MPTCP_MIB_JOINSYNTXBINDERR, /* Not able to bind() the address when sending a SYN + MP_JOIN */ MPTCP_MIB_JOINSYNTXCONNECTERR, /* Not able to connect() when sending a SYN + MP_JOIN */ MPTCP_MIB_DSSNOMATCH, /* Received a new mapping that did not match the previous one */ + MPTCP_MIB_DSSCORRUPTIONFALLBACK,/* DSS corruption detected, fallback */ + MPTCP_MIB_DSSCORRUPTIONRESET, /* DSS corruption detected, MPJ subflow reset */ MPTCP_MIB_INFINITEMAPTX, /* Sent an infinite mapping */ MPTCP_MIB_INFINITEMAPRX, /* Received an infinite mapping */ MPTCP_MIB_DSSTCPMISMATCH, /* DSS-mapping did not map with TCP's sequence numbers */ diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c2317919fc14..6d0e201c3eb2 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -620,6 +620,18 @@ static bool mptcp_check_data_fin(struct sock *sk) return ret; } +static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk) +{ + if (READ_ONCE(msk->allow_infinite_fallback)) { + MPTCP_INC_STATS(sock_net(ssk), + MPTCP_MIB_DSSCORRUPTIONFALLBACK); + mptcp_do_fallback(ssk); + } else { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET); + mptcp_subflow_reset(ssk); + } +} + static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, struct sock *ssk, unsigned int *bytes) @@ -692,10 +704,16 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, moved += len; seq += len; - if (WARN_ON_ONCE(map_remaining < len)) - break; + if (unlikely(map_remaining < len)) { + DEBUG_NET_WARN_ON_ONCE(1); + mptcp_dss_corruption(msk, ssk); + } } else { - WARN_ON_ONCE(!fin); + if (unlikely(!fin)) { + DEBUG_NET_WARN_ON_ONCE(1); + mptcp_dss_corruption(msk, ssk); + } + sk_eat_skb(ssk, skb); done = true; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 1040b3b9696b..e1046a696ab5 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -975,8 +975,10 @@ static bool skb_is_fully_mapped(struct sock *ssk, struct sk_buff *skb) unsigned int skb_consumed; skb_consumed = tcp_sk(ssk)->copied_seq - TCP_SKB_CB(skb)->seq; - if (WARN_ON_ONCE(skb_consumed >= skb->len)) + if (unlikely(skb_consumed >= skb->len)) { + DEBUG_NET_WARN_ON_ONCE(1); return true; + } return skb->len - skb_consumed <= subflow->map_data_len - mptcp_subflow_get_map_offset(subflow); -- cgit v1.2.3 From 119d51e225febc8152476340a880f5415a01e99e Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 8 Oct 2024 13:04:54 +0200 Subject: mptcp: fallback when MPTCP opts are dropped after 1st data As reported by Christoph [1], before this patch, an MPTCP connection was wrongly reset when a host received a first data packet with MPTCP options after the 3wHS, but got the next ones without. According to the MPTCP v1 specs [2], a fallback should happen in this case, because the host didn't receive a DATA_ACK from the other peer, nor receive data for more than the initial window which implies a DATA_ACK being received by the other peer. The patch here re-uses the same logic as the one used in other places: by looking at allow_infinite_fallback, which is disabled at the creation of an additional subflow. It's not looking at the first DATA_ACK (or implying one received from the other side) as suggested by the RFC, but it is in continuation with what was already done, which is safer, and it fixes the reported issue. The next step, looking at this first DATA_ACK, is tracked in [4]. This patch has been validated using the following Packetdrill script: 0 socket(..., SOCK_STREAM, IPPROTO_MPTCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 // 3WHS is OK +0.0 < S 0:0(0) win 65535 +0.0 > S. 0:0(0) ack 1 +0.1 < . 1:1(0) ack 1 win 2048 +0 accept(3, ..., ...) = 4 // Data from the client with valid MPTCP options (no DATA_ACK: normal) +0.1 < P. 1:501(500) ack 1 win 2048 // From here, the MPTCP options will be dropped by a middlebox +0.0 > . 1:1(0) ack 501 +0.1 read(4, ..., 500) = 500 +0 write(4, ..., 100) = 100 // The server replies with data, still thinking MPTCP is being used +0.0 > P. 1:101(100) ack 501 // But the client already did a fallback to TCP, because the two previous packets have been received without MPTCP options +0.1 < . 501:501(0) ack 101 win 2048 +0.0 < P. 501:601(100) ack 101 win 2048 // The server should fallback to TCP, not reset: it didn't get a DATA_ACK, nor data for more than the initial window +0.0 > . 101:101(0) ack 601 Note that this script requires Packetdrill with MPTCP support, see [3]. Fixes: dea2b1ea9c70 ("mptcp: do not reset MP_CAPABLE subflow on mapping errors") Cc: stable@vger.kernel.org Reported-by: Christoph Paasch Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/518 [1] Link: https://datatracker.ietf.org/doc/html/rfc8684#name-fallback [2] Link: https://github.com/multipath-tcp/packetdrill [3] Link: https://github.com/multipath-tcp/mptcp_net-next/issues/519 [4] Reviewed-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241008-net-mptcp-fallback-fixes-v1-3-c6fb8e93e551@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/subflow.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/mptcp') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index e1046a696ab5..25dde81bcb75 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -1282,7 +1282,7 @@ static bool subflow_can_fallback(struct mptcp_subflow_context *subflow) else if (READ_ONCE(msk->csum_enabled)) return !subflow->valid_csum_seen; else - return !subflow->fully_established; + return READ_ONCE(msk->allow_infinite_fallback); } static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) -- cgit v1.2.3 From db0a37b7ac27d8ca27d3dc676a16d081c16ec7b9 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Tue, 8 Oct 2024 13:04:55 +0200 Subject: mptcp: pm: do not remove closing subflows In a previous fix, the in-kernel path-manager has been modified not to retrigger the removal of a subflow if it was already closed, e.g. when the initial subflow is removed, but kept in the subflows list. To be complete, this fix should also skip the subflows that are in any closing state: mptcp_close_ssk() will initiate the closure, but the switch to the TCP_CLOSE state depends on the other peer. Fixes: 58e1b66b4e4b ("mptcp: pm: do not remove already closed subflows") Cc: stable@vger.kernel.org Suggested-by: Paolo Abeni Acked-by: Paolo Abeni Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20241008-net-mptcp-fallback-fixes-v1-4-c6fb8e93e551@kernel.org Signed-off-by: Jakub Kicinski --- net/mptcp/pm_netlink.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/mptcp') diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 64fe0e7d87d7..f6f0a38a0750 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -860,7 +860,8 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, int how = RCV_SHUTDOWN | SEND_SHUTDOWN; u8 id = subflow_get_local_id(subflow); - if (inet_sk_state_load(ssk) == TCP_CLOSE) + if ((1 << inet_sk_state_load(ssk)) & + (TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2 | TCPF_CLOSING | TCPF_CLOSE)) continue; if (rm_type == MPTCP_MIB_RMADDR && remote_id != rm_id) continue; -- cgit v1.2.3