From 9efdda4e3abed13f0903b7b6e4d4c2102019440a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 24 Nov 2018 09:12:24 -0800 Subject: tcp: address problems caused by EDT misshaps When a qdisc setup including pacing FQ is dismantled and recreated, some TCP packets are sent earlier than instructed by TCP stack. TCP can be fooled when ACK comes back, because the following operation can return a negative value. tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; Some paths in TCP stack were not dealing properly with this, this patch addresses four of them. Fixes: ab408b6dc744 ("tcp: switch tcp and sch_fq to new earliest departure time model") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_input.c | 16 ++++++++++------ net/ipv4/tcp_timer.c | 10 ++++++---- 2 files changed, 16 insertions(+), 10 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1e37c1388189..a9d9555a973f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -579,10 +579,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; u32 delta_us; - if (!delta) - delta = 1; - delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); - tcp_rcv_rtt_update(tp, delta_us, 0); + if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { + if (!delta) + delta = 1; + delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); + tcp_rcv_rtt_update(tp, delta_us, 0); + } } } @@ -2910,9 +2912,11 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag, if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr && flag & FLAG_ACKED) { u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr; - u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ); - seq_rtt_us = ca_rtt_us = delta_us; + if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) { + seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ); + ca_rtt_us = seq_rtt_us; + } } rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */ if (seq_rtt_us < 0) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 5f8b6d3cd855..091c53925e4d 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -40,15 +40,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); u32 elapsed, start_ts; + s32 remaining; start_ts = tcp_retransmit_stamp(sk); if (!icsk->icsk_user_timeout || !start_ts) return icsk->icsk_rto; elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts; - if (elapsed >= icsk->icsk_user_timeout) + remaining = icsk->icsk_user_timeout - elapsed; + if (remaining <= 0) return 1; /* user timeout has passed; fire ASAP */ - else - return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed)); + + return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining)); } /** @@ -209,7 +211,7 @@ static bool retransmits_timed_out(struct sock *sk, (boundary - linear_backoff_thresh) * TCP_RTO_MAX; timeout = jiffies_to_msecs(timeout); } - return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout; + return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0; } /* A write timeout has occurred. Process the after effects. */ -- cgit v1.2.3 From aba36930a35e7f1fe1319b203f25c05d6c119936 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sat, 24 Nov 2018 14:21:16 -0500 Subject: net: always initialize pagedlen In ip packet generation, pagedlen is initialized for each skb at the start of the loop in __ip(6)_append_data, before label alloc_new_skb. Depending on compiler options, code can be generated that jumps to this label, triggering use of an an uninitialized variable. In practice, at -O2, the generated code moves the initialization below the label. But the code should not rely on that for correctness. Fixes: 15e36f5b8e98 ("udp: paged allocation with gso") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/ip_output.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index c09219e7f230..5dbec21856f4 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -939,7 +939,7 @@ static int __ip_append_data(struct sock *sk, unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; - unsigned int pagedlen = 0; + unsigned int pagedlen; struct sk_buff *skb_prev; alloc_new_skb: skb_prev = skb; @@ -956,6 +956,7 @@ alloc_new_skb: if (datalen > mtu - fragheaderlen) datalen = maxfraglen - fragheaderlen; fraglen = datalen + fragheaderlen; + pagedlen = 0; if ((flags & MSG_MORE) && !(rt->dst.dev->features&NETIF_F_SG)) -- cgit v1.2.3 From 584eab291c67894cb17cc87544b9d086228ea70f Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 22 Nov 2018 19:59:46 +0900 Subject: netfilter: add missing error handling code for register functions register_{netdevice/inetaddr/inet6addr}_notifier may return an error value, this patch adds the code to handle these error paths. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_MASQUERADE.c | 7 +++++-- net/ipv4/netfilter/nf_nat_masquerade_ipv4.c | 21 +++++++++++++++++---- net/ipv4/netfilter/nft_masq_ipv4.c | 4 +++- 3 files changed, 25 insertions(+), 7 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index ce1512b02cb2..fd3f9e8a74da 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -81,9 +81,12 @@ static int __init masquerade_tg_init(void) int ret; ret = xt_register_target(&masquerade_tg_reg); + if (ret) + return ret; - if (ret == 0) - nf_nat_masquerade_ipv4_register_notifier(); + ret = nf_nat_masquerade_ipv4_register_notifier(); + if (ret) + xt_unregister_target(&masquerade_tg_reg); return ret; } diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c index a9d5e013e555..c7d7fa4fc369 100644 --- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c @@ -149,16 +149,29 @@ static struct notifier_block masq_inet_notifier = { static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0); -void nf_nat_masquerade_ipv4_register_notifier(void) +int nf_nat_masquerade_ipv4_register_notifier(void) { + int ret; + /* check if the notifier was already set */ if (atomic_inc_return(&masquerade_notifier_refcount) > 1) - return; + return 0; /* Register for device down reports */ - register_netdevice_notifier(&masq_dev_notifier); + ret = register_netdevice_notifier(&masq_dev_notifier); + if (ret) + goto err_dec; /* Register IP address change reports */ - register_inetaddr_notifier(&masq_inet_notifier); + ret = register_inetaddr_notifier(&masq_inet_notifier); + if (ret) + goto err_unregister; + + return ret; +err_unregister: + unregister_netdevice_notifier(&masq_dev_notifier); +err_dec: + atomic_dec(&masquerade_notifier_refcount); + return ret; } EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier); diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c index f1193e1e928a..6847de1d1db8 100644 --- a/net/ipv4/netfilter/nft_masq_ipv4.c +++ b/net/ipv4/netfilter/nft_masq_ipv4.c @@ -69,7 +69,9 @@ static int __init nft_masq_ipv4_module_init(void) if (ret < 0) return ret; - nf_nat_masquerade_ipv4_register_notifier(); + ret = nf_nat_masquerade_ipv4_register_notifier(); + if (ret) + nft_unregister_expr(&nft_masq_ipv4_type); return ret; } -- cgit v1.2.3 From 095faf45e64be00bff4da2d6182dface3d69c9b7 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Thu, 22 Nov 2018 19:59:57 +0900 Subject: netfilter: nat: fix double register in masquerade modules There is a reference counter to ensure that masquerade modules register notifiers only once. However, the existing reference counter approach is not safe, test commands are: while : do modprobe ip6t_MASQUERADE & modprobe nft_masq_ipv6 & modprobe -rv ip6t_MASQUERADE & modprobe -rv nft_masq_ipv6 & done numbers below represent the reference counter. -------------------------------------------------------- CPU0 CPU1 CPU2 CPU3 CPU4 [insmod] [insmod] [rmmod] [rmmod] [insmod] -------------------------------------------------------- 0->1 register 1->2 returns 2->1 returns 1->0 0->1 register <-- unregister -------------------------------------------------------- The unregistation of CPU3 should be processed before the registration of CPU4. In order to fix this, use a mutex instead of reference counter. splat looks like: [ 323.869557] watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [modprobe:1381] [ 323.869574] Modules linked in: nf_tables(+) nf_nat_ipv6(-) nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 n] [ 323.869574] irq event stamp: 194074 [ 323.898930] hardirqs last enabled at (194073): [] trace_hardirqs_on_thunk+0x1a/0x1c [ 323.898930] hardirqs last disabled at (194074): [] trace_hardirqs_off_thunk+0x1a/0x1c [ 323.898930] softirqs last enabled at (182132): [] __do_softirq+0x6ec/0xa3b [ 323.898930] softirqs last disabled at (182109): [] irq_exit+0x1a6/0x1e0 [ 323.898930] CPU: 0 PID: 1381 Comm: modprobe Not tainted 4.20.0-rc2+ #27 [ 323.898930] RIP: 0010:raw_notifier_chain_register+0xea/0x240 [ 323.898930] Code: 3c 03 0f 8e f2 00 00 00 44 3b 6b 10 7f 4d 49 bc 00 00 00 00 00 fc ff df eb 22 48 8d 7b 10 488 [ 323.898930] RSP: 0018:ffff888101597218 EFLAGS: 00000206 ORIG_RAX: ffffffffffffff13 [ 323.898930] RAX: 0000000000000000 RBX: ffffffffc04361c0 RCX: 0000000000000000 [ 323.898930] RDX: 1ffffffff26132ae RSI: ffffffffc04aa3c0 RDI: ffffffffc04361d0 [ 323.898930] RBP: ffffffffc04361c8 R08: 0000000000000000 R09: 0000000000000001 [ 323.898930] R10: ffff8881015972b0 R11: fffffbfff26132c4 R12: dffffc0000000000 [ 323.898930] R13: 0000000000000000 R14: 1ffff110202b2e44 R15: ffffffffc04aa3c0 [ 323.898930] FS: 00007f813ed41540(0000) GS:ffff88811ae00000(0000) knlGS:0000000000000000 [ 323.898930] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 323.898930] CR2: 0000559bf2c9f120 CR3: 000000010bc80000 CR4: 00000000001006f0 [ 323.898930] Call Trace: [ 323.898930] ? atomic_notifier_chain_register+0x2d0/0x2d0 [ 323.898930] ? down_read+0x150/0x150 [ 323.898930] ? sched_clock_cpu+0x126/0x170 [ 323.898930] ? nf_tables_core_module_init+0xe4/0xe4 [nf_tables] [ 323.898930] ? nf_tables_core_module_init+0xe4/0xe4 [nf_tables] [ 323.898930] register_netdevice_notifier+0xbb/0x790 [ 323.898930] ? __dev_close_many+0x2d0/0x2d0 [ 323.898930] ? __mutex_unlock_slowpath+0x17f/0x740 [ 323.898930] ? wait_for_completion+0x710/0x710 [ 323.898930] ? nf_tables_core_module_init+0xe4/0xe4 [nf_tables] [ 323.898930] ? up_write+0x6c/0x210 [ 323.898930] ? nf_tables_core_module_init+0xe4/0xe4 [nf_tables] [ 324.127073] ? nf_tables_core_module_init+0xe4/0xe4 [nf_tables] [ 324.127073] nft_chain_filter_init+0x1e/0xe8a [nf_tables] [ 324.127073] nf_tables_module_init+0x37/0x92 [nf_tables] [ ... ] Fixes: 8dd33cc93ec9 ("netfilter: nf_nat: generalize IPv4 masquerading support for nf_tables") Fixes: be6b635cd674 ("netfilter: nf_nat: generalize IPv6 masquerading support for nf_tables") Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/nf_nat_masquerade_ipv4.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c index c7d7fa4fc369..41327bb99093 100644 --- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c @@ -147,15 +147,17 @@ static struct notifier_block masq_inet_notifier = { .notifier_call = masq_inet_event, }; -static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0); +static int masq_refcnt; +static DEFINE_MUTEX(masq_mutex); int nf_nat_masquerade_ipv4_register_notifier(void) { - int ret; + int ret = 0; + mutex_lock(&masq_mutex); /* check if the notifier was already set */ - if (atomic_inc_return(&masquerade_notifier_refcount) > 1) - return 0; + if (++masq_refcnt > 1) + goto out_unlock; /* Register for device down reports */ ret = register_netdevice_notifier(&masq_dev_notifier); @@ -166,22 +168,29 @@ int nf_nat_masquerade_ipv4_register_notifier(void) if (ret) goto err_unregister; + mutex_unlock(&masq_mutex); return ret; + err_unregister: unregister_netdevice_notifier(&masq_dev_notifier); err_dec: - atomic_dec(&masquerade_notifier_refcount); + masq_refcnt--; +out_unlock: + mutex_unlock(&masq_mutex); return ret; } EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier); void nf_nat_masquerade_ipv4_unregister_notifier(void) { + mutex_lock(&masq_mutex); /* check if the notifier still has clients */ - if (atomic_dec_return(&masquerade_notifier_refcount) > 0) - return; + if (--masq_refcnt > 0) + goto out_unlock; unregister_netdevice_notifier(&masq_dev_notifier); unregister_inetaddr_notifier(&masq_inet_notifier); +out_unlock: + mutex_unlock(&masq_mutex); } EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier); -- cgit v1.2.3