From 9327f7053e3993c125944fdb137a0618319ef2a0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Dec 2009 03:46:54 +0000 Subject: tcp: Fix a connect() race with timewait sockets First patch changes __inet_hash_nolisten() and __inet6_hash() to get a timewait parameter to be able to unhash it from ehash at same time the new socket is inserted in hash. This makes sure timewait socket wont be found by a concurrent writer in __inet_check_established() Reported-by: kapil dakhane Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet6_hashtables.h | 2 +- include/net/inet_hashtables.h | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h index 92838d3a1ab7..e46674d5daea 100644 --- a/include/net/inet6_hashtables.h +++ b/include/net/inet6_hashtables.h @@ -53,7 +53,7 @@ static inline int inet6_sk_ehashfn(const struct sock *sk) return inet6_ehashfn(net, laddr, lport, faddr, fport); } -extern void __inet6_hash(struct sock *sk); +extern int __inet6_hash(struct sock *sk, struct inet_timewait_sock *twp); /* * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 41cbddd25b70..74358d1b3f43 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -251,7 +251,7 @@ extern void inet_put_port(struct sock *sk); void inet_hashinfo_init(struct inet_hashinfo *h); -extern void __inet_hash_nolisten(struct sock *sk); +extern int __inet_hash_nolisten(struct sock *sk, struct inet_timewait_sock *tw); extern void inet_hash(struct sock *sk); extern void inet_unhash(struct sock *sk); @@ -391,10 +391,12 @@ static inline struct sock *__inet_lookup_skb(struct inet_hashinfo *hashinfo, } extern int __inet_hash_connect(struct inet_timewait_death_row *death_row, - struct sock *sk, u32 port_offset, + struct sock *sk, + u32 port_offset, int (*check_established)(struct inet_timewait_death_row *, struct sock *, __u16, struct inet_timewait_sock **), - void (*hash)(struct sock *sk)); + int (*hash)(struct sock *sk, struct inet_timewait_sock *twp)); + extern int inet_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk); #endif /* _INET_HASHTABLES_H */ -- cgit v1.2.3 From 3cdaedae635b17ce23c738ce7d364b442310cdec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 4 Dec 2009 03:47:42 +0000 Subject: tcp: Fix a connect() race with timewait sockets When we find a timewait connection in __inet_hash_connect() and reuse it for a new connection request, we have a race window, releasing bind list lock and reacquiring it in __inet_twsk_kill() to remove timewait socket from list. Another thread might find the timewait socket we already chose, leading to list corruption and crashes. Fix is to remove timewait socket from bind list before releasing the bind lock. Note: This problem happens if sysctl_tcp_tw_reuse is set. Reported-by: kapil dakhane Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index b801ade2295e..79f67eae8a7e 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -201,6 +201,9 @@ extern void inet_twsk_put(struct inet_timewait_sock *tw); extern int inet_twsk_unhash(struct inet_timewait_sock *tw); +extern int inet_twsk_bind_unhash(struct inet_timewait_sock *tw, + struct inet_hashinfo *hashinfo); + extern struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state); -- cgit v1.2.3 From 07f29bc5bbae4e53e982ab956fed7207990a7786 Mon Sep 17 00:00:00 2001 From: Damian Lukowski Date: Mon, 7 Dec 2009 06:06:15 +0000 Subject: tcp: Stalling connections: Fix timeout calculation routine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes a problem in the TCP connection timeout calculation. Currently, timeout decisions are made on the basis of the current tcp_time_stamp and retrans_stamp, which is usually set at the first retransmission. However, if the retransmission fails in tcp_retransmit_skb(), retrans_stamp is not updated and remains zero. This leads to wrong decisions in retransmits_timed_out() if tcp_time_stamp is larger than the specified timeout, which is very likely. In this case, the TCP connection dies after the first attempted (and unsuccessful) retransmission. With this patch, tcp_skb_cb->when is used instead, when retrans_stamp is not available. This bug has been introduced together with retransmits_timed_out() in 2.6.32, as the number of retransmissions has been used for timeout decisions before. The corresponding commit was 6fa12c85031485dff38ce550c24f10da23b0adaa (Revert Backoff [v3]: Calculate TCP's connection close threshold as a time value.). Thanks to Ilpo Järvinen for code suggestions and Frederic Leroy for testing. Reported-by: Frederic Leroy Signed-off-by: Damian Lukowski Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index e2d2ca2509be..e54bd85d9d40 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1265,14 +1265,20 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu * TCP connection after "boundary" unsucessful, exponentially backed-off * retransmissions with an initial RTO of TCP_RTO_MIN. */ -static inline bool retransmits_timed_out(const struct sock *sk, +static inline bool retransmits_timed_out(struct sock *sk, unsigned int boundary) { unsigned int timeout, linear_backoff_thresh; + unsigned int start_ts; if (!inet_csk(sk)->icsk_retransmits) return false; + if (unlikely(!tcp_sk(sk)->retrans_stamp)) + start_ts = TCP_SKB_CB(tcp_write_queue_head(sk))->when; + else + start_ts = tcp_sk(sk)->retrans_stamp; + linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); if (boundary <= linear_backoff_thresh) @@ -1281,7 +1287,7 @@ static inline bool retransmits_timed_out(const struct sock *sk, timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; - return (tcp_time_stamp - tcp_sk(sk)->retrans_stamp) >= timeout; + return (tcp_time_stamp - start_ts) >= timeout; } static inline struct sk_buff *tcp_send_head(struct sock *sk) -- cgit v1.2.3 From 2f7de5710a4d394920405febc2a9937c69e16dda Mon Sep 17 00:00:00 2001 From: Damian Lukowski Date: Mon, 7 Dec 2009 06:06:16 +0000 Subject: tcp: Stalling connections: Move timeout calculation routine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch moves retransmits_timed_out() from include/net/tcp.h to tcp_timer.c, where it is used. Reported-by: Frederic Leroy Signed-off-by: Damian Lukowski Acked-by: Ilpo Järvinen Signed-off-by: David S. Miller --- include/net/tcp.h | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index e54bd85d9d40..0248c181a92c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1261,34 +1261,6 @@ static inline struct sk_buff *tcp_write_queue_prev(struct sock *sk, struct sk_bu #define tcp_for_write_queue_from_safe(skb, tmp, sk) \ skb_queue_walk_from_safe(&(sk)->sk_write_queue, skb, tmp) -/* This function calculates a "timeout" which is equivalent to the timeout of a - * TCP connection after "boundary" unsucessful, exponentially backed-off - * retransmissions with an initial RTO of TCP_RTO_MIN. - */ -static inline bool retransmits_timed_out(struct sock *sk, - unsigned int boundary) -{ - unsigned int timeout, linear_backoff_thresh; - unsigned int start_ts; - - if (!inet_csk(sk)->icsk_retransmits) - return false; - - if (unlikely(!tcp_sk(sk)->retrans_stamp)) - start_ts = TCP_SKB_CB(tcp_write_queue_head(sk))->when; - else - start_ts = tcp_sk(sk)->retrans_stamp; - - linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); - - if (boundary <= linear_backoff_thresh) - timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; - else - timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + - (boundary - linear_backoff_thresh) * TCP_RTO_MAX; - - return (tcp_time_stamp - start_ts) >= timeout; -} static inline struct sk_buff *tcp_send_head(struct sock *sk) { -- cgit v1.2.3 From d24deb2580823ab0b8425790c6f5d18e2ff749d8 Mon Sep 17 00:00:00 2001 From: Gertjan van Wingerde Date: Fri, 4 Dec 2009 23:46:54 +0100 Subject: mac80211: Add define for TX headroom reserved by mac80211 itself. Add a definition of the amount of TX headroom reserved by mac80211 itself for its own purposes. Also add BUILD_BUG_ON to validate the value. This define can then be used by drivers to request additional TX headroom in the most efficient manner. Signed-off-by: Gertjan van Wingerde Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2aff4906b2ae..538d6b761887 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1737,6 +1737,12 @@ static inline void ieee80211_rx_ni(struct ieee80211_hw *hw, local_bh_enable(); } +/* + * The TX headroom reserved by mac80211 for its own tx_status functions. + * This is enough for the radiotap header. + */ +#define IEEE80211_TX_STATUS_HEADROOM 13 + /** * ieee80211_tx_status - transmit status callback * -- cgit v1.2.3