Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 138
1 file changed, 115 insertions, 23 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d86784be7ab3..12cab7d74dba 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -140,7 +140,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
 		 * "len" is invariant segment length, including TCP header.
 		 */
 		len += skb->data - skb_transport_header(skb);
-		if (len >= TCP_MIN_RCVMSS + sizeof(struct tcphdr) ||
+		if (len >= TCP_MSS_DEFAULT + sizeof(struct tcphdr) ||
 		    /* If PSH is not set, packet should be
 		     * full sized, provided peer TCP is not badly broken.
 		     * This observation (if it is correct 8)) allows
@@ -411,7 +411,7 @@ void tcp_initialize_rcv_mss(struct sock *sk)
 	unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
 
 	hint = min(hint, tp->rcv_wnd / 2);
-	hint = min(hint, TCP_MIN_RCVMSS);
+	hint = min(hint, TCP_MSS_DEFAULT);
 	hint = max(hint, TCP_MIN_MSS);
 
 	inet_csk(sk)->icsk_ack.rcv_mss = hint;
@@ -2300,7 +2300,7 @@ static inline int tcp_fackets_out(struct tcp_sock *tp)
  * they differ. Since neither occurs due to loss, TCP should really
  * ignore them.
  */
-static inline int tcp_dupack_heurestics(struct tcp_sock *tp)
+static inline int tcp_dupack_heuristics(struct tcp_sock *tp)
 {
 	return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
 }
@@ -2425,7 +2425,7 @@ static int tcp_time_to_recover(struct sock *sk)
 		return 1;
 
 	/* Not-A-Trick#2 : Classic rule... */
-	if (tcp_dupack_heurestics(tp) > tp->reordering)
+	if (tcp_dupack_heuristics(tp) > tp->reordering)
 		return 1;
 
 	/* Trick#3 : when we use RFC2988 timer restart, fast
@@ -2717,6 +2717,35 @@ static void tcp_try_undo_dsack(struct sock *sk)
 	}
 }
 
+/* We can clear retrans_stamp when there are no retransmissions in the
+ * window. It would seem that it is trivially available for us in
+ * tp->retrans_out, however, that kind of assumption doesn't consider
+ * what will happen if errors occur when sending retransmission for the
+ * second time. ...It could be that such a segment has only
+ * TCPCB_EVER_RETRANS set at the present time. It seems that checking
+ * the head skb is enough except for some reneging corner cases that
+ * are not worth the effort.
+ *
+ * Main reason for all this complexity is the fact that connection dying
+ * time now depends on the validity of the retrans_stamp, in particular,
+ * that successive retransmissions of a segment must not advance
+ * retrans_stamp under any conditions.
+ */
+static int tcp_any_retrans_done(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+
+	if (tp->retrans_out)
+		return 1;
+
+	skb = tcp_write_queue_head(sk);
+	if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS))
+		return 1;
+
+	return 0;
+}
+
 /* Undo during fast recovery after partial ACK. */
 
 static int tcp_try_undo_partial(struct sock *sk, int acked)
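The comment on the new helper is the subtle part: a failed second retransmission can leave tp->retrans_out at zero while the head segment still carries TCPCB_EVER_RETRANS, so clearing retrans_stamp on retrans_out alone would let successive retransmissions advance the timestamp and keep a dying connection alive. A standalone sketch of the same two-step check (hypothetical struct and flag names, not the kernel's):

#include <stdio.h>

#define EVER_RETRANS 0x01          /* stand-in for TCPCB_EVER_RETRANS */

struct seg {
	int sacked;                 /* per-segment flag word */
	struct seg *next;
};

struct conn {
	int retrans_out;            /* retransmissions currently in flight */
	struct seg *write_queue;    /* head of the write queue */
};

/* Mirrors tcp_any_retrans_done(): check the in-flight counter first,
 * then the head segment's "ever retransmitted" flag, because a failed
 * re-retransmission leaves only the flag behind. */
static int any_retrans_done(const struct conn *c)
{
	if (c->retrans_out)
		return 1;
	if (c->write_queue && (c->write_queue->sacked & EVER_RETRANS))
		return 1;
	return 0;
}

int main(void)
{
	struct seg head = { .sacked = EVER_RETRANS, .next = NULL };
	struct conn c = { .retrans_out = 0, .write_queue = &head };

	/* retrans_out == 0, yet a retransmission did happen: */
	printf("any_retrans_done = %d\n", any_retrans_done(&c)); /* prints 1 */
	return 0;
}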
@@ -2729,7 +2758,7 @@ static int tcp_try_undo_partial(struct sock *sk, int acked)
 		/* Plain luck! Hole is filled with delayed
 		 * packet, rather than with a retransmit.
 		 */
-		if (tp->retrans_out == 0)
+		if (!tcp_any_retrans_done(sk))
 			tp->retrans_stamp = 0;
 
 		tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1);
@@ -2788,7 +2817,7 @@ static void tcp_try_keep_open(struct sock *sk)
 	struct tcp_sock *tp = tcp_sk(sk);
 	int state = TCP_CA_Open;
 
-	if (tcp_left_out(tp) || tp->retrans_out || tp->undo_marker)
+	if (tcp_left_out(tp) || tcp_any_retrans_done(sk) || tp->undo_marker)
 		state = TCP_CA_Disorder;
 
 	if (inet_csk(sk)->icsk_ca_state != state) {
@@ -2803,7 +2832,7 @@ static void tcp_try_to_open(struct sock *sk, int flag)
 
 	tcp_verify_left_out(tp);
 
-	if (!tp->frto_counter && tp->retrans_out == 0)
+	if (!tp->frto_counter && !tcp_any_retrans_done(sk))
 		tp->retrans_stamp = 0;
 
 	if (flag & FLAG_ECE)
@@ -3698,7 +3727,7 @@ old_ack:
  * the fast version below fails.
  */
 void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
-		       int estab)
+		       u8 **hvpp, int estab, struct dst_entry *dst)
 {
 	unsigned char *ptr;
 	struct tcphdr *th = tcp_hdr(skb);
@@ -3737,7 +3766,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 			break;
 		case TCPOPT_WINDOW:
 			if (opsize == TCPOLEN_WINDOW && th->syn &&
-			    !estab && sysctl_tcp_window_scaling) {
+			    !estab && sysctl_tcp_window_scaling &&
+			    !dst_feature(dst, RTAX_FEATURE_NO_WSCALE)) {
 				__u8 snd_wscale = *(__u8 *)ptr;
 				opt_rx->wscale_ok = 1;
 				if (snd_wscale > 14) {
@@ -3753,7 +3783,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 		case TCPOPT_TIMESTAMP:
 			if ((opsize == TCPOLEN_TIMESTAMP) &&
 			    ((estab && opt_rx->tstamp_ok) ||
-			     (!estab && sysctl_tcp_timestamps))) {
+			     (!estab && sysctl_tcp_timestamps &&
+			      !dst_feature(dst, RTAX_FEATURE_NO_TSTAMP)))) {
 				opt_rx->saw_tstamp = 1;
 				opt_rx->rcv_tsval = get_unaligned_be32(ptr);
 				opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
@@ -3761,7 +3792,8 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 			break;
 		case TCPOPT_SACK_PERM:
 			if (opsize == TCPOLEN_SACK_PERM && th->syn &&
-			    !estab && sysctl_tcp_sack) {
+			    !estab && sysctl_tcp_sack &&
+			    !dst_feature(dst, RTAX_FEATURE_NO_SACK)) {
 				opt_rx->sack_ok = 1;
 				tcp_sack_reset(opt_rx);
 			}
@@ -3782,7 +3814,30 @@ void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
 			 */
 			break;
 #endif
-		}
+		case TCPOPT_COOKIE:
+			/* This option is variable length.
+			 */
+			switch (opsize) {
+			case TCPOLEN_COOKIE_BASE:
+				/* not yet implemented */
+				break;
+			case TCPOLEN_COOKIE_PAIR:
+				/* not yet implemented */
+				break;
+			case TCPOLEN_COOKIE_MIN+0:
+			case TCPOLEN_COOKIE_MIN+2:
+			case TCPOLEN_COOKIE_MIN+4:
+			case TCPOLEN_COOKIE_MIN+6:
+			case TCPOLEN_COOKIE_MAX:
+				/* 16-bit multiple */
+				opt_rx->cookie_plus = opsize;
+				*hvpp = ptr;
+			default:
+				/* ignore option */
+				break;
+			};
+			break;
+		};
 
 		ptr += opsize-2;
 		length -= opsize;
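The new TCPOPT_COOKIE case accepts only even opsize values in the TCPOLEN_COOKIE_MIN..TCPOLEN_COOKIE_MAX range ("16-bit multiple"), recording the length in cookie_plus and the payload pointer through *hvpp. A minimal userspace sketch of the same kind/length option walk; the concrete values (kind 253, 2-byte option header, 8- to 16-byte cookies) are assumptions from the TCPCT series, not shown in this diff:

#include <stdint.h>
#include <stdio.h>

#define TCPOPT_EOL          0
#define TCPOPT_NOP          1
#define TCPOPT_COOKIE       253  /* assumed experimental kind from TCPCT */
#define TCPOLEN_COOKIE_BASE 2    /* kind + len, no cookie data */
#define TCPOLEN_COOKIE_MIN  (TCPOLEN_COOKIE_BASE + 8)   /*  64-bit cookie */
#define TCPOLEN_COOKIE_MAX  (TCPOLEN_COOKIE_BASE + 16)  /* 128-bit cookie */

/* Walk a TCP option block the way tcp_parse_options() does: each option
 * is kind, length, payload; NOP/EOL are single bytes.  On a valid cookie
 * it returns the option length and stores the payload pointer, mirroring
 * how the hunk above records opt_rx->cookie_plus and *hvpp. */
static int find_cookie(const uint8_t *opt, int length, const uint8_t **hvpp)
{
	while (length > 0) {
		int opcode = *opt++;
		int opsize;

		if (opcode == TCPOPT_EOL)
			break;
		if (opcode == TCPOPT_NOP) {
			length--;
			continue;
		}
		if (length < 2)
			break;
		opsize = *opt++;
		if (opsize < 2 || opsize > length)
			break;          /* malformed: never parse past the end */
		if (opcode == TCPOPT_COOKIE && (opsize & 1) == 0 &&
		    opsize >= TCPOLEN_COOKIE_MIN && opsize <= TCPOLEN_COOKIE_MAX) {
			*hvpp = opt;    /* payload starts after kind + len */
			return opsize;
		}
		opt += opsize - 2;
		length -= opsize;
	}
	return 0;
}

int main(void)
{
	/* NOP, NOP, then a cookie option carrying a 64-bit cookie. */
	uint8_t opts[] = { 1, 1, 253, 10, 1, 2, 3, 4, 5, 6, 7, 8 };
	const uint8_t *cookie = NULL;
	int plus = find_cookie(opts, sizeof(opts), &cookie);

	printf("cookie_plus = %d, payload bytes = %d\n",
	       plus, plus ? plus - 2 : 0);
	return 0;
}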
@@ -3810,17 +3865,20 @@ static int tcp_parse_aligned_timestamp(struct tcp_sock *tp, struct tcphdr *th)
  * If it is wrong it falls back on tcp_parse_options().
  */
 static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
-				  struct tcp_sock *tp)
+				  struct tcp_sock *tp, u8 **hvpp)
 {
-	if (th->doff == sizeof(struct tcphdr) >> 2) {
+	/* In the spirit of fast parsing, compare doff directly to constant
+	 * values.  Because equality is used, short doff can be ignored here.
+	 */
+	if (th->doff == (sizeof(*th) / 4)) {
 		tp->rx_opt.saw_tstamp = 0;
 		return 0;
 	} else if (tp->rx_opt.tstamp_ok &&
-		   th->doff == (sizeof(struct tcphdr)>>2)+(TCPOLEN_TSTAMP_ALIGNED>>2)) {
+		   th->doff == ((sizeof(*th) + TCPOLEN_TSTAMP_ALIGNED) / 4)) {
		if (tcp_parse_aligned_timestamp(tp, th))
			return 1;
	}
-	tcp_parse_options(skb, &tp->rx_opt, 1);
+	tcp_parse_options(skb, &tp->rx_opt, hvpp, 1, NULL);
 	return 1;
 }
 
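The rewritten comparisons in the tcp_fast_parse_options hunk rely on doff counting 32-bit words: a bare 20-byte TCP header gives doff == 5, and a header whose only option is the NOP-padded timestamp block (TCPOLEN_TSTAMP_ALIGNED, 12 bytes) gives doff == 8; anything else falls back to the slow parser. The arithmetic, spelled out as a tiny standalone program:

#include <stdio.h>

#define TCP_HDR_SIZE           20  /* sizeof(struct tcphdr) */
#define TCPOLEN_TSTAMP_ALIGNED 12  /* NOP, NOP, kind, len, two 32-bit stamps */

int main(void)
{
	/* th->doff counts 32-bit words, so the two fast-path constants are: */
	int doff_plain  = TCP_HDR_SIZE / 4;                             /* 5 */
	int doff_tstamp = (TCP_HDR_SIZE + TCPOLEN_TSTAMP_ALIGNED) / 4;  /* 8 */

	printf("bare header: doff == %d\n", doff_plain);
	printf("header + aligned timestamps: doff == %d\n", doff_tstamp);
	/* Any other doff value means other options are present and the
	 * general tcp_parse_options() walk must run. */
	return 0;
}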
@@ -4075,8 +4133,10 @@ static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
 static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct dst_entry *dst = __sk_dst_get(sk);
 
-	if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+	if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
+	    !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
 		int mib_idx;
 
 		if (before(seq, tp->rcv_nxt))
@@ -4105,13 +4165,15 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, u32 end_seq)
 static void tcp_send_dupack(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct dst_entry *dst = __sk_dst_get(sk);
 
 	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq &&
 	    before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) {
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
 		tcp_enter_quickack_mode(sk);
 
-		if (tcp_is_sack(tp) && sysctl_tcp_dsack) {
+		if (tcp_is_sack(tp) && sysctl_tcp_dsack &&
+		    !dst_feature(dst, RTAX_FEATURE_NO_DSACK)) {
 			u32 end_seq = TCP_SKB_CB(skb)->end_seq;
 
 			if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))
@@ -4845,11 +4907,11 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible)
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	    /* More than one full frame received... */
-	if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss
+	if (((tp->rcv_nxt - tp->rcv_wup) > inet_csk(sk)->icsk_ack.rcv_mss &&
 	     /* ... and right edge of window advances far enough.
 	      * (tcp_recvmsg() will send ACK otherwise). Or...
 	      */
-	     && __tcp_select_window(sk) >= tp->rcv_wnd) ||
+	     __tcp_select_window(sk) >= tp->rcv_wnd) ||
 	    /* We ACK each frame or... */
 	    tcp_in_quickack_mode(sk) ||
 	    /* We have out of order data. */
@@ -5070,10 +5132,12 @@ out:
 static int tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
 				 struct tcphdr *th, int syn_inerr)
 {
+	u8 *hash_location;
 	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* RFC1323: H1. Apply PAWS check first. */
-	if (tcp_fast_parse_options(skb, th, tp) && tp->rx_opt.saw_tstamp &&
+	if (tcp_fast_parse_options(skb, th, tp, &hash_location) &&
+	    tp->rx_opt.saw_tstamp &&
 	    tcp_paws_discard(sk, skb)) {
 		if (!th->rst) {
 			NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
@@ -5361,11 +5425,14 @@ discard:
 static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 					 struct tcphdr *th, unsigned len)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	u8 *hash_location;
 	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct dst_entry *dst = __sk_dst_get(sk);
+	struct tcp_cookie_values *cvp = tp->cookie_values;
 	int saved_clamp = tp->rx_opt.mss_clamp;
 
-	tcp_parse_options(skb, &tp->rx_opt, 0);
+	tcp_parse_options(skb, &tp->rx_opt, &hash_location, 0, dst);
 
 	if (th->ack) {
 		/* rfc793:
@@ -5462,6 +5529,31 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		 * Change state from SYN-SENT only after copied_seq
 		 * is initialized.
 		 */
 		tp->copied_seq = tp->rcv_nxt;
+
+		if (cvp != NULL &&
+		    cvp->cookie_pair_size > 0 &&
+		    tp->rx_opt.cookie_plus > 0) {
+			int cookie_size = tp->rx_opt.cookie_plus
+					- TCPOLEN_COOKIE_BASE;
+			int cookie_pair_size = cookie_size
+					     + cvp->cookie_desired;
+
+			/* A cookie extension option was sent and returned.
+			 * Note that each incoming SYNACK replaces the
+			 * Responder cookie.  The initial exchange is most
+			 * fragile, as protection against spoofing relies
+			 * entirely upon the sequence and timestamp (above).
+			 * This replacement strategy allows the correct pair to
+			 * pass through, while any others will be filtered via
+			 * Responder verification later.
+			 */
+			if (sizeof(cvp->cookie_pair) >= cookie_pair_size) {
+				memcpy(&cvp->cookie_pair[cvp->cookie_desired],
+				       hash_location, cookie_size);
+				cvp->cookie_pair_size = cookie_pair_size;
+			}
+		}
+
 		smp_mb();
 		tcp_set_state(sk, TCP_ESTABLISHED);
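The SYNACK branch above appends the responder's cookie directly after the initiator's cookie_desired bytes, and the sizeof() comparison is the bounds guard that keeps an oversized pair from overrunning the fixed buffer. A trimmed userspace sketch of that replacement step; the struct and constants are stand-ins for tcp_cookie_values, not the kernel definitions:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define COOKIE_MAX       16               /* assumed largest single cookie */
#define COOKIE_PAIR_SIZE (2 * COOKIE_MAX) /* one cookie from each side */

struct cookie_values {                    /* stand-in for tcp_cookie_values */
	uint8_t cookie_pair[COOKIE_PAIR_SIZE];
	int     cookie_pair_size;         /* bytes of cookie_pair in use */
	int     cookie_desired;           /* size of our own (initiator) cookie */
};

/* Mirrors the hunk: store the responder cookie right after our own, but
 * only when the resulting pair fits; each new SYNACK simply replaces the
 * previous responder cookie, and bad pairs fail verification later. */
static void store_responder_cookie(struct cookie_values *cvp,
				   const uint8_t *cookie, int cookie_size)
{
	int pair_size = cookie_size + cvp->cookie_desired;

	if ((size_t)pair_size <= sizeof(cvp->cookie_pair)) {
		memcpy(&cvp->cookie_pair[cvp->cookie_desired],
		       cookie, cookie_size);
		cvp->cookie_pair_size = pair_size;
	}
	/* else: keep the previous pair, as the kernel code silently does */
}

int main(void)
{
	struct cookie_values cvp = { .cookie_desired = 8 };
	uint8_t responder[8] = { 0xde, 0xad, 0xbe, 0xef, 1, 2, 3, 4 };

	store_responder_cookie(&cvp, responder, sizeof(responder));
	printf("cookie_pair_size = %d\n", cvp.cookie_pair_size); /* prints 16 */
	return 0;
}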