diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-05 12:31:59 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-07-05 12:31:59 -0700 |
commit | 5518b69b76680a4f2df96b1deca260059db0c2de (patch) | |
tree | f33cd1519c8efb4590500f2f9617400be233238c /net/ipv4/tcp_ipv4.c | |
parent | 8ad06e56dcbc1984ef0ff8f6e3c19982c5809f73 (diff) | |
parent | 0e72582270c07850b92cac351c8b97d4f9c123b9 (diff) | |
download | linux-5518b69b76680a4f2df96b1deca260059db0c2de.tar.gz linux-5518b69b76680a4f2df96b1deca260059db0c2de.tar.bz2 linux-5518b69b76680a4f2df96b1deca260059db0c2de.zip |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller:
"Reasonably busy this cycle, but perhaps not as busy as in the 4.12
merge window:
1) Several optimizations for UDP processing under high load from
Paolo Abeni.
2) Support pacing internally in TCP when using the sch_fq packet
scheduler for this is not practical. From Eric Dumazet.
3) Support mutliple filter chains per qdisc, from Jiri Pirko.
4) Move to 1ms TCP timestamp clock, from Eric Dumazet.
5) Add batch dequeueing to vhost_net, from Jason Wang.
6) Flesh out more completely SCTP checksum offload support, from
Davide Caratti.
7) More plumbing of extended netlink ACKs, from David Ahern, Pablo
Neira Ayuso, and Matthias Schiffer.
8) Add devlink support to nfp driver, from Simon Horman.
9) Add RTM_F_FIB_MATCH flag to RTM_GETROUTE queries, from Roopa
Prabhu.
10) Add stack depth tracking to BPF verifier and use this information
in the various eBPF JITs. From Alexei Starovoitov.
11) Support XDP on qed device VFs, from Yuval Mintz.
12) Introduce BPF PROG ID for better introspection of installed BPF
programs. From Martin KaFai Lau.
13) Add bpf_set_hash helper for TC bpf programs, from Daniel Borkmann.
14) For loads, allow narrower accesses in bpf verifier checking, from
Yonghong Song.
15) Support MIPS in the BPF selftests and samples infrastructure, the
MIPS eBPF JIT will be merged in via the MIPS GIT tree. From David
Daney.
16) Support kernel based TLS, from Dave Watson and others.
17) Remove completely DST garbage collection, from Wei Wang.
18) Allow installing TCP MD5 rules using prefixes, from Ivan
Delalande.
19) Add XDP support to Intel i40e driver, from Björn Töpel
20) Add support for TC flower offload in nfp driver, from Simon
Horman, Pieter Jansen van Vuuren, Benjamin LaHaise, Jakub
Kicinski, and Bert van Leeuwen.
21) IPSEC offloading support in mlx5, from Ilan Tayari.
22) Add HW PTP support to macb driver, from Rafal Ozieblo.
23) Networking refcount_t conversions, From Elena Reshetova.
24) Add sock_ops support to BPF, from Lawrence Brako. This is useful
for tuning the TCP sockopt settings of a group of applications,
currently via CGROUPs"
* git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1899 commits)
net: phy: dp83867: add workaround for incorrect RX_CTRL pin strap
dt-bindings: phy: dp83867: provide a workaround for incorrect RX_CTRL pin strap
cxgb4: Support for get_ts_info ethtool method
cxgb4: Add PTP Hardware Clock (PHC) support
cxgb4: time stamping interface for PTP
nfp: default to chained metadata prepend format
nfp: remove legacy MAC address lookup
nfp: improve order of interfaces in breakout mode
net: macb: remove extraneous return when MACB_EXT_DESC is defined
bpf: add missing break in for the TCP_BPF_SNDCWND_CLAMP case
bpf: fix return in load_bpf_file
mpls: fix rtm policy in mpls_getroute
net, ax25: convert ax25_cb.refcount from atomic_t to refcount_t
net, ax25: convert ax25_route.refcount from atomic_t to refcount_t
net, ax25: convert ax25_uid_assoc.refcount from atomic_t to refcount_t
net, sctp: convert sctp_ep_common.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_transport.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_chunk.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_datamsg.refcnt from atomic_t to refcount_t
net, sctp: convert sctp_auth_bytes.refcnt from atomic_t to refcount_t
...
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 112 |
1 files changed, 88 insertions, 24 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5ab2aac5ca19..6ec6900eb300 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -80,6 +80,7 @@ #include <linux/stddef.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/inetdevice.h> #include <crypto/hash.h> #include <linux/scatterlist.h> @@ -102,10 +103,9 @@ static u32 tcp_v4_init_seq(const struct sk_buff *skb) tcp_hdr(skb)->source); } -static u32 tcp_v4_init_ts_off(const struct sk_buff *skb) +static u32 tcp_v4_init_ts_off(const struct net *net, const struct sk_buff *skb) { - return secure_tcp_ts_off(ip_hdr(skb)->daddr, - ip_hdr(skb)->saddr); + return secure_tcp_ts_off(net, ip_hdr(skb)->daddr, ip_hdr(skb)->saddr); } int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -242,7 +242,8 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr, inet->inet_sport, usin->sin_port); - tp->tsoffset = secure_tcp_ts_off(inet->inet_saddr, + tp->tsoffset = secure_tcp_ts_off(sock_net(sk), + inet->inet_saddr, inet->inet_daddr); } @@ -376,8 +377,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) struct sock *sk; struct sk_buff *skb; struct request_sock *fastopen; - __u32 seq, snd_una; - __u32 remaining; + u32 seq, snd_una; + s32 remaining; + u32 delta_us; int err; struct net *net = dev_net(icmp_skb->dev); @@ -483,11 +485,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) skb = tcp_write_queue_head(sk); BUG_ON(!skb); + tcp_mstamp_refresh(tp); + delta_us = (u32)(tp->tcp_mstamp - skb->skb_mstamp); remaining = icsk->icsk_rto - - min(icsk->icsk_rto, - tcp_time_stamp - tcp_skb_timestamp(skb)); + usecs_to_jiffies(delta_us); - if (remaining) { + if (remaining > 0) { inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, remaining, TCP_RTO_MAX); } else { @@ -811,7 +814,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v4_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcp_time_stamp + tcptw->tw_ts_offset, + tcp_time_stamp_raw() + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), @@ -839,7 +842,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale, - tcp_time_stamp + tcp_rsk(req)->ts_off, + tcp_time_stamp_raw() + tcp_rsk(req)->ts_off, req->ts_recent, 0, tcp_md5_do_lookup(sk, (union tcp_md5_addr *)&ip_hdr(skb)->daddr, @@ -904,6 +907,48 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, { const struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_key *key; + const struct tcp_md5sig_info *md5sig; + __be32 mask; + struct tcp_md5sig_key *best_match = NULL; + bool match; + + /* caller either holds rcu_read_lock() or socket lock */ + md5sig = rcu_dereference_check(tp->md5sig_info, + lockdep_sock_is_held(sk)); + if (!md5sig) + return NULL; + + hlist_for_each_entry_rcu(key, &md5sig->head, node) { + if (key->family != family) + continue; + + if (family == AF_INET) { + mask = inet_make_mask(key->prefixlen); + match = (key->addr.a4.s_addr & mask) == + (addr->a4.s_addr & mask); +#if IS_ENABLED(CONFIG_IPV6) + } else if (family == AF_INET6) { + match = ipv6_prefix_equal(&key->addr.a6, &addr->a6, + key->prefixlen); +#endif + } else { + match = false; + } + + if (match && (!best_match || + key->prefixlen > best_match->prefixlen)) + best_match = key; + } + return best_match; +} +EXPORT_SYMBOL(tcp_md5_do_lookup); + +struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk, + const union tcp_md5_addr *addr, + int family, u8 prefixlen) +{ + const struct tcp_sock *tp = tcp_sk(sk); + struct tcp_md5sig_key *key; unsigned int size = sizeof(struct in_addr); const struct tcp_md5sig_info *md5sig; @@ -919,12 +964,12 @@ struct tcp_md5sig_key *tcp_md5_do_lookup(const struct sock *sk, hlist_for_each_entry_rcu(key, &md5sig->head, node) { if (key->family != family) continue; - if (!memcmp(&key->addr, addr, size)) + if (!memcmp(&key->addr, addr, size) && + key->prefixlen == prefixlen) return key; } return NULL; } -EXPORT_SYMBOL(tcp_md5_do_lookup); struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, const struct sock *addr_sk) @@ -938,14 +983,15 @@ EXPORT_SYMBOL(tcp_v4_md5_lookup); /* This can be called on a newly created socket, from other files */ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, - int family, const u8 *newkey, u8 newkeylen, gfp_t gfp) + int family, u8 prefixlen, const u8 *newkey, u8 newkeylen, + gfp_t gfp) { /* Add Key to the list */ struct tcp_md5sig_key *key; struct tcp_sock *tp = tcp_sk(sk); struct tcp_md5sig_info *md5sig; - key = tcp_md5_do_lookup(sk, addr, family); + key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen); if (key) { /* Pre-existing entry - just update that one. */ memcpy(key->key, newkey, newkeylen); @@ -976,6 +1022,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, memcpy(key->key, newkey, newkeylen); key->keylen = newkeylen; key->family = family; + key->prefixlen = prefixlen; memcpy(&key->addr, addr, (family == AF_INET6) ? sizeof(struct in6_addr) : sizeof(struct in_addr)); @@ -984,11 +1031,12 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, } EXPORT_SYMBOL(tcp_md5_do_add); -int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family) +int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, + u8 prefixlen) { struct tcp_md5sig_key *key; - key = tcp_md5_do_lookup(sk, addr, family); + key = tcp_md5_do_lookup_exact(sk, addr, family, prefixlen); if (!key) return -ENOENT; hlist_del_rcu(&key->node); @@ -1014,11 +1062,12 @@ static void tcp_clear_md5_list(struct sock *sk) } } -static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, - int optlen) +static int tcp_v4_parse_md5_keys(struct sock *sk, int optname, + char __user *optval, int optlen) { struct tcp_md5sig cmd; struct sockaddr_in *sin = (struct sockaddr_in *)&cmd.tcpm_addr; + u8 prefixlen = 32; if (optlen < sizeof(cmd)) return -EINVAL; @@ -1029,15 +1078,22 @@ static int tcp_v4_parse_md5_keys(struct sock *sk, char __user *optval, if (sin->sin_family != AF_INET) return -EINVAL; + if (optname == TCP_MD5SIG_EXT && + cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) { + prefixlen = cmd.tcpm_prefixlen; + if (prefixlen > 32) + return -EINVAL; + } + if (!cmd.tcpm_keylen) return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, - AF_INET); + AF_INET, prefixlen); if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN) return -EINVAL; return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin->sin_addr.s_addr, - AF_INET, cmd.tcpm_key, cmd.tcpm_keylen, + AF_INET, prefixlen, cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL); } @@ -1340,7 +1396,7 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, * across. Shucks. */ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newinet->inet_daddr, - AF_INET, key->key, key->keylen, GFP_ATOMIC); + AF_INET, 32, key->key, key->keylen, GFP_ATOMIC); sk_nocaps_add(newsk, NETIF_F_GSO_MASK); } #endif @@ -1673,6 +1729,8 @@ process: } if (nsk == sk) { reqsk_put(req); + } else if (tcp_filter(sk, skb)) { + goto discard_and_relse; } else if (tcp_child_process(sk, nsk, skb)) { tcp_v4_send_reset(nsk, skb); goto discard_and_relse; @@ -1858,6 +1916,8 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_cleanup_congestion_control(sk); + tcp_cleanup_ulp(sk); + /* Cleanup up the write buffer. */ tcp_write_queue_purge(sk); @@ -2263,7 +2323,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), icsk->icsk_probes_out, sock_i_ino(sk), - atomic_read(&sk->sk_refcnt), sk, + refcount_read(&sk->sk_refcnt), sk, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, @@ -2289,7 +2349,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK", i, src, srcp, dest, destp, tw->tw_substate, 0, 0, 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, - atomic_read(&tw->tw_refcnt), tw); + refcount_read(&tw->tw_refcnt), tw); } #define TMPSZ 150 @@ -2385,6 +2445,7 @@ struct proto tcp_prot = { .unhash = inet_unhash, .get_port = inet_csk_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, + .leave_memory_pressure = tcp_leave_memory_pressure, .stream_memory_free = tcp_stream_memory_free, .sockets_allocated = &tcp_sockets_allocated, .orphan_count = &tcp_orphan_count, @@ -2463,6 +2524,9 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.tcp_death_row.hashinfo = &tcp_hashinfo; net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 256); + net->ipv4.sysctl_tcp_sack = 1; + net->ipv4.sysctl_tcp_window_scaling = 1; + net->ipv4.sysctl_tcp_timestamps = 1; return 0; fail: |