summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2016-04-14 16:29:54 -0400
committerDavid S. Miller <davem@davemloft.net>2016-04-14 16:29:54 -0400
commit97cc931fc928a5cef50b104c0c225975ff729fce (patch)
tree4e761d6859d0784b6e24fb1f30cc85467053eba2
parentf1d0540db6a21395a58268f4392c7420ebae8c1d (diff)
parente646b657f6983017783914a951039e323120dc55 (diff)
downloadlinux-97cc931fc928a5cef50b104c0c225975ff729fce.tar.gz
linux-97cc931fc928a5cef50b104c0c225975ff729fce.tar.bz2
linux-97cc931fc928a5cef50b104c0c225975ff729fce.zip
Merge branch 'ipv6-dgram-dst-cache'
Martin KaFai Lau says: ==================== ipv6: datagram: Update dst cache of a connected udp sk during pmtu update v2: ~ Protect __sk_dst_get() operations with rcu_read_lock in release_cb() because another thread may do ip6_dst_store() for a udp sk without taking the sk lock (e.g. in sendmsg). ~ Do a ipv6_addr_v4mapped(&sk->sk_v6_daddr) check before calling ip6_datagram_dst_update() in patch 3 and 4. It is similar to how __ip6_datagram_connect handles it. ~ One fix in ip6_datagram_dst_update() in patch 2. It needs to check (np->flow_label & IPV6_FLOWLABEL_MASK) before doing fl6_sock_lookup. I was confused with the naming of IPV6_FLOWLABEL_MASK and IPV6_FLOWINFO_MASK. ~ Check dst->obsolete just on the safe side, although I think it should at least have DST_OBSOLETE_FORCE_CHK by now. ~ Add Fixes tag to patch 3 and 4 ~ Add some points from the previous discussion about holding sk lock to the commit message in patch 3. v1: There is a case in connected UDP socket such that getsockopt(IPV6_MTU) will return a stale MTU value. The reproducible sequence could be the following: 1. Create a connected UDP socket 2. Send some datagrams out 3. Receive a ICMPV6_PKT_TOOBIG 4. No new outgoing datagrams to trigger the sk_dst_check() logic to update the sk->sk_dst_cache. 5. getsockopt(IPV6_MTU) returns the mtu from the invalid sk->sk_dst_cache instead of the newly created RTF_CACHE clone. Patch 1 and 2 are the prep work. Patch 3 and 4 are the fixes. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--include/net/ipv6.h2
-rw-r--r--net/ipv6/datagram.c169
-rw-r--r--net/ipv6/route.c12
-rw-r--r--net/ipv6/udp.c1
4 files changed, 121 insertions, 63 deletions
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index d0aeb97aec5d..1be050ada8c5 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -959,6 +959,8 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len);
int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr,
int addr_len);
+int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr);
+void ip6_datagram_release_cb(struct sock *sk);
int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
int *addr_len);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 428162155280..9dd3882fe6bf 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -40,18 +40,114 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a)
return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0);
}
+static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
+{
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+
+ memset(fl6, 0, sizeof(*fl6));
+ fl6->flowi6_proto = sk->sk_protocol;
+ fl6->daddr = sk->sk_v6_daddr;
+ fl6->saddr = np->saddr;
+ fl6->flowi6_oif = sk->sk_bound_dev_if;
+ fl6->flowi6_mark = sk->sk_mark;
+ fl6->fl6_dport = inet->inet_dport;
+ fl6->fl6_sport = inet->inet_sport;
+ fl6->flowlabel = np->flow_label;
+
+ if (!fl6->flowi6_oif)
+ fl6->flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
+
+ if (!fl6->flowi6_oif && ipv6_addr_is_multicast(&fl6->daddr))
+ fl6->flowi6_oif = np->mcast_oif;
+
+ security_sk_classify_flow(sk, flowi6_to_flowi(fl6));
+}
+
+int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr)
+{
+ struct ip6_flowlabel *flowlabel = NULL;
+ struct in6_addr *final_p, final;
+ struct ipv6_txoptions *opt;
+ struct dst_entry *dst;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct flowi6 fl6;
+ int err = 0;
+
+ if (np->sndflow && (np->flow_label & IPV6_FLOWLABEL_MASK)) {
+ flowlabel = fl6_sock_lookup(sk, np->flow_label);
+ if (!flowlabel)
+ return -EINVAL;
+ }
+ ip6_datagram_flow_key_init(&fl6, sk);
+
+ rcu_read_lock();
+ opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
+ final_p = fl6_update_dst(&fl6, opt, &final);
+ rcu_read_unlock();
+
+ dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
+ if (IS_ERR(dst)) {
+ err = PTR_ERR(dst);
+ goto out;
+ }
+
+ if (fix_sk_saddr) {
+ if (ipv6_addr_any(&np->saddr))
+ np->saddr = fl6.saddr;
+
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
+ sk->sk_v6_rcv_saddr = fl6.saddr;
+ inet->inet_rcv_saddr = LOOPBACK4_IPV6;
+ if (sk->sk_prot->rehash)
+ sk->sk_prot->rehash(sk);
+ }
+ }
+
+ ip6_dst_store(sk, dst,
+ ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
+ &sk->sk_v6_daddr : NULL,
+#ifdef CONFIG_IPV6_SUBTREES
+ ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
+ &np->saddr :
+#endif
+ NULL);
+
+out:
+ fl6_sock_release(flowlabel);
+ return err;
+}
+
+void ip6_datagram_release_cb(struct sock *sk)
+{
+ struct dst_entry *dst;
+
+ if (ipv6_addr_v4mapped(&sk->sk_v6_daddr))
+ return;
+
+ rcu_read_lock();
+ dst = __sk_dst_get(sk);
+ if (!dst || !dst->obsolete ||
+ dst->ops->check(dst, inet6_sk(sk)->dst_cookie)) {
+ rcu_read_unlock();
+ return;
+ }
+ rcu_read_unlock();
+
+ ip6_datagram_dst_update(sk, false);
+}
+EXPORT_SYMBOL_GPL(ip6_datagram_release_cb);
+
static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct in6_addr *daddr, *final_p, final;
- struct dst_entry *dst;
- struct flowi6 fl6;
- struct ip6_flowlabel *flowlabel = NULL;
- struct ipv6_txoptions *opt;
+ struct in6_addr *daddr;
int addr_type;
int err;
+ __be32 fl6_flowlabel = 0;
if (usin->sin6_family == AF_INET) {
if (__ipv6_only_sock(sk))
@@ -66,15 +162,8 @@ static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int a
if (usin->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
- memset(&fl6, 0, sizeof(fl6));
- if (np->sndflow) {
- fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
- if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
- flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
- if (!flowlabel)
- return -EINVAL;
- }
- }
+ if (np->sndflow)
+ fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
addr_type = ipv6_addr_type(&usin->sin6_addr);
@@ -145,7 +234,7 @@ ipv4_connected:
}
sk->sk_v6_daddr = *daddr;
- np->flow_label = fl6.flowlabel;
+ np->flow_label = fl6_flowlabel;
inet->inet_dport = usin->sin6_port;
@@ -154,59 +243,13 @@ ipv4_connected:
* destination cache for it.
*/
- fl6.flowi6_proto = sk->sk_protocol;
- fl6.daddr = sk->sk_v6_daddr;
- fl6.saddr = np->saddr;
- fl6.flowi6_oif = sk->sk_bound_dev_if;
- fl6.flowi6_mark = sk->sk_mark;
- fl6.fl6_dport = inet->inet_dport;
- fl6.fl6_sport = inet->inet_sport;
-
- if (!fl6.flowi6_oif)
- fl6.flowi6_oif = np->sticky_pktinfo.ipi6_ifindex;
-
- if (!fl6.flowi6_oif && (addr_type&IPV6_ADDR_MULTICAST))
- fl6.flowi6_oif = np->mcast_oif;
-
- security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
-
- rcu_read_lock();
- opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
- final_p = fl6_update_dst(&fl6, opt, &final);
- rcu_read_unlock();
-
- dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
- err = 0;
- if (IS_ERR(dst)) {
- err = PTR_ERR(dst);
+ err = ip6_datagram_dst_update(sk, true);
+ if (err)
goto out;
- }
-
- /* source address lookup done in ip6_dst_lookup */
-
- if (ipv6_addr_any(&np->saddr))
- np->saddr = fl6.saddr;
-
- if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) {
- sk->sk_v6_rcv_saddr = fl6.saddr;
- inet->inet_rcv_saddr = LOOPBACK4_IPV6;
- if (sk->sk_prot->rehash)
- sk->sk_prot->rehash(sk);
- }
-
- ip6_dst_store(sk, dst,
- ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ?
- &sk->sk_v6_daddr : NULL,
-#ifdef CONFIG_IPV6_SUBTREES
- ipv6_addr_equal(&fl6.saddr, &np->saddr) ?
- &np->saddr :
-#endif
- NULL);
sk->sk_state = TCP_ESTABLISHED;
sk_set_txhash(sk);
out:
- fl6_sock_release(flowlabel);
return err;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1d8871a5ed20..d916d6ab9ad2 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1418,8 +1418,20 @@ EXPORT_SYMBOL_GPL(ip6_update_pmtu);
void ip6_sk_update_pmtu(struct sk_buff *skb, struct sock *sk, __be32 mtu)
{
+ struct dst_entry *dst;
+
ip6_update_pmtu(skb, sock_net(sk), mtu,
sk->sk_bound_dev_if, sk->sk_mark);
+
+ dst = __sk_dst_get(sk);
+ if (!dst || !dst->obsolete ||
+ dst->ops->check(dst, inet6_sk(sk)->dst_cookie))
+ return;
+
+ bh_lock_sock(sk);
+ if (!sock_owned_by_user(sk) && !ipv6_addr_v4mapped(&sk->sk_v6_daddr))
+ ip6_datagram_dst_update(sk, false);
+ bh_unlock_sock(sk);
}
EXPORT_SYMBOL_GPL(ip6_sk_update_pmtu);
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 8125931106be..6bc5c664fa46 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1539,6 +1539,7 @@ struct proto udpv6_prot = {
.sendmsg = udpv6_sendmsg,
.recvmsg = udpv6_recvmsg,
.backlog_rcv = __udpv6_queue_rcv_skb,
+ .release_cb = ip6_datagram_release_cb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
.rehash = udp_v6_rehash,