summaryrefslogtreecommitdiffstats
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig1
-rw-r--r--net/ipv6/addrconf.c91
-rw-r--r--net/ipv6/af_inet6.c71
-rw-r--r--net/ipv6/anycast.c14
-rw-r--r--net/ipv6/datagram.c5
-rw-r--r--net/ipv6/esp6_offload.c2
-rw-r--r--net/ipv6/exthdrs_core.c1
-rw-r--r--net/ipv6/fib6_rules.c35
-rw-r--r--net/ipv6/icmp.c5
-rw-r--r--net/ipv6/ip6_fib.c19
-rw-r--r--net/ipv6/ip6_flowlabel.c2
-rw-r--r--net/ipv6/ip6_gre.c26
-rw-r--r--net/ipv6/ip6_output.c19
-rw-r--r--net/ipv6/ip6_tunnel.c18
-rw-r--r--net/ipv6/ip6_vti.c2
-rw-r--r--net/ipv6/ip6mr.c1111
-rw-r--r--net/ipv6/ipv6_sockglue.c1
-rw-r--r--net/ipv6/mcast.c8
-rw-r--r--net/ipv6/ndisc.c4
-rw-r--r--net/ipv6/netfilter/Kconfig2
-rw-r--r--net/ipv6/netfilter/Makefile1
-rw-r--r--net/ipv6/netfilter/ip6_tables.c33
-rw-r--r--net/ipv6/netfilter/ip6t_SYNPROXY.c8
-rw-r--r--net/ipv6/netfilter/ip6t_rpfilter.c2
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c123
-rw-r--r--net/ipv6/netfilter/nf_tables_ipv6.c65
-rw-r--r--net/ipv6/netfilter/nft_chain_nat_ipv6.c20
-rw-r--r--net/ipv6/netfilter/nft_chain_route_ipv6.c6
-rw-r--r--net/ipv6/netfilter/nft_fib_ipv6.c3
-rw-r--r--net/ipv6/proc.c12
-rw-r--r--net/ipv6/raw.c2
-rw-r--r--net/ipv6/reassembly.c235
-rw-r--r--net/ipv6/route.c306
-rw-r--r--net/ipv6/seg6_local.c4
-rw-r--r--net/ipv6/sit.c5
-rw-r--r--net/ipv6/sysctl_net_ipv6.c27
-rw-r--r--net/ipv6/tcp_ipv6.c29
-rw-r--r--net/ipv6/udp.c72
-rw-r--r--net/ipv6/xfrm6_state.c1
39 files changed, 1235 insertions, 1156 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index ea71e4b0ab7a..6794ddf0547c 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -278,6 +278,7 @@ config IPV6_SUBTREES
config IPV6_MROUTE
bool "IPv6: multicast routing"
depends on IPV6
+ select IP_MROUTE_COMMON
---help---
Experimental support for IPv6 multicast forwarding.
If unsure, say N.
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e1846b97ee69..78cef00c9596 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -94,15 +94,6 @@
#include <linux/seq_file.h>
#include <linux/export.h>
-/* Set to 3 to get tracing... */
-#define ACONF_DEBUG 2
-
-#if ACONF_DEBUG >= 3
-#define ADBG(fmt, ...) printk(fmt, ##__VA_ARGS__)
-#else
-#define ADBG(fmt, ...) do { if (0) printk(fmt, ##__VA_ARGS__); } while (0)
-#endif
-
#define INFINITY_LIFE_TIME 0xFFFFFFFF
#define IPV6_MAX_STRLEN \
@@ -409,9 +400,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
dev_hold(dev);
if (snmp6_alloc_dev(ndev) < 0) {
- ADBG(KERN_WARNING
- "%s: cannot allocate memory for statistics; dev=%s.\n",
- __func__, dev->name);
+ netdev_dbg(dev, "%s: cannot allocate memory for statistics\n",
+ __func__);
neigh_parms_release(&nd_tbl, ndev->nd_parms);
dev_put(dev);
kfree(ndev);
@@ -419,9 +409,8 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
}
if (snmp6_register_dev(ndev) < 0) {
- ADBG(KERN_WARNING
- "%s: cannot create /proc/net/dev_snmp6/%s\n",
- __func__, dev->name);
+ netdev_dbg(dev, "%s: cannot create /proc/net/dev_snmp6/%s\n",
+ __func__, dev->name);
goto err_release;
}
@@ -984,7 +973,7 @@ static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa)
/* Ignore adding duplicate addresses on an interface */
if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) {
- ADBG("ipv6_add_addr: already assigned\n");
+ netdev_dbg(dev, "ipv6_add_addr: already assigned\n");
err = -EEXIST;
} else {
hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
@@ -1044,7 +1033,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,
ifa = kzalloc(sizeof(*ifa), gfp_flags);
if (!ifa) {
- ADBG("ipv6_add_addr: malloc failed\n");
err = -ENOBUFS;
goto out;
}
@@ -1459,6 +1447,21 @@ static bool ipv6_use_optimistic_addr(struct net *net,
#endif
}
+static bool ipv6_allow_optimistic_dad(struct net *net,
+ struct inet6_dev *idev)
+{
+#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
+ if (!idev)
+ return false;
+ if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad)
+ return false;
+
+ return true;
+#else
+ return false;
+#endif
+}
+
static int ipv6_get_saddr_eval(struct net *net,
struct ipv6_saddr_score *score,
struct ipv6_saddr_dst *dst,
@@ -1836,22 +1839,42 @@ static int ipv6_count_addresses(const struct inet6_dev *idev)
int ipv6_chk_addr(struct net *net, const struct in6_addr *addr,
const struct net_device *dev, int strict)
{
- return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE);
+ return ipv6_chk_addr_and_flags(net, addr, dev, !dev,
+ strict, IFA_F_TENTATIVE);
}
EXPORT_SYMBOL(ipv6_chk_addr);
+/* device argument is used to find the L3 domain of interest. If
+ * skip_dev_check is set, then the ifp device is not checked against
+ * the passed in dev argument. So the 2 cases for addresses checks are:
+ * 1. does the address exist in the L3 domain that dev is part of
+ * (skip_dev_check = true), or
+ *
+ * 2. does the address exist on the specific device
+ * (skip_dev_check = false)
+ */
int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr,
- const struct net_device *dev, int strict,
- u32 banned_flags)
+ const struct net_device *dev, bool skip_dev_check,
+ int strict, u32 banned_flags)
{
unsigned int hash = inet6_addr_hash(net, addr);
+ const struct net_device *l3mdev;
struct inet6_ifaddr *ifp;
u32 ifp_flags;
rcu_read_lock();
+
+ l3mdev = l3mdev_master_dev_rcu(dev);
+ if (skip_dev_check)
+ dev = NULL;
+
hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) {
if (!net_eq(dev_net(ifp->idev->dev), net))
continue;
+
+ if (l3mdev_master_dev_rcu(ifp->idev->dev) != l3mdev)
+ continue;
+
/* Decouple optimistic from tentative for evaluation here.
* Ban optimistic addresses explicitly, when required.
*/
@@ -1968,6 +1991,8 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
spin_lock_bh(&ifp->lock);
addrconf_del_dad_work(ifp);
ifp->flags |= IFA_F_TENTATIVE;
+ if (dad_failed)
+ ifp->flags &= ~IFA_F_OPTIMISTIC;
spin_unlock_bh(&ifp->lock);
if (dad_failed)
ipv6_ifa_notify(0, ifp);
@@ -2581,7 +2606,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
pinfo = (struct prefix_info *) opt;
if (len < sizeof(struct prefix_info)) {
- ADBG("addrconf: prefix option too short\n");
+ netdev_dbg(dev, "addrconf: prefix option too short\n");
return;
}
@@ -4244,7 +4269,7 @@ static const struct file_operations if6_fops = {
static int __net_init if6_proc_net_init(struct net *net)
{
- if (!proc_create("if_inet6", S_IRUGO, net->proc_net, &if6_fops))
+ if (!proc_create("if_inet6", 0444, net->proc_net, &if6_fops))
return -ENOMEM;
return 0;
}
@@ -4408,8 +4433,8 @@ restart:
if (time_before(next_sched, jiffies + ADDRCONF_TIMER_FUZZ_MAX))
next_sched = jiffies + ADDRCONF_TIMER_FUZZ_MAX;
- ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
- now, next, next_sec, next_sched);
+ pr_debug("now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
+ now, next, next_sec, next_sched);
mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
rcu_read_unlock_bh();
}
@@ -4500,6 +4525,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,
(ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64))
return -EINVAL;
+ if (!(ifp->flags & IFA_F_TENTATIVE) || ifp->flags & IFA_F_DADFAILED)
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
timeout = addrconf_timeout_fixup(valid_lft, HZ);
if (addrconf_finite_timeout(timeout)) {
expires = jiffies_to_clock_t(timeout * HZ);
@@ -4573,6 +4601,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
struct in6_addr *pfx, *peer_pfx;
struct inet6_ifaddr *ifa;
struct net_device *dev;
+ struct inet6_dev *idev;
u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME;
u32 ifa_flags;
int err;
@@ -4606,7 +4635,19 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh,
/* We ignore other flags so far. */
ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
- IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
+ IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC;
+
+ idev = ipv6_find_idev(dev);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+
+ if (!ipv6_allow_optimistic_dad(net, idev))
+ ifa_flags &= ~IFA_F_OPTIMISTIC;
+
+ if (ifa_flags & IFA_F_NODAD && ifa_flags & IFA_F_OPTIMISTIC) {
+ NL_SET_ERR_MSG(extack, "IFA_F_NODAD and IFA_F_OPTIMISTIC are mutually exclusive");
+ return -EINVAL;
+ }
ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
if (!ifa) {
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 416917719a6f..8da0b513f188 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -277,15 +277,7 @@ out_rcu_unlock:
/* bind for INET6 API */
int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
- struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
- struct inet_sock *inet = inet_sk(sk);
- struct ipv6_pinfo *np = inet6_sk(sk);
- struct net *net = sock_net(sk);
- __be32 v4addr = 0;
- unsigned short snum;
- bool saved_ipv6only;
- int addr_type = 0;
int err = 0;
/* If the socket has its own bind function then use it. */
@@ -295,11 +287,35 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_len < SIN6_LEN_RFC2133)
return -EINVAL;
+ /* BPF prog is run before any checks are done so that if the prog
+ * changes context in a wrong way it will be caught.
+ */
+ err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr);
+ if (err)
+ return err;
+
+ return __inet6_bind(sk, uaddr, addr_len, false, true);
+}
+EXPORT_SYMBOL(inet6_bind);
+
+int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
+ bool force_bind_address_no_port, bool with_lock)
+{
+ struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
+ struct inet_sock *inet = inet_sk(sk);
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ struct net *net = sock_net(sk);
+ __be32 v4addr = 0;
+ unsigned short snum;
+ bool saved_ipv6only;
+ int addr_type = 0;
+ int err = 0;
+
if (addr->sin6_family != AF_INET6)
return -EAFNOSUPPORT;
addr_type = ipv6_addr_type(&addr->sin6_addr);
- if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM)
+ if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM)
return -EINVAL;
snum = ntohs(addr->sin6_port);
@@ -307,7 +323,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
!ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
return -EACCES;
- lock_sock(sk);
+ if (with_lock)
+ lock_sock(sk);
/* Check these errors (active socket, double bind). */
if (sk->sk_state != TCP_CLOSE || inet->inet_num) {
@@ -395,12 +412,20 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
sk->sk_ipv6only = 1;
/* Make sure we are allowed to bind here. */
- if ((snum || !inet->bind_address_no_port) &&
- sk->sk_prot->get_port(sk, snum)) {
- sk->sk_ipv6only = saved_ipv6only;
- inet_reset_saddr(sk);
- err = -EADDRINUSE;
- goto out;
+ if (snum || !(inet->bind_address_no_port ||
+ force_bind_address_no_port)) {
+ if (sk->sk_prot->get_port(sk, snum)) {
+ sk->sk_ipv6only = saved_ipv6only;
+ inet_reset_saddr(sk);
+ err = -EADDRINUSE;
+ goto out;
+ }
+ err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
+ if (err) {
+ sk->sk_ipv6only = saved_ipv6only;
+ inet_reset_saddr(sk);
+ goto out;
+ }
}
if (addr_type != IPV6_ADDR_ANY)
@@ -411,13 +436,13 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
inet->inet_dport = 0;
inet->inet_daddr = 0;
out:
- release_sock(sk);
+ if (with_lock)
+ release_sock(sk);
return err;
out_unlock:
rcu_read_unlock();
goto out;
}
-EXPORT_SYMBOL(inet6_bind);
int inet6_release(struct socket *sock)
{
@@ -470,7 +495,7 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock);
*/
int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
- int *uaddr_len, int peer)
+ int peer)
{
struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
struct sock *sk = sock->sk;
@@ -500,8 +525,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
sk->sk_bound_dev_if);
- *uaddr_len = sizeof(*sin);
- return 0;
+ return sizeof(*sin);
}
EXPORT_SYMBOL(inet6_getname);
@@ -869,6 +893,10 @@ static const struct ipv6_stub ipv6_stub_impl = {
.nd_tbl = &nd_tbl,
};
+static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = {
+ .inet6_bind = __inet6_bind,
+};
+
static int __init inet6_init(void)
{
struct list_head *r;
@@ -1025,6 +1053,7 @@ static int __init inet6_init(void)
/* ensure that ipv6 stubs are visible only after ipv6 is ready */
wmb();
ipv6_stub = &ipv6_stub_impl;
+ ipv6_bpf_stub = &ipv6_bpf_stub_impl;
out:
return err;
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 8e085cc05aeb..bbcabbba9bd8 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -66,7 +66,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
return -EPERM;
if (ipv6_addr_is_multicast(addr))
return -EINVAL;
- if (ipv6_chk_addr(net, addr, NULL, 0))
+
+ if (ifindex)
+ dev = __dev_get_by_index(net, ifindex);
+
+ if (ipv6_chk_addr_and_flags(net, addr, dev, true, 0, IFA_F_TENTATIVE))
return -EINVAL;
pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
@@ -78,7 +82,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
@@ -90,8 +94,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
dev = __dev_get_by_flags(net, IFF_UP,
IFF_UP | IFF_LOOPBACK);
}
- } else
- dev = __dev_get_by_index(net, ifindex);
+ }
if (!dev) {
err = -ENODEV;
@@ -541,7 +544,7 @@ static const struct file_operations ac6_seq_fops = {
int __net_init ac6_proc_init(struct net *net)
{
- if (!proc_create("anycast6", S_IRUGO, net->proc_net, &ac6_seq_fops))
+ if (!proc_create("anycast6", 0444, net->proc_net, &ac6_seq_fops))
return -ENOMEM;
return 0;
@@ -552,4 +555,3 @@ void ac6_proc_exit(struct net *net)
remove_proc_entry("anycast6", net->proc_net);
}
#endif
-
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index a9f7eca0b6a3..88bc2ef7c7a8 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -808,8 +808,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,
if (addr_type != IPV6_ADDR_ANY) {
int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&
- !ipv6_chk_addr(net, &src_info->ipi6_addr,
- strict ? dev : NULL, 0) &&
+ !ipv6_chk_addr_and_flags(net, &src_info->ipi6_addr,
+ dev, !strict, 0,
+ IFA_F_TENTATIVE) &&
!ipv6_chk_acast_addr_src(net, dev,
&src_info->ipi6_addr))
err = -EINVAL;
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 3fd1ec775dc2..27f59b61f70f 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -165,6 +165,8 @@ static struct sk_buff *esp6_gso_segment(struct sk_buff *skb,
if (!(features & NETIF_F_HW_ESP) || !x->xso.offload_handle ||
(x->xso.dev != skb->dev))
esp_features = features & ~(NETIF_F_SG | NETIF_F_CSUM_MASK);
+ else if (!(features & NETIF_F_HW_ESP_TX_CSUM))
+ esp_features = features & ~NETIF_F_CSUM_MASK;
xo->flags |= XFRM_GSO_SEGMENT;
diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c
index 11025f8d124b..b643f5ce6c80 100644
--- a/net/ipv6/exthdrs_core.c
+++ b/net/ipv6/exthdrs_core.c
@@ -279,4 +279,3 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
return nexthdr;
}
EXPORT_SYMBOL(ipv6_find_hdr);
-
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index b240f24a6e52..df113c7b5fc8 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -61,11 +61,13 @@ unsigned int fib6_rules_seq_read(struct net *net)
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
if (net->ipv6.fib6_has_custom_rules) {
struct fib_lookup_arg arg = {
.lookup_ptr = lookup,
+ .lookup_data = skb,
.flags = FIB_LOOKUP_NOREF,
};
@@ -80,11 +82,11 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
} else {
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, skb, flags);
if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error != -EAGAIN)
return &rt->dst;
ip6_rt_put(rt);
@@ -130,7 +132,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
goto out;
}
- rt = lookup(net, table, flp6, flags);
+ rt = lookup(net, table, flp6, arg->lookup_data, flags);
if (rt != net->ipv6.ip6_null_entry) {
struct fib6_rule *r = (struct fib6_rule *)rule;
@@ -223,6 +225,17 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))
return 0;
+ if (rule->ip_proto && (rule->ip_proto != fl6->flowi6_proto))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->sport_range) &&
+ !fib_rule_port_inrange(&rule->sport_range, fl6->fl6_sport))
+ return 0;
+
+ if (fib_rule_port_range_set(&rule->dport_range) &&
+ !fib_rule_port_inrange(&rule->dport_range, fl6->fl6_dport))
+ return 0;
+
return 1;
}
@@ -258,12 +271,26 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
rule6->dst.plen = frh->dst_len;
rule6->tclass = frh->tos;
+ if (fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect++;
+
net->ipv6.fib6_has_custom_rules = true;
err = 0;
errout:
return err;
}
+static int fib6_rule_delete(struct fib_rule *rule)
+{
+ struct net *net = rule->fr_net;
+
+ if (net->ipv6.fib6_rules_require_fldissect &&
+ fib_rule_requires_fldissect(rule))
+ net->ipv6.fib6_rules_require_fldissect--;
+
+ return 0;
+}
+
static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
struct nlattr **tb)
{
@@ -323,6 +350,7 @@ static const struct fib_rules_ops __net_initconst fib6_rules_ops_template = {
.match = fib6_rule_match,
.suppress = fib6_rule_suppress,
.configure = fib6_rule_configure,
+ .delete = fib6_rule_delete,
.compare = fib6_rule_compare,
.fill = fib6_rule_fill,
.nlmsg_payload = fib6_rule_nlmsg_payload,
@@ -350,6 +378,7 @@ static int __net_init fib6_rules_net_init(struct net *net)
goto out_fib6_rules_ops;
net->ipv6.fib6_rules_ops = ops;
+ net->ipv6.fib6_rules_require_fldissect = 0;
out:
return err;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 6ae5dd3f4d0d..d8c4b6374377 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -522,7 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
fl6.fl6_icmp_type = type;
fl6.fl6_icmp_code = code;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
sk = icmpv6_xmit_lock(net);
@@ -629,7 +629,8 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
skb_pull(skb2, nhs);
skb_reset_network_header(skb2);
- rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0);
+ rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
+ skb, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 92b8d8c75eed..deab2db6692e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -299,11 +299,12 @@ struct fib6_table *fib6_get_table(struct net *net, u32 id)
}
struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags, pol_lookup_t lookup)
{
struct rt6_info *rt;
- rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags);
+ rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, skb, flags);
if (rt->dst.error == -EAGAIN) {
ip6_rt_put(rt);
rt = net->ipv6.ip6_null_entry;
@@ -1006,12 +1007,16 @@ add:
if (err)
return err;
+ err = call_fib6_entry_notifiers(info->nl_net,
+ FIB_EVENT_ENTRY_ADD,
+ rt, extack);
+ if (err)
+ return err;
+
rcu_assign_pointer(rt->rt6_next, iter);
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rcu_assign_pointer(*ins, rt);
- call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
- rt, extack);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -1035,12 +1040,16 @@ add:
if (err)
return err;
+ err = call_fib6_entry_notifiers(info->nl_net,
+ FIB_EVENT_ENTRY_REPLACE,
+ rt, extack);
+ if (err)
+ return err;
+
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rt->rt6_next = iter->rt6_next;
rcu_assign_pointer(*ins, rt);
- call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
- rt, extack);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 3dab664ff503..c05c4e82a7ca 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -844,7 +844,7 @@ static const struct file_operations ip6fl_seq_fops = {
static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
- if (!proc_create("ip6_flowlabel", S_IRUGO, net->proc_net,
+ if (!proc_create("ip6_flowlabel", 0444, net->proc_net,
&ip6fl_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 1bbd0930063e..f8a103bdbd60 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -237,7 +237,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev,
return t;
dev = ign->fb_tunnel_dev;
- if (dev->flags & IFF_UP)
+ if (dev && dev->flags & IFF_UP)
return netdev_priv(dev);
return NULL;
@@ -696,9 +696,6 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
else
fl6->daddr = tunnel->parms.raddr;
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
-
/* Push GRE header. */
protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
@@ -721,14 +718,20 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
fl6->flowi6_uid = sock_net_uid(dev_net(dev), NULL);
dsfield = key->tos;
- flags = key->tun_flags & (TUNNEL_CSUM | TUNNEL_KEY);
+ flags = key->tun_flags &
+ (TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
tunnel->tun_hlen = gre_calc_hlen(flags);
gre_build_header(skb, tunnel->tun_hlen,
flags, protocol,
- tunnel_id_to_key32(tun_info->key.tun_id), 0);
+ tunnel_id_to_key32(tun_info->key.tun_id),
+ (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+ : 0);
} else {
+ if (tunnel->parms.o_flags & TUNNEL_SEQ)
+ tunnel->o_seqno++;
+
gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
protocol, tunnel->parms.o_key,
htonl(tunnel->o_seqno));
@@ -1059,7 +1062,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
@@ -1475,6 +1478,8 @@ static int __net_init ip6gre_init_net(struct net *net)
struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);
int err;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0",
NET_NAME_UNKNOWN,
ip6gre_tunnel_setup);
@@ -1757,7 +1762,6 @@ static int ip6erspan_tap_init(struct net_device *dev)
dev->mtu -= 8;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
- tunnel = netdev_priv(dev);
ip6gre_tnl_link_config(tunnel, 1);
return 0;
@@ -1790,6 +1794,12 @@ static void ip6gre_tap_setup(struct net_device *dev)
netif_keep_dst(dev);
}
+bool is_ip6gretap_dev(const struct net_device *dev)
+{
+ return dev->netdev_ops == &ip6gre_tap_netdev_ops;
+}
+EXPORT_SYMBOL_GPL(is_ip6gretap_dev);
+
static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
struct ip_tunnel_encap *ipencap)
{
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5cb18c8ba9b2..e6eaa4dd9f60 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -71,7 +71,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
- ((mroute6_socket(net, skb) &&
+ ((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
&ipv6_hdr(skb)->saddr))) {
@@ -1259,6 +1259,7 @@ static int __ip6_append_data(struct sock *sk,
struct ipv6_txoptions *opt = v6_cork->opt;
int csummode = CHECKSUM_NONE;
unsigned int maxnonfragsize, headersize;
+ unsigned int wmem_alloc_delta = 0;
skb = skb_peek_tail(queue);
if (!skb) {
@@ -1416,11 +1417,10 @@ alloc_new_skb:
(flags & MSG_DONTWAIT), &err);
} else {
skb = NULL;
- if (refcount_read(&sk->sk_wmem_alloc) <=
+ if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
2 * sk->sk_sndbuf)
- skb = sock_wmalloc(sk,
- alloclen + hh_len, 1,
- sk->sk_allocation);
+ skb = alloc_skb(alloclen + hh_len,
+ sk->sk_allocation);
if (unlikely(!skb))
err = -ENOBUFS;
}
@@ -1479,6 +1479,11 @@ alloc_new_skb:
/*
* Put the packet on the pending queue
*/
+ if (!skb->destructor) {
+ skb->destructor = sock_wfree;
+ skb->sk = sk;
+ wmem_alloc_delta += skb->truesize;
+ }
__skb_queue_tail(queue, skb);
continue;
}
@@ -1525,12 +1530,13 @@ alloc_new_skb:
skb->len += copy;
skb->data_len += copy;
skb->truesize += copy;
- refcount_add(copy, &sk->sk_wmem_alloc);
+ wmem_alloc_delta += copy;
}
offset += copy;
length -= copy;
}
+ refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
return 0;
error_efault:
@@ -1538,6 +1544,7 @@ error_efault:
error:
cork->length -= length;
IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
+ refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
return err;
}
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6e0f21eed88a..df4c29f7d59f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -679,7 +679,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
/* Try to guess incoming interface */
rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr,
- NULL, 0, 0);
+ NULL, 0, skb2, 0);
if (rt && rt->dst.dev)
skb2->dev = rt->dst.dev;
@@ -758,9 +758,11 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t,
ldev = dev_get_by_index_rcu(net, p->link);
if ((ipv6_addr_is_multicast(laddr) ||
- likely(ipv6_chk_addr(net, laddr, ldev, 0))) &&
+ likely(ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE))) &&
((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) ||
- likely(!ipv6_chk_addr(net, raddr, NULL, 0))))
+ likely(!ipv6_chk_addr_and_flags(net, raddr, ldev, true,
+ 0, IFA_F_TENTATIVE))))
ret = 1;
}
return ret;
@@ -990,12 +992,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t,
if (p->link)
ldev = dev_get_by_index_rcu(net, p->link);
- if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0)))
+ if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false,
+ 0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Local address not yet configured!\n",
p->name);
else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) &&
!ipv6_addr_is_multicast(raddr) &&
- unlikely(ipv6_chk_addr(net, raddr, NULL, 0)))
+ unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev,
+ true, 0, IFA_F_TENTATIVE)))
pr_warn("%s xmit: Routing loop! Remote address found on this node!\n",
p->name);
else
@@ -1444,7 +1448,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (!rt)
return;
@@ -2205,6 +2209,8 @@ static int __net_init ip6_tnl_init_net(struct net *net)
ip6n->tnls[0] = ip6n->tnls_wc;
ip6n->tnls[1] = ip6n->tnls_r_l;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
err = -ENOMEM;
ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
NET_NAME_UNKNOWN, ip6_tnl_dev_setup);
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index ce18cd20389d..6ebb2e8777f4 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -651,7 +651,7 @@ static void vti6_link_config(struct ip6_tnl *t, bool keep_mtu)
(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
struct rt6_info *rt = rt6_lookup(t->net,
&p->raddr, &p->laddr,
- p->link, strict);
+ p->link, NULL, strict);
if (rt)
tdev = rt->dst.dev;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9f6cace9c817..298fd8b6ed17 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -20,7 +20,6 @@
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/errno.h>
-#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
@@ -32,11 +31,9 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/init.h>
-#include <linux/slab.h>
#include <linux/compat.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
-#include <net/sock.h>
#include <net/raw.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
@@ -54,30 +51,12 @@
#include <net/ip6_checksum.h>
#include <linux/netconf.h>
-struct mr6_table {
- struct list_head list;
- possible_net_t net;
- u32 id;
- struct sock *mroute6_sk;
- struct timer_list ipmr_expire_timer;
- struct list_head mfc6_unres_queue;
- struct list_head mfc6_cache_array[MFC6_LINES];
- struct mif_device vif6_table[MAXMIFS];
- int maxvif;
- atomic_t cache_resolve_queue_len;
- bool mroute_do_assert;
- bool mroute_do_pim;
-#ifdef CONFIG_IPV6_PIMSM_V2
- int mroute_reg_vif_num;
-#endif
-};
-
struct ip6mr_rule {
struct fib_rule common;
};
struct ip6mr_result {
- struct mr6_table *mrt;
+ struct mr_table *mrt;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
@@ -86,11 +65,7 @@ struct ip6mr_result {
static DEFINE_RWLOCK(mrt_lock);
-/*
- * Multicast router control variables
- */
-
-#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
+/* Multicast router control variables */
/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -105,30 +80,45 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
static struct kmem_cache *mrt_cachep __read_mostly;
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
-static void ip6mr_free_table(struct mr6_table *mrt);
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr_table *mrt);
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert);
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm);
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd);
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt);
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt);
static int ip6mr_rtm_dumproute(struct sk_buff *skb,
struct netlink_callback *cb);
-static void mroute_clean_tables(struct mr6_table *mrt, bool all);
+static void mroute_clean_tables(struct mr_table *mrt, bool all);
static void ipmr_expire_process(struct timer_list *t);
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
#define ip6mr_for_each_table(mrt, net) \
list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
{
- struct mr6_table *mrt;
+ struct mr_table *ret;
+
+ if (!mrt)
+ ret = list_entry_rcu(net->ipv6.mr6_tables.next,
+ struct mr_table, list);
+ else
+ ret = list_entry_rcu(mrt->list.next,
+ struct mr_table, list);
+
+ if (&ret->list == &net->ipv6.mr6_tables)
+ return NULL;
+ return ret;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
+{
+ struct mr_table *mrt;
ip6mr_for_each_table(mrt, net) {
if (mrt->id == id)
@@ -138,7 +128,7 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
int err;
struct ip6mr_result res;
@@ -159,7 +149,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
int flags, struct fib_lookup_arg *arg)
{
struct ip6mr_result *res = arg->result;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
switch (rule->action) {
case FR_ACT_TO_TBL:
@@ -227,7 +217,7 @@ static const struct fib_rules_ops __net_initconst ip6mr_rules_ops_template = {
static int __net_init ip6mr_rules_init(struct net *net)
{
struct fib_rules_ops *ops;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
int err;
ops = fib_rules_register(&ip6mr_rules_ops_template, net);
@@ -258,7 +248,7 @@ err1:
static void __net_exit ip6mr_rules_exit(struct net *net)
{
- struct mr6_table *mrt, *next;
+ struct mr_table *mrt, *next;
rtnl_lock();
list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list) {
@@ -268,17 +258,42 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
fib_rules_unregister(net->ipv6.mr6_rules_ops);
rtnl_unlock();
}
+
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
+{
+ return fib_rules_dump(net, nb, RTNL_FAMILY_IP6MR);
+}
+
+static unsigned int ip6mr_rules_seq_read(struct net *net)
+{
+ return fib_rules_seq_read(net, RTNL_FAMILY_IP6MR);
+}
+
+bool ip6mr_rule_default(const struct fib_rule *rule)
+{
+ return fib_rule_matchall(rule) && rule->action == FR_ACT_TO_TBL &&
+ rule->table == RT6_TABLE_DFLT && !rule->l3mdev;
+}
+EXPORT_SYMBOL(ip6mr_rule_default);
#else
#define ip6mr_for_each_table(mrt, net) \
for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
-static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+static struct mr_table *ip6mr_mr_table_iter(struct net *net,
+ struct mr_table *mrt)
+{
+ if (!mrt)
+ return net->ipv6.mrt6;
+ return NULL;
+}
+
+static struct mr_table *ip6mr_get_table(struct net *net, u32 id)
{
return net->ipv6.mrt6;
}
static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,
- struct mr6_table **mrt)
+ struct mr_table **mrt)
{
*mrt = net->ipv6.mrt6;
return 0;
@@ -297,114 +312,87 @@ static void __net_exit ip6mr_rules_exit(struct net *net)
net->ipv6.mrt6 = NULL;
rtnl_unlock();
}
-#endif
-static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+static int ip6mr_rules_dump(struct net *net, struct notifier_block *nb)
{
- struct mr6_table *mrt;
- unsigned int i;
-
- mrt = ip6mr_get_table(net, id);
- if (mrt)
- return mrt;
+ return 0;
+}
- mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
- if (!mrt)
- return NULL;
- mrt->id = id;
- write_pnet(&mrt->net, net);
+static unsigned int ip6mr_rules_seq_read(struct net *net)
+{
+ return 0;
+}
+#endif
- /* Forwarding cache */
- for (i = 0; i < MFC6_LINES; i++)
- INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
+static int ip6mr_hash_cmp(struct rhashtable_compare_arg *arg,
+ const void *ptr)
+{
+ const struct mfc6_cache_cmp_arg *cmparg = arg->key;
+ struct mfc6_cache *c = (struct mfc6_cache *)ptr;
- INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+ return !ipv6_addr_equal(&c->mf6c_mcastgrp, &cmparg->mf6c_mcastgrp) ||
+ !ipv6_addr_equal(&c->mf6c_origin, &cmparg->mf6c_origin);
+}
- timer_setup(&mrt->ipmr_expire_timer, ipmr_expire_process, 0);
+static const struct rhashtable_params ip6mr_rht_params = {
+ .head_offset = offsetof(struct mr_mfc, mnode),
+ .key_offset = offsetof(struct mfc6_cache, cmparg),
+ .key_len = sizeof(struct mfc6_cache_cmp_arg),
+ .nelem_hint = 3,
+ .locks_mul = 1,
+ .obj_cmpfn = ip6mr_hash_cmp,
+ .automatic_shrinking = true,
+};
-#ifdef CONFIG_IPV6_PIMSM_V2
- mrt->mroute_reg_vif_num = -1;
-#endif
+static void ip6mr_new_table_set(struct mr_table *mrt,
+ struct net *net)
+{
#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
#endif
- return mrt;
-}
-
-static void ip6mr_free_table(struct mr6_table *mrt)
-{
- del_timer_sync(&mrt->ipmr_expire_timer);
- mroute_clean_tables(mrt, true);
- kfree(mrt);
}
-#ifdef CONFIG_PROC_FS
-
-struct ipmr_mfc_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- struct list_head *cache;
- int ct;
+static struct mfc6_cache_cmp_arg ip6mr_mr_table_ops_cmparg_any = {
+ .mf6c_origin = IN6ADDR_ANY_INIT,
+ .mf6c_mcastgrp = IN6ADDR_ANY_INIT,
};
+static struct mr_table_ops ip6mr_mr_table_ops = {
+ .rht_params = &ip6mr_rht_params,
+ .cmparg_any = &ip6mr_mr_table_ops_cmparg_any,
+};
-static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
- struct ipmr_mfc_iter *it, loff_t pos)
+static struct mr_table *ip6mr_new_table(struct net *net, u32 id)
{
- struct mr6_table *mrt = it->mrt;
- struct mfc6_cache *mfc;
+ struct mr_table *mrt;
- read_lock(&mrt_lock);
- for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- }
- read_unlock(&mrt_lock);
-
- spin_lock_bh(&mfc_unres_lock);
- it->cache = &mrt->mfc6_unres_queue;
- list_for_each_entry(mfc, it->cache, list)
- if (pos-- == 0)
- return mfc;
- spin_unlock_bh(&mfc_unres_lock);
+ mrt = ip6mr_get_table(net, id);
+ if (mrt)
+ return mrt;
- it->cache = NULL;
- return NULL;
+ return mr_table_alloc(net, id, &ip6mr_mr_table_ops,
+ ipmr_expire_process, ip6mr_new_table_set);
}
-/*
- * The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
- */
-
-struct ipmr_vif_iter {
- struct seq_net_private p;
- struct mr6_table *mrt;
- int ct;
-};
-
-static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
- struct ipmr_vif_iter *iter,
- loff_t pos)
+static void ip6mr_free_table(struct mr_table *mrt)
{
- struct mr6_table *mrt = iter->mrt;
-
- for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- if (pos-- == 0)
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ del_timer_sync(&mrt->ipmr_expire_timer);
+ mroute_clean_tables(mrt, true);
+ rhltable_destroy(&mrt->mfc_hash);
+ kfree(mrt);
}
+#ifdef CONFIG_PROC_FS
+/* The /proc interfaces to multicast routing
+ * /proc/ip6_mr_cache /proc/ip6_mr_vif
+ */
+
static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(mrt_lock)
{
- struct ipmr_vif_iter *iter = seq->private;
+ struct mr_vif_iter *iter = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
@@ -413,26 +401,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
iter->mrt = mrt;
read_lock(&mrt_lock);
- return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct ipmr_vif_iter *iter = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = iter->mrt;
-
- ++*pos;
- if (v == SEQ_START_TOKEN)
- return ip6mr_vif_seq_idx(net, iter, 0);
-
- while (++iter->ct < mrt->maxvif) {
- if (!MIF_EXISTS(mrt, iter->ct))
- continue;
- return &mrt->vif6_table[iter->ct];
- }
- return NULL;
+ return mr_vif_seq_start(seq, pos);
}
static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
@@ -443,19 +412,19 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
{
- struct ipmr_vif_iter *iter = seq->private;
- struct mr6_table *mrt = iter->mrt;
+ struct mr_vif_iter *iter = seq->private;
+ struct mr_table *mrt = iter->mrt;
if (v == SEQ_START_TOKEN) {
seq_puts(seq,
"Interface BytesIn PktsIn BytesOut PktsOut Flags\n");
} else {
- const struct mif_device *vif = v;
+ const struct vif_device *vif = v;
const char *name = vif->dev ? vif->dev->name : "none";
seq_printf(seq,
"%2td %-10s %8ld %7ld %8ld %7ld %05X\n",
- vif - mrt->vif6_table,
+ vif - mrt->vif_table,
name, vif->bytes_in, vif->pkt_in,
vif->bytes_out, vif->pkt_out,
vif->flags);
@@ -465,7 +434,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ip6mr_vif_seq_ops = {
.start = ip6mr_vif_seq_start,
- .next = ip6mr_vif_seq_next,
+ .next = mr_vif_seq_next,
.stop = ip6mr_vif_seq_stop,
.show = ip6mr_vif_seq_show,
};
@@ -473,7 +442,7 @@ static const struct seq_operations ip6mr_vif_seq_ops = {
static int ip6mr_vif_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ip6mr_vif_seq_ops,
- sizeof(struct ipmr_vif_iter));
+ sizeof(struct mr_vif_iter));
}
static const struct file_operations ip6mr_vif_fops = {
@@ -485,72 +454,14 @@ static const struct file_operations ip6mr_vif_fops = {
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
- struct ipmr_mfc_iter *it = seq->private;
struct net *net = seq_file_net(seq);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
if (!mrt)
return ERR_PTR(-ENOENT);
- it->mrt = mrt;
- it->cache = NULL;
- return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
- : SEQ_START_TOKEN;
-}
-
-static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- struct mfc6_cache *mfc = v;
- struct ipmr_mfc_iter *it = seq->private;
- struct net *net = seq_file_net(seq);
- struct mr6_table *mrt = it->mrt;
-
- ++*pos;
-
- if (v == SEQ_START_TOKEN)
- return ipmr_mfc_seq_idx(net, seq->private, 0);
-
- if (mfc->list.next != it->cache)
- return list_entry(mfc->list.next, struct mfc6_cache, list);
-
- if (it->cache == &mrt->mfc6_unres_queue)
- goto end_of_list;
-
- BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
-
- while (++it->ct < MFC6_LINES) {
- it->cache = &mrt->mfc6_cache_array[it->ct];
- if (list_empty(it->cache))
- continue;
- return list_first_entry(it->cache, struct mfc6_cache, list);
- }
-
- /* exhausted cache_array, show unresolved */
- read_unlock(&mrt_lock);
- it->cache = &mrt->mfc6_unres_queue;
- it->ct = 0;
-
- spin_lock_bh(&mfc_unres_lock);
- if (!list_empty(it->cache))
- return list_first_entry(it->cache, struct mfc6_cache, list);
-
- end_of_list:
- spin_unlock_bh(&mfc_unres_lock);
- it->cache = NULL;
-
- return NULL;
-}
-
-static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
-{
- struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
-
- if (it->cache == &mrt->mfc6_unres_queue)
- spin_unlock_bh(&mfc_unres_lock);
- else if (it->cache == &mrt->mfc6_cache_array[it->ct])
- read_unlock(&mrt_lock);
+ return mr_mfc_seq_start(seq, pos, mrt, &mfc_unres_lock);
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
@@ -564,25 +475,25 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
"Iif Pkts Bytes Wrong Oifs\n");
} else {
const struct mfc6_cache *mfc = v;
- const struct ipmr_mfc_iter *it = seq->private;
- struct mr6_table *mrt = it->mrt;
+ const struct mr_mfc_iter *it = seq->private;
+ struct mr_table *mrt = it->mrt;
seq_printf(seq, "%pI6 %pI6 %-3hd",
&mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
- mfc->mf6c_parent);
+ mfc->_c.mfc_parent);
- if (it->cache != &mrt->mfc6_unres_queue) {
+ if (it->cache != &mrt->mfc_unres_queue) {
seq_printf(seq, " %8lu %8lu %8lu",
- mfc->mfc_un.res.pkt,
- mfc->mfc_un.res.bytes,
- mfc->mfc_un.res.wrong_if);
- for (n = mfc->mfc_un.res.minvif;
- n < mfc->mfc_un.res.maxvif; n++) {
- if (MIF_EXISTS(mrt, n) &&
- mfc->mfc_un.res.ttls[n] < 255)
+ mfc->_c.mfc_un.res.pkt,
+ mfc->_c.mfc_un.res.bytes,
+ mfc->_c.mfc_un.res.wrong_if);
+ for (n = mfc->_c.mfc_un.res.minvif;
+ n < mfc->_c.mfc_un.res.maxvif; n++) {
+ if (VIF_EXISTS(mrt, n) &&
+ mfc->_c.mfc_un.res.ttls[n] < 255)
seq_printf(seq,
- " %2d:%-3d",
- n, mfc->mfc_un.res.ttls[n]);
+ " %2d:%-3d", n,
+ mfc->_c.mfc_un.res.ttls[n]);
}
} else {
/* unresolved mfc_caches don't contain
@@ -597,15 +508,15 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
static const struct seq_operations ipmr_mfc_seq_ops = {
.start = ipmr_mfc_seq_start,
- .next = ipmr_mfc_seq_next,
- .stop = ipmr_mfc_seq_stop,
+ .next = mr_mfc_seq_next,
+ .stop = mr_mfc_seq_stop,
.show = ipmr_mfc_seq_show,
};
static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
- sizeof(struct ipmr_mfc_iter));
+ sizeof(struct mr_mfc_iter));
}
static const struct file_operations ip6mr_mfc_fops = {
@@ -624,7 +535,7 @@ static int pim6_rcv(struct sk_buff *skb)
struct ipv6hdr *encap;
struct net_device *reg_dev = NULL;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -658,7 +569,7 @@ static int pim6_rcv(struct sk_buff *skb)
read_lock(&mrt_lock);
if (reg_vif_num >= 0)
- reg_dev = mrt->vif6_table[reg_vif_num].dev;
+ reg_dev = mrt->vif_table[reg_vif_num].dev;
if (reg_dev)
dev_hold(reg_dev);
read_unlock(&mrt_lock);
@@ -693,7 +604,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_oif = dev->ifindex,
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
@@ -736,7 +647,7 @@ static void reg_vif_setup(struct net_device *dev)
dev->features |= NETIF_F_NETNS_LOCAL;
}
-static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr_table *mrt)
{
struct net_device *dev;
char name[IFNAMSIZ];
@@ -769,21 +680,41 @@ failure:
}
#endif
-/*
- * Delete a VIF entry
- */
+static int call_ip6mr_vif_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct vif_device *vif,
+ mifi_t vif_index, u32 tb_id)
+{
+ return mr_call_vif_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
+ vif, vif_index, tb_id,
+ &net->ipv6.ipmr_seq);
+}
+
+static int call_ip6mr_mfc_entry_notifiers(struct net *net,
+ enum fib_event_type event_type,
+ struct mfc6_cache *mfc, u32 tb_id)
+{
+ return mr_call_mfc_notifiers(net, RTNL_FAMILY_IP6MR, event_type,
+ &mfc->_c, tb_id, &net->ipv6.ipmr_seq);
+}
-static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
+/* Delete a VIF entry */
+static int mif6_delete(struct mr_table *mrt, int vifi, int notify,
struct list_head *head)
{
- struct mif_device *v;
+ struct vif_device *v;
struct net_device *dev;
struct inet6_dev *in6_dev;
if (vifi < 0 || vifi >= mrt->maxvif)
return -EADDRNOTAVAIL;
- v = &mrt->vif6_table[vifi];
+ v = &mrt->vif_table[vifi];
+
+ if (VIF_EXISTS(mrt, vifi))
+ call_ip6mr_vif_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_VIF_DEL, v, vifi,
+ mrt->id);
write_lock_bh(&mrt_lock);
dev = v->dev;
@@ -802,7 +733,7 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
if (vifi + 1 == mrt->maxvif) {
int tmp;
for (tmp = vifi - 1; tmp >= 0; tmp--) {
- if (MIF_EXISTS(mrt, tmp))
+ if (VIF_EXISTS(mrt, tmp))
break;
}
mrt->maxvif = tmp + 1;
@@ -827,23 +758,30 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, int notify,
return 0;
}
+static inline void ip6mr_cache_free_rcu(struct rcu_head *head)
+{
+ struct mr_mfc *c = container_of(head, struct mr_mfc, rcu);
+
+ kmem_cache_free(mrt_cachep, (struct mfc6_cache *)c);
+}
+
static inline void ip6mr_cache_free(struct mfc6_cache *c)
{
- kmem_cache_free(mrt_cachep, c);
+ call_rcu(&c->_c.rcu, ip6mr_cache_free_rcu);
}
/* Destroy an unresolved cache entry, killing queued skbs
and reporting error to netlink readers.
*/
-static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct mr_table *mrt, struct mfc6_cache *c)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
atomic_dec(&mrt->cache_resolve_queue_len);
- while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
+ while ((skb = skb_dequeue(&c->_c.mfc_un.unres.unresolved)) != NULL) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
@@ -862,13 +800,13 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
/* Timer process for all the unresolved queue. */
-static void ipmr_do_expire_process(struct mr6_table *mrt)
+static void ipmr_do_expire_process(struct mr_table *mrt)
{
unsigned long now = jiffies;
unsigned long expires = 10 * HZ;
- struct mfc6_cache *c, *next;
+ struct mr_mfc *c, *next;
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
if (time_after(c->mfc_un.unres.expires, now)) {
/* not yet... */
unsigned long interval = c->mfc_un.unres.expires - now;
@@ -878,24 +816,24 @@ static void ipmr_do_expire_process(struct mr6_table *mrt)
}
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
}
static void ipmr_expire_process(struct timer_list *t)
{
- struct mr6_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
+ struct mr_table *mrt = from_timer(mrt, t, ipmr_expire_timer);
if (!spin_trylock(&mfc_unres_lock)) {
mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
return;
}
- if (!list_empty(&mrt->mfc6_unres_queue))
+ if (!list_empty(&mrt->mfc_unres_queue))
ipmr_do_expire_process(mrt);
spin_unlock(&mfc_unres_lock);
@@ -903,7 +841,8 @@ static void ipmr_expire_process(struct timer_list *t)
/* Fill oifs list. It is called under write locked mrt_lock. */
-static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
+static void ip6mr_update_thresholds(struct mr_table *mrt,
+ struct mr_mfc *cache,
unsigned char *ttls)
{
int vifi;
@@ -913,7 +852,7 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
for (vifi = 0; vifi < mrt->maxvif; vifi++) {
- if (MIF_EXISTS(mrt, vifi) &&
+ if (VIF_EXISTS(mrt, vifi) &&
ttls[vifi] && ttls[vifi] < 255) {
cache->mfc_un.res.ttls[vifi] = ttls[vifi];
if (cache->mfc_un.res.minvif > vifi)
@@ -925,17 +864,17 @@ static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *ca
cache->mfc_un.res.lastuse = jiffies;
}
-static int mif6_add(struct net *net, struct mr6_table *mrt,
+static int mif6_add(struct net *net, struct mr_table *mrt,
struct mif6ctl *vifc, int mrtsock)
{
int vifi = vifc->mif6c_mifi;
- struct mif_device *v = &mrt->vif6_table[vifi];
+ struct vif_device *v = &mrt->vif_table[vifi];
struct net_device *dev;
struct inet6_dev *in6_dev;
int err;
/* Is vif busy ? */
- if (MIF_EXISTS(mrt, vifi))
+ if (VIF_EXISTS(mrt, vifi))
return -EADDRINUSE;
switch (vifc->mif6c_flags) {
@@ -980,21 +919,10 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
dev->ifindex, &in6_dev->cnf);
}
- /*
- * Fill in the VIF structures
- */
- v->rate_limit = vifc->vifc_rate_limit;
- v->flags = vifc->mif6c_flags;
- if (!mrtsock)
- v->flags |= VIFF_STATIC;
- v->threshold = vifc->vifc_threshold;
- v->bytes_in = 0;
- v->bytes_out = 0;
- v->pkt_in = 0;
- v->pkt_out = 0;
- v->link = dev->ifindex;
- if (v->flags & MIFF_REGISTER)
- v->link = dev_get_iflink(dev);
+ /* Fill in the VIF structures */
+ vif_device_init(v, dev, vifc->vifc_rate_limit, vifc->vifc_threshold,
+ vifc->mif6c_flags | (!mrtsock ? VIFF_STATIC : 0),
+ MIFF_REGISTER);
/* And finish update writing critical data */
write_lock_bh(&mrt_lock);
@@ -1006,78 +934,63 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
if (vifi + 1 > mrt->maxvif)
mrt->maxvif = vifi + 1;
write_unlock_bh(&mrt_lock);
+ call_ip6mr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD,
+ v, vifi, mrt->id);
return 0;
}
-static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find(struct mr_table *mrt,
const struct in6_addr *origin,
const struct in6_addr *mcastgrp)
{
- int line = MFC6_HASH(mcastgrp, origin);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
- return c;
- }
- return NULL;
-}
-
-/* Look for a (*,*,oif) entry */
-static struct mfc6_cache *ip6mr_cache_find_any_parent(struct mr6_table *mrt,
- mifi_t mifi)
-{
- int line = MFC6_HASH(&in6addr_any, &in6addr_any);
- struct mfc6_cache *c;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_any(&c->mf6c_mcastgrp) &&
- (c->mfc_un.res.ttls[mifi] < 255))
- return c;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
- return NULL;
+ return mr_mfc_find(mrt, &arg);
}
/* Look for a (*,G) entry */
-static struct mfc6_cache *ip6mr_cache_find_any(struct mr6_table *mrt,
+static struct mfc6_cache *ip6mr_cache_find_any(struct mr_table *mrt,
struct in6_addr *mcastgrp,
mifi_t mifi)
{
- int line = MFC6_HASH(mcastgrp, &in6addr_any);
- struct mfc6_cache *c, *proxy;
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = in6addr_any,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
if (ipv6_addr_any(mcastgrp))
- goto skip;
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list)
- if (ipv6_addr_any(&c->mf6c_origin) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp)) {
- if (c->mfc_un.res.ttls[mifi] < 255)
- return c;
-
- /* It's ok if the mifi is part of the static tree */
- proxy = ip6mr_cache_find_any_parent(mrt,
- c->mf6c_parent);
- if (proxy && proxy->mfc_un.res.ttls[mifi] < 255)
- return c;
- }
+ return mr_mfc_find_any_parent(mrt, mifi);
+ return mr_mfc_find_any(mrt, mifi, &arg);
+}
-skip:
- return ip6mr_cache_find_any_parent(mrt, mifi);
+/* Look for a (S,G,iif) entry if parent != -1 */
+static struct mfc6_cache *
+ip6mr_cache_find_parent(struct mr_table *mrt,
+ const struct in6_addr *origin,
+ const struct in6_addr *mcastgrp,
+ int parent)
+{
+ struct mfc6_cache_cmp_arg arg = {
+ .mf6c_origin = *origin,
+ .mf6c_mcastgrp = *mcastgrp,
+ };
+
+ return mr_mfc_find_parent(mrt, &arg, parent);
}
-/*
- * Allocate a multicast cache entry
- */
+/* Allocate a multicast cache entry */
static struct mfc6_cache *ip6mr_cache_alloc(void)
{
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
if (!c)
return NULL;
- c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
- c->mfc_un.res.minvif = MAXMIFS;
+ c->_c.mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1;
+ c->_c.mfc_un.res.minvif = MAXMIFS;
+ c->_c.free = ip6mr_cache_free_rcu;
+ refcount_set(&c->_c.mfc_un.res.refcount, 1);
return c;
}
@@ -1086,8 +999,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
if (!c)
return NULL;
- skb_queue_head_init(&c->mfc_un.unres.unresolved);
- c->mfc_un.unres.expires = jiffies + 10 * HZ;
+ skb_queue_head_init(&c->_c.mfc_un.unres.unresolved);
+ c->_c.mfc_un.unres.expires = jiffies + 10 * HZ;
return c;
}
@@ -1095,7 +1008,7 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
* A cache entry has gone into a resolved state from queued
*/
-static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt,
struct mfc6_cache *uc, struct mfc6_cache *c)
{
struct sk_buff *skb;
@@ -1104,12 +1017,13 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Play the pending entries through our router
*/
- while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
+ while ((skb = __skb_dequeue(&uc->_c.mfc_un.unres.unresolved))) {
if (ipv6_hdr(skb)->version == 0) {
struct nlmsghdr *nlh = skb_pull(skb,
sizeof(struct ipv6hdr));
- if (__ip6mr_fill_mroute(mrt, skb, c, nlmsg_data(nlh)) > 0) {
+ if (mr_fill_mroute(mrt, skb, &c->_c,
+ nlmsg_data(nlh)) > 0) {
nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
} else {
nlh->nlmsg_type = NLMSG_ERROR;
@@ -1129,9 +1043,10 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
* Called under mrt_lock.
*/
-static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+static int ip6mr_cache_report(struct mr_table *mrt, struct sk_buff *pkt,
mifi_t mifi, int assert)
{
+ struct sock *mroute6_sk;
struct sk_buff *skb;
struct mrt6msg *msg;
int ret;
@@ -1201,17 +1116,19 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (!mrt->mroute6_sk) {
+ rcu_read_lock();
+ mroute6_sk = rcu_dereference(mrt->mroute_sk);
+ if (!mroute6_sk) {
+ rcu_read_unlock();
kfree_skb(skb);
return -EINVAL;
}
mrt6msg_netlink_event(mrt, skb);
- /*
- * Deliver to user space multicast routing algorithms
- */
- ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
+ /* Deliver to user space multicast routing algorithms */
+ ret = sock_queue_rcv_skb(mroute6_sk, skb);
+ rcu_read_unlock();
if (ret < 0) {
net_warn_ratelimited("mroute6: pending queue full, dropping entries\n");
kfree_skb(skb);
@@ -1220,19 +1137,16 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
return ret;
}
-/*
- * Queue a packet for resolution. It gets locked cache entry!
- */
-
-static int
-ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
+/* Queue a packet for resolution. It gets locked cache entry! */
+static int ip6mr_cache_unresolved(struct mr_table *mrt, mifi_t mifi,
+ struct sk_buff *skb)
{
+ struct mfc6_cache *c;
bool found = false;
int err;
- struct mfc6_cache *c;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(c, &mrt->mfc_unres_queue, _c.list) {
if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
found = true;
@@ -1253,10 +1167,8 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
return -ENOBUFS;
}
- /*
- * Fill in the new cache entry
- */
- c->mf6c_parent = -1;
+ /* Fill in the new cache entry */
+ c->_c.mfc_parent = -1;
c->mf6c_origin = ipv6_hdr(skb)->saddr;
c->mf6c_mcastgrp = ipv6_hdr(skb)->daddr;
@@ -1276,20 +1188,18 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
}
atomic_inc(&mrt->cache_resolve_queue_len);
- list_add(&c->list, &mrt->mfc6_unres_queue);
+ list_add(&c->_c.list, &mrt->mfc_unres_queue);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
ipmr_do_expire_process(mrt);
}
- /*
- * See if we can append the packet
- */
- if (c->mfc_un.unres.unresolved.qlen > 3) {
+ /* See if we can append the packet */
+ if (c->_c.mfc_un.unres.unresolved.qlen > 3) {
kfree_skb(skb);
err = -ENOBUFS;
} else {
- skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
+ skb_queue_tail(&c->_c.mfc_un.unres.unresolved, skb);
err = 0;
}
@@ -1301,29 +1211,26 @@ ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
* MFC6 cache manipulation by user space
*/
-static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc,
+static int ip6mr_mfc_delete(struct mr_table *mrt, struct mf6cctl *mfc,
int parent)
{
- int line;
- struct mfc6_cache *c, *next;
-
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
+ struct mfc6_cache *c;
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == c->mf6c_parent)) {
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (!c)
+ return -ENOENT;
+ rhltable_remove(&mrt->mfc_hash, &c->_c.mnode, ip6mr_rht_params);
+ list_del_rcu(&c->_c.list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- return 0;
- }
- }
- return -ENOENT;
+ call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_ENTRY_DEL, c, mrt->id);
+ mr6_netlink_event(mrt, c, RTM_DELROUTE);
+ mr_cache_put(&c->_c);
+ return 0;
}
static int ip6mr_device_event(struct notifier_block *this,
@@ -1331,15 +1238,15 @@ static int ip6mr_device_event(struct notifier_block *this,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
- struct mr6_table *mrt;
- struct mif_device *v;
+ struct mr_table *mrt;
+ struct vif_device *v;
int ct;
if (event != NETDEV_UNREGISTER)
return NOTIFY_DONE;
ip6mr_for_each_table(mrt, net) {
- v = &mrt->vif6_table[0];
+ v = &mrt->vif_table[0];
for (ct = 0; ct < mrt->maxvif; ct++, v++) {
if (v->dev == dev)
mif6_delete(mrt, ct, 1, NULL);
@@ -1349,21 +1256,63 @@ static int ip6mr_device_event(struct notifier_block *this,
return NOTIFY_DONE;
}
+static unsigned int ip6mr_seq_read(struct net *net)
+{
+ ASSERT_RTNL();
+
+ return net->ipv6.ipmr_seq + ip6mr_rules_seq_read(net);
+}
+
+static int ip6mr_dump(struct net *net, struct notifier_block *nb)
+{
+ return mr_dump(net, nb, RTNL_FAMILY_IP6MR, ip6mr_rules_dump,
+ ip6mr_mr_table_iter, &mrt_lock);
+}
+
static struct notifier_block ip6_mr_notifier = {
.notifier_call = ip6mr_device_event
};
-/*
- * Setup for IP multicast routing
- */
+static const struct fib_notifier_ops ip6mr_notifier_ops_template = {
+ .family = RTNL_FAMILY_IP6MR,
+ .fib_seq_read = ip6mr_seq_read,
+ .fib_dump = ip6mr_dump,
+ .owner = THIS_MODULE,
+};
+
+static int __net_init ip6mr_notifier_init(struct net *net)
+{
+ struct fib_notifier_ops *ops;
+
+ net->ipv6.ipmr_seq = 0;
+
+ ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+
+ net->ipv6.ip6mr_notifier_ops = ops;
+
+ return 0;
+}
+static void __net_exit ip6mr_notifier_exit(struct net *net)
+{
+ fib_notifier_ops_unregister(net->ipv6.ip6mr_notifier_ops);
+ net->ipv6.ip6mr_notifier_ops = NULL;
+}
+
+/* Setup for IP multicast routing */
static int __net_init ip6mr_net_init(struct net *net)
{
int err;
+ err = ip6mr_notifier_init(net);
+ if (err)
+ return err;
+
err = ip6mr_rules_init(net);
if (err < 0)
- goto fail;
+ goto ip6mr_rules_fail;
#ifdef CONFIG_PROC_FS
err = -ENOMEM;
@@ -1381,7 +1330,8 @@ proc_cache_fail:
proc_vif_fail:
ip6mr_rules_exit(net);
#endif
-fail:
+ip6mr_rules_fail:
+ ip6mr_notifier_exit(net);
return err;
}
@@ -1392,6 +1342,7 @@ static void __net_exit ip6mr_net_exit(struct net *net)
remove_proc_entry("ip6_mr_vif", net->proc_net);
#endif
ip6mr_rules_exit(net);
+ ip6mr_notifier_exit(net);
}
static struct pernet_operations ip6mr_net_ops = {
@@ -1452,14 +1403,14 @@ void ip6_mr_cleanup(void)
kmem_cache_destroy(mrt_cachep);
}
-static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+static int ip6mr_mfc_add(struct net *net, struct mr_table *mrt,
struct mf6cctl *mfc, int mrtsock, int parent)
{
- bool found = false;
- int line;
- struct mfc6_cache *uc, *c;
unsigned char ttls[MAXMIFS];
- int i;
+ struct mfc6_cache *uc, *c;
+ struct mr_mfc *_uc;
+ bool found;
+ int i, err;
if (mfc->mf6cc_parent >= MAXMIFS)
return -ENFILE;
@@ -1468,28 +1419,22 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
for (i = 0; i < MAXMIFS; i++) {
if (IF_ISSET(i, &mfc->mf6cc_ifset))
ttls[i] = 1;
-
}
- line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
-
- list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
- if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
- ipv6_addr_equal(&c->mf6c_mcastgrp,
- &mfc->mf6cc_mcastgrp.sin6_addr) &&
- (parent == -1 || parent == mfc->mf6cc_parent)) {
- found = true;
- break;
- }
- }
-
- if (found) {
+ /* The entries are added/deleted only under RTNL */
+ rcu_read_lock();
+ c = ip6mr_cache_find_parent(mrt, &mfc->mf6cc_origin.sin6_addr,
+ &mfc->mf6cc_mcastgrp.sin6_addr, parent);
+ rcu_read_unlock();
+ if (c) {
write_lock_bh(&mrt_lock);
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
write_unlock_bh(&mrt_lock);
+ call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
+ c, mrt->id);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1504,31 +1449,36 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
- c->mf6c_parent = mfc->mf6cc_parent;
- ip6mr_update_thresholds(mrt, c, ttls);
+ c->_c.mfc_parent = mfc->mf6cc_parent;
+ ip6mr_update_thresholds(mrt, &c->_c, ttls);
if (!mrtsock)
- c->mfc_flags |= MFC_STATIC;
+ c->_c.mfc_flags |= MFC_STATIC;
- write_lock_bh(&mrt_lock);
- list_add(&c->list, &mrt->mfc6_cache_array[line]);
- write_unlock_bh(&mrt_lock);
+ err = rhltable_insert_key(&mrt->mfc_hash, &c->cmparg, &c->_c.mnode,
+ ip6mr_rht_params);
+ if (err) {
+ pr_err("ip6mr: rhtable insert error %d\n", err);
+ ip6mr_cache_free(c);
+ return err;
+ }
+ list_add_tail_rcu(&c->_c.list, &mrt->mfc_cache_list);
- /*
- * Check to see if we resolved a queued list. If so we
- * need to send on the frames and tidy up.
+ /* Check to see if we resolved a queued list. If so we
+ * need to send on the frames and tidy up.
*/
found = false;
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry(_uc, &mrt->mfc_unres_queue, list) {
+ uc = (struct mfc6_cache *)_uc;
if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
- list_del(&uc->list);
+ list_del(&_uc->list);
atomic_dec(&mrt->cache_resolve_queue_len);
found = true;
break;
}
}
- if (list_empty(&mrt->mfc6_unres_queue))
+ if (list_empty(&mrt->mfc_unres_queue))
del_timer(&mrt->ipmr_expire_timer);
spin_unlock_bh(&mfc_unres_lock);
@@ -1536,6 +1486,8 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
ip6mr_cache_resolve(net, mrt, uc, c);
ip6mr_cache_free(uc);
}
+ call_ip6mr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD,
+ c, mrt->id);
mr6_netlink_event(mrt, c, RTM_NEWROUTE);
return 0;
}
@@ -1544,61 +1496,59 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
* Close the multicast socket, and clear the vif tables etc
*/
-static void mroute_clean_tables(struct mr6_table *mrt, bool all)
+static void mroute_clean_tables(struct mr_table *mrt, bool all)
{
- int i;
+ struct mr_mfc *c, *tmp;
LIST_HEAD(list);
- struct mfc6_cache *c, *next;
+ int i;
- /*
- * Shut down all active vif entries
- */
+ /* Shut down all active vif entries */
for (i = 0; i < mrt->maxvif; i++) {
- if (!all && (mrt->vif6_table[i].flags & VIFF_STATIC))
+ if (!all && (mrt->vif_table[i].flags & VIFF_STATIC))
continue;
mif6_delete(mrt, i, 0, &list);
}
unregister_netdevice_many(&list);
- /*
- * Wipe the cache
- */
- for (i = 0; i < MFC6_LINES; i++) {
- list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
- if (!all && (c->mfc_flags & MFC_STATIC))
- continue;
- write_lock_bh(&mrt_lock);
- list_del(&c->list);
- write_unlock_bh(&mrt_lock);
-
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_cache_free(c);
- }
+ /* Wipe the cache */
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) {
+ if (!all && (c->mfc_flags & MFC_STATIC))
+ continue;
+ rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params);
+ list_del_rcu(&c->list);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE);
+ mr_cache_put(c);
}
if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
+ list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) {
list_del(&c->list);
- mr6_netlink_event(mrt, c, RTM_DELROUTE);
- ip6mr_destroy_unres(mrt, c);
+ call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net),
+ FIB_EVENT_ENTRY_DEL,
+ (struct mfc6_cache *)c,
+ mrt->id);
+ mr6_netlink_event(mrt, (struct mfc6_cache *)c,
+ RTM_DELROUTE);
+ ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);
}
spin_unlock_bh(&mfc_unres_lock);
}
}
-static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
+static int ip6mr_sk_init(struct mr_table *mrt, struct sock *sk)
{
int err = 0;
struct net *net = sock_net(sk);
rtnl_lock();
write_lock_bh(&mrt_lock);
- if (likely(mrt->mroute6_sk == NULL)) {
- mrt->mroute6_sk = sk;
- net->ipv6.devconf_all->mc_forwarding++;
- } else {
+ if (rtnl_dereference(mrt->mroute_sk)) {
err = -EADDRINUSE;
+ } else {
+ rcu_assign_pointer(mrt->mroute_sk, sk);
+ sock_set_flag(sk, SOCK_RCU_FREE);
+ net->ipv6.devconf_all->mc_forwarding++;
}
write_unlock_bh(&mrt_lock);
@@ -1616,7 +1566,7 @@ int ip6mr_sk_done(struct sock *sk)
{
int err = -EACCES;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1624,9 +1574,13 @@ int ip6mr_sk_done(struct sock *sk)
rtnl_lock();
ip6mr_for_each_table(mrt, net) {
- if (sk == mrt->mroute6_sk) {
+ if (sk == rtnl_dereference(mrt->mroute_sk)) {
write_lock_bh(&mrt_lock);
- mrt->mroute6_sk = NULL;
+ RCU_INIT_POINTER(mrt->mroute_sk, NULL);
+ /* Note that mroute_sk had SOCK_RCU_FREE set,
+ * so the RCU grace period before sk freeing
+ * is guaranteed by sk_destruct()
+ */
net->ipv6.devconf_all->mc_forwarding--;
write_unlock_bh(&mrt_lock);
inet6_netconf_notify_devconf(net, RTM_NEWNETCONF,
@@ -1644,9 +1598,9 @@ int ip6mr_sk_done(struct sock *sk)
return err;
}
-struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+bool mroute6_is_socket(struct net *net, struct sk_buff *skb)
{
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX,
.flowi6_oif = skb->dev->ifindex,
@@ -1656,8 +1610,9 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
if (ip6mr_fib_lookup(net, &fl6, &mrt) < 0)
return NULL;
- return mrt->mroute6_sk;
+ return rcu_access_pointer(mrt->mroute_sk);
}
+EXPORT_SYMBOL(mroute6_is_socket);
/*
* Socket options and virtual interface manipulation. The whole
@@ -1673,7 +1628,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
struct mf6cctl mfc;
mifi_t mifi;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1684,7 +1639,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
return -ENOENT;
if (optname != MRT6_INIT) {
- if (sk != mrt->mroute6_sk && !ns_capable(net->user_ns, CAP_NET_ADMIN))
+ if (sk != rcu_access_pointer(mrt->mroute_sk) &&
+ !ns_capable(net->user_ns, CAP_NET_ADMIN))
return -EACCES;
}
@@ -1706,7 +1662,8 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
if (vif.mif6c_mifi >= MAXMIFS)
return -ENFILE;
rtnl_lock();
- ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
+ ret = mif6_add(net, mrt, &vif,
+ sk == rtnl_dereference(mrt->mroute_sk));
rtnl_unlock();
return ret;
@@ -1741,7 +1698,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
ret = ip6mr_mfc_delete(mrt, &mfc, parent);
else
ret = ip6mr_mfc_add(net, mrt, &mfc,
- sk == mrt->mroute6_sk, parent);
+ sk ==
+ rtnl_dereference(mrt->mroute_sk),
+ parent);
rtnl_unlock();
return ret;
@@ -1793,7 +1752,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
/* "pim6reg%u" should not exceed 16 bytes (IFNAMSIZ) */
if (v != RT_TABLE_DEFAULT && v >= 100000000)
return -EINVAL;
- if (sk == mrt->mroute6_sk)
+ if (sk == rcu_access_pointer(mrt->mroute_sk))
return -EBUSY;
rtnl_lock();
@@ -1824,7 +1783,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
int olr;
int val;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
if (sk->sk_type != SOCK_RAW ||
inet_sk(sk)->inet_num != IPPROTO_ICMPV6)
@@ -1872,10 +1831,10 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
struct sioc_sg_req6 sr;
struct sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1888,8 +1847,8 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1906,19 +1865,19 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -1946,10 +1905,10 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
{
struct compat_sioc_sg_req6 sr;
struct compat_sioc_mif_req6 vr;
- struct mif_device *vif;
+ struct vif_device *vif;
struct mfc6_cache *c;
struct net *net = sock_net(sk);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
if (!mrt)
@@ -1962,8 +1921,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (vr.mifi >= mrt->maxvif)
return -EINVAL;
read_lock(&mrt_lock);
- vif = &mrt->vif6_table[vr.mifi];
- if (MIF_EXISTS(mrt, vr.mifi)) {
+ vif = &mrt->vif_table[vr.mifi];
+ if (VIF_EXISTS(mrt, vr.mifi)) {
vr.icount = vif->pkt_in;
vr.ocount = vif->pkt_out;
vr.ibytes = vif->bytes_in;
@@ -1980,19 +1939,19 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
if (copy_from_user(&sr, arg, sizeof(sr)))
return -EFAULT;
- read_lock(&mrt_lock);
+ rcu_read_lock();
c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
if (c) {
- sr.pktcnt = c->mfc_un.res.pkt;
- sr.bytecnt = c->mfc_un.res.bytes;
- sr.wrong_if = c->mfc_un.res.wrong_if;
- read_unlock(&mrt_lock);
+ sr.pktcnt = c->_c.mfc_un.res.pkt;
+ sr.bytecnt = c->_c.mfc_un.res.bytes;
+ sr.wrong_if = c->_c.mfc_un.res.wrong_if;
+ rcu_read_unlock();
if (copy_to_user(arg, &sr, sizeof(sr)))
return -EFAULT;
return 0;
}
- read_unlock(&mrt_lock);
+ rcu_read_unlock();
return -EADDRNOTAVAIL;
default:
return -ENOIOCTLCMD;
@@ -2013,11 +1972,11 @@ static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct
* Processing handlers for ip6mr_forward
*/
-static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+static int ip6mr_forward2(struct net *net, struct mr_table *mrt,
struct sk_buff *skb, struct mfc6_cache *c, int vifi)
{
struct ipv6hdr *ipv6h;
- struct mif_device *vif = &mrt->vif6_table[vifi];
+ struct vif_device *vif = &mrt->vif_table[vifi];
struct net_device *dev;
struct dst_entry *dst;
struct flowi6 fl6;
@@ -2087,46 +2046,50 @@ out_free:
return 0;
}
-static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
+static int ip6mr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
int ct;
for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
- if (mrt->vif6_table[ct].dev == dev)
+ if (mrt->vif_table[ct].dev == dev)
break;
}
return ct;
}
-static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
- struct sk_buff *skb, struct mfc6_cache *cache)
+static void ip6_mr_forward(struct net *net, struct mr_table *mrt,
+ struct sk_buff *skb, struct mfc6_cache *c)
{
int psend = -1;
int vif, ct;
int true_vifi = ip6mr_find_vif(mrt, skb->dev);
- vif = cache->mf6c_parent;
- cache->mfc_un.res.pkt++;
- cache->mfc_un.res.bytes += skb->len;
- cache->mfc_un.res.lastuse = jiffies;
+ vif = c->_c.mfc_parent;
+ c->_c.mfc_un.res.pkt++;
+ c->_c.mfc_un.res.bytes += skb->len;
+ c->_c.mfc_un.res.lastuse = jiffies;
- if (ipv6_addr_any(&cache->mf6c_origin) && true_vifi >= 0) {
+ if (ipv6_addr_any(&c->mf6c_origin) && true_vifi >= 0) {
struct mfc6_cache *cache_proxy;
/* For an (*,G) entry, we only check that the incoming
* interface is part of the static tree.
*/
- cache_proxy = ip6mr_cache_find_any_parent(mrt, vif);
+ rcu_read_lock();
+ cache_proxy = mr_mfc_find_any_parent(mrt, vif);
if (cache_proxy &&
- cache_proxy->mfc_un.res.ttls[true_vifi] < 255)
+ cache_proxy->_c.mfc_un.res.ttls[true_vifi] < 255) {
+ rcu_read_unlock();
goto forward;
+ }
+ rcu_read_unlock();
}
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (mrt->vif6_table[vif].dev != skb->dev) {
- cache->mfc_un.res.wrong_if++;
+ if (mrt->vif_table[vif].dev != skb->dev) {
+ c->_c.mfc_un.res.wrong_if++;
if (true_vifi >= 0 && mrt->mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -2135,52 +2098,55 @@ static void ip6_mr_forward(struct net *net, struct mr6_table *mrt,
large chunk of pimd to kernel. Ough... --ANK
*/
(mrt->mroute_do_pim ||
- cache->mfc_un.res.ttls[true_vifi] < 255) &&
+ c->_c.mfc_un.res.ttls[true_vifi] < 255) &&
time_after(jiffies,
- cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
- cache->mfc_un.res.last_assert = jiffies;
+ c->_c.mfc_un.res.last_assert +
+ MFC_ASSERT_THRESH)) {
+ c->_c.mfc_un.res.last_assert = jiffies;
ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
}
goto dont_forward;
}
forward:
- mrt->vif6_table[vif].pkt_in++;
- mrt->vif6_table[vif].bytes_in += skb->len;
+ mrt->vif_table[vif].pkt_in++;
+ mrt->vif_table[vif].bytes_in += skb->len;
/*
* Forward the frame
*/
- if (ipv6_addr_any(&cache->mf6c_origin) &&
- ipv6_addr_any(&cache->mf6c_mcastgrp)) {
+ if (ipv6_addr_any(&c->mf6c_origin) &&
+ ipv6_addr_any(&c->mf6c_mcastgrp)) {
if (true_vifi >= 0 &&
- true_vifi != cache->mf6c_parent &&
+ true_vifi != c->_c.mfc_parent &&
ipv6_hdr(skb)->hop_limit >
- cache->mfc_un.res.ttls[cache->mf6c_parent]) {
+ c->_c.mfc_un.res.ttls[c->_c.mfc_parent]) {
/* It's an (*,*) entry and the packet is not coming from
* the upstream: forward the packet to the upstream
* only.
*/
- psend = cache->mf6c_parent;
+ psend = c->_c.mfc_parent;
goto last_forward;
}
goto dont_forward;
}
- for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
+ for (ct = c->_c.mfc_un.res.maxvif - 1;
+ ct >= c->_c.mfc_un.res.minvif; ct--) {
/* For (*,G) entry, don't forward to the incoming interface */
- if ((!ipv6_addr_any(&cache->mf6c_origin) || ct != true_vifi) &&
- ipv6_hdr(skb)->hop_limit > cache->mfc_un.res.ttls[ct]) {
+ if ((!ipv6_addr_any(&c->mf6c_origin) || ct != true_vifi) &&
+ ipv6_hdr(skb)->hop_limit > c->_c.mfc_un.res.ttls[ct]) {
if (psend != -1) {
struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
if (skb2)
- ip6mr_forward2(net, mrt, skb2, cache, psend);
+ ip6mr_forward2(net, mrt, skb2,
+ c, psend);
}
psend = ct;
}
}
last_forward:
if (psend != -1) {
- ip6mr_forward2(net, mrt, skb, cache, psend);
+ ip6mr_forward2(net, mrt, skb, c, psend);
return;
}
@@ -2197,7 +2163,7 @@ int ip6_mr_input(struct sk_buff *skb)
{
struct mfc6_cache *cache;
struct net *net = dev_net(skb->dev);
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct flowi6 fl6 = {
.flowi6_iif = skb->dev->ifindex,
.flowi6_mark = skb->mark,
@@ -2247,66 +2213,11 @@ int ip6_mr_input(struct sk_buff *skb)
return 0;
}
-
-static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
- struct mfc6_cache *c, struct rtmsg *rtm)
-{
- struct rta_mfc_stats mfcs;
- struct nlattr *mp_attr;
- struct rtnexthop *nhp;
- unsigned long lastuse;
- int ct;
-
- /* If cache is unresolved, don't try to parse IIF and OIF */
- if (c->mf6c_parent >= MAXMIFS) {
- rtm->rtm_flags |= RTNH_F_UNRESOLVED;
- return -ENOENT;
- }
-
- if (MIF_EXISTS(mrt, c->mf6c_parent) &&
- nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0)
- return -EMSGSIZE;
- mp_attr = nla_nest_start(skb, RTA_MULTIPATH);
- if (!mp_attr)
- return -EMSGSIZE;
-
- for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
- if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
- nhp = nla_reserve_nohdr(skb, sizeof(*nhp));
- if (!nhp) {
- nla_nest_cancel(skb, mp_attr);
- return -EMSGSIZE;
- }
-
- nhp->rtnh_flags = 0;
- nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
- nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
- nhp->rtnh_len = sizeof(*nhp);
- }
- }
-
- nla_nest_end(skb, mp_attr);
-
- lastuse = READ_ONCE(c->mfc_un.res.lastuse);
- lastuse = time_after_eq(jiffies, lastuse) ? jiffies - lastuse : 0;
-
- mfcs.mfcs_packets = c->mfc_un.res.pkt;
- mfcs.mfcs_bytes = c->mfc_un.res.bytes;
- mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if;
- if (nla_put_64bit(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs, RTA_PAD) ||
- nla_put_u64_64bit(skb, RTA_EXPIRES, jiffies_to_clock_t(lastuse),
- RTA_PAD))
- return -EMSGSIZE;
-
- rtm->rtm_type = RTN_MULTICAST;
- return 1;
-}
-
int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
u32 portid)
{
int err;
- struct mr6_table *mrt;
+ struct mr_table *mrt;
struct mfc6_cache *cache;
struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
@@ -2367,15 +2278,12 @@ int ip6mr_get_route(struct net *net, struct sk_buff *skb, struct rtmsg *rtm,
return err;
}
- if (rtm->rtm_flags & RTM_F_NOTIFY)
- cache->mfc_flags |= MFC_NOTIFY;
-
- err = __ip6mr_fill_mroute(mrt, skb, cache, rtm);
+ err = mr_fill_mroute(mrt, skb, &cache->_c, rtm);
read_unlock(&mrt_lock);
return err;
}
-static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+static int ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
u32 portid, u32 seq, struct mfc6_cache *c, int cmd,
int flags)
{
@@ -2397,7 +2305,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
goto nla_put_failure;
rtm->rtm_type = RTN_MULTICAST;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
- if (c->mfc_flags & MFC_STATIC)
+ if (c->_c.mfc_flags & MFC_STATIC)
rtm->rtm_protocol = RTPROT_STATIC;
else
rtm->rtm_protocol = RTPROT_MROUTED;
@@ -2406,7 +2314,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) ||
nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp))
goto nla_put_failure;
- err = __ip6mr_fill_mroute(mrt, skb, c, rtm);
+ err = mr_fill_mroute(mrt, skb, &c->_c, rtm);
/* do not break the dump if cache is unresolved */
if (err < 0 && err != -ENOENT)
goto nla_put_failure;
@@ -2419,6 +2327,14 @@ nla_put_failure:
return -EMSGSIZE;
}
+static int _ip6mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
+ u32 portid, u32 seq, struct mr_mfc *c,
+ int cmd, int flags)
+{
+ return ip6mr_fill_mroute(mrt, skb, portid, seq, (struct mfc6_cache *)c,
+ cmd, flags);
+}
+
static int mr6_msgsize(bool unresolved, int maxvif)
{
size_t len =
@@ -2440,14 +2356,14 @@ static int mr6_msgsize(bool unresolved, int maxvif)
return len;
}
-static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,
+static void mr6_netlink_event(struct mr_table *mrt, struct mfc6_cache *mfc,
int cmd)
{
struct net *net = read_pnet(&mrt->net);
struct sk_buff *skb;
int err = -ENOBUFS;
- skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif),
+ skb = nlmsg_new(mr6_msgsize(mfc->_c.mfc_parent >= MAXMIFS, mrt->maxvif),
GFP_ATOMIC);
if (!skb)
goto errout;
@@ -2482,7 +2398,7 @@ static size_t mrt6msg_netlink_msgsize(size_t payloadlen)
return len;
}
-static void mrt6msg_netlink_event(struct mr6_table *mrt, struct sk_buff *pkt)
+static void mrt6msg_netlink_event(struct mr_table *mrt, struct sk_buff *pkt)
{
struct net *net = read_pnet(&mrt->net);
struct nlmsghdr *nlh;
@@ -2532,65 +2448,6 @@ errout:
static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
- struct net *net = sock_net(skb->sk);
- struct mr6_table *mrt;
- struct mfc6_cache *mfc;
- unsigned int t = 0, s_t;
- unsigned int h = 0, s_h;
- unsigned int e = 0, s_e;
-
- s_t = cb->args[0];
- s_h = cb->args[1];
- s_e = cb->args[2];
-
- read_lock(&mrt_lock);
- ip6mr_for_each_table(mrt, net) {
- if (t < s_t)
- goto next_table;
- if (t > s_t)
- s_h = 0;
- for (h = s_h; h < MFC6_LINES; h++) {
- list_for_each_entry(mfc, &mrt->mfc6_cache_array[h], list) {
- if (e < s_e)
- goto next_entry;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0)
- goto done;
-next_entry:
- e++;
- }
- e = s_e = 0;
- }
- spin_lock_bh(&mfc_unres_lock);
- list_for_each_entry(mfc, &mrt->mfc6_unres_queue, list) {
- if (e < s_e)
- goto next_entry2;
- if (ip6mr_fill_mroute(mrt, skb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- mfc, RTM_NEWROUTE,
- NLM_F_MULTI) < 0) {
- spin_unlock_bh(&mfc_unres_lock);
- goto done;
- }
-next_entry2:
- e++;
- }
- spin_unlock_bh(&mfc_unres_lock);
- e = s_e = 0;
- s_h = 0;
-next_table:
- t++;
- }
-done:
- read_unlock(&mrt_lock);
-
- cb->args[2] = e;
- cb->args[1] = h;
- cb->args[0] = t;
-
- return skb->len;
+ return mr_rtm_dumproute(skb, cb, ip6mr_mr_table_iter,
+ _ip6mr_fill_mroute, &mfc_unres_lock);
}
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 24535169663d..4d780c7f0130 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1415,4 +1415,3 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
}
EXPORT_SYMBOL(compat_ipv6_getsockopt);
#endif
-
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 9b9d2ff01b35..793159d77d8a 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -165,7 +165,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
if (ifindex == 0) {
struct rt6_info *rt;
- rt = rt6_lookup(net, addr, NULL, 0, 0);
+ rt = rt6_lookup(net, addr, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
ip6_rt_put(rt);
@@ -254,7 +254,7 @@ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net,
struct inet6_dev *idev = NULL;
if (ifindex == 0) {
- struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, 0);
+ struct rt6_info *rt = rt6_lookup(net, group, NULL, 0, NULL, 0);
if (rt) {
dev = rt->dst.dev;
@@ -2921,9 +2921,9 @@ static int __net_init igmp6_proc_init(struct net *net)
int err;
err = -ENOMEM;
- if (!proc_create("igmp6", S_IRUGO, net->proc_net, &igmp6_mc_seq_fops))
+ if (!proc_create("igmp6", 0444, net->proc_net, &igmp6_mc_seq_fops))
goto out;
- if (!proc_create("mcfilter6", S_IRUGO, net->proc_net,
+ if (!proc_create("mcfilter6", 0444, net->proc_net,
&igmp6_mcf_seq_fops))
goto out_proc_net_igmp6;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index ba5e04c6ae17..9de4dfb126ba 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -527,7 +527,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr,
}
if (!dev->addr_len)
- inc_opt = 0;
+ inc_opt = false;
if (inc_opt)
optlen += ndisc_opt_addr_space(dev,
NDISC_NEIGHBOUR_ADVERTISEMENT);
@@ -707,7 +707,7 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
int probes = atomic_read(&neigh->probes);
if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr,
- dev, 1,
+ dev, false, 1,
IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))
saddr = &ipv6_hdr(skb)->saddr;
probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES);
diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig
index d395d1590699..ccbfa83e4bb0 100644
--- a/net/ipv6/netfilter/Kconfig
+++ b/net/ipv6/netfilter/Kconfig
@@ -34,7 +34,7 @@ config NF_SOCKET_IPV6
if NF_TABLES
config NF_TABLES_IPV6
- tristate "IPv6 nf_tables support"
+ bool "IPv6 nf_tables support"
help
This option enables the IPv6 support for nf_tables.
diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile
index d984057b8395..44273d6f03a5 100644
--- a/net/ipv6/netfilter/Makefile
+++ b/net/ipv6/netfilter/Makefile
@@ -36,7 +36,6 @@ obj-$(CONFIG_NF_REJECT_IPV6) += nf_reject_ipv6.o
obj-$(CONFIG_NF_DUP_IPV6) += nf_dup_ipv6.o
# nf_tables
-obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o
obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o
obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o
obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 62358b93bbac..65c9e1a58305 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -420,11 +420,6 @@ mark_source_chains(const struct xt_table_info *newinfo,
t->verdict < 0) || visited) {
unsigned int oldpos, size;
- if ((strcmp(t->target.u.user.name,
- XT_STANDARD_TARGET) == 0) &&
- t->verdict < -NF_MAX_VERDICT - 1)
- return 0;
-
/* Return: backtrack through the last
big jump. */
do {
@@ -725,16 +720,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
if (i != repl->num_entries)
goto out_free;
- /* Check hooks all assigned */
- for (i = 0; i < NF_INET_NUMHOOKS; i++) {
- /* Only hooks which are valid */
- if (!(repl->valid_hooks & (1 << i)))
- continue;
- if (newinfo->hook_entry[i] == 0xFFFFFFFF)
- goto out_free;
- if (newinfo->underflow[i] == 0xFFFFFFFF)
- goto out_free;
- }
+ ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
+ if (ret)
+ goto out_free;
if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
ret = -ELOOP;
@@ -962,7 +950,9 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries;
- xt_compat_init_offsets(AF_INET6, info->number);
+ ret = xt_compat_init_offsets(AF_INET6, info->number);
+ if (ret)
+ return ret;
xt_entry_foreach(iter, loc_cpu_entry, info->size) {
ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
if (ret != 0)
@@ -1075,7 +1065,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct ip6t_entry *iter;
ret = 0;
- counters = vzalloc(num_counters * sizeof(struct xt_counters));
+ counters = xt_counters_alloc(num_counters);
if (!counters) {
ret = -ENOMEM;
goto out;
@@ -1105,6 +1095,8 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
+ xt_table_unlock(t);
+
get_old_counters(oldinfo, counters);
/* Decrease module usage counts and free resource */
@@ -1118,7 +1110,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
}
vfree(counters);
- xt_table_unlock(t);
return ret;
put_module:
@@ -1425,7 +1416,7 @@ translate_compat_table(struct net *net,
struct compat_ip6t_entry *iter0;
struct ip6t_replace repl;
unsigned int size;
- int ret = 0;
+ int ret;
info = *pinfo;
entry0 = *pentry0;
@@ -1434,7 +1425,9 @@ translate_compat_table(struct net *net,
j = 0;
xt_compat_lock(AF_INET6);
- xt_compat_init_offsets(AF_INET6, compatr->num_entries);
+ ret = xt_compat_init_offsets(AF_INET6, compatr->num_entries);
+ if (ret)
+ goto out_unlock;
/* Walk through entries, checking offsets. */
xt_entry_foreach(iter0, entry0, compatr->size) {
ret = check_compat_entry_size_and_hooks(iter0, info, &size,
diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c
index 437af8c95277..cb6d42b03cb5 100644
--- a/net/ipv6/netfilter/ip6t_SYNPROXY.c
+++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c
@@ -18,6 +18,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/nf_conntrack_synproxy.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
static struct ipv6hdr *
synproxy_build_ip(struct net *net, struct sk_buff *skb,
@@ -405,6 +406,8 @@ static unsigned int ipv6_synproxy_hook(void *priv,
synproxy->isn = ntohl(th->ack_seq);
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy->its = opts.tsecr;
+
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
break;
case TCP_CONNTRACK_SYN_RECV:
if (!th->syn || !th->ack)
@@ -413,8 +416,10 @@ static unsigned int ipv6_synproxy_hook(void *priv,
if (!synproxy_parse_options(skb, thoff, th, &opts))
return NF_DROP;
- if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) {
synproxy->tsoff = opts.tsval - synproxy->its;
+ nf_conntrack_event_cache(IPCT_SYNPROXY, ct);
+ }
opts.options &= ~(XT_SYNPROXY_OPT_MSS |
XT_SYNPROXY_OPT_WSCALE |
@@ -424,6 +429,7 @@ static unsigned int ipv6_synproxy_hook(void *priv,
synproxy_send_server_ack(net, state, skb, th, &opts);
nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+ nf_conntrack_event_cache(IPCT_SEQADJ, ct);
swap(opts.tsval, opts.tsecr);
synproxy_send_client_ack(net, skb, th, &opts);
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index 91ed25a24b79..d12f511929f5 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -49,7 +49,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
- rt = (void *) ip6_route_lookup(net, &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
if (rt->dst.error)
goto out;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index b84ce3e6d728..3622aac343ae 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -52,18 +52,10 @@
static const char nf_frags_cache_name[] = "nf-frags";
-struct nf_ct_frag6_skb_cb
-{
- struct inet6_skb_parm h;
- int offset;
-};
-
-#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb *)((skb)->cb))
-
static struct inet_frags nf_frags;
#ifdef CONFIG_SYSCTL
-static int zero;
+static long zero;
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
@@ -76,18 +68,18 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_low_thresh",
.data = &init_net.nf_frag.frags.low_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
.extra2 = &init_net.nf_frag.frags.high_thresh
},
{
.procname = "nf_conntrack_frag6_high_thresh",
.data = &init_net.nf_frag.frags.high_thresh,
- .maxlen = sizeof(unsigned int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.nf_frag.frags.low_thresh
},
{ }
@@ -152,23 +144,6 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
}
-static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr)
-{
- net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd));
- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, nf_frags.rnd);
-}
-
-
-static unsigned int nf_hashfn(const struct inet_frag_queue *q)
-{
- const struct frag_queue *nq;
-
- nq = container_of(q, struct frag_queue, q);
- return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);
-}
-
static void nf_ct_frag6_expire(struct timer_list *t)
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
@@ -178,34 +153,26 @@ static void nf_ct_frag6_expire(struct timer_list *t)
fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, nf_frag.frags);
- ip6_expire_frag_queue(net, fq, &nf_frags);
+ ip6_expire_frag_queue(net, fq);
}
/* Creation primitives. */
-static inline struct frag_queue *fq_find(struct net *net, __be32 id,
- u32 user, struct in6_addr *src,
- struct in6_addr *dst, int iif, u8 ecn)
+static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
+ const struct ipv6hdr *hdr, int iif)
{
+ struct frag_v6_compare_key key = {
+ .id = id,
+ .saddr = hdr->saddr,
+ .daddr = hdr->daddr,
+ .user = user,
+ .iif = iif,
+ };
struct inet_frag_queue *q;
- struct ip6_create_arg arg;
- unsigned int hash;
-
- arg.id = id;
- arg.user = user;
- arg.src = src;
- arg.dst = dst;
- arg.iif = iif;
- arg.ecn = ecn;
-
- local_bh_disable();
- hash = nf_hash_frag(id, src, dst);
-
- q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);
- local_bh_enable();
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+
+ q = inet_frag_find(&net->nf_frag.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct frag_queue, q);
}
@@ -264,7 +231,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this case. -DaveM
*/
pr_debug("end of fragment not rounded to 8 bytes.\n");
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
return -EPROTO;
}
if (end > fq->q.len) {
@@ -295,13 +262,13 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
* this fragment, right?
*/
prev = fq->q.fragments_tail;
- if (!prev || NFCT_FRAG6_CB(prev)->offset < offset) {
+ if (!prev || prev->ip_defrag_offset < offset) {
next = NULL;
goto found;
}
prev = NULL;
for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (NFCT_FRAG6_CB(next)->offset >= offset)
+ if (next->ip_defrag_offset >= offset)
break; /* bingo! */
prev = next;
}
@@ -317,14 +284,19 @@ found:
/* Check for overlap with preceding fragment. */
if (prev &&
- (NFCT_FRAG6_CB(prev)->offset + prev->len) > offset)
+ (prev->ip_defrag_offset + prev->len) > offset)
goto discard_fq;
/* Look for overlap with succeeding segment. */
- if (next && NFCT_FRAG6_CB(next)->offset < end)
+ if (next && next->ip_defrag_offset < end)
goto discard_fq;
- NFCT_FRAG6_CB(skb)->offset = offset;
+ /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+ if (skb->dev)
+ fq->iif = skb->dev->ifindex;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+ skb->ip_defrag_offset = offset;
/* Insert this fragment in the chain of fragments. */
skb->next = next;
@@ -335,10 +307,6 @@ found:
else
fq->q.fragments = skb;
- if (skb->dev) {
- fq->iif = skb->dev->ifindex;
- skb->dev = NULL;
- }
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
@@ -357,7 +325,7 @@ found:
return 0;
discard_fq:
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
err:
return -EINVAL;
}
@@ -379,10 +347,10 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_devic
int payload_len;
u8 ecn;
- inet_frag_kill(&fq->q, &nf_frags);
+ inet_frag_kill(&fq->q);
WARN_ON(head == NULL);
- WARN_ON(NFCT_FRAG6_CB(head)->offset != 0);
+ WARN_ON(head->ip_defrag_offset != 0);
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
@@ -593,8 +561,8 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
fhdr = (struct frag_hdr *)skb_transport_header(skb);
skb_orphan(skb);
- fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr,
- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+ fq = fq_find(net, fhdr->identification, user, hdr,
+ skb->dev ? skb->dev->ifindex : 0);
if (fq == NULL) {
pr_debug("Can't find and can't create new queue\n");
return -ENOMEM;
@@ -622,25 +590,33 @@ int nf_ct_frag6_gather(struct net *net, struct sk_buff *skb, u32 user)
out_unlock:
spin_unlock_bh(&fq->q.lock);
- inet_frag_put(&fq->q, &nf_frags);
+ inet_frag_put(&fq->q);
return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_frag6_gather);
static int nf_ct_net_init(struct net *net)
{
+ int res;
+
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
- inet_frags_init_net(&net->nf_frag.frags);
-
- return nf_ct_frag6_sysctl_register(net);
+ net->nf_frag.frags.f = &nf_frags;
+
+ res = inet_frags_init_net(&net->nf_frag.frags);
+ if (res < 0)
+ return res;
+ res = nf_ct_frag6_sysctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&net->nf_frag.frags);
+ return res;
}
static void nf_ct_net_exit(struct net *net)
{
nf_ct_frags6_sysctl_unregister(net);
- inet_frags_exit_net(&net->nf_frag.frags, &nf_frags);
+ inet_frags_exit_net(&net->nf_frag.frags);
}
static struct pernet_operations nf_ct_net_ops = {
@@ -652,13 +628,12 @@ int nf_ct_frag6_init(void)
{
int ret = 0;
- nf_frags.hashfn = nf_hashfn;
nf_frags.constructor = ip6_frag_init;
nf_frags.destructor = NULL;
nf_frags.qsize = sizeof(struct frag_queue);
- nf_frags.match = ip6_frag_match;
nf_frags.frag_expire = nf_ct_frag6_expire;
nf_frags.frags_cache_name = nf_frags_cache_name;
+ nf_frags.rhash_params = ip6_rhash_params;
ret = inet_frags_init(&nf_frags);
if (ret)
goto out;
diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c
deleted file mode 100644
index 17e03589331c..000000000000
--- a/net/ipv6/netfilter/nf_tables_ipv6.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/ipv6.h>
-#include <linux/netfilter_ipv6.h>
-#include <net/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables_ipv6.h>
-
-static unsigned int nft_do_chain_ipv6(void *priv,
- struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct nft_pktinfo pkt;
-
- nft_set_pktinfo(&pkt, skb, state);
- nft_set_pktinfo_ipv6(&pkt, skb);
-
- return nft_do_chain(&pkt, priv);
-}
-
-static const struct nf_chain_type filter_ipv6 = {
- .name = "filter",
- .type = NFT_CHAIN_T_DEFAULT,
- .family = NFPROTO_IPV6,
- .owner = THIS_MODULE,
- .hook_mask = (1 << NF_INET_LOCAL_IN) |
- (1 << NF_INET_LOCAL_OUT) |
- (1 << NF_INET_FORWARD) |
- (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_POST_ROUTING),
- .hooks = {
- [NF_INET_LOCAL_IN] = nft_do_chain_ipv6,
- [NF_INET_LOCAL_OUT] = nft_do_chain_ipv6,
- [NF_INET_FORWARD] = nft_do_chain_ipv6,
- [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6,
- [NF_INET_POST_ROUTING] = nft_do_chain_ipv6,
- },
-};
-
-static int __init nf_tables_ipv6_init(void)
-{
- return nft_register_chain_type(&filter_ipv6);
-}
-
-static void __exit nf_tables_ipv6_exit(void)
-{
- nft_unregister_chain_type(&filter_ipv6);
-}
-
-module_init(nf_tables_ipv6_init);
-module_exit(nf_tables_ipv6_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_ALIAS_NFT_CHAIN(AF_INET6, "filter");
diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
index 73fe2bd13fcf..3557b114446c 100644
--- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c
@@ -65,7 +65,17 @@ static unsigned int nft_nat_ipv6_local_fn(void *priv,
return nf_nat_ipv6_local_fn(priv, skb, state, nft_nat_do_chain);
}
-static const struct nf_chain_type nft_chain_nat_ipv6 = {
+static int nft_nat_ipv6_init(struct nft_ctx *ctx)
+{
+ return nf_ct_netns_get(ctx->net, ctx->family);
+}
+
+static void nft_nat_ipv6_free(struct nft_ctx *ctx)
+{
+ nf_ct_netns_put(ctx->net, ctx->family);
+}
+
+static const struct nft_chain_type nft_chain_nat_ipv6 = {
.name = "nat",
.type = NFT_CHAIN_T_NAT,
.family = NFPROTO_IPV6,
@@ -80,15 +90,13 @@ static const struct nf_chain_type nft_chain_nat_ipv6 = {
[NF_INET_LOCAL_OUT] = nft_nat_ipv6_local_fn,
[NF_INET_LOCAL_IN] = nft_nat_ipv6_fn,
},
+ .init = nft_nat_ipv6_init,
+ .free = nft_nat_ipv6_free,
};
static int __init nft_chain_nat_ipv6_init(void)
{
- int err;
-
- err = nft_register_chain_type(&nft_chain_nat_ipv6);
- if (err < 0)
- return err;
+ nft_register_chain_type(&nft_chain_nat_ipv6);
return 0;
}
diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c
index 11d3c3b9aa18..da3f1f8cb325 100644
--- a/net/ipv6/netfilter/nft_chain_route_ipv6.c
+++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c
@@ -60,7 +60,7 @@ static unsigned int nf_route_table_hook(void *priv,
return ret;
}
-static const struct nf_chain_type nft_chain_route_ipv6 = {
+static const struct nft_chain_type nft_chain_route_ipv6 = {
.name = "route",
.type = NFT_CHAIN_T_ROUTE,
.family = NFPROTO_IPV6,
@@ -73,7 +73,9 @@ static const struct nf_chain_type nft_chain_route_ipv6 = {
static int __init nft_chain_route_init(void)
{
- return nft_register_chain_type(&nft_chain_route_ipv6);
+ nft_register_chain_type(&nft_chain_route_ipv6);
+
+ return 0;
}
static void __exit nft_chain_route_exit(void)
diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c
index 62fc84d7bdff..36be3cf0adef 100644
--- a/net/ipv6/netfilter/nft_fib_ipv6.c
+++ b/net/ipv6/netfilter/nft_fib_ipv6.c
@@ -180,7 +180,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs,
}
*dest = 0;
- rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, lookup_flags);
+ rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb,
+ lookup_flags);
if (rt->dst.error)
goto put_rt_err;
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index b67814242f78..a85f7e0b14b1 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -38,7 +38,6 @@
static int sockstat6_seq_show(struct seq_file *seq, void *v)
{
struct net *net = seq->private;
- unsigned int frag_mem = ip6_frag_mem(net);
seq_printf(seq, "TCP6: inuse %d\n",
sock_prot_inuse_get(net, &tcpv6_prot));
@@ -48,7 +47,9 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(net, &udplitev6_prot));
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(net, &rawv6_prot));
- seq_printf(seq, "FRAG6: inuse %u memory %u\n", !!frag_mem, frag_mem);
+ seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
+ atomic_read(&net->ipv6.frags.rhashtable.nelems),
+ frag_mem_limit(&net->ipv6.frags));
return 0;
}
@@ -290,7 +291,7 @@ int snmp6_register_dev(struct inet6_dev *idev)
if (!net->mib.proc_net_devsnmp6)
return -ENOENT;
- p = proc_create_data(idev->dev->name, S_IRUGO,
+ p = proc_create_data(idev->dev->name, 0444,
net->mib.proc_net_devsnmp6,
&snmp6_dev_seq_fops, idev);
if (!p)
@@ -314,11 +315,11 @@ int snmp6_unregister_dev(struct inet6_dev *idev)
static int __net_init ipv6_proc_init_net(struct net *net)
{
- if (!proc_create("sockstat6", S_IRUGO, net->proc_net,
+ if (!proc_create("sockstat6", 0444, net->proc_net,
&sockstat6_seq_fops))
return -ENOMEM;
- if (!proc_create("snmp6", S_IRUGO, net->proc_net, &snmp6_seq_fops))
+ if (!proc_create("snmp6", 0444, net->proc_net, &snmp6_seq_fops))
goto proc_snmp6_fail;
net->mib.proc_net_devsnmp6 = proc_mkdir("dev_snmp6", net->proc_net);
@@ -354,4 +355,3 @@ void ipv6_misc_proc_exit(void)
{
unregister_pernet_subsys(&ipv6_proc_ops);
}
-
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 4c25339b1984..5eb9b08947ed 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1318,7 +1318,7 @@ static const struct file_operations raw6_seq_fops = {
static int __net_init raw6_init_net(struct net *net)
{
- if (!proc_create("raw6", S_IRUGO, net->proc_net, &raw6_seq_fops))
+ if (!proc_create("raw6", 0444, net->proc_net, &raw6_seq_fops))
return -ENOMEM;
return 0;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index afbc000ad4f2..70e4a578b2fb 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -62,13 +62,6 @@
static const char ip6_frag_cache_name[] = "ip6-frags";
-struct ip6frag_skb_cb {
- struct inet6_skb_parm h;
- int offset;
-};
-
-#define FRAG6_CB(skb) ((struct ip6frag_skb_cb *)((skb)->cb))
-
static u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)
{
return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);
@@ -79,94 +72,58 @@ static struct inet_frags ip6_frags;
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
struct net_device *dev);
-/*
- * callers should be careful not to use the hash value outside the ipfrag_lock
- * as doing so could race with ipfrag_hash_rnd being recalculated.
- */
-static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr,
- const struct in6_addr *daddr)
-{
- net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));
- return jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr),
- (__force u32)id, ip6_frags.rnd);
-}
-
-static unsigned int ip6_hashfn(const struct inet_frag_queue *q)
-{
- const struct frag_queue *fq;
-
- fq = container_of(q, struct frag_queue, q);
- return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);
-}
-
-bool ip6_frag_match(const struct inet_frag_queue *q, const void *a)
-{
- const struct frag_queue *fq;
- const struct ip6_create_arg *arg = a;
-
- fq = container_of(q, struct frag_queue, q);
- return fq->id == arg->id &&
- fq->user == arg->user &&
- ipv6_addr_equal(&fq->saddr, arg->src) &&
- ipv6_addr_equal(&fq->daddr, arg->dst) &&
- (arg->iif == fq->iif ||
- !(ipv6_addr_type(arg->dst) & (IPV6_ADDR_MULTICAST |
- IPV6_ADDR_LINKLOCAL)));
-}
-EXPORT_SYMBOL(ip6_frag_match);
-
void ip6_frag_init(struct inet_frag_queue *q, const void *a)
{
struct frag_queue *fq = container_of(q, struct frag_queue, q);
- const struct ip6_create_arg *arg = a;
+ const struct frag_v6_compare_key *key = a;
- fq->id = arg->id;
- fq->user = arg->user;
- fq->saddr = *arg->src;
- fq->daddr = *arg->dst;
- fq->ecn = arg->ecn;
+ q->key.v6 = *key;
+ fq->ecn = 0;
}
EXPORT_SYMBOL(ip6_frag_init);
-void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq,
- struct inet_frags *frags)
+void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq)
{
struct net_device *dev = NULL;
+ struct sk_buff *head;
+ rcu_read_lock();
spin_lock(&fq->q.lock);
if (fq->q.flags & INET_FRAG_COMPLETE)
goto out;
- inet_frag_kill(&fq->q, frags);
+ inet_frag_kill(&fq->q);
- rcu_read_lock();
dev = dev_get_by_index_rcu(net, fq->iif);
if (!dev)
- goto out_rcu_unlock;
+ goto out;
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS);
-
- if (inet_frag_evicting(&fq->q))
- goto out_rcu_unlock;
-
__IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT);
/* Don't send error if the first segment did not arrive. */
- if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !fq->q.fragments)
- goto out_rcu_unlock;
+ head = fq->q.fragments;
+ if (!(fq->q.flags & INET_FRAG_FIRST_IN) || !head)
+ goto out;
/* But use as source device on which LAST ARRIVED
* segment was received. And do not use fq->dev
* pointer directly, device might already disappeared.
*/
- fq->q.fragments->dev = dev;
- icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
-out_rcu_unlock:
- rcu_read_unlock();
+ head->dev = dev;
+ skb_get(head);
+ spin_unlock(&fq->q.lock);
+
+ icmpv6_send(head, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0);
+ kfree_skb(head);
+ goto out_rcu_unlock;
+
out:
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, frags);
+out_rcu_unlock:
+ rcu_read_unlock();
+ inet_frag_put(&fq->q);
}
EXPORT_SYMBOL(ip6_expire_frag_queue);
@@ -179,31 +136,29 @@ static void ip6_frag_expire(struct timer_list *t)
fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ipv6.frags);
- ip6_expire_frag_queue(net, fq, &ip6_frags);
+ ip6_expire_frag_queue(net, fq);
}
static struct frag_queue *
-fq_find(struct net *net, __be32 id, const struct in6_addr *src,
- const struct in6_addr *dst, int iif, u8 ecn)
+fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
{
+ struct frag_v6_compare_key key = {
+ .id = id,
+ .saddr = hdr->saddr,
+ .daddr = hdr->daddr,
+ .user = IP6_DEFRAG_LOCAL_DELIVER,
+ .iif = iif,
+ };
struct inet_frag_queue *q;
- struct ip6_create_arg arg;
- unsigned int hash;
-
- arg.id = id;
- arg.user = IP6_DEFRAG_LOCAL_DELIVER;
- arg.src = src;
- arg.dst = dst;
- arg.iif = iif;
- arg.ecn = ecn;
- hash = inet6_hash_frag(id, src, dst);
+ if (!(ipv6_addr_type(&hdr->daddr) & (IPV6_ADDR_MULTICAST |
+ IPV6_ADDR_LINKLOCAL)))
+ key.iif = 0;
- q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);
- if (IS_ERR_OR_NULL(q)) {
- inet_frag_maybe_warn_overflow(q, pr_fmt());
+ q = inet_frag_find(&net->ipv6.frags, &key);
+ if (!q)
return NULL;
- }
+
return container_of(q, struct frag_queue, q);
}
@@ -288,13 +243,13 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
* this fragment, right?
*/
prev = fq->q.fragments_tail;
- if (!prev || FRAG6_CB(prev)->offset < offset) {
+ if (!prev || prev->ip_defrag_offset < offset) {
next = NULL;
goto found;
}
prev = NULL;
for (next = fq->q.fragments; next != NULL; next = next->next) {
- if (FRAG6_CB(next)->offset >= offset)
+ if (next->ip_defrag_offset >= offset)
break; /* bingo! */
prev = next;
}
@@ -309,14 +264,20 @@ found:
/* Check for overlap with preceding fragment. */
if (prev &&
- (FRAG6_CB(prev)->offset + prev->len) > offset)
+ (prev->ip_defrag_offset + prev->len) > offset)
goto discard_fq;
/* Look for overlap with succeeding segment. */
- if (next && FRAG6_CB(next)->offset < end)
+ if (next && next->ip_defrag_offset < end)
goto discard_fq;
- FRAG6_CB(skb)->offset = offset;
+ /* Note : skb->ip_defrag_offset and skb->dev share the same location */
+ dev = skb->dev;
+ if (dev)
+ fq->iif = dev->ifindex;
+ /* Makes sure compiler wont do silly aliasing games */
+ barrier();
+ skb->ip_defrag_offset = offset;
/* Insert this fragment in the chain of fragments. */
skb->next = next;
@@ -327,11 +288,6 @@ found:
else
fq->q.fragments = skb;
- dev = skb->dev;
- if (dev) {
- fq->iif = dev->ifindex;
- skb->dev = NULL;
- }
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
@@ -364,7 +320,7 @@ found:
return -1;
discard_fq:
- inet_frag_kill(&fq->q, &ip6_frags);
+ inet_frag_kill(&fq->q);
err:
__IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
IPSTATS_MIB_REASMFAILS);
@@ -391,7 +347,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
int sum_truesize;
u8 ecn;
- inet_frag_kill(&fq->q, &ip6_frags);
+ inet_frag_kill(&fq->q);
ecn = ip_frag_ecn_table[fq->ecn];
if (unlikely(ecn == 0xff))
@@ -418,7 +374,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
}
WARN_ON(head == NULL);
- WARN_ON(FRAG6_CB(head)->offset != 0);
+ WARN_ON(head->ip_defrag_offset != 0);
/* Unfragmented part is taken from the first segment. */
payload_len = ((head->data - skb_network_header(head)) -
@@ -531,6 +487,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
struct frag_queue *fq;
const struct ipv6hdr *hdr = ipv6_hdr(skb);
struct net *net = dev_net(skb_dst(skb)->dev);
+ int iif;
if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED)
goto fail_hdr;
@@ -559,17 +516,18 @@ static int ipv6_frag_rcv(struct sk_buff *skb)
return 1;
}
- fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr,
- skb->dev ? skb->dev->ifindex : 0, ip6_frag_ecn(hdr));
+ iif = skb->dev ? skb->dev->ifindex : 0;
+ fq = fq_find(net, fhdr->identification, hdr, iif);
if (fq) {
int ret;
spin_lock(&fq->q.lock);
+ fq->iif = iif;
ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff);
spin_unlock(&fq->q.lock);
- inet_frag_put(&fq->q, &ip6_frags);
+ inet_frag_put(&fq->q);
return ret;
}
@@ -596,17 +554,17 @@ static struct ctl_table ip6_frags_ns_ctl_table[] = {
{
.procname = "ip6frag_high_thresh",
.data = &init_net.ipv6.frags.high_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv6.frags.low_thresh
},
{
.procname = "ip6frag_low_thresh",
.data = &init_net.ipv6.frags.low_thresh,
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec_minmax,
+ .proc_handler = proc_doulongvec_minmax,
.extra1 = &zero,
.extra2 = &init_net.ipv6.frags.high_thresh
},
@@ -650,10 +608,6 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
table[1].data = &net->ipv6.frags.low_thresh;
table[1].extra2 = &net->ipv6.frags.high_thresh;
table[2].data = &net->ipv6.frags.timeout;
-
- /* Don't export sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns)
- table[0].procname = NULL;
}
hdr = register_net_sysctl(net, "net/ipv6", table);
@@ -715,19 +669,27 @@ static void ip6_frags_sysctl_unregister(void)
static int __net_init ipv6_frags_init_net(struct net *net)
{
+ int res;
+
net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
+ net->ipv6.frags.f = &ip6_frags;
- inet_frags_init_net(&net->ipv6.frags);
+ res = inet_frags_init_net(&net->ipv6.frags);
+ if (res < 0)
+ return res;
- return ip6_frags_ns_sysctl_register(net);
+ res = ip6_frags_ns_sysctl_register(net);
+ if (res < 0)
+ inet_frags_exit_net(&net->ipv6.frags);
+ return res;
}
static void __net_exit ipv6_frags_exit_net(struct net *net)
{
ip6_frags_ns_sysctl_unregister(net);
- inet_frags_exit_net(&net->ipv6.frags, &ip6_frags);
+ inet_frags_exit_net(&net->ipv6.frags);
}
static struct pernet_operations ip6_frags_ops = {
@@ -735,14 +697,55 @@ static struct pernet_operations ip6_frags_ops = {
.exit = ipv6_frags_exit_net,
};
+static u32 ip6_key_hashfn(const void *data, u32 len, u32 seed)
+{
+ return jhash2(data,
+ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static u32 ip6_obj_hashfn(const void *data, u32 len, u32 seed)
+{
+ const struct inet_frag_queue *fq = data;
+
+ return jhash2((const u32 *)&fq->key.v6,
+ sizeof(struct frag_v6_compare_key) / sizeof(u32), seed);
+}
+
+static int ip6_obj_cmpfn(struct rhashtable_compare_arg *arg, const void *ptr)
+{
+ const struct frag_v6_compare_key *key = arg->key;
+ const struct inet_frag_queue *fq = ptr;
+
+ return !!memcmp(&fq->key, key, sizeof(*key));
+}
+
+const struct rhashtable_params ip6_rhash_params = {
+ .head_offset = offsetof(struct inet_frag_queue, node),
+ .hashfn = ip6_key_hashfn,
+ .obj_hashfn = ip6_obj_hashfn,
+ .obj_cmpfn = ip6_obj_cmpfn,
+ .automatic_shrinking = true,
+};
+EXPORT_SYMBOL(ip6_rhash_params);
+
int __init ipv6_frag_init(void)
{
int ret;
- ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ ip6_frags.constructor = ip6_frag_init;
+ ip6_frags.destructor = NULL;
+ ip6_frags.qsize = sizeof(struct frag_queue);
+ ip6_frags.frag_expire = ip6_frag_expire;
+ ip6_frags.frags_cache_name = ip6_frag_cache_name;
+ ip6_frags.rhash_params = ip6_rhash_params;
+ ret = inet_frags_init(&ip6_frags);
if (ret)
goto out;
+ ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+ if (ret)
+ goto err_protocol;
+
ret = ip6_frags_sysctl_register();
if (ret)
goto err_sysctl;
@@ -751,16 +754,6 @@ int __init ipv6_frag_init(void)
if (ret)
goto err_pernet;
- ip6_frags.hashfn = ip6_hashfn;
- ip6_frags.constructor = ip6_frag_init;
- ip6_frags.destructor = NULL;
- ip6_frags.qsize = sizeof(struct frag_queue);
- ip6_frags.match = ip6_frag_match;
- ip6_frags.frag_expire = ip6_frag_expire;
- ip6_frags.frags_cache_name = ip6_frag_cache_name;
- ret = inet_frags_init(&ip6_frags);
- if (ret)
- goto err_pernet;
out:
return ret;
@@ -768,6 +761,8 @@ err_pernet:
ip6_frags_sysctl_unregister();
err_sysctl:
inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT);
+err_protocol:
+ inet_frags_fini(&ip6_frags);
goto out;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index fc74352fac12..f239f91d2efb 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -450,8 +450,10 @@ static bool rt6_check_expired(const struct rt6_info *rt)
return false;
}
-static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
+static struct rt6_info *rt6_multipath_select(const struct net *net,
+ struct rt6_info *match,
struct flowi6 *fl6, int oif,
+ const struct sk_buff *skb,
int strict)
{
struct rt6_info *sibling, *next_sibling;
@@ -460,7 +462,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match,
* case it will always be non-zero. Otherwise now is the time to do it.
*/
if (!fl6->mp_hash)
- fl6->mp_hash = rt6_multipath_hash(fl6, NULL);
+ fl6->mp_hash = rt6_multipath_hash(net, fl6, skb, NULL);
if (fl6->mp_hash <= atomic_read(&match->rt6i_nh_upper_bound))
return match;
@@ -914,7 +916,9 @@ static bool ip6_hold_safe(struct net *net, struct rt6_info **prt,
static struct rt6_info *ip6_pol_route_lookup(struct net *net,
struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
struct rt6_info *rt, *rt_cache;
struct fib6_node *fn;
@@ -932,8 +936,8 @@ restart:
rt = rt6_device_match(net, rt, &fl6->saddr,
fl6->flowi6_oif, flags);
if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0)
- rt = rt6_multipath_select(rt, fl6,
- fl6->flowi6_oif, flags);
+ rt = rt6_multipath_select(net, rt, fl6, fl6->flowi6_oif,
+ skb, flags);
}
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
@@ -957,14 +961,15 @@ restart:
}
struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6,
- int flags)
+ const struct sk_buff *skb, int flags)
{
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup);
+ return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_lookup);
}
EXPORT_SYMBOL_GPL(ip6_route_lookup);
struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
- const struct in6_addr *saddr, int oif, int strict)
+ const struct in6_addr *saddr, int oif,
+ const struct sk_buff *skb, int strict)
{
struct flowi6 fl6 = {
.flowi6_oif = oif,
@@ -978,7 +983,7 @@ struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
flags |= RT6_LOOKUP_F_HAS_SADDR;
}
- dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
+ dst = fib6_rule_lookup(net, &fl6, skb, flags, ip6_pol_route_lookup);
if (dst->error == 0)
return (struct rt6_info *) dst;
@@ -1676,7 +1681,8 @@ void rt6_age_exceptions(struct rt6_info *rt,
}
struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
- int oif, struct flowi6 *fl6, int flags)
+ int oif, struct flowi6 *fl6,
+ const struct sk_buff *skb, int flags)
{
struct fib6_node *fn, *saved_fn;
struct rt6_info *rt, *rt_cache;
@@ -1698,7 +1704,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table,
redo_rt6_select:
rt = rt6_select(net, fn, oif, strict);
if (rt->rt6i_nsiblings)
- rt = rt6_multipath_select(rt, fl6, oif, strict);
+ rt = rt6_multipath_select(net, rt, fl6, oif, skb, strict);
if (rt == net->ipv6.ip6_null_entry) {
fn = fib6_backtrack(fn, &fl6->saddr);
if (fn)
@@ -1797,28 +1803,35 @@ uncached_rt_out:
}
EXPORT_SYMBOL_GPL(ip6_pol_route);
-static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_input(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
- return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
+ return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, skb, flags);
}
struct dst_entry *ip6_route_input_lookup(struct net *net,
struct net_device *dev,
- struct flowi6 *fl6, int flags)
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG)
flags |= RT6_LOOKUP_F_IFACE;
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input);
+ return fib6_rule_lookup(net, fl6, skb, flags, ip6_pol_route_input);
}
EXPORT_SYMBOL_GPL(ip6_route_input_lookup);
static void ip6_multipath_l3_keys(const struct sk_buff *skb,
- struct flow_keys *keys)
+ struct flow_keys *keys,
+ struct flow_keys *flkeys)
{
const struct ipv6hdr *outer_iph = ipv6_hdr(skb);
const struct ipv6hdr *key_iph = outer_iph;
+ struct flow_keys *_flkeys = flkeys;
const struct ipv6hdr *inner_iph;
const struct icmp6hdr *icmph;
struct ipv6hdr _inner_iph;
@@ -1840,26 +1853,76 @@ static void ip6_multipath_l3_keys(const struct sk_buff *skb,
goto out;
key_iph = inner_iph;
+ _flkeys = NULL;
out:
- memset(keys, 0, sizeof(*keys));
- keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
- keys->addrs.v6addrs.src = key_iph->saddr;
- keys->addrs.v6addrs.dst = key_iph->daddr;
- keys->tags.flow_label = ip6_flowinfo(key_iph);
- keys->basic.ip_proto = key_iph->nexthdr;
+ if (_flkeys) {
+ keys->addrs.v6addrs.src = _flkeys->addrs.v6addrs.src;
+ keys->addrs.v6addrs.dst = _flkeys->addrs.v6addrs.dst;
+ keys->tags.flow_label = _flkeys->tags.flow_label;
+ keys->basic.ip_proto = _flkeys->basic.ip_proto;
+ } else {
+ keys->addrs.v6addrs.src = key_iph->saddr;
+ keys->addrs.v6addrs.dst = key_iph->daddr;
+ keys->tags.flow_label = ip6_flowinfo(key_iph);
+ keys->basic.ip_proto = key_iph->nexthdr;
+ }
}
/* if skb is set it will be used and fl6 can be NULL */
-u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb)
+u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6,
+ const struct sk_buff *skb, struct flow_keys *flkeys)
{
struct flow_keys hash_keys;
+ u32 mhash;
- if (skb) {
- ip6_multipath_l3_keys(skb, &hash_keys);
- return flow_hash_from_keys(&hash_keys) >> 1;
+ switch (ip6_multipath_hash_policy(net)) {
+ case 0:
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ if (skb) {
+ ip6_multipath_l3_keys(skb, &hash_keys, flkeys);
+ } else {
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ hash_keys.tags.flow_label = (__force u32)fl6->flowlabel;
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ }
+ break;
+ case 1:
+ if (skb) {
+ unsigned int flag = FLOW_DISSECTOR_F_STOP_AT_ENCAP;
+ struct flow_keys keys;
+
+ /* short-circuit if we already have L4 hash present */
+ if (skb->l4_hash)
+ return skb_get_hash_raw(skb) >> 1;
+
+ memset(&hash_keys, 0, sizeof(hash_keys));
+
+ if (!flkeys) {
+ skb_flow_dissect_flow_keys(skb, &keys, flag);
+ flkeys = &keys;
+ }
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ hash_keys.addrs.v6addrs.src = flkeys->addrs.v6addrs.src;
+ hash_keys.addrs.v6addrs.dst = flkeys->addrs.v6addrs.dst;
+ hash_keys.ports.src = flkeys->ports.src;
+ hash_keys.ports.dst = flkeys->ports.dst;
+ hash_keys.basic.ip_proto = flkeys->basic.ip_proto;
+ } else {
+ memset(&hash_keys, 0, sizeof(hash_keys));
+ hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ hash_keys.addrs.v6addrs.src = fl6->saddr;
+ hash_keys.addrs.v6addrs.dst = fl6->daddr;
+ hash_keys.ports.src = fl6->fl6_sport;
+ hash_keys.ports.dst = fl6->fl6_dport;
+ hash_keys.basic.ip_proto = fl6->flowi6_proto;
+ }
+ break;
}
+ mhash = flow_hash_from_keys(&hash_keys);
- return get_hash_from_flowi6(fl6) >> 1;
+ return mhash >> 1;
}
void ip6_route_input(struct sk_buff *skb)
@@ -1876,20 +1939,29 @@ void ip6_route_input(struct sk_buff *skb)
.flowi6_mark = skb->mark,
.flowi6_proto = iph->nexthdr,
};
+ struct flow_keys *flkeys = NULL, _flkeys;
tun_info = skb_tunnel_info(skb);
if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX))
fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id;
+
+ if (fib6_rules_early_flow_dissect(net, skb, &fl6, &_flkeys))
+ flkeys = &_flkeys;
+
if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6))
- fl6.mp_hash = rt6_multipath_hash(&fl6, skb);
+ fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, flkeys);
skb_dst_drop(skb);
- skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags));
+ skb_dst_set(skb,
+ ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags));
}
-static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
- struct flowi6 *fl6, int flags)
+static struct rt6_info *ip6_pol_route_output(struct net *net,
+ struct fib6_table *table,
+ struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ int flags)
{
- return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
+ return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
}
struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
@@ -1917,7 +1989,7 @@ struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk,
else if (sk)
flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
- return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
+ return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
EXPORT_SYMBOL_GPL(ip6_route_output_flags);
@@ -2166,6 +2238,7 @@ struct ip6rd_flowi {
static struct rt6_info *__ip6_route_redirect(struct net *net,
struct fib6_table *table,
struct flowi6 *fl6,
+ const struct sk_buff *skb,
int flags)
{
struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
@@ -2239,8 +2312,9 @@ out:
};
static struct dst_entry *ip6_route_redirect(struct net *net,
- const struct flowi6 *fl6,
- const struct in6_addr *gateway)
+ const struct flowi6 *fl6,
+ const struct sk_buff *skb,
+ const struct in6_addr *gateway)
{
int flags = RT6_LOOKUP_F_HAS_SADDR;
struct ip6rd_flowi rdfl;
@@ -2248,7 +2322,7 @@ static struct dst_entry *ip6_route_redirect(struct net *net,
rdfl.fl6 = *fl6;
rdfl.gateway = *gateway;
- return fib6_rule_lookup(net, &rdfl.fl6,
+ return fib6_rule_lookup(net, &rdfl.fl6, skb,
flags, __ip6_route_redirect);
}
@@ -2268,7 +2342,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark,
fl6.flowlabel = ip6_flowinfo(iph);
fl6.flowi6_uid = uid;
- dst = ip6_route_redirect(net, &fl6, &ipv6_hdr(skb)->saddr);
+ dst = ip6_route_redirect(net, &fl6, skb, &ipv6_hdr(skb)->saddr);
rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
@@ -2290,7 +2364,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,
fl6.saddr = iph->daddr;
fl6.flowi6_uid = sock_net_uid(net, NULL);
- dst = ip6_route_redirect(net, &fl6, &iph->saddr);
+ dst = ip6_route_redirect(net, &fl6, skb, &iph->saddr);
rt6_do_redirect(dst, NULL, skb);
dst_release(dst);
}
@@ -2492,7 +2566,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
flags |= RT6_LOOKUP_F_HAS_SADDR;
flags |= RT6_LOOKUP_F_IGNORE_LINKSTATE;
- rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, flags);
+ rt = ip6_pol_route(net, table, cfg->fc_ifindex, &fl6, NULL, flags);
/* if table lookup failed, fall back to full lookup */
if (rt == net->ipv6.ip6_null_entry) {
@@ -2505,7 +2579,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net,
static int ip6_route_check_nh_onlink(struct net *net,
struct fib6_config *cfg,
- struct net_device *dev,
+ const struct net_device *dev,
struct netlink_ext_ack *extack)
{
u32 tbid = l3mdev_fib_table(dev) ? : RT_TABLE_MAIN;
@@ -2555,7 +2629,7 @@ static int ip6_route_check_nh(struct net *net,
}
if (!grt)
- grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
+ grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, NULL, 1);
if (!grt)
goto out;
@@ -2581,6 +2655,79 @@ out:
return err;
}
+static int ip6_validate_gw(struct net *net, struct fib6_config *cfg,
+ struct net_device **_dev, struct inet6_dev **idev,
+ struct netlink_ext_ack *extack)
+{
+ const struct in6_addr *gw_addr = &cfg->fc_gateway;
+ int gwa_type = ipv6_addr_type(gw_addr);
+ bool skip_dev = gwa_type & IPV6_ADDR_LINKLOCAL ? false : true;
+ const struct net_device *dev = *_dev;
+ bool need_addr_check = !dev;
+ int err = -EINVAL;
+
+ /* if gw_addr is local we will fail to detect this in case
+ * address is still TENTATIVE (DAD in progress). rt6_lookup()
+ * will return already-added prefix route via interface that
+ * prefix route was assigned to, which might be non-loopback.
+ */
+ if (dev &&
+ ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+ NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+ goto out;
+ }
+
+ if (gwa_type != (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_UNICAST)) {
+ /* IPv6 strictly inhibits using not link-local
+ * addresses as nexthop address.
+ * Otherwise, router will not able to send redirects.
+ * It is very good, but in some (rare!) circumstances
+ * (SIT, PtP, NBMA NOARP links) it is handy to allow
+ * some exceptions. --ANK
+ * We allow IPv4-mapped nexthops to support RFC4798-type
+ * addressing
+ */
+ if (!(gwa_type & (IPV6_ADDR_UNICAST | IPV6_ADDR_MAPPED))) {
+ NL_SET_ERR_MSG(extack, "Invalid gateway address");
+ goto out;
+ }
+
+ if (cfg->fc_flags & RTNH_F_ONLINK)
+ err = ip6_route_check_nh_onlink(net, cfg, dev, extack);
+ else
+ err = ip6_route_check_nh(net, cfg, _dev, idev);
+
+ if (err)
+ goto out;
+ }
+
+ /* reload in case device was changed */
+ dev = *_dev;
+
+ err = -EINVAL;
+ if (!dev) {
+ NL_SET_ERR_MSG(extack, "Egress device not specified");
+ goto out;
+ } else if (dev->flags & IFF_LOOPBACK) {
+ NL_SET_ERR_MSG(extack,
+ "Egress device can not be loopback device for this route");
+ goto out;
+ }
+
+ /* if we did not check gw_addr above, do so now that the
+ * egress device has been resolved.
+ */
+ if (need_addr_check &&
+ ipv6_chk_addr_and_flags(net, gw_addr, dev, skip_dev, 0, 0)) {
+ NL_SET_ERR_MSG(extack, "Gateway can not be a local address");
+ goto out;
+ }
+
+ err = 0;
+out:
+ return err;
+}
+
static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
struct netlink_ext_ack *extack)
{
@@ -2700,14 +2847,7 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
if (err)
goto out;
rt->dst.lwtstate = lwtstate_get(lwtstate);
- if (lwtunnel_output_redirect(rt->dst.lwtstate)) {
- rt->dst.lwtstate->orig_output = rt->dst.output;
- rt->dst.output = lwtunnel_output;
- }
- if (lwtunnel_input_redirect(rt->dst.lwtstate)) {
- rt->dst.lwtstate->orig_input = rt->dst.input;
- rt->dst.input = lwtunnel_input;
- }
+ lwtunnel_set_redirect(&rt->dst);
}
ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
@@ -2770,67 +2910,23 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg,
}
if (cfg->fc_flags & RTF_GATEWAY) {
- const struct in6_addr *gw_addr;
- int gwa_type;
-
- gw_addr = &cfg->fc_gateway;
- gwa_type = ipv6_addr_type(gw_addr);
-
- /* if gw_addr is local we will fail to detect this in case
- * address is still TENTATIVE (DAD in progress). rt6_lookup()
- * will return already-added prefix route via interface that
- * prefix route was assigned to, which might be non-loopback.
- */
- err = -EINVAL;
- if (ipv6_chk_addr_and_flags(net, gw_addr,
- gwa_type & IPV6_ADDR_LINKLOCAL ?
- dev : NULL, 0, 0)) {
- NL_SET_ERR_MSG(extack, "Invalid gateway address");
+ err = ip6_validate_gw(net, cfg, &dev, &idev, extack);
+ if (err)
goto out;
- }
- rt->rt6i_gateway = *gw_addr;
-
- if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
- /* IPv6 strictly inhibits using not link-local
- addresses as nexthop address.
- Otherwise, router will not able to send redirects.
- It is very good, but in some (rare!) circumstances
- (SIT, PtP, NBMA NOARP links) it is handy to allow
- some exceptions. --ANK
- We allow IPv4-mapped nexthops to support RFC4798-type
- addressing
- */
- if (!(gwa_type & (IPV6_ADDR_UNICAST |
- IPV6_ADDR_MAPPED))) {
- NL_SET_ERR_MSG(extack,
- "Invalid gateway address");
- goto out;
- }
- if (cfg->fc_flags & RTNH_F_ONLINK) {
- err = ip6_route_check_nh_onlink(net, cfg, dev,
- extack);
- } else {
- err = ip6_route_check_nh(net, cfg, &dev, &idev);
- }
- if (err)
- goto out;
- }
- err = -EINVAL;
- if (!dev) {
- NL_SET_ERR_MSG(extack, "Egress device not specified");
- goto out;
- } else if (dev->flags & IFF_LOOPBACK) {
- NL_SET_ERR_MSG(extack,
- "Egress device can not be loopback device for this route");
- goto out;
- }
+ rt->rt6i_gateway = cfg->fc_gateway;
}
err = -ENODEV;
if (!dev)
goto out;
+ if (idev->cnf.disable_ipv6) {
+ NL_SET_ERR_MSG(extack, "IPv6 is disabled on nexthop device");
+ err = -EACCES;
+ goto out;
+ }
+
if (!(dev->flags & IFF_UP)) {
NL_SET_ERR_MSG(extack, "Nexthop device is not up");
err = -ENETDOWN;
@@ -4616,7 +4712,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
if (!ipv6_addr_any(&fl6.saddr))
flags |= RT6_LOOKUP_F_HAS_SADDR;
- dst = ip6_route_input_lookup(net, dev, &fl6, flags);
+ dst = ip6_route_input_lookup(net, dev, &fl6, NULL, flags);
rcu_read_unlock();
} else {
@@ -4981,7 +5077,7 @@ static int __net_init ip6_route_net_init_late(struct net *net)
{
#ifdef CONFIG_PROC_FS
proc_create("ipv6_route", 0, net->proc_net, &ipv6_route_proc_fops);
- proc_create("rt6_stats", S_IRUGO, net->proc_net, &rt6_stats_seq_fops);
+ proc_create("rt6_stats", 0444, net->proc_net, &rt6_stats_seq_fops);
#endif
return 0;
}
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index ba3767ef5e93..45722327375a 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -161,7 +161,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
if (!tbl_id) {
- dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
+ dst = ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags);
} else {
struct fib6_table *table;
@@ -169,7 +169,7 @@ static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
if (!table)
goto out;
- rt = ip6_pol_route(net, table, 0, &fl6, flags);
+ rt = ip6_pol_route(net, table, 0, &fl6, skb, flags);
dst = &rt->dst;
}
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 0195598f7bb5..1522bcfd253f 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn)
#ifdef CONFIG_IPV6_SIT_6RD
struct ip_tunnel *t = netdev_priv(dev);
- if (dev == sitn->fb_tunnel_dev) {
+ if (dev == sitn->fb_tunnel_dev || !sitn->fb_tunnel_dev) {
ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0);
t->ip6rd.relay_prefix = 0;
t->ip6rd.prefixlen = 16;
@@ -1835,6 +1835,9 @@ static int __net_init sit_init_net(struct net *net)
sitn->tunnels[2] = sitn->tunnels_r;
sitn->tunnels[3] = sitn->tunnels_r_l;
+ if (!net_has_fallback_tunnels(net))
+ return 0;
+
sitn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "sit0",
NET_NAME_UNKNOWN,
ipip6_tunnel_setup);
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index a789a8ac6a64..6fbdef630152 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -16,14 +16,31 @@
#include <net/ipv6.h>
#include <net/addrconf.h>
#include <net/inet_frag.h>
+#include <net/netevent.h>
#ifdef CONFIG_NETLABEL
#include <net/calipso.h>
#endif
+static int zero;
static int one = 1;
static int auto_flowlabels_min;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
+static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp,
+ loff_t *ppos)
+{
+ struct net *net;
+ int ret;
+
+ net = container_of(table->data, struct net,
+ ipv6.sysctl.multipath_hash_policy);
+ ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
+ if (write && ret == 0)
+ call_netevent_notifiers(NETEVENT_IPV6_MPATH_HASH_UPDATE, net);
+
+ return ret;
+}
static struct ctl_table ipv6_table_template[] = {
{
@@ -126,6 +143,15 @@ static struct ctl_table ipv6_table_template[] = {
.mode = 0644,
.proc_handler = proc_dointvec
},
+ {
+ .procname = "fib_multipath_hash_policy",
+ .data = &init_net.ipv6.sysctl.multipath_hash_policy,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_rt6_multipath_hash_policy,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
{ }
};
@@ -190,6 +216,7 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)
ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt;
ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len;
ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len;
+ ipv6_table[14].data = &net->ipv6.sysctl.multipath_hash_policy,
ipv6_route_table = ipv6_route_sysctl_init(net);
if (!ipv6_route_table)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 412139f4eccd..6d664d83cd16 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -117,6 +117,21 @@ static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
ipv6_hdr(skb)->saddr.s6_addr32);
}
+static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ /* This check is replicated from tcp_v6_connect() and intended to
+ * prevent BPF program called below from accessing bytes that are out
+ * of the bound specified by user in addr_len.
+ */
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+
+ sock_owned_by_me(sk);
+
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
+}
+
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
@@ -1451,6 +1466,7 @@ process:
if (sk->sk_state == TCP_NEW_SYN_RECV) {
struct request_sock *req = inet_reqsk(sk);
+ bool req_stolen = false;
struct sock *nsk;
sk = req->rsk_listener;
@@ -1470,10 +1486,20 @@ process:
th = (const struct tcphdr *)skb->data;
hdr = ipv6_hdr(skb);
tcp_v6_fill_cb(skb, hdr, th);
- nsk = tcp_check_req(sk, skb, req, false);
+ nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
}
if (!nsk) {
reqsk_put(req);
+ if (req_stolen) {
+ /* Another cpu got exclusive access to req
+ * and created a full blown socket.
+ * Try to feed this packet to this socket
+ * instead of discarding it.
+ */
+ tcp_v6_restore_cb(skb);
+ sock_put(sk);
+ goto lookup;
+ }
goto discard_and_relse;
}
if (nsk == sk) {
@@ -1914,6 +1940,7 @@ struct proto tcpv6_prot = {
.name = "TCPv6",
.owner = THIS_MODULE,
.close = tcp_close,
+ .pre_connect = tcp_v6_pre_connect,
.connect = tcp_v6_connect,
.disconnect = tcp_disconnect,
.accept = inet_csk_accept,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 52e3ea0e6f50..6861ed479469 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -957,6 +957,25 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
}
}
+static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
+ int addr_len)
+{
+ /* The following checks are replicated from __ip6_datagram_connect()
+ * and intended to prevent BPF program called below from accessing
+ * bytes that are out of the bound specified by user in addr_len.
+ */
+ if (uaddr->sa_family == AF_INET) {
+ if (__ipv6_only_sock(sk))
+ return -EAFNOSUPPORT;
+ return udp_pre_connect(sk, uaddr, addr_len);
+ }
+
+ if (addr_len < SIN6_LEN_RFC2133)
+ return -EINVAL;
+
+ return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr);
+}
+
/**
* udp6_hwcsum_outgoing - handle outgoing HW checksumming
* @sk: socket we are sending on
@@ -1509,34 +1528,35 @@ void udp6_proc_exit(struct net *net)
/* ------------------------------------------------------------------------ */
struct proto udpv6_prot = {
- .name = "UDPv6",
- .owner = THIS_MODULE,
- .close = udp_lib_close,
- .connect = ip6_datagram_connect,
- .disconnect = udp_disconnect,
- .ioctl = udp_ioctl,
- .init = udp_init_sock,
- .destroy = udpv6_destroy_sock,
- .setsockopt = udpv6_setsockopt,
- .getsockopt = udpv6_getsockopt,
- .sendmsg = udpv6_sendmsg,
- .recvmsg = udpv6_recvmsg,
- .release_cb = ip6_datagram_release_cb,
- .hash = udp_lib_hash,
- .unhash = udp_lib_unhash,
- .rehash = udp_v6_rehash,
- .get_port = udp_v6_get_port,
- .memory_allocated = &udp_memory_allocated,
- .sysctl_mem = sysctl_udp_mem,
- .sysctl_wmem = &sysctl_udp_wmem_min,
- .sysctl_rmem = &sysctl_udp_rmem_min,
- .obj_size = sizeof(struct udp6_sock),
- .h.udp_table = &udp_table,
+ .name = "UDPv6",
+ .owner = THIS_MODULE,
+ .close = udp_lib_close,
+ .pre_connect = udpv6_pre_connect,
+ .connect = ip6_datagram_connect,
+ .disconnect = udp_disconnect,
+ .ioctl = udp_ioctl,
+ .init = udp_init_sock,
+ .destroy = udpv6_destroy_sock,
+ .setsockopt = udpv6_setsockopt,
+ .getsockopt = udpv6_getsockopt,
+ .sendmsg = udpv6_sendmsg,
+ .recvmsg = udpv6_recvmsg,
+ .release_cb = ip6_datagram_release_cb,
+ .hash = udp_lib_hash,
+ .unhash = udp_lib_unhash,
+ .rehash = udp_v6_rehash,
+ .get_port = udp_v6_get_port,
+ .memory_allocated = &udp_memory_allocated,
+ .sysctl_mem = sysctl_udp_mem,
+ .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min),
+ .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min),
+ .obj_size = sizeof(struct udp6_sock),
+ .h.udp_table = &udp_table,
#ifdef CONFIG_COMPAT
- .compat_setsockopt = compat_udpv6_setsockopt,
- .compat_getsockopt = compat_udpv6_getsockopt,
+ .compat_setsockopt = compat_udpv6_setsockopt,
+ .compat_getsockopt = compat_udpv6_getsockopt,
#endif
- .diag_destroy = udp_abort,
+ .diag_destroy = udp_abort,
};
static struct inet_protosw udpv6_protosw = {
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index b15075a5c227..16f434791763 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -196,4 +196,3 @@ void xfrm6_state_fini(void)
{
xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
}
-