diff options
author | David S. Miller <davem@davemloft.net> | 2019-04-03 21:50:20 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-04-03 21:50:20 -0700 |
commit | b571bc623eb969a9dc004bba0b9185f0a02814ed (patch) | |
tree | 8443d516118be1f5b675e0f21ff8ec648a9cbcb8 | |
parent | 4950c2ba49cc6f2b38dbedcfa0ff67acf761419a (diff) | |
parent | c0a720770c01e67374b15f348f17a52409f6545c (diff) | |
download | linux-stable-b571bc623eb969a9dc004bba0b9185f0a02814ed.tar.gz linux-stable-b571bc623eb969a9dc004bba0b9185f0a02814ed.tar.bz2 linux-stable-b571bc623eb969a9dc004bba0b9185f0a02814ed.zip |
Merge branch 'net-More-movement-to-fib_nh_common'
David Ahern says:
====================
net: More movement to fib_nh_common
Second set of three with the end goal of enabling IPv6 gateways with IPv4
routes.
This set:
- moves the ipv4 tracepoint to take a fib_nh_common and updates it to handle
a v6 gateway.
- consolidates route notifications to use the same fill functions
for both ipv4 and ipv6
v4
- enhanced the commit message for patches 1 and 2
v3
- comments from Martin:
+ renamed FIB_RES_NH to FIB_RES_NHC
+ removed family check from fib_result_prefsrc
+ in fib_nexthop_info, renamed nexthop arg to nhc and dropped for_ipv4 arg
v2
- dropped patches moving cached routes and exception buckets to
fib_nh_common. The goal is allowing a fib6_nh to be used with an
IPv4 route. The hold up is the need for separate exception buckets -
one for v6 routes and one for v4 routes. When all of the nexthop patches
are in, adding a second exception bucket pushes IPv6 fib6_info
allocations over 256 which means fib6_info allocations roll up to 512.
Hence, deferring the patches until some data mining can be done to keep
the allocations at 256.
====================
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/ip_fib.h | 52 | ||||
-rw-r--r-- | include/trace/events/fib.h | 45 | ||||
-rw-r--r-- | net/core/filter.c | 12 | ||||
-rw-r--r-- | net/ipv4/fib_frontend.c | 6 | ||||
-rw-r--r-- | net/ipv4/fib_lookup.h | 1 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 215 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 13 | ||||
-rw-r--r-- | net/ipv4/route.c | 60 | ||||
-rw-r--r-- | net/ipv6/route.c | 77 |
9 files changed, 267 insertions, 214 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 12a6d759cf57..3ce07841dc3b 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -156,15 +156,16 @@ struct fib_rule; struct fib_table; struct fib_result { - __be32 prefix; - unsigned char prefixlen; - unsigned char nh_sel; - unsigned char type; - unsigned char scope; - u32 tclassid; - struct fib_info *fi; - struct fib_table *table; - struct hlist_head *fa_head; + __be32 prefix; + unsigned char prefixlen; + unsigned char nh_sel; + unsigned char type; + unsigned char scope; + u32 tclassid; + struct fib_nh_common *nhc; + struct fib_info *fi; + struct fib_table *table; + struct hlist_head *fa_head; }; struct fib_result_nl { @@ -182,11 +183,10 @@ struct fib_result_nl { int err; }; -#ifdef CONFIG_IP_ROUTE_MULTIPATH -#define FIB_RES_NH(res) ((res).fi->fib_nh[(res).nh_sel]) -#else /* CONFIG_IP_ROUTE_MULTIPATH */ -#define FIB_RES_NH(res) ((res).fi->fib_nh[0]) -#endif /* CONFIG_IP_ROUTE_MULTIPATH */ +static inline struct fib_nh_common *fib_info_nhc(struct fib_info *fi, int nhsel) +{ + return &fi->fib_nh[nhsel].nh_common; +} #ifdef CONFIG_IP_MULTIPLE_TABLES #define FIB_TABLE_HASHSZ 256 @@ -195,18 +195,11 @@ struct fib_result_nl { #endif __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); +__be32 fib_result_prefsrc(struct net *net, struct fib_result *res); -#define FIB_RES_SADDR(net, res) \ - ((FIB_RES_NH(res).nh_saddr_genid == \ - atomic_read(&(net)->ipv4.dev_addr_genid)) ? \ - FIB_RES_NH(res).nh_saddr : \ - fib_info_update_nh_saddr((net), &FIB_RES_NH(res))) -#define FIB_RES_GW(res) (FIB_RES_NH(res).fib_nh_gw4) -#define FIB_RES_DEV(res) (FIB_RES_NH(res).fib_nh_dev) -#define FIB_RES_OIF(res) (FIB_RES_NH(res).fib_nh_oif) - -#define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? 
: \ - FIB_RES_SADDR(net, res)) +#define FIB_RES_NHC(res) ((res).nhc) +#define FIB_RES_DEV(res) (FIB_RES_NHC(res)->nhc_dev) +#define FIB_RES_OIF(res) (FIB_RES_NHC(res)->nhc_oif) struct fib_entry_notifier_info { struct fib_notifier_info info; /* must be first */ @@ -453,10 +446,12 @@ struct fib_table *fib_trie_table(u32 id, struct fib_table *alias); static inline void fib_combine_itag(u32 *itag, const struct fib_result *res) { #ifdef CONFIG_IP_ROUTE_CLASSID + struct fib_nh_common *nhc = res->nhc; + struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); #ifdef CONFIG_IP_MULTIPLE_TABLES u32 rtag; #endif - *itag = FIB_RES_NH(*res).nh_tclassid<<16; + *itag = nh->nh_tclassid << 16; #ifdef CONFIG_IP_MULTIPLE_TABLES rtag = res->tclassid; if (*itag == 0) @@ -497,4 +492,9 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr); int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, struct fib_dump_filter *filter, struct netlink_callback *cb); + +int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nh, + unsigned int *flags, bool skip_oif); +int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nh, + int nh_weight); #endif /* _NET_FIB_H */ diff --git a/include/trace/events/fib.h b/include/trace/events/fib.h index 61ea7a24c8e5..7f83b6eafc5c 100644 --- a/include/trace/events/fib.h +++ b/include/trace/events/fib.h @@ -13,9 +13,9 @@ TRACE_EVENT(fib_table_lookup, TP_PROTO(u32 tb_id, const struct flowi4 *flp, - const struct fib_nh *nh, int err), + const struct fib_nh_common *nhc, int err), - TP_ARGS(tb_id, flp, nh, err), + TP_ARGS(tb_id, flp, nhc, err), TP_STRUCT__entry( __field( u32, tb_id ) @@ -28,14 +28,17 @@ TRACE_EVENT(fib_table_lookup, __field( __u8, flags ) __array( __u8, src, 4 ) __array( __u8, dst, 4 ) - __array( __u8, gw, 4 ) - __array( __u8, saddr, 4 ) + __array( __u8, gw4, 4 ) + __array( __u8, gw6, 16 ) __field( u16, sport ) __field( u16, dport ) __dynamic_array(char, name, IFNAMSIZ ) ), 
TP_fast_assign( + struct in6_addr in6_zero = {}; + struct net_device *dev; + struct in6_addr *in6; __be32 *p32; __entry->tb_id = tb_id; @@ -62,33 +65,37 @@ TRACE_EVENT(fib_table_lookup, __entry->dport = 0; } - if (nh) { - struct net_device *dev; + dev = nhc ? nhc->nhc_dev : NULL; + __assign_str(name, dev ? dev->name : "-"); - p32 = (__be32 *) __entry->saddr; - *p32 = nh->nh_saddr; + if (nhc) { + if (nhc->nhc_family == AF_INET) { + p32 = (__be32 *) __entry->gw4; + *p32 = nhc->nhc_gw.ipv4; - p32 = (__be32 *) __entry->gw; - *p32 = nh->fib_nh_gw4; + in6 = (struct in6_addr *)__entry->gw6; + *in6 = in6_zero; + } else if (nhc->nhc_family == AF_INET6) { + p32 = (__be32 *) __entry->gw4; + *p32 = 0; - dev = nh->fib_nh_dev; - __assign_str(name, dev ? dev->name : "-"); + in6 = (struct in6_addr *)__entry->gw6; + *in6 = nhc->nhc_gw.ipv6; + } } else { - p32 = (__be32 *) __entry->saddr; + p32 = (__be32 *) __entry->gw4; *p32 = 0; - p32 = (__be32 *) __entry->gw; - *p32 = 0; - - __assign_str(name, "-"); + in6 = (struct in6_addr *)__entry->gw6; + *in6 = in6_zero; } ), - TP_printk("table %u oif %d iif %d proto %u %pI4/%u -> %pI4/%u tos %d scope %d flags %x ==> dev %s gw %pI4 src %pI4 err %d", + TP_printk("table %u oif %d iif %d proto %u %pI4/%u -> %pI4/%u tos %d scope %d flags %x ==> dev %s gw %pI4/%pI6c err %d", __entry->tb_id, __entry->oif, __entry->iif, __entry->proto, __entry->src, __entry->sport, __entry->dst, __entry->dport, __entry->tos, __entry->scope, __entry->flags, - __get_str(name), __entry->gw, __entry->saddr, __entry->err) + __get_str(name), __entry->gw4, __entry->gw6, __entry->err) ); #endif /* _TRACE_FIB_H */ diff --git a/net/core/filter.c b/net/core/filter.c index cdaafa3322db..08b53af84132 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4555,11 +4555,11 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 flags, bool check_mtu) { + struct fib_nh_common *nhc; 
struct in_device *in_dev; struct neighbour *neigh; struct net_device *dev; struct fib_result res; - struct fib_nh *nh; struct flowi4 fl4; int err; u32 mtu; @@ -4632,15 +4632,15 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, return BPF_FIB_LKUP_RET_FRAG_NEEDED; } - nh = &res.fi->fib_nh[res.nh_sel]; + nhc = res.nhc; /* do not handle lwt encaps right now */ - if (nh->fib_nh_lws) + if (nhc->nhc_lwtstate) return BPF_FIB_LKUP_RET_UNSUPP_LWT; - dev = nh->fib_nh_dev; - if (nh->fib_nh_gw4) - params->ipv4_dst = nh->fib_nh_gw4; + dev = nhc->nhc_dev; + if (nhc->nhc_has_gw) + params->ipv4_dst = nhc->nhc_gw.ipv4; params->rt_metric = res.fi->fib_priority; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ffbe24397dbe..15f779bd26b3 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -307,7 +307,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) .flowi4_mark = vmark ? skb->mark : 0, }; if (!fib_lookup(net, &fl4, &res, 0)) - return FIB_RES_PREFSRC(net, res); + return fib_result_prefsrc(net, &res); } else { scope = RT_SCOPE_LINK; } @@ -390,7 +390,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, dev_match = fib_info_nh_uses_dev(res.fi, dev); if (dev_match) { - ret = FIB_RES_NH(res).fib_nh_scope >= RT_SCOPE_HOST; + ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; return ret; } if (no_addr) @@ -402,7 +402,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, ret = 0; if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) { if (res.type == RTN_UNICAST) - ret = FIB_RES_NH(res).fib_nh_scope >= RT_SCOPE_HOST; + ret = FIB_RES_NHC(res)->nhc_scope >= RT_SCOPE_HOST; } return ret; diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index e6ff282bb7f4..7945f0534db7 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -45,6 +45,7 @@ static inline void fib_result_assign(struct fib_result *res, { /* we used to play games with 
refcounts, but we now use RCU */ res->fi = fi; + res->nhc = fib_info_nhc(fi, 0); } struct fib_prop { diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index df777af7e278..8e0cb1687a74 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -45,6 +45,7 @@ #include <net/nexthop.h> #include <net/lwtunnel.h> #include <net/fib_notifier.h> +#include <net/addrconf.h> #include "fib_lookup.h" @@ -1075,6 +1076,21 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh) return nh->nh_saddr; } +__be32 fib_result_prefsrc(struct net *net, struct fib_result *res) +{ + struct fib_nh_common *nhc = res->nhc; + struct fib_nh *nh; + + if (res->fi->fib_prefsrc) + return res->fi->fib_prefsrc; + + nh = container_of(nhc, struct fib_nh, nh_common); + if (nh->nh_saddr_genid == atomic_read(&net->ipv4.dev_addr_genid)) + return nh->nh_saddr; + + return fib_info_update_nh_saddr(net, nh); +} + static bool fib_valid_prefsrc(struct fib_config *cfg, __be32 fib_prefsrc) { if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || @@ -1302,6 +1318,126 @@ failure: return ERR_PTR(err); } +int fib_nexthop_info(struct sk_buff *skb, const struct fib_nh_common *nhc, + unsigned int *flags, bool skip_oif) +{ + if (nhc->nhc_flags & RTNH_F_DEAD) + *flags |= RTNH_F_DEAD; + + if (nhc->nhc_flags & RTNH_F_LINKDOWN) { + *flags |= RTNH_F_LINKDOWN; + + rcu_read_lock(); + switch (nhc->nhc_family) { + case AF_INET: + if (ip_ignore_linkdown(nhc->nhc_dev)) + *flags |= RTNH_F_DEAD; + break; + case AF_INET6: + if (ip6_ignore_linkdown(nhc->nhc_dev)) + *flags |= RTNH_F_DEAD; + break; + } + rcu_read_unlock(); + } + + if (nhc->nhc_has_gw) { + switch (nhc->nhc_family) { + case AF_INET: + if (nla_put_in_addr(skb, RTA_GATEWAY, nhc->nhc_gw.ipv4)) + goto nla_put_failure; + break; + case AF_INET6: + if (nla_put_in6_addr(skb, RTA_GATEWAY, + &nhc->nhc_gw.ipv6) < 0) + goto nla_put_failure; + break; + } + } + + *flags |= (nhc->nhc_flags & RTNH_F_ONLINK); + if (nhc->nhc_flags & RTNH_F_OFFLOAD) + *flags 
|= RTNH_F_OFFLOAD; + + if (!skip_oif && nhc->nhc_dev && + nla_put_u32(skb, RTA_OIF, nhc->nhc_dev->ifindex)) + goto nla_put_failure; + + if (nhc->nhc_lwtstate && + lwtunnel_fill_encap(skb, nhc->nhc_lwtstate) < 0) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(fib_nexthop_info); + +#if IS_ENABLED(CONFIG_IP_ROUTE_MULTIPATH) || IS_ENABLED(CONFIG_IPV6) +int fib_add_nexthop(struct sk_buff *skb, const struct fib_nh_common *nhc, + int nh_weight) +{ + const struct net_device *dev = nhc->nhc_dev; + struct rtnexthop *rtnh; + unsigned int flags = 0; + + rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); + if (!rtnh) + goto nla_put_failure; + + rtnh->rtnh_hops = nh_weight - 1; + rtnh->rtnh_ifindex = dev ? dev->ifindex : 0; + + if (fib_nexthop_info(skb, nhc, &flags, true) < 0) + goto nla_put_failure; + + rtnh->rtnh_flags = flags; + + /* length of rtnetlink header + attributes */ + rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} +EXPORT_SYMBOL_GPL(fib_add_nexthop); +#endif + +#ifdef CONFIG_IP_ROUTE_MULTIPATH +static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) +{ + struct nlattr *mp; + + mp = nla_nest_start(skb, RTA_MULTIPATH); + if (!mp) + goto nla_put_failure; + + for_nexthops(fi) { + if (fib_add_nexthop(skb, &nh->nh_common, nh->fib_nh_weight) < 0) + goto nla_put_failure; +#ifdef CONFIG_IP_ROUTE_CLASSID + if (nh->nh_tclassid && + nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) + goto nla_put_failure; +#endif + } endfor_nexthops(fi); + + nla_nest_end(skb, mp); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} +#else +static int fib_add_multipath(struct sk_buff *skb, struct fib_info *fi) +{ + return 0; +} +#endif + int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) @@ -1342,72 +1478,23 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, 
u32 seq, int event, nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; if (fi->fib_nhs == 1) { - if (fi->fib_nh->fib_nh_gw4 && - nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->fib_nh_gw4)) - goto nla_put_failure; - if (fi->fib_nh->fib_nh_oif && - nla_put_u32(skb, RTA_OIF, fi->fib_nh->fib_nh_oif)) + struct fib_nh *nh = &fi->fib_nh[0]; + unsigned int flags = 0; + + if (fib_nexthop_info(skb, &nh->nh_common, &flags, false) < 0) goto nla_put_failure; - if (fi->fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) { - rcu_read_lock(); - if (ip_ignore_linkdown(fi->fib_nh->fib_nh_dev)) - rtm->rtm_flags |= RTNH_F_DEAD; - rcu_read_unlock(); - } - if (fi->fib_nh->fib_nh_flags & RTNH_F_OFFLOAD) - rtm->rtm_flags |= RTNH_F_OFFLOAD; + + rtm->rtm_flags = flags; #ifdef CONFIG_IP_ROUTE_CLASSID - if (fi->fib_nh[0].nh_tclassid && - nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) + if (nh->nh_tclassid && + nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) goto nla_put_failure; #endif - if (fi->fib_nh->fib_nh_lws && - lwtunnel_fill_encap(skb, fi->fib_nh->fib_nh_lws) < 0) + } else { + if (fib_add_multipath(skb, fi) < 0) goto nla_put_failure; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH - if (fi->fib_nhs > 1) { - struct rtnexthop *rtnh; - struct nlattr *mp; - - mp = nla_nest_start(skb, RTA_MULTIPATH); - if (!mp) - goto nla_put_failure; - - for_nexthops(fi) { - rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); - if (!rtnh) - goto nla_put_failure; - - rtnh->rtnh_flags = nh->fib_nh_flags & 0xFF; - if (nh->fib_nh_flags & RTNH_F_LINKDOWN) { - rcu_read_lock(); - if (ip_ignore_linkdown(nh->fib_nh_dev)) - rtnh->rtnh_flags |= RTNH_F_DEAD; - rcu_read_unlock(); - } - rtnh->rtnh_hops = nh->fib_nh_weight - 1; - rtnh->rtnh_ifindex = nh->fib_nh_oif; - if (nh->fib_nh_gw4 && - nla_put_in_addr(skb, RTA_GATEWAY, nh->fib_nh_gw4)) - goto nla_put_failure; -#ifdef CONFIG_IP_ROUTE_CLASSID - if (nh->nh_tclassid && - nla_put_u32(skb, RTA_FLOW, nh->nh_tclassid)) - goto nla_put_failure; -#endif - if (nh->fib_nh_lws && 
- lwtunnel_fill_encap(skb, nh->fib_nh_lws) < 0) - goto nla_put_failure; - - /* length of rtnetlink header + attributes */ - rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; - } endfor_nexthops(fi); - - nla_nest_end(skb, mp); - } -#endif nlmsg_end(skb, nlh); return 0; @@ -1762,20 +1849,22 @@ void fib_select_multipath(struct fib_result *res, int hash) struct net *net = fi->fib_net; bool first = false; - for_nexthops(fi) { + change_nexthops(fi) { if (net->ipv4.sysctl_fib_multipath_use_neigh) { - if (!fib_good_nh(nh)) + if (!fib_good_nh(nexthop_nh)) continue; if (!first) { res->nh_sel = nhsel; + res->nhc = &nexthop_nh->nh_common; first = true; } } - if (hash > atomic_read(&nh->fib_nh_upper_bound)) + if (hash > atomic_read(&nexthop_nh->fib_nh_upper_bound)) continue; res->nh_sel = nhsel; + res->nhc = &nexthop_nh->nh_common; return; } endfor_nexthops(fi); } @@ -1802,5 +1891,5 @@ void fib_select_path(struct net *net, struct fib_result *res, check_saddr: if (!fl4->saddr) - fl4->saddr = FIB_RES_PREFSRC(net, *res); + fl4->saddr = fib_result_prefsrc(net, res); } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 1e3b492690f9..334f723bdf80 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1470,17 +1470,17 @@ found: if (fi->fib_flags & RTNH_F_DEAD) continue; for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { - const struct fib_nh *nh = &fi->fib_nh[nhsel]; + struct fib_nh_common *nhc = fib_info_nhc(fi, nhsel); - if (nh->fib_nh_flags & RTNH_F_DEAD) + if (nhc->nhc_flags & RTNH_F_DEAD) continue; - if (ip_ignore_linkdown(nh->fib_nh_dev) && - nh->fib_nh_flags & RTNH_F_LINKDOWN && + if (ip_ignore_linkdown(nhc->nhc_dev) && + nhc->nhc_flags & RTNH_F_LINKDOWN && !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE)) continue; if (!(flp->flowi4_flags & FLOWI_FLAG_SKIP_NH_OIF)) { if (flp->flowi4_oif && - flp->flowi4_oif != nh->fib_nh_oif) + flp->flowi4_oif != nhc->nhc_oif) continue; } @@ -1490,6 +1490,7 @@ found: res->prefix = htonl(n->key); res->prefixlen = KEYLENGTH - 
fa->fa_slen; res->nh_sel = nhsel; + res->nhc = nhc; res->type = fa->fa_type; res->scope = fi->fib_scope; res->fi = fi; @@ -1498,7 +1499,7 @@ found: #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->semantic_match_passed); #endif - trace_fib_table_lookup(tb->tb_id, flp, nh, err); + trace_fib_table_lookup(tb->tb_id, flp, nhc, err); return err; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7977514d90f5..f3f2adf630d4 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -778,8 +778,10 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow neigh_event_send(n, NULL); } else { if (fib_lookup(net, fl4, &res, 0) == 0) { - struct fib_nh *nh = &FIB_RES_NH(res); + struct fib_nh_common *nhc = FIB_RES_NHC(res); + struct fib_nh *nh; + nh = container_of(nhc, struct fib_nh, nh_common); update_or_create_fnhe(nh, fl4->daddr, new_gw, 0, false, jiffies + ip_rt_gc_timeout); @@ -1027,8 +1029,10 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) rcu_read_lock(); if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) { - struct fib_nh *nh = &FIB_RES_NH(res); + struct fib_nh_common *nhc = FIB_RES_NHC(res); + struct fib_nh *nh; + nh = container_of(nhc, struct fib_nh, nh_common); update_or_create_fnhe(nh, fl4->daddr, 0, mtu, lock, jiffies + ip_rt_mtu_expires); } @@ -1235,7 +1239,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt) rcu_read_lock(); if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0) - src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res); + src = fib_result_prefsrc(dev_net(rt->dst.dev), &res); else src = inet_select_addr(rt->dst.dev, rt_nexthop(rt, iph->daddr), @@ -1354,9 +1358,9 @@ static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) { + struct fib_nh_common *nhc = res->nhc; + struct net_device *dev = nhc->nhc_dev; struct fib_info *fi = res->fi; - struct fib_nh *nh = 
&fi->fib_nh[res->nh_sel]; - struct net_device *dev = nh->fib_nh_dev; u32 mtu = 0; if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu || @@ -1364,6 +1368,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) mtu = fi->fib_mtu; if (likely(!mtu)) { + struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); struct fib_nh_exception *fnhe; fnhe = find_exception(nh, daddr); @@ -1374,7 +1379,7 @@ u32 ip_mtu_from_fib_result(struct fib_result *res, __be32 daddr) if (likely(!mtu)) mtu = min(READ_ONCE(dev->mtu), IP_MAX_MTU); - return mtu - lwtunnel_headroom(nh->fib_nh_lws, mtu); + return mtu - lwtunnel_headroom(nhc->nhc_lwtstate, mtu); } static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, @@ -1529,7 +1534,8 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, bool cached = false; if (fi) { - struct fib_nh *nh = &FIB_RES_NH(*res); + struct fib_nh_common *nhc = FIB_RES_NHC(*res); + struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); if (nh->fib_nh_gw4 && nh->fib_nh_scope == RT_SCOPE_LINK) { rt->rt_gateway = nh->fib_nh_gw4; @@ -1699,15 +1705,18 @@ static int __mkroute_input(struct sk_buff *skb, struct in_device *in_dev, __be32 daddr, __be32 saddr, u32 tos) { + struct fib_nh_common *nhc = FIB_RES_NHC(*res); + struct net_device *dev = nhc->nhc_dev; struct fib_nh_exception *fnhe; struct rtable *rth; + struct fib_nh *nh; int err; struct in_device *out_dev; bool do_cache; u32 itag = 0; /* get a working reference to the output device */ - out_dev = __in_dev_get_rcu(FIB_RES_DEV(*res)); + out_dev = __in_dev_get_rcu(dev); if (!out_dev) { net_crit_ratelimited("Bug in ip_route_input_slow(). 
Please report.\n"); return -EINVAL; @@ -1724,10 +1733,13 @@ static int __mkroute_input(struct sk_buff *skb, do_cache = res->fi && !itag; if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && - skb->protocol == htons(ETH_P_IP) && - (IN_DEV_SHARED_MEDIA(out_dev) || - inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) - IPCB(skb)->flags |= IPSKB_DOREDIRECT; + skb->protocol == htons(ETH_P_IP)) { + __be32 gw = nhc->nhc_family == AF_INET ? nhc->nhc_gw.ipv4 : 0; + + if (IN_DEV_SHARED_MEDIA(out_dev) || + inet_addr_onlink(out_dev, saddr, gw)) + IPCB(skb)->flags |= IPSKB_DOREDIRECT; + } if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is @@ -1744,12 +1756,13 @@ static int __mkroute_input(struct sk_buff *skb, } } - fnhe = find_exception(&FIB_RES_NH(*res), daddr); + nh = container_of(nhc, struct fib_nh, nh_common); + fnhe = find_exception(nh, daddr); if (do_cache) { if (fnhe) rth = rcu_dereference(fnhe->fnhe_rth_input); else - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + rth = rcu_dereference(nh->nh_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); goto out; @@ -2043,7 +2056,11 @@ local_input: do_cache = false; if (res->fi) { if (!itag) { - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + struct fib_nh_common *nhc = FIB_RES_NHC(*res); + struct fib_nh *nh; + + nh = container_of(nhc, struct fib_nh, nh_common); + rth = rcu_dereference(nh->nh_rth_input); if (rt_cache_valid(rth)) { skb_dst_set_noref(skb, &rth->dst); err = 0; @@ -2073,15 +2090,17 @@ local_input: } if (do_cache) { - struct fib_nh *nh = &FIB_RES_NH(*res); + struct fib_nh_common *nhc = FIB_RES_NHC(*res); + struct fib_nh *nh; - rth->dst.lwtstate = lwtstate_get(nh->fib_nh_lws); + rth->dst.lwtstate = lwtstate_get(nhc->nhc_lwtstate); if (lwtunnel_input_redirect(rth->dst.lwtstate)) { WARN_ON(rth->dst.input == lwtunnel_input); rth->dst.lwtstate->orig_input = rth->dst.input; rth->dst.input = lwtunnel_input; } + nh = container_of(nhc, 
struct fib_nh, nh_common); if (unlikely(!rt_cache_route(nh, rth))) rt_add_uncached_list(rth); } @@ -2253,8 +2272,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, fnhe = NULL; do_cache &= fi != NULL; if (fi) { + struct fib_nh_common *nhc = FIB_RES_NHC(*res); + struct fib_nh *nh = container_of(nhc, struct fib_nh, nh_common); struct rtable __rcu **prth; - struct fib_nh *nh = &FIB_RES_NH(*res); fnhe = find_exception(nh, fl4->daddr); if (!do_cache) @@ -2264,8 +2284,8 @@ static struct rtable *__mkroute_output(const struct fib_result *res, } else { if (unlikely(fl4->flowi4_flags & FLOWI_FLAG_KNOWN_NH && - !(nh->fib_nh_gw4 && - nh->fib_nh_scope == RT_SCOPE_LINK))) { + !(nhc->nhc_has_gw && + nhc->nhc_scope == RT_SCOPE_LINK))) { do_cache = false; goto add; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e0ee30cbd079..6e89151693d0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4582,73 +4582,6 @@ static size_t rt6_nlmsg_size(struct fib6_info *rt) + nexthop_len; } -static int rt6_nexthop_info(struct sk_buff *skb, const struct fib6_nh *fib6_nh, - unsigned int *flags, bool skip_oif) -{ - if (fib6_nh->fib_nh_flags & RTNH_F_DEAD) - *flags |= RTNH_F_DEAD; - - if (fib6_nh->fib_nh_flags & RTNH_F_LINKDOWN) { - *flags |= RTNH_F_LINKDOWN; - - rcu_read_lock(); - if (ip6_ignore_linkdown(fib6_nh->fib_nh_dev)) - *flags |= RTNH_F_DEAD; - rcu_read_unlock(); - } - - if (fib6_nh->fib_nh_has_gw) { - if (nla_put_in6_addr(skb, RTA_GATEWAY, &fib6_nh->fib_nh_gw6) < 0) - goto nla_put_failure; - } - - *flags |= (fib6_nh->fib_nh_flags & RTNH_F_ONLINK); - if (fib6_nh->fib_nh_flags & RTNH_F_OFFLOAD) - *flags |= RTNH_F_OFFLOAD; - - /* not needed for multipath encoding b/c it has a rtnexthop struct */ - if (!skip_oif && fib6_nh->fib_nh_dev && - nla_put_u32(skb, RTA_OIF, fib6_nh->fib_nh_dev->ifindex)) - goto nla_put_failure; - - if (fib6_nh->fib_nh_lws && - lwtunnel_fill_encap(skb, fib6_nh->fib_nh_lws) < 0) - goto nla_put_failure; - - return 0; - -nla_put_failure: 
- return -EMSGSIZE; -} - -/* add multipath next hop */ -static int rt6_add_nexthop(struct sk_buff *skb, const struct fib6_nh *fib6_nh) -{ - const struct net_device *dev = fib6_nh->fib_nh_dev; - struct rtnexthop *rtnh; - unsigned int flags = 0; - - rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); - if (!rtnh) - goto nla_put_failure; - - rtnh->rtnh_hops = fib6_nh->fib_nh_weight - 1; - rtnh->rtnh_ifindex = dev ? dev->ifindex : 0; - - if (rt6_nexthop_info(skb, fib6_nh, &flags, true) < 0) - goto nla_put_failure; - - rtnh->rtnh_flags = flags; - - /* length of rtnetlink header + attributes */ - rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *)rtnh; - - return 0; - -nla_put_failure: - return -EMSGSIZE; -} - static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct fib6_info *rt, struct dst_entry *dst, struct in6_addr *dest, struct in6_addr *src, @@ -4765,19 +4698,21 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb, if (!mp) goto nla_put_failure; - if (rt6_add_nexthop(skb, &rt->fib6_nh) < 0) + if (fib_add_nexthop(skb, &rt->fib6_nh.nh_common, + rt->fib6_nh.fib_nh_weight) < 0) goto nla_put_failure; list_for_each_entry_safe(sibling, next_sibling, &rt->fib6_siblings, fib6_siblings) { - if (rt6_add_nexthop(skb, &sibling->fib6_nh) < 0) + if (fib_add_nexthop(skb, &sibling->fib6_nh.nh_common, + sibling->fib6_nh.fib_nh_weight) < 0) goto nla_put_failure; } nla_nest_end(skb, mp); } else { - if (rt6_nexthop_info(skb, &rt->fib6_nh, &rtm->rtm_flags, - false) < 0) + if (fib_nexthop_info(skb, &rt->fib6_nh.nh_common, + &rtm->rtm_flags, false) < 0) goto nla_put_failure; } |