diff options
author | David S. Miller <davem@davemloft.net> | 2018-03-29 14:10:31 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-03-29 14:10:31 -0400 |
commit | b349e0b5ec5d7be57ac243fb08ae8b994c928165 (patch) | |
tree | 51418fe5934da050f867b26bf0ee836e14cbc38a /net | |
parent | 6e2135ce54b72f8b2b20cef2a06ae6acb77a3431 (diff) | |
parent | 37923ed6b8cea94d7d76038e2f72c57a0b45daab (diff) | |
download | linux-b349e0b5ec5d7be57ac243fb08ae8b994c928165.tar.gz linux-b349e0b5ec5d7be57ac243fb08ae8b994c928165.tar.bz2 linux-b349e0b5ec5d7be57ac243fb08ae8b994c928165.zip |
Merge branch 'net-Allow-FIB-notifiers-to-fail-add-and-replace'
David Ahern says:
====================
net: Allow FIB notifiers to fail add and replace
I wanted to revisit how resource overload is handled for hardware offload
of FIB entries and rules. At the moment, the in-kernel fib notifier can
tell a driver about a route or rule add, replace, and delete, but the
notifier can not affect the action. Specifically, in the case of mlxsw
if a route or rule add is going to overflow the ASIC resources the only
recourse is to abort hardware offload. Aborting offload is akin to taking
down the switch as the path from data plane to the control plane simply
can not support the traffic bandwidth of the front panel ports. Further,
the current state of FIB notifiers is inconsistent with other resources
where a driver can affect a user request - e.g., enslavement of a port
into a bridge or a VRF.
As a result of the work done over the past 3+ years, I believe we are
at a point where we can bring consistency to the stack and offloads,
and reliably allow the FIB notifiers to fail a request, pushing an error
along with a suitable error message back to the user. Rather than
aborting offload when the switch is out of resources, userspace is simply
prevented from adding more routes and has a clear indication of why.
This set does not resolve the corner case where rules or routes not
supported by the device are installed prior to the driver getting loaded
and registering for FIB notifications. In that case, hardware offload has
not been established and it can refuse to offload anything, sending
errors back to userspace via extack. Since conceptually the driver owns
the netdevices associated with its asic, this corner case mainly applies
to unsupported rules and any races during the bringup phase.
Patch 1 fixes call_fib_notifiers to extract the errno from the encoded
response from handlers.
Patches 2-5 allow the call to call_fib_notifiers to fail the add or
replace of a route or rule.
Patch 6 adds a simple resource controller to netdevsim to illustrate
how a FIB resource controller can limit the number of route entries.
Changes since RFC
- correct return code for call_fib_notifier
- dropped patch 6 exporting devlink symbols
- limited example resource controller to init_net only
- updated Kconfig for netdevsim to use MAY_USE_DEVLINK
- updated cover letter regarding startup case noted by Ido
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/core/fib_notifier.c | 10 | ||||
-rw-r--r-- | net/core/fib_rules.c | 6 | ||||
-rw-r--r-- | net/ipv4/fib_trie.c | 27 | ||||
-rw-r--r-- | net/ipv6/ip6_fib.c | 16 |
4 files changed, 48 insertions, 11 deletions
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index 614b985c92a4..13a40b831d6d 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -13,16 +13,22 @@ int call_fib_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info) { + int err; + info->net = net; - return nb->notifier_call(nb, event_type, info); + err = nb->notifier_call(nb, event_type, info); + return notifier_to_errno(err); } EXPORT_SYMBOL(call_fib_notifier); int call_fib_notifiers(struct net *net, enum fib_event_type event_type, struct fib_notifier_info *info) { + int err; + info->net = net; - return atomic_notifier_call_chain(&fib_chain, event_type, info); + err = atomic_notifier_call_chain(&fib_chain, event_type, info); + return notifier_to_errno(err); } EXPORT_SYMBOL(call_fib_notifiers); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 9d87ce868402..33958f84c173 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -631,6 +631,11 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout_free; + err = call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, + extack); + if (err < 0) + goto errout_free; + list_for_each_entry(r, &ops->rules_list, list) { if (r->pref > rule->pref) break; @@ -667,7 +672,6 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, if (rule->tun_id) ip_tunnel_need_metadata(); - call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack); notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); flush_route_cache(ops); rules_ops_put(ops); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index fac0b73e24d1..3dcffd3ce98c 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1065,6 +1065,9 @@ noleaf: return -ENOMEM; } +/* fib notifier for ADD is sent before calling fib_insert_alias with + * the expectation that the only possible failure ENOMEM + */ static int fib_insert_alias(struct trie *t, struct key_vector *tp, struct key_vector *l, struct fib_alias *new, struct fib_alias *fa, t_key key) @@ -1216,8 +1219,13 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; - call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, - key, plen, new_fa, extack); + err = call_fib_entry_notifiers(net, + FIB_EVENT_ENTRY_REPLACE, + key, plen, new_fa, + extack); + if (err) + goto out_free_new_fa; + rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, nlflags); @@ -1263,21 +1271,32 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; + err = call_fib_entry_notifiers(net, event, key, plen, new_fa, extack); + if (err) + goto out_free_new_fa; + /* Insert new entry to the list. */ err = fib_insert_alias(t, tp, l, new_fa, fa, key); if (err) - goto out_free_new_fa; + goto out_fib_notif; if (!plen) tb->tb_num_default++; rt_cache_flush(cfg->fc_nlinfo.nl_net); - call_fib_entry_notifiers(net, event, key, plen, new_fa, extack); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, &cfg->fc_nlinfo, nlflags); succeeded: return 0; +out_fib_notif: + /* notifier was sent that entry would be added to trie, but + * the add failed and need to recover. Only failure for + * fib_insert_alias is ENOMEM. + */ + NL_SET_ERR_MSG(extack, "Failed to insert route into trie"); + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, + plen, new_fa, NULL); out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 908b8e5b615a..deab2db6692e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1007,12 +1007,16 @@ add: if (err) return err; + err = call_fib6_entry_notifiers(info->nl_net, + FIB_EVENT_ENTRY_ADD, + rt, extack); + if (err) + return err; + rcu_assign_pointer(rt->rt6_next, iter); atomic_inc(&rt->rt6i_ref); rcu_assign_pointer(rt->rt6i_node, fn); rcu_assign_pointer(*ins, rt); - call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD, - rt, extack); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; @@ -1036,12 +1040,16 @@ add: if (err) return err; + err = call_fib6_entry_notifiers(info->nl_net, + FIB_EVENT_ENTRY_REPLACE, + rt, extack); + if (err) + return err; + atomic_inc(&rt->rt6i_ref); rcu_assign_pointer(rt->rt6i_node, fn); rt->rt6_next = iter->rt6_next; rcu_assign_pointer(*ins, rt); - call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, - rt, extack); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); if (!(fn->fn_flags & RTN_RTINFO)) { |