summary | refs | log | tree | commit | diff | stats
path: root/net
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2018-03-29 14:10:31 -0400
committer: David S. Miller <davem@davemloft.net> 2018-03-29 14:10:31 -0400
commit: b349e0b5ec5d7be57ac243fb08ae8b994c928165 (patch)
tree: 51418fe5934da050f867b26bf0ee836e14cbc38a /net
parent: 6e2135ce54b72f8b2b20cef2a06ae6acb77a3431 (diff)
parent: 37923ed6b8cea94d7d76038e2f72c57a0b45daab (diff)
download: linux-b349e0b5ec5d7be57ac243fb08ae8b994c928165.tar.gz
linux-b349e0b5ec5d7be57ac243fb08ae8b994c928165.tar.bz2
linux-b349e0b5ec5d7be57ac243fb08ae8b994c928165.zip
Merge branch 'net-Allow-FIB-notifiers-to-fail-add-and-replace'
David Ahern says:

====================
net: Allow FIB notifiers to fail add and replace

I wanted to revisit how resource overload is handled for hardware offload of FIB entries and rules. At the moment, the in-kernel fib notifier can tell a driver about a route or rule add, replace, and delete, but the notifier cannot affect the action. Specifically, in the case of mlxsw if a route or rule add is going to overflow the ASIC resources the only recourse is to abort hardware offload. Aborting offload is akin to taking down the switch as the path from data plane to the control plane simply cannot support the traffic bandwidth of the front panel ports. Further, the current state of FIB notifiers is inconsistent with other resources where a driver can affect a user request - e.g., enslavement of a port into a bridge or a VRF.

As a result of the work done over the past 3+ years, I believe we are at a point where we can bring consistency to the stack and offloads, and reliably allow the FIB notifiers to fail a request, pushing an error along with a suitable error message back to the user. Rather than aborting offload when the switch is out of resources, userspace is simply prevented from adding more routes and has a clear indication of why.

This set does not resolve the corner case where rules or routes not supported by the device are installed prior to the driver getting loaded and registering for FIB notifications. In that case, hardware offload has not been established and it can refuse to offload anything, sending errors back to userspace via extack. Since conceptually the driver owns the netdevices associated with its ASIC, this corner case mainly applies to unsupported rules and any races during the bringup phase.

Patch 1 fixes call_fib_notifiers to extract the errno from the encoded response from handlers.

Patches 2-5 allow the call to call_fib_notifiers to fail the add or replace of a route or rule.

Patch 6 adds a simple resource controller to netdevsim to illustrate how a FIB resource controller can limit the number of route entries.

Changes since RFC
- correct return code for call_fib_notifier
- dropped patch 6 exporting devlink symbols
- limited example resource controller to init_net only
- updated Kconfig for netdevsim to use MAY_USE_DEVLINK
- updated cover letter regarding startup case noted by Ido
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- net/core/fib_notifier.c 10
-rw-r--r-- net/core/fib_rules.c 6
-rw-r--r-- net/ipv4/fib_trie.c 27
-rw-r--r-- net/ipv6/ip6_fib.c 16
4 files changed, 48 insertions, 11 deletions
diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c
index 614b985c92a4..13a40b831d6d 100644
--- a/net/core/fib_notifier.c
+++ b/net/core/fib_notifier.c
@@ -13,16 +13,22 @@ int call_fib_notifier(struct notifier_block *nb, struct net *net,
enum fib_event_type event_type,
struct fib_notifier_info *info)
{
+ int err;
+
info->net = net;
- return nb->notifier_call(nb, event_type, info);
+ err = nb->notifier_call(nb, event_type, info);
+ return notifier_to_errno(err);
}
EXPORT_SYMBOL(call_fib_notifier);
int call_fib_notifiers(struct net *net, enum fib_event_type event_type,
struct fib_notifier_info *info)
{
+ int err;
+
info->net = net;
- return atomic_notifier_call_chain(&fib_chain, event_type, info);
+ err = atomic_notifier_call_chain(&fib_chain, event_type, info);
+ return notifier_to_errno(err);
}
EXPORT_SYMBOL(call_fib_notifiers);
diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c
index 9d87ce868402..33958f84c173 100644
--- a/net/core/fib_rules.c
+++ b/net/core/fib_rules.c
@@ -631,6 +631,11 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (err < 0)
goto errout_free;
+ err = call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops,
+ extack);
+ if (err < 0)
+ goto errout_free;
+
list_for_each_entry(r, &ops->rules_list, list) {
if (r->pref > rule->pref)
break;
@@ -667,7 +672,6 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh,
if (rule->tun_id)
ip_tunnel_need_metadata();
- call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack);
notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid);
flush_route_cache(ops);
rules_ops_put(ops);
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index fac0b73e24d1..3dcffd3ce98c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1065,6 +1065,9 @@ noleaf:
return -ENOMEM;
}
+/* fib notifier for ADD is sent before calling fib_insert_alias with
+ * the expectation that the only possible failure is ENOMEM
+ */
static int fib_insert_alias(struct trie *t, struct key_vector *tp,
struct key_vector *l, struct fib_alias *new,
struct fib_alias *fa, t_key key)
@@ -1216,8 +1219,13 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
- call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE,
- key, plen, new_fa, extack);
+ err = call_fib_entry_notifiers(net,
+ FIB_EVENT_ENTRY_REPLACE,
+ key, plen, new_fa,
+ extack);
+ if (err)
+ goto out_free_new_fa;
+
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen,
tb->tb_id, &cfg->fc_nlinfo, nlflags);
@@ -1263,21 +1271,32 @@ int fib_table_insert(struct net *net, struct fib_table *tb,
new_fa->tb_id = tb->tb_id;
new_fa->fa_default = -1;
+ err = call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
+ if (err)
+ goto out_free_new_fa;
+
/* Insert new entry to the list. */
err = fib_insert_alias(t, tp, l, new_fa, fa, key);
if (err)
- goto out_free_new_fa;
+ goto out_fib_notif;
if (!plen)
tb->tb_num_default++;
rt_cache_flush(cfg->fc_nlinfo.nl_net);
- call_fib_entry_notifiers(net, event, key, plen, new_fa, extack);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id,
&cfg->fc_nlinfo, nlflags);
succeeded:
return 0;
+out_fib_notif:
+ /* notifier was sent that entry would be added to trie, but
+ * the add failed and need to recover. Only failure for
+ * fib_insert_alias is ENOMEM.
+ */
+ NL_SET_ERR_MSG(extack, "Failed to insert route into trie");
+ call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key,
+ plen, new_fa, NULL);
out_free_new_fa:
kmem_cache_free(fn_alias_kmem, new_fa);
out:
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 908b8e5b615a..deab2db6692e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -1007,12 +1007,16 @@ add:
if (err)
return err;
+ err = call_fib6_entry_notifiers(info->nl_net,
+ FIB_EVENT_ENTRY_ADD,
+ rt, extack);
+ if (err)
+ return err;
+
rcu_assign_pointer(rt->rt6_next, iter);
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rcu_assign_pointer(*ins, rt);
- call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD,
- rt, extack);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
info->nl_net->ipv6.rt6_stats->fib_rt_entries++;
@@ -1036,12 +1040,16 @@ add:
if (err)
return err;
+ err = call_fib6_entry_notifiers(info->nl_net,
+ FIB_EVENT_ENTRY_REPLACE,
+ rt, extack);
+ if (err)
+ return err;
+
atomic_inc(&rt->rt6i_ref);
rcu_assign_pointer(rt->rt6i_node, fn);
rt->rt6_next = iter->rt6_next;
rcu_assign_pointer(*ins, rt);
- call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE,
- rt, extack);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
if (!(fn->fn_flags & RTN_RTINFO)) {